fabrictools 0.7.2__tar.gz → 0.7.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {fabrictools-0.7.2 → fabrictools-0.7.4}/PKG-INFO +1 -1
  2. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/_version.py +1 -1
  3. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/io/lakehouse.py +36 -0
  4. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/business.py +1 -0
  5. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/PKG-INFO +1 -1
  6. {fabrictools-0.7.2 → fabrictools-0.7.4}/README.md +0 -0
  7. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/__init__.py +0 -0
  8. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/core/__init__.py +0 -0
  9. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/core/logging.py +0 -0
  10. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/core/paths.py +0 -0
  11. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/core/spark.py +0 -0
  12. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/__init__.py +0 -0
  13. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/_targets.py +0 -0
  14. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/date.py +0 -0
  15. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/geo.py +0 -0
  16. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/pipeline.py +0 -0
  17. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/io/__init__.py +0 -0
  18. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/io/discovery.py +0 -0
  19. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/io/warehouse.py +0 -0
  20. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/pipelines/__init__.py +0 -0
  21. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/pipelines/config.py +0 -0
  22. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/__init__.py +0 -0
  23. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/aggregations.py +0 -0
  24. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/pipeline.py +0 -0
  25. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/resolve.py +0 -0
  26. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/schema.py +0 -0
  27. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/semantic.py +0 -0
  28. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/transform.py +0 -0
  29. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/quality/__init__.py +0 -0
  30. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/quality/clean.py +0 -0
  31. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/quality/pipeline.py +0 -0
  32. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/quality/scan.py +0 -0
  33. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/__init__.py +0 -0
  34. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/columns.py +0 -0
  35. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/filter.py +0 -0
  36. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/merge.py +0 -0
  37. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/rows.py +0 -0
  38. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/text.py +0 -0
  39. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/wide_month_suffix.py +0 -0
  40. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/SOURCES.txt +0 -0
  41. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/dependency_links.txt +0 -0
  42. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/requires.txt +0 -0
  43. {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/top_level.txt +0 -0
  44. {fabrictools-0.7.2 → fabrictools-0.7.4}/pyproject.toml +0 -0
  45. {fabrictools-0.7.2 → fabrictools-0.7.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fabrictools
3
- Version: 0.7.2
3
+ Version: 0.7.4
4
4
  Summary: User-friendly PySpark helpers for Microsoft Fabric Lakehouses and Warehouses
5
5
  Author-email: Willy Kinfoussia <willy.kinfoussia@gmail.com>
6
6
  License: MIT
@@ -1,3 +1,3 @@
1
1
  """Single source of truth for package version."""
2
2
 
3
- __version__ = "0.7.2"
3
+ __version__ = "0.7.4"
@@ -186,6 +186,21 @@ def _dedupe_preserve_order(values: list[str]) -> list[str]:
186
186
  return deduped
187
187
 
188
188
 
189
+ def _enable_delta_column_mapping_on_path(spark: SparkSession, full_path: str) -> None:
190
+ """Upgrade an existing Delta table path to column mapping mode ``name``."""
191
+ escaped_path = full_path.replace("`", "``")
192
+ spark.sql(
193
+ f"""
194
+ ALTER TABLE delta.`{escaped_path}`
195
+ SET TBLPROPERTIES (
196
+ 'delta.columnMapping.mode' = 'name',
197
+ 'delta.minReaderVersion' = '2',
198
+ 'delta.minWriterVersion' = '5'
199
+ )
200
+ """
201
+ )
202
+
203
+
189
204
  def _detect_partition_columns(
190
205
  df: DataFrame, threshold_bytes: int = 1_073_741_824
191
206
  ) -> list[str]:
@@ -281,6 +296,7 @@ def write_lakehouse(
281
296
  spark: Optional[SparkSession] = None,
282
297
  *,
283
298
  normalize_column_names: bool = True,
299
+ enable_column_mapping: bool = False,
284
300
  auto_partition: bool = True,
285
301
  auto_partition_threshold_bytes: int = 1_073_741_824,
286
302
  ) -> None:
@@ -303,6 +319,9 @@ def write_lakehouse(
303
319
  :py:func:`fabrictools.rename_columns_normalized` before
304
320
  resolving ``partition_by`` and writing. If ``False``, keep physical column
305
321
  names unchanged.
322
+ :param enable_column_mapping: If ``True`` and ``format="delta"``, writes table
323
+ properties required for Delta column mapping (mode ``name``), allowing
324
+ column names with spaces or special characters.
306
325
  :param auto_partition: If ``True`` (default), automatically partition the data
307
326
  by detected date columns if they exist.
308
327
  :type df: ~pyspark.sql.DataFrame
@@ -313,6 +332,7 @@ def write_lakehouse(
313
332
  :type format: str
314
333
  :type spark: ~pyspark.sql.SparkSession | None
315
334
  :type normalize_column_names: bool
335
+ :type enable_column_mapping: bool
316
336
 
317
337
  .. rubric:: Example
318
338
 
@@ -367,6 +387,22 @@ def write_lakehouse(
367
387
  writer = df.write.format(format).option("overwriteSchema", "true").mode(mode)
368
388
  if format.lower() == "parquet":
369
389
  writer = writer.option("datetimeRebaseMode", "CORRECTED")
390
+ elif format.lower() == "delta" and enable_column_mapping:
391
+ # If target already exists as a Delta table, upgrade protocol first so
392
+ # overwrite with business-friendly names (spaces, capitals, etc.) works.
393
+ try:
394
+ from delta.tables import DeltaTable # type: ignore[import-untyped] # noqa: PLC0415
395
+
396
+ if DeltaTable.isDeltaTable(_, full_path):
397
+ _enable_delta_column_mapping_on_path(_, full_path)
398
+ except Exception:
399
+ # Non-blocking: the write options below still apply for new tables.
400
+ pass
401
+ writer = (
402
+ writer.option("delta.columnMapping.mode", "name")
403
+ .option("delta.minReaderVersion", "2")
404
+ .option("delta.minWriterVersion", "5")
405
+ )
370
406
 
371
407
  if effective_partition_by:
372
408
  writer = writer.partitionBy(*effective_partition_by)
@@ -130,6 +130,7 @@ def make_business_ready(
130
130
  mode=mode,
131
131
  spark=_spark,
132
132
  normalize_column_names=False,
133
+ enable_column_mapping=True,
133
134
  )
134
135
 
135
136
  processed_tables.append(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fabrictools
3
- Version: 0.7.2
3
+ Version: 0.7.4
4
4
  Summary: User-friendly PySpark helpers for Microsoft Fabric Lakehouses and Warehouses
5
5
  Author-email: Willy Kinfoussia <willy.kinfoussia@gmail.com>
6
6
  License: MIT
File without changes
File without changes
File without changes