fabrictools 0.7.2__tar.gz → 0.7.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fabrictools-0.7.2 → fabrictools-0.7.4}/PKG-INFO +1 -1
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/_version.py +1 -1
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/io/lakehouse.py +36 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/business.py +1 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/PKG-INFO +1 -1
- {fabrictools-0.7.2 → fabrictools-0.7.4}/README.md +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/__init__.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/core/__init__.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/core/logging.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/core/paths.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/core/spark.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/__init__.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/_targets.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/date.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/geo.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/dimensions/pipeline.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/io/__init__.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/io/discovery.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/io/warehouse.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/pipelines/__init__.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/pipelines/config.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/__init__.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/aggregations.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/pipeline.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/resolve.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/schema.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/semantic.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/prepare/transform.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/quality/__init__.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/quality/clean.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/quality/pipeline.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/quality/scan.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/__init__.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/columns.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/filter.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/merge.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/rows.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/text.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools/transform/wide_month_suffix.py +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/SOURCES.txt +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/dependency_links.txt +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/requires.txt +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/fabrictools.egg-info/top_level.txt +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/pyproject.toml +0 -0
- {fabrictools-0.7.2 → fabrictools-0.7.4}/setup.cfg +0 -0
|
@@ -186,6 +186,21 @@ def _dedupe_preserve_order(values: list[str]) -> list[str]:
|
|
|
186
186
|
return deduped
|
|
187
187
|
|
|
188
188
|
|
|
189
|
+
def _enable_delta_column_mapping_on_path(spark: SparkSession, full_path: str) -> None:
|
|
190
|
+
"""Upgrade an existing Delta table path to column mapping mode ``name``."""
|
|
191
|
+
escaped_path = full_path.replace("`", "``")
|
|
192
|
+
spark.sql(
|
|
193
|
+
f"""
|
|
194
|
+
ALTER TABLE delta.`{escaped_path}`
|
|
195
|
+
SET TBLPROPERTIES (
|
|
196
|
+
'delta.columnMapping.mode' = 'name',
|
|
197
|
+
'delta.minReaderVersion' = '2',
|
|
198
|
+
'delta.minWriterVersion' = '5'
|
|
199
|
+
)
|
|
200
|
+
"""
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
|
|
189
204
|
def _detect_partition_columns(
|
|
190
205
|
df: DataFrame, threshold_bytes: int = 1_073_741_824
|
|
191
206
|
) -> list[str]:
|
|
@@ -281,6 +296,7 @@ def write_lakehouse(
|
|
|
281
296
|
spark: Optional[SparkSession] = None,
|
|
282
297
|
*,
|
|
283
298
|
normalize_column_names: bool = True,
|
|
299
|
+
enable_column_mapping: bool = False,
|
|
284
300
|
auto_partition: bool = True,
|
|
285
301
|
auto_partition_threshold_bytes: int = 1_073_741_824,
|
|
286
302
|
) -> None:
|
|
@@ -303,6 +319,9 @@ def write_lakehouse(
|
|
|
303
319
|
:py:func:`fabrictools.rename_columns_normalized` before
|
|
304
320
|
resolving ``partition_by`` and writing. If ``False``, keep physical column
|
|
305
321
|
names unchanged.
|
|
322
|
+
:param enable_column_mapping: If ``True`` and ``format="delta"``, writes table
|
|
323
|
+
properties required for Delta column mapping (mode ``name``), allowing
|
|
324
|
+
column names with spaces or special characters.
|
|
306
325
|
:param auto_partition: If ``True`` (default), automatically partition the data
|
|
307
326
|
by detected date columns if they exist.
|
|
308
327
|
:type df: ~pyspark.sql.DataFrame
|
|
@@ -313,6 +332,7 @@ def write_lakehouse(
|
|
|
313
332
|
:type format: str
|
|
314
333
|
:type spark: ~pyspark.sql.SparkSession | None
|
|
315
334
|
:type normalize_column_names: bool
|
|
335
|
+
:type enable_column_mapping: bool
|
|
316
336
|
|
|
317
337
|
.. rubric:: Example
|
|
318
338
|
|
|
@@ -367,6 +387,22 @@ def write_lakehouse(
|
|
|
367
387
|
writer = df.write.format(format).option("overwriteSchema", "true").mode(mode)
|
|
368
388
|
if format.lower() == "parquet":
|
|
369
389
|
writer = writer.option("datetimeRebaseMode", "CORRECTED")
|
|
390
|
+
elif format.lower() == "delta" and enable_column_mapping:
|
|
391
|
+
# If target already exists as a Delta table, upgrade protocol first so
|
|
392
|
+
# overwrite with business-friendly names (spaces, capitals, etc.) works.
|
|
393
|
+
try:
|
|
394
|
+
from delta.tables import DeltaTable # type: ignore[import-untyped] # noqa: PLC0415
|
|
395
|
+
|
|
396
|
+
if DeltaTable.isDeltaTable(_, full_path):
|
|
397
|
+
_enable_delta_column_mapping_on_path(_, full_path)
|
|
398
|
+
except Exception:
|
|
399
|
+
# Non-blocking: the write options below still apply for new tables.
|
|
400
|
+
pass
|
|
401
|
+
writer = (
|
|
402
|
+
writer.option("delta.columnMapping.mode", "name")
|
|
403
|
+
.option("delta.minReaderVersion", "2")
|
|
404
|
+
.option("delta.minWriterVersion", "5")
|
|
405
|
+
)
|
|
370
406
|
|
|
371
407
|
if effective_partition_by:
|
|
372
408
|
writer = writer.partitionBy(*effective_partition_by)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|