PyPI - dlt-utils-lib - Versions diffs - 1.2.1__tar.gz → 1.2.2__tar.gz - Mend

dlt-utils-lib 1.2.1.tar.gz → 1.2.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

{dlt_utils_lib-1.2.1 → dlt_utils_lib-1.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dlt_utils_lib
-Version: 1.2.1
+Version: 1.2.2
 Summary: UNKNOWN
 Home-page: UNKNOWN
 License: UNKNOWN

dlt_utils_lib-1.2.2/dlt_utils/dlt_autoloader_recovery_configuration.py ADDED Viewed

@@ -0,0 +1,26 @@
+import logging
+from databricks.sdk import WorkspaceClient
+def _needs_autoloader_recovery(pipeline_id: str) -> bool:
+    """Check the first listed pipeline event for the invalid-continuation-token error.
+
+    Requests a single event for ``pipeline_id`` ordered by ``timestamp desc``
+    and inspects only that one event.
+
+    Args:
+        pipeline_id: Identifier passed to ``list_pipeline_events``.
+
+    Returns:
+        True when any exception message attached to that event's error contains
+        ``CF_MANAGED_FILE_EVENTS_INVALID_CONTINUATION_TOKEN``; False when no
+        event is returned (a warning is logged), when the event has no error,
+        or when none of its exception messages contain the marker.
+    """
+    # WorkspaceClient() is built with no arguments; presumably credentials come
+    # from the runtime environment -- TODO confirm.
+    # next(..., None) takes only the first item the listing yields, so older
+    # events are never examined.
+    event = next(iter(WorkspaceClient().pipelines.list_pipeline_events(
+        pipeline_id=pipeline_id, order_by=["timestamp desc"], max_results=1
+    )), None)
+    if event is None:
+        logging.warning("No pipeline events found for pipeline_id=%s", pipeline_id)
+        return False
+    if event.error:
+        # `or []` tolerates an error object whose exceptions list is None/empty.
+        for exc in event.error.exceptions or []:
+            # Substring match on the message text; messages that are None/empty are skipped.
+            if exc.message and "CF_MANAGED_FILE_EVENTS_INVALID_CONTINUATION_TOKEN" in exc.message:
+                return True
+    return False
+def resolve_autoloader_recovery(spark) -> bool:
+    """Decide whether the Auto Loader recovery options should be applied.
+
+    Reads the ``autoloader_recovery_method`` setting from ``spark.conf``
+    (default ``"AUTO"``, compared case-insensitively via ``upper()``):
+
+    * ``ON``   -- always returns True.
+    * ``AUTO`` -- returns the result of ``_needs_autoloader_recovery`` for the
+      pipeline id read from the ``pipelines.id`` setting.
+    * any other value -- returns False.
+
+    Args:
+        spark: Object exposing ``conf.get``; presumably the active
+            SparkSession -- TODO confirm against callers.
+
+    Returns:
+        True if recovery should be enabled, otherwise False.
+    """
+    method = spark.conf.get("autoloader_recovery_method", "AUTO").upper()
+    if method == "ON":
+        return True
+    if method == "AUTO":
+        # NOTE(review): "pipelines.id" is read without a default; behaviour when
+        # the key is unset depends on the conf implementation -- confirm.
+        return _needs_autoloader_recovery(spark.conf.get("pipelines.id"))
+    return False

{dlt_utils_lib-1.2.1 → dlt_utils_lib-1.2.2}/dlt_utils/main_cdc_utils.py RENAMED Viewed

@@ -3,6 +3,7 @@ from typing import Optional
 from pyspark.sql.functions import col, expr, current_timestamp
+from .dlt_autoloader_recovery_configuration import resolve_autoloader_recovery
 from .dlt_transformations import (
     add_default_value_for_removed_col,
     apply_partitions,
@@ -103,9 +104,14 @@ def create_bronze_table_definition(spark,
             .option("cloudFiles.inferColumnTypes", "true")
         if use_managed_file_events:
-            reader = reader.option("cloudFiles.useManagedFileEvents", "true") \
-                            .option("cloudFiles.listOnStart", "true") \
-                            .option("cloudFiles.validateOptions", "false")
+            reader = reader.option("cloudFiles.useManagedFileEvents", "true")
+        if resolve_autoloader_recovery(spark):
+            reader = (
+                reader
+                .option("cloudFiles.listOnStart", "true")
+                .option("cloudFiles.validateOptions", "false")
+            )
         return reader.load(files_path) \
             .withColumn('cdc_timestamp', col('cdc_timestamp').cast('timestamp')) \

{dlt_utils_lib-1.2.1 → dlt_utils_lib-1.2.2}/dlt_utils/main_json_utils.py RENAMED Viewed

@@ -4,6 +4,8 @@ from typing import Callable, Optional
 from pyspark.sql import DataFrame
 from pyspark.sql.functions import col, struct
+from .dlt_autoloader_recovery_configuration import resolve_autoloader_recovery
 def base_json_replication_process(
         dlt,
@@ -97,9 +99,14 @@ def _build_autoloader_reader(
         reader = reader.option("cloudFiles.schemaHints", schema_hints)
     if use_managed_file_events:
-        reader = reader.option("cloudFiles.useManagedFileEvents", "true") \
-                        .option("cloudFiles.listOnStart", "true") \
-                        .option("cloudFiles.validateOptions", "false")
+        reader = reader.option("cloudFiles.useManagedFileEvents", "true")
+    if resolve_autoloader_recovery(spark):
+        reader = (
+            reader
+            .option("cloudFiles.listOnStart", "true")
+            .option("cloudFiles.validateOptions", "false")
+        )
     return reader.load(source_path)

{dlt_utils_lib-1.2.1 → dlt_utils_lib-1.2.2}/dlt_utils_lib.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dlt-utils-lib
-Version: 1.2.1
+Version: 1.2.2
 Summary: UNKNOWN
 Home-page: UNKNOWN
 License: UNKNOWN

{dlt_utils_lib-1.2.1 → dlt_utils_lib-1.2.2}/dlt_utils_lib.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,5 +1,6 @@
 setup.py
 dlt_utils/__init__.py
+dlt_utils/dlt_autoloader_recovery_configuration.py
 dlt_utils/dlt_metadata_receiver.py
 dlt_utils/dlt_transformations.py
 dlt_utils/main_cdc_utils.py