databricks-labs-lakebridge 0.10.7__py3-none-any.whl → 0.10.8__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries. Long removed lines that the upstream diff view truncated are marked below with a trailing `…`.
- databricks/labs/lakebridge/__about__.py +1 -1
- databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
- databricks/labs/lakebridge/base_install.py +1 -5
- databricks/labs/lakebridge/cli.py +13 -6
- databricks/labs/lakebridge/helpers/validation.py +5 -3
- databricks/labs/lakebridge/install.py +40 -481
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +9 -5
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +2 -1
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +2 -1
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +50 -29
- databricks/labs/lakebridge/reconcile/connectors/tsql.py +2 -1
- databricks/labs/lakebridge/reconcile/query_builder/base.py +50 -11
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
- databricks/labs/lakebridge/reconcile/recon_config.py +0 -15
- databricks/labs/lakebridge/reconcile/reconciliation.py +4 -1
- databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +11 -31
- databricks/labs/lakebridge/reconcile/trigger_recon_service.py +4 -1
- databricks/labs/lakebridge/transpiler/execute.py +34 -28
- databricks/labs/lakebridge/transpiler/installers.py +523 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +2 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/METADATA +1 -1
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/RECORD +30 -28
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/WHEEL +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/entry_points.txt +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/LICENSE +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/NOTICE +0 -0
databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py:

```diff
@@ -10,15 +10,13 @@ from databricks.labs.lakebridge.reconcile.recon_capture import (
     ReconIntermediatePersist,
     generate_final_reconcile_aggregate_output,
 )
-from databricks.labs.lakebridge.reconcile.recon_config import …
+from databricks.labs.lakebridge.reconcile.recon_config import AGG_RECONCILE_OPERATION_NAME
 from databricks.labs.lakebridge.reconcile.recon_output_config import (
     ReconcileProcessDuration,
     AggregateQueryOutput,
     DataReconcileOutput,
 )
-from databricks.labs.lakebridge.reconcile.reconciliation import Reconciliation
 from databricks.labs.lakebridge.reconcile.trigger_recon_service import TriggerReconService
-from databricks.labs.lakebridge.reconcile.normalize_recon_config_service import NormalizeReconConfigService


 class TriggerReconAggregateService:
@@ -36,42 +34,36 @@ class TriggerReconAggregateService:

         # Get the Aggregated Reconciliation Output for each table
         for table_conf in table_recon.tables:
-            normalized_table_conf = NormalizeReconConfigService(
-                reconciler.source, reconciler.target
-            ).normalize_recon_table_config(table_conf)
-
             recon_process_duration = ReconcileProcessDuration(start_ts=str(datetime.now()), end_ts=None)
             try:
                 src_schema, tgt_schema = TriggerReconService.get_schemas(
-                    reconciler.source, reconciler.target, …
+                    reconciler.source, reconciler.target, table_conf, reconcile_config.database_config, False
                 )
             except DataSourceRuntimeException as e:
                 raise ReconciliationException(message=str(e)) from e

-            assert …
+            assert table_conf.aggregates, "Aggregates must be defined for Aggregates Reconciliation"

-            …
-            …
-            …
-            …
-            …
-            …
-                )
-            )
+            try:
+                table_reconcile_agg_output_list = reconciler.reconcile_aggregates(table_conf, src_schema, tgt_schema)
+            except DataSourceRuntimeException as e:
+                table_reconcile_agg_output_list = [
+                    AggregateQueryOutput(reconcile_output=DataReconcileOutput(exception=str(e)), rule=None)
+                ]

             recon_process_duration.end_ts = str(datetime.now())

             # Persist the data to the delta tables
             recon_capture.store_aggregates_metrics(
                 reconcile_agg_output_list=table_reconcile_agg_output_list,
-                table_conf=…
+                table_conf=table_conf,
                 recon_process_duration=recon_process_duration,
             )

             (
                 ReconIntermediatePersist(
                     spark=spark,
-                    path=utils.generate_volume_path(…
+                    path=utils.generate_volume_path(table_conf, reconcile_config.metadata_config),
                 ).clean_unmatched_df_from_volume()
             )

@@ -84,15 +76,3 @@ class TriggerReconAggregateService:
             ),
             operation_name=AGG_RECONCILE_OPERATION_NAME,
         )
-
-    @staticmethod
-    def _run_reconcile_aggregates(
-        reconciler: Reconciliation,
-        table_conf: Table,
-        src_schema: list[Schema],
-        tgt_schema: list[Schema],
-    ) -> list[AggregateQueryOutput]:
-        try:
-            return reconciler.reconcile_aggregates(table_conf, src_schema, tgt_schema)
-        except DataSourceRuntimeException as e:
-            return [AggregateQueryOutput(reconcile_output=DataReconcileOutput(exception=str(e)), rule=None)]
```
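The `_run_reconcile_aggregates` helper removed above is inlined at its single call site as a plain try/except. A minimal, runnable sketch of that error-capture pattern, using simplified stand-ins for the real `recon_output_config` types:

```python
# Sketch only: simplified stand-ins for DataReconcileOutput / AggregateQueryOutput
# and the reconciler, to illustrate folding DataSourceRuntimeException into the
# output list instead of letting it propagate.
from dataclasses import dataclass


class DataSourceRuntimeException(Exception):
    pass


@dataclass
class DataReconcileOutput:
    exception: str | None = None


@dataclass
class AggregateQueryOutput:
    reconcile_output: DataReconcileOutput
    rule: object | None = None


def reconcile_aggregates(table_conf, src_schema, tgt_schema) -> list[AggregateQueryOutput]:
    raise DataSourceRuntimeException("source table not found")  # simulate a failure


try:
    outputs = reconcile_aggregates("tbl", [], [])
except DataSourceRuntimeException as e:
    # Same shape as the inlined handler in the hunk above
    outputs = [AggregateQueryOutput(reconcile_output=DataReconcileOutput(exception=str(e)), rule=None)]

print(outputs[0].reconcile_output.exception)  # -> source table not found
```

Folding the exception into `AggregateQueryOutput` keeps a per-table failure from aborting the loop over `table_recon.tables`.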
databricks/labs/lakebridge/reconcile/trigger_recon_service.py:

```diff
@@ -138,7 +138,7 @@ class TriggerReconService:

         try:
             src_schema, tgt_schema = TriggerReconService.get_schemas(
-                reconciler.source, reconciler.target, table_conf, reconcile_config.database_config
+                reconciler.source, reconciler.target, table_conf, reconcile_config.database_config, True
             )
         except DataSourceRuntimeException as e:
             schema_reconcile_output = SchemaReconcileOutput(is_valid=False, exception=str(e))
@@ -170,17 +170,20 @@ class TriggerReconService:
         target: DataSource,
         table_conf: Table,
         database_config: DatabaseConfig,
+        normalize: bool,
     ) -> tuple[list[Schema], list[Schema]]:
         src_schema = source.get_schema(
             catalog=database_config.source_catalog,
             schema=database_config.source_schema,
             table=table_conf.source_name,
+            normalize=normalize,
         )

         tgt_schema = target.get_schema(
             catalog=database_config.target_catalog,
             schema=database_config.target_schema,
             table=table_conf.target_name,
+            normalize=normalize,
         )

         return src_schema, tgt_schema
```
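Both call sites now pass the new `normalize` flag positionally (`True` on the row-level recon path, `False` on the aggregates path), and `get_schemas` forwards it to each `DataSource.get_schema` call. A toy sketch of how such a flag might thread through; the `ToyDataSource` below is hypothetical, and the real connectors in `reconcile/connectors/` define their own normalization rules:

```python
# Illustration only: a toy DataSource showing a normalize flag affecting
# get_schema output; not the lakebridge connector implementation.
from dataclasses import dataclass


@dataclass
class Schema:
    column_name: str
    data_type: str


class ToyDataSource:
    def get_schema(self, *, catalog: str, schema: str, table: str, normalize: bool) -> list[Schema]:
        raw = [Schema("CustomerID", "NUMBER"), Schema("Name", "VARCHAR")]
        if not normalize:
            return raw  # aggregates path (normalize=False): keep names as-is
        # row-level path (normalize=True): canonicalize for comparison
        return [Schema(c.column_name.lower(), c.data_type.lower()) for c in raw]


src = ToyDataSource().get_schema(catalog="c", schema="s", table="t", normalize=True)
print([c.column_name for c in src])  # -> ['customerid', 'name']
```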
databricks/labs/lakebridge/transpiler/execute.py:

```diff
@@ -48,6 +48,26 @@ class TranspilingContext:
     transpiled_code: str | None = None


+def _validate_transpiled_sql(context: TranspilingContext, content: str, error_list: list[TranspileError]) -> str:
+    if context.validator is None:
+        return content
+    validation_result = _validation(context.validator, context.config, str(content))
+    # Potentially expensive, only evaluate if debug is enabled
+    if logger.isEnabledFor(logging.DEBUG):
+        msg = f"Finished validating transpiled code for file: {context.input_path} (result: {validation_result})"
+        logger.debug(msg)
+    if validation_result.exception_msg is not None:
+        error = TranspileError(
+            "VALIDATION_ERROR",
+            ErrorKind.VALIDATION,
+            ErrorSeverity.WARNING,
+            context.input_path,
+            validation_result.exception_msg,
+        )
+        error_list.append(error)
+    return validation_result.validated_sql
+
+
 async def _process_one_file(context: TranspilingContext) -> tuple[int, list[TranspileError]]:
     input_path = context.input_path

```
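The new `_validate_transpiled_sql` helper is a no-op when no validator is configured, records validation failures as `WARNING`-severity `TranspileError`s instead of raising, and always returns the validator's SQL. A reduced sketch of that control flow, with hypothetical stand-in types rather than lakebridge's real ones:

```python
# Reduced sketch of the helper's control flow; ValidationResult and the
# callable validator below are hypothetical stand-ins, not lakebridge APIs.
from dataclasses import dataclass, field
from typing import Callable


@dataclass
class ValidationResult:
    validated_sql: str
    exception_msg: str | None = None


@dataclass
class Context:
    validator: Callable[[str], ValidationResult] | None = None
    errors: list[str] = field(default_factory=list)


def validate_transpiled_sql(ctx: Context, content: str) -> str:
    if ctx.validator is None:
        return content  # validation is optional; pass content through untouched
    result = ctx.validator(content)
    if result.exception_msg is not None:
        # Recorded as a warning; the run is not aborted
        ctx.errors.append(f"VALIDATION_ERROR: {result.exception_msg}")
    return result.validated_sql  # always use the validator's SQL


def fake_validator(sql: str) -> ValidationResult:
    # Pretend the target dialect rejects FOO(); flag it but still return SQL
    if "FOO(" in sql:
        return ValidationResult(validated_sql=f"-- FIXME\n{sql}", exception_msg="unknown function FOO")
    return ValidationResult(validated_sql=sql)


ctx = Context(validator=fake_validator)
print(validate_transpiled_sql(ctx, "SELECT FOO(1)"))
print(ctx.errors)  # ['VALIDATION_ERROR: unknown function FOO']
```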
```diff
@@ -89,29 +109,29 @@ async def _process_one_file(context: TranspilingContext) -> tuple[int, list[TranspileError]]:
     assert output_path is not None, "Output path must be set in the context"
     output_path.parent.mkdir(exist_ok=True)

-    if …
-        …
+    if _is_mime_result(transpile_result):
+        _process_mime_result(context, error_list)
     else:
-        …
+        _process_non_mime_result(context, error_list)

     return transpile_result.success_count, error_list


-def …
+def _is_mime_result(result: TranspileResult):
     return result.transpiled_code.startswith("Content-Type: multipart/mixed; boundary=")


-def …
+def _process_mime_result(context: TranspilingContext, error_list: list[TranspileError]) -> None:
     # TODO error handling
     # Added policy to process quoted-printable encoded
     parser = EmailParser(policy=policy.default)
     transpiled_code: str = cast(str, context.transpiled_code)
     message: Message = parser.parsestr(transpiled_code)
     for part in message.walk():
-        _process_combined_part(context, part)
+        _process_combined_part(context, part, error_list)


-def _process_combined_part(context: TranspilingContext, part: Message) -> None:
+def _process_combined_part(context: TranspilingContext, part: Message, error_list: list[TranspileError]) -> None:
     if part.get_content_type() != "text/plain":
         return  # TODO Need to handle other content types, e.g., text/binary, application/json, etc.
     filename = part.get_filename()
```
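The MIME branch leans on the standard library: `email.parser.Parser` with `policy.default` (which also decodes quoted-printable parts, per the comment) and `Message.walk()` to visit each embedded file. A self-contained demo of that pattern on a fabricated multipart payload:

```python
# Demo only: fabricated multipart payload mirroring the parsing pattern above
# (EmailParser with policy.default, walking text/plain parts).
from email import policy
from email.parser import Parser as EmailParser

raw = (
    "Content-Type: multipart/mixed; boundary=BOUND\n"
    "\n"
    "--BOUND\n"
    'Content-Type: text/plain; charset="utf-8"\n'
    "Content-Disposition: attachment; filename=query_1.sql\n"
    "\n"
    "SELECT 1;\n"
    "--BOUND--\n"
)

message = EmailParser(policy=policy.default).parsestr(raw)
for part in message.walk():
    if part.get_content_type() != "text/plain":
        continue  # skips the multipart container itself, as in the hunk above
    print(part.get_filename(), "->", part.get_content().strip())
# query_1.sql -> SELECT 1;
```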
```diff
@@ -133,35 +153,21 @@ def _process_combined_part(context: TranspilingContext, part: Message) -> None:
     folder.mkdir(parents=True, exist_ok=True)
     output = folder / segments[-1]
     logger.debug(f"Writing output to: {output}")
+    # Only validate if output file has .sql suffix
+    if output.suffix == ".sql":
+        content = _validate_transpiled_sql(context, content, error_list)
     output.write_text(content)


-def …
+def _process_non_mime_result(context: TranspilingContext, error_list: list[TranspileError]) -> None:

     output_code: str = context.transpiled_code or ""
+    output_path = cast(Path, context.output_path)

     if any(err.kind == ErrorKind.PARSING for err in error_list):
         output_code = context.source_code or ""
-
-    …
-        logger.debug(f"Validating transpiled code for file: {context.input_path}")
-        validation_result = _validation(context.validator, context.config, str(context.transpiled_code))
-        # Potentially expensive, only evaluate if debug is enabled
-        if logger.isEnabledFor(logging.DEBUG):
-            msg = f"Finished validating transpiled code for file: {context.input_path} (result: {validation_result})"
-            logger.debug(msg)
-        if validation_result.exception_msg is not None:
-            error = TranspileError(
-                "VALIDATION_ERROR",
-                ErrorKind.VALIDATION,
-                ErrorSeverity.WARNING,
-                context.input_path,
-                validation_result.exception_msg,
-            )
-            error_list.append(error)
-        output_code = validation_result.validated_sql
-
-    output_path = cast(Path, context.output_path)
+    elif output_path.suffix == ".sql":
+        output_code = _validate_transpiled_sql(context, output_code, error_list)
     with output_path.open("w") as w:
         # The above adds a java-style comment block at the top of the output file
         # This would break .py or .json outputs so we disable it for now.
```