databricks-labs-lakebridge 0.10.7__py3-none-any.whl → 0.10.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/labs/lakebridge/__about__.py +1 -1
- databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
- databricks/labs/lakebridge/base_install.py +1 -5
- databricks/labs/lakebridge/cli.py +13 -6
- databricks/labs/lakebridge/helpers/validation.py +5 -3
- databricks/labs/lakebridge/install.py +40 -481
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +9 -5
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +2 -1
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +2 -1
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +50 -29
- databricks/labs/lakebridge/reconcile/connectors/tsql.py +2 -1
- databricks/labs/lakebridge/reconcile/query_builder/base.py +50 -11
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
- databricks/labs/lakebridge/reconcile/recon_config.py +0 -15
- databricks/labs/lakebridge/reconcile/reconciliation.py +4 -1
- databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +11 -31
- databricks/labs/lakebridge/reconcile/trigger_recon_service.py +4 -1
- databricks/labs/lakebridge/transpiler/execute.py +34 -28
- databricks/labs/lakebridge/transpiler/installers.py +523 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +2 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/METADATA +1 -1
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/RECORD +30 -28
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/WHEEL +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/entry_points.txt +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/LICENSE +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/NOTICE +0 -0
databricks/labs/lakebridge/reconcile/connectors/secrets.py
@@ -11,8 +11,26 @@ class SecretsMixin:
     _ws: WorkspaceClient
     _secret_scope: str
 
+    def _get_secret_or_none(self, secret_key: str) -> str | None:
+        """
+        Get the secret value given a secret scope & secret key. Log a warning if secret does not exist
+        Used To ensure backwards compatibility when supporting new secrets
+        """
+        try:
+            # Return the decoded secret value in string format
+            return self._get_secret(secret_key)
+        except NotFound as e:
+            logger.warning(f"Secret not found: key={secret_key}")
+            logger.debug("Secret lookup failed", exc_info=e)
+            return None
+
     def _get_secret(self, secret_key: str) -> str:
-        """Get the secret value given a secret scope & secret key.
+        """Get the secret value given a secret scope & secret key.
+
+        Raises:
+            NotFound: The secret could not be found.
+            UnicodeDecodeError: The secret value was not Base64-encoded UTF-8.
+        """
         try:
             # Return the decoded secret value in string format
             secret = self._ws.secrets.get_secret(self._secret_scope, secret_key)
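Note: `_get_secret_or_none` exists so that newly introduced secrets (such as an optional private-key passphrase) can be treated as optional without breaking existing installations. A minimal, self-contained sketch of how the two helpers fit together, assuming the Databricks SDK; the class name and the `lakebridge` scope below are illustrative, not from the package:

    import base64

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.errors import NotFound


    class ScopedSecrets:
        """Illustrative stand-in for a SecretsMixin consumer."""

        def __init__(self, ws: WorkspaceClient, scope: str):
            self._ws = ws
            self._scope = scope

        def get(self, key: str) -> str:
            # The secrets API returns the value base64-encoded; decode it to UTF-8.
            raw = self._ws.secrets.get_secret(self._scope, key).value
            return base64.b64decode(raw).decode("utf-8")

        def get_or_none(self, key: str) -> str | None:
            # Optional secret: absence is expected for older configurations.
            try:
                return self.get(key)
            except NotFound:
                return None


    ws = WorkspaceClient()
    secrets = ScopedSecrets(ws, "lakebridge")
    pem_key = secrets.get("pem_private_key")                        # required
    pem_password = secrets.get_or_none("pem_private_key_password")  # optional, new in 0.10.8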
databricks/labs/lakebridge/reconcile/connectors/snowflake.py
@@ -79,31 +79,6 @@ class SnowflakeDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
             f"&warehouse={self._get_secret('sfWarehouse')}&role={self._get_secret('sfRole')}"
         )
 
-    @staticmethod
-    def get_private_key(pem_private_key: str) -> str:
-        try:
-            private_key_bytes = pem_private_key.encode("UTF-8")
-            p_key = serialization.load_pem_private_key(
-                private_key_bytes,
-                password=None,
-                backend=default_backend(),
-            )
-            pkb = p_key.private_bytes(
-                encoding=serialization.Encoding.PEM,
-                format=serialization.PrivateFormat.PKCS8,
-                encryption_algorithm=serialization.NoEncryption(),
-            )
-            pkb_str = pkb.decode("UTF-8")
-            # Remove the first and last lines (BEGIN/END markers)
-            private_key_pem_lines = pkb_str.strip().split('\n')[1:-1]
-            # Join the lines to form the base64 encoded string
-            private_key_pem_str = ''.join(private_key_pem_lines)
-            return private_key_pem_str
-        except Exception as e:
-            message = f"Failed to load or process the provided PEM private key. --> {e}"
-            logger.error(message)
-            raise InvalidSnowflakePemPrivateKey(message) from e
-
     def read_data(
         self,
         catalog: str | None,
@@ -132,6 +107,7 @@ class SnowflakeDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
         catalog: str | None,
         schema: str,
         table: str,
+        normalize: bool = True,
     ) -> list[Schema]:
         """
         Fetch the Schema from the INFORMATION_SCHEMA.COLUMNS table in Snowflake.
@@ -151,11 +127,17 @@ class SnowflakeDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
             df = self.reader(schema_query).load()
             schema_metadata = df.select([col(c).alias(c.lower()) for c in df.columns]).collect()
             logger.info(f"Schema fetched successfully. Completed at: {datetime.now()}")
-            return [self._map_meta_column(field) for field in schema_metadata]
+            return [self._map_meta_column(field, normalize) for field in schema_metadata]
         except (RuntimeError, PySparkException) as e:
             return self.log_and_throw_exception(e, "schema", schema_query)
 
     def reader(self, query: str) -> DataFrameReader:
+        options = self._get_snowflake_options()
+        return self._spark.read.format("snowflake").option("dbtable", f"({query}) as tmp").options(**options)
+
+    # TODO cache this method using @functools.cache
+    # Pay attention to https://pylint.pycqa.org/en/latest/user_guide/messages/warning/method-cache-max-size-none.html
+    def _get_snowflake_options(self):
         options = {
             "sfUrl": self._get_secret('sfUrl'),
             "sfUser": self._get_secret('sfUser'),
@@ -164,18 +146,57 @@ class SnowflakeDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
             "sfWarehouse": self._get_secret('sfWarehouse'),
             "sfRole": self._get_secret('sfRole'),
         }
+        options = options | self._get_snowflake_auth_options()
+
+        return options
+
+    def _get_snowflake_auth_options(self):
         try:
-
+            key = SnowflakeDataSource._get_private_key(
+                self._get_secret('pem_private_key'), self._get_secret_or_none('pem_private_key_password')
+            )
+            return {"pem_private_key": key}
         except (NotFound, KeyError):
             logger.warning("pem_private_key not found. Checking for sfPassword")
             try:
-
+                password = self._get_secret('sfPassword')
+                return {"sfPassword": password}
             except (NotFound, KeyError) as e:
                 message = "sfPassword and pem_private_key not found. Either one is required for snowflake auth."
                 logger.error(message)
                 raise NotFound(message) from e
 
-
+    @staticmethod
+    def _get_private_key(pem_private_key: str, pem_private_key_password: str | None) -> str:
+        try:
+            private_key_bytes = pem_private_key.encode("UTF-8")
+            password_bytes = pem_private_key_password.encode("UTF-8") if pem_private_key_password else None
+        except UnicodeEncodeError as e:
+            message = f"Invalid pem key and/or pem password: unable to encode. --> {e}"
+            logger.error(message)
+            raise ValueError(message) from e
+
+        try:
+            p_key = serialization.load_pem_private_key(
+                private_key_bytes,
+                password_bytes,
+                backend=default_backend(),
+            )
+            pkb = p_key.private_bytes(
+                encoding=serialization.Encoding.PEM,
+                format=serialization.PrivateFormat.PKCS8,
+                encryption_algorithm=serialization.NoEncryption(),
+            )
+            pkb_str = pkb.decode("UTF-8")
+            # Remove the first and last lines (BEGIN/END markers)
+            private_key_pem_lines = pkb_str.strip().split('\n')[1:-1]
+            # Join the lines to form the base64 encoded string
+            private_key_pem_str = ''.join(private_key_pem_lines)
+            return private_key_pem_str
+        except Exception as e:
+            message = f"Failed to load or process the provided PEM private key. --> {e}"
+            logger.error(message)
+            raise InvalidSnowflakePemPrivateKey(message) from e
 
     def normalize_identifier(self, identifier: str) -> NormalizedIdentifier:
         return DialectUtils.normalize_identifier(
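The relocated `_get_private_key` now accepts an optional passphrase and re-encodes the PEM into the bare base64 body that the Snowflake Spark connector expects in its `pem_private_key` option. A standalone sketch of the same conversion with the `cryptography` package; the function name is illustrative:

    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import serialization


    def pem_to_connector_key(pem: str, passphrase: str | None) -> str:
        """Turn a (possibly encrypted) PEM private key into a single-line base64 payload."""
        key = serialization.load_pem_private_key(
            pem.encode("utf-8"),
            passphrase.encode("utf-8") if passphrase else None,
            backend=default_backend(),
        )
        # Re-serialize as unencrypted PKCS#8 PEM ...
        pkcs8 = key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption(),
        ).decode("utf-8")
        # ... then drop the BEGIN/END marker lines, keeping only the base64 body.
        return "".join(pkcs8.strip().split("\n")[1:-1])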
databricks/labs/lakebridge/reconcile/connectors/tsql.py
@@ -109,6 +109,7 @@ class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
         catalog: str | None,
         schema: str,
         table: str,
+        normalize: bool = True,
     ) -> list[Schema]:
         """
         Fetch the Schema from the INFORMATION_SCHEMA.COLUMNS table in SQL Server.
@@ -128,7 +129,7 @@ class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
             df = self.reader(schema_query).load()
             schema_metadata = df.select([col(c).alias(c.lower()) for c in df.columns]).collect()
             logger.info(f"Schema fetched successfully. Completed at: {datetime.now()}")
-            return [self._map_meta_column(field) for field in schema_metadata]
+            return [self._map_meta_column(field, normalize) for field in schema_metadata]
         except (RuntimeError, PySparkException) as e:
             return self.log_and_throw_exception(e, "schema", schema_query)
 
databricks/labs/lakebridge/reconcile/query_builder/base.py
@@ -5,10 +5,12 @@ import sqlglot.expressions as exp
 from sqlglot import Dialect, parse_one
 
 from databricks.labs.lakebridge.reconcile.connectors.data_source import DataSource
+from databricks.labs.lakebridge.reconcile.connectors.dialect_utils import DialectUtils
 from databricks.labs.lakebridge.reconcile.exception import InvalidInputException
 from databricks.labs.lakebridge.reconcile.query_builder.expression_generator import (
     DataType_transform_mapping,
     transform_expression,
+    build_column,
 )
 from databricks.labs.lakebridge.reconcile.recon_config import Schema, Table, Aggregate
 from databricks.labs.lakebridge.transpiler.sqlglot.dialect_utils import get_dialect, SQLGLOT_DIALECTS
@@ -26,7 +28,7 @@ class QueryBuilder(ABC):
 
     @property
     def engine(self) -> Dialect:
-        return self._engine
+        return self._engine if self.layer == "source" else get_dialect("databricks")
 
     @property
     def layer(self) -> str:
@@ -66,7 +68,25 @@ class QueryBuilder(ABC):
 
     @property
     def user_transformations(self) -> dict[str, str]:
-
+        if self._table_conf.transformations:
+            if self.layer == "source":
+                return {
+                    trans.column_name: (
+                        trans.source
+                        if trans.source
+                        else self._data_source.normalize_identifier(trans.column_name).source_normalized
+                    )
+                    for trans in self._table_conf.transformations
+                }
+            return {
+                self._table_conf.get_layer_src_to_tgt_col_mapping(trans.column_name, self.layer): (
+                    trans.target
+                    if trans.target
+                    else self._table_conf.get_layer_src_to_tgt_col_mapping(trans.column_name, self.layer)
+                )
+                for trans in self._table_conf.transformations
+            }
+        return {}
 
     @property
     def aggregates(self) -> list[Aggregate] | None:
@@ -89,10 +109,12 @@ class QueryBuilder(ABC):
 
     def _user_transformer(self, node: exp.Expression, user_transformations: dict[str, str]) -> exp.Expression:
         if isinstance(node, exp.Column) and user_transformations:
-
-
-            if
-                return parse_one(
+            normalized_column = self._data_source.normalize_identifier(node.name)
+            ansi_name = normalized_column.ansi_normalized
+            if ansi_name in user_transformations.keys():
+                return parse_one(
+                    user_transformations.get(ansi_name, normalized_column.source_normalized), read=self.engine
+                )
         return node
 
     def _apply_default_transformation(
@@ -103,8 +125,7 @@ class QueryBuilder(ABC):
         with_transform.append(alias.transform(self._default_transformer, schema, source))
         return with_transform
 
-
-    def _default_transformer(node: exp.Expression, schema: list[Schema], source: Dialect) -> exp.Expression:
+    def _default_transformer(self, node: exp.Expression, schema: list[Schema], source: Dialect) -> exp.Expression:
 
         def _get_transform(datatype: str):
             source_dialects = [source_key for source_key, dialect in SQLGLOT_DIALECTS.items() if dialect == source]
@@ -121,9 +142,10 @@ class QueryBuilder(ABC):
 
         schema_dict = {v.column_name: v.data_type for v in schema}
         if isinstance(node, exp.Column):
-
-
-
+            normalized_column = self._data_source.normalize_identifier(node.name)
+            ansi_name = normalized_column.ansi_normalized
+            if ansi_name in schema_dict.keys():
+                transform = _get_transform(schema_dict.get(ansi_name, normalized_column.source_normalized))
             return transform_expression(node, transform)
         return node
 
@@ -132,3 +154,20 @@ class QueryBuilder(ABC):
         message = f"Exception for {self.table_conf.target_name} target table in {self.layer} layer --> {message}"
         logger.error(message)
         raise InvalidInputException(message)
+
+    def _build_column_with_alias(self, column: str):
+        return build_column(
+            this=self._build_column_name_source_normalized(column),
+            alias=DialectUtils.unnormalize_identifier(
+                self.table_conf.get_layer_tgt_to_src_col_mapping(column, self.layer)
+            ),
+            quoted=True,
+        )
+
+    def _build_column_name_source_normalized(self, column: str):
+        return self._data_source.normalize_identifier(column).source_normalized
+
+    def _build_alias_source_normalized(self, column: str):
+        return self._data_source.normalize_identifier(
+            self.table_conf.get_layer_tgt_to_src_col_mapping(column, self.layer)
+        ).source_normalized
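The reworked `_user_transformer` keys user transformations by the ANSI-normalized column name and re-parses the replacement SQL in the active dialect. A simplified stand-in (a plain dict instead of the table config, and no identifier normalization) showing the underlying sqlglot pattern:

    import sqlglot.expressions as exp
    from sqlglot import parse_one

    # User-supplied per-column SQL snippets, keyed by column name.
    transformations = {"sal": "TRIM(sal)", "name": "UPPER(name)"}


    def apply_user_transformations(node: exp.Expression) -> exp.Expression:
        # Replace a plain column reference with its parsed transformation, if one exists.
        if isinstance(node, exp.Column) and node.name in transformations:
            return parse_one(transformations[node.name], read="snowflake")
        return node


    query = parse_one("SELECT id, sal, name FROM employees", read="snowflake")
    print(query.transform(apply_user_transformations).sql(dialect="snowflake"))
    # SELECT id, TRIM(sal), UPPER(name) FROM employees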
databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py
@@ -125,6 +125,7 @@ def anonymous(expr: exp.Column, func: str, is_expr: bool = False, dialect=None)
     return new_expr
 
 
+# TODO Standardize impl and use quoted and Identifier/Column consistently
 def build_column(this: exp.ExpOrStr, table_name="", quoted=False, alias=None) -> exp.Expression:
     if alias:
         if isinstance(this, str):
@@ -135,6 +136,10 @@ def build_column(this: exp.ExpOrStr, table_name="", quoted=False, alias=None) -> exp.Expression:
     return exp.Column(this=exp.Identifier(this=this, quoted=quoted), table=table_name)
 
 
+def build_column_no_alias(this: str, table_name="") -> exp.Expression:
+    return exp.Column(this=this, table=table_name)
+
+
 def build_literal(this: exp.ExpOrStr, alias=None, quoted=False, is_string=True, cast=None) -> exp.Expression:
     base_literal = exp.Literal(this=this, is_string=is_string)
     if not cast and not alias:
@@ -207,10 +212,11 @@ def build_sub(
     right_column_name: str,
     left_table_name: str | None = None,
     right_table_name: str | None = None,
+    quoted: bool = False,
 ) -> exp.Sub:
     return exp.Sub(
-        this=build_column(left_column_name, left_table_name),
-        expression=build_column(right_column_name, right_table_name),
+        this=build_column(left_column_name, left_table_name, quoted=quoted),
+        expression=build_column(right_column_name, right_table_name, quoted=quoted),
     )
 
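The `quoted=` plumbing matters because `build_column` wraps the name in an `exp.Identifier`, and only quoted identifiers pick up dialect-specific quoting when the expression is rendered. A small sqlglot illustration; the column and table names are made up:

    import sqlglot.expressions as exp

    quoted = exp.Column(this=exp.Identifier(this="Order Id", quoted=True), table="src")
    print(quoted.sql(dialect="snowflake"))   # src."Order Id"
    print(quoted.sql(dialect="databricks"))  # src.`Order Id`

    unquoted = exp.Column(this=exp.Identifier(this="Order Id", quoted=False), table="src")
    print(unquoted.sql(dialect="snowflake"))  # src.Order Id  (emitted verbatim, not a valid identifier)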
databricks/labs/lakebridge/reconcile/query_builder/hash_query.py
@@ -11,8 +11,8 @@ from databricks.labs.lakebridge.reconcile.query_builder.expression_generator import (
     get_hash_transform,
     lower,
     transform_expression,
+    build_column_no_alias,
 )
-from databricks.labs.lakebridge.transpiler.sqlglot.dialect_utils import get_dialect
 
 logger = logging.getLogger(__name__)
 
@@ -41,15 +41,12 @@ class HashQueryBuilder(QueryBuilder):
 
         key_cols = hash_cols if report_type == "row" else sorted(_join_columns | self.partition_column)
 
-        cols_with_alias = [
-            build_column(this=col, alias=self.table_conf.get_layer_tgt_to_src_col_mapping(col, self.layer))
-            for col in key_cols
-        ]
+        cols_with_alias = [self._build_column_with_alias(col) for col in key_cols]
 
         # in case if we have column mapping, we need to sort the target columns in the order of source columns to get
         # same hash value
         hash_cols_with_alias = [
-            {"this": col, "alias": self.
+            {"this": self._build_column_name_source_normalized(col), "alias": self._build_alias_source_normalized(col)}
             for col in hash_cols
         ]
         sorted_hash_cols_with_alias = sorted(hash_cols_with_alias, key=lambda column: column["alias"])
@@ -60,12 +57,11 @@ class HashQueryBuilder(QueryBuilder):
         )
         hash_col_with_transform = [self._generate_hash_algorithm(hashcols_sorted_as_src_seq, _HASH_COLUMN_NAME)]
 
-        dialect = self.engine if self.layer == "source" else get_dialect("databricks")
         res = (
             exp.select(*hash_col_with_transform + key_cols_with_transform)
             .from_(":tbl")
-            .where(self.filter)
-            .sql(dialect=
+            .where(self.filter, dialect=self.engine)
+            .sql(dialect=self.engine)
         )
 
         logger.info(f"Hash Query for {self.layer}: {res}")
@@ -76,10 +72,8 @@ class HashQueryBuilder(QueryBuilder):
         cols: list[str],
         column_alias: str,
     ) -> exp.Expression:
-
-        cols_with_transform = self.add_transformations(
-            cols_with_alias, self.engine if self.layer == "source" else get_dialect("databricks")
-        )
+        cols_no_alias = [build_column_no_alias(this=col) for col in cols]
+        cols_with_transform = self.add_transformations(cols_no_alias, self.engine)
         col_exprs = exp.select(*cols_with_transform).iter_expressions()
         concat_expr = concat(list(col_exprs))
 
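With the local `dialect` variable gone, the hash builder relies on the reworked `QueryBuilder.engine` property (source dialect on the source layer, Databricks on the target layer) for both the WHERE filter and the final render. A reduced sketch of the select/where/sql chain, with a concrete table standing in for the `:tbl` placeholder and illustrative column names:

    from sqlglot import select

    # Filter expressed in the source dialect; parsed and re-emitted with the same dialect.
    filter_cond = "category = 'retail'"

    query = (
        select("SHA2(CONCAT(col1, col2), 256) AS hash_value_recon", "col1")
        .from_("orders")
        .where(filter_cond, dialect="snowflake")
        .sql(dialect="snowflake")
    )
    print(query)
    # roughly: SELECT SHA2(CONCAT(col1, col2), 256) AS hash_value_recon, col1 FROM orders WHERE category = 'retail'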
databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py
@@ -4,6 +4,7 @@ import sqlglot.expressions as exp
 from pyspark.sql import DataFrame
 from sqlglot import select
 
+from databricks.labs.lakebridge.reconcile.connectors.dialect_utils import DialectUtils
 from databricks.labs.lakebridge.transpiler.sqlglot.dialect_utils import get_key_from_dialect
 from databricks.labs.lakebridge.reconcile.query_builder.base import QueryBuilder
 from databricks.labs.lakebridge.reconcile.query_builder.expression_generator import (
@@ -37,12 +38,9 @@ class SamplingQueryBuilder(QueryBuilder):
 
         cols = sorted((join_columns | self.select_columns) - self.threshold_columns - self.drop_columns)
 
-        cols_with_alias = [
-            build_column(this=col, alias=self.table_conf.get_layer_tgt_to_src_col_mapping(col, self.layer))
-            for col in cols
-        ]
+        cols_with_alias = [self._build_column_with_alias(col) for col in cols]
 
-        query = select(*cols_with_alias).from_(":tbl").where(self.filter).sql(dialect=self.engine)
+        query = select(*cols_with_alias).from_(":tbl").where(self.filter, dialect=self.engine).sql(dialect=self.engine)
 
         logger.info(f"Sampling Query with Alias for {self.layer}: {query}")
         return query
@@ -59,22 +57,22 @@ class SamplingQueryBuilder(QueryBuilder):
 
         cols = sorted((join_columns | self.select_columns) - self.threshold_columns - self.drop_columns)
 
-        cols_with_alias = [
-            build_column(this=col, alias=self.table_conf.get_layer_tgt_to_src_col_mapping(col, self.layer))
-            for col in cols
-        ]
+        cols_with_alias = [self._build_column_with_alias(col) for col in cols]
 
         sql_with_transforms = self.add_transformations(cols_with_alias, self.engine)
-        query_sql = select(*sql_with_transforms).from_(":tbl").where(self.filter)
+        query_sql = select(*sql_with_transforms).from_(":tbl").where(self.filter, dialect=self.engine)
         if self.layer == "source":
-            with_select = [
+            with_select = [
+                build_column(this=DialectUtils.unnormalize_identifier(col), table_name="src", quoted=True)
+                for col in sorted(cols)
+            ]
         else:
             with_select = [
-                build_column(this=col, table_name="src")
+                build_column(this=DialectUtils.unnormalize_identifier(col), table_name="src", quoted=True)
                 for col in sorted(self.table_conf.get_tgt_to_src_col_mapping_list(cols))
             ]
 
-        join_clause =
+        join_clause = self._get_join_clause(key_cols)
 
         query = (
             with_clause.with_(alias="src", as_=query_sql)
@@ -86,10 +84,10 @@ class SamplingQueryBuilder(QueryBuilder):
         logger.info(f"Sampling Query for {self.layer}: {query}")
         return query
 
-
-
+    def _get_join_clause(self, key_cols: list):
+        normalized = [self._build_column_name_source_normalized(col) for col in key_cols]
         return build_join_clause(
-            "recon",
+            "recon", normalized, source_table_alias="src", target_table_alias="recon", kind="inner", func=exp.EQ
         )
 
     def _get_with_clause(self, df: DataFrame) -> exp.Select:
@@ -106,12 +104,13 @@ class SamplingQueryBuilder(QueryBuilder):
             (
                 build_literal(
                     this=str(value),
-                    alias=col,
+                    alias=DialectUtils.unnormalize_identifier(col),
                     is_string=_get_is_string(column_types_dict, col),
-                    cast=orig_types_dict.get(col),
+                    cast=orig_types_dict.get(DialectUtils.ansi_normalize_identifier(col)),
+                    quoted=True,
                 )
                 if value is not None
-                else exp.Alias(this=exp.Null(), alias=col)
+                else exp.Alias(this=exp.Null(), alias=DialectUtils.unnormalize_identifier(col), quoted=True)
             )
             for col, value in zip(df.columns, row)
         ]
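The sampling query wraps the transformed source select in a `src` CTE, builds a `recon` CTE from the sampled rows, and joins the two on the normalized key columns. A reduced sqlglot sketch of that assembly; table, column, and literal values are illustrative:

    import sqlglot.expressions as exp
    from sqlglot import select

    source_query = "SELECT order_id, amount FROM orders WHERE region = 'EU'"
    recon_rows = "SELECT 42 AS order_id"  # stand-in for the literals built from the sampled DataFrame

    join_on = exp.EQ(
        this=exp.column("order_id", table="src"),
        expression=exp.column("order_id", table="recon"),
    )

    query = (
        select(exp.column("order_id", table="src"), exp.column("amount", table="src"))
        .with_("src", as_=source_query)
        .with_("recon", as_=recon_rows)
        .from_("src")
        .join("recon", on=join_on, join_type="inner")
        .sql(dialect="databricks")
    )
    print(query)
    # WITH src AS (...), recon AS (...) SELECT src.order_id, src.amount
    # FROM src INNER JOIN recon ON src.order_id = recon.order_id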
databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py
@@ -3,6 +3,7 @@ import logging
 from sqlglot import expressions as exp
 from sqlglot import select
 
+from databricks.labs.lakebridge.reconcile.connectors.dialect_utils import DialectUtils
 from databricks.labs.lakebridge.reconcile.query_builder.base import QueryBuilder
 from databricks.labs.lakebridge.reconcile.query_builder.expression_generator import (
     anonymous,
@@ -54,6 +55,7 @@ class ThresholdQueryBuilder(QueryBuilder):
                 left_table_name="source",
                 right_column_name=column,
                 right_table_name="databricks",
+                quoted=False,
             )
         ).transform(coalesce)
 
@@ -62,7 +64,14 @@ class ThresholdQueryBuilder(QueryBuilder):
             where_clause.append(where)
         # join columns
         for column in sorted(join_columns):
-            select_clause.append(
+            select_clause.append(
+                build_column(
+                    this=column,
+                    alias=f"{DialectUtils.unnormalize_identifier(column)}_source",
+                    table_name="source",
+                    quoted=True,
+                )
+            )
         where = build_where_clause(where_clause)
 
         return select_clause, where
@@ -76,10 +85,20 @@ class ThresholdQueryBuilder(QueryBuilder):
         select_clause = []
         column = threshold.column_name
         select_clause.append(
-            build_column(
+            build_column(
+                this=column,
+                alias=f"{DialectUtils.unnormalize_identifier(column)}_source",
+                table_name="source",
+                quoted=True,
+            ).transform(coalesce)
         )
         select_clause.append(
-            build_column(
+            build_column(
+                this=column,
+                alias=f"{DialectUtils.unnormalize_identifier(column)}_databricks",
+                table_name="databricks",
+                quoted=True,
+            ).transform(coalesce)
         )
         where_clause = exp.NEQ(this=base, expression=exp.Literal(this="0", is_string=False))
         return select_clause, where_clause
@@ -110,7 +129,13 @@ class ThresholdQueryBuilder(QueryBuilder):
             logger.error(error_message)
             raise ValueError(error_message)
 
-        select_clause.append(
+        select_clause.append(
+            build_column(
+                this=func(base=base, threshold=threshold),
+                alias=f"{DialectUtils.unnormalize_identifier(column)}_match",
+                quoted=True,
+            )
+        )
 
         return select_clause, where_clause
 
@@ -170,8 +195,8 @@ class ThresholdQueryBuilder(QueryBuilder):
             ),
             expression=exp.Is(
                 this=exp.Column(
-                    this=
-                    table=
+                    this=threshold.column_name,
+                    table="databricks",
                 ),
                 expression=exp.Null(),
             ),
@@ -211,21 +236,17 @@ class ThresholdQueryBuilder(QueryBuilder):
         self._validate(self.join_columns, "Join Columns are compulsory for threshold query")
         join_columns = self.join_columns if self.join_columns else set()
         keys: list[str] = sorted(self.partition_column.union(join_columns))
-        keys_select_alias = [
-            build_column(this=col, alias=self.table_conf.get_layer_tgt_to_src_col_mapping(col, self.layer))
-            for col in keys
-        ]
+        keys_select_alias = [self._build_column_with_alias(col) for col in keys]
         keys_expr = self._apply_user_transformation(keys_select_alias)
 
         # threshold column expression
-        threshold_alias = [
-            build_column(this=col, alias=self.table_conf.get_layer_tgt_to_src_col_mapping(col, self.layer))
-            for col in sorted(self.threshold_columns)
-        ]
+        threshold_alias = [self._build_column_with_alias(col) for col in sorted(self.threshold_columns)]
         thresholds_expr = threshold_alias
         if self.user_transformations:
            thresholds_expr = self._apply_user_transformation(threshold_alias)
 
-        query = (select(*keys_expr + thresholds_expr).from_(":tbl").where(self.filter
+        query = (select(*keys_expr + thresholds_expr).from_(":tbl").where(self.filter, dialect=self.engine)).sql(
+            dialect=self.engine
+        )
         logger.info(f"Threshold Query for {self.layer}: {query}")
         return query
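Each threshold column now yields explicitly quoted, suffixed projections (`<col>_source`, `<col>_databricks`, `<col>_match`), which the reconciliation step later filters on. A compact sketch of the alias pattern, using a simplified variant of `build_column` rather than the package's exact helper:

    import sqlglot.expressions as exp


    def build_column(this: str, table_name: str = "", quoted: bool = False, alias: str | None = None) -> exp.Expression:
        # Simplified variant of the expression_generator helper shown above.
        col = exp.Column(this=exp.Identifier(this=this, quoted=quoted), table=table_name)
        return exp.alias_(col, alias, quoted=quoted) if alias else col


    column = "unit price"
    src = build_column(column, table_name="source", alias=f"{column}_source", quoted=True)
    dbx = build_column(column, table_name="databricks", alias=f"{column}_databricks", quoted=True)

    print(src.sql(dialect="databricks"))  # source.`unit price` AS `unit price_source`
    print(dbx.sql(dialect="databricks"))  # databricks.`unit price` AS `unit price_databricks`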
databricks/labs/lakebridge/reconcile/recon_config.py
@@ -257,21 +257,6 @@ class Table:
             return set()
         return {self.get_layer_src_to_tgt_col_mapping(col, layer) for col in self.drop_columns}
 
-    def get_transformation_dict(self, layer: str) -> dict[str, str]:
-        if self.transformations:
-            if layer == "source":
-                return {
-                    trans.column_name: (trans.source if trans.source else trans.column_name)
-                    for trans in self.transformations
-                }
-            return {
-                self.get_layer_src_to_tgt_col_mapping(trans.column_name, layer): (
-                    trans.target if trans.target else self.get_layer_src_to_tgt_col_mapping(trans.column_name, layer)
-                )
-                for trans in self.transformations
-            }
-        return {}
-
     def get_partition_column(self, layer: str) -> set[str]:
         if self.jdbc_reader_options and layer == "source":
             if self.jdbc_reader_options.partition_column:
databricks/labs/lakebridge/reconcile/reconciliation.py
@@ -15,6 +15,7 @@ from databricks.labs.lakebridge.reconcile.compare import (
     reconcile_agg_data_per_rule,
 )
 from databricks.labs.lakebridge.reconcile.connectors.data_source import DataSource
+from databricks.labs.lakebridge.reconcile.connectors.dialect_utils import DialectUtils
 from databricks.labs.lakebridge.reconcile.exception import (
     DataSourceRuntimeException,
 )
@@ -455,7 +456,9 @@ class Reconciliation:
             options=table_conf.jdbc_reader_options,
         )
         threshold_columns = table_conf.get_threshold_columns("source")
-        failed_where_cond = " OR ".join(
+        failed_where_cond = " OR ".join(
+            ["`" + DialectUtils.unnormalize_identifier(name) + "_match` = 'Failed'" for name in threshold_columns]
+        )
         mismatched_df = threshold_result.filter(failed_where_cond)
         mismatched_count = mismatched_df.count()
         threshold_df = None
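The backtick-quoted `_match` aliases emitted by the threshold query are what the new failure condition targets; quoting is needed because the unnormalized name may contain spaces or mixed case. A short PySpark sketch of the condition and filter, with illustrative column names and rows:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    threshold_columns = ["amount", "unit price"]
    failed_where_cond = " OR ".join(f"`{name}_match` = 'Failed'" for name in threshold_columns)
    # `amount_match` = 'Failed' OR `unit price_match` = 'Failed'

    threshold_result = spark.createDataFrame(
        [("Match", "Failed"), ("Match", "Match")],
        ["amount_match", "unit price_match"],
    )
    mismatched_count = threshold_result.filter(failed_where_cond).count()  # 1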