databricks-labs-lakebridge 0.10.6__py3-none-any.whl → 0.10.8__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (46):
  1. databricks/labs/lakebridge/__about__.py +1 -1
  2. databricks/labs/lakebridge/analyzer/__init__.py +0 -0
  3. databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +95 -0
  4. databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
  5. databricks/labs/lakebridge/base_install.py +20 -3
  6. databricks/labs/lakebridge/cli.py +32 -59
  7. databricks/labs/lakebridge/contexts/application.py +7 -0
  8. databricks/labs/lakebridge/deployment/job.py +2 -2
  9. databricks/labs/lakebridge/helpers/file_utils.py +36 -0
  10. databricks/labs/lakebridge/helpers/validation.py +5 -3
  11. databricks/labs/lakebridge/install.py +73 -484
  12. databricks/labs/lakebridge/reconcile/compare.py +70 -33
  13. databricks/labs/lakebridge/reconcile/connectors/data_source.py +24 -1
  14. databricks/labs/lakebridge/reconcile/connectors/databricks.py +12 -1
  15. databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +126 -0
  16. databricks/labs/lakebridge/reconcile/connectors/models.py +7 -0
  17. databricks/labs/lakebridge/reconcile/connectors/oracle.py +12 -1
  18. databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
  19. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +63 -30
  20. databricks/labs/lakebridge/reconcile/connectors/tsql.py +28 -2
  21. databricks/labs/lakebridge/reconcile/constants.py +4 -3
  22. databricks/labs/lakebridge/reconcile/execute.py +9 -810
  23. databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +133 -0
  24. databricks/labs/lakebridge/reconcile/query_builder/base.py +53 -18
  25. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
  26. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
  27. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
  28. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
  29. databricks/labs/lakebridge/reconcile/recon_config.py +3 -15
  30. databricks/labs/lakebridge/reconcile/recon_output_config.py +2 -1
  31. databricks/labs/lakebridge/reconcile/reconciliation.py +511 -0
  32. databricks/labs/lakebridge/reconcile/schema_compare.py +26 -19
  33. databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +78 -0
  34. databricks/labs/lakebridge/reconcile/trigger_recon_service.py +256 -0
  35. databricks/labs/lakebridge/reconcile/utils.py +38 -0
  36. databricks/labs/lakebridge/transpiler/execute.py +34 -28
  37. databricks/labs/lakebridge/transpiler/installers.py +523 -0
  38. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +47 -60
  39. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +2 -0
  40. databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -18
  41. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/METADATA +1 -1
  42. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/RECORD +46 -35
  43. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/WHEEL +0 -0
  44. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/entry_points.txt +0 -0
  45. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/LICENSE +0 -0
  46. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/NOTICE +0 -0
@@ -9,7 +9,9 @@ from sqlglot import Dialect
9
9
 
10
10
  from databricks.labs.lakebridge.reconcile.connectors.data_source import DataSource
11
11
  from databricks.labs.lakebridge.reconcile.connectors.jdbc_reader import JDBCReaderMixin
12
+ from databricks.labs.lakebridge.reconcile.connectors.models import NormalizedIdentifier
12
13
  from databricks.labs.lakebridge.reconcile.connectors.secrets import SecretsMixin
14
+ from databricks.labs.lakebridge.reconcile.connectors.dialect_utils import DialectUtils
13
15
  from databricks.labs.lakebridge.reconcile.recon_config import JdbcReaderOptions, Schema
14
16
  from databricks.sdk import WorkspaceClient
15
17
 
@@ -49,6 +51,7 @@ _SCHEMA_QUERY = """SELECT
49
51
 
50
52
  class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
51
53
  _DRIVER = "sqlserver"
54
+ _IDENTIFIER_DELIMITER = {"prefix": "[", "suffix": "]"}
52
55
 
53
56
  def __init__(
54
57
  self,
@@ -106,6 +109,7 @@ class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
106
109
  catalog: str | None,
107
110
  schema: str,
108
111
  table: str,
112
+ normalize: bool = True,
109
113
  ) -> list[Schema]:
110
114
  """
111
115
  Fetch the Schema from the INFORMATION_SCHEMA.COLUMNS table in SQL Server.
@@ -122,11 +126,33 @@ class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
122
126
  try:
123
127
  logger.debug(f"Fetching schema using query: \n`{schema_query}`")
124
128
  logger.info(f"Fetching Schema: Started at: {datetime.now()}")
125
- schema_metadata = self.reader(schema_query).load().collect()
129
+ df = self.reader(schema_query).load()
130
+ schema_metadata = df.select([col(c).alias(c.lower()) for c in df.columns]).collect()
126
131
  logger.info(f"Schema fetched successfully. Completed at: {datetime.now()}")
127
- return [Schema(field.COLUMN_NAME.lower(), field.DATA_TYPE.lower()) for field in schema_metadata]
132
+ return [self._map_meta_column(field, normalize) for field in schema_metadata]
128
133
  except (RuntimeError, PySparkException) as e:
129
134
  return self.log_and_throw_exception(e, "schema", schema_query)
130
135
 
131
136
  def reader(self, query: str, prepare_query_str="") -> DataFrameReader:
132
137
  return self._get_jdbc_reader(query, self.get_jdbc_url, self._DRIVER, prepare_query_str)
138
+
139
+ def normalize_identifier(self, identifier: str) -> NormalizedIdentifier:
140
+ return DialectUtils.normalize_identifier(
141
+ TSQLServerDataSource._normalize_quotes(identifier),
142
+ source_start_delimiter=TSQLServerDataSource._IDENTIFIER_DELIMITER["prefix"],
143
+ source_end_delimiter=TSQLServerDataSource._IDENTIFIER_DELIMITER["suffix"],
144
+ )
145
+
146
+ @staticmethod
147
+ def _normalize_quotes(identifier: str):
148
+ if DialectUtils.is_already_delimited(identifier, '"', '"'):
149
+ identifier = identifier[1:-1]
150
+ identifier = identifier.replace('""', '"')
151
+ identifier = (
152
+ TSQLServerDataSource._IDENTIFIER_DELIMITER["prefix"]
153
+ + identifier
154
+ + TSQLServerDataSource._IDENTIFIER_DELIMITER["suffix"]
155
+ )
156
+ return identifier
157
+
158
+ return identifier
@@ -15,10 +15,11 @@ class AutoName(Enum):
15
15
 
16
16
 
17
17
  class ReconSourceType(AutoName):
18
- SNOWFLAKE = auto()
19
- ORACLE = auto()
20
18
  DATABRICKS = auto()
21
- TSQL = auto()
19
+ MSSQL = auto()
20
+ ORACLE = auto()
21
+ SNOWFLAKE = auto()
22
+ SYNAPSE = auto()
22
23
 
23
24
 
24
25
  class ReconReportType(AutoName):