databricks-labs-lakebridge 0.10.6__py3-none-any.whl → 0.10.8__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- databricks/labs/lakebridge/__about__.py +1 -1
- databricks/labs/lakebridge/analyzer/__init__.py +0 -0
- databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +95 -0
- databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
- databricks/labs/lakebridge/base_install.py +20 -3
- databricks/labs/lakebridge/cli.py +32 -59
- databricks/labs/lakebridge/contexts/application.py +7 -0
- databricks/labs/lakebridge/deployment/job.py +2 -2
- databricks/labs/lakebridge/helpers/file_utils.py +36 -0
- databricks/labs/lakebridge/helpers/validation.py +5 -3
- databricks/labs/lakebridge/install.py +73 -484
- databricks/labs/lakebridge/reconcile/compare.py +70 -33
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +24 -1
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +12 -1
- databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +126 -0
- databricks/labs/lakebridge/reconcile/connectors/models.py +7 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +12 -1
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +63 -30
- databricks/labs/lakebridge/reconcile/connectors/tsql.py +28 -2
- databricks/labs/lakebridge/reconcile/constants.py +4 -3
- databricks/labs/lakebridge/reconcile/execute.py +9 -810
- databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +133 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +53 -18
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
- databricks/labs/lakebridge/reconcile/recon_config.py +3 -15
- databricks/labs/lakebridge/reconcile/recon_output_config.py +2 -1
- databricks/labs/lakebridge/reconcile/reconciliation.py +511 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +26 -19
- databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +78 -0
- databricks/labs/lakebridge/reconcile/trigger_recon_service.py +256 -0
- databricks/labs/lakebridge/reconcile/utils.py +38 -0
- databricks/labs/lakebridge/transpiler/execute.py +34 -28
- databricks/labs/lakebridge/transpiler/installers.py +523 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +47 -60
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +2 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -18
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/METADATA +1 -1
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/RECORD +46 -35
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/WHEEL +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/entry_points.txt +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/LICENSE +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/NOTICE +0 -0
@@ -9,7 +9,9 @@ from sqlglot import Dialect
|
|
9
9
|
|
10
10
|
from databricks.labs.lakebridge.reconcile.connectors.data_source import DataSource
|
11
11
|
from databricks.labs.lakebridge.reconcile.connectors.jdbc_reader import JDBCReaderMixin
|
12
|
+
from databricks.labs.lakebridge.reconcile.connectors.models import NormalizedIdentifier
|
12
13
|
from databricks.labs.lakebridge.reconcile.connectors.secrets import SecretsMixin
|
14
|
+
from databricks.labs.lakebridge.reconcile.connectors.dialect_utils import DialectUtils
|
13
15
|
from databricks.labs.lakebridge.reconcile.recon_config import JdbcReaderOptions, Schema
|
14
16
|
from databricks.sdk import WorkspaceClient
|
15
17
|
|
@@ -49,6 +51,7 @@ _SCHEMA_QUERY = """SELECT
|
|
49
51
|
|
50
52
|
class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
|
51
53
|
_DRIVER = "sqlserver"
|
54
|
+
_IDENTIFIER_DELIMITER = {"prefix": "[", "suffix": "]"}
|
52
55
|
|
53
56
|
def __init__(
|
54
57
|
self,
|
@@ -106,6 +109,7 @@ class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
|
|
106
109
|
catalog: str | None,
|
107
110
|
schema: str,
|
108
111
|
table: str,
|
112
|
+
normalize: bool = True,
|
109
113
|
) -> list[Schema]:
|
110
114
|
"""
|
111
115
|
Fetch the Schema from the INFORMATION_SCHEMA.COLUMNS table in SQL Server.
|
@@ -122,11 +126,33 @@ class TSQLServerDataSource(DataSource, SecretsMixin, JDBCReaderMixin):
|
|
122
126
|
try:
|
123
127
|
logger.debug(f"Fetching schema using query: \n`{schema_query}`")
|
124
128
|
logger.info(f"Fetching Schema: Started at: {datetime.now()}")
|
125
|
-
|
129
|
+
df = self.reader(schema_query).load()
|
130
|
+
schema_metadata = df.select([col(c).alias(c.lower()) for c in df.columns]).collect()
|
126
131
|
logger.info(f"Schema fetched successfully. Completed at: {datetime.now()}")
|
127
|
-
return [
|
132
|
+
return [self._map_meta_column(field, normalize) for field in schema_metadata]
|
128
133
|
except (RuntimeError, PySparkException) as e:
|
129
134
|
return self.log_and_throw_exception(e, "schema", schema_query)
|
130
135
|
|
131
136
|
def reader(self, query: str, prepare_query_str="") -> DataFrameReader:
|
132
137
|
return self._get_jdbc_reader(query, self.get_jdbc_url, self._DRIVER, prepare_query_str)
|
138
|
+
|
139
|
+
def normalize_identifier(self, identifier: str) -> NormalizedIdentifier:
|
140
|
+
return DialectUtils.normalize_identifier(
|
141
|
+
TSQLServerDataSource._normalize_quotes(identifier),
|
142
|
+
source_start_delimiter=TSQLServerDataSource._IDENTIFIER_DELIMITER["prefix"],
|
143
|
+
source_end_delimiter=TSQLServerDataSource._IDENTIFIER_DELIMITER["suffix"],
|
144
|
+
)
|
145
|
+
|
146
|
+
@staticmethod
|
147
|
+
def _normalize_quotes(identifier: str):
|
148
|
+
if DialectUtils.is_already_delimited(identifier, '"', '"'):
|
149
|
+
identifier = identifier[1:-1]
|
150
|
+
identifier = identifier.replace('""', '"')
|
151
|
+
identifier = (
|
152
|
+
TSQLServerDataSource._IDENTIFIER_DELIMITER["prefix"]
|
153
|
+
+ identifier
|
154
|
+
+ TSQLServerDataSource._IDENTIFIER_DELIMITER["suffix"]
|
155
|
+
)
|
156
|
+
return identifier
|
157
|
+
|
158
|
+
return identifier
|
@@ -15,10 +15,11 @@ class AutoName(Enum):
|
|
15
15
|
|
16
16
|
|
17
17
|
class ReconSourceType(AutoName):
|
18
|
-
SNOWFLAKE = auto()
|
19
|
-
ORACLE = auto()
|
20
18
|
DATABRICKS = auto()
|
21
|
-
|
19
|
+
MSSQL = auto()
|
20
|
+
ORACLE = auto()
|
21
|
+
SNOWFLAKE = auto()
|
22
|
+
SYNAPSE = auto()
|
22
23
|
|
23
24
|
|
24
25
|
class ReconReportType(AutoName):
|