databricks-labs-lakebridge 0.10.7__py3-none-any.whl → 0.10.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/labs/lakebridge/__about__.py +1 -1
- databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
- databricks/labs/lakebridge/base_install.py +1 -5
- databricks/labs/lakebridge/cli.py +13 -6
- databricks/labs/lakebridge/helpers/validation.py +5 -3
- databricks/labs/lakebridge/install.py +40 -481
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +9 -5
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +2 -1
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +2 -1
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +50 -29
- databricks/labs/lakebridge/reconcile/connectors/tsql.py +2 -1
- databricks/labs/lakebridge/reconcile/query_builder/base.py +50 -11
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
- databricks/labs/lakebridge/reconcile/recon_config.py +0 -15
- databricks/labs/lakebridge/reconcile/reconciliation.py +4 -1
- databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +11 -31
- databricks/labs/lakebridge/reconcile/trigger_recon_service.py +4 -1
- databricks/labs/lakebridge/transpiler/execute.py +34 -28
- databricks/labs/lakebridge/transpiler/installers.py +523 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +2 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/METADATA +1 -1
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/RECORD +30 -28
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/WHEEL +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/entry_points.txt +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/LICENSE +0 -0
- {databricks_labs_lakebridge-0.10.7.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/NOTICE +0 -0
@@ -1,2 +1,2 @@
|
|
1
1
|
# DO NOT MODIFY THIS FILE
|
2
|
-
__version__ = "0.10.7"
|
2
|
+
__version__ = "0.10.8"
|
@@ -0,0 +1,103 @@
|
|
1
|
+
import os
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from duckdb import DuckDBPyConnection
|
4
|
+
|
5
|
+
from databricks.labs.lakebridge.assessments.pipeline import PipelineClass
|
6
|
+
|
7
|
+
PROFILER_DB_NAME = "profiler_extract.db"
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass(frozen=True)
|
11
|
+
class ValidationOutcome:
|
12
|
+
"""A data class that holds the outcome of a table validation check."""
|
13
|
+
|
14
|
+
table: str
|
15
|
+
column: str | None
|
16
|
+
strategy: str
|
17
|
+
outcome: str
|
18
|
+
severity: str
|
19
|
+
|
20
|
+
|
21
|
+
class ValidationStrategy:
    """Base class describing the interface of a Profiler table validation.

    Concrete checks subclass this and override ``validate``.
    """

    def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
        """
        Runs the validation check.
        input:
          connection: a DuckDB connection object
        returns:
          a ValidationOutcome object
        raises:
          NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
|
26
|
+
|
27
|
+
|
28
|
+
class NullValidationCheck(ValidationStrategy):
    """Validation check that fails when a profiler table column holds NULLs."""

    def __init__(self, table, column, severity="WARN"):
        # The strategy name reported in outcomes is the concrete class name.
        self.name = type(self).__name__
        self.table = table
        self.column = column
        self.severity = severity

    def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
        """
        Validates that a column does not contain null values.
        input:
          connection: a DuckDB connection object
        returns:
          a ValidationOutcome object; the outcome is "PASS" only when the
          NULL count is zero
        """
        # Table and column names are interpolated directly into the SQL text.
        null_count_query = f"SELECT COUNT(*) FROM {self.table} WHERE {self.column} IS NULL"
        row = connection.execute(null_count_query).fetchone()
        # A missing result row is reported as a failure, as is any NULL found.
        passed = bool(row) and not row[0] > 0
        outcome = "PASS" if passed else "FAIL"
        return ValidationOutcome(self.table, self.column, self.name, outcome, self.severity)
|
50
|
+
|
51
|
+
|
52
|
+
class EmptyTableValidationCheck(ValidationStrategy):
    """Concrete class for validating empty tables from a profiler run."""

    def __init__(self, table, severity="WARN"):
        # The strategy name reported in outcomes is the concrete class name.
        self.name = self.__class__.__name__
        self.table = table
        self.severity = severity

    def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
        """Validates that a table is not empty.
        input:
          connection: a DuckDB connection object
        returns:
          a ValidationOutcome object; the outcome is "PASS" when the table
          has at least one row and "FAIL" otherwise
        """
        # The table name is interpolated directly into the SQL text.
        result = connection.execute(f"SELECT COUNT(*) FROM {self.table}").fetchone()
        # A missing result row is treated the same as an empty table.
        outcome = "PASS" if result and result[0] > 0 else "FAIL"
        # Table-level check, so no column is recorded in the outcome.
        return ValidationOutcome(self.table, None, self.name, outcome, self.severity)
|
74
|
+
|
75
|
+
|
76
|
+
def get_profiler_extract_path(pipeline_config_path: str) -> str:
    """
    Returns the filesystem path of the profiler extract database.
    input:
       pipeline_config_path: the location of the pipeline definition .yml file
    returns:
       the filesystem path to the profiler extract database
    """
    pipeline_config = PipelineClass.load_config_from_yaml(pipeline_config_path)
    normalized_db_path = os.path.normpath(pipeline_config.extract_folder)
    # Join with the platform separator instead of a hard-coded "/": normpath
    # already emits native separators, and joining avoids a doubled slash
    # when the extract folder is a filesystem root.
    return os.path.join(normalized_db_path, PROFILER_DB_NAME)
|
88
|
+
|
89
|
+
|
90
|
+
def build_validation_report(
    validations: list[ValidationStrategy], connection: DuckDBPyConnection
) -> list[ValidationOutcome]:
    """
    Runs every validation check and collects the outcomes in input order.
    input:
      validations: a list of ValidationStrategy objects
      connection: a DuckDB connection object
    returns: a list of ValidationOutcomes
    """
    return [check.validate(connection) for check in validations]
|
@@ -19,11 +19,7 @@ def main() -> None:
|
|
19
19
|
WorkspaceClient(product="lakebridge", product_version=__version__),
|
20
20
|
transpiler_repository=TranspilerRepository.user_home(),
|
21
21
|
)
|
22
|
-
if installer.
|
23
|
-
logger.warning(
|
24
|
-
"Detected existing Lakebridge transpilers; run 'databricks labs lakebridge install-transpile' to upgrade them."
|
25
|
-
)
|
26
|
-
else:
|
22
|
+
if not installer.upgrade_installed_transpilers():
|
27
23
|
logger.debug("No existing Lakebridge transpilers detected; assuming fresh installation.")
|
28
24
|
|
29
25
|
logger.info("Successfully Setup Lakebridge Components Locally")
|
@@ -74,6 +74,7 @@ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str):
|
|
74
74
|
|
75
75
|
@lakebridge.command
|
76
76
|
def transpile(
|
77
|
+
*,
|
77
78
|
w: WorkspaceClient,
|
78
79
|
transpiler_config_path: str | None = None,
|
79
80
|
source_dialect: str | None = None,
|
@@ -340,6 +341,8 @@ class _TranspileConfigChecker:
|
|
340
341
|
supported_dialects = ", ".join(self._transpiler_repository.all_dialects())
|
341
342
|
msg = f"{msg_prefix}: {source_dialect!r} (supported dialects: {supported_dialects})"
|
342
343
|
raise_validation_exception(msg)
|
344
|
+
else:
|
345
|
+
self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
|
343
346
|
else:
|
344
347
|
# Check the source dialect against the engine.
|
345
348
|
if source_dialect not in engine.supported_dialects:
|
@@ -366,6 +369,7 @@ class _TranspileConfigChecker:
|
|
366
369
|
source_dialect = self._prompts.choice("Select the source dialect:", list(supported_dialects))
|
367
370
|
engine = self._configure_transpiler_config_path(source_dialect)
|
368
371
|
assert engine is not None, "No transpiler engine available for a supported dialect; configuration is invalid."
|
372
|
+
self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
|
369
373
|
return engine
|
370
374
|
|
371
375
|
def _check_lsp_engine(self) -> TranspileEngine:
|
@@ -518,7 +522,7 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s
|
|
518
522
|
|
519
523
|
|
520
524
|
@lakebridge.command
|
521
|
-
def reconcile(w: WorkspaceClient) -> None:
|
525
|
+
def reconcile(*, w: WorkspaceClient) -> None:
|
522
526
|
"""[EXPERIMENTAL] Reconciles source to Databricks datasets"""
|
523
527
|
with_user_agent_extra("cmd", "execute-reconcile")
|
524
528
|
ctx = ApplicationContext(w)
|
@@ -534,7 +538,7 @@ def reconcile(w: WorkspaceClient) -> None:
|
|
534
538
|
|
535
539
|
|
536
540
|
@lakebridge.command
|
537
|
-
def aggregates_reconcile(w: WorkspaceClient) -> None:
|
541
|
+
def aggregates_reconcile(*, w: WorkspaceClient) -> None:
|
538
542
|
"""[EXPERIMENTAL] Reconciles Aggregated source to Databricks datasets"""
|
539
543
|
with_user_agent_extra("cmd", "execute-aggregates-reconcile")
|
540
544
|
ctx = ApplicationContext(w)
|
@@ -552,8 +556,8 @@ def aggregates_reconcile(w: WorkspaceClient) -> None:
|
|
552
556
|
|
553
557
|
@lakebridge.command
|
554
558
|
def generate_lineage(
|
555
|
-
w: WorkspaceClient,
|
556
559
|
*,
|
560
|
+
w: WorkspaceClient,
|
557
561
|
source_dialect: str | None = None,
|
558
562
|
input_source: str,
|
559
563
|
output_folder: str,
|
@@ -578,7 +582,7 @@ def generate_lineage(
|
|
578
582
|
|
579
583
|
|
580
584
|
@lakebridge.command
|
581
|
-
def configure_secrets(w: WorkspaceClient) -> None:
|
585
|
+
def configure_secrets(*, w: WorkspaceClient) -> None:
|
582
586
|
"""Setup reconciliation connection profile details as Secrets on Databricks Workspace"""
|
583
587
|
recon_conf = ReconConfigPrompts(w)
|
584
588
|
|
@@ -604,8 +608,9 @@ def configure_database_profiler() -> None:
|
|
604
608
|
assessment.run()
|
605
609
|
|
606
610
|
|
607
|
-
@lakebridge.command
|
611
|
+
@lakebridge.command
|
608
612
|
def install_transpile(
|
613
|
+
*,
|
609
614
|
w: WorkspaceClient,
|
610
615
|
artifact: str | None = None,
|
611
616
|
transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
|
@@ -622,6 +627,7 @@ def install_transpile(
|
|
622
627
|
|
623
628
|
@lakebridge.command(is_unauthenticated=False)
|
624
629
|
def configure_reconcile(
|
630
|
+
*,
|
625
631
|
w: WorkspaceClient,
|
626
632
|
transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
|
627
633
|
) -> None:
|
@@ -637,8 +643,9 @@ def configure_reconcile(
|
|
637
643
|
reconcile_installer.run(module="reconcile")
|
638
644
|
|
639
645
|
|
640
|
-
@lakebridge.command
|
646
|
+
@lakebridge.command
|
641
647
|
def analyze(
|
648
|
+
*,
|
642
649
|
w: WorkspaceClient,
|
643
650
|
source_directory: str | None = None,
|
644
651
|
report_file: str | None = None,
|
@@ -37,19 +37,21 @@ class Validator:
|
|
37
37
|
config.catalog_name,
|
38
38
|
config.schema_name,
|
39
39
|
)
|
40
|
+
# Some errors don't return the query text along with the error message, so they need to be handled separately
|
41
|
+
static_errors_lkp = ["[UNRESOLVED_ROUTINE]", "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION]"]
|
40
42
|
if is_valid:
|
41
43
|
result = sql_text
|
42
44
|
if exception_type is not None:
|
43
45
|
exception_msg = f"[{exception_type.upper()}]: {exception_msg}"
|
44
46
|
else:
|
45
47
|
query = ""
|
46
|
-
if
|
48
|
+
if any(err in str(exception_msg) for err in static_errors_lkp):
|
47
49
|
query = sql_text
|
48
50
|
buffer = StringIO()
|
49
51
|
buffer.write("-------------- Exception Start-------------------\n")
|
50
|
-
buffer.write("
|
52
|
+
buffer.write("/*\n")
|
51
53
|
buffer.write(str(exception_msg))
|
52
|
-
buffer.write("\n
|
54
|
+
buffer.write("\n*/\n")
|
53
55
|
buffer.write(query)
|
54
56
|
buffer.write("\n ---------------Exception End --------------------\n")
|
55
57
|
|