databricks_labs_lakebridge-0.10.0-py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- databricks/__init__.py +3 -0
- databricks/labs/__init__.py +3 -0
- databricks/labs/lakebridge/__about__.py +2 -0
- databricks/labs/lakebridge/__init__.py +11 -0
- databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks/labs/lakebridge/base_install.py +12 -0
- databricks/labs/lakebridge/cli.py +449 -0
- databricks/labs/lakebridge/config.py +192 -0
- databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks/labs/lakebridge/install.py +849 -0
- databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks/labs/lakebridge/lineage.py +42 -0
- databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks/labs/lakebridge/uninstall.py +28 -0
- databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
- databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
- databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
- databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
- docs/lakebridge/src/components/Button.tsx +81 -0
- docs/lakebridge/src/css/custom.css +167 -0
- docs/lakebridge/src/css/table.css +20 -0
- docs/lakebridge/src/pages/index.tsx +57 -0
- docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py
@@ -0,0 +1,203 @@
import logging
import typing as t
from collections.abc import Iterable
from dataclasses import dataclass
from pathlib import Path

from sqlglot import expressions as exp, parse, transpile, Dialect
from sqlglot.errors import ErrorLevel, ParseError, TokenError, UnsupportedError
from sqlglot.expressions import Expression
from sqlglot.tokens import Token, TokenType

from databricks.labs.lakebridge.config import TranspileResult, TranspileConfig
from databricks.labs.lakebridge.helpers.file_utils import is_sql_file
from databricks.labs.lakebridge.helpers.string_utils import format_error_message
from databricks.labs.lakebridge.transpiler.sqlglot import lca_utils
from databricks.labs.lakebridge.transpiler.sqlglot.dialect_utils import get_dialect
from databricks.labs.lakebridge.transpiler.sqlglot.dialect_utils import SQLGLOT_DIALECTS
from databricks.labs.lakebridge.transpiler.transpile_status import TranspileError, ErrorKind, ErrorSeverity
from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine

logger = logging.getLogger(__name__)


@dataclass
class ParsedExpression:
    original_sql: str
    parsed_expression: Expression


@dataclass
class ParserProblem:
    original_sql: str
    transpile_error: TranspileError


class SqlglotEngine(TranspileEngine):

    @property
    def supported_dialects(self) -> list[str]:
        return sorted(SQLGLOT_DIALECTS.keys())

    def _partial_transpile(
        self,
        read_dialect: Dialect,
        write_dialect: Dialect,
        source_code: str,
        file_path: Path,
    ) -> tuple[list[str], list[ParserProblem]]:
        transpiled_sqls: list[str] = []
        parsed_expressions, problem_list = self.safe_parse(read_dialect, source_code, file_path)
        for parsed_expression in parsed_expressions:
            try:
                transpiled_sql = write_dialect.generate(parsed_expression.parsed_expression, pretty=True)
                # Checking if the transpiled SQL is a comment and raise an error
                if transpiled_sql.startswith("--"):
                    raise UnsupportedError("Unsupported SQL")
                transpiled_sqls.append(transpiled_sql)
            except TokenError as e:
                error_msg = format_error_message("Token Error", e, parsed_expression.original_sql)
                error = TranspileError("TOKEN_ERROR", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
                problem_list.append(ParserProblem(parsed_expression.original_sql, error))
            except ParseError as e:
                error_msg = format_error_message("Parsing Error", e, parsed_expression.original_sql)
                error = TranspileError("PARSE_ERROR", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
                problem_list.append(ParserProblem(parsed_expression.original_sql, error))
            except UnsupportedError as e:
                error_msg = format_error_message("Unsupported SQL Error", e, parsed_expression.original_sql)
                error = TranspileError("UNSUPPORTED_SQL", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
                problem_list.append(ParserProblem(parsed_expression.original_sql, error))
        return transpiled_sqls, problem_list

    async def initialize(self, config: TranspileConfig) -> None:
        pass

    async def shutdown(self) -> None:
        pass

    async def transpile(
        self, source_dialect: str, target_dialect: str, source_code: str, file_path: Path
    ) -> TranspileResult:
        read_dialect = get_dialect(source_dialect)
        error: TranspileError | None = self._check_supported(read_dialect, source_code, file_path)
        if error:
            return TranspileResult(source_code, 1, [error])
        write_dialect = get_dialect(target_dialect)
        try:
            transpiled_expressions = transpile(
                source_code, read=read_dialect, write=write_dialect, pretty=True, error_level=ErrorLevel.RAISE
            )
            transpiled_code = "\n".join(transpiled_expressions)
            return TranspileResult(transpiled_code, len(transpiled_expressions), [])
        except (ParseError, TokenError, UnsupportedError) as e:
            logger.error(f"Exception caught for file {file_path!s}: {e}")
            transpiled_expressions, problems = self._partial_transpile(
                read_dialect, write_dialect, source_code, file_path
            )
            transpiled_code = "\n".join(transpiled_expressions)
            return TranspileResult(transpiled_code, 1, [problem.transpile_error for problem in problems])

    def parse(
        self, source_dialect: str, source_sql: str, file_path: Path
    ) -> tuple[list[Expression | None] | None, TranspileError | None]:
        expression = None
        error = None
        try:
            expression = parse(source_sql, read=source_dialect, error_level=ErrorLevel.IMMEDIATE)
        except TokenError as e:
            error_msg = format_error_message("Token Error", e, source_sql)
            error = TranspileError("TOKEN_ERROR", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
        except ParseError as e:
            error_msg = format_error_message("Parsing Error", e, source_sql)
            error = TranspileError("PARSE_ERROR", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
        except UnsupportedError as e:
            error_msg = format_error_message("Unsupported SQL Error", e, source_sql)
            error = TranspileError("UNSUPPORTED_SQL", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
        return expression, error

    def analyse_table_lineage(
        self, source_dialect: str, source_code: str, file_path: Path
    ) -> Iterable[tuple[str, str]]:
        parsed_expression, _ = self.parse(source_dialect, source_code, file_path)
        if parsed_expression is not None:
            for expr in parsed_expression:
                child: str = str(file_path)
                if expr is not None:
                    # TODO: fix possible issue where the file reference is lost (if we have a 'create')
                    for change in expr.find_all(exp.Create, exp.Insert, exp.Merge, bfs=False):
                        child = self._find_root_table(change)

                    for query in expr.find_all(exp.Select, exp.Join, exp.With, bfs=False):
                        table = self._find_root_table(query)
                        if table:
                            yield table, child

    def safe_parse(
        self, read_dialect: Dialect, source_code: str, file_path: Path
    ) -> tuple[list[ParsedExpression], list[ParserProblem]]:
        try:
            tokens = read_dialect.tokenize(sql=source_code)
            return self._safe_parse(read_dialect, tokens, file_path)
        except TokenError as e:
            error_msg = format_error_message("Token error", e, source_code)
            error = TranspileError("TOKEN_ERROR", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
            return [], [ParserProblem(source_code, error)]

    def _safe_parse(
        self, read_dialect: Dialect, all_tokens: list[Token], file_path: Path
    ) -> tuple[list[ParsedExpression], list[ParserProblem]]:
        chunks = self._make_chunks(all_tokens)
        parsed_expressions: list[ParsedExpression] = []
        problems: list[ParserProblem] = []
        parser_opts = {"error_level": ErrorLevel.RAISE}
        parser = read_dialect.parser(**parser_opts)
        for sql, tokens in chunks:
            try:
                expressions = parser.parse(tokens)
                expression = t.cast(Expression, expressions[0])
                parsed_expressions.append(ParsedExpression(sql, expression))
            except TokenError as e:
                error_msg = format_error_message("Token error", e, sql)
                error = TranspileError("TOKEN_ERROR", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
                problems.append(ParserProblem(sql, error))
            except ParseError as e:
                error_msg = format_error_message("Parsing error", e, sql)
                error = TranspileError("PARSE_ERROR", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
                problems.append(ParserProblem(sql, error))
            except UnsupportedError as e:
                error_msg = format_error_message("Unsupported SQL error", e, sql)
                error = TranspileError("UNSUPPORTED_SQL", ErrorKind.PARSING, ErrorSeverity.ERROR, file_path, error_msg)
                problems.append(ParserProblem(sql, error))
            finally:
                parser.reset()
        return parsed_expressions, problems

    @staticmethod
    def _make_chunks(tokens: list[Token]) -> list[tuple[str, list[Token]]]:
        chunks: list[tuple[str, list[Token]]] = []
        current_chunk: list[Token] = []
        # Split tokens into chunks based on semicolons(or other separators)
        # Need to define the separator in Class Tokenizer
        for token in tokens:
            current_chunk.append(token)
            if token.token_type in {TokenType.SEMICOLON}:
                original_sql = " ".join([token.text for token in current_chunk]).strip()
                chunks.append((original_sql, current_chunk))
                # reset
                current_chunk = []
        # don't forget the last chunk
        if current_chunk:
            original_sql = " ".join([token.text for token in current_chunk]).strip()
            chunks.append((original_sql, current_chunk))
        return chunks

    @staticmethod
    def _find_root_table(expression) -> str:
        table = expression.find(exp.Table, bfs=False)
        return table.name if table else ""

    def _check_supported(self, source_dialect: Dialect, source_code: str, file_path: Path) -> TranspileError | None:
        return lca_utils.check_for_unsupported_lca(source_dialect, source_code, file_path)

    def is_supported_file(self, file: Path) -> bool:
        return is_sql_file(file)
databricks/labs/lakebridge/transpiler/transpile_engine.py
@@ -0,0 +1,49 @@
from __future__ import annotations
import abc
from pathlib import Path

from databricks.labs.lakebridge.config import TranspileResult, TranspileConfig


class TranspileEngine(abc.ABC):

    @classmethod
    def load_engine(cls, transpiler_config_path: Path) -> TranspileEngine:
        # TODO remove this once sqlglot transpiler is pluggable
        if str(transpiler_config_path) == "sqlglot":
            # pylint: disable=import-outside-toplevel, cyclic-import
            from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine

            return SqlglotEngine()
        if not transpiler_config_path.exists():
            raise ValueError(
                f"Error: Invalid value for '--transpiler-config-path': '{str(transpiler_config_path)}', file does not exist."
            )
        # pylint: disable=import-outside-toplevel, cyclic-import
        from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine

        return LSPEngine.from_config_path(transpiler_config_path)

    @abc.abstractmethod
    async def initialize(self, config: TranspileConfig) -> None: ...

    @abc.abstractmethod
    async def shutdown(self) -> None: ...

    @abc.abstractmethod
    async def transpile(
        self, source_dialect: str, target_dialect: str, source_code: str, file_path: Path
    ) -> TranspileResult: ...

    @property
    @abc.abstractmethod
    def supported_dialects(self) -> list[str]: ...

    def check_source_dialect(self, source_dialect: str | None) -> None:
        if source_dialect not in self.supported_dialects:
            raise ValueError(
                f"Invalid value for '--source-dialect': '{source_dialect}' is not one of {self.supported_dialects}."
            )

    @abc.abstractmethod
    def is_supported_file(self, file: Path) -> bool: ...
databricks/labs/lakebridge/transpiler/transpile_status.py
@@ -0,0 +1,68 @@
from __future__ import annotations

from dataclasses import dataclass
from enum import Enum
from pathlib import Path


# not using StrEnum because they only appear with Python 3.11
class ErrorSeverity(Enum):
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"


class ErrorKind(Enum):
    ANALYSIS = "ANALYSIS"
    PARSING = "PARSING"
    GENERATION = "GENERATION"
    VALIDATION = "VALIDATION"
    INTERNAL = "INTERNAL"


@dataclass
class CodePosition:
    line: int  # 0-based line number
    character: int  # 0-based character number


@dataclass
class CodeRange:
    start: CodePosition
    end: CodePosition


@dataclass
class TranspileError:
    code: str
    kind: ErrorKind
    severity: ErrorSeverity
    path: Path
    message: str
    range: CodeRange | None = None

    def __str__(self):
        return f"{type(self).__name__}(code={self.code}, kind={self.kind.name}, severity={self.severity.name}, path='{self.path!s}', message='{self.message}')"


@dataclass
class TranspileStatus:
    file_list: list[Path]
    no_of_transpiled_queries: int
    error_list: list[TranspileError]

    @property
    def analysis_error_count(self) -> int:
        return len([error for error in self.error_list if error.kind == ErrorKind.ANALYSIS])

    @property
    def parsing_error_count(self) -> int:
        return len([error for error in self.error_list if error.kind == ErrorKind.PARSING])

    @property
    def generation_error_count(self) -> int:
        return len([error for error in self.error_list if error.kind == ErrorKind.GENERATION])

    @property
    def validation_error_count(self) -> int:
        return len([error for error in self.error_list if error.kind == ErrorKind.VALIDATION])
databricks/labs/lakebridge/uninstall.py
@@ -0,0 +1,28 @@
import logging

from databricks.labs.blueprint.entrypoint import is_in_debug
from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.__about__ import __version__
from databricks.labs.lakebridge.contexts.application import ApplicationContext

logger = logging.getLogger("databricks.labs.lakebridge.install")


def run(context: ApplicationContext):
    context.workspace_installation.uninstall(context.remorph_config)


if __name__ == "__main__":
    logger.setLevel("INFO")
    if is_in_debug():
        logging.getLogger("databricks").setLevel(logging.DEBUG)

    run(
        ApplicationContext(
            WorkspaceClient(
                product="remorph",
                product_version=__version__,
            )
        )
    )
databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py
@@ -0,0 +1,80 @@
# pylint: disable=invalid-name
import logging


from databricks.labs.blueprint.installation import Installation
from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.contexts.application import ApplicationContext
from databricks.labs.lakebridge.deployment.recon import RECON_JOB_NAME
from databricks.labs.lakebridge.helpers import db_sql

from databricks.labs.lakebridge.deployment.upgrade_common import (
    current_table_columns,
    installed_table_columns,
    recreate_table_sql,
)

logger = logging.getLogger(__name__)


def _check_table_mismatch(
    installed_table,
    current_table,
) -> bool:
    current_table = [x for x in current_table if x != "operation_name"]
    # Compare the current main table columns with the installed main table columns
    if "operation_name" in installed_table and len(sorted(installed_table)) != len(sorted(current_table)):
        return True
    return False


def _upgrade_reconcile_metadata_main_table(
    installation: Installation,
    ws: WorkspaceClient,
    app_context: ApplicationContext,
):
    """
    Add operation_name column to the main table as part of the upgrade process.
    - Compare the current main table columns with the installed main table columns. If there is any mismatch:
        * Verify all the current main table columns are present in the installed main table and then use CTAS to recreate the main table
        * If any of the current main table columns are missing in the installed main table, prompt the user to recreate the main table:
            - If the user confirms, recreate the main table using the main DDL file, else log an error message and exit
    :param installation:
    :param ws:
    :param app_context:
    """
    reconcile_config = app_context.recon_config
    assert reconcile_config, "Reconcile config must be present to upgrade the reconcile metadata main table"
    table_name = "main"
    table_identifier = (
        f"{reconcile_config.metadata_config.catalog}.{reconcile_config.metadata_config.schema}.{table_name}"
    )
    installed_columns = installed_table_columns(ws, table_identifier)
    current_columns = current_table_columns(table_name, table_identifier)
    sql: str | None = f"ALTER TABLE {table_identifier} ADD COLUMN operation_name STRING AFTER report_type"
    if _check_table_mismatch(installed_columns, current_columns):
        logger.info("Recreating main table")
        sql = recreate_table_sql(table_identifier, installed_columns, current_columns, app_context.prompts)
    if sql:
        logger.debug(f"Executing SQL to upgrade main table: \n{sql}")
        db_sql.get_sql_backend(ws).execute(sql)
        installation.save(reconcile_config)
        logger.debug("Upgraded Reconcile main table")


def _upgrade_reconcile_workflow(app_context: ApplicationContext):
    if app_context.recon_config:
        logger.info("Upgrading reconcile workflow")
        wheels = app_context.product_info.wheels(app_context.workspace_client)
        with wheels as wheel_builder:
            wheel_path = f"/Workspace{wheel_builder.upload_to_wsfs()}"
            app_context.job_deployment.deploy_recon_job(RECON_JOB_NAME, app_context.recon_config, wheel_path)
        logger.debug("Upgraded reconcile workflow")


def upgrade(installation: Installation, ws: WorkspaceClient):
    app_context = ApplicationContext(ws)
    if app_context.recon_config is not None:
        _upgrade_reconcile_metadata_main_table(installation, ws, app_context)
        _upgrade_reconcile_workflow(app_context)
databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py
@@ -0,0 +1,51 @@
# pylint: disable=invalid-name
import logging

from databricks.labs.blueprint.installation import Installation
from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.contexts.application import ApplicationContext
from databricks.labs.lakebridge.deployment.upgrade_common import (
    current_table_columns,
    installed_table_columns,
    check_table_mismatch,
    recreate_table_sql,
)
from databricks.labs.lakebridge.helpers import db_sql

logger = logging.getLogger(__name__)


def _upgrade_reconcile_metadata_metrics_table(
    installation: Installation, ws: WorkspaceClient, app_context: ApplicationContext
):
    reconcile_config = app_context.recon_config
    assert reconcile_config, "Reconcile config must be present to upgrade the reconcile metadata main table"
    table_name = "metrics"
    table_identifier = (
        f"{reconcile_config.metadata_config.catalog}.{reconcile_config.metadata_config.schema}.{table_name}"
    )
    installed_columns = installed_table_columns(ws, table_identifier)
    current_columns = current_table_columns(table_name, table_identifier)
    sqls: list | None = [
        f"ALTER TABLE {table_identifier} SET TBLPROPERTIES ('delta.enableTypeWidening' = 'true')",
        f"ALTER TABLE {table_identifier} ALTER COLUMN recon_metrics.row_comparison.missing_in_source TYPE BIGINT",
        f"ALTER TABLE {table_identifier} ALTER COLUMN recon_metrics.row_comparison.missing_in_target TYPE BIGINT",
        f"ALTER TABLE {table_identifier} ALTER COLUMN recon_metrics.column_comparison.absolute_mismatch TYPE BIGINT",
        f"ALTER TABLE {table_identifier} ALTER COLUMN recon_metrics.column_comparison.threshold_mismatch TYPE BIGINT",
    ]
    if check_table_mismatch(installed_columns, current_columns):
        logger.info("Recreating main table")
        sqls = [recreate_table_sql(table_identifier, installed_columns, current_columns, app_context.prompts)]
    if sqls:
        for sql in sqls:
            logger.debug(f"Executing SQL to upgrade metrics table: \n{sql}")
            db_sql.get_sql_backend(ws).execute(sql)
        installation.save(reconcile_config)
        logger.debug("Upgraded Reconcile metrics table")


def upgrade(installation: Installation, ws: WorkspaceClient):
    app_context = ApplicationContext(ws)
    if app_context.recon_config is not None:
        _upgrade_reconcile_metadata_metrics_table(installation, ws, app_context)
databricks_labs_lakebridge-0.10.0.dist-info/METADATA
@@ -0,0 +1,58 @@
Metadata-Version: 2.4
Name: databricks-labs-lakebridge
Version: 0.10.0
Summary: Fast and predictable migrations to Databricks Lakehouse Platform. This tool is designed to help you migrate your data and workloads to the Databricks Lakehouse Platform in a fast, predictable, and reliable way. It provides a set of tools and utilities to help you reconcile your data and workloads, assess your current state, and plan your migration.
Project-URL: Documentation, https://github.com/databrickslabs/lakebridge
Project-URL: Issues, https://github.com/databrickslabs/lakebridge/issues
Project-URL: Source, https://github.com/databrickslabs/lakebridge
Maintainer-email: Databricks Labs <labs-oss@databricks.com>
License-File: LICENSE
License-File: NOTICE
Keywords: Databricks
Classifier: Development Status :: 4 - Beta
Classifier: Environment :: Console
Classifier: Framework :: Pytest
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: System Administrators
Classifier: License :: Other/Proprietary License
Classifier: Operating System :: MacOS
Classifier: Operating System :: Microsoft :: Windows
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Topic :: Software Development :: Libraries
Classifier: Topic :: Utilities
Requires-Python: >=3.10
Requires-Dist: cryptography<45.1.0,>=44.0.2
Requires-Dist: databricks-bb-analyzer~=0.1.6
Requires-Dist: databricks-labs-blueprint[yaml]<0.12.0,>=0.11.0
Requires-Dist: databricks-labs-lsql==0.16.0
Requires-Dist: databricks-sdk~=0.51.0
Requires-Dist: duckdb~=1.2.2
Requires-Dist: pygls~=2.0.0a2
Requires-Dist: pyodbc~=5.2.0
Requires-Dist: sqlalchemy~=2.0.40
Requires-Dist: sqlglot==26.1.3
Requires-Dist: standard-distutils~=3.11.9; python_version >= '3.11'
Description-Content-Type: text/markdown

Databricks Labs Lakebridge
---



[](https://github.com/databrickslabs/remorph/actions/workflows/push.yml)


-----
Documentation
The complete documentation is available at: https://databrickslabs.github.io/lakebridge/

Contribution
Please see the contribution guidance here on how to contribute to the project (build, test, and submit a PR).

Project Support
Please note that this project is provided for your exploration only and is not formally supported by Databricks with Service Level Agreements (SLAs). It is provided AS-IS, and we do not make any guarantees. Please do not submit a support ticket relating to any issues arising from the use of this project.

Any issues discovered through the use of this project should be filed as GitHub Issues on this repository. They will be reviewed as time permits, but no formal SLAs for support exist.