databricks-labs-lakebridge 0.10.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- databricks/__init__.py +3 -0
- databricks/labs/__init__.py +3 -0
- databricks/labs/lakebridge/__about__.py +2 -0
- databricks/labs/lakebridge/__init__.py +11 -0
- databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks/labs/lakebridge/base_install.py +12 -0
- databricks/labs/lakebridge/cli.py +449 -0
- databricks/labs/lakebridge/config.py +192 -0
- databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks/labs/lakebridge/install.py +849 -0
- databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks/labs/lakebridge/lineage.py +42 -0
- databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks/labs/lakebridge/uninstall.py +28 -0
- databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
- databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
- databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
- databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
- docs/lakebridge/src/components/Button.tsx +81 -0
- docs/lakebridge/src/css/custom.css +167 -0
- docs/lakebridge/src/css/table.css +20 -0
- docs/lakebridge/src/pages/index.tsx +57 -0
- docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py (new file)
@@ -0,0 +1,564 @@

```python
from __future__ import annotations

import abc
import asyncio
import logging
import os
import sys
from collections.abc import Callable, Sequence, Mapping
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Literal

import attrs
import yaml
from lsprotocol import types as types_module
from lsprotocol.types import (
    CLIENT_REGISTER_CAPABILITY,
    METHOD_TO_TYPES,
    ClientCapabilities,
    ClientInfo,
    Diagnostic,
    DiagnosticSeverity,
    DidCloseTextDocumentParams,
    DidOpenTextDocumentParams,
    InitializeParams,
    InitializeResult,
    LanguageKind,
)
from lsprotocol.types import Position as LSPPosition
from lsprotocol.types import Range as LSPRange
from lsprotocol.types import Registration, RegistrationParams, TextDocumentIdentifier, TextDocumentItem, TextEdit
from pygls.exceptions import FeatureRequestError
from pygls.lsp.client import BaseLanguageClient

from databricks.labs.blueprint.wheels import ProductInfo
from databricks.labs.lakebridge.config import LSPConfigOptionV1, TranspileConfig, TranspileResult
from databricks.labs.lakebridge.errors.exceptions import IllegalStateException
from databricks.labs.lakebridge.helpers.file_utils import is_sql_file, is_dbt_project_file
from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine
from databricks.labs.lakebridge.transpiler.transpile_status import (
    CodePosition,
    CodeRange,
    ErrorKind,
    ErrorSeverity,
    TranspileError,
)

logger = logging.getLogger(__name__)


@dataclass
class _LSPRemorphConfigV1:
    name: str
    dialects: list[str]
    env_vars: dict[str, str]
    command_line: list[str]

    @classmethod
    def parse(cls, data: dict[str, Any]) -> _LSPRemorphConfigV1:
        version = data.get("version", 0)
        if version != 1:
            raise ValueError(f"Unsupported transpiler config version: {version}")
        name: str | None = data.get("name", None)
        if not name:
            raise ValueError("Missing 'name' entry")
        dialects = data.get("dialects", [])
        if len(dialects) == 0:
            raise ValueError("Missing 'dialects' entry")
        env_vars = data.get("environment", {})
        command_line = data.get("command_line", [])
        if len(command_line) == 0:
            raise ValueError("Missing 'command_line' entry")
        return _LSPRemorphConfigV1(name, dialects, env_vars, command_line)


@dataclass
class LSPConfig:
    path: Path
    remorph: _LSPRemorphConfigV1
    options: dict[str, list[LSPConfigOptionV1]]
    custom: dict[str, Any]

    @property
    def name(self):
        return self.remorph.name

    def options_for_dialect(self, source_dialect: str) -> list[LSPConfigOptionV1]:
        return self.options.get("all", []) + self.options.get(source_dialect, [])

    @classmethod
    def load(cls, path: Path) -> LSPConfig:
        yaml_text = path.read_text()
        data = yaml.safe_load(yaml_text)
        if not isinstance(data, dict):
            raise ValueError(f"Invalid transpiler config, expecting a dict, got a {type(data).__name__}")
        remorph_data = data.get("remorph", None)
        if not isinstance(remorph_data, dict):
            raise ValueError(f"Invalid transpiler config, expecting a 'remorph' dict entry, got {remorph_data}")
        remorph = _LSPRemorphConfigV1.parse(remorph_data)
        options_data = data.get("options", {})
        if not isinstance(options_data, dict):
            raise ValueError(f"Invalid transpiler config, expecting an 'options' dict entry, got {options_data}")
        options = LSPConfigOptionV1.parse_all(options_data)
        custom = data.get("custom", {})
        return LSPConfig(path, remorph, options, custom)


def lsp_feature(
    name: str,
    options: Any | None = None,
):
    def wrapped(func: Callable):
        _LSP_FEATURES.append((name, options, func))
        return func

    return wrapped


_LSP_FEATURES: list[tuple[str, Any | None, Callable]] = []

# the below code also exists in lsp_server.py
# it will be factorized as part of https://github.com/databrickslabs/remorph/issues/1304
TRANSPILE_TO_DATABRICKS_METHOD = "document/transpileToDatabricks"


@attrs.define
class TranspileDocumentParams:
    uri: str = attrs.field()
    language_id: LanguageKind | str = attrs.field()


@attrs.define
class TranspileDocumentRequest:
    # 'id' is mandated by LSP
    # pylint: disable=invalid-name
    id: int | str = attrs.field()
    params: TranspileDocumentParams = attrs.field()
    method: Literal["document/transpileToDatabricks"] = "document/transpileToDatabricks"
    jsonrpc: str = attrs.field(default="2.0")


@attrs.define
class TranspileDocumentResult:
    uri: str = attrs.field()
    language_id: LanguageKind | str = attrs.field()
    changes: Sequence[TextEdit] = attrs.field()
    diagnostics: Sequence[Diagnostic] = attrs.field()


@attrs.define
class TranspileDocumentResponse:
    # 'id' is mandated by LSP
    # pylint: disable=invalid-name
    id: int | str = attrs.field()
    result: TranspileDocumentResult = attrs.field()
    jsonrpc: str = attrs.field(default="2.0")


def install_special_properties():
    is_special_property = getattr(types_module, "is_special_property")

    def customized(cls: type, property_name: str) -> bool:
        if cls is TranspileDocumentRequest and property_name in {"method", "jsonrpc"}:
            return True
        return is_special_property(cls, property_name)

    setattr(types_module, "is_special_property", customized)


install_special_properties()

METHOD_TO_TYPES[TRANSPILE_TO_DATABRICKS_METHOD] = (
    TranspileDocumentRequest,
    TranspileDocumentResponse,
    TranspileDocumentParams,
    None,
)


# subclass BaseLanguageClient so we can override stuff when required
class _LanguageClient(BaseLanguageClient):

    def __init__(self, name: str, version: str) -> None:
        super().__init__(name, version)
        self._transpile_to_databricks_capability: Registration | None = None
        self._register_lsp_features()

    @property
    def is_alive(self):
        return self._server and self._server.returncode is None

    @property
    def transpile_to_databricks_capability(self):
        return self._transpile_to_databricks_capability

    @lsp_feature(CLIENT_REGISTER_CAPABILITY)
    def register_capabilities(self, params: RegistrationParams) -> None:
        for registration in params.registrations:
            if registration.method == TRANSPILE_TO_DATABRICKS_METHOD:
                logger.debug(f"Registered capability: {registration.method}")
                self._transpile_to_databricks_capability = registration
                continue
            logger.debug(f"Unknown capability: {registration.method}")

    async def transpile_document_async(self, params: TranspileDocumentParams) -> TranspileDocumentResult:
        """Transpile a document to Databricks SQL.

        The caller is responsible for ensuring that the LSP server is capable of handling this request.

        Args:
            params: The parameters for the transpile request to forward to the LSP server.
        Returns:
            The result of the transpile request, from the LSP server.
        Raises:
            IllegalStateException: If the client has been stopped or the server hasn't (yet) signalled that it is
                capable of transpiling documents to Databricks SQL.
        """
        if self.stopped:
            raise IllegalStateException("Client has been stopped.")
        if not self.transpile_to_databricks_capability:
            raise IllegalStateException("Client has not yet registered its transpile capability.")
        return await self.protocol.send_request_async(TRANSPILE_TO_DATABRICKS_METHOD, params)

    # can't use @client.feature because it requires a global instance
    def _register_lsp_features(self):
        for name, options, func in _LSP_FEATURES:
            decorator = self.protocol.fm.feature(name, options)
            wrapper = self._wrapper_for_lsp_feature(func)
            decorator(wrapper)

    def _wrapper_for_lsp_feature(self, func):
        def wrapper(params):
            return func(self, params)

        return wrapper

    async def start_io(self, cmd: str, *args, **kwargs):
        await super().start_io(cmd, *args, **kwargs)
        # forward stderr
        task = asyncio.create_task(self.pipe_stderr())
        self._async_tasks.append(task)

    async def pipe_stderr(self) -> None:
        server = self._server
        assert server is not None
        stderr = server.stderr
        assert stderr is not None
        while not self._stop_event.is_set():
            data: bytes = await stderr.readline()
            if not data:
                return
            # Invalid UTF-8 isn't great, but we can at least log it with the replacement character rather
            # than dropping it silently or triggering an exception.
            message = data.decode("utf-8", errors="replace").strip()
            # Although information may arrive via stderr, it's generally informational in nature and doesn't
            # necessarily represent an error
            # TODO: analyze message and log it accordingly (info/warn/error...).
            logger.info(message)
            if not data.endswith(b"\n"):
                break


class ChangeManager(abc.ABC):

    @classmethod
    def apply(
        cls, source_code: str, changes: Sequence[TextEdit], diagnostics: Sequence[Diagnostic], file_path: Path
    ) -> TranspileResult:
        if not changes and not diagnostics:
            return TranspileResult(source_code, 1, [])
        transpile_errors = [DiagnosticConverter.apply(file_path, diagnostic) for diagnostic in diagnostics]
        try:
            lines = source_code.split("\n")
            for change in changes:
                lines = cls._apply(lines, change)
            transpiled_code = "\n".join(lines)
            return TranspileResult(transpiled_code, 1, transpile_errors)
        except IndexError as e:
            logger.error("Failed to apply changes", exc_info=e)
            error = TranspileError(
                code="INTERNAL_ERROR",
                kind=ErrorKind.INTERNAL,
                severity=ErrorSeverity.ERROR,
                path=file_path,
                message="Internal error, failed to apply changes",
            )
            transpile_errors.append(error)
            return TranspileResult(source_code, 1, transpile_errors)

    @classmethod
    def _apply(cls, lines: list[str], change: TextEdit) -> list[str]:
        new_lines = change.new_text.split("\n")
        if cls._is_full_document_change(lines, change):
            return new_lines
        # keep lines before
        result: list[str] = [] if change.range.start.line <= 0 else lines[0 : change.range.start.line]
        # special case where change covers full lines
        if change.range.start.character <= 0 and change.range.end.character >= len(lines[change.range.end.line]):
            pass
        # special case where change is within 1 line
        elif change.range.start.line == change.range.end.line:
            old_line = lines[change.range.start.line]
            if change.range.start.character > 0:
                new_lines[0] = old_line[0 : change.range.start.character] + new_lines[0]
            if change.range.end.character < len(old_line):
                new_lines[-1] += old_line[change.range.end.character :]
        else:
            if change.range.start.character > 0:
                old_line = lines[change.range.start.line]
                new_lines[0] = old_line[0 : change.range.start.character] + new_lines[0]
            if change.range.end.character < len(lines[change.range.end.line]):
                old_line = lines[change.range.end.line]
                new_lines[-1] += old_line[change.range.end.character :]
        result.extend(new_lines)
        # keep lines after
        if change.range.end.line < len(lines) - 1:
            result.extend(lines[change.range.end.line + 1 :])
        return result

    @classmethod
    def _is_full_document_change(cls, lines: list[str], change: TextEdit) -> bool:
        # A range's end is exclusive. Therefore full document range goes from (0, 0) to (l, 0) where l is the number
        # of lines in the document.
        return (
            change.range.start.line == 0
            and change.range.start.character == 0
            and change.range.end.line >= len(lines)
            and change.range.end.character >= 0
        )


class DiagnosticConverter(abc.ABC):

    _KIND_NAMES = {e.name for e in ErrorKind}

    @classmethod
    def apply(cls, file_path: Path, diagnostic: Diagnostic) -> TranspileError:
        code = str(diagnostic.code)
        kind = ErrorKind.INTERNAL
        parts = code.split("-")
        if len(parts) >= 2 and parts[0] in cls._KIND_NAMES:
            kind = ErrorKind[parts[0]]
            parts.pop(0)
            code = "-".join(parts)
        severity = cls._convert_severity(diagnostic.severity)
        lsp_range = cls._convert_range(diagnostic.range)
        return TranspileError(
            code=code, kind=kind, severity=severity, path=file_path, message=diagnostic.message, range=lsp_range
        )

    @classmethod
    def _convert_range(cls, lsp_range: LSPRange | None) -> CodeRange | None:
        if not lsp_range:
            return None
        return CodeRange(cls._convert_position(lsp_range.start), cls._convert_position(lsp_range.end))

    @classmethod
    def _convert_position(cls, lsp_position: LSPPosition) -> CodePosition:
        return CodePosition(lsp_position.line, lsp_position.character)

    @classmethod
    def _convert_severity(cls, severity: DiagnosticSeverity | None) -> ErrorSeverity:
        if severity == DiagnosticSeverity.Information:
            return ErrorSeverity.INFO
        if severity == DiagnosticSeverity.Warning:
            return ErrorSeverity.WARNING
        if severity == DiagnosticSeverity.Error:
            return ErrorSeverity.ERROR
        return ErrorSeverity.INFO


class LSPEngine(TranspileEngine):

    @classmethod
    def from_config_path(cls, config_path: Path) -> LSPEngine:
        config = LSPConfig.load(config_path)
        return LSPEngine(config_path.parent, config)

    @classmethod
    def client_metadata(cls) -> tuple[str, str]:
        """Obtain the name and version for this LSP client, respectively in a tuple."""
        product_info = ProductInfo.from_class(cls)
        return product_info.product_name(), product_info.version()

    def __init__(self, workdir: Path, config: LSPConfig) -> None:
        self._workdir = workdir
        self._config = config
        name, version = self.client_metadata()
        self._client = _LanguageClient(name, version)
        self._init_response: InitializeResult | None = None

    @property
    def supported_dialects(self) -> list[str]:
        return self._config.remorph.dialects

    @property
    def server_has_transpile_capability(self) -> bool:
        return self._client.transpile_to_databricks_capability is not None

    async def initialize(self, config: TranspileConfig) -> None:
        if self.is_alive:
            raise IllegalStateException("LSP engine is already initialized")
        cwd = os.getcwd()
        try:
            os.chdir(self._workdir)
            await self._do_initialize(config)
            await self._await_for_transpile_capability()
        # it is good practice to catch broad exceptions raised by launching a child process
        except Exception as e:  # pylint: disable=broad-exception-caught
            logger.error("LSP initialization failed", exc_info=e)
        os.chdir(cwd)

    async def _do_initialize(self, config: TranspileConfig) -> None:
        await self._start_server()
        input_path = config.input_path
        root_path = input_path if input_path.is_dir() else input_path.parent
        params = InitializeParams(
            capabilities=self._client_capabilities(),
            client_info=ClientInfo(name=self._client.name, version=self._client.version),
            process_id=os.getpid(),
            root_uri=str(root_path.absolute().as_uri()),
            workspace_folders=None,  # for now, we only support a single workspace = root_uri
            initialization_options=self._initialization_options(config),
        )
        logger.debug(f"LSP init params: {params}")
        self._init_response = await self._client.initialize_async(params)

    async def _start_server(self):
        executable = self._config.remorph.command_line[0]
        if executable in {"python", "python3"}:
            await self._start_python_server()
        else:
            await self._start_other_server()

    async def _start_python_server(self):
        has_venv = (self._workdir / ".venv").exists()
        if has_venv:
            await self._start_python_server_with_venv()
        else:
            await self._start_python_server_without_venv()

    async def _start_python_server_with_venv(self):
        env: dict[str, str] = os.environ | self._config.remorph.env_vars
        # ensure modules are searched within venv
        if "PYTHONPATH" in env.keys():
            del env["PYTHONPATH"]
        if "VIRTUAL_ENV" in env.keys():
            del env["VIRTUAL_ENV"]
        if "VIRTUAL_ENV_PROMPT" in env.keys():
            del env["VIRTUAL_ENV_PROMPT"]
        path = self._workdir / ".venv" / "Scripts" if sys.platform == "win32" else self._workdir / ".venv" / "bin"
        if "PATH" in env.keys():
            env["PATH"] = str(path) + os.pathsep + env["PATH"]
        else:
            env["PATH"] = str(path)
        python = "python.exe" if sys.platform == "win32" else "python3"
        executable = path / python
        await self._launch_executable(executable, env)

    async def _start_python_server_without_venv(self):
        env: dict[str, str] = os.environ | self._config.remorph.env_vars
        # ensure modules are searched locally before being searched in remorph
        if "PYTHONPATH" in env.keys():
            env["PYTHONPATH"] = str(self._workdir) + os.pathsep + env["PYTHONPATH"]
        else:
            env["PYTHONPATH"] = str(self._workdir)
        executable = Path(self._config.remorph.command_line[0])
        await self._launch_executable(executable, env)

    async def _start_other_server(self):
        env: dict[str, str] = os.environ | self._config.remorph.env_vars
        # ensure modules are searched within venv
        if "PYTHONPATH" in env.keys():
            del env["PYTHONPATH"]
        if "VIRTUAL_ENV" in env.keys():
            del env["VIRTUAL_ENV"]
        if "VIRTUAL_ENV_PROMPT" in env.keys():
            del env["VIRTUAL_ENV_PROMPT"]
        executable = Path(self._config.remorph.command_line[0])
        await self._launch_executable(executable, env)

    async def _launch_executable(self, executable: Path, env: Mapping):
        log_level = logging.getLevelName(logging.getLogger("databricks").level)
        args = self._config.remorph.command_line[1:] + [f"--log_level={log_level}"]
        logger.debug(f"Starting LSP engine: {executable} {args} (cwd={os.getcwd()})")
        await self._client.start_io(str(executable), env=env, *args)

    def _client_capabilities(self):
        return ClientCapabilities()  # TODO do we need to refine this ?

    def _initialization_options(self, config: TranspileConfig):
        return {
            "remorph": {
                "source-dialect": config.source_dialect,
            },
            "options": config.transpiler_options,
            "custom": self._config.custom,
        }

    async def _await_for_transpile_capability(self):
        for _ in range(1, 100):
            if self._client.transpile_to_databricks_capability:
                return
            await asyncio.sleep(0.1)
        if not self._client.transpile_to_databricks_capability:
            msg = f"LSP server did not register its {TRANSPILE_TO_DATABRICKS_METHOD} capability"
            raise FeatureRequestError(msg)

    async def shutdown(self):
        await self._client.shutdown_async(None)
        self._client.exit(None)
        await self._client.stop()

    @property
    def is_alive(self):
        return self._client.is_alive

    async def transpile(
        self, source_dialect: str, target_dialect: str, source_code: str, file_path: Path
    ) -> TranspileResult:
        self.open_document(file_path, source_code=source_code)
        response = await self.transpile_document(file_path)
        self.close_document(file_path)
        return ChangeManager.apply(source_code, response.changes, response.diagnostics, file_path)

    def open_document(self, file_path: Path, encoding="utf-8", source_code: str | None = None) -> None:
        if source_code is None:
            source_code = file_path.read_text(encoding)
        text_document = TextDocumentItem(
            uri=file_path.as_uri(), language_id=LanguageKind.Sql, version=1, text=source_code
        )
        params = DidOpenTextDocumentParams(text_document)
        self._client.text_document_did_open(params)

    def close_document(self, file_path: Path) -> None:
        text_document = TextDocumentIdentifier(uri=file_path.as_uri())
        params = DidCloseTextDocumentParams(text_document)
        self._client.text_document_did_close(params)

    async def transpile_document(self, file_path: Path) -> TranspileDocumentResult:
        params = TranspileDocumentParams(uri=file_path.as_uri(), language_id=LanguageKind.Sql)
        result = await self._client.transpile_document_async(params)
        return result

    # TODO infer the below from config file
    def is_supported_file(self, file: Path) -> bool:
        if self._is_bladebridge() or self._is_test_transpiler():
            return True
        if self._is_morpheus():
            return is_sql_file(file) or is_dbt_project_file(file)
        # then only support sql
        return is_sql_file(file)

    # TODO remove this
    def _is_test_transpiler(self):
        return self._config.remorph.name == "test-transpiler"

    # TODO remove this
    def _is_bladebridge(self):
        return self._config.remorph.name == "Bladebridge"

    # TODO remove this
    def _is_morpheus(self):
        return self._config.remorph.name == "Morpheus"
```
databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py (new file)
@@ -0,0 +1,30 @@

```python
from sqlglot import Dialects, Dialect

from databricks.labs.lakebridge.transpiler.sqlglot.parsers import oracle, presto, snowflake
from databricks.labs.lakebridge.transpiler.sqlglot.generator.databricks import Databricks

SQLGLOT_DIALECTS: dict[str, type[Dialect] | str] = {
    "athena": Dialects.ATHENA,
    "bigquery": Dialects.BIGQUERY,
    "databricks": Databricks,
    "mysql": Dialects.MYSQL,
    "netezza": Dialects.POSTGRES,
    "oracle": oracle.Oracle,
    "postgresql": Dialects.POSTGRES,
    "presto": presto.Presto,
    "redshift": Dialects.REDSHIFT,
    "snowflake": snowflake.Snowflake,
    "sqlite": Dialects.SQLITE,
    "teradata": Dialects.TERADATA,
    "trino": Dialects.TRINO,
    "tsql": Dialects.TSQL,
    "vertica": Dialects.POSTGRES,
}


def get_dialect(dialect: str) -> Dialect:
    return Dialect.get_or_raise(SQLGLOT_DIALECTS.get(dialect))


def get_key_from_dialect(input_dialect: Dialect) -> str:
    return [source_key for source_key, dialect in SQLGLOT_DIALECTS.items() if dialect == input_dialect][0]
```
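A short usage sketch for these helpers, assuming a stock `sqlglot` install; the query is illustrative and the expected outputs are indicative, not verified against this exact version:

```python
import sqlglot

from databricks.labs.lakebridge.transpiler.sqlglot.dialect_utils import get_dialect, get_key_from_dialect

# Resolve lakebridge dialect keys to sqlglot Dialect instances.
read = get_dialect("tsql")         # stock sqlglot dialect
write = get_dialect("databricks")  # the custom Databricks generator registered above

# sqlglot.transpile accepts Dialect instances for read/write.
sql = "SELECT TOP 5 name FROM people"
print(sqlglot.transpile(sql, read=read, write=write)[0])  # e.g. SELECT name FROM people LIMIT 5

# Reverse lookup works for keys backed by a Dialect subclass, e.g. "databricks".
print(get_key_from_dialect(write))  # expected: databricks
```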