databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py
@@ -0,0 +1,564 @@
+ from __future__ import annotations
+
+ import abc
+ import asyncio
+ import logging
+ import os
+ import sys
+ from collections.abc import Callable, Sequence, Mapping
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Literal
+
+ import attrs
+ import yaml
+ from lsprotocol import types as types_module
+ from lsprotocol.types import (
+     CLIENT_REGISTER_CAPABILITY,
+     METHOD_TO_TYPES,
+     ClientCapabilities,
+     ClientInfo,
+     Diagnostic,
+     DiagnosticSeverity,
+     DidCloseTextDocumentParams,
+     DidOpenTextDocumentParams,
+     InitializeParams,
+     InitializeResult,
+     LanguageKind,
+ )
+ from lsprotocol.types import Position as LSPPosition
+ from lsprotocol.types import Range as LSPRange
+ from lsprotocol.types import Registration, RegistrationParams, TextDocumentIdentifier, TextDocumentItem, TextEdit
+ from pygls.exceptions import FeatureRequestError
+ from pygls.lsp.client import BaseLanguageClient
+
+ from databricks.labs.blueprint.wheels import ProductInfo
+ from databricks.labs.lakebridge.config import LSPConfigOptionV1, TranspileConfig, TranspileResult
+ from databricks.labs.lakebridge.errors.exceptions import IllegalStateException
+ from databricks.labs.lakebridge.helpers.file_utils import is_sql_file, is_dbt_project_file
+ from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine
+ from databricks.labs.lakebridge.transpiler.transpile_status import (
+     CodePosition,
+     CodeRange,
+     ErrorKind,
+     ErrorSeverity,
+     TranspileError,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class _LSPRemorphConfigV1:
+     name: str
+     dialects: list[str]
+     env_vars: dict[str, str]
+     command_line: list[str]
+
+     @classmethod
+     def parse(cls, data: dict[str, Any]) -> _LSPRemorphConfigV1:
+         version = data.get("version", 0)
+         if version != 1:
+             raise ValueError(f"Unsupported transpiler config version: {version}")
+         name: str | None = data.get("name", None)
+         if not name:
+             raise ValueError("Missing 'name' entry")
+         dialects = data.get("dialects", [])
+         if len(dialects) == 0:
+             raise ValueError("Missing 'dialects' entry")
+         env_vars = data.get("environment", {})
+         command_line = data.get("command_line", [])
+         if len(command_line) == 0:
+             raise ValueError("Missing 'command_line' entry")
+         return _LSPRemorphConfigV1(name, dialects, env_vars, command_line)
+
+
+ @dataclass
+ class LSPConfig:
+     path: Path
+     remorph: _LSPRemorphConfigV1
+     options: dict[str, list[LSPConfigOptionV1]]
+     custom: dict[str, Any]
+
+     @property
+     def name(self):
+         return self.remorph.name
+
+     def options_for_dialect(self, source_dialect: str) -> list[LSPConfigOptionV1]:
+         return self.options.get("all", []) + self.options.get(source_dialect, [])
+
+     @classmethod
+     def load(cls, path: Path) -> LSPConfig:
+         yaml_text = path.read_text()
+         data = yaml.safe_load(yaml_text)
+         if not isinstance(data, dict):
+             raise ValueError(f"Invalid transpiler config, expecting a dict, got a {type(data).__name__}")
+         remorph_data = data.get("remorph", None)
+         if not isinstance(remorph_data, dict):
+             raise ValueError(f"Invalid transpiler config, expecting a 'remorph' dict entry, got {remorph_data}")
+         remorph = _LSPRemorphConfigV1.parse(remorph_data)
+         options_data = data.get("options", {})
+         if not isinstance(options_data, dict):
+             raise ValueError(f"Invalid transpiler config, expecting an 'options' dict entry, got {options_data}")
+         options = LSPConfigOptionV1.parse_all(options_data)
+         custom = data.get("custom", {})
+         return LSPConfig(path, remorph, options, custom)
+
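For orientation, here is a minimal sketch of a transpiler config that the loader above accepts. The field names come from the parsing code (note that `version` sits inside the `remorph` mapping); the transpiler name and command line are hypothetical, and `options`/`custom` fall back to empty defaults when omitted:

from pathlib import Path

from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig

# Hypothetical config exercising _LSPRemorphConfigV1.parse: version must be 1,
# and 'name', 'dialects' and 'command_line' must all be present and non-empty.
Path("config.yml").write_text(
    """
remorph:
  version: 1
  name: my-transpiler
  dialects:
    - snowflake
  command_line:
    - python
    - lsp_server.py
"""
)

config = LSPConfig.load(Path("config.yml"))
assert config.name == "my-transpiler"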
+
+ def lsp_feature(
+     name: str,
+     options: Any | None = None,
+ ):
+     def wrapped(func: Callable):
+         _LSP_FEATURES.append((name, options, func))
+         return func
+
+     return wrapped
+
+
+ _LSP_FEATURES: list[tuple[str, Any | None, Callable]] = []
+
+ # the code below also exists in lsp_server.py;
+ # it will be factored out as part of https://github.com/databrickslabs/remorph/issues/1304
+ TRANSPILE_TO_DATABRICKS_METHOD = "document/transpileToDatabricks"
+
+
+ @attrs.define
+ class TranspileDocumentParams:
+     uri: str = attrs.field()
+     language_id: LanguageKind | str = attrs.field()
+
+
+ @attrs.define
+ class TranspileDocumentRequest:
+     # 'id' is mandated by LSP
+     # pylint: disable=invalid-name
+     id: int | str = attrs.field()
+     params: TranspileDocumentParams = attrs.field()
+     method: Literal["document/transpileToDatabricks"] = "document/transpileToDatabricks"
+     jsonrpc: str = attrs.field(default="2.0")
+
+
+ @attrs.define
+ class TranspileDocumentResult:
+     uri: str = attrs.field()
+     language_id: LanguageKind | str = attrs.field()
+     changes: Sequence[TextEdit] = attrs.field()
+     diagnostics: Sequence[Diagnostic] = attrs.field()
+
+
+ @attrs.define
+ class TranspileDocumentResponse:
+     # 'id' is mandated by LSP
+     # pylint: disable=invalid-name
+     id: int | str = attrs.field()
+     result: TranspileDocumentResult = attrs.field()
+     jsonrpc: str = attrs.field(default="2.0")
+
+
+ def install_special_properties():
+     is_special_property = getattr(types_module, "is_special_property")
+
+     def customized(cls: type, property_name: str) -> bool:
+         if cls is TranspileDocumentRequest and property_name in {"method", "jsonrpc"}:
+             return True
+         return is_special_property(cls, property_name)
+
+     setattr(types_module, "is_special_property", customized)
+
+
+ install_special_properties()
+
+ METHOD_TO_TYPES[TRANSPILE_TO_DATABRICKS_METHOD] = (
+     TranspileDocumentRequest,
+     TranspileDocumentResponse,
+     TranspileDocumentParams,
+     None,
+ )
+
+
+ # subclass BaseLanguageClient so we can override stuff when required
+ class _LanguageClient(BaseLanguageClient):
+
+     def __init__(self, name: str, version: str) -> None:
+         super().__init__(name, version)
+         self._transpile_to_databricks_capability: Registration | None = None
+         self._register_lsp_features()
+
+     @property
+     def is_alive(self):
+         return self._server and self._server.returncode is None
+
+     @property
+     def transpile_to_databricks_capability(self):
+         return self._transpile_to_databricks_capability
+
+     @lsp_feature(CLIENT_REGISTER_CAPABILITY)
+     def register_capabilities(self, params: RegistrationParams) -> None:
+         for registration in params.registrations:
+             if registration.method == TRANSPILE_TO_DATABRICKS_METHOD:
+                 logger.debug(f"Registered capability: {registration.method}")
+                 self._transpile_to_databricks_capability = registration
+                 continue
+             logger.debug(f"Unknown capability: {registration.method}")
+
+     async def transpile_document_async(self, params: TranspileDocumentParams) -> TranspileDocumentResult:
+         """Transpile a document to Databricks SQL.
+
+         The caller is responsible for ensuring that the LSP server is capable of handling this request.
+
+         Args:
+             params: The parameters for the transpile request to forward to the LSP server.
+         Returns:
+             The result of the transpile request, from the LSP server.
+         Raises:
+             IllegalStateException: If the client has been stopped or the server hasn't (yet) signalled that it is
+                 capable of transpiling documents to Databricks SQL.
+         """
+         if self.stopped:
+             raise IllegalStateException("Client has been stopped.")
+         if not self.transpile_to_databricks_capability:
+             raise IllegalStateException("Client has not yet registered its transpile capability.")
+         return await self.protocol.send_request_async(TRANSPILE_TO_DATABRICKS_METHOD, params)
+
+     # can't use @client.feature because it requires a global instance
+     def _register_lsp_features(self):
+         for name, options, func in _LSP_FEATURES:
+             decorator = self.protocol.fm.feature(name, options)
+             wrapper = self._wrapper_for_lsp_feature(func)
+             decorator(wrapper)
+
+     def _wrapper_for_lsp_feature(self, func):
+         def wrapper(params):
+             return func(self, params)
+
+         return wrapper
+
+     async def start_io(self, cmd: str, *args, **kwargs):
+         await super().start_io(cmd, *args, **kwargs)
+         # forward stderr
+         task = asyncio.create_task(self.pipe_stderr())
+         self._async_tasks.append(task)
+
+     async def pipe_stderr(self) -> None:
+         server = self._server
+         assert server is not None
+         stderr = server.stderr
+         assert stderr is not None
+         while not self._stop_event.is_set():
+             data: bytes = await stderr.readline()
+             if not data:
+                 return
+             # Invalid UTF-8 isn't great, but we can at least log it with the replacement character rather
+             # than dropping it silently or triggering an exception.
+             message = data.decode("utf-8", errors="replace").strip()
+             # Although information may arrive via stderr, it's generally informational in nature and doesn't
+             # necessarily represent an error.
+             # TODO: analyze the message and log it accordingly (info/warn/error...).
+             logger.info(message)
+             if not data.endswith(b"\n"):
+                 break
+
+
+ class ChangeManager(abc.ABC):
+
+     @classmethod
+     def apply(
+         cls, source_code: str, changes: Sequence[TextEdit], diagnostics: Sequence[Diagnostic], file_path: Path
+     ) -> TranspileResult:
+         if not changes and not diagnostics:
+             return TranspileResult(source_code, 1, [])
+         transpile_errors = [DiagnosticConverter.apply(file_path, diagnostic) for diagnostic in diagnostics]
+         try:
+             lines = source_code.split("\n")
+             for change in changes:
+                 lines = cls._apply(lines, change)
+             transpiled_code = "\n".join(lines)
+             return TranspileResult(transpiled_code, 1, transpile_errors)
+         except IndexError as e:
+             logger.error("Failed to apply changes", exc_info=e)
+             error = TranspileError(
+                 code="INTERNAL_ERROR",
+                 kind=ErrorKind.INTERNAL,
+                 severity=ErrorSeverity.ERROR,
+                 path=file_path,
+                 message="Internal error, failed to apply changes",
+             )
+             transpile_errors.append(error)
+             return TranspileResult(source_code, 1, transpile_errors)
+
+     @classmethod
+     def _apply(cls, lines: list[str], change: TextEdit) -> list[str]:
+         new_lines = change.new_text.split("\n")
+         if cls._is_full_document_change(lines, change):
+             return new_lines
+         # keep lines before
+         result: list[str] = [] if change.range.start.line <= 0 else lines[0 : change.range.start.line]
+         # special case where the change covers full lines
+         if change.range.start.character <= 0 and change.range.end.character >= len(lines[change.range.end.line]):
+             pass
+         # special case where the change is within a single line
+         elif change.range.start.line == change.range.end.line:
+             old_line = lines[change.range.start.line]
+             if change.range.start.character > 0:
+                 new_lines[0] = old_line[0 : change.range.start.character] + new_lines[0]
+             if change.range.end.character < len(old_line):
+                 new_lines[-1] += old_line[change.range.end.character :]
+         else:
+             if change.range.start.character > 0:
+                 old_line = lines[change.range.start.line]
+                 new_lines[0] = old_line[0 : change.range.start.character] + new_lines[0]
+             if change.range.end.character < len(lines[change.range.end.line]):
+                 old_line = lines[change.range.end.line]
+                 new_lines[-1] += old_line[change.range.end.character :]
+         result.extend(new_lines)
+         # keep lines after
+         if change.range.end.line < len(lines) - 1:
+             result.extend(lines[change.range.end.line + 1 :])
+         return result
+
+     @classmethod
+     def _is_full_document_change(cls, lines: list[str], change: TextEdit) -> bool:
+         # A range's end is exclusive, so a full-document range goes from (0, 0) to (l, 0), where l is the
+         # number of lines in the document.
+         return (
+             change.range.start.line == 0
+             and change.range.start.character == 0
+             and change.range.end.line >= len(lines)
+             and change.range.end.character >= 0
+         )
+
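As a quick illustration of the two main paths through `_apply` (the `lsprotocol` types are real; the SQL strings are made up): a range spanning the whole document replaces it outright, while a narrower range splices `new_text` into the affected line:

from pathlib import Path

from lsprotocol.types import Position, Range, TextEdit

from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import ChangeManager

# Full-document edit: (0, 0) .. (line_count, 0) satisfies _is_full_document_change,
# so new_text replaces the entire source.
full = TextEdit(
    range=Range(start=Position(line=0, character=0), end=Position(line=1, character=0)),
    new_text="SELECT 1",
)
result = ChangeManager.apply("SELECT 1 FROM dual", [full], [], Path("query.sql"))
# -> a TranspileResult carrying "SELECT 1"

# Partial edit within a single line: characters 7..10 ("foo") are replaced, and the
# prefix and suffix of the old line are preserved around new_text.
partial = TextEdit(
    range=Range(start=Position(line=0, character=7), end=Position(line=0, character=10)),
    new_text="42",
)
result = ChangeManager.apply("SELECT foo FROM t", [partial], [], Path("query.sql"))
# -> a TranspileResult carrying "SELECT 42 FROM t"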
+
+ class DiagnosticConverter(abc.ABC):
+
+     _KIND_NAMES = {e.name for e in ErrorKind}
+
+     @classmethod
+     def apply(cls, file_path: Path, diagnostic: Diagnostic) -> TranspileError:
+         code = str(diagnostic.code)
+         kind = ErrorKind.INTERNAL
+         parts = code.split("-")
+         if len(parts) >= 2 and parts[0] in cls._KIND_NAMES:
+             kind = ErrorKind[parts[0]]
+             parts.pop(0)
+             code = "-".join(parts)
+         severity = cls._convert_severity(diagnostic.severity)
+         lsp_range = cls._convert_range(diagnostic.range)
+         return TranspileError(
+             code=code, kind=kind, severity=severity, path=file_path, message=diagnostic.message, range=lsp_range
+         )
+
+     @classmethod
+     def _convert_range(cls, lsp_range: LSPRange | None) -> CodeRange | None:
+         if not lsp_range:
+             return None
+         return CodeRange(cls._convert_position(lsp_range.start), cls._convert_position(lsp_range.end))
+
+     @classmethod
+     def _convert_position(cls, lsp_position: LSPPosition) -> CodePosition:
+         return CodePosition(lsp_position.line, lsp_position.character)
+
+     @classmethod
+     def _convert_severity(cls, severity: DiagnosticSeverity | None) -> ErrorSeverity:
+         if severity == DiagnosticSeverity.Information:
+             return ErrorSeverity.INFO
+         if severity == DiagnosticSeverity.Warning:
+             return ErrorSeverity.WARNING
+         if severity == DiagnosticSeverity.Error:
+             return ErrorSeverity.ERROR
+         return ErrorSeverity.INFO
+
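For example, a diagnostic whose code carries an `ErrorKind` prefix is split into kind and code, and the LSP severity and range are mapped onto the local types (only `INTERNAL` is shown because it is the one kind member visible in this file; the diagnostic code itself is hypothetical):

from pathlib import Path

from lsprotocol.types import Diagnostic, DiagnosticSeverity, Position, Range

from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import DiagnosticConverter
from databricks.labs.lakebridge.transpiler.transpile_status import ErrorKind, ErrorSeverity

diagnostic = Diagnostic(
    range=Range(start=Position(line=0, character=0), end=Position(line=0, character=5)),
    message="unexpected token",
    severity=DiagnosticSeverity.Warning,
    code="INTERNAL-UNEXPECTED",  # hypothetical '<kind>-<code>' diagnostic code
)
error = DiagnosticConverter.apply(Path("query.sql"), diagnostic)
assert error.kind == ErrorKind.INTERNAL  # recognized prefix becomes the kind
assert error.code == "UNEXPECTED"
assert error.severity == ErrorSeverity.WARNING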
+
+ class LSPEngine(TranspileEngine):
+
+     @classmethod
+     def from_config_path(cls, config_path: Path) -> LSPEngine:
+         config = LSPConfig.load(config_path)
+         return LSPEngine(config_path.parent, config)
+
+     @classmethod
+     def client_metadata(cls) -> tuple[str, str]:
+         """Obtain the name and version of this LSP client, as a (name, version) tuple."""
+         product_info = ProductInfo.from_class(cls)
+         return product_info.product_name(), product_info.version()
+
+     def __init__(self, workdir: Path, config: LSPConfig) -> None:
+         self._workdir = workdir
+         self._config = config
+         name, version = self.client_metadata()
+         self._client = _LanguageClient(name, version)
+         self._init_response: InitializeResult | None = None
+
+     @property
+     def supported_dialects(self) -> list[str]:
+         return self._config.remorph.dialects
+
+     @property
+     def server_has_transpile_capability(self) -> bool:
+         return self._client.transpile_to_databricks_capability is not None
+
+     async def initialize(self, config: TranspileConfig) -> None:
+         if self.is_alive:
+             raise IllegalStateException("LSP engine is already initialized")
+         cwd = os.getcwd()
+         try:
+             os.chdir(self._workdir)
+             await self._do_initialize(config)
+             await self._await_for_transpile_capability()
+         # it is good practice to catch broad exceptions raised by launching a child process
+         except Exception as e:  # pylint: disable=broad-exception-caught
+             logger.error("LSP initialization failed", exc_info=e)
+         os.chdir(cwd)
+
+     async def _do_initialize(self, config: TranspileConfig) -> None:
+         await self._start_server()
+         input_path = config.input_path
+         root_path = input_path if input_path.is_dir() else input_path.parent
+         params = InitializeParams(
+             capabilities=self._client_capabilities(),
+             client_info=ClientInfo(name=self._client.name, version=self._client.version),
+             process_id=os.getpid(),
+             root_uri=str(root_path.absolute().as_uri()),
+             workspace_folders=None,  # for now, we only support a single workspace = root_uri
+             initialization_options=self._initialization_options(config),
+         )
+         logger.debug(f"LSP init params: {params}")
+         self._init_response = await self._client.initialize_async(params)
+
+     async def _start_server(self):
+         executable = self._config.remorph.command_line[0]
+         if executable in {"python", "python3"}:
+             await self._start_python_server()
+         else:
+             await self._start_other_server()
+
+     async def _start_python_server(self):
+         has_venv = (self._workdir / ".venv").exists()
+         if has_venv:
+             await self._start_python_server_with_venv()
+         else:
+             await self._start_python_server_without_venv()
+
+     async def _start_python_server_with_venv(self):
+         env: dict[str, str] = os.environ | self._config.remorph.env_vars
+         # ensure modules are searched within the venv
+         if "PYTHONPATH" in env.keys():
+             del env["PYTHONPATH"]
+         if "VIRTUAL_ENV" in env.keys():
+             del env["VIRTUAL_ENV"]
+         if "VIRTUAL_ENV_PROMPT" in env.keys():
+             del env["VIRTUAL_ENV_PROMPT"]
+         path = self._workdir / ".venv" / "Scripts" if sys.platform == "win32" else self._workdir / ".venv" / "bin"
+         if "PATH" in env.keys():
+             env["PATH"] = str(path) + os.pathsep + env["PATH"]
+         else:
+             env["PATH"] = str(path)
+         python = "python.exe" if sys.platform == "win32" else "python3"
+         executable = path / python
+         await self._launch_executable(executable, env)
+
+     async def _start_python_server_without_venv(self):
+         env: dict[str, str] = os.environ | self._config.remorph.env_vars
+         # ensure modules are searched locally before being searched in remorph
+         if "PYTHONPATH" in env.keys():
+             env["PYTHONPATH"] = str(self._workdir) + os.pathsep + env["PYTHONPATH"]
+         else:
+             env["PYTHONPATH"] = str(self._workdir)
+         executable = Path(self._config.remorph.command_line[0])
+         await self._launch_executable(executable, env)
+
+     async def _start_other_server(self):
+         env: dict[str, str] = os.environ | self._config.remorph.env_vars
+         # ensure modules are searched within the venv
+         if "PYTHONPATH" in env.keys():
+             del env["PYTHONPATH"]
+         if "VIRTUAL_ENV" in env.keys():
+             del env["VIRTUAL_ENV"]
+         if "VIRTUAL_ENV_PROMPT" in env.keys():
+             del env["VIRTUAL_ENV_PROMPT"]
+         executable = Path(self._config.remorph.command_line[0])
+         await self._launch_executable(executable, env)
+
+     async def _launch_executable(self, executable: Path, env: Mapping):
+         log_level = logging.getLevelName(logging.getLogger("databricks").level)
+         args = self._config.remorph.command_line[1:] + [f"--log_level={log_level}"]
+         logger.debug(f"Starting LSP engine: {executable} {args} (cwd={os.getcwd()})")
+         await self._client.start_io(str(executable), env=env, *args)
+
+     def _client_capabilities(self):
+         return ClientCapabilities()  # TODO: do we need to refine this?
+
+     def _initialization_options(self, config: TranspileConfig):
+         return {
+             "remorph": {
+                 "source-dialect": config.source_dialect,
+             },
+             "options": config.transpiler_options,
+             "custom": self._config.custom,
+         }
+
+     async def _await_for_transpile_capability(self):
+         for _ in range(1, 100):
+             if self._client.transpile_to_databricks_capability:
+                 return
+             await asyncio.sleep(0.1)
+         if not self._client.transpile_to_databricks_capability:
+             msg = f"LSP server did not register its {TRANSPILE_TO_DATABRICKS_METHOD} capability"
+             raise FeatureRequestError(msg)
+
+     async def shutdown(self):
+         await self._client.shutdown_async(None)
+         self._client.exit(None)
+         await self._client.stop()
+
+     @property
+     def is_alive(self):
+         return self._client.is_alive
+
+     async def transpile(
+         self, source_dialect: str, target_dialect: str, source_code: str, file_path: Path
+     ) -> TranspileResult:
+         self.open_document(file_path, source_code=source_code)
+         response = await self.transpile_document(file_path)
+         self.close_document(file_path)
+         return ChangeManager.apply(source_code, response.changes, response.diagnostics, file_path)
+
+     def open_document(self, file_path: Path, encoding="utf-8", source_code: str | None = None) -> None:
+         if source_code is None:
+             source_code = file_path.read_text(encoding)
+         text_document = TextDocumentItem(
+             uri=file_path.as_uri(), language_id=LanguageKind.Sql, version=1, text=source_code
+         )
+         params = DidOpenTextDocumentParams(text_document)
+         self._client.text_document_did_open(params)
+
+     def close_document(self, file_path: Path) -> None:
+         text_document = TextDocumentIdentifier(uri=file_path.as_uri())
+         params = DidCloseTextDocumentParams(text_document)
+         self._client.text_document_did_close(params)
+
+     async def transpile_document(self, file_path: Path) -> TranspileDocumentResult:
+         params = TranspileDocumentParams(uri=file_path.as_uri(), language_id=LanguageKind.Sql)
+         result = await self._client.transpile_document_async(params)
+         return result
+
+     # TODO: infer the below from the config file
+     def is_supported_file(self, file: Path) -> bool:
+         if self._is_bladebridge() or self._is_test_transpiler():
+             return True
+         if self._is_morpheus():
+             return is_sql_file(file) or is_dbt_project_file(file)
+         # otherwise only support SQL
+         return is_sql_file(file)
+
+     # TODO: remove this
+     def _is_test_transpiler(self):
+         return self._config.remorph.name == "test-transpiler"
+
+     # TODO: remove this
+     def _is_bladebridge(self):
+         return self._config.remorph.name == "Bladebridge"
+
+     # TODO: remove this
+     def _is_morpheus(self):
+         return self._config.remorph.name == "Morpheus"
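Putting the pieces together, a driver would use the engine roughly as follows (a sketch: the config path is hypothetical, and building the `TranspileConfig` is elided since its constructor lives in config.py):

from pathlib import Path

from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine

async def transpile_one(config, source_code: str):  # config: a TranspileConfig
    engine = LSPEngine.from_config_path(Path("transpilers/config.yml"))  # hypothetical path
    await engine.initialize(config)  # starts the server, waits for the transpile capability
    try:
        return await engine.transpile("snowflake", "databricks", source_code, Path("query.sql"))
    finally:
        await engine.shutdown()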
databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py
@@ -0,0 +1,30 @@
+ from sqlglot import Dialects, Dialect
+
+ from databricks.labs.lakebridge.transpiler.sqlglot.parsers import oracle, presto, snowflake
+ from databricks.labs.lakebridge.transpiler.sqlglot.generator.databricks import Databricks
+
+ SQLGLOT_DIALECTS: dict[str, type[Dialect] | str] = {
+     "athena": Dialects.ATHENA,
+     "bigquery": Dialects.BIGQUERY,
+     "databricks": Databricks,
+     "mysql": Dialects.MYSQL,
+     "netezza": Dialects.POSTGRES,
+     "oracle": oracle.Oracle,
+     "postgresql": Dialects.POSTGRES,
+     "presto": presto.Presto,
+     "redshift": Dialects.REDSHIFT,
+     "snowflake": snowflake.Snowflake,
+     "sqlite": Dialects.SQLITE,
+     "teradata": Dialects.TERADATA,
+     "trino": Dialects.TRINO,
+     "tsql": Dialects.TSQL,
+     "vertica": Dialects.POSTGRES,
+ }
+
+
+ def get_dialect(dialect: str) -> Dialect:
+     return Dialect.get_or_raise(SQLGLOT_DIALECTS.get(dialect))
+
+
+ def get_key_from_dialect(input_dialect: Dialect) -> str:
+     return [source_key for source_key, dialect in SQLGLOT_DIALECTS.items() if dialect == input_dialect][0]
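A small usage sketch: `get_dialect` resolves a user-facing key to a sqlglot `Dialect` instance (note that "netezza" and "vertica" both map onto the Postgres dialect), and `get_key_from_dialect` performs the reverse lookup over the same table:

from databricks.labs.lakebridge.transpiler.sqlglot.dialect_utils import get_dialect, get_key_from_dialect

dialect = get_dialect("snowflake")  # Dialect.get_or_raise instantiates the mapped dialect class
key = get_key_from_dialect(dialect)  # reverse lookup over SQLGLOT_DIALECTS; expected "snowflake"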