databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
Two of the new modules are reproduced in full below.

databricks/labs/lakebridge/cli.py
@@ -0,0 +1,449 @@
+ import asyncio
+ import dataclasses
+ import json
+ import os
+ import time
+ from pathlib import Path
+ from typing import NoReturn, cast
+
+ from databricks.sdk.core import with_user_agent_extra
+ from databricks.sdk.service.sql import CreateWarehouseRequestWarehouseType
+ from databricks.sdk import WorkspaceClient
+
+ from databricks.labs.blueprint.cli import App
+ from databricks.labs.blueprint.entrypoint import get_logger
+ from databricks.labs.blueprint.installation import JsonValue
+ from databricks.labs.blueprint.tui import Prompts
+
+ from databricks.labs.bladespector.analyzer import Analyzer
+
+
+ from databricks.labs.lakebridge.assessments.configure_assessment import (
+     create_assessment_configurator,
+     PROFILER_SOURCE_SYSTEM,
+ )
+
+ from databricks.labs.lakebridge.__about__ import __version__
+ from databricks.labs.lakebridge.config import TranspileConfig, LSPConfigOptionV1
+ from databricks.labs.lakebridge.contexts.application import ApplicationContext
+ from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
+ from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
+ from databricks.labs.lakebridge.install import WorkspaceInstaller
+ from databricks.labs.lakebridge.install import TranspilerInstaller
+ from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner
+ from databricks.labs.lakebridge.lineage import lineage_generator
+ from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME
+ from databricks.labs.lakebridge.transpiler.execute import transpile as do_transpile
+
+
+ from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig
+ from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
+ from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine
+
+
+ lakebridge = App(__file__)
+ logger = get_logger(__file__)
+
+
+ def raise_validation_exception(msg: str) -> NoReturn:
+     raise ValueError(msg)
+
+
+ def _installer(ws: WorkspaceClient) -> WorkspaceInstaller:
+     app_context = ApplicationContext(_verify_workspace_client(ws))
+     return WorkspaceInstaller(
+         app_context.workspace_client,
+         app_context.prompts,
+         app_context.installation,
+         app_context.install_state,
+         app_context.product_info,
+         app_context.resource_configurator,
+         app_context.workspace_installation,
+     )
+
+
+ def _create_warehouse(ws: WorkspaceClient) -> str:
+
+     dbsql = ws.warehouses.create_and_wait(
+         name=f"lakebridge-warehouse-{time.time_ns()}",
+         warehouse_type=CreateWarehouseRequestWarehouseType.PRO,
+         cluster_size="Small",  # Adjust size as needed
+         auto_stop_mins=30,  # Auto-stop after 30 minutes of inactivity
+         enable_serverless_compute=True,
+         max_num_clusters=1,
+     )
+
+     if dbsql.id is None:
+         raise RuntimeError(f"Failed to create warehouse {dbsql.name}")
+
+     logger.info(f"Created warehouse with id: {dbsql.id}")
+     return dbsql.id
+
+
+ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str):
+     ws.warehouses.delete(warehouse_id)
+     logger.info(f"Removed warehouse post installation with id: {warehouse_id}")
+
+
+ def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
+     """
+     [Private] Verifies and updates the workspace client configuration.
+     """
+
+     # Using reflection to set right value for _product_info for telemetry
+     product_info = getattr(ws.config, '_product_info')
+     if product_info[0] != "lakebridge":
+         setattr(ws.config, '_product_info', ('lakebridge', __version__))
+
+     return ws
+
+
+ @lakebridge.command
+ def transpile(
+     w: WorkspaceClient,
+     transpiler_config_path: str | None = None,
+     source_dialect: str | None = None,
+     input_source: str | None = None,
+     output_folder: str | None = None,
+     error_file_path: str | None = None,
+     skip_validation: str | None = None,
+     catalog_name: str | None = None,
+     schema_name: str | None = None,
+ ):
+     """Transpiles source dialect to databricks dialect"""
+     ctx = ApplicationContext(w)
+     logger.debug(f"Application transpiler config: {ctx.transpile_config}")
+     checker = _TranspileConfigChecker(ctx.transpile_config, ctx.prompts)
+     checker.check_input_source(input_source)
+     checker.check_source_dialect(source_dialect)
+     checker.check_transpiler_config_path(transpiler_config_path)
+     checker.check_transpiler_config_options()
+     checker.check_output_folder(output_folder)
+     checker.check_error_file_path(error_file_path)
+     checker.check_skip_validation(skip_validation)
+     checker.check_catalog_name(catalog_name)
+     checker.check_schema_name(schema_name)
+     config, engine = checker.check()
+     result = asyncio.run(_transpile(ctx, config, engine))
+     # DO NOT Modify this print statement, it is used by the CLI to display results in GO Table Template
+     print(json.dumps(result))
+
+
+ class _TranspileConfigChecker:
+
+     def __init__(self, config: TranspileConfig | None, prompts: Prompts):
+         if not config:
+             raise SystemExit("Installed transpile config not found. Please install lakebridge transpile first.")
+         self._config: TranspileConfig = config
+         self._prompts = prompts
+
+     def check_input_source(self, input_source: str | None):
+         if input_source == "None":
+             input_source = None
+         if not input_source:
+             input_source = self._config.input_source
+         if not input_source:
+             input_source = self._prompts.question("Enter input SQL path (directory/file)")
+             input_source = input_source.strip()
+         if not input_source:
+             raise_validation_exception("Missing '--input-source'")
+         if not os.path.exists(input_source):
+             raise_validation_exception(f"Invalid value for '--input-source': Path '{input_source}' does not exist.")
+         logger.debug(f"Setting input_source to '{input_source}'")
+         self._config = dataclasses.replace(self._config, input_source=input_source)
+
+     def check_source_dialect(self, source_dialect: str | None):
+         if source_dialect == "None":
+             source_dialect = None
+         if not source_dialect:
+             source_dialect = self._config.source_dialect
+         all_dialects = sorted(TranspilerInstaller.all_dialects())
+         if source_dialect and source_dialect not in all_dialects:
+             logger.error(f"'{source_dialect}' is not a supported dialect. Selecting a supported one...")
+             source_dialect = None
+         if not source_dialect:
+             source_dialect = self._prompts.choice("Select the source dialect:", all_dialects)
+         if not source_dialect:
+             raise_validation_exception("Missing '--source-dialect'")
+         logger.debug(f"Setting source_dialect to '{source_dialect}'")
+         self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
+
+     def check_transpiler_config_path(self, transpiler_config_path: str | None):
+         if transpiler_config_path == "None":
+             transpiler_config_path = None
+         if not transpiler_config_path:
+             transpiler_config_path = self._config.transpiler_config_path
+         # we allow pointing to a loose transpiler config (i.e. not installed under .databricks)
+         if transpiler_config_path:
+             if not os.path.exists(transpiler_config_path):
+                 logger.error(f"The transpiler configuration does not exist '{transpiler_config_path}'.")
+                 transpiler_config_path = None
+         if transpiler_config_path:
+             config = LSPConfig.load(Path(transpiler_config_path))
+             if self._config.source_dialect not in config.remorph.dialects:
+                 logger.error(f"The configured transpiler does not support dialect '{self._config.source_dialect}'.")
+                 transpiler_config_path = None
+         if not transpiler_config_path:
+             transpiler_names = TranspilerInstaller.transpilers_with_dialect(cast(str, self._config.source_dialect))
+             if len(transpiler_names) > 1:
+                 transpiler_name = self._prompts.choice("Select the transpiler:", list(transpiler_names))
+             else:
+                 transpiler_name = next(name for name in transpiler_names)
+             logger.info(f"lakebridge will use the {transpiler_name} transpiler")
+             transpiler_config_path = str(TranspilerInstaller.transpiler_config_path(transpiler_name))
+         logger.debug(f"Setting transpiler_config_path to '{transpiler_config_path}'")
+         self._config = dataclasses.replace(self._config, transpiler_config_path=cast(str, transpiler_config_path))
+
+     def check_transpiler_config_options(self):
+         lsp_config = LSPConfig.load(Path(self._config.transpiler_config_path))
+         options_to_configure = lsp_config.options_for_dialect(self._config.source_dialect) or []
+         transpiler_options = self._config.transpiler_options or {}
+         if len(options_to_configure) == 0:
+             transpiler_options = None
+         else:
+             # TODO delete stale options ?
+             for option in options_to_configure:
+                 self._check_transpiler_config_option(option, transpiler_options)
+         logger.debug(f"Setting transpiler_options to {transpiler_options}")
+         self._config = dataclasses.replace(self._config, transpiler_options=transpiler_options)
+
+     def _check_transpiler_config_option(self, option: LSPConfigOptionV1, values: dict[str, JsonValue]):
+         if option.flag in values.keys():
+             return
+         values[option.flag] = option.prompt_for_value(self._prompts)
+
+     def check_output_folder(self, output_folder: str | None):
+         output_folder = output_folder if output_folder else self._config.output_folder
+         if not output_folder:
+             raise_validation_exception("Missing '--output-folder'")
+         if not os.path.exists(output_folder):
+             os.makedirs(output_folder, exist_ok=True)
+         logger.debug(f"Setting output_folder to '{output_folder}'")
+         self._config = dataclasses.replace(self._config, output_folder=output_folder)
+
+     def check_error_file_path(self, error_file_path: str | None):
+         error_file_path = error_file_path if error_file_path else self._config.error_file_path
+         if not error_file_path or error_file_path == "None":
+             raise_validation_exception("Missing '--error-file-path'")
+         if error_file_path == "errors.log":
+             error_file_path = str(Path.cwd() / "errors.log")
+         if not os.path.exists(Path(error_file_path).parent):
+             os.makedirs(Path(error_file_path).parent, exist_ok=True)
+
+         logger.debug(f"Setting error_file_path to '{error_file_path}'")
+         self._config = dataclasses.replace(self._config, error_file_path=error_file_path)
+
+     def check_skip_validation(self, skip_validation_str: str | None):
+         skip_validation: bool | None = None
+         if skip_validation_str == "None":
+             skip_validation_str = None
+         if skip_validation_str is not None:
+             if skip_validation_str.lower() not in {"true", "false"}:
+                 raise_validation_exception(
+                     f"Invalid value for '--skip-validation': '{skip_validation_str}' is not one of 'true', 'false'."
+                 )
+             skip_validation = skip_validation_str.lower() == "true"
+         if skip_validation is None:
+             skip_validation = self._config.skip_validation
+         if skip_validation is None:
+             skip_validation = self._prompts.confirm(
+                 "Would you like to validate the syntax and semantics of the transpiled queries?"
+             )
+         logger.debug(f"Setting skip_validation to '{skip_validation}'")
+         self._config = dataclasses.replace(self._config, skip_validation=skip_validation)
+
+     def check_catalog_name(self, catalog_name: str | None):
+         if self._config.skip_validation:
+             return
+         if catalog_name == "None":
+             catalog_name = None
+         if not catalog_name:
+             catalog_name = self._config.catalog_name
+         if not catalog_name:
+             raise_validation_exception(
+                 "Missing '--catalog-name', please run 'databricks labs lakebridge install-transpile' to configure one"
+             )
+         logger.debug(f"Setting catalog_name to '{catalog_name}'")
+         self._config = dataclasses.replace(self._config, catalog_name=catalog_name)
+
+     def check_schema_name(self, schema_name: str | None):
+         if self._config.skip_validation:
+             return
+         if schema_name == "None":
+             schema_name = None
+         if not schema_name:
+             schema_name = self._config.schema_name
+         if not schema_name:
+             raise_validation_exception(
+                 "Missing '--schema-name', please run 'databricks labs lakebridge install-transpile' to configure one"
+             )
+         logger.debug(f"Setting schema_name to '{schema_name}'")
+         self._config = dataclasses.replace(self._config, schema_name=schema_name)
+
+     def check(self) -> tuple[TranspileConfig, TranspileEngine]:
+         logger.debug(f"Checking config: {self!s}")
+         # not using os.path.exists because it sometimes fails mysteriously...
+         transpiler_path = self._config.transpiler_path
+         if not transpiler_path or not transpiler_path.exists():
+             raise_validation_exception(
+                 f"Invalid value for '--transpiler-config-path': Path '{self._config.transpiler_config_path}' does not exist."
+             )
+         engine = TranspileEngine.load_engine(transpiler_path)
+         engine.check_source_dialect(self._config.source_dialect)
+         if not self._config.input_source or not os.path.exists(self._config.input_source):
+             raise_validation_exception(
+                 f"Invalid value for '--input-source': Path '{self._config.input_source}' does not exist."
+             )
+         # 'transpiled' will be used as output_folder if not specified
+         # 'errors.log' will be used as errors file if not specified
+         return self._config, engine
+
+
+ async def _transpile(ctx: ApplicationContext, config: TranspileConfig, engine: TranspileEngine):
+     """Transpiles source dialect to databricks dialect"""
+     with_user_agent_extra("cmd", "execute-transpile")
+     user = ctx.current_user
+     logger.debug(f"User: {user}")
+     _override_workspace_client_config(ctx, config.sdk_config)
+     status, errors = await do_transpile(ctx.workspace_client, engine, config)
+     for error in errors:
+         logger.error(f"Error Transpiling: {str(error)}")
+
+     # Table Template in labs.yml requires the status to be list of dicts Do not change this
+     logger.info(f"lakebridge Transpiler encountered {len(status)} from given {config.input_source} files.")
+     return [status]
+
+
+ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[str, str] | None):
+     """
+     Override the Workspace client's SDK config with the user provided SDK config.
+     Users can provide the cluster_id and warehouse_id during the installation.
+     This will update the default config object in-place.
+     """
+     if not overrides:
+         return
+
+     warehouse_id = overrides.get("warehouse_id")
+     if warehouse_id:
+         ctx.connect_config.warehouse_id = warehouse_id
+
+     cluster_id = overrides.get("cluster_id")
+     if cluster_id:
+         ctx.connect_config.cluster_id = cluster_id
+
+
+ @lakebridge.command
+ def reconcile(w: WorkspaceClient):
+     """[EXPERIMENTAL] Reconciles source to Databricks datasets"""
+     with_user_agent_extra("cmd", "execute-reconcile")
+     ctx = ApplicationContext(w)
+     user = ctx.current_user
+     logger.debug(f"User: {user}")
+     recon_runner = ReconcileRunner(
+         ctx.workspace_client,
+         ctx.installation,
+         ctx.install_state,
+         ctx.prompts,
+     )
+     recon_runner.run(operation_name=RECONCILE_OPERATION_NAME)
+
+
+ @lakebridge.command
+ def aggregates_reconcile(w: WorkspaceClient):
+     """[EXPERIMENTAL] Reconciles Aggregated source to Databricks datasets"""
+     with_user_agent_extra("cmd", "execute-aggregates-reconcile")
+     ctx = ApplicationContext(w)
+     user = ctx.current_user
+     logger.debug(f"User: {user}")
+     recon_runner = ReconcileRunner(
+         ctx.workspace_client,
+         ctx.installation,
+         ctx.install_state,
+         ctx.prompts,
+     )
+
+     recon_runner.run(operation_name=AGG_RECONCILE_OPERATION_NAME)
+
+
+ @lakebridge.command
+ def generate_lineage(w: WorkspaceClient, source_dialect: str, input_source: str, output_folder: str):
+     """[Experimental] Generates a lineage of source SQL files or folder"""
+     ctx = ApplicationContext(w)
+     logger.debug(f"User: {ctx.current_user}")
+     engine = SqlglotEngine()
+     engine.check_source_dialect(source_dialect)
+     if not input_source or not os.path.exists(input_source):
+         raise_validation_exception(f"Invalid value for '--input-source': Path '{input_source}' does not exist.")
+     if not os.path.exists(output_folder) or output_folder in {None, ""}:
+         raise_validation_exception(f"Invalid value for '--output-folder': Path '{output_folder}' does not exist.")
+
+     lineage_generator(engine, source_dialect, input_source, output_folder)
+
+
+ @lakebridge.command
+ def configure_secrets(w: WorkspaceClient):
+     """Setup reconciliation connection profile details as Secrets on Databricks Workspace"""
+     recon_conf = ReconConfigPrompts(w)
+
+     # Prompt for source
+     source = recon_conf.prompt_source()
+
+     logger.info(f"Setting up Scope, Secrets for `{source}` reconciliation")
+     recon_conf.prompt_and_save_connection_details()
+
+
+ @lakebridge.command(is_unauthenticated=True)
+ def configure_database_profiler():
+     """[Experimental] Install the lakebridge Assessment package"""
+     prompts = Prompts()
+
+     # Prompt for source system
+     source_system = str(
+         prompts.choice("Please select the source system you want to configure", PROFILER_SOURCE_SYSTEM)
+     ).lower()
+
+     # Create appropriate assessment configurator
+     assessment = create_assessment_configurator(source_system=source_system, product_name="lakebridge", prompts=prompts)
+     assessment.run()
+
+
+ @lakebridge.command()
+ def install_transpile(w: WorkspaceClient, artifact: str | None = None):
+     """Install the lakebridge Transpilers"""
+     with_user_agent_extra("cmd", "install-transpile")
+     user = w.current_user
+     logger.debug(f"User: {user}")
+     installer = _installer(w)
+     installer.run(module="transpile", artifact=artifact)
+
+
+ @lakebridge.command(is_unauthenticated=False)
+ def configure_reconcile(w: WorkspaceClient):
+     """Configure the lakebridge Reconcile Package"""
+     with_user_agent_extra("cmd", "configure-reconcile")
+     user = w.current_user
+     logger.debug(f"User: {user}")
+     dbsql_id = _create_warehouse(w)
+     w.config.warehouse_id = dbsql_id
+     installer = _installer(w)
+     installer.run(module="reconcile")
+     _remove_warehouse(w, dbsql_id)
+
+
+ @lakebridge.command()
+ def analyze(w: WorkspaceClient, source_directory: str, report_file: str):
+     """Run the Analyzer"""
+     with_user_agent_extra("cmd", "analyze")
+     ctx = ApplicationContext(w)
+     prompts = ctx.prompts
+     output_file = report_file
+     input_folder = source_directory
+     source_tech = prompts.choice("Select the source technology", Analyzer.supported_source_technologies())
+     with_user_agent_extra("analyzer_source_tech", make_alphanum_or_semver(source_tech))
+     user = ctx.current_user
+     logger.debug(f"User: {user}")
+     Analyzer.analyze(Path(input_folder), Path(output_file), source_tech)
+
+
+ if __name__ == "__main__":
+     lakebridge()
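
For orientation, here is a minimal sketch (not part of the package, and not a supported entry point) of driving the same flow as the transpile command above programmatically. It assumes the wheel is installed, workspace authentication is configured, and a transpiler was set up via `databricks labs lakebridge install-transpile`; the paths and dialect are placeholder values, and _TranspileConfigChecker and _transpile are private helpers that may change without notice.

# Sketch only: mirrors the body of the `transpile` command defined above.
import asyncio

from databricks.sdk import WorkspaceClient
from databricks.labs.lakebridge.cli import _TranspileConfigChecker, _transpile
from databricks.labs.lakebridge.contexts.application import ApplicationContext

ctx = ApplicationContext(WorkspaceClient())  # assumes default SDK auth
checker = _TranspileConfigChecker(ctx.transpile_config, ctx.prompts)
checker.check_input_source("/tmp/queries")      # placeholder path; must exist
checker.check_source_dialect("snowflake")       # placeholder; must be an installed dialect
checker.check_transpiler_config_path(None)      # fall back to the installed transpiler
checker.check_transpiler_config_options()
checker.check_output_folder("/tmp/transpiled")  # placeholder; created if missing
checker.check_error_file_path("errors.log")     # resolved against the CWD, per the code above
checker.check_skip_validation("true")           # skips the catalog/schema checks
config, engine = checker.check()
print(asyncio.run(_transpile(ctx, config, engine)))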
databricks/labs/lakebridge/config.py
@@ -0,0 +1,192 @@
+ from __future__ import annotations
+
+ import logging
+ from dataclasses import dataclass
+ from enum import Enum, auto
+ from pathlib import Path
+ from typing import Any, cast
+
+ from databricks.labs.blueprint.installation import JsonValue
+ from databricks.labs.blueprint.tui import Prompts
+ from databricks.labs.lakebridge.transpiler.transpile_status import TranspileError
+ from databricks.labs.lakebridge.reconcile.recon_config import Table
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class LSPPromptMethod(Enum):
+     FORCE = auto()  # for mandatory values that are specific to a dialect
+     QUESTION = auto()
+     CHOICE = auto()
+     CONFIRM = auto()
+
+
+ @dataclass
+ class LSPConfigOptionV1:
+     flag: str
+     method: LSPPromptMethod
+     prompt: str = ""
+     choices: list[str] | None = None
+     default: Any = None
+
+     @classmethod
+     def parse_all(cls, data: dict[str, Any]) -> dict[str, list[LSPConfigOptionV1]]:
+         return {key: list(LSPConfigOptionV1.parse(item) for item in value) for (key, value) in data.items()}
+
+     @classmethod
+     def parse(cls, data: Any) -> LSPConfigOptionV1:
+         if not isinstance(data, dict):
+             raise ValueError(f"Invalid transpiler config option, expecting a dict entry, got {data}")
+         flag: str = data.get("flag", "")
+         if not flag:
+             raise ValueError(f"Missing 'flag' entry in {data}")
+         method_name: str = data.get("method", "")
+         if not method_name:
+             raise ValueError(f"Missing 'method' entry in {data}")
+         method: LSPPromptMethod = cast(LSPPromptMethod, LSPPromptMethod[method_name])
+         prompt: str = data.get("prompt", "")
+         if not prompt:
+             raise ValueError(f"Missing 'prompt' entry in {data}")
+         choices = data.get("choices", [])
+         default = data.get("default", None)
+         return LSPConfigOptionV1(flag, method, prompt, choices, default)
+
+     def prompt_for_value(self, prompts: Prompts) -> JsonValue:
+         if self.method == LSPPromptMethod.FORCE:
+             return self.default
+         if self.method == LSPPromptMethod.CONFIRM:
+             return prompts.confirm(self.prompt)
+         if self.method == LSPPromptMethod.QUESTION:
+             default = self.default if self.default else "None"
+             result = prompts.question(self.prompt, default=default)
+             if result == "<none>":
+                 return None
+             return result
+         if self.method == LSPPromptMethod.CHOICE:
+             return prompts.choice(self.prompt, cast(list[str], self.choices))
+         raise ValueError(f"Unsupported prompt method: {self.method}")
+
+
+ @dataclass
+ class TranspileConfig:
+     __file__ = "config.yml"
+     __version__ = 3
+
+     transpiler_config_path: str | None = None
+     source_dialect: str | None = None
+     input_source: str | None = None
+     output_folder: str | None = None
+     error_file_path: str | None = None
+     sdk_config: dict[str, str] | None = None
+     skip_validation: bool | None = False
+     catalog_name: str = "remorph"
+     schema_name: str = "transpiler"
+     transpiler_options: JsonValue = None
+
+     @property
+     def transpiler_path(self) -> Path | None:
+         return Path(self.transpiler_config_path) if self.transpiler_config_path is not None else None
+
+     @property
+     def input_path(self):
+         if self.input_source is None:
+             raise ValueError("Missing input source!")
+         return Path(self.input_source)
+
+     @property
+     def output_path(self):
+         return None if self.output_folder is None else Path(self.output_folder)
+
+     @property
+     def error_path(self):
+         return Path(self.error_file_path) if self.error_file_path else None
+
+     @property
+     def target_dialect(self):
+         return "databricks"
+
+     @classmethod
+     def v1_migrate(cls, raw: dict) -> dict:
+         raw["version"] = 2
+         return raw
+
+     @classmethod
+     def v2_migrate(cls, raw: dict) -> dict:
+         del raw["mode"]
+         key_mapping = {"input_sql": "input_source", "output_folder": "output_path", "source": "source_dialect"}
+         raw["version"] = 3
+         raw["error_file_path"] = "error_log.txt"
+         return {key_mapping.get(key, key): value for key, value in raw.items()}
+
+
+ @dataclass
+ class TableRecon:
+     __file__ = "recon_config.yml"
+     __version__ = 1
+
+     source_schema: str
+     target_catalog: str
+     target_schema: str
+     tables: list[Table]
+     source_catalog: str | None = None
+
+     def __post_init__(self):
+         self.source_schema = self.source_schema.lower()
+         self.target_schema = self.target_schema.lower()
+         self.target_catalog = self.target_catalog.lower()
+         self.source_catalog = self.source_catalog.lower() if self.source_catalog else self.source_catalog
+
+
+ @dataclass
+ class DatabaseConfig:
+     source_schema: str
+     target_catalog: str
+     target_schema: str
+     source_catalog: str | None = None
+
+
+ @dataclass
+ class TranspileResult:
+     transpiled_code: str
+     success_count: int
+     error_list: list[TranspileError]
+
+
+ @dataclass
+ class ValidationResult:
+     validated_sql: str
+     exception_msg: str | None
+
+
+ @dataclass
+ class ReconcileTablesConfig:
+     filter_type: str  # all/include/exclude
+     tables_list: list[str]  # [*, table1, table2]
+
+
+ @dataclass
+ class ReconcileMetadataConfig:
+     catalog: str = "remorph"
+     schema: str = "reconcile"
+     volume: str = "reconcile_volume"
+
+
+ @dataclass
+ class ReconcileConfig:
+     __file__ = "reconcile.yml"
+     __version__ = 1
+
+     data_source: str
+     report_type: str
+     secret_scope: str
+     database_config: DatabaseConfig
+     metadata_config: ReconcileMetadataConfig
+     job_id: str | None = None
+     tables: ReconcileTablesConfig | None = None
+
+
+ @dataclass
+ class RemorphConfigs:
+     transpile: TranspileConfig | None = None
+     reconcile: ReconcileConfig | None = None
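
To make the config-file migration above concrete, here is a minimal sketch of what TranspileConfig.v2_migrate does to a version-2 config dict, assuming the wheel is installed. The values are invented; the key handling follows the code above (note that the mapping also renames output_folder to output_path).

# Sketch: effect of TranspileConfig.v2_migrate on a v2 config payload.
from databricks.labs.lakebridge.config import TranspileConfig

raw_v2 = {
    "version": 2,
    "mode": "current",        # removed by the migration
    "input_sql": "queries/",  # renamed to input_source
    "source": "snowflake",    # renamed to source_dialect
}
migrated = TranspileConfig.v2_migrate(raw_v2)
assert migrated == {
    "version": 3,
    "input_source": "queries/",
    "source_dialect": "snowflake",
    "error_file_path": "error_log.txt",  # added with a fixed default
}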