databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/install.py
@@ -0,0 +1,849 @@
+import re
+import abc
+import dataclasses
+import shutil
+from collections.abc import Iterable
+from json import loads, dump
+import logging
+import os
+from shutil import rmtree, move
+from subprocess import run, CalledProcessError
+import sys
+from typing import Any, cast
+from urllib import request
+from urllib.error import URLError, HTTPError
+import webbrowser
+from datetime import datetime, timezone
+from pathlib import Path
+import xml.etree.ElementTree as ET
+from zipfile import ZipFile
+
+from databricks.labs.blueprint.installation import Installation, JsonValue
+from databricks.labs.blueprint.installation import SerdeError
+from databricks.labs.blueprint.installer import InstallState
+from databricks.labs.blueprint.tui import Prompts
+from databricks.labs.blueprint.wheels import ProductInfo
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import NotFound, PermissionDenied
+
+from databricks.labs.lakebridge.config import (
+    TranspileConfig,
+    ReconcileConfig,
+    DatabaseConfig,
+    RemorphConfigs,
+    ReconcileMetadataConfig,
+    LSPConfigOptionV1,
+)
+
+from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
+from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
+from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
+from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig
+
+logger = logging.getLogger(__name__)
+
+TRANSPILER_WAREHOUSE_PREFIX = "Lakebridge Transpiler Validation"
+
+
+class TranspilerInstaller(abc.ABC):
+
+    @classmethod
+    def labs_path(cls) -> Path:
+        return Path.home() / ".databricks" / "labs"
+
+    @classmethod
+    def transpilers_path(cls) -> Path:
+        return cls.labs_path() / "remorph-transpilers"
+
+    @classmethod
+    def install_from_pypi(cls, product_name: str, pypi_name: str, artifact: Path | None = None) -> Path | None:
+        installer = WheelInstaller(product_name, pypi_name, artifact)
+        return installer.install()
+
+    @classmethod
+    def install_from_maven(
+        cls, product_name: str, group_id: str, artifact_id: str, artifact: Path | None = None
+    ) -> Path | None:
+        installer = MavenInstaller(product_name, group_id, artifact_id, artifact)
+        return installer.install()
+
+    @classmethod
+    def get_installed_version(cls, product_name: str, is_transpiler=True) -> str | None:
+        product_path = (cls.transpilers_path() if is_transpiler else cls.labs_path()) / product_name
+        current_version_path = product_path / "state" / "version.json"
+        if not current_version_path.exists():
+            return None
+        text = current_version_path.read_text("utf-8")
+        data: dict[str, Any] = loads(text)
+        version: str | None = data.get("version", None)
+        if not version or not version.startswith("v"):
+            return None
+        return version[1:]
+
+    _version_pattern = re.compile(r"[_-](\d+(?:[.\-_]\w*\d+)+)")
+
+    @classmethod
+    def get_local_artifact_version(cls, artifact: Path) -> str | None:
+        # TODO: Get the version from the metadata inside the artifact rather than relying on the filename.
+        match = cls._version_pattern.search(artifact.stem)
+        if not match:
+            return None
+        group = match.group(0)
+        if not group:
+            return None
+        # TODO: Update the regex to take care of these trimming scenarios.
+        if group.startswith('-'):
+            group = group[1:]
+        if group.endswith("-py3"):
+            group = group[:-4]
+        return group
+
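As a quick check of the filename-based version sniffing above: applied to a typical wheel name, the regex captures the version together with the "-py3" tag, and the two trim steps then remove the surrounding separators (a standalone sketch; the wheel name is illustrative):

    import re
    from pathlib import Path

    pattern = re.compile(r"[_-](\d+(?:[.\-_]\w*\d+)+)")
    stem = Path("databricks_bb_plugin-0.10.0-py3-none-any.whl").stem
    group = pattern.search(stem).group(0)  # '-0.10.0-py3'
    group = group[1:]                      # the leading '-' trim
    group = group[:-4]                     # the trailing '-py3' trim
    print(group)                           # 0.10.0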
+    @classmethod
+    def all_transpiler_configs(cls) -> dict[str, LSPConfig]:
+        all_configs = cls._all_transpiler_configs()
+        return {config.name: config for config in all_configs}
+
+    @classmethod
+    def all_transpiler_names(cls) -> set[str]:
+        all_configs = cls.all_transpiler_configs()
+        return set(all_configs.keys())
+
+    @classmethod
+    def all_dialects(cls) -> set[str]:
+        all_dialects: set[str] = set()
+        for config in cls._all_transpiler_configs():
+            all_dialects = all_dialects.union(config.remorph.dialects)
+        return all_dialects
+
+    @classmethod
+    def transpilers_with_dialect(cls, dialect: str) -> set[str]:
+        configs = filter(lambda cfg: dialect in cfg.remorph.dialects, cls.all_transpiler_configs().values())
+        return set(config.name for config in configs)
+
+    @classmethod
+    def transpiler_config_path(cls, transpiler_name) -> Path:
+        config = cls.all_transpiler_configs().get(transpiler_name, None)
+        if not config:
+            raise ValueError(f"No such transpiler: {transpiler_name}")
+        return config.path
+
+    @classmethod
+    def transpiler_config_options(cls, transpiler_name, source_dialect) -> list[LSPConfigOptionV1]:
+        config = cls.all_transpiler_configs().get(transpiler_name, None)
+        if not config:
+            return []  # gracefully returns an empty list, since this can only happen during testing
+        return config.options_for_dialect(source_dialect)
+
+    @classmethod
+    def _all_transpiler_configs(cls) -> Iterable[LSPConfig]:
+        path = cls.transpilers_path()
+        if path.exists():
+            all_files = os.listdir(path)
+            for file in all_files:
+                config = cls._transpiler_config(cls.transpilers_path() / file)
+                if config:
+                    yield config
+
+    @classmethod
+    def _transpiler_config(cls, path: Path) -> LSPConfig | None:
+        if not path.is_dir() or not (path / "lib").is_dir():
+            return None
+        config_path = path / "lib" / "config.yml"
+        if not config_path.is_file():
+            return None
+        try:
+            return LSPConfig.load(config_path)
+        except ValueError as e:
+            logger.error(f"Could not load config: {path!s}", exc_info=e)
+            return None
+
+    @classmethod
+    def _store_product_state(cls, product_path: Path, version: str) -> None:
+        state_path = product_path / "state"
+        state_path.mkdir()
+        version_data = {"version": f"v{version}", "date": datetime.now(timezone.utc).isoformat()}
+        version_path = state_path / "version.json"
+        with version_path.open("w", encoding="utf-8") as f:
+            dump(version_data, f)
+            f.write("\n")
+
+
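The state file written by _store_product_state is exactly what get_installed_version reads back later. A minimal round-trip sketch, assuming a product named "bladebridge" was installed under the layout above (version and date are illustrative):

    from json import loads
    from pathlib import Path

    state = Path.home() / ".databricks" / "labs" / "remorph-transpilers" / "bladebridge" / "state"
    data = loads((state / "version.json").read_text("utf-8"))
    # e.g. {"version": "v0.1.9", "date": "2025-06-01T12:00:00+00:00"}
    print(data["version"][1:])  # '0.1.9', the value get_installed_version returns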
+class WheelInstaller(TranspilerInstaller):
+
+    @classmethod
+    def get_latest_artifact_version_from_pypi(cls, product_name: str) -> str | None:
+        try:
+            with request.urlopen(f"https://pypi.org/pypi/{product_name}/json") as server:
+                text: bytes = server.read()
+            data: dict[str, Any] = loads(text)
+            return data.get("info", {}).get('version', None)
+        except HTTPError as e:
+            logger.error(f"Error while fetching PyPI metadata: {product_name}", exc_info=e)
+            return None
+
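The endpoint queried above is PyPI's public JSON API, which reports the latest release under info.version. A minimal standalone probe (the package name is illustrative, and the printed version will vary):

    from json import loads
    from urllib import request

    with request.urlopen("https://pypi.org/pypi/databricks-bb-plugin/json") as resp:
        data = loads(resp.read())
    print(data.get("info", {}).get("version"))  # latest published release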
+    @classmethod
+    def download_artifact_from_pypi(cls, product_name: str, version: str, target: Path, extension="whl") -> int:
+        suffix = "-py3-none-any.whl" if extension == "whl" else ".tar.gz" if extension == "tar" else f".{extension}"
+        filename = f"{product_name.replace('-', '_')}-{version}{suffix}"
+        url = f"https://pypi.debian.net/{product_name}/{filename}"
+        try:
+            path, _ = request.urlretrieve(url)
+            logger.info(f"Successfully downloaded {path}")
+            if not target.exists():
+                logger.info(f"Moving {path} to {target!s}")
+                move(path, target)
+            return 0
+        except URLError as e:
+            logger.error("While downloading from pypi", exc_info=e)
+            return -1
+
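For the default "whl" extension, the filename and URL composed above come out as follows; pypi.debian.net is a PyPI redirector service, and the version here is illustrative:

    product_name, version = "databricks-bb-plugin", "0.1.9"
    filename = f"{product_name.replace('-', '_')}-{version}-py3-none-any.whl"
    url = f"https://pypi.debian.net/{product_name}/{filename}"
    print(filename)  # databricks_bb_plugin-0.1.9-py3-none-any.whl
    print(url)       # https://pypi.debian.net/databricks-bb-plugin/databricks_bb_plugin-0.1.9-py3-none-any.whl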
+    def __init__(self, product_name: str, pypi_name: str, artifact: Path | None = None):
+        self._product_name = product_name
+        self._pypi_name = pypi_name
+        self._artifact = artifact
+
+    def install(self) -> Path | None:
+        return self._install_checking_versions()
+
+    def _install_checking_versions(self) -> Path | None:
+        latest_version = (
+            self.get_local_artifact_version(self._artifact)
+            if self._artifact
+            else self.get_latest_artifact_version_from_pypi(self._pypi_name)
+        )
+        if latest_version is None:
+            logger.warning(f"Could not determine the latest version of {self._pypi_name}")
+            logger.error(f"Failed to install transpiler: {self._product_name}")
+            return None
+        installed_version = self.get_installed_version(self._product_name)
+        if installed_version == latest_version:
+            logger.info(f"{self._pypi_name} v{latest_version} already installed")
+            return None
+        return self._install_latest_version(latest_version)
+
+    def _install_latest_version(self, version: str) -> Path | None:
+        logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
+        # use type(self) to workaround a mock bug on class methods
+        self._product_path = type(self).transpilers_path() / self._product_name
+        backup_path = Path(f"{self._product_path!s}-saved")
+        if self._product_path.exists():
+            os.rename(self._product_path, backup_path)
+        self._product_path.mkdir(parents=True, exist_ok=True)
+        self._install_path = self._product_path / "lib"
+        self._install_path.mkdir(exist_ok=True)
+        try:
+            result = self._unsafe_install_latest_version(version)
+            logger.info(f"Successfully installed {self._pypi_name} v{version}")
+            if backup_path.exists():
+                rmtree(backup_path)
+            return result
+        except (CalledProcessError, ValueError) as e:
+            logger.error(f"Failed to install {self._pypi_name} v{version}", exc_info=e)
+            rmtree(self._product_path)
+            if backup_path.exists():
+                os.rename(backup_path, self._product_path)
+            return None
+
+    def _unsafe_install_latest_version(self, version: str) -> Path | None:
+        self._create_venv()
+        self._install_with_pip()
+        self._copy_lsp_resources()
+        return self._post_install(version)
+
+    def _create_venv(self) -> None:
+        cwd = os.getcwd()
+        try:
+            os.chdir(self._install_path)
+            self._unsafe_create_venv()
+        finally:
+            os.chdir(cwd)
+
+    def _unsafe_create_venv(self) -> None:
+        # using the venv module doesn't work (maybe it's not possible to create a venv from a venv?)
+        # so falling back to something that works
+        # for some reason this requires shell=True, so pass the full command line
+        cmd_line = f"{sys.executable} -m venv .venv"
+        completed = run(cmd_line, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
+        if completed.returncode:
+            logger.error(f"Failed to create venv, error code: {completed.returncode}")
+            if completed.stdout:
+                for line in completed.stdout:
+                    logger.error(line)
+            if completed.stderr:
+                for line in completed.stderr:
+                    logger.error(line)
+        completed.check_returncode()
+        self._venv = self._install_path / ".venv"
+        self._site_packages = self._locate_site_packages()
+
+    def _locate_site_packages(self) -> Path:
+        # can't use sysconfig because it only works for the currently running python
+        if sys.platform == "win32":
+            return self._locate_site_packages_windows()
+        return self._locate_site_packages_linux_or_macos()
+
+    def _locate_site_packages_windows(self) -> Path:
+        packages = self._venv / "Lib" / "site-packages"
+        if packages.exists():
+            return packages
+        raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
+
+    def _locate_site_packages_linux_or_macos(self) -> Path:
+        lib = self._venv / "lib"
+        for dir_ in os.listdir(lib):
+            if dir_.startswith("python"):
+                packages = lib / dir_ / "site-packages"
+                if packages.exists():
+                    return packages
+        raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
+
+    def _install_with_pip(self) -> None:
+        cwd = os.getcwd()
+        try:
+            os.chdir(self._install_path)
+            # the way to call pip from python is highly sensitive to os and source type
+            if self._artifact:
+                self._install_local_artifact()
+            else:
+                self._install_remote_artifact()
+        finally:
+            os.chdir(cwd)
+
+    def _install_local_artifact(self) -> None:
+        pip = self._locate_pip()
+        pip = pip.relative_to(self._install_path)
+        target = self._site_packages
+        target = target.relative_to(self._install_path)
+        if sys.platform == "win32":
+            command = f"{pip!s} install {self._artifact!s} -t {target!s}"
+            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
+        else:
+            command = f"'{pip!s}' install '{self._artifact!s}' -t '{target!s}'"
+            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
+        # checking return code later makes debugging easier
+        completed.check_returncode()
+
+    def _install_remote_artifact(self) -> None:
+        pip = self._locate_pip()
+        pip = pip.relative_to(self._install_path)
+        target = self._site_packages
+        target = target.relative_to(self._install_path)
+        if sys.platform == "win32":
+            args = [str(pip), "install", self._pypi_name, "-t", str(target)]
+            completed = run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
+        else:
+            command = f"'{pip!s}' install {self._pypi_name} -t '{target!s}'"
+            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
+        # checking return code later makes debugging easier
+        completed.check_returncode()
+
+    def _locate_pip(self) -> Path:
+        return self._venv / "Scripts" / "pip3.exe" if sys.platform == "win32" else self._venv / "bin" / "pip3"
+
+    def _copy_lsp_resources(self):
+        lsp = self._site_packages / "lsp"
+        if not lsp.exists():
+            raise ValueError("Installed transpiler is missing a 'lsp' folder")
+        shutil.copytree(lsp, self._install_path, dirs_exist_ok=True)
+
+    def _post_install(self, version: str) -> Path | None:
+        config = self._install_path / "config.yml"
+        if not config.exists():
+            raise ValueError("Installed transpiler is missing a 'config.yml' file in its 'lsp' folder")
+        install_ext = "ps1" if sys.platform == "win32" else "sh"
+        install_script = f"installer.{install_ext}"
+        installer = self._install_path / install_script
+        if installer.exists():
+            self._run_custom_installer(installer)
+        self._store_product_state(product_path=self._product_path, version=version)
+        return self._install_path
+
+    def _run_custom_installer(self, installer):
+        args = [str(installer)]
+        run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, cwd=str(self._install_path), check=True)
+
+
+class MavenInstaller(TranspilerInstaller):
+    # Maven Central, base URL.
+    _maven_central_repo: str = "https://repo.maven.apache.org/maven2/"
+
+    @classmethod
+    def _artifact_base_url(cls, group_id: str, artifact_id: str) -> str:
+        """Construct the base URL for a Maven artifact."""
+        # Reference: https://maven.apache.org/repositories/layout.html
+        group_path = group_id.replace(".", "/")
+        return f"{cls._maven_central_repo}{group_path}/{artifact_id}/"
+
+    @classmethod
+    def artifact_metadata_url(cls, group_id: str, artifact_id: str) -> str:
+        """Get the metadata URL for a Maven artifact."""
+        # TODO: Unit test this method.
+        return f"{cls._artifact_base_url(group_id, artifact_id)}maven-metadata.xml"
+
+    @classmethod
+    def artifact_url(
+        cls, group_id: str, artifact_id: str, version: str, classifier: str | None = None, extension: str = "jar"
+    ) -> str:
+        """Get the URL for a versioned Maven artifact."""
+        # TODO: Unit test this method, including classifier and extension.
+        _classifier = f"-{classifier}" if classifier else ""
+        artifact_base_url = cls._artifact_base_url(group_id, artifact_id)
+        return f"{artifact_base_url}{version}/{artifact_id}-{version}{_classifier}.{extension}"
+
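Plugging in the Maven coordinates that install_morpheus uses further down gives a feel for the standard Maven Central layout (the version is illustrative):

    url = MavenInstaller.artifact_url("com.databricks.labs", "databricks-morph-plugin", "0.4.0")
    # https://repo.maven.apache.org/maven2/com/databricks/labs/databricks-morph-plugin/0.4.0/databricks-morph-plugin-0.4.0.jar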
+    @classmethod
+    def get_current_maven_artifact_version(cls, group_id: str, artifact_id: str) -> str | None:
+        url = cls.artifact_metadata_url(group_id, artifact_id)
+        try:
+            with request.urlopen(url) as server:
+                text = server.read()
+        except HTTPError as e:
+            logger.error(f"Error while fetching maven metadata: {group_id}:{artifact_id}", exc_info=e)
+            return None
+        logger.debug(f"Maven metadata for {group_id}:{artifact_id}: {text}")
+        return cls._extract_latest_release_version(text)
+
+    @classmethod
+    def _extract_latest_release_version(cls, maven_metadata: str) -> str | None:
+        """Extract the latest release version from Maven metadata."""
+        # Reference: https://maven.apache.org/repositories/metadata.html#The_A_Level_Metadata
+        # TODO: Unit test this method, to verify the sequence of things it checks for.
+        root = ET.fromstring(maven_metadata)
+        for label in ("release", "latest"):
+            version = root.findtext(f"./versioning/{label}")
+            if version is not None:
+                return version
+        return root.findtext("./versioning/versions/version[last()]")
+
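The lookup order above prefers <release>, then <latest>, then the last <version> listed. A standalone check against a hand-written metadata document (sample XML, not fetched from Maven Central):

    import xml.etree.ElementTree as ET

    sample = """<metadata>
      <versioning>
        <latest>0.5.0-SNAPSHOT</latest>
        <release>0.4.0</release>
        <versions><version>0.3.0</version><version>0.4.0</version></versions>
      </versioning>
    </metadata>"""
    root = ET.fromstring(sample)
    print(root.findtext("./versioning/release"))  # 0.4.0 -- <release> wins over <latest>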
+    @classmethod
+    def download_artifact_from_maven(
+        cls,
+        group_id: str,
+        artifact_id: str,
+        version: str,
+        target: Path,
+        classifier: str | None = None,
+        extension: str = "jar",
+    ) -> bool:
+        if target.exists():
+            logger.warning(f"Skipping download of {group_id}:{artifact_id}:{version}; target already exists: {target}")
+            return True
+        url = cls.artifact_url(group_id, artifact_id, version, classifier, extension)
+        try:
+            path, _ = request.urlretrieve(url)
+            logger.debug(f"Downloaded maven artefact from {url} to {path}")
+        except URLError as e:
+            logger.error(f"Unable to download maven artefact: {group_id}:{artifact_id}:{version}", exc_info=e)
+            return False
+        logger.debug(f"Moving {path} to {target}")
+        move(path, target)
+        logger.info(f"Successfully installed: {group_id}:{artifact_id}:{version}")
+        return True
+
+    def __init__(self, product_name: str, group_id: str, artifact_id: str, artifact: Path | None = None):
+        self._product_name = product_name
+        self._group_id = group_id
+        self._artifact_id = artifact_id
+        self._artifact = artifact
+
+    def install(self) -> Path | None:
+        return self._install_checking_versions()
+
+    def _install_checking_versions(self) -> Path | None:
+        if self._artifact:
+            latest_version = self.get_local_artifact_version(self._artifact)
+        else:
+            latest_version = self.get_current_maven_artifact_version(self._group_id, self._artifact_id)
+        if latest_version is None:
+            logger.warning(f"Could not determine the latest version of Databricks {self._product_name} transpiler")
+            logger.error(f"Failed to install transpiler: Databricks {self._product_name} transpiler")
+            return None
+        installed_version = self.get_installed_version(self._product_name)
+        if installed_version == latest_version:
+            logger.info(f"Databricks {self._product_name} transpiler v{latest_version} already installed")
+            return None
+        return self._install_version(latest_version)
+
+    def _install_version(self, version: str) -> Path | None:
+        logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
+        # use type(self) to workaround a mock bug on class methods
+        self._product_path = type(self).transpilers_path() / self._product_name
+        backup_path = Path(f"{self._product_path!s}-saved")
+        if backup_path.exists():
+            rmtree(backup_path)
+        if self._product_path.exists():
+            os.rename(self._product_path, backup_path)
+        self._product_path.mkdir(parents=True)
+        self._install_path = self._product_path / "lib"
+        self._install_path.mkdir()
+        try:
+            if self._unsafe_install_version(version):
+                logger.info(f"Successfully installed {self._product_name} v{version}")
+                self._store_product_state(self._product_path, version)
+                if backup_path.exists():
+                    rmtree(backup_path)
+                return self._product_path
+        except (KeyError, ValueError) as e:
+            logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}", exc_info=e)
+        rmtree(self._product_path)
+        if backup_path.exists():
+            os.rename(backup_path, self._product_path)
+        return None
+
+    def _unsafe_install_version(self, version: str) -> bool:
+        jar_file_path = self._install_path / f"{self._artifact_id}.jar"
+        if self._artifact:
+            logger.debug(f"Copying '{self._artifact!s}' to '{jar_file_path!s}'")
+            shutil.copyfile(self._artifact, jar_file_path)
+        elif not self.download_artifact_from_maven(self._group_id, self._artifact_id, version, jar_file_path):
+            logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}")
+            return False
+        self._copy_lsp_config(jar_file_path)
+        return True
+
+    def _copy_lsp_config(self, jar_file_path: Path) -> None:
+        with ZipFile(jar_file_path) as zip_file:
+            zip_file.extract("lsp/config.yml", self._install_path)
+        shutil.move(self._install_path / "lsp" / "config.yml", self._install_path / "config.yml")
+        os.rmdir(self._install_path / "lsp")
+
+
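ZipFile.extract preserves the archive-internal lsp/ directory, which is why the method then moves the file up a level and removes the leftover folder. A self-contained sketch of the same sequence against a synthetic jar:

    import os, shutil
    from pathlib import Path
    from tempfile import mkdtemp
    from zipfile import ZipFile

    work = Path(mkdtemp())
    jar = work / "plugin.jar"
    with ZipFile(jar, "w") as zf:
        zf.writestr("lsp/config.yml", "name: example\n")

    with ZipFile(jar) as zf:
        zf.extract("lsp/config.yml", work)   # lands in work/lsp/config.yml
    shutil.move(work / "lsp" / "config.yml", work / "config.yml")
    os.rmdir(work / "lsp")
    print((work / "config.yml").exists())    # True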
+class WorkspaceInstaller:
+    def __init__(
+        self,
+        ws: WorkspaceClient,
+        prompts: Prompts,
+        installation: Installation,
+        install_state: InstallState,
+        product_info: ProductInfo,
+        resource_configurator: ResourceConfigurator,
+        workspace_installation: WorkspaceInstallation,
+        environ: dict[str, str] | None = None,
+    ):
+        self._ws = ws
+        self._prompts = prompts
+        self._installation = installation
+        self._install_state = install_state
+        self._product_info = product_info
+        self._resource_configurator = resource_configurator
+        self._ws_installation = workspace_installation
+
+        if not environ:
+            environ = dict(os.environ.items())
+
+        if "DATABRICKS_RUNTIME_VERSION" in environ:
+            msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime"
+            raise SystemExit(msg)
+
+    def run(self, module: str, config: RemorphConfigs | None = None, artifact: str | None = None) -> RemorphConfigs:
+        logger.debug(f"Initializing workspace installation for module: {module} (config: {config})")
+        if module == "transpile" and artifact:
+            self.install_artifact(artifact)
+        elif module in {"transpile", "all"}:
+            self.install_bladebridge()
+            self.install_morpheus()
+        if not config:
+            config = self.configure(module)
+        if self._is_testing():
+            return config
+        self._ws_installation.install(config)
+        logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
+        return config
+
+    @classmethod
+    def install_bladebridge(cls, artifact: Path | None = None):
+        local_name = "bladebridge"
+        pypi_name = "databricks-bb-plugin"
+        TranspilerInstaller.install_from_pypi(local_name, pypi_name, artifact)
+
+    @classmethod
+    def install_morpheus(cls, artifact: Path | None = None):
+        java_version = cls.get_java_version()
+        if java_version is None or java_version < 110:
+            logger.warning(
+                "This software requires Java 11 or above. Please install Java and re-run 'install-transpile'."
+            )
+            return
+        product_name = "databricks-morph-plugin"
+        group_id = "com.databricks.labs"
+        artifact_id = product_name
+        TranspilerInstaller.install_from_maven(product_name, group_id, artifact_id, artifact)
+
+    @classmethod
+    def install_artifact(cls, artifact: str):
+        path = Path(artifact)
+        if not path.exists():
+            logger.error(f"Could not locate artifact {artifact}")
+            return
+        if "databricks-morph-plugin" in path.name:
+            cls.install_morpheus(path)
+        elif "databricks_bb_plugin" in path.name:
+            cls.install_bladebridge(path)
+        else:
+            logger.fatal(f"Cannot install unsupported artifact: {artifact}")
+
+    @classmethod
+    def get_java_version(cls) -> int | None:
+        completed = run(["java", "-version"], shell=False, capture_output=True, check=False)
+        try:
+            completed.check_returncode()
+        except CalledProcessError:
+            return None
+        result = completed.stderr.decode("utf-8")
+        start = result.find(" version ")
+        if start < 0:
+            return None
+        start = result.find('"', start + 1)
+        if start < 0:
+            return None
+        end = result.find('"', start + 1)
+        if end < 0:
+            return None
+        version = result[start + 1 : end]
+        parts = version.split('.')
+        return int(parts[0] + parts[1])
+
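The quote-scanning above reduces a java -version banner to an integer by concatenating the first two dot-separated components, so the "< 110" check in install_morpheus above means "older than Java 11". A standalone trace (the banner text is illustrative):

    banner = 'openjdk version "11.0.2" 2019-01-15'
    start = banner.find(" version ")
    start = banner.find('"', start + 1)
    end = banner.find('"', start + 1)
    version = banner[start + 1 : end]  # '11.0.2'
    parts = version.split('.')
    print(int(parts[0] + parts[1]))    # 110 -> passes the Java 11 gate
    # a legacy '1.8.0_292' banner yields 18, which fails the gate as intended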
+    def configure(self, module: str) -> RemorphConfigs:
+        match module:
+            case "transpile":
+                logger.info("Configuring lakebridge `transpile`.")
+                return RemorphConfigs(self._configure_transpile(), None)
+            case "reconcile":
+                logger.info("Configuring lakebridge `reconcile`.")
+                return RemorphConfigs(None, self._configure_reconcile())
+            case "all":
+                logger.info("Configuring lakebridge `transpile` and `reconcile`.")
+                return RemorphConfigs(
+                    self._configure_transpile(),
+                    self._configure_reconcile(),
+                )
+            case _:
+                raise ValueError(f"Invalid input: {module}")
+
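The match statement above is what run() calls to gather configuration. Sketched behavior, assuming an already-constructed WorkspaceInstaller named installer (module names are the only valid inputs):

    configs = installer.configure("transpile")  # prompts, returns RemorphConfigs(transpile_config, None)
    configs = installer.configure("all")        # prompts for both transpile and reconcile
    installer.configure("lineage")              # raises ValueError: Invalid input: lineage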
+    def _is_testing(self):
+        return self._product_info.product_name() != "lakebridge"
+
+    def _configure_transpile(self) -> TranspileConfig:
+        try:
+            self._installation.load(TranspileConfig)
+            logger.info("Lakebridge `transpile` is already installed on this workspace.")
+            if not self._prompts.confirm("Do you want to override the existing installation?"):
+                raise SystemExit("Lakebridge `transpile` is already installed and no override has been requested. Exiting...")
+        except NotFound:
+            logger.info("Couldn't find existing `transpile` installation")
+        except (PermissionDenied, SerdeError, ValueError, AttributeError):
+            install_dir = self._installation.install_folder()
+            logger.warning(
+                f"Existing `transpile` installation at {install_dir} is corrupted. Continuing new installation..."
+            )
+
+        config = self._configure_new_transpile_installation()
+        logger.info("Finished configuring lakebridge `transpile`.")
+        return config
+
+    def _configure_new_transpile_installation(self) -> TranspileConfig:
+        default_config = self._prompt_for_new_transpile_installation()
+        runtime_config = None
+        catalog_name = "remorph"
+        schema_name = "transpiler"
+        if not default_config.skip_validation:
+            catalog_name = self._configure_catalog()
+            schema_name = self._configure_schema(catalog_name, "transpile")
+            self._has_necessary_access(catalog_name, schema_name)
+            warehouse_id = self._resource_configurator.prompt_for_warehouse_setup(TRANSPILER_WAREHOUSE_PREFIX)
+            runtime_config = {"warehouse_id": warehouse_id}
+
+        config = dataclasses.replace(
+            default_config,
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            sdk_config=runtime_config,
+        )
+        self._save_config(config)
+        return config
+
+    def _all_installed_dialects(self) -> list[str]:
+        return sorted(TranspilerInstaller.all_dialects())
+
+    def _transpilers_with_dialect(self, dialect: str) -> list[str]:
+        return sorted(TranspilerInstaller.transpilers_with_dialect(dialect))
+
+    def _transpiler_config_path(self, transpiler: str) -> Path:
+        return TranspilerInstaller.transpiler_config_path(transpiler)
+
+    def _prompt_for_new_transpile_installation(self) -> TranspileConfig:
+        install_later = "Set it later"
+        # TODO tidy this up, logger might not display the below in console...
+        logger.info("Please answer a few questions to configure lakebridge `transpile`")
+        all_dialects = [install_later] + self._all_installed_dialects()
+        source_dialect: str | None = self._prompts.choice("Select the source dialect:", all_dialects, sort=False)
+        if source_dialect == install_later:
+            source_dialect = None
+        transpiler_name: str | None = None
+        transpiler_config_path: Path | None = None
+        if source_dialect:
+            transpilers = self._transpilers_with_dialect(source_dialect)
+            if len(transpilers) > 1:
+                transpilers = [install_later] + transpilers
+                transpiler_name = self._prompts.choice("Select the transpiler:", transpilers, sort=False)
+                if transpiler_name == install_later:
+                    transpiler_name = None
+            else:
+                transpiler_name = next(t for t in transpilers)
+                # TODO Change name for bladebridge
+                logger.info(f"lakebridge will use the {transpiler_name} transpiler")
+            if transpiler_name:
+                transpiler_config_path = self._transpiler_config_path(transpiler_name)
+        transpiler_options: dict[str, JsonValue] | None = None
+        if transpiler_config_path:
+            transpiler_options = self._prompt_for_transpiler_options(
+                cast(str, transpiler_name), cast(str, source_dialect)
+            )
+        input_source: str | None = self._prompts.question(
+            "Enter input SQL path (directory/file)", default=install_later
+        )
+        if input_source == install_later:
+            input_source = None
+        output_folder = self._prompts.question("Enter output directory", default="transpiled")
+        # When defaults are passed along we need to use absolute paths to avoid issues with relative paths
+        if output_folder == "transpiled":
+            output_folder = str(Path.cwd() / "transpiled")
+        error_file_path = self._prompts.question("Enter error file path", default="errors.log")
+        if error_file_path == "errors.log":
+            error_file_path = str(Path.cwd() / "errors.log")
+
+        run_validation = self._prompts.confirm(
+            "Would you like to validate the syntax and semantics of the transpiled queries?"
+        )
+
+        return TranspileConfig(
+            transpiler_config_path=str(transpiler_config_path) if transpiler_config_path is not None else None,
+            transpiler_options=transpiler_options,
+            source_dialect=source_dialect,
+            skip_validation=(not run_validation),
+            input_source=input_source,
+            output_folder=output_folder,
+            error_file_path=error_file_path,
+        )
+
+    def _prompt_for_transpiler_options(self, transpiler_name: str, source_dialect: str) -> dict[str, Any] | None:
+        config_options = TranspilerInstaller.transpiler_config_options(transpiler_name, source_dialect)
+        if len(config_options) == 0:
+            return None
+        return {option.flag: option.prompt_for_value(self._prompts) for option in config_options}
+
+    def _configure_catalog(
+        self,
+    ) -> str:
+        return self._resource_configurator.prompt_for_catalog_setup()
+
+    def _configure_schema(
+        self,
+        catalog: str,
+        default_schema_name: str,
+    ) -> str:
+        return self._resource_configurator.prompt_for_schema_setup(
+            catalog,
+            default_schema_name,
+        )
+
+    def _configure_reconcile(self) -> ReconcileConfig:
+        try:
+            self._installation.load(ReconcileConfig)
+            logger.info("lakebridge `reconcile` is already installed on this workspace.")
+            if not self._prompts.confirm("Do you want to override the existing installation?"):
+                raise SystemExit(
+                    "lakebridge `reconcile` is already installed and no override has been requested. Exiting..."
+                )
+        except NotFound:
+            logger.info("Couldn't find existing `reconcile` installation")
+        except (PermissionDenied, SerdeError, ValueError, AttributeError):
+            install_dir = self._installation.install_folder()
+            logger.warning(
+                f"Existing `reconcile` installation at {install_dir} is corrupted. Continuing new installation..."
+            )
+
+        config = self._configure_new_reconcile_installation()
+        logger.info("Finished configuring lakebridge `reconcile`.")
+        return config
+
+    def _configure_new_reconcile_installation(self) -> ReconcileConfig:
+        default_config = self._prompt_for_new_reconcile_installation()
+        self._save_config(default_config)
+        return default_config
+
+    def _prompt_for_new_reconcile_installation(self) -> ReconcileConfig:
+        logger.info("Please answer a few questions to configure lakebridge `reconcile`")
+        data_source = self._prompts.choice(
+            "Select the Data Source:", [source_type.value for source_type in ReconSourceType]
+        )
+        report_type = self._prompts.choice(
+            "Select the report type:", [report_type.value for report_type in ReconReportType]
+        )
+        scope_name = self._prompts.question(
+            f"Enter Secret scope name to store `{data_source.capitalize()}` connection details / secrets",
+            default=f"remorph_{data_source}",
+        )
+
+        db_config = self._prompt_for_reconcile_database_config(data_source)
+        metadata_config = self._prompt_for_reconcile_metadata_config()
+
+        return ReconcileConfig(
+            data_source=data_source,
+            report_type=report_type,
+            secret_scope=scope_name,
+            database_config=db_config,
+            metadata_config=metadata_config,
+        )
+
+    def _prompt_for_reconcile_database_config(self, source) -> DatabaseConfig:
+        source_catalog = None
+        if source == ReconSourceType.SNOWFLAKE.value:
+            source_catalog = self._prompts.question(f"Enter source catalog name for `{source.capitalize()}`")
+
+        schema_prompt = f"Enter source schema name for `{source.capitalize()}`"
+        if source in {ReconSourceType.ORACLE.value}:
+            schema_prompt = f"Enter source database name for `{source.capitalize()}`"
+
+        source_schema = self._prompts.question(schema_prompt)
+        target_catalog = self._prompts.question("Enter target catalog name for Databricks")
+        target_schema = self._prompts.question("Enter target schema name for Databricks")
+
+        return DatabaseConfig(
+            source_schema=source_schema,
+            target_catalog=target_catalog,
+            target_schema=target_schema,
+            source_catalog=source_catalog,
+        )
+
+    def _prompt_for_reconcile_metadata_config(self) -> ReconcileMetadataConfig:
+        logger.info("Configuring reconcile metadata.")
+        catalog = self._configure_catalog()
+        schema = self._configure_schema(
+            catalog,
+            "reconcile",
+        )
+        volume = self._configure_volume(catalog, schema, "reconcile_volume")
+        self._has_necessary_access(catalog, schema, volume)
+        return ReconcileMetadataConfig(catalog=catalog, schema=schema, volume=volume)
+
+    def _configure_volume(
+        self,
+        catalog: str,
+        schema: str,
+        default_volume_name: str,
+    ) -> str:
+        return self._resource_configurator.prompt_for_volume_setup(
+            catalog,
+            schema,
+            default_volume_name,
+        )
+
+    def _save_config(self, config: TranspileConfig | ReconcileConfig):
+        logger.info(f"Saving configuration file {config.__file__}")
+        self._installation.save(config)
+        ws_file_url = self._installation.workspace_link(config.__file__)
+        if self._prompts.confirm(f"Open config file {ws_file_url} in the browser?"):
+            webbrowser.open(ws_file_url)
+
+    def _has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None = None):
+        self._resource_configurator.has_necessary_access(catalog_name, schema_name, volume_name)