databricks-labs-lakebridge 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/__init__.py +3 -0
- databricks/labs/__init__.py +3 -0
- databricks/labs/lakebridge/__about__.py +2 -0
- databricks/labs/lakebridge/__init__.py +11 -0
- databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks/labs/lakebridge/base_install.py +12 -0
- databricks/labs/lakebridge/cli.py +449 -0
- databricks/labs/lakebridge/config.py +192 -0
- databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks/labs/lakebridge/install.py +849 -0
- databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks/labs/lakebridge/lineage.py +42 -0
- databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks/labs/lakebridge/uninstall.py +28 -0
- databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
- databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
- databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
- databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
- docs/lakebridge/src/components/Button.tsx +81 -0
- docs/lakebridge/src/css/custom.css +167 -0
- docs/lakebridge/src/css/table.css +20 -0
- docs/lakebridge/src/pages/index.tsx +57 -0
- docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/install.py
@@ -0,0 +1,849 @@
import re
import abc
import dataclasses
import shutil
from collections.abc import Iterable
from json import loads, dump
import logging
import os
from shutil import rmtree, move
from subprocess import run, CalledProcessError
import sys
from typing import Any, cast
from urllib import request
from urllib.error import URLError, HTTPError
import webbrowser
from datetime import datetime, timezone
from pathlib import Path
import xml.etree.ElementTree as ET
from zipfile import ZipFile

from databricks.labs.blueprint.installation import Installation, JsonValue
from databricks.labs.blueprint.installation import SerdeError
from databricks.labs.blueprint.installer import InstallState
from databricks.labs.blueprint.tui import Prompts
from databricks.labs.blueprint.wheels import ProductInfo
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound, PermissionDenied

from databricks.labs.lakebridge.config import (
    TranspileConfig,
    ReconcileConfig,
    DatabaseConfig,
    RemorphConfigs,
    ReconcileMetadataConfig,
    LSPConfigOptionV1,
)

from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig

logger = logging.getLogger(__name__)

TRANSPILER_WAREHOUSE_PREFIX = "Lakebridge Transpiler Validation"


class TranspilerInstaller(abc.ABC):

    @classmethod
    def labs_path(cls) -> Path:
        return Path.home() / ".databricks" / "labs"

    @classmethod
    def transpilers_path(cls) -> Path:
        return cls.labs_path() / "remorph-transpilers"

    @classmethod
    def install_from_pypi(cls, product_name: str, pypi_name: str, artifact: Path | None = None) -> Path | None:
        installer = WheelInstaller(product_name, pypi_name, artifact)
        return installer.install()

    @classmethod
    def install_from_maven(
        cls, product_name: str, group_id: str, artifact_id: str, artifact: Path | None = None
    ) -> Path | None:
        installer = MavenInstaller(product_name, group_id, artifact_id, artifact)
        return installer.install()

    @classmethod
    def get_installed_version(cls, product_name: str, is_transpiler=True) -> str | None:
        product_path = (cls.transpilers_path() if is_transpiler else cls.labs_path()) / product_name
        current_version_path = product_path / "state" / "version.json"
        if not current_version_path.exists():
            return None
        text = current_version_path.read_text("utf-8")
        data: dict[str, Any] = loads(text)
        version: str | None = data.get("version", None)
        if not version or not version.startswith("v"):
            return None
        return version[1:]

    _version_pattern = re.compile(r"[_-](\d+(?:[.\-_]\w*\d+)+)")

    @classmethod
    def get_local_artifact_version(cls, artifact: Path) -> str | None:
        # TODO: Get the version from the metadata inside the artifact rather than relying on the filename.
        match = cls._version_pattern.search(artifact.stem)
        if not match:
            return None
        group = match.group(0)
        if not group:
            return None
        # TODO: Update the regex to take care of these trimming scenarios.
        if group.startswith('-'):
            group = group[1:]
        if group.endswith("-py3"):
            group = group[:-4]
        return group

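    # Transpiler plugins are discovered on the local filesystem: each directory under
    # ~/.databricks/labs/remorph-transpilers/ is expected to contain a `lib/config.yml`
    # (an LSPConfig) describing the transpiler and the source dialects it supports.
    # Illustrative layout (the product name `example-transpiler` is hypothetical):
    #
    #   ~/.databricks/labs/remorph-transpilers/
    #       example-transpiler/
    #           lib/config.yml      # LSP engine configuration, loaded below
    #           state/version.json  # installed-version marker, written on install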
    @classmethod
    def all_transpiler_configs(cls) -> dict[str, LSPConfig]:
        all_configs = cls._all_transpiler_configs()
        return {config.name: config for config in all_configs}

    @classmethod
    def all_transpiler_names(cls) -> set[str]:
        all_configs = cls.all_transpiler_configs()
        return set(all_configs.keys())

    @classmethod
    def all_dialects(cls) -> set[str]:
        all_dialects: set[str] = set()
        for config in cls._all_transpiler_configs():
            all_dialects = all_dialects.union(config.remorph.dialects)
        return all_dialects

    @classmethod
    def transpilers_with_dialect(cls, dialect: str) -> set[str]:
        configs = filter(lambda cfg: dialect in cfg.remorph.dialects, cls.all_transpiler_configs().values())
        return set(config.name for config in configs)

    @classmethod
    def transpiler_config_path(cls, transpiler_name) -> Path:
        config = cls.all_transpiler_configs().get(transpiler_name, None)
        if not config:
            raise ValueError(f"No such transpiler: {transpiler_name}")
        return config.path

    @classmethod
    def transpiler_config_options(cls, transpiler_name, source_dialect) -> list[LSPConfigOptionV1]:
        config = cls.all_transpiler_configs().get(transpiler_name, None)
        if not config:
            return []  # gracefully returns an empty list, since this can only happen during testing
        return config.options_for_dialect(source_dialect)

    @classmethod
    def _all_transpiler_configs(cls) -> Iterable[LSPConfig]:
        path = cls.transpilers_path()
        if path.exists():
            all_files = os.listdir(path)
            for file in all_files:
                config = cls._transpiler_config(cls.transpilers_path() / file)
                if config:
                    yield config

    @classmethod
    def _transpiler_config(cls, path: Path) -> LSPConfig | None:
        if not path.is_dir() or not (path / "lib").is_dir():
            return None
        config_path = path / "lib" / "config.yml"
        if not config_path.is_file():
            return None
        try:
            return LSPConfig.load(config_path)
        except ValueError as e:
            logger.error(f"Could not load config: {path!s}", exc_info=e)
            return None

    @classmethod
    def _store_product_state(cls, product_path: Path, version: str) -> None:
        state_path = product_path / "state"
        state_path.mkdir()
        version_data = {"version": f"v{version}", "date": datetime.now(timezone.utc).isoformat()}
        version_path = state_path / "version.json"
        with version_path.open("w", encoding="utf-8") as f:
            dump(version_data, f)
            f.write("\n")

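# WheelInstaller provisions a Python transpiler plugin from PyPI (or from a local wheel)
# into a private virtual environment under the product's `lib` folder, copies the plugin's
# `lsp/` resources next to it, and records the installed version in `state/version.json`.
# The state file written by `_store_product_state` looks like this (values illustrative):
#
#   {"version": "v0.1.2", "date": "2025-01-01T00:00:00+00:00"}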
class WheelInstaller(TranspilerInstaller):

    @classmethod
    def get_latest_artifact_version_from_pypi(cls, product_name: str) -> str | None:
        try:
            with request.urlopen(f"https://pypi.org/pypi/{product_name}/json") as server:
                text: bytes = server.read()
                data: dict[str, Any] = loads(text)
                return data.get("info", {}).get('version', None)
        except HTTPError as e:
            logger.error(f"Error while fetching PyPI metadata: {product_name}", exc_info=e)
            return None

    @classmethod
    def download_artifact_from_pypi(cls, product_name: str, version: str, target: Path, extension="whl") -> int:
        suffix = "-py3-none-any.whl" if extension == "whl" else ".tar.gz" if extension == "tar" else f".{extension}"
        filename = f"{product_name.replace('-', '_')}-{version}{suffix}"
        url = f"https://pypi.debian.net/{product_name}/{filename}"
        try:
            path, _ = request.urlretrieve(url)
            logger.info(f"Successfully downloaded {path}")
            if not target.exists():
                logger.info(f"Moving {path} to {target!s}")
                move(path, target)
            return 0
        except URLError as e:
            logger.error("While downloading from pypi", exc_info=e)
            return -1

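    # Version discovery uses the PyPI JSON API (https://pypi.org/pypi/<project>/json),
    # while the download itself goes through the pypi.debian.net redirector, which maps a
    # project/filename pair to the file hosted on PyPI. The wheel filename is reconstructed
    # from the project name and version, assuming a pure-Python `py3-none-any` build.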
    def __init__(self, product_name: str, pypi_name: str, artifact: Path | None = None):
        self._product_name = product_name
        self._pypi_name = pypi_name
        self._artifact = artifact

    def install(self) -> Path | None:
        return self._install_checking_versions()

    def _install_checking_versions(self) -> Path | None:
        latest_version = (
            self.get_local_artifact_version(self._artifact)
            if self._artifact
            else self.get_latest_artifact_version_from_pypi(self._pypi_name)
        )
        if latest_version is None:
            logger.warning(f"Could not determine the latest version of {self._pypi_name}")
            logger.error(f"Failed to install transpiler: {self._product_name}")
            return None
        installed_version = self.get_installed_version(self._product_name)
        if installed_version == latest_version:
            logger.info(f"{self._pypi_name} v{latest_version} already installed")
            return None
        return self._install_latest_version(latest_version)

    def _install_latest_version(self, version: str) -> Path | None:
        logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
        # use type(self) to workaround a mock bug on class methods
        self._product_path = type(self).transpilers_path() / self._product_name
        backup_path = Path(f"{self._product_path!s}-saved")
        if self._product_path.exists():
            os.rename(self._product_path, backup_path)
        self._product_path.mkdir(parents=True, exist_ok=True)
        self._install_path = self._product_path / "lib"
        self._install_path.mkdir(exist_ok=True)
        try:
            result = self._unsafe_install_latest_version(version)
            logger.info(f"Successfully installed {self._pypi_name} v{version}")
            if backup_path.exists():
                rmtree(backup_path)
            return result
        except (CalledProcessError, ValueError) as e:
            logger.error(f"Failed to install {self._pypi_name} v{version}", exc_info=e)
            rmtree(self._product_path)
            if backup_path.exists():
                os.rename(backup_path, self._product_path)
            return None

    def _unsafe_install_latest_version(self, version: str) -> Path | None:
        self._create_venv()
        self._install_with_pip()
        self._copy_lsp_resources()
        return self._post_install(version)

    def _create_venv(self) -> None:
        cwd = os.getcwd()
        try:
            os.chdir(self._install_path)
            self._unsafe_create_venv()
        finally:
            os.chdir(cwd)

    def _unsafe_create_venv(self) -> None:
        # using the venv module doesn't work (maybe it's not possible to create a venv from a venv?)
        # so falling back to something that works
        # for some reason this requires shell=True, so pass the full command line
        cmd_line = f"{sys.executable} -m venv .venv"
        completed = run(cmd_line, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
        if completed.returncode:
            logger.error(f"Failed to create venv, error code: {completed.returncode}")
            if completed.stdout:
                for line in completed.stdout:
                    logger.error(line)
            if completed.stderr:
                for line in completed.stderr:
                    logger.error(line)
            completed.check_returncode()
        self._venv = self._install_path / ".venv"
        self._site_packages = self._locate_site_packages()

    def _locate_site_packages(self) -> Path:
        # can't use sysconfig because it only works for the currently running python
        if sys.platform == "win32":
            return self._locate_site_packages_windows()
        return self._locate_site_packages_linux_or_macos()

    def _locate_site_packages_windows(self) -> Path:
        packages = self._venv / "Lib" / "site-packages"
        if packages.exists():
            return packages
        raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")

    def _locate_site_packages_linux_or_macos(self) -> Path:
        lib = self._venv / "lib"
        for dir_ in os.listdir(lib):
            if dir_.startswith("python"):
                packages = lib / dir_ / "site-packages"
                if packages.exists():
                    return packages
        raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")

    def _install_with_pip(self) -> None:
        cwd = os.getcwd()
        try:
            os.chdir(self._install_path)
            # the way to call pip from python is highly sensitive to os and source type
            if self._artifact:
                self._install_local_artifact()
            else:
                self._install_remote_artifact()
        finally:
            os.chdir(cwd)

    def _install_local_artifact(self) -> None:
        pip = self._locate_pip()
        pip = pip.relative_to(self._install_path)
        target = self._site_packages
        target = target.relative_to(self._install_path)
        if sys.platform == "win32":
            command = f"{pip!s} install {self._artifact!s} -t {target!s}"
            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
        else:
            command = f"'{pip!s}' install '{self._artifact!s}' -t '{target!s}'"
            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
        # checking the return code later makes debugging easier
        completed.check_returncode()

    def _install_remote_artifact(self) -> None:
        pip = self._locate_pip()
        pip = pip.relative_to(self._install_path)
        target = self._site_packages
        target = target.relative_to(self._install_path)
        if sys.platform == "win32":
            args = [str(pip), "install", self._pypi_name, "-t", str(target)]
            completed = run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
        else:
            command = f"'{pip!s}' install {self._pypi_name} -t '{target!s}'"
            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
        # checking the return code later makes debugging easier
        completed.check_returncode()

    def _locate_pip(self) -> Path:
        return self._venv / "Scripts" / "pip3.exe" if sys.platform == "win32" else self._venv / "bin" / "pip3"

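    # Platform note: Windows virtual environments keep executables under `Scripts\` and
    # packages under `Lib\site-packages`, whereas POSIX venvs use `bin/` and
    # `lib/python3.X/site-packages`. That asymmetry is why `_locate_pip` and the two
    # `_locate_site_packages_*` helpers above branch on `sys.platform`.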
    def _copy_lsp_resources(self):
        lsp = self._site_packages / "lsp"
        if not lsp.exists():
            raise ValueError("Installed transpiler is missing a 'lsp' folder")
        shutil.copytree(lsp, self._install_path, dirs_exist_ok=True)

    def _post_install(self, version: str) -> Path | None:
        config = self._install_path / "config.yml"
        if not config.exists():
            raise ValueError("Installed transpiler is missing a 'config.yml' file in its 'lsp' folder")
        install_ext = "ps1" if sys.platform == "win32" else "sh"
        install_script = f"installer.{install_ext}"
        installer = self._install_path / install_script
        if installer.exists():
            self._run_custom_installer(installer)
        self._store_product_state(product_path=self._product_path, version=version)
        return self._install_path

    def _run_custom_installer(self, installer):
        args = [str(installer)]
        run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, cwd=str(self._install_path), check=True)


class MavenInstaller(TranspilerInstaller):
    # Maven Central, base URL.
    _maven_central_repo: str = "https://repo.maven.apache.org/maven2/"

    @classmethod
    def _artifact_base_url(cls, group_id: str, artifact_id: str) -> str:
        """Construct the base URL for a Maven artifact."""
        # Reference: https://maven.apache.org/repositories/layout.html
        group_path = group_id.replace(".", "/")
        return f"{cls._maven_central_repo}{group_path}/{artifact_id}/"

    @classmethod
    def artifact_metadata_url(cls, group_id: str, artifact_id: str) -> str:
        """Get the metadata URL for a Maven artifact."""
        # TODO: Unit test this method.
        return f"{cls._artifact_base_url(group_id, artifact_id)}maven-metadata.xml"

    @classmethod
    def artifact_url(
        cls, group_id: str, artifact_id: str, version: str, classifier: str | None = None, extension: str = "jar"
    ) -> str:
        """Get the URL for a versioned Maven artifact."""
        # TODO: Unit test this method, including classifier and extension.
        _classifier = f"-{classifier}" if classifier else ""
        artifact_base_url = cls._artifact_base_url(group_id, artifact_id)
        return f"{artifact_base_url}{version}/{artifact_id}-{version}{_classifier}.{extension}"

    @classmethod
    def get_current_maven_artifact_version(cls, group_id: str, artifact_id: str) -> str | None:
        url = cls.artifact_metadata_url(group_id, artifact_id)
        try:
            with request.urlopen(url) as server:
                text = server.read()
        except HTTPError as e:
            logger.error(f"Error while fetching maven metadata: {group_id}:{artifact_id}", exc_info=e)
            return None
        logger.debug(f"Maven metadata for {group_id}:{artifact_id}: {text}")
        return cls._extract_latest_release_version(text)

    @classmethod
    def _extract_latest_release_version(cls, maven_metadata: str) -> str | None:
        """Extract the latest release version from Maven metadata."""
        # Reference: https://maven.apache.org/repositories/metadata.html#The_A_Level_Metadata
        # TODO: Unit test this method, to verify the sequence of things it checks for.
        root = ET.fromstring(maven_metadata)
        for label in ("release", "latest"):
            version = root.findtext(f"./versioning/{label}")
            if version is not None:
                return version
        return root.findtext("./versioning/versions/version[last()]")

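    # For reference, the `maven-metadata.xml` parsed above typically looks like this
    # (coordinates and versions illustrative):
    #
    #   <metadata>
    #     <groupId>com.databricks.labs</groupId>
    #     <artifactId>databricks-morph-plugin</artifactId>
    #     <versioning>
    #       <latest>0.5.0</latest>
    #       <release>0.5.0</release>
    #       <versions><version>0.4.0</version><version>0.5.0</version></versions>
    #     </versioning>
    #   </metadata>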
    @classmethod
    def download_artifact_from_maven(
        cls,
        group_id: str,
        artifact_id: str,
        version: str,
        target: Path,
        classifier: str | None = None,
        extension: str = "jar",
    ) -> bool:
        if target.exists():
            logger.warning(f"Skipping download of {group_id}:{artifact_id}:{version}; target already exists: {target}")
            return True
        url = cls.artifact_url(group_id, artifact_id, version, classifier, extension)
        try:
            path, _ = request.urlretrieve(url)
            logger.debug(f"Downloaded maven artefact from {url} to {path}")
        except URLError as e:
            logger.error(f"Unable to download maven artefact: {group_id}:{artifact_id}:{version}", exc_info=e)
            return False
        logger.debug(f"Moving {path} to {target}")
        move(path, target)
        logger.info(f"Successfully installed: {group_id}:{artifact_id}:{version}")
        return True

    def __init__(self, product_name: str, group_id: str, artifact_id: str, artifact: Path | None = None):
        self._product_name = product_name
        self._group_id = group_id
        self._artifact_id = artifact_id
        self._artifact = artifact

    def install(self) -> Path | None:
        return self._install_checking_versions()

    def _install_checking_versions(self) -> Path | None:
        if self._artifact:
            latest_version = self.get_local_artifact_version(self._artifact)
        else:
            latest_version = self.get_current_maven_artifact_version(self._group_id, self._artifact_id)
        if latest_version is None:
            logger.warning(f"Could not determine the latest version of Databricks {self._product_name} transpiler")
            logger.error(f"Failed to install transpiler: Databricks {self._product_name} transpiler")
            return None
        installed_version = self.get_installed_version(self._product_name)
        if installed_version == latest_version:
            logger.info(f"Databricks {self._product_name} transpiler v{latest_version} already installed")
            return None
        return self._install_version(latest_version)

    def _install_version(self, version: str) -> Path | None:
        logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
        # use type(self) to workaround a mock bug on class methods
        self._product_path = type(self).transpilers_path() / self._product_name
        backup_path = Path(f"{self._product_path!s}-saved")
        if backup_path.exists():
            rmtree(backup_path)
        if self._product_path.exists():
            os.rename(self._product_path, backup_path)
        self._product_path.mkdir(parents=True)
        self._install_path = self._product_path / "lib"
        self._install_path.mkdir()
        try:
            if self._unsafe_install_version(version):
                logger.info(f"Successfully installed {self._product_name} v{version}")
                self._store_product_state(self._product_path, version)
                if backup_path.exists():
                    rmtree(backup_path)
                return self._product_path
        except (KeyError, ValueError) as e:
            logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}", exc_info=e)
        rmtree(self._product_path)
        if backup_path.exists():
            os.rename(backup_path, self._product_path)
        return None

    def _unsafe_install_version(self, version: str) -> bool:
        jar_file_path = self._install_path / f"{self._artifact_id}.jar"
        if self._artifact:
            logger.debug(f"Copying '{self._artifact!s}' to '{jar_file_path!s}'")
            shutil.copyfile(self._artifact, jar_file_path)
        elif not self.download_artifact_from_maven(self._group_id, self._artifact_id, version, jar_file_path):
            logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}")
            return False
        self._copy_lsp_config(jar_file_path)
        return True

    def _copy_lsp_config(self, jar_file_path: Path) -> None:
        with ZipFile(jar_file_path) as zip_file:
            zip_file.extract("lsp/config.yml", self._install_path)
        shutil.move(self._install_path / "lsp" / "config.yml", self._install_path / "config.yml")
        os.rmdir(self._install_path / "lsp")

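# WorkspaceInstaller drives the interactive installation: it installs the transpiler
# plugins (bladebridge from PyPI, morpheus from Maven), prompts for `transpile` and/or
# `reconcile` configuration through `Prompts`, and persists the resulting config to the
# workspace via `Installation`. It refuses to run inside a Databricks Runtime.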
class WorkspaceInstaller:
    def __init__(
        self,
        ws: WorkspaceClient,
        prompts: Prompts,
        installation: Installation,
        install_state: InstallState,
        product_info: ProductInfo,
        resource_configurator: ResourceConfigurator,
        workspace_installation: WorkspaceInstallation,
        environ: dict[str, str] | None = None,
    ):
        self._ws = ws
        self._prompts = prompts
        self._installation = installation
        self._install_state = install_state
        self._product_info = product_info
        self._resource_configurator = resource_configurator
        self._ws_installation = workspace_installation

        if not environ:
            environ = dict(os.environ.items())

        if "DATABRICKS_RUNTIME_VERSION" in environ:
            msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime"
            raise SystemExit(msg)

    def run(self, module: str, config: RemorphConfigs | None = None, artifact: str | None = None) -> RemorphConfigs:
        logger.debug(f"Initializing workspace installation for module: {module} (config: {config})")
        if module == "transpile" and artifact:
            self.install_artifact(artifact)
        elif module in {"transpile", "all"}:
            self.install_bladebridge()
            self.install_morpheus()
        if not config:
            config = self.configure(module)
        if self._is_testing():
            return config
        self._ws_installation.install(config)
        logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
        return config

    @classmethod
    def install_bladebridge(cls, artifact: Path | None = None):
        local_name = "bladebridge"
        pypi_name = "databricks-bb-plugin"
        TranspilerInstaller.install_from_pypi(local_name, pypi_name, artifact)

    @classmethod
    def install_morpheus(cls, artifact: Path | None = None):
        java_version = cls.get_java_version()
        if java_version is None or java_version < 110:
            logger.warning(
                "This software requires Java 11 or above. Please install Java and re-run 'install-transpile'."
            )
            return
        product_name = "databricks-morph-plugin"
        group_id = "com.databricks.labs"
        artifact_id = product_name
        TranspilerInstaller.install_from_maven(product_name, group_id, artifact_id, artifact)

    @classmethod
    def install_artifact(cls, artifact: str):
        path = Path(artifact)
        if not path.exists():
            logger.error(f"Could not locate artifact {artifact}")
            return
        if "databricks-morph-plugin" in path.name:
            cls.install_morpheus(path)
        elif "databricks_bb_plugin" in path.name:
            cls.install_bladebridge(path)
        else:
            logger.fatal(f"Cannot install unsupported artifact: {artifact}")

    @classmethod
    def get_java_version(cls) -> int | None:
        completed = run(["java", "-version"], shell=False, capture_output=True, check=False)
        try:
            completed.check_returncode()
        except CalledProcessError:
            return None
        result = completed.stderr.decode("utf-8")
        start = result.find(" version ")
        if start < 0:
            return None
        start = result.find('"', start + 1)
        if start < 0:
            return None
        end = result.find('"', start + 1)
        if end < 0:
            return None
        version = result[start + 1 : end]
        parts = version.split('.')
        return int(parts[0] + parts[1])

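    # Parsing example: `java -version` reports on stderr, e.g. `openjdk version "11.0.2" ...`;
    # the quoted "11.0.2" splits into ("11", "0", "2") and the first two parts concatenate to
    # 110, which is why the Java 11 check above compares against 110. A legacy "1.8.0_292"
    # yields 18 and fails the check, as intended. Note that a version string without a dot
    # (some JDKs print just "21") would raise an IndexError here.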
    def configure(self, module: str) -> RemorphConfigs:
        match module:
            case "transpile":
                logger.info("Configuring lakebridge `transpile`.")
                return RemorphConfigs(self._configure_transpile(), None)
            case "reconcile":
                logger.info("Configuring lakebridge `reconcile`.")
                return RemorphConfigs(None, self._configure_reconcile())
            case "all":
                logger.info("Configuring lakebridge `transpile` and `reconcile`.")
                return RemorphConfigs(
                    self._configure_transpile(),
                    self._configure_reconcile(),
                )
            case _:
                raise ValueError(f"Invalid input: {module}")

    def _is_testing(self):
        return self._product_info.product_name() != "lakebridge"

    def _configure_transpile(self) -> TranspileConfig:
        try:
            self._installation.load(TranspileConfig)
            logger.info("Lakebridge `transpile` is already installed on this workspace.")
            if not self._prompts.confirm("Do you want to override the existing installation?"):
                raise SystemExit(
                    "lakebridge `transpile` is already installed and no override has been requested. Exiting..."
                )
        except NotFound:
            logger.info("Couldn't find existing `transpile` installation")
        except (PermissionDenied, SerdeError, ValueError, AttributeError):
            install_dir = self._installation.install_folder()
            logger.warning(
                f"Existing `transpile` installation at {install_dir} is corrupted. Continuing new installation..."
            )

        config = self._configure_new_transpile_installation()
        logger.info("Finished configuring lakebridge `transpile`.")
        return config

    def _configure_new_transpile_installation(self) -> TranspileConfig:
        default_config = self._prompt_for_new_transpile_installation()
        runtime_config = None
        catalog_name = "remorph"
        schema_name = "transpiler"
        if not default_config.skip_validation:
            catalog_name = self._configure_catalog()
            schema_name = self._configure_schema(catalog_name, "transpile")
            self._has_necessary_access(catalog_name, schema_name)
            warehouse_id = self._resource_configurator.prompt_for_warehouse_setup(TRANSPILER_WAREHOUSE_PREFIX)
            runtime_config = {"warehouse_id": warehouse_id}

        config = dataclasses.replace(
            default_config,
            catalog_name=catalog_name,
            schema_name=schema_name,
            sdk_config=runtime_config,
        )
        self._save_config(config)
        return config

    def _all_installed_dialects(self) -> list[str]:
        return sorted(TranspilerInstaller.all_dialects())

    def _transpilers_with_dialect(self, dialect: str) -> list[str]:
        return sorted(TranspilerInstaller.transpilers_with_dialect(dialect))

    def _transpiler_config_path(self, transpiler: str) -> Path:
        return TranspilerInstaller.transpiler_config_path(transpiler)

    def _prompt_for_new_transpile_installation(self) -> TranspileConfig:
        install_later = "Set it later"
        # TODO tidy this up, logger might not display the below in console...
        logger.info("Please answer a few questions to configure lakebridge `transpile`")
        all_dialects = [install_later] + self._all_installed_dialects()
        source_dialect: str | None = self._prompts.choice("Select the source dialect:", all_dialects, sort=False)
        if source_dialect == install_later:
            source_dialect = None
        transpiler_name: str | None = None
        transpiler_config_path: Path | None = None
        if source_dialect:
            transpilers = self._transpilers_with_dialect(source_dialect)
            if len(transpilers) > 1:
                transpilers = [install_later] + transpilers
                transpiler_name = self._prompts.choice("Select the transpiler:", transpilers, sort=False)
                if transpiler_name == install_later:
                    transpiler_name = None
            else:
                transpiler_name = next(t for t in transpilers)
                # TODO Change name for bladebridge
                logger.info(f"lakebridge will use the {transpiler_name} transpiler")
            if transpiler_name:
                transpiler_config_path = self._transpiler_config_path(transpiler_name)
        transpiler_options: dict[str, JsonValue] | None = None
        if transpiler_config_path:
            transpiler_options = self._prompt_for_transpiler_options(
                cast(str, transpiler_name), cast(str, source_dialect)
            )
        input_source: str | None = self._prompts.question(
            "Enter input SQL path (directory/file)", default=install_later
        )
        if input_source == install_later:
            input_source = None
        output_folder = self._prompts.question("Enter output directory", default="transpiled")
        # When defaults are passed along we need to use absolute paths to avoid issues with relative paths
        if output_folder == "transpiled":
            output_folder = str(Path.cwd() / "transpiled")
        error_file_path = self._prompts.question("Enter error file path", default="errors.log")
        if error_file_path == "errors.log":
            error_file_path = str(Path.cwd() / "errors.log")

        run_validation = self._prompts.confirm(
            "Would you like to validate the syntax and semantics of the transpiled queries?"
        )

        return TranspileConfig(
            transpiler_config_path=str(transpiler_config_path) if transpiler_config_path is not None else None,
            transpiler_options=transpiler_options,
            source_dialect=source_dialect,
            skip_validation=(not run_validation),
            input_source=input_source,
            output_folder=output_folder,
            error_file_path=error_file_path,
        )

    def _prompt_for_transpiler_options(self, transpiler_name: str, source_dialect: str) -> dict[str, Any] | None:
        config_options = TranspilerInstaller.transpiler_config_options(transpiler_name, source_dialect)
        if len(config_options) == 0:
            return None
        return {option.flag: option.prompt_for_value(self._prompts) for option in config_options}

    def _configure_catalog(
        self,
    ) -> str:
        return self._resource_configurator.prompt_for_catalog_setup()

    def _configure_schema(
        self,
        catalog: str,
        default_schema_name: str,
    ) -> str:
        return self._resource_configurator.prompt_for_schema_setup(
            catalog,
            default_schema_name,
        )

    def _configure_reconcile(self) -> ReconcileConfig:
        try:
            self._installation.load(ReconcileConfig)
            logger.info("lakebridge `reconcile` is already installed on this workspace.")
            if not self._prompts.confirm("Do you want to override the existing installation?"):
                raise SystemExit(
                    "lakebridge `reconcile` is already installed and no override has been requested. Exiting..."
                )
        except NotFound:
            logger.info("Couldn't find existing `reconcile` installation")
        except (PermissionDenied, SerdeError, ValueError, AttributeError):
            install_dir = self._installation.install_folder()
            logger.warning(
                f"Existing `reconcile` installation at {install_dir} is corrupted. Continuing new installation..."
            )

        config = self._configure_new_reconcile_installation()
        logger.info("Finished configuring lakebridge `reconcile`.")
        return config

    def _configure_new_reconcile_installation(self) -> ReconcileConfig:
        default_config = self._prompt_for_new_reconcile_installation()
        self._save_config(default_config)
        return default_config

    def _prompt_for_new_reconcile_installation(self) -> ReconcileConfig:
        logger.info("Please answer a few questions to configure lakebridge `reconcile`")
        data_source = self._prompts.choice(
            "Select the Data Source:", [source_type.value for source_type in ReconSourceType]
        )
        report_type = self._prompts.choice(
            "Select the report type:", [report_type.value for report_type in ReconReportType]
        )
        scope_name = self._prompts.question(
            f"Enter Secret scope name to store `{data_source.capitalize()}` connection details / secrets",
            default=f"remorph_{data_source}",
        )

        db_config = self._prompt_for_reconcile_database_config(data_source)
        metadata_config = self._prompt_for_reconcile_metadata_config()

        return ReconcileConfig(
            data_source=data_source,
            report_type=report_type,
            secret_scope=scope_name,
            database_config=db_config,
            metadata_config=metadata_config,
        )

    def _prompt_for_reconcile_database_config(self, source) -> DatabaseConfig:
        source_catalog = None
        if source == ReconSourceType.SNOWFLAKE.value:
            source_catalog = self._prompts.question(f"Enter source catalog name for `{source.capitalize()}`")

        schema_prompt = f"Enter source schema name for `{source.capitalize()}`"
        if source in {ReconSourceType.ORACLE.value}:
            schema_prompt = f"Enter source database name for `{source.capitalize()}`"

        source_schema = self._prompts.question(schema_prompt)
        target_catalog = self._prompts.question("Enter target catalog name for Databricks")
        target_schema = self._prompts.question("Enter target schema name for Databricks")

        return DatabaseConfig(
            source_schema=source_schema,
            target_catalog=target_catalog,
            target_schema=target_schema,
            source_catalog=source_catalog,
        )

    def _prompt_for_reconcile_metadata_config(self) -> ReconcileMetadataConfig:
        logger.info("Configuring reconcile metadata.")
        catalog = self._configure_catalog()
        schema = self._configure_schema(
            catalog,
            "reconcile",
        )
        volume = self._configure_volume(catalog, schema, "reconcile_volume")
        self._has_necessary_access(catalog, schema, volume)
        return ReconcileMetadataConfig(catalog=catalog, schema=schema, volume=volume)

    def _configure_volume(
        self,
        catalog: str,
        schema: str,
        default_volume_name: str,
    ) -> str:
        return self._resource_configurator.prompt_for_volume_setup(
            catalog,
            schema,
            default_volume_name,
        )

    def _save_config(self, config: TranspileConfig | ReconcileConfig):
        logger.info(f"Saving configuration file {config.__file__}")
        self._installation.save(config)
        ws_file_url = self._installation.workspace_link(config.__file__)
        if self._prompts.confirm(f"Open config file {ws_file_url} in the browser?"):
            webbrowser.open(ws_file_url)

    def _has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None = None):
        self._resource_configurator.has_necessary_access(catalog_name, schema_name, volume_name)