databricks-labs-lakebridge 0.10.5__py3-none-any.whl → 0.10.7__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- databricks/labs/lakebridge/__about__.py +1 -1
- databricks/labs/lakebridge/analyzer/__init__.py +0 -0
- databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +95 -0
- databricks/labs/lakebridge/base_install.py +24 -3
- databricks/labs/lakebridge/cli.py +57 -72
- databricks/labs/lakebridge/config.py +1 -1
- databricks/labs/lakebridge/contexts/application.py +11 -4
- databricks/labs/lakebridge/deployment/dashboard.py +2 -1
- databricks/labs/lakebridge/deployment/installation.py +11 -11
- databricks/labs/lakebridge/deployment/job.py +2 -2
- databricks/labs/lakebridge/helpers/file_utils.py +36 -0
- databricks/labs/lakebridge/install.py +228 -278
- databricks/labs/lakebridge/reconcile/compare.py +70 -33
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +19 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +11 -1
- databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +126 -0
- databricks/labs/lakebridge/reconcile/connectors/models.py +7 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +11 -1
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +14 -2
- databricks/labs/lakebridge/reconcile/connectors/tsql.py +27 -2
- databricks/labs/lakebridge/reconcile/constants.py +4 -3
- databricks/labs/lakebridge/reconcile/execute.py +9 -810
- databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +133 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +3 -7
- databricks/labs/lakebridge/reconcile/recon_config.py +3 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +2 -1
- databricks/labs/lakebridge/reconcile/reconciliation.py +508 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +26 -19
- databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +98 -0
- databricks/labs/lakebridge/reconcile/trigger_recon_service.py +253 -0
- databricks/labs/lakebridge/reconcile/utils.py +38 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +48 -63
- databricks/labs/lakebridge/transpiler/repository.py +123 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +2 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -18
- {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/METADATA +1 -1
- {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/RECORD +41 -31
- {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/WHEEL +0 -0
- {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/entry_points.txt +0 -0
- {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/licenses/LICENSE +0 -0
- {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/licenses/NOTICE +0 -0
--- a/databricks/labs/lakebridge/install.py
+++ b/databricks/labs/lakebridge/install.py
@@ -1,85 +1,113 @@
 import re
 import abc
 import dataclasses
-import shutil
-from collections.abc import Iterable
-from json import loads, dump
 import logging
 import os
-
-from subprocess import run, CalledProcessError
+import shutil
 import sys
-
-from urllib import request
-from urllib.error import URLError, HTTPError
+import venv
 import webbrowser
+import xml.etree.ElementTree as ET
 from datetime import datetime, timezone
+from json import loads, dump
 from pathlib import Path
-import
+from shutil import rmtree, move
+from subprocess import run, CalledProcessError
+from typing import Any, Literal, cast
+from urllib import request
+from urllib.error import URLError, HTTPError
 from zipfile import ZipFile

-from databricks.labs.blueprint.installation import Installation, JsonValue
-from databricks.labs.blueprint.installation import SerdeError
+from databricks.labs.blueprint.installation import Installation, JsonValue, SerdeError
 from databricks.labs.blueprint.installer import InstallState
 from databricks.labs.blueprint.tui import Prompts
 from databricks.labs.blueprint.wheels import ProductInfo
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors import NotFound, PermissionDenied

+from databricks.labs.lakebridge.__about__ import __version__
 from databricks.labs.lakebridge.config import (
-    TranspileConfig,
-    ReconcileConfig,
     DatabaseConfig,
-
+    ReconcileConfig,
+    LakebridgeConfiguration,
     ReconcileMetadataConfig,
-
+    TranspileConfig,
 )
-
+from databricks.labs.lakebridge.contexts.application import ApplicationContext
 from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
 from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
-from databricks.labs.lakebridge.helpers.file_utils import chdir
 from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
-from databricks.labs.lakebridge.transpiler.
+from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository

 logger = logging.getLogger(__name__)

 TRANSPILER_WAREHOUSE_PREFIX = "Lakebridge Transpiler Validation"


-class
+class _PathBackup:
+    """A context manager to preserve a path before performing an operation, and optionally restore it afterwards."""

-
-
-
+    def __init__(self, path: Path) -> None:
+        self._path = path
+        self._backup_path: Path | None = None
+        self._finished = False

-
-
-    return
+    def __enter__(self) -> "_PathBackup":
+        self.start()
+        return self

-
-
-
-
+    def start(self) -> None:
+        """Start the backup process by creating a backup of the path, if it already exists."""
+        backup_path = self._path.with_name(f"{self._path.name}-saved")
+        if backup_path.exists():
+            logger.debug(f"Existing backup found, removing: {backup_path}")
+            rmtree(backup_path)
+        if self._path.exists():
+            logger.debug(f"Backing up existing path: {self._path} -> {backup_path}")
+            os.rename(self._path, backup_path)
+            self._backup_path = backup_path
+        else:
+            self._backup_path = None
+
+    def rollback(self) -> None:
+        """Rollback the operation by restoring the backup path, if it exists."""
+        assert not self._finished, "Can only rollback/commit once."
+        logger.debug(f"Removing path: {self._path}")
+        rmtree(self._path)
+        if self._backup_path is not None:
+            logger.debug(f"Restoring previous path: {self._backup_path} -> {self._path}")
+            os.rename(self._backup_path, self._path)
+            self._backup_path = None
+        self._finished = True
+
+    def commit(self) -> None:
+        """Commit the operation by removing the backup path, if it exists."""
+        assert not self._finished, "Can only rollback/commit once."
+        if self._backup_path is not None:
+            logger.debug(f"Removing backup path: {self._backup_path}")
+            rmtree(self._backup_path)
+            self._backup_path = None
+        self._finished = True
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> Literal[False]:
+        if not self._finished:
+            # Automatically commit or rollback based on whether an exception is underway.
+            if exc_val is None:
+                self.commit()
+            else:
+                self.rollback()
+        return False  # Do not suppress any exception underway

-    @classmethod
-    def install_from_maven(
-        cls, product_name: str, group_id: str, artifact_id: str, artifact: Path | None = None
-    ) -> Path | None:
-        installer = MavenInstaller(product_name, group_id, artifact_id, artifact)
-        return installer.install()

-
-
-
-
-
-
-
-
-
-        if not version or not version.startswith("v"):
-            return None
-        return version[1:]
+class TranspilerInstaller(abc.ABC):
+
+    # TODO: Remove these properties when post-install is removed.
+    _install_path: Path
+    """The path where the transpiler is being installed, once this starts."""
+
+    def __init__(self, repository: TranspilerRepository, product_name: str) -> None:
+        self._repository = repository
+        self._product_name = product_name

     _version_pattern = re.compile(r"[_-](\d+(?:[.\-_]\w*\d+)+)")

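The new `_PathBackup` context manager centralises the rename/rmtree juggling that each installer previously did on its own: back the target path up before the operation, then commit (drop the backup) or roll back (restore it). A minimal, standalone sketch of the same pattern, with invented directory names for illustration only (this is not code from the package):

```python
import os
import tempfile
from pathlib import Path
from shutil import rmtree

with tempfile.TemporaryDirectory() as tmp:
    product_path = Path(tmp) / "example-transpiler"
    (product_path / "lib").mkdir(parents=True)       # pre-existing install

    backup_path = product_path.with_name(f"{product_path.name}-saved")
    os.rename(product_path, backup_path)              # preserve the existing install
    product_path.mkdir()                              # start a fresh install attempt

    install_ok = False                                # pretend the new install failed
    if install_ok:
        rmtree(backup_path)                           # commit: discard the backup
    else:
        rmtree(product_path)                          # rollback: drop the partial install...
        os.rename(backup_path, product_path)          # ...and restore the previous one

    assert (product_path / "lib").is_dir()            # previous contents are back
```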
@@ -99,65 +127,6 @@ class TranspilerInstaller(abc.ABC):
             group = group[:-4]
         return group

-    @classmethod
-    def all_transpiler_configs(cls) -> dict[str, LSPConfig]:
-        all_configs = cls._all_transpiler_configs()
-        return {config.name: config for config in all_configs}
-
-    @classmethod
-    def all_transpiler_names(cls) -> set[str]:
-        all_configs = cls.all_transpiler_configs()
-        return set(all_configs.keys())
-
-    @classmethod
-    def all_dialects(cls) -> set[str]:
-        all_dialects: set[str] = set()
-        for config in cls._all_transpiler_configs():
-            all_dialects = all_dialects.union(config.remorph.dialects)
-        return all_dialects
-
-    @classmethod
-    def transpilers_with_dialect(cls, dialect: str) -> set[str]:
-        configs = filter(lambda cfg: dialect in cfg.remorph.dialects, cls.all_transpiler_configs().values())
-        return set(config.name for config in configs)
-
-    @classmethod
-    def transpiler_config_path(cls, transpiler_name) -> Path:
-        config = cls.all_transpiler_configs().get(transpiler_name, None)
-        if not config:
-            raise ValueError(f"No such transpiler: {transpiler_name}")
-        return config.path
-
-    @classmethod
-    def transpiler_config_options(cls, transpiler_name, source_dialect) -> list[LSPConfigOptionV1]:
-        config = cls.all_transpiler_configs().get(transpiler_name, None)
-        if not config:
-            return [] # gracefully returns an empty list, since this can only happen during testing
-        return config.options_for_dialect(source_dialect)
-
-    @classmethod
-    def _all_transpiler_configs(cls) -> Iterable[LSPConfig]:
-        path = cls.transpilers_path()
-        if path.exists():
-            all_files = os.listdir(path)
-            for file in all_files:
-                config = cls._transpiler_config(cls.transpilers_path() / file)
-                if config:
-                    yield config
-
-    @classmethod
-    def _transpiler_config(cls, path: Path) -> LSPConfig | None:
-        if not path.is_dir() or not (path / "lib").is_dir():
-            return None
-        config_path = path / "lib" / "config.yml"
-        if not config_path.is_file():
-            return None
-        try:
-            return LSPConfig.load(config_path)
-        except ValueError as e:
-            logger.error(f"Could not load config: {path!s}", exc_info=e)
-            return None
-
     @classmethod
     def _store_product_state(cls, product_path: Path, version: str) -> None:
         state_path = product_path / "state"
@@ -168,9 +137,44 @@ class TranspilerInstaller(abc.ABC):
             dump(version_data, f)
             f.write("\n")

+    def _install_version_with_backup(self, version: str) -> Path | None:
+        """Install a specific version of the transpiler, with backup handling."""
+        logger.info(f"Installing Databricks {self._product_name} transpiler (v{version})")
+        product_path = self._repository.transpilers_path() / self._product_name
+        with _PathBackup(product_path) as backup:
+            self._install_path = product_path / "lib"
+            self._install_path.mkdir(parents=True, exist_ok=True)
+            try:
+                result = self._install_version(version)
+            except (CalledProcessError, KeyError, ValueError) as e:
+                # Warning: if you end up here under the IntelliJ/PyCharm debugger, it can be because the debugger is
+                # trying to inject itself into the subprocess. Try disabling:
+                # Settings | Build, Execution, Deployment | Python Debugger | Attach to subprocess automatically while debugging
+                # Note: Subprocess output is not captured, and should already be visible in the console.
+                logger.error(f"Failed to install {self._product_name} transpiler (v{version})", exc_info=e)
+                result = False
+
+            if result:
+                logger.info(f"Successfully installed {self._product_name} transpiler (v{version})")
+                self._store_product_state(product_path=product_path, version=version)
+                backup.commit()
+                return product_path
+            backup.rollback()
+        return None
+
+    @abc.abstractmethod
+    def _install_version(self, version: str) -> bool:
+        """Install a specific version of the transpiler, returning True if successful."""
+

 class WheelInstaller(TranspilerInstaller):

+    _venv_exec_cmd: Path
+    """Once created, the command to run the virtual environment's Python executable."""
+
+    _site_packages: Path
+    """Once created, the path to the site-packages directory in the virtual environment."""
+
     @classmethod
     def get_latest_artifact_version_from_pypi(cls, product_name: str) -> str | None:
         try:
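`_install_version_with_backup` is now the shared template: it wraps whatever a concrete `_install_version` does in logging, `_PathBackup` handling and product-state recording. A hypothetical subclass, not part of the package, showing the contract a concrete installer has to satisfy (it assumes the module-level names defined above; the source directory and version string are invented):

```python
class LocalDirInstaller(TranspilerInstaller):
    """Hypothetical installer that copies a prepared directory into the repository."""

    def __init__(self, repository: TranspilerRepository, product_name: str, source_dir: Path) -> None:
        super().__init__(repository, product_name)
        self._source_dir = source_dir

    def _install_version(self, version: str) -> bool:
        # Copy everything into the lib/ folder that _install_version_with_backup prepared.
        shutil.copytree(self._source_dir, self._install_path, dirs_exist_ok=True)
        # Report success only if the expected LSP config landed in place.
        return (self._install_path / "config.yml").exists()

    def install(self) -> Path | None:
        # Backup, rollback-on-failure and state recording all come from the base class.
        return self._install_version_with_backup("0.0.1")
```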
@@ -182,8 +186,14 @@ class WheelInstaller(TranspilerInstaller):
             logger.error(f"Error while fetching PyPI metadata: {product_name}", exc_info=e)
             return None

-    def __init__(
-        self
+    def __init__(
+        self,
+        repository: TranspilerRepository,
+        product_name: str,
+        pypi_name: str,
+        artifact: Path | None = None,
+    ) -> None:
+        super().__init__(repository, product_name)
         self._pypi_name = pypi_name
         self._artifact = artifact

@@ -200,122 +210,49 @@ class WheelInstaller(TranspilerInstaller):
             logger.warning(f"Could not determine the latest version of {self._pypi_name}")
             logger.error(f"Failed to install transpiler: {self._product_name}")
             return None
-        installed_version = self.get_installed_version(self._product_name)
+        installed_version = self._repository.get_installed_version(self._product_name)
         if installed_version == latest_version:
             logger.info(f"{self._pypi_name} v{latest_version} already installed")
             return None
-        return self.
-
-    def _install_latest_version(self, version: str) -> Path | None:
-        logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
-        # use type(self) to workaround a mock bug on class methods
-        self._product_path = type(self).transpilers_path() / self._product_name
-        backup_path = Path(f"{self._product_path!s}-saved")
-        if self._product_path.exists():
-            os.rename(self._product_path, backup_path)
-        self._product_path.mkdir(parents=True, exist_ok=True)
-        self._install_path = self._product_path / "lib"
-        self._install_path.mkdir(exist_ok=True)
-        try:
-            result = self._unsafe_install_latest_version(version)
-            logger.info(f"Successfully installed {self._pypi_name} v{version}")
-            if backup_path.exists():
-                rmtree(backup_path)
-            return result
-        except (CalledProcessError, ValueError) as e:
-            logger.error(f"Failed to install {self._pypi_name} v{version}", exc_info=e)
-            rmtree(self._product_path)
-            if backup_path.exists():
-                os.rename(backup_path, self._product_path)
-            return None
+        return self._install_version_with_backup(latest_version)

-    def
+    def _install_version(self, version: str) -> bool:
         self._create_venv()
         self._install_with_pip()
         self._copy_lsp_resources()
-        return self._post_install(
+        return self._post_install() is not None

     def _create_venv(self) -> None:
-
-
-
-
-
-
-
-        cmd_line = f"{sys.executable} -m venv .venv"
-        completed = run(cmd_line, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
-        if completed.returncode:
-            logger.error(f"Failed to create venv, error code: {completed.returncode}")
-            if completed.stdout:
-                for line in completed.stdout:
-                    logger.error(line)
-            if completed.stderr:
-                for line in completed.stderr:
-                    logger.error(line)
-        completed.check_returncode()
-        self._venv = self._install_path / ".venv"
-        self._site_packages = self._locate_site_packages()
-
-    def _locate_site_packages(self) -> Path:
-        # can't use sysconfig because it only works for currently running python
-        if sys.platform == "win32":
-            return self._locate_site_packages_windows()
-        return self._locate_site_packages_linux_or_macos()
-
-    def _locate_site_packages_windows(self) -> Path:
-        packages = self._venv / "Lib" / "site-packages"
-        if packages.exists():
-            return packages
-        raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
-
-    def _locate_site_packages_linux_or_macos(self) -> Path:
-        lib = self._venv / "lib"
-        for dir_ in os.listdir(lib):
-            if dir_.startswith("python"):
-                packages = lib / dir_ / "site-packages"
-                if packages.exists():
-                    return packages
-        raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
-
-    def _install_with_pip(self) -> None:
-        with chdir(self._install_path):
-            # the way to call pip from python is highly sensitive to os and source type
-            if self._artifact:
-                self._install_local_artifact()
-            else:
-                self._install_remote_artifact()
-
-    def _install_local_artifact(self) -> None:
-        pip = self._locate_pip()
-        pip = pip.relative_to(self._install_path)
-        target = self._site_packages
-        target = target.relative_to(self._install_path)
-        if sys.platform == "win32":
-            command = f"{pip!s} install {self._artifact!s} -t {target!s}"
-            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
-        else:
-            command = f"'{pip!s}' install '{self._artifact!s}' -t '{target!s}'"
-            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
-        # checking return code later makes debugging easier
-        completed.check_returncode()
-
-    def _install_remote_artifact(self) -> None:
-        pip = self._locate_pip()
-        pip = pip.relative_to(self._install_path)
-        target = self._site_packages
-        target = target.relative_to(self._install_path)
-        if sys.platform == "win32":
-            args = [str(pip), "install", self._pypi_name, "-t", str(target)]
-            completed = run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
+        venv_path = self._install_path / ".venv"
+        # Sadly, some platform-specific variations need to be dealt with:
+        # - Windows venvs do not use symlinks, but rather copies, when populating the venv.
+        # - The library path is different.
+        if use_symlinks := sys.platform != "win32":
+            major, minor = sys.version_info[:2]
+            lib_path = venv_path / "lib" / f"python{major}.{minor}" / "site-packages"
         else:
-
-
-
-
+            lib_path = venv_path / "Lib" / "site-packages"
+        builder = venv.EnvBuilder(with_pip=True, prompt=f"{self._product_name}", symlinks=use_symlinks)
+        builder.create(venv_path)
+        context = builder.ensure_directories(venv_path)
+        logger.debug(f"Created virtual environment with context: {context}")
+        self._venv_exec_cmd = context.env_exec_cmd
+        self._site_packages = lib_path

-    def
-
+    def _install_with_pip(self) -> None:
+        # Based on: https://pip.pypa.io/en/stable/user_guide/#using-pip-from-your-program
+        # (But with venv_exec_cmd instead of sys.executable, so that we use the venv's pip.)
+        to_install: Path | str = self._artifact if self._artifact is not None else self._pypi_name
+        command: list[Path | str] = [
+            self._venv_exec_cmd,
+            "-m",
+            "pip",
+            "--disable-pip-version-check",
+            "install",
+            to_install,
+        ]
+        result = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, check=False)
+        result.check_returncode()

     def _copy_lsp_resources(self):
         lsp = self._site_packages / "lsp"
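The rewritten `_create_venv`/`_install_with_pip` drop the shell-quoted `pip` invocations and site-packages probing in favour of `venv.EnvBuilder` plus `python -m pip` run from the new environment. A standalone sketch of that flow, under the assumption that network access is available; the package name `wheel` and the temporary location are placeholders:

```python
import sys
import tempfile
import venv
from pathlib import Path
from subprocess import run

with tempfile.TemporaryDirectory() as tmp:
    venv_path = Path(tmp) / ".venv"
    # Windows venvs are populated with copies rather than symlinks.
    builder = venv.EnvBuilder(with_pip=True, symlinks=sys.platform != "win32")
    builder.create(venv_path)
    context = builder.ensure_directories(venv_path)

    # Drive pip through the venv's own interpreter, as _install_with_pip now does.
    command = [context.env_exec_cmd, "-m", "pip", "--disable-pip-version-check", "install", "wheel"]
    run(command, check=True)
```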
@@ -323,21 +260,20 @@ class WheelInstaller(TranspilerInstaller):
             raise ValueError("Installed transpiler is missing a 'lsp' folder")
         shutil.copytree(lsp, self._install_path, dirs_exist_ok=True)

-    def _post_install(self
+    def _post_install(self) -> Path | None:
         config = self._install_path / "config.yml"
         if not config.exists():
             raise ValueError("Installed transpiler is missing a 'config.yml' file in its 'lsp' folder")
         install_ext = "ps1" if sys.platform == "win32" else "sh"
         install_script = f"installer.{install_ext}"
-
-        if
-            self._run_custom_installer(
-        self._store_product_state(product_path=self._product_path, version=version)
+        installer_path = self._install_path / install_script
+        if installer_path.exists():
+            self._run_custom_installer(installer_path)
         return self._install_path

-    def _run_custom_installer(self,
-        args = [
-        run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, cwd=
+    def _run_custom_installer(self, installer_path: Path) -> None:
+        args = [installer_path]
+        run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, cwd=self._install_path, check=True)


 class MavenInstaller(TranspilerInstaller):
@@ -416,8 +352,15 @@ class MavenInstaller(TranspilerInstaller):
         logger.info(f"Successfully installed: {group_id}:{artifact_id}:{version}")
         return True

-    def __init__(
-        self
+    def __init__(
+        self,
+        repository: TranspilerRepository,
+        product_name: str,
+        group_id: str,
+        artifact_id: str,
+        artifact: Path | None = None,
+    ) -> None:
+        super().__init__(repository, product_name)
         self._group_id = group_id
         self._artifact_id = artifact_id
         self._artifact = artifact
@@ -434,45 +377,19 @@ class MavenInstaller(TranspilerInstaller):
             logger.warning(f"Could not determine the latest version of Databricks {self._product_name} transpiler")
             logger.error("Failed to install transpiler: Databricks {self._product_name} transpiler")
             return None
-        installed_version = self.get_installed_version(self._product_name)
+        installed_version = self._repository.get_installed_version(self._product_name)
         if installed_version == latest_version:
             logger.info(f"Databricks {self._product_name} transpiler v{latest_version} already installed")
             return None
-        return self.
-
-    def _install_version(self, version: str) -> Path | None:
-        logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
-        # use type(self) to workaround a mock bug on class methods
-        self._product_path = type(self).transpilers_path() / self._product_name
-        backup_path = Path(f"{self._product_path!s}-saved")
-        if backup_path.exists():
-            rmtree(backup_path)
-        if self._product_path.exists():
-            os.rename(self._product_path, backup_path)
-        self._product_path.mkdir(parents=True)
-        self._install_path = self._product_path / "lib"
-        self._install_path.mkdir()
-        try:
-            if self._unsafe_install_version(version):
-                logger.info(f"Successfully installed {self._product_name} v{version}")
-                self._store_product_state(self._product_path, version)
-                if backup_path.exists():
-                    rmtree(backup_path)
-                return self._product_path
-        except (KeyError, ValueError) as e:
-            logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}", exc_info=e)
-            rmtree(self._product_path)
-            if backup_path.exists():
-                os.rename(backup_path, self._product_path)
-        return None
+        return self._install_version_with_backup(latest_version)

-    def
+    def _install_version(self, version: str) -> bool:
         jar_file_path = self._install_path / f"{self._artifact_id}.jar"
         if self._artifact:
-            logger.debug(f"Copying
+            logger.debug(f"Copying: {self._artifact} -> {jar_file_path}")
             shutil.copyfile(self._artifact, jar_file_path)
         elif not self.download_artifact_from_maven(self._group_id, self._artifact_id, version, jar_file_path):
-            logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}")
+            logger.error(f"Failed to install Databricks {self._product_name} transpiler (v{version})")
             return False
         self._copy_lsp_config(jar_file_path)
         return True
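`MavenInstaller._install_version` either copies a local jar or falls back to `download_artifact_from_maven`, which is unchanged in this release and not shown in the hunk. For orientation only, a hedged sketch of fetching a jar by the standard Maven Central repository layout, which is the kind of work that helper is expected to do; the coordinates and version below are examples, not values from the package:

```python
from pathlib import Path
from urllib import request


def maven_central_jar_url(group_id: str, artifact_id: str, version: str) -> str:
    # Standard Maven repository layout: dots in the groupId become path separators.
    group_path = group_id.replace(".", "/")
    return (
        "https://repo.maven.apache.org/maven2/"
        f"{group_path}/{artifact_id}/{version}/{artifact_id}-{version}.jar"
    )


url = maven_central_jar_url("com.databricks.labs", "databricks-morph-plugin", "1.2.3")
# request.urlretrieve(url, Path("databricks-morph-plugin.jar"))  # uncomment to download
print(url)
```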
@@ -495,6 +412,7 @@ class WorkspaceInstaller:
         resource_configurator: ResourceConfigurator,
         workspace_installation: WorkspaceInstallation,
         environ: dict[str, str] | None = None,
+        transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
     ):
         self._ws = ws
         self._prompts = prompts
@@ -503,6 +421,7 @@ class WorkspaceInstaller:
         self._product_info = product_info
         self._resource_configurator = resource_configurator
         self._ws_installation = workspace_installation
+        self._transpiler_repository = transpiler_repository

         if not environ:
             environ = dict(os.environ.items())
@@ -511,7 +430,9 @@ class WorkspaceInstaller:
             msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime"
             raise SystemExit(msg)

-    def run(
+    def run(
+        self, module: str, config: LakebridgeConfiguration | None = None, artifact: str | None = None
+    ) -> LakebridgeConfiguration:
         logger.debug(f"Initializing workspace installation for module: {module} (config: {config})")
         if module == "transpile" and artifact:
             self.install_artifact(artifact)
@@ -526,15 +447,21 @@ class WorkspaceInstaller:
         logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
         return config

-
-
+    def has_installed_transpilers(self) -> bool:
+        """Detect whether there are transpilers currently installed."""
+        installed_transpilers = self._transpiler_repository.all_transpiler_names()
+        if installed_transpilers:
+            logger.info(f"Detected installed transpilers: {sorted(installed_transpilers)}")
+        return bool(installed_transpilers)
+
+    def install_bladebridge(self, artifact: Path | None = None) -> None:
         local_name = "bladebridge"
         pypi_name = "databricks-bb-plugin"
-
+        wheel_installer = WheelInstaller(self._transpiler_repository, local_name, pypi_name, artifact)
+        wheel_installer.install()

-
-
-        if not cls.is_java_version_okay():
+    def install_morpheus(self, artifact: Path | None = None) -> None:
+        if not self.is_java_version_okay():
             logger.error(
                 "The morpheus transpiler requires Java 11 or above. Please install Java and re-run 'install-transpile'."
             )
@@ -542,7 +469,8 @@ class WorkspaceInstaller:
         product_name = "databricks-morph-plugin"
         group_id = "com.databricks.labs"
         artifact_id = product_name
-
+        maven_installer = MavenInstaller(self._transpiler_repository, product_name, group_id, artifact_id, artifact)
+        maven_installer.install()

     @classmethod
     def is_java_version_okay(cls) -> bool:
@@ -564,16 +492,15 @@ class WorkspaceInstaller:
             case _:
                 return True

-
-    def install_artifact(cls, artifact: str):
+    def install_artifact(self, artifact: str):
         path = Path(artifact)
         if not path.exists():
             logger.error(f"Could not locate artifact {artifact}")
             return
         if "databricks-morph-plugin" in path.name:
-
+            self.install_morpheus(path)
         elif "databricks_bb_plugin" in path.name:
-
+            self.install_bladebridge(path)
         else:
             logger.fatal(f"Cannot install unsupported artifact: {artifact}")

@@ -637,17 +564,17 @@ class WorkspaceInstaller:
         patch = int(match["patch"] or 0)
         return feature, interim, update, patch

-    def configure(self, module: str) ->
+    def configure(self, module: str) -> LakebridgeConfiguration:
         match module:
             case "transpile":
                 logger.info("Configuring lakebridge `transpile`.")
-                return
+                return LakebridgeConfiguration(self._configure_transpile(), None)
             case "reconcile":
                 logger.info("Configuring lakebridge `reconcile`.")
-                return
+                return LakebridgeConfiguration(None, self._configure_reconcile())
             case "all":
                 logger.info("Configuring lakebridge `transpile` and `reconcile`.")
-                return
+                return LakebridgeConfiguration(
                     self._configure_transpile(),
                     self._configure_reconcile(),
                 )
@@ -697,19 +624,19 @@ class WorkspaceInstaller:
         return config

     def _all_installed_dialects(self) -> list[str]:
-        return sorted(
+        return sorted(self._transpiler_repository.all_dialects())

     def _transpilers_with_dialect(self, dialect: str) -> list[str]:
-        return sorted(
+        return sorted(self._transpiler_repository.transpilers_with_dialect(dialect))

     def _transpiler_config_path(self, transpiler: str) -> Path:
-        return
+        return self._transpiler_repository.transpiler_config_path(transpiler)

     def _prompt_for_new_transpile_installation(self) -> TranspileConfig:
         install_later = "Set it later"
         # TODO tidy this up, logger might not display the below in console...
         logger.info("Please answer a few questions to configure lakebridge `transpile`")
-        all_dialects = [install_later
+        all_dialects = [install_later, *self._all_installed_dialects()]
         source_dialect: str | None = self._prompts.choice("Select the source dialect:", all_dialects, sort=False)
         if source_dialect == install_later:
             source_dialect = None
@@ -760,14 +687,12 @@ class WorkspaceInstaller:
         )

     def _prompt_for_transpiler_options(self, transpiler_name: str, source_dialect: str) -> dict[str, Any] | None:
-        config_options =
+        config_options = self._transpiler_repository.transpiler_config_options(transpiler_name, source_dialect)
         if len(config_options) == 0:
             return None
         return {option.flag: option.prompt_for_value(self._prompts) for option in config_options}

-    def _configure_catalog(
-        self,
-    ) -> str:
+    def _configure_catalog(self) -> str:
         return self._resource_configurator.prompt_for_catalog_setup()

     def _configure_schema(
@@ -882,3 +807,28 @@ class WorkspaceInstaller:

     def _has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None = None):
         self._resource_configurator.has_necessary_access(catalog_name, schema_name, volume_name)
+
+
+def installer(ws: WorkspaceClient, transpiler_repository: TranspilerRepository) -> WorkspaceInstaller:
+    app_context = ApplicationContext(_verify_workspace_client(ws))
+    return WorkspaceInstaller(
+        app_context.workspace_client,
+        app_context.prompts,
+        app_context.installation,
+        app_context.install_state,
+        app_context.product_info,
+        app_context.resource_configurator,
+        app_context.workspace_installation,
+        transpiler_repository=transpiler_repository,
+    )
+
+
+def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
+    """Verifies the workspace client configuration, ensuring it has the correct product info."""
+
+    # Using reflection to set right value for _product_info for telemetry
+    product_info = getattr(ws.config, '_product_info')
+    if product_info[0] != "lakebridge":
+        setattr(ws.config, '_product_info', ('lakebridge', __version__))
+
+    return ws