databricks-labs-lakebridge 0.10.6__py3-none-any.whl → 0.10.8__py3-none-any.whl
This diff compares two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
- databricks/labs/lakebridge/__about__.py +1 -1
- databricks/labs/lakebridge/analyzer/__init__.py +0 -0
- databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +95 -0
- databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
- databricks/labs/lakebridge/base_install.py +20 -3
- databricks/labs/lakebridge/cli.py +32 -59
- databricks/labs/lakebridge/contexts/application.py +7 -0
- databricks/labs/lakebridge/deployment/job.py +2 -2
- databricks/labs/lakebridge/helpers/file_utils.py +36 -0
- databricks/labs/lakebridge/helpers/validation.py +5 -3
- databricks/labs/lakebridge/install.py +73 -484
- databricks/labs/lakebridge/reconcile/compare.py +70 -33
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +24 -1
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +12 -1
- databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +126 -0
- databricks/labs/lakebridge/reconcile/connectors/models.py +7 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +12 -1
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +63 -30
- databricks/labs/lakebridge/reconcile/connectors/tsql.py +28 -2
- databricks/labs/lakebridge/reconcile/constants.py +4 -3
- databricks/labs/lakebridge/reconcile/execute.py +9 -810
- databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +133 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +53 -18
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
- databricks/labs/lakebridge/reconcile/recon_config.py +3 -15
- databricks/labs/lakebridge/reconcile/recon_output_config.py +2 -1
- databricks/labs/lakebridge/reconcile/reconciliation.py +511 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +26 -19
- databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +78 -0
- databricks/labs/lakebridge/reconcile/trigger_recon_service.py +256 -0
- databricks/labs/lakebridge/reconcile/utils.py +38 -0
- databricks/labs/lakebridge/transpiler/execute.py +34 -28
- databricks/labs/lakebridge/transpiler/installers.py +523 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +47 -60
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +2 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -18
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/METADATA +1 -1
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/RECORD +46 -35
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/WHEEL +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/entry_points.txt +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/LICENSE +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/NOTICE +0 -0
databricks/labs/lakebridge/install.py

@@ -1,41 +1,35 @@
-import re
-import abc
 import dataclasses
-import shutil
-from json import loads, dump
 import logging
 import os
-from shutil import rmtree, move
-from subprocess import run, CalledProcessError
-import sys
-from typing import Any, cast
-from urllib import request
-from urllib.error import URLError, HTTPError
 import webbrowser
-from
+from collections.abc import Set, Callable, Sequence
 from pathlib import Path
-import
-from zipfile import ZipFile
+from typing import Any, cast
 
-from databricks.labs.blueprint.installation import Installation, JsonValue
-from databricks.labs.blueprint.installation import SerdeError
+from databricks.labs.blueprint.installation import Installation, JsonValue, SerdeError
 from databricks.labs.blueprint.installer import InstallState
 from databricks.labs.blueprint.tui import Prompts
 from databricks.labs.blueprint.wheels import ProductInfo
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors import NotFound, PermissionDenied
 
+from databricks.labs.lakebridge.__about__ import __version__
 from databricks.labs.lakebridge.config import (
-    TranspileConfig,
-    ReconcileConfig,
     DatabaseConfig,
+    ReconcileConfig,
     LakebridgeConfiguration,
     ReconcileMetadataConfig,
+    TranspileConfig,
 )
+from databricks.labs.lakebridge.contexts.application import ApplicationContext
 from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
 from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
-from databricks.labs.lakebridge.helpers.file_utils import chdir
 from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
+from databricks.labs.lakebridge.transpiler.installers import (
+    BladebridgeInstaller,
+    MorpheusInstaller,
+    TranspilerInstaller,
+)
 from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
 
 logger = logging.getLogger(__name__)
@@ -43,366 +37,6 @@ logger = logging.getLogger(__name__)
 TRANSPILER_WAREHOUSE_PREFIX = "Lakebridge Transpiler Validation"
 
 
-class TranspilerInstaller(abc.ABC):
-    def __init__(self, repository: TranspilerRepository) -> None:
-        self._repository = repository
-
-    _version_pattern = re.compile(r"[_-](\d+(?:[.\-_]\w*\d+)+)")
-
-    @classmethod
-    def get_local_artifact_version(cls, artifact: Path) -> str | None:
-        # TODO: Get the version from the metadata inside the artifact rather than relying on the filename.
-        match = cls._version_pattern.search(artifact.stem)
-        if not match:
-            return None
-        group = match.group(0)
-        if not group:
-            return None
-        # TODO: Update the regex to take care of these trimming scenarios.
-        if group.startswith('-'):
-            group = group[1:]
-        if group.endswith("-py3"):
-            group = group[:-4]
-        return group
-
-    @classmethod
-    def _store_product_state(cls, product_path: Path, version: str) -> None:
-        state_path = product_path / "state"
-        state_path.mkdir()
-        version_data = {"version": f"v{version}", "date": datetime.now(timezone.utc).isoformat()}
-        version_path = state_path / "version.json"
-        with version_path.open("w", encoding="utf-8") as f:
-            dump(version_data, f)
-            f.write("\n")
-
-
-class WheelInstaller(TranspilerInstaller):
-
-    @classmethod
-    def get_latest_artifact_version_from_pypi(cls, product_name: str) -> str | None:
-        try:
-            with request.urlopen(f"https://pypi.org/pypi/{product_name}/json") as server:
-                text: bytes = server.read()
-                data: dict[str, Any] = loads(text)
-                return data.get("info", {}).get('version', None)
-        except HTTPError as e:
-            logger.error(f"Error while fetching PyPI metadata: {product_name}", exc_info=e)
-            return None
-
-    def __init__(
-        self,
-        repository: TranspilerRepository,
-        product_name: str,
-        pypi_name: str,
-        artifact: Path | None = None,
-    ) -> None:
-        super().__init__(repository)
-        self._product_name = product_name
-        self._pypi_name = pypi_name
-        self._artifact = artifact
-
-    def install(self) -> Path | None:
-        return self._install_checking_versions()
-
-    def _install_checking_versions(self) -> Path | None:
-        latest_version = (
-            self.get_local_artifact_version(self._artifact)
-            if self._artifact
-            else self.get_latest_artifact_version_from_pypi(self._pypi_name)
-        )
-        if latest_version is None:
-            logger.warning(f"Could not determine the latest version of {self._pypi_name}")
-            logger.error(f"Failed to install transpiler: {self._product_name}")
-            return None
-        installed_version = self._repository.get_installed_version(self._product_name)
-        if installed_version == latest_version:
-            logger.info(f"{self._pypi_name} v{latest_version} already installed")
-            return None
-        return self._install_latest_version(latest_version)
-
-    def _install_latest_version(self, version: str) -> Path | None:
-        logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
-        self._product_path = self._repository.transpilers_path() / self._product_name
-        backup_path = Path(f"{self._product_path!s}-saved")
-        if self._product_path.exists():
-            os.rename(self._product_path, backup_path)
-        self._install_path = self._product_path / "lib"
-        self._install_path.mkdir(parents=True, exist_ok=True)
-        try:
-            result = self._unsafe_install_latest_version(version)
-            logger.info(f"Successfully installed {self._pypi_name} v{version}")
-            if backup_path.exists():
-                rmtree(backup_path)
-            return result
-        except (CalledProcessError, ValueError) as e:
-            logger.error(f"Failed to install {self._pypi_name} v{version}", exc_info=e)
-            rmtree(self._product_path)
-            if backup_path.exists():
-                os.rename(backup_path, self._product_path)
-            return None
-
-    def _unsafe_install_latest_version(self, version: str) -> Path | None:
-        self._create_venv()
-        self._install_with_pip()
-        self._copy_lsp_resources()
-        return self._post_install(version)
-
-    def _create_venv(self) -> None:
-        with chdir(self._install_path):
-            self._unsafe_create_venv()
-
-    def _unsafe_create_venv(self) -> None:
-        # using the venv module doesn't work (maybe it's not possible to create a venv from a venv ?)
-        # so falling back to something that works
-        # for some reason this requires shell=True, so pass full cmd line
-        cmd_line = f"{sys.executable} -m venv .venv"
-        completed = run(cmd_line, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
-        if completed.returncode:
-            logger.error(f"Failed to create venv, error code: {completed.returncode}")
-            if completed.stdout:
-                for line in completed.stdout:
-                    logger.error(line)
-            if completed.stderr:
-                for line in completed.stderr:
-                    logger.error(line)
-        completed.check_returncode()
-        self._venv = self._install_path / ".venv"
-        self._site_packages = self._locate_site_packages()
-
-    def _locate_site_packages(self) -> Path:
-        # can't use sysconfig because it only works for currently running python
-        if sys.platform == "win32":
-            return self._locate_site_packages_windows()
-        return self._locate_site_packages_linux_or_macos()
-
-    def _locate_site_packages_windows(self) -> Path:
-        packages = self._venv / "Lib" / "site-packages"
-        if packages.exists():
-            return packages
-        raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
-
-    def _locate_site_packages_linux_or_macos(self) -> Path:
-        lib = self._venv / "lib"
-        for dir_ in os.listdir(lib):
-            if dir_.startswith("python"):
-                packages = lib / dir_ / "site-packages"
-                if packages.exists():
-                    return packages
-        raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
-
-    def _install_with_pip(self) -> None:
-        with chdir(self._install_path):
-            # the way to call pip from python is highly sensitive to os and source type
-            if self._artifact:
-                self._install_local_artifact()
-            else:
-                self._install_remote_artifact()
-
-    def _install_local_artifact(self) -> None:
-        pip = self._locate_pip()
-        pip = pip.relative_to(self._install_path)
-        target = self._site_packages
-        target = target.relative_to(self._install_path)
-        if sys.platform == "win32":
-            command = f"{pip!s} install {self._artifact!s} -t {target!s}"
-            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
-        else:
-            command = f"'{pip!s}' install '{self._artifact!s}' -t '{target!s}'"
-            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
-        # checking return code later makes debugging easier
-        completed.check_returncode()
-
-    def _install_remote_artifact(self) -> None:
-        pip = self._locate_pip()
-        pip = pip.relative_to(self._install_path)
-        target = self._site_packages
-        target = target.relative_to(self._install_path)
-        if sys.platform == "win32":
-            args = [str(pip), "install", self._pypi_name, "-t", str(target)]
-            completed = run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
-        else:
-            command = f"'{pip!s}' install {self._pypi_name} -t '{target!s}'"
-            completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
-        # checking return code later makes debugging easier
-        completed.check_returncode()
-
-    def _locate_pip(self) -> Path:
-        return self._venv / "Scripts" / "pip3.exe" if sys.platform == "win32" else self._venv / "bin" / "pip3"
-
-    def _copy_lsp_resources(self):
-        lsp = self._site_packages / "lsp"
-        if not lsp.exists():
-            raise ValueError("Installed transpiler is missing a 'lsp' folder")
-        shutil.copytree(lsp, self._install_path, dirs_exist_ok=True)
-
-    def _post_install(self, version: str) -> Path | None:
-        config = self._install_path / "config.yml"
-        if not config.exists():
-            raise ValueError("Installed transpiler is missing a 'config.yml' file in its 'lsp' folder")
-        install_ext = "ps1" if sys.platform == "win32" else "sh"
-        install_script = f"installer.{install_ext}"
-        installer = self._install_path / install_script
-        if installer.exists():
-            self._run_custom_installer(installer)
-        self._store_product_state(product_path=self._product_path, version=version)
-        return self._install_path
-
-    def _run_custom_installer(self, installer):
-        args = [str(installer)]
-        run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, cwd=str(self._install_path), check=True)
-
-
-class MavenInstaller(TranspilerInstaller):
-    # Maven Central, base URL.
-    _maven_central_repo: str = "https://repo.maven.apache.org/maven2/"
-
-    @classmethod
-    def _artifact_base_url(cls, group_id: str, artifact_id: str) -> str:
-        """Construct the base URL for a Maven artifact."""
-        # Reference: https://maven.apache.org/repositories/layout.html
-        group_path = group_id.replace(".", "/")
-        return f"{cls._maven_central_repo}{group_path}/{artifact_id}/"
-
-    @classmethod
-    def artifact_metadata_url(cls, group_id: str, artifact_id: str) -> str:
-        """Get the metadata URL for a Maven artifact."""
-        # TODO: Unit test this method.
-        return f"{cls._artifact_base_url(group_id, artifact_id)}maven-metadata.xml"
-
-    @classmethod
-    def artifact_url(
-        cls, group_id: str, artifact_id: str, version: str, classifier: str | None = None, extension: str = "jar"
-    ) -> str:
-        """Get the URL for a versioned Maven artifact."""
-        # TODO: Unit test this method, including classifier and extension.
-        _classifier = f"-{classifier}" if classifier else ""
-        artifact_base_url = cls._artifact_base_url(group_id, artifact_id)
-        return f"{artifact_base_url}{version}/{artifact_id}-{version}{_classifier}.{extension}"
-
-    @classmethod
-    def get_current_maven_artifact_version(cls, group_id: str, artifact_id: str) -> str | None:
-        url = cls.artifact_metadata_url(group_id, artifact_id)
-        try:
-            with request.urlopen(url) as server:
-                text = server.read()
-        except HTTPError as e:
-            logger.error(f"Error while fetching maven metadata: {group_id}:{artifact_id}", exc_info=e)
-            return None
-        logger.debug(f"Maven metadata for {group_id}:{artifact_id}: {text}")
-        return cls._extract_latest_release_version(text)
-
-    @classmethod
-    def _extract_latest_release_version(cls, maven_metadata: str) -> str | None:
-        """Extract the latest release version from Maven metadata."""
-        # Reference: https://maven.apache.org/repositories/metadata.html#The_A_Level_Metadata
-        # TODO: Unit test this method, to verify the sequence of things it checks for.
-        root = ET.fromstring(maven_metadata)
-        for label in ("release", "latest"):
-            version = root.findtext(f"./versioning/{label}")
-            if version is not None:
-                return version
-        return root.findtext("./versioning/versions/version[last()]")
-
-    @classmethod
-    def download_artifact_from_maven(
-        cls,
-        group_id: str,
-        artifact_id: str,
-        version: str,
-        target: Path,
-        classifier: str | None = None,
-        extension: str = "jar",
-    ) -> bool:
-        if target.exists():
-            logger.warning(f"Skipping download of {group_id}:{artifact_id}:{version}; target already exists: {target}")
-            return True
-        url = cls.artifact_url(group_id, artifact_id, version, classifier, extension)
-        try:
-            path, _ = request.urlretrieve(url)
-            logger.debug(f"Downloaded maven artefact from {url} to {path}")
-        except URLError as e:
-            logger.error(f"Unable to download maven artefact: {group_id}:{artifact_id}:{version}", exc_info=e)
-            return False
-        logger.debug(f"Moving {path} to {target}")
-        move(path, target)
-        logger.info(f"Successfully installed: {group_id}:{artifact_id}:{version}")
-        return True
-
-    def __init__(
-        self,
-        repository: TranspilerRepository,
-        product_name: str,
-        group_id: str,
-        artifact_id: str,
-        artifact: Path | None = None,
-    ) -> None:
-        super().__init__(repository)
-        self._product_name = product_name
-        self._group_id = group_id
-        self._artifact_id = artifact_id
-        self._artifact = artifact
-
-    def install(self) -> Path | None:
-        return self._install_checking_versions()
-
-    def _install_checking_versions(self) -> Path | None:
-        if self._artifact:
-            latest_version = self.get_local_artifact_version(self._artifact)
-        else:
-            latest_version = self.get_current_maven_artifact_version(self._group_id, self._artifact_id)
-        if latest_version is None:
-            logger.warning(f"Could not determine the latest version of Databricks {self._product_name} transpiler")
-            logger.error("Failed to install transpiler: Databricks {self._product_name} transpiler")
-            return None
-        installed_version = self._repository.get_installed_version(self._product_name)
-        if installed_version == latest_version:
-            logger.info(f"Databricks {self._product_name} transpiler v{latest_version} already installed")
-            return None
-        return self._install_version(latest_version)
-
-    def _install_version(self, version: str) -> Path | None:
-        logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
-        self._product_path = self._repository.transpilers_path() / self._product_name
-        backup_path = Path(f"{self._product_path!s}-saved")
-        if backup_path.exists():
-            rmtree(backup_path)
-        if self._product_path.exists():
-            os.rename(self._product_path, backup_path)
-        self._product_path.mkdir(parents=True)
-        self._install_path = self._product_path / "lib"
-        self._install_path.mkdir()
-        try:
-            if self._unsafe_install_version(version):
-                logger.info(f"Successfully installed {self._product_name} v{version}")
-                self._store_product_state(self._product_path, version)
-                if backup_path.exists():
-                    rmtree(backup_path)
-                return self._product_path
-        except (KeyError, ValueError) as e:
-            logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}", exc_info=e)
-        rmtree(self._product_path)
-        if backup_path.exists():
-            os.rename(backup_path, self._product_path)
-        return None
-
-    def _unsafe_install_version(self, version: str) -> bool:
-        jar_file_path = self._install_path / f"{self._artifact_id}.jar"
-        if self._artifact:
-            logger.debug(f"Copying '{self._artifact!s}' to '{jar_file_path!s}'")
-            shutil.copyfile(self._artifact, jar_file_path)
-        elif not self.download_artifact_from_maven(self._group_id, self._artifact_id, version, jar_file_path):
-            logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}")
-            return False
-        self._copy_lsp_config(jar_file_path)
-        return True
-
-    def _copy_lsp_config(self, jar_file_path: Path) -> None:
-        with ZipFile(jar_file_path) as zip_file:
-            zip_file.extract("lsp/config.yml", self._install_path)
-        shutil.move(self._install_path / "lsp" / "config.yml", self._install_path / "config.yml")
-        os.rmdir(self._install_path / "lsp")
-
-
 class WorkspaceInstaller:
     def __init__(
         self,
@@ -414,7 +48,12 @@ class WorkspaceInstaller:
         resource_configurator: ResourceConfigurator,
         workspace_installation: WorkspaceInstallation,
         environ: dict[str, str] | None = None,
+        *,
         transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
+        transpiler_installers: Sequence[Callable[[TranspilerRepository], TranspilerInstaller]] = (
+            BladebridgeInstaller,
+            MorpheusInstaller,
+        ),
     ):
         self._ws = ws
         self._prompts = prompts
@@ -424,6 +63,7 @@ class WorkspaceInstaller:
         self._resource_configurator = resource_configurator
         self._ws_installation = workspace_installation
         self._transpiler_repository = transpiler_repository
+        self._transpiler_installer_factories = transpiler_installers
 
         if not environ:
             environ = dict(os.environ.items())
@@ -432,15 +72,19 @@ class WorkspaceInstaller:
             msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime"
             raise SystemExit(msg)
 
+    @property
+    def _transpiler_installers(self) -> Set[TranspilerInstaller]:
+        return frozenset(factory(self._transpiler_repository) for factory in self._transpiler_installer_factories)
+
     def run(
         self, module: str, config: LakebridgeConfiguration | None = None, artifact: str | None = None
     ) -> LakebridgeConfiguration:
         logger.debug(f"Initializing workspace installation for module: {module} (config: {config})")
         if module == "transpile" and artifact:
-            self.
+            self._install_artifact(artifact)
         elif module in {"transpile", "all"}:
-            self.
-
+            for transpiler_installer in self._transpiler_installers:
+                transpiler_installer.install()
         if not config:
             config = self.configure(module)
         if self._is_testing():
@@ -449,116 +93,36 @@ class WorkspaceInstaller:
         logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
         return config
 
-    def
-
-
-
-
-
-
-
-
-        "
-
-
-
-
-
-
-
-
-
-    def is_java_version_okay(cls) -> bool:
-        detected_java = cls.find_java()
-        match detected_java:
-            case None:
-                logger.warning("No Java executable found in the system PATH.")
-                return False
-            case (java_executable, None):
-                logger.warning(f"Java found, but could not determine the version: {java_executable}.")
-                return False
-            case (java_executable, bytes(raw_version)):
-                logger.warning(f"Java found ({java_executable}), but could not parse the version:\n{raw_version}")
-                return False
-            case (java_executable, tuple(old_version)) if old_version < (11, 0, 0, 0):
-                version_str = ".".join(str(v) for v in old_version)
-                logger.warning(f"Java found ({java_executable}), but version {version_str} is too old.")
-                return False
-            case _:
-                return True
-
-    def install_artifact(self, artifact: str):
+    def upgrade_installed_transpilers(self) -> bool:
+        """Detect and upgrade, if possible and necessary, installed transpilers."""
+        installed_transpilers = self._transpiler_repository.all_transpiler_names()
+        if installed_transpilers:
+            logger.info(f"Detected installed transpilers: {sorted(installed_transpilers)}")
+        upgraded = False
+        for transpiler_installer in self._transpiler_installers:
+            name = transpiler_installer.name
+            if name in installed_transpilers:
+                logger.info(f"Checking for {name} upgrades...")
+                upgraded |= transpiler_installer.install()
+        # If we upgraded anything, the configuration process needs to run again.
+        if upgraded:
+            config = self.configure("transpile")
+            if not self._is_testing():
+                self._ws_installation.install(config)
+        return upgraded
+
+    def _install_artifact(self, artifact: str) -> None:
         path = Path(artifact)
         if not path.exists():
             logger.error(f"Could not locate artifact {artifact}")
             return
-
-
-
-
+        for transpiler_installer in self._transpiler_installers:
+            if transpiler_installer.can_install(path):
+                transpiler_installer.install(path)
+                break
         else:
             logger.fatal(f"Cannot install unsupported artifact: {artifact}")
 
-    @classmethod
-    def find_java(cls) -> tuple[Path, tuple[int, int, int, int] | bytes | None] | None:
-        """Locate Java and return its version, as reported by `java -version`.
-
-        The java executable is currently located by searching the system PATH. Its version is parsed from the output of
-        the `java -version` command, which has been standardized since Java 10.
-
-        Returns:
-            a tuple of its path and the version as a tuple of integers (feature, interim, update, patch), if the java
-            executable could be located. If the version cannot be parsed, instead the raw version information is
-            returned, or `None` as a last resort. When no java executable is found, `None` is returned instead of a
-            tuple.
-        """
-        # Platform-independent way to reliably locate the java executable.
-        # Reference: https://docs.python.org/3.10/library/subprocess.html#popen-constructor
-        java_executable = shutil.which("java")
-        if java_executable is None:
-            return None
-        java_executable_path = Path(java_executable)
-        logger.debug(f"Using java executable: {java_executable_path!r}")
-        try:
-            completed = run([str(java_executable_path), "-version"], shell=False, capture_output=True, check=True)
-        except CalledProcessError as e:
-            logger.debug(
-                f"Failed to run {e.args!r} (exit-code={e.returncode}, stdout={e.stdout!r}, stderr={e.stderr!r})",
-                exc_info=e,
-            )
-            return java_executable_path, None
-        # It might not be ascii, but the bits we care about are so this will never fail.
-        raw_output = completed.stderr
-        java_version_output = raw_output.decode("ascii", errors="ignore")
-        java_version = cls._parse_java_version(java_version_output)
-        if java_version is None:
-            return java_executable_path, raw_output.strip()
-        logger.debug(f"Detected java version: {java_version}")
-        return java_executable_path, java_version
-
-    # Pattern to match a Java version string, compiled at import time to ensure it's valid.
-    # Ref: https://docs.oracle.com/en/java/javase/11/install/version-string-format.html
-    _java_version_pattern = re.compile(
-        r' version "(?P<feature>\d+)(?:\.(?P<interim>\d+)(?:\.(?P<update>\d+)(?:\.(?P<patch>\d+))?)?)?"'
-    )
-
-    @classmethod
-    def _parse_java_version(cls, version: str) -> tuple[int, int, int, int] | None:
-        """Locate and parse the Java version in the output of `java -version`."""
-        # Output looks like this:
-        # openjdk version "24.0.1" 2025-04-15
-        # OpenJDK Runtime Environment Temurin-24.0.1+9 (build 24.0.1+9)
-        # OpenJDK 64-Bit Server VM Temurin-24.0.1+9 (build 24.0.1+9, mixed mode)
-        match = cls._java_version_pattern.search(version)
-        if not match:
-            logger.debug(f"Could not parse java version: {version!r}")
-            return None
-        feature = int(match["feature"])
-        interim = int(match["interim"] or 0)
-        update = int(match["update"] or 0)
-        patch = int(match["patch"] or 0)
-        return feature, interim, update, patch
-
     def configure(self, module: str) -> LakebridgeConfiguration:
         match module:
             case "transpile":
@@ -802,3 +366,28 @@ class WorkspaceInstaller:
 
     def _has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None = None):
         self._resource_configurator.has_necessary_access(catalog_name, schema_name, volume_name)
+
+
+def installer(ws: WorkspaceClient, transpiler_repository: TranspilerRepository) -> WorkspaceInstaller:
+    app_context = ApplicationContext(_verify_workspace_client(ws))
+    return WorkspaceInstaller(
+        app_context.workspace_client,
+        app_context.prompts,
+        app_context.installation,
+        app_context.install_state,
+        app_context.product_info,
+        app_context.resource_configurator,
+        app_context.workspace_installation,
+        transpiler_repository=transpiler_repository,
+    )
+
+
+def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
+    """Verifies the workspace client configuration, ensuring it has the correct product info."""
+
+    # Using reflection to set right value for _product_info for telemetry
+    product_info = getattr(ws.config, '_product_info')
+    if product_info[0] != "lakebridge":
+        setattr(ws.config, '_product_info', ('lakebridge', __version__))
+
+    return ws
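The last hunk exposes a module-level installer() factory that wires an ApplicationContext into a WorkspaceInstaller, while the transpiler-specific installers are now supplied as pluggable factories (BladebridgeInstaller, MorpheusInstaller) from databricks.labs.lakebridge.transpiler.installers. A minimal sketch of how this new entry point might be driven, assuming only what the diff shows; the CLI profile name and the choice of the "transpile" module are illustrative assumptions, not part of the diff:

    from databricks.sdk import WorkspaceClient

    from databricks.labs.lakebridge.install import installer
    from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository

    # Assumption: credentials come from a local Databricks CLI profile named "DEFAULT".
    ws = WorkspaceClient(profile="DEFAULT")

    # Build the installer against the per-user transpiler repository (the same default
    # WorkspaceInstaller uses) and run the installation flow for the transpile module.
    ws_installer = installer(ws, transpiler_repository=TranspilerRepository.user_home())
    config = ws_installer.run("transpile")

    # On later runs, already-installed transpilers can be upgraded in place; per the
    # new method, this returns True only when something was actually upgraded.
    ws_installer.upgrade_installed_transpilers()

As the diff shows, run() falls back to configure(module) when no LakebridgeConfiguration is passed, and upgrade_installed_transpilers() re-runs configure("transpile") only when one of the installers reported an upgrade.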