databricks-labs-lakebridge 0.10.6__py3-none-any.whl → 0.10.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. databricks/labs/lakebridge/__about__.py +1 -1
  2. databricks/labs/lakebridge/analyzer/__init__.py +0 -0
  3. databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +95 -0
  4. databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
  5. databricks/labs/lakebridge/base_install.py +20 -3
  6. databricks/labs/lakebridge/cli.py +32 -59
  7. databricks/labs/lakebridge/contexts/application.py +7 -0
  8. databricks/labs/lakebridge/deployment/job.py +2 -2
  9. databricks/labs/lakebridge/helpers/file_utils.py +36 -0
  10. databricks/labs/lakebridge/helpers/validation.py +5 -3
  11. databricks/labs/lakebridge/install.py +73 -484
  12. databricks/labs/lakebridge/reconcile/compare.py +70 -33
  13. databricks/labs/lakebridge/reconcile/connectors/data_source.py +24 -1
  14. databricks/labs/lakebridge/reconcile/connectors/databricks.py +12 -1
  15. databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +126 -0
  16. databricks/labs/lakebridge/reconcile/connectors/models.py +7 -0
  17. databricks/labs/lakebridge/reconcile/connectors/oracle.py +12 -1
  18. databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
  19. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +63 -30
  20. databricks/labs/lakebridge/reconcile/connectors/tsql.py +28 -2
  21. databricks/labs/lakebridge/reconcile/constants.py +4 -3
  22. databricks/labs/lakebridge/reconcile/execute.py +9 -810
  23. databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +133 -0
  24. databricks/labs/lakebridge/reconcile/query_builder/base.py +53 -18
  25. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
  26. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
  27. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
  28. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
  29. databricks/labs/lakebridge/reconcile/recon_config.py +3 -15
  30. databricks/labs/lakebridge/reconcile/recon_output_config.py +2 -1
  31. databricks/labs/lakebridge/reconcile/reconciliation.py +511 -0
  32. databricks/labs/lakebridge/reconcile/schema_compare.py +26 -19
  33. databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +78 -0
  34. databricks/labs/lakebridge/reconcile/trigger_recon_service.py +256 -0
  35. databricks/labs/lakebridge/reconcile/utils.py +38 -0
  36. databricks/labs/lakebridge/transpiler/execute.py +34 -28
  37. databricks/labs/lakebridge/transpiler/installers.py +523 -0
  38. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +47 -60
  39. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +2 -0
  40. databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -18
  41. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/METADATA +1 -1
  42. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/RECORD +46 -35
  43. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/WHEEL +0 -0
  44. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/entry_points.txt +0 -0
  45. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/LICENSE +0 -0
  46. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/NOTICE +0 -0
@@ -1,41 +1,35 @@
1
- import re
2
- import abc
3
1
  import dataclasses
4
- import shutil
5
- from json import loads, dump
6
2
  import logging
7
3
  import os
8
- from shutil import rmtree, move
9
- from subprocess import run, CalledProcessError
10
- import sys
11
- from typing import Any, cast
12
- from urllib import request
13
- from urllib.error import URLError, HTTPError
14
4
  import webbrowser
15
- from datetime import datetime, timezone
5
+ from collections.abc import Set, Callable, Sequence
16
6
  from pathlib import Path
17
- import xml.etree.ElementTree as ET
18
- from zipfile import ZipFile
7
+ from typing import Any, cast
19
8
 
20
- from databricks.labs.blueprint.installation import Installation, JsonValue
21
- from databricks.labs.blueprint.installation import SerdeError
9
+ from databricks.labs.blueprint.installation import Installation, JsonValue, SerdeError
22
10
  from databricks.labs.blueprint.installer import InstallState
23
11
  from databricks.labs.blueprint.tui import Prompts
24
12
  from databricks.labs.blueprint.wheels import ProductInfo
25
13
  from databricks.sdk import WorkspaceClient
26
14
  from databricks.sdk.errors import NotFound, PermissionDenied
27
15
 
16
+ from databricks.labs.lakebridge.__about__ import __version__
28
17
  from databricks.labs.lakebridge.config import (
29
- TranspileConfig,
30
- ReconcileConfig,
31
18
  DatabaseConfig,
19
+ ReconcileConfig,
32
20
  LakebridgeConfiguration,
33
21
  ReconcileMetadataConfig,
22
+ TranspileConfig,
34
23
  )
24
+ from databricks.labs.lakebridge.contexts.application import ApplicationContext
35
25
  from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
36
26
  from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
37
- from databricks.labs.lakebridge.helpers.file_utils import chdir
38
27
  from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
28
+ from databricks.labs.lakebridge.transpiler.installers import (
29
+ BladebridgeInstaller,
30
+ MorpheusInstaller,
31
+ TranspilerInstaller,
32
+ )
39
33
  from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
40
34
 
41
35
  logger = logging.getLogger(__name__)
@@ -43,366 +37,6 @@ logger = logging.getLogger(__name__)
43
37
  TRANSPILER_WAREHOUSE_PREFIX = "Lakebridge Transpiler Validation"
44
38
 
45
39
 
46
- class TranspilerInstaller(abc.ABC):
47
- def __init__(self, repository: TranspilerRepository) -> None:
48
- self._repository = repository
49
-
50
- _version_pattern = re.compile(r"[_-](\d+(?:[.\-_]\w*\d+)+)")
51
-
52
- @classmethod
53
- def get_local_artifact_version(cls, artifact: Path) -> str | None:
54
- # TODO: Get the version from the metadata inside the artifact rather than relying on the filename.
55
- match = cls._version_pattern.search(artifact.stem)
56
- if not match:
57
- return None
58
- group = match.group(0)
59
- if not group:
60
- return None
61
- # TODO: Update the regex to take care of these trimming scenarios.
62
- if group.startswith('-'):
63
- group = group[1:]
64
- if group.endswith("-py3"):
65
- group = group[:-4]
66
- return group
67
-
68
- @classmethod
69
- def _store_product_state(cls, product_path: Path, version: str) -> None:
70
- state_path = product_path / "state"
71
- state_path.mkdir()
72
- version_data = {"version": f"v{version}", "date": datetime.now(timezone.utc).isoformat()}
73
- version_path = state_path / "version.json"
74
- with version_path.open("w", encoding="utf-8") as f:
75
- dump(version_data, f)
76
- f.write("\n")
77
-
78
-
79
- class WheelInstaller(TranspilerInstaller):
80
-
81
- @classmethod
82
- def get_latest_artifact_version_from_pypi(cls, product_name: str) -> str | None:
83
- try:
84
- with request.urlopen(f"https://pypi.org/pypi/{product_name}/json") as server:
85
- text: bytes = server.read()
86
- data: dict[str, Any] = loads(text)
87
- return data.get("info", {}).get('version', None)
88
- except HTTPError as e:
89
- logger.error(f"Error while fetching PyPI metadata: {product_name}", exc_info=e)
90
- return None
91
-
92
- def __init__(
93
- self,
94
- repository: TranspilerRepository,
95
- product_name: str,
96
- pypi_name: str,
97
- artifact: Path | None = None,
98
- ) -> None:
99
- super().__init__(repository)
100
- self._product_name = product_name
101
- self._pypi_name = pypi_name
102
- self._artifact = artifact
103
-
104
- def install(self) -> Path | None:
105
- return self._install_checking_versions()
106
-
107
- def _install_checking_versions(self) -> Path | None:
108
- latest_version = (
109
- self.get_local_artifact_version(self._artifact)
110
- if self._artifact
111
- else self.get_latest_artifact_version_from_pypi(self._pypi_name)
112
- )
113
- if latest_version is None:
114
- logger.warning(f"Could not determine the latest version of {self._pypi_name}")
115
- logger.error(f"Failed to install transpiler: {self._product_name}")
116
- return None
117
- installed_version = self._repository.get_installed_version(self._product_name)
118
- if installed_version == latest_version:
119
- logger.info(f"{self._pypi_name} v{latest_version} already installed")
120
- return None
121
- return self._install_latest_version(latest_version)
122
-
123
- def _install_latest_version(self, version: str) -> Path | None:
124
- logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
125
- self._product_path = self._repository.transpilers_path() / self._product_name
126
- backup_path = Path(f"{self._product_path!s}-saved")
127
- if self._product_path.exists():
128
- os.rename(self._product_path, backup_path)
129
- self._install_path = self._product_path / "lib"
130
- self._install_path.mkdir(parents=True, exist_ok=True)
131
- try:
132
- result = self._unsafe_install_latest_version(version)
133
- logger.info(f"Successfully installed {self._pypi_name} v{version}")
134
- if backup_path.exists():
135
- rmtree(backup_path)
136
- return result
137
- except (CalledProcessError, ValueError) as e:
138
- logger.error(f"Failed to install {self._pypi_name} v{version}", exc_info=e)
139
- rmtree(self._product_path)
140
- if backup_path.exists():
141
- os.rename(backup_path, self._product_path)
142
- return None
143
-
144
- def _unsafe_install_latest_version(self, version: str) -> Path | None:
145
- self._create_venv()
146
- self._install_with_pip()
147
- self._copy_lsp_resources()
148
- return self._post_install(version)
149
-
150
- def _create_venv(self) -> None:
151
- with chdir(self._install_path):
152
- self._unsafe_create_venv()
153
-
154
- def _unsafe_create_venv(self) -> None:
155
- # using the venv module doesn't work (maybe it's not possible to create a venv from a venv ?)
156
- # so falling back to something that works
157
- # for some reason this requires shell=True, so pass full cmd line
158
- cmd_line = f"{sys.executable} -m venv .venv"
159
- completed = run(cmd_line, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
160
- if completed.returncode:
161
- logger.error(f"Failed to create venv, error code: {completed.returncode}")
162
- if completed.stdout:
163
- for line in completed.stdout:
164
- logger.error(line)
165
- if completed.stderr:
166
- for line in completed.stderr:
167
- logger.error(line)
168
- completed.check_returncode()
169
- self._venv = self._install_path / ".venv"
170
- self._site_packages = self._locate_site_packages()
171
-
172
- def _locate_site_packages(self) -> Path:
173
- # can't use sysconfig because it only works for currently running python
174
- if sys.platform == "win32":
175
- return self._locate_site_packages_windows()
176
- return self._locate_site_packages_linux_or_macos()
177
-
178
- def _locate_site_packages_windows(self) -> Path:
179
- packages = self._venv / "Lib" / "site-packages"
180
- if packages.exists():
181
- return packages
182
- raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
183
-
184
- def _locate_site_packages_linux_or_macos(self) -> Path:
185
- lib = self._venv / "lib"
186
- for dir_ in os.listdir(lib):
187
- if dir_.startswith("python"):
188
- packages = lib / dir_ / "site-packages"
189
- if packages.exists():
190
- return packages
191
- raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
192
-
193
- def _install_with_pip(self) -> None:
194
- with chdir(self._install_path):
195
- # the way to call pip from python is highly sensitive to os and source type
196
- if self._artifact:
197
- self._install_local_artifact()
198
- else:
199
- self._install_remote_artifact()
200
-
201
- def _install_local_artifact(self) -> None:
202
- pip = self._locate_pip()
203
- pip = pip.relative_to(self._install_path)
204
- target = self._site_packages
205
- target = target.relative_to(self._install_path)
206
- if sys.platform == "win32":
207
- command = f"{pip!s} install {self._artifact!s} -t {target!s}"
208
- completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
209
- else:
210
- command = f"'{pip!s}' install '{self._artifact!s}' -t '{target!s}'"
211
- completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
212
- # checking return code later makes debugging easier
213
- completed.check_returncode()
214
-
215
- def _install_remote_artifact(self) -> None:
216
- pip = self._locate_pip()
217
- pip = pip.relative_to(self._install_path)
218
- target = self._site_packages
219
- target = target.relative_to(self._install_path)
220
- if sys.platform == "win32":
221
- args = [str(pip), "install", self._pypi_name, "-t", str(target)]
222
- completed = run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
223
- else:
224
- command = f"'{pip!s}' install {self._pypi_name} -t '{target!s}'"
225
- completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
226
- # checking return code later makes debugging easier
227
- completed.check_returncode()
228
-
229
- def _locate_pip(self) -> Path:
230
- return self._venv / "Scripts" / "pip3.exe" if sys.platform == "win32" else self._venv / "bin" / "pip3"
231
-
232
- def _copy_lsp_resources(self):
233
- lsp = self._site_packages / "lsp"
234
- if not lsp.exists():
235
- raise ValueError("Installed transpiler is missing a 'lsp' folder")
236
- shutil.copytree(lsp, self._install_path, dirs_exist_ok=True)
237
-
238
- def _post_install(self, version: str) -> Path | None:
239
- config = self._install_path / "config.yml"
240
- if not config.exists():
241
- raise ValueError("Installed transpiler is missing a 'config.yml' file in its 'lsp' folder")
242
- install_ext = "ps1" if sys.platform == "win32" else "sh"
243
- install_script = f"installer.{install_ext}"
244
- installer = self._install_path / install_script
245
- if installer.exists():
246
- self._run_custom_installer(installer)
247
- self._store_product_state(product_path=self._product_path, version=version)
248
- return self._install_path
249
-
250
- def _run_custom_installer(self, installer):
251
- args = [str(installer)]
252
- run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, cwd=str(self._install_path), check=True)
253
-
254
-
255
- class MavenInstaller(TranspilerInstaller):
256
- # Maven Central, base URL.
257
- _maven_central_repo: str = "https://repo.maven.apache.org/maven2/"
258
-
259
- @classmethod
260
- def _artifact_base_url(cls, group_id: str, artifact_id: str) -> str:
261
- """Construct the base URL for a Maven artifact."""
262
- # Reference: https://maven.apache.org/repositories/layout.html
263
- group_path = group_id.replace(".", "/")
264
- return f"{cls._maven_central_repo}{group_path}/{artifact_id}/"
265
-
266
- @classmethod
267
- def artifact_metadata_url(cls, group_id: str, artifact_id: str) -> str:
268
- """Get the metadata URL for a Maven artifact."""
269
- # TODO: Unit test this method.
270
- return f"{cls._artifact_base_url(group_id, artifact_id)}maven-metadata.xml"
271
-
272
- @classmethod
273
- def artifact_url(
274
- cls, group_id: str, artifact_id: str, version: str, classifier: str | None = None, extension: str = "jar"
275
- ) -> str:
276
- """Get the URL for a versioned Maven artifact."""
277
- # TODO: Unit test this method, including classifier and extension.
278
- _classifier = f"-{classifier}" if classifier else ""
279
- artifact_base_url = cls._artifact_base_url(group_id, artifact_id)
280
- return f"{artifact_base_url}{version}/{artifact_id}-{version}{_classifier}.{extension}"
281
-
282
- @classmethod
283
- def get_current_maven_artifact_version(cls, group_id: str, artifact_id: str) -> str | None:
284
- url = cls.artifact_metadata_url(group_id, artifact_id)
285
- try:
286
- with request.urlopen(url) as server:
287
- text = server.read()
288
- except HTTPError as e:
289
- logger.error(f"Error while fetching maven metadata: {group_id}:{artifact_id}", exc_info=e)
290
- return None
291
- logger.debug(f"Maven metadata for {group_id}:{artifact_id}: {text}")
292
- return cls._extract_latest_release_version(text)
293
-
294
- @classmethod
295
- def _extract_latest_release_version(cls, maven_metadata: str) -> str | None:
296
- """Extract the latest release version from Maven metadata."""
297
- # Reference: https://maven.apache.org/repositories/metadata.html#The_A_Level_Metadata
298
- # TODO: Unit test this method, to verify the sequence of things it checks for.
299
- root = ET.fromstring(maven_metadata)
300
- for label in ("release", "latest"):
301
- version = root.findtext(f"./versioning/{label}")
302
- if version is not None:
303
- return version
304
- return root.findtext("./versioning/versions/version[last()]")
305
-
306
- @classmethod
307
- def download_artifact_from_maven(
308
- cls,
309
- group_id: str,
310
- artifact_id: str,
311
- version: str,
312
- target: Path,
313
- classifier: str | None = None,
314
- extension: str = "jar",
315
- ) -> bool:
316
- if target.exists():
317
- logger.warning(f"Skipping download of {group_id}:{artifact_id}:{version}; target already exists: {target}")
318
- return True
319
- url = cls.artifact_url(group_id, artifact_id, version, classifier, extension)
320
- try:
321
- path, _ = request.urlretrieve(url)
322
- logger.debug(f"Downloaded maven artefact from {url} to {path}")
323
- except URLError as e:
324
- logger.error(f"Unable to download maven artefact: {group_id}:{artifact_id}:{version}", exc_info=e)
325
- return False
326
- logger.debug(f"Moving {path} to {target}")
327
- move(path, target)
328
- logger.info(f"Successfully installed: {group_id}:{artifact_id}:{version}")
329
- return True
330
-
331
- def __init__(
332
- self,
333
- repository: TranspilerRepository,
334
- product_name: str,
335
- group_id: str,
336
- artifact_id: str,
337
- artifact: Path | None = None,
338
- ) -> None:
339
- super().__init__(repository)
340
- self._product_name = product_name
341
- self._group_id = group_id
342
- self._artifact_id = artifact_id
343
- self._artifact = artifact
344
-
345
- def install(self) -> Path | None:
346
- return self._install_checking_versions()
347
-
348
- def _install_checking_versions(self) -> Path | None:
349
- if self._artifact:
350
- latest_version = self.get_local_artifact_version(self._artifact)
351
- else:
352
- latest_version = self.get_current_maven_artifact_version(self._group_id, self._artifact_id)
353
- if latest_version is None:
354
- logger.warning(f"Could not determine the latest version of Databricks {self._product_name} transpiler")
355
- logger.error("Failed to install transpiler: Databricks {self._product_name} transpiler")
356
- return None
357
- installed_version = self._repository.get_installed_version(self._product_name)
358
- if installed_version == latest_version:
359
- logger.info(f"Databricks {self._product_name} transpiler v{latest_version} already installed")
360
- return None
361
- return self._install_version(latest_version)
362
-
363
- def _install_version(self, version: str) -> Path | None:
364
- logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
365
- self._product_path = self._repository.transpilers_path() / self._product_name
366
- backup_path = Path(f"{self._product_path!s}-saved")
367
- if backup_path.exists():
368
- rmtree(backup_path)
369
- if self._product_path.exists():
370
- os.rename(self._product_path, backup_path)
371
- self._product_path.mkdir(parents=True)
372
- self._install_path = self._product_path / "lib"
373
- self._install_path.mkdir()
374
- try:
375
- if self._unsafe_install_version(version):
376
- logger.info(f"Successfully installed {self._product_name} v{version}")
377
- self._store_product_state(self._product_path, version)
378
- if backup_path.exists():
379
- rmtree(backup_path)
380
- return self._product_path
381
- except (KeyError, ValueError) as e:
382
- logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}", exc_info=e)
383
- rmtree(self._product_path)
384
- if backup_path.exists():
385
- os.rename(backup_path, self._product_path)
386
- return None
387
-
388
- def _unsafe_install_version(self, version: str) -> bool:
389
- jar_file_path = self._install_path / f"{self._artifact_id}.jar"
390
- if self._artifact:
391
- logger.debug(f"Copying '{self._artifact!s}' to '{jar_file_path!s}'")
392
- shutil.copyfile(self._artifact, jar_file_path)
393
- elif not self.download_artifact_from_maven(self._group_id, self._artifact_id, version, jar_file_path):
394
- logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}")
395
- return False
396
- self._copy_lsp_config(jar_file_path)
397
- return True
398
-
399
- def _copy_lsp_config(self, jar_file_path: Path) -> None:
400
- with ZipFile(jar_file_path) as zip_file:
401
- zip_file.extract("lsp/config.yml", self._install_path)
402
- shutil.move(self._install_path / "lsp" / "config.yml", self._install_path / "config.yml")
403
- os.rmdir(self._install_path / "lsp")
404
-
405
-
406
40
  class WorkspaceInstaller:
407
41
  def __init__(
408
42
  self,
@@ -414,7 +48,12 @@ class WorkspaceInstaller:
414
48
  resource_configurator: ResourceConfigurator,
415
49
  workspace_installation: WorkspaceInstallation,
416
50
  environ: dict[str, str] | None = None,
51
+ *,
417
52
  transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
53
+ transpiler_installers: Sequence[Callable[[TranspilerRepository], TranspilerInstaller]] = (
54
+ BladebridgeInstaller,
55
+ MorpheusInstaller,
56
+ ),
418
57
  ):
419
58
  self._ws = ws
420
59
  self._prompts = prompts
@@ -424,6 +63,7 @@ class WorkspaceInstaller:
424
63
  self._resource_configurator = resource_configurator
425
64
  self._ws_installation = workspace_installation
426
65
  self._transpiler_repository = transpiler_repository
66
+ self._transpiler_installer_factories = transpiler_installers
427
67
 
428
68
  if not environ:
429
69
  environ = dict(os.environ.items())
@@ -432,15 +72,19 @@ class WorkspaceInstaller:
432
72
  msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime"
433
73
  raise SystemExit(msg)
434
74
 
75
+ @property
76
+ def _transpiler_installers(self) -> Set[TranspilerInstaller]:
77
+ return frozenset(factory(self._transpiler_repository) for factory in self._transpiler_installer_factories)
78
+
435
79
  def run(
436
80
  self, module: str, config: LakebridgeConfiguration | None = None, artifact: str | None = None
437
81
  ) -> LakebridgeConfiguration:
438
82
  logger.debug(f"Initializing workspace installation for module: {module} (config: {config})")
439
83
  if module == "transpile" and artifact:
440
- self.install_artifact(artifact)
84
+ self._install_artifact(artifact)
441
85
  elif module in {"transpile", "all"}:
442
- self.install_bladebridge()
443
- self.install_morpheus()
86
+ for transpiler_installer in self._transpiler_installers:
87
+ transpiler_installer.install()
444
88
  if not config:
445
89
  config = self.configure(module)
446
90
  if self._is_testing():
@@ -449,116 +93,36 @@ class WorkspaceInstaller:
449
93
  logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
450
94
  return config
451
95
 
452
- def install_bladebridge(self, artifact: Path | None = None) -> None:
453
- local_name = "bladebridge"
454
- pypi_name = "databricks-bb-plugin"
455
- wheel_installer = WheelInstaller(self._transpiler_repository, local_name, pypi_name, artifact)
456
- wheel_installer.install()
457
-
458
- def install_morpheus(self, artifact: Path | None = None) -> None:
459
- if not self.is_java_version_okay():
460
- logger.error(
461
- "The morpheus transpiler requires Java 11 or above. Please install Java and re-run 'install-transpile'."
462
- )
463
- return
464
- product_name = "databricks-morph-plugin"
465
- group_id = "com.databricks.labs"
466
- artifact_id = product_name
467
- maven_installer = MavenInstaller(self._transpiler_repository, product_name, group_id, artifact_id, artifact)
468
- maven_installer.install()
469
-
470
- @classmethod
471
- def is_java_version_okay(cls) -> bool:
472
- detected_java = cls.find_java()
473
- match detected_java:
474
- case None:
475
- logger.warning("No Java executable found in the system PATH.")
476
- return False
477
- case (java_executable, None):
478
- logger.warning(f"Java found, but could not determine the version: {java_executable}.")
479
- return False
480
- case (java_executable, bytes(raw_version)):
481
- logger.warning(f"Java found ({java_executable}), but could not parse the version:\n{raw_version}")
482
- return False
483
- case (java_executable, tuple(old_version)) if old_version < (11, 0, 0, 0):
484
- version_str = ".".join(str(v) for v in old_version)
485
- logger.warning(f"Java found ({java_executable}), but version {version_str} is too old.")
486
- return False
487
- case _:
488
- return True
489
-
490
- def install_artifact(self, artifact: str):
96
+ def upgrade_installed_transpilers(self) -> bool:
97
+ """Detect and upgrade, if possible and necessary, installed transpilers."""
98
+ installed_transpilers = self._transpiler_repository.all_transpiler_names()
99
+ if installed_transpilers:
100
+ logger.info(f"Detected installed transpilers: {sorted(installed_transpilers)}")
101
+ upgraded = False
102
+ for transpiler_installer in self._transpiler_installers:
103
+ name = transpiler_installer.name
104
+ if name in installed_transpilers:
105
+ logger.info(f"Checking for {name} upgrades...")
106
+ upgraded |= transpiler_installer.install()
107
+ # If we upgraded anything, the configuration process needs to run again.
108
+ if upgraded:
109
+ config = self.configure("transpile")
110
+ if not self._is_testing():
111
+ self._ws_installation.install(config)
112
+ return upgraded
113
+
114
+ def _install_artifact(self, artifact: str) -> None:
491
115
  path = Path(artifact)
492
116
  if not path.exists():
493
117
  logger.error(f"Could not locate artifact {artifact}")
494
118
  return
495
- if "databricks-morph-plugin" in path.name:
496
- self.install_morpheus(path)
497
- elif "databricks_bb_plugin" in path.name:
498
- self.install_bladebridge(path)
119
+ for transpiler_installer in self._transpiler_installers:
120
+ if transpiler_installer.can_install(path):
121
+ transpiler_installer.install(path)
122
+ break
499
123
  else:
500
124
  logger.fatal(f"Cannot install unsupported artifact: {artifact}")
501
125
 
502
- @classmethod
503
- def find_java(cls) -> tuple[Path, tuple[int, int, int, int] | bytes | None] | None:
504
- """Locate Java and return its version, as reported by `java -version`.
505
-
506
- The java executable is currently located by searching the system PATH. Its version is parsed from the output of
507
- the `java -version` command, which has been standardized since Java 10.
508
-
509
- Returns:
510
- a tuple of its path and the version as a tuple of integers (feature, interim, update, patch), if the java
511
- executable could be located. If the version cannot be parsed, instead the raw version information is
512
- returned, or `None` as a last resort. When no java executable is found, `None` is returned instead of a
513
- tuple.
514
- """
515
- # Platform-independent way to reliably locate the java executable.
516
- # Reference: https://docs.python.org/3.10/library/subprocess.html#popen-constructor
517
- java_executable = shutil.which("java")
518
- if java_executable is None:
519
- return None
520
- java_executable_path = Path(java_executable)
521
- logger.debug(f"Using java executable: {java_executable_path!r}")
522
- try:
523
- completed = run([str(java_executable_path), "-version"], shell=False, capture_output=True, check=True)
524
- except CalledProcessError as e:
525
- logger.debug(
526
- f"Failed to run {e.args!r} (exit-code={e.returncode}, stdout={e.stdout!r}, stderr={e.stderr!r})",
527
- exc_info=e,
528
- )
529
- return java_executable_path, None
530
- # It might not be ascii, but the bits we care about are so this will never fail.
531
- raw_output = completed.stderr
532
- java_version_output = raw_output.decode("ascii", errors="ignore")
533
- java_version = cls._parse_java_version(java_version_output)
534
- if java_version is None:
535
- return java_executable_path, raw_output.strip()
536
- logger.debug(f"Detected java version: {java_version}")
537
- return java_executable_path, java_version
538
-
539
- # Pattern to match a Java version string, compiled at import time to ensure it's valid.
540
- # Ref: https://docs.oracle.com/en/java/javase/11/install/version-string-format.html
541
- _java_version_pattern = re.compile(
542
- r' version "(?P<feature>\d+)(?:\.(?P<interim>\d+)(?:\.(?P<update>\d+)(?:\.(?P<patch>\d+))?)?)?"'
543
- )
544
-
545
- @classmethod
546
- def _parse_java_version(cls, version: str) -> tuple[int, int, int, int] | None:
547
- """Locate and parse the Java version in the output of `java -version`."""
548
- # Output looks like this:
549
- # openjdk version "24.0.1" 2025-04-15
550
- # OpenJDK Runtime Environment Temurin-24.0.1+9 (build 24.0.1+9)
551
- # OpenJDK 64-Bit Server VM Temurin-24.0.1+9 (build 24.0.1+9, mixed mode)
552
- match = cls._java_version_pattern.search(version)
553
- if not match:
554
- logger.debug(f"Could not parse java version: {version!r}")
555
- return None
556
- feature = int(match["feature"])
557
- interim = int(match["interim"] or 0)
558
- update = int(match["update"] or 0)
559
- patch = int(match["patch"] or 0)
560
- return feature, interim, update, patch
561
-
562
126
  def configure(self, module: str) -> LakebridgeConfiguration:
563
127
  match module:
564
128
  case "transpile":
@@ -802,3 +366,28 @@ class WorkspaceInstaller:
802
366
 
803
367
  def _has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None = None):
804
368
  self._resource_configurator.has_necessary_access(catalog_name, schema_name, volume_name)
369
+
370
+
371
+ def installer(ws: WorkspaceClient, transpiler_repository: TranspilerRepository) -> WorkspaceInstaller:
372
+ app_context = ApplicationContext(_verify_workspace_client(ws))
373
+ return WorkspaceInstaller(
374
+ app_context.workspace_client,
375
+ app_context.prompts,
376
+ app_context.installation,
377
+ app_context.install_state,
378
+ app_context.product_info,
379
+ app_context.resource_configurator,
380
+ app_context.workspace_installation,
381
+ transpiler_repository=transpiler_repository,
382
+ )
383
+
384
+
385
+ def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
386
+ """Verifies the workspace client configuration, ensuring it has the correct product info."""
387
+
388
+ # Using reflection to set right value for _product_info for telemetry
389
+ product_info = getattr(ws.config, '_product_info')
390
+ if product_info[0] != "lakebridge":
391
+ setattr(ws.config, '_product_info', ('lakebridge', __version__))
392
+
393
+ return ws