databricks-labs-lakebridge 0.10.5__py3-none-any.whl → 0.10.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. databricks/labs/lakebridge/__about__.py +1 -1
  2. databricks/labs/lakebridge/analyzer/__init__.py +0 -0
  3. databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +95 -0
  4. databricks/labs/lakebridge/base_install.py +24 -3
  5. databricks/labs/lakebridge/cli.py +57 -72
  6. databricks/labs/lakebridge/config.py +1 -1
  7. databricks/labs/lakebridge/contexts/application.py +11 -4
  8. databricks/labs/lakebridge/deployment/dashboard.py +2 -1
  9. databricks/labs/lakebridge/deployment/installation.py +11 -11
  10. databricks/labs/lakebridge/deployment/job.py +2 -2
  11. databricks/labs/lakebridge/helpers/file_utils.py +36 -0
  12. databricks/labs/lakebridge/install.py +228 -278
  13. databricks/labs/lakebridge/reconcile/compare.py +70 -33
  14. databricks/labs/lakebridge/reconcile/connectors/data_source.py +19 -0
  15. databricks/labs/lakebridge/reconcile/connectors/databricks.py +11 -1
  16. databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +126 -0
  17. databricks/labs/lakebridge/reconcile/connectors/models.py +7 -0
  18. databricks/labs/lakebridge/reconcile/connectors/oracle.py +11 -1
  19. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +14 -2
  20. databricks/labs/lakebridge/reconcile/connectors/tsql.py +27 -2
  21. databricks/labs/lakebridge/reconcile/constants.py +4 -3
  22. databricks/labs/lakebridge/reconcile/execute.py +9 -810
  23. databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +133 -0
  24. databricks/labs/lakebridge/reconcile/query_builder/base.py +3 -7
  25. databricks/labs/lakebridge/reconcile/recon_config.py +3 -0
  26. databricks/labs/lakebridge/reconcile/recon_output_config.py +2 -1
  27. databricks/labs/lakebridge/reconcile/reconciliation.py +508 -0
  28. databricks/labs/lakebridge/reconcile/schema_compare.py +26 -19
  29. databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +98 -0
  30. databricks/labs/lakebridge/reconcile/trigger_recon_service.py +253 -0
  31. databricks/labs/lakebridge/reconcile/utils.py +38 -0
  32. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +48 -63
  33. databricks/labs/lakebridge/transpiler/repository.py +123 -0
  34. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +2 -0
  35. databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -18
  36. {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/METADATA +1 -1
  37. {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/RECORD +41 -31
  38. {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/WHEEL +0 -0
  39. {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/entry_points.txt +0 -0
  40. {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/licenses/LICENSE +0 -0
  41. {databricks_labs_lakebridge-0.10.5.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/licenses/NOTICE +0 -0
@@ -1,85 +1,113 @@
1
1
  import re
2
2
  import abc
3
3
  import dataclasses
4
- import shutil
5
- from collections.abc import Iterable
6
- from json import loads, dump
7
4
  import logging
8
5
  import os
9
- from shutil import rmtree, move
10
- from subprocess import run, CalledProcessError
6
+ import shutil
11
7
  import sys
12
- from typing import Any, cast
13
- from urllib import request
14
- from urllib.error import URLError, HTTPError
8
+ import venv
15
9
  import webbrowser
10
+ import xml.etree.ElementTree as ET
16
11
  from datetime import datetime, timezone
12
+ from json import loads, dump
17
13
  from pathlib import Path
18
- import xml.etree.ElementTree as ET
14
+ from shutil import rmtree, move
15
+ from subprocess import run, CalledProcessError
16
+ from typing import Any, Literal, cast
17
+ from urllib import request
18
+ from urllib.error import URLError, HTTPError
19
19
  from zipfile import ZipFile
20
20
 
21
- from databricks.labs.blueprint.installation import Installation, JsonValue
22
- from databricks.labs.blueprint.installation import SerdeError
21
+ from databricks.labs.blueprint.installation import Installation, JsonValue, SerdeError
23
22
  from databricks.labs.blueprint.installer import InstallState
24
23
  from databricks.labs.blueprint.tui import Prompts
25
24
  from databricks.labs.blueprint.wheels import ProductInfo
26
25
  from databricks.sdk import WorkspaceClient
27
26
  from databricks.sdk.errors import NotFound, PermissionDenied
28
27
 
28
+ from databricks.labs.lakebridge.__about__ import __version__
29
29
  from databricks.labs.lakebridge.config import (
30
- TranspileConfig,
31
- ReconcileConfig,
32
30
  DatabaseConfig,
33
- RemorphConfigs,
31
+ ReconcileConfig,
32
+ LakebridgeConfiguration,
34
33
  ReconcileMetadataConfig,
35
- LSPConfigOptionV1,
34
+ TranspileConfig,
36
35
  )
37
-
36
+ from databricks.labs.lakebridge.contexts.application import ApplicationContext
38
37
  from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
39
38
  from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
40
- from databricks.labs.lakebridge.helpers.file_utils import chdir
41
39
  from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
42
- from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPConfig
40
+ from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
43
41
 
44
42
  logger = logging.getLogger(__name__)
45
43
 
46
44
  TRANSPILER_WAREHOUSE_PREFIX = "Lakebridge Transpiler Validation"
47
45
 
48
46
 
49
- class TranspilerInstaller(abc.ABC):
47
+ class _PathBackup:
48
+ """A context manager to preserve a path before performing an operation, and optionally restore it afterwards."""
50
49
 
51
- @classmethod
52
- def labs_path(cls) -> Path:
53
- return Path.home() / ".databricks" / "labs"
50
+ def __init__(self, path: Path) -> None:
51
+ self._path = path
52
+ self._backup_path: Path | None = None
53
+ self._finished = False
54
54
 
55
- @classmethod
56
- def transpilers_path(cls) -> Path:
57
- return cls.labs_path() / "remorph-transpilers"
55
+ def __enter__(self) -> "_PathBackup":
56
+ self.start()
57
+ return self
58
58
 
59
- @classmethod
60
- def install_from_pypi(cls, product_name: str, pypi_name: str, artifact: Path | None = None) -> Path | None:
61
- installer = WheelInstaller(product_name, pypi_name, artifact)
62
- return installer.install()
59
+ def start(self) -> None:
60
+ """Start the backup process by creating a backup of the path, if it already exists."""
61
+ backup_path = self._path.with_name(f"{self._path.name}-saved")
62
+ if backup_path.exists():
63
+ logger.debug(f"Existing backup found, removing: {backup_path}")
64
+ rmtree(backup_path)
65
+ if self._path.exists():
66
+ logger.debug(f"Backing up existing path: {self._path} -> {backup_path}")
67
+ os.rename(self._path, backup_path)
68
+ self._backup_path = backup_path
69
+ else:
70
+ self._backup_path = None
71
+
72
+ def rollback(self) -> None:
73
+ """Rollback the operation by restoring the backup path, if it exists."""
74
+ assert not self._finished, "Can only rollback/commit once."
75
+ logger.debug(f"Removing path: {self._path}")
76
+ rmtree(self._path)
77
+ if self._backup_path is not None:
78
+ logger.debug(f"Restoring previous path: {self._backup_path} -> {self._path}")
79
+ os.rename(self._backup_path, self._path)
80
+ self._backup_path = None
81
+ self._finished = True
82
+
83
+ def commit(self) -> None:
84
+ """Commit the operation by removing the backup path, if it exists."""
85
+ assert not self._finished, "Can only rollback/commit once."
86
+ if self._backup_path is not None:
87
+ logger.debug(f"Removing backup path: {self._backup_path}")
88
+ rmtree(self._backup_path)
89
+ self._backup_path = None
90
+ self._finished = True
91
+
92
+ def __exit__(self, exc_type, exc_val, exc_tb) -> Literal[False]:
93
+ if not self._finished:
94
+ # Automatically commit or rollback based on whether an exception is underway.
95
+ if exc_val is None:
96
+ self.commit()
97
+ else:
98
+ self.rollback()
99
+ return False # Do not suppress any exception underway
63
100
 
64
- @classmethod
65
- def install_from_maven(
66
- cls, product_name: str, group_id: str, artifact_id: str, artifact: Path | None = None
67
- ) -> Path | None:
68
- installer = MavenInstaller(product_name, group_id, artifact_id, artifact)
69
- return installer.install()
70
101
 
71
- @classmethod
72
- def get_installed_version(cls, product_name: str, is_transpiler=True) -> str | None:
73
- product_path = (cls.transpilers_path() if is_transpiler else cls.labs_path()) / product_name
74
- current_version_path = product_path / "state" / "version.json"
75
- if not current_version_path.exists():
76
- return None
77
- text = current_version_path.read_text("utf-8")
78
- data: dict[str, Any] = loads(text)
79
- version: str | None = data.get("version", None)
80
- if not version or not version.startswith("v"):
81
- return None
82
- return version[1:]
102
+ class TranspilerInstaller(abc.ABC):
103
+
104
+ # TODO: Remove these properties when post-install is removed.
105
+ _install_path: Path
106
+ """The path where the transpiler is being installed, once this starts."""
107
+
108
+ def __init__(self, repository: TranspilerRepository, product_name: str) -> None:
109
+ self._repository = repository
110
+ self._product_name = product_name
83
111
 
84
112
  _version_pattern = re.compile(r"[_-](\d+(?:[.\-_]\w*\d+)+)")
85
113
 
@@ -99,65 +127,6 @@ class TranspilerInstaller(abc.ABC):
99
127
  group = group[:-4]
100
128
  return group
101
129
 
102
- @classmethod
103
- def all_transpiler_configs(cls) -> dict[str, LSPConfig]:
104
- all_configs = cls._all_transpiler_configs()
105
- return {config.name: config for config in all_configs}
106
-
107
- @classmethod
108
- def all_transpiler_names(cls) -> set[str]:
109
- all_configs = cls.all_transpiler_configs()
110
- return set(all_configs.keys())
111
-
112
- @classmethod
113
- def all_dialects(cls) -> set[str]:
114
- all_dialects: set[str] = set()
115
- for config in cls._all_transpiler_configs():
116
- all_dialects = all_dialects.union(config.remorph.dialects)
117
- return all_dialects
118
-
119
- @classmethod
120
- def transpilers_with_dialect(cls, dialect: str) -> set[str]:
121
- configs = filter(lambda cfg: dialect in cfg.remorph.dialects, cls.all_transpiler_configs().values())
122
- return set(config.name for config in configs)
123
-
124
- @classmethod
125
- def transpiler_config_path(cls, transpiler_name) -> Path:
126
- config = cls.all_transpiler_configs().get(transpiler_name, None)
127
- if not config:
128
- raise ValueError(f"No such transpiler: {transpiler_name}")
129
- return config.path
130
-
131
- @classmethod
132
- def transpiler_config_options(cls, transpiler_name, source_dialect) -> list[LSPConfigOptionV1]:
133
- config = cls.all_transpiler_configs().get(transpiler_name, None)
134
- if not config:
135
- return [] # gracefully returns an empty list, since this can only happen during testing
136
- return config.options_for_dialect(source_dialect)
137
-
138
- @classmethod
139
- def _all_transpiler_configs(cls) -> Iterable[LSPConfig]:
140
- path = cls.transpilers_path()
141
- if path.exists():
142
- all_files = os.listdir(path)
143
- for file in all_files:
144
- config = cls._transpiler_config(cls.transpilers_path() / file)
145
- if config:
146
- yield config
147
-
148
- @classmethod
149
- def _transpiler_config(cls, path: Path) -> LSPConfig | None:
150
- if not path.is_dir() or not (path / "lib").is_dir():
151
- return None
152
- config_path = path / "lib" / "config.yml"
153
- if not config_path.is_file():
154
- return None
155
- try:
156
- return LSPConfig.load(config_path)
157
- except ValueError as e:
158
- logger.error(f"Could not load config: {path!s}", exc_info=e)
159
- return None
160
-
161
130
  @classmethod
162
131
  def _store_product_state(cls, product_path: Path, version: str) -> None:
163
132
  state_path = product_path / "state"
@@ -168,9 +137,44 @@ class TranspilerInstaller(abc.ABC):
168
137
  dump(version_data, f)
169
138
  f.write("\n")
170
139
 
140
+ def _install_version_with_backup(self, version: str) -> Path | None:
141
+ """Install a specific version of the transpiler, with backup handling."""
142
+ logger.info(f"Installing Databricks {self._product_name} transpiler (v{version})")
143
+ product_path = self._repository.transpilers_path() / self._product_name
144
+ with _PathBackup(product_path) as backup:
145
+ self._install_path = product_path / "lib"
146
+ self._install_path.mkdir(parents=True, exist_ok=True)
147
+ try:
148
+ result = self._install_version(version)
149
+ except (CalledProcessError, KeyError, ValueError) as e:
150
+ # Warning: if you end up here under the IntelliJ/PyCharm debugger, it can be because the debugger is
151
+ # trying to inject itself into the subprocess. Try disabling:
152
+ # Settings | Build, Execution, Deployment | Python Debugger | Attach to subprocess automatically while debugging
153
+ # Note: Subprocess output is not captured, and should already be visible in the console.
154
+ logger.error(f"Failed to install {self._product_name} transpiler (v{version})", exc_info=e)
155
+ result = False
156
+
157
+ if result:
158
+ logger.info(f"Successfully installed {self._product_name} transpiler (v{version})")
159
+ self._store_product_state(product_path=product_path, version=version)
160
+ backup.commit()
161
+ return product_path
162
+ backup.rollback()
163
+ return None
164
+
165
+ @abc.abstractmethod
166
+ def _install_version(self, version: str) -> bool:
167
+ """Install a specific version of the transpiler, returning True if successful."""
168
+
171
169
 
172
170
  class WheelInstaller(TranspilerInstaller):
173
171
 
172
+ _venv_exec_cmd: Path
173
+ """Once created, the command to run the virtual environment's Python executable."""
174
+
175
+ _site_packages: Path
176
+ """Once created, the path to the site-packages directory in the virtual environment."""
177
+
174
178
  @classmethod
175
179
  def get_latest_artifact_version_from_pypi(cls, product_name: str) -> str | None:
176
180
  try:
@@ -182,8 +186,14 @@ class WheelInstaller(TranspilerInstaller):
182
186
  logger.error(f"Error while fetching PyPI metadata: {product_name}", exc_info=e)
183
187
  return None
184
188
 
185
- def __init__(self, product_name: str, pypi_name: str, artifact: Path | None = None):
186
- self._product_name = product_name
189
+ def __init__(
190
+ self,
191
+ repository: TranspilerRepository,
192
+ product_name: str,
193
+ pypi_name: str,
194
+ artifact: Path | None = None,
195
+ ) -> None:
196
+ super().__init__(repository, product_name)
187
197
  self._pypi_name = pypi_name
188
198
  self._artifact = artifact
189
199
 
@@ -200,122 +210,49 @@ class WheelInstaller(TranspilerInstaller):
200
210
  logger.warning(f"Could not determine the latest version of {self._pypi_name}")
201
211
  logger.error(f"Failed to install transpiler: {self._product_name}")
202
212
  return None
203
- installed_version = self.get_installed_version(self._product_name)
213
+ installed_version = self._repository.get_installed_version(self._product_name)
204
214
  if installed_version == latest_version:
205
215
  logger.info(f"{self._pypi_name} v{latest_version} already installed")
206
216
  return None
207
- return self._install_latest_version(latest_version)
208
-
209
- def _install_latest_version(self, version: str) -> Path | None:
210
- logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
211
- # use type(self) to workaround a mock bug on class methods
212
- self._product_path = type(self).transpilers_path() / self._product_name
213
- backup_path = Path(f"{self._product_path!s}-saved")
214
- if self._product_path.exists():
215
- os.rename(self._product_path, backup_path)
216
- self._product_path.mkdir(parents=True, exist_ok=True)
217
- self._install_path = self._product_path / "lib"
218
- self._install_path.mkdir(exist_ok=True)
219
- try:
220
- result = self._unsafe_install_latest_version(version)
221
- logger.info(f"Successfully installed {self._pypi_name} v{version}")
222
- if backup_path.exists():
223
- rmtree(backup_path)
224
- return result
225
- except (CalledProcessError, ValueError) as e:
226
- logger.error(f"Failed to install {self._pypi_name} v{version}", exc_info=e)
227
- rmtree(self._product_path)
228
- if backup_path.exists():
229
- os.rename(backup_path, self._product_path)
230
- return None
217
+ return self._install_version_with_backup(latest_version)
231
218
 
232
- def _unsafe_install_latest_version(self, version: str) -> Path | None:
219
+ def _install_version(self, version: str) -> bool:
233
220
  self._create_venv()
234
221
  self._install_with_pip()
235
222
  self._copy_lsp_resources()
236
- return self._post_install(version)
223
+ return self._post_install() is not None
237
224
 
238
225
  def _create_venv(self) -> None:
239
- with chdir(self._install_path):
240
- self._unsafe_create_venv()
241
-
242
- def _unsafe_create_venv(self) -> None:
243
- # using the venv module doesn't work (maybe it's not possible to create a venv from a venv ?)
244
- # so falling back to something that works
245
- # for some reason this requires shell=True, so pass full cmd line
246
- cmd_line = f"{sys.executable} -m venv .venv"
247
- completed = run(cmd_line, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
248
- if completed.returncode:
249
- logger.error(f"Failed to create venv, error code: {completed.returncode}")
250
- if completed.stdout:
251
- for line in completed.stdout:
252
- logger.error(line)
253
- if completed.stderr:
254
- for line in completed.stderr:
255
- logger.error(line)
256
- completed.check_returncode()
257
- self._venv = self._install_path / ".venv"
258
- self._site_packages = self._locate_site_packages()
259
-
260
- def _locate_site_packages(self) -> Path:
261
- # can't use sysconfig because it only works for currently running python
262
- if sys.platform == "win32":
263
- return self._locate_site_packages_windows()
264
- return self._locate_site_packages_linux_or_macos()
265
-
266
- def _locate_site_packages_windows(self) -> Path:
267
- packages = self._venv / "Lib" / "site-packages"
268
- if packages.exists():
269
- return packages
270
- raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
271
-
272
- def _locate_site_packages_linux_or_macos(self) -> Path:
273
- lib = self._venv / "lib"
274
- for dir_ in os.listdir(lib):
275
- if dir_.startswith("python"):
276
- packages = lib / dir_ / "site-packages"
277
- if packages.exists():
278
- return packages
279
- raise ValueError(f"Could not locate 'site-packages' for {self._venv!s}")
280
-
281
- def _install_with_pip(self) -> None:
282
- with chdir(self._install_path):
283
- # the way to call pip from python is highly sensitive to os and source type
284
- if self._artifact:
285
- self._install_local_artifact()
286
- else:
287
- self._install_remote_artifact()
288
-
289
- def _install_local_artifact(self) -> None:
290
- pip = self._locate_pip()
291
- pip = pip.relative_to(self._install_path)
292
- target = self._site_packages
293
- target = target.relative_to(self._install_path)
294
- if sys.platform == "win32":
295
- command = f"{pip!s} install {self._artifact!s} -t {target!s}"
296
- completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
297
- else:
298
- command = f"'{pip!s}' install '{self._artifact!s}' -t '{target!s}'"
299
- completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
300
- # checking return code later makes debugging easier
301
- completed.check_returncode()
302
-
303
- def _install_remote_artifact(self) -> None:
304
- pip = self._locate_pip()
305
- pip = pip.relative_to(self._install_path)
306
- target = self._site_packages
307
- target = target.relative_to(self._install_path)
308
- if sys.platform == "win32":
309
- args = [str(pip), "install", self._pypi_name, "-t", str(target)]
310
- completed = run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=False, check=False)
226
+ venv_path = self._install_path / ".venv"
227
+ # Sadly, some platform-specific variations need to be dealt with:
228
+ # - Windows venvs do not use symlinks, but rather copies, when populating the venv.
229
+ # - The library path is different.
230
+ if use_symlinks := sys.platform != "win32":
231
+ major, minor = sys.version_info[:2]
232
+ lib_path = venv_path / "lib" / f"python{major}.{minor}" / "site-packages"
311
233
  else:
312
- command = f"'{pip!s}' install {self._pypi_name} -t '{target!s}'"
313
- completed = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, shell=True, check=False)
314
- # checking return code later makes debugging easier
315
- completed.check_returncode()
234
+ lib_path = venv_path / "Lib" / "site-packages"
235
+ builder = venv.EnvBuilder(with_pip=True, prompt=f"{self._product_name}", symlinks=use_symlinks)
236
+ builder.create(venv_path)
237
+ context = builder.ensure_directories(venv_path)
238
+ logger.debug(f"Created virtual environment with context: {context}")
239
+ self._venv_exec_cmd = context.env_exec_cmd
240
+ self._site_packages = lib_path
316
241
 
317
- def _locate_pip(self) -> Path:
318
- return self._venv / "Scripts" / "pip3.exe" if sys.platform == "win32" else self._venv / "bin" / "pip3"
242
+ def _install_with_pip(self) -> None:
243
+ # Based on: https://pip.pypa.io/en/stable/user_guide/#using-pip-from-your-program
244
+ # (But with venv_exec_cmd instead of sys.executable, so that we use the venv's pip.)
245
+ to_install: Path | str = self._artifact if self._artifact is not None else self._pypi_name
246
+ command: list[Path | str] = [
247
+ self._venv_exec_cmd,
248
+ "-m",
249
+ "pip",
250
+ "--disable-pip-version-check",
251
+ "install",
252
+ to_install,
253
+ ]
254
+ result = run(command, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, check=False)
255
+ result.check_returncode()
319
256
 
320
257
  def _copy_lsp_resources(self):
321
258
  lsp = self._site_packages / "lsp"
@@ -323,21 +260,20 @@ class WheelInstaller(TranspilerInstaller):
323
260
  raise ValueError("Installed transpiler is missing a 'lsp' folder")
324
261
  shutil.copytree(lsp, self._install_path, dirs_exist_ok=True)
325
262
 
326
- def _post_install(self, version: str) -> Path | None:
263
+ def _post_install(self) -> Path | None:
327
264
  config = self._install_path / "config.yml"
328
265
  if not config.exists():
329
266
  raise ValueError("Installed transpiler is missing a 'config.yml' file in its 'lsp' folder")
330
267
  install_ext = "ps1" if sys.platform == "win32" else "sh"
331
268
  install_script = f"installer.{install_ext}"
332
- installer = self._install_path / install_script
333
- if installer.exists():
334
- self._run_custom_installer(installer)
335
- self._store_product_state(product_path=self._product_path, version=version)
269
+ installer_path = self._install_path / install_script
270
+ if installer_path.exists():
271
+ self._run_custom_installer(installer_path)
336
272
  return self._install_path
337
273
 
338
- def _run_custom_installer(self, installer):
339
- args = [str(installer)]
340
- run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, cwd=str(self._install_path), check=True)
274
+ def _run_custom_installer(self, installer_path: Path) -> None:
275
+ args = [installer_path]
276
+ run(args, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, cwd=self._install_path, check=True)
341
277
 
342
278
 
343
279
  class MavenInstaller(TranspilerInstaller):
@@ -416,8 +352,15 @@ class MavenInstaller(TranspilerInstaller):
416
352
  logger.info(f"Successfully installed: {group_id}:{artifact_id}:{version}")
417
353
  return True
418
354
 
419
- def __init__(self, product_name: str, group_id: str, artifact_id: str, artifact: Path | None = None):
420
- self._product_name = product_name
355
+ def __init__(
356
+ self,
357
+ repository: TranspilerRepository,
358
+ product_name: str,
359
+ group_id: str,
360
+ artifact_id: str,
361
+ artifact: Path | None = None,
362
+ ) -> None:
363
+ super().__init__(repository, product_name)
421
364
  self._group_id = group_id
422
365
  self._artifact_id = artifact_id
423
366
  self._artifact = artifact
@@ -434,45 +377,19 @@ class MavenInstaller(TranspilerInstaller):
434
377
  logger.warning(f"Could not determine the latest version of Databricks {self._product_name} transpiler")
435
378
  logger.error("Failed to install transpiler: Databricks {self._product_name} transpiler")
436
379
  return None
437
- installed_version = self.get_installed_version(self._product_name)
380
+ installed_version = self._repository.get_installed_version(self._product_name)
438
381
  if installed_version == latest_version:
439
382
  logger.info(f"Databricks {self._product_name} transpiler v{latest_version} already installed")
440
383
  return None
441
- return self._install_version(latest_version)
442
-
443
- def _install_version(self, version: str) -> Path | None:
444
- logger.info(f"Installing Databricks {self._product_name} transpiler v{version}")
445
- # use type(self) to workaround a mock bug on class methods
446
- self._product_path = type(self).transpilers_path() / self._product_name
447
- backup_path = Path(f"{self._product_path!s}-saved")
448
- if backup_path.exists():
449
- rmtree(backup_path)
450
- if self._product_path.exists():
451
- os.rename(self._product_path, backup_path)
452
- self._product_path.mkdir(parents=True)
453
- self._install_path = self._product_path / "lib"
454
- self._install_path.mkdir()
455
- try:
456
- if self._unsafe_install_version(version):
457
- logger.info(f"Successfully installed {self._product_name} v{version}")
458
- self._store_product_state(self._product_path, version)
459
- if backup_path.exists():
460
- rmtree(backup_path)
461
- return self._product_path
462
- except (KeyError, ValueError) as e:
463
- logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}", exc_info=e)
464
- rmtree(self._product_path)
465
- if backup_path.exists():
466
- os.rename(backup_path, self._product_path)
467
- return None
384
+ return self._install_version_with_backup(latest_version)
468
385
 
469
- def _unsafe_install_version(self, version: str) -> bool:
386
+ def _install_version(self, version: str) -> bool:
470
387
  jar_file_path = self._install_path / f"{self._artifact_id}.jar"
471
388
  if self._artifact:
472
- logger.debug(f"Copying '{self._artifact!s}' to '{jar_file_path!s}'")
389
+ logger.debug(f"Copying: {self._artifact} -> {jar_file_path}")
473
390
  shutil.copyfile(self._artifact, jar_file_path)
474
391
  elif not self.download_artifact_from_maven(self._group_id, self._artifact_id, version, jar_file_path):
475
- logger.error(f"Failed to install Databricks {self._product_name} transpiler v{version}")
392
+ logger.error(f"Failed to install Databricks {self._product_name} transpiler (v{version})")
476
393
  return False
477
394
  self._copy_lsp_config(jar_file_path)
478
395
  return True
@@ -495,6 +412,7 @@ class WorkspaceInstaller:
495
412
  resource_configurator: ResourceConfigurator,
496
413
  workspace_installation: WorkspaceInstallation,
497
414
  environ: dict[str, str] | None = None,
415
+ transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
498
416
  ):
499
417
  self._ws = ws
500
418
  self._prompts = prompts
@@ -503,6 +421,7 @@ class WorkspaceInstaller:
503
421
  self._product_info = product_info
504
422
  self._resource_configurator = resource_configurator
505
423
  self._ws_installation = workspace_installation
424
+ self._transpiler_repository = transpiler_repository
506
425
 
507
426
  if not environ:
508
427
  environ = dict(os.environ.items())
@@ -511,7 +430,9 @@ class WorkspaceInstaller:
511
430
  msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime"
512
431
  raise SystemExit(msg)
513
432
 
514
- def run(self, module: str, config: RemorphConfigs | None = None, artifact: str | None = None) -> RemorphConfigs:
433
+ def run(
434
+ self, module: str, config: LakebridgeConfiguration | None = None, artifact: str | None = None
435
+ ) -> LakebridgeConfiguration:
515
436
  logger.debug(f"Initializing workspace installation for module: {module} (config: {config})")
516
437
  if module == "transpile" and artifact:
517
438
  self.install_artifact(artifact)
@@ -526,15 +447,21 @@ class WorkspaceInstaller:
526
447
  logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
527
448
  return config
528
449
 
529
- @classmethod
530
- def install_bladebridge(cls, artifact: Path | None = None):
450
+ def has_installed_transpilers(self) -> bool:
451
+ """Detect whether there are transpilers currently installed."""
452
+ installed_transpilers = self._transpiler_repository.all_transpiler_names()
453
+ if installed_transpilers:
454
+ logger.info(f"Detected installed transpilers: {sorted(installed_transpilers)}")
455
+ return bool(installed_transpilers)
456
+
457
+ def install_bladebridge(self, artifact: Path | None = None) -> None:
531
458
  local_name = "bladebridge"
532
459
  pypi_name = "databricks-bb-plugin"
533
- TranspilerInstaller.install_from_pypi(local_name, pypi_name, artifact)
460
+ wheel_installer = WheelInstaller(self._transpiler_repository, local_name, pypi_name, artifact)
461
+ wheel_installer.install()
534
462
 
535
- @classmethod
536
- def install_morpheus(cls, artifact: Path | None = None):
537
- if not cls.is_java_version_okay():
463
+ def install_morpheus(self, artifact: Path | None = None) -> None:
464
+ if not self.is_java_version_okay():
538
465
  logger.error(
539
466
  "The morpheus transpiler requires Java 11 or above. Please install Java and re-run 'install-transpile'."
540
467
  )
@@ -542,7 +469,8 @@ class WorkspaceInstaller:
542
469
  product_name = "databricks-morph-plugin"
543
470
  group_id = "com.databricks.labs"
544
471
  artifact_id = product_name
545
- TranspilerInstaller.install_from_maven(product_name, group_id, artifact_id, artifact)
472
+ maven_installer = MavenInstaller(self._transpiler_repository, product_name, group_id, artifact_id, artifact)
473
+ maven_installer.install()
546
474
 
547
475
  @classmethod
548
476
  def is_java_version_okay(cls) -> bool:
@@ -564,16 +492,15 @@ class WorkspaceInstaller:
564
492
  case _:
565
493
  return True
566
494
 
567
- @classmethod
568
- def install_artifact(cls, artifact: str):
495
+ def install_artifact(self, artifact: str):
569
496
  path = Path(artifact)
570
497
  if not path.exists():
571
498
  logger.error(f"Could not locate artifact {artifact}")
572
499
  return
573
500
  if "databricks-morph-plugin" in path.name:
574
- cls.install_morpheus(path)
501
+ self.install_morpheus(path)
575
502
  elif "databricks_bb_plugin" in path.name:
576
- cls.install_bladebridge(path)
503
+ self.install_bladebridge(path)
577
504
  else:
578
505
  logger.fatal(f"Cannot install unsupported artifact: {artifact}")
579
506
 
@@ -637,17 +564,17 @@ class WorkspaceInstaller:
637
564
  patch = int(match["patch"] or 0)
638
565
  return feature, interim, update, patch
639
566
 
640
- def configure(self, module: str) -> RemorphConfigs:
567
+ def configure(self, module: str) -> LakebridgeConfiguration:
641
568
  match module:
642
569
  case "transpile":
643
570
  logger.info("Configuring lakebridge `transpile`.")
644
- return RemorphConfigs(self._configure_transpile(), None)
571
+ return LakebridgeConfiguration(self._configure_transpile(), None)
645
572
  case "reconcile":
646
573
  logger.info("Configuring lakebridge `reconcile`.")
647
- return RemorphConfigs(None, self._configure_reconcile())
574
+ return LakebridgeConfiguration(None, self._configure_reconcile())
648
575
  case "all":
649
576
  logger.info("Configuring lakebridge `transpile` and `reconcile`.")
650
- return RemorphConfigs(
577
+ return LakebridgeConfiguration(
651
578
  self._configure_transpile(),
652
579
  self._configure_reconcile(),
653
580
  )
@@ -697,19 +624,19 @@ class WorkspaceInstaller:
697
624
  return config
698
625
 
699
626
  def _all_installed_dialects(self) -> list[str]:
700
- return sorted(TranspilerInstaller.all_dialects())
627
+ return sorted(self._transpiler_repository.all_dialects())
701
628
 
702
629
  def _transpilers_with_dialect(self, dialect: str) -> list[str]:
703
- return sorted(TranspilerInstaller.transpilers_with_dialect(dialect))
630
+ return sorted(self._transpiler_repository.transpilers_with_dialect(dialect))
704
631
 
705
632
  def _transpiler_config_path(self, transpiler: str) -> Path:
706
- return TranspilerInstaller.transpiler_config_path(transpiler)
633
+ return self._transpiler_repository.transpiler_config_path(transpiler)
707
634
 
708
635
  def _prompt_for_new_transpile_installation(self) -> TranspileConfig:
709
636
  install_later = "Set it later"
710
637
  # TODO tidy this up, logger might not display the below in console...
711
638
  logger.info("Please answer a few questions to configure lakebridge `transpile`")
712
- all_dialects = [install_later] + self._all_installed_dialects()
639
+ all_dialects = [install_later, *self._all_installed_dialects()]
713
640
  source_dialect: str | None = self._prompts.choice("Select the source dialect:", all_dialects, sort=False)
714
641
  if source_dialect == install_later:
715
642
  source_dialect = None
@@ -760,14 +687,12 @@ class WorkspaceInstaller:
760
687
  )
761
688
 
762
689
  def _prompt_for_transpiler_options(self, transpiler_name: str, source_dialect: str) -> dict[str, Any] | None:
763
- config_options = TranspilerInstaller.transpiler_config_options(transpiler_name, source_dialect)
690
+ config_options = self._transpiler_repository.transpiler_config_options(transpiler_name, source_dialect)
764
691
  if len(config_options) == 0:
765
692
  return None
766
693
  return {option.flag: option.prompt_for_value(self._prompts) for option in config_options}
767
694
 
768
- def _configure_catalog(
769
- self,
770
- ) -> str:
695
+ def _configure_catalog(self) -> str:
771
696
  return self._resource_configurator.prompt_for_catalog_setup()
772
697
 
773
698
  def _configure_schema(
@@ -882,3 +807,28 @@ class WorkspaceInstaller:
882
807
 
883
808
  def _has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None = None):
884
809
  self._resource_configurator.has_necessary_access(catalog_name, schema_name, volume_name)
810
+
811
+
812
+ def installer(ws: WorkspaceClient, transpiler_repository: TranspilerRepository) -> WorkspaceInstaller:
813
+ app_context = ApplicationContext(_verify_workspace_client(ws))
814
+ return WorkspaceInstaller(
815
+ app_context.workspace_client,
816
+ app_context.prompts,
817
+ app_context.installation,
818
+ app_context.install_state,
819
+ app_context.product_info,
820
+ app_context.resource_configurator,
821
+ app_context.workspace_installation,
822
+ transpiler_repository=transpiler_repository,
823
+ )
824
+
825
+
826
+ def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
827
+ """Verifies the workspace client configuration, ensuring it has the correct product info."""
828
+
829
+ # Using reflection to set right value for _product_info for telemetry
830
+ product_info = getattr(ws.config, '_product_info')
831
+ if product_info[0] != "lakebridge":
832
+ setattr(ws.config, '_product_info', ('lakebridge', __version__))
833
+
834
+ return ws