databricks-labs-lakebridge 0.10.6__py3-none-any.whl → 0.10.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/labs/lakebridge/__about__.py +1 -1
- databricks/labs/lakebridge/analyzer/__init__.py +0 -0
- databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +95 -0
- databricks/labs/lakebridge/base_install.py +24 -3
- databricks/labs/lakebridge/cli.py +19 -53
- databricks/labs/lakebridge/contexts/application.py +7 -0
- databricks/labs/lakebridge/deployment/job.py +2 -2
- databricks/labs/lakebridge/helpers/file_utils.py +36 -0
- databricks/labs/lakebridge/install.py +187 -157
- databricks/labs/lakebridge/reconcile/compare.py +70 -33
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +19 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +11 -1
- databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +126 -0
- databricks/labs/lakebridge/reconcile/connectors/models.py +7 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +11 -1
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +14 -2
- databricks/labs/lakebridge/reconcile/connectors/tsql.py +27 -2
- databricks/labs/lakebridge/reconcile/constants.py +4 -3
- databricks/labs/lakebridge/reconcile/execute.py +9 -810
- databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +133 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +3 -7
- databricks/labs/lakebridge/reconcile/recon_config.py +3 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +2 -1
- databricks/labs/lakebridge/reconcile/reconciliation.py +508 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +26 -19
- databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +98 -0
- databricks/labs/lakebridge/reconcile/trigger_recon_service.py +253 -0
- databricks/labs/lakebridge/reconcile/utils.py +38 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +45 -60
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +2 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -18
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/METADATA +1 -1
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/RECORD +37 -28
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/WHEEL +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/entry_points.txt +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/licenses/LICENSE +0 -0
- {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.7.dist-info}/licenses/NOTICE +0 -0
@@ -1,2 +1,2 @@
|
|
1
1
|
# DO NOT MODIFY THIS FILE
|
2
|
-
__version__ = "0.10.6"
|
2
|
+
__version__ = "0.10.7"
|
File without changes
|
@@ -0,0 +1,95 @@
|
|
1
|
+
import tempfile
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
from databricks.sdk.service.iam import User
|
5
|
+
from databricks.sdk.core import with_user_agent_extra
|
6
|
+
|
7
|
+
from databricks.labs.blueprint.entrypoint import get_logger
|
8
|
+
from databricks.labs.blueprint.tui import Prompts
|
9
|
+
|
10
|
+
from databricks.labs.bladespector.analyzer import Analyzer, _PLATFORM_TO_SOURCE_TECHNOLOGY
|
11
|
+
|
12
|
+
from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
|
13
|
+
from databricks.labs.lakebridge.helpers.file_utils import check_path, move_tmp_file
|
14
|
+
|
15
|
+
logger = get_logger(__file__)
|
16
|
+
|
17
|
+
|
18
|
+
class LakebridgeAnalyzer(Analyzer):
    """Analyzer front-end that takes its inputs either from prompts or from arguments.

    ``run_analyzer`` is the entry point: with no arguments it asks the user for
    everything, otherwise all three arguments must be supplied.

    NOTE(review): ``_run_binary`` and ``supported_source_technologies`` are not
    defined here; presumably they are inherited from ``Analyzer`` -- confirm.
    """

    def __init__(self, current_user: User, prompts: Prompts, is_debug: bool = False):
        """Keep the user, the prompt helper and the debug flag, then initialise the base class."""
        self._current_user = current_user
        self._prompts = prompts
        self._is_debug = is_debug
        super().__init__()

    def _get_source_directory(self) -> Path:
        """Prompt for the source directory (default: current directory) and return it resolved."""
        directory_str = self._prompts.question(
            "Enter full path to the source directory",
            default=Path.cwd().as_posix(),
            validate=check_path,
        )
        return Path(directory_str).resolve()

    def _get_result_file_path(self, directory: Path) -> Path:
        """Prompt for the report location.

        An answer without any ``/`` is treated as a bare file name and placed inside
        *directory*; any other answer is used as the path as given.
        """
        # NOTE(review): the prompt says "without extension" while the default ends in
        # ".xlsx"; the temporary report is always given an ".xlsx" suffix either way.
        filename = self._prompts.question(
            "Enter report file name or custom export path including file name without extension",
            default=f"{directory.as_posix()}/lakebridge-analyzer-results.xlsx",
            validate=check_path,
        )
        if "/" not in filename:
            return directory / Path(filename)
        return Path(filename)

    def _get_source_tech(self, platform: str | None = None) -> str:
        """Return the source technology for *platform*, prompting when it is missing or unsupported."""
        supported = self.supported_source_technologies()
        if platform is None or platform not in supported:
            if platform is not None:
                logger.warning(f"Invalid source technology {platform}")
            platform = self._prompts.choice("Select the source technology", supported)
        with_user_agent_extra("analyzer_source_tech", make_alphanum_or_semver(platform))
        logger.debug(f"User: {self._current_user}")
        return _PLATFORM_TO_SOURCE_TECHNOLOGY[platform]

    @staticmethod
    def _temp_xlsx_path(results_dir: Path | str) -> Path:
        """Return a path inside a fresh temporary directory, named like *results_dir* with an ``.xlsx`` suffix."""
        return (Path(tempfile.mkdtemp()) / Path(results_dir).name).with_suffix(".xlsx")

    def _analyze_and_store(self, source_dir: Path, report_path: Path, platform: str | None) -> None:
        """Resolve the technology, analyse *source_dir* into a temporary file and move it next to *report_path*."""
        # Resolve the technology first so that an aborted prompt does not leave a temp directory behind.
        technology = self._get_source_tech(platform)
        tmp_report = self._temp_xlsx_path(report_path)

        self._run_binary(source_dir, tmp_report, technology, self._is_debug)

        move_tmp_file(tmp_report, report_path)

        # Plain f-string placeholders: the previous "${...}" form printed a stray "$" before each value.
        logger.info(f"Successfully Analyzed files in {source_dir} for {technology} and saved report to {report_path}")

    def _run_prompt_analyzer(self) -> None:
        """Run the analyzer: prompt guided"""
        source_dir = self._get_source_directory()
        report_path = self._get_result_file_path(source_dir)
        self._analyze_and_store(source_dir, report_path, None)

    def _run_arg_analyzer(self, source_dir: str | None, results_dir: str | None, technology: str | None) -> None:
        """Run the analyzer: arg guided

        Logs an error and returns without analysing when an argument is missing or
        when either path is rejected by ``check_path``.
        """
        if source_dir is None or results_dir is None or technology is None:
            logger.error("All arguments (--source-directory, --report-file, --source-tech) must be provided")
            return

        if not (check_path(source_dir) and check_path(results_dir)):
            # Previously this case returned silently, leaving the user without any feedback.
            logger.error(
                f"Invalid or non-writable path: source directory '{source_dir}' or report file '{results_dir}'"
            )
            return

        self._analyze_and_store(Path(source_dir), Path(results_dir), technology)

    def run_analyzer(
        self, source_dir: str | None = None, results_dir: str | None = None, technology: str | None = None
    ) -> None:
        """Run the analyzer, prompting for the inputs when none of the arguments is given."""
        if not any([source_dir, results_dir, technology]):
            self._run_prompt_analyzer()
            return

        self._run_arg_analyzer(source_dir, results_dir, technology)
|
@@ -1,13 +1,34 @@
|
|
1
1
|
from databricks.labs.blueprint.logger import install_logger
|
2
2
|
from databricks.labs.blueprint.entrypoint import get_logger
|
3
|
+
from databricks.sdk import WorkspaceClient
|
3
4
|
from databricks.sdk.core import with_user_agent_extra
|
4
5
|
|
5
|
-
|
6
|
-
|
6
|
+
from databricks.labs.lakebridge import __version__
|
7
|
+
from databricks.labs.lakebridge.install import installer as _installer
|
8
|
+
from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
|
9
|
+
|
10
|
+
|
11
|
+
def main() -> None:
|
12
|
+
install_logger()
|
13
|
+
with_user_agent_extra("cmd", "install")
|
7
14
|
|
8
|
-
if __name__ == "__main__":
|
9
15
|
logger = get_logger(__file__)
|
10
16
|
logger.setLevel("INFO")
|
11
17
|
|
18
|
+
installer = _installer(
|
19
|
+
WorkspaceClient(product="lakebridge", product_version=__version__),
|
20
|
+
transpiler_repository=TranspilerRepository.user_home(),
|
21
|
+
)
|
22
|
+
if installer.has_installed_transpilers():
|
23
|
+
logger.warning(
|
24
|
+
"Detected existing Lakebridge transpilers; run 'databricks labs lakebridge install-transpile' to upgrade them."
|
25
|
+
)
|
26
|
+
else:
|
27
|
+
logger.debug("No existing Lakebridge transpilers detected; assuming fresh installation.")
|
28
|
+
|
12
29
|
logger.info("Successfully Setup Lakebridge Components Locally")
|
13
30
|
logger.info("For more information, please visit https://databrickslabs.github.io/lakebridge/")
|
31
|
+
|
32
|
+
|
33
|
+
if __name__ == "__main__":
|
34
|
+
main()
|
@@ -19,20 +19,17 @@ from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug
|
|
19
19
|
from databricks.labs.blueprint.installation import RootJsonValue
|
20
20
|
from databricks.labs.blueprint.tui import Prompts
|
21
21
|
|
22
|
-
from databricks.labs.bladespector.analyzer import Analyzer
|
23
|
-
|
24
22
|
|
25
23
|
from databricks.labs.lakebridge.assessments.configure_assessment import (
|
26
24
|
create_assessment_configurator,
|
27
25
|
PROFILER_SOURCE_SYSTEM,
|
28
26
|
)
|
29
27
|
|
30
|
-
from databricks.labs.lakebridge.__about__ import __version__
|
31
28
|
from databricks.labs.lakebridge.config import TranspileConfig
|
32
29
|
from databricks.labs.lakebridge.contexts.application import ApplicationContext
|
33
30
|
from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
|
34
31
|
from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
|
35
|
-
from databricks.labs.lakebridge.install import WorkspaceInstaller
|
32
|
+
from databricks.labs.lakebridge.install import installer
|
36
33
|
from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner
|
37
34
|
from databricks.labs.lakebridge.lineage import lineage_generator
|
38
35
|
from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME
|
@@ -52,20 +49,6 @@ def raise_validation_exception(msg: str) -> NoReturn:
|
|
52
49
|
raise ValueError(msg)
|
53
50
|
|
54
51
|
|
55
|
-
def _installer(ws: WorkspaceClient, transpiler_repository: TranspilerRepository) -> WorkspaceInstaller:
|
56
|
-
app_context = ApplicationContext(_verify_workspace_client(ws))
|
57
|
-
return WorkspaceInstaller(
|
58
|
-
app_context.workspace_client,
|
59
|
-
app_context.prompts,
|
60
|
-
app_context.installation,
|
61
|
-
app_context.install_state,
|
62
|
-
app_context.product_info,
|
63
|
-
app_context.resource_configurator,
|
64
|
-
app_context.workspace_installation,
|
65
|
-
transpiler_repository=transpiler_repository,
|
66
|
-
)
|
67
|
-
|
68
|
-
|
69
52
|
def _create_warehouse(ws: WorkspaceClient) -> str:
|
70
53
|
|
71
54
|
dbsql = ws.warehouses.create_and_wait(
|
@@ -89,19 +72,6 @@ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str):
|
|
89
72
|
logger.info(f"Removed warehouse post installation with id: {warehouse_id}")
|
90
73
|
|
91
74
|
|
92
|
-
def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
|
93
|
-
"""
|
94
|
-
[Private] Verifies and updates the workspace client configuration.
|
95
|
-
"""
|
96
|
-
|
97
|
-
# Using reflection to set right value for _product_info for telemetry
|
98
|
-
product_info = getattr(ws.config, '_product_info')
|
99
|
-
if product_info[0] != "lakebridge":
|
100
|
-
setattr(ws.config, '_product_info', ('lakebridge', __version__))
|
101
|
-
|
102
|
-
return ws
|
103
|
-
|
104
|
-
|
105
75
|
@lakebridge.command
|
106
76
|
def transpile(
|
107
77
|
w: WorkspaceClient,
|
@@ -358,7 +328,7 @@ class _TranspileConfigChecker:
|
|
358
328
|
transpiler_config_path = self._transpiler_repository.transpiler_config_path(transpiler_name)
|
359
329
|
logger.info(f"Lakebridge will use the {transpiler_name} transpiler.")
|
360
330
|
self._config = dataclasses.replace(self._config, transpiler_config_path=str(transpiler_config_path))
|
361
|
-
return
|
331
|
+
return LSPEngine.from_config_path(transpiler_config_path)
|
362
332
|
|
363
333
|
def _configure_source_dialect(
|
364
334
|
self, source_dialect: str, engine: TranspileEngine | None, msg_prefix: str
|
@@ -426,14 +396,15 @@ class _TranspileConfigChecker:
|
|
426
396
|
#
|
427
397
|
|
428
398
|
# Step 1: Check the transpiler config path.
|
399
|
+
engine: TranspileEngine | None
|
429
400
|
transpiler_config_path = self._config.transpiler_config_path
|
430
401
|
if transpiler_config_path is not None:
|
431
402
|
self._validate_transpiler_config_path(
|
432
403
|
transpiler_config_path,
|
433
|
-
f"Invalid transpiler
|
404
|
+
f"Error: Invalid value for '--transpiler-config-path': '{str(transpiler_config_path)}', file does not exist.",
|
434
405
|
)
|
435
406
|
path = Path(transpiler_config_path)
|
436
|
-
engine =
|
407
|
+
engine = LSPEngine.from_config_path(path)
|
437
408
|
else:
|
438
409
|
engine = None
|
439
410
|
del transpiler_config_path
|
@@ -639,14 +610,14 @@ def install_transpile(
|
|
639
610
|
artifact: str | None = None,
|
640
611
|
transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
|
641
612
|
) -> None:
|
642
|
-
"""Install the Lakebridge transpilers"""
|
613
|
+
"""Install or upgrade the Lakebridge transpilers."""
|
643
614
|
with_user_agent_extra("cmd", "install-transpile")
|
644
615
|
if artifact:
|
645
616
|
with_user_agent_extra("artifact-overload", Path(artifact).name)
|
646
617
|
user = w.current_user
|
647
618
|
logger.debug(f"User: {user}")
|
648
|
-
|
649
|
-
|
619
|
+
transpile_installer = installer(w, transpiler_repository)
|
620
|
+
transpile_installer.run(module="transpile", artifact=artifact)
|
650
621
|
|
651
622
|
|
652
623
|
@lakebridge.command(is_unauthenticated=False)
|
@@ -662,28 +633,23 @@ def configure_reconcile(
|
|
662
633
|
dbsql_id = _create_warehouse(w)
|
663
634
|
w.config.warehouse_id = dbsql_id
|
664
635
|
logger.debug(f"Warehouse ID used for configuring reconcile: {w.config.warehouse_id}.")
|
665
|
-
|
666
|
-
|
636
|
+
reconcile_installer = installer(w, transpiler_repository)
|
637
|
+
reconcile_installer.run(module="reconcile")
|
667
638
|
|
668
639
|
|
669
640
|
@lakebridge.command()
|
670
|
-
def analyze(
|
641
|
+
def analyze(
|
642
|
+
w: WorkspaceClient,
|
643
|
+
source_directory: str | None = None,
|
644
|
+
report_file: str | None = None,
|
645
|
+
source_tech: str | None = None,
|
646
|
+
):
|
671
647
|
"""Run the Analyzer"""
|
672
648
|
with_user_agent_extra("cmd", "analyze")
|
673
649
|
ctx = ApplicationContext(w)
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
if source_tech is None:
|
678
|
-
source_tech = prompts.choice("Select the source technology", Analyzer.supported_source_technologies())
|
679
|
-
with_user_agent_extra("analyzer_source_tech", make_alphanum_or_semver(source_tech))
|
680
|
-
user = ctx.current_user
|
681
|
-
logger.debug(f"User: {user}")
|
682
|
-
is_debug = logger.getEffectiveLevel() == logging.DEBUG
|
683
|
-
Analyzer.analyze(Path(input_folder), Path(output_file), source_tech, is_debug=is_debug)
|
684
|
-
logger.info(
|
685
|
-
f"Successfully Analyzed files in ${source_directory} for ${source_tech} and saved report to {report_file}"
|
686
|
-
)
|
650
|
+
|
651
|
+
logger.debug(f"User: {ctx.current_user}")
|
652
|
+
ctx.analyzer.run_analyzer(source_directory, report_file, source_tech)
|
687
653
|
|
688
654
|
|
689
655
|
if __name__ == "__main__":
|
@@ -12,6 +12,7 @@ from databricks.sdk.config import Config
|
|
12
12
|
from databricks.sdk.errors import NotFound
|
13
13
|
from databricks.sdk.service.iam import User
|
14
14
|
|
15
|
+
from databricks.labs.lakebridge.analyzer.lakebridge_analyzer import LakebridgeAnalyzer
|
15
16
|
from databricks.labs.lakebridge.config import TranspileConfig, ReconcileConfig, LakebridgeConfiguration
|
16
17
|
from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
|
17
18
|
from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment
|
@@ -22,6 +23,7 @@ from databricks.labs.lakebridge.helpers.metastore import CatalogOperations
|
|
22
23
|
logger = logging.getLogger(__name__)
|
23
24
|
|
24
25
|
|
26
|
+
# pylint: disable=too-many-public-methods
|
25
27
|
class ApplicationContext:
|
26
28
|
def __init__(self, ws: WorkspaceClient):
|
27
29
|
self._ws = ws
|
@@ -131,3 +133,8 @@ class ApplicationContext:
|
|
131
133
|
@cached_property
|
132
134
|
def upgrades(self):
|
133
135
|
return Upgrades(self.product_info, self.installation)
|
136
|
+
|
137
|
+
@cached_property
|
138
|
+
def analyzer(self):
|
139
|
+
is_debug = logger.getEffectiveLevel() == logging.DEBUG
|
140
|
+
return LakebridgeAnalyzer(self.current_user, self.prompts, is_debug)
|
@@ -112,8 +112,8 @@ class JobDeployment:
|
|
112
112
|
libraries = [
|
113
113
|
compute.Library(whl=remorph_wheel_path),
|
114
114
|
]
|
115
|
-
|
116
|
-
if
|
115
|
+
|
116
|
+
if recon_config.data_source == ReconSourceType.ORACLE.value:
|
117
117
|
# TODO: Automatically fetch a version list for `ojdbc8`
|
118
118
|
oracle_driver_version = "23.4.0.24.05"
|
119
119
|
libraries.append(
|
@@ -1,8 +1,13 @@
|
|
1
1
|
import contextlib
|
2
|
+
import logging
|
2
3
|
import os
|
3
4
|
from pathlib import Path
|
5
|
+
from shutil import move, Error
|
6
|
+
from datetime import datetime
|
4
7
|
from collections.abc import Generator
|
5
8
|
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
6
11
|
|
7
12
|
def is_sql_file(file: str | Path) -> bool:
|
8
13
|
"""
|
@@ -63,3 +68,34 @@ def chdir(new_path: Path) -> Generator[None, None, None]:
|
|
63
68
|
yield
|
64
69
|
finally:
|
65
70
|
os.chdir(saved_path)
|
71
|
+
|
72
|
+
|
73
|
+
def check_path(path: str) -> bool:
|
74
|
+
"""Validates a path for both existing files and writable files."""
|
75
|
+
try:
|
76
|
+
path_obj = Path(path) if not isinstance(path, Path) else path
|
77
|
+
|
78
|
+
if path_obj.exists():
|
79
|
+
return os.access(path_obj, os.W_OK)
|
80
|
+
|
81
|
+
parent = path_obj.parent
|
82
|
+
return parent.exists() and os.access(parent, os.W_OK)
|
83
|
+
|
84
|
+
except OSError as e:
|
85
|
+
logger.warning(f"Could not validate path: {path}, error: {e}")
|
86
|
+
return False
|
87
|
+
|
88
|
+
|
89
|
+
def move_tmp_file(tmp_path: Path, output_path: Path) -> None:
    """Move a file out of its temporary directory into the directory of *output_path*.

    The file keeps its own name. When the move is refused (``FileExistsError`` or
    ``shutil.Error``, e.g. the name is already taken), the file is moved into a
    timestamp-named subdirectory of ``output_path.parent`` instead. Afterwards the
    temporary directory is removed on a best-effort basis and the real location of
    the file is logged.
    """
    try:
        try:
            stored_at = Path(move(tmp_path, output_path.parent))
        except (FileExistsError, Error):
            timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
            fallback_dir = output_path.parent / timestamp
            fallback_dir.mkdir(exist_ok=True)

            stored_at = Path(move(tmp_path, fallback_dir))
    finally:
        # Cleanup must never hide the outcome of the move: a non-empty or already
        # removed temporary directory is only worth a debug message.
        try:
            tmp_path.parent.rmdir()
        except OSError as e:
            logger.debug(f"Could not remove temporary directory {tmp_path.parent}: {e}")

    # Report where the file really ended up, which may differ from output_path.
    logger.info(f"Results stored at {stored_at}")
|