databricks-labs-lakebridge 0.10.6__py3-none-any.whl → 0.10.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. databricks/labs/lakebridge/__about__.py +1 -1
  2. databricks/labs/lakebridge/analyzer/__init__.py +0 -0
  3. databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +95 -0
  4. databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
  5. databricks/labs/lakebridge/base_install.py +20 -3
  6. databricks/labs/lakebridge/cli.py +32 -59
  7. databricks/labs/lakebridge/contexts/application.py +7 -0
  8. databricks/labs/lakebridge/deployment/job.py +2 -2
  9. databricks/labs/lakebridge/helpers/file_utils.py +36 -0
  10. databricks/labs/lakebridge/helpers/validation.py +5 -3
  11. databricks/labs/lakebridge/install.py +73 -484
  12. databricks/labs/lakebridge/reconcile/compare.py +70 -33
  13. databricks/labs/lakebridge/reconcile/connectors/data_source.py +24 -1
  14. databricks/labs/lakebridge/reconcile/connectors/databricks.py +12 -1
  15. databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +126 -0
  16. databricks/labs/lakebridge/reconcile/connectors/models.py +7 -0
  17. databricks/labs/lakebridge/reconcile/connectors/oracle.py +12 -1
  18. databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
  19. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +63 -30
  20. databricks/labs/lakebridge/reconcile/connectors/tsql.py +28 -2
  21. databricks/labs/lakebridge/reconcile/constants.py +4 -3
  22. databricks/labs/lakebridge/reconcile/execute.py +9 -810
  23. databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +133 -0
  24. databricks/labs/lakebridge/reconcile/query_builder/base.py +53 -18
  25. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
  26. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
  27. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
  28. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
  29. databricks/labs/lakebridge/reconcile/recon_config.py +3 -15
  30. databricks/labs/lakebridge/reconcile/recon_output_config.py +2 -1
  31. databricks/labs/lakebridge/reconcile/reconciliation.py +511 -0
  32. databricks/labs/lakebridge/reconcile/schema_compare.py +26 -19
  33. databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +78 -0
  34. databricks/labs/lakebridge/reconcile/trigger_recon_service.py +256 -0
  35. databricks/labs/lakebridge/reconcile/utils.py +38 -0
  36. databricks/labs/lakebridge/transpiler/execute.py +34 -28
  37. databricks/labs/lakebridge/transpiler/installers.py +523 -0
  38. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +47 -60
  39. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +2 -0
  40. databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -18
  41. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/METADATA +1 -1
  42. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/RECORD +46 -35
  43. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/WHEEL +0 -0
  44. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/entry_points.txt +0 -0
  45. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/LICENSE +0 -0
  46. {databricks_labs_lakebridge-0.10.6.dist-info → databricks_labs_lakebridge-0.10.8.dist-info}/licenses/NOTICE +0 -0
@@ -1,2 +1,2 @@
  # DO NOT MODIFY THIS FILE
- __version__ = "0.10.6"
+ __version__ = "0.10.8"
File without changes
@@ -0,0 +1,95 @@
+ import tempfile
+ from pathlib import Path
+
+ from databricks.sdk.service.iam import User
+ from databricks.sdk.core import with_user_agent_extra
+
+ from databricks.labs.blueprint.entrypoint import get_logger
+ from databricks.labs.blueprint.tui import Prompts
+
+ from databricks.labs.bladespector.analyzer import Analyzer, _PLATFORM_TO_SOURCE_TECHNOLOGY
+
+ from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
+ from databricks.labs.lakebridge.helpers.file_utils import check_path, move_tmp_file
+
+ logger = get_logger(__file__)
+
+
+ class LakebridgeAnalyzer(Analyzer):
+     def __init__(self, current_user: User, prompts: Prompts, is_debug: bool = False):
+         self._current_user = current_user
+         self._prompts = prompts
+         self._is_debug = is_debug
+         super().__init__()
+
+     def _get_source_directory(self) -> Path:
+         """Get and validate the source directory from user input."""
+         directory_str = self._prompts.question(
+             "Enter full path to the source directory",
+             default=Path.cwd().as_posix(),
+             validate=check_path,
+         )
+         return Path(directory_str).resolve()
+
+     def _get_result_file_path(self, directory: Path) -> Path:
+         """Get the result file path - accepts either filename or full path."""
+         filename = self._prompts.question(
+             "Enter report file name or custom export path including file name without extension",
+             default=f"{directory.as_posix()}/lakebridge-analyzer-results.xlsx",
+             validate=check_path,
+         )
+         return directory / Path(filename) if len(filename.split("/")) == 1 else Path(filename)
+
+     def _get_source_tech(self, platform: str | None = None) -> str:
+         """Validate source technology or prompt for a valid source"""
+         if platform is None or platform not in self.supported_source_technologies():
+             if platform is not None:
+                 logger.warning(f"Invalid source technology {platform}")
+             platform = self._prompts.choice("Select the source technology", self.supported_source_technologies())
+         with_user_agent_extra("analyzer_source_tech", make_alphanum_or_semver(platform))
+         logger.debug(f"User: {self._current_user}")
+         return _PLATFORM_TO_SOURCE_TECHNOLOGY[platform]
+
+     @staticmethod
+     def _temp_xlsx_path(results_dir: Path | str) -> Path:
+         return (Path(tempfile.mkdtemp()) / Path(results_dir).name).with_suffix(".xlsx")
+
+     def _run_prompt_analyzer(self):
+         """Run the analyzer: prompt guided"""
+         source_dir = self._get_source_directory()
+         results_dir = self._get_result_file_path(source_dir)
+         tmp_dir = self._temp_xlsx_path(results_dir)
+         technology = self._get_source_tech()
+
+         self._run_binary(source_dir, tmp_dir, technology, self._is_debug)
+
+         move_tmp_file(tmp_dir, results_dir)
+
+         logger.info(f"Successfully Analyzed files in ${source_dir} for ${technology} and saved report to {results_dir}")
+
+     def _run_arg_analyzer(self, source_dir: str | None, results_dir: str | None, technology: str | None):
+         """Run the analyzer: arg guided"""
+         if source_dir is None or results_dir is None or technology is None:
+             logger.error("All arguments (--source-directory, --report-file, --source-tech) must be provided")
+             return
+
+         if check_path(source_dir) and check_path(results_dir):
+             tmp_dir = self._temp_xlsx_path(results_dir)
+             technology = self._get_source_tech(technology)
+             self._run_binary(Path(source_dir), tmp_dir, technology, self._is_debug)
+
+             move_tmp_file(tmp_dir, Path(results_dir))
+
+             logger.info(
+                 f"Successfully Analyzed files in ${source_dir} for ${technology} and saved report to {results_dir}"
+             )
+
+     def run_analyzer(
+         self, source_dir: str | None = None, results_dir: str | None = None, technology: str | None = None
+     ):
+         """Run the analyzer."""
+         if not any([source_dir, results_dir, technology]):
+             self._run_prompt_analyzer()
+             return
+
+         self._run_arg_analyzer(source_dir, results_dir, technology)
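
For orientation, a minimal sketch of how this new class gets driven, mirroring the `ApplicationContext.analyzer` property added later in this diff; the paths and the source technology value below are illustrative assumptions, not values taken from the package:

```python
# Hypothetical wiring of the new LakebridgeAnalyzer; paths and "mssql" are examples only.
import logging

from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.analyzer.lakebridge_analyzer import LakebridgeAnalyzer

ws = WorkspaceClient()
is_debug = logging.getLogger(__name__).getEffectiveLevel() == logging.DEBUG
analyzer = LakebridgeAnalyzer(ws.current_user.me(), Prompts(), is_debug)

# With no arguments the analyzer prompts interactively; with all three it runs unattended.
analyzer.run_analyzer("/tmp/sql-sources", "/tmp/analyzer-report.xlsx", "mssql")
```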
@@ -0,0 +1,103 @@
+ import os
+ from dataclasses import dataclass
+ from duckdb import DuckDBPyConnection
+
+ from databricks.labs.lakebridge.assessments.pipeline import PipelineClass
+
+ PROFILER_DB_NAME = "profiler_extract.db"
+
+
+ @dataclass(frozen=True)
+ class ValidationOutcome:
+     """A data class that holds the outcome of a table validation check."""
+
+     table: str
+     column: str | None
+     strategy: str
+     outcome: str
+     severity: str
+
+
+ class ValidationStrategy:
+     """Abstract class for validating a Profiler table"""
+
+     def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
+         raise NotImplementedError
+
+
+ class NullValidationCheck(ValidationStrategy):
+     """Concrete class for validating null values in a profiler table"""
+
+     def __init__(self, table, column, severity="WARN"):
+         self.name = self.__class__.__name__
+         self.table = table
+         self.column = column
+         self.severity = severity
+
+     def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
+         """
+         Validates that a column does not contain null values.
+         input:
+             connection: a DuckDB connection object
+         """
+         result = connection.execute(f"SELECT COUNT(*) FROM {self.table} WHERE {self.column} IS NULL").fetchone()
+         if result:
+             row_count = result[0]
+             outcome = "FAIL" if row_count > 0 else "PASS"
+         else:
+             outcome = "FAIL"
+         return ValidationOutcome(self.table, self.column, self.name, outcome, self.severity)
+
+
+ class EmptyTableValidationCheck(ValidationStrategy):
+     """Concrete class for validating empty tables from a profiler run."""
+
+     def __init__(self, table, severity="WARN"):
+         self.name = self.__class__.__name__
+         self.table = table
+         self.severity = severity
+
+     def validate(self, connection) -> ValidationOutcome:
+         """Validates that a table is not empty.
+         input:
+             connection: a DuckDB connection object
+         returns:
+             a ValidationOutcome object
+         """
+         result = connection.execute(f"SELECT COUNT(*) FROM {self.table}").fetchone()
+         if result:
+             row_count = result[0]
+             outcome = "PASS" if row_count > 0 else "FAIL"
+         else:
+             outcome = "FAIL"
+         return ValidationOutcome(self.table, None, self.name, outcome, self.severity)
+
+
+ def get_profiler_extract_path(pipeline_config_path: str) -> str:
+     """
+     Returns the filesystem path of the profiler extract database.
+     input:
+         pipeline_config_path: the location of the pipeline definition .yml file
+     returns:
+         the filesystem path to the profiler extract database
+     """
+     pipeline_config = PipelineClass.load_config_from_yaml(pipeline_config_path)
+     normalized_db_path = os.path.normpath(pipeline_config.extract_folder)
+     database_path = f"{normalized_db_path}/{PROFILER_DB_NAME}"
+     return database_path
+
+
+ def build_validation_report(
+     validations: list[ValidationStrategy], connection: DuckDBPyConnection
+ ) -> list[ValidationOutcome]:
+     """
+     Builds a list of ValidationOutcomes from list of validation checks.
+     input:
+         validations: a list of ValidationStrategy objects
+         connection: a DuckDB connection object
+     returns: a list of ValidationOutcomes
+     """
+     validation_report = []
+     for validation in validations:
+         validation_report.append(validation.validate(connection))
+     return validation_report
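
A hedged usage sketch of the new validation helpers; the database path and the table/column names ("tables", "table_name") are placeholders for illustration, not names shipped by the package:

```python
# Hypothetical check against a local profiler extract database.
import duckdb

from databricks.labs.lakebridge.assessments.profiler_validator import (
    EmptyTableValidationCheck,
    NullValidationCheck,
    build_validation_report,
)

connection = duckdb.connect("profiler_extract.db")  # assumed local extract file
checks = [
    EmptyTableValidationCheck("tables"),          # FAIL if the table has no rows
    NullValidationCheck("tables", "table_name"),  # FAIL if the column contains NULLs
]
for outcome in build_validation_report(checks, connection):
    print(outcome.table, outcome.strategy, outcome.outcome, outcome.severity)
connection.close()
```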
@@ -1,13 +1,30 @@
  from databricks.labs.blueprint.logger import install_logger
  from databricks.labs.blueprint.entrypoint import get_logger
+ from databricks.sdk import WorkspaceClient
  from databricks.sdk.core import with_user_agent_extra
  
- install_logger()
- with_user_agent_extra("cmd", "install")
+ from databricks.labs.lakebridge import __version__
+ from databricks.labs.lakebridge.install import installer as _installer
+ from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
+
+
+ def main() -> None:
+     install_logger()
+     with_user_agent_extra("cmd", "install")
  
- if __name__ == "__main__":
      logger = get_logger(__file__)
      logger.setLevel("INFO")
  
+     installer = _installer(
+         WorkspaceClient(product="lakebridge", product_version=__version__),
+         transpiler_repository=TranspilerRepository.user_home(),
+     )
+     if not installer.upgrade_installed_transpilers():
+         logger.debug("No existing Lakebridge transpilers detected; assuming fresh installation.")
+
      logger.info("Successfully Setup Lakebridge Components Locally")
      logger.info("For more information, please visit https://databrickslabs.github.io/lakebridge/")
+
+
+ if __name__ == "__main__":
+     main()
@@ -19,20 +19,17 @@ from databricks.labs.blueprint.entrypoint import get_logger, is_in_debug
  from databricks.labs.blueprint.installation import RootJsonValue
  from databricks.labs.blueprint.tui import Prompts
  
- from databricks.labs.bladespector.analyzer import Analyzer
-
  
  from databricks.labs.lakebridge.assessments.configure_assessment import (
      create_assessment_configurator,
      PROFILER_SOURCE_SYSTEM,
  )
  
- from databricks.labs.lakebridge.__about__ import __version__
  from databricks.labs.lakebridge.config import TranspileConfig
  from databricks.labs.lakebridge.contexts.application import ApplicationContext
  from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
  from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
- from databricks.labs.lakebridge.install import WorkspaceInstaller
+ from databricks.labs.lakebridge.install import installer
  from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner
  from databricks.labs.lakebridge.lineage import lineage_generator
  from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME
@@ -52,20 +49,6 @@ def raise_validation_exception(msg: str) -> NoReturn:
      raise ValueError(msg)
  
  
- def _installer(ws: WorkspaceClient, transpiler_repository: TranspilerRepository) -> WorkspaceInstaller:
-     app_context = ApplicationContext(_verify_workspace_client(ws))
-     return WorkspaceInstaller(
-         app_context.workspace_client,
-         app_context.prompts,
-         app_context.installation,
-         app_context.install_state,
-         app_context.product_info,
-         app_context.resource_configurator,
-         app_context.workspace_installation,
-         transpiler_repository=transpiler_repository,
-     )
-
-
  
  def _create_warehouse(ws: WorkspaceClient) -> str:
  
@@ -89,21 +72,9 @@ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str):
      logger.info(f"Removed warehouse post installation with id: {warehouse_id}")
  
  
- def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
-     """
-     [Private] Verifies and updates the workspace client configuration.
-     """
-
-     # Using reflection to set right value for _product_info for telemetry
-     product_info = getattr(ws.config, '_product_info')
-     if product_info[0] != "lakebridge":
-         setattr(ws.config, '_product_info', ('lakebridge', __version__))
-
-     return ws
-
-
  @lakebridge.command
  def transpile(
+     *,
      w: WorkspaceClient,
      transpiler_config_path: str | None = None,
      source_dialect: str | None = None,
@@ -358,7 +329,7 @@ class _TranspileConfigChecker:
          transpiler_config_path = self._transpiler_repository.transpiler_config_path(transpiler_name)
          logger.info(f"Lakebridge will use the {transpiler_name} transpiler.")
          self._config = dataclasses.replace(self._config, transpiler_config_path=str(transpiler_config_path))
-         return TranspileEngine.load_engine(transpiler_config_path)
+         return LSPEngine.from_config_path(transpiler_config_path)
  
      def _configure_source_dialect(
          self, source_dialect: str, engine: TranspileEngine | None, msg_prefix: str
@@ -370,6 +341,8 @@ class _TranspileConfigChecker:
              supported_dialects = ", ".join(self._transpiler_repository.all_dialects())
              msg = f"{msg_prefix}: {source_dialect!r} (supported dialects: {supported_dialects})"
              raise_validation_exception(msg)
+         else:
+             self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
          else:
              # Check the source dialect against the engine.
              if source_dialect not in engine.supported_dialects:
@@ -396,6 +369,7 @@ class _TranspileConfigChecker:
          source_dialect = self._prompts.choice("Select the source dialect:", list(supported_dialects))
          engine = self._configure_transpiler_config_path(source_dialect)
          assert engine is not None, "No transpiler engine available for a supported dialect; configuration is invalid."
+         self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
          return engine
  
      def _check_lsp_engine(self) -> TranspileEngine:
@@ -426,14 +400,15 @@ class _TranspileConfigChecker:
          #
  
          # Step 1: Check the transpiler config path.
+         engine: TranspileEngine | None
          transpiler_config_path = self._config.transpiler_config_path
          if transpiler_config_path is not None:
              self._validate_transpiler_config_path(
                  transpiler_config_path,
-                 f"Invalid transpiler path configured, path does not exist: {transpiler_config_path}",
+                 f"Error: Invalid value for '--transpiler-config-path': '{str(transpiler_config_path)}', file does not exist.",
              )
              path = Path(transpiler_config_path)
-             engine = TranspileEngine.load_engine(path)
+             engine = LSPEngine.from_config_path(path)
          else:
              engine = None
          del transpiler_config_path
@@ -547,7 +522,7 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s
  
  
  @lakebridge.command
- def reconcile(w: WorkspaceClient) -> None:
+ def reconcile(*, w: WorkspaceClient) -> None:
      """[EXPERIMENTAL] Reconciles source to Databricks datasets"""
      with_user_agent_extra("cmd", "execute-reconcile")
      ctx = ApplicationContext(w)
@@ -563,7 +538,7 @@ def reconcile(w: WorkspaceClient) -> None:
  
  
  @lakebridge.command
- def aggregates_reconcile(w: WorkspaceClient) -> None:
+ def aggregates_reconcile(*, w: WorkspaceClient) -> None:
      """[EXPERIMENTAL] Reconciles Aggregated source to Databricks datasets"""
      with_user_agent_extra("cmd", "execute-aggregates-reconcile")
      ctx = ApplicationContext(w)
@@ -581,8 +556,8 @@ def aggregates_reconcile(w: WorkspaceClient) -> None:
  
  @lakebridge.command
  def generate_lineage(
-     w: WorkspaceClient,
      *,
+     w: WorkspaceClient,
      source_dialect: str | None = None,
      input_source: str,
      output_folder: str,
@@ -607,7 +582,7 @@ def generate_lineage(
  
  
  @lakebridge.command
- def configure_secrets(w: WorkspaceClient) -> None:
+ def configure_secrets(*, w: WorkspaceClient) -> None:
      """Setup reconciliation connection profile details as Secrets on Databricks Workspace"""
      recon_conf = ReconConfigPrompts(w)
  
@@ -633,24 +608,26 @@ def configure_database_profiler() -> None:
      assessment.run()
  
  
- @lakebridge.command()
+ @lakebridge.command
  def install_transpile(
+     *,
      w: WorkspaceClient,
      artifact: str | None = None,
      transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
  ) -> None:
-     """Install the Lakebridge transpilers"""
+     """Install or upgrade the Lakebridge transpilers."""
      with_user_agent_extra("cmd", "install-transpile")
      if artifact:
          with_user_agent_extra("artifact-overload", Path(artifact).name)
      user = w.current_user
      logger.debug(f"User: {user}")
-     installer = _installer(w, transpiler_repository)
-     installer.run(module="transpile", artifact=artifact)
+     transpile_installer = installer(w, transpiler_repository)
+     transpile_installer.run(module="transpile", artifact=artifact)
  
  
  @lakebridge.command(is_unauthenticated=False)
  def configure_reconcile(
+     *,
      w: WorkspaceClient,
      transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
  ) -> None:
@@ -662,28 +639,24 @@ def configure_reconcile(
      dbsql_id = _create_warehouse(w)
      w.config.warehouse_id = dbsql_id
      logger.debug(f"Warehouse ID used for configuring reconcile: {w.config.warehouse_id}.")
-     installer = _installer(w, transpiler_repository)
-     installer.run(module="reconcile")
+     reconcile_installer = installer(w, transpiler_repository)
+     reconcile_installer.run(module="reconcile")
  
  
- @lakebridge.command()
- def analyze(w: WorkspaceClient, source_directory: str, report_file: str, source_tech: str | None = None) -> None:
+ @lakebridge.command
+ def analyze(
+     *,
+     w: WorkspaceClient,
+     source_directory: str | None = None,
+     report_file: str | None = None,
+     source_tech: str | None = None,
+ ):
      """Run the Analyzer"""
      with_user_agent_extra("cmd", "analyze")
      ctx = ApplicationContext(w)
-     prompts = ctx.prompts
-     output_file = report_file
-     input_folder = source_directory
-     if source_tech is None:
-         source_tech = prompts.choice("Select the source technology", Analyzer.supported_source_technologies())
-         with_user_agent_extra("analyzer_source_tech", make_alphanum_or_semver(source_tech))
-     user = ctx.current_user
-     logger.debug(f"User: {user}")
-     is_debug = logger.getEffectiveLevel() == logging.DEBUG
-     Analyzer.analyze(Path(input_folder), Path(output_file), source_tech, is_debug=is_debug)
-     logger.info(
-         f"Successfully Analyzed files in ${source_directory} for ${source_tech} and saved report to {report_file}"
-     )
+
+     logger.debug(f"User: {ctx.current_user}")
+     ctx.analyzer.run_analyzer(source_directory, report_file, source_tech)
  
  
  if __name__ == "__main__":
@@ -12,6 +12,7 @@ from databricks.sdk.config import Config
  from databricks.sdk.errors import NotFound
  from databricks.sdk.service.iam import User
  
+ from databricks.labs.lakebridge.analyzer.lakebridge_analyzer import LakebridgeAnalyzer
  from databricks.labs.lakebridge.config import TranspileConfig, ReconcileConfig, LakebridgeConfiguration
  from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
  from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment
@@ -22,6 +23,7 @@ from databricks.labs.lakebridge.helpers.metastore import CatalogOperations
  logger = logging.getLogger(__name__)
  
  
+ # pylint: disable=too-many-public-methods
  class ApplicationContext:
      def __init__(self, ws: WorkspaceClient):
          self._ws = ws
@@ -131,3 +133,8 @@ class ApplicationContext:
      @cached_property
      def upgrades(self):
          return Upgrades(self.product_info, self.installation)
+
+     @cached_property
+     def analyzer(self):
+         is_debug = logger.getEffectiveLevel() == logging.DEBUG
+         return LakebridgeAnalyzer(self.current_user, self.prompts, is_debug)
@@ -112,8 +112,8 @@ class JobDeployment:
          libraries = [
              compute.Library(whl=remorph_wheel_path),
          ]
-         source = recon_config.data_source
-         if source == ReconSourceType.ORACLE.value:
+
+         if recon_config.data_source == ReconSourceType.ORACLE.value:
              # TODO: Automatically fetch a version list for `ojdbc8`
              oracle_driver_version = "23.4.0.24.05"
              libraries.append(
@@ -1,8 +1,13 @@
  import contextlib
+ import logging
  import os
  from pathlib import Path
+ from shutil import move, Error
+ from datetime import datetime
  from collections.abc import Generator
  
+ logger = logging.getLogger(__name__)
+
  
  def is_sql_file(file: str | Path) -> bool:
      """
@@ -63,3 +68,34 @@ def chdir(new_path: Path) -> Generator[None, None, None]:
          yield
      finally:
          os.chdir(saved_path)
+
+
+ def check_path(path: str) -> bool:
+     """Validates a path for both existing files and writable files."""
+     try:
+         path_obj = Path(path) if not isinstance(path, Path) else path
+
+         if path_obj.exists():
+             return os.access(path_obj, os.W_OK)
+
+         parent = path_obj.parent
+         return parent.exists() and os.access(parent, os.W_OK)
+
+     except OSError as e:
+         logger.warning(f"Could not validate path: {path}, error: {e}")
+         return False
+
+
+ def move_tmp_file(tmp_path: Path, output_path: Path) -> None:
+     """Process file from a temp directory"""
+     try:
+         move(tmp_path, output_path.parent)
+     except (FileExistsError, Error):
+         timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+         new_output_path = output_path.parent / timestamp
+         new_output_path.mkdir(exist_ok=True)
+
+         move(tmp_path, new_output_path)
+     finally:
+         tmp_path.parent.rmdir()
+         logger.info(f"Results store at {output_path}")
@@ -37,19 +37,21 @@ class Validator:
              config.catalog_name,
              config.schema_name,
          )
+         # Some errors doesn't return the query test alon with the error message so need to handle those separately
+         static_errors_lkp = ["[UNRESOLVED_ROUTINE]", "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION]"]
          if is_valid:
              result = sql_text
              if exception_type is not None:
                  exception_msg = f"[{exception_type.upper()}]: {exception_msg}"
          else:
              query = ""
-             if "[UNRESOLVED_ROUTINE]" in str(exception_msg):
+             if any(err in str(exception_msg) for err in static_errors_lkp):
                  query = sql_text
              buffer = StringIO()
              buffer.write("-------------- Exception Start-------------------\n")
-             buffer.write("/* \n")
+             buffer.write("/*\n")
              buffer.write(str(exception_msg))
-             buffer.write("\n */ \n")
+             buffer.write("\n*/\n")
              buffer.write(query)
              buffer.write("\n ---------------Exception End --------------------\n")