databricks-labs-lakebridge 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/__init__.py +3 -0
- databricks/labs/__init__.py +3 -0
- databricks/labs/lakebridge/__about__.py +2 -0
- databricks/labs/lakebridge/__init__.py +11 -0
- databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks/labs/lakebridge/base_install.py +12 -0
- databricks/labs/lakebridge/cli.py +449 -0
- databricks/labs/lakebridge/config.py +192 -0
- databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks/labs/lakebridge/install.py +849 -0
- databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks/labs/lakebridge/lineage.py +42 -0
- databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks/labs/lakebridge/uninstall.py +28 -0
- databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
- databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
- databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
- databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
- docs/lakebridge/src/components/Button.tsx +81 -0
- docs/lakebridge/src/css/custom.css +167 -0
- docs/lakebridge/src/css/table.css +20 -0
- docs/lakebridge/src/pages/index.tsx +57 -0
- docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/connections/credential_manager.py
@@ -0,0 +1,89 @@
+from pathlib import Path
+import logging
+from typing import Protocol
+
+import yaml
+
+from databricks.labs.lakebridge.connections.env_getter import EnvGetter
+
+
+logger = logging.getLogger(__name__)
+
+
+class SecretProvider(Protocol):
+    def get_secret(self, key: str) -> str:
+        pass
+
+
+class LocalSecretProvider:
+    def get_secret(self, key: str) -> str:
+        return key
+
+
+class EnvSecretProvider:
+    def __init__(self, env_getter: EnvGetter):
+        self._env_getter = env_getter
+
+    def get_secret(self, key: str) -> str:
+        try:
+            return self._env_getter.get(str(key))
+        except KeyError:
+            logger.debug(f"Environment variable {key} not found. Falling back to actual value")
+            return key
+
+
+class DatabricksSecretProvider:
+    def get_secret(self, key: str) -> str:
+        raise NotImplementedError("Databricks secret vault not implemented")
+
+
+class CredentialManager:
+    def __init__(self, credential_loader: dict, secret_providers: dict):
+        self._credentials = credential_loader
+        self._secret_providers = secret_providers
+        self._default_vault = self._credentials.get('secret_vault_type', 'local').lower()
+
+    def get_credentials(self, source: str) -> dict:
+        if source not in self._credentials:
+            raise KeyError(f"Source system: {source} credentials not found")
+
+        value = self._credentials[source]
+        if not isinstance(value, dict):
+            raise KeyError(f"Invalid credential format for source: {source}")
+
+        return {k: self._get_secret_value(v) for k, v in value.items()}
+
+    def _get_secret_value(self, key: str) -> str:
+        provider = self._secret_providers.get(self._default_vault)
+        if not provider:
+            raise ValueError(f"Unsupported secret vault type: {self._default_vault}")
+        return provider.get_secret(key)
+
+
+def _get_home() -> Path:
+    return Path(__file__).home()
+
+
+def cred_file(product_name) -> Path:
+    return Path(f"{_get_home()}/.databricks/labs/{product_name}/.credentials.yml")
+
+
+def _load_credentials(path: Path) -> dict:
+    try:
+        with open(path, encoding="utf-8") as f:
+            return yaml.safe_load(f)
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"Credentials file not found at {path}") from e
+
+
+def create_credential_manager(product_name: str, env_getter: EnvGetter):
+    file_path = Path(f"{_get_home()}/.databricks/labs/{product_name}/.credentials.yml")
+
+    secret_providers = {
+        'local': LocalSecretProvider(),
+        'env': EnvSecretProvider(env_getter),
+        'databricks': DatabricksSecretProvider(),
+    }
+
+    loader = _load_credentials(file_path)
+    return CredentialManager(loader, secret_providers)

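Taken together, this module loads ~/.databricks/labs/<product>/.credentials.yml, selects a secret provider from the top-level secret_vault_type key ('local', 'env', or the unimplemented 'databricks'), and resolves every credential value through that provider. A minimal usage sketch follows; the product name, YAML layout, and environment variable names are illustrative assumptions, not taken from the package:

# Hypothetical ~/.databricks/labs/remorph/.credentials.yml:
#
#   secret_vault_type: env
#   mssql:
#     user: MSSQL_USER          # with the 'env' vault, values name environment variables
#     password: MSSQL_PASSWORD
#     server: MSSQL_SERVER
#     database: master
#     driver: ODBC Driver 18 for SQL Server  # no such env var, so the literal value is returned

from databricks.labs.lakebridge.connections.credential_manager import create_credential_manager
from databricks.labs.lakebridge.connections.env_getter import EnvGetter

manager = create_credential_manager("remorph", EnvGetter())
mssql_creds = manager.get_credentials("mssql")  # dict with every value resolved via EnvSecretProvider
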
databricks/labs/lakebridge/connections/database_manager.py
@@ -0,0 +1,98 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import Any
+
+from sqlalchemy import create_engine
+from sqlalchemy.engine import Engine, Result, URL
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy import text
+from sqlalchemy.exc import OperationalError
+
+logger = logging.getLogger(__name__)
+logger.setLevel("INFO")
+
+
+class DatabaseConnector(ABC):
+    @abstractmethod
+    def _connect(self) -> Engine:
+        pass
+
+    @abstractmethod
+    def execute_query(self, query: str) -> Result[Any]:
+        pass
+
+
+class _BaseConnector(DatabaseConnector):
+    def __init__(self, config: dict[str, Any]):
+        self.config = config
+        self.engine: Engine = self._connect()
+
+    def _connect(self) -> Engine:
+        raise NotImplementedError("Subclasses should implement this method")
+
+    def execute_query(self, query: str) -> Result[Any]:
+        if not self.engine:
+            raise ConnectionError("Not connected to the database.")
+        session = sessionmaker(bind=self.engine)
+        connection = session()
+        return connection.execute(text(query))
+
+
+def _create_connector(db_type: str, config: dict[str, Any]) -> DatabaseConnector:
+    connectors = {
+        "snowflake": SnowflakeConnector,
+        "mssql": MSSQLConnector,
+        "tsql": MSSQLConnector,
+        "synapse": MSSQLConnector,
+    }
+
+    connector_class = connectors.get(db_type.lower())
+
+    if connector_class is None:
+        raise ValueError(f"Unsupported database type: {db_type}")
+
+    return connector_class(config)
+
+
+class SnowflakeConnector(_BaseConnector):
+    def _connect(self) -> Engine:
+        raise NotImplementedError("Snowflake connector not implemented")
+
+
+class MSSQLConnector(_BaseConnector):
+    def _connect(self) -> Engine:
+        query_params = {"driver": self.config['driver']}
+
+        for key, value in self.config.items():
+            if key not in ["user", "password", "server", "database", "port"]:
+                query_params[key] = value
+        connection_string = URL.create(
+            "mssql+pyodbc",
+            username=self.config['user'],
+            password=self.config['password'],
+            host=self.config['server'],
+            port=self.config.get('port', 1433),
+            database=self.config['database'],
+            query=query_params,
+        )
+        return create_engine(connection_string)
+
+
+class DatabaseManager:
+    def __init__(self, db_type: str, config: dict[str, Any]):
+        self.connector = _create_connector(db_type, config)
+
+    def execute_query(self, query: str) -> Result[Any]:
+        try:
+            return self.connector.execute_query(query)
+        except OperationalError:
+            logger.error("Error connecting to the database check credentials")
+            raise ConnectionError("Error connecting to the database check credentials") from None
+
+    def check_connection(self) -> bool:
+        query = "SELECT 101 AS test_column"
+        result = self.execute_query(query)
+        row = result.fetchone()
+        if row is None:
+            return False
+        return row[0] == 101

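DatabaseManager dovetails with the credential manager above: the resolved credential dict can serve directly as the connector config. Only "snowflake", "mssql", "tsql", and "synapse" are registered, the Snowflake connector is a stub, and any config key outside user/password/server/database/port is forwarded to pyodbc as an ODBC query parameter. A sketch with hypothetical connection values (a reachable SQL Server and an installed ODBC driver are assumed):

from databricks.labs.lakebridge.connections.database_manager import DatabaseManager

config = {
    "user": "sa",
    "password": "<placeholder>",
    "server": "localhost",
    "database": "master",
    "driver": "ODBC Driver 18 for SQL Server",
    "encrypt": "no",  # extra keys like this become ODBC query parameters
}

db = DatabaseManager("mssql", config)  # "tsql" and "synapse" resolve to the same connector
if db.check_connection():              # issues SELECT 101 AS test_column
    result = db.execute_query("SELECT name FROM sys.tables")
    print(result.fetchall())
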
databricks/labs/lakebridge/connections/env_getter.py
@@ -0,0 +1,13 @@
+import os
+
+
+class EnvGetter:
+    """Standardised inorder to support testing Capabilities, check debug_envgetter.py"""
+
+    def __init__(self):
+        self.env = dict(os.environ)
+
+    def get(self, key: str) -> str:
+        if key in self.env:
+            return self.env[key]
+        raise KeyError(f"not in env: {key}")

File without changes
databricks/labs/lakebridge/contexts/application.py
@@ -0,0 +1,133 @@
+import logging
+from functools import cached_property
+
+from databricks.labs.blueprint.installation import Installation
+from databricks.labs.blueprint.installer import InstallState
+from databricks.labs.blueprint.upgrades import Upgrades
+from databricks.labs.blueprint.tui import Prompts
+from databricks.labs.blueprint.wheels import ProductInfo
+from databricks.labs.lsql.backends import SqlBackend, StatementExecutionBackend
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.config import Config
+from databricks.sdk.errors import NotFound
+from databricks.sdk.service.iam import User
+
+from databricks.labs.lakebridge.config import TranspileConfig, ReconcileConfig, RemorphConfigs
+from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
+from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment
+from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
+from databricks.labs.lakebridge.deployment.recon import TableDeployment, JobDeployment, ReconDeployment
+from databricks.labs.lakebridge.helpers.metastore import CatalogOperations
+
+logger = logging.getLogger(__name__)
+
+
+class ApplicationContext:
+    def __init__(self, ws: WorkspaceClient):
+        self._ws = ws
+
+    def replace(self, **kwargs):
+        """Replace cached properties for unit testing purposes."""
+        for key, value in kwargs.items():
+            self.__dict__[key] = value
+        return self
+
+    @cached_property
+    def workspace_client(self) -> WorkspaceClient:
+        return self._ws
+
+    @cached_property
+    def current_user(self) -> User:
+        return self.workspace_client.current_user.me()
+
+    @cached_property
+    def product_info(self) -> ProductInfo:
+        return ProductInfo.from_class(RemorphConfigs)
+
+    @cached_property
+    def installation(self) -> Installation:
+        return Installation.assume_user_home(self.workspace_client, self.product_info.product_name())
+
+    @cached_property
+    def transpile_config(self) -> TranspileConfig | None:
+        try:
+            return self.installation.load(TranspileConfig)
+        except NotFound as err:
+            logger.debug(f"Couldn't find existing `transpile` installation: {err}")
+            return None
+
+    @cached_property
+    def recon_config(self) -> ReconcileConfig | None:
+        try:
+            return self.installation.load(ReconcileConfig)
+        except NotFound as err:
+            logger.debug(f"Couldn't find existing `reconcile` installation: {err}")
+            return None
+
+    @cached_property
+    def remorph_config(self) -> RemorphConfigs:
+        return RemorphConfigs(transpile=self.transpile_config, reconcile=self.recon_config)
+
+    @cached_property
+    def connect_config(self) -> Config:
+        return self.workspace_client.config
+
+    @cached_property
+    def install_state(self) -> InstallState:
+        return InstallState.from_installation(self.installation)
+
+    @cached_property
+    def sql_backend(self) -> SqlBackend:
+        # Installer to use only StatementExecutionBackend to eliminate the need for Databricks Connect
+        return StatementExecutionBackend(self.workspace_client, self.connect_config.warehouse_id)
+
+    @cached_property
+    def catalog_operations(self) -> CatalogOperations:
+        return CatalogOperations(self.workspace_client)
+
+    @cached_property
+    def prompts(self) -> Prompts:
+        return Prompts()
+
+    @cached_property
+    def resource_configurator(self) -> ResourceConfigurator:
+        return ResourceConfigurator(self.workspace_client, self.prompts, self.catalog_operations)
+
+    @cached_property
+    def table_deployment(self) -> TableDeployment:
+        return TableDeployment(self.sql_backend)
+
+    @cached_property
+    def job_deployment(self) -> JobDeployment:
+        return JobDeployment(self.workspace_client, self.installation, self.install_state, self.product_info)
+
+    @cached_property
+    def dashboard_deployment(self) -> DashboardDeployment:
+        return DashboardDeployment(self.workspace_client, self.installation, self.install_state)
+
+    @cached_property
+    def recon_deployment(self) -> ReconDeployment:
+        return ReconDeployment(
+            self.workspace_client,
+            self.installation,
+            self.install_state,
+            self.product_info,
+            self.table_deployment,
+            self.job_deployment,
+            self.dashboard_deployment,
+        )
+
+    @cached_property
+    def workspace_installation(self) -> WorkspaceInstallation:
+        return WorkspaceInstallation(
+            self.workspace_client,
+            self.prompts,
+            self.installation,
+            self.recon_deployment,
+            self.product_info,
+            self.upgrades,
+        )
+
+    @cached_property
+    def upgrades(self):
+        return Upgrades(self.product_info, self.installation)

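The replace hook works because every dependency is a cached_property: assigning into self.__dict__ pre-seeds the property cache, so the real factory never runs. A test-oriented sketch, assuming unittest.mock fakes (the final assertion also assumes RemorphConfigs exposes the transpile field it is constructed with):

from unittest.mock import create_autospec

from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.contexts.application import ApplicationContext

ws = create_autospec(WorkspaceClient)
ctx = ApplicationContext(ws).replace(
    transpile_config=None,  # simulate a workspace with no transpile install yet
    recon_config=None,
)
assert ctx.workspace_client is ws
assert ctx.remorph_config.transpile is None  # built from the replaced cached values
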
File without changes
databricks/labs/lakebridge/coverage/commons.py
@@ -0,0 +1,223 @@
+# pylint: disable=all
+import collections
+import dataclasses
+import json
+import logging
+import os
+import subprocess
+import time
+from collections.abc import Generator
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import TextIO, List
+
+import sqlglot
+from sqlglot.expressions import Expression
+from sqlglot.dialects.dialect import Dialect
+from sqlglot.dialects.databricks import Databricks
+from sqlglot.errors import ErrorLevel
+
+logger = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass
+class ReportEntry:
+    project: str
+    commit_hash: str | None
+    version: str
+    timestamp: str
+    source_dialect: str
+    target_dialect: str
+    file: str
+    parsed: int = 0  # 1 for success, 0 for failure
+    statements: int = 0  # number of statements parsed
+    transpiled: int = 0  # 1 for success, 0 for failure
+    transpiled_statements: int = 0  # number of statements transpiled
+    failures: List[dict] = dataclasses.field(default_factory=lambda: [])
+
+
+def sqlglot_run_coverage(dialect, subfolder):
+    input_dir = get_env_var("INPUT_DIR_PARENT", required=True)
+    output_dir = get_env_var("OUTPUT_DIR", required=True)
+    sqlglot_version = sqlglot.__version__
+    SQLGLOT_COMMIT_HASH = ""  # C0103 pylint
+
+    if not input_dir:
+        raise ValueError("Environment variable `INPUT_DIR_PARENT` is required")
+    if not output_dir:
+        raise ValueError("Environment variable `OUTPUT_DIR` is required")
+
+    collect_transpilation_stats(
+        "SQLGlot",
+        SQLGLOT_COMMIT_HASH,
+        sqlglot_version,
+        dialect,
+        Databricks,
+        Path(input_dir) / subfolder,
+        Path(output_dir),
+    )
+
+
+def local_report(output_dir: Path):
+    all = collections.defaultdict(list)
+    for file in output_dir.rglob("*.json"):
+        with file.open("r", encoding="utf8") as f:
+            for line in f:
+                raw = json.loads(line)
+                entry = ReportEntry(**raw)
+                all[(entry.project, entry.source_dialect)].append(entry)
+    for (project, dialect), entries in sorted(all.items()):
+        total = len(entries)
+        parsed = sum(entry.parsed for entry in entries)
+        transpiled = sum(entry.transpiled for entry in entries)
+        parse_ratio = parsed / total
+        transpile_ratio = transpiled / total
+        print(
+            f"{project} -> {dialect}: {parse_ratio:.2%} parsed ({parsed}/{total}), "
+            f"{transpile_ratio:.2%} transpiled ({transpiled}/{total})"
+        )
+
+
+def get_supported_sql_files(input_dir: Path) -> Generator[Path, None, None]:
+    yield from filter(lambda item: item.is_file() and item.suffix.lower() in [".sql", ".ddl"], input_dir.rglob("*"))
+
+
+def write_json_line(file: TextIO, content: ReportEntry):
+    json.dump(dataclasses.asdict(content), file)
+    file.write("\n")
+
+
+def get_env_var(env_var: str, *, required: bool = False) -> str | None:
+    """
+    Get the value of an environment variable.
+
+    :param env_var: The name of the environment variable to get the value of.
+    :param required: Indicates if the environment variable is required and raises a ValueError if it's not set.
+    :return: Returns the environment variable's value, or None if it's not set and not required.
+    """
+    value = os.getenv(env_var)
+    if value is None and required:
+        message = f"Environment variable {env_var} is not set"
+        raise ValueError(message)
+    return value
+
+
+def get_current_commit_hash() -> str | None:
+    try:
+        return (
+            subprocess.check_output(
+                ["/usr/bin/git", "rev-parse", "--short", "HEAD"],
+                cwd=Path(__file__).resolve().parent,
+            )
+            .decode("ascii")
+            .strip()
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        logger.warning(f"Could not get the current commit hash. {e!s}")
+        return None
+
+
+def get_current_time_utc() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+def parse_sql(sql: str, dialect: type[Dialect]) -> list[Expression]:
+    return [
+        expression for expression in sqlglot.parse(sql, read=dialect, error_level=ErrorLevel.IMMEDIATE) if expression
+    ]
+
+
+def generate_sql(expressions: list[Expression], dialect: type[Dialect]) -> list[str]:
+    generator_dialect = Dialect.get_or_raise(dialect)
+    return [generator_dialect.generate(expression, copy=False) for expression in expressions if expression]
+
+
+def _ensure_valid_io_paths(input_dir: Path, result_dir: Path):
+    if not input_dir.exists() or not input_dir.is_dir():
+        message = f"The input path {input_dir} doesn't exist or is not a directory"
+        raise NotADirectoryError(message)
+
+    if not result_dir.exists():
+        logger.info(f"Creating the output directory {result_dir}")
+        result_dir.mkdir(parents=True)
+    elif not result_dir.is_dir():
+        message = f"The output path {result_dir} exists but is not a directory"
+        raise NotADirectoryError(message)
+
+
+def _get_report_file_path(
+    project: str,
+    source_dialect: type[Dialect],
+    target_dialect: type[Dialect],
+    result_dir: Path,
+) -> Path:
+    source_dialect_name = source_dialect.__name__
+    target_dialect_name = target_dialect.__name__
+    current_time_ns = time.time_ns()
+    return result_dir / f"{project}_{source_dialect_name}_{target_dialect_name}_{current_time_ns}.json".lower()
+
+
+def _prepare_report_entry(
+    project: str,
+    commit_hash: str,
+    version: str,
+    source_dialect: type[Dialect],
+    target_dialect: type[Dialect],
+    file_path: str,
+    sql: str,
+) -> ReportEntry:
+    report_entry = ReportEntry(
+        project=project,
+        commit_hash=commit_hash,
+        version=version,
+        timestamp=get_current_time_utc().isoformat(),
+        source_dialect=source_dialect.__name__,
+        target_dialect=target_dialect.__name__,
+        file=file_path,
+    )
+    try:
+        expressions = parse_sql(sql, source_dialect)
+        report_entry.parsed = 1
+        report_entry.statements = len(expressions)
+    except Exception as pe:
+        report_entry.failures.append({'error_code': type(pe).__name__, 'error_message': repr(pe)})
+        return report_entry
+
+    try:
+        generated_sqls = generate_sql(expressions, target_dialect)
+        report_entry.transpiled = 1
+        report_entry.transpiled_statements = len([sql for sql in generated_sqls if sql.strip()])
+    except Exception as te:
+        report_entry.failures.append({'error_code': type(te).__name__, 'error_message': repr(te)})
+
+    return report_entry
+
+
+def collect_transpilation_stats(
+    project: str,
+    commit_hash: str,
+    version: str,
+    source_dialect: type[Dialect],
+    target_dialect: type[Dialect],
+    input_dir: Path,
+    result_dir: Path,
+):
+    _ensure_valid_io_paths(input_dir, result_dir)
+    report_file_path = _get_report_file_path(project, source_dialect, target_dialect, result_dir)
+
+    with report_file_path.open("w", encoding="utf8") as report_file:
+        for input_file in get_supported_sql_files(input_dir):
+            with input_file.open("r", encoding="utf-8-sig") as file:
+                sql = file.read()
+
+            file_path = str(input_file.absolute().relative_to(input_dir.parent.absolute()))
+            report_entry = _prepare_report_entry(
+                project,
+                commit_hash,
+                version,
+                source_dialect,
+                target_dialect,
+                file_path,
+                sql,
+            )
+            write_json_line(report_file, report_entry)

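The core of this module is the parse_sql/generate_sql pair; everything else is bookkeeping that writes one JSON-lines ReportEntry per input file. A minimal round trip through the two helpers, assuming only that sqlglot is installed (both functions take dialect classes, matching the signatures above; the sample query is illustrative):

from sqlglot.dialects.databricks import Databricks
from sqlglot.dialects.snowflake import Snowflake

from databricks.labs.lakebridge.coverage.commons import generate_sql, parse_sql

expressions = parse_sql("SELECT IFF(a > 1, 'big', 'small') FROM t", Snowflake)
for sql in generate_sql(expressions, Databricks):
    print(sql)  # the Databricks rendering of each parsed Snowflake statement
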
databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+from databricks.labs.blueprint.wheels import ProductInfo
+from databricks.labs.lakebridge.coverage import commons
+from databricks.labs.lakebridge.transpiler.sqlglot.generator.databricks import Databricks
+from databricks.labs.lakebridge.transpiler.sqlglot.parsers.snowflake import Snowflake
+
+if __name__ == "__main__":
+    input_dir = commons.get_env_var("INPUT_DIR_PARENT", required=True)
+    output_dir = commons.get_env_var("OUTPUT_DIR", required=True)
+
+    REMORPH_COMMIT_HASH = commons.get_current_commit_hash() or ""  # C0103 pylint
+    product_info = ProductInfo(__file__)
+    remorph_version = product_info.unreleased_version()
+
+    if not input_dir:
+        raise ValueError("Environment variable `INPUT_DIR_PARENT` is required")
+    if not output_dir:
+        raise ValueError("Environment variable `OUTPUT_DIR` is required")
+
+    commons.collect_transpilation_stats(
+        "Remorph",
+        REMORPH_COMMIT_HASH,
+        remorph_version,
+        Snowflake,
+        Databricks,
+        Path(input_dir) / 'snowflake',
+        Path(output_dir),
+    )

databricks/labs/lakebridge/coverage/local_report.py
@@ -0,0 +1,9 @@
+from pathlib import Path
+
+from databricks.labs.lakebridge.coverage import commons
+
+if __name__ == "__main__":
+    output_dir = commons.get_env_var("OUTPUT_DIR", required=True)
+    if not output_dir:
+        raise ValueError("Environment variable `OUTPUT_DIR` is required")
+    commons.local_report(Path(output_dir))

File without changes