databricks-labs-lakebridge 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/__init__.py +3 -0
- databricks/labs/__init__.py +3 -0
- databricks/labs/lakebridge/__about__.py +2 -0
- databricks/labs/lakebridge/__init__.py +11 -0
- databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks/labs/lakebridge/base_install.py +12 -0
- databricks/labs/lakebridge/cli.py +449 -0
- databricks/labs/lakebridge/config.py +192 -0
- databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks/labs/lakebridge/install.py +849 -0
- databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks/labs/lakebridge/lineage.py +42 -0
- databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks/labs/lakebridge/uninstall.py +28 -0
- databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
- databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
- databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
- databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
- docs/lakebridge/src/components/Button.tsx +81 -0
- docs/lakebridge/src/css/custom.css +167 -0
- docs/lakebridge/src/css/table.css +20 -0
- docs/lakebridge/src/pages/index.tsx +57 -0
- docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/deployment/recon.py
@@ -0,0 +1,145 @@
+import logging
+from importlib.resources import files
+
+from databricks.labs.blueprint.installation import Installation
+from databricks.labs.blueprint.installer import InstallState
+from databricks.labs.blueprint.wheels import ProductInfo
+from databricks.labs.blueprint.wheels import find_project_root
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import InvalidParameterValue, NotFound
+
+import databricks.labs.lakebridge.resources
+from databricks.labs.lakebridge.config import ReconcileConfig
+from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment
+from databricks.labs.lakebridge.deployment.job import JobDeployment
+from databricks.labs.lakebridge.deployment.table import TableDeployment
+
+logger = logging.getLogger(__name__)
+
+_RECON_PREFIX = "Reconciliation"
+RECON_JOB_NAME = f"{_RECON_PREFIX} Runner"
+
+
+class ReconDeployment:
+    def __init__(
+        self,
+        ws: WorkspaceClient,
+        installation: Installation,
+        install_state: InstallState,
+        product_info: ProductInfo,
+        table_deployer: TableDeployment,
+        job_deployer: JobDeployment,
+        dashboard_deployer: DashboardDeployment,
+    ):
+        self._ws = ws
+        self._installation = installation
+        self._install_state = install_state
+        self._product_info = product_info
+        self._table_deployer = table_deployer
+        self._job_deployer = job_deployer
+        self._dashboard_deployer = dashboard_deployer
+
+    def install(self, recon_config: ReconcileConfig | None, wheel_paths: list[str]):
+        if not recon_config:
+            logger.warning("Recon Config is empty.")
+            return
+        logger.info("Installing reconcile components.")
+        self._deploy_tables(recon_config)
+        self._deploy_dashboards(recon_config)
+        remorph_wheel_path = [whl for whl in wheel_paths if "remorph" in whl][0]
+        self._deploy_jobs(recon_config, remorph_wheel_path)
+        self._install_state.save()
+        logger.info("Installation of reconcile components completed successfully.")
+
+    def uninstall(self, recon_config: ReconcileConfig | None):
+        if not recon_config:
+            return
+        logger.info("Uninstalling reconcile components.")
+        self._remove_dashboards()
+        self._remove_jobs()
+        logging.info(
+            f"Won't remove reconcile metadata schema `{recon_config.metadata_config.schema}` "
+            f"from catalog `{recon_config.metadata_config.catalog}`. Please remove it and the tables inside manually."
+        )
+        logging.info(
+            f"Won't remove configured reconcile secret scope `{recon_config.secret_scope}`. "
+            f"Please remove it manually."
+        )
+
+    def _deploy_tables(self, recon_config: ReconcileConfig):
+        logger.info("Deploying reconciliation metadata tables.")
+        catalog = recon_config.metadata_config.catalog
+        schema = recon_config.metadata_config.schema
+        resources = files(databricks.labs.lakebridge.resources)
+        query_dir = resources.joinpath("reconcile/queries/installation")
+
+        sqls_to_deploy = [
+            "main.sql",
+            "metrics.sql",
+            "details.sql",
+            "aggregate_metrics.sql",
+            "aggregate_details.sql",
+            "aggregate_rules.sql",
+        ]
+
+        for sql_file in sqls_to_deploy:
+            table_sql_file = query_dir.joinpath(sql_file)
+            self._table_deployer.deploy_table_from_ddl_file(catalog, schema, sql_file.strip(".sql"), table_sql_file)
+
+    def _deploy_dashboards(self, recon_config: ReconcileConfig):
+        logger.info("Deploying reconciliation dashboards.")
+        dashboard_base_dir = (
+            find_project_root(__file__) / "src/databricks/labs/lakebridge/resources/reconcile/dashboards"
+        )
+        self._dashboard_deployer.deploy(dashboard_base_dir, recon_config)
+
+    def _get_dashboards(self) -> list[tuple[str, str]]:
+        return list(self._install_state.dashboards.items())
+
+    def _remove_dashboards(self):
+        logger.info("Removing reconciliation dashboards.")
+        for dashboard_ref, dashboard_id in self._get_dashboards():
+            try:
+                logger.info(f"Removing dashboard with id={dashboard_id}.")
+                del self._install_state.dashboards[dashboard_ref]
+                self._ws.lakeview.trash(dashboard_id)
+            except (InvalidParameterValue, NotFound):
+                logger.warning(f"Dashboard with id={dashboard_id} doesn't exist anymore for some reason.")
+                continue
+
+    def _deploy_jobs(self, recon_config: ReconcileConfig, remorph_wheel_path: str):
+        logger.info("Deploying reconciliation jobs.")
+        self._job_deployer.deploy_recon_job(RECON_JOB_NAME, recon_config, remorph_wheel_path)
+        for job_name, job_id in self._get_deprecated_jobs():
+            try:
+                logger.info(f"Removing job_id={job_id}, as it is no longer needed.")
+                del self._install_state.jobs[job_name]
+                self._ws.jobs.delete(job_id)
+            except (InvalidParameterValue, NotFound):
+                logger.warning(f"{job_name} doesn't exist anymore for some reason.")
+                continue
+
+    def _get_jobs(self) -> list[tuple[str, int]]:
+        return [
+            (job_name, int(job_id))
+            for job_name, job_id in self._install_state.jobs.items()
+            if job_name.startswith(_RECON_PREFIX)
+        ]
+
+    def _get_deprecated_jobs(self) -> list[tuple[str, int]]:
+        return [
+            (job_name, int(job_id))
+            for job_name, job_id in self._install_state.jobs.items()
+            if job_name.startswith(_RECON_PREFIX) and job_name != RECON_JOB_NAME
+        ]
+
+    def _remove_jobs(self):
+        logger.info("Removing Reconciliation Jobs.")
+        for job_name, job_id in self._get_jobs():
+            try:
+                logger.info(f"Removing job {job_name} with job_id={job_id}.")
+                del self._install_state.jobs[job_name]
+                self._ws.jobs.delete(int(job_id))
+            except (InvalidParameterValue, NotFound):
+                logger.warning(f"{job_name} doesn't exist anymore for some reason.")
+                continue
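For reference, the job cleanup in `_get_jobs` and `_get_deprecated_jobs` above is a plain prefix filter over `InstallState.jobs`. A minimal sketch, with a hypothetical dict standing in for the install state:

```python
# Minimal sketch of the prefix-based job selection used by ReconDeployment.
# A plain dict with made-up values stands in for InstallState.jobs.
_RECON_PREFIX = "Reconciliation"
RECON_JOB_NAME = f"{_RECON_PREFIX} Runner"

jobs = {
    "Reconciliation Runner": "101",      # current recon job -> kept
    "Reconciliation Legacy Job": "102",  # deprecated recon job -> removed
    "Transpile Runner": "103",           # unrelated job -> ignored
}

recon_jobs = [(name, int(job_id)) for name, job_id in jobs.items() if name.startswith(_RECON_PREFIX)]
deprecated = [(name, job_id) for name, job_id in recon_jobs if name != RECON_JOB_NAME]

print(recon_jobs)   # [('Reconciliation Runner', 101), ('Reconciliation Legacy Job', 102)]
print(deprecated)   # [('Reconciliation Legacy Job', 102)]
```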
databricks/labs/lakebridge/deployment/table.py
@@ -0,0 +1,30 @@
+import logging
+from importlib.abc import Traversable
+
+from databricks.labs.lsql.backends import SqlBackend
+
+logger = logging.getLogger(__name__)
+
+
+class TableDeployment:
+    def __init__(self, sql_backend: SqlBackend):
+        self._sql_backend = sql_backend
+
+    def deploy_table_from_ddl_file(
+        self,
+        catalog: str,
+        schema: str,
+        table_name: str,
+        ddl_query_filepath: Traversable,
+    ):
+        """
+        Deploys a table to the given catalog and schema
+        :param catalog: The table catalog
+        :param schema: The table schema
+        :param table_name: The table to deploy
+        :param ddl_query_filepath: DDL file path
+        """
+        query = ddl_query_filepath.read_text()
+        logger.info(f"Deploying table {table_name} in {catalog}.{schema}")
+        logger.info(f"SQL Backend used for deploying table: {type(self._sql_backend).__name__}")
+        self._sql_backend.execute(query, catalog=catalog, schema=schema)
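A hedged usage sketch of `TableDeployment`: the warehouse id, catalog and schema below are placeholders, and the DDL file is resolved the same way `ReconDeployment._deploy_tables` does above.

```python
# Sketch only: wiring TableDeployment by hand with a statement-execution backend.
from importlib.resources import files

from databricks.sdk import WorkspaceClient
from databricks.labs.lsql.backends import StatementExecutionBackend

import databricks.labs.lakebridge.resources
from databricks.labs.lakebridge.deployment.table import TableDeployment

ws = WorkspaceClient()  # assumes standard Databricks auth via env vars or a profile
sql_backend = StatementExecutionBackend(ws, "your-warehouse-id")  # placeholder warehouse id
deployer = TableDeployment(sql_backend)

# Resolve one of the packaged installation DDL files, as _deploy_tables does.
ddl_file = files(databricks.labs.lakebridge.resources).joinpath(
    "reconcile/queries/installation/main.sql"
)
deployer.deploy_table_from_ddl_file("my_catalog", "reconcile_metadata", "main", ddl_file)
```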
databricks/labs/lakebridge/deployment/upgrade_common.py
@@ -0,0 +1,124 @@
+import logging
+import re
+from importlib.resources import files
+
+import databricks.labs.lakebridge.resources
+
+from databricks.labs.blueprint.tui import Prompts
+from databricks.sdk import WorkspaceClient
+from databricks.labs.lakebridge.helpers import db_sql
+
+logger = logging.getLogger(__name__)
+
+
+def replace_patterns(sql_text: str) -> str:
+    """
+    Replace the STRUCT and MAP datatypes in the SQL text with empty string
+    """
+    # Pattern to match nested STRUCT and MAP datatypes
+    pattern = r'(STRUCT<[^<>]*?(?:<[^<>]*?>[^<>]*?)*>|MAP<[^<>]*?(?:<[^<>]*?>[^<>]*?)*>)'
+    parsed_sql_text = re.sub(pattern, "", sql_text, flags=re.DOTALL)
+    return parsed_sql_text
+
+
+def extract_columns_with_datatype(sql_text: str) -> list[str]:
+    """
+    Extract the columns with datatype from the SQL text
+    Example:
+        Input: CREATE TABLE main (
+                   recon_table_id BIGINT NOT NULL,
+                   report_type STRING NOT NULL
+               );
+        Output: [recon_table_id BIGINT NOT NULL,
+                 report_type STRING NOT NULL]
+    """
+    return sql_text[sql_text.index("(") + 1 : sql_text.index(")")].strip().split(",")
+
+
+def extract_column_name(column_with_datatype: str) -> str:
+    """
+    Extract the column name from the column with datatype.
+    Example:
+        Input: \n recon_table_id BIGINT NOT NULL,
+        Output: recon_table_id
+    """
+    return column_with_datatype.strip("\n").strip().split(" ")[0]
+
+
+def table_original_query(table_name: str, full_table_name: str) -> str:
+    """
+    Get the main table DDL from the main.sql file
+    :return: str
+    """
+    resources = files(databricks.labs.lakebridge.resources)
+    query_dir = resources.joinpath("reconcile/queries/installation")
+    return (
+        query_dir.joinpath(f"{table_name}.sql")
+        .read_text()
+        .replace(f"CREATE TABLE IF NOT EXISTS {table_name}", f"CREATE OR REPLACE TABLE {full_table_name}")
+    )
+
+
+def current_table_columns(table_name: str, full_table_name: str) -> list[str]:
+    """
+    Extract the column names from the main table DDL
+    :return: column_names: list[str]
+    """
+    main_sql = replace_patterns(table_original_query(table_name, full_table_name))
+    main_table_columns = [
+        extract_column_name(main_table_column) for main_table_column in extract_columns_with_datatype(main_sql)
+    ]
+    return main_table_columns
+
+
+def installed_table_columns(ws: WorkspaceClient, table_identifier: str) -> list[str]:
+    """
+    Fetch the column names from the installed table on Databricks Workspace using SQL Backend
+    :return: column_names: list[str]
+    """
+    main_table_columns = list(db_sql.get_sql_backend(ws).fetch(f"DESC {table_identifier}"))
+    return [row.col_name for row in main_table_columns]
+
+
+def check_table_mismatch(
+    installed_table,
+    current_table,
+) -> bool:
+    # Compare the current main table columns with the installed main table columns
+    if len(installed_table) != len(current_table) or sorted(installed_table) != sorted(current_table):
+        return True
+    return False
+
+
+def recreate_table_sql(
+    table_identifier: str,
+    installed_table: list[str],
+    current_table: list[str],
+    prompts: Prompts,
+) -> str | None:
+    """
+    * Verify all the current main table columns are present in the installed main table and then use CTAS to recreate the main table
+    * If any of the current main table columns are missing in the installed main table, prompt the user to recreate the main table:
+      - If the user confirms, recreate the main table using the main DDL file, else log an error message and exit
+    :param table_identifier:
+    :param installed_table:
+    :param current_table:
+    :param prompts:
+    :return:
+    """
+    table_name = table_identifier.split('.')[-1]
+    sql: str | None = (
+        f"CREATE OR REPLACE TABLE {table_identifier} AS SELECT {','.join(current_table)} FROM {table_identifier}"
+    )
+
+    if not set(current_table).issubset(installed_table):
+        if prompts.confirm(
+            f"The `{table_identifier}` table columns are not as expected. Do you want to recreate the `{table_identifier}` table?"
+        ):
+            sql = table_original_query(table_name, table_identifier)
+        else:
+            logger.error(
+                f"The `{table_identifier}` table columns are not as expected. Please check and recreate the `{table_identifier}` table."
+            )
+            sql = None
+    return sql
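The column-extraction helpers above are pure string functions, so they can be exercised against an inline DDL snippet; this small example uses a made-up table definition:

```python
# Self-contained example of the column-extraction helpers, run against an
# inline DDL string rather than the packaged main.sql.
from databricks.labs.lakebridge.deployment.upgrade_common import (
    extract_column_name,
    extract_columns_with_datatype,
    replace_patterns,
)

ddl = """CREATE TABLE IF NOT EXISTS main (
    recon_table_id BIGINT NOT NULL,
    report_type STRING NOT NULL
);"""

columns_with_types = extract_columns_with_datatype(replace_patterns(ddl))
column_names = [extract_column_name(column) for column in columns_with_types]
print(column_names)  # ['recon_table_id', 'report_type']
```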
databricks/labs/lakebridge/discovery/table.py
@@ -0,0 +1,36 @@
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class TableFQN:
+    catalog: str | None
+    schema: str
+    name: str
+
+    @property
+    def fqn(self) -> str:
+        if self.catalog:
+            return f"{self.catalog}.{self.schema}.{self.name}"
+        return f"{self.schema}.{self.name}"
+
+
+@dataclass
+class FieldInfo:
+    name: str
+    data_type: str
+    nullable: bool | None = None
+    metadata: dict[str, Any] | None = None
+    comment: str | None = None
+
+
+@dataclass
+class TableDefinition:
+    fqn: TableFQN
+    location: str | None = None
+    table_format: str | None = None
+    view_text: str | None = None
+    columns: list[FieldInfo] = field(default_factory=list)
+    primary_keys: list[str] | None = None
+    size_gb: int | None = None
+    comment: str | None = None
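A quick illustration of the discovery dataclasses above, with made-up values:

```python
# Constructing a table definition by hand; all values are placeholders.
from databricks.labs.lakebridge.discovery.table import FieldInfo, TableDefinition, TableFQN

fqn = TableFQN(catalog="sales_db", schema="dbo", name="orders")
table = TableDefinition(
    fqn=fqn,
    table_format="ROWS",
    columns=[FieldInfo(name="order_id", data_type="bigint", nullable=False)],
    primary_keys=["order_id"],
)
print(table.fqn.fqn)  # sales_db.dbo.orders
```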
databricks/labs/lakebridge/discovery/table_definition.py
@@ -0,0 +1,23 @@
+from abc import ABC, abstractmethod
+from collections.abc import Iterable
+from typing import Any
+
+from databricks.labs.lakebridge.discovery.table import TableDefinition
+
+
+class TableDefinitionService(ABC):
+
+    def __init__(self, connection: Any):
+        self.connection = connection
+
+    @abstractmethod
+    def get_table_definition(self, catalog_name: str) -> Iterable[TableDefinition]:
+        pass
+
+    @abstractmethod
+    def _get_table_definition_query(self, catalog_name: str) -> str:
+        pass
+
+    @abstractmethod
+    def get_all_catalog(self) -> Iterable[str]:
+        pass
databricks/labs/lakebridge/discovery/tsql_table_definition.py
@@ -0,0 +1,185 @@
+from collections.abc import Iterable
+
+from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
+from databricks.labs.lakebridge.discovery.table import TableDefinition, TableFQN, FieldInfo
+from databricks.labs.lakebridge.discovery.table_definition import TableDefinitionService
+
+
+class TsqlTableDefinitionService(TableDefinitionService):
+
+    # Hexadecimal value of § is U+00A7.Hexadecimal value of ‡ (double dagger) is U+2021
+    def _get_table_definition_query(self, catalog_name: str) -> str:
+        query = f"""
+        WITH column_info AS (
+            SELECT
+                TABLE_CATALOG,
+                TABLE_SCHEMA,
+                TABLE_NAME,
+                STRING_AGG(
+                    CONCAT(
+                        column_name,
+                        '§',
+                        CASE
+                            WHEN numeric_precision IS NOT NULL AND numeric_scale IS NOT NULL THEN CONCAT(data_type, '(', numeric_precision, ',', numeric_scale, ')')
+                            WHEN LOWER(data_type) = 'text' THEN CONCAT('varchar', '(', CHARACTER_MAXIMUM_LENGTH, ')')
+                            ELSE data_type
+                        END,
+                        '§',
+                        CASE
+                            WHEN cis.IS_NULLABLE = 'YES' THEN 'true'
+                            ELSE 'false'
+                        END,
+                        '§',
+                        ISNULL(CAST(ep_col.value AS NVARCHAR(MAX)), '')
+                    ),
+                    '‡'
+                ) WITHIN GROUP (ORDER BY ordinal_position) AS DERIVED_SCHEMA
+            FROM
+                {catalog_name}.sys.tables t
+                INNER JOIN {catalog_name}.sys.columns c ON t.object_id = c.object_id
+                INNER JOIN {catalog_name}.INFORMATION_SCHEMA.COLUMNS cis ON t.name = cis.TABLE_NAME AND c.name = cis.COLUMN_NAME
+                OUTER APPLY (
+                    SELECT TOP 1 value
+                    FROM {catalog_name}.sys.extended_properties
+                    WHERE major_id = t.object_id AND minor_id = 0
+                    ORDER BY name DESC
+                ) ep_tbl
+                OUTER APPLY (
+                    SELECT TOP 1 value
+                    FROM {catalog_name}.sys.extended_properties
+                    WHERE major_id = c.object_id AND minor_id = c.column_id
+                    ORDER BY name DESC
+                ) ep_col
+            GROUP BY
+                TABLE_CATALOG,
+                TABLE_SCHEMA,
+                TABLE_NAME
+        ),
+        table_file_info AS (
+            SELECT
+                s.name AS TABLE_SCHEMA,
+                t.name AS TABLE_NAME,
+                f.physical_name AS location,
+                f.type_desc AS TABLE_FORMAT,
+                CAST(ROUND(SUM(a.used_pages) * 8.0 / 1024, 2) AS DECIMAL(18, 2)) AS SIZE_GB
+            FROM
+                {catalog_name}.sys.tables t
+                INNER JOIN {catalog_name}.sys.indexes i ON t.object_id = i.object_id
+                INNER JOIN {catalog_name}.sys.partitions p ON i.object_id = p.object_id AND i.index_id = p.index_id
+                INNER JOIN {catalog_name}.sys.allocation_units a ON p.partition_id = a.container_id
+                INNER JOIN {catalog_name}.sys.schemas s ON t.schema_id = s.schema_id
+                INNER JOIN {catalog_name}.sys.database_files f ON a.data_space_id = f.data_space_id
+                LEFT JOIN {catalog_name}.sys.extended_properties ep ON ep.major_id = t.object_id AND ep.minor_id = 0
+            GROUP BY
+                s.name,
+                t.name,
+                f.name,
+                f.physical_name,
+                f.type_desc
+        ),
+        table_comment_info AS (
+            SELECT
+                s.name AS TABLE_SCHEMA,
+                t.name AS TABLE_NAME,
+                CAST(ep.value AS NVARCHAR(MAX)) AS TABLE_COMMENT
+            FROM
+                {catalog_name}.sys.tables t
+                INNER JOIN {catalog_name}.sys.schemas s ON t.schema_id = s.schema_id
+                OUTER APPLY (
+                    SELECT TOP 1 value
+                    FROM {catalog_name}.sys.extended_properties
+                    WHERE major_id = t.object_id AND minor_id = 0
+                    ORDER BY name DESC
+                ) ep
+        ),
+        table_pk_info AS (
+            SELECT
+                TC.TABLE_CATALOG,
+                TC.TABLE_SCHEMA,
+                TC.TABLE_NAME,
+                STRING_AGG(KU.COLUMN_NAME,':') as PK_COLUMN_NAME
+            FROM {catalog_name}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS TC
+            JOIN {catalog_name}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KU
+                ON TC.CONSTRAINT_NAME = KU.CONSTRAINT_NAME
+                AND TC.TABLE_NAME = KU.TABLE_NAME
+            WHERE TC.CONSTRAINT_TYPE = 'PRIMARY KEY' group by TC.TABLE_CATALOG, TC.TABLE_SCHEMA, TC.TABLE_NAME)
+        SELECT
+            sft.TABLE_CATALOG,
+            sft.TABLE_SCHEMA,
+            sft.TABLE_NAME,
+            tfi.location,
+            tfi.TABLE_FORMAT,
+            '' as view_definition,
+            column_info.DERIVED_SCHEMA,
+            tfi.SIZE_GB,
+            tci.TABLE_COMMENT,
+            tpK.PK_COLUMN_NAME
+        FROM
+            column_info
+            JOIN {catalog_name}.INFORMATION_SCHEMA.TABLES sft ON column_info.TABLE_CATALOG = sft.TABLE_CATALOG AND column_info.TABLE_SCHEMA = sft.TABLE_SCHEMA AND column_info.TABLE_NAME = sft.TABLE_NAME
+            LEFT JOIN table_file_info tfi ON column_info.TABLE_SCHEMA = tfi.TABLE_SCHEMA AND column_info.TABLE_NAME = tfi.TABLE_NAME
+            LEFT JOIN table_comment_info tci ON column_info.TABLE_SCHEMA = tci.TABLE_SCHEMA AND column_info.TABLE_NAME = tci.TABLE_NAME
+            LEFT JOIN table_pk_info tpK ON column_info.TABLE_SCHEMA = tpK.TABLE_SCHEMA AND column_info.TABLE_NAME = tpK.TABLE_NAME
+
+        UNION ALL
+        SELECT
+            sfv.TABLE_CATALOG,
+            sfv.TABLE_SCHEMA,
+            sfv.TABLE_NAME,
+            '' location,
+            '' TABLE_FORMAT,
+            sfv.view_definition,
+            '' DERIVED_SCHEMA,
+            0 SIZE_GB,
+            '' TABLE_COMMENT,
+            '' PK_COLUMN_NAME
+        FROM {catalog_name}.INFORMATION_SCHEMA.VIEWS sfv
+        """
+        return query
+
+    def get_table_definition(self, catalog_name: str) -> Iterable[TableDefinition]:
+        sql = self._get_table_definition_query(catalog_name)
+        tsql_connection = self.connection
+        result = tsql_connection.execute_query(sql)
+
+        column_names = list(result.keys())
+        table_definitions = []
+
+        for row in result:
+            result = dict(zip(column_names, row))
+            table_fqn = TableFQN(
+                catalog=result["TABLE_CATALOG"], schema=result["TABLE_SCHEMA"], name=result["TABLE_NAME"]
+            )
+            columns = result["DERIVED_SCHEMA"].split("‡") if result["DERIVED_SCHEMA"] else None
+            field_info = []
+            if columns is not None:
+                for column in columns:
+                    column_info = column.split("§")
+                    field = FieldInfo(
+                        name=column_info[0],
+                        data_type=column_info[1],
+                        nullable=column_info[2],
+                        comment=column_info[3],
+                    )
+                    field_info.append(field)
+
+            pks = result["PK_COLUMN_NAME"].split(":") if result["PK_COLUMN_NAME"] else None
+            table_definition = TableDefinition(
+                fqn=table_fqn,
+                location=result["location"],
+                table_format=result["TABLE_FORMAT"],
+                view_text=result["view_definition"],
+                columns=field_info,
+                size_gb=result["SIZE_GB"],
+                comment=result["TABLE_COMMENT"],
+                primary_keys=pks,
+            )
+            table_definitions.append(table_definition)
+        return table_definitions
+
+    def get_all_catalog(self) -> Iterable[str]:
+        cursor: DatabaseManager = self.connection
+        result = cursor.connector.execute_query("""select name from sys.databases""")
+        catalogs = [row[0] for row in result]
+        print(catalogs)
+        return catalogs
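The `DERIVED_SCHEMA` string produced by the query above packs one column per `‡`-separated entry, with `§` separating name, data type, nullability and comment, which is how `get_table_definition` splits it back apart. An offline sketch with a fabricated value, no SQL Server connection required:

```python
# Decode a sample DERIVED_SCHEMA string the same way get_table_definition does.
# The sample value is made up for illustration.
from databricks.labs.lakebridge.discovery.table import FieldInfo

derived_schema = "order_id§bigint§false§Primary key‡amount§numeric(18,2)§true§"

fields = []
for column in derived_schema.split("‡"):
    name, data_type, nullable, comment = column.split("§")
    fields.append(FieldInfo(name=name, data_type=data_type, nullable=nullable, comment=comment))

for f in fields:
    print(f.name, f.data_type, f.nullable, repr(f.comment))
```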
databricks/labs/lakebridge/errors/exceptions.py
@@ -0,0 +1 @@
+class IllegalStateException(BaseException): ...

databricks/labs/lakebridge/helpers/__init__.py
File without changes
databricks/labs/lakebridge/helpers/db_sql.py
@@ -0,0 +1,24 @@
+import logging
+import os
+
+from databricks.labs.lsql.backends import (
+    DatabricksConnectBackend,
+    RuntimeBackend,
+    SqlBackend,
+    StatementExecutionBackend,
+)
+from databricks.sdk import WorkspaceClient
+
+logger = logging.getLogger(__name__)
+
+
+def get_sql_backend(ws: WorkspaceClient, warehouse_id: str | None = None) -> SqlBackend:
+    warehouse_id = warehouse_id or ws.config.warehouse_id
+    if warehouse_id:
+        logger.info(f"Using SQL backend with warehouse_id: {warehouse_id}")
+        return StatementExecutionBackend(ws, warehouse_id)
+    if "DATABRICKS_RUNTIME_VERSION" in os.environ:
+        logger.info("Using SQL backend with Databricks Runtime.")
+        return RuntimeBackend()
+    logger.info("Using SQL backend with Databricks Connect.")
+    return DatabricksConnectBackend(ws)
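A hedged usage sketch of `get_sql_backend`: backend selection follows the order shown above (an explicit or configured warehouse id wins, then the Databricks runtime, then Databricks Connect). The warehouse id below is a placeholder and authentication is assumed to come from the standard Databricks SDK environment or profile configuration.

```python
# Sketch only: pick a backend and run a trivial query through it.
from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.helpers.db_sql import get_sql_backend

ws = WorkspaceClient()  # assumes standard Databricks authentication
backend = get_sql_backend(ws, warehouse_id="your-warehouse-id")  # placeholder id
for row in backend.fetch("SELECT current_catalog() AS catalog"):
    print(row.catalog)
```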
databricks/labs/lakebridge/helpers/execution_time.py
@@ -0,0 +1,20 @@
+import inspect
+import logging
+import time
+from functools import wraps
+
+logger = logging.getLogger(__name__)
+
+
+def timeit(func):
+    @wraps(func)
+    def timeit_wrapper(*args, **kwargs):
+        start_time = time.perf_counter()
+        result = func(*args, **kwargs)
+        end_time = time.perf_counter()
+        total_time = end_time - start_time
+        name = inspect.getmodule(func).__name__.split(".")[3].capitalize()
+        logger.info(f"{name} took {total_time:.4f} seconds")
+        return result
+
+    return timeit_wrapper
databricks/labs/lakebridge/helpers/file_utils.py
@@ -0,0 +1,64 @@
+from pathlib import Path
+from collections.abc import Generator
+
+
+def is_sql_file(file: str | Path) -> bool:
+    """
+    Checks if the given file is a SQL file.
+
+    :param file: The name of the file to check.
+    :return: True if the file is a SQL file (i.e., its extension is either .sql or .ddl), False otherwise.
+    """
+    file_extension = Path(file).suffix
+    return file_extension.lower() in {".sql", ".ddl"}
+
+
+def is_dbt_project_file(file: Path):
+    # it's ok to hardcode the file name here, see https://docs.getdbt.com/reference/dbt_project.yml
+    return file.name == "dbt_project.yml"
+
+
+def make_dir(path: str | Path) -> None:
+    """
+    Creates a directory at the specified path if it does not already exist.
+
+    :param path: The path where the directory should be created.
+    """
+    Path(path).mkdir(parents=True, exist_ok=True)
+
+
+def dir_walk(root: Path):
+    """
+    Walks the directory tree rooted at the given path, yielding a tuple containing the root directory, a list of
+    :param root: Path
+    :return: tuple of root, subdirectory , files
+    """
+    sub_dirs = [d for d in root.iterdir() if d.is_dir()]
+    files = [f for f in root.iterdir() if f.is_file()]
+    yield root, sub_dirs, files
+
+    for each_dir in sub_dirs:
+        yield from dir_walk(each_dir)
+
+
+def get_sql_file(input_path: str | Path) -> Generator[Path, None, None]:
+    """
+    Returns Generator that yields the names of all SQL files in the given directory.
+    :param input_path: Path
+    :return: List of SQL files
+    """
+    for _, _, files in dir_walk(Path(input_path)):
+        for filename in files:
+            if is_sql_file(filename):
+                yield filename
+
+
+def read_file(filename: str | Path) -> str:
+    """
+    Reads the contents of the given file and returns it as a string.
+    :param filename: Input File Path
+    :return: File Contents as String
+    """
+    # pylint: disable=unspecified-encoding
+    with Path(filename).open() as file:
+        return file.read()
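A short usage sketch of the file helpers above; the `queries` directory is a placeholder:

```python
# Walk a local folder of SQL sources and print each file's size in characters.
from pathlib import Path

from databricks.labs.lakebridge.helpers.file_utils import get_sql_file, make_dir, read_file

make_dir("out")  # no-op if the directory already exists
for sql_path in get_sql_file(Path("queries")):  # placeholder input directory
    print(sql_path, len(read_file(sql_path)), "characters")
```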