databricks-labs-lakebridge 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks_labs_lakebridge-0.10.0/.gitignore +24 -0
- databricks_labs_lakebridge-0.10.0/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0/NOTICE +42 -0
- databricks_labs_lakebridge-0.10.0/PKG-INFO +58 -0
- databricks_labs_lakebridge-0.10.0/README.md +19 -0
- databricks_labs_lakebridge-0.10.0/databricks/__init__.py +3 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/__init__.py +3 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/__about__.py +2 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/__init__.py +11 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/base_install.py +12 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/cli.py +449 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/config.py +192 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/install.py +849 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/lineage.py +42 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/uninstall.py +28 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/components/Button.tsx +81 -0
- databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/css/custom.css +167 -0
- databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/css/table.css +20 -0
- databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/pages/index.tsx +57 -0
- databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/theme/Layout/index.tsx +18 -0
- databricks_labs_lakebridge-0.10.0/pyproject.toml +768 -0
databricks_labs_lakebridge-0.10.0/.gitignore
ADDED
@@ -0,0 +1,24 @@
+.venv
+.python-version
+.sdkmanrc
+.DS_Store
+*.pyc
+__pycache__
+dist
+.idea
+/htmlcov/
+*.iml
+target/
+.coverage
+.coverage.*
+coverage.*
+*.iws
+/core/gen/
+/antlrlinter/gen/
+*.tokens
+spark-warehouse/
+remorph_transpile/
+/linter/gen/
+/linter/src/main/antlr4/library/gen/
+.databricks-login.json
+.mypy_cache
databricks_labs_lakebridge-0.10.0/LICENSE
ADDED
@@ -0,0 +1,69 @@
+Databricks License
+Copyright (2024) Databricks, Inc.
+
+Definitions.
+
+Agreement: The agreement between Databricks, Inc., and you governing
+the use of the Databricks Services, as that term is defined in
+the Master Cloud Services Agreement (MCSA) located at
+www.databricks.com/legal/mcsa.
+
+Licensed Materials: The source code, object code, data, and/or other
+works to which this license applies.
+
+Scope of Use. You may not use the Licensed Materials except in
+connection with your use of the Databricks Services pursuant to
+the Agreement. Your use of the Licensed Materials must comply at all
+times with any restrictions applicable to the Databricks Services,
+generally, and must be used in accordance with any applicable
+documentation. You may view, use, copy, modify, publish, and/or
+distribute the Licensed Materials solely for the purposes of using
+the Licensed Materials within or connecting to the Databricks Services.
+If you do not agree to these terms, you may not view, use, copy,
+modify, publish, and/or distribute the Licensed Materials.
+
+Redistribution. You may redistribute and sublicense the Licensed
+Materials so long as all use is in compliance with these terms.
+In addition:
+
+- You must give any other recipients a copy of this License;
+- You must cause any modified files to carry prominent notices
+stating that you changed the files;
+- You must retain, in any derivative works that you distribute,
+all copyright, patent, trademark, and attribution notices,
+excluding those notices that do not pertain to any part of
+the derivative works; and
+- If a "NOTICE" text file is provided as part of its
+distribution, then any derivative works that you distribute
+must include a readable copy of the attribution notices
+contained within such NOTICE file, excluding those notices
+that do not pertain to any part of the derivative works.
+
+You may add your own copyright statement to your modifications and may
+provide additional license terms and conditions for use, reproduction,
+or distribution of your modifications, or for any such derivative works
+as a whole, provided your use, reproduction, and distribution of
+the Licensed Materials otherwise complies with the conditions stated
+in this License.
+
+Termination. This license terminates automatically upon your breach of
+these terms or upon the termination of your Agreement. Additionally,
+Databricks may terminate this license at any time on notice. Upon
+termination, you must permanently delete the Licensed Materials and
+all copies thereof.
+
+DISCLAIMER; LIMITATION OF LIABILITY.
+
+THE LICENSED MATERIALS ARE PROVIDED “AS-IS” AND WITH ALL FAULTS.
+DATABRICKS, ON BEHALF OF ITSELF AND ITS LICENSORS, SPECIFICALLY
+DISCLAIMS ALL WARRANTIES RELATING TO THE LICENSED MATERIALS, EXPRESS
+AND IMPLIED, INCLUDING, WITHOUT LIMITATION, IMPLIED WARRANTIES,
+CONDITIONS AND OTHER TERMS OF MERCHANTABILITY, SATISFACTORY QUALITY OR
+FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. DATABRICKS AND
+ITS LICENSORS TOTAL AGGREGATE LIABILITY RELATING TO OR ARISING OUT OF
+YOUR USE OF OR DATABRICKS’ PROVISIONING OF THE LICENSED MATERIALS SHALL
+BE LIMITED TO ONE THOUSAND ($1,000) DOLLARS. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED MATERIALS OR
+THE USE OR OTHER DEALINGS IN THE LICENSED MATERIALS.
databricks_labs_lakebridge-0.10.0/NOTICE
ADDED
@@ -0,0 +1,42 @@
+Copyright (2024) Databricks, Inc.
+
+This software includes software developed at Databricks (https://www.databricks.com/) and its use is subject to the included LICENSE file.
+
+____________________
+This software contains code from the following open source projects, licensed under the MIT license (https://opensource.org/license/mit):
+
+SQL Glot - https://github.com/tobymao/sqlglot
+Copyright 2023 Toby Mao
+
+Sqlalchemy - https://github.com/sqlalchemy/sq
+Copyright 2005-2025 SQLAlchemy authors and contributors <see AUTHORS file>.
+
+Duckdb - https://github.com/duckdb/duckdb
+Copyright 2018-2025 Stichting DuckDB Foundation
+
+____________________
+This software contains code from the following open source projects, licensed under the Apache 2.0 license (https://www.apache.org/licenses/LICENSE-2.0):
+
+Databricks SDK for Python - https://github.com/databricks/databricks-sdk-py
+Copyright 2023 Databricks, Inc. All rights reserved.
+
+cryptography - https://github.com/pyca/cryptography
+Copyright 2013-2023 The Python Cryptographic Authority and individual contributors.
+
+Pygls - https://github.com/openlawlibrary/pygls
+Copyright pygls authors.
+
+____________________
+This software contains code from the following open source projects, licensed under the Python Software license (https://opensource.org/license/python-2-0):
+
+Standard-distutils - https://pypi.org/project/standard-distutils/
+Copyright standard-distutils authors.
+
+____________________
+This software contains code from the following publicly available projects, licensed under the Databricks (DB) license (https://www.databricks.com/legal/db-license):
+
+Databricks Labs Blueprint - https://github.com/databrickslabs/blueprint
+Copyright (2023) Databricks, Inc.
+
+Databricks Labs lsql - databricks-labs-lsql
+Copyright (2024) Databricks, Inc.
databricks_labs_lakebridge-0.10.0/PKG-INFO
ADDED
@@ -0,0 +1,58 @@
+Metadata-Version: 2.4
+Name: databricks-labs-lakebridge
+Version: 0.10.0
+Summary: Fast and predictable migrations to Databricks Lakehouse Platform. This tool is designed to help you migrate your data and workloads to the Databricks Lakehouse Platform in a fast, predictable, and reliable way. It provides a set of tools and utilities to help you reconcile your data and workloads, assess your current state, and plan your migration.
+Project-URL: Documentation, https://github.com/databrickslabs/lakebridge
+Project-URL: Issues, https://github.com/databrickslabs/lakebridge/issues
+Project-URL: Source, https://github.com/databrickslabs/lakebridge
+Maintainer-email: Databricks Labs <labs-oss@databricks.com>
+License-File: LICENSE
+License-File: NOTICE
+Keywords: Databricks
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Framework :: Pytest
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: System Administrators
+Classifier: License :: Other/Proprietary License
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Topic :: Utilities
+Requires-Python: >=3.10
+Requires-Dist: cryptography<45.1.0,>=44.0.2
+Requires-Dist: databricks-bb-analyzer~=0.1.6
+Requires-Dist: databricks-labs-blueprint[yaml]<0.12.0,>=0.11.0
+Requires-Dist: databricks-labs-lsql==0.16.0
+Requires-Dist: databricks-sdk~=0.51.0
+Requires-Dist: duckdb~=1.2.2
+Requires-Dist: pygls~=2.0.0a2
+Requires-Dist: pyodbc~=5.2.0
+Requires-Dist: sqlalchemy~=2.0.40
+Requires-Dist: sqlglot==26.1.3
+Requires-Dist: standard-distutils~=3.11.9; python_version >= '3.11'
+Description-Content-Type: text/markdown
+
+Databricks Labs Lakebridge
+---
+
+
+
+[](https://github.com/databrickslabs/remorph/actions/workflows/push.yml)
+
+
+-----
+Documentation
+The complete documentation is available at: https://databrickslabs.github.io/lakebridge/
+
+Contribution
+Please see the contribution guidance here on how to contribute to the project (build, test, and submit a PR).
+
+Project Support
+Please note that this project is provided for your exploration only and is not formally supported by Databricks with Service Level Agreements (SLAs). They are provided AS-IS, and we do not make any guarantees. Please do not submit a support ticket relating to any issues arising from the use of this project.
+
+Any issues discovered through the use of this project should be filed as GitHub Issues on this repository. They will be reviewed as time permits, but no formal SLAs for support exist.
databricks_labs_lakebridge-0.10.0/README.md
ADDED
@@ -0,0 +1,19 @@
+Databricks Labs Lakebridge
+---
+
+
+
+[](https://github.com/databrickslabs/remorph/actions/workflows/push.yml)
+
+
+-----
+Documentation
+The complete documentation is available at: https://databrickslabs.github.io/lakebridge/
+
+Contribution
+Please see the contribution guidance here on how to contribute to the project (build, test, and submit a PR).
+
+Project Support
+Please note that this project is provided for your exploration only and is not formally supported by Databricks with Service Level Agreements (SLAs). They are provided AS-IS, and we do not make any guarantees. Please do not submit a support ticket relating to any issues arising from the use of this project.
+
+Any issues discovered through the use of this project should be filed as GitHub Issues on this repository. They will be reviewed as time permits, but no formal SLAs for support exist.
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/__init__.py
ADDED
@@ -0,0 +1,11 @@
+from databricks.sdk.core import with_user_agent_extra, with_product
+from databricks.labs.blueprint.logger import install_logger
+from databricks.labs.lakebridge.__about__ import __version__
+
+install_logger()
+
+# Add lakebridge/<version> for projects depending on lakebridge as a library
+with_user_agent_extra("lakebridge", __version__)
+
+# Add lakebridge/<version> for re-packaging of lakebridge, where product name is omitted
+with_product("lakebridge", __version__)
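Editorial aside: the two registrations above serve different consumers. with_user_agent_extra tags requests made by applications that merely import lakebridge, while with_product sets the product name for repackaged distributions; both only decorate outgoing request metadata. A project embedding lakebridge as a library could stack its own marker the same way (the tool name and version below are invented for illustration):

from databricks.sdk.core import with_user_agent_extra

# Appends "my-migration-tool/0.1.0" to the User-Agent, alongside the
# "lakebridge/<version>" entry registered by the import above.
with_user_agent_extra("my-migration-tool", "0.1.0")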
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/configure_assessment.py
ADDED
@@ -0,0 +1,194 @@
+from abc import ABC, abstractmethod
+import logging
+import shutil
+import yaml
+
+from databricks.labs.blueprint.tui import Prompts
+
+from databricks.labs.lakebridge.connections.credential_manager import (
+    cred_file as creds,
+    CredentialManager,
+    create_credential_manager,
+)
+from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
+from databricks.labs.lakebridge.connections.env_getter import EnvGetter
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]
+
+
+class AssessmentConfigurator(ABC):
+    """Abstract base class for assessment configuration."""
+
+    def __init__(self, product_name: str, prompts: Prompts, credential_file=None):
+        self.prompts = prompts
+        self._product_name = product_name
+        self._credential_file = creds(product_name) if not credential_file else credential_file
+
+    @abstractmethod
+    def _configure_credentials(self) -> str:
+        pass
+
+    @staticmethod
+    def _test_connection(source: str, cred_manager: CredentialManager):
+        config = cred_manager.get_credentials(source)
+
+        try:
+            db_manager = DatabaseManager(source, config)
+            if db_manager.check_connection():
+                logger.info("Connection to the source system successful")
+            else:
+                logger.error("Connection to the source system failed, check logs in debug mode")
+                raise SystemExit("Connection validation failed. Exiting...")
+
+        except ConnectionError as e:
+            logger.error(f"Failed to connect to the source system: {e}")
+            raise SystemExit("Connection validation failed. Exiting...") from e
+
+    def run(self):
+        """Run the assessment configuration process."""
+        logger.info(f"Welcome to the {self._product_name} Assessment Configuration")
+        source = self._configure_credentials()
+        logger.info(f"{source.capitalize()} details and credentials received.")
+        if self.prompts.confirm(f"Do you want to test the connection to {source}?"):
+            cred_manager = create_credential_manager("lakebridge", EnvGetter())
+            if cred_manager:
+                self._test_connection(source, cred_manager)
+        logger.info(f"{source.capitalize()} Assessment Configuration Completed")
+
+
+class ConfigureSqlServerAssessment(AssessmentConfigurator):
+    """SQL Server specific assessment configuration."""
+
+    def _configure_credentials(self) -> str:
+        cred_file = self._credential_file
+        source = "mssql"
+
+        logger.info(
+            "\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
+            "from environment variables, falling back to plain text if the variable is not found\n",
+        )
+        secret_vault_type = str(self.prompts.choice("Enter secret vault type (local | env)", ["local", "env"])).lower()
+        secret_vault_name = None
+
+        logger.info("Please refer to the documentation to understand the difference between local and env.")
+
+        credential = {
+            "secret_vault_type": secret_vault_type,
+            "secret_vault_name": secret_vault_name,
+            source: {
+                "database": self.prompts.question("Enter the database name"),
+                "driver": self.prompts.question("Enter the driver details"),
+                "server": self.prompts.question("Enter the server or host details"),
+                "port": int(self.prompts.question("Enter the port details", valid_number=True)),
+                "user": self.prompts.question("Enter the user details"),
+                "password": self.prompts.question("Enter the password details"),
+            },
+        }
+
+        if cred_file.exists():
+            backup_filename = cred_file.with_suffix('.bak')
+            shutil.copy(cred_file, backup_filename)
+            logger.debug(f"Backup of the existing file created at {backup_filename}")
+
+        with open(cred_file, 'w', encoding='utf-8') as file:
+            yaml.dump(credential, file, default_flow_style=False)
+
+        logger.info(f"Credential template created for {source}.")
+        return source
+
+
+class ConfigureSynapseAssessment(AssessmentConfigurator):
+    """Synapse specific assessment configuration."""
+
+    def _configure_credentials(self) -> str:
+        cred_file = self._credential_file
+        source = "synapse"
+
+        logger.info(
+            "\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
+            "from environment variables, falling back to plain text if the variable is not found\n",
+        )
+        secret_vault_type = str(self.prompts.choice("Enter secret vault type (local | env)", ["local", "env"])).lower()
+        secret_vault_name = None
+
+        # Synapse Workspace Settings
+        logger.info("Please provide Synapse Workspace settings:")
+        synapse_workspace = {
+            "name": self.prompts.question("Enter Synapse workspace name"),
+            "dedicated_sql_endpoint": self.prompts.question("Enter dedicated SQL endpoint"),
+            "serverless_sql_endpoint": self.prompts.question("Enter serverless SQL endpoint"),
+            "sql_user": self.prompts.question("Enter SQL user"),
+            "sql_password": self.prompts.question("Enter SQL password"),
+            "tz_info": self.prompts.question("Enter timezone (e.g. America/New_York)", default="UTC"),
+        }
+
+        # Azure API Access Settings
+        logger.info("Please provide Azure API access settings:")
+        azure_api_access = {
+            "development_endpoint": self.prompts.question("Enter development endpoint"),
+            "azure_client_id": self.prompts.question("Enter Azure client ID"),
+            "azure_tenant_id": self.prompts.question("Enter Azure tenant ID"),
+            "azure_client_secret": self.prompts.question("Enter Azure client secret"),
+        }
+
+        # JDBC Settings
+        logger.info("Please select JDBC authentication type:")
+        auth_type = self.prompts.choice(
+            "Select authentication type", ["sql_authentication", "ad_passwd_authentication", "spn_authentication"]
+        )
+
+        synapse_jdbc = {
+            "auth_type": auth_type,
+            "fetch_size": self.prompts.question("Enter fetch size", default="1000"),
+            "login_timeout": self.prompts.question("Enter login timeout (seconds)", default="30"),
+        }
+
+        # Profiler Settings
+        logger.info("Please configure profiler settings:")
+        synapse_profiler = {
+            "exclude_serverless_sql_pool": self.prompts.confirm("Exclude serverless SQL pool from profiling?"),
+            "exclude_dedicated_sql_pools": self.prompts.confirm("Exclude dedicated SQL pools from profiling?"),
+            "exclude_spark_pools": self.prompts.confirm("Exclude Spark pools from profiling?"),
+            "exclude_monitoring_metrics": self.prompts.confirm("Exclude monitoring metrics from profiling?"),
+            "redact_sql_pools_sql_text": self.prompts.confirm("Redact SQL pools SQL text?"),
+        }
+
+        credential = {
+            "secret_vault_type": secret_vault_type,
+            "secret_vault_name": secret_vault_name,
+            source: {
+                "workspace": synapse_workspace,
+                "azure_api_access": azure_api_access,
+                "jdbc": synapse_jdbc,
+                "profiler": synapse_profiler,
+            },
+        }
+
+        if cred_file.exists():
+            backup_filename = cred_file.with_suffix('.bak')
+            shutil.copy(cred_file, backup_filename)
+            logger.debug(f"Backup of the existing file created at {backup_filename}")
+
+        with open(cred_file, 'w', encoding='utf-8') as file:
+            yaml.dump(credential, file, default_flow_style=False)
+
+        logger.info(f"Credential template created for {source}.")
+        return source
+
+
+def create_assessment_configurator(
+    source_system: str, product_name: str, prompts: Prompts, credential_file=None
+) -> AssessmentConfigurator:
+    """Factory function to create the appropriate assessment configurator."""
+    configurators = {
+        "mssql": ConfigureSqlServerAssessment,
+        "synapse": ConfigureSynapseAssessment,
+    }
+
+    if source_system not in configurators:
+        raise ValueError(f"Unsupported source system: {source_system}")
+
+    return configurators[source_system](product_name, prompts, credential_file)
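Editorial note: taken together, the factory and the two configurators form a small interactive flow: pick a source system, answer the prompts, and a credentials YAML is written (backing up any previous file to .bak). A minimal driver sketch, assuming Prompts() from databricks-labs-blueprint can be constructed directly for console use; this wiring is illustrative, not the package's documented entry point:

from databricks.labs.blueprint.tui import Prompts

from databricks.labs.lakebridge.assessments.configure_assessment import (
    create_assessment_configurator,
)

# "mssql" and "synapse" are the systems listed in PROFILER_SOURCE_SYSTEM above.
configurator = create_assessment_configurator(
    source_system="mssql",
    product_name="lakebridge",
    prompts=Prompts(),
)
configurator.run()  # prompts for details, writes the credentials file, optionally tests the connection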
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/pipeline.py
ADDED
@@ -0,0 +1,188 @@
+from pathlib import Path
+from subprocess import run, CalledProcessError
+from dataclasses import dataclass
+from enum import Enum
+
+import venv
+import tempfile
+import json
+import logging
+import yaml
+import duckdb
+
+from databricks.labs.lakebridge.connections.credential_manager import cred_file
+
+from databricks.labs.lakebridge.assessments.profiler_config import PipelineConfig, Step
+from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
+
+logger = logging.getLogger(__name__)
+logger.setLevel("INFO")
+
+DB_NAME = "profiler_extract.db"
+
+
+class StepExecutionStatus(str, Enum):
+    COMPLETE = "COMPLETE"
+    ERROR = "ERROR"
+    SKIPPED = "SKIPPED"
+
+
+@dataclass
+class StepExecutionResult:
+    step_name: str
+    status: StepExecutionStatus
+    error_message: str | None = None
+
+
+class PipelineClass:
+    def __init__(self, config: PipelineConfig, executor: DatabaseManager):
+        self.config = config
+        self.executor = executor
+        self.db_path_prefix = Path(config.extract_folder)
+
+    def execute(self) -> list[StepExecutionResult]:
+        logging.info(f"Pipeline initialized with config: {self.config.name}, version: {self.config.version}")
+        execution_results: list[StepExecutionResult] = []
+        for step in self.config.steps:
+            result = self._process_step(step)
+            execution_results.append(result)
+            logging.info(f"Step '{step.name}' completed with status: {result.status}")
+
+        logging.info("Pipeline execution completed")
+        return execution_results
+
+    def _process_step(self, step: Step) -> StepExecutionResult:
+        if step.flag != "active":
+            logging.info(f"Skipping step: {step.name} as it is not active")
+            return StepExecutionResult(step_name=step.name, status=StepExecutionStatus.SKIPPED)
+
+        logging.debug(f"Executing step: {step.name}")
+        try:
+            status = self._execute_step(step)
+            return StepExecutionResult(step_name=step.name, status=status)
+        except RuntimeError as e:
+            return StepExecutionResult(step_name=step.name, status=StepExecutionStatus.ERROR, error_message=str(e))
+
+    def _execute_step(self, step: Step) -> StepExecutionStatus:
+        if step.type == "sql":
+            logging.info(f"Executing SQL step {step.name}")
+            self._execute_sql_step(step)
+            return StepExecutionStatus.COMPLETE
+        if step.type == "python":
+            logging.info(f"Executing Python step {step.name}")
+            self._execute_python_step(step)
+            return StepExecutionStatus.COMPLETE
+        logging.error(f"Unsupported step type: {step.type}")
+        raise RuntimeError(f"Unsupported step type: {step.type}")
+
+    def _execute_sql_step(self, step: Step):
+        logging.debug(f"Reading query from file: {step.extract_source}")
+        with open(step.extract_source, 'r', encoding='utf-8') as file:
+            query = file.read()
+
+        # Execute the query using the database manager
+        logging.info(f"Executing query: {query}")
+        try:
+            result = self.executor.execute_query(query)
+
+            # Save the result to duckdb
+            self._save_to_db(result, step.name, str(step.mode))
+        except Exception as e:
+            logging.error(f"SQL execution failed: {str(e)}")
+            raise RuntimeError(f"SQL execution failed: {str(e)}") from e
+
+    def _execute_python_step(self, step: Step):
+
+        logging.debug(f"Executing Python script: {step.extract_source}")
+        db_path = str(self.db_path_prefix / DB_NAME)
+        credential_config = str(cred_file("lakebridge"))
+
+        # Create a temporary directory for the virtual environment
+        with tempfile.TemporaryDirectory() as temp_dir:
+            venv_dir = Path(temp_dir) / "venv"
+            venv.create(venv_dir, with_pip=True)
+            venv_python = venv_dir / "bin" / "python"
+            venv_pip = venv_dir / "bin" / "pip"
+
+            logger.info(f"Creating a virtual environment for Python script execution: {venv_dir}")
+            # Install dependencies in the virtual environment
+            if step.dependencies:
+                logging.info(f"Installing dependencies: {', '.join(step.dependencies)}")
+                try:
+                    logging.debug("Upgrading local pip")
+                    run([str(venv_pip), "install", "--upgrade", "pip"], check=True, capture_output=True, text=True)
+
+                    run([str(venv_pip), "install", *step.dependencies], check=True, capture_output=True, text=True)
+                except CalledProcessError as e:
+                    logging.error(f"Failed to install dependencies: {e.stderr}")
+                    raise RuntimeError(f"Failed to install dependencies: {e.stderr}") from e
+
+            # Execute the Python script using the virtual environment's Python interpreter
+            try:
+                result = run(
+                    [
+                        str(venv_python),
+                        str(step.extract_source),
+                        "--db-path",
+                        db_path,
+                        "--credential-config-path",
+                        credential_config,
+                    ],
+                    check=True,
+                    capture_output=True,
+                    text=True,
+                )
+
+                try:
+                    output = json.loads(result.stdout)
+                    if output["status"] == "success":
+                        logging.info(f"Python script completed: {output['message']}")
+                    else:
+                        raise RuntimeError(f"Script reported error: {output['message']}")
+                except json.JSONDecodeError:
+                    logging.info(f"Python script output: {result.stdout}")
+
+            except CalledProcessError as e:
+                error_msg = e.stderr
+                logging.error(f"Python script failed: {error_msg}")
+                raise RuntimeError(f"Script execution failed: {error_msg}") from e
+
+    def _save_to_db(self, result, step_name: str, mode: str, batch_size: int = 1000):
+        self._create_dir(self.db_path_prefix)
+        db_path = str(self.db_path_prefix / DB_NAME)
+
+        with duckdb.connect(db_path) as conn:
+            columns = result.keys()
+            # TODO: Add support for deriving data types from the SQLAlchemy result object; result.cursor.description is not reliable
+            schema = ' STRING, '.join(columns) + ' STRING'
+
+            # Handle write modes
+            if mode == 'overwrite':
+                conn.execute(f"CREATE OR REPLACE TABLE {step_name} ({schema})")
+            elif mode == 'append' and step_name not in conn.get_table_names(""):
+                conn.execute(f"CREATE TABLE {step_name} ({schema})")
+
+            # Batch insert using prepared statements
+            placeholders = ', '.join(['?' for _ in columns])
+            insert_query = f"INSERT INTO {step_name} VALUES ({placeholders})"
+
+            # Fetch and insert rows in batches
+            while True:
+                rows = result.fetchmany(batch_size)
+                if not rows:
+                    break
+                conn.executemany(insert_query, rows)
+
+    @staticmethod
+    def _create_dir(dir_path: Path):
+        if not Path(dir_path).exists():
+            dir_path.mkdir(parents=True, exist_ok=True)
+
+    @staticmethod
+    def load_config_from_yaml(file_path: str) -> PipelineConfig:
+        with open(file_path, 'r', encoding='utf-8') as file:
+            data = yaml.safe_load(file)
+        steps = [Step(**step) for step in data['steps']]
+        return PipelineConfig(
+            name=data['name'], version=data['version'], extract_folder=data['extract_folder'], steps=steps
+        )
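Editorial note: a hedged sketch of how the pipeline pieces compose end to end: load a PipelineConfig from YAML, build a DatabaseManager from stored credentials (as produced by the configurator earlier in this diff), and execute the steps. The file path and the "mssql" source are placeholders:

from databricks.labs.lakebridge.assessments.pipeline import PipelineClass, StepExecutionStatus
from databricks.labs.lakebridge.connections.credential_manager import create_credential_manager
from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
from databricks.labs.lakebridge.connections.env_getter import EnvGetter

config = PipelineClass.load_config_from_yaml("pipeline_config.yml")  # placeholder path
creds = create_credential_manager("lakebridge", EnvGetter()).get_credentials("mssql")
executor = DatabaseManager("mssql", creds)

# Each SQL step lands in profiler_extract.db as a table named after the step;
# a failed step is reported in the results rather than aborting the whole run.
results = PipelineClass(config, executor).execute()
for result in results:
    if result.status == StepExecutionStatus.ERROR:
        print(f"step {result.step_name} failed: {result.error_message}")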
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/profiler_config.py
ADDED
@@ -0,0 +1,30 @@
+from dataclasses import dataclass, field
+
+
+@dataclass
+class Step:
+    name: str
+    type: str | None
+    extract_source: str
+    mode: str | None
+    frequency: str | None
+    flag: str | None
+    dependencies: list[str] = field(default_factory=list)
+    comment: str | None = None
+
+    def __post_init__(self):
+        if self.frequency is None:
+            self.frequency = "once"
+        if self.flag is None:
+            self.flag = "active"
+        if self.mode is None:
+            self.mode = "append"
+
+
+@dataclass
+class PipelineConfig:
+    name: str
+    version: str
+    extract_folder: str
+    comment: str | None = None
+    steps: list[Step] = field(default_factory=list)
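Editorial note: since PipelineClass.load_config_from_yaml builds each Step(**step) straight from the parsed mapping, a pipeline YAML must carry the six required Step fields (only dependencies and comment may be omitted), and __post_init__ turns explicit nulls into the defaults. A small illustrative round trip; all names and paths are invented:

import yaml

from databricks.labs.lakebridge.assessments.profiler_config import PipelineConfig, Step

raw = """
name: example_profiler
version: "1.0"
extract_folder: /tmp/profiler_extract
steps:
  - name: list_databases        # also becomes the DuckDB table name
    type: sql
    extract_source: queries/list_databases.sql
    mode: overwrite
    frequency: once
    flag: active
"""
data = yaml.safe_load(raw)
config = PipelineConfig(
    name=data["name"],
    version=data["version"],
    extract_folder=data["extract_folder"],
    steps=[Step(**step) for step in data["steps"]],
)

# Explicit nulls fall back to the __post_init__ defaults:
step = Step(name="ad_hoc", type="sql", extract_source="q.sql", mode=None, frequency=None, flag=None)
assert (step.mode, step.frequency, step.flag) == ("append", "once", "active")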
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/base_install.py
ADDED
@@ -0,0 +1,12 @@
+from databricks.labs.blueprint.logger import install_logger
+from databricks.labs.blueprint.entrypoint import get_logger
+from databricks.sdk.core import with_user_agent_extra
+
+install_logger()
+with_user_agent_extra("cmd", "install")
+
+if __name__ == "__main__":
+    logger = get_logger(__file__)
+    logger.setLevel("INFO")
+
+    logger.info("Successfully Setup Remorph Components Locally")
|