databricks-labs-lakebridge 0.10.7__tar.gz → 0.10.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/PKG-INFO +1 -1
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/__about__.py +1 -1
- databricks_labs_lakebridge-0.10.8/databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/base_install.py +1 -5
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/cli.py +13 -6
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/validation.py +5 -3
- databricks_labs_lakebridge-0.10.8/databricks/labs/lakebridge/install.py +393 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/data_source.py +9 -5
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/databricks.py +2 -1
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/oracle.py +2 -1
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/snowflake.py +50 -29
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/tsql.py +2 -1
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/base.py +50 -11
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/recon_config.py +0 -15
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/reconciliation.py +4 -1
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +11 -31
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/trigger_recon_service.py +4 -1
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/execute.py +34 -28
- databricks_labs_lakebridge-0.10.7/databricks/labs/lakebridge/install.py → databricks_labs_lakebridge-0.10.8/databricks/labs/lakebridge/transpiler/installers.py +66 -377
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +2 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/pyproject.toml +2 -1
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/.gitignore +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/LICENSE +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/NOTICE +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/README.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/analyzer/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/assessments/configure_assessment.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/assessments/pipeline.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/assessments/profiler_config.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/config.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/connections/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/connections/credential_manager.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/connections/database_manager.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/connections/env_getter.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/contexts/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/contexts/application.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/commons.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/local_report.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/configurator.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/dashboard.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/installation.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/job.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/recon.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/table.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/upgrade_common.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/discovery/table.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/discovery/table_definition.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/discovery/tsql_table_definition.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/errors/exceptions.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/db_sql.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/execution_time.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/file_utils.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/metastore.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/recon_config_utils.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/string_utils.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/telemetry_utils.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/intermediate/dag.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/intermediate/root_tables.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/jvmproxy.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/lineage.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/compare.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/models.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/constants.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/exception.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/execute.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/count_query.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/recon_capture.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/recon_output_config.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/runner.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/sampler.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/schema_compare.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/utils.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/config/credentials.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/repository.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/transpile_status.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/uninstall.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/components/Button.tsx +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/components/ReconcileTabs.tsx +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/css/custom.css +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/css/table.css +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/pages/index.tsx +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/theme/DocSidebarItems/index.tsx +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/theme/Footer/index.tsx +0 -0
- {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/theme/Layout/index.tsx +0 -0
{databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: databricks-labs-lakebridge
-Version: 0.10.7
+Version: 0.10.8
 Summary: Fast and predictable migrations to Databricks Lakehouse Platform. This tool is designed to help you migrate your data and workloads to the Databricks Lakehouse Platform in a fast, predictable, and reliable way. It provides a set of tools and utilities to help you reconcile your data and workloads, assess your current state, and plan your migration.
 Project-URL: Documentation, https://databrickslabs.github.io/lakebridge
 Project-URL: Issues, https://github.com/databrickslabs/lakebridge/issues
{databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/__about__.py
@@ -1,2 +1,2 @@
 # DO NOT MODIFY THIS FILE
-__version__ = "0.10.7"
+__version__ = "0.10.8"
databricks_labs_lakebridge-0.10.8/databricks/labs/lakebridge/assessments/profiler_validator.py ADDED
@@ -0,0 +1,103 @@
+import os
+from dataclasses import dataclass
+from duckdb import DuckDBPyConnection
+
+from databricks.labs.lakebridge.assessments.pipeline import PipelineClass
+
+PROFILER_DB_NAME = "profiler_extract.db"
+
+
+@dataclass(frozen=True)
+class ValidationOutcome:
+    """A data class that holds the outcome of a table validation check."""
+
+    table: str
+    column: str | None
+    strategy: str
+    outcome: str
+    severity: str
+
+
+class ValidationStrategy:
+    """Abstract class for validating a Profiler table"""
+
+    def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
+        raise NotImplementedError
+
+
+class NullValidationCheck(ValidationStrategy):
+    """Concrete class for validating null values in a profiler table"""
+
+    def __init__(self, table, column, severity="WARN"):
+        self.name = self.__class__.__name__
+        self.table = table
+        self.column = column
+        self.severity = severity
+
+    def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
+        """
+        Validates that a column does not contain null values.
+        input:
+            connection: a DuckDB connection object
+        """
+        result = connection.execute(f"SELECT COUNT(*) FROM {self.table} WHERE {self.column} IS NULL").fetchone()
+        if result:
+            row_count = result[0]
+            outcome = "FAIL" if row_count > 0 else "PASS"
+        else:
+            outcome = "FAIL"
+        return ValidationOutcome(self.table, self.column, self.name, outcome, self.severity)
+
+
+class EmptyTableValidationCheck(ValidationStrategy):
+    """Concrete class for validating empty tables from a profiler run."""
+
+    def __init__(self, table, severity="WARN"):
+        self.name = self.__class__.__name__
+        self.table = table
+        self.severity = severity
+
+    def validate(self, connection) -> ValidationOutcome:
+        """Validates that a table is not empty.
+        input:
+            connection: a DuckDB connection object
+        returns:
+            a ValidationOutcome object
+        """
+        result = connection.execute(f"SELECT COUNT(*) FROM {self.table}").fetchone()
+        if result:
+            row_count = result[0]
+            outcome = "PASS" if row_count > 0 else "FAIL"
+        else:
+            outcome = "FAIL"
+        return ValidationOutcome(self.table, None, self.name, outcome, self.severity)
+
+
+def get_profiler_extract_path(pipeline_config_path: str) -> str:
+    """
+    Returns the filesystem path of the profiler extract database.
+    input:
+        pipeline_config_path: the location of the pipeline definition .yml file
+    returns:
+        the filesystem path to the profiler extract database
+    """
+    pipeline_config = PipelineClass.load_config_from_yaml(pipeline_config_path)
+    normalized_db_path = os.path.normpath(pipeline_config.extract_folder)
+    database_path = f"{normalized_db_path}/{PROFILER_DB_NAME}"
+    return database_path
+
+
+def build_validation_report(
+    validations: list[ValidationStrategy], connection: DuckDBPyConnection
+) -> list[ValidationOutcome]:
+    """
+    Builds a list of ValidationOutcomes from list of validation checks.
+    input:
+        validations: a list of ValidationStrategy objects
+        connection: a DuckDB connection object
+    returns: a list of ValidationOutcomes
+    """
+    validation_report = []
+    for validation in validations:
+        validation_report.append(validation.validate(connection))
+    return validation_report
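The new validator module follows a small strategy pattern: each check implements `validate(connection)` and returns a `ValidationOutcome`, and `build_validation_report` folds a list of checks into a report. A minimal usage sketch against a local profiler extract (the pipeline config path, table, and column names here are hypothetical):

```python
import duckdb

from databricks.labs.lakebridge.assessments.profiler_validator import (
    EmptyTableValidationCheck,
    NullValidationCheck,
    build_validation_report,
    get_profiler_extract_path,
)

# Resolve the extract database from a pipeline definition (path is illustrative).
db_path = get_profiler_extract_path("pipeline_config.yml")
connection = duckdb.connect(db_path)

checks = [
    EmptyTableValidationCheck("databases"),  # hypothetical extract table
    NullValidationCheck("tables", "table_name", severity="ERROR"),  # hypothetical column
]
for outcome in build_validation_report(checks, connection):
    print(f"{outcome.table}: {outcome.strategy} -> {outcome.outcome} ({outcome.severity})")
```

Note that both checks interpolate table and column names directly into SQL, so callers should only pass trusted identifiers.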
{databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/base_install.py
@@ -19,11 +19,7 @@ def main() -> None:
         WorkspaceClient(product="lakebridge", product_version=__version__),
         transpiler_repository=TranspilerRepository.user_home(),
     )
-    if installer.
-        logger.warning(
-            "Detected existing Lakebridge transpilers; run 'databricks labs lakebridge install-transpile' to upgrade them."
-        )
-    else:
+    if not installer.upgrade_installed_transpilers():
         logger.debug("No existing Lakebridge transpilers detected; assuming fresh installation.")

     logger.info("Successfully Setup Lakebridge Components Locally")
{databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/cli.py
@@ -74,6 +74,7 @@ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str):

 @lakebridge.command
 def transpile(
+    *,
     w: WorkspaceClient,
     transpiler_config_path: str | None = None,
     source_dialect: str | None = None,
@@ -340,6 +341,8 @@ class _TranspileConfigChecker:
                 supported_dialects = ", ".join(self._transpiler_repository.all_dialects())
                 msg = f"{msg_prefix}: {source_dialect!r} (supported dialects: {supported_dialects})"
                 raise_validation_exception(msg)
+            else:
+                self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
         else:
             # Check the source dialect against the engine.
             if source_dialect not in engine.supported_dialects:
@@ -366,6 +369,7 @@ class _TranspileConfigChecker:
         source_dialect = self._prompts.choice("Select the source dialect:", list(supported_dialects))
         engine = self._configure_transpiler_config_path(source_dialect)
         assert engine is not None, "No transpiler engine available for a supported dialect; configuration is invalid."
+        self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
         return engine

     def _check_lsp_engine(self) -> TranspileEngine:
@@ -518,7 +522,7 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s


 @lakebridge.command
-def reconcile(w: WorkspaceClient) -> None:
+def reconcile(*, w: WorkspaceClient) -> None:
     """[EXPERIMENTAL] Reconciles source to Databricks datasets"""
     with_user_agent_extra("cmd", "execute-reconcile")
     ctx = ApplicationContext(w)
@@ -534,7 +538,7 @@ def reconcile(w: WorkspaceClient) -> None:


 @lakebridge.command
-def aggregates_reconcile(w: WorkspaceClient) -> None:
+def aggregates_reconcile(*, w: WorkspaceClient) -> None:
     """[EXPERIMENTAL] Reconciles Aggregated source to Databricks datasets"""
     with_user_agent_extra("cmd", "execute-aggregates-reconcile")
     ctx = ApplicationContext(w)
@@ -552,8 +556,8 @@ def aggregates_reconcile(w: WorkspaceClient) -> None:

 @lakebridge.command
 def generate_lineage(
-    w: WorkspaceClient,
     *,
+    w: WorkspaceClient,
     source_dialect: str | None = None,
     input_source: str,
     output_folder: str,
@@ -578,7 +582,7 @@ def generate_lineage(


 @lakebridge.command
-def configure_secrets(w: WorkspaceClient) -> None:
+def configure_secrets(*, w: WorkspaceClient) -> None:
     """Setup reconciliation connection profile details as Secrets on Databricks Workspace"""
     recon_conf = ReconConfigPrompts(w)

@@ -604,8 +608,9 @@ def configure_database_profiler() -> None:
     assessment.run()


-@lakebridge.command
+@lakebridge.command
 def install_transpile(
+    *,
     w: WorkspaceClient,
     artifact: str | None = None,
     transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
@@ -622,6 +627,7 @@ def install_transpile(

 @lakebridge.command(is_unauthenticated=False)
 def configure_reconcile(
+    *,
     w: WorkspaceClient,
     transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
 ) -> None:
@@ -637,8 +643,9 @@ def configure_reconcile(
     reconcile_installer.run(module="reconcile")


-@lakebridge.command
+@lakebridge.command
 def analyze(
+    *,
     w: WorkspaceClient,
     source_directory: str | None = None,
     report_file: str | None = None,
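A pattern worth noting across these cli.py hunks: the added `*,` makes every following parameter keyword-only, so the command dispatcher must bind `w`, `source_dialect`, and the rest by name rather than by position. A two-line illustration of the Python semantics involved (not lakebridge code):

```python
def reconcile(*, w: str) -> None:
    print(f"reconciling via {w}")

reconcile(w="my-workspace")  # OK: bound by keyword
reconcile("my-workspace")    # TypeError: reconcile() takes 0 positional arguments
```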
{databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/validation.py
@@ -37,19 +37,21 @@ class Validator:
             config.catalog_name,
             config.schema_name,
         )
+        # Some errors don't return the query text along with the error message, so those need handling separately.
+        static_errors_lkp = ["[UNRESOLVED_ROUTINE]", "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION]"]
         if is_valid:
             result = sql_text
             if exception_type is not None:
                 exception_msg = f"[{exception_type.upper()}]: {exception_msg}"
         else:
             query = ""
-            if
+            if any(err in str(exception_msg) for err in static_errors_lkp):
                 query = sql_text
             buffer = StringIO()
             buffer.write("-------------- Exception Start-------------------\n")
-            buffer.write("
+            buffer.write("/*\n")
             buffer.write(str(exception_msg))
-            buffer.write("\n
+            buffer.write("\n*/\n")
             buffer.write(query)
             buffer.write("\n ---------------Exception End --------------------\n")
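The replacement strings here wrap the exception message in a SQL block comment (`/* ... */`), so the generated error report stays parseable as SQL when the failing query is echoed beneath it. A sketch of what the buffer assembles for a failed validation (the message and query are illustrative):

```python
from io import StringIO

# Hypothetical inputs: the error matches static_errors_lkp, so the query is echoed.
exception_msg = "[UNRESOLVED_ROUTINE] Cannot resolve function `foo`"
query = "SELECT foo(col) FROM t"

buffer = StringIO()
buffer.write("-------------- Exception Start-------------------\n")
buffer.write("/*\n")                # open a SQL block comment
buffer.write(exception_msg)
buffer.write("\n*/\n")              # close it before the echoed query
buffer.write(query)
buffer.write("\n ---------------Exception End --------------------\n")
print(buffer.getvalue())
```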
databricks_labs_lakebridge-0.10.8/databricks/labs/lakebridge/install.py ADDED
@@ -0,0 +1,393 @@
+import dataclasses
+import logging
+import os
+import webbrowser
+from collections.abc import Set, Callable, Sequence
+from pathlib import Path
+from typing import Any, cast
+
+from databricks.labs.blueprint.installation import Installation, JsonValue, SerdeError
+from databricks.labs.blueprint.installer import InstallState
+from databricks.labs.blueprint.tui import Prompts
+from databricks.labs.blueprint.wheels import ProductInfo
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import NotFound, PermissionDenied
+
+from databricks.labs.lakebridge.__about__ import __version__
+from databricks.labs.lakebridge.config import (
+    DatabaseConfig,
+    ReconcileConfig,
+    LakebridgeConfiguration,
+    ReconcileMetadataConfig,
+    TranspileConfig,
+)
+from databricks.labs.lakebridge.contexts.application import ApplicationContext
+from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
+from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
+from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
+from databricks.labs.lakebridge.transpiler.installers import (
+    BladebridgeInstaller,
+    MorpheusInstaller,
+    TranspilerInstaller,
+)
+from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
+
+logger = logging.getLogger(__name__)
+
+TRANSPILER_WAREHOUSE_PREFIX = "Lakebridge Transpiler Validation"
+
+
+class WorkspaceInstaller:
+    def __init__(
+        self,
+        ws: WorkspaceClient,
+        prompts: Prompts,
+        installation: Installation,
+        install_state: InstallState,
+        product_info: ProductInfo,
+        resource_configurator: ResourceConfigurator,
+        workspace_installation: WorkspaceInstallation,
+        environ: dict[str, str] | None = None,
+        *,
+        transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
+        transpiler_installers: Sequence[Callable[[TranspilerRepository], TranspilerInstaller]] = (
+            BladebridgeInstaller,
+            MorpheusInstaller,
+        ),
+    ):
+        self._ws = ws
+        self._prompts = prompts
+        self._installation = installation
+        self._install_state = install_state
+        self._product_info = product_info
+        self._resource_configurator = resource_configurator
+        self._ws_installation = workspace_installation
+        self._transpiler_repository = transpiler_repository
+        self._transpiler_installer_factories = transpiler_installers
+
+        if not environ:
+            environ = dict(os.environ.items())
+
+        if "DATABRICKS_RUNTIME_VERSION" in environ:
+            msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime"
+            raise SystemExit(msg)
+
+    @property
+    def _transpiler_installers(self) -> Set[TranspilerInstaller]:
+        return frozenset(factory(self._transpiler_repository) for factory in self._transpiler_installer_factories)
+
+    def run(
+        self, module: str, config: LakebridgeConfiguration | None = None, artifact: str | None = None
+    ) -> LakebridgeConfiguration:
+        logger.debug(f"Initializing workspace installation for module: {module} (config: {config})")
+        if module == "transpile" and artifact:
+            self._install_artifact(artifact)
+        elif module in {"transpile", "all"}:
+            for transpiler_installer in self._transpiler_installers:
+                transpiler_installer.install()
+        if not config:
+            config = self.configure(module)
+        if self._is_testing():
+            return config
+        self._ws_installation.install(config)
+        logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
+        return config
+
+    def upgrade_installed_transpilers(self) -> bool:
+        """Detect and upgrade, if possible and necessary, installed transpilers."""
+        installed_transpilers = self._transpiler_repository.all_transpiler_names()
+        if installed_transpilers:
+            logger.info(f"Detected installed transpilers: {sorted(installed_transpilers)}")
+        upgraded = False
+        for transpiler_installer in self._transpiler_installers:
+            name = transpiler_installer.name
+            if name in installed_transpilers:
+                logger.info(f"Checking for {name} upgrades...")
+                upgraded |= transpiler_installer.install()
+        # If we upgraded anything, the configuration process needs to run again.
+        if upgraded:
+            config = self.configure("transpile")
+            if not self._is_testing():
+                self._ws_installation.install(config)
+        return upgraded
+
+    def _install_artifact(self, artifact: str) -> None:
+        path = Path(artifact)
+        if not path.exists():
+            logger.error(f"Could not locate artifact {artifact}")
+            return
+        for transpiler_installer in self._transpiler_installers:
+            if transpiler_installer.can_install(path):
+                transpiler_installer.install(path)
+                break
+        else:
+            logger.fatal(f"Cannot install unsupported artifact: {artifact}")
+
+    def configure(self, module: str) -> LakebridgeConfiguration:
+        match module:
+            case "transpile":
+                logger.info("Configuring lakebridge `transpile`.")
+                return LakebridgeConfiguration(self._configure_transpile(), None)
+            case "reconcile":
+                logger.info("Configuring lakebridge `reconcile`.")
+                return LakebridgeConfiguration(None, self._configure_reconcile())
+            case "all":
+                logger.info("Configuring lakebridge `transpile` and `reconcile`.")
+                return LakebridgeConfiguration(
+                    self._configure_transpile(),
+                    self._configure_reconcile(),
+                )
+            case _:
+                raise ValueError(f"Invalid input: {module}")
+
+    def _is_testing(self):
+        return self._product_info.product_name() != "lakebridge"
+
+    def _configure_transpile(self) -> TranspileConfig:
+        try:
+            config = self._installation.load(TranspileConfig)
+            logger.info("Lakebridge `transpile` is already installed on this workspace.")
+            if not self._prompts.confirm("Do you want to override the existing installation?"):
+                return config
+        except NotFound:
+            logger.info("Couldn't find existing `transpile` installation")
+        except (PermissionDenied, SerdeError, ValueError, AttributeError):
+            install_dir = self._installation.install_folder()
+            logger.warning(
+                f"Existing `transpile` installation at {install_dir} is corrupted. Continuing new installation..."
+            )
+
+        config = self._configure_new_transpile_installation()
+        logger.info("Finished configuring lakebridge `transpile`.")
+        return config
+
+    def _configure_new_transpile_installation(self) -> TranspileConfig:
+        default_config = self._prompt_for_new_transpile_installation()
+        runtime_config = None
+        catalog_name = "remorph"
+        schema_name = "transpiler"
+        if not default_config.skip_validation:
+            catalog_name = self._configure_catalog()
+            schema_name = self._configure_schema(catalog_name, "transpile")
+            self._has_necessary_access(catalog_name, schema_name)
+            warehouse_id = self._resource_configurator.prompt_for_warehouse_setup(TRANSPILER_WAREHOUSE_PREFIX)
+            runtime_config = {"warehouse_id": warehouse_id}
+
+        config = dataclasses.replace(
+            default_config,
+            catalog_name=catalog_name,
+            schema_name=schema_name,
+            sdk_config=runtime_config,
+        )
+        self._save_config(config)
+        return config
+
+    def _all_installed_dialects(self) -> list[str]:
+        return sorted(self._transpiler_repository.all_dialects())
+
+    def _transpilers_with_dialect(self, dialect: str) -> list[str]:
+        return sorted(self._transpiler_repository.transpilers_with_dialect(dialect))
+
+    def _transpiler_config_path(self, transpiler: str) -> Path:
+        return self._transpiler_repository.transpiler_config_path(transpiler)
+
+    def _prompt_for_new_transpile_installation(self) -> TranspileConfig:
+        install_later = "Set it later"
+        # TODO tidy this up, logger might not display the below in console...
+        logger.info("Please answer a few questions to configure lakebridge `transpile`")
+        all_dialects = [install_later, *self._all_installed_dialects()]
+        source_dialect: str | None = self._prompts.choice("Select the source dialect:", all_dialects, sort=False)
+        if source_dialect == install_later:
+            source_dialect = None
+        transpiler_name: str | None = None
+        transpiler_config_path: Path | None = None
+        if source_dialect:
+            transpilers = self._transpilers_with_dialect(source_dialect)
+            if len(transpilers) > 1:
+                transpilers = [install_later] + transpilers
+                transpiler_name = self._prompts.choice("Select the transpiler:", transpilers, sort=False)
+                if transpiler_name == install_later:
+                    transpiler_name = None
+            else:
+                transpiler_name = next(t for t in transpilers)
+                logger.info(f"Lakebridge will use the {transpiler_name} transpiler")
+            if transpiler_name:
+                transpiler_config_path = self._transpiler_config_path(transpiler_name)
+        transpiler_options: dict[str, JsonValue] | None = None
+        if transpiler_config_path:
+            transpiler_options = self._prompt_for_transpiler_options(
+                cast(str, transpiler_name), cast(str, source_dialect)
+            )
+        input_source: str | None = self._prompts.question(
+            "Enter input SQL path (directory/file)", default=install_later
+        )
+        if input_source == install_later:
+            input_source = None
+        output_folder = self._prompts.question("Enter output directory", default="transpiled")
+        # When defaults are passed along we need to use absolute paths to avoid issues with relative paths
+        if output_folder == "transpiled":
+            output_folder = str(Path.cwd() / "transpiled")
+        error_file_path = self._prompts.question("Enter error file path", default="errors.log")
+        if error_file_path == "errors.log":
+            error_file_path = str(Path.cwd() / "errors.log")

+        run_validation = self._prompts.confirm(
+            "Would you like to validate the syntax and semantics of the transpiled queries?"
+        )
+
+        return TranspileConfig(
+            transpiler_config_path=str(transpiler_config_path) if transpiler_config_path is not None else None,
+            transpiler_options=transpiler_options,
+            source_dialect=source_dialect,
+            skip_validation=(not run_validation),
+            input_source=input_source,
+            output_folder=output_folder,
+            error_file_path=error_file_path,
+        )
+
+    def _prompt_for_transpiler_options(self, transpiler_name: str, source_dialect: str) -> dict[str, Any] | None:
+        config_options = self._transpiler_repository.transpiler_config_options(transpiler_name, source_dialect)
+        if len(config_options) == 0:
+            return None
+        return {option.flag: option.prompt_for_value(self._prompts) for option in config_options}
+
+    def _configure_catalog(self) -> str:
+        return self._resource_configurator.prompt_for_catalog_setup()
+
+    def _configure_schema(
+        self,
+        catalog: str,
+        default_schema_name: str,
+    ) -> str:
+        return self._resource_configurator.prompt_for_schema_setup(
+            catalog,
+            default_schema_name,
+        )
+
+    def _configure_reconcile(self) -> ReconcileConfig:
+        try:
+            self._installation.load(ReconcileConfig)
+            logger.info("Lakebridge `reconcile` is already installed on this workspace.")
+            if not self._prompts.confirm("Do you want to override the existing installation?"):
+                # TODO: Exit gracefully, without raising SystemExit
+                raise SystemExit(
+                    "Lakebridge `reconcile` is already installed and no override has been requested. Exiting..."
+                )
+        except NotFound:
+            logger.info("Couldn't find existing `reconcile` installation")
+        except (PermissionDenied, SerdeError, ValueError, AttributeError):
+            install_dir = self._installation.install_folder()
+            logger.warning(
+                f"Existing `reconcile` installation at {install_dir} is corrupted. Continuing new installation..."
+            )
+
+        config = self._configure_new_reconcile_installation()
+        logger.info("Finished configuring lakebridge `reconcile`.")
+        return config
+
+    def _configure_new_reconcile_installation(self) -> ReconcileConfig:
+        default_config = self._prompt_for_new_reconcile_installation()
+        self._save_config(default_config)
+        return default_config
+
+    def _prompt_for_new_reconcile_installation(self) -> ReconcileConfig:
+        logger.info("Please answer a few questions to configure lakebridge `reconcile`")
+        data_source = self._prompts.choice(
+            "Select the Data Source:", [source_type.value for source_type in ReconSourceType]
+        )
+        report_type = self._prompts.choice(
+            "Select the report type:", [report_type.value for report_type in ReconReportType]
+        )
+        scope_name = self._prompts.question(
+            f"Enter Secret scope name to store `{data_source.capitalize()}` connection details / secrets",
+            default=f"remorph_{data_source}",
+        )
+
+        db_config = self._prompt_for_reconcile_database_config(data_source)
+        metadata_config = self._prompt_for_reconcile_metadata_config()
+
+        return ReconcileConfig(
+            data_source=data_source,
+            report_type=report_type,
+            secret_scope=scope_name,
+            database_config=db_config,
+            metadata_config=metadata_config,
+        )
+
+    def _prompt_for_reconcile_database_config(self, source) -> DatabaseConfig:
+        source_catalog = None
+        if source == ReconSourceType.SNOWFLAKE.value:
+            source_catalog = self._prompts.question(f"Enter source catalog name for `{source.capitalize()}`")
+
+        schema_prompt = f"Enter source schema name for `{source.capitalize()}`"
+        if source in {ReconSourceType.ORACLE.value}:
+            schema_prompt = f"Enter source database name for `{source.capitalize()}`"
+
+        source_schema = self._prompts.question(schema_prompt)
+        target_catalog = self._prompts.question("Enter target catalog name for Databricks")
+        target_schema = self._prompts.question("Enter target schema name for Databricks")
+
+        return DatabaseConfig(
+            source_schema=source_schema,
+            target_catalog=target_catalog,
+            target_schema=target_schema,
+            source_catalog=source_catalog,
+        )
+
+    def _prompt_for_reconcile_metadata_config(self) -> ReconcileMetadataConfig:
+        logger.info("Configuring reconcile metadata.")
+        catalog = self._configure_catalog()
+        schema = self._configure_schema(
+            catalog,
+            "reconcile",
+        )
+        volume = self._configure_volume(catalog, schema, "reconcile_volume")
+        self._has_necessary_access(catalog, schema, volume)
+        return ReconcileMetadataConfig(catalog=catalog, schema=schema, volume=volume)
+
+    def _configure_volume(
+        self,
+        catalog: str,
+        schema: str,
+        default_volume_name: str,
+    ) -> str:
+        return self._resource_configurator.prompt_for_volume_setup(
+            catalog,
+            schema,
+            default_volume_name,
+        )
+
+    def _save_config(self, config: TranspileConfig | ReconcileConfig):
+        logger.info(f"Saving configuration file {config.__file__}")
+        self._installation.save(config)
+        ws_file_url = self._installation.workspace_link(config.__file__)
+        if self._prompts.confirm(f"Open config file {ws_file_url} in the browser?"):
+            webbrowser.open(ws_file_url)
+
+    def _has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None = None):
+        self._resource_configurator.has_necessary_access(catalog_name, schema_name, volume_name)
+
+
+def installer(ws: WorkspaceClient, transpiler_repository: TranspilerRepository) -> WorkspaceInstaller:
+    app_context = ApplicationContext(_verify_workspace_client(ws))
+    return WorkspaceInstaller(
+        app_context.workspace_client,
+        app_context.prompts,
+        app_context.installation,
+        app_context.install_state,
+        app_context.product_info,
+        app_context.resource_configurator,
+        app_context.workspace_installation,
+        transpiler_repository=transpiler_repository,
+    )
+
+
+def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
+    """Verifies the workspace client configuration, ensuring it has the correct product info."""
+
+    # Using reflection to set right value for _product_info for telemetry
+    product_info = getattr(ws.config, '_product_info')
+    if product_info[0] != "lakebridge":
+        setattr(ws.config, '_product_info', ('lakebridge', __version__))
+
+    return ws
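The module-level `installer()` helper is the entry point: it wires a `WorkspaceInstaller` out of an `ApplicationContext` and stamps the client with lakebridge product info for telemetry. A hedged sketch of how `base_install.py` (above) or a direct caller might drive it, assuming workspace authentication is already configured in the environment:

```python
from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.__about__ import __version__
from databricks.labs.lakebridge.install import installer
from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository

ws = WorkspaceClient(product="lakebridge", product_version=__version__)
ws_installer = installer(ws, TranspilerRepository.user_home())

# upgrade_installed_transpilers() returns True only if an already-installed
# transpiler was actually upgraded; in that case the transpile configuration
# is regenerated and re-installed as part of the call.
if not ws_installer.upgrade_installed_transpilers():
    ws_installer.run(module="transpile")  # fresh install: prompts for configuration
```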
{databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/data_source.py
@@ -29,6 +29,7 @@ class DataSource(ABC):
         catalog: str | None,
         schema: str,
         table: str,
+        normalize: bool = True,
     ) -> list[Schema]:
         return NotImplemented

@@ -42,16 +43,19 @@ class DataSource(ABC):
         logger.warning(error_msg)
         raise DataSourceRuntimeException(error_msg) from exception

-    def _map_meta_column(self, meta_column) -> Schema:
+    def _map_meta_column(self, meta_column, normalize: bool) -> Schema:
         """Create a normalized Schema DTO from the database metadata

        Used in the implementations of get_schema to build a Schema DTO from the `INFORMATION_SCHEMA` query result.
        The returned Schema is normalized in case the database is having columns with special characters and standardize
        """
-        name = meta_column.col_name
+        name = meta_column.col_name.lower()
         dtype = meta_column.data_type.strip().lower()
-
-
+        if normalize:
+            normalized = self.normalize_identifier(name)
+            return Schema(normalized.ansi_normalized, dtype, normalized.ansi_normalized, normalized.source_normalized)
+
+        return Schema(name, dtype, name, name)


 class MockDataSource(DataSource):
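The effect of the new `normalize` flag: with `normalize=False` the lower-cased column name is echoed into every name field of the `Schema` DTO, while the default path routes it through `self.normalize_identifier(...)`, which, as used above, yields separate ANSI-normalized and source-normalized spellings. A rough sketch of the two branches with a hypothetical metadata row:

```python
from dataclasses import dataclass

@dataclass
class MetaColumn:  # hypothetical stand-in for one INFORMATION_SCHEMA result row
    col_name: str
    data_type: str

meta = MetaColumn(col_name="Order-Date", data_type=" DATE ")
name = meta.col_name.lower()            # 'order-date'
dtype = meta.data_type.strip().lower()  # 'date'
# normalize=False: Schema('order-date', 'date', 'order-date', 'order-date')
# normalize=True:  normalize_identifier(name) supplies the ansi_normalized and
# source_normalized forms, preserving the source dialect's quoting rules.
```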
@@ -80,7 +84,7 @@ class MockDataSource(DataSource):
             return self.log_and_throw_exception(self._exception, "data", f"({catalog}, {schema}, {query})")
         return mock_df

-    def get_schema(self, catalog: str | None, schema: str, table: str) -> list[Schema]:
+    def get_schema(self, catalog: str | None, schema: str, table: str, normalize: bool = True) -> list[Schema]:
         catalog_str = catalog if catalog else ""
         mock_schema = self._schema_repository.get((catalog_str, schema, table))
         if not mock_schema: