databricks-labs-lakebridge 0.10.7__tar.gz → 0.10.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/PKG-INFO +1 -1
  2. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/__about__.py +1 -1
  3. databricks_labs_lakebridge-0.10.8/databricks/labs/lakebridge/assessments/profiler_validator.py +103 -0
  4. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/base_install.py +1 -5
  5. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/cli.py +13 -6
  6. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/validation.py +5 -3
  7. databricks_labs_lakebridge-0.10.8/databricks/labs/lakebridge/install.py +393 -0
  8. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/data_source.py +9 -5
  9. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/databricks.py +2 -1
  10. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/oracle.py +2 -1
  11. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/secrets.py +19 -1
  12. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/snowflake.py +50 -29
  13. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/tsql.py +2 -1
  14. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/base.py +50 -11
  15. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +8 -2
  16. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +7 -13
  17. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +18 -19
  18. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +36 -15
  19. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/recon_config.py +0 -15
  20. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/reconciliation.py +4 -1
  21. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/trigger_recon_aggregate_service.py +11 -31
  22. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/trigger_recon_service.py +4 -1
  23. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/execute.py +34 -28
  24. databricks_labs_lakebridge-0.10.7/databricks/labs/lakebridge/install.py → databricks_labs_lakebridge-0.10.8/databricks/labs/lakebridge/transpiler/installers.py +66 -377
  25. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +2 -0
  26. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/pyproject.toml +2 -1
  27. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/.gitignore +0 -0
  28. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/LICENSE +0 -0
  29. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/NOTICE +0 -0
  30. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/README.md +0 -0
  31. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/__init__.py +0 -0
  32. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/__init__.py +0 -0
  33. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/__init__.py +0 -0
  34. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/analyzer/__init__.py +0 -0
  35. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/analyzer/lakebridge_analyzer.py +0 -0
  36. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/assessments/configure_assessment.py +0 -0
  37. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/assessments/pipeline.py +0 -0
  38. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/assessments/profiler_config.py +0 -0
  39. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/config.py +0 -0
  40. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/connections/__init__.py +0 -0
  41. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/connections/credential_manager.py +0 -0
  42. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/connections/database_manager.py +0 -0
  43. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/connections/env_getter.py +0 -0
  44. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/contexts/__init__.py +0 -0
  45. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/contexts/application.py +0 -0
  46. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/__init__.py +0 -0
  47. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/commons.py +0 -0
  48. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +0 -0
  49. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/local_report.py +0 -0
  50. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +0 -0
  51. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +0 -0
  52. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/__init__.py +0 -0
  53. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/configurator.py +0 -0
  54. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/dashboard.py +0 -0
  55. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/installation.py +0 -0
  56. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/job.py +0 -0
  57. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/recon.py +0 -0
  58. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/table.py +0 -0
  59. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/deployment/upgrade_common.py +0 -0
  60. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/discovery/table.py +0 -0
  61. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/discovery/table_definition.py +0 -0
  62. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/discovery/tsql_table_definition.py +0 -0
  63. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/errors/exceptions.py +0 -0
  64. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/__init__.py +0 -0
  65. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/db_sql.py +0 -0
  66. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/execution_time.py +0 -0
  67. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/file_utils.py +0 -0
  68. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/metastore.py +0 -0
  69. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/recon_config_utils.py +0 -0
  70. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/string_utils.py +0 -0
  71. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/helpers/telemetry_utils.py +0 -0
  72. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  73. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/intermediate/dag.py +0 -0
  74. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  75. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/intermediate/root_tables.py +0 -0
  76. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/jvmproxy.py +0 -0
  77. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/lineage.py +0 -0
  78. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  79. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/compare.py +0 -0
  80. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  81. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/dialect_utils.py +0 -0
  82. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +0 -0
  83. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/models.py +0 -0
  84. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +0 -0
  85. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/constants.py +0 -0
  86. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/exception.py +0 -0
  87. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/execute.py +0 -0
  88. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/normalize_recon_config_service.py +0 -0
  89. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  90. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +0 -0
  91. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/query_builder/count_query.py +0 -0
  92. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/recon_capture.py +0 -0
  93. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/recon_output_config.py +0 -0
  94. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/runner.py +0 -0
  95. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/sampler.py +0 -0
  96. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/schema_compare.py +0 -0
  97. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/reconcile/utils.py +0 -0
  98. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/__init__.py +0 -0
  99. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/config/credentials.yml +0 -0
  100. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  101. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  102. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +0 -0
  103. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +0 -0
  104. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +0 -0
  105. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +0 -0
  106. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +0 -0
  107. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +0 -0
  108. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +0 -0
  109. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +0 -0
  110. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +0 -0
  111. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +0 -0
  112. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +0 -0
  113. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +0 -0
  114. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +0 -0
  115. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +0 -0
  116. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +0 -0
  117. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +0 -0
  118. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +0 -0
  119. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +0 -0
  120. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +0 -0
  121. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +0 -0
  122. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +0 -0
  123. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +0 -0
  124. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +0 -0
  125. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +0 -0
  126. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +0 -0
  127. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +0 -0
  128. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +0 -0
  129. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +0 -0
  130. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +0 -0
  131. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +0 -0
  132. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +0 -0
  133. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +0 -0
  134. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +0 -0
  135. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +0 -0
  136. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +0 -0
  137. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +0 -0
  138. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +0 -0
  139. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +0 -0
  140. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +0 -0
  141. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +0 -0
  142. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +0 -0
  143. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +0 -0
  144. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +0 -0
  145. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +0 -0
  146. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +0 -0
  147. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +0 -0
  148. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +0 -0
  149. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +0 -0
  150. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +0 -0
  151. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  152. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  153. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +0 -0
  154. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +0 -0
  155. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +0 -0
  156. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +0 -0
  157. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +0 -0
  158. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +0 -0
  159. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  160. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  161. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/repository.py +0 -0
  162. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  163. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +0 -0
  164. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  165. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +0 -0
  166. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +0 -0
  167. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +0 -0
  168. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  169. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +0 -0
  170. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +0 -0
  171. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +0 -0
  172. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +0 -0
  173. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/transpile_engine.py +0 -0
  174. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/transpiler/transpile_status.py +0 -0
  175. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/uninstall.py +0 -0
  176. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +0 -0
  177. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +0 -0
  178. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/components/Button.tsx +0 -0
  179. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/components/ReconcileTabs.tsx +0 -0
  180. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/css/custom.css +0 -0
  181. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/css/table.css +0 -0
  182. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/pages/index.tsx +0 -0
  183. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/theme/DocSidebarItems/index.tsx +0 -0
  184. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/theme/Footer/index.tsx +0 -0
  185. {databricks_labs_lakebridge-0.10.7 → databricks_labs_lakebridge-0.10.8}/docs/lakebridge/src/theme/Layout/index.tsx +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: databricks-labs-lakebridge
3
- Version: 0.10.7
3
+ Version: 0.10.8
4
4
  Summary: Fast and predictable migrations to Databricks Lakehouse Platform. This tool is designed to help you migrate your data and workloads to the Databricks Lakehouse Platform in a fast, predictable, and reliable way. It provides a set of tools and utilities to help you reconcile your data and workloads, assess your current state, and plan your migration.
5
5
  Project-URL: Documentation, https://databrickslabs.github.io/lakebridge
6
6
  Project-URL: Issues, https://github.com/databrickslabs/lakebridge/issues
@@ -1,2 +1,2 @@
1
1
  # DO NOT MODIFY THIS FILE
2
- __version__ = "0.10.7"
2
+ __version__ = "0.10.8"
@@ -0,0 +1,103 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+ from duckdb import DuckDBPyConnection
4
+
5
+ from databricks.labs.lakebridge.assessments.pipeline import PipelineClass
6
+
7
+ PROFILER_DB_NAME = "profiler_extract.db"
8
+
9
+
10
@dataclass(frozen=True)
class ValidationOutcome:
    """A data class that holds the outcome of a table validation check."""

    # Name of the profiler table that was checked.
    table: str
    # Column under validation; None for table-level checks (e.g. emptiness).
    column: str | None
    # Name of the ValidationStrategy subclass that produced this outcome.
    strategy: str
    # "PASS" or "FAIL".
    outcome: str
    # Severity label supplied to the check (defaults to "WARN" in the checks below).
    severity: str
19
+
20
+
21
class ValidationStrategy:
    """Abstract class for validating a Profiler table"""

    def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
        # Subclasses must implement the actual check against the profiler
        # extract database and return a ValidationOutcome.
        raise NotImplementedError
26
+
27
+
28
class NullValidationCheck(ValidationStrategy):
    """Concrete class for validating null values in a profiler table"""

    def __init__(self, table, column, severity="WARN"):
        self.table = table
        self.column = column
        self.severity = severity
        # Strategy name recorded in the ValidationOutcome report.
        self.name = self.__class__.__name__

    def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
        """
        Validates that a column does not contain null values.
        input:
            connection: a DuckDB connection object
        """
        # NOTE(review): table/column names are interpolated directly into the
        # SQL; they are presumed to come from trusted profiler config — confirm.
        query = f"SELECT COUNT(*) FROM {self.table} WHERE {self.column} IS NULL"
        row = connection.execute(query).fetchone()
        # FAIL when the count query returns no row, or when any nulls exist.
        outcome = "FAIL" if not row or row[0] > 0 else "PASS"
        return ValidationOutcome(self.table, self.column, self.name, outcome, self.severity)
50
+
51
+
52
class EmptyTableValidationCheck(ValidationStrategy):
    """Concrete class for validating empty tables from a profiler run."""

    def __init__(self, table, severity="WARN"):
        # Strategy name recorded in the ValidationOutcome report.
        self.name = self.__class__.__name__
        self.table = table
        self.severity = severity

    def validate(self, connection: DuckDBPyConnection) -> ValidationOutcome:
        """Validates that a table is not empty.
        input:
            connection: a DuckDB connection object
        returns:
            a ValidationOutcome object
        """
        # NOTE(review): table name is interpolated directly into the SQL; it is
        # presumed to come from trusted profiler config — confirm upstream.
        result = connection.execute(f"SELECT COUNT(*) FROM {self.table}").fetchone()
        if result:
            row_count = result[0]
            outcome = "PASS" if row_count > 0 else "FAIL"
        else:
            # fetchone() returned no row at all — treat as a failed check.
            outcome = "FAIL"
        # column is None: this is a table-level check, not a column-level one.
        return ValidationOutcome(self.table, None, self.name, outcome, self.severity)
74
+
75
+
76
def get_profiler_extract_path(pipeline_config_path: str) -> str:
    """
    Returns the filesystem path of the profiler extract database.
    input:
        pipeline_config_path: the location of the pipeline definition .yml file
    returns:
        the filesystem path to the profiler extract database
    """
    pipeline_config = PipelineClass.load_config_from_yaml(pipeline_config_path)
    normalized_db_path = os.path.normpath(pipeline_config.extract_folder)
    # os.path.join keeps the separator consistent with normpath's output on
    # every platform; the previous f-string hard-coded "/" and produced mixed
    # separators on Windows.
    return os.path.join(normalized_db_path, PROFILER_DB_NAME)
88
+
89
+
90
def build_validation_report(
    validations: list[ValidationStrategy], connection: DuckDBPyConnection
) -> list[ValidationOutcome]:
    """
    Builds a list of ValidationOutcomes from a list of validation checks.
    input:
        validations: a list of ValidationStrategy objects
        connection: a DuckDB connection object
    returns: a list of ValidationOutcomes
    """
    # Run every check against the same connection and collect the outcomes.
    return [check.validate(connection) for check in validations]
@@ -19,11 +19,7 @@ def main() -> None:
19
19
  WorkspaceClient(product="lakebridge", product_version=__version__),
20
20
  transpiler_repository=TranspilerRepository.user_home(),
21
21
  )
22
- if installer.has_installed_transpilers():
23
- logger.warning(
24
- "Detected existing Lakebridge transpilers; run 'databricks labs lakebridge install-transpile' to upgrade them."
25
- )
26
- else:
22
+ if not installer.upgrade_installed_transpilers():
27
23
  logger.debug("No existing Lakebridge transpilers detected; assuming fresh installation.")
28
24
 
29
25
  logger.info("Successfully Setup Lakebridge Components Locally")
@@ -74,6 +74,7 @@ def _remove_warehouse(ws: WorkspaceClient, warehouse_id: str):
74
74
 
75
75
  @lakebridge.command
76
76
  def transpile(
77
+ *,
77
78
  w: WorkspaceClient,
78
79
  transpiler_config_path: str | None = None,
79
80
  source_dialect: str | None = None,
@@ -340,6 +341,8 @@ class _TranspileConfigChecker:
340
341
  supported_dialects = ", ".join(self._transpiler_repository.all_dialects())
341
342
  msg = f"{msg_prefix}: {source_dialect!r} (supported dialects: {supported_dialects})"
342
343
  raise_validation_exception(msg)
344
+ else:
345
+ self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
343
346
  else:
344
347
  # Check the source dialect against the engine.
345
348
  if source_dialect not in engine.supported_dialects:
@@ -366,6 +369,7 @@ class _TranspileConfigChecker:
366
369
  source_dialect = self._prompts.choice("Select the source dialect:", list(supported_dialects))
367
370
  engine = self._configure_transpiler_config_path(source_dialect)
368
371
  assert engine is not None, "No transpiler engine available for a supported dialect; configuration is invalid."
372
+ self._config = dataclasses.replace(self._config, source_dialect=source_dialect)
369
373
  return engine
370
374
 
371
375
  def _check_lsp_engine(self) -> TranspileEngine:
@@ -518,7 +522,7 @@ def _override_workspace_client_config(ctx: ApplicationContext, overrides: dict[s
518
522
 
519
523
 
520
524
  @lakebridge.command
521
- def reconcile(w: WorkspaceClient) -> None:
525
+ def reconcile(*, w: WorkspaceClient) -> None:
522
526
  """[EXPERIMENTAL] Reconciles source to Databricks datasets"""
523
527
  with_user_agent_extra("cmd", "execute-reconcile")
524
528
  ctx = ApplicationContext(w)
@@ -534,7 +538,7 @@ def reconcile(w: WorkspaceClient) -> None:
534
538
 
535
539
 
536
540
  @lakebridge.command
537
- def aggregates_reconcile(w: WorkspaceClient) -> None:
541
+ def aggregates_reconcile(*, w: WorkspaceClient) -> None:
538
542
  """[EXPERIMENTAL] Reconciles Aggregated source to Databricks datasets"""
539
543
  with_user_agent_extra("cmd", "execute-aggregates-reconcile")
540
544
  ctx = ApplicationContext(w)
@@ -552,8 +556,8 @@ def aggregates_reconcile(w: WorkspaceClient) -> None:
552
556
 
553
557
  @lakebridge.command
554
558
  def generate_lineage(
555
- w: WorkspaceClient,
556
559
  *,
560
+ w: WorkspaceClient,
557
561
  source_dialect: str | None = None,
558
562
  input_source: str,
559
563
  output_folder: str,
@@ -578,7 +582,7 @@ def generate_lineage(
578
582
 
579
583
 
580
584
  @lakebridge.command
581
- def configure_secrets(w: WorkspaceClient) -> None:
585
+ def configure_secrets(*, w: WorkspaceClient) -> None:
582
586
  """Setup reconciliation connection profile details as Secrets on Databricks Workspace"""
583
587
  recon_conf = ReconConfigPrompts(w)
584
588
 
@@ -604,8 +608,9 @@ def configure_database_profiler() -> None:
604
608
  assessment.run()
605
609
 
606
610
 
607
- @lakebridge.command()
611
+ @lakebridge.command
608
612
  def install_transpile(
613
+ *,
609
614
  w: WorkspaceClient,
610
615
  artifact: str | None = None,
611
616
  transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
@@ -622,6 +627,7 @@ def install_transpile(
622
627
 
623
628
  @lakebridge.command(is_unauthenticated=False)
624
629
  def configure_reconcile(
630
+ *,
625
631
  w: WorkspaceClient,
626
632
  transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
627
633
  ) -> None:
@@ -637,8 +643,9 @@ def configure_reconcile(
637
643
  reconcile_installer.run(module="reconcile")
638
644
 
639
645
 
640
- @lakebridge.command()
646
+ @lakebridge.command
641
647
  def analyze(
648
+ *,
642
649
  w: WorkspaceClient,
643
650
  source_directory: str | None = None,
644
651
  report_file: str | None = None,
@@ -37,19 +37,21 @@ class Validator:
37
37
  config.catalog_name,
38
38
  config.schema_name,
39
39
  )
40
+ # Some errors don't return the query text along with the error message, so those need to be handled separately
41
+ static_errors_lkp = ["[UNRESOLVED_ROUTINE]", "[UNRESOLVED_COLUMN.WITHOUT_SUGGESTION]"]
40
42
  if is_valid:
41
43
  result = sql_text
42
44
  if exception_type is not None:
43
45
  exception_msg = f"[{exception_type.upper()}]: {exception_msg}"
44
46
  else:
45
47
  query = ""
46
- if "[UNRESOLVED_ROUTINE]" in str(exception_msg):
48
+ if any(err in str(exception_msg) for err in static_errors_lkp):
47
49
  query = sql_text
48
50
  buffer = StringIO()
49
51
  buffer.write("-------------- Exception Start-------------------\n")
50
- buffer.write("/* \n")
52
+ buffer.write("/*\n")
51
53
  buffer.write(str(exception_msg))
52
- buffer.write("\n */ \n")
54
+ buffer.write("\n*/\n")
53
55
  buffer.write(query)
54
56
  buffer.write("\n ---------------Exception End --------------------\n")
55
57
 
@@ -0,0 +1,393 @@
1
+ import dataclasses
2
+ import logging
3
+ import os
4
+ import webbrowser
5
+ from collections.abc import Set, Callable, Sequence
6
+ from pathlib import Path
7
+ from typing import Any, cast
8
+
9
+ from databricks.labs.blueprint.installation import Installation, JsonValue, SerdeError
10
+ from databricks.labs.blueprint.installer import InstallState
11
+ from databricks.labs.blueprint.tui import Prompts
12
+ from databricks.labs.blueprint.wheels import ProductInfo
13
+ from databricks.sdk import WorkspaceClient
14
+ from databricks.sdk.errors import NotFound, PermissionDenied
15
+
16
+ from databricks.labs.lakebridge.__about__ import __version__
17
+ from databricks.labs.lakebridge.config import (
18
+ DatabaseConfig,
19
+ ReconcileConfig,
20
+ LakebridgeConfiguration,
21
+ ReconcileMetadataConfig,
22
+ TranspileConfig,
23
+ )
24
+ from databricks.labs.lakebridge.contexts.application import ApplicationContext
25
+ from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
26
+ from databricks.labs.lakebridge.deployment.installation import WorkspaceInstallation
27
+ from databricks.labs.lakebridge.reconcile.constants import ReconReportType, ReconSourceType
28
+ from databricks.labs.lakebridge.transpiler.installers import (
29
+ BladebridgeInstaller,
30
+ MorpheusInstaller,
31
+ TranspilerInstaller,
32
+ )
33
+ from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ TRANSPILER_WAREHOUSE_PREFIX = "Lakebridge Transpiler Validation"
38
+
39
+
40
+ class WorkspaceInstaller:
41
+ def __init__(
42
+ self,
43
+ ws: WorkspaceClient,
44
+ prompts: Prompts,
45
+ installation: Installation,
46
+ install_state: InstallState,
47
+ product_info: ProductInfo,
48
+ resource_configurator: ResourceConfigurator,
49
+ workspace_installation: WorkspaceInstallation,
50
+ environ: dict[str, str] | None = None,
51
+ *,
52
+ transpiler_repository: TranspilerRepository = TranspilerRepository.user_home(),
53
+ transpiler_installers: Sequence[Callable[[TranspilerRepository], TranspilerInstaller]] = (
54
+ BladebridgeInstaller,
55
+ MorpheusInstaller,
56
+ ),
57
+ ):
58
+ self._ws = ws
59
+ self._prompts = prompts
60
+ self._installation = installation
61
+ self._install_state = install_state
62
+ self._product_info = product_info
63
+ self._resource_configurator = resource_configurator
64
+ self._ws_installation = workspace_installation
65
+ self._transpiler_repository = transpiler_repository
66
+ self._transpiler_installer_factories = transpiler_installers
67
+
68
+ if not environ:
69
+ environ = dict(os.environ.items())
70
+
71
+ if "DATABRICKS_RUNTIME_VERSION" in environ:
72
+ msg = "WorkspaceInstaller is not supposed to be executed in Databricks Runtime"
73
+ raise SystemExit(msg)
74
+
75
+ @property
76
+ def _transpiler_installers(self) -> Set[TranspilerInstaller]:
77
+ return frozenset(factory(self._transpiler_repository) for factory in self._transpiler_installer_factories)
78
+
79
+ def run(
80
+ self, module: str, config: LakebridgeConfiguration | None = None, artifact: str | None = None
81
+ ) -> LakebridgeConfiguration:
82
+ logger.debug(f"Initializing workspace installation for module: {module} (config: {config})")
83
+ if module == "transpile" and artifact:
84
+ self._install_artifact(artifact)
85
+ elif module in {"transpile", "all"}:
86
+ for transpiler_installer in self._transpiler_installers:
87
+ transpiler_installer.install()
88
+ if not config:
89
+ config = self.configure(module)
90
+ if self._is_testing():
91
+ return config
92
+ self._ws_installation.install(config)
93
+ logger.info("Installation completed successfully! Please refer to the documentation for the next steps.")
94
+ return config
95
+
96
+ def upgrade_installed_transpilers(self) -> bool:
97
+ """Detect and upgrade, if possible and necessary, installed transpilers."""
98
+ installed_transpilers = self._transpiler_repository.all_transpiler_names()
99
+ if installed_transpilers:
100
+ logger.info(f"Detected installed transpilers: {sorted(installed_transpilers)}")
101
+ upgraded = False
102
+ for transpiler_installer in self._transpiler_installers:
103
+ name = transpiler_installer.name
104
+ if name in installed_transpilers:
105
+ logger.info(f"Checking for {name} upgrades...")
106
+ upgraded |= transpiler_installer.install()
107
+ # If we upgraded anything, the configuration process needs to run again.
108
+ if upgraded:
109
+ config = self.configure("transpile")
110
+ if not self._is_testing():
111
+ self._ws_installation.install(config)
112
+ return upgraded
113
+
114
+ def _install_artifact(self, artifact: str) -> None:
115
+ path = Path(artifact)
116
+ if not path.exists():
117
+ logger.error(f"Could not locate artifact {artifact}")
118
+ return
119
+ for transpiler_installer in self._transpiler_installers:
120
+ if transpiler_installer.can_install(path):
121
+ transpiler_installer.install(path)
122
+ break
123
+ else:
124
+ logger.fatal(f"Cannot install unsupported artifact: {artifact}")
125
+
126
+ def configure(self, module: str) -> LakebridgeConfiguration:
127
+ match module:
128
+ case "transpile":
129
+ logger.info("Configuring lakebridge `transpile`.")
130
+ return LakebridgeConfiguration(self._configure_transpile(), None)
131
+ case "reconcile":
132
+ logger.info("Configuring lakebridge `reconcile`.")
133
+ return LakebridgeConfiguration(None, self._configure_reconcile())
134
+ case "all":
135
+ logger.info("Configuring lakebridge `transpile` and `reconcile`.")
136
+ return LakebridgeConfiguration(
137
+ self._configure_transpile(),
138
+ self._configure_reconcile(),
139
+ )
140
+ case _:
141
+ raise ValueError(f"Invalid input: {module}")
142
+
143
+ def _is_testing(self):
144
+ return self._product_info.product_name() != "lakebridge"
145
+
146
+ def _configure_transpile(self) -> TranspileConfig:
147
+ try:
148
+ config = self._installation.load(TranspileConfig)
149
+ logger.info("Lakebridge `transpile` is already installed on this workspace.")
150
+ if not self._prompts.confirm("Do you want to override the existing installation?"):
151
+ return config
152
+ except NotFound:
153
+ logger.info("Couldn't find existing `transpile` installation")
154
+ except (PermissionDenied, SerdeError, ValueError, AttributeError):
155
+ install_dir = self._installation.install_folder()
156
+ logger.warning(
157
+ f"Existing `transpile` installation at {install_dir} is corrupted. Continuing new installation..."
158
+ )
159
+
160
+ config = self._configure_new_transpile_installation()
161
+ logger.info("Finished configuring lakebridge `transpile`.")
162
+ return config
163
+
164
+ def _configure_new_transpile_installation(self) -> TranspileConfig:
165
+ default_config = self._prompt_for_new_transpile_installation()
166
+ runtime_config = None
167
+ catalog_name = "remorph"
168
+ schema_name = "transpiler"
169
+ if not default_config.skip_validation:
170
+ catalog_name = self._configure_catalog()
171
+ schema_name = self._configure_schema(catalog_name, "transpile")
172
+ self._has_necessary_access(catalog_name, schema_name)
173
+ warehouse_id = self._resource_configurator.prompt_for_warehouse_setup(TRANSPILER_WAREHOUSE_PREFIX)
174
+ runtime_config = {"warehouse_id": warehouse_id}
175
+
176
+ config = dataclasses.replace(
177
+ default_config,
178
+ catalog_name=catalog_name,
179
+ schema_name=schema_name,
180
+ sdk_config=runtime_config,
181
+ )
182
+ self._save_config(config)
183
+ return config
184
+
185
+ def _all_installed_dialects(self) -> list[str]:
186
+ return sorted(self._transpiler_repository.all_dialects())
187
+
188
+ def _transpilers_with_dialect(self, dialect: str) -> list[str]:
189
+ return sorted(self._transpiler_repository.transpilers_with_dialect(dialect))
190
+
191
+ def _transpiler_config_path(self, transpiler: str) -> Path:
192
+ return self._transpiler_repository.transpiler_config_path(transpiler)
193
+
194
+ def _prompt_for_new_transpile_installation(self) -> TranspileConfig:
195
+ install_later = "Set it later"
196
+ # TODO tidy this up, logger might not display the below in console...
197
+ logger.info("Please answer a few questions to configure lakebridge `transpile`")
198
+ all_dialects = [install_later, *self._all_installed_dialects()]
199
+ source_dialect: str | None = self._prompts.choice("Select the source dialect:", all_dialects, sort=False)
200
+ if source_dialect == install_later:
201
+ source_dialect = None
202
+ transpiler_name: str | None = None
203
+ transpiler_config_path: Path | None = None
204
+ if source_dialect:
205
+ transpilers = self._transpilers_with_dialect(source_dialect)
206
+ if len(transpilers) > 1:
207
+ transpilers = [install_later] + transpilers
208
+ transpiler_name = self._prompts.choice("Select the transpiler:", transpilers, sort=False)
209
+ if transpiler_name == install_later:
210
+ transpiler_name = None
211
+ else:
212
+ transpiler_name = next(t for t in transpilers)
213
+ logger.info(f"Lakebridge will use the {transpiler_name} transpiler")
214
+ if transpiler_name:
215
+ transpiler_config_path = self._transpiler_config_path(transpiler_name)
216
+ transpiler_options: dict[str, JsonValue] | None = None
217
+ if transpiler_config_path:
218
+ transpiler_options = self._prompt_for_transpiler_options(
219
+ cast(str, transpiler_name), cast(str, source_dialect)
220
+ )
221
+ input_source: str | None = self._prompts.question(
222
+ "Enter input SQL path (directory/file)", default=install_later
223
+ )
224
+ if input_source == install_later:
225
+ input_source = None
226
+ output_folder = self._prompts.question("Enter output directory", default="transpiled")
227
+ # When defaults are passed along we need to use absolute paths to avoid issues with relative paths
228
+ if output_folder == "transpiled":
229
+ output_folder = str(Path.cwd() / "transpiled")
230
+ error_file_path = self._prompts.question("Enter error file path", default="errors.log")
231
+ if error_file_path == "errors.log":
232
+ error_file_path = str(Path.cwd() / "errors.log")
233
+
234
+ run_validation = self._prompts.confirm(
235
+ "Would you like to validate the syntax and semantics of the transpiled queries?"
236
+ )
237
+
238
+ return TranspileConfig(
239
+ transpiler_config_path=str(transpiler_config_path) if transpiler_config_path is not None else None,
240
+ transpiler_options=transpiler_options,
241
+ source_dialect=source_dialect,
242
+ skip_validation=(not run_validation),
243
+ input_source=input_source,
244
+ output_folder=output_folder,
245
+ error_file_path=error_file_path,
246
+ )
247
+
248
+ def _prompt_for_transpiler_options(self, transpiler_name: str, source_dialect: str) -> dict[str, Any] | None:
249
+ config_options = self._transpiler_repository.transpiler_config_options(transpiler_name, source_dialect)
250
+ if len(config_options) == 0:
251
+ return None
252
+ return {option.flag: option.prompt_for_value(self._prompts) for option in config_options}
253
+
254
+ def _configure_catalog(self) -> str:
255
+ return self._resource_configurator.prompt_for_catalog_setup()
256
+
257
+ def _configure_schema(
258
+ self,
259
+ catalog: str,
260
+ default_schema_name: str,
261
+ ) -> str:
262
+ return self._resource_configurator.prompt_for_schema_setup(
263
+ catalog,
264
+ default_schema_name,
265
+ )
266
+
267
+ def _configure_reconcile(self) -> ReconcileConfig:
268
+ try:
269
+ self._installation.load(ReconcileConfig)
270
+ logger.info("Lakebridge `reconcile` is already installed on this workspace.")
271
+ if not self._prompts.confirm("Do you want to override the existing installation?"):
272
+ # TODO: Exit gracefully, without raising SystemExit
273
+ raise SystemExit(
274
+ "Lakebridge `reconcile` is already installed and no override has been requested. Exiting..."
275
+ )
276
+ except NotFound:
277
+ logger.info("Couldn't find existing `reconcile` installation")
278
+ except (PermissionDenied, SerdeError, ValueError, AttributeError):
279
+ install_dir = self._installation.install_folder()
280
+ logger.warning(
281
+ f"Existing `reconcile` installation at {install_dir} is corrupted. Continuing new installation..."
282
+ )
283
+
284
+ config = self._configure_new_reconcile_installation()
285
+ logger.info("Finished configuring lakebridge `reconcile`.")
286
+ return config
287
+
288
+ def _configure_new_reconcile_installation(self) -> ReconcileConfig:
289
+ default_config = self._prompt_for_new_reconcile_installation()
290
+ self._save_config(default_config)
291
+ return default_config
292
+
293
+ def _prompt_for_new_reconcile_installation(self) -> ReconcileConfig:
294
+ logger.info("Please answer a few questions to configure lakebridge `reconcile`")
295
+ data_source = self._prompts.choice(
296
+ "Select the Data Source:", [source_type.value for source_type in ReconSourceType]
297
+ )
298
+ report_type = self._prompts.choice(
299
+ "Select the report type:", [report_type.value for report_type in ReconReportType]
300
+ )
301
+ scope_name = self._prompts.question(
302
+ f"Enter Secret scope name to store `{data_source.capitalize()}` connection details / secrets",
303
+ default=f"remorph_{data_source}",
304
+ )
305
+
306
+ db_config = self._prompt_for_reconcile_database_config(data_source)
307
+ metadata_config = self._prompt_for_reconcile_metadata_config()
308
+
309
+ return ReconcileConfig(
310
+ data_source=data_source,
311
+ report_type=report_type,
312
+ secret_scope=scope_name,
313
+ database_config=db_config,
314
+ metadata_config=metadata_config,
315
+ )
316
+
317
+ def _prompt_for_reconcile_database_config(self, source) -> DatabaseConfig:
318
+ source_catalog = None
319
+ if source == ReconSourceType.SNOWFLAKE.value:
320
+ source_catalog = self._prompts.question(f"Enter source catalog name for `{source.capitalize()}`")
321
+
322
+ schema_prompt = f"Enter source schema name for `{source.capitalize()}`"
323
+ if source in {ReconSourceType.ORACLE.value}:
324
+ schema_prompt = f"Enter source database name for `{source.capitalize()}`"
325
+
326
+ source_schema = self._prompts.question(schema_prompt)
327
+ target_catalog = self._prompts.question("Enter target catalog name for Databricks")
328
+ target_schema = self._prompts.question("Enter target schema name for Databricks")
329
+
330
+ return DatabaseConfig(
331
+ source_schema=source_schema,
332
+ target_catalog=target_catalog,
333
+ target_schema=target_schema,
334
+ source_catalog=source_catalog,
335
+ )
336
+
337
+ def _prompt_for_reconcile_metadata_config(self) -> ReconcileMetadataConfig:
338
+ logger.info("Configuring reconcile metadata.")
339
+ catalog = self._configure_catalog()
340
+ schema = self._configure_schema(
341
+ catalog,
342
+ "reconcile",
343
+ )
344
+ volume = self._configure_volume(catalog, schema, "reconcile_volume")
345
+ self._has_necessary_access(catalog, schema, volume)
346
+ return ReconcileMetadataConfig(catalog=catalog, schema=schema, volume=volume)
347
+
348
+ def _configure_volume(
349
+ self,
350
+ catalog: str,
351
+ schema: str,
352
+ default_volume_name: str,
353
+ ) -> str:
354
+ return self._resource_configurator.prompt_for_volume_setup(
355
+ catalog,
356
+ schema,
357
+ default_volume_name,
358
+ )
359
+
360
+ def _save_config(self, config: TranspileConfig | ReconcileConfig):
361
+ logger.info(f"Saving configuration file {config.__file__}")
362
+ self._installation.save(config)
363
+ ws_file_url = self._installation.workspace_link(config.__file__)
364
+ if self._prompts.confirm(f"Open config file {ws_file_url} in the browser?"):
365
+ webbrowser.open(ws_file_url)
366
+
367
+ def _has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None = None):
368
+ self._resource_configurator.has_necessary_access(catalog_name, schema_name, volume_name)
369
+
370
+
371
def installer(ws: WorkspaceClient, transpiler_repository: TranspilerRepository) -> WorkspaceInstaller:
    """Build a ready-to-run WorkspaceInstaller from an application context.

    The workspace client is first normalized so its telemetry identifies lakebridge.
    """
    app_context = ApplicationContext(_verify_workspace_client(ws))
    return WorkspaceInstaller(
        app_context.workspace_client,
        app_context.prompts,
        app_context.installation,
        app_context.install_state,
        app_context.product_info,
        app_context.resource_configurator,
        app_context.workspace_installation,
        transpiler_repository=transpiler_repository,
    )
383
+
384
+
385
def _verify_workspace_client(ws: WorkspaceClient) -> WorkspaceClient:
    """Verifies the workspace client configuration, ensuring it has the correct product info."""

    # Reflection is used here (rather than direct attribute access) to set the
    # private telemetry field on the SDK config.
    info = getattr(ws.config, '_product_info')
    if info[0] != "lakebridge":
        setattr(ws.config, '_product_info', ('lakebridge', __version__))

    return ws
@@ -29,6 +29,7 @@ class DataSource(ABC):
29
29
  catalog: str | None,
30
30
  schema: str,
31
31
  table: str,
32
+ normalize: bool = True,
32
33
  ) -> list[Schema]:
33
34
  return NotImplemented
34
35
 
@@ -42,16 +43,19 @@ class DataSource(ABC):
42
43
  logger.warning(error_msg)
43
44
  raise DataSourceRuntimeException(error_msg) from exception
44
45
 
45
- def _map_meta_column(self, meta_column) -> Schema:
46
+ def _map_meta_column(self, meta_column, normalize: bool) -> Schema:
46
47
  """Create a normalized Schema DTO from the database metadata
47
48
 
48
49
  Used in the implementations of get_schema to build a Schema DTO from the `INFORMATION_SCHEMA` query result.
49
50
  The returned Schema is normalized in case the database is having columns with special characters and standardize
50
51
  """
51
- name = meta_column.col_name
52
+ name = meta_column.col_name.lower()
52
53
  dtype = meta_column.data_type.strip().lower()
53
- normalized = self.normalize_identifier(name)
54
- return Schema(normalized.ansi_normalized, dtype, normalized.ansi_normalized, normalized.source_normalized)
54
+ if normalize:
55
+ normalized = self.normalize_identifier(name)
56
+ return Schema(normalized.ansi_normalized, dtype, normalized.ansi_normalized, normalized.source_normalized)
57
+
58
+ return Schema(name, dtype, name, name)
55
59
 
56
60
 
57
61
  class MockDataSource(DataSource):
@@ -80,7 +84,7 @@ class MockDataSource(DataSource):
80
84
  return self.log_and_throw_exception(self._exception, "data", f"({catalog}, {schema}, {query})")
81
85
  return mock_df
82
86
 
83
- def get_schema(self, catalog: str | None, schema: str, table: str) -> list[Schema]:
87
+ def get_schema(self, catalog: str | None, schema: str, table: str, normalize: bool = True) -> list[Schema]:
84
88
  catalog_str = catalog if catalog else ""
85
89
  mock_schema = self._schema_repository.get((catalog_str, schema, table))
86
90
  if not mock_schema: