databricks-labs-lakebridge 0.10.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. databricks_labs_lakebridge-0.10.0/.gitignore +24 -0
  2. databricks_labs_lakebridge-0.10.0/LICENSE +69 -0
  3. databricks_labs_lakebridge-0.10.0/NOTICE +42 -0
  4. databricks_labs_lakebridge-0.10.0/PKG-INFO +58 -0
  5. databricks_labs_lakebridge-0.10.0/README.md +19 -0
  6. databricks_labs_lakebridge-0.10.0/databricks/__init__.py +3 -0
  7. databricks_labs_lakebridge-0.10.0/databricks/labs/__init__.py +3 -0
  8. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/__about__.py +2 -0
  9. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/__init__.py +11 -0
  10. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  11. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  12. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  13. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/base_install.py +12 -0
  14. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/cli.py +449 -0
  15. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/config.py +192 -0
  16. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/connections/__init__.py +0 -0
  17. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  18. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/connections/database_manager.py +98 -0
  19. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/connections/env_getter.py +13 -0
  20. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/contexts/__init__.py +0 -0
  21. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/contexts/application.py +133 -0
  22. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/__init__.py +0 -0
  23. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/commons.py +223 -0
  24. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  25. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/local_report.py +9 -0
  26. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  27. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  28. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/__init__.py +0 -0
  29. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/configurator.py +199 -0
  30. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  31. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/installation.py +125 -0
  32. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/job.py +147 -0
  33. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/recon.py +145 -0
  34. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/table.py +30 -0
  35. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  36. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/discovery/table.py +36 -0
  37. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  38. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  39. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/errors/exceptions.py +1 -0
  40. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/__init__.py +0 -0
  41. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  42. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  43. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  44. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/metastore.py +164 -0
  45. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  46. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  47. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  48. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/helpers/validation.py +101 -0
  49. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/install.py +849 -0
  50. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  51. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/intermediate/dag.py +88 -0
  52. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  53. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  54. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/jvmproxy.py +56 -0
  55. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/lineage.py +42 -0
  56. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  57. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/compare.py +414 -0
  58. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  59. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  60. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  61. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  62. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  63. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  64. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  65. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  66. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  67. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/constants.py +37 -0
  68. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/exception.py +42 -0
  69. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/execute.py +920 -0
  70. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  71. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  72. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  73. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  74. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  75. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  76. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  77. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  78. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  79. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  80. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  81. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/runner.py +97 -0
  82. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  83. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  84. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/__init__.py +0 -0
  85. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  86. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  87. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  88. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  89. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  90. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  91. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  92. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  93. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  94. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  95. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  96. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  97. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  98. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  99. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  100. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  101. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  102. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  103. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  104. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  105. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  106. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  107. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  108. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  109. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  110. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  111. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  112. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  113. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  114. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  115. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  116. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  117. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  118. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  119. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  120. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  121. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  122. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  123. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  124. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  125. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  126. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  127. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  128. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  129. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  130. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  131. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  132. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  133. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  134. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  135. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  136. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  137. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  138. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  139. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  140. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  141. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  142. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  143. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  144. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  145. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  146. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/execute.py +423 -0
  147. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  148. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  149. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  150. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  151. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  152. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  153. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  154. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  155. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  156. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  157. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  158. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  159. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  160. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  161. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  162. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/uninstall.py +28 -0
  163. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  164. databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  165. databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/components/Button.tsx +81 -0
  166. databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/css/custom.css +167 -0
  167. databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/css/table.css +20 -0
  168. databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/pages/index.tsx +57 -0
  169. databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  170. databricks_labs_lakebridge-0.10.0/docs/lakebridge/src/theme/Layout/index.tsx +18 -0
  171. databricks_labs_lakebridge-0.10.0/pyproject.toml +768 -0
databricks_labs_lakebridge-0.10.0/.gitignore
@@ -0,0 +1,24 @@
+ .venv
+ .python-version
+ .sdkmanrc
+ .DS_Store
+ *.pyc
+ __pycache__
+ dist
+ .idea
+ /htmlcov/
+ *.iml
+ target/
+ .coverage
+ .coverage.*
+ coverage.*
+ *.iws
+ /core/gen/
+ /antlrlinter/gen/
+ *.tokens
+ spark-warehouse/
+ remorph_transpile/
+ /linter/gen/
+ /linter/src/main/antlr4/library/gen/
+ .databricks-login.json
+ .mypy_cache
databricks_labs_lakebridge-0.10.0/LICENSE
@@ -0,0 +1,69 @@
+ Databricks License
+ Copyright (2024) Databricks, Inc.
+
+ Definitions.
+
+ Agreement: The agreement between Databricks, Inc., and you governing
+ the use of the Databricks Services, as that term is defined in
+ the Master Cloud Services Agreement (MCSA) located at
+ www.databricks.com/legal/mcsa.
+
+ Licensed Materials: The source code, object code, data, and/or other
+ works to which this license applies.
+
+ Scope of Use. You may not use the Licensed Materials except in
+ connection with your use of the Databricks Services pursuant to
+ the Agreement. Your use of the Licensed Materials must comply at all
+ times with any restrictions applicable to the Databricks Services,
+ generally, and must be used in accordance with any applicable
+ documentation. You may view, use, copy, modify, publish, and/or
+ distribute the Licensed Materials solely for the purposes of using
+ the Licensed Materials within or connecting to the Databricks Services.
+ If you do not agree to these terms, you may not view, use, copy,
+ modify, publish, and/or distribute the Licensed Materials.
+
+ Redistribution. You may redistribute and sublicense the Licensed
+ Materials so long as all use is in compliance with these terms.
+ In addition:
+
+ - You must give any other recipients a copy of this License;
+ - You must cause any modified files to carry prominent notices
+   stating that you changed the files;
+ - You must retain, in any derivative works that you distribute,
+   all copyright, patent, trademark, and attribution notices,
+   excluding those notices that do not pertain to any part of
+   the derivative works; and
+ - If a "NOTICE" text file is provided as part of its
+   distribution, then any derivative works that you distribute
+   must include a readable copy of the attribution notices
+   contained within such NOTICE file, excluding those notices
+   that do not pertain to any part of the derivative works.
+
+ You may add your own copyright statement to your modifications and may
+ provide additional license terms and conditions for use, reproduction,
+ or distribution of your modifications, or for any such derivative works
+ as a whole, provided your use, reproduction, and distribution of
+ the Licensed Materials otherwise complies with the conditions stated
+ in this License.
+
+ Termination. This license terminates automatically upon your breach of
+ these terms or upon the termination of your Agreement. Additionally,
+ Databricks may terminate this license at any time on notice. Upon
+ termination, you must permanently delete the Licensed Materials and
+ all copies thereof.
+
+ DISCLAIMER; LIMITATION OF LIABILITY.
+
+ THE LICENSED MATERIALS ARE PROVIDED “AS-IS” AND WITH ALL FAULTS.
+ DATABRICKS, ON BEHALF OF ITSELF AND ITS LICENSORS, SPECIFICALLY
+ DISCLAIMS ALL WARRANTIES RELATING TO THE LICENSED MATERIALS, EXPRESS
+ AND IMPLIED, INCLUDING, WITHOUT LIMITATION, IMPLIED WARRANTIES,
+ CONDITIONS AND OTHER TERMS OF MERCHANTABILITY, SATISFACTORY QUALITY OR
+ FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. DATABRICKS AND
+ ITS LICENSORS TOTAL AGGREGATE LIABILITY RELATING TO OR ARISING OUT OF
+ YOUR USE OF OR DATABRICKS’ PROVISIONING OF THE LICENSED MATERIALS SHALL
+ BE LIMITED TO ONE THOUSAND ($1,000) DOLLARS. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE LICENSED MATERIALS OR
+ THE USE OR OTHER DEALINGS IN THE LICENSED MATERIALS.
databricks_labs_lakebridge-0.10.0/NOTICE
@@ -0,0 +1,42 @@
+ Copyright (2024) Databricks, Inc.
+
+ This software includes software developed at Databricks (https://www.databricks.com/) and its use is subject to the included LICENSE file.
+
+ ____________________
+ This software contains code from the following open source projects, licensed under the MIT license (https://opensource.org/license/mit):
+
+ SQL Glot - https://github.com/tobymao/sqlglot
+ Copyright 2023 Toby Mao
+
+ Sqlalchemy - https://github.com/sqlalchemy/sq
+ Copyright 2005-2025 SQLAlchemy authors and contributors <see AUTHORS file>.
+
+ Duckdb - https://github.com/duckdb/duckdb
+ Copyright 2018-2025 Stichting DuckDB Foundation
+
+ ____________________
+ This software contains code from the following open source projects, licensed under the Apache 2.0 license (https://www.apache.org/licenses/LICENSE-2.0):
+
+ Databricks SDK for Python - https://github.com/databricks/databricks-sdk-py
+ Copyright 2023 Databricks, Inc. All rights reserved.
+
+ cryptography - https://github.com/pyca/cryptography
+ Copyright 2013-2023 The Python Cryptographic Authority and individual contributors.
+
+ Pygls - https://github.com/openlawlibrary/pygls
+ Copyright pygls authors.
+
+ ____________________
+ This software contains code from the following open source projects, licensed under the Python Software license (https://opensource.org/license/python-2-0):
+
+ Standard-distutils - https://pypi.org/project/standard-distutils/
+ Copyright standard-distutils authors.
+
+ ____________________
+ This software contains code from the following publicly available projects, licensed under the Databricks (DB) license (https://www.databricks.com/legal/db-license):
+
+ Databricks Labs Blueprint - https://github.com/databrickslabs/blueprint
+ Copyright (2023) Databricks, Inc.
+
+ Databricks Labs lsql - databricks-labs-lsql
+ Copyright (2024) Databricks, Inc.
databricks_labs_lakebridge-0.10.0/PKG-INFO
@@ -0,0 +1,58 @@
+ Metadata-Version: 2.4
+ Name: databricks-labs-lakebridge
+ Version: 0.10.0
+ Summary: Fast and predictable migrations to Databricks Lakehouse Platform. This tool is designed to help you migrate your data and workloads to the Databricks Lakehouse Platform in a fast, predictable, and reliable way. It provides a set of tools and utilities to help you reconcile your data and workloads, assess your current state, and plan your migration.
+ Project-URL: Documentation, https://github.com/databrickslabs/lakebridge
+ Project-URL: Issues, https://github.com/databrickslabs/lakebridge/issues
+ Project-URL: Source, https://github.com/databrickslabs/lakebridge
+ Maintainer-email: Databricks Labs <labs-oss@databricks.com>
+ License-File: LICENSE
+ License-File: NOTICE
+ Keywords: Databricks
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Environment :: Console
+ Classifier: Framework :: Pytest
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: System Administrators
+ Classifier: License :: Other/Proprietary License
+ Classifier: Operating System :: MacOS
+ Classifier: Operating System :: Microsoft :: Windows
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: Implementation :: CPython
+ Classifier: Topic :: Software Development :: Libraries
+ Classifier: Topic :: Utilities
+ Requires-Python: >=3.10
+ Requires-Dist: cryptography<45.1.0,>=44.0.2
+ Requires-Dist: databricks-bb-analyzer~=0.1.6
+ Requires-Dist: databricks-labs-blueprint[yaml]<0.12.0,>=0.11.0
+ Requires-Dist: databricks-labs-lsql==0.16.0
+ Requires-Dist: databricks-sdk~=0.51.0
+ Requires-Dist: duckdb~=1.2.2
+ Requires-Dist: pygls~=2.0.0a2
+ Requires-Dist: pyodbc~=5.2.0
+ Requires-Dist: sqlalchemy~=2.0.40
+ Requires-Dist: sqlglot==26.1.3
+ Requires-Dist: standard-distutils~=3.11.9; python_version >= '3.11'
+ Description-Content-Type: text/markdown
+
+ Databricks Labs Lakebridge
+ ---
+ ![Databricks Labs Lakebridge White](/docs/lakebridge/static/img/lakebridge-lockup-white-background.svg)
+
+
+ [![build](https://github.com/databrickslabs/remorph/actions/workflows/push.yml/badge.svg)](https://github.com/databrickslabs/remorph/actions/workflows/push.yml)
+ ![PyPI - Downloads](https://img.shields.io/pypi/dm/databricks-labs-remorph?cacheSeconds=3600)
+
+ -----
+ Documentation
+ The complete documentation is available at: https://databrickslabs.github.io/lakebridge/
+
+ Contribution
+ Please see the contribution guidance here on how to contribute to the project (build, test, and submit a PR).
+
+ Project Support
+ Please note that this project is provided for your exploration only and is not formally supported by Databricks with Service Level Agreements (SLAs). They are provided AS-IS, and we do not make any guarantees. Please do not submit a support ticket relating to any issues arising from the use of this project.
+
+ Any issues discovered through the use of this project should be filed as GitHub Issues on this repository. They will be reviewed as time permits, but no formal SLAs for support exist.
databricks_labs_lakebridge-0.10.0/README.md
@@ -0,0 +1,19 @@
+ Databricks Labs Lakebridge
+ ---
+ ![Databricks Labs Lakebridge White](/docs/lakebridge/static/img/lakebridge-lockup-white-background.svg)
+
+
+ [![build](https://github.com/databrickslabs/remorph/actions/workflows/push.yml/badge.svg)](https://github.com/databrickslabs/remorph/actions/workflows/push.yml)
+ ![PyPI - Downloads](https://img.shields.io/pypi/dm/databricks-labs-remorph?cacheSeconds=3600)
+
+ -----
+ Documentation
+ The complete documentation is available at: https://databrickslabs.github.io/lakebridge/
+
+ Contribution
+ Please see the contribution guidance here on how to contribute to the project (build, test, and submit a PR).
+
+ Project Support
+ Please note that this project is provided for your exploration only and is not formally supported by Databricks with Service Level Agreements (SLAs). They are provided AS-IS, and we do not make any guarantees. Please do not submit a support ticket relating to any issues arising from the use of this project.
+
+ Any issues discovered through the use of this project should be filed as GitHub Issues on this repository. They will be reviewed as time permits, but no formal SLAs for support exist.
databricks_labs_lakebridge-0.10.0/databricks/__init__.py
@@ -0,0 +1,3 @@
+ # DO NOT ADD ANYTHING ELSE TO THIS FILE FOR COMPATIBILITY WITH OTHER databricks.* PACKAGES
+ # SEE https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages
+ __path__ = __import__("pkgutil").extend_path(__path__, __name__)
databricks_labs_lakebridge-0.10.0/databricks/labs/__init__.py
@@ -0,0 +1,3 @@
+ # DO NOT ADD ANYTHING ELSE TO THIS FILE FOR COMPATIBILITY WITH OTHER databricks.* PACKAGES
+ # SEE https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages
+ __path__ = __import__("pkgutil").extend_path(__path__, __name__)
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/__about__.py
@@ -0,0 +1,2 @@
+ # DO NOT MODIFY THIS FILE
+ __version__ = "0.10.0"
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/__init__.py
@@ -0,0 +1,11 @@
+ from databricks.sdk.core import with_user_agent_extra, with_product
+ from databricks.labs.blueprint.logger import install_logger
+ from databricks.labs.lakebridge.__about__ import __version__
+
+ install_logger()
+
+ # Add lakebridge/<version> for projects depending on lakebridge as a library
+ with_user_agent_extra("lakebridge", __version__)
+
+ # Add lakebridge/<version> for re-packaging of lakebridge, where product name is omitted
+ with_product("lakebridge", __version__)
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/configure_assessment.py
@@ -0,0 +1,194 @@
+ from abc import ABC, abstractmethod
+ import logging
+ import shutil
+ import yaml
+
+ from databricks.labs.blueprint.tui import Prompts
+
+ from databricks.labs.lakebridge.connections.credential_manager import (
+     cred_file as creds,
+     CredentialManager,
+     create_credential_manager,
+ )
+ from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
+ from databricks.labs.lakebridge.connections.env_getter import EnvGetter
+
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.INFO)
+
+ PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]
+
+
+ class AssessmentConfigurator(ABC):
+     """Abstract base class for assessment configuration."""
+
+     def __init__(self, product_name: str, prompts: Prompts, credential_file=None):
+         self.prompts = prompts
+         self._product_name = product_name
+         self._credential_file = creds(product_name) if not credential_file else credential_file
+
+     @abstractmethod
+     def _configure_credentials(self) -> str:
+         pass
+
+     @staticmethod
+     def _test_connection(source: str, cred_manager: CredentialManager):
+         config = cred_manager.get_credentials(source)
+
+         try:
+             db_manager = DatabaseManager(source, config)
+             if db_manager.check_connection():
+                 logger.info("Connection to the source system successful")
+             else:
+                 logger.error("Connection to the source system failed, check logs in debug mode")
+                 raise SystemExit("Connection validation failed. Exiting...")
+
+         except ConnectionError as e:
+             logger.error(f"Failed to connect to the source system: {e}")
+             raise SystemExit("Connection validation failed. Exiting...") from e
+
+     def run(self):
+         """Run the assessment configuration process."""
+         logger.info(f"Welcome to the {self._product_name} Assessment Configuration")
+         source = self._configure_credentials()
+         logger.info(f"{source.capitalize()} details and credentials received.")
+         if self.prompts.confirm(f"Do you want to test the connection to {source}?"):
+             cred_manager = create_credential_manager("lakebridge", EnvGetter())
+             if cred_manager:
+                 self._test_connection(source, cred_manager)
+         logger.info(f"{source.capitalize()} Assessment Configuration Completed")
+
+
+ class ConfigureSqlServerAssessment(AssessmentConfigurator):
+     """SQL Server specific assessment configuration."""
+
+     def _configure_credentials(self) -> str:
+         cred_file = self._credential_file
+         source = "mssql"
+
+         logger.info(
+             "\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
+             "from environment variables, falling back to plain text if the variable is not found\n",
+         )
+         secret_vault_type = str(self.prompts.choice("Enter secret vault type (local | env)", ["local", "env"])).lower()
+         secret_vault_name = None
+
+         logger.info("Please refer to the documentation to understand the difference between local and env.")
+
+         credential = {
+             "secret_vault_type": secret_vault_type,
+             "secret_vault_name": secret_vault_name,
+             source: {
+                 "database": self.prompts.question("Enter the database name"),
+                 "driver": self.prompts.question("Enter the driver details"),
+                 "server": self.prompts.question("Enter the server or host details"),
+                 "port": int(self.prompts.question("Enter the port details", valid_number=True)),
+                 "user": self.prompts.question("Enter the user details"),
+                 "password": self.prompts.question("Enter the password details"),
+             },
+         }
+
+         if cred_file.exists():
+             backup_filename = cred_file.with_suffix('.bak')
+             shutil.copy(cred_file, backup_filename)
+             logger.debug(f"Backup of the existing file created at {backup_filename}")
+
+         with open(cred_file, 'w', encoding='utf-8') as file:
+             yaml.dump(credential, file, default_flow_style=False)
+
+         logger.info(f"Credential template created for {source}.")
+         return source
+
+
+ class ConfigureSynapseAssessment(AssessmentConfigurator):
+     """Synapse specific assessment configuration."""
+
+     def _configure_credentials(self) -> str:
+         cred_file = self._credential_file
+         source = "synapse"
+
+         logger.info(
+             "\n(local | env) \nlocal means values are read as plain text \nenv means values are read "
+             "from environment variables, falling back to plain text if the variable is not found\n",
+         )
+         secret_vault_type = str(self.prompts.choice("Enter secret vault type (local | env)", ["local", "env"])).lower()
+         secret_vault_name = None
+
+         # Synapse Workspace Settings
+         logger.info("Please provide Synapse Workspace settings:")
+         synapse_workspace = {
+             "name": self.prompts.question("Enter Synapse workspace name"),
+             "dedicated_sql_endpoint": self.prompts.question("Enter dedicated SQL endpoint"),
+             "serverless_sql_endpoint": self.prompts.question("Enter serverless SQL endpoint"),
+             "sql_user": self.prompts.question("Enter SQL user"),
+             "sql_password": self.prompts.question("Enter SQL password"),
+             "tz_info": self.prompts.question("Enter timezone (e.g. America/New_York)", default="UTC"),
+         }
+
+         # Azure API Access Settings
+         logger.info("Please provide Azure API access settings:")
+         azure_api_access = {
+             "development_endpoint": self.prompts.question("Enter development endpoint"),
+             "azure_client_id": self.prompts.question("Enter Azure client ID"),
+             "azure_tenant_id": self.prompts.question("Enter Azure tenant ID"),
+             "azure_client_secret": self.prompts.question("Enter Azure client secret"),
+         }
+
+         # JDBC Settings
+         logger.info("Please select JDBC authentication type:")
+         auth_type = self.prompts.choice(
+             "Select authentication type", ["sql_authentication", "ad_passwd_authentication", "spn_authentication"]
+         )
+
+         synapse_jdbc = {
+             "auth_type": auth_type,
+             "fetch_size": self.prompts.question("Enter fetch size", default="1000"),
+             "login_timeout": self.prompts.question("Enter login timeout (seconds)", default="30"),
+         }
+
+         # Profiler Settings
+         logger.info("Please configure profiler settings:")
+         synapse_profiler = {
+             "exclude_serverless_sql_pool": self.prompts.confirm("Exclude serverless SQL pool from profiling?"),
+             "exclude_dedicated_sql_pools": self.prompts.confirm("Exclude dedicated SQL pools from profiling?"),
+             "exclude_spark_pools": self.prompts.confirm("Exclude Spark pools from profiling?"),
+             "exclude_monitoring_metrics": self.prompts.confirm("Exclude monitoring metrics from profiling?"),
+             "redact_sql_pools_sql_text": self.prompts.confirm("Redact SQL pools SQL text?"),
+         }
+
+         credential = {
+             "secret_vault_type": secret_vault_type,
+             "secret_vault_name": secret_vault_name,
+             source: {
+                 "workspace": synapse_workspace,
+                 "azure_api_access": azure_api_access,
+                 "jdbc": synapse_jdbc,
+                 "profiler": synapse_profiler,
+             },
+         }
+
+         if cred_file.exists():
+             backup_filename = cred_file.with_suffix('.bak')
+             shutil.copy(cred_file, backup_filename)
+             logger.debug(f"Backup of the existing file created at {backup_filename}")
+
+         with open(cred_file, 'w', encoding='utf-8') as file:
+             yaml.dump(credential, file, default_flow_style=False)
+
+         logger.info(f"Credential template created for {source}.")
+         return source
+
+
+ def create_assessment_configurator(
+     source_system: str, product_name: str, prompts: Prompts, credential_file=None
+ ) -> AssessmentConfigurator:
+     """Factory function to create the appropriate assessment configurator."""
+     configurators = {
+         "mssql": ConfigureSqlServerAssessment,
+         "synapse": ConfigureSynapseAssessment,
+     }
+
+     if source_system not in configurators:
+         raise ValueError(f"Unsupported source system: {source_system}")
+
+     return configurators[source_system](product_name, prompts, credential_file)
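For orientation, here is a minimal driver sketch for the factory above. It assumes an interactive `Prompts` session from databricks-labs-blueprint (the same class the module imports); the `"mssql"` / `"lakebridge"` argument values are illustrative, not prescribed by the package.

```python
from databricks.labs.blueprint.tui import Prompts

from databricks.labs.lakebridge.assessments.configure_assessment import (
    create_assessment_configurator,
)

# Illustrative only: prompts interactively for mssql credentials, writes the
# credential YAML (backing up any existing file), and optionally tests the
# connection to the source system.
prompts = Prompts()
configurator = create_assessment_configurator(
    source_system="mssql",  # or "synapse"
    product_name="lakebridge",
    prompts=prompts,
)
configurator.run()
```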
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/pipeline.py
@@ -0,0 +1,188 @@
+ from pathlib import Path
+ from subprocess import run, CalledProcessError
+ from dataclasses import dataclass
+ from enum import Enum
+
+ import venv
+ import tempfile
+ import json
+ import logging
+ import yaml
+ import duckdb
+
+ from databricks.labs.lakebridge.connections.credential_manager import cred_file
+
+ from databricks.labs.lakebridge.assessments.profiler_config import PipelineConfig, Step
+ from databricks.labs.lakebridge.connections.database_manager import DatabaseManager
+
+ logger = logging.getLogger(__name__)
+ logger.setLevel("INFO")
+
+ DB_NAME = "profiler_extract.db"
+
+
+ class StepExecutionStatus(str, Enum):
+     COMPLETE = "COMPLETE"
+     ERROR = "ERROR"
+     SKIPPED = "SKIPPED"
+
+
+ @dataclass
+ class StepExecutionResult:
+     step_name: str
+     status: StepExecutionStatus
+     error_message: str | None = None
+
+
+ class PipelineClass:
+     def __init__(self, config: PipelineConfig, executor: DatabaseManager):
+         self.config = config
+         self.executor = executor
+         self.db_path_prefix = Path(config.extract_folder)
+
+     def execute(self) -> list[StepExecutionResult]:
+         logging.info(f"Pipeline initialized with config: {self.config.name}, version: {self.config.version}")
+         execution_results: list[StepExecutionResult] = []
+         for step in self.config.steps:
+             result = self._process_step(step)
+             execution_results.append(result)
+             logging.info(f"Step '{step.name}' completed with status: {result.status}")
+
+         logging.info("Pipeline execution completed")
+         return execution_results
+
+     def _process_step(self, step: Step) -> StepExecutionResult:
+         if step.flag != "active":
+             logging.info(f"Skipping step: {step.name} as it is not active")
+             return StepExecutionResult(step_name=step.name, status=StepExecutionStatus.SKIPPED)
+
+         logging.debug(f"Executing step: {step.name}")
+         try:
+             status = self._execute_step(step)
+             return StepExecutionResult(step_name=step.name, status=status)
+         except RuntimeError as e:
+             return StepExecutionResult(step_name=step.name, status=StepExecutionStatus.ERROR, error_message=str(e))
+
+     def _execute_step(self, step: Step) -> StepExecutionStatus:
+         if step.type == "sql":
+             logging.info(f"Executing SQL step {step.name}")
+             self._execute_sql_step(step)
+             return StepExecutionStatus.COMPLETE
+         if step.type == "python":
+             logging.info(f"Executing Python step {step.name}")
+             self._execute_python_step(step)
+             return StepExecutionStatus.COMPLETE
+         logging.error(f"Unsupported step type: {step.type}")
+         raise RuntimeError(f"Unsupported step type: {step.type}")
+
+     def _execute_sql_step(self, step: Step):
+         logging.debug(f"Reading query from file: {step.extract_source}")
+         with open(step.extract_source, 'r', encoding='utf-8') as file:
+             query = file.read()
+
+         # Execute the query using the database manager
+         logging.info(f"Executing query: {query}")
+         try:
+             result = self.executor.execute_query(query)
+
+             # Save the result to duckdb
+             self._save_to_db(result, step.name, str(step.mode))
+         except Exception as e:
+             logging.error(f"SQL execution failed: {str(e)}")
+             raise RuntimeError(f"SQL execution failed: {str(e)}") from e
+
+     def _execute_python_step(self, step: Step):
+
+         logging.debug(f"Executing Python script: {step.extract_source}")
+         db_path = str(self.db_path_prefix / DB_NAME)
+         credential_config = str(cred_file("lakebridge"))
+
+         # Create a temporary directory for the virtual environment
+         with tempfile.TemporaryDirectory() as temp_dir:
+             venv_dir = Path(temp_dir) / "venv"
+             venv.create(venv_dir, with_pip=True)
+             venv_python = venv_dir / "bin" / "python"
+             venv_pip = venv_dir / "bin" / "pip"
+
+             logger.info(f"Creating a virtual environment for Python script execution: {venv_dir}")
+             # Install dependencies in the virtual environment
+             if step.dependencies:
+                 logging.info(f"Installing dependencies: {', '.join(step.dependencies)}")
+                 try:
+                     logging.debug("Upgrading local pip")
+                     run([str(venv_pip), "install", "--upgrade", "pip"], check=True, capture_output=True, text=True)
+
+                     run([str(venv_pip), "install", *step.dependencies], check=True, capture_output=True, text=True)
+                 except CalledProcessError as e:
+                     logging.error(f"Failed to install dependencies: {e.stderr}")
+                     raise RuntimeError(f"Failed to install dependencies: {e.stderr}") from e
+
+             # Execute the Python script using the virtual environment's Python interpreter
+             try:
+                 result = run(
+                     [
+                         str(venv_python),
+                         str(step.extract_source),
+                         "--db-path",
+                         db_path,
+                         "--credential-config-path",
+                         credential_config,
+                     ],
+                     check=True,
+                     capture_output=True,
+                     text=True,
+                 )
+
+                 try:
+                     output = json.loads(result.stdout)
+                     if output["status"] == "success":
+                         logging.info(f"Python script completed: {output['message']}")
+                     else:
+                         raise RuntimeError(f"Script reported error: {output['message']}")
+                 except json.JSONDecodeError:
+                     logging.info(f"Python script output: {result.stdout}")
+
+             except CalledProcessError as e:
+                 error_msg = e.stderr
+                 logging.error(f"Python script failed: {error_msg}")
+                 raise RuntimeError(f"Script execution failed: {error_msg}") from e
+
+     def _save_to_db(self, result, step_name: str, mode: str, batch_size: int = 1000):
+         self._create_dir(self.db_path_prefix)
+         db_path = str(self.db_path_prefix / DB_NAME)
+
+         with duckdb.connect(db_path) as conn:
+             columns = result.keys()
+             # TODO: Add support for figuring out data types from the SQLAlchemy result object; result.cursor.description is not reliable
+             schema = ' STRING, '.join(columns) + ' STRING'
+
+             # Handle write modes
+             if mode == 'overwrite':
+                 conn.execute(f"CREATE OR REPLACE TABLE {step_name} ({schema})")
+             elif mode == 'append' and step_name not in conn.get_table_names(""):
+                 conn.execute(f"CREATE TABLE {step_name} ({schema})")
+
+             # Batch insert using prepared statements
+             placeholders = ', '.join(['?' for _ in columns])
+             insert_query = f"INSERT INTO {step_name} VALUES ({placeholders})"
+
+             # Fetch and insert rows in batches
+             while True:
+                 rows = result.fetchmany(batch_size)
+                 if not rows:
+                     break
+                 conn.executemany(insert_query, rows)
+
+     @staticmethod
+     def _create_dir(dir_path: Path):
+         if not Path(dir_path).exists():
+             dir_path.mkdir(parents=True, exist_ok=True)
+
+     @staticmethod
+     def load_config_from_yaml(file_path: str) -> PipelineConfig:
+         with open(file_path, 'r', encoding='utf-8') as file:
+             data = yaml.safe_load(file)
+         steps = [Step(**step) for step in data['steps']]
+         return PipelineConfig(
+             name=data['name'], version=data['version'], extract_folder=data['extract_folder'], steps=steps
+         )
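As a sketch of the configuration shape that `load_config_from_yaml` expects: every key below mirrors a field of the `PipelineConfig` and `Step` dataclasses (defined in profiler_config.py, shown next) that `Step(**step)` unpacks. The step names, file paths, and dependency pin are hypothetical.

```python
import tempfile
from pathlib import Path

from databricks.labs.lakebridge.assessments.pipeline import PipelineClass

# Hypothetical pipeline definition; field names match PipelineConfig and Step.
PIPELINE_YAML = """\
name: mssql_profiler
version: "1.0"
extract_folder: /tmp/profiler_extract
steps:
  - name: table_inventory
    type: sql
    extract_source: queries/table_inventory.sql
    mode: overwrite
    frequency: once
    flag: active
  - name: usage_metrics
    type: python
    extract_source: scripts/usage_metrics.py
    mode: append
    frequency: once
    flag: active
    dependencies:
      - pyodbc~=5.2.0
"""

config_path = Path(tempfile.mkdtemp()) / "pipeline.yml"
config_path.write_text(PIPELINE_YAML, encoding="utf-8")
config = PipelineClass.load_config_from_yaml(str(config_path))
print([step.name for step in config.steps])  # ['table_inventory', 'usage_metrics']
```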
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/assessments/profiler_config.py
@@ -0,0 +1,30 @@
+ from dataclasses import dataclass, field
+
+
+ @dataclass
+ class Step:
+     name: str
+     type: str | None
+     extract_source: str
+     mode: str | None
+     frequency: str | None
+     flag: str | None
+     dependencies: list[str] = field(default_factory=list)
+     comment: str | None = None
+
+     def __post_init__(self):
+         if self.frequency is None:
+             self.frequency = "once"
+         if self.flag is None:
+             self.flag = "active"
+         if self.mode is None:
+             self.mode = "append"
+
+
+ @dataclass
+ class PipelineConfig:
+     name: str
+     version: str
+     extract_folder: str
+     comment: str | None = None
+     steps: list[Step] = field(default_factory=list)
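A small sketch of the `__post_init__` defaulting above: `mode`, `frequency`, and `flag` passed as `None` (for example, when a YAML step sets them to null) are normalized to the documented defaults rather than left unset. The step name and path are hypothetical.

```python
from databricks.labs.lakebridge.assessments.profiler_config import Step

# mode/frequency/flag given as None are filled in by __post_init__
step = Step(
    name="table_inventory",  # hypothetical step name
    type="sql",
    extract_source="queries/table_inventory.sql",
    mode=None,
    frequency=None,
    flag=None,
)
print(step.mode, step.frequency, step.flag)  # append once active
```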
databricks_labs_lakebridge-0.10.0/databricks/labs/lakebridge/base_install.py
@@ -0,0 +1,12 @@
+ from databricks.labs.blueprint.logger import install_logger
+ from databricks.labs.blueprint.entrypoint import get_logger
+ from databricks.sdk.core import with_user_agent_extra
+
+ install_logger()
+ with_user_agent_extra("cmd", "install")
+
+ if __name__ == "__main__":
+     logger = get_logger(__file__)
+     logger.setLevel("INFO")
+
+     logger.info("Successfully Setup Remorph Components Locally")