databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/deployment/configurator.py
@@ -0,0 +1,199 @@
+import logging
+import time
+
+from databricks.labs.blueprint.tui import Prompts
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.catalog import Privilege, SecurableType
+from databricks.sdk.service.sql import (
+    CreateWarehouseRequestWarehouseType,
+    EndpointInfoWarehouseType,
+    SpotInstancePolicy,
+)
+
+from databricks.labs.lakebridge.helpers.metastore import CatalogOperations
+
+logger = logging.getLogger(__name__)
+
+
+class ResourceConfigurator:
+    """
+    Handles the setup of common Databricks resources like
+    catalogs, schemas, volumes, and warehouses used across remorph modules.
+    """
+
+    def __init__(self, ws: WorkspaceClient, prompts: Prompts, catalog_ops: CatalogOperations):
+        self._ws = ws
+        self._user = ws.current_user.me()
+        self._prompts = prompts
+        self._catalog_ops = catalog_ops
+
+    def prompt_for_catalog_setup(
+        self,
+    ) -> str:
+        catalog_name = self._prompts.question("Enter catalog name", default="remorph")
+        catalog = self._catalog_ops.get_catalog(catalog_name)
+        if catalog:
+            logger.info(f"Found existing catalog `{catalog_name}`")
+            return catalog_name
+        if self._prompts.confirm(f"Catalog `{catalog_name}` doesn't exist. Create it?"):
+            result = self._catalog_ops.create_catalog(catalog_name)
+            assert result.name is not None
+            return result.name
+        raise SystemExit("Cannot continue installation without a valid catalog. Aborting the installation.")
+
+    def prompt_for_schema_setup(
+        self,
+        catalog: str,
+        default_schema_name: str,
+    ) -> str:
+        schema_name = self._prompts.question("Enter schema name", default=default_schema_name)
+        schema = self._catalog_ops.get_schema(catalog, schema_name)
+        if schema:
+            logger.info(f"Found existing schema `{schema_name}` in catalog `{catalog}`")
+            return schema_name
+        if self._prompts.confirm(f"Schema `{schema_name}` doesn't exist in catalog `{catalog}`. Create it?"):
+            result = self._catalog_ops.create_schema(schema_name, catalog)
+            assert result.name is not None
+            return result.name
+        raise SystemExit("Cannot continue installation without a valid schema. Aborting the installation.")
+
+    def prompt_for_volume_setup(
+        self,
+        catalog: str,
+        schema: str,
+        default_volume_name: str,
+    ) -> str:
+        volume_name = self._prompts.question("Enter volume name", default=default_volume_name)
+        volume = self._catalog_ops.get_volume(catalog, schema, volume_name)
+        if volume:
+            logger.info(f"Found existing volume `{volume_name}` in catalog `{catalog}` and schema `{schema}`")
+            return volume_name
+        if self._prompts.confirm(
+            f"Volume `{volume_name}` doesn't exist in catalog `{catalog}` and schema `{schema}`. Create it?"
+        ):
+            result = self._catalog_ops.create_volume(catalog, schema, volume_name)
+            assert result.name is not None
+            return result.name
+        raise SystemExit("Cannot continue installation without a valid volume. Aborting the installation.")
+
+    def prompt_for_warehouse_setup(self, warehouse_name_prefix: str) -> str:
+        def warehouse_type(_):
+            return _.warehouse_type.value if not _.enable_serverless_compute else "SERVERLESS"
+
+        pro_warehouses = {"[Create new PRO SQL warehouse]": "create_new"} | {
+            f"{_.name} ({_.id}, {warehouse_type(_)}, {_.state.value})": _.id
+            for _ in self._ws.warehouses.list()
+            if _.warehouse_type == EndpointInfoWarehouseType.PRO
+        }
+        warehouse_id = self._prompts.choice_from_dict(
+            "Select PRO or SERVERLESS SQL warehouse",
+            pro_warehouses,
+        )
+        if warehouse_id == "create_new":
+            new_warehouse = self._ws.warehouses.create(
+                name=f"{warehouse_name_prefix} {time.time_ns()}",
+                spot_instance_policy=SpotInstancePolicy.COST_OPTIMIZED,
+                warehouse_type=CreateWarehouseRequestWarehouseType.PRO,
+                cluster_size="Small",
+                max_num_clusters=1,
+            )
+            warehouse_id = new_warehouse.id
+        return warehouse_id
+
+    def has_necessary_catalog_access(
+        self, catalog_name: str, user_name: str, privilege_sets: tuple[set[Privilege], ...]
+    ):
+        catalog = self._catalog_ops.get_catalog(catalog_name)
+        assert catalog, f"Catalog not found: {catalog_name}"
+        if self._catalog_ops.has_catalog_access(catalog, user_name, privilege_sets):
+            return True
+        missing_permissions = self._get_missing_permissions(
+            user_name, SecurableType.CATALOG, catalog.name, privilege_sets
+        )
+        logger.error(
+            f"User `{user_name}` doesn't have the required privileges:\n`{missing_permissions}`\nto access catalog `{catalog_name}`"
+        )
+        return False
+
+    def has_necessary_schema_access(
+        self, catalog_name: str, schema_name: str, user_name: str, privilege_sets: tuple[set[Privilege], ...]
+    ):
+        schema = self._catalog_ops.get_schema(catalog_name, schema_name)
+        assert schema, f"Schema not found: {catalog_name}.{schema_name}"
+        if self._catalog_ops.has_schema_access(schema, user_name, privilege_sets):
+            return True
+        missing_permissions = self._get_missing_permissions(
+            user_name, SecurableType.SCHEMA, schema.full_name, privilege_sets
+        )
+        logger.error(
+            f"User `{user_name}` doesn't have the required privileges:\n`{missing_permissions}`\nto access schema `{schema.full_name}`"
+        )
+        return False
+
+    def has_necessary_volume_access(
+        self,
+        catalog_name: str,
+        schema_name: str,
+        volume_name: str,
+        user_name: str,
+        privilege_sets: tuple[set[Privilege], ...],
+    ):
+        volume = self._catalog_ops.get_volume(catalog_name, schema_name, volume_name)
+        assert volume, f"Volume not found: {catalog_name}.{schema_name}.{volume_name}"
+        if self._catalog_ops.has_volume_access(volume, user_name, privilege_sets):
+            return True
+        missing_permissions = self._get_missing_permissions(
+            user_name, SecurableType.VOLUME, volume.full_name, privilege_sets
+        )
+        logger.error(
+            f"User `{user_name}` doesn't have the required privileges:\n`{missing_permissions}`\nto access volume `{volume.full_name}`"
+        )
+        return False
+
+    def _get_missing_permissions(
+        self,
+        user_name: str,
+        securable_type: SecurableType,
+        resource_name: str | None,
+        privilege_sets: tuple[set[Privilege], ...],
+    ):
+        assert resource_name, f"Resource name must be provided: {resource_name}"
+        missing_permissions_list = []
+        for privilege_set in privilege_sets:
+            permissions = self._catalog_ops.has_privileges(user_name, securable_type, resource_name, privilege_set)
+            if not permissions:
+                missing_privileges = ", ".join([privilege.name for privilege in privilege_set])
+                missing_permissions_list.append(f" * '{missing_privileges}' ")
+
+        return " OR \n".join(missing_permissions_list)
+
+    def has_necessary_access(self, catalog_name: str, schema_name: str, volume_name: str | None):
+        catalog_required_privileges: tuple[set[Privilege], ...] = (
+            {Privilege.ALL_PRIVILEGES},
+            {Privilege.USE_CATALOG},
+        )
+        schema_required_privileges: tuple[set[Privilege], ...] = (
+            {Privilege.ALL_PRIVILEGES},
+            {Privilege.USE_SCHEMA, Privilege.MODIFY, Privilege.SELECT, Privilege.CREATE_VOLUME},
+            {Privilege.USE_SCHEMA, Privilege.MODIFY, Privilege.SELECT},
+        )
+        volume_required_privileges: tuple[set[Privilege], ...] = (
+            {Privilege.ALL_PRIVILEGES},
+            {Privilege.READ_VOLUME, Privilege.WRITE_VOLUME},
+        )
+
+        user_name = self._user.user_name
+        assert user_name is not None
+
+        catalog_access = self.has_necessary_catalog_access(catalog_name, user_name, catalog_required_privileges)
+        schema_access = self.has_necessary_schema_access(
+            catalog_name, schema_name, user_name, schema_required_privileges
+        )
+        required_access = catalog_access and schema_access
+        if volume_name:
+            volume_access = self.has_necessary_volume_access(
+                catalog_name, schema_name, volume_name, user_name, volume_required_privileges
+            )
+            required_access = required_access and volume_access
+        if not required_access:
+            raise SystemExit("Cannot continue installation without the necessary access. Aborting the installation.")
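
The configurator above is normally driven by the interactive installer, but it can be exercised directly. The sketch below assumes a default WorkspaceClient() that picks up credentials from the environment, and the CatalogOperations(ws) constructor signature is an assumption, since helpers/metastore.py is not shown in this diff:

    from databricks.labs.blueprint.tui import Prompts
    from databricks.sdk import WorkspaceClient

    from databricks.labs.lakebridge.deployment.configurator import ResourceConfigurator
    from databricks.labs.lakebridge.helpers.metastore import CatalogOperations

    ws = WorkspaceClient()  # credentials from the environment or ~/.databrickscfg
    configurator = ResourceConfigurator(ws, Prompts(), CatalogOperations(ws))  # CatalogOperations(ws) is assumed

    catalog = configurator.prompt_for_catalog_setup()  # defaults to `remorph`
    schema = configurator.prompt_for_schema_setup(catalog, "reconcile")
    volume = configurator.prompt_for_volume_setup(catalog, schema, "reconcile_volume")

    # Raises SystemExit unless the user holds one of the accepted privilege sets
    # per securable (e.g. ALL_PRIVILEGES, or USE_CATALOG on the catalog).
    configurator.has_necessary_access(catalog, schema, volume)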

databricks/labs/lakebridge/deployment/dashboard.py
@@ -0,0 +1,140 @@
+import logging
+from datetime import timedelta
+from pathlib import Path
+
+from databricks.labs.blueprint.installation import Installation
+from databricks.labs.blueprint.installer import InstallState
+from databricks.labs.lsql.dashboards import DashboardMetadata, Dashboards
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import (
+    InvalidParameterValue,
+    NotFound,
+    DeadlineExceeded,
+    InternalError,
+    ResourceAlreadyExists,
+)
+from databricks.sdk.retries import retried
+from databricks.sdk.service.dashboards import LifecycleState, Dashboard
+
+from databricks.labs.lakebridge.config import ReconcileConfig, ReconcileMetadataConfig
+
+logger = logging.getLogger(__name__)
+
+
+class DashboardDeployment:
+
+    def __init__(
+        self,
+        ws: WorkspaceClient,
+        installation: Installation,
+        install_state: InstallState,
+    ):
+        self._ws = ws
+        self._installation = installation
+        self._install_state = install_state
+
+    def deploy(
+        self,
+        folder: Path,
+        config: ReconcileConfig,
+    ):
+        """
+        Create dashboards from dashboard metadata files.
+        The given folder is expected to contain subfolders, each holding the metadata for an individual dashboard.
+
+        :param folder: Path to the base folder.
+        :param config: Configuration for reconciliation.
+        """
+        logger.info(f"Deploying dashboards from base folder {folder}")
+        parent_path = f"{self._installation.install_folder()}/dashboards"
+        try:
+            self._ws.workspace.mkdirs(parent_path)
+        except ResourceAlreadyExists:
+            logger.info(f"Dashboard parent path already exists: {parent_path}")
+
+        valid_dashboard_refs = set()
+        for dashboard_folder in folder.iterdir():
+            if not dashboard_folder.is_dir():
+                continue
+            valid_dashboard_refs.add(self._dashboard_reference(dashboard_folder))
+            dashboard = self._update_or_create_dashboard(dashboard_folder, parent_path, config.metadata_config)
+            logger.info(
+                f"Dashboard deployed with URL: {self._ws.config.host}/sql/dashboardsv3/{dashboard.dashboard_id}"
+            )
+            self._install_state.save()
+
+        self._remove_deprecated_dashboards(valid_dashboard_refs)
+
+    def _dashboard_reference(self, folder: Path) -> str:
+        return f"{folder.stem}".lower()
+
+    # InternalError and DeadlineExceeded are retried because of Lakeview internal issues.
+    # These issues have been reported to and resolved by the Lakeview team;
+    # the retry is kept for resilience.
+    @retried(on=[InternalError, DeadlineExceeded], timeout=timedelta(minutes=3))
+    def _update_or_create_dashboard(
+        self,
+        folder: Path,
+        ws_parent_path: str,
+        config: ReconcileMetadataConfig,
+    ) -> Dashboard:
+        logger.info(f"Reading dashboard folder {folder}")
+        metadata = DashboardMetadata.from_path(folder).replace_database(
+            catalog=config.catalog,
+            catalog_to_replace="remorph",
+            database=config.schema,
+            database_to_replace="reconcile",
+        )
+
+        metadata.display_name = self._name_with_prefix(metadata.display_name)
+        reference = self._dashboard_reference(folder)
+        dashboard_id = self._install_state.dashboards.get(reference)
+        if dashboard_id is not None:
+            try:
+                dashboard_id = self._handle_existing_dashboard(dashboard_id, metadata.display_name)
+            except (NotFound, InvalidParameterValue):
+                logger.info(f"Recovering invalid dashboard: {metadata.display_name} ({dashboard_id})")
+                try:
+                    dashboard_path = f"{ws_parent_path}/{metadata.display_name}.lvdash.json"
+                    self._ws.workspace.delete(dashboard_path)  # Cannot recreate the dashboard if the file still exists
+                    logger.debug(
+                        f"Deleted dangling dashboard {metadata.display_name} ({dashboard_id}): {dashboard_path}"
+                    )
+                except NotFound:
+                    pass
+                dashboard_id = None  # Recreate the dashboard if its reference was corrupted (manually)
+
+        dashboard = Dashboards(self._ws).create_dashboard(
+            metadata,
+            dashboard_id=dashboard_id,
+            parent_path=ws_parent_path,
+            warehouse_id=self._ws.config.warehouse_id,
+            publish=True,
+        )
+        assert dashboard.dashboard_id is not None
+        self._install_state.dashboards[reference] = dashboard.dashboard_id
+        return dashboard
+
+    def _name_with_prefix(self, name: str) -> str:
+        prefix = self._installation.product()
+        return f"{prefix.upper()}_{name}".replace(" ", "_")
+
+    def _handle_existing_dashboard(self, dashboard_id: str, display_name: str) -> str | None:
+        dashboard = self._ws.lakeview.get(dashboard_id)
+        if dashboard.lifecycle_state is None:
+            raise NotFound(f"Dashboard lifecycle state is missing: {display_name} ({dashboard_id})")
+        if dashboard.lifecycle_state == LifecycleState.TRASHED:
+            logger.info(f"Recreating trashed dashboard: {display_name} ({dashboard_id})")
+            return None  # Recreate the dashboard if it was trashed (manually)
+        return dashboard_id  # Update the existing dashboard
+
+    def _remove_deprecated_dashboards(self, valid_dashboard_refs: set[str]):
+        # Iterate over a snapshot: entries may be deleted from the dict inside the loop.
+        for ref, dashboard_id in list(self._install_state.dashboards.items()):
+            if ref not in valid_dashboard_refs:
+                try:
+                    logger.info(f"Removing dashboard_id={dashboard_id}, as it is no longer needed.")
+                    del self._install_state.dashboards[ref]
+                    self._ws.lakeview.trash(dashboard_id)
+                except (InvalidParameterValue, NotFound):
+                    logger.warning(f"Dashboard `{dashboard_id}` doesn't exist anymore for some reason.")
+                    continue
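
A minimal sketch of invoking this deployer outside the installer follows; it relies on the blueprint factories Installation.assume_user_home and InstallState.from_installation, and on a ReconcileConfig previously saved to the install folder, so treat it as illustrative wiring rather than the package's own entry point:

    from pathlib import Path

    from databricks.labs.blueprint.installation import Installation
    from databricks.labs.blueprint.installer import InstallState
    from databricks.sdk import WorkspaceClient

    from databricks.labs.lakebridge.config import ReconcileConfig
    from databricks.labs.lakebridge.deployment.dashboard import DashboardDeployment

    ws = WorkspaceClient()
    installation = Installation.assume_user_home(ws, "remorph")
    deployer = DashboardDeployment(ws, installation, InstallState.from_installation(installation))

    recon_config = installation.load(ReconcileConfig)  # assumes the config was saved during install
    # One subfolder per dashboard, mirroring resources/reconcile/dashboards in this wheel
    deployer.deploy(Path("databricks/labs/lakebridge/resources/reconcile/dashboards"), recon_config)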

databricks/labs/lakebridge/deployment/installation.py
@@ -0,0 +1,125 @@
+import logging
+from ast import literal_eval
+from pathlib import Path
+
+from databricks.labs.blueprint.installation import Installation
+from databricks.labs.blueprint.tui import Prompts
+from databricks.labs.blueprint.upgrades import Upgrades
+from databricks.labs.blueprint.wheels import ProductInfo, Version
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import NotFound
+from databricks.sdk.mixins.compute import SemVer
+from databricks.sdk.errors.platform import InvalidParameterValue, ResourceDoesNotExist
+
+from databricks.labs.lakebridge.config import RemorphConfigs
+from databricks.labs.lakebridge.deployment.recon import ReconDeployment
+
+logger = logging.getLogger("databricks.labs.lakebridge.install")
+
+
+class WorkspaceInstallation:
+    def __init__(
+        self,
+        ws: WorkspaceClient,
+        prompts: Prompts,
+        installation: Installation,
+        recon_deployment: ReconDeployment,
+        product_info: ProductInfo,
+        upgrades: Upgrades,
+    ):
+        self._ws = ws
+        self._prompts = prompts
+        self._installation = installation
+        self._recon_deployment = recon_deployment
+        self._product_info = product_info
+        self._upgrades = upgrades
+
+    def _get_local_version_file_path(self):
+        user_home = f"{Path(__file__).home()}"
+        return Path(f"{user_home}/.databricks/labs/{self._product_info.product_name()}/state/version.json")
+
+    def _get_local_version_file(self, file_path: Path):
+        data = None
+        with file_path.open("r") as f:
+            data = literal_eval(f.read())
+        assert data, "Unable to read local version file."
+        local_installed_version = data["version"]
+        try:
+            SemVer.parse(local_installed_version)
+        except ValueError:
+            logger.warning(f"{local_installed_version} is not a valid version.")
+            local_installed_version = "v0.3.0"
+        local_installed_date = data["date"]
+        logger.debug(f"Found local installation version: {local_installed_version} {local_installed_date}")
+        return Version(
+            version=local_installed_version,
+            date=local_installed_date,
+            wheel=f"databricks_labs_remorph-{local_installed_version}-py3-none-any.whl",
+        )
+
+    def _get_ws_version(self):
+        try:
+            return self._installation.load(Version)
+        except ResourceDoesNotExist as err:
+            logger.warning(f"Unable to get the workspace version due to: {err}")
+            return None
+
+    def _apply_upgrades(self):
+        """
+        * If the remote version doesn't exist but a local version does:
+          upload the version file to the workspace to handle previous installations.
+        * Apply upgrades only if a remote or local version exists.
+        * No upgrades need to be applied for a fresh installation.
+        """
+        ws_version = self._get_ws_version()
+        local_version_path = self._get_local_version_file_path()
+        local_version = local_version_path.exists()
+        if not ws_version and local_version:
+            self._installation.save(self._get_local_version_file(local_version_path))
+
+        if ws_version or local_version:
+            try:
+                self._upgrades.apply(self._ws)
+                logger.debug("Upgrades applied successfully.")
+            except (InvalidParameterValue, NotFound) as err:
+                logger.warning(f"Unable to apply upgrades due to: {err}")
+
+    def _upload_wheel(self):
+        wheels = self._product_info.wheels(self._ws)
+        with wheels:
+            wheel_paths = [wheels.upload_to_wsfs()]
+            wheel_paths = [f"/Workspace{wheel}" for wheel in wheel_paths]
+            return wheel_paths
+
+    def install(self, config: RemorphConfigs):
+        self._apply_upgrades()
+        wheel_paths: list[str] = self._upload_wheel()
+        if config.reconcile:
+            logger.info("Installing Remorph reconcile metadata components.")
+            self._recon_deployment.install(config.reconcile, wheel_paths)
+
+    def uninstall(self, config: RemorphConfigs):
+        # This will remove all the Remorph modules
+        if not self._prompts.confirm(
+            "Do you want to uninstall Remorph from the workspace too? This would "
+            "remove the Remorph project folder, jobs, metadata and dashboards."
+        ):
+            return
+        logger.info(f"Uninstalling Remorph from {self._ws.config.host}.")
+        try:
+            self._installation.files()
+        except NotFound:
+            logger.error(f"Check if {self._installation.install_folder()} is present. Aborting uninstallation.")
+            return
+
+        if config.transpile:
+            logger.info(
+                f"Won't remove the transpile validation schema `{config.transpile.schema_name}` "
+                f"from catalog `{config.transpile.catalog_name}`. Please remove it manually."
+            )
+
+        if config.reconcile:
+            self._recon_deployment.uninstall(config.reconcile)
+
+        self._installation.remove()
+        logger.info("Uninstallation completed successfully.")
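
Note that _get_local_version_file parses the local state file with ast.literal_eval rather than a JSON parser, so the file must be a valid Python literal; only the "version" and "date" keys are read. An illustrative state/version.json (values and date format are examples, not taken from this diff):

    # Parsed with literal_eval; a plain dict of strings works, whereas
    # JSON-specific tokens such as true or null would raise a ValueError.
    {"version": "v0.4.0", "date": "2024-05-01T12:00:00Z"}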

databricks/labs/lakebridge/deployment/job.py
@@ -0,0 +1,147 @@
+import dataclasses
+import logging
+from datetime import datetime, timezone, timedelta
+from typing import Any
+
+from databricks.labs.blueprint.installation import Installation
+from databricks.labs.blueprint.installer import InstallState
+from databricks.labs.blueprint.wheels import ProductInfo
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.errors import InvalidParameterValue
+from databricks.sdk.service import compute
+from databricks.sdk.service.jobs import Task, PythonWheelTask, JobCluster, JobSettings, JobParameterDefinition
+
+from databricks.labs.lakebridge.config import ReconcileConfig
+from databricks.labs.lakebridge.reconcile.constants import ReconSourceType
+
+logger = logging.getLogger(__name__)
+
+_TEST_JOBS_PURGE_TIMEOUT = timedelta(hours=1, minutes=15)
+
+
+class JobDeployment:
+    def __init__(
+        self,
+        ws: WorkspaceClient,
+        installation: Installation,
+        install_state: InstallState,
+        product_info: ProductInfo,
+    ):
+        self._ws = ws
+        self._installation = installation
+        self._install_state = install_state
+        self._product_info = product_info
+
+    def deploy_recon_job(self, name, recon_config: ReconcileConfig, remorph_wheel_path: str):
+        logger.info("Deploying reconciliation job.")
+        job_id = self._update_or_create_recon_job(name, recon_config, remorph_wheel_path)
+        logger.info(f"Reconciliation job deployed with job_id={job_id}")
+        logger.info(f"Job URL: {self._ws.config.host}#job/{job_id}")
+        self._install_state.save()
+
+    def _update_or_create_recon_job(self, name, recon_config: ReconcileConfig, remorph_wheel_path: str) -> str:
+        description = "Run the reconciliation process"
+        task_key = "run_reconciliation"
+
+        job_settings = self._recon_job_settings(name, task_key, description, recon_config, remorph_wheel_path)
+        if name in self._install_state.jobs:
+            try:
+                job_id = int(self._install_state.jobs[name])
+                logger.info(f"Updating configuration for job `{name}`, job_id={job_id}")
+                self._ws.jobs.reset(job_id, JobSettings(**job_settings))
+                return str(job_id)
+            except InvalidParameterValue:
+                del self._install_state.jobs[name]
+                logger.warning(f"Job `{name}` does not exist anymore for some reason")
+                return self._update_or_create_recon_job(name, recon_config, remorph_wheel_path)
+
+        logger.info(f"Creating new job configuration for job `{name}`")
+        new_job = self._ws.jobs.create(**job_settings)
+        assert new_job.job_id is not None
+        self._install_state.jobs[name] = str(new_job.job_id)
+        return str(new_job.job_id)
+
+    def _recon_job_settings(
+        self,
+        job_name: str,
+        task_key: str,
+        description: str,
+        recon_config: ReconcileConfig,
+        remorph_wheel_path: str,
+    ) -> dict[str, Any]:
+        latest_lts_spark = self._ws.clusters.select_spark_version(latest=True, long_term_support=True)
+        version = self._product_info.version()
+        version = version if not self._ws.config.is_gcp else version.replace("+", "-")
+        tags = {"version": f"v{version}"}
+        if self._is_testing():
+            # Add RemoveAfter tag for test job cleanup
+            date_to_remove = self._get_test_purge_time()
+            tags.update({"RemoveAfter": date_to_remove})
+
+        return {
+            "name": self._name_with_prefix(job_name),
+            "tags": tags,
+            "job_clusters": [
+                JobCluster(
+                    job_cluster_key="Remorph_Reconciliation_Cluster",
+                    new_cluster=compute.ClusterSpec(
+                        data_security_mode=compute.DataSecurityMode.USER_ISOLATION,
+                        spark_conf={},
+                        node_type_id=self._get_default_node_type_id(),
+                        autoscale=compute.AutoScale(min_workers=2, max_workers=10),
+                        spark_version=latest_lts_spark,
+                    ),
+                )
+            ],
+            "tasks": [
+                self._job_recon_task(
+                    Task(
+                        task_key=task_key,
+                        description=description,
+                        job_cluster_key="Remorph_Reconciliation_Cluster",
+                    ),
+                    recon_config,
+                    remorph_wheel_path,
+                ),
+            ],
+            "max_concurrent_runs": 2,
+            "parameters": [JobParameterDefinition(name="operation_name", default="reconcile")],
+        }
+
+    def _job_recon_task(self, jobs_task: Task, recon_config: ReconcileConfig, remorph_wheel_path: str) -> Task:
+        libraries = [
+            compute.Library(whl=remorph_wheel_path),
+        ]
+        source = recon_config.data_source
+        if source == ReconSourceType.ORACLE.value:
+            # TODO: Automatically fetch a version list for `ojdbc8`
+            oracle_driver_version = "23.4.0.24.05"
+            libraries.append(
+                compute.Library(
+                    maven=compute.MavenLibrary(f"com.oracle.database.jdbc:ojdbc8:{oracle_driver_version}"),
+                ),
+            )
+
+        return dataclasses.replace(
+            jobs_task,
+            libraries=libraries,
+            python_wheel_task=PythonWheelTask(
+                package_name="databricks_labs_remorph",
+                entry_point="reconcile",
+                parameters=["{{job.parameters.[operation_name]}}"],
+            ),
+        )
+
+    def _is_testing(self):
+        return self._product_info.product_name() != "remorph"
+
+    @staticmethod
+    def _get_test_purge_time() -> str:
+        return (datetime.now(timezone.utc) + _TEST_JOBS_PURGE_TIMEOUT).strftime("%Y%m%d%H")
+
+    def _get_default_node_type_id(self) -> str:
+        return self._ws.clusters.select_node_type(local_disk=True, min_memory_gb=16)
+
+    def _name_with_prefix(self, name: str) -> str:
+        prefix = self._installation.product()
+        return f"{prefix.upper()}_{name}".replace(" ", "_")
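
Tying the pieces together, a reconcile job deployment might be wired as sketched below. The blueprint factories and ProductInfo.from_class are assumptions about how the surrounding application context builds these objects, and wheel_path stands in for the workspace path produced by the wheel upload in installation.py above:

    from databricks.labs.blueprint.installation import Installation
    from databricks.labs.blueprint.installer import InstallState
    from databricks.labs.blueprint.wheels import ProductInfo
    from databricks.sdk import WorkspaceClient

    from databricks.labs.lakebridge.config import ReconcileConfig
    from databricks.labs.lakebridge.deployment.job import JobDeployment

    ws = WorkspaceClient()
    installation = Installation.assume_user_home(ws, "remorph")
    deployer = JobDeployment(
        ws,
        installation,
        InstallState.from_installation(installation),
        ProductInfo.from_class(ReconcileConfig),  # assumed anchor class for product metadata
    )
    recon_config = installation.load(ReconcileConfig)
    wheel_path = "/Workspace/Users/someone@example.com/.remorph/wheels/databricks_labs_remorph-0.10.0-py3-none-any.whl"  # illustrative
    deployer.deploy_recon_job("Reconciliation Runner", recon_config, wheel_path)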