databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
@@ -0,0 +1,164 @@
+ import functools
+ import logging
+ from itertools import chain
+
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.errors import NotFound
+ from databricks.sdk.service.catalog import (
+     CatalogInfo,
+     Privilege,
+     SchemaInfo,
+     SecurableType,
+     VolumeInfo,
+     VolumeType,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class CatalogOperations:
+     def __init__(self, ws: WorkspaceClient):
+         self._ws = ws
+
+     def get_catalog(self, name: str) -> CatalogInfo | None:
+         try:
+             return self._ws.catalogs.get(name)
+         except NotFound:
+             return None
+
+     def get_schema(self, catalog_name: str, schema_name: str) -> SchemaInfo | None:
+         try:
+             return self._ws.schemas.get(f"{catalog_name}.{schema_name}")
+         except NotFound:
+             return None
+
+     def get_volume(self, catalog: str, schema: str, name: str) -> VolumeInfo | None:
+         try:
+             return self._ws.volumes.read(f"{catalog}.{schema}.{name}")
+         except NotFound:
+             return None
+
+     def create_catalog(self, name: str) -> CatalogInfo:
+         logger.debug(f"Creating catalog `{name}`.")
+         catalog_info = self._ws.catalogs.create(name)
+         logger.info(f"Created catalog `{name}`.")
+         return catalog_info
+
+     def create_schema(self, schema_name: str, catalog_name: str) -> SchemaInfo:
+         logger.debug(f"Creating schema `{schema_name}` in catalog `{catalog_name}`.")
+         schema_info = self._ws.schemas.create(schema_name, catalog_name)
+         logger.info(f"Created schema `{schema_name}` in catalog `{catalog_name}`.")
+         return schema_info
+
+     def create_volume(
+         self,
+         catalog: str,
+         schema: str,
+         name: str,
+         volume_type: VolumeType = VolumeType.MANAGED,
+     ) -> VolumeInfo:
+         logger.debug(f"Creating volume `{name}` in catalog `{catalog}` and schema `{schema}`")
+         volume_info = self._ws.volumes.create(catalog, schema, name, volume_type)
+         logger.info(f"Created volume `{name}` in catalog `{catalog}` and schema `{schema}`")
+         return volume_info
+
+     def has_catalog_access(
+         self,
+         catalog: CatalogInfo,
+         user_name: str,
+         privilege_sets: tuple[set[Privilege], ...],
+     ) -> bool:
+         """
+         Check if a user has access to a catalog based on ownership or a set of privileges.
+         :param catalog: A catalog to check access for.
+         :param user_name: Username to check.
+         :param privilege_sets: A tuple of sets, where each set contains Privilege objects.
+                The function checks if the user has any of these sets of privileges. For example:
+                ({Privilege.ALL_PRIVILEGES}, {Privilege.USE_CATALOG, Privilege.APPLY_TAG})
+                In this case, the user would need either ALL_PRIVILEGES,
+                or both USE_CATALOG and APPLY_TAG.
+         """
+         if user_name == catalog.owner:
+             return True
+
+         return any(
+             self.has_privileges(user_name, SecurableType.CATALOG, catalog.name, privilege_set)
+             for privilege_set in privilege_sets
+         )
+
+     def has_schema_access(
+         self,
+         schema: SchemaInfo,
+         user_name: str,
+         privilege_sets: tuple[set[Privilege], ...],
+     ) -> bool:
+         """
+         Check if a user has access to a schema based on ownership or a set of privileges.
+         :param schema: A schema to check access for.
+         :param user_name: Username to check.
+         :param privilege_sets: The function checks if the user has any of these sets of privileges. For example:
+                ({Privilege.ALL_PRIVILEGES}, {Privilege.USE_SCHEMA, Privilege.CREATE_TABLE})
+                In this case, the user would need either ALL_PRIVILEGES,
+                or both USE_SCHEMA and CREATE_TABLE.
+         """
+         if user_name == schema.owner:
+             return True
+
+         return any(
+             self.has_privileges(user_name, SecurableType.SCHEMA, schema.full_name, privilege_set)
+             for privilege_set in privilege_sets
+         )
+
+     def has_volume_access(
+         self,
+         volume: VolumeInfo,
+         user_name: str,
+         privilege_sets: tuple[set[Privilege], ...],
+     ) -> bool:
+         """
+         Check if a user has access to a volume based on ownership or a set of privileges.
+         :param volume: A volume to check access for.
+         :param user_name: Username to check.
+         :param privilege_sets: The function checks if the user has any of these sets of privileges. For example:
+                ({Privilege.ALL_PRIVILEGES}, {Privilege.READ_VOLUME, Privilege.WRITE_VOLUME})
+                In this case, the user would need either ALL_PRIVILEGES,
+                or both READ_VOLUME and WRITE_VOLUME.
+         """
+         if user_name == volume.owner:
+             return True
+
+         return any(
+             self.has_privileges(user_name, SecurableType.VOLUME, volume.full_name, privilege_set)
+             for privilege_set in privilege_sets
+         )
+
+     def has_privileges(
+         self,
+         user: str | None,
+         securable_type: SecurableType,
+         full_name: str | None,
+         privileges: set[Privilege],
+     ) -> bool:
+         """
+         Check if a user has a set of privileges for a securable object.
+         """
+         assert user, "User must be provided"
+         assert full_name, "Full name must be provided"
+         user_privileges = self._get_user_privileges(user, securable_type, full_name)
+         result = privileges.issubset(user_privileges)
+         if not result:
+             logger.debug(f"User {user} doesn't have privilege set {privileges} for {securable_type} {full_name}")
+         return result
+
+     @functools.lru_cache(maxsize=1024)
+     def _get_user_privileges(self, user: str, securable_type: SecurableType, full_name: str) -> set[Privilege]:
+         permissions = self._ws.grants.get_effective(securable_type, full_name, principal=user)
+         if not permissions or not permissions.privilege_assignments:
+             return set()
+         return {
+             p.privilege
+             for p in chain.from_iterable(
+                 privilege.privileges for privilege in permissions.privilege_assignments if privilege.privileges
+             )
+             if p.privilege
+         }
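The hunk above (164 added lines, which lines up with databricks/labs/lakebridge/helpers/metastore.py in the file list) wraps Unity Catalog lookups, creation, and privilege checks. A minimal usage sketch of the privilege-set semantics described in the docstrings — the catalog name, user, and privilege combinations are illustrative, and workspace credentials are assumed to come from the standard SDK environment:

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import Privilege

from databricks.labs.lakebridge.helpers.metastore import CatalogOperations

ws = WorkspaceClient()  # assumes auth via env vars or a configured profile
ops = CatalogOperations(ws)

# Reuse the catalog if it exists, otherwise create it (name is illustrative).
catalog = ops.get_catalog("lakebridge_demo") or ops.create_catalog("lakebridge_demo")

# Each inner set is an alternative: the user needs ALL_PRIVILEGES, or both
# USE_CATALOG and APPLY_TAG, unless they already own the catalog.
allowed = ops.has_catalog_access(
    catalog,
    "someone@example.com",
    ({Privilege.ALL_PRIVILEGES}, {Privilege.USE_CATALOG, Privilege.APPLY_TAG}),
)
print(f"catalog access granted: {allowed}")
```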
@@ -0,0 +1,176 @@
+ import logging
+
+ from databricks.labs.blueprint.tui import Prompts
+ from databricks.labs.lakebridge.reconcile.constants import ReconSourceType
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.errors.platform import ResourceDoesNotExist
+
+ logger = logging.getLogger(__name__)
+
+
+ class ReconConfigPrompts:
+     def __init__(self, ws: WorkspaceClient, prompts: Prompts = Prompts()):
+         self._source = None
+         self._prompts = prompts
+         self._ws = ws
+
+     def _scope_exists(self, scope_name: str) -> bool:
+         scope_exists = scope_name in [scope.name for scope in self._ws.secrets.list_scopes()]
+
+         if not scope_exists:
+             logger.error(
+                 f"Error: Cannot find Secret Scope: `{scope_name}` in Databricks Workspace."
+                 f"\nUse `remorph configure-secrets` to setup Scope and Secrets"
+             )
+             return False
+         logger.debug(f"Found Scope: `{scope_name}` in Databricks Workspace")
+         return True
+
+     def _ensure_scope_exists(self, scope_name: str):
+         """
+         Get or Create a new Scope in Databricks Workspace
+         :param scope_name:
+         """
+         scope_exists = self._scope_exists(scope_name)
+         if not scope_exists:
+             allow_scope_creation = self._prompts.confirm("Do you want to create a new one?")
+             if not allow_scope_creation:
+                 msg = "Scope is needed to store Secrets in Databricks Workspace"
+                 raise SystemExit(msg)
+
+             try:
+                 logger.debug(f" Creating a new Scope: `{scope_name}`")
+                 self._ws.secrets.create_scope(scope_name)
+             except Exception as ex:
+                 logger.error(f"Exception while creating Scope `{scope_name}`: {ex}")
+                 raise ex
+
+             logger.info(f" Created a new Scope: `{scope_name}`")
+         logger.info(f" Using Scope: `{scope_name}`...")
+
+     def _secret_key_exists(self, scope_name: str, secret_key: str) -> bool:
+         try:
+             self._ws.secrets.get_secret(scope_name, secret_key)
+             logger.info(f"Found Secret key `{secret_key}` in Scope `{scope_name}`")
+             return True
+         except ResourceDoesNotExist:
+             logger.debug(f"Secret key `{secret_key}` not found in Scope `{scope_name}`")
+             return False
+
+     def _store_secret(self, scope_name: str, secret_key: str, secret_value: str):
+         try:
+             logger.debug(f"Storing Secret: *{secret_key}* in Scope: `{scope_name}`")
+             self._ws.secrets.put_secret(scope=scope_name, key=secret_key, string_value=secret_value)
+         except Exception as ex:
+             logger.error(f"Exception while storing Secret `{secret_key}`: {ex}")
+             raise ex
+
+     def store_connection_secrets(self, scope_name: str, conn_details: tuple[str, dict[str, str]]):
+         engine = conn_details[0]
+         secrets = conn_details[1]
+
+         logger.debug(f"Storing `{engine}` Connection Secrets in Scope: `{scope_name}`")
+
+         for key, value in secrets.items():
+             secret_key = key
+             logger.debug(f"Processing Secret: *{secret_key}*")
+             debug_op = "Storing"
+             info_op = "Stored"
+             if self._secret_key_exists(scope_name, secret_key):
+                 overwrite_secret = self._prompts.confirm(f"Do you want to overwrite `{secret_key}`?")
+                 if not overwrite_secret:
+                     continue
+                 debug_op = "Overwriting"
+                 info_op = "Overwritten"
+
+             logger.debug(f"{debug_op} Secret: *{secret_key}* in Scope: `{scope_name}`")
+             self._store_secret(scope_name, secret_key, value)
+             logger.info(f"{info_op} Secret: *{secret_key}* in Scope: `{scope_name}`")
+
+     def prompt_source(self):
+         source = self._prompts.choice(
+             "Select the source dialect", [source_type.value for source_type in ReconSourceType]
+         )
+         self._source = source
+         return source
+
+     def _prompt_snowflake_connection_details(self) -> tuple[str, dict[str, str]]:
+         """
+         Prompt for Snowflake connection details
+         :return: tuple[str, dict[str, str]]
+         """
+         logger.info(
+             f"Please answer a couple of questions to configure `{ReconSourceType.SNOWFLAKE.value}` Connection profile"
+         )
+
+         sf_url = self._prompts.question("Enter Snowflake URL")
+         account = self._prompts.question("Enter Account Name")
+         sf_user = self._prompts.question("Enter User")
+         sf_password = self._prompts.question("Enter Password")
+         sf_db = self._prompts.question("Enter Database")
+         sf_schema = self._prompts.question("Enter Schema")
+         sf_warehouse = self._prompts.question("Enter Snowflake Warehouse")
+         sf_role = self._prompts.question("Enter Role", default=" ")
+
+         sf_conn_details = {
+             "sfUrl": sf_url,
+             "account": account,
+             "sfUser": sf_user,
+             "sfPassword": sf_password,
+             "sfDatabase": sf_db,
+             "sfSchema": sf_schema,
+             "sfWarehouse": sf_warehouse,
+             "sfRole": sf_role,
+         }
+
+         sf_conn_dict = (ReconSourceType.SNOWFLAKE.value, sf_conn_details)
+         return sf_conn_dict
+
+     def _prompt_oracle_connection_details(self) -> tuple[str, dict[str, str]]:
+         """
+         Prompt for Oracle connection details
+         :return: tuple[str, dict[str, str]]
+         """
+         logger.info(
+             f"Please answer a couple of questions to configure `{ReconSourceType.ORACLE.value}` Connection profile"
+         )
+         user = self._prompts.question("Enter User")
+         password = self._prompts.question("Enter Password")
+         host = self._prompts.question("Enter host")
+         port = self._prompts.question("Enter port")
+         database = self._prompts.question("Enter database/SID")
+
+         oracle_conn_details = {"user": user, "password": password, "host": host, "port": port, "database": database}
+
+         oracle_conn_dict = (ReconSourceType.ORACLE.value, oracle_conn_details)
+         return oracle_conn_dict
+
+     def _connection_details(self):
+         """
+         Prompt for connection details based on the source
+         :return: None
+         """
+         logger.debug(f"Prompting for `{self._source}` connection details")
+         match self._source:
+             case ReconSourceType.SNOWFLAKE.value:
+                 return self._prompt_snowflake_connection_details()
+             case ReconSourceType.ORACLE.value:
+                 return self._prompt_oracle_connection_details()
+
+     def prompt_and_save_connection_details(self):
+         """
+         Prompt for connection details and save them as Secrets in Databricks Workspace
+         """
+         # prompt for connection_details only if source is other than Databricks
+         if self._source == ReconSourceType.DATABRICKS.value:
+             logger.info("*Databricks* as a source is supported only for **Hive MetaStore (HMS) setup**")
+             return
+
+         # Prompt for secret scope
+         scope_name = self._prompts.question("Enter Secret Scope name")
+         self._ensure_scope_exists(scope_name)
+
+         # Prompt for connection details
+         connection_details = self._connection_details()
+         logger.debug(f"Storing `{self._source}` connection details as Secrets in Databricks Workspace...")
+         self.store_connection_secrets(scope_name, connection_details)
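This hunk (176 added lines, which lines up with databricks/labs/lakebridge/helpers/recon_config_utils.py) drives the interactive setup that stores reconciliation source credentials as workspace secrets. A sketch of the intended call sequence, assuming an interactive terminal for the blueprint Prompts and SDK-provided workspace credentials:

```python
from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts

ws = WorkspaceClient()  # assumes standard SDK authentication
recon_prompts = ReconConfigPrompts(ws)

# Pick the source dialect (Snowflake, Oracle, ...), then collect its connection
# details and store them as secrets in the chosen scope.
recon_prompts.prompt_source()
recon_prompts.prompt_and_save_connection_details()
```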
@@ -0,0 +1,62 @@
+ import codecs
+
+
+ # Optionally check to see if a string begins with a Byte Order Mark
+ # such a character will cause the transpiler to fail
+ def remove_bom(input_string: str) -> str:
+     """
+     Removes the Byte Order Mark (BOM) from the given string if it exists.
+     :param input_string: String to remove BOM from
+     :return: String without BOM
+     """
+     output_string = input_string
+
+     # Check and remove UTF-16 (LE and BE) BOM
+     if input_string.startswith(codecs.BOM_UTF16_BE.decode("utf-16-be")):
+         output_string = input_string[len(codecs.BOM_UTF16_BE.decode("utf-16-be")) :]
+     elif input_string.startswith(codecs.BOM_UTF16_LE.decode("utf-16-le")):
+         output_string = input_string[len(codecs.BOM_UTF16_LE.decode("utf-16-le")) :]
+     elif input_string.startswith(codecs.BOM_UTF16.decode("utf-16")):
+         output_string = input_string[len(codecs.BOM_UTF16.decode("utf-16")) :]
+     # Check and remove UTF-32 (LE and BE) BOM
+     elif input_string.startswith(codecs.BOM_UTF32_BE.decode("utf-32-be")):
+         output_string = input_string[len(codecs.BOM_UTF32_BE.decode("utf-32-be")) :]
+     elif input_string.startswith(codecs.BOM_UTF32_LE.decode("utf-32-le")):
+         output_string = input_string[len(codecs.BOM_UTF32_LE.decode("utf-32-le")) :]
+     elif input_string.startswith(codecs.BOM_UTF32.decode("utf-32")):
+         output_string = input_string[len(codecs.BOM_UTF32.decode("utf-32")) :]
+     # Check and remove UTF-8 BOM
+     elif input_string.startswith(codecs.BOM_UTF8.decode("utf-8")):
+         output_string = input_string[len(codecs.BOM_UTF8.decode("utf-8")) :]
+
+     return output_string
+
+
+ def refactor_hexadecimal_chars(input_string: str) -> str:
+     """
+     Updates the HexaDecimal characters ( \x1b[\\d+m ) in the given string as below.
+     :param input_string: String with HexaDecimal characters. ex: ( \x1b[4mWHERE\x1b[0m )
+     :return: String with HexaDecimal characters refactored to arrows. ex: ( --> WHERE <--)
+     """
+     output_string = input_string
+     highlight = {"\x1b[4m": "--> ", "\x1b[0m": " <--"}
+     for key, value in highlight.items():
+         output_string = output_string.replace(key, value)
+     return output_string
+
+
+ def format_error_message(error_type: str, error_message: Exception, error_sql: str) -> str:
+     """
+     Formats the error message with the error SQL.
+     :param error_type: Error Type
+     :param error_message: Error message
+     :param error_sql: Error SQL
+     :return: Formatted error message
+     """
+     error_str = (
+         f"------------------------ {error_type} Start:------------------------\n"
+         f"/*\n{str(error_message)}\n*/\n\n"
+         f"/*\nOriginal Query:\n\n{str(error_sql)}\n*/\n"
+         f"------------------------- {error_type} End:-------------------------"
+     ).strip()
+     return error_str
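These string helpers (62 added lines, matching databricks/labs/lakebridge/helpers/string_utils.py) are small and self-contained, so their behavior can be checked directly; the sample strings below are made up for illustration:

```python
from databricks.labs.lakebridge.helpers.string_utils import refactor_hexadecimal_chars, remove_bom

bom_query = "\ufeffSELECT 1"  # a decoded BOM at the start of the text
assert remove_bom(bom_query) == "SELECT 1"

highlighted = "syntax error near \x1b[4mWHERE\x1b[0m"
print(refactor_hexadecimal_chars(highlighted))  # syntax error near --> WHERE <--
```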
@@ -0,0 +1,13 @@
+ from databricks.sdk.useragent import alphanum_pattern, semver_pattern
+
+
+ def make_alphanum_or_semver(value: str) -> str:
+     if alphanum_pattern.match(value) or semver_pattern.match(value):
+         return value
+     # assume it's not a semver, replace illegal alphanum chars
+     result = []
+     for char in value:
+         if not alphanum_pattern.match(char):
+             char = '_'
+         result.append(char)
+     return "".join(result)
@@ -0,0 +1,101 @@
+ import logging
+ from io import StringIO
+
+ from databricks.labs.lsql.backends import SqlBackend
+ from databricks.labs.lakebridge.config import TranspileConfig, ValidationResult
+ from databricks.sdk.errors.base import DatabricksError
+
+ logger = logging.getLogger(__name__)
+
+
+ class Validator:
+     """
+     The Validator class is used to validate SQL queries.
+     """
+
+     def __init__(self, sql_backend: SqlBackend):
+         self._sql_backend = sql_backend
+
+     def validate_format_result(self, config: TranspileConfig, sql_text: str) -> ValidationResult:
+         """
+         Validates the SQL query and formats the result.
+
+         This function validates the SQL query based on the provided configuration. If the query is valid,
+         it appends a semicolon to the end of the query. If the query is not valid, it formats the error message.
+
+         Parameters:
+         - config (MorphConfig): The configuration for the validation.
+         - sql_text (str): The SQL query to be validated.
+
+         Returns:
+         - tuple: A tuple containing the result of the validation and the exception message (if any).
+         """
+         logger.debug(f"Validating query with catalog {config.catalog_name} and schema {config.schema_name}")
+         (is_valid, exception_type, exception_msg) = self._query(
+             self._sql_backend,
+             sql_text,
+             config.catalog_name,
+             config.schema_name,
+         )
+         if is_valid:
+             result = sql_text
+             if exception_type is not None:
+                 exception_msg = f"[{exception_type.upper()}]: {exception_msg}"
+         else:
+             query = ""
+             if "[UNRESOLVED_ROUTINE]" in str(exception_msg):
+                 query = sql_text
+             buffer = StringIO()
+             buffer.write("-------------- Exception Start-------------------\n")
+             buffer.write("/* \n")
+             buffer.write(str(exception_msg))
+             buffer.write("\n */ \n")
+             buffer.write(query)
+             buffer.write("\n ---------------Exception End --------------------\n")
+
+             result = buffer.getvalue()
+
+         return ValidationResult(result, exception_msg)
+
+     def _query(
+         self, sql_backend: SqlBackend, query: str, catalog: str, schema: str
+     ) -> tuple[bool, str | None, str | None]:
+         """
+         Validate a given SQL query using the provided SQL backend
+
+         Parameters:
+         - query (str): The SQL query to be validated.
+         - sql_backend (SqlBackend): The SQL backend to be used for validation.
+
+         Returns:
+         - tuple: A tuple containing a boolean indicating whether the query is valid or not,
+         and a string containing a success message or an exception message.
+         """
+         # When variables is mentioned Explain fails we need way to replace them before explain is executed.
+         explain_query = f'EXPLAIN {query.replace("${", "`{").replace("}", "}`").replace("``", "`")}'
+         try:
+             rows = list(sql_backend.fetch(explain_query, catalog=catalog, schema=schema))
+             if not rows:
+                 return False, "error", "No results returned from explain query."
+
+             if "Error occurred during query planning" in rows[0].asDict().get("plan", ""):
+                 error_details = rows[1].asDict().get("plan", "Unknown error.") if len(rows) > 1 else "Unknown error."
+                 raise DatabricksError(error_details)
+             return True, None, None
+         except DatabricksError as dbe:
+             err_msg = str(dbe)
+             if "[PARSE_SYNTAX_ERROR]" in err_msg:
+                 logger.debug(f"Syntax Exception : NOT IGNORED. Flag as syntax error: {err_msg}")
+                 return False, "error", err_msg
+             if "[UNRESOLVED_ROUTINE]" in err_msg:
+                 logger.debug(f"Analysis Exception : NOT IGNORED: Flag as Function Missing error {err_msg}")
+                 return False, "error", err_msg
+             if "[TABLE_OR_VIEW_NOT_FOUND]" in err_msg or "[TABLE_OR_VIEW_ALREADY_EXISTS]" in err_msg:
+                 logger.debug(f"Analysis Exception : IGNORED: {err_msg}")
+                 return True, "warning", err_msg
+             if "Hive support is required to CREATE Hive TABLE (AS SELECT).;" in err_msg:
+                 logger.debug(f"Analysis Exception : IGNORED: {err_msg}")
+                 return True, "warning", err_msg
+
+             logger.debug(f"Unknown Exception: {err_msg}")
+             return False, "error", err_msg