databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
@@ -0,0 +1,138 @@
1
+ import logging
2
+ from collections.abc import Iterable
3
+ from pathlib import Path
4
+
5
+ from sqlglot import expressions as exp, Dialect
6
+ from sqlglot import parse
7
+ from sqlglot.errors import ErrorLevel, ParseError, TokenError, UnsupportedError
8
+ from sqlglot.expressions import Expression, Select
9
+ from sqlglot.optimizer.scope import Scope, build_scope
10
+
11
+ from databricks.labs.lakebridge.transpiler.transpile_status import TranspileError, ErrorKind, ErrorSeverity
12
+ from databricks.labs.lakebridge.transpiler.sqlglot.local_expression import AliasInfo
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def check_for_unsupported_lca(
    from_dialect: Dialect,
    source_sql: str,
    file_path: Path,
) -> TranspileError | None:
    """
    Check for presence of unsupported lateral column aliases in window expressions and where clauses.

    :param from_dialect: dialect used to parse ``source_sql``
    :param source_sql: SQL text to inspect
    :param file_path: origin of the SQL; only used in the log message and in the returned error
    :return: An ``UNSUPPORTED_LCA`` error if such aliases are found, otherwise ``None``.
             ``None`` is also returned, after logging a warning, when the SQL cannot be parsed.
    """
    try:
        all_parsed_expressions: Iterable[Expression | None] = parse(
            source_sql, read=from_dialect, error_level=ErrorLevel.RAISE
        )
        root_expressions: Iterable[Expression] = [pe for pe in all_parsed_expressions if pe is not None]
    except (ParseError, TokenError, UnsupportedError) as e:
        # Preprocessing is best effort: parsing problems are reported, not raised.
        logger.warning(f"Error while preprocessing {file_path}: {e}")
        return None

    aliases_in_where: set[str] = set()
    aliases_in_window: set[str] = set()

    for expr in root_expressions:
        # Every SELECT in the statement is inspected with its own alias table.
        for select in expr.find_all(exp.Select, bfs=False):
            alias_info = _find_aliases_in_select(select)
            aliases_in_where.update(_find_invalid_lca_in_where(select, alias_info))
            aliases_in_window.update(_find_invalid_lca_in_window(select, alias_info))

    if not (aliases_in_where or aliases_in_window):
        return None

    # The names are sorted because set iteration order of strings differs between
    # interpreter runs, which would make the reported message unstable.
    err_messages = [f"Unsupported operation found in file {file_path}. Needs manual review of transpiled query."]
    if aliases_in_where:
        err_messages.append(f"Lateral column aliases `{', '.join(sorted(aliases_in_where))}` found in where clause.")

    if aliases_in_window:
        err_messages.append(
            f"Lateral column aliases `{', '.join(sorted(aliases_in_window))}` found in window expressions."
        )

    return TranspileError("UNSUPPORTED_LCA", ErrorKind.ANALYSIS, ErrorSeverity.ERROR, file_path, " ".join(err_messages))
55
+
56
+
57
def unalias_lca_in_select(expr: exp.Expression) -> exp.Expression:
    """
    Replace lateral column aliases used in the WHERE clause and in the window
    expressions of a SELECT with the expressions they stand for.

    Anything that is not a ``Select``, or for which no scope can be built, is
    returned untouched. The replacement happens in place and the same ``expr``
    object is returned.
    """
    if not isinstance(expr, exp.Select):
        return expr

    scope: Scope | None = build_scope(expr)
    if not scope:
        return expr

    lateral_aliases = _find_aliases_in_select(expr)

    # Nested selects are not searched while walking the WHERE clause;
    # they will be visited separately.
    skipped_selects = {*scope.derived_tables, *scope.subqueries}

    def _is_nested_select(node) -> bool:
        return node in skipped_selects

    if where_clause := expr.args.get("where"):
        for node in where_clause.walk(prune=_is_nested_select):
            _replace_aliases(node, lateral_aliases)

    for window in _find_windows_in_select(expr):
        for node in window.walk():
            _replace_aliases(node, lateral_aliases)

    return expr
76
+
77
+
78
def _replace_aliases(
    column: Expression,
    alias_info: dict[str, AliasInfo],
    _expanding: frozenset[str] = frozenset(),
):
    """
    Replace ``column`` in its tree by the expression of the alias it refers to.

    Nothing happens unless ``column`` is an ``exp.Column`` whose name is a known
    alias that is not flagged ``is_same_name_as_column``. Columns inside the
    substituted expression are resolved recursively.

    :param column: node to inspect; replaced in place when it is an alias reference
    :param alias_info: aliases of the enclosing SELECT, keyed by alias name
    :param _expanding: internal; names of the aliases currently being expanded
    """
    if not isinstance(column, exp.Column):
        return

    alias_name = column.name
    info = alias_info.get(alias_name)
    if info is None or info.is_same_name_as_column:
        return

    # Two aliases that mention each other would otherwise expand forever.
    if alias_name in _expanding:
        return

    # Each reference gets its own copy: splicing the single stored node into several
    # places would leave one node shared by multiple parents.
    unaliased_expr = info.expression.copy()
    column.replace(unaliased_expr)

    in_progress = _expanding | {alias_name}
    # The nodes are collected first because the recursive calls mutate the tree being walked.
    for col in list(unaliased_expr.walk()):
        _replace_aliases(col, alias_info, in_progress)
88
+
89
+
90
def _find_windows_in_select(select: Select) -> list[exp.Window]:
    """
    Collect the window expressions found in the projections of ``select``.

    All windows of a projection are returned, so a projection such as
    ``SUM(a) OVER (...) - SUM(b) OVER (...)`` contributes both of its windows.

    :param select: the SELECT whose projections are searched
    :return: the windows found, in projection order
    """
    window_expressions: list[exp.Window] = []
    for expr in select.expressions:
        # find() stops at the first match and would hide further windows of the same projection.
        window_expressions.extend(expr.find_all(exp.Window))
    return window_expressions
97
+
98
+
99
def _find_aliases_in_select(select_expr: Select) -> dict[str, AliasInfo]:
    """
    Build a lookup of the aliased projections of ``select_expr``.

    Each entry maps the alias name to an ``AliasInfo`` holding a copy of the
    un-aliased expression and a flag telling whether that expression contains a
    column carrying the same name as the alias.
    """
    found: dict[str, AliasInfo] = {}
    for projection in select_expr.expressions:
        if not isinstance(projection, exp.Alias):
            continue
        name = projection.output_name
        shadows_column = any(col.name == name for col in projection.find_all(exp.Column))
        found[name] = AliasInfo(name, projection.unalias().copy(), shadows_column)
    return found
111
+
112
+
113
def _find_invalid_lca_in_where(
    select_expr: Select,
    aliases: dict[str, AliasInfo],
) -> set[str]:
    """
    Return the names of the columns in the WHERE clause of ``select_expr`` that
    are known aliases not flagged ``is_same_name_as_column``.

    The result is empty when the SELECT has no WHERE clause.
    """
    where_clause: Expression | None = select_expr.args.get("where")
    if not where_clause:
        return set()

    return {
        col.name
        for col in where_clause.find_all(exp.Column)
        if col.name in aliases and not aliases[col.name].is_same_name_as_column
    }
125
+
126
+
127
def _find_invalid_lca_in_window(
    select_expr: Select,
    aliases: dict[str, AliasInfo],
) -> set[str]:
    """
    Return the names of the columns used inside the window expressions of
    ``select_expr`` that are known aliases not flagged ``is_same_name_as_column``.
    """
    return {
        col.name
        for window in _find_windows_in_select(select_expr)
        for col in window.find_all(exp.Column)
        if col.name in aliases and not aliases[col.name].is_same_name_as_column
    }
@@ -0,0 +1,197 @@
1
+ from dataclasses import dataclass
2
+
3
+ from sqlglot import expressions as exp
4
+ from sqlglot.expressions import AggFunc, Condition, Expression, Func
5
+
6
+
7
class NthValue(AggFunc):
    """Aggregate function node with a required ``this`` and an optional ``offset`` argument."""

    arg_types = {
        "this": True,
        "offset": False,
    }
9
+
10
+
11
class Parameter(Expression):
    """Expression node with a required ``this`` and optional ``wrapped`` and ``suffix`` arguments."""

    arg_types = {
        "this": True,
        "wrapped": False,
        "suffix": False,
    }
13
+
14
+
15
class Collate(Func):
    """Function node that requires both ``this`` and ``expressions``."""

    arg_types = {
        "this": True,
        "expressions": True,
    }
17
+
18
+
19
class Bracket(Condition):
    """Condition node that requires both ``this`` and ``expressions``."""

    arg_types = {
        "this": True,
        "expressions": True,
    }
21
+
22
+
23
class Split(Func):
    """
    Local redefinition of sqlglot's ``Split`` expression in which ``expression`` is
    optional, so that a default delimiter can be handled.

    See the test case `test_strtok_to_array` -> `select STRTOK_TO_ARRAY('my text is divided')`.
    """

    arg_types = {
        "this": True,
        "expression": False,
        "limit": False,
    }
30
+
31
+
32
class MakeDate(Func):
    """Function node with a required ``this`` and optional ``expression`` and ``zone`` arguments."""

    arg_types = {
        "this": True,
        "expression": False,
        "zone": False,
    }
34
+
35
+
36
class ConvertTimeZone(Func):
    """Function node with required ``srcTZ`` and ``tgtTZ`` arguments followed by an optional ``this``."""

    arg_types = {
        "srcTZ": True,
        "tgtTZ": True,
        "this": False,
    }
38
+
39
+
40
class TryToDate(Func):
    """Function node with a required ``this`` and an optional ``format`` argument."""

    arg_types = {
        "this": True,
        "format": False,
    }
42
+
43
+
44
class TryToTimestamp(Func):
    """Function node with a required ``this`` and an optional ``format`` argument."""

    arg_types = {
        "this": True,
        "format": False,
    }
46
+
47
+
48
class SplitPart(Func):
    """Function node with a required ``this`` and optional ``expression`` and ``partNum`` arguments."""

    arg_types = {
        "this": True,
        "expression": False,
        "partNum": False,
    }
50
+
51
+
52
class StrTok(Func):
    """Function node with a required ``this`` and optional ``expression`` and ``partNum`` arguments."""

    arg_types = {
        "this": True,
        "expression": False,
        "partNum": False,
    }
54
+
55
+
56
class TryToNumber(Func):
    """
    Function node with a required ``this`` and optional ``expression``,
    ``precision`` and ``scale`` arguments.

    ``_sql_names`` lists three SQL spellings for this one node.
    """

    arg_types = {
        "this": True,
        "expression": False,
        "precision": False,
        "scale": False,
    }

    _sql_names = [
        "TRY_TO_DECIMAL",
        "TRY_TO_NUMBER",
        "TRY_TO_NUMERIC",
    ]
60
+
61
+
62
class DateFormat(Func):
    """Function node with a required ``this`` and an optional ``expression`` argument."""

    arg_types = {
        "this": True,
        "expression": False,
    }
64
+
65
+
66
class IsInteger(Func):
    """Function node that adds nothing to ``Func``; it only provides a distinct expression type."""
68
+
69
+
70
class JsonExtractPathText(Func):
    """Function node that requires both ``this`` and ``path_name``."""

    arg_types = {
        "this": True,
        "path_name": True,
    }
72
+
73
+
74
class BitOr(AggFunc):
    """Aggregate function node that adds nothing to ``AggFunc``; it only provides a distinct expression type."""
76
+
77
+
78
class ArrayConstructCompact(Func):
    """Function node taking an optional, variable-length ``expressions`` list."""

    arg_types = {
        "expressions": False,
    }

    # Flags the node as accepting a variable number of arguments.
    is_var_len_args = True
82
+
83
+
84
class ArrayIntersection(Func):
    """Function node that requires both ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }
86
+
87
+
88
class ArraySlice(Func):
    """Function node that requires ``this``, ``from`` and ``to``."""

    arg_types = {
        "this": True,
        "from": True,
        "to": True,
    }
90
+
91
+
92
class ObjectKeys(Func):
    """Function node with a single required ``this`` argument."""

    arg_types = {
        "this": True,
    }
94
+
95
+
96
class ToBoolean(Func):
    """Function node with a required ``this`` and an optional ``raise_error`` argument."""

    arg_types = {
        "this": True,
        "raise_error": False,
    }
98
+
99
+
100
class ToDouble(Func):
    """Function node that adds nothing to ``Func``; it only provides a distinct expression type."""
102
+
103
+
104
class ToObject(Func):
    """Function node that adds nothing to ``Func``; it only provides a distinct expression type."""
106
+
107
+
108
class ToNumber(Func):
    """
    Function node with a required ``this`` and optional ``expression``,
    ``precision`` and ``scale`` arguments.

    ``_sql_names`` lists three SQL spellings for this one node.
    """

    arg_types = {
        "this": True,
        "expression": False,
        "precision": False,
        "scale": False,
    }

    _sql_names = [
        "TO_DECIMAL",
        "TO_NUMBER",
        "TO_NUMERIC",
    ]
112
+
113
+
114
class TimestampFromParts(Func):
    """
    Function node built from six required arguments (``this``, ``expression``,
    ``day``, ``hour``, ``min``, ``sec``) and two optional ones (``nanosec``, ``Zone``).
    """

    # NOTE(review): "Zone" is capitalised unlike every other key; confirm how
    # consumers spell it before normalising.
    arg_types = {
        "this": True,
        "expression": True,
        "day": True,
        "hour": True,
        "min": True,
        "sec": True,
        "nanosec": False,
        "Zone": False,
    }
125
+
126
+
127
class ToVariant(Func):
    """Function node that adds nothing to ``Func``; it only provides a distinct expression type."""
129
+
130
+
131
class UUID(Func):
    """Function node whose ``this`` and ``name`` arguments are both optional."""

    arg_types = {
        "this": False,
        "name": False,
    }
133
+
134
+
135
class DateTrunc(Func):
    """Function node with an optional ``unit``, a required ``this`` and an optional ``zone``, in that order."""

    arg_types = {
        "unit": False,
        "this": True,
        "zone": False,
    }
137
+
138
+
139
class Median(Func):
    """Function node with a single required ``this`` argument."""

    arg_types = {
        "this": True,
    }
141
+
142
+
143
class CumeDist(Func):
    """Function node with a single optional ``this`` argument."""

    arg_types = {
        "this": False,
    }
145
+
146
+
147
class DenseRank(Func):
    """Function node with a single optional ``this`` argument."""

    arg_types = {
        "this": False,
    }
149
+
150
+
151
class Rank(Func):
    """Function node with a single optional ``this`` argument."""

    arg_types = {
        "this": False,
    }
153
+
154
+
155
class PercentRank(Func):
    """Function node with a single optional ``this`` argument."""

    arg_types = {
        "this": False,
    }
157
+
158
+
159
class Ntile(Func):
    """Function node with a required ``this`` and an optional ``is_string`` argument."""

    arg_types = {
        "this": True,
        "is_string": False,
    }
161
+
162
+
163
class ToArray(Func):
    """Function node with a required ``this`` and an optional ``expression`` argument."""

    arg_types = {
        "this": True,
        "expression": False,
    }
165
+
166
+
167
@dataclass
class WithinGroupParams:
    """Holds an aggregated column together with the columns it is ordered by."""

    # Column being aggregated.
    agg_col: exp.Column
    # List of (column, is ascending) pairs.
    order_cols: list[tuple[exp.Column, bool]]
171
+
172
+
173
@dataclass
class AliasInfo:
    """Record describing one aliased expression."""

    # Alias name.
    name: str
    # Expression associated with the alias.
    expression: exp.Expression
    # Flag indicating that the alias has the same name as a column.
    is_same_name_as_column: bool
178
+
179
+
180
class MapKeys(Func):
    """Function node with a single required ``this`` argument."""

    arg_types = {
        "this": True,
    }
182
+
183
+
184
class ArrayExists(Func):
    """Function node that requires both ``this`` and ``expression``."""

    arg_types = {
        "this": True,
        "expression": True,
    }
186
+
187
+
188
class Locate(Func):
    """Function node with required ``substring`` and ``this`` arguments followed by an optional ``position``."""

    arg_types = {
        "substring": True,
        "this": True,
        "position": False,
    }
190
+
191
+
192
class NamedStruct(Func):
    """Function node with a single required ``expressions`` argument."""

    arg_types = {
        "expressions": True,
    }
194
+
195
+
196
class GetJsonObject(Func):
    """Function node that requires both ``this`` and ``path``."""

    arg_types = {
        "this": True,
        "path": True,
    }
@@ -0,0 +1,23 @@
1
+ from sqlglot.dialects.oracle import Oracle as Orc
2
+ from sqlglot.tokens import TokenType
3
+
4
+
5
class Oracle(Orc):
    """Oracle dialect whose tokenizer maps a number of additional type keywords to ``TokenType.TEXT``."""

    # Instantiate Oracle Dialect
    oracle = Orc()

    class Tokenizer(Orc.Tokenizer):
        """Tokenizer of the base dialect extended with extra keywords, all tokenized as TEXT."""

        KEYWORDS = {
            **Orc.Tokenizer.KEYWORDS,
            **dict.fromkeys(
                (
                    "LONG",
                    "NCLOB",
                    "ROWID",
                    "UROWID",
                    "ANYTYPE",
                    "ANYDATA",
                    "ANYDATASET",
                    "XMLTYPE",
                    "SDO_GEOMETRY",
                    "SDO_TOPO_GEOMETRY",
                    "SDO_GEORASTER",
                ),
                TokenType.TEXT,
            ),
        }
@@ -0,0 +1,202 @@
1
+ import logging
2
+ from sqlglot.dialects.presto import Presto as presto
3
+ from sqlglot import exp
4
+ from sqlglot.helper import seq_get
5
+ from sqlglot.errors import ParseError
6
+ from sqlglot.tokens import TokenType
7
+
8
+ from databricks.labs.lakebridge.transpiler.sqlglot import local_expression
9
+
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def _accuracy_literal(accuracy_arg, arg_index: int) -> exp.Literal:
    """Turn the accuracy argument into the numeric literal ``int((1 / accuracy) * 100)``.

    :param accuracy_arg: parsed accuracy argument; its ``this`` must be convertible to a non-zero float
    :param arg_index: zero-based position of the argument, used in the error message
    :raises ParseError: if the argument is missing, is not numeric, or is zero
    """
    try:
        number = float(accuracy_arg.this)
        return exp.Literal(this=f"{int((1 / number) * 100)}", is_string=False)
    except (AttributeError, TypeError, ValueError, ZeroDivisionError, OverflowError) as exc:
        raise ParseError(
            f"Expected a string representation of a number for argument {arg_index}, but got {accuracy_arg}"
        ) from exc


def _build_approx_percentile(args: list) -> exp.Expression:
    """
    Build an ``ApproxQuantile`` node from the arguments of ``APPROX_PERCENTILE``.

    With four arguments they are read as (value, weight, quantile, accuracy) and
    with three as (value, quantile, accuracy); in both cases the accuracy is
    rewritten to ``int((1 / accuracy) * 100)``. Any other argument count is
    handed to ``ApproxQuantile.from_arg_list`` unchanged.

    :param args: parsed function arguments
    :raises ParseError: if the accuracy argument cannot be converted
    """
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=_accuracy_literal(seq_get(args, 3), 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            quantile=seq_get(args, 1),
            accuracy=_accuracy_literal(seq_get(args, 2), 2),
        )
    return exp.ApproxQuantile.from_arg_list(args)
39
+
40
+
41
def _build_any_keys_match(args: list) -> local_expression.ArrayExists:
    """Build ``ArrayExists`` over the ``MapKeys`` of the first argument, with the second argument as ``expression``."""
    keys_of_map = local_expression.MapKeys(this=seq_get(args, 0))
    predicate = seq_get(args, 1)
    return local_expression.ArrayExists(this=keys_of_map, expression=predicate)
45
+
46
+
47
def _build_str_position(args: list) -> local_expression.Locate:
    """
    Build a ``Locate`` node from the arguments of ``STRPOS``.

    The first argument becomes ``this`` and the second ``substring``. When exactly
    three arguments are given, the third is passed through as ``position`` and a
    warning is logged.
    """
    haystack = seq_get(args, 0)
    needle = seq_get(args, 1)

    if len(args) != 3:
        return local_expression.Locate(substring=needle, this=haystack)

    # TODO the 3rd param in presto strpos and databricks locate has different implementation.
    # For now we haven't implemented the logic same as presto for 3rd param.
    # Users should be vigilant when using 3 param function in presto strpos.
    logger.warning(
        "*Warning:: The third parameter in Presto's `strpos` function and Databricks' `locate` function "
        "have different implementations. Please exercise caution when using the three-parameter version "
        "of the `strpos` function in Presto."
    )
    return local_expression.Locate(substring=needle, this=haystack, position=seq_get(args, 2))
60
+
61
+
62
def _build_array_average(args: list) -> exp.Reduce:
    """
    Build the ``Reduce`` expression used for ``ARRAY_AVERAGE``.

    The tree filters NULL elements out of the first argument, folds the rest into
    a named struct holding ``sum`` and ``cnt``, and finishes with
    ``try_divide(acc.sum, acc.cnt)``.
    """

    # Small factories: every call yields a fresh node, so no node is shared within the tree.
    def ident(name: str) -> exp.Identifier:
        return exp.Identifier(this=name, quoted=False)

    def text(value: str) -> exp.Literal:
        return exp.Literal(this=value, is_string=True)

    def number(value: str) -> exp.Literal:
        return exp.Literal(this=value, is_string=False)

    def acc_field(field: str) -> exp.Dot:
        return exp.Dot(this=ident("acc"), expression=ident(field))

    # x -> NOT x IS NULL
    non_null_items = exp.ArrayFilter(
        this=seq_get(args, 0),
        expression=exp.Lambda(
            this=exp.Not(this=exp.Is(this=ident("x"), expression=exp.Null())),
            expressions=[ident("x")],
        ),
    )

    # Starting accumulator: sum = CAST(0 AS DOUBLE), cnt = 0
    seed = local_expression.NamedStruct(
        expressions=[
            text("sum"),
            exp.Cast(this=number("0"), to=exp.DataType(this="DOUBLE")),
            text("cnt"),
            number("0"),
        ],
    )

    # (acc, x) -> sum = acc.sum + x, cnt = acc.cnt + 1
    step = exp.Lambda(
        this=local_expression.NamedStruct(
            expressions=[
                text("sum"),
                exp.Add(this=acc_field("sum"), expression=ident("x")),
                text("cnt"),
                exp.Add(this=acc_field("cnt"), expression=number("1")),
            ],
        ),
        expressions=[ident("acc"), ident("x")],
    )

    # acc -> try_divide(acc.sum, acc.cnt)
    final = exp.Lambda(
        this=exp.Anonymous(
            this="try_divide",
            expressions=[acc_field("sum"), acc_field("cnt")],
        ),
        expressions=[ident("acc")],
    )

    return exp.Reduce(this=non_null_items, initial=seed, merge=step, finish=final)
119
+
120
+
121
def _build_json_size(args: list):
    """
    Build the ``CASE`` expression used for ``JSON_SIZE``.

    The value extracted with ``GetJsonObject`` from the first argument at the path
    given by the second is tested in order: text like ``{%`` is sized after
    ``from_json(..., 'map<string,string>')``, text like ``[%`` after
    ``from_json(..., 'array<string>')``, any other non-NULL value gives ``0``,
    and the default is ``NULL``.
    """
    json_arg = seq_get(args, 0)
    path_arg = seq_get(args, 1)

    def extracted() -> local_expression.GetJsonObject:
        # A new wrapper is built on every call, as in each branch of the CASE.
        return local_expression.GetJsonObject(
            this=exp.Column(this=json_arg),
            path=exp.Column(this=path_arg),
        )

    def size_when_like(pattern: str, schema: str) -> exp.If:
        return exp.If(
            this=exp.Like(
                this=extracted(),
                expression=exp.Literal(this=pattern, is_string=True),
            ),
            true=exp.ArraySize(
                this=exp.Anonymous(
                    this="from_json",
                    expressions=[
                        extracted(),
                        exp.Literal(this=schema, is_string=True),
                    ],
                )
            ),
        )

    return exp.Case(
        ifs=[
            size_when_like("{%", "map<string,string>"),
            size_when_like("[%", "array<string>"),
            exp.If(
                this=exp.Not(this=exp.Is(this=extracted(), expression=exp.Null())),
                true=exp.Literal(this="0", is_string=False),
            ),
        ],
        default=exp.Null(),
    )
181
+
182
+
183
class Presto(presto):
    """Presto dialect with additional function builders and a tokenizer that treats ``JSON`` as TEXT."""

    class Parser(presto.Parser):
        """Parser of the base dialect with extra entries in ``FUNCTIONS``."""

        VALUES_FOLLOWED_BY_PAREN = False

        # Base functions first; the entries below are added on top of them.
        FUNCTIONS = {
            **presto.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "STRPOS": _build_str_position,
            "ANY_KEYS_MATCH": _build_any_keys_match,
            "ARRAY_AVERAGE": _build_array_average,
            "JSON_SIZE": _build_json_size,
            "FORMAT_DATETIME": local_expression.DateFormat.from_arg_list,
        }

    class Tokenizer(presto.Tokenizer):
        """Tokenizer of the base dialect with ``JSON`` mapped to ``TokenType.TEXT``."""

        KEYWORDS = {
            **presto.Tokenizer.KEYWORDS,
            "JSON": TokenType.TEXT,
        }