databricks-labs-lakebridge 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/__init__.py +3 -0
- databricks/labs/__init__.py +3 -0
- databricks/labs/lakebridge/__about__.py +2 -0
- databricks/labs/lakebridge/__init__.py +11 -0
- databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks/labs/lakebridge/base_install.py +12 -0
- databricks/labs/lakebridge/cli.py +449 -0
- databricks/labs/lakebridge/config.py +192 -0
- databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks/labs/lakebridge/install.py +849 -0
- databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks/labs/lakebridge/lineage.py +42 -0
- databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks/labs/lakebridge/uninstall.py +28 -0
- databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
- databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
- databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
- databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
- docs/lakebridge/src/components/Button.tsx +81 -0
- docs/lakebridge/src/css/custom.css +167 -0
- docs/lakebridge/src/css/table.css +20 -0
- docs/lakebridge/src/pages/index.tsx +57 -0
- docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- docs/lakebridge/src/theme/Layout/index.tsx +18 -0
@@ -0,0 +1,138 @@
|
|
1
|
+
import logging
|
2
|
+
from collections.abc import Iterable
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
from sqlglot import expressions as exp, Dialect
|
6
|
+
from sqlglot import parse
|
7
|
+
from sqlglot.errors import ErrorLevel, ParseError, TokenError, UnsupportedError
|
8
|
+
from sqlglot.expressions import Expression, Select
|
9
|
+
from sqlglot.optimizer.scope import Scope, build_scope
|
10
|
+
|
11
|
+
from databricks.labs.lakebridge.transpiler.transpile_status import TranspileError, ErrorKind, ErrorSeverity
|
12
|
+
from databricks.labs.lakebridge.transpiler.sqlglot.local_expression import AliasInfo
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
def check_for_unsupported_lca(
    from_dialect: Dialect,
    source_sql: str,
    file_path: Path,
) -> TranspileError | None:
    """
    Check for presence of unsupported lateral column aliases in window expressions and where clauses.

    :param from_dialect: dialect used to parse ``source_sql``
    :param source_sql: SQL text to inspect; every parsed statement is checked
    :param file_path: path reported in the log message and in the returned error
    :return: An error if found, ``None`` otherwise (also ``None`` when the SQL cannot be parsed)
    """
    try:
        all_parsed_expressions: Iterable[Expression | None] = parse(
            source_sql, read=from_dialect, error_level=ErrorLevel.RAISE
        )
        root_expressions: Iterable[Expression] = [pe for pe in all_parsed_expressions if pe is not None]
    except (ParseError, TokenError, UnsupportedError) as e:
        # Best effort: a parse failure is logged and reported as "nothing found" instead of being raised.
        logger.warning(f"Error while preprocessing {file_path}: {e}")
        return None

    aliases_in_where: set[str] = set()
    aliases_in_window: set[str] = set()

    for expr in root_expressions:
        # Every SELECT (nested ones included) is analysed against its own alias list.
        for select in expr.find_all(exp.Select, bfs=False):
            alias_info = _find_aliases_in_select(select)
            aliases_in_where.update(_find_invalid_lca_in_where(select, alias_info))
            aliases_in_window.update(_find_invalid_lca_in_window(select, alias_info))

    if not (aliases_in_where or aliases_in_window):
        return None

    err_messages = [f"Unsupported operation found in file {file_path}. Needs manual review of transpiled query."]
    # The names come from sets; sort them so the message is identical from one run to the next.
    if aliases_in_where:
        err_messages.append(f"Lateral column aliases `{', '.join(sorted(aliases_in_where))}` found in where clause.")

    if aliases_in_window:
        err_messages.append(
            f"Lateral column aliases `{', '.join(sorted(aliases_in_window))}` found in window expressions."
        )

    return TranspileError("UNSUPPORTED_LCA", ErrorKind.ANALYSIS, ErrorSeverity.ERROR, file_path, " ".join(err_messages))
|
55
|
+
|
56
|
+
|
57
|
+
def unalias_lca_in_select(expr: exp.Expression) -> exp.Expression:
    """
    Replace lateral column alias references in the WHERE clause and window expressions of a SELECT.

    Anything that is not a ``Select``, or for which no scope can be built, is returned untouched.
    The replacement happens in place and the same ``expr`` object is returned.
    """
    if not isinstance(expr, exp.Select):
        return expr

    scope: Scope | None = build_scope(expr)
    if not scope:
        return expr

    # We won't search inside nested selects, they will be visited separately
    skipped = set(scope.derived_tables) | set(scope.subqueries)

    def is_nested_select(node: Expression) -> bool:
        return node in skipped

    known_aliases = _find_aliases_in_select(expr)

    where_clause: Expression | None = expr.args.get("where")
    nodes_in_where = where_clause.walk(prune=is_nested_select) if where_clause else ()
    for node in nodes_in_where:
        _replace_aliases(node, known_aliases)

    for window_expr in _find_windows_in_select(expr):
        for node in window_expr.walk():
            _replace_aliases(node, known_aliases)

    return expr
|
76
|
+
|
77
|
+
|
78
|
+
def _replace_aliases(column: Expression, alias_info: dict[str, AliasInfo]):
    """
    Replace ``column`` in its tree with the expression behind the alias it names, if any.

    Nodes that are not columns, columns that do not name a known alias, and aliases flagged
    ``is_same_name_as_column`` are left alone. Aliases referenced from inside the substituted
    expression are resolved recursively.

    :param column: node to inspect; only ``exp.Column`` instances are ever replaced
    :param alias_info: aliases of the enclosing select, keyed by alias name
    """
    if not isinstance(column, exp.Column):
        return
    info = alias_info.get(column.name)
    if info is None or info.is_same_name_as_column:
        return
    # Insert a private copy: the stored expression may be needed for several references, and
    # putting the very same node object at multiple places in the tree (then rewriting it in
    # place) would leave shared nodes behind and alter the caller's alias table.
    replacement = info.expression.copy()
    column.replace(replacement)
    for node in replacement.walk():
        _replace_aliases(node, alias_info)
|
88
|
+
|
89
|
+
|
90
|
+
def _find_windows_in_select(select: Select) -> list[exp.Window]:
    """
    Collect the window expressions found in the projections of ``select``.

    All windows of a projection are returned, not just the first one, so that an expression such as
    ``ROW_NUMBER() OVER (...) - RANK() OVER (...)`` contributes both of its windows.

    :param select: the select whose projection list is searched
    :return: the windows found, in projection order
    """
    return [window for projection in select.expressions for window in projection.find_all(exp.Window)]
|
97
|
+
|
98
|
+
|
99
|
+
def _find_aliases_in_select(select_expr: Select) -> dict[str, AliasInfo]:
    """
    Map each aliased projection of ``select_expr`` to an ``AliasInfo``.

    The info carries the alias name, a copy of the un-aliased expression, and whether a column
    inside that expression has the same name as the alias itself.
    """
    found: dict[str, AliasInfo] = {}
    for projection in select_expr.expressions:
        if not isinstance(projection, exp.Alias):
            continue
        name = projection.output_name
        shadows_column = any(col.name == name for col in projection.find_all(exp.Column))
        found[name] = AliasInfo(name, projection.unalias().copy(), shadows_column)
    return found
|
111
|
+
|
112
|
+
|
113
|
+
def _find_invalid_lca_in_where(
    select_expr: Select,
    aliases: dict[str, AliasInfo],
) -> set[str]:
    """
    Return the names of aliases that are referenced as columns in the WHERE clause of ``select_expr``.

    Aliases flagged ``is_same_name_as_column`` are not reported. The result is empty when the
    select has no WHERE clause.
    """
    where_clause: Expression | None = select_expr.args.get("where")
    if not where_clause:
        return set()
    return {
        col.name
        for col in where_clause.find_all(exp.Column)
        if col.name in aliases and not aliases[col.name].is_same_name_as_column
    }
|
125
|
+
|
126
|
+
|
127
|
+
def _find_invalid_lca_in_window(
    select_expr: Select,
    aliases: dict[str, AliasInfo],
) -> set[str]:
    """
    Return the names of aliases that are referenced as columns inside window expressions of ``select_expr``.

    The windows are those reported by ``_find_windows_in_select``; aliases flagged
    ``is_same_name_as_column`` are not reported.
    """
    return {
        col.name
        for window in _find_windows_in_select(select_expr)
        for col in window.find_all(exp.Column)
        if col.name in aliases and not aliases[col.name].is_same_name_as_column
    }
|
@@ -0,0 +1,197 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
|
3
|
+
from sqlglot import expressions as exp
|
4
|
+
from sqlglot.expressions import AggFunc, Condition, Expression, Func
|
5
|
+
|
6
|
+
|
7
|
+
class NthValue(AggFunc):
    """Aggregate function node: `this` is required, `offset` is optional."""

    arg_types = {"this": True, "offset": False}
|
9
|
+
|
10
|
+
|
11
|
+
class Parameter(Expression):
    """Expression node: `this` is required, `wrapped` and `suffix` are optional."""

    arg_types = {"this": True, "wrapped": False, "suffix": False}
|
13
|
+
|
14
|
+
|
15
|
+
class Collate(Func):
    """Function node: both `this` and `expressions` are required."""

    arg_types = {"this": True, "expressions": True}
|
17
|
+
|
18
|
+
|
19
|
+
class Bracket(Condition):
    """Condition node: both `this` and `expressions` are required."""

    arg_types = {"this": True, "expressions": True}
|
21
|
+
|
22
|
+
|
23
|
+
class Split(Func):
    """
    Redefinition of sqlglot's Split expression in which `expression` is optional (False),
    so that a default delimiter can be handled.
    Please refer to the test case `test_strtok_to_array` -> `select STRTOK_TO_ARRAY('my text is divided')`
    """

    arg_types = {"this": True, "expression": False, "limit": False}
|
30
|
+
|
31
|
+
|
32
|
+
class MakeDate(Func):
    """Function node: `this` is required, `expression` and `zone` are optional."""

    arg_types = {"this": True, "expression": False, "zone": False}
|
34
|
+
|
35
|
+
|
36
|
+
class ConvertTimeZone(Func):
    """Function node: `srcTZ` and `tgtTZ` are required, `this` is optional."""

    arg_types = {"srcTZ": True, "tgtTZ": True, "this": False}
|
38
|
+
|
39
|
+
|
40
|
+
class TryToDate(Func):
    """Function node: `this` is required, `format` is optional."""

    arg_types = {"this": True, "format": False}
|
42
|
+
|
43
|
+
|
44
|
+
class TryToTimestamp(Func):
    """Function node: `this` is required, `format` is optional."""

    arg_types = {"this": True, "format": False}
|
46
|
+
|
47
|
+
|
48
|
+
class SplitPart(Func):
    """Function node: `this` is required, `expression` and `partNum` are optional."""

    arg_types = {"this": True, "expression": False, "partNum": False}
|
50
|
+
|
51
|
+
|
52
|
+
class StrTok(Func):
    """Function node: `this` is required, `expression` and `partNum` are optional."""

    arg_types = {"this": True, "expression": False, "partNum": False}
|
54
|
+
|
55
|
+
|
56
|
+
class TryToNumber(Func):
    """Function node: `this` is required; `expression`, `precision` and `scale` are optional."""

    arg_types = {"this": True, "expression": False, "precision": False, "scale": False}

    # SQL function names associated with this node.
    _sql_names = ["TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"]
|
60
|
+
|
61
|
+
|
62
|
+
class DateFormat(Func):
    """Function node: `this` is required, `expression` is optional."""

    arg_types = {"this": True, "expression": False}
|
64
|
+
|
65
|
+
|
66
|
+
class IsInteger(Func):
    """Function node that keeps the argument layout inherited from `Func`."""
|
68
|
+
|
69
|
+
|
70
|
+
class JsonExtractPathText(Func):
    """Function node: both `this` and `path_name` are required."""

    arg_types = {"this": True, "path_name": True}
|
72
|
+
|
73
|
+
|
74
|
+
class BitOr(AggFunc):
    """Aggregate function node that keeps the argument layout inherited from `AggFunc`."""
|
76
|
+
|
77
|
+
|
78
|
+
class ArrayConstructCompact(Func):
    """Function node with an optional `expressions` list, flagged as taking variable-length arguments."""

    arg_types = {"expressions": False}

    is_var_len_args = True
|
82
|
+
|
83
|
+
|
84
|
+
class ArrayIntersection(Func):
    """Function node: both `this` and `expression` are required."""

    arg_types = {"this": True, "expression": True}
|
86
|
+
|
87
|
+
|
88
|
+
class ArraySlice(Func):
    """Function node: `this`, `from` and `to` are all required."""

    arg_types = {"this": True, "from": True, "to": True}
|
90
|
+
|
91
|
+
|
92
|
+
class ObjectKeys(Func):
    """Function node with a single required `this` argument."""

    arg_types = {"this": True}
|
94
|
+
|
95
|
+
|
96
|
+
class ToBoolean(Func):
    """Function node: `this` is required, `raise_error` is optional."""

    arg_types = {"this": True, "raise_error": False}
|
98
|
+
|
99
|
+
|
100
|
+
class ToDouble(Func):
    """Function node that keeps the argument layout inherited from `Func`."""
|
102
|
+
|
103
|
+
|
104
|
+
class ToObject(Func):
    """Function node that keeps the argument layout inherited from `Func`."""
|
106
|
+
|
107
|
+
|
108
|
+
class ToNumber(Func):
    """Function node: `this` is required; `expression`, `precision` and `scale` are optional."""

    arg_types = {"this": True, "expression": False, "precision": False, "scale": False}

    # SQL function names associated with this node.
    _sql_names = ["TO_DECIMAL", "TO_NUMBER", "TO_NUMERIC"]
|
112
|
+
|
113
|
+
|
114
|
+
class TimestampFromParts(Func):
    """Function node with six required parts (`this` through `sec`) and optional `nanosec` / `Zone`."""

    # NOTE(review): the last key is spelled "Zone" with a capital Z, unlike the lowercase "zone"
    # used by other nodes in this module. It is left untouched because code reading this
    # argument by name would have to change together with it.
    arg_types = {
        "this": True,
        "expression": True,
        "day": True,
        "hour": True,
        "min": True,
        "sec": True,
        "nanosec": False,
        "Zone": False,
    }
|
125
|
+
|
126
|
+
|
127
|
+
class ToVariant(Func):
    """Function node that keeps the argument layout inherited from `Func`."""
|
129
|
+
|
130
|
+
|
131
|
+
class UUID(Func):
    """Function node whose `this` and `name` arguments are both optional."""

    arg_types = {"this": False, "name": False}
|
133
|
+
|
134
|
+
|
135
|
+
class DateTrunc(Func):
    """Function node: `this` is required, `unit` and `zone` are optional (`unit` is declared first)."""

    arg_types = {"unit": False, "this": True, "zone": False}
|
137
|
+
|
138
|
+
|
139
|
+
class Median(Func):
    """Function node with a single required `this` argument."""

    arg_types = {"this": True}
|
141
|
+
|
142
|
+
|
143
|
+
class CumeDist(Func):
    """Function node whose only argument, `this`, is optional."""

    arg_types = {"this": False}
|
145
|
+
|
146
|
+
|
147
|
+
class DenseRank(Func):
    """Function node whose only argument, `this`, is optional."""

    arg_types = {"this": False}
|
149
|
+
|
150
|
+
|
151
|
+
class Rank(Func):
    """Function node whose only argument, `this`, is optional."""

    arg_types = {"this": False}
|
153
|
+
|
154
|
+
|
155
|
+
class PercentRank(Func):
    """Function node whose only argument, `this`, is optional."""

    arg_types = {"this": False}
|
157
|
+
|
158
|
+
|
159
|
+
class Ntile(Func):
    """Function node: `this` is required, `is_string` is optional."""

    arg_types = {"this": True, "is_string": False}
|
161
|
+
|
162
|
+
|
163
|
+
class ToArray(Func):
    """Function node: `this` is required, `expression` is optional."""

    arg_types = {"this": True, "expression": False}
|
165
|
+
|
166
|
+
|
167
|
+
@dataclass
class WithinGroupParams:
    """Plain record pairing an aggregated column with its ordering columns."""

    # Column being aggregated.
    agg_col: exp.Column
    # List of (column, is ascending)
    order_cols: list[tuple[exp.Column, bool]]
|
171
|
+
|
172
|
+
|
173
|
+
@dataclass
class AliasInfo:
    """Plain record describing one alias: its name, an expression, and a same-name flag."""

    # Alias name.
    name: str
    # Expression associated with the alias.
    expression: exp.Expression
    # Whether the alias has the same name as a column.
    is_same_name_as_column: bool
|
178
|
+
|
179
|
+
|
180
|
+
class MapKeys(Func):
    """Function node with a single required `this` argument."""

    arg_types = {"this": True}
|
182
|
+
|
183
|
+
|
184
|
+
class ArrayExists(Func):
    """Function node: both `this` and `expression` are required."""

    arg_types = {"this": True, "expression": True}
|
186
|
+
|
187
|
+
|
188
|
+
class Locate(Func):
    """Function node: `substring` and `this` are required (in that order), `position` is optional."""

    arg_types = {"substring": True, "this": True, "position": False}
|
190
|
+
|
191
|
+
|
192
|
+
class NamedStruct(Func):
    """Function node with a required `expressions` list."""

    arg_types = {"expressions": True}
|
194
|
+
|
195
|
+
|
196
|
+
class GetJsonObject(Func):
    """Function node: both `this` and `path` are required."""

    arg_types = {"this": True, "path": True}
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
from sqlglot.dialects.oracle import Oracle as Orc
|
2
|
+
from sqlglot.tokens import TokenType
|
3
|
+
|
4
|
+
|
5
|
+
class Oracle(Orc):
    """Oracle dialect subclass whose tokenizer maps a set of extra type keywords to ``TokenType.TEXT``."""

    # Instantiate Oracle Dialect
    oracle = Orc()

    class Tokenizer(Orc.Tokenizer):
        # Base keywords first, then the additional names, all tokenized as TEXT.
        KEYWORDS = {
            **Orc.Tokenizer.KEYWORDS,
            **dict.fromkeys(
                (
                    'LONG',
                    'NCLOB',
                    'ROWID',
                    'UROWID',
                    'ANYTYPE',
                    'ANYDATA',
                    'ANYDATASET',
                    'XMLTYPE',
                    'SDO_GEOMETRY',
                    'SDO_TOPO_GEOMETRY',
                    'SDO_GEORASTER',
                ),
                TokenType.TEXT,
            ),
        }
|
@@ -0,0 +1,202 @@
|
|
1
|
+
import logging
|
2
|
+
from sqlglot.dialects.presto import Presto as presto
|
3
|
+
from sqlglot import exp
|
4
|
+
from sqlglot.helper import seq_get
|
5
|
+
from sqlglot.errors import ParseError
|
6
|
+
from sqlglot.tokens import TokenType
|
7
|
+
|
8
|
+
from databricks.labs.lakebridge.transpiler.sqlglot import local_expression
|
9
|
+
|
10
|
+
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
|
14
|
+
def _build_approx_percentile(args: list) -> exp.Expression:
    """
    Build an ``ApproxQuantile`` node from the arguments of ``APPROX_PERCENTILE``.

    With three or four arguments the last one is read as a numeric accuracy and rewritten to
    ``int((1 / accuracy) * 100)``; with four arguments the second one is passed on as ``weight``.
    Any other argument count is handed to ``ApproxQuantile.from_arg_list`` unchanged.

    NOTE(review): a three-argument call is always read as (value, quantile, accuracy); if the
    source dialect also allows (value, weight, quantile), that form is not distinguished here.

    :param args: parsed function arguments
    :return: the resulting expression
    :raises ParseError: when the accuracy argument is not a usable, non-zero number literal
    """
    arg_count = len(args)
    if arg_count not in (3, 4):
        return exp.ApproxQuantile.from_arg_list(args)

    accuracy_index = arg_count - 1
    accuracy_arg = seq_get(args, accuracy_index)
    try:
        number = float(accuracy_arg.this) if accuracy_arg is not None else 0
        # No padding inside the literal: both argument counts must render the same way.
        accuracy = exp.Literal(this=f'{int((1/number) * 100)}', is_string=False)
    except (TypeError, ValueError, ZeroDivisionError, OverflowError) as exc:
        # A zero, non-literal or out-of-range accuracy is reported as a parse error
        # rather than leaking an arithmetic/type error out of the parser.
        raise ParseError(
            f"Expected a string representation of a number for argument {accuracy_index}, but got {accuracy_arg}"
        ) from exc

    if arg_count == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=accuracy,
        )
    return exp.ApproxQuantile(
        this=seq_get(args, 0),
        quantile=seq_get(args, 1),
        accuracy=accuracy,
    )
|
39
|
+
|
40
|
+
|
41
|
+
def _build_any_keys_match(args: list) -> local_expression.ArrayExists:
    """Build ``ArrayExists(MapKeys(<first arg>), <second arg>)`` from the parsed arguments."""
    map_arg = seq_get(args, 0)
    predicate = seq_get(args, 1)
    keys_of_map = local_expression.MapKeys(this=map_arg)
    return local_expression.ArrayExists(this=keys_of_map, expression=predicate)
|
45
|
+
|
46
|
+
|
47
|
+
def _build_str_position(args: list) -> local_expression.Locate:
    """
    Build a ``Locate`` node from ``STRPOS`` arguments: the first argument becomes ``this`` and the
    second becomes ``substring``. A third argument, when present, is passed on as ``position``
    after a warning is logged.
    """
    # TODO the 3rd param in presto strpos and databricks locate has different implementation.
    # For now we haven't implemented the logic same as presto for 3rd param.
    # Users should be vigilant when using 3 param function in presto strpos.
    searched = seq_get(args, 0)
    wanted = seq_get(args, 1)
    if len(args) != 3:
        return local_expression.Locate(substring=wanted, this=searched)

    logger.warning(
        "*Warning:: The third parameter in Presto's `strpos` function and Databricks' `locate` function "
        "have different implementations. Please exercise caution when using the three-parameter version "
        "of the `strpos` function in Presto."
    )
    return local_expression.Locate(substring=wanted, this=searched, position=seq_get(args, 2))
|
60
|
+
|
61
|
+
|
62
|
+
def _build_array_average(args: list) -> exp.Reduce:
    """
    Build a ``Reduce`` node that averages the non-NULL elements of the first argument.

    The array is first filtered with ``x -> NOT (x IS NULL)``. The reduction starts from a named
    struct ``(sum = CAST(0 AS DOUBLE), cnt = 0)``, adds each element to ``sum`` and 1 to ``cnt``,
    and finishes with ``try_divide(acc.sum, acc.cnt)``.
    """

    # Each helper returns a brand-new node so that no node object is shared between tree positions.
    def ident(name: str) -> exp.Identifier:
        return exp.Identifier(this=name, quoted=False)

    def text(value: str) -> exp.Literal:
        return exp.Literal(this=value, is_string=True)

    def number(value: str) -> exp.Literal:
        return exp.Literal(this=value, is_string=False)

    def acc_field(name: str) -> exp.Dot:
        return exp.Dot(this=ident("acc"), expression=ident(name))

    non_null_items = exp.ArrayFilter(
        this=seq_get(args, 0),
        expression=exp.Lambda(
            this=exp.Not(this=exp.Is(this=ident("x"), expression=exp.Null())),
            expressions=[ident("x")],
        ),
    )
    initial_state = local_expression.NamedStruct(
        expressions=[
            text("sum"),
            exp.Cast(this=number("0"), to=exp.DataType(this="DOUBLE")),
            text("cnt"),
            number("0"),
        ],
    )
    accumulate = exp.Lambda(
        this=local_expression.NamedStruct(
            expressions=[
                text("sum"),
                exp.Add(this=acc_field("sum"), expression=ident("x")),
                text("cnt"),
                exp.Add(this=acc_field("cnt"), expression=number("1")),
            ],
        ),
        expressions=[ident("acc"), ident("x")],
    )
    finalize = exp.Lambda(
        this=exp.Anonymous(this="try_divide", expressions=[acc_field("sum"), acc_field("cnt")]),
        expressions=[ident("acc")],
    )
    return exp.Reduce(this=non_null_items, initial=initial_state, merge=accumulate, finish=finalize)
|
119
|
+
|
120
|
+
|
121
|
+
def _build_json_size(args: list):
    """
    Build a ``CASE`` expression over ``GetJsonObject(<first arg>, <second arg>)``.

    Branches, in order: when the extracted value is LIKE ``{%`` the size of ``from_json`` with
    schema ``map<string,string>``; when it is LIKE ``[%`` the size of ``from_json`` with schema
    ``array<string>``; when it is not NULL the literal 0; otherwise NULL.
    """

    def extracted() -> local_expression.GetJsonObject:
        # A fresh node on every call: the same extraction appears at several places in the tree.
        return local_expression.GetJsonObject(
            this=exp.Column(this=seq_get(args, 0)),
            path=exp.Column(this=seq_get(args, 1)),
        )

    def size_when_like(pattern: str, schema: str) -> exp.If:
        return exp.If(
            this=exp.Like(this=extracted(), expression=exp.Literal(this=pattern, is_string=True)),
            true=exp.ArraySize(
                this=exp.Anonymous(
                    this="from_json",
                    expressions=[extracted(), exp.Literal(this=schema, is_string=True)],
                )
            ),
        )

    object_branch = size_when_like("{%", "map<string,string>")
    array_branch = size_when_like("[%", "array<string>")
    not_null_branch = exp.If(
        this=exp.Not(this=exp.Is(this=extracted(), expression=exp.Null())),
        true=exp.Literal(this="0", is_string=False),
    )
    return exp.Case(ifs=[object_branch, array_branch, not_null_branch], default=exp.Null())
|
181
|
+
|
182
|
+
|
183
|
+
class Presto(presto):
    """Presto dialect subclass that overrides a few function builders and one tokenizer keyword."""

    class Parser(presto.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        # Base function table, with the entries below added or overridden.
        FUNCTIONS = {
            **presto.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "STRPOS": _build_str_position,
            "ANY_KEYS_MATCH": _build_any_keys_match,
            "ARRAY_AVERAGE": _build_array_average,
            "JSON_SIZE": _build_json_size,
            "FORMAT_DATETIME": local_expression.DateFormat.from_arg_list,
        }

    class Tokenizer(presto.Tokenizer):
        # JSON is tokenized as TEXT.
        KEYWORDS = {**presto.Tokenizer.KEYWORDS, "JSON": TokenType.TEXT}
|