databricks-labs-lakebridge 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks/__init__.py +3 -0
- databricks/labs/__init__.py +3 -0
- databricks/labs/lakebridge/__about__.py +2 -0
- databricks/labs/lakebridge/__init__.py +11 -0
- databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
- databricks/labs/lakebridge/assessments/pipeline.py +188 -0
- databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
- databricks/labs/lakebridge/base_install.py +12 -0
- databricks/labs/lakebridge/cli.py +449 -0
- databricks/labs/lakebridge/config.py +192 -0
- databricks/labs/lakebridge/connections/__init__.py +0 -0
- databricks/labs/lakebridge/connections/credential_manager.py +89 -0
- databricks/labs/lakebridge/connections/database_manager.py +98 -0
- databricks/labs/lakebridge/connections/env_getter.py +13 -0
- databricks/labs/lakebridge/contexts/__init__.py +0 -0
- databricks/labs/lakebridge/contexts/application.py +133 -0
- databricks/labs/lakebridge/coverage/__init__.py +0 -0
- databricks/labs/lakebridge/coverage/commons.py +223 -0
- databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
- databricks/labs/lakebridge/coverage/local_report.py +9 -0
- databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
- databricks/labs/lakebridge/deployment/__init__.py +0 -0
- databricks/labs/lakebridge/deployment/configurator.py +199 -0
- databricks/labs/lakebridge/deployment/dashboard.py +140 -0
- databricks/labs/lakebridge/deployment/installation.py +125 -0
- databricks/labs/lakebridge/deployment/job.py +147 -0
- databricks/labs/lakebridge/deployment/recon.py +145 -0
- databricks/labs/lakebridge/deployment/table.py +30 -0
- databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
- databricks/labs/lakebridge/discovery/table.py +36 -0
- databricks/labs/lakebridge/discovery/table_definition.py +23 -0
- databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
- databricks/labs/lakebridge/errors/exceptions.py +1 -0
- databricks/labs/lakebridge/helpers/__init__.py +0 -0
- databricks/labs/lakebridge/helpers/db_sql.py +24 -0
- databricks/labs/lakebridge/helpers/execution_time.py +20 -0
- databricks/labs/lakebridge/helpers/file_utils.py +64 -0
- databricks/labs/lakebridge/helpers/metastore.py +164 -0
- databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
- databricks/labs/lakebridge/helpers/string_utils.py +62 -0
- databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
- databricks/labs/lakebridge/helpers/validation.py +101 -0
- databricks/labs/lakebridge/install.py +849 -0
- databricks/labs/lakebridge/intermediate/__init__.py +0 -0
- databricks/labs/lakebridge/intermediate/dag.py +88 -0
- databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
- databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
- databricks/labs/lakebridge/jvmproxy.py +56 -0
- databricks/labs/lakebridge/lineage.py +42 -0
- databricks/labs/lakebridge/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/compare.py +414 -0
- databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
- databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
- databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
- databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
- databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
- databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
- databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
- databricks/labs/lakebridge/reconcile/constants.py +37 -0
- databricks/labs/lakebridge/reconcile/exception.py +42 -0
- databricks/labs/lakebridge/reconcile/execute.py +920 -0
- databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
- databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
- databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
- databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
- databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
- databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
- databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
- databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
- databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
- databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
- databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
- databricks/labs/lakebridge/reconcile/runner.py +97 -0
- databricks/labs/lakebridge/reconcile/sampler.py +239 -0
- databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
- databricks/labs/lakebridge/resources/__init__.py +0 -0
- databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
- databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
- databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
- databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
- databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
- databricks/labs/lakebridge/transpiler/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/execute.py +423 -0
- databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
- databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
- databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
- databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
- databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
- databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
- databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
- databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
- databricks/labs/lakebridge/uninstall.py +28 -0
- databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
- databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
- databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
- databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
- databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
- databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
- databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
- docs/lakebridge/src/components/Button.tsx +81 -0
- docs/lakebridge/src/css/custom.css +167 -0
- docs/lakebridge/src/css/table.css +20 -0
- docs/lakebridge/src/pages/index.tsx +57 -0
- docs/lakebridge/src/theme/Footer/index.tsx +24 -0
- docs/lakebridge/src/theme/Layout/index.tsx +18 -0
databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py
@@ -0,0 +1,771 @@
```python
import logging
import re

from sqlglot import expressions as exp
from sqlglot.dialects.databricks import Databricks as SqlglotDatabricks
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.dialect import if_sql
from sqlglot.dialects.dialect import rename_func
from sqlglot.errors import UnsupportedError
from sqlglot.helper import apply_index_offset, csv

from databricks.labs.lakebridge.transpiler.sqlglot import local_expression
from databricks.labs.lakebridge.transpiler.sqlglot.lca_utils import unalias_lca_in_select

# pylint: disable=too-many-public-methods

logger = logging.getLogger(__name__)

VALID_DATABRICKS_TYPES = {
    "BIGINT",
    "BINARY",
    "BOOLEAN",
    "DATE",
    "DECIMAL",
    "DOUBLE",
    "FLOAT",
    "INT",
    "INTERVAL",
    "VOID",
    "SMALLINT",
    "STRING",
    "TIMESTAMP",
    "TINYINT",
    "ARRAY",
    "MAP",
    "STRUCT",
}

PRECISION_CONST = 38
SCALE_CONST = 0


def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
    return self.func("DATE_TRUNC", exp.Literal.string(expression.text("unit").upper()), self.sql(expression.this))


def _parm_sfx(self, expression: local_expression.Parameter) -> str:
    this = self.sql(expression, "this")
    this = f"{{{this}}}" if expression.args.get("wrapped") else f"{this}"
    suffix = self.sql(expression, "suffix")
    PARAMETER_TOKEN = "$"  # noqa: N806 pylint: disable=invalid-name
    return f"{PARAMETER_TOKEN}{this}{suffix}"


def _lateral_bracket_sql(self, expression: local_expression.Bracket) -> str:
    """Overwrites `sqlglot/generator.py` `bracket_sql()` function
    to convert <TABLE_ALIAS>`[COL_NAME]` to <TABLE_ALIAS>`.COL_NAME`.
    Example: c[val] ==> c.val
    """
    expressions = apply_index_offset(expression.this, expression.expressions, self.dialect.INDEX_OFFSET)
    expressions = [self.sql(e.alias_or_name.strip("'")) for e in expressions]
    # If expression contains space in between encode it in backticks(``):
    # e.g. ref."ID Number" -> ref.`ID Number`.
    expressions_sql = ", ".join(f"`{e}`" if " " in e else e for e in expressions)
    return f"{self.sql(expression, 'this')}:{expressions_sql}"


def _format_create_sql(self, expression: exp.Create) -> str:
    expression = expression.copy()

    # Remove modifiers in order to simplify the schema. For example, this removes things like "IF NOT EXISTS"
    # from "CREATE TABLE foo IF NOT EXISTS".
    args_to_delete = ["temporary", "transient", "external", "exists", "unique", "materialized", "properties"]
    for arg_to_delete in args_to_delete:
        if expression.args.get(arg_to_delete):
            del expression.args[arg_to_delete]

    return self.create_sql(expression)


def _curr_time():
    return "date_format(current_timestamp(), 'HH:mm:ss')"


def _select_contains_index(expression: exp.Select) -> bool:
    for expr in expression.expressions:
        column = expr.unalias() if isinstance(expr, exp.Alias) else expr
        if column.name == "index":
            return True
    return False


def _has_parse_json(expression):
    if expression.find(exp.ParseJSON):
        return True
    _select = expression.find_ancestor(exp.Select)
    if _select:
        _from = _select.find(exp.From)
        if _from:
            _parse_json = _from.find(exp.ParseJSON)
            if _parse_json:
                return True
    return False


def _generate_function_str(select_contains_index, has_parse_json, generator_expr, alias, is_outer, alias_str):
    if select_contains_index:
        generator_function_str = f"POSEXPLODE({generator_expr})"
        alias_str = f"{' ' + alias.name if isinstance(alias, exp.TableAlias) else ''} AS index, value"
    elif has_parse_json and is_outer:
        generator_function_str = f"VARIANT_EXPLODE_OUTER({generator_expr})"
    elif has_parse_json:
        generator_function_str = f"VARIANT_EXPLODE({generator_expr})"
    else:
        generator_function_str = f"VIEW EXPLODE({generator_expr})"

    return generator_function_str, alias_str


def _generate_lateral_statement(self, select_contains_index, has_parse_json, generator_function_str, alias_str):
    if select_contains_index:
        lateral_statement = self.sql(f"LATERAL VIEW OUTER {generator_function_str}{alias_str}")
    elif has_parse_json:
        lateral_statement = self.sql(f", LATERAL {generator_function_str}{alias_str}")
    else:
        lateral_statement = self.sql(f" LATERAL {generator_function_str}{alias_str}")

    return lateral_statement


def _lateral_view(self: SqlglotDatabricks.Generator, expression: exp.Lateral) -> str:
    has_parse_json = _has_parse_json(expression)
    this = expression.args['this']
    alias = expression.args['alias']
    alias_str = f" AS {alias.name}" if isinstance(alias, exp.TableAlias) else ""
    generator_function_str = self.sql(this)
    is_outer = False
    select_contains_index = False

    if isinstance(this, exp.Explode):
        explode_expr = this
        parent_select = explode_expr.parent_select
        select_contains_index = _select_contains_index(parent_select) if parent_select else False
        generator_expr = ""
        if isinstance(explode_expr.this, exp.Kwarg):
            generator_expr = self.sql(explode_expr.this, 'expression')
            if not isinstance(explode_expr.this.expression, exp.ParseJSON):
                generator_expr = generator_expr.replace("{", "").replace("}", "")
        for expr in explode_expr.expressions:
            node = str(expr.this).upper()
            if node == "PATH":
                generator_expr += "." + self.sql(expr, 'expression').replace("'", "")
            if node == "OUTER":
                is_outer = True

        if not generator_expr:
            generator_expr = expression.this.this

        generator_function_str, alias_str = _generate_function_str(
            select_contains_index, has_parse_json, generator_expr, alias, is_outer, alias_str
        )

    alias_cols = alias.columns if alias else []
    if len(alias_cols) <= 2:
        alias_str = f" As {', '.join([item.this for item in alias_cols])}"

    lateral_statement = _generate_lateral_statement(
        self, select_contains_index, has_parse_json, generator_function_str, alias_str
    )
    return lateral_statement


# [TODO] Add more datatype coverage https://docs.databricks.com/sql/language-manual/sql-ref-datatypes.html
def _datatype_map(self, expression) -> str:
    if expression.this in [exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR, exp.DataType.Type.CHAR]:
        return "STRING"
    if expression.this in [exp.DataType.Type.TIMESTAMP, exp.DataType.Type.TIMESTAMPLTZ]:
        return "TIMESTAMP"
    if expression.this == exp.DataType.Type.BINARY:
        return "BINARY"
    if expression.this == exp.DataType.Type.NCHAR:
        return "STRING"
    return self.datatype_sql(expression)


def try_to_date(self, expression: local_expression.TryToDate):
    func = "TRY_TO_TIMESTAMP"
    time_format = self.sql(expression, "format")
    if not time_format:
        time_format = Hive.DATE_FORMAT

    ts_result = self.func(func, expression.this, time_format)
    return exp.Date(this=ts_result)


def try_to_number(self, expression: local_expression.TryToNumber):
    func = "TRY_TO_NUMBER"
    precision = self.sql(expression, "precision")
    scale = self.sql(expression, "scale")

    if not precision:
        precision = 38

    if not scale:
        scale = 0

    func_expr = self.func(func, expression.this)
    if expression.expression:
        func_expr = self.func(func, expression.this, expression.expression)
    else:
        func_expr = expression.this

    return f"CAST({func_expr} AS DECIMAL({precision}, {scale}))"


def _to_boolean(self: SqlglotDatabricks.Generator, expression: local_expression.ToBoolean) -> str:
    this = self.sql(expression, "this")
    logger.debug(f"Converting {this} to Boolean")
    raise_error = self.sql(expression, "raise_error")
    raise_error_str = "RAISE_ERROR('Invalid parameter type for TO_BOOLEAN')" if bool(int(raise_error)) else "NULL"
    transformed = f"""
    CASE
       WHEN {this} IS NULL THEN NULL
       WHEN TYPEOF({this}) = 'boolean' THEN BOOLEAN({this})
       WHEN TYPEOF({this}) = 'string' THEN
           CASE
               WHEN LOWER({this}) IN ('true', 't', 'yes', 'y', 'on', '1') THEN TRUE
               WHEN LOWER({this}) IN ('false', 'f', 'no', 'n', 'off', '0') THEN FALSE
               ELSE RAISE_ERROR('Boolean value of x is not recognized by TO_BOOLEAN')
           END
       WHEN TRY_CAST({this} AS DOUBLE) IS NOT NULL THEN
           CASE
               WHEN ISNAN(CAST({this} AS DOUBLE)) OR CAST({this} AS DOUBLE) = DOUBLE('infinity') THEN
                   RAISE_ERROR('Invalid parameter type for TO_BOOLEAN')
               ELSE CAST({this} AS DOUBLE) != 0.0
           END
       ELSE {raise_error_str}
    END
    """
    return transformed


def _is_integer(self: SqlglotDatabricks.Generator, expression: local_expression.IsInteger) -> str:
    this = self.sql(expression, "this")
    transformed = f"""
    CASE
       WHEN {this} IS NULL THEN NULL
       WHEN {this} RLIKE '^-?[0-9]+$' AND TRY_CAST({this} AS INT) IS NOT NULL THEN TRUE
       ELSE FALSE
    END
    """
    return transformed


def _parse_json_extract_path_text(
    self: SqlglotDatabricks.Generator, expression: local_expression.JsonExtractPathText
) -> str:
    this = self.sql(expression, "this")
    path_name = expression.args["path_name"]
    if path_name.is_string:
        path = f"{self.dialect.QUOTE_START}$.{expression.text('path_name')}{self.dialect.QUOTE_END}"
    else:
        path = f"CONCAT('$.', {self.sql(expression, 'path_name')})"
    return f"GET_JSON_OBJECT({this}, {path})"


def _array_construct_compact(
    self: SqlglotDatabricks.Generator, expression: local_expression.ArrayConstructCompact
) -> str:
    exclude = "ARRAY(NULL)"
    array_expr = f"ARRAY({self.expressions(expression, flat=True)})"
    return f"ARRAY_EXCEPT({array_expr}, {exclude})"


def _array_slice(self: SqlglotDatabricks.Generator, expression: local_expression.ArraySlice) -> str:
    from_expr = self.sql(expression, "from")
    # In Databricks: array indices start at 1 in function `slice(array, start, length)`
    parsed_from_expr = 1 if from_expr == "0" else from_expr

    to_expr = self.sql(expression, "to")
    # Convert string expression to number and check if it is negative number
    if int(to_expr) < 0:
        err_message = "In Databricks: function `slice` length must be greater than or equal to 0"
        raise UnsupportedError(err_message)

    func = "SLICE"
    func_expr = self.func(func, expression.this, exp.Literal.number(parsed_from_expr), expression.args["to"])
    return func_expr


def _to_command(self, expr: exp.Command):
    this_sql = self.sql(expr, 'this')
    expression = self.sql(expr.expression, 'this')
    prefix = f"-- {this_sql}"
    if this_sql == "!":
        return f"{prefix}{expression}"
    return f"{prefix} {expression}"


def _parse_json(self, expression: exp.ParseJSON) -> str:
    return self.func("PARSE_JSON", expression.this, expression.expression)


def _to_number(self, expression: local_expression.ToNumber):
    func = "TO_NUMBER"
    precision = self.sql(expression, "precision")
    scale = self.sql(expression, "scale")

    func_expr = expression.this
    # if format is provided, else it will be vanilla cast to decimal
    if expression.expression:
        func_expr = self.func(func, expression.this, expression.expression)
        if precision:
            return f"CAST({func_expr} AS DECIMAL({precision}, {scale}))"
        return func_expr
    if not precision:
        precision = 38
    if not scale:
        scale = 0
    if not expression.expression and not precision:
        exception_msg = f"""Error Parsing expression {expression}:
                         * `format`: is required in Databricks [mandatory]
                         * `precision` and `scale`: are considered as (38, 0) if not specified.
                      """
        raise UnsupportedError(exception_msg)

    precision = PRECISION_CONST if not precision else precision
    scale = SCALE_CONST if not scale else scale
    return f"CAST({func_expr} AS DECIMAL({precision}, {scale}))"


def _uuid(self: SqlglotDatabricks.Generator, expression: local_expression.UUID) -> str:
    namespace = self.sql(expression, "this")
    name = self.sql(expression, "name")

    if namespace and name:
        logger.warning("UUID version 5 is not supported currently. Needs manual intervention.")
        return f"UUID({namespace}, {name})"

    return "UUID()"


def _parse_date_trunc(self: SqlglotDatabricks.Generator, expression: local_expression.DateTrunc) -> str:
    if not expression.args.get("unit"):
        error_message = f"Required keyword: 'unit' missing for {exp.DateTrunc}"
        raise UnsupportedError(error_message)
    return self.func("TRUNC", expression.this, expression.args.get("unit"))


def _get_within_group_params(
    expr: exp.ArrayAgg | exp.GroupConcat,
    within_group: exp.WithinGroup,
) -> local_expression.WithinGroupParams:
    has_distinct = isinstance(expr.this, exp.Distinct)
    agg_col = expr.this.expressions[0] if has_distinct else expr.this
    order_clause = within_group.expression
    order_cols = []
    for e in order_clause.expressions:
        desc = e.args.get("desc")
        is_order_a = not desc or exp.false() == desc
        order_cols.append((e.this, is_order_a))
    return local_expression.WithinGroupParams(
        agg_col=agg_col,
        order_cols=order_cols,
    )


def _create_named_struct_for_cmp(wg_params: local_expression.WithinGroupParams) -> exp.Expression:
    agg_col = wg_params.agg_col
    order_kv = []
    for i, (col, _) in enumerate(wg_params.order_cols):
        order_kv.extend([exp.Literal(this=f"sort_by_{i}", is_string=True), col])

    named_struct_func = exp.Anonymous(
        this="named_struct",
        expressions=[
            exp.Literal(this="value", is_string=True),
            agg_col,
            *order_kv,
        ],
    )
    return named_struct_func


def _current_date(self, expression: exp.CurrentDate) -> str:
    zone = self.sql(expression, "this")
    return f"CURRENT_DATE({zone})" if zone else "CURRENT_DATE()"


def _not_sql(self, expression: exp.Not) -> str:
    if isinstance(expression.this, exp.Is):
        return f"{self.sql(expression.this, 'this')} IS NOT {self.sql(expression.this, 'expression')}"
    return f"NOT {self.sql(expression, 'this')}"


def to_array(self, expression: exp.ToArray) -> str:
    return f"IF({self.sql(expression.this)} IS NULL, NULL, {self.func('ARRAY', expression.this)})"


class Databricks(SqlglotDatabricks):
    # Instantiate Databricks Dialect
    databricks = SqlglotDatabricks()
    NULL_ORDERING = "nulls_are_small"

    class Generator(SqlglotDatabricks.Generator):
        INVERSE_TIME_MAPPING: dict[str, str] = {
            **{v: k for k, v in SqlglotDatabricks.TIME_MAPPING.items()},
            "%-d": "dd",
        }

        COLLATE_IS_FUNC = True
        # [TODO]: Variant needs to be transformed better, for now parsing to string was deemed as the choice.
        TYPE_MAPPING = {
            **SqlglotDatabricks.Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "TINYINT",
            exp.DataType.Type.SMALLINT: "SMALLINT",
            exp.DataType.Type.INT: "INT",
            exp.DataType.Type.BIGINT: "BIGINT",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "VARIANT",
            exp.DataType.Type.FLOAT: "DOUBLE",
            exp.DataType.Type.OBJECT: "STRING",
            exp.DataType.Type.GEOGRAPHY: "STRING",
        }

        TRANSFORMS = {
            **SqlglotDatabricks.Generator.TRANSFORMS,
            exp.Create: _format_create_sql,
            exp.DataType: _datatype_map,
            exp.CurrentTime: _curr_time(),
            exp.Lateral: _lateral_view,
            exp.FromBase64: rename_func("UNBASE64"),
            exp.AutoIncrementColumnConstraint: lambda *_: "GENERATED ALWAYS AS IDENTITY",
            local_expression.Parameter: _parm_sfx,
            local_expression.ToBoolean: _to_boolean,
            local_expression.Bracket: _lateral_bracket_sql,
            local_expression.MakeDate: rename_func("MAKE_DATE"),
            local_expression.TryToDate: try_to_date,
            local_expression.TryToNumber: try_to_number,
            local_expression.IsInteger: _is_integer,
            local_expression.JsonExtractPathText: _parse_json_extract_path_text,
            local_expression.BitOr: rename_func("BIT_OR"),
            local_expression.ArrayConstructCompact: _array_construct_compact,
            local_expression.ArrayIntersection: rename_func("ARRAY_INTERSECT"),
            local_expression.ArraySlice: _array_slice,
            local_expression.ObjectKeys: rename_func("JSON_OBJECT_KEYS"),
            exp.ParseJSON: _parse_json,
            local_expression.TimestampFromParts: rename_func("MAKE_TIMESTAMP"),
            local_expression.ToDouble: rename_func("DOUBLE"),
            exp.Rand: rename_func("RANDOM"),
            local_expression.ToVariant: rename_func("TO_JSON"),
            local_expression.ToObject: rename_func("TO_JSON"),
            exp.ToBase64: rename_func("BASE64"),
            local_expression.ToNumber: _to_number,
            local_expression.UUID: _uuid,
            local_expression.DateTrunc: _parse_date_trunc,
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.Mod: rename_func("MOD"),
            exp.NullSafeEQ: lambda self, e: self.binary(e, "<=>"),
            exp.If: if_sql(false_value="NULL"),
            exp.Command: _to_command,
            exp.CurrentDate: _current_date,
            exp.Not: _not_sql,
            local_expression.ToArray: to_array,
            local_expression.ArrayExists: rename_func("EXISTS"),
        }

        def preprocess(self, expression: exp.Expression) -> exp.Expression:
            fixed_ast = expression.transform(unalias_lca_in_select, copy=False)
            return super().preprocess(fixed_ast)

        def format_time(self, expression: exp.Expression, inverse_time_mapping=None, inverse_time_trie=None):
            return super().format_time(expression, self.INVERSE_TIME_MAPPING)

        def join_sql(self, expression: exp.Join) -> str:
            """Overwrites `join_sql()` in `sqlglot/generator.py`
            Added logic to handle Lateral View
            """
            op_list = [
                expression.method,
                "GLOBAL" if expression.args.get("global") else None,
                expression.side,
                expression.kind,
                expression.hint if self.JOIN_HINTS else None,
            ]

            op_sql = " ".join(op for op in op_list if op)
            on_sql = self.sql(expression, "on")
            using = expression.args.get("using")

            if not on_sql and using:
                on_sql = csv(*(self.sql(column) for column in using))

            this_sql = self.sql(expression, "this")

            if on_sql:
                on_sql = self.indent(on_sql, skip_first=True)
                space = self.seg(" " * self.pad) if self.pretty else " "
                if using:
                    on_sql = f"{space}USING ({on_sql})"
                else:
                    on_sql = f"{space}ON {on_sql}"
            # Added the below elif block to handle Lateral View clause
            elif not op_sql and isinstance(expression.this, exp.Lateral):
                return f"\n {this_sql}"
            elif not op_sql:
                return f", {this_sql}"

            op_sql = f"{op_sql} JOIN" if op_sql else "JOIN"
            return f"{self.seg(op_sql)} {this_sql}{on_sql}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            sql = self.func("ARRAY_AGG", expression.this)
            within_group = expression.parent if isinstance(expression.parent, exp.WithinGroup) else None
            if not within_group:
                return sql

            wg_params = _get_within_group_params(expression, within_group)
            if len(wg_params.order_cols) == 1:
                order_col, is_order_asc = wg_params.order_cols[0]
                if wg_params.agg_col == order_col:
                    return f"SORT_ARRAY({sql}{'' if is_order_asc else ', FALSE'})"

            named_struct_func = _create_named_struct_for_cmp(wg_params)
            comparisons = []
            for i, (_, is_order_asc) in enumerate(wg_params.order_cols):
                comparisons.append(
                    f"WHEN left.sort_by_{i} < right.sort_by_{i} THEN {'-1' if is_order_asc else '1'} "
                    f"WHEN left.sort_by_{i} > right.sort_by_{i} THEN {'1' if is_order_asc else '-1'}"
                )

            array_sort = self.func(
                "ARRAY_SORT",
                self.func("ARRAY_AGG", named_struct_func),
                f"""(left, right) -> CASE
                        {' '.join(comparisons)}
                        ELSE 0
                    END""",
            )
            return self.func("TRANSFORM", array_sort, "s -> s.value")

        def groupconcat_sql(self, expr: exp.GroupConcat) -> str:
            arr_agg = exp.ArrayAgg(this=expr.this)
            within_group = expr.parent.copy() if isinstance(expr.parent, exp.WithinGroup) else None
            if within_group:
                arr_agg.parent = within_group

            return self.func(
                "ARRAY_JOIN",
                arr_agg,
                expr.args.get("separator") or exp.Literal(this="", is_string=True),
            )

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            agg_expr = expression.this
            if isinstance(agg_expr, (exp.ArrayAgg, exp.GroupConcat)):
                return self.sql(agg_expr)

            return super().withingroup_sql(expression)

        def split_sql(self, expression: local_expression.Split) -> str:
            """
            :param expression: local_expression.Split expression to be parsed
            :return: Converted expression (SPLIT) compatible with Databricks
            """
            delimiter = " "
            # To handle default delimiter
            if expression.expression:
                delimiter = expression.expression.name

            # Parsing logic to handle String and Table columns
            if expression.name and isinstance(expression.name, str):
                expr_name = f"'{expression.name}'"
            else:
                expr_name = expression.args["this"]
            return f"""SPLIT({expr_name},'[{delimiter}]')"""

        def delete_sql(self, expression: exp.Delete) -> str:
            this = self.sql(expression, "this")
            using = self.sql(expression, "using")
            where = self.sql(expression, "where")
            returning = self.sql(expression, "returning")
            limit = self.sql(expression, "limit")
            tables = self.expressions(expression, key="tables")
            tables = f" {tables}" if tables else ""

            if using:
                using = f" USING {using}" if using else ""
                where = where.replace("WHERE", "ON")
            else:
                this = f"FROM {this}" if this else ""

            if self.RETURNING_END:
                expression_sql = f" {this}{using}{where}{returning}{limit}"
            else:
                expression_sql = f"{returning}{this}{where}{limit}"

            if using:
                return self.prepend_ctes(expression, f"MERGE INTO {tables}{expression_sql} WHEN MATCHED THEN DELETE;")

            return self.prepend_ctes(expression, f"DELETE{tables}{expression_sql};")

        def converttimezone_sql(self, expression: exp.ConvertTimezone):
            func = "CONVERT_TIMEZONE"
            expr = expression.args["tgtTZ"]
            if len(expression.args) == 3 and expression.args.get("this"):
                expr = expression.args["this"]

            result = self.func(func, expression.args["srcTZ"], expr)
            if len(expression.args) == 3:
                result = self.func(func, expression.args["srcTZ"], expression.args["tgtTZ"], expr)

            return result

        def strtok_sql(self, expression: local_expression.StrTok) -> str:
            """
            :param expression: local_expression.StrTok expression to be parsed
            :return: Converted expression (SPLIT_PART) compatible with Databricks
            """
            # To handle default delimiter
            if expression.expression:
                delimiter = expression.expression.name
            else:
                delimiter = " "

            # Handle String and Table columns
            if expression.name and isinstance(expression.name, str):
                expr_name = f"'{expression.name}'"
            else:
                expr_name = expression.args["this"]

            # Handle Partition Number
            if len(expression.args) == 3 and expression.args.get("partNum"):
                part_num = expression.args["partNum"]
            else:
                part_num = 1

            return f"SPLIT_PART({expr_name}, '{delimiter}', {part_num})"

        def splitpart_sql(self, expression: local_expression.SplitPart) -> str:
            """
            :param expression: local_expression.SplitPart expression to be parsed
            :return: Converted expression (SPLIT_PART) compatible with Databricks
            """
            expr_name = self.sql(expression.this)
            delimiter = self.sql(expression.expression)
            part_num = self.sql(expression.args["partNum"])
            return f"SPLIT_PART({expr_name}, {delimiter}, {part_num})"

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """
            Skip begin command
            :param expression:
            :return: Empty string for unsupported operation
            """
            return ""

        def rollback_sql(self, expression: exp.Rollback) -> str:
            """
            Skip rollback command
            :param expression:
            :return: Empty string for unsupported operation
            """
            return ""

        def commit_sql(self, expression: exp.Commit) -> str:
            """
            Skip commit command
            :param expression:
            :return: Empty string for unsupported operation
            """
            return ""

        def command_sql(self, expression: exp.Command) -> str:
            """
            Skip any session, stream, task related commands
            :param expression:
            :return: Empty string for unsupported operations or objects
            """
            filtered_commands = [
                "CREATE",
                "ALTER",
                "DESCRIBE",
                "DROP",
                "SHOW",
                "EXECUTE",
            ]
            ignored_objects = [
                "STREAM",
                "TASK",
                "STREAMS",
                "TASKS",
                "SESSION",
            ]

            command = self.sql(expression, "this").upper()
            expr = expression.text("expression").strip()
            obj = re.split(r"\s+", expr, maxsplit=2)[0].upper() if expr else ""
            if command in filtered_commands and obj in ignored_objects:
                return ""
            return f"{command} {expr}"

        def currenttimestamp_sql(self, _: exp.CurrentTimestamp) -> str:
            return self.func("CURRENT_TIMESTAMP")

        def update_sql(self, expression: exp.Update) -> str:
            this = self.sql(expression, "this")
            set_sql = self.expressions(expression, flat=True)
            from_sql = self.sql(expression, "from")
            where_sql = self.sql(expression, "where")
            returning = self.sql(expression, "returning")
            order = self.sql(expression, "order")
            limit = self.sql(expression, "limit")

            if from_sql:
                from_sql = from_sql.replace("FROM", "USING", 1)
                where_sql = where_sql.replace("WHERE", "ON")

            if self.RETURNING_END:
                expression_sql = f"{from_sql}{where_sql}{returning}"
            else:
                expression_sql = f"{returning}{from_sql}{where_sql}"

            if from_sql:
                sql = f"MERGE INTO {this}{expression_sql} WHEN MATCHED THEN UPDATE SET {set_sql}{order}{limit}"
            else:
                sql = f"UPDATE {this} SET {set_sql}{expression_sql}{order}{limit}"

            return self.prepend_ctes(expression, sql)

        def struct_sql(self, expression: exp.Struct) -> str:
            expression.set(
                "expressions",
                [
                    (
                        exp.alias_(
                            e.expression, e.name if hasattr(e.this, "is_string") and e.this.is_string else e.this
                        )
                        if isinstance(e, exp.PropertyEQ)
                        else e
                    )
                    for e in expression.expressions
                ],
            )

            return self.function_fallback_sql(expression)

        def anonymous_sql(self: SqlglotDatabricks.Generator, expression: exp.Anonymous) -> str:
            if expression.this == "EDITDISTANCE":
                return self.func("LEVENSHTEIN", *expression.expressions)
            if expression.this == "TO_TIMESTAMP":
                return self.sql(
                    exp.Cast(this=expression.expressions[0], to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))
                )

            return self.func(self.sql(expression, "this"), *expression.expressions)

        def order_sql(self, expression: exp.Order, flat: bool = False) -> str:
            if isinstance(expression.parent, exp.Window):
                for ordered_expression in expression.expressions:
                    if isinstance(ordered_expression, exp.Ordered) and ordered_expression.args.get('desc') is None:
                        ordered_expression.args['desc'] = False
            return super().order_sql(expression, flat)

        def add_column_sql(self, expression: exp.Alter) -> str:
            # Final output contains ADD COLUMN before each column
            # This function will handle this issue and return the final output
            columns = self.expressions(expression, key="actions", flat=True)
            return f"ADD COLUMN {columns}"
```