altimate-code 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +14 -18
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
|
@@ -0,0 +1,788 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from sqlglot import exp, generator, parser, tokens, transforms
|
|
6
|
+
from sqlglot.dialects.dialect import (
|
|
7
|
+
Dialect,
|
|
8
|
+
NormalizationStrategy,
|
|
9
|
+
binary_from_function,
|
|
10
|
+
bool_xor_sql,
|
|
11
|
+
date_trunc_to_time,
|
|
12
|
+
datestrtodate_sql,
|
|
13
|
+
encode_decode_sql,
|
|
14
|
+
build_formatted_time,
|
|
15
|
+
if_sql,
|
|
16
|
+
left_to_substring_sql,
|
|
17
|
+
no_ilike_sql,
|
|
18
|
+
no_pivot_sql,
|
|
19
|
+
no_timestamp_sql,
|
|
20
|
+
regexp_extract_sql,
|
|
21
|
+
rename_func,
|
|
22
|
+
right_to_substring_sql,
|
|
23
|
+
sha256_sql,
|
|
24
|
+
strposition_sql,
|
|
25
|
+
struct_extract_sql,
|
|
26
|
+
timestamptrunc_sql,
|
|
27
|
+
timestrtotime_sql,
|
|
28
|
+
ts_or_ds_add_cast,
|
|
29
|
+
unit_to_str,
|
|
30
|
+
sequence_sql,
|
|
31
|
+
build_regexp_extract,
|
|
32
|
+
explode_to_unnest_sql,
|
|
33
|
+
)
|
|
34
|
+
from sqlglot.dialects.hive import Hive
|
|
35
|
+
from sqlglot.dialects.mysql import MySQL
|
|
36
|
+
from sqlglot.helper import apply_index_offset, seq_get
|
|
37
|
+
from sqlglot.optimizer.scope import find_all_in_scope
|
|
38
|
+
from sqlglot.tokens import TokenType
|
|
39
|
+
from sqlglot.transforms import unqualify_columns
|
|
40
|
+
from sqlglot.generator import unsupported_args
|
|
41
|
+
|
|
42
|
+
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Emulate INITCAP, which Presto lacks, via REGEXP_REPLACE with a lambda
    that uppercases the first letter of each word and lowercases the rest."""
    word_pattern = r"(\w)(\w*)"
    this_sql = self.sql(expression, "this")
    return f"REGEXP_REPLACE({this_sql}, '{word_pattern}', x -> UPPER(x[1]) || LOWER(x[2]))"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as Presto's ARRAY_SORT, supplying a reversing
    comparator lambda when a descending sort was requested."""
    descending = expression.args.get("asc") == exp.false()
    comparator = (
        "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
        if descending
        else None
    )
    return self.func("ARRAY_SORT", expression.this, comparator)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Generate SQL for a Schema node, with Presto-specific handling for
    PARTITIONED BY schemas (rendered as an ARRAY of column-name strings)."""
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        # Functions/properties keep their full SQL; plain columns use just their name
        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        # Merge column definitions found in sibling property schemas into this one
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Fall back to APPROX_PERCENTILE: Presto has no exact quantile function."""
    self.unsupported("Presto does not support exact quantiles")
    quantile = expression.args.get("quantile")
    return self.func("APPROX_PERCENTILE", expression.this, quantile)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time parsing as Presto's DATE_PARSE(value, format)."""
    fmt = self.format_time(expression)
    return self.func("DATE_PARSE", expression.this, fmt)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Convert TS_OR_DS_TO_DATE: parse with DATE_PARSE when a non-default
    format is present, otherwise cast through TIMESTAMP down to DATE."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        parsed = _str_to_time_sql(self, expression)
        return self.sql(exp.cast(parsed, exp.DataType.Type.DATE))

    as_timestamp = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    return self.sql(exp.cast(as_timestamp, exp.DataType.Type.DATE))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TS_OR_DS_ADD as Presto's DATE_ADD(unit, amount, target),
    casting operands first so string dates behave like timestamps."""
    expression = ts_or_ds_add_cast(expression)
    return self.func(
        "DATE_ADD", unit_to_str(expression), expression.expression, expression.this
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TS_OR_DS_DIFF as DATE_DIFF(unit, end, start), casting both
    operands to TIMESTAMP."""
    start = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    end = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    return self.func("DATE_DIFF", unit_to_str(expression), end, start)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Map APPROX_PERCENTILE calls of varying arity onto exp.ApproxQuantile."""
    if len(args) == 4:
        # (value, weight, percentage, accuracy)
        this, weight, quantile, accuracy = args
        return exp.ApproxQuantile(this=this, weight=weight, quantile=quantile, accuracy=accuracy)
    if len(args) == 3:
        # (value, percentage, accuracy)
        this, quantile, accuracy = args
        return exp.ApproxQuantile(this=this, quantile=quantile, accuracy=accuracy)
    return exp.ApproxQuantile.from_arg_list(args)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Map FROM_UNIXTIME calls of varying arity onto exp.UnixToTime."""
    arg_count = len(args)
    if arg_count == 3:
        # (unix_time, hours, minutes)
        return exp.UnixToTime(this=args[0], hours=args[1], minutes=args[2])
    if arg_count == 2:
        # (unix_time, zone)
        return exp.UnixToTime(this=args[0], zone=args[1])

    return exp.UnixToTime.from_arg_list(args)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino only accepts FIRST / LAST inside MATCH_RECOGNIZE, where they are
    logical navigation functions, so they are preserved in that context.
    Everywhere else they are rewritten into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    ancestor = expression.find_ancestor(exp.MatchRecognize, exp.Select)
    if isinstance(ancestor, exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UNIX_TO_TIME via FROM_UNIXTIME, dividing sub-second epoch
    values down to seconds."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")

    # Plain seconds (or unspecified scale) map directly onto FROM_UNIXTIME.
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    """Return *expression* wrapped in a BIGINT cast when its (inferred) type
    is known to be non-integer; otherwise return it untouched."""
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        # Infer types in place so the check below has something to inspect.
        annotate_types(expression, dialect=self.dialect)

    needs_cast = expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES
    if needs_cast:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)

    return expression
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Build a TimeToStr from TO_CHAR arguments, uppercasing literal formats."""
    format_arg = seq_get(args, 1)
    if isinstance(format_arg, exp.Literal):
        # Uppercased so it matches Teradata's format mapping keys.
        format_arg.set("this", format_arg.this.upper())

    # "teradata" is intentional: Presto implements TO_CHAR with Teradata-style
    # formats, which differ from Presto's native time formats. See
    # https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    """Build a generator for the date add/sub function called *name*.

    When negate_interval is True the interval operand is multiplied by -1,
    which lets subtraction reuse an additive function.
    """

    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        # Presto's date arithmetic requires an integer amount.
        amount = _to_int(self, expression.expression)
        if negate_interval:
            amount = amount * (-1)
        return self.func(name, unit_to_str(expression), amount, expression.this)

    return _delta_sql
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Transpile LATERAL VIEW EXPLODE / INLINE into Presto's UNNEST form."""
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        # INLINE is equivalent to EXPLODE for this rewrite; normalize it first.
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    """Requalify column references that still point at an exploded column's
    original alias after a LATERAL VIEW EXPLODE rewrite."""
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            # Only single-column EXPLODE laterals with a table alias are amendable
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
            # with the struct column, resulting in invalid Presto references that need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    # old_table was parsed as a db qualifier; shift it into the table slot
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    # old_table.field — requalify the field under the new table alias
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class Presto(Dialect):
    """Presto/Trino SQL dialect: tokenizer, parser and generator settings."""

    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        # RAND() with no argument is always a DOUBLE
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    SUPPORTED_SETTINGS = {
        *Dialect.SUPPORTED_SETTINGS,
        "variant_extract_is_json_extract",
    }
|
|
288
|
+
|
|
289
|
+
    class Tokenizer(tokens.Tokenizer):
        """Presto tokenizer: hex/unicode string prefixes and keyword overrides."""

        # Hex literals: x'...' / X'...'
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        # Unicode literals: U&'...' / u&'...' for every configured quote style
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        # Presto has no hint comments and no QUALIFY clause
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")
|
|
315
|
+
|
|
316
|
+
    class Parser(parser.Parser):
        """Presto parser: maps Presto function names onto sqlglot expressions."""

        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            # Presto's DATE_ADD/DATE_DIFF take (unit, amount, target)
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            # ELEMENT_AT becomes a safe, 1-based bracket access
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            # The replacement defaults to the empty string when omitted
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        # TRIM uses the generic function parser in Presto
        FUNCTION_PARSERS.pop("TRIM")
|
|
381
|
+
|
|
382
|
+
class Generator(generator.Generator):
|
|
383
|
+
INTERVAL_ALLOWS_PLURAL_FORM = False
|
|
384
|
+
JOIN_HINTS = False
|
|
385
|
+
TABLE_HINTS = False
|
|
386
|
+
QUERY_HINTS = False
|
|
387
|
+
IS_BOOL_ALLOWED = False
|
|
388
|
+
TZ_TO_WITH_TIME_ZONE = True
|
|
389
|
+
NVL2_SUPPORTED = False
|
|
390
|
+
STRUCT_DELIMITER = ("(", ")")
|
|
391
|
+
LIMIT_ONLY_LITERALS = True
|
|
392
|
+
SUPPORTS_SINGLE_ARG_CONCAT = False
|
|
393
|
+
LIKE_PROPERTY_INSIDE_SCHEMA = True
|
|
394
|
+
MULTI_ARG_DISTINCT = False
|
|
395
|
+
SUPPORTS_TO_NUMBER = False
|
|
396
|
+
HEX_FUNC = "TO_HEX"
|
|
397
|
+
PARSE_JSON_NAME = "JSON_PARSE"
|
|
398
|
+
PAD_FILL_PATTERN_IS_REQUIRED = True
|
|
399
|
+
EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
|
|
400
|
+
SUPPORTS_MEDIAN = False
|
|
401
|
+
ARRAY_SIZE_NAME = "CARDINALITY"
|
|
402
|
+
|
|
403
|
+
PROPERTIES_LOCATION = {
|
|
404
|
+
**generator.Generator.PROPERTIES_LOCATION,
|
|
405
|
+
exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
|
|
406
|
+
exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
TYPE_MAPPING = {
|
|
410
|
+
**generator.Generator.TYPE_MAPPING,
|
|
411
|
+
exp.DataType.Type.BINARY: "VARBINARY",
|
|
412
|
+
exp.DataType.Type.BIT: "BOOLEAN",
|
|
413
|
+
exp.DataType.Type.DATETIME: "TIMESTAMP",
|
|
414
|
+
exp.DataType.Type.DATETIME64: "TIMESTAMP",
|
|
415
|
+
exp.DataType.Type.FLOAT: "REAL",
|
|
416
|
+
exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
|
|
417
|
+
exp.DataType.Type.INT: "INTEGER",
|
|
418
|
+
exp.DataType.Type.STRUCT: "ROW",
|
|
419
|
+
exp.DataType.Type.TEXT: "VARCHAR",
|
|
420
|
+
exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
|
|
421
|
+
exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
|
|
422
|
+
exp.DataType.Type.TIMETZ: "TIME",
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
TRANSFORMS = {
|
|
426
|
+
**generator.Generator.TRANSFORMS,
|
|
427
|
+
exp.AnyValue: rename_func("ARBITRARY"),
|
|
428
|
+
exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
|
|
429
|
+
exp.ArgMax: rename_func("MAX_BY"),
|
|
430
|
+
exp.ArgMin: rename_func("MIN_BY"),
|
|
431
|
+
exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
|
|
432
|
+
exp.ArrayAny: rename_func("ANY_MATCH"),
|
|
433
|
+
exp.ArrayConcat: rename_func("CONCAT"),
|
|
434
|
+
exp.ArrayContains: rename_func("CONTAINS"),
|
|
435
|
+
exp.ArrayToString: rename_func("ARRAY_JOIN"),
|
|
436
|
+
exp.ArrayUniqueAgg: rename_func("SET_AGG"),
|
|
437
|
+
exp.AtTimeZone: rename_func("AT_TIMEZONE"),
|
|
438
|
+
exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
|
|
439
|
+
exp.BitwiseLeftShift: lambda self, e: self.func(
|
|
440
|
+
"BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
|
|
441
|
+
),
|
|
442
|
+
exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
|
|
443
|
+
exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
|
|
444
|
+
exp.BitwiseRightShift: lambda self, e: self.func(
|
|
445
|
+
"BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
|
|
446
|
+
),
|
|
447
|
+
exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
|
|
448
|
+
exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
|
|
449
|
+
exp.CurrentTime: lambda *_: "CURRENT_TIME",
|
|
450
|
+
exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
|
|
451
|
+
exp.CurrentUser: lambda *_: "CURRENT_USER",
|
|
452
|
+
exp.DateAdd: _date_delta_sql("DATE_ADD"),
|
|
453
|
+
exp.DateDiff: lambda self, e: self.func(
|
|
454
|
+
"DATE_DIFF", unit_to_str(e), e.expression, e.this
|
|
455
|
+
),
|
|
456
|
+
exp.DateStrToDate: datestrtodate_sql,
|
|
457
|
+
exp.DateToDi: lambda self,
|
|
458
|
+
e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
|
|
459
|
+
exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
|
|
460
|
+
exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
|
|
461
|
+
exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
|
|
462
|
+
exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
|
|
463
|
+
exp.DiToDate: lambda self,
|
|
464
|
+
e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
|
|
465
|
+
exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
|
|
466
|
+
exp.FileFormatProperty: lambda self,
|
|
467
|
+
e: f"format={self.sql(exp.Literal.string(e.name))}",
|
|
468
|
+
exp.First: _first_last_sql,
|
|
469
|
+
exp.FromTimeZone: lambda self,
|
|
470
|
+
e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
|
|
471
|
+
exp.GenerateSeries: sequence_sql,
|
|
472
|
+
exp.GenerateDateArray: sequence_sql,
|
|
473
|
+
exp.Group: transforms.preprocess([transforms.unalias_group]),
|
|
474
|
+
exp.If: if_sql(),
|
|
475
|
+
exp.ILike: no_ilike_sql,
|
|
476
|
+
exp.Initcap: _initcap_sql,
|
|
477
|
+
exp.Last: _first_last_sql,
|
|
478
|
+
exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
|
|
479
|
+
exp.Lateral: _explode_to_unnest_sql,
|
|
480
|
+
exp.Left: left_to_substring_sql,
|
|
481
|
+
exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
|
|
482
|
+
rename_func("LEVENSHTEIN_DISTANCE")
|
|
483
|
+
),
|
|
484
|
+
exp.LogicalAnd: rename_func("BOOL_AND"),
|
|
485
|
+
exp.LogicalOr: rename_func("BOOL_OR"),
|
|
486
|
+
exp.Pivot: no_pivot_sql,
|
|
487
|
+
exp.Quantile: _quantile_sql,
|
|
488
|
+
exp.RegexpExtract: regexp_extract_sql,
|
|
489
|
+
exp.RegexpExtractAll: regexp_extract_sql,
|
|
490
|
+
exp.Right: right_to_substring_sql,
|
|
491
|
+
exp.Schema: _schema_sql,
|
|
492
|
+
exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
|
|
493
|
+
exp.Select: transforms.preprocess(
|
|
494
|
+
[
|
|
495
|
+
transforms.eliminate_window_clause,
|
|
496
|
+
transforms.eliminate_qualify,
|
|
497
|
+
transforms.eliminate_distinct_on,
|
|
498
|
+
transforms.explode_projection_to_unnest(1),
|
|
499
|
+
transforms.eliminate_semi_and_anti_joins,
|
|
500
|
+
amend_exploded_column_table,
|
|
501
|
+
]
|
|
502
|
+
),
|
|
503
|
+
exp.SortArray: _no_sort_array,
|
|
504
|
+
exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
|
|
505
|
+
exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
|
|
506
|
+
exp.StrToMap: rename_func("SPLIT_TO_MAP"),
|
|
507
|
+
exp.StrToTime: _str_to_time_sql,
|
|
508
|
+
exp.StructExtract: struct_extract_sql,
|
|
509
|
+
exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
|
|
510
|
+
exp.Timestamp: no_timestamp_sql,
|
|
511
|
+
exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
|
|
512
|
+
exp.TimestampTrunc: timestamptrunc_sql(),
|
|
513
|
+
exp.TimeStrToDate: timestrtotime_sql,
|
|
514
|
+
exp.TimeStrToTime: timestrtotime_sql,
|
|
515
|
+
exp.TimeStrToUnix: lambda self, e: self.func(
|
|
516
|
+
"TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
|
|
517
|
+
),
|
|
518
|
+
exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
|
|
519
|
+
exp.TimeToUnix: rename_func("TO_UNIXTIME"),
|
|
520
|
+
exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
|
|
521
|
+
exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
|
|
522
|
+
exp.TsOrDiToDi: lambda self,
|
|
523
|
+
e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
|
|
524
|
+
exp.TsOrDsAdd: _ts_or_ds_add_sql,
|
|
525
|
+
exp.TsOrDsDiff: _ts_or_ds_diff_sql,
|
|
526
|
+
exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
|
|
527
|
+
exp.Unhex: rename_func("FROM_HEX"),
|
|
528
|
+
exp.UnixToStr: lambda self,
|
|
529
|
+
e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
|
|
530
|
+
exp.UnixToTime: _unix_to_time_sql,
|
|
531
|
+
exp.UnixToTimeStr: lambda self,
|
|
532
|
+
e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
|
|
533
|
+
exp.VariancePop: rename_func("VAR_POP"),
|
|
534
|
+
exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
|
|
535
|
+
exp.WithinGroup: transforms.preprocess(
|
|
536
|
+
[transforms.remove_within_group_for_percentiles]
|
|
537
|
+
),
|
|
538
|
+
exp.Xor: bool_xor_sql,
|
|
539
|
+
exp.MD5Digest: rename_func("MD5"),
|
|
540
|
+
exp.SHA: rename_func("SHA1"),
|
|
541
|
+
exp.SHA2: sha256_sql,
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
RESERVED_KEYWORDS = {
|
|
545
|
+
"alter",
|
|
546
|
+
"and",
|
|
547
|
+
"as",
|
|
548
|
+
"between",
|
|
549
|
+
"by",
|
|
550
|
+
"case",
|
|
551
|
+
"cast",
|
|
552
|
+
"constraint",
|
|
553
|
+
"create",
|
|
554
|
+
"cross",
|
|
555
|
+
"current_time",
|
|
556
|
+
"current_timestamp",
|
|
557
|
+
"deallocate",
|
|
558
|
+
"delete",
|
|
559
|
+
"describe",
|
|
560
|
+
"distinct",
|
|
561
|
+
"drop",
|
|
562
|
+
"else",
|
|
563
|
+
"end",
|
|
564
|
+
"escape",
|
|
565
|
+
"except",
|
|
566
|
+
"execute",
|
|
567
|
+
"exists",
|
|
568
|
+
"extract",
|
|
569
|
+
"false",
|
|
570
|
+
"for",
|
|
571
|
+
"from",
|
|
572
|
+
"full",
|
|
573
|
+
"group",
|
|
574
|
+
"having",
|
|
575
|
+
"in",
|
|
576
|
+
"inner",
|
|
577
|
+
"insert",
|
|
578
|
+
"intersect",
|
|
579
|
+
"into",
|
|
580
|
+
"is",
|
|
581
|
+
"join",
|
|
582
|
+
"left",
|
|
583
|
+
"like",
|
|
584
|
+
"natural",
|
|
585
|
+
"not",
|
|
586
|
+
"null",
|
|
587
|
+
"on",
|
|
588
|
+
"or",
|
|
589
|
+
"order",
|
|
590
|
+
"outer",
|
|
591
|
+
"prepare",
|
|
592
|
+
"right",
|
|
593
|
+
"select",
|
|
594
|
+
"table",
|
|
595
|
+
"then",
|
|
596
|
+
"true",
|
|
597
|
+
"union",
|
|
598
|
+
"using",
|
|
599
|
+
"values",
|
|
600
|
+
"when",
|
|
601
|
+
"where",
|
|
602
|
+
"with",
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
|
|
606
|
+
this = expression.this
|
|
607
|
+
is_json = expression.args.get("is_json")
|
|
608
|
+
|
|
609
|
+
if this and not (is_json or this.type):
|
|
610
|
+
from sqlglot.optimizer.annotate_types import annotate_types
|
|
611
|
+
|
|
612
|
+
this = annotate_types(this, dialect=self.dialect)
|
|
613
|
+
|
|
614
|
+
if not (is_json or this.is_type(exp.DataType.Type.JSON)):
|
|
615
|
+
this.replace(exp.cast(this, exp.DataType.Type.JSON))
|
|
616
|
+
|
|
617
|
+
return self.function_fallback_sql(expression)
|
|
618
|
+
|
|
619
|
+
def md5_sql(self, expression: exp.MD5) -> str:
|
|
620
|
+
this = expression.this
|
|
621
|
+
|
|
622
|
+
if not this.type:
|
|
623
|
+
from sqlglot.optimizer.annotate_types import annotate_types
|
|
624
|
+
|
|
625
|
+
this = annotate_types(this, dialect=self.dialect)
|
|
626
|
+
|
|
627
|
+
if this.is_type(*exp.DataType.TEXT_TYPES):
|
|
628
|
+
this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))
|
|
629
|
+
|
|
630
|
+
return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))
|
|
631
|
+
|
|
632
|
+
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
|
|
633
|
+
# Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
|
|
634
|
+
# To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
|
|
635
|
+
# timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
|
|
636
|
+
# which seems to be using the same time mapping as Hive, as per:
|
|
637
|
+
# https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
|
|
638
|
+
this = expression.this
|
|
639
|
+
value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
|
|
640
|
+
value_as_timestamp = (
|
|
641
|
+
exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
|
|
642
|
+
)
|
|
643
|
+
|
|
644
|
+
parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
|
|
645
|
+
|
|
646
|
+
formatted_value = self.func(
|
|
647
|
+
"DATE_FORMAT", value_as_timestamp, self.format_time(expression)
|
|
648
|
+
)
|
|
649
|
+
parse_with_tz = self.func(
|
|
650
|
+
"PARSE_DATETIME",
|
|
651
|
+
formatted_value,
|
|
652
|
+
self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
|
|
653
|
+
)
|
|
654
|
+
coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
|
|
655
|
+
return self.func("TO_UNIXTIME", coalesced)
|
|
656
|
+
|
|
657
|
+
def bracket_sql(self, expression: exp.Bracket) -> str:
|
|
658
|
+
if expression.args.get("safe"):
|
|
659
|
+
return self.func(
|
|
660
|
+
"ELEMENT_AT",
|
|
661
|
+
expression.this,
|
|
662
|
+
seq_get(
|
|
663
|
+
apply_index_offset(
|
|
664
|
+
expression.this,
|
|
665
|
+
expression.expressions,
|
|
666
|
+
1 - expression.args.get("offset", 0),
|
|
667
|
+
dialect=self.dialect,
|
|
668
|
+
),
|
|
669
|
+
0,
|
|
670
|
+
),
|
|
671
|
+
)
|
|
672
|
+
return super().bracket_sql(expression)
|
|
673
|
+
|
|
674
|
+
def struct_sql(self, expression: exp.Struct) -> str:
|
|
675
|
+
from sqlglot.optimizer.annotate_types import annotate_types
|
|
676
|
+
|
|
677
|
+
expression = annotate_types(expression, dialect=self.dialect)
|
|
678
|
+
values: t.List[str] = []
|
|
679
|
+
schema: t.List[str] = []
|
|
680
|
+
unknown_type = False
|
|
681
|
+
|
|
682
|
+
for e in expression.expressions:
|
|
683
|
+
if isinstance(e, exp.PropertyEQ):
|
|
684
|
+
if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
|
|
685
|
+
unknown_type = True
|
|
686
|
+
else:
|
|
687
|
+
schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
|
|
688
|
+
values.append(self.sql(e, "expression"))
|
|
689
|
+
else:
|
|
690
|
+
values.append(self.sql(e))
|
|
691
|
+
|
|
692
|
+
size = len(expression.expressions)
|
|
693
|
+
|
|
694
|
+
if not size or len(schema) != size:
|
|
695
|
+
if unknown_type:
|
|
696
|
+
self.unsupported(
|
|
697
|
+
"Cannot convert untyped key-value definitions (try annotate_types)."
|
|
698
|
+
)
|
|
699
|
+
return self.func("ROW", *values)
|
|
700
|
+
return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
|
|
701
|
+
|
|
702
|
+
def interval_sql(self, expression: exp.Interval) -> str:
|
|
703
|
+
if expression.this and expression.text("unit").upper().startswith("WEEK"):
|
|
704
|
+
return f"({expression.this.name} * INTERVAL '7' DAY)"
|
|
705
|
+
return super().interval_sql(expression)
|
|
706
|
+
|
|
707
|
+
def transaction_sql(self, expression: exp.Transaction) -> str:
|
|
708
|
+
modes = expression.args.get("modes")
|
|
709
|
+
modes = f" {', '.join(modes)}" if modes else ""
|
|
710
|
+
return f"START TRANSACTION{modes}"
|
|
711
|
+
|
|
712
|
+
def offset_limit_modifiers(
|
|
713
|
+
self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
|
|
714
|
+
) -> t.List[str]:
|
|
715
|
+
return [
|
|
716
|
+
self.sql(expression, "offset"),
|
|
717
|
+
self.sql(limit),
|
|
718
|
+
]
|
|
719
|
+
|
|
720
|
+
def create_sql(self, expression: exp.Create) -> str:
|
|
721
|
+
"""
|
|
722
|
+
Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
|
|
723
|
+
so we need to remove them
|
|
724
|
+
"""
|
|
725
|
+
kind = expression.args["kind"]
|
|
726
|
+
schema = expression.this
|
|
727
|
+
if kind == "VIEW" and schema.expressions:
|
|
728
|
+
expression.this.set("expressions", None)
|
|
729
|
+
return super().create_sql(expression)
|
|
730
|
+
|
|
731
|
+
def delete_sql(self, expression: exp.Delete) -> str:
|
|
732
|
+
"""
|
|
733
|
+
Presto only supports DELETE FROM for a single table without an alias, so we need
|
|
734
|
+
to remove the unnecessary parts. If the original DELETE statement contains more
|
|
735
|
+
than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
|
|
736
|
+
"""
|
|
737
|
+
tables = expression.args.get("tables") or [expression.this]
|
|
738
|
+
if len(tables) > 1:
|
|
739
|
+
return super().delete_sql(expression)
|
|
740
|
+
|
|
741
|
+
table = tables[0]
|
|
742
|
+
expression.set("this", table)
|
|
743
|
+
expression.set("tables", None)
|
|
744
|
+
|
|
745
|
+
if isinstance(table, exp.Table):
|
|
746
|
+
table_alias = table.args.get("alias")
|
|
747
|
+
if table_alias:
|
|
748
|
+
table_alias.pop()
|
|
749
|
+
expression = t.cast(exp.Delete, expression.transform(unqualify_columns))
|
|
750
|
+
|
|
751
|
+
return super().delete_sql(expression)
|
|
752
|
+
|
|
753
|
+
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
|
|
754
|
+
is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)
|
|
755
|
+
|
|
756
|
+
# Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
|
|
757
|
+
# VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
|
|
758
|
+
if not expression.args.get("variant_extract") or is_json_extract:
|
|
759
|
+
return self.func(
|
|
760
|
+
"JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
|
|
761
|
+
)
|
|
762
|
+
|
|
763
|
+
this = self.sql(expression, "this")
|
|
764
|
+
|
|
765
|
+
# Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y
|
|
766
|
+
segments = []
|
|
767
|
+
for path_key in expression.expression.expressions[1:]:
|
|
768
|
+
if not isinstance(path_key, exp.JSONPathKey):
|
|
769
|
+
# Cannot transpile subscripts, wildcards etc to dot notation
|
|
770
|
+
self.unsupported(
|
|
771
|
+
f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
|
|
772
|
+
)
|
|
773
|
+
continue
|
|
774
|
+
key = path_key.this
|
|
775
|
+
if not exp.SAFE_IDENTIFIER_RE.match(key):
|
|
776
|
+
key = f'"{key}"'
|
|
777
|
+
segments.append(f".{key}")
|
|
778
|
+
|
|
779
|
+
expr = "".join(segments)
|
|
780
|
+
|
|
781
|
+
return f"{this}{expr}"
|
|
782
|
+
|
|
783
|
+
def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
|
|
784
|
+
return self.func(
|
|
785
|
+
"ARRAY_JOIN",
|
|
786
|
+
self.func("ARRAY_AGG", expression.this),
|
|
787
|
+
expression.args.get("separator"),
|
|
788
|
+
)
|