@altimateai/altimate-code 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +13 -13
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
|
@@ -0,0 +1,787 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
from functools import partial
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
|
|
8
|
+
from sqlglot import exp, generator, parser, tokens, transforms
|
|
9
|
+
from sqlglot.dialects.dialect import (
|
|
10
|
+
DATE_ADD_OR_SUB,
|
|
11
|
+
Dialect,
|
|
12
|
+
NormalizationStrategy,
|
|
13
|
+
approx_count_distinct_sql,
|
|
14
|
+
arg_max_or_min_no_count,
|
|
15
|
+
datestrtodate_sql,
|
|
16
|
+
build_formatted_time,
|
|
17
|
+
if_sql,
|
|
18
|
+
is_parse_json,
|
|
19
|
+
left_to_substring_sql,
|
|
20
|
+
max_or_greatest,
|
|
21
|
+
min_or_least,
|
|
22
|
+
no_ilike_sql,
|
|
23
|
+
no_recursive_cte_sql,
|
|
24
|
+
no_trycast_sql,
|
|
25
|
+
regexp_extract_sql,
|
|
26
|
+
regexp_replace_sql,
|
|
27
|
+
rename_func,
|
|
28
|
+
right_to_substring_sql,
|
|
29
|
+
strposition_sql,
|
|
30
|
+
struct_extract_sql,
|
|
31
|
+
time_format,
|
|
32
|
+
timestrtotime_sql,
|
|
33
|
+
unit_to_str,
|
|
34
|
+
var_map_sql,
|
|
35
|
+
sequence_sql,
|
|
36
|
+
property_sql,
|
|
37
|
+
build_regexp_extract,
|
|
38
|
+
)
|
|
39
|
+
from sqlglot.transforms import (
|
|
40
|
+
remove_unique_constraints,
|
|
41
|
+
ctas_with_tmp_tables_to_create_tmp_view,
|
|
42
|
+
preprocess,
|
|
43
|
+
move_schema_columns_to_partitioned_by,
|
|
44
|
+
)
|
|
45
|
+
from sqlglot.helper import seq_get
|
|
46
|
+
from sqlglot.tokens import TokenType
|
|
47
|
+
from sqlglot.generator import unsupported_args
|
|
48
|
+
from sqlglot.optimizer.annotate_types import TypeAnnotator
|
|
49
|
+
|
|
50
|
+
# Maps a date-part unit to the Hive function used to shift by it, and the
# factor converting one unit into that function's native step (months or days).
# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# SQL suffix appended to a seconds delta to convert it into the given unit.
# SECOND maps to the empty string, i.e. no scaling.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units whose difference is computed with MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")

# Parent expression types under which a TsOrDsToDate wrapper is redundant
# (see _to_date_sql, which unwraps in these contexts).
TS_OR_DS_EXPRESSIONS = (
    exp.DateDiff,
    exp.Day,
    exp.Month,
    exp.Year,
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
    """Render DateAdd / DateSub / TsOrDsAdd using Hive's DATE_ADD or ADD_MONTHS.

    Hive can only shift by days (DATE_ADD) or months (ADD_MONTHS), so other
    units are lowered via DATE_DELTA_INTERVAL by scaling the increment, and
    DateSub is expressed as adding a negated increment.
    """
    # A TsOrDsAdd without a unit maps directly to Hive's two-argument DATE_ADD.
    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
        return self.func("DATE_ADD", expression.this, expression.expression)

    unit = expression.text("unit").upper()
    # Unknown units fall back to plain day arithmetic (multiplier 1).
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        # Subtraction is addition of the negated amount.
        multiplier *= -1

    increment = expression.expression
    if isinstance(increment, exp.Literal):
        # Fold the scaling into the literal so the output stays a constant.
        value = increment.to_py() if increment.is_number else int(increment.name)
        increment = exp.Literal.number(value * multiplier)
    elif multiplier != 1:
        # Non-literal increments are scaled with an explicit multiplication.
        increment *= exp.Literal.number(multiplier)

    return self.func(func, expression.this, increment)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
    """Render DateDiff / TsOrDsDiff for Hive.

    Sub-day units are computed as a UNIX_TIMESTAMP delta scaled by
    TIME_DIFF_FACTOR; month-based units (DIFF_MONTH_SWITCH) use
    MONTHS_BETWEEN; everything else falls back to DATEDIFF, divided by the
    unit's day multiplier where needed.
    """
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        # SECOND's factor is the empty string, so no wrapping parens are needed.
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between or multiplier_sql:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
        # For the same reason, we want to truncate if there's a divisor present.
        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"

    return diff_sql
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    """Render JSONFormat as Hive's TO_JSON.

    When the operand is itself a PARSE_JSON of a string literal, Hive has no
    direct equivalent, so the value is round-tripped through
    FROM_JSON/TO_JSON inside a dummy array wrapper; otherwise TO_JSON is
    emitted directly (or the operand passed through unchanged).
    """
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        # A non-literal PARSE_JSON operand is already JSON text in Hive.
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@generator.unsupported_args(("expression", "Hive's SORT_ARRAY does not support a comparator."))
def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    """Render ArraySort as Hive's SORT_ARRAY; any custom comparator is dropped
    (flagged as unsupported by the decorator)."""
    return self.func("SORT_ARRAY", expression.this)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as Hive's UNIX_TIMESTAMP(<expr>[, <format>])."""
    fmt = time_format("hive")(self, expression)
    return self.func("UNIX_TIMESTAMP", expression.this, fmt)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _unix_to_time_sql(self: Hive.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime as FROM_UNIXTIME, dividing by a power of ten when a
    non-second scale is present."""
    scale = expression.args.get("scale")

    if scale is not None and scale != exp.UnixToTime.SECONDS:
        ts_sql = self.sql(expression, "this")
        return f"FROM_UNIXTIME({ts_sql} / POW(10, {scale}))"

    # Second-resolution (or unscaled) timestamps map directly onto FROM_UNIXTIME.
    return rename_func("FROM_UNIXTIME")(self, expression)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    """Render StrToDate as a CAST to DATE, reparsing through
    UNIX_TIMESTAMP/FROM_UNIXTIME when the format is not one of Hive's defaults."""
    fmt = self.format_time(expression)
    inner = self.sql(expression, "this")

    if fmt not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        inner = f"FROM_UNIXTIME(UNIX_TIMESTAMP({inner}, {fmt}))"

    return f"CAST({inner} AS DATE)"
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    """Render StrToTime as a CAST to TIMESTAMP, reparsing through
    UNIX_TIMESTAMP/FROM_UNIXTIME when the format is not one of Hive's defaults."""
    fmt = self.format_time(expression)
    inner = self.sql(expression, "this")

    if fmt not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        inner = f"FROM_UNIXTIME(UNIX_TIMESTAMP({inner}, {fmt}))"

    return f"CAST({inner} AS TIMESTAMP)"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render TsOrDsToDate as Hive's TO_DATE.

    A non-default format is passed through as TO_DATE's second argument;
    under a TS_OR_DS_EXPRESSIONS parent the wrapper is redundant and the
    inner expression is emitted as-is.
    """
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return self.func("TO_DATE", expression.this, time_format)

    # The parent already coerces its operands to dates, so skip TO_DATE here.
    if isinstance(expression.parent, TS_OR_DS_EXPRESSIONS):
        return self.sql(expression, "this")

    return self.func("TO_DATE", expression.this)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _build_with_ignore_nulls(
    exp_class: t.Type[exp.Expression],
) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
    """Return a parser for functions whose optional second argument is a
    boolean "ignore nulls" flag (e.g. FIRST/LAST); a TRUE flag wraps the
    result in IgnoreNulls."""

    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
        result = exp_class(this=seq_get(args, 0))
        ignore_nulls = seq_get(args, 1) == exp.true()
        return exp.IgnoreNulls(this=result) if ignore_nulls else result

    return _parse
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _build_to_date(args: t.List) -> exp.TsOrDsToDate:
    """Parse Hive's TO_DATE(...) into a TsOrDsToDate flagged as the "safe"
    (non-raising) variant."""
    to_date = build_formatted_time(exp.TsOrDsToDate, "hive")(args)
    to_date.set("safe", True)
    return to_date
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class Hive(Dialect):
|
|
205
|
+
ALIAS_POST_TABLESAMPLE = True
|
|
206
|
+
IDENTIFIERS_CAN_START_WITH_DIGIT = True
|
|
207
|
+
SUPPORTS_USER_DEFINED_TYPES = False
|
|
208
|
+
SAFE_DIVISION = True
|
|
209
|
+
ARRAY_AGG_INCLUDES_NULLS = None
|
|
210
|
+
REGEXP_EXTRACT_DEFAULT_GROUP = 1
|
|
211
|
+
|
|
212
|
+
# https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
|
|
213
|
+
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
|
|
214
|
+
|
|
215
|
+
ANNOTATORS = {
|
|
216
|
+
**Dialect.ANNOTATORS,
|
|
217
|
+
exp.If: lambda self, e: self._annotate_by_args(e, "true", "false", promote=True),
|
|
218
|
+
exp.Coalesce: lambda self, e: self._annotate_by_args(
|
|
219
|
+
e, "this", "expressions", promote=True
|
|
220
|
+
),
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
# Support only the non-ANSI mode (default for Hive, Spark2, Spark)
|
|
224
|
+
COERCES_TO = defaultdict(set, deepcopy(TypeAnnotator.COERCES_TO))
|
|
225
|
+
for target_type in {
|
|
226
|
+
*exp.DataType.NUMERIC_TYPES,
|
|
227
|
+
*exp.DataType.TEMPORAL_TYPES,
|
|
228
|
+
exp.DataType.Type.INTERVAL,
|
|
229
|
+
}:
|
|
230
|
+
COERCES_TO[target_type] |= exp.DataType.TEXT_TYPES
|
|
231
|
+
|
|
232
|
+
TIME_MAPPING = {
|
|
233
|
+
"y": "%Y",
|
|
234
|
+
"Y": "%Y",
|
|
235
|
+
"YYYY": "%Y",
|
|
236
|
+
"yyyy": "%Y",
|
|
237
|
+
"YY": "%y",
|
|
238
|
+
"yy": "%y",
|
|
239
|
+
"MMMM": "%B",
|
|
240
|
+
"MMM": "%b",
|
|
241
|
+
"MM": "%m",
|
|
242
|
+
"M": "%-m",
|
|
243
|
+
"dd": "%d",
|
|
244
|
+
"d": "%-d",
|
|
245
|
+
"HH": "%H",
|
|
246
|
+
"H": "%-H",
|
|
247
|
+
"hh": "%I",
|
|
248
|
+
"h": "%-I",
|
|
249
|
+
"mm": "%M",
|
|
250
|
+
"m": "%-M",
|
|
251
|
+
"ss": "%S",
|
|
252
|
+
"s": "%-S",
|
|
253
|
+
"SSSSSS": "%f",
|
|
254
|
+
"a": "%p",
|
|
255
|
+
"DD": "%j",
|
|
256
|
+
"D": "%-j",
|
|
257
|
+
"E": "%a",
|
|
258
|
+
"EE": "%a",
|
|
259
|
+
"EEE": "%a",
|
|
260
|
+
"EEEE": "%A",
|
|
261
|
+
"z": "%Z",
|
|
262
|
+
"Z": "%z",
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
DATE_FORMAT = "'yyyy-MM-dd'"
|
|
266
|
+
DATEINT_FORMAT = "'yyyyMMdd'"
|
|
267
|
+
TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"
|
|
268
|
+
|
|
269
|
+
class Tokenizer(tokens.Tokenizer):
|
|
270
|
+
QUOTES = ["'", '"']
|
|
271
|
+
IDENTIFIERS = ["`"]
|
|
272
|
+
STRING_ESCAPES = ["\\"]
|
|
273
|
+
|
|
274
|
+
SINGLE_TOKENS = {
|
|
275
|
+
**tokens.Tokenizer.SINGLE_TOKENS,
|
|
276
|
+
"$": TokenType.PARAMETER,
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
KEYWORDS = {
|
|
280
|
+
**tokens.Tokenizer.KEYWORDS,
|
|
281
|
+
"ADD ARCHIVE": TokenType.COMMAND,
|
|
282
|
+
"ADD ARCHIVES": TokenType.COMMAND,
|
|
283
|
+
"ADD FILE": TokenType.COMMAND,
|
|
284
|
+
"ADD FILES": TokenType.COMMAND,
|
|
285
|
+
"ADD JAR": TokenType.COMMAND,
|
|
286
|
+
"ADD JARS": TokenType.COMMAND,
|
|
287
|
+
"MINUS": TokenType.EXCEPT,
|
|
288
|
+
"MSCK REPAIR": TokenType.COMMAND,
|
|
289
|
+
"REFRESH": TokenType.REFRESH,
|
|
290
|
+
"TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
|
|
291
|
+
"VERSION AS OF": TokenType.VERSION_SNAPSHOT,
|
|
292
|
+
"SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
NUMERIC_LITERALS = {
|
|
296
|
+
"L": "BIGINT",
|
|
297
|
+
"S": "SMALLINT",
|
|
298
|
+
"Y": "TINYINT",
|
|
299
|
+
"D": "DOUBLE",
|
|
300
|
+
"F": "FLOAT",
|
|
301
|
+
"BD": "DECIMAL",
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
class Parser(parser.Parser):
|
|
305
|
+
LOG_DEFAULTS_TO_LN = True
|
|
306
|
+
STRICT_CAST = False
|
|
307
|
+
VALUES_FOLLOWED_BY_PAREN = False
|
|
308
|
+
|
|
309
|
+
FUNCTIONS = {
|
|
310
|
+
**parser.Parser.FUNCTIONS,
|
|
311
|
+
"ASCII": exp.Unicode.from_arg_list,
|
|
312
|
+
"BASE64": exp.ToBase64.from_arg_list,
|
|
313
|
+
"COLLECT_LIST": lambda args: exp.ArrayAgg(this=seq_get(args, 0), nulls_excluded=True),
|
|
314
|
+
"COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
|
|
315
|
+
"DATE_ADD": lambda args: exp.TsOrDsAdd(
|
|
316
|
+
this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
|
|
317
|
+
),
|
|
318
|
+
"DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
|
|
319
|
+
[
|
|
320
|
+
exp.TimeStrToTime(this=seq_get(args, 0)),
|
|
321
|
+
seq_get(args, 1),
|
|
322
|
+
]
|
|
323
|
+
),
|
|
324
|
+
"DATE_SUB": lambda args: exp.TsOrDsAdd(
|
|
325
|
+
this=seq_get(args, 0),
|
|
326
|
+
expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
|
|
327
|
+
unit=exp.Literal.string("DAY"),
|
|
328
|
+
),
|
|
329
|
+
"DATEDIFF": lambda args: exp.DateDiff(
|
|
330
|
+
this=exp.TsOrDsToDate(this=seq_get(args, 0)),
|
|
331
|
+
expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
|
|
332
|
+
),
|
|
333
|
+
"DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
|
|
334
|
+
"FIRST": _build_with_ignore_nulls(exp.First),
|
|
335
|
+
"FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
|
|
336
|
+
"FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
|
|
337
|
+
"GET_JSON_OBJECT": lambda args, dialect: exp.JSONExtractScalar(
|
|
338
|
+
this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
|
|
339
|
+
),
|
|
340
|
+
"LAST": _build_with_ignore_nulls(exp.Last),
|
|
341
|
+
"LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
|
|
342
|
+
"MAP": parser.build_var_map,
|
|
343
|
+
"MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
|
|
344
|
+
"PERCENTILE": exp.Quantile.from_arg_list,
|
|
345
|
+
"PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
|
|
346
|
+
"REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
|
|
347
|
+
"REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
|
|
348
|
+
"SEQUENCE": exp.GenerateSeries.from_arg_list,
|
|
349
|
+
"SIZE": exp.ArraySize.from_arg_list,
|
|
350
|
+
"SPLIT": exp.RegexpSplit.from_arg_list,
|
|
351
|
+
"STR_TO_MAP": lambda args: exp.StrToMap(
|
|
352
|
+
this=seq_get(args, 0),
|
|
353
|
+
pair_delim=seq_get(args, 1) or exp.Literal.string(","),
|
|
354
|
+
key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
|
|
355
|
+
),
|
|
356
|
+
"TO_DATE": _build_to_date,
|
|
357
|
+
"TO_JSON": exp.JSONFormat.from_arg_list,
|
|
358
|
+
"TRUNC": exp.TimestampTrunc.from_arg_list,
|
|
359
|
+
"UNBASE64": exp.FromBase64.from_arg_list,
|
|
360
|
+
"UNIX_TIMESTAMP": lambda args: build_formatted_time(exp.StrToUnix, "hive", True)(
|
|
361
|
+
args or [exp.CurrentTimestamp()]
|
|
362
|
+
),
|
|
363
|
+
"YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
NO_PAREN_FUNCTION_PARSERS = {
|
|
367
|
+
**parser.Parser.NO_PAREN_FUNCTION_PARSERS,
|
|
368
|
+
"TRANSFORM": lambda self: self._parse_transform(),
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
|
|
372
|
+
NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIME)
|
|
373
|
+
|
|
374
|
+
PROPERTY_PARSERS = {
|
|
375
|
+
**parser.Parser.PROPERTY_PARSERS,
|
|
376
|
+
"SERDEPROPERTIES": lambda self: exp.SerdeProperties(
|
|
377
|
+
expressions=self._parse_wrapped_csv(self._parse_property)
|
|
378
|
+
),
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
|
|
382
|
+
if not self._match(TokenType.L_PAREN, advance=False):
|
|
383
|
+
self._retreat(self._index - 1)
|
|
384
|
+
return None
|
|
385
|
+
|
|
386
|
+
args = self._parse_wrapped_csv(self._parse_lambda)
|
|
387
|
+
row_format_before = self._parse_row_format(match_row=True)
|
|
388
|
+
|
|
389
|
+
record_writer = None
|
|
390
|
+
if self._match_text_seq("RECORDWRITER"):
|
|
391
|
+
record_writer = self._parse_string()
|
|
392
|
+
|
|
393
|
+
if not self._match(TokenType.USING):
|
|
394
|
+
return exp.Transform.from_arg_list(args)
|
|
395
|
+
|
|
396
|
+
command_script = self._parse_string()
|
|
397
|
+
|
|
398
|
+
self._match(TokenType.ALIAS)
|
|
399
|
+
schema = self._parse_schema()
|
|
400
|
+
|
|
401
|
+
row_format_after = self._parse_row_format(match_row=True)
|
|
402
|
+
record_reader = None
|
|
403
|
+
if self._match_text_seq("RECORDREADER"):
|
|
404
|
+
record_reader = self._parse_string()
|
|
405
|
+
|
|
406
|
+
return self.expression(
|
|
407
|
+
exp.QueryTransform,
|
|
408
|
+
expressions=args,
|
|
409
|
+
command_script=command_script,
|
|
410
|
+
schema=schema,
|
|
411
|
+
row_format_before=row_format_before,
|
|
412
|
+
record_writer=record_writer,
|
|
413
|
+
row_format_after=row_format_after,
|
|
414
|
+
record_reader=record_reader,
|
|
415
|
+
)
|
|
416
|
+
|
|
417
|
+
def _parse_types(
|
|
418
|
+
self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
|
|
419
|
+
) -> t.Optional[exp.Expression]:
|
|
420
|
+
"""
|
|
421
|
+
Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
|
|
422
|
+
STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:
|
|
423
|
+
|
|
424
|
+
spark-sql (default)> select cast(1234 as varchar(2));
|
|
425
|
+
23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
|
|
426
|
+
char/varchar type and simply treats them as string type. Please use string type
|
|
427
|
+
directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
|
|
428
|
+
to true, so that Spark treat them as string type as same as Spark 3.0 and earlier
|
|
429
|
+
|
|
430
|
+
1234
|
|
431
|
+
Time taken: 4.265 seconds, Fetched 1 row(s)
|
|
432
|
+
|
|
433
|
+
This shows that Spark doesn't truncate the value into '12', which is inconsistent with
|
|
434
|
+
what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.
|
|
435
|
+
|
|
436
|
+
Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
|
|
437
|
+
"""
|
|
438
|
+
this = super()._parse_types(
|
|
439
|
+
check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
if this and not schema:
|
|
443
|
+
return this.transform(
|
|
444
|
+
lambda node: (
|
|
445
|
+
node.replace(exp.DataType.build("text"))
|
|
446
|
+
if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
|
|
447
|
+
else node
|
|
448
|
+
),
|
|
449
|
+
copy=False,
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
return this
|
|
453
|
+
|
|
454
|
+
def _parse_partition_and_order(
|
|
455
|
+
self,
|
|
456
|
+
) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
|
|
457
|
+
return (
|
|
458
|
+
(
|
|
459
|
+
self._parse_csv(self._parse_assignment)
|
|
460
|
+
if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
|
|
461
|
+
else []
|
|
462
|
+
),
|
|
463
|
+
super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
def _parse_parameter(self) -> exp.Parameter:
|
|
467
|
+
self._match(TokenType.L_BRACE)
|
|
468
|
+
this = self._parse_identifier() or self._parse_primary_or_var()
|
|
469
|
+
expression = self._match(TokenType.COLON) and (
|
|
470
|
+
self._parse_identifier() or self._parse_primary_or_var()
|
|
471
|
+
)
|
|
472
|
+
self._match(TokenType.R_BRACE)
|
|
473
|
+
return self.expression(exp.Parameter, this=this, expression=expression)
|
|
474
|
+
|
|
475
|
+
def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
|
|
476
|
+
if expression.is_star:
|
|
477
|
+
return expression
|
|
478
|
+
|
|
479
|
+
if isinstance(expression, exp.Column):
|
|
480
|
+
key = expression.this
|
|
481
|
+
else:
|
|
482
|
+
key = exp.to_identifier(f"col{index + 1}")
|
|
483
|
+
|
|
484
|
+
return self.expression(exp.PropertyEQ, this=key, expression=expression)
|
|
485
|
+
|
|
486
|
+
class Generator(generator.Generator):
    """SQL generator for the Hive dialect."""

    # --- Dialect capability / formatting flags (see base Generator) ---
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    SUPPORTS_TO_NUMBER = False
    WITH_PROPERTIES_PREFIX = "TBLPROPERTIES"
    PARSE_JSON_NAME: t.Optional[str] = None
    PAD_FILL_PATTERN_IS_REQUIRED = True
    SUPPORTS_MEDIAN = False
    ARRAY_SIZE_NAME = "SIZE"

    # Expression types under which CTEs must be hoisted (no nested WITH).
    EXPRESSIONS_WITHOUT_NESTED_CTES = {
        exp.Insert,
        exp.Select,
        exp.Subquery,
        exp.SetOperation,
    }

    # JSON path components representable by Hive's GET_JSON_OBJECT paths.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    # Type renames applied when generating Hive DDL and casts.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.BLOB: "BINARY",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.ROWVERSION: "BINARY",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.UTINYINT: "SMALLINT",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    # Per-expression SQL generation overrides for Hive.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Property: property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        # NOTE: CONCAT_WS takes the separator first, hence the swapped args.
        exp.ArrayToString: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self,
        e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self,
        e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.StorageHandlerProperty: lambda self, e: f"STORED BY {self.sql(e, 'this')}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.GenerateSeries: sequence_sql,
        exp.GenerateDateArray: sequence_sql,
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: lambda self, e: self.binary(e, "DIV"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "GET_JSON_OBJECT", e.this, e.expression
        ),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda _, e: (
            "" if e.args.get("allow_null") else "NOT NULL"
        ),
        exp.VarMap: var_map_sql,
        exp.Create: preprocess(
            [
                remove_unique_constraints,
                ctas_with_tmp_tables_to_create_tmp_view,
                move_schema_columns_to_partitioned_by,
            ]
        ),
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpExtractAll: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
        # \\Q...\\E quotes the separator so SPLIT treats it literally.
        exp.Split: lambda self, e: self.func(
            "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression, "'\\\\E'")
        ),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                partial(transforms.unnest_to_explode, unnest_using_arrays_zip=False),
                transforms.any_to_exists,
            ]
        ),
        exp.StrPosition: lambda self, e: strposition_sql(
            self, e, func_name="LOCATE", supports_position=True
        ),
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.StarMap: rename_func("MAP"),
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimestampTrunc: lambda self, e: self.func("TRUNC", e.this, unit_to_str(e)),
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _add_date_sql,
        exp.TsOrDsDiff: _date_diff_sql,
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.Unicode: rename_func("ASCII"),
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.Unnest: rename_func("EXPLODE"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self,
        e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self,
        e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda *_: "",
        exp.OnProperty: lambda *_: "",
        exp.PartitionedByBucket: lambda self, e: self.func("BUCKET", e.expression, e.this),
        exp.PartitionByTruncate: lambda self, e: self.func("TRUNCATE", e.expression, e.this),
        exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN")
        ),
    }

    # Where each property is emitted within CREATE statements.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }
|
|
660
|
+
|
|
661
|
+
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Render UNNEST via Hive's EXPLODE function."""
    explode = rename_func("EXPLODE")
    return explode(self, expression)
|
|
663
|
+
|
|
664
|
+
def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
    """Render a JSON path key; Hive cannot express a wildcard key."""
    key = expression.this
    if not isinstance(key, exp.JSONPathWildcard):
        return super()._jsonpathkey_sql(expression)

    self.unsupported("Unsupported wildcard in JSONPathKey expression")
    return ""
|
|
670
|
+
|
|
671
|
+
def parameter_sql(self, expression: exp.Parameter) -> str:
    """Render a parameter as ``${name}`` (``${name:scope}`` when scoped).

    Inside a SET item's equality, the bare name is emitted instead so the
    output reads ``SET key = value`` rather than ``SET ${key} = value``.
    """
    name = self.sql(expression, "this")
    scope = self.sql(expression, "expression")
    if scope:
        name = f"{name}:{scope}"

    parent = expression.parent
    in_set_item = isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem)
    return name if in_set_item else f"${{{name}}}"
|
|
683
|
+
|
|
684
|
+
def schema_sql(self, expression: exp.Schema) -> str:
    """Render a schema, dropping explicit ASC markers beforehand."""
    explicit_asc = (
        node
        for node in expression.find_all(exp.Ordered)
        if node.args.get("desc") is False
    )
    # Clearing "desc" to None suppresses the redundant ASC keyword.
    for node in explicit_asc:
        node.set("desc", None)

    return super().schema_sql(expression)
|
|
690
|
+
|
|
691
|
+
def constraint_sql(self, expression: exp.Constraint) -> str:
    """Render a CONSTRAINT clause, stripping any nested Properties nodes."""
    # Materialize before popping so removal can't disturb the traversal.
    props = list(expression.find_all(exp.Properties))
    for prop in props:
        prop.pop()

    name = self.sql(expression, "this")
    body = self.expressions(expression, sep=" ", flat=True)
    return f"CONSTRAINT {name} {body}"
|
|
698
|
+
|
|
699
|
+
def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
    """Render ``ROW FORMAT SERDE <serde>`` with optional serde properties."""
    serde = self.sql(expression, "this")
    props = self.sql(expression, "serde_properties")
    suffix = f" {props}" if props else ""
    return f"ROW FORMAT SERDE {serde}{suffix}"
|
|
703
|
+
|
|
704
|
+
def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
    """Render ARRAY_AGG as COLLECT_LIST, discarding any ORDER BY wrapper."""
    arg = expression.this
    if isinstance(arg, exp.Order):
        # COLLECT_LIST has no ordering support; keep just the inner value.
        arg = arg.this
    return self.func("COLLECT_LIST", arg)
|
|
709
|
+
|
|
710
|
+
def datatype_sql(self, expression: exp.DataType) -> str:
    """Generate a Hive type, normalizing types Hive spells differently.

    - Parameterizable text types (per PARAMETERIZABLE_TEXT_TYPES) with no
      size argument, or with size MAX, collapse to plain ``text``.
    - A parameterized TEXT is switched to VARCHAR so the size is kept.
    - Temporal types are rebuilt bare, dropping any parameters.
    - FLOAT(n) becomes FLOAT for n <= 32 and DOUBLE for larger n.
    """
    if expression.this in self.PARAMETERIZABLE_TEXT_TYPES and (
        not expression.expressions or expression.expressions[0].name == "MAX"
    ):
        expression = exp.DataType.build("text")
    elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
        # NOTE(review): mutates the node in place rather than rebuilding it.
        expression.set("this", exp.DataType.Type.VARCHAR)
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
|
|
728
|
+
|
|
729
|
+
def version_sql(self, expression: exp.Version) -> str:
    """Render a versioned-table clause without the leading FOR keyword."""
    rendered = super().version_sql(expression)
    # Only the first "FOR " occurrence is removed.
    return rendered.replace("FOR ", "", 1)
|
|
732
|
+
|
|
733
|
+
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a struct literal as ``STRUCT(v1, v2, ...)``.

    Hive has no named-struct syntax, so a named field (PropertyEQ) emits an
    "unsupported" warning and only its value expression is kept.
    """
    # Fix: the original looped with `for i, e in enumerate(...)` but never
    # used the index; iterate the expressions directly.
    values = []
    for field in expression.expressions:
        if isinstance(field, exp.PropertyEQ):
            self.unsupported("Hive does not support named structs.")
            values.append(field.expression)
        else:
            values.append(field)

    return self.func("STRUCT", *values)
|
|
744
|
+
|
|
745
|
+
def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
    """Use ``name: type`` separators for fields nested inside STRUCT types."""
    parent = expression.parent
    if isinstance(parent, exp.DataType) and parent.is_type("struct"):
        sep = ": "
    return super().columndef_sql(expression, sep=sep)
|
|
755
|
+
|
|
756
|
+
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render the SET portion of an ALTER statement (serde, location, ...)."""

    def clause(keyword: str, body: str) -> str:
        # Emit " KEYWORD body" (or just " body") only when body is non-empty.
        if not body:
            return ""
        return f" {keyword} {body}" if keyword else f" {body}"

    # Evaluate renderers in the original order so any warnings fire the same.
    exprs = clause("", self.expressions(expression, flat=True))
    location = clause("LOCATION", self.sql(expression, "location"))
    file_format = clause(
        "FILEFORMAT", self.expressions(expression, key="file_format", flat=True, sep=" ")
    )
    serde = clause("SERDE", self.sql(expression, "serde"))
    tags = clause("TAGS", self.expressions(expression, key="tag", flat=True, sep=""))

    return f"SET{serde}{exprs}{location}{file_format}{tags}"
|
|
769
|
+
|
|
770
|
+
def serdeproperties_sql(self, expression: exp.SerdeProperties) -> str:
    """Render ``[WITH ]SERDEPROPERTIES (...)`` depending on the "with" arg."""
    props = self.expressions(expression, flat=True)
    if expression.args.get("with"):
        return f"WITH SERDEPROPERTIES ({props})"
    return f"SERDEPROPERTIES ({props})"
|
|
775
|
+
|
|
776
|
+
def exists_sql(self, expression: exp.Exists) -> str:
    """Render EXISTS as a plain function call when it carries an argument."""
    if not expression.expression:
        return super().exists_sql(expression)

    return self.function_fallback_sql(expression)
|
|
781
|
+
|
|
782
|
+
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    """Render TIME_TO_STR as DATE_FORMAT, unwrapping a TimeStrToTime arg."""
    value = expression.this
    # DATE_FORMAT can take the underlying string directly, so drop the wrapper.
    value = value.this if isinstance(value, exp.TimeStrToTime) else value
    return self.func("DATE_FORMAT", value, self.format_time(expression))
|