altimate-code 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +13 -13
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py
ADDED
|
@@ -0,0 +1,1393 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import typing as t
|
|
3
|
+
import datetime
|
|
4
|
+
from sqlglot import exp, generator, parser, tokens
|
|
5
|
+
from sqlglot.dialects.dialect import (
|
|
6
|
+
Dialect,
|
|
7
|
+
NormalizationStrategy,
|
|
8
|
+
arg_max_or_min_no_count,
|
|
9
|
+
build_date_delta,
|
|
10
|
+
build_formatted_time,
|
|
11
|
+
inline_array_sql,
|
|
12
|
+
json_extract_segments,
|
|
13
|
+
json_path_key_only_name,
|
|
14
|
+
length_or_char_length_sql,
|
|
15
|
+
no_pivot_sql,
|
|
16
|
+
build_json_extract_path,
|
|
17
|
+
rename_func,
|
|
18
|
+
remove_from_array_using_filter,
|
|
19
|
+
sha256_sql,
|
|
20
|
+
strposition_sql,
|
|
21
|
+
var_map_sql,
|
|
22
|
+
timestamptrunc_sql,
|
|
23
|
+
unit_to_var,
|
|
24
|
+
trim_sql,
|
|
25
|
+
)
|
|
26
|
+
from sqlglot.generator import Generator
|
|
27
|
+
from sqlglot.helper import is_int, seq_get
|
|
28
|
+
from sqlglot.tokens import Token, TokenType
|
|
29
|
+
from sqlglot.generator import unsupported_args
|
|
30
|
+
|
|
31
|
+
DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _build_date_format(args: t.List) -> exp.TimeToStr:
    """Build a TimeToStr node for formatDateTime-style args, honoring an optional timezone.

    The third positional argument, when present, is attached as the "zone" arg.
    """
    time_to_str = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    zone = seq_get(args, 2)
    if zone:
        time_to_str.set("zone", zone)
    return time_to_str
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime with the fromUnixTimestamp* family, picking the variant by scale.

    Seconds (or no scale) map to fromUnixTimestamp; milli/micro/nano scales map to the
    dedicated 64-bit variants; any other scale is rescaled to seconds via division first.
    """
    scale = expression.args.get("scale")
    value = expression.this

    # Fixed-precision variants keyed by scale.
    func_by_scale = {
        exp.UnixToTime.MILLIS: "fromUnixTimestamp64Milli",
        exp.UnixToTime.MICROS: "fromUnixTimestamp64Micro",
        exp.UnixToTime.NANOS: "fromUnixTimestamp64Nano",
    }

    if scale in (None, exp.UnixToTime.SECONDS):
        func_name = "fromUnixTimestamp"
    else:
        func_name = func_by_scale.get(scale)

    if func_name:
        return self.func(func_name, exp.cast(value, exp.DataType.Type.BIGINT))

    # Arbitrary scale: divide by 10^scale to get seconds before converting.
    rescaled = exp.Div(this=value, expression=exp.func("POW", 10, scale))
    return self.func("fromUnixTimestamp", exp.cast(rescaled, exp.DataType.Type.BIGINT))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _lower_func(sql: str) -> str:
|
|
66
|
+
index = sql.index("(")
|
|
67
|
+
return sql[:index].lower() + sql[index:]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    """Render Quantile using ClickHouse's parameterized syntax: quantile(level)(operand).

    An array of levels becomes quantiles(l1, l2, ...)(operand) instead.
    """
    level = expression.args["quantile"]
    operand = f"({self.sql(expression, 'this')})"

    if isinstance(level, exp.Array):
        parameterized = self.func("quantiles", *level)
    else:
        parameterized = self.func("quantile", level)

    return parameterized + operand
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    """Parse countIf: one argument maps to CountIf, more become a combined agg func."""
    if len(args) != 1:
        return exp.CombinedAggFunc(this="countIf", expressions=args)
    return exp.CountIf(this=seq_get(args, 0))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    """Parse STR_TO_DATE: three arguments stay anonymous; otherwise the parsed
    StrToDate is wrapped in a cast to DATETIME."""
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    parsed = exp.StrToDate.from_arg_list(args)
    datetime_type = exp.DataType.build(exp.DataType.Type.DATETIME)
    return exp.cast(parsed, datetime_type)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]:
    """Return a generator function for date/time delta expressions.

    When the expression carries a unit, it is rendered as
    name(unit, expression, this[, zone]); otherwise the plain renamed
    function form is emitted.
    """

    def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str:
        if expression.unit:
            return self.func(
                name,
                unit_to_var(expression),
                expression.expression,
                expression.this,
                expression.args.get("zone"),
            )
        return rename_func(name)(self, expression)

    return _delta_sql
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime) -> str:
    """Render TimeStrToTime as a CAST to a non-nullable DateTime64(6).

    When a target timezone is present and the timestamp is a string literal,
    any UTC offset embedded in the literal is stripped first, because
    ClickHouse encodes the timezone as a type parameter rather than as part
    of the timestamp value.
    """
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            # NOTE: the fractional part can legitimately contain a "-" only as part of a
            # negative UTC offset, so splitting on the first found separator is safe here.
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    """Render Map/VarMap: `{k: v, ...}` literal syntax inside SETTINGS, map(...) elsewhere."""
    parent = expression.parent
    in_settings = bool(parent and parent.arg_key == "settings")

    if not in_settings:
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not (isinstance(keys, exp.Array) and isinstance(values, exp.Array)):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    pairs = [
        f"{self.sql(k)}: {self.sql(v)}"
        for k, v in zip(keys.expressions, values.expressions)
    ]
    return "{" + ", ".join(pairs) + "}"
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class ClickHouse(Dialect):
|
|
186
|
+
NORMALIZE_FUNCTIONS: bool | str = False
|
|
187
|
+
NULL_ORDERING = "nulls_are_last"
|
|
188
|
+
SUPPORTS_USER_DEFINED_TYPES = False
|
|
189
|
+
SAFE_DIVISION = True
|
|
190
|
+
LOG_BASE_FIRST: t.Optional[bool] = None
|
|
191
|
+
FORCE_EARLY_ALIAS_REF_EXPANSION = True
|
|
192
|
+
PRESERVE_ORIGINAL_NAMES = True
|
|
193
|
+
NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
|
|
194
|
+
IDENTIFIERS_CAN_START_WITH_DIGIT = True
|
|
195
|
+
HEX_STRING_IS_INTEGER_TYPE = True
|
|
196
|
+
|
|
197
|
+
# https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
|
|
198
|
+
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
|
|
199
|
+
|
|
200
|
+
UNESCAPED_SEQUENCES = {
|
|
201
|
+
"\\0": "\0",
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}
|
|
205
|
+
|
|
206
|
+
SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
|
|
207
|
+
exp.Except: False,
|
|
208
|
+
exp.Intersect: False,
|
|
209
|
+
exp.Union: None,
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
|
|
213
|
+
# Clickhouse allows VALUES to have an embedded structure e.g:
|
|
214
|
+
# VALUES('person String, place String', ('Noah', 'Paris'), ...)
|
|
215
|
+
# In this case, we don't want to qualify the columns
|
|
216
|
+
values = expression.expressions[0].expressions
|
|
217
|
+
|
|
218
|
+
structure = (
|
|
219
|
+
values[0]
|
|
220
|
+
if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
|
|
221
|
+
else None
|
|
222
|
+
)
|
|
223
|
+
if structure:
|
|
224
|
+
# Split each column definition into the column name e.g:
|
|
225
|
+
# 'person String, place String' -> ['person', 'place']
|
|
226
|
+
structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
|
|
227
|
+
column_aliases = [
|
|
228
|
+
exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
|
|
229
|
+
]
|
|
230
|
+
else:
|
|
231
|
+
# Default column aliases in CH are "c1", "c2", etc.
|
|
232
|
+
column_aliases = [
|
|
233
|
+
exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
|
|
234
|
+
]
|
|
235
|
+
|
|
236
|
+
return column_aliases
|
|
237
|
+
|
|
238
|
+
class Tokenizer(tokens.Tokenizer):
|
|
239
|
+
COMMENTS = ["--", "#", "#!", ("/*", "*/")]
|
|
240
|
+
IDENTIFIERS = ['"', "`"]
|
|
241
|
+
IDENTIFIER_ESCAPES = ["\\"]
|
|
242
|
+
STRING_ESCAPES = ["'", "\\"]
|
|
243
|
+
BIT_STRINGS = [("0b", "")]
|
|
244
|
+
HEX_STRINGS = [("0x", ""), ("0X", "")]
|
|
245
|
+
HEREDOC_STRINGS = ["$"]
|
|
246
|
+
|
|
247
|
+
KEYWORDS = {
|
|
248
|
+
**tokens.Tokenizer.KEYWORDS,
|
|
249
|
+
".:": TokenType.DOTCOLON,
|
|
250
|
+
"ATTACH": TokenType.COMMAND,
|
|
251
|
+
"DATE32": TokenType.DATE32,
|
|
252
|
+
"DATETIME64": TokenType.DATETIME64,
|
|
253
|
+
"DICTIONARY": TokenType.DICTIONARY,
|
|
254
|
+
"DYNAMIC": TokenType.DYNAMIC,
|
|
255
|
+
"ENUM8": TokenType.ENUM8,
|
|
256
|
+
"ENUM16": TokenType.ENUM16,
|
|
257
|
+
"EXCHANGE": TokenType.COMMAND,
|
|
258
|
+
"FINAL": TokenType.FINAL,
|
|
259
|
+
"FIXEDSTRING": TokenType.FIXEDSTRING,
|
|
260
|
+
"FLOAT32": TokenType.FLOAT,
|
|
261
|
+
"FLOAT64": TokenType.DOUBLE,
|
|
262
|
+
"GLOBAL": TokenType.GLOBAL,
|
|
263
|
+
"LOWCARDINALITY": TokenType.LOWCARDINALITY,
|
|
264
|
+
"MAP": TokenType.MAP,
|
|
265
|
+
"NESTED": TokenType.NESTED,
|
|
266
|
+
"NOTHING": TokenType.NOTHING,
|
|
267
|
+
"SAMPLE": TokenType.TABLE_SAMPLE,
|
|
268
|
+
"TUPLE": TokenType.STRUCT,
|
|
269
|
+
"UINT16": TokenType.USMALLINT,
|
|
270
|
+
"UINT32": TokenType.UINT,
|
|
271
|
+
"UINT64": TokenType.UBIGINT,
|
|
272
|
+
"UINT8": TokenType.UTINYINT,
|
|
273
|
+
"IPV4": TokenType.IPV4,
|
|
274
|
+
"IPV6": TokenType.IPV6,
|
|
275
|
+
"POINT": TokenType.POINT,
|
|
276
|
+
"RING": TokenType.RING,
|
|
277
|
+
"LINESTRING": TokenType.LINESTRING,
|
|
278
|
+
"MULTILINESTRING": TokenType.MULTILINESTRING,
|
|
279
|
+
"POLYGON": TokenType.POLYGON,
|
|
280
|
+
"MULTIPOLYGON": TokenType.MULTIPOLYGON,
|
|
281
|
+
"AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
|
|
282
|
+
"SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
|
|
283
|
+
"SYSTEM": TokenType.COMMAND,
|
|
284
|
+
"PREWHERE": TokenType.PREWHERE,
|
|
285
|
+
}
|
|
286
|
+
KEYWORDS.pop("/*+")
|
|
287
|
+
|
|
288
|
+
SINGLE_TOKENS = {
|
|
289
|
+
**tokens.Tokenizer.SINGLE_TOKENS,
|
|
290
|
+
"$": TokenType.HEREDOC_STRING,
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
class Parser(parser.Parser):
|
|
294
|
+
# Tested in ClickHouse's playground, it seems that the following two queries do the same thing
|
|
295
|
+
# * select x from t1 union all select x from t2 limit 1;
|
|
296
|
+
# * select x from t1 union all (select x from t2 limit 1);
|
|
297
|
+
MODIFIERS_ATTACHED_TO_SET_OP = False
|
|
298
|
+
INTERVAL_SPANS = False
|
|
299
|
+
OPTIONAL_ALIAS_TOKEN_CTE = False
|
|
300
|
+
|
|
301
|
+
FUNCTIONS = {
|
|
302
|
+
**parser.Parser.FUNCTIONS,
|
|
303
|
+
"ANY": exp.AnyValue.from_arg_list,
|
|
304
|
+
"ARRAYSUM": exp.ArraySum.from_arg_list,
|
|
305
|
+
"COUNTIF": _build_count_if,
|
|
306
|
+
"DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
|
|
307
|
+
"DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
|
|
308
|
+
"DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
|
|
309
|
+
"DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
|
|
310
|
+
"DATE_FORMAT": _build_date_format,
|
|
311
|
+
"DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
|
|
312
|
+
"DATESUB": build_date_delta(exp.DateSub, default_unit=None),
|
|
313
|
+
"FORMATDATETIME": _build_date_format,
|
|
314
|
+
"JSONEXTRACTSTRING": build_json_extract_path(
|
|
315
|
+
exp.JSONExtractScalar, zero_based_indexing=False
|
|
316
|
+
),
|
|
317
|
+
"LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
|
|
318
|
+
"MAP": parser.build_var_map,
|
|
319
|
+
"MATCH": exp.RegexpLike.from_arg_list,
|
|
320
|
+
"RANDCANONICAL": exp.Rand.from_arg_list,
|
|
321
|
+
"STR_TO_DATE": _build_str_to_date,
|
|
322
|
+
"TUPLE": exp.Struct.from_arg_list,
|
|
323
|
+
"TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
|
|
324
|
+
"TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
|
|
325
|
+
"TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
|
|
326
|
+
"TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
|
|
327
|
+
"UNIQ": exp.ApproxDistinct.from_arg_list,
|
|
328
|
+
"XOR": lambda args: exp.Xor(expressions=args),
|
|
329
|
+
"MD5": exp.MD5Digest.from_arg_list,
|
|
330
|
+
"SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
|
|
331
|
+
"SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
|
|
332
|
+
"EDITDISTANCE": exp.Levenshtein.from_arg_list,
|
|
333
|
+
"LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
|
|
334
|
+
}
|
|
335
|
+
FUNCTIONS.pop("TRANSFORM")
|
|
336
|
+
|
|
337
|
+
AGG_FUNCTIONS = {
|
|
338
|
+
"count",
|
|
339
|
+
"min",
|
|
340
|
+
"max",
|
|
341
|
+
"sum",
|
|
342
|
+
"avg",
|
|
343
|
+
"any",
|
|
344
|
+
"stddevPop",
|
|
345
|
+
"stddevSamp",
|
|
346
|
+
"varPop",
|
|
347
|
+
"varSamp",
|
|
348
|
+
"corr",
|
|
349
|
+
"covarPop",
|
|
350
|
+
"covarSamp",
|
|
351
|
+
"entropy",
|
|
352
|
+
"exponentialMovingAverage",
|
|
353
|
+
"intervalLengthSum",
|
|
354
|
+
"kolmogorovSmirnovTest",
|
|
355
|
+
"mannWhitneyUTest",
|
|
356
|
+
"median",
|
|
357
|
+
"rankCorr",
|
|
358
|
+
"sumKahan",
|
|
359
|
+
"studentTTest",
|
|
360
|
+
"welchTTest",
|
|
361
|
+
"anyHeavy",
|
|
362
|
+
"anyLast",
|
|
363
|
+
"boundingRatio",
|
|
364
|
+
"first_value",
|
|
365
|
+
"last_value",
|
|
366
|
+
"argMin",
|
|
367
|
+
"argMax",
|
|
368
|
+
"avgWeighted",
|
|
369
|
+
"topK",
|
|
370
|
+
"topKWeighted",
|
|
371
|
+
"deltaSum",
|
|
372
|
+
"deltaSumTimestamp",
|
|
373
|
+
"groupArray",
|
|
374
|
+
"groupArrayLast",
|
|
375
|
+
"groupUniqArray",
|
|
376
|
+
"groupArrayInsertAt",
|
|
377
|
+
"groupArrayMovingAvg",
|
|
378
|
+
"groupArrayMovingSum",
|
|
379
|
+
"groupArraySample",
|
|
380
|
+
"groupBitAnd",
|
|
381
|
+
"groupBitOr",
|
|
382
|
+
"groupBitXor",
|
|
383
|
+
"groupBitmap",
|
|
384
|
+
"groupBitmapAnd",
|
|
385
|
+
"groupBitmapOr",
|
|
386
|
+
"groupBitmapXor",
|
|
387
|
+
"sumWithOverflow",
|
|
388
|
+
"sumMap",
|
|
389
|
+
"minMap",
|
|
390
|
+
"maxMap",
|
|
391
|
+
"skewSamp",
|
|
392
|
+
"skewPop",
|
|
393
|
+
"kurtSamp",
|
|
394
|
+
"kurtPop",
|
|
395
|
+
"uniq",
|
|
396
|
+
"uniqExact",
|
|
397
|
+
"uniqCombined",
|
|
398
|
+
"uniqCombined64",
|
|
399
|
+
"uniqHLL12",
|
|
400
|
+
"uniqTheta",
|
|
401
|
+
"quantile",
|
|
402
|
+
"quantiles",
|
|
403
|
+
"quantileExact",
|
|
404
|
+
"quantilesExact",
|
|
405
|
+
"quantileExactLow",
|
|
406
|
+
"quantilesExactLow",
|
|
407
|
+
"quantileExactHigh",
|
|
408
|
+
"quantilesExactHigh",
|
|
409
|
+
"quantileExactWeighted",
|
|
410
|
+
"quantilesExactWeighted",
|
|
411
|
+
"quantileTiming",
|
|
412
|
+
"quantilesTiming",
|
|
413
|
+
"quantileTimingWeighted",
|
|
414
|
+
"quantilesTimingWeighted",
|
|
415
|
+
"quantileDeterministic",
|
|
416
|
+
"quantilesDeterministic",
|
|
417
|
+
"quantileTDigest",
|
|
418
|
+
"quantilesTDigest",
|
|
419
|
+
"quantileTDigestWeighted",
|
|
420
|
+
"quantilesTDigestWeighted",
|
|
421
|
+
"quantileBFloat16",
|
|
422
|
+
"quantilesBFloat16",
|
|
423
|
+
"quantileBFloat16Weighted",
|
|
424
|
+
"quantilesBFloat16Weighted",
|
|
425
|
+
"simpleLinearRegression",
|
|
426
|
+
"stochasticLinearRegression",
|
|
427
|
+
"stochasticLogisticRegression",
|
|
428
|
+
"categoricalInformationValue",
|
|
429
|
+
"contingency",
|
|
430
|
+
"cramersV",
|
|
431
|
+
"cramersVBiasCorrected",
|
|
432
|
+
"theilsU",
|
|
433
|
+
"maxIntersections",
|
|
434
|
+
"maxIntersectionsPosition",
|
|
435
|
+
"meanZTest",
|
|
436
|
+
"quantileInterpolatedWeighted",
|
|
437
|
+
"quantilesInterpolatedWeighted",
|
|
438
|
+
"quantileGK",
|
|
439
|
+
"quantilesGK",
|
|
440
|
+
"sparkBar",
|
|
441
|
+
"sumCount",
|
|
442
|
+
"largestTriangleThreeBuckets",
|
|
443
|
+
"histogram",
|
|
444
|
+
"sequenceMatch",
|
|
445
|
+
"sequenceCount",
|
|
446
|
+
"windowFunnel",
|
|
447
|
+
"retention",
|
|
448
|
+
"uniqUpTo",
|
|
449
|
+
"sequenceNextNode",
|
|
450
|
+
"exponentialTimeDecayedAvg",
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
AGG_FUNCTIONS_SUFFIXES = [
|
|
454
|
+
"If",
|
|
455
|
+
"Array",
|
|
456
|
+
"ArrayIf",
|
|
457
|
+
"Map",
|
|
458
|
+
"SimpleState",
|
|
459
|
+
"State",
|
|
460
|
+
"Merge",
|
|
461
|
+
"MergeState",
|
|
462
|
+
"ForEach",
|
|
463
|
+
"Distinct",
|
|
464
|
+
"OrDefault",
|
|
465
|
+
"OrNull",
|
|
466
|
+
"Resample",
|
|
467
|
+
"ArgMin",
|
|
468
|
+
"ArgMax",
|
|
469
|
+
]
|
|
470
|
+
|
|
471
|
+
FUNC_TOKENS = {
|
|
472
|
+
*parser.Parser.FUNC_TOKENS,
|
|
473
|
+
TokenType.AND,
|
|
474
|
+
TokenType.OR,
|
|
475
|
+
TokenType.SET,
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}
|
|
479
|
+
|
|
480
|
+
ID_VAR_TOKENS = {
|
|
481
|
+
*parser.Parser.ID_VAR_TOKENS,
|
|
482
|
+
TokenType.LIKE,
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
AGG_FUNC_MAPPING = (
|
|
486
|
+
lambda functions, suffixes: {
|
|
487
|
+
f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
|
|
488
|
+
}
|
|
489
|
+
)(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
|
|
490
|
+
|
|
491
|
+
FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}
|
|
492
|
+
|
|
493
|
+
FUNCTION_PARSERS = {
|
|
494
|
+
**parser.Parser.FUNCTION_PARSERS,
|
|
495
|
+
"ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
|
|
496
|
+
"QUANTILE": lambda self: self._parse_quantile(),
|
|
497
|
+
"MEDIAN": lambda self: self._parse_quantile(),
|
|
498
|
+
"COLUMNS": lambda self: self._parse_columns(),
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
FUNCTION_PARSERS.pop("MATCH")
|
|
502
|
+
|
|
503
|
+
PROPERTY_PARSERS = {
|
|
504
|
+
**parser.Parser.PROPERTY_PARSERS,
|
|
505
|
+
"ENGINE": lambda self: self._parse_engine_property(),
|
|
506
|
+
}
|
|
507
|
+
PROPERTY_PARSERS.pop("DYNAMIC")
|
|
508
|
+
|
|
509
|
+
NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
|
|
510
|
+
NO_PAREN_FUNCTION_PARSERS.pop("ANY")
|
|
511
|
+
|
|
512
|
+
NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
|
|
513
|
+
NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)
|
|
514
|
+
|
|
515
|
+
RANGE_PARSERS = {
|
|
516
|
+
**parser.Parser.RANGE_PARSERS,
|
|
517
|
+
TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
# The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
|
|
521
|
+
# the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
|
|
522
|
+
COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
|
|
523
|
+
COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)
|
|
524
|
+
|
|
525
|
+
JOIN_KINDS = {
|
|
526
|
+
*parser.Parser.JOIN_KINDS,
|
|
527
|
+
TokenType.ANY,
|
|
528
|
+
TokenType.ASOF,
|
|
529
|
+
TokenType.ARRAY,
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
|
|
533
|
+
TokenType.ANY,
|
|
534
|
+
TokenType.ARRAY,
|
|
535
|
+
TokenType.FINAL,
|
|
536
|
+
TokenType.FORMAT,
|
|
537
|
+
TokenType.SETTINGS,
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
|
|
541
|
+
TokenType.FORMAT,
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
LOG_DEFAULTS_TO_LN = True
|
|
545
|
+
|
|
546
|
+
QUERY_MODIFIER_PARSERS = {
|
|
547
|
+
**parser.Parser.QUERY_MODIFIER_PARSERS,
|
|
548
|
+
TokenType.SETTINGS: lambda self: (
|
|
549
|
+
"settings",
|
|
550
|
+
self._advance() or self._parse_csv(self._parse_assignment),
|
|
551
|
+
),
|
|
552
|
+
TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
CONSTRAINT_PARSERS = {
|
|
556
|
+
**parser.Parser.CONSTRAINT_PARSERS,
|
|
557
|
+
"INDEX": lambda self: self._parse_index_constraint(),
|
|
558
|
+
"CODEC": lambda self: self._parse_compress(),
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
ALTER_PARSERS = {
|
|
562
|
+
**parser.Parser.ALTER_PARSERS,
|
|
563
|
+
"REPLACE": lambda self: self._parse_alter_table_replace(),
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
SCHEMA_UNNAMED_CONSTRAINTS = {
|
|
567
|
+
*parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
|
|
568
|
+
"INDEX",
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
PLACEHOLDER_PARSERS = {
|
|
572
|
+
**parser.Parser.PLACEHOLDER_PARSERS,
|
|
573
|
+
TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
def _parse_engine_property(self) -> exp.EngineProperty:
|
|
577
|
+
self._match(TokenType.EQ)
|
|
578
|
+
return self.expression(
|
|
579
|
+
exp.EngineProperty,
|
|
580
|
+
this=self._parse_field(any_token=True, anonymous_func=True),
|
|
581
|
+
)
|
|
582
|
+
|
|
583
|
+
# https://clickhouse.com/docs/en/sql-reference/statements/create/function
|
|
584
|
+
def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
|
|
585
|
+
return self._parse_lambda()
|
|
586
|
+
|
|
587
|
+
        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """Parse a type, tagging it non-nullable by default (ClickHouse semantics)."""
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
|
603
|
+
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
|
|
604
|
+
index = self._index
|
|
605
|
+
this = self._parse_bitwise()
|
|
606
|
+
if self._match(TokenType.FROM):
|
|
607
|
+
self._retreat(index)
|
|
608
|
+
return super()._parse_extract()
|
|
609
|
+
|
|
610
|
+
# We return Anonymous here because extract and regexpExtract have different semantics,
|
|
611
|
+
# so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
|
|
612
|
+
# `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
|
|
613
|
+
#
|
|
614
|
+
# TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
|
|
615
|
+
self._match(TokenType.COMMA)
|
|
616
|
+
return self.expression(
|
|
617
|
+
exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
|
|
618
|
+
)
|
|
619
|
+
|
|
620
|
+
def _parse_assignment(self) -> t.Optional[exp.Expression]:
|
|
621
|
+
this = super()._parse_assignment()
|
|
622
|
+
|
|
623
|
+
if self._match(TokenType.PLACEHOLDER):
|
|
624
|
+
return self.expression(
|
|
625
|
+
exp.If,
|
|
626
|
+
this=this,
|
|
627
|
+
true=self._parse_assignment(),
|
|
628
|
+
false=self._match(TokenType.COLON) and self._parse_assignment(),
|
|
629
|
+
)
|
|
630
|
+
|
|
631
|
+
return this
|
|
632
|
+
|
|
633
|
+
        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            # The kind is either a regular type or the special "Identifier" keyword.
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                # Not a query parameter after all - rewind so other parsers can try.
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            # Unquoted names are stored as plain variables rather than identifiers.
            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)
|
657
|
+
def _parse_bracket(
|
|
658
|
+
self, this: t.Optional[exp.Expression] = None
|
|
659
|
+
) -> t.Optional[exp.Expression]:
|
|
660
|
+
l_brace = self._match(TokenType.L_BRACE, advance=False)
|
|
661
|
+
bracket = super()._parse_bracket(this)
|
|
662
|
+
|
|
663
|
+
if l_brace and isinstance(bracket, exp.Struct):
|
|
664
|
+
varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
|
|
665
|
+
for expression in bracket.expressions:
|
|
666
|
+
if not isinstance(expression, exp.PropertyEQ):
|
|
667
|
+
break
|
|
668
|
+
|
|
669
|
+
varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
|
|
670
|
+
varmap.args["values"].append("expressions", expression.expression)
|
|
671
|
+
|
|
672
|
+
return varmap
|
|
673
|
+
|
|
674
|
+
return bracket
|
|
675
|
+
|
|
676
|
+
def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
|
|
677
|
+
this = super()._parse_in(this)
|
|
678
|
+
this.set("is_global", is_global)
|
|
679
|
+
return this
|
|
680
|
+
|
|
681
|
+
def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
|
|
682
|
+
is_negated = self._match(TokenType.NOT)
|
|
683
|
+
this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
|
|
684
|
+
return self.expression(exp.Not, this=this) if is_negated else this
|
|
685
|
+
|
|
686
|
+
        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a table reference, handling the trailing FINAL modifier.

            NOTE(review): `parse_partition` is accepted for signature compatibility but
            is not forwarded to the base parser - confirm this is intentional.
            """
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                # An aliased generate_series() with no explicit column list gets a
                # default output column named "generate_series".
                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            # `tbl FINAL` wraps the table in a Final node.
            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this
|
715
|
+
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
|
|
716
|
+
return super()._parse_position(haystack_first=True)
|
|
717
|
+
|
|
718
|
+
# https://clickhouse.com/docs/en/sql-reference/statements/select/with/
|
|
719
|
+
def _parse_cte(self) -> t.Optional[exp.CTE]:
|
|
720
|
+
# WITH <identifier> AS <subquery expression>
|
|
721
|
+
cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)
|
|
722
|
+
|
|
723
|
+
if not cte:
|
|
724
|
+
# WITH <expression> AS <identifier>
|
|
725
|
+
cte = self.expression(
|
|
726
|
+
exp.CTE,
|
|
727
|
+
this=self._parse_assignment(),
|
|
728
|
+
alias=self._parse_table_alias(),
|
|
729
|
+
scalar=True,
|
|
730
|
+
)
|
|
731
|
+
|
|
732
|
+
return cte
|
|
733
|
+
|
|
734
|
+
        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            """Return (global, side, kind) tokens; the kind may precede the side here."""
            is_global = self._match(TokenType.GLOBAL) and self._prev
            # Peek (advance=False) to see whether the kind comes before the side.
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            # Standard order: side then kind.
            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )
|
751
|
+
def _parse_join(
|
|
752
|
+
self, skip_join_token: bool = False, parse_bracket: bool = False
|
|
753
|
+
) -> t.Optional[exp.Join]:
|
|
754
|
+
join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
|
|
755
|
+
if join:
|
|
756
|
+
join.set("global", join.args.pop("method", None))
|
|
757
|
+
|
|
758
|
+
# tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
|
|
759
|
+
# https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
|
|
760
|
+
if join.kind == "ARRAY":
|
|
761
|
+
for table in join.find_all(exp.Table):
|
|
762
|
+
table.replace(table.to_column())
|
|
763
|
+
|
|
764
|
+
return join
|
|
765
|
+
|
|
766
|
+
        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a function call, mapping combinator-suffixed and parameterized
            aggregates (per AGG_FUNC_MAPPING) onto dedicated expression classes."""
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    # A non-empty suffix means a combined (combinator) aggregate.
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr
|
821
|
+
def _parse_func_params(
|
|
822
|
+
self, this: t.Optional[exp.Func] = None
|
|
823
|
+
) -> t.Optional[t.List[exp.Expression]]:
|
|
824
|
+
if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
|
|
825
|
+
return self._parse_csv(self._parse_lambda)
|
|
826
|
+
|
|
827
|
+
if self._match(TokenType.L_PAREN):
|
|
828
|
+
params = self._parse_csv(self._parse_lambda)
|
|
829
|
+
self._match_r_paren(this)
|
|
830
|
+
return params
|
|
831
|
+
|
|
832
|
+
return None
|
|
833
|
+
|
|
834
|
+
def _parse_quantile(self) -> exp.Quantile:
|
|
835
|
+
this = self._parse_lambda()
|
|
836
|
+
params = self._parse_func_params()
|
|
837
|
+
if params:
|
|
838
|
+
return self.expression(exp.Quantile, this=params[0], quantile=this)
|
|
839
|
+
return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
|
|
840
|
+
|
|
841
|
+
def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
|
|
842
|
+
return super()._parse_wrapped_id_vars(optional=True)
|
|
843
|
+
|
|
844
|
+
def _parse_primary_key(
|
|
845
|
+
self, wrapped_optional: bool = False, in_props: bool = False
|
|
846
|
+
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
|
|
847
|
+
return super()._parse_primary_key(
|
|
848
|
+
wrapped_optional=wrapped_optional or in_props, in_props=in_props
|
|
849
|
+
)
|
|
850
|
+
|
|
851
|
+
        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            """Parse ON CLUSTER <name> into an OnCluster property, if present."""
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                # NOTE(review): when CLUSTER matched but no name follows, the index is
                # not retreated on this path - confirm this is intentional.
            else:
                self._retreat(index)
            return None
|
861
|
+
def _parse_index_constraint(
|
|
862
|
+
self, kind: t.Optional[str] = None
|
|
863
|
+
) -> exp.IndexColumnConstraint:
|
|
864
|
+
# INDEX name1 expr TYPE type1(args) GRANULARITY value
|
|
865
|
+
this = self._parse_id_var()
|
|
866
|
+
expression = self._parse_assignment()
|
|
867
|
+
|
|
868
|
+
index_type = self._match_text_seq("TYPE") and (
|
|
869
|
+
self._parse_function() or self._parse_var()
|
|
870
|
+
)
|
|
871
|
+
|
|
872
|
+
granularity = self._match_text_seq("GRANULARITY") and self._parse_term()
|
|
873
|
+
|
|
874
|
+
return self.expression(
|
|
875
|
+
exp.IndexColumnConstraint,
|
|
876
|
+
this=this,
|
|
877
|
+
expression=expression,
|
|
878
|
+
index_type=index_type,
|
|
879
|
+
granularity=granularity,
|
|
880
|
+
)
|
|
881
|
+
|
|
882
|
+
def _parse_partition(self) -> t.Optional[exp.Partition]:
|
|
883
|
+
# https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
|
|
884
|
+
if not self._match(TokenType.PARTITION):
|
|
885
|
+
return None
|
|
886
|
+
|
|
887
|
+
if self._match_text_seq("ID"):
|
|
888
|
+
# Corresponds to the PARTITION ID <string_value> syntax
|
|
889
|
+
expressions: t.List[exp.Expression] = [
|
|
890
|
+
self.expression(exp.PartitionId, this=self._parse_string())
|
|
891
|
+
]
|
|
892
|
+
else:
|
|
893
|
+
expressions = self._parse_expressions()
|
|
894
|
+
|
|
895
|
+
return self.expression(exp.Partition, expressions=expressions)
|
|
896
|
+
|
|
897
|
+
def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
|
|
898
|
+
partition = self._parse_partition()
|
|
899
|
+
|
|
900
|
+
if not partition or not self._match(TokenType.FROM):
|
|
901
|
+
return None
|
|
902
|
+
|
|
903
|
+
return self.expression(
|
|
904
|
+
exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
|
|
905
|
+
)
|
|
906
|
+
|
|
907
|
+
def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
|
|
908
|
+
if not self._match_text_seq("PROJECTION"):
|
|
909
|
+
return None
|
|
910
|
+
|
|
911
|
+
return self.expression(
|
|
912
|
+
exp.ProjectionDef,
|
|
913
|
+
this=self._parse_id_var(),
|
|
914
|
+
expression=self._parse_wrapped(self._parse_statement),
|
|
915
|
+
)
|
|
916
|
+
|
|
917
|
+
def _parse_constraint(self) -> t.Optional[exp.Expression]:
|
|
918
|
+
return super()._parse_constraint() or self._parse_projection_def()
|
|
919
|
+
|
|
920
|
+
def _parse_alias(
|
|
921
|
+
self, this: t.Optional[exp.Expression], explicit: bool = False
|
|
922
|
+
) -> t.Optional[exp.Expression]:
|
|
923
|
+
# In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
|
|
924
|
+
# so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
|
|
925
|
+
if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
|
|
926
|
+
return this
|
|
927
|
+
|
|
928
|
+
return super()._parse_alias(this=this, explicit=explicit)
|
|
929
|
+
|
|
930
|
+
def _parse_expression(self) -> t.Optional[exp.Expression]:
|
|
931
|
+
this = super()._parse_expression()
|
|
932
|
+
|
|
933
|
+
# Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
|
|
934
|
+
while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
|
|
935
|
+
this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
|
|
936
|
+
self._match(TokenType.R_PAREN)
|
|
937
|
+
|
|
938
|
+
return this
|
|
939
|
+
|
|
940
|
+
        def _parse_columns(self) -> exp.Expression:
            """Parse a COLUMNS(...) expression with optional chained APPLY modifiers."""
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            # Each `) APPLY (` sequence wraps the current node in another Apply; the
            # trailing R_PAREN of the APPLY call is consumed inside the loop body.
            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this
|
948
|
+
def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
|
|
949
|
+
value = super()._parse_value(values=values)
|
|
950
|
+
if not value:
|
|
951
|
+
return None
|
|
952
|
+
|
|
953
|
+
# In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
|
|
954
|
+
# to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
|
|
955
|
+
# In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
|
|
956
|
+
# but the final result is not altered by the extra parentheses.
|
|
957
|
+
# Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
|
|
958
|
+
expressions = value.expressions
|
|
959
|
+
if values and not isinstance(expressions[-1], exp.Tuple):
|
|
960
|
+
value.set(
|
|
961
|
+
"expressions",
|
|
962
|
+
[self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
|
|
963
|
+
)
|
|
964
|
+
|
|
965
|
+
return value
|
|
966
|
+
|
|
967
|
+
    class Generator(generator.Generator):
        # Generator feature flags tuned for ClickHouse SQL output.
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        ARRAY_SIZE_NAME = "LENGTH"
        WRAP_DERIVED_VALUES = False

        # All of these string-ish types collapse to ClickHouse's single String type.
        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        # Abstract type -> ClickHouse type-name spelling.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NOTHING: "Nothing",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }

        # Expression-node -> SQL-generation overrides for ClickHouse spellings.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayConcat: rename_func("arrayConcat"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: _map_sql,
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.EndsWith: rename_func("endsWith"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        # Where each property renders within a CREATE statement.
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }
|
1171
|
+
def strtodate_sql(self, expression: exp.StrToDate) -> str:
|
|
1172
|
+
strtodate_sql = self.function_fallback_sql(expression)
|
|
1173
|
+
|
|
1174
|
+
if not isinstance(expression.parent, exp.Cast):
|
|
1175
|
+
# StrToDate returns DATEs in other dialects (eg. postgres), so
|
|
1176
|
+
# this branch aims to improve the transpilation to clickhouse
|
|
1177
|
+
return self.cast_sql(exp.cast(expression, "DATE"))
|
|
1178
|
+
|
|
1179
|
+
return strtodate_sql
|
|
1180
|
+
|
|
1181
|
+
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
|
|
1182
|
+
this = expression.this
|
|
1183
|
+
|
|
1184
|
+
if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
|
|
1185
|
+
return self.sql(this)
|
|
1186
|
+
|
|
1187
|
+
return super().cast_sql(expression, safe_prefix=safe_prefix)
|
|
1188
|
+
|
|
1189
|
+
def trycast_sql(self, expression: exp.TryCast) -> str:
|
|
1190
|
+
dtype = expression.to
|
|
1191
|
+
if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
|
|
1192
|
+
# Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
|
|
1193
|
+
dtype.set("nullable", True)
|
|
1194
|
+
|
|
1195
|
+
return super().cast_sql(expression)
|
|
1196
|
+
|
|
1197
|
+
def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
|
|
1198
|
+
this = self.json_path_part(expression.this)
|
|
1199
|
+
return str(int(this) + 1) if is_int(this) else this
|
|
1200
|
+
|
|
1201
|
+
def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
|
|
1202
|
+
return f"AS {self.sql(expression, 'this')}"
|
|
1203
|
+
|
|
1204
|
+
def _any_to_has(
|
|
1205
|
+
self,
|
|
1206
|
+
expression: exp.EQ | exp.NEQ,
|
|
1207
|
+
default: t.Callable[[t.Any], str],
|
|
1208
|
+
prefix: str = "",
|
|
1209
|
+
) -> str:
|
|
1210
|
+
if isinstance(expression.left, exp.Any):
|
|
1211
|
+
arr = expression.left
|
|
1212
|
+
this = expression.right
|
|
1213
|
+
elif isinstance(expression.right, exp.Any):
|
|
1214
|
+
arr = expression.right
|
|
1215
|
+
this = expression.left
|
|
1216
|
+
else:
|
|
1217
|
+
return default(expression)
|
|
1218
|
+
|
|
1219
|
+
return prefix + self.func("has", arr.this.unnest(), this)
|
|
1220
|
+
|
|
1221
|
+
def eq_sql(self, expression: exp.EQ) -> str:
|
|
1222
|
+
return self._any_to_has(expression, super().eq_sql)
|
|
1223
|
+
|
|
1224
|
+
def neq_sql(self, expression: exp.NEQ) -> str:
|
|
1225
|
+
return self._any_to_has(expression, super().neq_sql, "NOT ")
|
|
1226
|
+
|
|
1227
|
+
def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
|
|
1228
|
+
# Manually add a flag to make the search case-insensitive
|
|
1229
|
+
regex = self.func("CONCAT", "'(?i)'", expression.expression)
|
|
1230
|
+
return self.func("match", expression.this, regex)
|
|
1231
|
+
|
|
1232
|
+
        def datatype_sql(self, expression: exp.DataType) -> str:
            """Generate a type name, collapsing string aliases and adding Nullable(...)."""
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype
|
1264
|
+
def cte_sql(self, expression: exp.CTE) -> str:
|
|
1265
|
+
if expression.args.get("scalar"):
|
|
1266
|
+
this = self.sql(expression, "this")
|
|
1267
|
+
alias = self.sql(expression, "alias")
|
|
1268
|
+
return f"{this} AS {alias}"
|
|
1269
|
+
|
|
1270
|
+
return super().cte_sql(expression)
|
|
1271
|
+
|
|
1272
|
+
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
|
|
1273
|
+
return super().after_limit_modifiers(expression) + [
|
|
1274
|
+
(
|
|
1275
|
+
self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
|
|
1276
|
+
if expression.args.get("settings")
|
|
1277
|
+
else ""
|
|
1278
|
+
),
|
|
1279
|
+
(
|
|
1280
|
+
self.seg("FORMAT ") + self.sql(expression, "format")
|
|
1281
|
+
if expression.args.get("format")
|
|
1282
|
+
else ""
|
|
1283
|
+
),
|
|
1284
|
+
]
|
|
1285
|
+
|
|
1286
|
+
def placeholder_sql(self, expression: exp.Placeholder) -> str:
    """Render a typed query parameter, e.g. `{name: String}`."""
    kind = self.sql(expression, "kind")
    return "{" + f"{expression.name}: {kind}" + "}"
|
|
1288
|
+
|
|
1289
|
+
def oncluster_sql(self, expression: exp.OnCluster) -> str:
    """Render an `ON CLUSTER <name>` clause."""
    cluster = self.sql(expression, "this")
    return f"ON CLUSTER {cluster}"
|
|
1291
|
+
|
|
1292
|
+
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    """Place POST_NAME properties (e.g. ON CLUSTER) between the created
    object's name and its column schema, as ClickHouse requires."""
    post_name_props = locations.get(exp.Properties.Location.POST_NAME)
    if not (expression.kind in self.ON_CLUSTER_TARGETS and post_name_props):
        return super().createable_sql(expression, locations)

    # When the target carries a schema, its name lives on the Schema node.
    target = expression.this if isinstance(expression.this, exp.Schema) else expression
    name = self.sql(target, "this")
    props = " ".join(self.sql(prop) for prop in post_name_props)

    schema = self.schema_columns_sql(expression.this)
    if schema:
        schema = f"{self.sep()}{schema}"

    return f"{name}{self.sep()}{props}{schema}"
|
|
1309
|
+
|
|
1310
|
+
def create_sql(self, expression: exp.Create) -> str:
    """Render CREATE; in CTAS statements the COMMENT property must come
    last, i.e. after the (parenthesized) query."""
    comment_prop = None
    query = expression.expression

    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            # Detach the comment so it can be appended after the query,
            # and wrap the query in parentheses.
            comment_prop.pop()
            query.replace(exp.paren(query))

    base_sql = super().create_sql(expression)
    comment_sql = self.sql(comment_prop)

    return f"{base_sql} {comment_sql}" if comment_sql else base_sql
|
|
1327
|
+
|
|
1328
|
+
def prewhere_sql(self, expression: exp.PreWhere) -> str:
    """Render ClickHouse's PREWHERE clause with an indented condition."""
    condition = self.indent(self.sql(expression, "this"))
    return f"{self.seg('PREWHERE')}{self.sep()}{condition}"
|
|
1331
|
+
|
|
1332
|
+
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    """Render a data-skipping index definition: INDEX name expr TYPE t GRANULARITY g."""
    parts = ["INDEX"]

    name = self.sql(expression, "this")
    if name:
        parts.append(name)

    expr = self.sql(expression, "expression")
    if expr:
        parts.append(expr)

    index_type = self.sql(expression, "index_type")
    if index_type:
        parts.append(f"TYPE {index_type}")

    granularity = self.sql(expression, "granularity")
    if granularity:
        parts.append(f"GRANULARITY {granularity}")

    return " ".join(parts)
|
|
1343
|
+
|
|
1344
|
+
def partition_sql(self, expression: exp.Partition) -> str:
    """Render a PARTITION clause."""
    exprs = self.expressions(expression, flat=True)
    return f"PARTITION {exprs}"
|
|
1346
|
+
|
|
1347
|
+
def partitionid_sql(self, expression: exp.PartitionId) -> str:
    """Render a partition referenced by ID rather than by value."""
    partition_id = self.sql(expression.this)
    return f"ID {partition_id}"
|
|
1349
|
+
|
|
1350
|
+
def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
    """Render `REPLACE <partition> FROM <source>`."""
    partition = self.sql(expression.expression)
    source = self.sql(expression, "source")
    return f"REPLACE {partition} FROM {source}"
|
|
1354
|
+
|
|
1355
|
+
def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
    """Render a ClickHouse PROJECTION definition with its wrapped body."""
    name = self.sql(expression.this)
    body = self.wrap(expression.expression)
    return f"PROJECTION {name} {body}"
|
|
1357
|
+
|
|
1358
|
+
def is_sql(self, expression: exp.Is) -> str:
    """Render IS; under a NOT parent, parenthesize so that
    `value IS NOT NULL` becomes `NOT (value IS NULL)`."""
    rendered = super().is_sql(expression)

    if isinstance(expression.parent, exp.Not):
        rendered = self.wrap(rendered)

    return rendered
|
|
1366
|
+
|
|
1367
|
+
def in_sql(self, expression: exp.In) -> str:
    """Render IN; a NOT parent over a GLOBAL IN is folded into `GLOBAL NOT IN`."""
    rendered = super().in_sql(expression)

    negated_global = expression.args.get("is_global") and isinstance(
        expression.parent, exp.Not
    )
    if negated_global:
        rendered = rendered.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

    return rendered
|
|
1374
|
+
|
|
1375
|
+
def not_sql(self, expression: exp.Not) -> str:
    """Render NOT, except when a `GLOBAL IN` child already absorbs the negation."""
    child = expression.this
    if isinstance(child, exp.In) and child.args.get("is_global"):
        # let `GLOBAL IN` child interpose `NOT`
        return self.sql(expression, "this")

    return super().not_sql(expression)
|
|
1381
|
+
|
|
1382
|
+
def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
    """Render VALUES, treating it as a table when its rows are tuples of
    expressions, since ClickHouse will automatically alias it as such."""
    values_as_table = True

    alias = expression.args.get("alias")
    if alias and alias.args.get("columns") and expression.expressions:
        first_row = expression.expressions[0].expressions
        values_as_table = any(isinstance(item, exp.Tuple) for item in first_row)

    return super().values_sql(expression, values_as_table=values_as_table)
|