@altimateai/altimate-code 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101):
  1. package/CHANGELOG.md +35 -0
  2. package/bin/altimate +6 -0
  3. package/bin/altimate-code +6 -0
  4. package/dbt-tools/bin/altimate-dbt +2 -0
  5. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
  6. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
  7. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
  8. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
  9. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
  10. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
  11. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
  12. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
  13. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
  14. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
  15. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
  16. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
  17. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
  18. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
  19. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
  20. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
  21. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
  22. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
  23. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
  24. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
  25. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
  26. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
  27. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
  28. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
  29. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
  30. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
  31. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
  32. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
  33. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
  34. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
  35. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
  36. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
  37. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
  38. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
  39. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
  40. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
  41. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
  42. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
  43. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
  44. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
  45. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
  46. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
  47. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
  48. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
  49. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
  50. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
  51. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
  52. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
  53. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
  54. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
  55. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
  56. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
  57. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
  58. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
  59. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
  60. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
  61. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
  62. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
  63. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
  64. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
  65. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
  66. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
  67. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
  68. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
  69. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
  70. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
  71. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
  72. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
  73. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
  74. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
  75. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
  76. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
  77. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
  78. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
  79. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
  80. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
  81. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
  82. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
  83. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
  84. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
  85. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
  86. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
  87. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
  88. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
  89. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
  90. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
  91. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
  92. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
  93. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
  94. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
  95. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
  96. package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
  97. package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
  98. package/dbt-tools/dist/index.js +23859 -0
  99. package/package.json +13 -13
  100. package/postinstall.mjs +42 -0
  101. package/skills/altimate-setup/SKILL.md +31 -0
@@ -0,0 +1,1393 @@
1
+ from __future__ import annotations
2
+ import typing as t
3
+ import datetime
4
+ from sqlglot import exp, generator, parser, tokens
5
+ from sqlglot.dialects.dialect import (
6
+ Dialect,
7
+ NormalizationStrategy,
8
+ arg_max_or_min_no_count,
9
+ build_date_delta,
10
+ build_formatted_time,
11
+ inline_array_sql,
12
+ json_extract_segments,
13
+ json_path_key_only_name,
14
+ length_or_char_length_sql,
15
+ no_pivot_sql,
16
+ build_json_extract_path,
17
+ rename_func,
18
+ remove_from_array_using_filter,
19
+ sha256_sql,
20
+ strposition_sql,
21
+ var_map_sql,
22
+ timestamptrunc_sql,
23
+ unit_to_var,
24
+ trim_sql,
25
+ )
26
+ from sqlglot.generator import Generator
27
+ from sqlglot.helper import is_int, seq_get
28
+ from sqlglot.tokens import Token, TokenType
29
+ from sqlglot.generator import unsupported_args
30
+
31
# Union of the date/time delta expression types rendered by `_datetime_delta_sql`.
# NOTE(review): some letters of "ΤΙΜΕ" in this identifier appear to be Greek
# homoglyphs, not ASCII "TIME" — copy/paste the name rather than retyping it,
# or a NameError will occur at the use sites below.
DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]
32
+
33
+
34
def _build_date_format(args: t.List) -> exp.TimeToStr:
    """Build a TimeToStr node from DATE_FORMAT/formatDateTime arguments.

    A third positional argument, when present, is recorded as the node's
    "zone" arg (ClickHouse's optional timezone parameter).
    """
    node = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    zone = seq_get(args, 2)
    if zone:
        node.set("zone", zone)

    return node
42
+
43
+
44
def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    """Render UNIX_TO_TIME as the fromUnixTimestamp* variant matching the
    expression's fractional-second scale.

    Seconds (or no scale) map to fromUnixTimestamp; milli/micro/nano scales
    map to the corresponding 64-bit variants; any other scale falls back to
    dividing the value by 10^scale before converting.
    """
    scale = expression.args.get("scale")
    timestamp = expression.this

    def as_bigint(node: exp.Expression) -> exp.Cast:
        # ClickHouse's fromUnixTimestamp* functions expect an integer argument.
        return exp.cast(node, exp.DataType.Type.BIGINT)

    if scale in (None, exp.UnixToTime.SECONDS):
        name = "fromUnixTimestamp"
    elif scale == exp.UnixToTime.MILLIS:
        name = "fromUnixTimestamp64Milli"
    elif scale == exp.UnixToTime.MICROS:
        name = "fromUnixTimestamp64Micro"
    elif scale == exp.UnixToTime.NANOS:
        name = "fromUnixTimestamp64Nano"
    else:
        # Arbitrary precision: scale the raw value down by 10^scale first.
        divided = exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))
        return self.func("fromUnixTimestamp", as_bigint(divided))

    return self.func(name, as_bigint(timestamp))
63
+
64
+
65
+ def _lower_func(sql: str) -> str:
66
+ index = sql.index("(")
67
+ return sql[:index].lower() + sql[index:]
68
+
69
+
70
def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    """Render Quantile with ClickHouse's parametric-function syntax:
    quantile(level)(col) for a scalar level, quantiles(l1, l2, ...)(col)
    when the level is an array."""
    level = expression.args["quantile"]

    if isinstance(level, exp.Array):
        head = self.func("quantiles", *level)
    else:
        head = self.func("quantile", level)

    return f"{head}({self.sql(expression, 'this')})"
80
+
81
+
82
def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    """Map countIf(...) to CountIf for the one-predicate form; any other
    arity stays a combined (suffixed) aggregate function node."""
    if len(args) != 1:
        return exp.CombinedAggFunc(this="countIf", expressions=args)

    return exp.CountIf(this=seq_get(args, 0))
87
+
88
+
89
def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    """Build STR_TO_DATE: the two-argument form becomes StrToDate cast to
    DATETIME; the three-argument form has no direct equivalent and is kept
    as an anonymous call."""
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    parsed = exp.StrToDate.from_arg_list(args)
    return exp.cast(parsed, exp.DataType.build(exp.DataType.Type.DATETIME))
95
+
96
+
97
def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]:
    """Return a generator callback rendering a date/time delta expression as
    `name(unit, amount, value[, zone])`, or a plain renamed call when the
    expression carries no unit."""

    def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str:
        if expression.unit:
            return self.func(
                name,
                unit_to_var(expression),
                expression.expression,
                expression.this,
                expression.args.get("zone"),
            )

        return rename_func(name)(self, expression)

    return _delta_sql
111
+
112
+
113
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    """Render TimeStrToTime as a CAST to non-nullable DateTime64(6), moving
    any timezone out of the literal and into the type's parameters."""
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            # NOTE(review): a negative offset is detected by "-" anywhere in the tail;
            # fractional digits never contain "-", so splitting on it is safe here.
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
163
+
164
+
165
def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    """Render Map/VarMap. Inside a SETTINGS clause the value must be written
    as a `{k: v, ...}` literal; everywhere else fall back to a lowercased
    map() function call."""
    parent = expression.parent
    if not (parent and parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not (isinstance(keys, exp.Array) and isinstance(values, exp.Array)):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    pairs = ", ".join(
        f"{self.sql(key)}: {self.sql(value)}"
        for key, value in zip(keys.expressions, values.expressions)
    )

    return f"{{{pairs}}}"
183
+
184
+
185
+ class ClickHouse(Dialect):
186
+ NORMALIZE_FUNCTIONS: bool | str = False
187
+ NULL_ORDERING = "nulls_are_last"
188
+ SUPPORTS_USER_DEFINED_TYPES = False
189
+ SAFE_DIVISION = True
190
+ LOG_BASE_FIRST: t.Optional[bool] = None
191
+ FORCE_EARLY_ALIAS_REF_EXPANSION = True
192
+ PRESERVE_ORIGINAL_NAMES = True
193
+ NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
194
+ IDENTIFIERS_CAN_START_WITH_DIGIT = True
195
+ HEX_STRING_IS_INTEGER_TYPE = True
196
+
197
+ # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
198
+ NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
199
+
200
+ UNESCAPED_SEQUENCES = {
201
+ "\\0": "\0",
202
+ }
203
+
204
+ CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}
205
+
206
+ SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
207
+ exp.Except: False,
208
+ exp.Intersect: False,
209
+ exp.Union: None,
210
+ }
211
+
212
+ def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
213
+ # Clickhouse allows VALUES to have an embedded structure e.g:
214
+ # VALUES('person String, place String', ('Noah', 'Paris'), ...)
215
+ # In this case, we don't want to qualify the columns
216
+ values = expression.expressions[0].expressions
217
+
218
+ structure = (
219
+ values[0]
220
+ if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
221
+ else None
222
+ )
223
+ if structure:
224
+ # Split each column definition into the column name e.g:
225
+ # 'person String, place String' -> ['person', 'place']
226
+ structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
227
+ column_aliases = [
228
+ exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
229
+ ]
230
+ else:
231
+ # Default column aliases in CH are "c1", "c2", etc.
232
+ column_aliases = [
233
+ exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
234
+ ]
235
+
236
+ return column_aliases
237
+
238
    class Tokenizer(tokens.Tokenizer):
        """ClickHouse lexer configuration: `#`-style comments, backtick or
        double-quote identifiers, 0b/0x literals, $-delimited heredocs, and
        keyword mappings for ClickHouse-specific type and statement names."""

        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "EXCHANGE": TokenType.COMMAND,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "NOTHING": TokenType.NOTHING,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        # ClickHouse does not support /*+ ... */ optimizer hints.
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
292
+
293
+ class Parser(parser.Parser):
294
+ # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
295
+ # * select x from t1 union all select x from t2 limit 1;
296
+ # * select x from t1 union all (select x from t2 limit 1);
297
+ MODIFIERS_ATTACHED_TO_SET_OP = False
298
+ INTERVAL_SPANS = False
299
+ OPTIONAL_ALIAS_TOKEN_CTE = False
300
+
301
+ FUNCTIONS = {
302
+ **parser.Parser.FUNCTIONS,
303
+ "ANY": exp.AnyValue.from_arg_list,
304
+ "ARRAYSUM": exp.ArraySum.from_arg_list,
305
+ "COUNTIF": _build_count_if,
306
+ "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
307
+ "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
308
+ "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
309
+ "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
310
+ "DATE_FORMAT": _build_date_format,
311
+ "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
312
+ "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
313
+ "FORMATDATETIME": _build_date_format,
314
+ "JSONEXTRACTSTRING": build_json_extract_path(
315
+ exp.JSONExtractScalar, zero_based_indexing=False
316
+ ),
317
+ "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
318
+ "MAP": parser.build_var_map,
319
+ "MATCH": exp.RegexpLike.from_arg_list,
320
+ "RANDCANONICAL": exp.Rand.from_arg_list,
321
+ "STR_TO_DATE": _build_str_to_date,
322
+ "TUPLE": exp.Struct.from_arg_list,
323
+ "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
324
+ "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
325
+ "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
326
+ "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
327
+ "UNIQ": exp.ApproxDistinct.from_arg_list,
328
+ "XOR": lambda args: exp.Xor(expressions=args),
329
+ "MD5": exp.MD5Digest.from_arg_list,
330
+ "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
331
+ "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
332
+ "EDITDISTANCE": exp.Levenshtein.from_arg_list,
333
+ "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
334
+ }
335
+ FUNCTIONS.pop("TRANSFORM")
336
+
337
+ AGG_FUNCTIONS = {
338
+ "count",
339
+ "min",
340
+ "max",
341
+ "sum",
342
+ "avg",
343
+ "any",
344
+ "stddevPop",
345
+ "stddevSamp",
346
+ "varPop",
347
+ "varSamp",
348
+ "corr",
349
+ "covarPop",
350
+ "covarSamp",
351
+ "entropy",
352
+ "exponentialMovingAverage",
353
+ "intervalLengthSum",
354
+ "kolmogorovSmirnovTest",
355
+ "mannWhitneyUTest",
356
+ "median",
357
+ "rankCorr",
358
+ "sumKahan",
359
+ "studentTTest",
360
+ "welchTTest",
361
+ "anyHeavy",
362
+ "anyLast",
363
+ "boundingRatio",
364
+ "first_value",
365
+ "last_value",
366
+ "argMin",
367
+ "argMax",
368
+ "avgWeighted",
369
+ "topK",
370
+ "topKWeighted",
371
+ "deltaSum",
372
+ "deltaSumTimestamp",
373
+ "groupArray",
374
+ "groupArrayLast",
375
+ "groupUniqArray",
376
+ "groupArrayInsertAt",
377
+ "groupArrayMovingAvg",
378
+ "groupArrayMovingSum",
379
+ "groupArraySample",
380
+ "groupBitAnd",
381
+ "groupBitOr",
382
+ "groupBitXor",
383
+ "groupBitmap",
384
+ "groupBitmapAnd",
385
+ "groupBitmapOr",
386
+ "groupBitmapXor",
387
+ "sumWithOverflow",
388
+ "sumMap",
389
+ "minMap",
390
+ "maxMap",
391
+ "skewSamp",
392
+ "skewPop",
393
+ "kurtSamp",
394
+ "kurtPop",
395
+ "uniq",
396
+ "uniqExact",
397
+ "uniqCombined",
398
+ "uniqCombined64",
399
+ "uniqHLL12",
400
+ "uniqTheta",
401
+ "quantile",
402
+ "quantiles",
403
+ "quantileExact",
404
+ "quantilesExact",
405
+ "quantileExactLow",
406
+ "quantilesExactLow",
407
+ "quantileExactHigh",
408
+ "quantilesExactHigh",
409
+ "quantileExactWeighted",
410
+ "quantilesExactWeighted",
411
+ "quantileTiming",
412
+ "quantilesTiming",
413
+ "quantileTimingWeighted",
414
+ "quantilesTimingWeighted",
415
+ "quantileDeterministic",
416
+ "quantilesDeterministic",
417
+ "quantileTDigest",
418
+ "quantilesTDigest",
419
+ "quantileTDigestWeighted",
420
+ "quantilesTDigestWeighted",
421
+ "quantileBFloat16",
422
+ "quantilesBFloat16",
423
+ "quantileBFloat16Weighted",
424
+ "quantilesBFloat16Weighted",
425
+ "simpleLinearRegression",
426
+ "stochasticLinearRegression",
427
+ "stochasticLogisticRegression",
428
+ "categoricalInformationValue",
429
+ "contingency",
430
+ "cramersV",
431
+ "cramersVBiasCorrected",
432
+ "theilsU",
433
+ "maxIntersections",
434
+ "maxIntersectionsPosition",
435
+ "meanZTest",
436
+ "quantileInterpolatedWeighted",
437
+ "quantilesInterpolatedWeighted",
438
+ "quantileGK",
439
+ "quantilesGK",
440
+ "sparkBar",
441
+ "sumCount",
442
+ "largestTriangleThreeBuckets",
443
+ "histogram",
444
+ "sequenceMatch",
445
+ "sequenceCount",
446
+ "windowFunnel",
447
+ "retention",
448
+ "uniqUpTo",
449
+ "sequenceNextNode",
450
+ "exponentialTimeDecayedAvg",
451
+ }
452
+
453
+ AGG_FUNCTIONS_SUFFIXES = [
454
+ "If",
455
+ "Array",
456
+ "ArrayIf",
457
+ "Map",
458
+ "SimpleState",
459
+ "State",
460
+ "Merge",
461
+ "MergeState",
462
+ "ForEach",
463
+ "Distinct",
464
+ "OrDefault",
465
+ "OrNull",
466
+ "Resample",
467
+ "ArgMin",
468
+ "ArgMax",
469
+ ]
470
+
471
+ FUNC_TOKENS = {
472
+ *parser.Parser.FUNC_TOKENS,
473
+ TokenType.AND,
474
+ TokenType.OR,
475
+ TokenType.SET,
476
+ }
477
+
478
+ RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}
479
+
480
+ ID_VAR_TOKENS = {
481
+ *parser.Parser.ID_VAR_TOKENS,
482
+ TokenType.LIKE,
483
+ }
484
+
485
+ AGG_FUNC_MAPPING = (
486
+ lambda functions, suffixes: {
487
+ f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
488
+ }
489
+ )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
490
+
491
+ FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}
492
+
493
+ FUNCTION_PARSERS = {
494
+ **parser.Parser.FUNCTION_PARSERS,
495
+ "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
496
+ "QUANTILE": lambda self: self._parse_quantile(),
497
+ "MEDIAN": lambda self: self._parse_quantile(),
498
+ "COLUMNS": lambda self: self._parse_columns(),
499
+ }
500
+
501
+ FUNCTION_PARSERS.pop("MATCH")
502
+
503
+ PROPERTY_PARSERS = {
504
+ **parser.Parser.PROPERTY_PARSERS,
505
+ "ENGINE": lambda self: self._parse_engine_property(),
506
+ }
507
+ PROPERTY_PARSERS.pop("DYNAMIC")
508
+
509
+ NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
510
+ NO_PAREN_FUNCTION_PARSERS.pop("ANY")
511
+
512
+ NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
513
+ NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)
514
+
515
+ RANGE_PARSERS = {
516
+ **parser.Parser.RANGE_PARSERS,
517
+ TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
518
+ }
519
+
520
+ # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
521
+ # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
522
+ COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
523
+ COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)
524
+
525
+ JOIN_KINDS = {
526
+ *parser.Parser.JOIN_KINDS,
527
+ TokenType.ANY,
528
+ TokenType.ASOF,
529
+ TokenType.ARRAY,
530
+ }
531
+
532
+ TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
533
+ TokenType.ANY,
534
+ TokenType.ARRAY,
535
+ TokenType.FINAL,
536
+ TokenType.FORMAT,
537
+ TokenType.SETTINGS,
538
+ }
539
+
540
+ ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
541
+ TokenType.FORMAT,
542
+ }
543
+
544
+ LOG_DEFAULTS_TO_LN = True
545
+
546
+ QUERY_MODIFIER_PARSERS = {
547
+ **parser.Parser.QUERY_MODIFIER_PARSERS,
548
+ TokenType.SETTINGS: lambda self: (
549
+ "settings",
550
+ self._advance() or self._parse_csv(self._parse_assignment),
551
+ ),
552
+ TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
553
+ }
554
+
555
+ CONSTRAINT_PARSERS = {
556
+ **parser.Parser.CONSTRAINT_PARSERS,
557
+ "INDEX": lambda self: self._parse_index_constraint(),
558
+ "CODEC": lambda self: self._parse_compress(),
559
+ }
560
+
561
+ ALTER_PARSERS = {
562
+ **parser.Parser.ALTER_PARSERS,
563
+ "REPLACE": lambda self: self._parse_alter_table_replace(),
564
+ }
565
+
566
+ SCHEMA_UNNAMED_CONSTRAINTS = {
567
+ *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
568
+ "INDEX",
569
+ }
570
+
571
+ PLACEHOLDER_PARSERS = {
572
+ **parser.Parser.PLACEHOLDER_PARSERS,
573
+ TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
574
+ }
575
+
576
+ def _parse_engine_property(self) -> exp.EngineProperty:
577
+ self._match(TokenType.EQ)
578
+ return self.expression(
579
+ exp.EngineProperty,
580
+ this=self._parse_field(any_token=True, anonymous_func=True),
581
+ )
582
+
583
        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            """CREATE FUNCTION bodies in ClickHouse are lambdas (e.g. `x -> x + 1`)."""
            return self._parse_lambda()
586
+
587
+ def _parse_types(
588
+ self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
589
+ ) -> t.Optional[exp.Expression]:
590
+ dtype = super()._parse_types(
591
+ check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
592
+ )
593
+ if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
594
+ # Mark every type as non-nullable which is ClickHouse's default, unless it's
595
+ # already marked as nullable. This marker helps us transpile types from other
596
+ # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
597
+ # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
598
+ # fail in ClickHouse without the `Nullable` type constructor.
599
+ dtype.set("nullable", False)
600
+
601
+ return dtype
602
+
603
        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            """Parse EXTRACT: the standard `EXTRACT(part FROM expr)` form becomes
            exp.Extract; ClickHouse's two-argument `extract(haystack, pattern)`
            is deliberately kept as an anonymous call (see below)."""
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                # Standard form — rewind and let the base parser handle it.
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )
619
+
620
def _parse_assignment(self) -> t.Optional[exp.Expression]:
    """Parse an assignment-level expression, including ClickHouse's ternary `cond ? a : b`."""
    this = super()._parse_assignment()

    if self._match(TokenType.PLACEHOLDER):
        # The `?` token starts a ternary: parse the true branch, then the `:`-separated
        # false branch (False is stored if the colon is missing).
        return self.expression(
            exp.If,
            this=this,
            true=self._parse_assignment(),
            false=self._match(TokenType.COLON) and self._parse_assignment(),
        )

    return this
def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
    """
    Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
    https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
    """
    index = self._index

    this = self._parse_id_var()
    self._match(TokenType.COLON)
    kind = self._parse_types(check_func=False, allow_identifiers=False) or (
        self._match_text_seq("IDENTIFIER") and "Identifier"
    )

    if not kind:
        # Not actually a query parameter — rewind so the braces can be re-parsed.
        self._retreat(index)
        return None
    elif not self._match(TokenType.R_BRACE):
        self.raise_error("Expecting }")

    if isinstance(this, exp.Identifier) and not this.quoted:
        # An unquoted parameter name is a plain variable, not a column reference.
        this = exp.var(this.name)

    return self.expression(exp.Placeholder, this=this, kind=kind)
def _parse_bracket(
    self, this: t.Optional[exp.Expression] = None
) -> t.Optional[exp.Expression]:
    """Parse brackets; a brace-delimited struct literal `{k: v, ...}` becomes a VarMap."""
    l_brace = self._match(TokenType.L_BRACE, advance=False)
    bracket = super()._parse_bracket(this)

    if l_brace and isinstance(bracket, exp.Struct):
        # Split the struct's key/value pairs into the parallel arrays a map() takes.
        varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
        for expression in bracket.expressions:
            if not isinstance(expression, exp.PropertyEQ):
                break

            varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
            varmap.args["values"].append("expressions", expression.expression)

        return varmap

    return bracket
def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
    """Parse IN, tagging the node when it's ClickHouse's distributed GLOBAL IN variant."""
    this = super()._parse_in(this)
    this.set("is_global", is_global)
    return this
def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
    """Parse GLOBAL [NOT] IN, wrapping the In node in Not when negated."""
    is_negated = self._match(TokenType.NOT)
    this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
    return self.expression(exp.Not, this=this) if is_negated else this
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table factor, defaulting generate_series aliases and handling FINAL.

    NOTE(review): `parse_partition` is accepted for signature compatibility but is
    not forwarded to the base parser here — confirm that is intentional.
    """
    this = super()._parse_table(
        schema=schema,
        joins=joins,
        alias_tokens=alias_tokens,
        parse_bracket=parse_bracket,
        is_db_reference=is_db_reference,
    )

    if isinstance(this, exp.Table):
        inner = this.this
        alias = this.args.get("alias")

        # An aliased generate_series() with no explicit column list gets the
        # default column name "generate_series".
        if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
            alias.set("columns", [exp.to_identifier("generate_series")])

    # `tbl FINAL` — wrap the table so the modifier survives round-tripping.
    if self._match(TokenType.FINAL):
        this = self.expression(exp.Final, this=this)

    return this
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """ClickHouse's position() always takes the haystack first; force that order."""
    # The caller's flag is deliberately overridden.
    parsed = super()._parse_position(haystack_first=True)
    return parsed
# https://clickhouse.com/docs/en/sql-reference/statements/select/with/
def _parse_cte(self) -> t.Optional[exp.CTE]:
    """Parse both CTE flavors ClickHouse supports (subquery and scalar)."""
    # WITH <identifier> AS <subquery expression>
    cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

    if not cte:
        # WITH <expression> AS <identifier>  (a scalar CTE)
        cte = self.expression(
            exp.CTE,
            this=self._parse_assignment(),
            alias=self._parse_table_alias(),
            scalar=True,
        )

    return cte
def _parse_join_parts(
    self,
) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
    """Return (global, side, kind) join tokens; ClickHouse allows kind before side."""
    is_global = self._match(TokenType.GLOBAL) and self._prev
    kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

    if kind_pre:
        # e.g. `SEMI LEFT JOIN` — the kind precedes the side.
        kind = self._match_set(self.JOIN_KINDS) and self._prev
        side = self._match_set(self.JOIN_SIDES) and self._prev
        return is_global, side, kind

    # Standard order: side first, then kind.
    return (
        is_global,
        self._match_set(self.JOIN_SIDES) and self._prev,
        self._match_set(self.JOIN_KINDS) and self._prev,
    )
def _parse_join(
    self, skip_join_token: bool = False, parse_bracket: bool = False
) -> t.Optional[exp.Join]:
    """Parse a join, relocating the GLOBAL marker and fixing ARRAY JOIN operands.

    NOTE: parse_bracket is forced to True for ClickHouse regardless of the caller.
    """
    join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
    if join:
        # The token produced by _parse_join_parts lands in "method"; store it as "global".
        join.set("global", join.args.pop("method", None))

        # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
        # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
        if join.kind == "ARRAY":
            for table in join.find_all(exp.Table):
                table.replace(table.to_column())

    return join
def _parse_function(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a function call, recognizing ClickHouse combinator and parameterized aggregates."""
    expr = super()._parse_function(
        functions=functions,
        anonymous=anonymous,
        optional_parens=optional_parens,
        any_token=any_token,
    )

    # If the call was parsed as a window, the function node sits under it.
    func = expr.this if isinstance(expr, exp.Window) else expr

    # Aggregate functions can be split in 2 parts: <func_name><suffix>
    parts = (
        self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
    )

    if parts:
        anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
        params = self._parse_func_params(anon_func)

        kwargs = {
            "this": anon_func.this,
            "expressions": anon_func.expressions,
        }
        if parts[1]:
            # A non-empty suffix means a combinator form (e.g. sumIf) was used.
            exp_class: t.Type[exp.Expression] = (
                exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
            )
        else:
            exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

        kwargs["exp_class"] = exp_class
        if params:
            kwargs["params"] = params

        func = self.expression(**kwargs)

        if isinstance(expr, exp.Window):
            # The window's func was parsed as Anonymous in base parser, fix its
            # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
            expr.set("this", func)
        elif params:
            # Params have blocked super()._parse_function() from parsing the following window
            # (if that exists) as they're standing between the function call and the window spec
            expr = self._parse_window(func)
        else:
            expr = func

    return expr
def _parse_func_params(
    self, this: t.Optional[exp.Func] = None
) -> t.Optional[t.List[exp.Expression]]:
    """Parse a parameterized-aggregate parameter list: func(params)(args)."""
    if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
        # We are between the two argument lists: `...)(params...` — the closing
        # paren of the params list is consumed by the enclosing call parser.
        return self._parse_csv(self._parse_lambda)

    if self._match(TokenType.L_PAREN):
        params = self._parse_csv(self._parse_lambda)
        self._match_r_paren(this)
        return params

    return None
def _parse_quantile(self) -> exp.Quantile:
    """Parse quantile(q)(x); a bare quantile(x) defaults the level to 0.5 (the median)."""
    this = self._parse_lambda()
    params = self._parse_func_params()
    if params:
        # quantile(q)(x): the first parsed expression is the level, params[0] the value.
        return self.expression(exp.Quantile, this=params[0], quantile=this)
    return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
    """Parentheses around identifier lists are always optional in ClickHouse."""
    # The caller's flag is overridden on purpose.
    id_vars = super()._parse_wrapped_id_vars(optional=True)
    return id_vars
def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """Parse PRIMARY KEY; inside a property list the wrapping parens are optional."""
    optional = True if in_props else wrapped_optional
    return super()._parse_primary_key(wrapped_optional=optional, in_props=in_props)
def _parse_on_property(self) -> t.Optional[exp.Expression]:
    """Parse ON CLUSTER <name>; rewind when no cluster name follows CLUSTER."""
    index = self._index
    if self._match_text_seq("CLUSTER"):
        this = self._parse_string() or self._parse_id_var()
        if this:
            return self.expression(exp.OnCluster, this=this)
        else:
            # CLUSTER matched but no name followed — restore the token position.
            self._retreat(index)
    return None
def _parse_index_constraint(
    self, kind: t.Optional[str] = None
) -> exp.IndexColumnConstraint:
    """Parse a ClickHouse data-skipping index definition."""
    # INDEX name1 expr TYPE type1(args) GRANULARITY value
    this = self._parse_id_var()
    expression = self._parse_assignment()

    # The index type may be a parameterized call (e.g. a bloom filter) or a bare name.
    index_type = self._match_text_seq("TYPE") and (
        self._parse_function() or self._parse_var()
    )

    granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

    return self.expression(
        exp.IndexColumnConstraint,
        this=this,
        expression=expression,
        index_type=index_type,
        granularity=granularity,
    )
def _parse_partition(self) -> t.Optional[exp.Partition]:
    """Parse a PARTITION clause, including the PARTITION ID '<value>' form."""
    # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
    if not self._match(TokenType.PARTITION):
        return None

    if self._match_text_seq("ID"):
        # Corresponds to the PARTITION ID <string_value> syntax
        expressions: t.List[exp.Expression] = [
            self.expression(exp.PartitionId, this=self._parse_string())
        ]
    else:
        expressions = self._parse_expressions()

    return self.expression(exp.Partition, expressions=expressions)
def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
    """Parse ALTER TABLE ... REPLACE PARTITION ... FROM <table>."""
    partition = self._parse_partition()

    # Both the partition and the FROM source table are required.
    if not partition or not self._match(TokenType.FROM):
        return None

    return self.expression(
        exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
    )
def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
    """Parse a PROJECTION <name> (<statement>) definition."""
    if not self._match_text_seq("PROJECTION"):
        return None

    return self.expression(
        exp.ProjectionDef,
        this=self._parse_id_var(),
        # The projection body is a full (wrapped) SELECT statement.
        expression=self._parse_wrapped(self._parse_statement),
    )
def _parse_constraint(self) -> t.Optional[exp.Expression]:
    """Parse a constraint, also accepting ClickHouse PROJECTION definitions."""
    constraint = super()._parse_constraint()
    if constraint:
        return constraint
    return self._parse_projection_def()
def _parse_alias(
    self, this: t.Optional[exp.Expression], explicit: bool = False
) -> t.Optional[exp.Expression]:
    """Parse an alias, except when APPLY( begins a query modifier."""
    # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
    # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
    if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
        return this

    return super()._parse_alias(this=this, explicit=explicit)
def _parse_expression(self) -> t.Optional[exp.Expression]:
    """Parse a projection expression plus any trailing APPLY(func) modifiers."""
    this = super()._parse_expression()

    # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
    while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
        this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
        self._match(TokenType.R_PAREN)

    return this
def _parse_columns(self) -> exp.Expression:
    """Parse COLUMNS(...) with optional chained APPLY(...) modifiers."""
    this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

    # COLUMNS('pattern') APPLY(f) APPLY(g) ... — fold each APPLY around the node.
    while self._next and self._match_text_seq(")", "APPLY", "("):
        self._match(TokenType.R_PAREN)
        this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
    return this
def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
    """Parse one VALUES row, canonicalizing single-column rows into tuples."""
    value = super()._parse_value(values=values)
    if not value:
        return None

    # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
    # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
    # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
    # but the final result is not altered by the extra parentheses.
    # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
    expressions = value.expressions
    if values and not isinstance(expressions[-1], exp.Tuple):
        value.set(
            "expressions",
            [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
        )

    return value
class Generator(generator.Generator):
    """SQL generator for the ClickHouse dialect.

    The class-level tables below configure feature flags, type-name mappings and
    per-expression render callbacks used by the shared generator machinery.
    """

    # --- dialect feature flags ---
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    ARRAY_SIZE_NAME = "LENGTH"
    WRAP_DERIVED_VALUES = False

    # Every textual/binary string type is rendered as ClickHouse's single String type.
    STRING_TYPE_MAPPING = {
        exp.DataType.Type.BLOB: "String",
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    # JSON path parts this dialect can express natively.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    # Canonical ClickHouse spelling for each generic sqlglot type.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NOTHING: "Nothing",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        exp.DataType.Type.DYNAMIC: "Dynamic",
    }

    # Per-expression SQL render callbacks (mostly function renames).
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayConcat: rename_func("arrayConcat"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: _map_sql,
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.EndsWith: rename_func("endsWith"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self,
            e,
            func_name="POSITION",
            supports_position=True,
            use_ansi_position=False,
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: _map_sql,
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
    }

    # Where each property renders within a CREATE statement.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    """Render STR_TO_DATE, wrapping it in a DATE cast unless already under a CAST."""
    strtodate_sql = self.function_fallback_sql(expression)

    if not isinstance(expression.parent, exp.Cast):
        # StrToDate returns DATEs in other dialects (eg. postgres), so
        # this branch aims to improve the transpilation to clickhouse
        return self.cast_sql(exp.cast(expression, "DATE"))

    return strtodate_sql
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    """Render CAST; a StrToDate cast to DATETIME collapses to the StrToDate itself."""
    this = expression.this

    # presumably redundant with strtodate_sql's own DATE cast — the DATETIME
    # wrapper is dropped and only the inner expression is rendered.
    if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Render TRY_CAST as a cast to a Nullable(...) target where allowed."""
    dtype = expression.to
    if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
        dtype.set("nullable", True)

    return super().cast_sql(expression)
def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
    """Render a JSON path subscript, shifting integer indexes by one."""
    part = self.json_path_part(expression.this)
    if is_int(part):
        # Integer subscripts are emitted 1-based.
        return str(int(part) + 1)
    return part
def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
    """CREATE TABLE ... LIKE is spelled `AS <table>` in ClickHouse."""
    source = self.sql(expression, "this")
    return f"AS {source}"
def _any_to_has(
    self,
    expression: exp.EQ | exp.NEQ,
    default: t.Callable[[t.Any], str],
    prefix: str = "",
) -> str:
    """Rewrite a comparison against ANY(arr) into has(arr, value); else use `default`.

    `prefix` is prepended to the has() call (used for "NOT " by neq_sql).
    """
    if isinstance(expression.left, exp.Any):
        arr = expression.left
        this = expression.right
    elif isinstance(expression.right, exp.Any):
        arr = expression.right
        this = expression.left
    else:
        # Neither side is ANY(...) — fall back to the standard rendering.
        return default(expression)

    return prefix + self.func("has", arr.this.unnest(), this)
def eq_sql(self, expression: exp.EQ) -> str:
    """Render equality; `x = ANY(arr)` becomes has(arr, x)."""
    fallback = super().eq_sql
    return self._any_to_has(expression, fallback)
def neq_sql(self, expression: exp.NEQ) -> str:
    """Render inequality; `x <> ANY(arr)` becomes NOT has(arr, x)."""
    fallback = super().neq_sql
    return self._any_to_has(expression, fallback, "NOT ")
def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
    """Emit a case-insensitive match() by prepending the (?i) regex flag."""
    # Manually add a flag to make the search case-insensitive
    pattern = self.func("CONCAT", "'(?i)'", expression.expression)
    rendered = self.func("match", expression.this, pattern)
    return rendered
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a type name, wrapping in Nullable(...) according to ClickHouse's rules."""
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        dtype = "String"
    else:
        dtype = super().datatype_sql(expression)

    # This section changes the type to `Nullable(...)` if the following conditions hold:
    # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
    #   and change their semantics
    # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
    #   constraint: "Type of Map key must be a type, that can be represented by integer or
    #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
    # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
    parent = expression.parent
    nullable = expression.args.get("nullable")
    if nullable is True or (
        nullable is None
        and not (
            isinstance(parent, exp.DataType)
            and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
            and expression.index in (None, 0)
        )
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        dtype = f"Nullable({dtype})"

    return dtype
def cte_sql(self, expression: exp.CTE) -> str:
    """Scalar CTEs (WITH <expr> AS <name>) render as `<expr> AS <name>`."""
    if not expression.args.get("scalar"):
        return super().cte_sql(expression)

    body = self.sql(expression, "this")
    name = self.sql(expression, "alias")
    return f"{body} AS {name}"
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    """Append ClickHouse's SETTINGS and FORMAT clauses after the LIMIT modifiers."""
    modifiers = list(super().after_limit_modifiers(expression))

    if expression.args.get("settings"):
        settings = self.expressions(expression, key="settings", flat=True)
        modifiers.append(self.seg("SETTINGS ") + settings)
    else:
        modifiers.append("")

    if expression.args.get("format"):
        modifiers.append(self.seg("FORMAT ") + self.sql(expression, "format"))
    else:
        modifiers.append("")

    return modifiers
def placeholder_sql(self, expression: exp.Placeholder) -> str:
    """Render a query parameter as {name: Type}."""
    kind = self.sql(expression, "kind")
    return "{" + expression.name + ": " + kind + "}"
def oncluster_sql(self, expression: exp.OnCluster) -> str:
    """Render the ON CLUSTER <name> clause."""
    cluster = self.sql(expression, "this")
    return f"ON CLUSTER {cluster}"
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    """Render the created object's name, inserting POST_NAME properties
    (e.g. ON CLUSTER) between the name and the column schema."""
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(
            expression.this if isinstance(expression.this, exp.Schema) else expression,
            "this",
        )
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        # name, then POST_NAME properties, then the (optional) column schema
        return f"{this_name}{self.sep()}{this_properties}{this_schema}"

    return super().createable_sql(expression, locations)
def create_sql(self, expression: exp.Create) -> str:
    """Render CREATE; in CTAS the COMMENT property must follow the query."""
    # The comment property comes last in CTAS statements, i.e. after the query
    query = expression.expression
    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            # Detach the comment and parenthesize the query so the comment
            # can be re-attached after it below.
            comment_prop.pop()
            query.replace(exp.paren(query))
    else:
        comment_prop = None

    create_sql = super().create_sql(expression)

    comment_sql = self.sql(comment_prop)
    comment_sql = f" {comment_sql}" if comment_sql else ""

    return f"{create_sql}{comment_sql}"
def prewhere_sql(self, expression: exp.PreWhere) -> str:
    """Render ClickHouse's PREWHERE clause."""
    condition = self.sql(expression, "this")
    indented = self.indent(condition)
    return self.seg("PREWHERE") + self.sep() + indented
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    """Render `INDEX name expr TYPE type GRANULARITY value`, omitting absent parts."""
    pieces = ["INDEX"]

    name = self.sql(expression, "this")
    if name:
        pieces.append(name)

    expr = self.sql(expression, "expression")
    if expr:
        pieces.append(expr)

    index_type = self.sql(expression, "index_type")
    if index_type:
        pieces.append(f"TYPE {index_type}")

    granularity = self.sql(expression, "granularity")
    if granularity:
        pieces.append(f"GRANULARITY {granularity}")

    return " ".join(pieces)
def partition_sql(self, expression: exp.Partition) -> str:
    """Render PARTITION <expressions>."""
    exprs = self.expressions(expression, flat=True)
    return f"PARTITION {exprs}"
def partitionid_sql(self, expression: exp.PartitionId) -> str:
    """Render the PARTITION ID '<value>' form."""
    return "ID " + self.sql(expression.this)
def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
    """Render REPLACE <partition> FROM <source table>."""
    partition = self.sql(expression.expression)
    source = self.sql(expression, "source")
    return f"REPLACE {partition} FROM {source}"
def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
    """Render PROJECTION <name> (<statement>)."""
    name = self.sql(expression.this)
    body = self.wrap(expression.expression)
    return f"PROJECTION {name} {body}"
def is_sql(self, expression: exp.Is) -> str:
    """Render IS; under a NOT parent, parenthesize so it reads NOT (x IS NULL)."""
    rendered = super().is_sql(expression)
    if not isinstance(expression.parent, exp.Not):
        return rendered
    # value IS NOT NULL -> NOT (value IS NULL)
    return self.wrap(rendered)
def in_sql(self, expression: exp.In) -> str:
    """Render IN; when a NOT parent wraps GLOBAL IN, fold it into GLOBAL NOT IN."""
    rendered = super().in_sql(expression)
    negated = isinstance(expression.parent, exp.Not)
    if negated and expression.args.get("is_global"):
        return rendered.replace("GLOBAL IN", "GLOBAL NOT IN", 1)
    return rendered
def not_sql(self, expression: exp.Not) -> str:
    """Render NOT; for GLOBAL IN the child emits GLOBAL NOT IN itself."""
    child = expression.this
    if isinstance(child, exp.In) and child.args.get("is_global"):
        # let `GLOBAL IN` child interpose `NOT`
        return self.sql(expression, "this")
    return super().not_sql(expression)
def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
    """Render VALUES; treat it as a table only when the rows are genuine tuples."""
    # If the VALUES clause contains tuples of expressions, we need to treat it
    # as a table since Clickhouse will automatically alias it as such.
    alias = expression.args.get("alias")

    if alias and alias.args.get("columns") and expression.expressions:
        values = expression.expressions[0].expressions
        values_as_table = any(isinstance(value, exp.Tuple) for value in values)
    else:
        values_as_table = True

    return super().values_sql(expression, values_as_table=values_as_table)