altimate-code 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +1 -5
  3. package/bin/altimate +6 -0
  4. package/bin/altimate-code +6 -0
  5. package/dbt-tools/bin/altimate-dbt +2 -0
  6. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
  7. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
  8. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
  9. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
  10. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
  11. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
  12. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
  13. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
  14. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
  15. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
  16. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
  17. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
  18. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
  19. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
  20. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
  21. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
  22. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
  23. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
  24. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
  25. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
  26. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
  27. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
  28. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
  29. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
  30. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
  31. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
  32. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
  33. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
  34. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
  35. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
  36. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
  37. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
  38. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
  39. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
  40. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
  41. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
  42. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
  43. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
  44. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
  45. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
  46. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
  47. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
  48. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
  49. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
  50. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
  51. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
  52. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
  53. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
  54. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
  55. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
  56. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
  57. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
  58. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
  59. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
  60. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
  61. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
  62. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
  63. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
  64. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
  65. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
  66. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
  67. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
  68. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
  69. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
  70. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
  71. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
  72. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
  73. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
  74. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
  75. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
  76. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
  77. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
  78. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
  79. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
  80. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
  81. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
  82. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
  83. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
  84. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
  85. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
  86. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
  87. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
  88. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
  89. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
  90. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
  91. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
  92. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
  93. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
  94. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
  95. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
  96. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
  97. package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
  98. package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
  99. package/dbt-tools/dist/index.js +23859 -0
  100. package/package.json +13 -13
  101. package/postinstall.mjs +42 -0
  102. package/skills/altimate-setup/SKILL.md +31 -0
@@ -0,0 +1,1159 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ from sqlglot import exp, generator, parser, tokens, transforms
6
+ from sqlglot.expressions import DATA_TYPE
7
+ from sqlglot.dialects.dialect import (
8
+ Dialect,
9
+ JSON_EXTRACT_TYPE,
10
+ NormalizationStrategy,
11
+ Version,
12
+ approx_count_distinct_sql,
13
+ arrow_json_extract_sql,
14
+ binary_from_function,
15
+ bool_xor_sql,
16
+ build_default_decimal_type,
17
+ count_if_to_sum,
18
+ date_trunc_to_time,
19
+ datestrtodate_sql,
20
+ no_datetime_sql,
21
+ encode_decode_sql,
22
+ build_formatted_time,
23
+ inline_array_unless_query,
24
+ no_comment_column_constraint_sql,
25
+ no_time_sql,
26
+ no_timestamp_sql,
27
+ pivot_column_names,
28
+ rename_func,
29
+ remove_from_array_using_filter,
30
+ strposition_sql,
31
+ str_to_time_sql,
32
+ timestamptrunc_sql,
33
+ timestrtotime_sql,
34
+ unit_to_var,
35
+ unit_to_str,
36
+ sha256_sql,
37
+ build_regexp_extract,
38
+ explode_to_unnest_sql,
39
+ no_make_interval_sql,
40
+ groupconcat_sql,
41
+ )
42
+ from sqlglot.generator import unsupported_args
43
+ from sqlglot.helper import seq_get
44
+ from sqlglot.tokens import TokenType
45
+ from sqlglot.parser import binary_range_parser
46
+
47
# All date/time add & sub expression types that _date_delta_sql renders as `operand +/- interval`.
DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]
50
+
51
+
52
def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    """Render date/time add & sub expressions as `<operand> +/- <interval>` arithmetic."""
    operand = expression.this
    unit = unit_to_var(expression)

    is_addition = isinstance(
        expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd)
    )
    op = "+" if is_addition else "-"

    cast_to: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        cast_to = expression.return_type
    elif operand.is_string:
        # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
        if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub)):
            cast_to = exp.DataType.Type.DATETIME
        else:
            cast_to = exp.DataType.Type.DATE

    if cast_to:
        operand = exp.cast(operand, cast_to)

    delta = expression.expression
    if not isinstance(delta, exp.Interval):
        delta = exp.Interval(this=delta, unit=unit)

    return f"{self.sql(operand)} {op} {self.sql(delta)}"
78
+
79
+
80
# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    """Render exp.Date, emulating BigQuery's optional time-zone argument."""
    zone = self.sql(expression, "zone")
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"

    if not zone:
        return result

    # Round-trip through a formatted string so the zone can be attached; the
    # STRPTIME of "<date> <zone>" will create a TIMESTAMP with time zone information.
    date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
    date_str = f"{date_str} || ' ' || {zone}"
    return self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")
93
+
94
+
95
# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    """Translate TIME_DIFF to DuckDB's DATE_DIFF over TIME-casted operands."""
    end = exp.cast(expression.this, exp.DataType.Type.TIME)
    start = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), start, end)
103
+
104
+
105
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    """Render exp.ArraySort as DuckDB's ARRAY_SORT; the comparator argument is dropped."""
    return self.func("ARRAY_SORT", expression.this)
108
+
109
+
110
def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    """Choose the ascending/descending DuckDB sort function based on the `asc` flag."""
    if expression.args.get("asc") == exp.false():
        func_name = "ARRAY_REVERSE_SORT"
    else:
        func_name = "ARRAY_SORT"
    return self.func(func_name, expression.this)
113
+
114
+
115
def _build_sort_array_desc(args: t.List) -> exp.Expression:
    """Build a descending SortArray expression from parsed function arguments."""
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())
117
+
118
+
119
def _build_date_diff(args: t.List) -> exp.Expression:
    """Build DateDiff from DuckDB's (unit, start, end) argument order."""
    unit, start, end = seq_get(args, 0), seq_get(args, 1), seq_get(args, 2)
    return exp.DateDiff(this=end, expression=start, unit=unit)
121
+
122
+
123
def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    """Return a builder for GENERATE_SERIES/RANGE, tagging whether the end bound is exclusive."""

    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        series = exp.GenerateSeries.from_arg_list(args)
        series.set("is_end_exclusive", end_exclusive)
        return series

    return _builder
136
+
137
+
138
def _build_make_timestamp(args: t.List) -> exp.Expression:
    """Map MAKE_TIMESTAMP to UnixToTime (1-arg form) or TimestampFromParts (multi-arg form)."""
    if len(args) == 1:
        # Single argument form: epoch value at microsecond scale
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    part_names = ("year", "month", "day", "hour", "min", "sec")
    parts = {name: seq_get(args, i) for i, name in enumerate(part_names)}
    return exp.TimestampFromParts(**parts)
150
+
151
+
152
def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    """Return a SHOW-statement parser callback bound to the given arguments."""

    def _parse(self: DuckDB.Parser) -> exp.Show:
        # Delegates to the dialect-specific SHOW parser with the bound arguments
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse
157
+
158
+
159
def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    """Render a STRUCT literal, or ROW(...) for BigQuery-style inline struct construction."""
    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    rendered: t.List[str] = []
    for i, field in enumerate(expression.expressions):
        if isinstance(field, exp.PropertyEQ):
            key, value = field.name, field.expression
        else:
            key, value = f"_{i}", field

        if is_bq_inline_struct:
            rendered.append(self.sql(value))
        else:
            rendered.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    body = ", ".join(rendered)

    return f"ROW({body})" if is_bq_inline_struct else f"{{{body}}}"
190
+
191
+
192
def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    """Render a data type, special-casing arrays and (zoned) time types."""
    if expression.is_type("array"):
        # DuckDB spells arrays as `TYPE[size]`
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)
203
+
204
+
205
def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    """Render JSON_FORMAT as TO_JSON(...) cast to TEXT."""
    to_json = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({to_json} AS TEXT)"
208
+
209
+
210
def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    """Convert a unix epoch value to a timestamp using the DuckDB function matching its scale."""
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    # Any other scale: divide by 10^scale to normalize to seconds first
    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))
222
+
223
+
224
# Parent expression types under which arrow-style JSON extraction must be parenthesized.
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)
225
+
226
+
227
def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    """Render arrow-style JSON extraction, parenthesizing it under binary/bracket/IN parents."""
    sql = arrow_json_extract_sql(self, expression)
    needs_wrap = not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    )
    return self.wrap(sql) if needs_wrap else sql
234
+
235
+
236
def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    """Wrap literal arguments in a cast to `type`; non-literals pass through unchanged."""
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg
240
+
241
+
242
def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    """Render DATE_DIFF with literal operands cast to DATE; operand order is swapped."""
    end = _implicit_datetime_cast(expression.this)
    start = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), start, end)
247
+
248
+
249
def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    """Translate BQ's GENERATE_DATE_ARRAY / GENERATE_TIMESTAMP_ARRAY to DuckDB's GENERATE_SERIES."""
    wants_dates = isinstance(expression, exp.GenerateDateArray)
    bound_type = exp.DataType.Type.DATE if wants_dates else exp.DataType.Type.TIMESTAMP

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES
    series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=_implicit_datetime_cast(expression.args.get("start"), type=bound_type),
        end=_implicit_datetime_cast(expression.args.get("end"), type=bound_type),
        step=expression.args.get("step"),
    )

    if wants_dates:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        series = exp.cast(series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(series)
269
+
270
+
271
def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    """Render JSON value/extract arrays as a JSON_EXTRACT cast to the matching array type."""
    extracted = exp.JSONExtract(this=expression.this, expression=expression.expression)
    if isinstance(expression, exp.JSONValueArray):
        target_type = "ARRAY<STRING>"
    else:
        target_type = "ARRAY<JSON>"
    return self.sql(exp.cast(extracted, to=exp.DataType.build(target_type)))
277
+
278
+
279
+ class DuckDB(Dialect):
280
+ NULL_ORDERING = "nulls_are_last"
281
+ SUPPORTS_USER_DEFINED_TYPES = True
282
+ SAFE_DIVISION = True
283
+ INDEX_OFFSET = 1
284
+ CONCAT_COALESCE = True
285
+ SUPPORTS_ORDER_BY_ALL = True
286
+ SUPPORTS_FIXED_SIZE_ARRAYS = True
287
+ STRICT_JSON_PATH_SYNTAX = False
288
+ NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
289
+
290
+ # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
291
+ NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
292
+
293
+ def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
294
+ if isinstance(path, exp.Literal):
295
+ # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
296
+ # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
297
+ # This check ensures we'll avoid trying to parse these as JSON paths, which can
298
+ # either result in a noisy warning or in an invalid representation of the path.
299
+ path_text = path.name
300
+ if path_text.startswith("/") or "[#" in path_text:
301
+ return path
302
+
303
+ return super().to_json_path(path)
304
+
305
+ class Tokenizer(tokens.Tokenizer):
306
+ BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
307
+ HEREDOC_STRINGS = ["$"]
308
+
309
+ HEREDOC_TAG_IS_IDENTIFIER = True
310
+ HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER
311
+
312
+ KEYWORDS = {
313
+ **tokens.Tokenizer.KEYWORDS,
314
+ "//": TokenType.DIV,
315
+ "**": TokenType.DSTAR,
316
+ "^@": TokenType.CARET_AT,
317
+ "@>": TokenType.AT_GT,
318
+ "<@": TokenType.LT_AT,
319
+ "ATTACH": TokenType.ATTACH,
320
+ "BINARY": TokenType.VARBINARY,
321
+ "BITSTRING": TokenType.BIT,
322
+ "BPCHAR": TokenType.TEXT,
323
+ "CHAR": TokenType.TEXT,
324
+ "DATETIME": TokenType.TIMESTAMPNTZ,
325
+ "DETACH": TokenType.DETACH,
326
+ "EXCLUDE": TokenType.EXCEPT,
327
+ "LOGICAL": TokenType.BOOLEAN,
328
+ "ONLY": TokenType.ONLY,
329
+ "PIVOT_WIDER": TokenType.PIVOT,
330
+ "POSITIONAL": TokenType.POSITIONAL,
331
+ "SIGNED": TokenType.INT,
332
+ "STRING": TokenType.TEXT,
333
+ "SUMMARIZE": TokenType.SUMMARIZE,
334
+ "TIMESTAMP": TokenType.TIMESTAMPNTZ,
335
+ "TIMESTAMP_S": TokenType.TIMESTAMP_S,
336
+ "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
337
+ "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
338
+ "TIMESTAMP_US": TokenType.TIMESTAMP,
339
+ "UBIGINT": TokenType.UBIGINT,
340
+ "UINTEGER": TokenType.UINT,
341
+ "USMALLINT": TokenType.USMALLINT,
342
+ "UTINYINT": TokenType.UTINYINT,
343
+ "VARCHAR": TokenType.TEXT,
344
+ }
345
+ KEYWORDS.pop("/*+")
346
+
347
+ SINGLE_TOKENS = {
348
+ **tokens.Tokenizer.SINGLE_TOKENS,
349
+ "$": TokenType.PARAMETER,
350
+ }
351
+
352
+ COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
353
+
354
+ class Parser(parser.Parser):
355
+ BITWISE = {
356
+ **parser.Parser.BITWISE,
357
+ TokenType.TILDA: exp.RegexpLike,
358
+ }
359
+ BITWISE.pop(TokenType.CARET)
360
+
361
+ RANGE_PARSERS = {
362
+ **parser.Parser.RANGE_PARSERS,
363
+ TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
364
+ TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
365
+ }
366
+
367
+ EXPONENT = {
368
+ **parser.Parser.EXPONENT,
369
+ TokenType.CARET: exp.Pow,
370
+ TokenType.DSTAR: exp.Pow,
371
+ }
372
+
373
+ FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}
374
+
375
+ SHOW_PARSERS = {
376
+ "TABLES": _show_parser("TABLES"),
377
+ "ALL TABLES": _show_parser("ALL TABLES"),
378
+ }
379
+
380
+ FUNCTIONS = {
381
+ **parser.Parser.FUNCTIONS,
382
+ "ARRAY_REVERSE_SORT": _build_sort_array_desc,
383
+ "ARRAY_SORT": exp.SortArray.from_arg_list,
384
+ "DATEDIFF": _build_date_diff,
385
+ "DATE_DIFF": _build_date_diff,
386
+ "DATE_TRUNC": date_trunc_to_time,
387
+ "DATETRUNC": date_trunc_to_time,
388
+ "DECODE": lambda args: exp.Decode(
389
+ this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
390
+ ),
391
+ "EDITDIST3": exp.Levenshtein.from_arg_list,
392
+ "ENCODE": lambda args: exp.Encode(
393
+ this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
394
+ ),
395
+ "EPOCH": exp.TimeToUnix.from_arg_list,
396
+ "EPOCH_MS": lambda args: exp.UnixToTime(
397
+ this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
398
+ ),
399
+ "GENERATE_SERIES": _build_generate_series(),
400
+ "JSON": exp.ParseJSON.from_arg_list,
401
+ "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
402
+ "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
403
+ "LIST_HAS": exp.ArrayContains.from_arg_list,
404
+ "LIST_REVERSE_SORT": _build_sort_array_desc,
405
+ "LIST_SORT": exp.SortArray.from_arg_list,
406
+ "LIST_VALUE": lambda args: exp.Array(expressions=args),
407
+ "MAKE_TIME": exp.TimeFromParts.from_arg_list,
408
+ "MAKE_TIMESTAMP": _build_make_timestamp,
409
+ "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
410
+ "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
411
+ "RANGE": _build_generate_series(end_exclusive=True),
412
+ "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
413
+ "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
414
+ "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
415
+ "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
416
+ this=seq_get(args, 0),
417
+ expression=seq_get(args, 1),
418
+ replacement=seq_get(args, 2),
419
+ modifiers=seq_get(args, 3),
420
+ ),
421
+ "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
422
+ "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
423
+ "STRING_SPLIT": exp.Split.from_arg_list,
424
+ "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
425
+ "STRING_TO_ARRAY": exp.Split.from_arg_list,
426
+ "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
427
+ "STRUCT_PACK": exp.Struct.from_arg_list,
428
+ "STR_SPLIT": exp.Split.from_arg_list,
429
+ "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
430
+ "TIME_BUCKET": exp.DateBin.from_arg_list,
431
+ "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
432
+ "UNNEST": exp.Explode.from_arg_list,
433
+ "XOR": binary_from_function(exp.BitwiseXor),
434
+ }
435
+
436
+ FUNCTIONS.pop("DATE_SUB")
437
+ FUNCTIONS.pop("GLOB")
438
+
439
+ FUNCTION_PARSERS = {
440
+ **parser.Parser.FUNCTION_PARSERS,
441
+ **dict.fromkeys(
442
+ ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
443
+ ),
444
+ }
445
+ FUNCTION_PARSERS.pop("DECODE")
446
+
447
+ NO_PAREN_FUNCTION_PARSERS = {
448
+ **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
449
+ "MAP": lambda self: self._parse_map(),
450
+ "@": lambda self: exp.Abs(this=self._parse_bitwise()),
451
+ }
452
+
453
+ TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
454
+ TokenType.SEMI,
455
+ TokenType.ANTI,
456
+ }
457
+
458
+ PLACEHOLDER_PARSERS = {
459
+ **parser.Parser.PLACEHOLDER_PARSERS,
460
+ TokenType.PARAMETER: lambda self: (
461
+ self.expression(exp.Placeholder, this=self._prev.text)
462
+ if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
463
+ else None
464
+ ),
465
+ }
466
+
467
+ TYPE_CONVERTERS = {
468
+ # https://duckdb.org/docs/sql/data_types/numeric
469
+ exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
470
+ # https://duckdb.org/docs/sql/data_types/text
471
+ exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
472
+ }
473
+
474
+ STATEMENT_PARSERS = {
475
+ **parser.Parser.STATEMENT_PARSERS,
476
+ TokenType.ATTACH: lambda self: self._parse_attach_detach(),
477
+ TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
478
+ TokenType.SHOW: lambda self: self._parse_show(),
479
+ }
480
+
481
+ def _parse_expression(self) -> t.Optional[exp.Expression]:
482
+ # DuckDB supports prefix aliases, e.g. foo: 1
483
+ if self._next and self._next.token_type == TokenType.COLON:
484
+ alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
485
+ self._match(TokenType.COLON)
486
+ comments = self._prev_comments or []
487
+
488
+ this = self._parse_assignment()
489
+ if isinstance(this, exp.Expression):
490
+ # Moves the comment next to the alias in `alias: expr /* comment */`
491
+ comments += this.pop_comments() or []
492
+
493
+ return self.expression(exp.Alias, comments=comments, this=this, alias=alias)
494
+
495
+ return super()._parse_expression()
496
+
497
+ def _parse_table(
498
+ self,
499
+ schema: bool = False,
500
+ joins: bool = False,
501
+ alias_tokens: t.Optional[t.Collection[TokenType]] = None,
502
+ parse_bracket: bool = False,
503
+ is_db_reference: bool = False,
504
+ parse_partition: bool = False,
505
+ ) -> t.Optional[exp.Expression]:
506
+ # DuckDB supports prefix aliases, e.g. FROM foo: bar
507
+ if self._next and self._next.token_type == TokenType.COLON:
508
+ alias = self._parse_table_alias(
509
+ alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
510
+ )
511
+ self._match(TokenType.COLON)
512
+ comments = self._prev_comments or []
513
+ else:
514
+ alias = None
515
+ comments = []
516
+
517
+ table = super()._parse_table(
518
+ schema=schema,
519
+ joins=joins,
520
+ alias_tokens=alias_tokens,
521
+ parse_bracket=parse_bracket,
522
+ is_db_reference=is_db_reference,
523
+ parse_partition=parse_partition,
524
+ )
525
+ if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
526
+ # Moves the comment next to the alias in `alias: table /* comment */`
527
+ comments += table.pop_comments() or []
528
+ alias.comments = alias.pop_comments() + comments
529
+ table.set("alias", alias)
530
+
531
+ return table
532
+
533
+ def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
534
+ # https://duckdb.org/docs/sql/samples.html
535
+ sample = super()._parse_table_sample(as_modifier=as_modifier)
536
+ if sample and not sample.args.get("method"):
537
+ if sample.args.get("size"):
538
+ sample.set("method", exp.var("RESERVOIR"))
539
+ else:
540
+ sample.set("method", exp.var("SYSTEM"))
541
+
542
+ return sample
543
+
544
+ def _parse_bracket(
545
+ self, this: t.Optional[exp.Expression] = None
546
+ ) -> t.Optional[exp.Expression]:
547
+ bracket = super()._parse_bracket(this)
548
+
549
+ if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
550
+ # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
551
+ bracket.set("returns_list_for_maps", True)
552
+
553
+ return bracket
554
+
555
+ def _parse_map(self) -> exp.ToMap | exp.Map:
556
+ if self._match(TokenType.L_BRACE, advance=False):
557
+ return self.expression(exp.ToMap, this=self._parse_bracket())
558
+
559
+ args = self._parse_wrapped_csv(self._parse_assignment)
560
+ return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))
561
+
562
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse a STRUCT entry as a field definition (name plus type)."""
        return self._parse_field_def()
564
+
565
+ def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
566
+ if len(aggregations) == 1:
567
+ return super()._pivot_column_names(aggregations)
568
+ return pivot_column_names(aggregations, dialect="duckdb")
569
+
570
    def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
        """Parse an ATTACH or DETACH [DATABASE] statement.

        Args:
            is_attach: True to build an `exp.Attach`, False for an `exp.Detach`.
        """

        def _parse_attach_option() -> exp.AttachOption:
            # A single `key value` pair inside the parenthesized option list.
            return self.expression(
                exp.AttachOption,
                this=self._parse_var(any_token=True),
                expression=self._parse_field(any_token=True),
            )

        # Optional DATABASE keyword: ATTACH [DATABASE] 'file.db'
        self._match(TokenType.DATABASE)
        # ATTACH takes IF NOT EXISTS, DETACH takes IF EXISTS.
        exists = self._parse_exists(not_=is_attach)
        # The database path/name, optionally aliased with an explicit AS.
        this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

        # Optional parenthesized option list, e.g. (READ_ONLY, TYPE sqlite).
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(_parse_attach_option)
        else:
            expressions = None

        return (
            self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
            if is_attach
            else self.expression(exp.Detach, this=this, exists=exists)
        )
592
+
593
    def _parse_show_duckdb(self, this: str) -> exp.Show:
        """Wrap an already-consumed SHOW target (e.g. "TABLES") in an exp.Show node."""
        return self.expression(exp.Show, this=this)
595
+
596
+ class Generator(generator.Generator):
597
+ PARAMETER_TOKEN = "$"
598
+ NAMED_PLACEHOLDER_TOKEN = "$"
599
+ JOIN_HINTS = False
600
+ TABLE_HINTS = False
601
+ QUERY_HINTS = False
602
+ LIMIT_FETCH = "LIMIT"
603
+ STRUCT_DELIMITER = ("(", ")")
604
+ RENAME_TABLE_WITH_DB = False
605
+ NVL2_SUPPORTED = False
606
+ SEMI_ANTI_JOIN_WITH_SIDE = False
607
+ TABLESAMPLE_KEYWORDS = "USING SAMPLE"
608
+ TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
609
+ LAST_DAY_SUPPORTS_DATE_PART = False
610
+ JSON_KEY_VALUE_PAIR_SEP = ","
611
+ IGNORE_NULLS_IN_FUNC = True
612
+ JSON_PATH_BRACKETED_KEY_SUPPORTED = False
613
+ SUPPORTS_CREATE_TABLE_LIKE = False
614
+ MULTI_ARG_DISTINCT = False
615
+ CAN_IMPLEMENT_ARRAY_ANY = True
616
+ SUPPORTS_TO_NUMBER = False
617
+ SUPPORTS_WINDOW_EXCLUDE = True
618
+ COPY_HAS_INTO_KEYWORD = False
619
+ STAR_EXCEPT = "EXCLUDE"
620
+ PAD_FILL_PATTERN_IS_REQUIRED = True
621
+ ARRAY_CONCAT_IS_VAR_LEN = False
622
+ ARRAY_SIZE_DIM_REQUIRED = False
623
+
624
+ TRANSFORMS = {
625
+ **generator.Generator.TRANSFORMS,
626
+ exp.ApproxDistinct: approx_count_distinct_sql,
627
+ exp.Array: inline_array_unless_query,
628
+ exp.ArrayFilter: rename_func("LIST_FILTER"),
629
+ exp.ArrayRemove: remove_from_array_using_filter,
630
+ exp.ArraySort: _array_sort_sql,
631
+ exp.ArraySum: rename_func("LIST_SUM"),
632
+ exp.BitwiseXor: rename_func("XOR"),
633
+ exp.CommentColumnConstraint: no_comment_column_constraint_sql,
634
+ exp.CurrentDate: lambda *_: "CURRENT_DATE",
635
+ exp.CurrentTime: lambda *_: "CURRENT_TIME",
636
+ exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
637
+ exp.DayOfMonth: rename_func("DAYOFMONTH"),
638
+ exp.DayOfWeek: rename_func("DAYOFWEEK"),
639
+ exp.DayOfWeekIso: rename_func("ISODOW"),
640
+ exp.DayOfYear: rename_func("DAYOFYEAR"),
641
+ exp.DataType: _datatype_sql,
642
+ exp.Date: _date_sql,
643
+ exp.DateAdd: _date_delta_sql,
644
+ exp.DateFromParts: rename_func("MAKE_DATE"),
645
+ exp.DateSub: _date_delta_sql,
646
+ exp.DateDiff: _date_diff_sql,
647
+ exp.DateStrToDate: datestrtodate_sql,
648
+ exp.Datetime: no_datetime_sql,
649
+ exp.DatetimeSub: _date_delta_sql,
650
+ exp.DatetimeAdd: _date_delta_sql,
651
+ exp.DateToDi: lambda self,
652
+ e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
653
+ exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
654
+ exp.DiToDate: lambda self,
655
+ e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
656
+ exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
657
+ exp.GenerateDateArray: _generate_datetime_array_sql,
658
+ exp.GenerateTimestampArray: _generate_datetime_array_sql,
659
+ exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
660
+ exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
661
+ exp.Explode: rename_func("UNNEST"),
662
+ exp.IntDiv: lambda self, e: self.binary(e, "//"),
663
+ exp.IsInf: rename_func("ISINF"),
664
+ exp.IsNan: rename_func("ISNAN"),
665
+ exp.JSONBExists: rename_func("JSON_EXISTS"),
666
+ exp.JSONExtract: _arrow_json_extract_sql,
667
+ exp.JSONExtractArray: _json_extract_value_array_sql,
668
+ exp.JSONExtractScalar: _arrow_json_extract_sql,
669
+ exp.JSONFormat: _json_format_sql,
670
+ exp.JSONValueArray: _json_extract_value_array_sql,
671
+ exp.Lateral: explode_to_unnest_sql,
672
+ exp.LogicalOr: rename_func("BOOL_OR"),
673
+ exp.LogicalAnd: rename_func("BOOL_AND"),
674
+ exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
675
+ exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
676
+ exp.MonthsBetween: lambda self, e: self.func(
677
+ "DATEDIFF",
678
+ "'month'",
679
+ exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
680
+ exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
681
+ ),
682
+ exp.PercentileCont: rename_func("QUANTILE_CONT"),
683
+ exp.PercentileDisc: rename_func("QUANTILE_DISC"),
684
+ # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
685
+ # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
686
+ exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
687
+ exp.RegexpReplace: lambda self, e: self.func(
688
+ "REGEXP_REPLACE",
689
+ e.this,
690
+ e.expression,
691
+ e.args.get("replacement"),
692
+ e.args.get("modifiers"),
693
+ ),
694
+ exp.RegexpLike: rename_func("REGEXP_MATCHES"),
695
+ exp.RegexpILike: lambda self, e: self.func(
696
+ "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
697
+ ),
698
+ exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
699
+ exp.Return: lambda self, e: self.sql(e, "this"),
700
+ exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
701
+ exp.Rand: rename_func("RANDOM"),
702
+ exp.SHA: rename_func("SHA1"),
703
+ exp.SHA2: sha256_sql,
704
+ exp.Split: rename_func("STR_SPLIT"),
705
+ exp.SortArray: _sort_array_sql,
706
+ exp.StrPosition: strposition_sql,
707
+ exp.StrToUnix: lambda self, e: self.func(
708
+ "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
709
+ ),
710
+ exp.Struct: _struct_sql,
711
+ exp.Transform: rename_func("LIST_TRANSFORM"),
712
+ exp.TimeAdd: _date_delta_sql,
713
+ exp.Time: no_time_sql,
714
+ exp.TimeDiff: _timediff_sql,
715
+ exp.Timestamp: no_timestamp_sql,
716
+ exp.TimestampDiff: lambda self, e: self.func(
717
+ "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
718
+ ),
719
+ exp.TimestampTrunc: timestamptrunc_sql(),
720
+ exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
721
+ exp.TimeStrToTime: timestrtotime_sql,
722
+ exp.TimeStrToUnix: lambda self, e: self.func(
723
+ "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
724
+ ),
725
+ exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
726
+ exp.TimeToUnix: rename_func("EPOCH"),
727
+ exp.TsOrDiToDi: lambda self,
728
+ e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
729
+ exp.TsOrDsAdd: _date_delta_sql,
730
+ exp.TsOrDsDiff: lambda self, e: self.func(
731
+ "DATE_DIFF",
732
+ f"'{e.args.get('unit') or 'DAY'}'",
733
+ exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
734
+ exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
735
+ ),
736
+ exp.UnixToStr: lambda self, e: self.func(
737
+ "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
738
+ ),
739
+ exp.DatetimeTrunc: lambda self, e: self.func(
740
+ "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
741
+ ),
742
+ exp.UnixToTime: _unix_to_time_sql,
743
+ exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
744
+ exp.VariancePop: rename_func("VAR_POP"),
745
+ exp.WeekOfYear: rename_func("WEEKOFYEAR"),
746
+ exp.Xor: bool_xor_sql,
747
+ exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
748
+ rename_func("LEVENSHTEIN")
749
+ ),
750
+ exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
751
+ exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
752
+ exp.DateBin: rename_func("TIME_BUCKET"),
753
+ }
754
+
755
+ SUPPORTED_JSON_PATH_PARTS = {
756
+ exp.JSONPathKey,
757
+ exp.JSONPathRoot,
758
+ exp.JSONPathSubscript,
759
+ exp.JSONPathWildcard,
760
+ }
761
+
762
+ TYPE_MAPPING = {
763
+ **generator.Generator.TYPE_MAPPING,
764
+ exp.DataType.Type.BINARY: "BLOB",
765
+ exp.DataType.Type.BPCHAR: "TEXT",
766
+ exp.DataType.Type.CHAR: "TEXT",
767
+ exp.DataType.Type.DATETIME: "TIMESTAMP",
768
+ exp.DataType.Type.FLOAT: "REAL",
769
+ exp.DataType.Type.JSONB: "JSON",
770
+ exp.DataType.Type.NCHAR: "TEXT",
771
+ exp.DataType.Type.NVARCHAR: "TEXT",
772
+ exp.DataType.Type.UINT: "UINTEGER",
773
+ exp.DataType.Type.VARBINARY: "BLOB",
774
+ exp.DataType.Type.ROWVERSION: "BLOB",
775
+ exp.DataType.Type.VARCHAR: "TEXT",
776
+ exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
777
+ exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
778
+ exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
779
+ exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
780
+ }
781
+
782
+ # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
783
+ RESERVED_KEYWORDS = {
784
+ "array",
785
+ "analyse",
786
+ "union",
787
+ "all",
788
+ "when",
789
+ "in_p",
790
+ "default",
791
+ "create_p",
792
+ "window",
793
+ "asymmetric",
794
+ "to",
795
+ "else",
796
+ "localtime",
797
+ "from",
798
+ "end_p",
799
+ "select",
800
+ "current_date",
801
+ "foreign",
802
+ "with",
803
+ "grant",
804
+ "session_user",
805
+ "or",
806
+ "except",
807
+ "references",
808
+ "fetch",
809
+ "limit",
810
+ "group_p",
811
+ "leading",
812
+ "into",
813
+ "collate",
814
+ "offset",
815
+ "do",
816
+ "then",
817
+ "localtimestamp",
818
+ "check_p",
819
+ "lateral_p",
820
+ "current_role",
821
+ "where",
822
+ "asc_p",
823
+ "placing",
824
+ "desc_p",
825
+ "user",
826
+ "unique",
827
+ "initially",
828
+ "column",
829
+ "both",
830
+ "some",
831
+ "as",
832
+ "any",
833
+ "only",
834
+ "deferrable",
835
+ "null_p",
836
+ "current_time",
837
+ "true_p",
838
+ "table",
839
+ "case",
840
+ "trailing",
841
+ "variadic",
842
+ "for",
843
+ "on",
844
+ "distinct",
845
+ "false_p",
846
+ "not",
847
+ "constraint",
848
+ "current_timestamp",
849
+ "returning",
850
+ "primary",
851
+ "intersect",
852
+ "having",
853
+ "analyze",
854
+ "current_user",
855
+ "and",
856
+ "cast",
857
+ "symmetric",
858
+ "using",
859
+ "order",
860
+ "current_catalog",
861
+ }
862
+
863
+ UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
864
+
865
+ # DuckDB doesn't generally support CREATE TABLE .. properties
866
+ # https://duckdb.org/docs/sql/statements/create_table.html
867
+ PROPERTIES_LOCATION = {
868
+ prop: exp.Properties.Location.UNSUPPORTED
869
+ for prop in generator.Generator.PROPERTIES_LOCATION
870
+ }
871
+
872
+ # There are a few exceptions (e.g. temporary tables) which are supported or
873
+ # can be transpiled to DuckDB, so we explicitly override them accordingly
874
+ PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
875
+ PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
876
+ PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
877
+
878
+ IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
879
+ exp.FirstValue,
880
+ exp.Lag,
881
+ exp.LastValue,
882
+ exp.Lead,
883
+ exp.NthValue,
884
+ )
885
+
886
+ def show_sql(self, expression: exp.Show) -> str:
887
+ return f"SHOW {expression.name}"
888
+
889
+ def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
890
+ return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))
891
+
892
+ def strtotime_sql(self, expression: exp.StrToTime) -> str:
893
+ if expression.args.get("safe"):
894
+ formatted_time = self.format_time(expression)
895
+ return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
896
+ return str_to_time_sql(self, expression)
897
+
898
+ def strtodate_sql(self, expression: exp.StrToDate) -> str:
899
+ if expression.args.get("safe"):
900
+ formatted_time = self.format_time(expression)
901
+ return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
902
+ return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
903
+
904
+ def parsejson_sql(self, expression: exp.ParseJSON) -> str:
905
+ arg = expression.this
906
+ if expression.args.get("safe"):
907
+ return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
908
+ return self.func("JSON", arg)
909
+
910
    def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
        """Render TIME_FROM_PARTS as MAKE_TIME, folding nanoseconds into seconds.

        MAKE_TIME has no nanosecond argument, so a `nano` component is
        converted to a fractional-second term added onto `sec`.
        """
        nano = expression.args.get("nano")
        if nano is not None:
            # nano.pop() detaches the node from the AST so it is not
            # also rendered as a separate function argument.
            expression.set(
                "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
            )

        return rename_func("MAKE_TIME")(self, expression)
918
+
919
    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        """Render TIMESTAMP_FROM_PARTS as MAKE_TIMESTAMP.

        MAKE_TIMESTAMP only accepts a (possibly fractional) seconds argument,
        so any milli/nano components are folded into `sec`.
        """
        sec = expression.args["sec"]

        milli = expression.args.get("milli")
        if milli is not None:
            # .pop() detaches the node so it is not rendered as its own arg.
            sec += milli.pop() / exp.Literal.number(1000.0)

        nano = expression.args.get("nano")
        if nano is not None:
            sec += nano.pop() / exp.Literal.number(1000000000.0)

        # Only rewrite `sec` when a fractional component was actually folded in.
        if milli or nano:
            expression.set("sec", sec)

        return rename_func("MAKE_TIMESTAMP")(self, expression)
934
+
935
    def tablesample_sql(
        self,
        expression: exp.TableSample,
        tablesample_keyword: t.Optional[str] = None,
    ) -> str:
        """Render a sample clause, picking the keyword and method DuckDB needs."""
        if not isinstance(expression.parent, exp.Select):
            # This sample clause only applies to a single source, not the entire resulting relation
            tablesample_keyword = "TABLESAMPLE"

        if expression.args.get("size"):
            # DuckDB only supports reservoir sampling with a discrete row count,
            # so coerce any other requested method and warn.
            method = expression.args.get("method")
            if method and method.name.upper() != "RESERVOIR":
                self.unsupported(
                    f"Sampling method {method} is not supported with a discrete sample count, "
                    "defaulting to reservoir sampling"
                )
                expression.set("method", exp.var("RESERVOIR"))

        return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
954
+
955
+ def interval_sql(self, expression: exp.Interval) -> str:
956
+ multiplier: t.Optional[int] = None
957
+ unit = expression.text("unit").lower()
958
+
959
+ if unit.startswith("week"):
960
+ multiplier = 7
961
+ if unit.startswith("quarter"):
962
+ multiplier = 90
963
+
964
+ if multiplier:
965
+ return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"
966
+
967
+ return super().interval_sql(expression)
968
+
969
+ def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
970
+ if isinstance(expression.parent, exp.UserDefinedFunction):
971
+ return self.sql(expression, "this")
972
+ return super().columndef_sql(expression, sep)
973
+
974
+ def join_sql(self, expression: exp.Join) -> str:
975
+ if (
976
+ expression.side == "LEFT"
977
+ and not expression.args.get("on")
978
+ and isinstance(expression.this, exp.Unnest)
979
+ ):
980
+ # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
981
+ # DuckDB doesn't, but we can just add a dummy ON clause that is always true
982
+ return super().join_sql(expression.on(exp.true()))
983
+
984
+ return super().join_sql(expression)
985
+
986
+ def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
987
+ # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
988
+ if expression.args.get("is_end_exclusive"):
989
+ return rename_func("RANGE")(self, expression)
990
+
991
+ return self.function_fallback_sql(expression)
992
+
993
+ def countif_sql(self, expression: exp.CountIf) -> str:
994
+ if self.dialect.version >= Version("1.2"):
995
+ return self.function_fallback_sql(expression)
996
+
997
+ # https://github.com/tobymao/sqlglot/pull/4749
998
+ return count_if_to_sum(self, expression)
999
+
1000
    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render subscript access, emulating pre-1.2 map-subscript semantics.

        https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        """
        if self.dialect.version >= Version("1.2"):
            return super().bracket_sql(expression)

        this = expression.this
        if isinstance(this, exp.Array):
            # Wrap array literals in parens so the subscript binds to the literal.
            this.replace(exp.paren(this))

        bracket = super().bracket_sql(expression)

        if not expression.args.get("returns_list_for_maps"):
            # Pre-1.2, subscripting a MAP returned a single-element list; append
            # [1] to unwrap it so the result matches scalar semantics. Deciding
            # this requires knowing whether `this` is a MAP, so run type
            # annotation if the node has no type yet.
            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(exp.DataType.Type.MAP):
                bracket = f"({bracket})[1]"

        return bracket
1021
+
1022
    def withingroup_sql(self, expression: exp.WithinGroup) -> str:
        """Render WITHIN GROUP by splicing the ORDER BY into the function call."""
        expression_sql = self.sql(expression, "expression")

        func = expression.this
        if isinstance(func, exp.PERCENTILES):
            # Make the order key the first arg and slide the fraction to the right
            # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
            order_col = expression.find(exp.Ordered)
            if order_col:
                func.set("expression", func.this)
                func.set("this", order_col.this)

        # Strip the closing paren of the rendered call, append the ORDER BY
        # text inside it, then re-close.
        this = self.sql(expression, "this").rstrip(")")

        return f"{this}{expression_sql})"
1037
+
1038
    def length_sql(self, expression: exp.Length) -> str:
        """Render LENGTH, resolving binary vs. text arguments for DuckDB."""
        arg = expression.this

        # Dialects like BQ and Snowflake also accept binary values as args, so
        # DDB will attempt to infer the type or resort to case/when resolution
        if not expression.args.get("binary") or arg.is_string:
            return self.func("LENGTH", arg)

        # Try static type inference before falling back to runtime dispatch.
        if not arg.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.is_type(*exp.DataType.TEXT_TYPES):
            return self.func("LENGTH", arg)

        # We need these casts to make duckdb's static type checker happy
        blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
        varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

        # Type unknown at transpile time: dispatch at runtime on TYPEOF, using
        # OCTET_LENGTH for blobs and LENGTH for everything else.
        case = (
            exp.case(self.func("TYPEOF", arg))
            .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            .else_(
                exp.Anonymous(this="LENGTH", expressions=[varchar])
            )  # anonymous to break length_sql recursion
        )

        return self.sql(case)
1067
+
1068
+ def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
1069
+ this = expression.this
1070
+ key = expression.args.get("key")
1071
+ key_sql = key.name if isinstance(key, exp.Expression) else ""
1072
+ value_sql = self.sql(expression, "value")
1073
+
1074
+ kv_sql = f"{key_sql} := {value_sql}"
1075
+
1076
+ # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
1077
+ # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
1078
+ if isinstance(this, exp.Struct) and not this.expressions:
1079
+ return self.func("STRUCT_PACK", kv_sql)
1080
+
1081
+ return self.func("STRUCT_INSERT", this, kv_sql)
1082
+
1083
    def unnest_sql(self, expression: exp.Unnest) -> str:
        """Render UNNEST, emulating BigQuery's array-exploding variant."""
        explode_array = expression.args.get("explode_array")
        if explode_array:
            # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
            # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
            expression.expressions.append(
                exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
            )

            # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
            alias = expression.args.get("alias")
            if alias:
                expression.set("alias", None)
                alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

            # Wrap the UNNEST in a subquery so the table alias applies to it.
            unnest_sql = super().unnest_sql(expression)
            select = exp.Select(expressions=[unnest_sql]).subquery(alias)
            return self.sql(select)

        return super().unnest_sql(expression)
1103
+
1104
+ def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
1105
+ if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
1106
+ # DuckDB should render IGNORE NULLS only for the general-purpose
1107
+ # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
1108
+ return super().ignorenulls_sql(expression)
1109
+
1110
+ self.unsupported("IGNORE NULLS is not supported for non-window functions.")
1111
+ return self.sql(expression, "this")
1112
+
1113
+ def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
1114
+ if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
1115
+ # DuckDB should render RESPECT NULLS only for the general-purpose
1116
+ # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
1117
+ return super().respectnulls_sql(expression)
1118
+
1119
+ self.unsupported("RESPECT NULLS is not supported for non-window functions.")
1120
+ return self.sql(expression, "this")
1121
+
1122
+ def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
1123
+ this = self.sql(expression, "this")
1124
+ null_text = self.sql(expression, "null")
1125
+
1126
+ if null_text:
1127
+ this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"
1128
+
1129
+ return self.func("ARRAY_TO_STRING", this, expression.expression)
1130
+
1131
    @unsupported_args("position", "occurrence")
    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        """Render REGEXP_EXTRACT, omitting a redundant default group argument."""
        group = expression.args.get("group")
        params = expression.args.get("parameters")

        # Do not render group if there is no following argument,
        # and it's the default value for this dialect
        if (
            not params
            and group
            and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
        ):
            group = None
        return self.func(
            "REGEXP_EXTRACT", expression.this, expression.expression, group, params
        )
1147
+
1148
+ @unsupported_args("culture")
1149
+ def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
1150
+ fmt = expression.args.get("format")
1151
+ if fmt and fmt.is_int:
1152
+ return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)
1153
+
1154
+ self.unsupported("Only integer formats are supported by NumberToStr")
1155
+ return self.function_fallback_sql(expression)
1156
+
1157
+ def autoincrementcolumnconstraint_sql(self, _) -> str:
1158
+ self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
1159
+ return ""