altimate-code 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/README.md +1 -5
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +13 -13
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
|
@@ -0,0 +1,1159 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import typing as t
|
|
4
|
+
|
|
5
|
+
from sqlglot import exp, generator, parser, tokens, transforms
|
|
6
|
+
from sqlglot.expressions import DATA_TYPE
|
|
7
|
+
from sqlglot.dialects.dialect import (
|
|
8
|
+
Dialect,
|
|
9
|
+
JSON_EXTRACT_TYPE,
|
|
10
|
+
NormalizationStrategy,
|
|
11
|
+
Version,
|
|
12
|
+
approx_count_distinct_sql,
|
|
13
|
+
arrow_json_extract_sql,
|
|
14
|
+
binary_from_function,
|
|
15
|
+
bool_xor_sql,
|
|
16
|
+
build_default_decimal_type,
|
|
17
|
+
count_if_to_sum,
|
|
18
|
+
date_trunc_to_time,
|
|
19
|
+
datestrtodate_sql,
|
|
20
|
+
no_datetime_sql,
|
|
21
|
+
encode_decode_sql,
|
|
22
|
+
build_formatted_time,
|
|
23
|
+
inline_array_unless_query,
|
|
24
|
+
no_comment_column_constraint_sql,
|
|
25
|
+
no_time_sql,
|
|
26
|
+
no_timestamp_sql,
|
|
27
|
+
pivot_column_names,
|
|
28
|
+
rename_func,
|
|
29
|
+
remove_from_array_using_filter,
|
|
30
|
+
strposition_sql,
|
|
31
|
+
str_to_time_sql,
|
|
32
|
+
timestamptrunc_sql,
|
|
33
|
+
timestrtotime_sql,
|
|
34
|
+
unit_to_var,
|
|
35
|
+
unit_to_str,
|
|
36
|
+
sha256_sql,
|
|
37
|
+
build_regexp_extract,
|
|
38
|
+
explode_to_unnest_sql,
|
|
39
|
+
no_make_interval_sql,
|
|
40
|
+
groupconcat_sql,
|
|
41
|
+
)
|
|
42
|
+
from sqlglot.generator import unsupported_args
|
|
43
|
+
from sqlglot.helper import seq_get
|
|
44
|
+
from sqlglot.tokens import TokenType
|
|
45
|
+
from sqlglot.parser import binary_range_parser
|
|
46
|
+
|
|
47
|
+
# Expression types rendered by _date_delta_sql as `<operand> +/- <interval>`.
DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    """Render a date/time add or subtract as `<operand> +/- <interval>` for DuckDB."""
    operand = expression.this
    unit = unit_to_var(expression)

    # Additive expression types map to "+", subtractive ones to "-"
    if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd)):
        op = "+"
    else:
        op = "-"

    cast_to: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        cast_to = expression.return_type
    elif operand.is_string:
        # Cast string literals (i.e function parameters) to the appropriate type
        # so that +/- interval arithmetic works
        if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub)):
            cast_to = exp.DataType.Type.DATETIME
        else:
            cast_to = exp.DataType.Type.DATE

    if cast_to:
        operand = exp.cast(operand, cast_to)

    delta = expression.expression
    if not isinstance(delta, exp.Interval):
        delta = exp.Interval(this=delta, unit=unit)

    return f"{self.sql(operand)} {op} {self.sql(delta)}"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    """Generate DuckDB SQL for DATE(), honoring an optional time zone argument."""
    casted = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")
    if not zone:
        return casted

    # Round-trip through a formatted string and append the zone so that
    # STRPTIME produces a TIMESTAMP carrying time zone information
    formatted = self.func("STRFTIME", casted, "'%d/%m/%Y'")
    with_zone = f"{formatted} || ' ' || {zone}"
    return self.func("STRPTIME", with_zone, "'%d/%m/%Y %Z'")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    """Generate DuckDB's DATE_DIFF for a TIME_DIFF, casting both operands to TIME."""
    first = exp.cast(expression.this, exp.DataType.Type.TIME)
    second = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), second, first)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    """Render ARRAY_SORT on the array operand only; the comparator arg is dropped."""
    return self.func("ARRAY_SORT", expression.this)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    """Pick ARRAY_SORT or ARRAY_REVERSE_SORT based on the requested order."""
    if expression.args.get("asc") == exp.false():
        func_name = "ARRAY_REVERSE_SORT"
    else:
        func_name = "ARRAY_SORT"
    return self.func(func_name, expression.this)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _build_sort_array_desc(args: t.List) -> exp.Expression:
    """Build a descending SortArray node (asc=FALSE) from parsed arguments."""
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _build_date_diff(args: t.List) -> exp.Expression:
    """Build DateDiff from DuckDB's (unit, start, end) argument order."""
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    """Return a parser callback for GENERATE_SERIES / RANGE.

    `end_exclusive` marks the produced series node as half-open, which is how
    DuckDB's RANGE differs from GENERATE_SERIES.
    """

    def _parse_series(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        # DuckDB uses 0 as a default for the series' start when it's omitted
        if len(args) == 1:
            args.insert(0, exp.Literal.number("0"))

        series = exp.GenerateSeries.from_arg_list(args)
        series.set("is_end_exclusive", end_exclusive)
        return series

    return _parse_series
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _build_make_timestamp(args: t.List) -> exp.Expression:
    """Parse MAKE_TIMESTAMP: one arg is epoch microseconds, six are date parts."""
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    year, month, day, hour, minute, second = (seq_get(args, i) for i in range(6))
    return exp.TimestampFromParts(
        year=year,
        month=month,
        day=day,
        hour=hour,
        min=minute,
        sec=second,
    )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]:
    """Bind the given arguments into a SHOW-statement parser callback."""

    def _parse_bound_show(self: DuckDB.Parser) -> exp.Show:
        return self._parse_show_duckdb(*args, **kwargs)

    return _parse_bound_show
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    """Render a STRUCT literal, as either DuckDB's `{...}` or a `ROW(...)` call.

    The ROW form reproduces BigQuery's inline-struct construction; the `{k: v}`
    form is used for regular key/value structs.
    """
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    # 2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            # ROW form: positional values only, no keys
            args.append(self.sql(value))
        else:
            # `{...}` form: positional fields get synthetic `_<index>` keys
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    """Generate DuckDB type SQL, special-casing arrays and zone-aware time types."""
    if expression.is_type("array"):
        inner = self.expressions(expression, flat=True)
        size = self.expressions(expression, key="values", flat=True)
        return f"{inner}[{size}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE,
    # so emit just the bare type name
    no_modifier_types = (
        exp.DataType.Type.TIME,
        exp.DataType.Type.TIMETZ,
        exp.DataType.Type.TIMESTAMPTZ,
    )
    if expression.is_type(*no_modifier_types):
        return expression.this.value

    return self.datatype_sql(expression)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    """Render JSON_FORMAT as TO_JSON cast to TEXT."""
    to_json = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({to_json} AS TEXT)"
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    """Map UNIX_TO_TIME onto the DuckDB conversion function for the given scale."""
    scale = expression.args.get("scale")
    ts = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", ts)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", ts)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", ts)

    # Arbitrary scale: divide down to seconds before converting
    return self.func("TO_TIMESTAMP", exp.Div(this=ts, expression=exp.func("POW", 10, scale)))
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# Parent expression types under which arrow-style JSON extraction must be parenthesized
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    """Render arrow-style JSON extraction, parenthesizing it when its parent requires."""
    sql = arrow_json_extract_sql(self, expression)
    needs_wrap = not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    )
    return self.wrap(sql) if needs_wrap else sql
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    """Cast literal arguments to `type` so datetime ops work; pass others through."""
    if isinstance(arg, exp.Literal):
        return exp.cast(arg, type)
    return arg
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    """Render DATE_DIFF, casting literal operands to DATE first."""
    end = _implicit_datetime_cast(expression.this)
    start = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), start, end)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    """Translate BQ's GENERATE_DATE_ARRAY / GENERATE_TIMESTAMP_ARRAY to GENERATE_SERIES."""
    wants_dates = isinstance(expression, exp.GenerateDateArray)
    cast_type = exp.DataType.Type.DATE if wants_dates else exp.DataType.Type.TIMESTAMP

    series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=_implicit_datetime_cast(expression.args.get("start"), type=cast_type),
        end=_implicit_datetime_cast(expression.args.get("end"), type=cast_type),
        step=expression.args.get("step"),
    )

    if wants_dates:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's
        # semantics for GENERATE_DATE_ARRAY we must cast it back to DATE array
        series = exp.cast(series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(series)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    """Render JSON value/extract arrays as a JSON_EXTRACT cast to the matching array type."""
    extracted = exp.JSONExtract(this=expression.this, expression=expression.expression)
    if isinstance(expression, exp.JSONValueArray):
        target = "ARRAY<STRING>"
    else:
        target = "ARRAY<JSON>"
    return self.sql(exp.cast(extracted, to=exp.DataType.build(target)))
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
class DuckDB(Dialect):
|
|
280
|
+
NULL_ORDERING = "nulls_are_last"
|
|
281
|
+
SUPPORTS_USER_DEFINED_TYPES = True
|
|
282
|
+
SAFE_DIVISION = True
|
|
283
|
+
INDEX_OFFSET = 1
|
|
284
|
+
CONCAT_COALESCE = True
|
|
285
|
+
SUPPORTS_ORDER_BY_ALL = True
|
|
286
|
+
SUPPORTS_FIXED_SIZE_ARRAYS = True
|
|
287
|
+
STRICT_JSON_PATH_SYNTAX = False
|
|
288
|
+
NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
|
|
289
|
+
|
|
290
|
+
# https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
|
|
291
|
+
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
|
|
292
|
+
|
|
293
|
+
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
|
|
294
|
+
if isinstance(path, exp.Literal):
|
|
295
|
+
# DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
|
|
296
|
+
# Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
|
|
297
|
+
# This check ensures we'll avoid trying to parse these as JSON paths, which can
|
|
298
|
+
# either result in a noisy warning or in an invalid representation of the path.
|
|
299
|
+
path_text = path.name
|
|
300
|
+
if path_text.startswith("/") or "[#" in path_text:
|
|
301
|
+
return path
|
|
302
|
+
|
|
303
|
+
return super().to_json_path(path)
|
|
304
|
+
|
|
305
|
+
class Tokenizer(tokens.Tokenizer):
|
|
306
|
+
BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
|
|
307
|
+
HEREDOC_STRINGS = ["$"]
|
|
308
|
+
|
|
309
|
+
HEREDOC_TAG_IS_IDENTIFIER = True
|
|
310
|
+
HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER
|
|
311
|
+
|
|
312
|
+
KEYWORDS = {
|
|
313
|
+
**tokens.Tokenizer.KEYWORDS,
|
|
314
|
+
"//": TokenType.DIV,
|
|
315
|
+
"**": TokenType.DSTAR,
|
|
316
|
+
"^@": TokenType.CARET_AT,
|
|
317
|
+
"@>": TokenType.AT_GT,
|
|
318
|
+
"<@": TokenType.LT_AT,
|
|
319
|
+
"ATTACH": TokenType.ATTACH,
|
|
320
|
+
"BINARY": TokenType.VARBINARY,
|
|
321
|
+
"BITSTRING": TokenType.BIT,
|
|
322
|
+
"BPCHAR": TokenType.TEXT,
|
|
323
|
+
"CHAR": TokenType.TEXT,
|
|
324
|
+
"DATETIME": TokenType.TIMESTAMPNTZ,
|
|
325
|
+
"DETACH": TokenType.DETACH,
|
|
326
|
+
"EXCLUDE": TokenType.EXCEPT,
|
|
327
|
+
"LOGICAL": TokenType.BOOLEAN,
|
|
328
|
+
"ONLY": TokenType.ONLY,
|
|
329
|
+
"PIVOT_WIDER": TokenType.PIVOT,
|
|
330
|
+
"POSITIONAL": TokenType.POSITIONAL,
|
|
331
|
+
"SIGNED": TokenType.INT,
|
|
332
|
+
"STRING": TokenType.TEXT,
|
|
333
|
+
"SUMMARIZE": TokenType.SUMMARIZE,
|
|
334
|
+
"TIMESTAMP": TokenType.TIMESTAMPNTZ,
|
|
335
|
+
"TIMESTAMP_S": TokenType.TIMESTAMP_S,
|
|
336
|
+
"TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
|
|
337
|
+
"TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
|
|
338
|
+
"TIMESTAMP_US": TokenType.TIMESTAMP,
|
|
339
|
+
"UBIGINT": TokenType.UBIGINT,
|
|
340
|
+
"UINTEGER": TokenType.UINT,
|
|
341
|
+
"USMALLINT": TokenType.USMALLINT,
|
|
342
|
+
"UTINYINT": TokenType.UTINYINT,
|
|
343
|
+
"VARCHAR": TokenType.TEXT,
|
|
344
|
+
}
|
|
345
|
+
KEYWORDS.pop("/*+")
|
|
346
|
+
|
|
347
|
+
SINGLE_TOKENS = {
|
|
348
|
+
**tokens.Tokenizer.SINGLE_TOKENS,
|
|
349
|
+
"$": TokenType.PARAMETER,
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
|
|
353
|
+
|
|
354
|
+
class Parser(parser.Parser):
|
|
355
|
+
BITWISE = {
|
|
356
|
+
**parser.Parser.BITWISE,
|
|
357
|
+
TokenType.TILDA: exp.RegexpLike,
|
|
358
|
+
}
|
|
359
|
+
BITWISE.pop(TokenType.CARET)
|
|
360
|
+
|
|
361
|
+
RANGE_PARSERS = {
|
|
362
|
+
**parser.Parser.RANGE_PARSERS,
|
|
363
|
+
TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
|
|
364
|
+
TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
EXPONENT = {
|
|
368
|
+
**parser.Parser.EXPONENT,
|
|
369
|
+
TokenType.CARET: exp.Pow,
|
|
370
|
+
TokenType.DSTAR: exp.Pow,
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}
|
|
374
|
+
|
|
375
|
+
SHOW_PARSERS = {
|
|
376
|
+
"TABLES": _show_parser("TABLES"),
|
|
377
|
+
"ALL TABLES": _show_parser("ALL TABLES"),
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
FUNCTIONS = {
|
|
381
|
+
**parser.Parser.FUNCTIONS,
|
|
382
|
+
"ARRAY_REVERSE_SORT": _build_sort_array_desc,
|
|
383
|
+
"ARRAY_SORT": exp.SortArray.from_arg_list,
|
|
384
|
+
"DATEDIFF": _build_date_diff,
|
|
385
|
+
"DATE_DIFF": _build_date_diff,
|
|
386
|
+
"DATE_TRUNC": date_trunc_to_time,
|
|
387
|
+
"DATETRUNC": date_trunc_to_time,
|
|
388
|
+
"DECODE": lambda args: exp.Decode(
|
|
389
|
+
this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
|
|
390
|
+
),
|
|
391
|
+
"EDITDIST3": exp.Levenshtein.from_arg_list,
|
|
392
|
+
"ENCODE": lambda args: exp.Encode(
|
|
393
|
+
this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
|
|
394
|
+
),
|
|
395
|
+
"EPOCH": exp.TimeToUnix.from_arg_list,
|
|
396
|
+
"EPOCH_MS": lambda args: exp.UnixToTime(
|
|
397
|
+
this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
|
|
398
|
+
),
|
|
399
|
+
"GENERATE_SERIES": _build_generate_series(),
|
|
400
|
+
"JSON": exp.ParseJSON.from_arg_list,
|
|
401
|
+
"JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
|
|
402
|
+
"JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
|
|
403
|
+
"LIST_HAS": exp.ArrayContains.from_arg_list,
|
|
404
|
+
"LIST_REVERSE_SORT": _build_sort_array_desc,
|
|
405
|
+
"LIST_SORT": exp.SortArray.from_arg_list,
|
|
406
|
+
"LIST_VALUE": lambda args: exp.Array(expressions=args),
|
|
407
|
+
"MAKE_TIME": exp.TimeFromParts.from_arg_list,
|
|
408
|
+
"MAKE_TIMESTAMP": _build_make_timestamp,
|
|
409
|
+
"QUANTILE_CONT": exp.PercentileCont.from_arg_list,
|
|
410
|
+
"QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
|
|
411
|
+
"RANGE": _build_generate_series(end_exclusive=True),
|
|
412
|
+
"REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
|
|
413
|
+
"REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
|
|
414
|
+
"REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
|
|
415
|
+
"REGEXP_REPLACE": lambda args: exp.RegexpReplace(
|
|
416
|
+
this=seq_get(args, 0),
|
|
417
|
+
expression=seq_get(args, 1),
|
|
418
|
+
replacement=seq_get(args, 2),
|
|
419
|
+
modifiers=seq_get(args, 3),
|
|
420
|
+
),
|
|
421
|
+
"SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
|
|
422
|
+
"STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
|
|
423
|
+
"STRING_SPLIT": exp.Split.from_arg_list,
|
|
424
|
+
"STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
|
|
425
|
+
"STRING_TO_ARRAY": exp.Split.from_arg_list,
|
|
426
|
+
"STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
|
|
427
|
+
"STRUCT_PACK": exp.Struct.from_arg_list,
|
|
428
|
+
"STR_SPLIT": exp.Split.from_arg_list,
|
|
429
|
+
"STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
|
|
430
|
+
"TIME_BUCKET": exp.DateBin.from_arg_list,
|
|
431
|
+
"TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
|
|
432
|
+
"UNNEST": exp.Explode.from_arg_list,
|
|
433
|
+
"XOR": binary_from_function(exp.BitwiseXor),
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
FUNCTIONS.pop("DATE_SUB")
|
|
437
|
+
FUNCTIONS.pop("GLOB")
|
|
438
|
+
|
|
439
|
+
FUNCTION_PARSERS = {
|
|
440
|
+
**parser.Parser.FUNCTION_PARSERS,
|
|
441
|
+
**dict.fromkeys(
|
|
442
|
+
("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg()
|
|
443
|
+
),
|
|
444
|
+
}
|
|
445
|
+
FUNCTION_PARSERS.pop("DECODE")
|
|
446
|
+
|
|
447
|
+
NO_PAREN_FUNCTION_PARSERS = {
|
|
448
|
+
**parser.Parser.NO_PAREN_FUNCTION_PARSERS,
|
|
449
|
+
"MAP": lambda self: self._parse_map(),
|
|
450
|
+
"@": lambda self: exp.Abs(this=self._parse_bitwise()),
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
|
|
454
|
+
TokenType.SEMI,
|
|
455
|
+
TokenType.ANTI,
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
PLACEHOLDER_PARSERS = {
|
|
459
|
+
**parser.Parser.PLACEHOLDER_PARSERS,
|
|
460
|
+
TokenType.PARAMETER: lambda self: (
|
|
461
|
+
self.expression(exp.Placeholder, this=self._prev.text)
|
|
462
|
+
if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
|
|
463
|
+
else None
|
|
464
|
+
),
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
TYPE_CONVERTERS = {
|
|
468
|
+
# https://duckdb.org/docs/sql/data_types/numeric
|
|
469
|
+
exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
|
|
470
|
+
# https://duckdb.org/docs/sql/data_types/text
|
|
471
|
+
exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
STATEMENT_PARSERS = {
|
|
475
|
+
**parser.Parser.STATEMENT_PARSERS,
|
|
476
|
+
TokenType.ATTACH: lambda self: self._parse_attach_detach(),
|
|
477
|
+
TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False),
|
|
478
|
+
TokenType.SHOW: lambda self: self._parse_show(),
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
def _parse_expression(self) -> t.Optional[exp.Expression]:
|
|
482
|
+
# DuckDB supports prefix aliases, e.g. foo: 1
|
|
483
|
+
if self._next and self._next.token_type == TokenType.COLON:
|
|
484
|
+
alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
|
|
485
|
+
self._match(TokenType.COLON)
|
|
486
|
+
comments = self._prev_comments or []
|
|
487
|
+
|
|
488
|
+
this = self._parse_assignment()
|
|
489
|
+
if isinstance(this, exp.Expression):
|
|
490
|
+
# Moves the comment next to the alias in `alias: expr /* comment */`
|
|
491
|
+
comments += this.pop_comments() or []
|
|
492
|
+
|
|
493
|
+
return self.expression(exp.Alias, comments=comments, this=this, alias=alias)
|
|
494
|
+
|
|
495
|
+
return super()._parse_expression()
|
|
496
|
+
|
|
497
|
+
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table reference, handling DuckDB's prefix-alias syntax.

    DuckDB supports prefix aliases, e.g. `FROM foo: bar` (equivalent to
    `FROM bar AS foo`). The alias, if present, is consumed first and then
    attached to the table parsed by the base implementation.
    """
    if self._next and self._next.token_type == TokenType.COLON:
        alias = self._parse_table_alias(
            alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
        )
        self._match(TokenType.COLON)
        comments = self._prev_comments or []
    else:
        alias = None
        comments = []

    table = super()._parse_table(
        schema=schema,
        joins=joins,
        alias_tokens=alias_tokens,
        parse_bracket=parse_bracket,
        is_db_reference=is_db_reference,
        parse_partition=parse_partition,
    )
    if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
        # Moves the comment next to the alias in `alias: table /* comment */`
        comments += table.pop_comments() or []
        alias.comments = alias.pop_comments() + comments
        table.set("alias", alias)

    return table
|
|
532
|
+
|
|
533
|
+
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a sample clause, filling in DuckDB's default sampling methods.

    When no explicit method is given, DuckDB uses RESERVOIR sampling for a
    fixed row count and SYSTEM sampling for a percentage.
    https://duckdb.org/docs/sql/samples.html
    """
    sample = super()._parse_table_sample(as_modifier=as_modifier)
    if sample and not sample.args.get("method"):
        if sample.args.get("size"):
            sample.set("method", exp.var("RESERVOIR"))
        else:
            sample.set("method", exp.var("SYSTEM"))

    return sample
|
|
543
|
+
|
|
544
|
+
def _parse_bracket(
    self, this: t.Optional[exp.Expression] = None
) -> t.Optional[exp.Expression]:
    """Parse a bracket expression, tagging pre-1.2.0 MAP indexing semantics.

    Before DuckDB 1.2.0, indexing a MAP returned a list; the flag lets the
    generator reproduce that behavior.
    """
    bracket = super()._parse_bracket(this)

    if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
        # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
        bracket.set("returns_list_for_maps", True)

    return bracket
|
|
554
|
+
|
|
555
|
+
def _parse_map(self) -> exp.ToMap | exp.Map:
    """Parse a MAP constructor.

    A brace form (`MAP {k: v}`) becomes a ToMap wrapping the bracket
    expression; otherwise the parenthesized two-list form
    (`MAP([keys], [values])`) becomes a Map.
    """
    if not self._match(TokenType.L_BRACE, advance=False):
        entries = self._parse_wrapped_csv(self._parse_assignment)
        return self.expression(
            exp.Map, keys=seq_get(entries, 0), values=seq_get(entries, 1)
        )

    return self.expression(exp.ToMap, this=self._parse_bracket())
|
|
561
|
+
|
|
562
|
+
def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
    """Parse a STRUCT member as a full field definition (name + type)."""
    # `type_required` is intentionally ignored: DuckDB struct members always
    # parse as field definitions.
    return self._parse_field_def()
|
|
564
|
+
|
|
565
|
+
def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
    """Compute PIVOT output column names.

    A single aggregation keeps the base behavior; multiple aggregations use
    DuckDB-specific naming.
    """
    single_aggregation = len(aggregations) == 1
    return (
        super()._pivot_column_names(aggregations)
        if single_aggregation
        else pivot_column_names(aggregations, dialect="duckdb")
    )
|
|
569
|
+
|
|
570
|
+
def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
    """Parse an ATTACH or DETACH [DATABASE] statement.

    ATTACH supports `IF NOT EXISTS`, an optional alias, and a parenthesized
    option list; DETACH supports `IF EXISTS`.
    """
    def _parse_attach_option() -> exp.AttachOption:
        # A single `key value` pair inside the option list.
        return self.expression(
            exp.AttachOption,
            this=self._parse_var(any_token=True),
            expression=self._parse_field(any_token=True),
        )

    # The DATABASE keyword is optional in both forms.
    self._match(TokenType.DATABASE)
    # ATTACH uses IF NOT EXISTS; DETACH uses IF EXISTS.
    exists = self._parse_exists(not_=is_attach)
    this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_csv(_parse_attach_option)
    else:
        expressions = None

    return (
        self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
        if is_attach
        else self.expression(exp.Detach, this=this, exists=exists)
    )
|
|
592
|
+
|
|
593
|
+
def _parse_show_duckdb(self, this: str) -> exp.Show:
    """Build a Show node for DuckDB's simple `SHOW <target>` statement."""
    return self.expression(exp.Show, this=this)
|
|
595
|
+
|
|
596
|
+
class Generator(generator.Generator):
    # SQL-generation feature flags describing DuckDB's syntax.
    PARAMETER_TOKEN = "$"  # positional placeholders render as $1, $2, ...
    NAMED_PLACEHOLDER_TOKEN = "$"  # named placeholders render as $name
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    LIMIT_FETCH = "LIMIT"  # FETCH clauses are emitted as LIMIT
    STRUCT_DELIMITER = ("(", ")")  # STRUCT(...) rather than STRUCT<...>
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    SEMI_ANTI_JOIN_WITH_SIDE = False
    TABLESAMPLE_KEYWORDS = "USING SAMPLE"
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    SUPPORTS_CREATE_TABLE_LIKE = False
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    SUPPORTS_WINDOW_EXCLUDE = True
    COPY_HAS_INTO_KEYWORD = False
    STAR_EXCEPT = "EXCLUDE"  # DuckDB spells SELECT * EXCEPT as EXCLUDE
    PAD_FILL_PATTERN_IS_REQUIRED = True
    ARRAY_CONCAT_IS_VAR_LEN = False
    ARRAY_SIZE_DIM_REQUIRED = False
|
|
623
|
+
|
|
624
|
+
    # Per-expression SQL generators that override the base Generator's output
    # to match DuckDB's function names and syntax.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.Array: inline_array_unless_query,
        exp.ArrayFilter: rename_func("LIST_FILTER"),
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArraySort: _array_sort_sql,
        exp.ArraySum: rename_func("LIST_SUM"),
        exp.BitwiseXor: rename_func("XOR"),
        exp.CommentColumnConstraint: no_comment_column_constraint_sql,
        exp.CurrentDate: lambda *_: "CURRENT_DATE",
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("ISODOW"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.DataType: _datatype_sql,
        exp.Date: _date_sql,
        exp.DateAdd: _date_delta_sql,
        exp.DateFromParts: rename_func("MAKE_DATE"),
        exp.DateSub: _date_delta_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.Datetime: no_datetime_sql,
        exp.DatetimeSub: _date_delta_sql,
        exp.DatetimeAdd: _date_delta_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
        exp.DiToDate: lambda self,
        e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
        exp.GenerateDateArray: _generate_datetime_array_sql,
        exp.GenerateTimestampArray: _generate_datetime_array_sql,
        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
        exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
        exp.Explode: rename_func("UNNEST"),
        exp.IntDiv: lambda self, e: self.binary(e, "//"),
        exp.IsInf: rename_func("ISINF"),
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONBExists: rename_func("JSON_EXISTS"),
        exp.JSONExtract: _arrow_json_extract_sql,
        exp.JSONExtractArray: _json_extract_value_array_sql,
        exp.JSONExtractScalar: _arrow_json_extract_sql,
        exp.JSONFormat: _json_format_sql,
        exp.JSONValueArray: _json_extract_value_array_sql,
        exp.Lateral: explode_to_unnest_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.MonthsBetween: lambda self, e: self.func(
            "DATEDIFF",
            "'month'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
        ),
        exp.PercentileCont: rename_func("QUANTILE_CONT"),
        exp.PercentileDisc: rename_func("QUANTILE_DISC"),
        # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
        # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
        exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("modifiers"),
        ),
        exp.RegexpLike: rename_func("REGEXP_MATCHES"),
        exp.RegexpILike: lambda self, e: self.func(
            "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
        ),
        exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
        exp.Return: lambda self, e: self.sql(e, "this"),
        exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
        exp.Rand: rename_func("RANDOM"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.Split: rename_func("STR_SPLIT"),
        exp.SortArray: _sort_array_sql,
        exp.StrPosition: strposition_sql,
        exp.StrToUnix: lambda self, e: self.func(
            "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
        ),
        exp.Struct: _struct_sql,
        exp.Transform: rename_func("LIST_TRANSFORM"),
        exp.TimeAdd: _date_delta_sql,
        exp.Time: no_time_sql,
        exp.TimeDiff: _timediff_sql,
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampDiff: lambda self, e: self.func(
            "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
        ),
        exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("EPOCH"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _date_delta_sql,
        exp.TsOrDsDiff: lambda self, e: self.func(
            "DATE_DIFF",
            f"'{e.args.get('unit') or 'DAY'}'",
            exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
            exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
        ),
        exp.UnixToStr: lambda self, e: self.func(
            "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
        ),
        exp.DatetimeTrunc: lambda self, e: self.func(
            "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
        ),
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: bool_xor_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN")
        ),
        exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
        exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
        exp.DateBin: rename_func("TIME_BUCKET"),
    }
|
|
754
|
+
|
|
755
|
+
    # JSON-path node types DuckDB can render; anything else is unsupported.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }
|
|
761
|
+
|
|
762
|
+
    # Map generic sqlglot data types onto DuckDB's type names
    # (e.g. no fixed-width/national char types, JSONB folds into JSON).
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "BLOB",
        exp.DataType.Type.BPCHAR: "TEXT",
        exp.DataType.Type.CHAR: "TEXT",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.JSONB: "JSON",
        exp.DataType.Type.NCHAR: "TEXT",
        exp.DataType.Type.NVARCHAR: "TEXT",
        exp.DataType.Type.UINT: "UINTEGER",
        exp.DataType.Type.VARBINARY: "BLOB",
        exp.DataType.Type.ROWVERSION: "BLOB",
        exp.DataType.Type.VARCHAR: "TEXT",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
        exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
        exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
    }
|
|
781
|
+
|
|
782
|
+
    # Identifiers matching these must be quoted when generated. The `_p`
    # suffixes mirror libpg_query's internal token names verbatim.
    # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
    RESERVED_KEYWORDS = {
        "array",
        "analyse",
        "union",
        "all",
        "when",
        "in_p",
        "default",
        "create_p",
        "window",
        "asymmetric",
        "to",
        "else",
        "localtime",
        "from",
        "end_p",
        "select",
        "current_date",
        "foreign",
        "with",
        "grant",
        "session_user",
        "or",
        "except",
        "references",
        "fetch",
        "limit",
        "group_p",
        "leading",
        "into",
        "collate",
        "offset",
        "do",
        "then",
        "localtimestamp",
        "check_p",
        "lateral_p",
        "current_role",
        "where",
        "asc_p",
        "placing",
        "desc_p",
        "user",
        "unique",
        "initially",
        "column",
        "both",
        "some",
        "as",
        "any",
        "only",
        "deferrable",
        "null_p",
        "current_time",
        "true_p",
        "table",
        "case",
        "trailing",
        "variadic",
        "for",
        "on",
        "distinct",
        "false_p",
        "not",
        "constraint",
        "current_timestamp",
        "returning",
        "primary",
        "intersect",
        "having",
        "analyze",
        "current_user",
        "and",
        "cast",
        "symmetric",
        "using",
        "order",
        "current_catalog",
    }
|
|
862
|
+
|
|
863
|
+
UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)
|
|
864
|
+
|
|
865
|
+
    # DuckDB doesn't generally support CREATE TABLE .. properties
    # https://duckdb.org/docs/sql/statements/create_table.html
    PROPERTIES_LOCATION = {
        prop: exp.Properties.Location.UNSUPPORTED
        for prop in generator.Generator.PROPERTIES_LOCATION
    }

    # There are a few exceptions (e.g. temporary tables) which are supported or
    # can be transpiled to DuckDB, so we explicitly override them accordingly
    PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
    PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
    PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS
|
|
877
|
+
|
|
878
|
+
    # Window functions for which DuckDB accepts IGNORE/RESPECT NULLS;
    # used by ignorenulls_sql/respectnulls_sql to decide whether to emit it.
    IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
        exp.FirstValue,
        exp.Lag,
        exp.LastValue,
        exp.Lead,
        exp.NthValue,
    )
|
|
885
|
+
|
|
886
|
+
def show_sql(self, expression: exp.Show) -> str:
    """Render a SHOW statement, e.g. `SHOW TABLES`."""
    target = expression.name
    return f"SHOW {target}"
|
|
888
|
+
|
|
889
|
+
def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
    """Render FROM_ISO8601_TIMESTAMP as a cast to TIMESTAMPTZ."""
    casted = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)
    return self.sql(casted)
|
|
891
|
+
|
|
892
|
+
def strtotime_sql(self, expression: exp.StrToTime) -> str:
    """Render STR_TO_TIME; the "safe" variant uses TRY_STRPTIME so parse
    failures yield NULL instead of an error."""
    if expression.args.get("safe"):
        formatted_time = self.format_time(expression)
        return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
    return str_to_time_sql(self, expression)
|
|
897
|
+
|
|
898
|
+
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    """Render STR_TO_DATE as a parse (TRY_STRPTIME when "safe") cast to DATE."""
    if expression.args.get("safe"):
        formatted_time = self.format_time(expression)
        return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
    return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
|
|
903
|
+
|
|
904
|
+
def parsejson_sql(self, expression: exp.ParseJSON) -> str:
    """Render PARSE_JSON; the "safe" variant returns NULL for invalid JSON
    via a JSON_VALID guard instead of raising."""
    arg = expression.this
    if expression.args.get("safe"):
        return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
    return self.func("JSON", arg)
|
|
909
|
+
|
|
910
|
+
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    """Render TIME_FROM_PARTS as MAKE_TIME, folding nanoseconds into the
    seconds argument (DuckDB's MAKE_TIME has no nano parameter).

    Note: mutates `expression` in place before delegating.
    """
    nano = expression.args.get("nano")
    if nano is not None:
        expression.set(
            "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
        )

    return rename_func("MAKE_TIME")(self, expression)
|
|
918
|
+
|
|
919
|
+
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TIMESTAMP_FROM_PARTS as MAKE_TIMESTAMP, folding milli/nano
    components into the fractional seconds argument.

    Note: mutates `expression` in place before delegating.
    """
    sec = expression.args["sec"]

    milli = expression.args.get("milli")
    if milli is not None:
        sec += milli.pop() / exp.Literal.number(1000.0)

    nano = expression.args.get("nano")
    if nano is not None:
        sec += nano.pop() / exp.Literal.number(1000000000.0)

    # Only rewrite `sec` when something was actually folded in.
    if milli or nano:
        expression.set("sec", sec)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
|
|
934
|
+
|
|
935
|
+
def tablesample_sql(
    self,
    expression: exp.TableSample,
    tablesample_keyword: t.Optional[str] = None,
) -> str:
    """Render a sample clause, picking TABLESAMPLE vs USING SAMPLE and
    coercing the method to RESERVOIR for discrete row counts."""
    if not isinstance(expression.parent, exp.Select):
        # This sample clause only applies to a single source, not the entire resulting relation
        tablesample_keyword = "TABLESAMPLE"

    if expression.args.get("size"):
        method = expression.args.get("method")
        if method and method.name.upper() != "RESERVOIR":
            self.unsupported(
                f"Sampling method {method} is not supported with a discrete sample count, "
                "defaulting to reservoir sampling"
            )
            expression.set("method", exp.var("RESERVOIR"))

    return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
|
|
954
|
+
|
|
955
|
+
def interval_sql(self, expression: exp.Interval) -> str:
    """Render an INTERVAL, rewriting WEEK/QUARTER units as multiples of DAY
    (7 and 90 days respectively), since DuckDB lacks those interval units."""
    multiplier: t.Optional[int] = None
    unit = expression.text("unit").lower()

    if unit.startswith("week"):
        multiplier = 7
    if unit.startswith("quarter"):
        multiplier = 90

    if multiplier:
        return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

    return super().interval_sql(expression)
|
|
968
|
+
|
|
969
|
+
def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
    """Render a column definition; UDF parameters render as the bare name."""
    inside_udf = isinstance(expression.parent, exp.UserDefinedFunction)
    if not inside_udf:
        return super().columndef_sql(expression, sep)
    return self.sql(expression, "this")
|
|
973
|
+
|
|
974
|
+
def join_sql(self, expression: exp.Join) -> str:
    """Render a JOIN, patching bare `LEFT JOIN UNNEST(...)` with `ON TRUE`.

    Some dialects allow `LEFT JOIN UNNEST(...)` without an ON clause;
    DuckDB doesn't, but an always-true condition is equivalent.
    """
    is_bare_left_unnest = (
        expression.side == "LEFT"
        and not expression.args.get("on")
        and isinstance(expression.this, exp.Unnest)
    )
    if is_bare_left_unnest:
        expression = expression.on(exp.true())

    return super().join_sql(expression)
|
|
985
|
+
|
|
986
|
+
def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
    """Render GENERATE_SERIES, mapping the end-exclusive form to RANGE.

    GENERATE_SERIES(a, b) covers [a, b]; RANGE(a, b) covers [a, b).
    """
    end_exclusive = expression.args.get("is_end_exclusive")
    if not end_exclusive:
        return self.function_fallback_sql(expression)

    return rename_func("RANGE")(self, expression)
|
|
992
|
+
|
|
993
|
+
def countif_sql(self, expression: exp.CountIf) -> str:
    """Render COUNT_IF; for DuckDB < 1.2 it is emulated with SUM(CASE ...).

    https://github.com/tobymao/sqlglot/pull/4749
    """
    native_support = self.dialect.version >= Version("1.2")
    return (
        self.function_fallback_sql(expression)
        if native_support
        else count_if_to_sum(self, expression)
    )
|
|
999
|
+
|
|
1000
|
+
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render a bracket subscript with pre-1.2 compatibility shims.

    Before DuckDB 1.2, `arr[i]` needed the array parenthesized, and MAP
    subscripts returned single-element lists, so `[1]` is appended to
    extract the value when list semantics aren't wanted.
    https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
    """
    if self.dialect.version >= Version("1.2"):
        return super().bracket_sql(expression)

    this = expression.this
    if isinstance(this, exp.Array):
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(exp.DataType.Type.MAP):
            # Unwrap the single-element list a MAP subscript used to return.
            bracket = f"({bracket})[1]"

    return bracket
|
|
1021
|
+
|
|
1022
|
+
def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    """Render WITHIN GROUP for ordered-set aggregates.

    For percentile functions the order key becomes the first argument and
    the fraction slides to the right, per DuckDB's calling convention.
    https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
    Note: mutates `func`'s args in place before rendering.
    """
    expression_sql = self.sql(expression, "expression")

    func = expression.this
    if isinstance(func, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    # Strip the trailing ")" so the ORDER BY clause can be spliced inside.
    this = self.sql(expression, "this").rstrip(")")

    return f"{this}{expression_sql})"
|
|
1037
|
+
|
|
1038
|
+
def length_sql(self, expression: exp.Length) -> str:
    """Render LENGTH, resolving string vs binary semantics.

    Dialects like BQ and Snowflake also accept binary values as args, so
    the argument's type is inferred; if it cannot be determined statically,
    a runtime TYPEOF CASE dispatches between OCTET_LENGTH and LENGTH.
    """
    arg = expression.this

    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # We need these casts to make duckdb's static type checker happy
    blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

    case = (
        exp.case(self.func("TYPEOF", arg))
        .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        .else_(
            exp.Anonymous(this="LENGTH", expressions=[varchar])
        )  # anonymous to break length_sql recursion
    )

    return self.sql(case)
|
|
1067
|
+
|
|
1068
|
+
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    """Render OBJECT_INSERT as STRUCT_INSERT (or STRUCT_PACK for an empty
    input struct)."""
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
|
|
1082
|
+
|
|
1083
|
+
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Render UNNEST, emulating BigQuery's array-exploding form when flagged.

    Note: mutates `expression` (appends a max_depth kwarg, clears the alias)
    before rendering.
    """
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)
|
|
1103
|
+
|
|
1104
|
+
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    """Render IGNORE NULLS only for the window functions that accept it.

    DuckDB supports e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...); for any
    other function the modifier is dropped with a warning.
    """
    if not isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        self.unsupported("IGNORE NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")

    return super().ignorenulls_sql(expression)
|
|
1112
|
+
|
|
1113
|
+
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    """Render RESPECT NULLS only for the window functions that accept it.

    DuckDB supports e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...); for any
    other function the modifier is dropped with a warning.
    """
    if not isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        self.unsupported("RESPECT NULLS is not supported for non-window functions.")
        return self.sql(expression, "this")

    return super().respectnulls_sql(expression)
|
|
1121
|
+
|
|
1122
|
+
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    """Render ARRAY_TO_STRING, pre-substituting NULL elements via
    LIST_TRANSFORM/COALESCE when a null replacement is given."""
    array_sql = self.sql(expression, "this")
    null_replacement = self.sql(expression, "null")

    if null_replacement:
        array_sql = f"LIST_TRANSFORM({array_sql}, x -> COALESCE(x, {null_replacement}))"

    return self.func("ARRAY_TO_STRING", array_sql, expression.expression)
|
|
1130
|
+
|
|
1131
|
+
@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    """Render REGEXP_EXTRACT, omitting the group argument when it is this
    dialect's default and no parameters argument follows it."""
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None
    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )
|
|
1147
|
+
|
|
1148
|
+
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    """Render NUMBER_TO_STR via DuckDB's FORMAT with a thousands-separator
    format string; only integer precision formats are supported."""
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        # e.g. fmt=2 -> FORMAT('{:,.2f}', x)
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
|
|
1156
|
+
|
|
1157
|
+
def autoincrementcolumnconstraint_sql(self, _) -> str:
    """Drop AUTOINCREMENT with a warning — DuckDB has no such constraint."""
    message = "The AUTOINCREMENT column constraint is not supported by DuckDB"
    self.unsupported(message)
    return ""
|