altimate-code 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/README.md +1 -5
- package/bin/altimate +6 -0
- package/bin/altimate-code +6 -0
- package/dbt-tools/bin/altimate-dbt +2 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
- package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
- package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
- package/dbt-tools/dist/index.js +23859 -0
- package/package.json +13 -13
- package/postinstall.mjs +42 -0
- package/skills/altimate-setup/SKILL.md +31 -0
package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py
ADDED
|
@@ -0,0 +1,1331 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
import typing as t
|
|
6
|
+
|
|
7
|
+
from sqlglot import exp, generator, parser, tokens, transforms
|
|
8
|
+
from sqlglot._typing import E
|
|
9
|
+
from sqlglot.dialects.dialect import (
|
|
10
|
+
Dialect,
|
|
11
|
+
NormalizationStrategy,
|
|
12
|
+
annotate_with_type_lambda,
|
|
13
|
+
arg_max_or_min_no_count,
|
|
14
|
+
binary_from_function,
|
|
15
|
+
date_add_interval_sql,
|
|
16
|
+
datestrtodate_sql,
|
|
17
|
+
build_formatted_time,
|
|
18
|
+
filter_array_using_unnest,
|
|
19
|
+
if_sql,
|
|
20
|
+
inline_array_unless_query,
|
|
21
|
+
max_or_greatest,
|
|
22
|
+
min_or_least,
|
|
23
|
+
no_ilike_sql,
|
|
24
|
+
build_date_delta_with_interval,
|
|
25
|
+
regexp_replace_sql,
|
|
26
|
+
rename_func,
|
|
27
|
+
sha256_sql,
|
|
28
|
+
timestrtotime_sql,
|
|
29
|
+
ts_or_ds_add_cast,
|
|
30
|
+
unit_to_var,
|
|
31
|
+
strposition_sql,
|
|
32
|
+
groupconcat_sql,
|
|
33
|
+
)
|
|
34
|
+
from sqlglot.helper import seq_get, split_num_words
|
|
35
|
+
from sqlglot.tokens import TokenType
|
|
36
|
+
from sqlglot.generator import unsupported_args
|
|
37
|
+
|
|
38
|
+
if t.TYPE_CHECKING:
|
|
39
|
+
from sqlglot._typing import Lit
|
|
40
|
+
|
|
41
|
+
from sqlglot.optimizer.annotate_types import TypeAnnotator
|
|
42
|
+
|
|
43
|
+
logger = logging.getLogger("sqlglot")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]
|
|
47
|
+
|
|
48
|
+
DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    """Render a VALUES clause used as a derived table as UNNEST([STRUCT(...), ...]).

    VALUES outside of a FROM/JOIN context is left untouched and rendered normally.
    """
    if not expression.find_ancestor(exp.From, exp.Join):
        # Not a derived table -- plain VALUES syntax is fine here.
        return self.values_sql(expression)

    table_alias = expression.args.get("alias")
    struct_rows = []
    for row in expression.find_all(exp.Tuple):
        if table_alias and table_alias.columns:
            field_aliases = table_alias.columns
        else:
            # Fall back to positional names _c0, _c1, ... when no column aliases exist.
            field_aliases = (f"_c{i}" for i in range(len(row.expressions)))

        fields = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, row.expressions)
        ]
        struct_rows.append(exp.Struct(expressions=fields))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[table_alias.this]) if table_alias else None
    unnest = exp.Unnest(expressions=[exp.array(*struct_rows, copy=False)], alias=alias_name_only)
    return self.unnest_sql(unnest)
|
|
76
|
+
def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    """Render a RETURNS clause; schema return types use BigQuery's angle-bracket form."""
    inner = expression.this
    if isinstance(inner, exp.Schema):
        rendered = f"{self.sql(inner, 'this')} <{self.expressions(inner)}>"
    else:
        rendered = self.sql(inner)
    return f"RETURNS {rendered}"
|
85
|
+
def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    """Render CREATE; table-valued functions become CREATE TABLE FUNCTION.

    When a function RETURNS a table, BigQuery requires the TABLE FUNCTION kind,
    and a subquery/literal body is unwrapped to its inner expression.
    """
    returns = expression.find(exp.ReturnsProperty)
    is_table_function = (
        expression.kind == "FUNCTION" and returns and returns.args.get("is_table")
    )
    if is_table_function:
        expression.set("kind", "TABLE FUNCTION")

        body = expression.expression
        if isinstance(body, (exp.Subquery, exp.Literal)):
            expression.set("expression", body.this)

    return self.create_sql(expression)
|
96
|
+
# https://issuetracker.google.com/issues/162294746
|
|
97
|
+
# workaround for bigquery bug when grouping by an expression and then ordering
|
|
98
|
+
# WITH x AS (SELECT 1 y)
|
|
99
|
+
# SELECT y + 1 z
|
|
100
|
+
# FROM x
|
|
101
|
+
# GROUP BY x + 1
|
|
102
|
+
# ORDER by z
|
|
103
|
+
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    """Rewrite GROUP BY expressions to use their projection aliases.

    Works around a BigQuery issue when grouping by an expression and then
    ordering by its alias (see the tracker link above this function).
    """
    if not isinstance(expression, exp.Select):
        return expression

    group = expression.args.get("group")
    order = expression.args.get("order")
    if not (group and order):
        return expression

    # Map each aliased projection expression to its alias identifier.
    alias_by_expr = {
        proj.this: proj.args["alias"]
        for proj in expression.selects
        if isinstance(proj, exp.Alias)
    }

    for grouped in group.expressions:
        # Integer ordinals (GROUP BY 1) are already position-based; leave them alone.
        if grouped.is_int:
            continue
        alias = alias_by_expr.get(grouped)
        if alias:
            grouped.replace(exp.column(alias))

    return expression
|
|
124
|
+
|
|
125
|
+
def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if not (isinstance(expression, exp.CTE) and expression.alias_column_names):
        return expression

    cte_query = expression.this

    if cte_query.is_star:
        logger.warning(
            "Can't push down CTE column names for star queries. Run the query through"
            " the optimizer or use 'qualify' to expand the star projections first."
        )
        return expression

    column_names = expression.alias_column_names
    expression.args["alias"].set("columns", None)

    for name, projection in zip(column_names, cte_query.selects):
        target = projection

        # Inner aliases are shadowed by the CTE column names
        if isinstance(projection, exp.Alias):
            projection = projection.this

        target.replace(exp.alias_(projection, name))

    return expression
|
+
|
|
152
|
+
def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    """Build StrToTime from PARSE_TIMESTAMP(format, value[, zone]) arguments.

    Note the argument swap: BigQuery puts the format first, StrToTime expects
    the value first.
    """
    swapped = [seq_get(args, 1), seq_get(args, 0)]
    node = build_formatted_time(exp.StrToTime, "bigquery")(swapped)
    node.set("zone", seq_get(args, 2))
    return node
+
|
|
158
|
+
def _build_timestamp(args: t.List) -> exp.Timestamp:
    """Build a Timestamp node, flagging it as timezone-aware (BigQuery TIMESTAMP)."""
    node = exp.Timestamp.from_arg_list(args)
    node.set("with_tz", True)
    return node
|
+
|
|
164
|
+
def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    """DATE(y, m, d) becomes DateFromParts; any other arity becomes Date."""
    if len(args) == 3:
        return exp.DateFromParts.from_arg_list(args)
    return exp.Date.from_arg_list(args)
|
|
169
|
+
def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    if isinstance(arg, exp.MD5Digest):
        return exp.MD5(this=arg.this)
    return exp.LowerHex(this=arg)
|
+
|
|
175
|
+
def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    """Render ArrayContains as EXISTS(SELECT 1 FROM UNNEST(arr) _unnest(_col) WHERE _col = value)."""
    unnested = exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])
    subquery = (
        exp.select("1")
        .from_(unnested)
        .where(exp.column("_col").eq(expression.right))
    )
    return self.sql(exp.Exists(this=subquery))
|
|
185
|
+
def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TsOrDsAdd as DATE_ADD after casting the operand appropriately."""
    cast_expression = ts_or_ds_add_cast(expression)
    return date_add_interval_sql("DATE", "ADD")(self, cast_expression)
|
+
|
|
189
|
+
def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF, casting both operands to TIMESTAMP first."""
    for operand in (expression.this, expression.expression):
        operand.replace(exp.cast(operand, exp.DataType.Type.TIMESTAMP))

    return self.func(
        "DATE_DIFF", expression.this, expression.expression, unit_to_var(expression)
    )
|
+
|
|
196
|
+
def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime using the TIMESTAMP_SECONDS/MILLIS/MICROS family.

    Other scales are normalized to seconds by dividing by 10^scale before
    delegating to TIMESTAMP_SECONDS.
    """
    scale = expression.args.get("scale")
    timestamp = expression.this

    direct = {
        exp.UnixToTime.MILLIS: "TIMESTAMP_MILLIS",
        exp.UnixToTime.MICROS: "TIMESTAMP_MICROS",
    }
    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale in direct:
        return self.func(direct[scale], timestamp)

    # Arbitrary scale: convert to whole seconds via integer cast of ts / 10^scale.
    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)),
        exp.DataType.Type.BIGINT,
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)
|
+
|
|
213
|
+
def _build_time(args: t.List) -> exp.Func:
    """Dispatch TIME(...) by arity: 1 arg -> cast, 2 -> Time, otherwise TimeFromParts."""
    arity = len(args)
    if arity == 1:
        return exp.TsOrDsToTime(this=args[0])
    if arity == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)
+
|
|
221
|
+
def _build_datetime(args: t.List) -> exp.Func:
    """Dispatch DATETIME(...) by arity: 1 arg -> cast, 2 -> Datetime, otherwise TimestampFromParts."""
    arity = len(args)
    if arity == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if arity == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)
|
+
|
|
229
|
+
def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    """Return a builder for REGEXP_EXTRACT-style functions.

    When the pattern literal contains exactly one capturing group, the group
    argument is pinned to 1; otherwise `default_group` is used.
    """

    def _builder(args: t.List) -> E:
        try:
            has_single_group = re.compile(args[1].name).groups == 1
        except re.error:
            # Unparseable pattern (e.g. dialect-specific syntax): don't pin the group.
            has_single_group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if has_single_group else default_group,
        )

    return _builder
|
249
|
+
|
|
250
|
+
def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a JSON-extract builder that defaults the JSONPath argument to '$'."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder
+
|
|
260
|
+
def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    """Render StrToDate/StrToTime as PARSE_DATE/PARSE_TIMESTAMP.

    The "safe" variant is rendered via SAFE_CAST ... FORMAT using the
    dialect's inverse format mapping.
    """
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))
|
|
277
|
+
|
|
278
|
+
def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    | INPUT   | INT64   | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    operand: exp.Expression = expression.this

    # Integer inputs are promoted to FLOAT64; everything else keeps its own type.
    if operand.is_type(*exp.DataType.INTEGER_TYPES):
        result_type = exp.DataType.Type.DOUBLE
    else:
        result_type = operand.type

    self._set_type(expression, result_type)
    return expression
297
|
+
|
|
298
|
+
@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    """Render Levenshtein as EDIT_DISTANCE, mapping max_dist to max_distance kwarg."""
    max_dist = expression.args.get("max_dist")
    kwarg = (
        exp.Kwarg(this=exp.var("max_distance"), expression=max_dist) if max_dist else None
    )
    return self.func("EDIT_DISTANCE", expression.this, expression.expression, kwarg)
|
+
|
|
307
|
+
def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    """Build Levenshtein from EDIT_DISTANCE args, unwrapping the max_distance kwarg."""
    kwarg = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=kwarg.expression if kwarg else None,
    )
|
315
|
+
|
|
316
|
+
def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    """Return a builder for FORMAT_DATE-style calls: (format, value[, zone]) -> TimeToStr."""

    def _builder(args: t.List) -> exp.TimeToStr:
        # BigQuery puts the format first; TimeToStr wraps the value in expr_type.
        return exp.TimeToStr(
            this=expr_type(this=seq_get(args, 1)),
            format=seq_get(args, 0),
            zone=seq_get(args, 2),
        )

    return _builder
|
+
|
|
327
|
+
def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    """Build Contains from CONTAINS_SUBSTR; the 3-arg form stays an opaque call."""
    if len(args) == 3:
        # The json_scope variant has no portable equivalent; keep it as-is.
        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    return exp.Contains(
        this=exp.Lower(this=seq_get(args, 0)),
        expression=exp.Lower(this=seq_get(args, 1)),
    )
|
|
338
|
+
|
|
339
|
+
def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    """Render JSON extraction, temporarily switching JSONPath key quoting.

    JSON_QUERY/JSON_VALUE/JSON_QUERY_ARRAY escape keys with double quotes
    rather than brackets, so the generator flag is flipped around the render.
    """
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    needs_dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    # NOTE: the flag is restored only on the normal path, mirroring the
    # original control flow exactly (no try/finally).
    if needs_dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if needs_dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql
|
+
|
|
356
|
+
def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    """Annotate CONCAT's type from its arguments.

    Args must be BYTES or types that can be cast to STRING; the return type is
    either BYTES or STRING:
    https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    """
    annotated = self._annotate_by_args(expression, "expressions")

    if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
        annotated.type = exp.DataType.Type.VARCHAR

    return annotated
|
+
|
|
367
|
+
class BigQuery(Dialect):
|
|
368
|
+
WEEK_OFFSET = -1
|
|
369
|
+
UNNEST_COLUMN_ONLY = True
|
|
370
|
+
SUPPORTS_USER_DEFINED_TYPES = False
|
|
371
|
+
SUPPORTS_SEMI_ANTI_JOIN = False
|
|
372
|
+
LOG_BASE_FIRST = False
|
|
373
|
+
HEX_LOWERCASE = True
|
|
374
|
+
FORCE_EARLY_ALIAS_REF_EXPANSION = True
|
|
375
|
+
PRESERVE_ORIGINAL_NAMES = True
|
|
376
|
+
HEX_STRING_IS_INTEGER_TYPE = True
|
|
377
|
+
|
|
378
|
+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
|
|
379
|
+
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
|
|
380
|
+
|
|
381
|
+
# bigquery udfs are case sensitive
|
|
382
|
+
NORMALIZE_FUNCTIONS = False
|
|
383
|
+
|
|
384
|
+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
|
|
385
|
+
TIME_MAPPING = {
|
|
386
|
+
"%D": "%m/%d/%y",
|
|
387
|
+
"%E6S": "%S.%f",
|
|
388
|
+
"%e": "%-d",
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
FORMAT_MAPPING = {
|
|
392
|
+
"DD": "%d",
|
|
393
|
+
"MM": "%m",
|
|
394
|
+
"MON": "%b",
|
|
395
|
+
"MONTH": "%B",
|
|
396
|
+
"YYYY": "%Y",
|
|
397
|
+
"YY": "%y",
|
|
398
|
+
"HH": "%I",
|
|
399
|
+
"HH12": "%I",
|
|
400
|
+
"HH24": "%H",
|
|
401
|
+
"MI": "%M",
|
|
402
|
+
"SS": "%S",
|
|
403
|
+
"SSSSS": "%f",
|
|
404
|
+
"TZH": "%z",
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
# The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
|
|
408
|
+
# https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
|
|
409
|
+
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
|
|
410
|
+
|
|
411
|
+
# All set operations require either a DISTINCT or ALL specifier
|
|
412
|
+
SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)
|
|
413
|
+
|
|
414
|
+
# BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
|
|
415
|
+
TYPE_TO_EXPRESSIONS = {
|
|
416
|
+
**Dialect.TYPE_TO_EXPRESSIONS,
|
|
417
|
+
exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
|
|
418
|
+
}
|
|
419
|
+
TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)
|
|
420
|
+
|
|
421
|
+
ANNOTATORS = {
|
|
422
|
+
**Dialect.ANNOTATORS,
|
|
423
|
+
**{
|
|
424
|
+
expr_type: annotate_with_type_lambda(data_type)
|
|
425
|
+
for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
|
|
426
|
+
for expr_type in expressions
|
|
427
|
+
},
|
|
428
|
+
**{
|
|
429
|
+
expr_type: lambda self, e: _annotate_math_functions(self, e)
|
|
430
|
+
for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
|
|
431
|
+
},
|
|
432
|
+
**{
|
|
433
|
+
expr_type: lambda self, e: self._annotate_by_args(e, "this")
|
|
434
|
+
for expr_type in (
|
|
435
|
+
exp.Left,
|
|
436
|
+
exp.Right,
|
|
437
|
+
exp.Lower,
|
|
438
|
+
exp.Upper,
|
|
439
|
+
exp.Pad,
|
|
440
|
+
exp.Trim,
|
|
441
|
+
exp.RegexpExtract,
|
|
442
|
+
exp.RegexpReplace,
|
|
443
|
+
exp.Repeat,
|
|
444
|
+
exp.Substring,
|
|
445
|
+
)
|
|
446
|
+
},
|
|
447
|
+
exp.Concat: _annotate_concat,
|
|
448
|
+
exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
|
|
449
|
+
exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
def normalize_identifier(self, expression: E) -> E:
|
|
453
|
+
if (
|
|
454
|
+
isinstance(expression, exp.Identifier)
|
|
455
|
+
and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
|
|
456
|
+
):
|
|
457
|
+
parent = expression.parent
|
|
458
|
+
while isinstance(parent, exp.Dot):
|
|
459
|
+
parent = parent.parent
|
|
460
|
+
|
|
461
|
+
# In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
|
|
462
|
+
# by default. The following check uses a heuristic to detect tables based on whether
|
|
463
|
+
# they are qualified. This should generally be correct, because tables in BigQuery
|
|
464
|
+
# must be qualified with at least a dataset, unless @@dataset_id is set.
|
|
465
|
+
case_sensitive = (
|
|
466
|
+
isinstance(parent, exp.UserDefinedFunction)
|
|
467
|
+
or (
|
|
468
|
+
isinstance(parent, exp.Table)
|
|
469
|
+
and parent.db
|
|
470
|
+
and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
|
|
471
|
+
)
|
|
472
|
+
or expression.meta.get("is_table")
|
|
473
|
+
)
|
|
474
|
+
if not case_sensitive:
|
|
475
|
+
expression.set("this", expression.this.lower())
|
|
476
|
+
|
|
477
|
+
return t.cast(E, expression)
|
|
478
|
+
|
|
479
|
+
return super().normalize_identifier(expression)
|
|
480
|
+
|
|
481
|
+
class Tokenizer(tokens.Tokenizer):
|
|
482
|
+
QUOTES = ["'", '"', '"""', "'''"]
|
|
483
|
+
COMMENTS = ["--", "#", ("/*", "*/")]
|
|
484
|
+
IDENTIFIERS = ["`"]
|
|
485
|
+
STRING_ESCAPES = ["\\"]
|
|
486
|
+
|
|
487
|
+
HEX_STRINGS = [("0x", ""), ("0X", "")]
|
|
488
|
+
|
|
489
|
+
BYTE_STRINGS = [
|
|
490
|
+
(prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
|
|
491
|
+
]
|
|
492
|
+
|
|
493
|
+
RAW_STRINGS = [
|
|
494
|
+
(prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
|
|
495
|
+
]
|
|
496
|
+
|
|
497
|
+
NESTED_COMMENTS = False
|
|
498
|
+
|
|
499
|
+
KEYWORDS = {
|
|
500
|
+
**tokens.Tokenizer.KEYWORDS,
|
|
501
|
+
"ANY TYPE": TokenType.VARIANT,
|
|
502
|
+
"BEGIN": TokenType.COMMAND,
|
|
503
|
+
"BEGIN TRANSACTION": TokenType.BEGIN,
|
|
504
|
+
"BYTEINT": TokenType.INT,
|
|
505
|
+
"BYTES": TokenType.BINARY,
|
|
506
|
+
"CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
|
|
507
|
+
"DATETIME": TokenType.TIMESTAMP,
|
|
508
|
+
"DECLARE": TokenType.COMMAND,
|
|
509
|
+
"ELSEIF": TokenType.COMMAND,
|
|
510
|
+
"EXCEPTION": TokenType.COMMAND,
|
|
511
|
+
"EXPORT": TokenType.EXPORT,
|
|
512
|
+
"FLOAT64": TokenType.DOUBLE,
|
|
513
|
+
"FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
|
|
514
|
+
"MODEL": TokenType.MODEL,
|
|
515
|
+
"NOT DETERMINISTIC": TokenType.VOLATILE,
|
|
516
|
+
"RECORD": TokenType.STRUCT,
|
|
517
|
+
"TIMESTAMP": TokenType.TIMESTAMPTZ,
|
|
518
|
+
}
|
|
519
|
+
KEYWORDS.pop("DIV")
|
|
520
|
+
KEYWORDS.pop("VALUES")
|
|
521
|
+
KEYWORDS.pop("/*+")
|
|
522
|
+
|
|
523
|
+
class Parser(parser.Parser):
    """Parser for BigQuery Standard SQL.

    Extends the generic parser with BigQuery-specific function signatures,
    dashed/dotted table-name handling, array subscript operators
    (OFFSET/ORDINAL/SAFE_*), and procedural statements (FOR..IN, EXPORT DATA).
    """

    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    # BigQuery does not allow ASC/DESC to be used as an identifier
    ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
    COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {
        TokenType.ASC,
        TokenType.DESC,
    }
    UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}

    # Function-name -> AST builder overrides for BigQuery's function library.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "CONTAINS_SUBSTR": _build_contains_substring,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
            zone=seq_get(args, 2),
        ),
        "DATETIME": _build_datetime,
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "EDIT_DISTANCE": _build_levenshtein,
        "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
        "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
        "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
        "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
        "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        # PARSE_DATE(fmt, value) -> StrToDate(value, fmt): argument order swaps.
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_EXTRACT_ALL": _build_regexp_extract(
            exp.RegexpExtractAll, default_group=exp.Literal.number(0)
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "STRPOS": exp.StrPosition.from_arg_list,
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
        "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
        "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
    }
    # TRIM in BigQuery is a plain two-argument function, not the ANSI form.
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    # OVERLAPS is not a range operator in BigQuery.
    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    # Tokens that terminate a dashed table-name part (e.g. in my-project.ds.tbl).
    DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
        TokenType.EXPORT: lambda self: self._parse_export_data(),
    }

    # Subscript operator name -> (index base, safe flag).
    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        """Parse BigQuery's procedural ``FOR <var> IN <query> DO ... END FOR``."""
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one table-name part, gluing dashed names (``my-project``) back together."""
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            # Consume `-suffix` runs until a token that ends the table part.
            while self._match(TokenType.DASH, advance=False) and self._next:
                start = self._curr
                while self._is_connected() and not self._match_set(
                    self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                ):
                    self._advance()

                if start == self._curr:
                    break

                table_name += self._find_sql(start, self._prev)

            this = exp.Identifier(
                this=table_name, quoted=this.args.get("quoted")
            ).update_positions(this)
        elif isinstance(this, exp.Literal):
            # A leading number (e.g. `1` in proj-1...) was lexed as a literal.
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly quoted, dashed, or dotted) multi-part table reference."""
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                previous_db = table.args["db"]
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set(
                        "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                    )
                    table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
            else:
                previous_this = table.this
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set(
                        "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                    )
                    table.set(
                        "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                    )

        # A quoted part containing dots (e.g. `proj.ds.tbl`) is split into the
        # canonical catalog/db/this triple; the table is marked "quoted_table"
        # so the generator can restore the single-backtick form.
        if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
            alias = table.this
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            for part in (catalog, db, this):
                if part:
                    part.update_positions(table.this)

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(
                this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
            )
            table.meta["quoted_table"] = True
        else:
            alias = None

        # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
        # dataset, so if the project identifier is omitted we need to fix the ast so that
        # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
        # Otherwise, we wouldn't correctly qualify a `Table` node that references these
        # views, because it would seem like the "catalog" part is set, when it'd actually
        # be the region/dataset. Merging the two identifiers into a single one is done to
        # avoid producing a 4-part Table reference, which would cause issues in the schema
        # module, when there are 3-part table names mixed with information schema views.
        #
        # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
        table_parts = table.parts
        if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
            # We need to alias the table here to avoid breaking existing qualified columns.
            # This is expected to be safe, because if there's an actual alias coming up in
            # the token stream, it will overwrite this one. If there isn't one, we are only
            # exposing the name that can be used to reference the view explicitly (a no-op).
            exp.alias_(
                table,
                t.cast(exp.Identifier, alias or table_parts[-1]),
                table=True,
                copy=False,
            )

            info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
            new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                line=table_parts[-2].meta.get("line"),
                col=table_parts[-1].meta.get("col"),
                start=table_parts[-2].meta.get("start"),
                end=table_parts[-1].meta.get("end"),
            )
            table.set("this", new_this)
            table.set("db", seq_get(table_parts, -3))
            table.set("catalog", seq_get(table_parts, -4))

        return table

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column, splitting dotted quoted parts into catalog/db/table/name."""
        column = super()._parse_column()
        if isinstance(column, exp.Column):
            parts = column.parts
            if any("." in p.name for p in parts):
                catalog, db, table, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                # Flag so the generator can regenerate the quoted prefix form.
                column.meta["quoted_column"] = True

        return column

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT, normalizing the two-array signature to key/value pairs."""
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        """Parse array subscripts, translating OFFSET/ORDINAL/SAFE_* wrappers."""
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                # Record the index base / safe flag and unwrap the inner index.
                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST, marking struct-array operands that explode into columns."""
        unnest = super()._parse_unnest(with_alias=with_alias)

        if not unnest:
            return None

        unnest_expr = seq_get(unnest.expressions, 0)
        if unnest_expr:
            from sqlglot.optimizer.annotate_types import annotate_types

            unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

            # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
            # in contrast to other dialects such as DuckDB which flattens only the array by default
            if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                array_elem.is_type(exp.DataType.Type.STRUCT)
                for array_elem in unnest_expr._type.expressions
            ):
                unnest.set("explode_array", True)

        return unnest

    def _parse_make_interval(self) -> exp.MakeInterval:
        """Parse MAKE_INTERVAL with positional and/or named (kwarg) arguments."""
        expr = exp.MakeInterval()

        for arg_key in expr.arg_types:
            value = self._parse_lambda()

            if not value:
                break

            # Non-named arguments are filled sequentially, (optionally) followed by named arguments
            # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
            if isinstance(value, exp.Kwarg):
                arg_key = value.this.name

            expr.set(arg_key, value)

            self._match(TokenType.COMMA)

        return expr

    def _parse_features_at_time(self) -> exp.FeaturesAtTime:
        """Parse ML.FEATURES_AT_TIME's TABLE/subquery source plus named options."""
        expr = self.expression(
            exp.FeaturesAtTime,
            this=(self._match(TokenType.TABLE) and self._parse_table())
            or self._parse_select(nested=True),
        )

        while self._match(TokenType.COMMA):
            arg = self._parse_lambda()

            # Get the LHS of the Kwarg and set the arg to that value, e.g
            # "num_rows => 1" sets the expr's `num_rows` arg
            if arg:
                expr.set(arg.this.name, arg)

        return expr

    def _parse_export_data(self) -> exp.Export:
        """Parse ``EXPORT DATA [WITH CONNECTION ...] OPTIONS (...) AS <select>``."""
        self._match_text_seq("DATA")

        return self.expression(
            exp.Export,
            connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
            options=self._parse_properties(),
            this=self._match_text_seq("AS") and self._parse_select(),
        )
|
|
910
|
+
|
|
911
|
+
class Generator(generator.Generator):
    """SQL generator for BigQuery Standard SQL.

    The attributes below are declarative feature flags and expression->SQL
    mappings consumed by the base ``generator.Generator``; the rendering
    logic itself lives in the base class and the methods that follow.
    """

    # Dialect capability flags.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"
    HEX_FUNC = "TO_HEX"
    WITH_PROPERTIES_PREFIX = "OPTIONS"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_UNIX_SECONDS = True

    # Expression class -> rendering function overrides.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.Array: inline_array_unless_query,
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArrayRemove: filter_array_using_unnest,
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, unit_to_var(e)
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func(
            "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
        ),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: lambda self, e: groupconcat_sql(
            self, e, func_name="STRING_AGG", within_group=False
        ),
        exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
        exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.Int64: rename_func("INT64"),
        exp.JSONExtract: _json_extract_sql,
        exp.JSONExtractArray: _json_extract_sql,
        exp.JSONExtractScalar: _json_extract_sql,
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Levenshtein: _levenshtein_sql,
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpExtractAll: lambda self, e: self.func(
            "REGEXP_EXTRACT_ALL", e.this, e.expression
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_projection_to_unnest(),
                transforms.unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.String: rename_func("STRING"),
        exp.StrPosition: lambda self, e: (
            strposition_sql(
                self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
            )
        ),
        exp.StrToDate: _str_to_datetime_sql,
        exp.StrToTime: _str_to_datetime_sql,
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimestampFromParts: rename_func("DATETIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.TsOrDsToDatetime: rename_func("DATETIME"),
        exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Uuid: lambda *_: "GENERATE_UUID()",
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
        exp.SafeDivide: rename_func("SAFE_DIVIDE"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    # Canonical -> BigQuery type-name mapping.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BLOB: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.ROWVERSION: "BYTES",
        exp.DataType.Type.UUID: "STRING",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # WINDOW comes after QUALIFY
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
    AFTER_HAVING_MODIFIER_TRANSFORMS = {
        "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
        "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        "all",
        "and",
        "any",
        "array",
        "as",
        "asc",
        "assert_rows_modified",
        "at",
        "between",
        "by",
        "case",
        "cast",
        "collate",
        "contains",
        "create",
        "cross",
        "cube",
        "current",
        "default",
        "define",
        "desc",
        "distinct",
        "else",
        "end",
        "enum",
        "escape",
        "except",
        "exclude",
        "exists",
        "extract",
        "false",
        "fetch",
        "following",
        "for",
        "from",
        "full",
        "group",
        "grouping",
        "groups",
        "hash",
        "having",
        "if",
        "ignore",
        "in",
        "inner",
        "intersect",
        "interval",
        "into",
        "is",
        "join",
        "lateral",
        "left",
        "like",
        "limit",
        "lookup",
        "merge",
        "natural",
        "new",
        "no",
        "not",
        "null",
        "nulls",
        "of",
        "on",
        "or",
        "order",
        "outer",
        "over",
        "partition",
        "preceding",
        "proto",
        "qualify",
        "range",
        "recursive",
        "respect",
        "right",
        "rollup",
        "rows",
        "select",
        "set",
        "some",
        "struct",
        "tablesample",
        "then",
        "to",
        "treat",
        "true",
        "unbounded",
        "union",
        "unnest",
        "using",
        "when",
        "where",
        "window",
        "with",
        "within",
    }
|
|
1196
|
+
|
|
1197
|
+
def mod_sql(self, expression: exp.Mod) -> str:
    """Render a modulo expression as a ``MOD(a, b)`` function call.

    BigQuery has no ``%`` operator; parenthesized operands are unwrapped
    because the function call form makes the parentheses redundant.
    """

    def _unwrap(operand):
        # Drop a redundant Paren wrapper around either operand.
        return operand.unnest() if isinstance(operand, exp.Paren) else operand

    return self.func("MOD", _unwrap(expression.this), _unwrap(expression.expression))
|
|
1205
|
+
|
|
1206
|
+
def column_parts(self, expression: exp.Column) -> str:
    """Render a column reference, preserving a quoted multi-part table path."""
    if not expression.meta.get("quoted_column"):
        return super().column_parts(expression)

    # A reference of the form `dataset.table`.name must keep its whole table
    # path inside one pair of backticks, otherwise the reference breaks.
    prefix = ".".join(part.name for part in expression.parts[:-1])
    quoted_prefix = self.sql(exp.Identifier(this=prefix, quoted=True))
    return f"{quoted_prefix}.{self.sql(expression, 'this')}"
|
|
1215
|
+
|
|
1216
|
+
def table_parts(self, expression: exp.Table) -> str:
    """Render a table reference with the quoting style it was parsed with.

    Depending on the context, `x.y` may not resolve to the same data source
    as `x`.`y`, so the original quoting must be reproduced exactly. For
    example, if a CTE x clashes with a schema name, the former resolves to
    table y in that schema while the latter resolves to the CTE's y column:

    - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
    - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
    """
    if not expression.meta.get("quoted_table"):
        return super().table_parts(expression)

    # The table was parsed from a single backtick-quoted dotted name, so
    # re-emit all of its parts inside one quoted identifier.
    dotted_name = ".".join(part.name for part in expression.parts)
    return self.sql(exp.Identifier(this=dotted_name, quoted=True))
|
|
1230
|
+
|
|
1231
|
+
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    """Render TimeToStr with the FORMAT_* function matching the operand type.

    FORMAT_DATETIME / FORMAT_TIMESTAMP / FORMAT_DATE are chosen by the type
    of the value being formatted; when the operand is already one of the
    Ts-or-Ds conversions, its inner value is passed through directly.
    """
    this = expression.this

    if isinstance(this, exp.TsOrDsToDatetime):
        func_name = "FORMAT_DATETIME"
    elif isinstance(this, exp.TsOrDsToTimestamp):
        func_name = "FORMAT_TIMESTAMP"
    else:
        func_name = "FORMAT_DATE"

    # Unwrap a conversion operand; otherwise format the expression's own value.
    if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)):
        time_expr = this
    else:
        time_expr = expression

    return self.func(
        func_name,
        self.format_time(expression),
        time_expr.this,
        expression.args.get("zone"),
    )
|
|
1248
|
+
|
|
1249
|
+
def eq_sql(self, expression: exp.EQ) -> str:
    """Render ``=``; BigQuery's equality operator cannot take NULL operands,
    so a comparison against NULL is folded to the literal NULL (except inside
    an UPDATE's SET clause, where ``col = NULL`` is a valid assignment)."""
    null_operand = isinstance(expression.left, exp.Null) or isinstance(
        expression.right, exp.Null
    )
    if null_operand and not isinstance(expression.parent, exp.Update):
        return "NULL"

    return self.binary(expression, "=")
|
|
1256
|
+
|
|
1257
|
+
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    """Render AT TIME ZONE via TIMESTAMP(DATETIME(...)) where required.

    BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    Only the TIMESTAMP variant should use the conversion below when AT TIME
    ZONE is included; a textual CAST parent keeps the default rendering.
    """
    parent = expression.parent
    if isinstance(parent, exp.Cast) and parent.to.is_type("text"):
        return super().attimezone_sql(expression)

    return self.func(
        "TIMESTAMP",
        self.func("DATETIME", expression.this, expression.args.get("zone")),
    )
def trycast_sql(self, expression: exp.TryCast) -> str:
    """TRY_CAST is rendered as BigQuery's SAFE_CAST."""
    return self.cast_sql(expression, safe_prefix="SAFE_")
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render bracket access, mapping struct string keys to dot notation and
    array offsets to OFFSET/ORDINAL (optionally SAFE_-prefixed)."""
    target = expression.this
    keys = expression.expressions

    # A single string key into a STRUCT must become dot access, since
    # BQ doesn't support bracket syntax with string values for structs.
    if len(keys) == 1 and target and target.is_type(exp.DataType.Type.STRUCT):
        key = keys[0]
        if key.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            key = annotate_types(key, dialect=self.dialect)

        if key.type and key.type.this in exp.DataType.TEXT_TYPES:
            return f"{self.sql(target)}.{key.name}"

    inner = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    # 0-based indexing uses OFFSET, 1-based uses ORDINAL; anything else
    # is not expressible in BigQuery.
    if offset == 0:
        inner = f"OFFSET({inner})"
    elif offset == 1:
        inner = f"ORDINAL({inner})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        inner = f"SAFE_{inner}"

    return f"{self.sql(target)}[{inner}]"
def in_unnest_op(self, expression: exp.Unnest) -> str:
    """Emit the UNNEST operand of an IN clause as-is (no extra wrapping)."""
    return self.sql(expression)
def version_sql(self, expression: exp.Version) -> str:
    """Render time-travel clauses, normalizing TIMESTAMP to SYSTEM_TIME."""
    # BigQuery spells time travel as FOR SYSTEM_TIME AS OF ...; rewrite the
    # node's keyword in place before delegating to the default renderer.
    if expression.name == "TIMESTAMP":
        expression.set("this", "SYSTEM_TIME")
    return super().version_sql(expression)
def contains_sql(self, expression: exp.Contains) -> str:
    """Render Contains as CONTAINS_SUBSTR, dropping LOWER() when it wraps
    both operands."""
    haystack = expression.this
    needle = expression.expression

    # Strip matching LOWER() wrappers from both sides before emitting.
    if isinstance(haystack, exp.Lower) and isinstance(needle, exp.Lower):
        haystack = haystack.this
        needle = needle.this

    return self.func("CONTAINS_SUBSTR", haystack, needle)
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    """Render CAST, special-casing inline typed ARRAY literals.

    This ensures that type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
    are roundtripped unaffected. ARRAY(SELECT ...) expressions are excluded
    because they aren't literals, so that syntax would be invalid BigQuery.
    """
    operand = expression.this

    if isinstance(operand, exp.Array):
        first = seq_get(operand.expressions, 0)
        # Only literal arrays (no embedded query) may use the typed form.
        if not first or not first.find(exp.Query):
            return f"{self.sql(expression, 'to')}{self.sql(operand)}"

    return super().cast_sql(expression, safe_prefix=safe_prefix)