altimate-code 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +1 -5
  3. package/bin/altimate +6 -0
  4. package/bin/altimate-code +6 -0
  5. package/dbt-tools/bin/altimate-dbt +2 -0
  6. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
  7. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
  8. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
  9. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
  10. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
  11. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
  12. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
  13. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
  14. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
  15. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
  16. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
  17. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
  18. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
  19. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
  20. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
  21. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
  22. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
  23. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
  24. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
  25. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
  26. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
  27. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
  28. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
  29. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
  30. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
  31. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
  32. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
  33. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
  34. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
  35. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
  36. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
  37. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
  38. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
  39. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
  40. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
  41. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
  42. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
  43. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
  44. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
  45. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
  46. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
  47. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
  48. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
  49. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
  50. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
  51. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
  52. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
  53. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
  54. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
  55. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
  56. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
  57. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
  58. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
  59. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
  60. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
  61. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
  62. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
  63. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
  64. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
  65. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
  66. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
  67. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
  68. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
  69. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
  70. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
  71. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
  72. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
  73. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
  74. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
  75. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
  76. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
  77. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
  78. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
  79. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
  80. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
  81. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
  82. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
  83. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
  84. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
  85. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
  86. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
  87. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
  88. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
  89. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
  90. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
  91. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
  92. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
  93. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
  94. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
  95. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
  96. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
  97. package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
  98. package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
  99. package/dbt-tools/dist/index.js +23859 -0
  100. package/package.json +13 -13
  101. package/postinstall.mjs +42 -0
  102. package/skills/altimate-setup/SKILL.md +31 -0
@@ -0,0 +1,1331 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import re
5
+ import typing as t
6
+
7
+ from sqlglot import exp, generator, parser, tokens, transforms
8
+ from sqlglot._typing import E
9
+ from sqlglot.dialects.dialect import (
10
+ Dialect,
11
+ NormalizationStrategy,
12
+ annotate_with_type_lambda,
13
+ arg_max_or_min_no_count,
14
+ binary_from_function,
15
+ date_add_interval_sql,
16
+ datestrtodate_sql,
17
+ build_formatted_time,
18
+ filter_array_using_unnest,
19
+ if_sql,
20
+ inline_array_unless_query,
21
+ max_or_greatest,
22
+ min_or_least,
23
+ no_ilike_sql,
24
+ build_date_delta_with_interval,
25
+ regexp_replace_sql,
26
+ rename_func,
27
+ sha256_sql,
28
+ timestrtotime_sql,
29
+ ts_or_ds_add_cast,
30
+ unit_to_var,
31
+ strposition_sql,
32
+ groupconcat_sql,
33
+ )
34
+ from sqlglot.helper import seq_get, split_num_words
35
+ from sqlglot.tokens import TokenType
36
+ from sqlglot.generator import unsupported_args
37
+
38
+ if t.TYPE_CHECKING:
39
+ from sqlglot._typing import Lit
40
+
41
+ from sqlglot.optimizer.annotate_types import TypeAnnotator
42
+
43
logger = logging.getLogger("sqlglot")


# Union of the JSON-extraction expression types that share BigQuery rendering logic.
JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

# Functions whose JSON path keys are escaped with double quotes instead of brackets.
DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")
49
+
50
+
51
def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    """Render a VALUES derived table as UNNEST over an array of STRUCTs.

    BigQuery does not support VALUES in FROM/JOIN position, so each tuple is
    turned into a STRUCT whose fields are named after the alias columns (or
    _c0, _c1, ... when no column aliases are given).
    """
    # VALUES outside of a FROM/JOIN can be emitted as-is.
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    table_alias = expression.args.get("alias")

    structs = []
    for row in expression.find_all(exp.Tuple):
        if table_alias and table_alias.columns:
            names: t.Iterable = table_alias.columns
        else:
            names = (f"_c{i}" for i in range(len(row.expressions)))

        structs.append(
            exp.Struct(
                expressions=[
                    exp.PropertyEQ(this=exp.to_identifier(col_name), expression=value)
                    for col_name, value in zip(names, row.expressions)
                ]
            )
        )

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained
    # in the columns expression
    unnest_alias = exp.TableAlias(columns=[table_alias.this]) if table_alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=unnest_alias)
    )
74
+
75
+
76
def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    """Render a RETURNS clause; table-valued schemas use angle-bracket syntax."""
    this = expression.this
    if isinstance(this, exp.Schema):
        rendered = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        rendered = self.sql(this)
    return f"RETURNS {rendered}"
83
+
84
+
85
def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    """Render CREATE, promoting table-returning functions to TABLE FUNCTION."""
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    # Unwrap subquery/literal bodies so only the inner expression is emitted.
    body = expression.expression
    if isinstance(body, (exp.Subquery, exp.Literal)):
        expression.set("expression", body.this)

    return self.create_sql(expression)
94
+
95
+
96
# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    """Replace non-integer GROUP BY expressions with their SELECT aliases when an ORDER BY exists."""
    if not isinstance(expression, exp.Select):
        return expression

    group = expression.args.get("group")
    order = expression.args.get("order")
    if not (group and order):
        return expression

    # Map each aliased projection's inner expression to its alias identifier.
    alias_by_expr = {
        projection.this: projection.args["alias"]
        for projection in expression.selects
        if isinstance(projection, exp.Alias)
    }

    for grouped in group.expressions:
        if grouped.is_int:
            # Ordinal references are already safe.
            continue
        alias = alias_by_expr.get(grouped)
        if alias:
            grouped.replace(exp.column(alias))

    return expression
123
+
124
+
125
def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if not (isinstance(expression, exp.CTE) and expression.alias_column_names):
        return expression

    cte_query = expression.this

    if cte_query.is_star:
        # Star projections can't be matched to alias columns without expansion.
        logger.warning(
            "Can't push down CTE column names for star queries. Run the query through"
            " the optimizer or use 'qualify' to expand the star projections first."
        )
        return expression

    column_names = expression.alias_column_names
    expression.args["alias"].set("columns", None)

    for name, select in zip(column_names, cte_query.selects):
        to_replace = select

        # Inner aliases are shadowed by the CTE column names
        if isinstance(select, exp.Alias):
            select = select.this

        to_replace.replace(exp.alias_(select, name))

    return expression
150
+
151
+
152
def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    """PARSE_TIMESTAMP(format, value[, zone]) -> StrToTime (format/value swapped)."""
    node = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    node.set("zone", seq_get(args, 2))
    return node
156
+
157
+
158
def _build_timestamp(args: t.List) -> exp.Timestamp:
    """Parse TIMESTAMP(); BigQuery timestamps always carry a time zone."""
    node = exp.Timestamp.from_arg_list(args)
    node.set("with_tz", True)
    return node
162
+
163
+
164
def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    """DATE(y, m, d) parses as DateFromParts; any other arity as Date."""
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)
167
+
168
+
169
def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    if isinstance(arg, exp.MD5Digest):
        return exp.MD5(this=arg.this)
    return exp.LowerHex(this=arg)
173
+
174
+
175
def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    """Render ARRAY_CONTAINS as an EXISTS-over-UNNEST correlated subquery."""
    unnested = exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])
    subquery = exp.select("1").from_(unnested).where(exp.column("_col").eq(expression.right))
    return self.sql(exp.Exists(this=subquery))
183
+
184
+
185
def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    """Cast TS_OR_DS_ADD operands appropriately, then render as DATE_ADD."""
    casted = ts_or_ds_add_cast(expression)
    return date_add_interval_sql("DATE", "ADD")(self, casted)
187
+
188
+
189
def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TS_OR_DS_DIFF as DATE_DIFF with both operands cast to TIMESTAMP."""
    for operand in (expression.this, expression.expression):
        operand.replace(exp.cast(operand, exp.DataType.Type.TIMESTAMP))

    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)
194
+
195
+
196
def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    """Map UNIX_TO_TIME onto TIMESTAMP_SECONDS/_MILLIS/_MICROS according to scale."""
    scale = expression.args.get("scale")
    timestamp = expression.this

    # The three well-known scales map directly onto builtin functions.
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)
    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)

    # Arbitrary scale: divide by 10**scale and truncate to whole seconds.
    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)),
        exp.DataType.Type.BIGINT,
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)
211
+
212
+
213
def _build_time(args: t.List) -> exp.Func:
    """TIME() dispatches on arity: 1 -> TsOrDsToTime, 2 -> Time, 3+ -> TimeFromParts."""
    arity = len(args)
    if arity == 1:
        return exp.TsOrDsToTime(this=args[0])
    if arity == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)
219
+
220
+
221
def _build_datetime(args: t.List) -> exp.Func:
    """DATETIME() dispatches on arity: 1 -> TsOrDsToDatetime, 2 -> Datetime, 3+ -> TimestampFromParts."""
    arity = len(args)
    if arity == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if arity == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)
227
+
228
+
229
def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    """Build a parser for REGEXP_EXTRACT-style functions.

    When the pattern compiles and contains exactly one capturing group, BigQuery
    returns that group, so ``group=1`` is made explicit; otherwise
    ``default_group`` is used.
    """

    def _builder(args: t.List) -> E:
        try:
            single_group = re.compile(args[1].name).groups == 1
        except re.error:
            single_group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if single_group else default_group,
        )

    return _builder
248
+
249
+
250
def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Build a JSON-extraction parser whose path argument defaults to '$'."""
    base_builder = parser.build_extract_json_with_path(expr_type)

    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return base_builder(args, dialect)

    return _builder
258
+
259
+
260
def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    """Render STR_TO_DATE/STR_TO_TIME as PARSE_DATE/PARSE_TIMESTAMP, or SAFE_CAST for the safe variant."""
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if not expression.args.get("safe"):
        fmt = self.format_time(expression)
        return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))

    # Safe parsing maps onto SAFE_CAST with a FORMAT clause, which needs the
    # inverse (BigQuery-native) format elements.
    fmt = self.format_time(
        expression,
        self.dialect.INVERSE_FORMAT_MAPPING,
        self.dialect.INVERSE_FORMAT_TRIE,
    )
    return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"
276
+
277
+
278
def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    | INPUT   | INT64   | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    # Integer inputs widen to FLOAT64; everything else keeps its own type.
    if this.is_type(*exp.DataType.INTEGER_TYPES):
        result_type = exp.DataType.Type.DOUBLE
    else:
        result_type = this.type

    self._set_type(expression, result_type)
    return expression
296
+
297
+
298
@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    """Render Levenshtein as EDIT_DISTANCE, mapping max_dist to the max_distance kwarg."""
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)
305
+
306
+
307
def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    """EDIT_DISTANCE(a, b[, max_distance => n]) -> Levenshtein."""
    max_dist_kwarg = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        # The third argument arrives as a Kwarg node; unwrap its value.
        max_dist=max_dist_kwarg.expression if max_dist_kwarg else None,
    )
314
+
315
+
316
def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    """Build a parser for FORMAT_DATE/FORMAT_DATETIME/FORMAT_TIMESTAMP.

    The value argument is wrapped in ``expr_type`` so the node records which
    temporal type is being formatted.
    """

    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(
            this=expr_type(this=seq_get(args, 1)),
            format=seq_get(args, 0),
            zone=seq_get(args, 2),
        )

    return _builder
325
+
326
+
327
def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    """Parse CONTAINS_SUBSTR; the 3-argument form is kept opaque as it has no portable equivalent."""
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    return exp.Contains(
        this=exp.Lower(this=seq_get(args, 0)),
        expression=exp.Lower(this=seq_get(args, 1)),
    )
337
+
338
+
339
def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    """Render a JSON extraction function under its original (or canonical) name.

    JSON_QUERY/JSON_VALUE/JSON_QUERY_ARRAY escape JSON path keys with double
    quotes rather than brackets, so the generator flag controlling that is
    toggled off around rendering. The flag is restored in a ``finally`` block
    so the generator is left in a consistent state even if rendering raises
    (the original code leaked the flipped flag on error).
    """
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if not dquote_escaping:
        return rename_func(upper)(self, expression)

    self._quote_json_path_key_using_brackets = False
    try:
        return rename_func(upper)(self, expression)
    finally:
        # Always restore the generator's default escaping behavior.
        self._quote_json_path_key_using_brackets = True
354
+
355
+
356
def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    """Annotate CONCAT: BYTES stays BYTES; everything else resolves to STRING."""
    annotated = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    if annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
        return annotated

    annotated.type = exp.DataType.Type.VARCHAR
    return annotated
365
+
366
+
367
class BigQuery(Dialect):
    # Dialect capability/behavior switches.
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
    }
    TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        # Expressions whose type follows directly from TYPE_TO_EXPRESSIONS.
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        # Math functions that widen integers to FLOAT64 (see _annotate_math_functions).
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        # String functions whose return type mirrors their first argument.
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: _annotate_concat,
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }
451
+
452
+ def normalize_identifier(self, expression: E) -> E:
453
+ if (
454
+ isinstance(expression, exp.Identifier)
455
+ and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
456
+ ):
457
+ parent = expression.parent
458
+ while isinstance(parent, exp.Dot):
459
+ parent = parent.parent
460
+
461
+ # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
462
+ # by default. The following check uses a heuristic to detect tables based on whether
463
+ # they are qualified. This should generally be correct, because tables in BigQuery
464
+ # must be qualified with at least a dataset, unless @@dataset_id is set.
465
+ case_sensitive = (
466
+ isinstance(parent, exp.UserDefinedFunction)
467
+ or (
468
+ isinstance(parent, exp.Table)
469
+ and parent.db
470
+ and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
471
+ )
472
+ or expression.meta.get("is_table")
473
+ )
474
+ if not case_sensitive:
475
+ expression.set("this", expression.this.lower())
476
+
477
+ return t.cast(E, expression)
478
+
479
+ return super().normalize_identifier(expression)
480
+
481
+ class Tokenizer(tokens.Tokenizer):
482
+ QUOTES = ["'", '"', '"""', "'''"]
483
+ COMMENTS = ["--", "#", ("/*", "*/")]
484
+ IDENTIFIERS = ["`"]
485
+ STRING_ESCAPES = ["\\"]
486
+
487
+ HEX_STRINGS = [("0x", ""), ("0X", "")]
488
+
489
+ BYTE_STRINGS = [
490
+ (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
491
+ ]
492
+
493
+ RAW_STRINGS = [
494
+ (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
495
+ ]
496
+
497
+ NESTED_COMMENTS = False
498
+
499
+ KEYWORDS = {
500
+ **tokens.Tokenizer.KEYWORDS,
501
+ "ANY TYPE": TokenType.VARIANT,
502
+ "BEGIN": TokenType.COMMAND,
503
+ "BEGIN TRANSACTION": TokenType.BEGIN,
504
+ "BYTEINT": TokenType.INT,
505
+ "BYTES": TokenType.BINARY,
506
+ "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
507
+ "DATETIME": TokenType.TIMESTAMP,
508
+ "DECLARE": TokenType.COMMAND,
509
+ "ELSEIF": TokenType.COMMAND,
510
+ "EXCEPTION": TokenType.COMMAND,
511
+ "EXPORT": TokenType.EXPORT,
512
+ "FLOAT64": TokenType.DOUBLE,
513
+ "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
514
+ "MODEL": TokenType.MODEL,
515
+ "NOT DETERMINISTIC": TokenType.VOLATILE,
516
+ "RECORD": TokenType.STRUCT,
517
+ "TIMESTAMP": TokenType.TIMESTAMPTZ,
518
+ }
519
+ KEYWORDS.pop("DIV")
520
+ KEYWORDS.pop("VALUES")
521
+ KEYWORDS.pop("/*+")
522
+
523
+ class Parser(parser.Parser):
524
+ PREFIXED_PIVOT_COLUMNS = True
525
+ LOG_DEFAULTS_TO_LN = True
526
+ SUPPORTS_IMPLICIT_UNNEST = True
527
+
528
+ # BigQuery does not allow ASC/DESC to be used as an identifier
529
+ ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
530
+ ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
531
+ TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
532
+ COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {
533
+ TokenType.ASC,
534
+ TokenType.DESC,
535
+ }
536
+ UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
537
+
538
+ FUNCTIONS = {
539
+ **parser.Parser.FUNCTIONS,
540
+ "CONTAINS_SUBSTR": _build_contains_substring,
541
+ "DATE": _build_date,
542
+ "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
543
+ "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
544
+ "DATE_TRUNC": lambda args: exp.DateTrunc(
545
+ unit=exp.Literal.string(str(seq_get(args, 1))),
546
+ this=seq_get(args, 0),
547
+ zone=seq_get(args, 2),
548
+ ),
549
+ "DATETIME": _build_datetime,
550
+ "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
551
+ "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
552
+ "DIV": binary_from_function(exp.IntDiv),
553
+ "EDIT_DISTANCE": _build_levenshtein,
554
+ "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
555
+ "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
556
+ "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
557
+ "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
558
+ "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
559
+ "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
560
+ "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
561
+ "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
562
+ "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
563
+ "MD5": exp.MD5Digest.from_arg_list,
564
+ "TO_HEX": _build_to_hex,
565
+ "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
566
+ [seq_get(args, 1), seq_get(args, 0)]
567
+ ),
568
+ "PARSE_TIMESTAMP": _build_parse_timestamp,
569
+ "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
570
+ "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
571
+ "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
572
+ "REGEXP_EXTRACT_ALL": _build_regexp_extract(
573
+ exp.RegexpExtractAll, default_group=exp.Literal.number(0)
574
+ ),
575
+ "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
576
+ "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
577
+ "SPLIT": lambda args: exp.Split(
578
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
579
+ this=seq_get(args, 0),
580
+ expression=seq_get(args, 1) or exp.Literal.string(","),
581
+ ),
582
+ "STRPOS": exp.StrPosition.from_arg_list,
583
+ "TIME": _build_time,
584
+ "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
585
+ "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
586
+ "TIMESTAMP": _build_timestamp,
587
+ "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
588
+ "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
589
+ "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
590
+ this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
591
+ ),
592
+ "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
593
+ this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
594
+ ),
595
+ "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
596
+ "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
597
+ "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
598
+ "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
599
+ }
600
+
601
# --- Parser-level lookup tables (BigQuery overrides of the base Parser) ---

# Function name -> custom parse callback; extends the base set.
FUNCTION_PARSERS = {
    **parser.Parser.FUNCTION_PARSERS,
    # ARRAY(<query>): parse the inner statement as the single array element
    "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
    "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
}
# BigQuery's TRIM is parsed as a regular function, not with the special
# TRIM([LEADING|TRAILING|BOTH] ... FROM ...) parser from the base class.
FUNCTION_PARSERS.pop("TRIM")

# Functions callable without parentheses.
NO_PAREN_FUNCTIONS = {
    **parser.Parser.NO_PAREN_FUNCTIONS,
    TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
}

# BigQuery additionally allows TABLE<...> as a nested type.
NESTED_TYPE_TOKENS = {
    *parser.Parser.NESTED_TYPE_TOKENS,
    TokenType.TABLE,
}

PROPERTY_PARSERS = {
    **parser.Parser.PROPERTY_PARSERS,
    # NOT DETERMINISTIC maps onto the canonical VOLATILE stability property
    "NOT DETERMINISTIC": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
    ),
    "OPTIONS": lambda self: self._parse_with_property(),
}

CONSTRAINT_PARSERS = {
    **parser.Parser.CONSTRAINT_PARSERS,
    "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
}

# OVERLAPS is not a range operator in BigQuery.
RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
RANGE_PARSERS.pop(TokenType.OVERLAPS)

NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

# Tokens that terminate a dash-joined table-name part (see _parse_table_part).
DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

STATEMENT_PARSERS = {
    **parser.Parser.STATEMENT_PARSERS,
    # Script keywords that SQLGlot doesn't model are kept as opaque commands
    TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
    TokenType.END: lambda self: self._parse_as_command(self._prev),
    TokenType.FOR: lambda self: self._parse_for_in(),
    TokenType.EXPORT: lambda self: self._parse_export_data(),
}

# Subscript wrapper name -> (offset base, NULL-safe?) used by _parse_bracket.
BRACKET_OFFSETS = {
    "OFFSET": (0, False),
    "ORDINAL": (1, False),
    "SAFE_OFFSET": (0, True),
    "SAFE_ORDINAL": (1, True),
}
653
+
654
def _parse_for_in(self) -> exp.ForIn:
    """Parse BigQuery's scripting construct ``FOR <var> IN <range> DO <stmt>``."""
    loop_range = self._parse_range()
    self._match_text_seq("DO")
    body = self._parse_statement()
    return self.expression(exp.ForIn, this=loop_range, expression=body)
658
+
659
def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
    """Parse a single table-name part, supporting BigQuery's dashed names
    (e.g. ``my-project.dataset.tbl``) and parts that begin with a number.
    """
    # Fall back to a number literal: a part like `123abc` tokenizes as number + var
    this = super()._parse_table_part(schema=schema) or self._parse_number()

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
    if isinstance(this, exp.Identifier):
        table_name = this.name
        # Greedily consume dash-joined tokens until a token that may follow a
        # table part (dot / paren) or a whitespace gap is reached
        while self._match(TokenType.DASH, advance=False) and self._next:
            start = self._curr
            while self._is_connected() and not self._match_set(
                self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
            ):
                self._advance()

            # Nothing was consumed -> stop to avoid an infinite loop
            if start == self._curr:
                break

            table_name += self._find_sql(start, self._prev)

        this = exp.Identifier(
            this=table_name, quoted=this.args.get("quoted")
        ).update_positions(this)
    elif isinstance(this, exp.Literal):
        # A leading number was tokenized as a literal; fuse it with the
        # directly-adjacent identifier tail into one quoted identifier
        table_name = this.name

        if self._is_connected() and self._parse_var(any_token=True):
            table_name += self._prev.text

        this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

    return this
689
+
690
def _parse_table_parts(
    self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
    """Parse a (possibly multi-part) table reference, repairing BigQuery
    quirks: float-tokenized dotted parts, fully-quoted dotted paths, and
    INFORMATION_SCHEMA views that must stay a single identifier.
    """
    table = super()._parse_table_parts(
        schema=schema, is_db_reference=is_db_reference, wildcard=True
    )

    # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
    if not table.catalog:
        if table.db:
            previous_db = table.args["db"]
            parts = table.db.split(".")
            if len(parts) == 2 and not table.args["db"].quoted:
                table.set(
                    "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                )
                table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
        else:
            previous_this = table.this
            parts = table.name.split(".")
            if len(parts) == 2 and not table.this.quoted:
                table.set(
                    "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                )
                table.set(
                    "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                )

    # A quoted path like `proj.dataset.tbl` arrives as one identifier containing
    # dots; split it back into catalog/db/this (extra parts become a Dot chain)
    if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
        alias = table.this
        catalog, db, this, *rest = (
            exp.to_identifier(p, quoted=True)
            for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
        )

        for part in (catalog, db, this):
            if part:
                part.update_positions(table.this)

        if rest and this:
            this = exp.Dot.build([this, *rest])  # type: ignore

        table = exp.Table(
            this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
        )
        table.meta["quoted_table"] = True
    else:
        alias = None

    # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
    # dataset, so if the project identifier is omitted we need to fix the ast so that
    # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
    # Otherwise, we wouldn't correctly qualify a `Table` node that references these
    # views, because it would seem like the "catalog" part is set, when it'd actually
    # be the region/dataset. Merging the two identifiers into a single one is done to
    # avoid producing a 4-part Table reference, which would cause issues in the schema
    # module, when there are 3-part table names mixed with information schema views.
    #
    # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
    table_parts = table.parts
    if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
        # We need to alias the table here to avoid breaking existing qualified columns.
        # This is expected to be safe, because if there's an actual alias coming up in
        # the token stream, it will overwrite this one. If there isn't one, we are only
        # exposing the name that can be used to reference the view explicitly (a no-op).
        exp.alias_(
            table,
            t.cast(exp.Identifier, alias or table_parts[-1]),
            table=True,
            copy=False,
        )

        info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
        new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
            line=table_parts[-2].meta.get("line"),
            col=table_parts[-1].meta.get("col"),
            start=table_parts[-2].meta.get("start"),
            end=table_parts[-1].meta.get("end"),
        )
        table.set("this", new_this)
        table.set("db", seq_get(table_parts, -3))
        table.set("catalog", seq_get(table_parts, -4))

    return table
774
+
775
def _parse_column(self) -> t.Optional[exp.Expression]:
    """Parse a column reference, splitting quoted identifiers that embed dots
    (e.g. ``\u0060proj.dataset.tbl\u0060.col``) into catalog/db/table/this parts.
    """
    node = super()._parse_column()
    if not isinstance(node, exp.Column):
        return node

    if not any("." in part.name for part in node.parts):
        return node

    dotted = ".".join(part.name for part in node.parts)
    catalog, db, table, this, *rest = (
        exp.to_identifier(word, quoted=True) for word in split_num_words(dotted, ".", 4)
    )

    # More than four parts: fold the surplus into a Dot chain on `this`
    if rest and this:
        this = exp.Dot.build([this, *rest])  # type: ignore

    rebuilt = exp.Column(this=this, table=table, db=db, catalog=catalog)
    # Mark it so the generator re-quotes the path (see Generator.column_parts)
    rebuilt.meta["quoted_column"] = True
    return rebuilt
792
+
793
@t.overload
def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

@t.overload
def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

def _parse_json_object(self, agg=False):
    """Parse JSON_OBJECT, normalizing BigQuery's two-array call form into
    explicit key/value pairs."""
    json_object = super()._parse_json_object()
    array_kv_pair = seq_get(json_object.expressions, 0)

    # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
    if (
        array_kv_pair
        and isinstance(array_kv_pair.this, exp.Array)
        and isinstance(array_kv_pair.expression, exp.Array)
    ):
        keys = array_kv_pair.this.expressions
        values = array_kv_pair.expression.expressions

        json_object.set(
            "expressions",
            # zip truncates to the shorter of the two arrays
            [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
        )

    return json_object
819
+
820
def _parse_bracket(
    self, this: t.Optional[exp.Expression] = None
) -> t.Optional[exp.Expression]:
    """Parse a subscript, unwrapping BigQuery's OFFSET/ORDINAL/SAFE_* wrappers
    into the Bracket node's `offset`/`safe` args (see BRACKET_OFFSETS)."""
    node = super()._parse_bracket(this)

    # Nothing new was parsed
    if this is node:
        return node

    if isinstance(node, exp.Bracket):
        for subscript in node.expressions:
            wrapper = self.BRACKET_OFFSETS.get(subscript.name.upper())
            if wrapper is None:
                break

            offset, safe = wrapper
            node.set("offset", offset)
            node.set("safe", safe)
            # Replace OFFSET(<e>) / ORDINAL(<e>) / ... with the bare <e>
            subscript.replace(subscript.expressions[0])

    return node
841
+
842
def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
    """Parse UNNEST and mark arrays of structs so generation explodes their
    top-level fields (BigQuery semantics)."""
    unnest = super()._parse_unnest(with_alias=with_alias)

    if not unnest:
        return None

    unnest_expr = seq_get(unnest.expressions, 0)
    if unnest_expr:
        # Imported lazily to avoid a circular import at module load time
        from sqlglot.optimizer.annotate_types import annotate_types

        unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

        # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
        # in contrast to other dialects such as DuckDB which flattens only the array by default
        # NOTE: reads the private `_type` set by annotate_types above
        if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
            array_elem.is_type(exp.DataType.Type.STRUCT)
            for array_elem in unnest_expr._type.expressions
        ):
            unnest.set("explode_array", True)

    return unnest
863
+
864
def _parse_make_interval(self) -> exp.MakeInterval:
    """Parse MAKE_INTERVAL(...), which accepts positional arguments
    (optionally) followed by named ones in any order, e.g.
    ``MAKE_INTERVAL(1, minute => 5, day => 2)``.
    """
    interval = exp.MakeInterval()

    for positional_key in interval.arg_types:
        parsed = self._parse_lambda()

        # No more arguments
        if not parsed:
            break

        # A named argument (exp.Kwarg) overrides the positional slot it
        # would otherwise have filled
        key = parsed.this.name if isinstance(parsed, exp.Kwarg) else positional_key
        interval.set(key, parsed)

        self._match(TokenType.COMMA)

    return interval
883
+
884
def _parse_features_at_time(self) -> exp.FeaturesAtTime:
    """Parse BigQuery ML's ``FEATURES_AT_TIME(TABLE t | (SELECT ...), k => v, ...)``."""
    source = (self._match(TokenType.TABLE) and self._parse_table()) or self._parse_select(
        nested=True
    )
    features = self.expression(exp.FeaturesAtTime, this=source)

    while self._match(TokenType.COMMA):
        kwarg = self._parse_lambda()

        # Use the LHS of the Kwarg as the arg name, e.g.
        # "num_rows => 1" sets the node's `num_rows` arg
        if kwarg:
            features.set(kwarg.this.name, kwarg)

    return features
900
+
901
def _parse_export_data(self) -> exp.Export:
    """Parse ``EXPORT DATA [WITH CONNECTION c] [OPTIONS(...)] AS <select>``."""
    self._match_text_seq("DATA")

    # The clauses must be consumed in statement order, so parse them
    # into locals before building the node.
    connection = self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts()
    options = self._parse_properties()
    query = self._match_text_seq("AS") and self._parse_select()

    return self.expression(exp.Export, connection=connection, options=options, this=query)
910
+
911
class Generator(generator.Generator):
    # BigQuery-specific generation toggles; each flag is documented on the
    # base Generator class.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"
    HEX_FUNC = "TO_HEX"
    WITH_PROPERTIES_PREFIX = "OPTIONS"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_UNIX_SECONDS = True
936
+
937
# Expression type -> SQL rendering override for BigQuery output.
TRANSFORMS = {
    **generator.Generator.TRANSFORMS,
    exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
    exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
    exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
    exp.Array: inline_array_unless_query,
    exp.ArrayContains: _array_contains_sql,
    exp.ArrayFilter: filter_array_using_unnest,
    exp.ArrayRemove: filter_array_using_unnest,
    exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
    exp.CollateProperty: lambda self, e: (
        f"DEFAULT COLLATE {self.sql(e, 'this')}"
        if e.args.get("default")
        else f"COLLATE {self.sql(e, 'this')}"
    ),
    exp.Commit: lambda *_: "COMMIT TRANSACTION",
    exp.CountIf: rename_func("COUNTIF"),
    exp.Create: _create_sql,
    exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
    exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
    exp.DateDiff: lambda self, e: self.func(
        "DATE_DIFF", e.this, e.expression, unit_to_var(e)
    ),
    exp.DateFromParts: rename_func("DATE"),
    exp.DateStrToDate: datestrtodate_sql,
    exp.DateSub: date_add_interval_sql("DATE", "SUB"),
    exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
    exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
    exp.DateTrunc: lambda self, e: self.func(
        "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
    ),
    exp.FromTimeZone: lambda self, e: self.func(
        "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
    ),
    exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
    exp.GroupConcat: lambda self, e: groupconcat_sql(
        self, e, func_name="STRING_AGG", within_group=False
    ),
    exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
    exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
    exp.If: if_sql(false_value="NULL"),
    exp.ILike: no_ilike_sql,
    exp.IntDiv: rename_func("DIV"),
    exp.Int64: rename_func("INT64"),
    exp.JSONExtract: _json_extract_sql,
    exp.JSONExtractArray: _json_extract_sql,
    exp.JSONExtractScalar: _json_extract_sql,
    exp.JSONFormat: rename_func("TO_JSON_STRING"),
    exp.Levenshtein: _levenshtein_sql,
    exp.Max: max_or_greatest,
    exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
    exp.MD5Digest: rename_func("MD5"),
    exp.Min: min_or_least,
    exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
    exp.RegexpExtract: lambda self, e: self.func(
        "REGEXP_EXTRACT",
        e.this,
        e.expression,
        e.args.get("position"),
        e.args.get("occurrence"),
    ),
    exp.RegexpExtractAll: lambda self, e: self.func(
        "REGEXP_EXTRACT_ALL", e.this, e.expression
    ),
    exp.RegexpReplace: regexp_replace_sql,
    exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
    exp.ReturnsProperty: _returnsproperty_sql,
    exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
    # SELECT-level rewrites applied before generation
    exp.Select: transforms.preprocess(
        [
            transforms.explode_projection_to_unnest(),
            transforms.unqualify_unnest,
            transforms.eliminate_distinct_on,
            _alias_ordered_group,
            transforms.eliminate_semi_and_anti_joins,
        ]
    ),
    exp.SHA: rename_func("SHA1"),
    exp.SHA2: sha256_sql,
    exp.StabilityProperty: lambda self, e: (
        "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
    ),
    exp.String: rename_func("STRING"),
    exp.StrPosition: lambda self, e: (
        strposition_sql(
            self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
        )
    ),
    exp.StrToDate: _str_to_datetime_sql,
    exp.StrToTime: _str_to_datetime_sql,
    exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
    exp.TimeFromParts: rename_func("TIME"),
    exp.TimestampFromParts: rename_func("DATETIME"),
    exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
    exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
    exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
    exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
    exp.TimeStrToTime: timestrtotime_sql,
    exp.Transaction: lambda *_: "BEGIN TRANSACTION",
    exp.TsOrDsAdd: _ts_or_ds_add_sql,
    exp.TsOrDsDiff: _ts_or_ds_diff_sql,
    exp.TsOrDsToTime: rename_func("TIME"),
    exp.TsOrDsToDatetime: rename_func("DATETIME"),
    exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
    exp.Unhex: rename_func("FROM_HEX"),
    exp.UnixDate: rename_func("UNIX_DATE"),
    exp.UnixToTime: _unix_to_time_sql,
    exp.Uuid: lambda *_: "GENERATE_UUID()",
    exp.Values: _derived_table_values_to_unnest,
    exp.VariancePop: rename_func("VAR_POP"),
    exp.SafeDivide: rename_func("SAFE_DIVIDE"),
}
1049
+
1050
# JSON path syntax BigQuery can express; other parts trigger a warning.
SUPPORTED_JSON_PATH_PARTS = {
    exp.JSONPathKey,
    exp.JSONPathRoot,
    exp.JSONPathSubscript,
}

# Canonical type -> BigQuery type-name spelling.
TYPE_MAPPING = {
    **generator.Generator.TYPE_MAPPING,
    exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
    exp.DataType.Type.BIGINT: "INT64",
    exp.DataType.Type.BINARY: "BYTES",
    exp.DataType.Type.BLOB: "BYTES",
    exp.DataType.Type.BOOLEAN: "BOOL",
    exp.DataType.Type.CHAR: "STRING",
    exp.DataType.Type.DECIMAL: "NUMERIC",
    exp.DataType.Type.DOUBLE: "FLOAT64",
    exp.DataType.Type.FLOAT: "FLOAT64",
    exp.DataType.Type.INT: "INT64",
    exp.DataType.Type.NCHAR: "STRING",
    exp.DataType.Type.NVARCHAR: "STRING",
    exp.DataType.Type.SMALLINT: "INT64",
    exp.DataType.Type.TEXT: "STRING",
    exp.DataType.Type.TIMESTAMP: "DATETIME",
    exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
    exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
    exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
    exp.DataType.Type.TINYINT: "INT64",
    exp.DataType.Type.ROWVERSION: "BYTES",
    exp.DataType.Type.UUID: "STRING",
    exp.DataType.Type.VARBINARY: "BYTES",
    exp.DataType.Type.VARCHAR: "STRING",
    exp.DataType.Type.VARIANT: "ANY TYPE",
}

PROPERTIES_LOCATION = {
    **generator.Generator.PROPERTIES_LOCATION,
    exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
    exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
}

# WINDOW comes after QUALIFY
# https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
AFTER_HAVING_MODIFIER_TRANSFORMS = {
    "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
    "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
}

# from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
RESERVED_KEYWORDS = {
    "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
    "between", "by", "case", "cast", "collate", "contains", "create", "cross",
    "cube", "current", "default", "define", "desc", "distinct", "else", "end",
    "enum", "escape", "except", "exclude", "exists", "extract", "false",
    "fetch", "following", "for", "from", "full", "group", "grouping", "groups",
    "hash", "having", "if", "ignore", "in", "inner", "intersect", "interval",
    "into", "is", "join", "lateral", "left", "like", "limit", "lookup",
    "merge", "natural", "new", "no", "not", "null", "nulls", "of", "on", "or",
    "order", "outer", "over", "partition", "preceding", "proto", "qualify",
    "range", "recursive", "respect", "right", "rollup", "rows", "select",
    "set", "some", "struct", "tablesample", "then", "to", "treat", "true",
    "unbounded", "union", "unnest", "using", "when", "where", "window", "with",
    "within",
}
1196
+
1197
def mod_sql(self, expression: exp.Mod) -> str:
    """Render ``%`` as ``MOD(a, b)``, dropping redundant parentheses around
    the operands since the call syntax already delimits them."""

    def unwrap(node: exp.Expression) -> exp.Expression:
        return node.unnest() if isinstance(node, exp.Paren) else node

    return self.func("MOD", unwrap(expression.this), unwrap(expression.expression))
1205
+
1206
def column_parts(self, expression: exp.Column) -> str:
    """Render the dotted parts of a column reference.

    For a reference of the form `dataset.table`.name the quoted table path
    must be preserved as a single identifier, otherwise the reference breaks.
    """
    if not expression.meta.get("quoted_column"):
        return super().column_parts(expression)

    dotted_path = ".".join(part.name for part in expression.parts[:-1])
    quoted_path = self.sql(exp.Identifier(this=dotted_path, quoted=True))
    return f"{quoted_path}.{self.sql(expression, 'this')}"
1215
+
1216
def table_parts(self, expression: exp.Table) -> str:
    """Render the dotted parts of a table reference.

    Depending on the context, `x.y` may not resolve to the same data source
    as `x`.`y`, so the correct quoting must be used in each case. For
    example, if a CTE x clashes with a schema name:

    - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
    - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
    """
    if not expression.meta.get("quoted_table"):
        return super().table_parts(expression)

    dotted_path = ".".join(part.name for part in expression.parts)
    return self.sql(exp.Identifier(this=dotted_path, quoted=True))
1230
+
1231
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    """Choose FORMAT_DATETIME / FORMAT_TIMESTAMP / FORMAT_DATE based on the
    type of the value being formatted."""
    operand = expression.this

    if isinstance(operand, exp.TsOrDsToDatetime):
        func_name = "FORMAT_DATETIME"
    elif isinstance(operand, exp.TsOrDsToTimestamp):
        func_name = "FORMAT_TIMESTAMP"
    else:
        func_name = "FORMAT_DATE"

    # If the operand is already a conversion node, format its inner value;
    # otherwise format this node's own operand.
    conversion_nodes = (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)
    source = operand if isinstance(operand, conversion_nodes) else expression

    return self.func(
        func_name, self.format_time(expression), source.this, expression.args.get("zone")
    )
1248
+
1249
def eq_sql(self, expression: exp.EQ) -> str:
    """Render equality; operands of = cannot be NULL in BigQuery, so such
    comparisons fold to NULL — except inside UPDATE, where `col = NULL`
    is an assignment, not a comparison."""
    has_null_operand = isinstance(expression.left, exp.Null) or isinstance(
        expression.right, exp.Null
    )
    if has_null_operand and not isinstance(expression.parent, exp.Update):
        return "NULL"

    return self.binary(expression, "=")
1256
+
1257
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    """Render AT TIME ZONE.

    BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]);
    only inside a CAST-to-text does the native AT TIME ZONE syntax survive —
    everywhere else the expression is rewritten via TIMESTAMP(DATETIME(..)).
    """
    parent = expression.parent
    inside_text_cast = isinstance(parent, exp.Cast) and parent.to.is_type("text")

    if inside_text_cast:
        return super().attimezone_sql(expression)

    localized = self.func("DATETIME", expression.this, expression.args.get("zone"))
    return self.func("TIMESTAMP", localized)
1268
+
1269
def trycast_sql(self, expression: exp.TryCast) -> str:
    # TRY_CAST is spelled SAFE_CAST in BigQuery
    return self.cast_sql(expression, safe_prefix="SAFE_")
1271
+
1272
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render subscript access, wrapping the index in OFFSET/ORDINAL/SAFE_*
    per the node's `offset`/`safe` args, and using dot-access for struct
    fields subscripted with a string key."""
    this = expression.this
    expressions = expression.expressions

    if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
        arg = expressions[0]
        if arg.type is None:
            # Imported lazily to avoid a circular import at module load time
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values for structs
            return f"{self.sql(this)}.{arg.name}"

    expressions_sql = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    # offset 0 -> zero-based OFFSET(), offset 1 -> one-based ORDINAL()
    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    # SAFE_ variants return NULL instead of erroring on out-of-bounds access
    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{self.sql(this)}[{expressions_sql}]"
1301
+
1302
def in_unnest_op(self, expression: exp.Unnest) -> str:
    # BigQuery doesn't parenthesize the operand of `IN UNNEST(...)`
    return self.sql(expression)
1304
+
1305
def version_sql(self, expression: exp.Version) -> str:
    # BigQuery spells time-travel as FOR SYSTEM_TIME AS OF (not FOR TIMESTAMP)
    if expression.name == "TIMESTAMP":
        expression.set("this", "SYSTEM_TIME")
    return super().version_sql(expression)
1309
+
1310
def contains_sql(self, expression: exp.Contains) -> str:
    """Render as CONTAINS_SUBSTR, dropping LOWER() wrappers when both
    operands carry them (presumably redundant given CONTAINS_SUBSTR's
    matching semantics — confirm against BigQuery docs)."""
    haystack = expression.this
    needle = expression.expression

    if isinstance(haystack, exp.Lower) and isinstance(needle, exp.Lower):
        haystack, needle = haystack.this, needle.this

    return self.func("CONTAINS_SUBSTR", haystack, needle)
1319
+
1320
+ def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
1321
+ this = expression.this
1322
+
1323
+ # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
1324
+ # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
1325
+ # because they aren't literals and so the above syntax is invalid BigQuery.
1326
+ if isinstance(this, exp.Array):
1327
+ elem = seq_get(this.expressions, 0)
1328
+ if not (elem and elem.find(exp.Query)):
1329
+ return f"{self.sql(expression, 'to')}{self.sql(this)}"
1330
+
1331
+ return super().cast_sql(expression, safe_prefix=safe_prefix)