altimate-code 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/CHANGELOG.md +27 -0
  2. package/bin/altimate +6 -0
  3. package/bin/altimate-code +6 -0
  4. package/dbt-tools/bin/altimate-dbt +2 -0
  5. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/__init__.py +0 -0
  6. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/fetch_schema.py +35 -0
  7. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/utils.py +353 -0
  8. package/dbt-tools/dist/altimate_python_packages/altimate_packages/altimate/validate_sql.py +114 -0
  9. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__init__.py +178 -0
  10. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/__main__.py +96 -0
  11. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/_typing.py +17 -0
  12. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/__init__.py +3 -0
  13. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/__init__.py +18 -0
  14. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/_typing.py +18 -0
  15. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/column.py +332 -0
  16. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/dataframe.py +866 -0
  17. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/functions.py +1267 -0
  18. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/group.py +59 -0
  19. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/normalize.py +78 -0
  20. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/operations.py +53 -0
  21. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/readwriter.py +108 -0
  22. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/session.py +190 -0
  23. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/transforms.py +9 -0
  24. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/types.py +212 -0
  25. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/util.py +32 -0
  26. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dataframe/sql/window.py +134 -0
  27. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/__init__.py +118 -0
  28. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/athena.py +166 -0
  29. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/bigquery.py +1331 -0
  30. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/clickhouse.py +1393 -0
  31. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/databricks.py +131 -0
  32. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dialect.py +1915 -0
  33. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/doris.py +561 -0
  34. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/drill.py +157 -0
  35. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/druid.py +20 -0
  36. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/duckdb.py +1159 -0
  37. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/dune.py +16 -0
  38. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/hive.py +787 -0
  39. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/materialize.py +94 -0
  40. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/mysql.py +1324 -0
  41. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/oracle.py +378 -0
  42. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/postgres.py +778 -0
  43. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/presto.py +788 -0
  44. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/prql.py +203 -0
  45. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/redshift.py +448 -0
  46. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/risingwave.py +78 -0
  47. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/snowflake.py +1464 -0
  48. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark.py +202 -0
  49. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/spark2.py +349 -0
  50. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/sqlite.py +320 -0
  51. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/starrocks.py +343 -0
  52. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tableau.py +61 -0
  53. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/teradata.py +356 -0
  54. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/trino.py +115 -0
  55. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/dialects/tsql.py +1403 -0
  56. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/diff.py +456 -0
  57. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/errors.py +93 -0
  58. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/__init__.py +95 -0
  59. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/context.py +101 -0
  60. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/env.py +246 -0
  61. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/python.py +460 -0
  62. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/executor/table.py +155 -0
  63. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/expressions.py +8870 -0
  64. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/generator.py +4993 -0
  65. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/helper.py +582 -0
  66. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/jsonpath.py +227 -0
  67. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/lineage.py +423 -0
  68. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/__init__.py +11 -0
  69. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/annotate_types.py +589 -0
  70. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/canonicalize.py +222 -0
  71. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_ctes.py +43 -0
  72. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_joins.py +181 -0
  73. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/eliminate_subqueries.py +189 -0
  74. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/isolate_table_selects.py +50 -0
  75. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/merge_subqueries.py +415 -0
  76. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize.py +200 -0
  77. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/normalize_identifiers.py +64 -0
  78. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimize_joins.py +91 -0
  79. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/optimizer.py +94 -0
  80. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_predicates.py +222 -0
  81. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/pushdown_projections.py +172 -0
  82. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify.py +104 -0
  83. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_columns.py +1024 -0
  84. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/qualify_tables.py +155 -0
  85. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/scope.py +904 -0
  86. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/simplify.py +1587 -0
  87. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/optimizer/unnest_subqueries.py +302 -0
  88. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/parser.py +8501 -0
  89. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/planner.py +463 -0
  90. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/schema.py +588 -0
  91. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/serde.py +68 -0
  92. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/time.py +687 -0
  93. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/tokens.py +1520 -0
  94. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/transforms.py +1020 -0
  95. package/dbt-tools/dist/altimate_python_packages/altimate_packages/sqlglot/trie.py +81 -0
  96. package/dbt-tools/dist/altimate_python_packages/dbt_core_integration.py +825 -0
  97. package/dbt-tools/dist/altimate_python_packages/dbt_utils.py +157 -0
  98. package/dbt-tools/dist/index.js +23859 -0
  99. package/package.json +14 -18
  100. package/postinstall.mjs +42 -0
  101. package/skills/altimate-setup/SKILL.md +31 -0
@@ -0,0 +1,1464 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
6
+ from sqlglot.dialects.dialect import (
7
+ Dialect,
8
+ NormalizationStrategy,
9
+ build_timetostr_or_tochar,
10
+ binary_from_function,
11
+ build_default_decimal_type,
12
+ build_timestamp_from_parts,
13
+ date_delta_sql,
14
+ date_trunc_to_time,
15
+ datestrtodate_sql,
16
+ build_formatted_time,
17
+ if_sql,
18
+ inline_array_sql,
19
+ max_or_greatest,
20
+ min_or_least,
21
+ rename_func,
22
+ timestamptrunc_sql,
23
+ timestrtotime_sql,
24
+ var_map_sql,
25
+ map_date_part,
26
+ no_timestamp_sql,
27
+ strposition_sql,
28
+ timestampdiff_sql,
29
+ no_make_interval_sql,
30
+ groupconcat_sql,
31
+ )
32
+ from sqlglot.generator import unsupported_args
33
+ from sqlglot.helper import flatten, is_float, is_int, seq_get
34
+ from sqlglot.tokens import TokenType
35
+
36
+ if t.TYPE_CHECKING:
37
+ from sqlglot._typing import E, B
38
+
39
+
40
# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    """Return an arg-list builder for Snowflake's TO_DATE / TO_TIME / TO_TIMESTAMP family.

    Args:
        name: the original function name, used when no typed expression applies.
        kind: the target data type the function converts to.
        safe: True for the TRY_* variants, which must not raise on bad input.
    """

    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        value_is_int = value is not None and is_int(value.name)
        scale_or_fmt_is_int = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not value_is_int:
                if safe:
                    return exp.TryCast(this=value, to=exp.DataType.build(kind))
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (value_is_int or scale_or_fmt_is_int):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not scale_or_fmt_is_int and not is_float(value.name):
                    str_to_time = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    str_to_time.set("safe", safe)
                    return str_to_time

        if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not value_is_int:
            klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
            formatted_exp = build_formatted_time(klass, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        # Fall back to an opaque function call when no typed expression fits
        return exp.Anonymous(this=name, expressions=args)

    return _builder
81
+
82
+
83
def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    """Parse OBJECT_CONSTRUCT: keep a star-map as-is, otherwise build a Struct
    of key := value properties from the alternating key/value arguments."""
    var_map = parser.build_var_map(args)

    if isinstance(var_map, exp.StarMap):
        return var_map

    properties = [
        exp.PropertyEQ(this=key, expression=value)
        for key, value in zip(var_map.keys, var_map.values)
    ]
    return exp.Struct(expressions=properties)
94
+
95
+
96
def _build_datediff(args: t.List) -> exp.DateDiff:
    """Parse DATEDIFF(unit, start, end): Snowflake puts the unit first, so the
    expression's `this` is the end date and `expression` is the start date."""
    unit = seq_get(args, 0)
    start = seq_get(args, 1)
    end = seq_get(args, 2)
    return exp.DateDiff(this=end, expression=start, unit=map_date_part(unit))
100
+
101
+
102
def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Return a builder for DATEADD/TIMEADD-style calls of shape (unit, amount, target)."""

    def _builder(args: t.List) -> E:
        unit = seq_get(args, 0)
        amount = seq_get(args, 1)
        target = seq_get(args, 2)
        return expr_type(this=target, expression=amount, unit=map_date_part(unit))

    return _builder
111
+
112
+
113
def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    """Return a builder mapping a two-argument bitwise call onto `expr_type`.

    The three-argument (padded) Snowflake variants have no generic binary
    equivalent, so those stay as an anonymous call under `name`.
    """

    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)
        return binary_from_function(expr_type)(args)

    return _builder
121
+
122
+
123
# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    """Rewrite DIV0(a, b) as IF(b = 0 AND NOT a IS NULL, 0, a / b)."""
    numerator = exp._wrap(seq_get(args, 0), exp.Binary)
    denominator = exp._wrap(seq_get(args, 1), exp.Binary)

    # Only substitute 0 when the divisor is zero and the dividend is non-NULL
    condition = exp.EQ(this=denominator, expression=exp.Literal.number(0)).and_(
        exp.Is(this=numerator, expression=exp.null()).not_()
    )
    return exp.If(
        this=condition,
        true=exp.Literal.number(0),
        false=exp.Div(this=numerator, expression=denominator),
    )
134
+
135
+
136
# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    """Rewrite ZEROIFNULL(x) as IF(x IS NULL, 0, x)."""
    operand = seq_get(args, 0)
    is_null = exp.Is(this=operand, expression=exp.Null())
    return exp.If(this=is_null, true=exp.Literal.number(0), false=operand)
140
+
141
+
142
# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    """Rewrite NULLIFZERO(x) as IF(x = 0, NULL, x)."""
    # NOTE: the original comment linked to the ZEROIFNULL doc page; this
    # builder handles NULLIFZERO, so the link above is corrected.
    operand = seq_get(args, 0)
    is_zero = exp.EQ(this=operand, expression=exp.Literal.number(0))
    return exp.If(this=is_zero, true=exp.Null(), false=operand)
146
+
147
+
148
def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    """Generate REGEXP_LIKE with the case-insensitive 'i' flag forced on."""
    flags = expression.text("flag")
    if "i" not in flags:
        flags = f"{flags}i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flags)
    )
157
+
158
+
159
def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    """Parse REGEXP_REPLACE, defaulting a missing replacement to the empty string
    (Snowflake's documented default)."""
    node = exp.RegexpReplace.from_arg_list(args)

    if not node.args.get("replacement"):
        node.set("replacement", exp.Literal.string(""))

    return node
166
+
167
+
168
def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    """Bind arguments now; defer the actual SHOW parsing to the parser instance."""

    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse
173
+
174
+
175
def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    """Parse DATE_TRUNC, normalizing Snowflake date-part aliases in the unit."""
    truncated = date_trunc_to_time(args)
    truncated.set("unit", map_date_part(truncated.args["unit"]))
    return truncated
179
+
180
+
181
def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if not isinstance(expression, exp.Pivot):
        return expression

    if expression.unpivot:
        return transforms.unqualify_columns(expression)

    for field in expression.fields:
        first_expr = seq_get(field.expressions if field else [], 0)

        if isinstance(first_expr, exp.PivotAny):
            # Unqualify ANY ORDER BY <column> in place
            unqualified = transforms.unqualify_columns(first_expr)
            t.cast(exp.Expression, field).set("expressions", unqualified, 0)

    return expression
204
+
205
+
206
def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    """Drop inner type parameters from nested column types on CREATE statements,
    except when the table is an Iceberg table (which supports structured types)."""
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(dtype: exp.DataType) -> exp.DataType:
        # Strip e.g. ARRAY<INT> down to plain ARRAY
        if dtype.this in exp.DataType.NESTED_TYPES:
            dtype.set("expressions", None)
        return dtype

    props = expression.args.get("properties")
    is_iceberg = bool(props and props.find(exp.IcebergProperty))

    if isinstance(expression.this, exp.Schema) and not is_iceberg:
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression
223
+
224
+
225
def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    """Rewrite UNNEST(GENERATE_DATE_ARRAY(start, end, step)) in place into a
    Snowflake-friendly subquery over ARRAY_GENERATE_RANGE + DATEADD.

    Only fires for an interval step of exactly 1; otherwise the node is left alone.
    """
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))
255
+
256
+
257
def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    """Prepare every GENERATE_DATE_ARRAY in a SELECT for Snowflake transpilation."""
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake
            # query is the following (it'll be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            # Only a lone UNNEST(...) used as a FROM/JOIN source can be rewritten in place
            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression
279
+
280
+
281
def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Return a builder for REGEXP_SUBSTR / REGEXP_EXTRACT_ALL-style calls,
    defaulting the capture group to 0 (the whole match)."""

    def _builder(args: t.List) -> E:
        subject, pattern, position, occurrence, parameters, group = (
            seq_get(args, i) for i in range(6)
        )
        return expr_type(
            this=subject,
            expression=pattern,
            position=position,
            occurrence=occurrence,
            parameters=parameters,
            group=group or exp.Literal.number(0),
        )

    return _builder
293
+
294
+
295
def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    """Generate REGEXP_SUBSTR / REGEXP_EXTRACT_ALL, filling in positional defaults.

    Snowflake's optional arguments are positional, so any argument that is set
    forces defaults for every argument before it.
    """
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also default value) which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    # Each earlier positional argument must be present whenever a later one is
    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    func_name = (
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL"
    )
    return self.func(
        func_name,
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )
318
+
319
+
320
def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    """Generate a TRANSFORM over a JSON extraction: each element is cast to
    VARCHAR for JSON_VALUE_ARRAY, or re-parsed as JSON for JSON_EXTRACT_ARRAY."""
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        element: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        element = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=element)

    return self.func("TRANSFORM", json_extract, transform_lambda)
334
+
335
+
336
+ class Snowflake(Dialect):
337
+ # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
338
+ NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
339
+ NULL_ORDERING = "nulls_are_large"
340
+ TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
341
+ SUPPORTS_USER_DEFINED_TYPES = False
342
+ SUPPORTS_SEMI_ANTI_JOIN = False
343
+ PREFER_CTE_ALIAS_COLUMN = True
344
+ TABLESAMPLE_SIZE_IS_PERCENT = True
345
+ COPY_PARAMS_ARE_CSV = False
346
+ ARRAY_AGG_INCLUDES_NULLS = None
347
+
348
+ TIME_MAPPING = {
349
+ "YYYY": "%Y",
350
+ "yyyy": "%Y",
351
+ "YY": "%y",
352
+ "yy": "%y",
353
+ "MMMM": "%B",
354
+ "mmmm": "%B",
355
+ "MON": "%b",
356
+ "mon": "%b",
357
+ "MM": "%m",
358
+ "mm": "%m",
359
+ "DD": "%d",
360
+ "dd": "%-d",
361
+ "DY": "%a",
362
+ "dy": "%w",
363
+ "HH24": "%H",
364
+ "hh24": "%H",
365
+ "HH12": "%I",
366
+ "hh12": "%I",
367
+ "MI": "%M",
368
+ "mi": "%M",
369
+ "SS": "%S",
370
+ "ss": "%S",
371
+ "FF6": "%f",
372
+ "ff6": "%f",
373
+ }
374
+
375
+ DATE_PART_MAPPING = {
376
+ **Dialect.DATE_PART_MAPPING,
377
+ "ISOWEEK": "WEEKISO",
378
+ }
379
+
380
+ def quote_identifier(self, expression: E, identify: bool = True) -> E:
381
+ # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
382
+ # unquoted DUAL keyword in a special way and does not map it to a user-defined table
383
+ if (
384
+ isinstance(expression, exp.Identifier)
385
+ and isinstance(expression.parent, exp.Table)
386
+ and expression.name.lower() == "dual"
387
+ ):
388
+ return expression # type: ignore
389
+
390
+ return super().quote_identifier(expression, identify=identify)
391
+
392
+ class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
393
+ SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
394
+ SINGLE_TOKENS.pop("$")
395
+
396
+ class Parser(parser.Parser):
397
+ IDENTIFY_PIVOT_STRINGS = True
398
+ DEFAULT_SAMPLING_METHOD = "BERNOULLI"
399
+ COLON_IS_VARIANT_EXTRACT = True
400
+
401
+ ID_VAR_TOKENS = {
402
+ *parser.Parser.ID_VAR_TOKENS,
403
+ TokenType.MATCH_CONDITION,
404
+ }
405
+
406
+ TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
407
+ TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)
408
+
409
+ COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}
410
+
411
+ FUNCTIONS = {
412
+ **parser.Parser.FUNCTIONS,
413
+ "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
414
+ "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
415
+ "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
416
+ this=seq_get(args, 1), expression=seq_get(args, 0)
417
+ ),
418
+ "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
419
+ # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive
420
+ start=seq_get(args, 0),
421
+ end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
422
+ step=seq_get(args, 2),
423
+ ),
424
+ "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
425
+ "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
426
+ "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
427
+ "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
428
+ "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
429
+ "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
430
+ "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
431
+ "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
432
+ "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
433
+ "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
434
+ "DATE_TRUNC": _date_trunc_to_time,
435
+ "DATEADD": _build_date_time_add(exp.DateAdd),
436
+ "DATEDIFF": _build_datediff,
437
+ "DIV0": _build_if_from_div0,
438
+ "EDITDISTANCE": lambda args: exp.Levenshtein(
439
+ this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
440
+ ),
441
+ "FLATTEN": exp.Explode.from_arg_list,
442
+ "GET_PATH": lambda args, dialect: exp.JSONExtract(
443
+ this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
444
+ ),
445
+ "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
446
+ "IFF": exp.If.from_arg_list,
447
+ "LAST_DAY": lambda args: exp.LastDay(
448
+ this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
449
+ ),
450
+ "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
451
+ "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
452
+ "NULLIFZERO": _build_if_from_nullifzero,
453
+ "OBJECT_CONSTRUCT": _build_object_construct,
454
+ "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
455
+ "REGEXP_REPLACE": _build_regexp_replace,
456
+ "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
457
+ "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
458
+ "RLIKE": exp.RegexpLike.from_arg_list,
459
+ "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
460
+ "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
461
+ "TIMEADD": _build_date_time_add(exp.TimeAdd),
462
+ "TIMEDIFF": _build_datediff,
463
+ "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
464
+ "TIMESTAMPDIFF": _build_datediff,
465
+ "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
466
+ "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
467
+ "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
468
+ "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
469
+ "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
470
+ "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
471
+ "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
472
+ "TRY_TO_TIMESTAMP": _build_datetime(
473
+ "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
474
+ ),
475
+ "TO_CHAR": build_timetostr_or_tochar,
476
+ "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
477
+ "TO_NUMBER": lambda args: exp.ToNumber(
478
+ this=seq_get(args, 0),
479
+ format=seq_get(args, 1),
480
+ precision=seq_get(args, 2),
481
+ scale=seq_get(args, 3),
482
+ ),
483
+ "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
484
+ "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
485
+ "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
486
+ "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
487
+ "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
488
+ "TO_VARCHAR": exp.ToChar.from_arg_list,
489
+ "ZEROIFNULL": _build_if_from_zeroifnull,
490
+ }
491
+
492
+ FUNCTION_PARSERS = {
493
+ **parser.Parser.FUNCTION_PARSERS,
494
+ "DATE_PART": lambda self: self._parse_date_part(),
495
+ "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
496
+ "LISTAGG": lambda self: self._parse_string_agg(),
497
+ }
498
+ FUNCTION_PARSERS.pop("TRIM")
499
+
500
+ TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}
501
+
502
+ RANGE_PARSERS = {
503
+ **parser.Parser.RANGE_PARSERS,
504
+ TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
505
+ TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
506
+ }
507
+
508
+ ALTER_PARSERS = {
509
+ **parser.Parser.ALTER_PARSERS,
510
+ "UNSET": lambda self: self.expression(
511
+ exp.Set,
512
+ tag=self._match_text_seq("TAG"),
513
+ expressions=self._parse_csv(self._parse_id_var),
514
+ unset=True,
515
+ ),
516
+ }
517
+
518
+ STATEMENT_PARSERS = {
519
+ **parser.Parser.STATEMENT_PARSERS,
520
+ TokenType.GET: lambda self: self._parse_get(),
521
+ TokenType.PUT: lambda self: self._parse_put(),
522
+ TokenType.SHOW: lambda self: self._parse_show(),
523
+ }
524
+
525
+ PROPERTY_PARSERS = {
526
+ **parser.Parser.PROPERTY_PARSERS,
527
+ "CREDENTIALS": lambda self: self._parse_credentials_property(),
528
+ "FILE_FORMAT": lambda self: self._parse_file_format_property(),
529
+ "LOCATION": lambda self: self._parse_location_property(),
530
+ "TAG": lambda self: self._parse_tag(),
531
+ "USING": lambda self: self._match_text_seq("TEMPLATE")
532
+ and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
533
+ }
534
+
535
+ TYPE_CONVERTERS = {
536
+ # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
537
+ exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
538
+ }
539
+
540
# Maps the object-kind keyword(s) after SHOW [TERSE] to a dedicated sub-parser.
# TERSE variants reuse the same parser; the TERSE flag is recovered later by
# _parse_show_snowflake from the token stream.
SHOW_PARSERS = {
    "DATABASES": _show_parser("DATABASES"),
    "TERSE DATABASES": _show_parser("DATABASES"),
    "SCHEMAS": _show_parser("SCHEMAS"),
    "TERSE SCHEMAS": _show_parser("SCHEMAS"),
    "OBJECTS": _show_parser("OBJECTS"),
    "TERSE OBJECTS": _show_parser("OBJECTS"),
    "TABLES": _show_parser("TABLES"),
    "TERSE TABLES": _show_parser("TABLES"),
    "VIEWS": _show_parser("VIEWS"),
    "TERSE VIEWS": _show_parser("VIEWS"),
    "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
    "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
    "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
    "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
    "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
    "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
    "SEQUENCES": _show_parser("SEQUENCES"),
    "TERSE SEQUENCES": _show_parser("SEQUENCES"),
    "STAGES": _show_parser("STAGES"),
    "COLUMNS": _show_parser("COLUMNS"),
    "USERS": _show_parser("USERS"),
    "TERSE USERS": _show_parser("USERS"),
    "FILE FORMATS": _show_parser("FILE FORMATS"),
    "FUNCTIONS": _show_parser("FUNCTIONS"),
    "PROCEDURES": _show_parser("PROCEDURES"),
    "WAREHOUSES": _show_parser("WAREHOUSES"),
}
568
+
569
# WITH MASKING POLICY / WITH PROJECTION POLICY / WITH TAG column constraints
# (the WITH keyword is optional, hence the direct MASKING/PROJECTION/TAG keys).
CONSTRAINT_PARSERS = {
    **parser.Parser.CONSTRAINT_PARSERS,
    "WITH": lambda self: self._parse_with_constraint(),
    "MASKING": lambda self: self._parse_with_constraint(),
    "PROJECTION": lambda self: self._parse_with_constraint(),
    "TAG": lambda self: self._parse_with_constraint(),
}
576
+
577
# Single-character tokens that may appear inside a staged-file path (@stage/a.b%c).
STAGED_FILE_SINGLE_TOKENS = {
    TokenType.DOT,
    TokenType.MOD,
    TokenType.SLASH,
}
582
+
583
# Column names produced by Snowflake's FLATTEN table function, in output order.
FLATTEN_COLUMNS = "SEQ KEY PATH INDEX VALUE THIS".split()
584
+
585
# SHOW kinds whose implicit IN scope is a schema rather than a table.
SCHEMA_KINDS = {
    "OBJECTS",
    "TABLES",
    "VIEWS",
    "SEQUENCES",
    "UNIQUE KEYS",
    "IMPORTED KEYS",
}
586
+
587
# CREATE kinds whose target should be an Identifier, not a Table node.
NON_TABLE_CREATABLES = {
    "STORAGE INTEGRATION",
    "TAG",
    "WAREHOUSE",
    "STREAMLIT",
}
588
+
589
# Lambda arguments may carry inline type casts (x INT -> ...); strip the casts
# from the parameter list while keeping them on the bound names.
LAMBDAS = {
    **parser.Parser.LAMBDAS,
    TokenType.ARROW: lambda self, expressions: self.expression(
        exp.Lambda,
        this=self._replace_lambda(
            self._parse_assignment(),
            expressions,
        ),
        expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
    ),
}
600
+
601
def _parse_use(self) -> exp.Use:
    """Parse USE, additionally supporting USE SECONDARY ROLES [ALL | NONE | <roles>]."""
    if self._match_text_seq("SECONDARY", "ROLES"):
        # ALL/NONE become a bare var; otherwise a CSV of role names follows.
        this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
        roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
        return self.expression(
            exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
        )

    return super()._parse_use()
610
+
611
def _negate_range(
    self, this: t.Optional[exp.Expression] = None
) -> t.Optional[exp.Expression]:
    """Negate a range predicate, special-casing NOT IN with a subquery."""
    if not this:
        return this

    query = this.args.get("query")
    if isinstance(this, exp.In) and isinstance(query, exp.Query):
        # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
        # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
        # which can produce different results (most likely a SnowFlake bug).
        #
        # https://docs.snowflake.com/en/sql-reference/functions/in
        # Context: https://github.com/tobymao/sqlglot/issues/3890
        return self.expression(
            exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
        )

    return self.expression(exp.Not, this=this)
630
+
631
def _parse_tag(self) -> exp.Tags:
    """Parse a parenthesized, comma-separated TAG (<name> = <value>, ...) list."""
    return self.expression(
        exp.Tags,
        expressions=self._parse_wrapped_csv(self._parse_property),
    )
636
+
637
def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
    """Parse [WITH] MASKING POLICY / PROJECTION POLICY / TAG column constraints."""
    # The WITH keyword is optional; back up one token if it wasn't present.
    if self._prev.token_type != TokenType.WITH:
        self._retreat(self._index - 1)

    if self._match_text_seq("MASKING", "POLICY"):
        policy = self._parse_column()
        return self.expression(
            exp.MaskingPolicyColumnConstraint,
            # Policies can be qualified (db.schema.policy) — normalize to a Dot chain.
            this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
            expressions=self._match(TokenType.USING)
            and self._parse_wrapped_csv(self._parse_id_var),
        )
    if self._match_text_seq("PROJECTION", "POLICY"):
        policy = self._parse_column()
        return self.expression(
            exp.ProjectionPolicyColumnConstraint,
            this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
        )
    if self._match(TokenType.TAG):
        return self._parse_tag()

    return None
659
+
660
def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
    """Parse WITH-prefixed properties, additionally supporting WITH TAG (...)."""
    if self._match(TokenType.TAG):
        return self._parse_tag()

    return super()._parse_with_property()
665
+
666
def _parse_create(self) -> exp.Create | exp.Command:
    """Parse CREATE, unwrapping Table nodes for non-table creatables (TAG, WAREHOUSE, ...)."""
    expression = super()._parse_create()
    if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
        # Replace the Table node with the enclosed Identifier
        expression.this.replace(expression.this.this)

    return expression
673
+
674
# https://docs.snowflake.com/en/sql-reference/functions/date_part.html
# https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
    """Parse DATE_PART(<part>, <expr>), rewriting EPOCH parts to TimeToUnix."""
    this = self._parse_var() or self._parse_type()

    if not this:
        return None

    self._match(TokenType.COMMA)
    expression = self._parse_bitwise()
    this = map_date_part(this)
    name = this.name.upper()

    if name.startswith("EPOCH"):
        # EPOCH_* parts become a unix timestamp, scaled to the requested precision.
        if name == "EPOCH_MILLISECOND":
            scale = 10**3
        elif name == "EPOCH_MICROSECOND":
            scale = 10**6
        elif name == "EPOCH_NANOSECOND":
            scale = 10**9
        else:
            scale = None

        ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
        to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

        if scale:
            to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

        return to_unix

    return self.expression(exp.Extract, this=this, expression=expression)
706
+
707
def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
    """Parse a key (or key:value slice) inside bracket syntax."""
    if is_map:
        # Keys are strings in Snowflake's objects, see also:
        # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
        # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
        return self._parse_slice(self._parse_string())

    return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
715
+
716
def _parse_lateral(self) -> t.Optional[exp.Lateral]:
    """Parse LATERAL, attaching FLATTEN's implicit output columns to the alias."""
    lateral = super()._parse_lateral()
    if not lateral:
        return lateral

    if isinstance(lateral.this, exp.Explode):
        table_alias = lateral.args.get("alias")
        # FLATTEN always yields these six columns; name them explicitly so
        # downstream qualification works.
        columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
        if table_alias and not table_alias.args.get("columns"):
            table_alias.set("columns", columns)
        elif not table_alias:
            exp.alias_(lateral, "_flattened", table=columns, copy=False)

    return lateral
730
+
731
def _parse_table_parts(
    self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
    """Parse table parts, additionally supporting staged-file references.

    Handles `@stage/path` and quoted file paths, including the optional
    (FILE_FORMAT => ..., PATTERN => ...) argument list.
    """
    # https://docs.snowflake.com/en/user-guide/querying-stage
    if self._match(TokenType.STRING, advance=False):
        table = self._parse_string()
    elif self._match_text_seq("@", advance=False):
        table = self._parse_location_path()
    else:
        table = None

    if table:
        file_format = None
        pattern = None

        wrapped = self._match(TokenType.L_PAREN)
        while self._curr and wrapped and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FILE_FORMAT", "=>"):
                # FILE_FORMAT may be a string literal or a (possibly qualified) name.
                file_format = self._parse_string() or super()._parse_table_parts(
                    is_db_reference=is_db_reference
                )
            elif self._match_text_seq("PATTERN", "=>"):
                pattern = self._parse_string()
            else:
                break

            self._match(TokenType.COMMA)

        table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
    else:
        table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

    return table
764
+
765
def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Optional[t.Collection[TokenType]] = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a table reference, hoisting TableFromRows out of its Table wrapper."""
    table = super()._parse_table(
        schema=schema,
        joins=joins,
        alias_tokens=alias_tokens,
        parse_bracket=parse_bracket,
        is_db_reference=is_db_reference,
        parse_partition=parse_partition,
    )
    if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
        table_from_rows = table.this
        # Move the wrapper's args (alias, joins, ...) onto the TableFromRows node.
        for arg in exp.TableFromRows.arg_types:
            if arg != "this":
                table_from_rows.set(arg, table.args.get(arg))

        table = table_from_rows

    return table
791
+
792
def _parse_id_var(
    self,
    any_token: bool = True,
    tokens: t.Optional[t.Collection[TokenType]] = None,
) -> t.Optional[exp.Expression]:
    """Parse an identifier, supporting Snowflake's IDENTIFIER(<name-or-string>) form."""
    if self._match_text_seq("IDENTIFIER", "("):
        identifier = (
            super()._parse_id_var(any_token=any_token, tokens=tokens)
            or self._parse_string()
        )
        self._match_r_paren()
        # Preserved as an Anonymous function so it round-trips verbatim.
        return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

    return super()._parse_id_var(any_token=any_token, tokens=tokens)
806
+
807
def _parse_show_snowflake(self, this: str) -> exp.Show:
    """Parse the remainder of a SHOW <kind> statement into an exp.Show node."""
    scope = None
    scope_kind = None

    # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
    # which is syntactically valid but has no effect on the output
    terse = self._tokens[self._index - 2].text.upper() == "TERSE"

    history = self._match_text_seq("HISTORY")

    like = self._parse_string() if self._match(TokenType.LIKE) else None

    if self._match(TokenType.IN):
        if self._match_text_seq("ACCOUNT"):
            scope_kind = "ACCOUNT"
        elif self._match_text_seq("CLASS"):
            scope_kind = "CLASS"
            scope = self._parse_table_parts()
        elif self._match_text_seq("APPLICATION"):
            scope_kind = "APPLICATION"
            if self._match_text_seq("PACKAGE"):
                scope_kind += " PACKAGE"
            scope = self._parse_table_parts()
        elif self._match_set(self.DB_CREATABLES):
            scope_kind = self._prev.text.upper()
            if self._curr:
                scope = self._parse_table_parts()
        elif self._curr:
            # Bare `IN <name>`: infer schema vs. table scope from the SHOW kind.
            scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
            scope = self._parse_table_parts()

    return self.expression(
        exp.Show,
        **{
            "terse": terse,
            "this": this,
            "history": history,
            "like": like,
            "scope": scope,
            "scope_kind": scope_kind,
            "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
            "limit": self._parse_limit(),
            "from": self._parse_string() if self._match(TokenType.FROM) else None,
            "privileges": self._match_text_seq("WITH", "PRIVILEGES")
            and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
        },
    )
854
+
855
def _parse_put(self) -> exp.Put | exp.Command:
    """Parse PUT <file> <stage>; unquoted file paths fall back to a raw Command."""
    if self._curr.token_type != TokenType.STRING:
        return self._parse_as_command(self._prev)

    return self.expression(
        exp.Put,
        this=self._parse_string(),
        target=self._parse_location_path(),
        properties=self._parse_properties(),
    )
865
+
866
def _parse_get(self) -> t.Optional[exp.Expression]:
    """Parse a GET statement, falling back to the GET() function or a raw Command."""
    start = self._prev

    # If we detect GET( then we need to parse a function, not a statement
    if self._match(TokenType.L_PAREN):
        self._retreat(self._index - 2)
        return self._parse_expression()

    target = self._parse_location_path()

    # Parse as command if unquoted file path
    if self._curr.token_type == TokenType.URI_START:
        return self._parse_as_command(start)

    return self.expression(
        exp.Get,
        this=self._parse_string(),
        target=target,
        properties=self._parse_properties(),
    )
886
+
887
def _parse_location_property(self) -> exp.LocationProperty:
    """Parse LOCATION [=] <stage path>."""
    self._match(TokenType.EQ)
    return self.expression(exp.LocationProperty, this=self._parse_location_path())
890
+
891
def _parse_file_location(self) -> t.Optional[exp.Expression]:
    """Parse the FROM target of COPY: a subquery or a staged file reference."""
    # Parse either a subquery or a staged file
    return (
        self._parse_select(table=True, parse_subquery_alias=False)
        if self._match(TokenType.L_PAREN, advance=False)
        else self._parse_table_parts()
    )
898
+
899
def _parse_location_path(self) -> exp.Var:
    """Consume a stage/file path (e.g. @db.schema.stage/dir/file) as a single Var."""
    start = self._curr
    self._advance_any(ignore_reserved=True)

    # We avoid consuming a comma token because external tables like @foo and @bar
    # can be joined in a query with a comma separator, as well as closing paren
    # in case of subqueries
    while self._is_connected() and not self._match_set(
        (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
    ):
        self._advance_any(ignore_reserved=True)

    return exp.var(self._find_sql(start, self._prev))
912
+
913
def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
    """Parse a lambda parameter, wrapping it in a Cast if a type annotation follows."""
    this = super()._parse_lambda_arg()

    if not this:
        return this

    typ = self._parse_types()

    if typ:
        return self.expression(exp.Cast, this=this, to=typ)

    return this
925
+
926
def _parse_foreign_key(self) -> exp.ForeignKey:
    """Parse FOREIGN KEY, supporting Snowflake's inline (column-less) form."""
    # inlineFK, the REFERENCES columns are implied
    if self._match(TokenType.REFERENCES, advance=False):
        return self.expression(exp.ForeignKey)

    # outoflineFK, explicitly names the columns
    return super()._parse_foreign_key()
933
+
934
def _parse_file_format_property(self) -> exp.FileFormatProperty:
    """Parse FILE_FORMAT [=] (<options>) or FILE_FORMAT [=] <format name>."""
    self._match(TokenType.EQ)
    if self._match(TokenType.L_PAREN, advance=False):
        expressions = self._parse_wrapped_options()
    else:
        expressions = [self._parse_format_name()]

    return self.expression(
        exp.FileFormatProperty,
        expressions=expressions,
    )
945
+
946
def _parse_credentials_property(self) -> exp.CredentialsProperty:
    """Parse CREDENTIALS = (<key> = <value> ...)."""
    return self.expression(
        exp.CredentialsProperty,
        expressions=self._parse_wrapped_options(),
    )
951
+
952
class Tokenizer(tokens.Tokenizer):
    """Snowflake tokenizer: `$$` raw strings, `//` comments, `$N` parameters, etc."""

    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]
    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "FILE://": TokenType.URI_START,
        "BYTEINT": TokenType.INT,
        "EXCLUDE": TokenType.EXCEPT,
        "FILE FORMAT": TokenType.FILE_FORMAT,
        "GET": TokenType.GET,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.PUT,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "WAREHOUSE": TokenType.WAREHOUSE,
        "STAGE": TokenType.STAGE,
        "STREAMLIT": TokenType.STREAMLIT,
    }
    # Snowflake has no optimizer hints, so `/*+` is just a comment opener.
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    # SHOW is parsed as a real statement here, not passed through as a command.
    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
996
+
997
class Generator(generator.Generator):
    """Snowflake SQL generator.

    Each flag below tweaks how the base generator emits SQL (e.g. `$` as the
    parameter token, EXCLUDE instead of EXCEPT in SELECT *, literal-only LIMIT).
    """

    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    ARRAY_CONCAT_IS_VAR_LEN = False
    SUPPORTS_CONVERT_TIMEZONE = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = True
    ARRAY_SIZE_NAME = "ARRAY_SIZE"
1020
+
1021
# Expression-to-SQL transforms: maps sqlglot expression types to the Snowflake
# function name or rendering callback used when generating SQL.
TRANSFORMS = {
    **generator.Generator.TRANSFORMS,
    exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
    exp.ArgMax: rename_func("MAX_BY"),
    exp.ArgMin: rename_func("MIN_BY"),
    exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
    # Note the swapped argument order: ARRAY_CONTAINS(<value>, <array>).
    exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
    exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
    exp.AtTimeZone: lambda self, e: self.func(
        "CONVERT_TIMEZONE", e.args.get("zone"), e.this
    ),
    exp.BitwiseOr: rename_func("BITOR"),
    exp.BitwiseXor: rename_func("BITXOR"),
    exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
    exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
    exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
    exp.DateAdd: date_delta_sql("DATEADD"),
    exp.DateDiff: date_delta_sql("DATEDIFF"),
    exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
    exp.DatetimeDiff: timestampdiff_sql,
    exp.DateStrToDate: datestrtodate_sql,
    exp.DayOfMonth: rename_func("DAYOFMONTH"),
    exp.DayOfWeek: rename_func("DAYOFWEEK"),
    exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
    exp.DayOfYear: rename_func("DAYOFYEAR"),
    exp.Explode: rename_func("FLATTEN"),
    exp.Extract: lambda self, e: self.func(
        "DATE_PART", map_date_part(e.this, self.dialect), e.expression
    ),
    exp.FileFormatProperty: lambda self, e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
    exp.FromTimeZone: lambda self, e: self.func(
        "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
    ),
    # ARRAY_GENERATE_RANGE's end bound is exclusive, hence the + 1.
    exp.GenerateSeries: lambda self, e: self.func(
        "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
    ),
    exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
    exp.If: if_sql(name="IFF", false_value="NULL"),
    exp.JSONExtractArray: _json_extract_value_array_sql,
    exp.JSONExtractScalar: lambda self, e: self.func(
        "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
    ),
    exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
    exp.JSONPathRoot: lambda *_: "",
    exp.JSONValueArray: _json_extract_value_array_sql,
    exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
        rename_func("EDITDISTANCE")
    ),
    exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
    exp.LogicalAnd: rename_func("BOOLAND_AGG"),
    exp.LogicalOr: rename_func("BOOLOR_AGG"),
    exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
    exp.MakeInterval: no_make_interval_sql,
    exp.Max: max_or_greatest,
    exp.Min: min_or_least,
    exp.ParseJSON: lambda self, e: self.func(
        "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
    ),
    exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
    exp.PercentileCont: transforms.preprocess(
        [transforms.add_within_group_for_percentiles]
    ),
    exp.PercentileDisc: transforms.preprocess(
        [transforms.add_within_group_for_percentiles]
    ),
    exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
    exp.RegexpExtract: _regexpextract_sql,
    exp.RegexpExtractAll: _regexpextract_sql,
    exp.RegexpILike: _regexpilike_sql,
    exp.Rand: rename_func("RANDOM"),
    exp.Select: transforms.preprocess(
        [
            transforms.eliminate_window_clause,
            transforms.eliminate_distinct_on,
            transforms.explode_projection_to_unnest(),
            transforms.eliminate_semi_and_anti_joins,
            _transform_generate_date_array,
        ]
    ),
    exp.SHA: rename_func("SHA1"),
    exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
    exp.StartsWith: rename_func("STARTSWITH"),
    exp.EndsWith: rename_func("ENDSWITH"),
    exp.StrPosition: lambda self, e: strposition_sql(
        self, e, func_name="CHARINDEX", supports_position=True
    ),
    exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
    exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
    exp.Stuff: rename_func("INSERT"),
    exp.StPoint: rename_func("ST_MAKEPOINT"),
    exp.TimeAdd: date_delta_sql("TIMEADD"),
    exp.Timestamp: no_timestamp_sql,
    exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
    exp.TimestampDiff: lambda self, e: self.func(
        "TIMESTAMPDIFF", e.unit, e.expression, e.this
    ),
    exp.TimestampTrunc: timestamptrunc_sql(),
    exp.TimeStrToTime: timestrtotime_sql,
    exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
    exp.ToArray: rename_func("TO_ARRAY"),
    exp.ToChar: lambda self, e: self.function_fallback_sql(e),
    exp.ToDouble: rename_func("TO_DOUBLE"),
    exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
    exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
    exp.TsOrDsToDate: lambda self, e: self.func(
        "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
    ),
    exp.TsOrDsToTime: lambda self, e: self.func(
        "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
    ),
    exp.Unhex: rename_func("HEX_DECODE_BINARY"),
    exp.UnixToTime: rename_func("TO_TIMESTAMP"),
    exp.Uuid: rename_func("UUID_STRING"),
    exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
    exp.WeekOfYear: rename_func("WEEKOFYEAR"),
    exp.Xor: rename_func("BOOLXOR"),
}
1139
+
1140
# JSON path node types Snowflake can express natively (GET_PATH syntax).
SUPPORTED_JSON_PATH_PARTS = {
    exp.JSONPathKey,
    exp.JSONPathRoot,
    exp.JSONPathSubscript,
}
1145
+
1146
# Type renames applied on output; structured types collapse to OBJECT.
TYPE_MAPPING = {
    **generator.Generator.TYPE_MAPPING,
    exp.DataType.Type.NESTED: "OBJECT",
    exp.DataType.Type.STRUCT: "OBJECT",
    exp.DataType.Type.BIGDECIMAL: "DOUBLE",
}
1152
+
1153
# Snowflake spells AUTO_INCREMENT as AUTOINCREMENT.
TOKEN_MAPPING = {
    TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
}
1156
+
1157
# Where each DDL property is rendered relative to the CREATE statement.
PROPERTIES_LOCATION = {
    **generator.Generator.PROPERTIES_LOCATION,
    exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
    exp.LocationProperty: exp.Properties.Location.POST_WITH,
    exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
    exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
    exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
}
1165
+
1166
# Expression types that cannot appear inside a VALUES table literal; their
# presence forces values_sql to fall back to SELECT ... UNION ALL rendering.
UNSUPPORTED_VALUES_EXPRESSIONS = {
    exp.Map,
    exp.StarMap,
    exp.Struct,
    exp.VarMap,
}
1172
+
1173
# ARRAY_AGG does not accept RESPECT/IGNORE NULLS in Snowflake.
RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)
1174
+
1175
def with_properties(self, properties: exp.Properties) -> str:
    """Render properties space-separated and unwrapped (no WITH (...) block)."""
    return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")
1177
+
1178
def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
    """Render VALUES, falling back to SELECT-style rendering when the rows
    contain expressions Snowflake's VALUES clause cannot hold (maps/structs)."""
    supported = not expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS)
    return super().values_sql(expression, values_as_table=values_as_table and supported)
1183
+
1184
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type, collapsing unnamed structured types to bare OBJECT."""
    expressions = expression.expressions
    if (
        expressions
        and expression.is_type(*exp.DataType.STRUCT_TYPES)
        and any(isinstance(field_type, exp.DataType) for field_type in expressions)
    ):
        # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
        return "OBJECT"

    return super().datatype_sql(expression)
1195
+
1196
def tonumber_sql(self, expression: exp.ToNumber) -> str:
    """Render TO_NUMBER(<value>[, <format>][, <precision>][, <scale>])."""
    args = expression.args
    return self.func(
        "TO_NUMBER",
        expression.this,
        args.get("format"),
        args.get("precision"),
        args.get("scale"),
    )
1204
+
1205
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TIMESTAMP_FROM_PARTS, folding a milli argument into nanos
    (Snowflake's function takes nanoseconds, not milliseconds)."""
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
1212
+
1213
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    """Render CAST, using TO_GEOGRAPHY / TO_GEOMETRY for geospatial targets."""
    geo_funcs = {
        exp.DataType.Type.GEOGRAPHY: "TO_GEOGRAPHY",
        exp.DataType.Type.GEOMETRY: "TO_GEOMETRY",
    }
    for geo_type, func_name in geo_funcs.items():
        if expression.is_type(geo_type):
            return self.func(func_name, expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
1220
+
1221
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Render TRY_CAST, falling back to CAST for non-string operands."""
    value = expression.this

    # Annotate on demand so we can reason about the operand's type below.
    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value, dialect=self.dialect)

    if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)
1234
+
1235
def log_sql(self, expression: exp.Log) -> str:
    """Single-argument LOG is the natural log, which Snowflake spells LN."""
    if expression.expression:
        return super().log_sql(expression)

    return self.func("LN", expression.this)
1240
+
1241
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Render UNNEST as TABLE(FLATTEN(INPUT => ...)) with FLATTEN's six
    output columns aliased (seq, key, path, index, value, this)."""
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    unnest_alias_columns = unnest_alias.columns if unnest_alias else []
    value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        # WITH OFFSET maps onto FLATTEN's index column.
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        value,
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    explode = f"TABLE(FLATTEN({table_input}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    # Outside FROM/JOIN, emit a scalar-style `<value> FROM ...` projection.
    value = "" if isinstance(expression.parent, (exp.From, exp.Join)) else f"{value} FROM "

    return f"{value}{explode}{alias}"
1272
+
1273
def show_sql(self, expression: exp.Show) -> str:
    """Render a SHOW statement with its optional clauses in Snowflake order."""
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    privileges = self.expressions(expression, key="privileges", flat=True)
    privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
1300
+
1301
def describe_sql(self, expression: exp.Describe) -> str:
    """Render DESCRIBE <kind> <target> [<exprs>], defaulting kind to TABLE."""
    kind = expression.args.get("kind") or "TABLE"
    target = self.sql(expression, "this")
    exprs = self.expressions(expression, flat=True)
    suffix = f" {exprs}" if exprs else ""
    return f"DESCRIBE {kind} {target}{suffix}"
1309
+
1310
def generatedasidentitycolumnconstraint_sql(
    self, expression: "exp.GeneratedAsIdentityColumnConstraint"
) -> str:
    """Snowflake spells identity columns as AUTOINCREMENT [START n] [INCREMENT n]."""
    parts = ["AUTOINCREMENT"]

    start = expression.args.get("start")
    if start:
        parts.append(f"START {start}")

    increment = expression.args.get("increment")
    if increment:
        parts.append(f"INCREMENT {increment}")

    return " ".join(parts)
1318
+
1319
def cluster_sql(self, expression: exp.Cluster) -> str:
    """Snowflake requires parentheses around the CLUSTER BY expression list."""
    keys = self.expressions(expression, flat=True)
    return f"CLUSTER BY ({keys})"
1321
+
1322
def struct_sql(self, expression: exp.Struct) -> str:
    """Lower a STRUCT literal to OBJECT_CONSTRUCT(key1, value1, key2, value2, ...).

    Positional (unnamed) fields get synthetic "_<index>" keys; identifier keys
    are converted to string literals, since object keys are strings in Snowflake.
    """
    pairs = []
    for idx, item in enumerate(expression.expressions):
        if isinstance(item, exp.PropertyEQ):
            key = exp.Literal.string(item.name) if isinstance(item.this, exp.Identifier) else item.this
            pairs.extend((key, item.expression))
        else:
            pairs.extend((exp.Literal.string(f"_{idx}"), item))

    return self.func("OBJECT_CONSTRUCT", *pairs)
1337
+
1338
@unsupported_args("weight", "accuracy")
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    """Render APPROX_PERCENTILE; weight/accuracy args are unsupported in Snowflake."""
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
1341
+
1342
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render ALTER ... SET with optional stage file-format/copy options and tags."""
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
1353
+
1354
def strtotime_sql(self, expression: exp.StrToTime):
    """Render STR_TO_TIME as [TRY_]TO_TIMESTAMP(<value>, <format>)."""
    func_name = "TRY_TO_TIMESTAMP" if expression.args.get("safe") else "TO_TIMESTAMP"
    return self.func(func_name, expression.this, self.format_time(expression))
1359
+
1360
def timestampsub_sql(self, expression: exp.TimestampSub):
    """Rewrite TIMESTAMP_SUB as TIMESTAMP_ADD with a negated amount."""
    negated_add = exp.TimestampAdd(
        this=expression.this,
        expression=expression.expression * -1,
        unit=expression.unit,
    )
    return self.sql(negated_add)
1368
+
1369
def jsonextract_sql(self, expression: exp.JSONExtract):
    """Render JSON_EXTRACT as GET_PATH, parsing string operands into JSON first."""
    this = expression.this

    # JSON strings are valid coming from other dialects such as BQ
    if this.is_string:
        this = exp.ParseJSON(this=this)

    return self.func("GET_PATH", this, expression.expression)
1378
+
1379
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    """Render TIME_TO_STR as TO_CHAR, casting non-timestamp inputs to TIMESTAMP."""
    value = expression.this
    if not isinstance(value, exp.TsOrDsToTimestamp):
        value = exp.cast(value, exp.DataType.Type.TIMESTAMP)

    return self.func("TO_CHAR", value, self.format_time(expression))
1385
+
1386
def datesub_sql(self, expression: exp.DateSub) -> str:
    """Render DATE_SUB as DATEADD with a negated amount (mutates in place)."""
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
1394
+
1395
+ def select_sql(self, expression: exp.Select) -> str:
1396
+ limit = expression.args.get("limit")
1397
+ offset = expression.args.get("offset")
1398
+ if offset and not limit:
1399
+ expression.limit(exp.Null(), copy=False)
1400
+ return super().select_sql(expression)
1401
+
1402
    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        """Render the name + column-schema portion of a CREATE statement.

        For Snowflake materialized views, COPY GRANTS must appear *before* the
        column list, so the property is relocated from its default POST_SCHEMA
        bucket in ``locations`` (mutated in place) and emitted here instead.
        """
        is_materialized = expression.find(exp.MaterializedProperty)
        copy_grants_property = expression.find(exp.CopyGrantsProperty)

        if expression.kind == "VIEW" and is_materialized and copy_grants_property:
            # For materialized views, COPY GRANTS is located *before* the columns list
            # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
            # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
            post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
            # Remove the property from the POST_SCHEMA bucket so the default
            # property generator does not emit it a second time.
            post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

            this_name = self.sql(expression.this, "this")
            copy_grants = self.sql(copy_grants_property)
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

        return super().createable_sql(expression, locations)
1423
+
1424
    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render ARRAY_AGG, moving any ORDER BY into a WITHIN GROUP clause."""
        this = expression.this

        # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
        # and add it later as part of the WITHIN GROUP clause
        order = this if isinstance(this, exp.Order) else None
        if order:
            # Detach the ordered operand and aggregate it directly; the Order
            # node itself is re-attached below via WithinGroup.
            expression.set("this", order.this.pop())

        expr_sql = super().arrayagg_sql(expression)

        if order:
            expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

        return expr_sql
1439
+
1440
    def array_sql(self, expression: exp.Array) -> str:
        """Render an Array constructor.

        The special case handles BigQuery-style ``ARRAY(SELECT AS STRUCT ...)``
        by rewriting the inner SELECT (in place) into
        ``(SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))``; everything else falls
        through to the plain inline array renderer.
        """
        expressions = expression.expressions

        first_expr = seq_get(expressions, 0)
        if isinstance(first_expr, exp.Select):
            # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
            if first_expr.text("kind").upper() == "STRUCT":
                object_construct_args = []
                for expr in first_expr.expressions:
                    # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                    # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                    name = expr.this if isinstance(expr, exp.Alias) else expr

                    object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                array_agg = exp.ArrayAgg(
                    this=_build_object_construct(args=object_construct_args)
                )

                # Strip the STRUCT kind and replace the projection list with the
                # single ARRAY_AGG aggregate, then wrap as a subquery.
                first_expr.set("kind", None)
                first_expr.set("expressions", [array_agg])

                return self.sql(first_expr.subquery())

        return inline_array_sql(self, expression)