fakesnow 0.9.36__py3-none-any.whl → 0.9.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- from pathlib import Path
4
- from string import Template
5
- from typing import ClassVar, cast
6
-
7
- import sqlglot
8
- from sqlglot import exp
9
-
10
- from fakesnow.transforms.copy_into import copy_into as copy_into
11
3
  from fakesnow.transforms.merge import merge as merge
12
4
  from fakesnow.transforms.show import (
13
5
  show_columns as show_columns,
@@ -20,1361 +12,56 @@ from fakesnow.transforms.show import (
20
12
  show_users as show_users,
21
13
  show_warehouses as show_warehouses,
22
14
  )
23
- from fakesnow.variables import Variables
24
-
25
- SUCCESS_NOP = sqlglot.parse_one("SELECT 'Statement executed successfully.' as status")
26
-
27
-
28
- def alias_in_join(expression: exp.Expression) -> exp.Expression:
29
- if (
30
- isinstance(expression, exp.Select)
31
- and (aliases := {e.args.get("alias"): e for e in expression.expressions if isinstance(e, exp.Alias)})
32
- and (joins := expression.args.get("joins"))
33
- ):
34
- j: exp.Join
35
- for j in joins:
36
- if (
37
- (on := j.args.get("on"))
38
- and (col := on.this)
39
- and (isinstance(col, exp.Column))
40
- and (alias := aliases.get(col.this))
41
- # don't rewrite col with table identifier
42
- and not col.table
43
- ):
44
- col.args["this"] = alias.this
45
-
46
- return expression
47
-
48
-
49
- def alter_table_strip_cluster_by(expression: exp.Expression) -> exp.Expression:
50
- """Turn alter table cluster by into a no-op"""
51
- if (
52
- isinstance(expression, exp.Alter)
53
- and (actions := expression.args.get("actions"))
54
- and len(actions) == 1
55
- and (isinstance(actions[0], exp.Cluster))
56
- ):
57
- return SUCCESS_NOP
58
- return expression
59
-
60
-
61
- def array_size(expression: exp.Expression) -> exp.Expression:
62
- if isinstance(expression, exp.ArraySize):
63
- # return null if not json array
64
- jal = exp.Anonymous(this="json_array_length", expressions=[expression.this])
65
- is_json_array = exp.EQ(
66
- this=exp.Anonymous(this="json_type", expressions=[expression.this]),
67
- expression=exp.Literal(this="ARRAY", is_string=True),
68
- )
69
- return exp.Case(ifs=[exp.If(this=is_json_array, true=jal)])
70
-
71
- return expression
72
-
73
-
74
- def array_agg(expression: exp.Expression) -> exp.Expression:
75
- if isinstance(expression, exp.ArrayAgg) and not isinstance(expression.parent, exp.Window):
76
- return exp.Anonymous(this="TO_JSON", expressions=[expression])
77
-
78
- if isinstance(expression, exp.Window) and isinstance(expression.this, exp.ArrayAgg):
79
- return exp.Anonymous(this="TO_JSON", expressions=[expression])
80
-
81
- return expression
82
-
83
-
84
- def array_agg_within_group(expression: exp.Expression) -> exp.Expression:
85
- """Convert ARRAY_AGG(<expr>) WITHIN GROUP (<order-by-clause>) to ARRAY_AGG( <expr> <order-by-clause> )
86
- Snowflake uses ARRAY_AGG(<expr>) WITHIN GROUP (ORDER BY <order-by-clause>)
87
- to order the array, but DuckDB uses ARRAY_AGG( <expr> <order-by-clause> ).
88
- See;
89
- - https://docs.snowflake.com/en/sql-reference/functions/array_agg
90
- - https://duckdb.org/docs/sql/aggregates.html#order-by-clause-in-aggregate-functions
91
- Note; Snowflake has following restriction;
92
- If you specify DISTINCT and WITHIN GROUP, both must refer to the same column.
93
- Transformation does not handle this restriction.
94
- """
95
- if (
96
- isinstance(expression, exp.WithinGroup)
97
- and (agg := expression.find(exp.ArrayAgg))
98
- and (order := expression.expression)
99
- ):
100
- return exp.ArrayAgg(
101
- this=exp.Order(
102
- this=agg.this,
103
- expressions=order.expressions,
104
- )
105
- )
106
-
107
- return expression
108
-
109
-
110
- def create_clone(expression: exp.Expression) -> exp.Expression:
111
- """Transform create table clone to create table as select."""
112
-
113
- if (
114
- isinstance(expression, exp.Create)
115
- and str(expression.args.get("kind")).upper() == "TABLE"
116
- and (clone := expression.find(exp.Clone))
117
- ):
118
- return exp.Create(
119
- this=expression.this,
120
- kind="TABLE",
121
- expression=exp.Select(
122
- expressions=[
123
- exp.Star(),
124
- ],
125
- **{"from": exp.From(this=clone.this)},
126
- ),
127
- )
128
- return expression
129
-
130
-
131
- # TODO: move this into a Dialect as a transpilation
132
- def create_database(expression: exp.Expression, db_path: Path | None = None) -> exp.Expression:
133
- """Transform create database to attach database.
134
-
135
- Example:
136
- >>> import sqlglot
137
- >>> sqlglot.parse_one("CREATE database foo").transform(create_database).sql()
138
- 'ATTACH DATABASE ':memory:' as foo'
139
- Args:
140
- expression (exp.Expression): the expression that will be transformed.
141
-
142
- Returns:
143
- exp.Expression: The transformed expression, with the database name stored in the create_db_name arg.
144
- """
145
-
146
- if isinstance(expression, exp.Create) and str(expression.args.get("kind")).upper() == "DATABASE":
147
- ident = expression.find(exp.Identifier)
148
- assert ident, f"No identifier in {expression.sql}"
149
- db_name = ident.this
150
- db_file = f"{db_path / db_name}.db" if db_path else ":memory:"
151
-
152
- if_not_exists = "IF NOT EXISTS " if expression.args.get("exists") else ""
153
-
154
- return exp.Command(
155
- this="ATTACH",
156
- expression=exp.Literal(this=f"{if_not_exists}DATABASE '{db_file}' AS {db_name}", is_string=True),
157
- create_db_name=db_name,
158
- )
159
-
160
- return expression
161
-
162
-
163
- SQL_DESCRIBE_TABLE = Template(
164
- """
165
- SELECT
166
- column_name AS "name",
167
- CASE WHEN data_type = 'NUMBER' THEN 'NUMBER(' || numeric_precision || ',' || numeric_scale || ')'
168
- WHEN data_type = 'TEXT' THEN 'VARCHAR(' || coalesce(character_maximum_length,16777216) || ')'
169
- WHEN data_type = 'TIMESTAMP_NTZ' THEN 'TIMESTAMP_NTZ(9)'
170
- WHEN data_type = 'TIMESTAMP_TZ' THEN 'TIMESTAMP_TZ(9)'
171
- WHEN data_type = 'TIME' THEN 'TIME(9)'
172
- WHEN data_type = 'BINARY' THEN 'BINARY(8388608)'
173
- ELSE data_type END AS "type",
174
- 'COLUMN' AS "kind",
175
- CASE WHEN is_nullable = 'YES' THEN 'Y' ELSE 'N' END AS "null?",
176
- column_default AS "default",
177
- 'N' AS "primary key",
178
- 'N' AS "unique key",
179
- NULL::VARCHAR AS "check",
180
- NULL::VARCHAR AS "expression",
181
- NULL::VARCHAR AS "comment",
182
- NULL::VARCHAR AS "policy name",
183
- NULL::JSON AS "privacy domain",
184
- FROM _fs_information_schema._fs_columns
185
- WHERE table_catalog = '${catalog}' AND table_schema = '${schema}' AND table_name = '${table}'
186
- ORDER BY ordinal_position
187
- """
188
- )
189
-
190
- SQL_DESCRIBE_INFO_SCHEMA = Template(
191
- """
192
- SELECT
193
- column_name AS "name",
194
- column_type as "type",
195
- 'COLUMN' AS "kind",
196
- CASE WHEN "null" = 'YES' THEN 'Y' ELSE 'N' END AS "null?",
197
- NULL::VARCHAR AS "default",
198
- 'N' AS "primary key",
199
- 'N' AS "unique key",
200
- NULL::VARCHAR AS "check",
201
- NULL::VARCHAR AS "expression",
202
- NULL::VARCHAR AS "comment",
203
- NULL::VARCHAR AS "policy name",
204
- NULL::JSON AS "privacy domain",
205
- FROM (DESCRIBE ${view})
206
- """
15
+ from fakesnow.transforms.transforms import (
16
+ SUCCESS_NOP as SUCCESS_NOP,
17
+ alias_in_join as alias_in_join,
18
+ alter_table_strip_cluster_by as alter_table_strip_cluster_by,
19
+ array_agg as array_agg,
20
+ array_agg_within_group as array_agg_within_group,
21
+ array_size as array_size,
22
+ create_clone as create_clone,
23
+ create_database as create_database,
24
+ create_user as create_user,
25
+ dateadd_date_cast as dateadd_date_cast,
26
+ dateadd_string_literal_timestamp_cast as dateadd_string_literal_timestamp_cast,
27
+ datediff_string_literal_timestamp_cast as datediff_string_literal_timestamp_cast,
28
+ describe_table as describe_table,
29
+ drop_schema_cascade as drop_schema_cascade,
30
+ extract_comment_on_columns as extract_comment_on_columns,
31
+ extract_comment_on_table as extract_comment_on_table,
32
+ extract_text_length as extract_text_length,
33
+ flatten as flatten,
34
+ flatten_value_cast_as_varchar as flatten_value_cast_as_varchar,
35
+ float_to_double as float_to_double,
36
+ identifier as identifier,
37
+ indices_to_json_extract as indices_to_json_extract,
38
+ information_schema_databases as information_schema_databases,
39
+ information_schema_fs_columns as information_schema_fs_columns,
40
+ information_schema_fs_tables as information_schema_fs_tables,
41
+ information_schema_fs_views as information_schema_fs_views,
42
+ integer_precision as integer_precision,
43
+ json_extract_cased_as_varchar as json_extract_cased_as_varchar,
44
+ json_extract_cast_as_varchar as json_extract_cast_as_varchar,
45
+ json_extract_precedence as json_extract_precedence,
46
+ object_construct as object_construct,
47
+ random as random,
48
+ regex_replace as regex_replace,
49
+ regex_substr as regex_substr,
50
+ sample as sample,
51
+ semi_structured_types as semi_structured_types,
52
+ set_schema as set_schema,
53
+ sha256 as sha256,
54
+ split as split,
55
+ tag as tag,
56
+ timestamp_ntz as timestamp_ntz,
57
+ to_date as to_date,
58
+ to_decimal as to_decimal,
59
+ to_timestamp as to_timestamp,
60
+ to_timestamp_ntz as to_timestamp_ntz,
61
+ trim_cast_varchar as trim_cast_varchar,
62
+ try_parse_json as try_parse_json,
63
+ try_to_decimal as try_to_decimal,
64
+ update_variables as update_variables,
65
+ upper_case_unquoted_identifiers as upper_case_unquoted_identifiers,
66
+ values_columns as values_columns,
207
67
  )
208
-
209
-
210
- def describe_table(
211
- expression: exp.Expression, current_database: str | None = None, current_schema: str | None = None
212
- ) -> exp.Expression:
213
- """Redirect to the information_schema._fs_columns to match snowflake.
214
-
215
- See https://docs.snowflake.com/en/sql-reference/sql/desc-table
216
- """
217
-
218
- if (
219
- isinstance(expression, exp.Describe)
220
- and (kind := expression.args.get("kind"))
221
- and isinstance(kind, str)
222
- and kind.upper() in ("TABLE", "VIEW")
223
- and (table := expression.find(exp.Table))
224
- ):
225
- catalog = table.catalog or current_database
226
- schema = table.db or current_schema
227
-
228
- if schema and schema.upper() == "_FS_INFORMATION_SCHEMA":
229
- # describing an information_schema view
230
- # (schema already transformed from information_schema -> _fs_information_schema)
231
- return sqlglot.parse_one(SQL_DESCRIBE_INFO_SCHEMA.substitute(view=f"{schema}.{table.name}"), read="duckdb")
232
-
233
- return sqlglot.parse_one(
234
- SQL_DESCRIBE_TABLE.substitute(catalog=catalog, schema=schema, table=table.name),
235
- read="duckdb",
236
- )
237
-
238
- return expression
239
-
240
-
241
- def drop_schema_cascade(expression: exp.Expression) -> exp.Expression: #
242
- """Drop schema cascade.
243
-
244
- By default duckdb won't delete a schema if it contains tables, whereas snowflake will.
245
- So we add the cascade keyword to mimic snowflake's behaviour.
246
-
247
- Example:
248
- >>> import sqlglot
249
- >>> sqlglot.parse_one("DROP SCHEMA schema1").transform(remove_comment).sql()
250
- 'DROP SCHEMA schema1 cascade'
251
- Args:
252
- expression (exp.Expression): the expression that will be transformed.
253
-
254
- Returns:
255
- exp.Expression: The transformed expression.
256
- """
257
-
258
- if (
259
- not isinstance(expression, exp.Drop)
260
- or not (kind := expression.args.get("kind"))
261
- or not isinstance(kind, str)
262
- or kind.upper() != "SCHEMA"
263
- ):
264
- return expression
265
-
266
- new = expression.copy()
267
- new.args["cascade"] = True
268
- return new
269
-
270
-
271
- def dateadd_date_cast(expression: exp.Expression) -> exp.Expression:
272
- """Cast result of DATEADD to DATE if the given expression is a cast to DATE
273
- and unit is either DAY, WEEK, MONTH or YEAR to mimic Snowflake's DATEADD
274
- behaviour.
275
-
276
- Snowflake;
277
- SELECT DATEADD(DAY, 3, '2023-03-03'::DATE) as D;
278
- D: 2023-03-06 (DATE)
279
- DuckDB;
280
- SELECT CAST('2023-03-03' AS DATE) + INTERVAL 3 DAY AS D
281
- D: 2023-03-06 00:00:00 (TIMESTAMP)
282
- """
283
-
284
- if not isinstance(expression, exp.DateAdd):
285
- return expression
286
-
287
- if expression.unit is None:
288
- return expression
289
-
290
- if not isinstance(expression.unit.this, str):
291
- return expression
292
-
293
- if (unit := expression.unit.this.upper()) and unit.upper() not in {"DAY", "WEEK", "MONTH", "YEAR"}:
294
- return expression
295
-
296
- if not isinstance(expression.this, exp.Cast):
297
- return expression
298
-
299
- if expression.this.to.this != exp.DataType.Type.DATE:
300
- return expression
301
-
302
- return exp.Cast(
303
- this=expression,
304
- to=exp.DataType(this=exp.DataType.Type.DATE, nested=False, prefix=False),
305
- )
306
-
307
-
308
- def dateadd_string_literal_timestamp_cast(expression: exp.Expression) -> exp.Expression:
309
- """Snowflake's DATEADD function implicitly casts string literals to
310
- timestamps regardless of unit.
311
- """
312
- if not isinstance(expression, exp.DateAdd):
313
- return expression
314
-
315
- if not isinstance(expression.this, exp.Literal) or not expression.this.is_string:
316
- return expression
317
-
318
- new_dateadd = expression.copy()
319
- new_dateadd.set(
320
- "this",
321
- exp.Cast(
322
- this=expression.this,
323
- # TODO: support TIMESTAMP_TYPE_MAPPING of TIMESTAMP_LTZ/TZ
324
- to=exp.DataType(this=exp.DataType.Type.TIMESTAMP, nested=False, prefix=False),
325
- ),
326
- )
327
-
328
- return new_dateadd
329
-
330
-
331
- def datediff_string_literal_timestamp_cast(expression: exp.Expression) -> exp.Expression:
332
- """Snowflake's DATEDIFF function implicitly casts string literals to
333
- timestamps regardless of unit.
334
- """
335
-
336
- if not isinstance(expression, exp.DateDiff):
337
- return expression
338
-
339
- op1 = expression.this.copy()
340
- op2 = expression.expression.copy()
341
-
342
- if isinstance(op1, exp.Literal) and op1.is_string:
343
- op1 = exp.Cast(
344
- this=op1,
345
- # TODO: support TIMESTAMP_TYPE_MAPPING of TIMESTAMP_LTZ/TZ
346
- to=exp.DataType(this=exp.DataType.Type.TIMESTAMP, nested=False, prefix=False),
347
- )
348
-
349
- if isinstance(op2, exp.Literal) and op2.is_string:
350
- op2 = exp.Cast(
351
- this=op2,
352
- # TODO: support TIMESTAMP_TYPE_MAPPING of TIMESTAMP_LTZ/TZ
353
- to=exp.DataType(this=exp.DataType.Type.TIMESTAMP, nested=False, prefix=False),
354
- )
355
-
356
- new_datediff = expression.copy()
357
- new_datediff.set("this", op1)
358
- new_datediff.set("expression", op2)
359
-
360
- return new_datediff
361
-
362
-
363
- def extract_comment_on_columns(expression: exp.Expression) -> exp.Expression:
364
- """Extract column comments, removing it from the Expression.
365
-
366
- duckdb doesn't support comments. So we remove them from the expression and store them in the column_comment arg.
367
- We also replace the transform the expression to NOP if the statement can't be executed by duckdb.
368
-
369
- Args:
370
- expression (exp.Expression): the expression that will be transformed.
371
-
372
- Returns:
373
- exp.Expression: The transformed expression, with any comment stored in the new 'table_comment' arg.
374
- """
375
-
376
- if isinstance(expression, exp.Alter) and (actions := expression.args.get("actions")):
377
- new_actions: list[exp.Expression] = []
378
- col_comments: list[tuple[str, str]] = []
379
- for a in actions:
380
- if isinstance(a, exp.AlterColumn) and (comment := a.args.get("comment")):
381
- col_comments.append((a.name, comment.this))
382
- else:
383
- new_actions.append(a)
384
- if not new_actions:
385
- expression = SUCCESS_NOP.copy()
386
- else:
387
- expression.set("actions", new_actions)
388
- expression.args["col_comments"] = col_comments
389
-
390
- return expression
391
-
392
-
393
- def extract_comment_on_table(expression: exp.Expression) -> exp.Expression:
394
- """Extract table comment, removing it from the Expression.
395
-
396
- duckdb doesn't support comments. So we remove them from the expression and store them in the table_comment arg.
397
- We also replace the transform the expression to NOP if the statement can't be executed by duckdb.
398
-
399
- Args:
400
- expression (exp.Expression): the expression that will be transformed.
401
-
402
- Returns:
403
- exp.Expression: The transformed expression, with any comment stored in the new 'table_comment' arg.
404
- """
405
-
406
- if isinstance(expression, exp.Create) and (table := expression.find(exp.Table)):
407
- comment = None
408
- if props := cast(exp.Properties, expression.args.get("properties")):
409
- other_props = []
410
- for p in props.expressions:
411
- if isinstance(p, exp.SchemaCommentProperty) and (isinstance(p.this, (exp.Literal, exp.Var))):
412
- comment = p.this.this
413
- else:
414
- other_props.append(p)
415
-
416
- new = expression.copy()
417
- new_props: exp.Properties = new.args["properties"]
418
- new_props.set("expressions", other_props)
419
- new.args["table_comment"] = (table, comment)
420
- return new
421
- elif (
422
- isinstance(expression, exp.Comment)
423
- and (cexp := expression.args.get("expression"))
424
- and (table := expression.find(exp.Table))
425
- ):
426
- new = SUCCESS_NOP.copy()
427
- new.args["table_comment"] = (table, cexp.this)
428
- return new
429
- elif (
430
- isinstance(expression, exp.Alter)
431
- and (sexp := expression.find(exp.AlterSet))
432
- and (scp := sexp.find(exp.SchemaCommentProperty))
433
- and isinstance(scp.this, exp.Literal)
434
- and (table := expression.find(exp.Table))
435
- ):
436
- new = SUCCESS_NOP.copy()
437
- new.args["table_comment"] = (table, scp.this.this)
438
- return new
439
-
440
- return expression
441
-
442
-
443
- def extract_text_length(expression: exp.Expression) -> exp.Expression:
444
- """Extract length of text columns.
445
-
446
- duckdb doesn't have fixed-sized text types. So we capture the size of text types and store that in the
447
- character_maximum_length arg.
448
-
449
- Args:
450
- expression (exp.Expression): the expression that will be transformed.
451
-
452
- Returns:
453
- exp.Expression: The original expression, with any text lengths stored in the new 'text_lengths' arg.
454
- """
455
-
456
- if isinstance(expression, (exp.Create, exp.Alter)):
457
- text_lengths = []
458
-
459
- # exp.Select is for a ctas, exp.Schema is a plain definition
460
- if cols := expression.find(exp.Select, exp.Schema):
461
- expressions = cols.expressions
462
- else:
463
- # alter table
464
- expressions = expression.args.get("actions") or []
465
- for e in expressions:
466
- if dts := [
467
- dt for dt in e.find_all(exp.DataType) if dt.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.TEXT)
468
- ]:
469
- col_name = e.alias if isinstance(e, exp.Alias) else e.name
470
- if len(dts) == 1 and (dt_size := dts[0].find(exp.DataTypeParam)):
471
- size = (
472
- isinstance(dt_size.this, exp.Literal)
473
- and isinstance(dt_size.this.this, str)
474
- and int(dt_size.this.this)
475
- )
476
- else:
477
- size = 16777216
478
- text_lengths.append((col_name, size))
479
-
480
- if text_lengths:
481
- expression.args["text_lengths"] = text_lengths
482
-
483
- return expression
484
-
485
-
486
- def flatten(expression: exp.Expression) -> exp.Expression:
487
- """Flatten an array.
488
-
489
- See https://docs.snowflake.com/en/sql-reference/functions/flatten
490
-
491
- TODO: support objects.
492
- """
493
- if (
494
- isinstance(expression, exp.Lateral)
495
- and isinstance(expression.this, exp.Explode)
496
- and (alias := expression.args.get("alias"))
497
- # always true; when no explicit alias provided this will be flattened
498
- and isinstance(alias, exp.TableAlias)
499
- ):
500
- explode_expression = expression.this.this.expression
501
-
502
- value = exp.Cast(
503
- this=explode_expression,
504
- to=exp.DataType(
505
- this=exp.DataType.Type.ARRAY,
506
- expressions=[exp.DataType(this=exp.DataType.Type.JSON, nested=False, prefix=False)],
507
- nested=True,
508
- ),
509
- )
510
-
511
- return exp.Subquery(
512
- this=exp.Select(
513
- expressions=[
514
- exp.Unnest(
515
- expressions=[value],
516
- alias=exp.Identifier(this="VALUE", quoted=False),
517
- ),
518
- exp.Alias(
519
- this=exp.Sub(
520
- this=exp.Anonymous(
521
- this="generate_subscripts", expressions=[value, exp.Literal(this="1", is_string=False)]
522
- ),
523
- expression=exp.Literal(this="1", is_string=False),
524
- ),
525
- alias=exp.Identifier(this="INDEX", quoted=False),
526
- ),
527
- ],
528
- ),
529
- alias=exp.TableAlias(this=alias.this),
530
- )
531
-
532
- return expression
533
-
534
-
535
- def flatten_value_cast_as_varchar(expression: exp.Expression) -> exp.Expression:
536
- """Return raw unquoted string when flatten VALUE is cast to varchar.
537
-
538
- Returns a raw string using the Duckdb ->> operator, aka the json_extract_string function, see
539
- https://duckdb.org/docs/extensions/json#json-extraction-functions
540
- """
541
- if (
542
- isinstance(expression, exp.Cast)
543
- and isinstance(expression.this, exp.Column)
544
- and expression.this.name.upper() == "VALUE"
545
- and expression.to.this in [exp.DataType.Type.VARCHAR, exp.DataType.Type.TEXT]
546
- and (select := expression.find_ancestor(exp.Select))
547
- and select.find(exp.Explode)
548
- ):
549
- return exp.JSONExtractScalar(this=expression.this, expression=exp.JSONPath(expressions=[exp.JSONPathRoot()]))
550
-
551
- return expression
552
-
553
-
554
- def float_to_double(expression: exp.Expression) -> exp.Expression:
555
- """Convert float to double for 64 bit precision.
556
-
557
- Snowflake floats are all 64 bit (ie: double)
558
- see https://docs.snowflake.com/en/sql-reference/data-types-numeric#float-float4-float8
559
- """
560
-
561
- if isinstance(expression, exp.DataType) and expression.this == exp.DataType.Type.FLOAT:
562
- expression.args["this"] = exp.DataType.Type.DOUBLE
563
-
564
- return expression
565
-
566
-
567
- def identifier(expression: exp.Expression) -> exp.Expression:
568
- """Convert identifier function to an identifier.
569
-
570
- See https://docs.snowflake.com/en/sql-reference/identifier-literal
571
- """
572
-
573
- if (
574
- isinstance(expression, exp.Anonymous)
575
- and isinstance(expression.this, str)
576
- and expression.this.upper() == "IDENTIFIER"
577
- ):
578
- expression = exp.Identifier(this=expression.expressions[0].this, quoted=False)
579
-
580
- return expression
581
-
582
-
583
- def indices_to_json_extract(expression: exp.Expression) -> exp.Expression:
584
- """Convert indices on objects and arrays to json_extract or json_extract_string
585
-
586
- Supports Snowflake array indices, see
587
- https://docs.snowflake.com/en/sql-reference/data-types-semistructured#accessing-elements-of-an-array-by-index-or-by-slice
588
- and object indices, see
589
- https://docs.snowflake.com/en/sql-reference/data-types-semistructured#accessing-elements-of-an-object-by-key
590
-
591
- Duckdb uses the -> operator, aka the json_extract function, see
592
- https://duckdb.org/docs/extensions/json#json-extraction-functions
593
-
594
- This works for Snowflake arrays too because we convert them to JSON in duckdb.
595
- """
596
- if (
597
- isinstance(expression, exp.Bracket)
598
- and len(expression.expressions) == 1
599
- and (index := expression.expressions[0])
600
- and isinstance(index, exp.Literal)
601
- and index.this
602
- ):
603
- if isinstance(expression.parent, exp.Cast) and expression.parent.to.this == exp.DataType.Type.VARCHAR:
604
- # If the parent is a cast to varchar, we need to use JSONExtractScalar
605
- # to get the unquoted string value.
606
- klass = exp.JSONExtractScalar
607
- else:
608
- klass = exp.JSONExtract
609
- if index.is_string:
610
- return klass(this=expression.this, expression=exp.Literal(this=f"$.{index.this}", is_string=True))
611
- else:
612
- return klass(this=expression.this, expression=exp.Literal(this=f"$[{index.this}]", is_string=True))
613
-
614
- return expression
615
-
616
-
617
- def information_schema_fs_columns(expression: exp.Expression) -> exp.Expression:
618
- """Redirect to the _FS_COLUMNS view which has metadata that matches snowflake.
619
-
620
- Because duckdb doesn't store character_maximum_length or character_octet_length.
621
- """
622
-
623
- if (
624
- isinstance(expression, exp.Table)
625
- and expression.db
626
- and expression.db.upper() == "INFORMATION_SCHEMA"
627
- and expression.name
628
- and expression.name.upper() == "COLUMNS"
629
- ):
630
- expression.set("this", exp.Identifier(this="_FS_COLUMNS", quoted=False))
631
- expression.set("db", exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False))
632
-
633
- return expression
634
-
635
-
636
- def information_schema_databases(
637
- expression: exp.Expression,
638
- current_schema: str | None = None,
639
- ) -> exp.Expression:
640
- if (
641
- isinstance(expression, exp.Table)
642
- and (
643
- expression.db.upper() == "INFORMATION_SCHEMA"
644
- or (current_schema and current_schema.upper() == "INFORMATION_SCHEMA")
645
- )
646
- and expression.name.upper() == "DATABASES"
647
- ):
648
- return exp.Table(
649
- this=exp.Identifier(this="DATABASES", quoted=False),
650
- db=exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False),
651
- )
652
- return expression
653
-
654
-
655
- def information_schema_fs_tables(
656
- expression: exp.Expression,
657
- ) -> exp.Expression:
658
- """Use _FS_TABLES to access additional metadata columns (eg: comment)."""
659
-
660
- if (
661
- isinstance(expression, exp.Select)
662
- and (tbl := expression.find(exp.Table))
663
- and tbl.db.upper() == "INFORMATION_SCHEMA"
664
- and tbl.name.upper() == "TABLES"
665
- ):
666
- tbl.set("this", exp.Identifier(this="_FS_TABLES", quoted=False))
667
- tbl.set("db", exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False))
668
-
669
- return expression
670
-
671
-
672
- def information_schema_fs_views(expression: exp.Expression) -> exp.Expression:
673
- """Use _FS_VIEWS to return Snowflake's version instead of duckdb's."""
674
-
675
- if (
676
- isinstance(expression, exp.Select)
677
- and (tbl := expression.find(exp.Table))
678
- and tbl.db.upper() == "INFORMATION_SCHEMA"
679
- and tbl.name.upper() == "VIEWS"
680
- ):
681
- tbl.set("this", exp.Identifier(this="_FS_VIEWS", quoted=False))
682
- tbl.set("db", exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False))
683
-
684
- return expression
685
-
686
-
687
- NUMBER_38_0 = [
688
- exp.DataTypeParam(this=exp.Literal(this="38", is_string=False)),
689
- exp.DataTypeParam(this=exp.Literal(this="0", is_string=False)),
690
- ]
691
-
692
-
693
- def integer_precision(expression: exp.Expression) -> exp.Expression:
694
- """Convert integers and number(38,0) to bigint.
695
-
696
- So fetch_all will return int and dataframes will return them with a dtype of int64.
697
- """
698
- if (
699
- isinstance(expression, exp.DataType)
700
- and expression.this == exp.DataType.Type.DECIMAL
701
- and (not expression.expressions or expression.expressions == NUMBER_38_0)
702
- ) or expression.this in (exp.DataType.Type.INT, exp.DataType.Type.SMALLINT, exp.DataType.Type.TINYINT):
703
- return exp.DataType(
704
- this=exp.DataType.Type.BIGINT,
705
- nested=False,
706
- prefix=False,
707
- )
708
-
709
- return expression
710
-
711
-
712
- def json_extract_cased_as_varchar(expression: exp.Expression) -> exp.Expression:
713
- """Convert json to varchar inside JSONExtract.
714
-
715
- Snowflake case conversion (upper/lower) turns variant into varchar. This
716
- mimics that behaviour within get_path.
717
-
718
- TODO: a generic version that works on any variant, not just JSONExtract
719
-
720
- Returns a raw string using the Duckdb ->> operator, aka the json_extract_string function, see
721
- https://duckdb.org/docs/extensions/json#json-extraction-functions
722
- """
723
- if (
724
- isinstance(expression, (exp.Upper, exp.Lower))
725
- and (gp := expression.this)
726
- and isinstance(gp, exp.JSONExtract)
727
- and (path := gp.expression)
728
- and isinstance(path, exp.JSONPath)
729
- ):
730
- expression.set("this", exp.JSONExtractScalar(this=gp.this, expression=path))
731
-
732
- return expression
733
-
734
-
735
- def json_extract_cast_as_varchar(expression: exp.Expression) -> exp.Expression:
736
- """Return raw unquoted string when casting json extraction to varchar.
737
-
738
- Returns a raw string using the Duckdb ->> operator, aka the json_extract_string function, see
739
- https://duckdb.org/docs/extensions/json#json-extraction-functions
740
- """
741
- if (
742
- isinstance(expression, exp.Cast)
743
- and (je := expression.this)
744
- and isinstance(je, exp.JSONExtract)
745
- and (path := je.expression)
746
- and isinstance(path, exp.JSONPath)
747
- ):
748
- je.replace(exp.JSONExtractScalar(this=je.this, expression=path))
749
- return expression
750
-
751
-
752
- def json_extract_precedence(expression: exp.Expression) -> exp.Expression:
753
- """Associate json extract operands to avoid duckdb operators of higher precedence transforming the expression.
754
-
755
- See https://github.com/tekumara/fakesnow/issues/53
756
- """
757
- if isinstance(expression, (exp.JSONExtract, exp.JSONExtractScalar)):
758
- return exp.Paren(this=expression)
759
- return expression
760
-
761
-
762
- def random(expression: exp.Expression) -> exp.Expression:
763
- """Convert random() and random(seed).
764
-
765
- Snowflake random() is an signed 64 bit integer.
766
- Duckdb random() is a double between 0 and 1 and uses setseed() to set the seed.
767
- """
768
- if isinstance(expression, exp.Select) and (rand := expression.find(exp.Rand)):
769
- # shift result to between min and max signed 64bit integer
770
- new_rand = exp.Cast(
771
- this=exp.Paren(
772
- this=exp.Mul(
773
- this=exp.Paren(this=exp.Sub(this=exp.Rand(), expression=exp.Literal(this="0.5", is_string=False))),
774
- expression=exp.Literal(this="9223372036854775807", is_string=False),
775
- )
776
- ),
777
- to=exp.DataType(this=exp.DataType.Type.BIGINT, nested=False, prefix=False),
778
- )
779
-
780
- rand.replace(new_rand)
781
-
782
- # convert seed to double between 0 and 1 by dividing by max INTEGER (int32)
783
- # (not max BIGINT (int64) because we don't have enough floating point precision to distinguish seeds)
784
- # then attach to SELECT as the seed arg
785
- # (we can't attach it to exp.Rand because it will be rendered in the sql)
786
- if rand.this and isinstance(rand.this, exp.Literal):
787
- expression.args["seed"] = f"{rand.this}/2147483647-0.5"
788
-
789
- return expression
790
-
791
-
792
- def sample(expression: exp.Expression) -> exp.Expression:
793
- if isinstance(expression, exp.TableSample) and not expression.args.get("method"):
794
- # set snowflake default (bernoulli) rather than use the duckdb default (system)
795
- # because bernoulli works better at small row sizes like we have in tests
796
- expression.set("method", exp.Var(this="BERNOULLI"))
797
-
798
- return expression
799
-
800
-
801
def object_construct(expression: exp.Expression) -> exp.Expression:
    """Rewrite an OBJECT_CONSTRUCT struct as a TO_JSON call, dropping NULL pairs.

    Internally snowflake stores OBJECT types as a json string, so the duckdb JSON type is the closest match.
    Key/value members (exp.PropertyEQ) whose key or value is a NULL literal are omitted from the result;
    every other member is kept unchanged and in its original order.

    See https://docs.snowflake.com/en/sql-reference/functions/object_construct
    """

    if not isinstance(expression, exp.Struct):
        return expression

    def has_null_side(pair: exp.PropertyEQ) -> bool:
        key, value = pair.left, pair.right
        return isinstance(key, exp.Null) or isinstance(value, exp.Null)

    kept = [
        member
        for member in expression.expressions
        if not (isinstance(member, exp.PropertyEQ) and has_null_side(member))
    ]

    # copy first, then swap in the filtered members, so the input struct keeps its own member list
    json_source = expression.copy()
    json_source.set("expressions", kept)
    return exp.Anonymous(this="TO_JSON", expressions=[json_source])
832
-
833
-
834
def regex_replace(expression: exp.Expression) -> exp.Expression:
    """Adapt a snowflake REGEXP_REPLACE whose pattern is a literal so it runs on duckdb.

    Only RegexpReplace nodes with a literal pattern are touched; anything else is returned as-is.

    Raises:
        NotImplementedError: if the node carries more than three args, ie: position, occurrence
            or parameters were supplied.
    """

    literal_pattern = isinstance(expression, exp.RegexpReplace) and isinstance(expression.expression, exp.Literal)
    if not literal_pattern:
        return expression

    if len(expression.args) > 3:
        # see https://docs.snowflake.com/en/sql-reference/functions/regexp_replace
        raise NotImplementedError(
            "REGEXP_REPLACE with additional parameters (eg: <position>, <occurrence>, <parameters>)"
        )

    # snowflake needs backslashes escaped in single-quoted string constants, duckdb does not
    # see https://docs.snowflake.com/en/sql-reference/functions-regexp#label-regexp-escape-character-caveats
    unescaped = expression.expression.this.replace("\\\\", "\\")
    expression.args["expression"] = exp.Literal(this=unescaped, is_string=True)

    # snowflake replaces with '' when no replacement string is given
    if not expression.args.get("replacement"):
        expression.args["replacement"] = exp.Literal(this="", is_string=True)

    # snowflake replaces every match, so ask duckdb for a global replace
    expression.args["modifiers"] = exp.Literal(this="g", is_string=True)
    return expression
858
-
859
-
860
def regex_substr(expression: exp.Expression) -> exp.Expression:
    """Transform regex_substr expressions from snowflake to duckdb.

    The call is rewritten as ``regexp_extract_all(subject[position:], pattern, group, parameters)[occurrence]``.

    See https://docs.snowflake.com/en/sql-reference/functions/regexp_substr

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: The bracketed regexp_extract_all call for a RegexpExtract node,
            otherwise expression is returned as-is.
    """

    if not isinstance(expression, exp.RegexpExtract):
        return expression

    subject = expression.this

    # pattern: snowflake requires escaping backslashes in single-quoted string constants, but duckdb doesn't
    # see https://docs.snowflake.com/en/sql-reference/functions-regexp#label-regexp-escape-character-caveats
    # Only literals can be unescaped here; a column or expression pattern is passed through untouched.
    pattern = expression.expression
    if isinstance(pattern, exp.Literal):
        pattern.args["this"] = pattern.this.replace("\\\\", "\\")

    # number of characters from the beginning of the string where the function starts searching for matches
    position = expression.args.get("position") or exp.Literal(this="1", is_string=False)

    # which occurrence of the pattern to match
    occurrence_arg = expression.args.get("occurrence")
    occurrence_num = int(occurrence_arg.this) if occurrence_arg else 1

    # the duckdb dialect increments bracket (ie: index) expressions by 1 because duckdb is 1-indexed,
    # so we need to compensate by subtracting 1
    occurrence = exp.Literal(this=str(occurrence_num - 1), is_string=False)

    parameters = expression.args.get("parameters")
    snowflake_flags = parameters.this if parameters and isinstance(parameters.this, str) else ""

    # Remember whether 'e' (extract sub-matches) was requested BEFORE stripping it, because it
    # decides the default group below.
    extract_submatch = "e" in snowflake_flags

    # 'e' parameter doesn't make sense for duckdb
    regex_parameters = exp.Literal(this=snowflake_flags.replace("e", ""), is_string=True)

    group_num = expression.args.get("group")
    if not group_num:
        # with 'e' and no explicit group snowflake extracts the first group, otherwise the whole match
        group_num = exp.Literal(this="1" if extract_submatch else "0", is_string=False)

    return exp.Bracket(
        this=exp.Anonymous(
            this="regexp_extract_all",
            expressions=[
                # slice subject from position onwards
                exp.Bracket(this=subject, expressions=[exp.Slice(this=position)]),
                pattern,
                group_num,
                regex_parameters,
            ],
        ),
        # select index of occurrence
        expressions=[occurrence],
    )
914
-
915
-
916
- # TODO: move this into a Dialect as a transpilation
917
def set_schema(expression: exp.Expression, current_database: str | None = None) -> exp.Expression:
    """Transform USE SCHEMA/DATABASE to SET schema.

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("USE SCHEMA bar").transform(set_schema, current_database="foo").sql()
        "SET schema = 'foo.bar'"
        >>> sqlglot.parse_one("USE SCHEMA foo.bar").transform(set_schema).sql()
        "SET schema = 'foo.bar'"
        >>> sqlglot.parse_one("USE DATABASE marts").transform(set_schema).sql()
        "SET schema = 'marts.main'"

    See tests for more examples.
    Args:
        expression (exp.Expression): the expression that will be transformed.
        current_database (str | None): database used to qualify a USE SCHEMA that names no database.
            Defaults to None, which is only valid when the schema is already qualified or the
            statement is USE DATABASE.

    Returns:
        exp.Expression: A SET schema command if the input is a USE SCHEMA/DATABASE
            expression, otherwise expression is returned as-is. The command also carries a
            ``set_database`` or ``set_schema`` arg holding the new name.
    """

    if (
        isinstance(expression, exp.Use)
        and (kind := expression.args.get("kind"))
        and isinstance(kind, exp.Var)
        and kind.name
        and kind.name.upper() in ["SCHEMA", "DATABASE"]
    ):
        assert expression.this, f"No identifier for USE expression {expression}"

        if kind.name.upper() == "DATABASE":
            # duckdb's default schema is main
            database = expression.this.name
            return exp.Command(
                this="SET", expression=exp.Literal.string(f"schema = '{database}.main'"), set_database=database
            )

        # SCHEMA: prefer the database the statement names, else fall back to the current one
        db = expression.this.args.get("db")
        db_name = db.name if db else current_database

        # assertion always true because check_db_schema is called before this
        assert db_name

        schema = expression.this.name
        return exp.Command(
            this="SET", expression=exp.Literal.string(f"schema = '{db_name}.{schema}'"), set_schema=schema
        )

    return expression
970
-
971
-
972
def split(expression: exp.Expression) -> exp.Expression:
    """Wrap SPLIT in to_json.

    duckdb's str_split returns varchar[]; wrapping it in to_json yields a JSON array to match Snowflake.
    """
    if not isinstance(expression, exp.Split):
        return expression

    return exp.Anonymous(this="to_json", expressions=[expression])
980
-
981
-
982
def tag(expression: exp.Expression) -> exp.Expression:
    """Turn tag statements into a no-op.

    duckdb doesn't support tags. In lieu of a full implementation, the following statements are
    replaced by the SUCCESS_NOP query:

    - ALTER ... SET TAG, when parsed as an exp.Alter with an exp.AlterSet action carrying a tag
    - any exp.Command whose text contains SET TAG (eg: alter table modify column set tag)
    - CREATE TAG

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("ALTER TABLE table1 SET TAG foo='bar'").transform(tag).sql()
        "SELECT 'Statement executed successfully.' AS status"
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: SUCCESS_NOP for a tag statement, otherwise expression is returned as-is.
    """

    if isinstance(expression, exp.Alter) and (actions := expression.args.get("actions")):
        if any(isinstance(action, exp.AlterSet) and action.args.get("tag") for action in actions):
            return SUCCESS_NOP
    elif (
        isinstance(expression, exp.Command)
        and (cexp := expression.args.get("expression"))
        and isinstance(cexp, str)
        and "SET TAG" in cexp.upper()
    ):
        # alter table modify column set tag
        return SUCCESS_NOP
    elif (
        isinstance(expression, exp.Create)
        and (kind := expression.args.get("kind"))
        and isinstance(kind, str)
        and kind.upper() == "TAG"
    ):
        return SUCCESS_NOP

    return expression
1019
-
1020
-
1021
def to_date(expression: exp.Expression) -> exp.Expression:
    """Convert to_date() to a cast.

    An anonymous TO_DATE(x, ...) call becomes CAST(x AS DATE). Only the first argument is used.

    See https://docs.snowflake.com/en/sql-reference/functions/to_date

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: The transformed expression.
    """

    is_to_date_call = (
        isinstance(expression, exp.Anonymous)
        and isinstance(expression.this, str)
        and expression.this.upper() == "TO_DATE"
    )
    if not is_to_date_call:
        return expression

    value = expression.expressions[0]
    date_type = exp.DataType(this=exp.DataType.Type.DATE, nested=False, prefix=False)
    return exp.Cast(this=value, to=date_type)
1047
-
1048
-
1049
def _get_to_number_args(e: exp.ToNumber) -> tuple[exp.Expression | None, exp.Expression | None, exp.Expression | None]:
    """Work out which ToNumber args are the format, precision and scale.

    to_number(value, <format>, <precision>, <scale>) allows the format to be omitted, in which
    case the args shift left: the "format" slot holds the precision and the "precision" slot
    holds the scale.

    Returns:
        A (format, precision, scale) tuple; each member is None when not supplied.
    """
    first = e.args.get("format")
    second = e.args.get("precision")
    third = e.args.get("scale")

    if first:
        if first.is_string:
            # to_number('100', 'TM9' [, 10 [, 2]])
            return first, second or None, third or None

        # to_number('100', 10 [, 2]): a non-string first arg must be the precision,
        # which makes the next arg the scale
        return None, first, second or None

    if second:
        return None, second, third or None

    return None, None, None
1086
-
1087
-
1088
def _to_decimal(expression: exp.Expression, cast_node: type[exp.Cast]) -> exp.Expression:
    """Build a DECIMAL(precision, scale) cast from a (try_)to_decimal style function call.

    Args:
        expression: the function call; its expressions are (value[, precision[, scale]]).
        cast_node: the cast class to instantiate, eg: exp.Cast or exp.TryCast.

    Returns:
        cast_node of the value to DECIMAL, with precision defaulting to 38 and scale to 0.

    Raises:
        NotImplementedError: if the second argument is a string, ie: a format.
    """
    call_args: list[exp.Expression] = expression.expressions
    arg_count = len(call_args)

    if arg_count > 1 and call_args[1].is_string:
        # see https://docs.snowflake.com/en/sql-reference/functions/to_decimal#arguments
        raise NotImplementedError(f"{expression.this} with format argument")

    if arg_count > 1:
        precision = call_args[1]
    else:
        precision = exp.Literal(this="38", is_string=False)

    if arg_count > 2:
        scale = call_args[2]
    else:
        scale = exp.Literal(this="0", is_string=False)

    decimal_type = exp.DataType(
        this=exp.DataType.Type.DECIMAL, expressions=[precision, scale], nested=False, prefix=False
    )
    return cast_node(this=call_args[0], to=decimal_type)
1102
-
1103
-
1104
def to_decimal(expression: exp.Expression) -> exp.Expression:
    """Transform to_decimal, to_number, to_numeric expressions from snowflake to duckdb.

    Both exp.ToNumber nodes and anonymous TO_DECIMAL / TO_NUMERIC calls become a cast to
    DECIMAL(precision, scale), defaulting to DECIMAL(38, 0).

    See https://docs.snowflake.com/en/sql-reference/functions/to_decimal

    Raises:
        NotImplementedError: if a format argument is supplied.
    """

    if isinstance(expression, exp.ToNumber):
        format_, precision, scale = _get_to_number_args(expression)
        if format_:
            raise NotImplementedError(f"{expression.this} with format argument")

        precision = precision or exp.Literal(this="38", is_string=False)
        scale = scale or exp.Literal(this="0", is_string=False)
        decimal_type = exp.DataType(
            this=exp.DataType.Type.DECIMAL, expressions=[precision, scale], nested=False, prefix=False
        )
        return exp.Cast(this=expression.this, to=decimal_type)

    is_anonymous_call = isinstance(expression, exp.Anonymous) and isinstance(expression.this, str)
    if is_anonymous_call and expression.this.upper() in ["TO_DECIMAL", "TO_NUMERIC"]:
        return _to_decimal(expression, exp.Cast)

    return expression
1133
-
1134
-
1135
def try_to_decimal(expression: exp.Expression) -> exp.Expression:
    """Transform try_to_decimal, try_to_number, try_to_numeric expressions from snowflake to duckdb.

    The anonymous call becomes a TRY_CAST to DECIMAL via _to_decimal.

    See https://docs.snowflake.com/en/sql-reference/functions/try_to_decimal
    """

    if not isinstance(expression, exp.Anonymous) or not isinstance(expression.this, str):
        return expression

    if expression.this.upper() not in ["TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"]:
        return expression

    return _to_decimal(expression, exp.TryCast)
1148
-
1149
-
1150
def to_timestamp(expression: exp.Expression) -> exp.Expression:
    """Cast to_timestamp(seconds) to a timestamp without timezone (ie: TIMESTAMP_NTZ).

    An exp.UnixToTime node is wrapped in CAST(... AS TIMESTAMP); other nodes are returned as-is.

    See https://docs.snowflake.com/en/sql-reference/functions/to_timestamp
    """

    if not isinstance(expression, exp.UnixToTime):
        return expression

    timestamp_type = exp.DataType(this=exp.DataType.Type.TIMESTAMP, nested=False, prefix=False)
    return exp.Cast(this=expression, to=timestamp_type)
1162
-
1163
-
1164
def to_timestamp_ntz(expression: exp.Expression) -> exp.Expression:
    """Convert to_timestamp_ntz to to_timestamp (StrToTime).

    The first argument is parsed with the fixed format '%Y-%m-%d %H:%M:%S'.

    Because it's not yet supported by sqlglot, see https://github.com/tobymao/sqlglot/issues/2748
    """

    is_ntz_call = (
        isinstance(expression, exp.Anonymous)
        and isinstance(expression.this, str)
        and expression.this.upper() == "TO_TIMESTAMP_NTZ"
    )
    if not is_ntz_call:
        return expression

    value = expression.expressions[0]
    return exp.StrToTime(this=value, format=exp.Literal(this="%Y-%m-%d %H:%M:%S", is_string=True))
1178
-
1179
-
1180
def timestamp_ntz(expression: exp.Expression) -> exp.Expression:
    """Convert the timestamp_ntz data type (snowflake) to timestamp (duckdb).

    NB: timestamp_ntz defaults to nanosecond precision (ie: NTZ(9)), for which the duckdb equivalent
    is TIMESTAMP_NS. TIMESTAMP (ie: microsecond precision) is used instead to avoid
    https://github.com/duckdb/duckdb/issues/7980 in test_write_pandas_timestamp_ntz.
    """

    is_ntz_type = isinstance(expression, exp.DataType) and expression.this == exp.DataType.Type.TIMESTAMPNTZ
    if is_ntz_type:
        return exp.DataType(this=exp.DataType.Type.TIMESTAMP)

    return expression
1192
-
1193
-
1194
def trim_cast_varchar(expression: exp.Expression) -> exp.Expression:
    """Snowflake's TRIM casts input to VARCHAR implicitly.

    Wraps the TRIM operand in CAST(... AS VARCHAR) unless it is already a cast to VARCHAR or TEXT.
    All other TRIM arguments (eg: the characters to trim) are preserved.

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: A new Trim with a VARCHAR-cast operand, or expression as-is when it is
            not a Trim or its operand is already cast to a string type.
    """

    if not isinstance(expression, exp.Trim):
        return expression

    operand = expression.this
    if isinstance(operand, exp.Cast) and operand.to.this in [exp.DataType.Type.VARCHAR, exp.DataType.Type.TEXT]:
        return expression

    # Copy the whole node rather than building a bare Trim so the characters/position/collation
    # args survive the rewrite.
    trimmed = expression.copy()
    trimmed.set(
        "this",
        exp.Cast(this=trimmed.this, to=exp.DataType(this=exp.DataType.Type.VARCHAR, nested=False, prefix=False)),
    )
    return trimmed
1207
-
1208
-
1209
def try_parse_json(expression: exp.Expression) -> exp.Expression:
    """Convert TRY_PARSE_JSON() to TRY_CAST(... as JSON).

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("select try_parse_json('{}')").transform(try_parse_json).sql()
        "SELECT TRY_CAST('{}' AS JSON)"
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: The transformed expression.
    """

    if (
        isinstance(expression, exp.Anonymous)
        and isinstance(expression.this, str)
        and expression.this.upper() == "TRY_PARSE_JSON"
    ):
        # only the first argument is the value to parse
        value = expression.expressions[0]
        return exp.TryCast(
            this=value,
            to=exp.DataType(this=exp.DataType.Type.JSON, nested=False),
        )

    return expression
1235
-
1236
-
1237
def semi_structured_types(expression: exp.Expression) -> exp.Expression:
    """Convert OBJECT, ARRAY, and VARIANT types to the duckdb JSON type.

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("CREATE TABLE table1 (name object)").transform(semi_structured_types).sql()
        "CREATE TABLE table1 (name JSON)"
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: A copy of the data type retargeted to JSON, otherwise expression as-is.
    """

    if not isinstance(expression, exp.DataType):
        return expression

    if expression.this not in [
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.OBJECT,
        exp.DataType.Type.VARIANT,
    ]:
        return expression

    as_json = expression.copy()
    as_json.args["this"] = exp.DataType.Type.JSON
    return as_json
1261
-
1262
-
1263
def upper_case_unquoted_identifiers(expression: exp.Expression) -> exp.Expression:
    """Upper case unquoted identifiers.

    Snowflake represents case-insensitivity using upper-case identifiers in cursor results,
    whereas duckdb uses lowercase. Unquoted identifiers are upper-cased to match snowflake;
    quoted identifiers are left alone.

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("select name, name as fname from table1").transform(upper_case_unquoted_identifiers).sql()
        'SELECT NAME, NAME AS FNAME FROM TABLE1'
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: An upper-cased copy of an unquoted identifier, otherwise expression as-is.
    """

    if not isinstance(expression, exp.Identifier):
        return expression

    if expression.quoted or not isinstance(expression.this, str):
        return expression

    upper_cased = expression.copy()
    upper_cased.set("this", expression.this.upper())
    return upper_cased
1286
-
1287
-
1288
def values_columns(expression: exp.Expression) -> exp.Expression:
    """Support column1, column2 expressions in VALUES.

    Snowflake names the unnamed columns of VALUES column1, column2 .. whereas duckdb uses col0, col1 ..
    An un-aliased VALUES nested inside a SELECT is given the alias ``_`` with quoted columns
    COLUMN1..COLUMNn, where n is the width of the first tuple found in it.

    See https://docs.snowflake.com/en/sql-reference/constructs/values#examples
    """

    if not isinstance(expression, exp.Values) or expression.alias:
        return expression

    if not expression.find_ancestor(exp.Select):
        return expression

    row = expression.find(exp.Tuple)
    if not row:
        return expression

    column_names = [exp.Identifier(this=f"COLUMN{n}", quoted=True) for n in range(1, len(row.expressions) + 1)]
    table_alias = exp.TableAlias(this=exp.Identifier(this="_", quoted=False), columns=column_names)
    expression.set("alias", table_alias)
    return expression
1306
-
1307
-
1308
def create_user(expression: exp.Expression) -> exp.Expression:
    """Transform CREATE USER to an insert into the global database's information_schema._fs_users table.

    https://docs.snowflake.com/en/sql-reference/sql/create-user

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: An INSERT of the user name for a CREATE USER command,
            otherwise expression is returned as-is.

    Raises:
        NotImplementedError: if anything follows the user name (eg: user properties).
        ValueError: if no user name is given.
    """
    # XXX: this is a placeholder. We need to implement the full CREATE USER syntax, but
    # sqlglot doesnt yet support Create for snowflake.
    if not (isinstance(expression, exp.Command) and expression.this == "CREATE"):
        return expression

    # split on any run of whitespace so repeated spaces, tabs or newlines don't produce empty tokens
    tokens = expression.expression.strip().split()
    # compare the whole keyword so other object kinds that merely start with USER are left alone
    if not tokens or tokens[0].upper() != "USER":
        return expression

    if len(tokens) < 2:
        raise ValueError("`CREATE USER` without a user name")

    _, name, *ignored = tokens
    if ignored:
        raise NotImplementedError(f"`CREATE USER` with {ignored}")

    # the name is embedded in a string literal, so double any single quotes to keep the SQL well-formed
    escaped_name = name.replace("'", "''")
    return sqlglot.parse_one(
        f"INSERT INTO _fs_global._fs_information_schema._fs_users (name) VALUES ('{escaped_name}')", read="duckdb"
    )
1326
-
1327
-
1328
def update_variables(
    expression: exp.Expression,
    variables: Variables,
) -> exp.Expression:
    """Apply a variable-modifying statement to variables and replace it with a no-op.

    Args:
        expression (exp.Expression): the expression that will be transformed.
        variables (Variables): the variable store updated when expression is a variable modifier.

    Returns:
        exp.Expression: SUCCESS_NOP for a variable modifier, otherwise expression is returned as-is.
    """
    if not Variables.is_variable_modifier(expression):
        return expression

    # Nothing further to do if its a SET/UNSET operation.
    variables.update_variables(expression)
    return SUCCESS_NOP
1336
-
1337
-
1338
class SHA256(exp.Func):
    """sqlglot function node named SHA256 that takes a single required argument."""

    arg_types: ClassVar = {"this": True}
    _sql_names: ClassVar = ["SHA256"]
1341
-
1342
-
1343
def sha256(expression: exp.Expression) -> exp.Expression:
    """Convert sha2() or sha2_hex() to sha256().

    Convert sha2_binary() to unhex(sha256()).

    Only the 256 bit digest is handled, whether requested explicitly or by omitting the digest
    size. Calls asking for any other size are returned unchanged.

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("insert into table1 (name) select sha2('foo')").transform(sha256).sql()
        "INSERT INTO table1 (name) SELECT SHA256('foo')"
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: The transformed expression.
    """

    if isinstance(expression, exp.SHA2):
        # a missing (or explicitly None) length means the default digest size of 256
        length = expression.args.get("length")
        if length is None or length.this == "256":
            return SHA256(this=expression.this)
        return expression

    if isinstance(expression, exp.Anonymous) and isinstance(expression.this, str):
        func_name = expression.this.upper()
        call_args = expression.expressions
        wants_256 = len(call_args) == 1 or (len(call_args) == 2 and call_args[1].this == "256")

        if wants_256 and func_name == "SHA2_HEX":
            return SHA256(this=call_args[0])
        if wants_256 and func_name == "SHA2_BINARY":
            return exp.Unhex(this=SHA256(this=call_args[0]))

    return expression