fakesnow 0.9.35__py3-none-any.whl → 0.9.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- from pathlib import Path
4
- from string import Template
5
- from typing import ClassVar, cast
6
-
7
- import sqlglot
8
- from sqlglot import exp
9
-
10
3
  from fakesnow.transforms.merge import merge as merge
11
4
  from fakesnow.transforms.show import (
12
5
  show_columns as show_columns,
@@ -19,1361 +12,56 @@ from fakesnow.transforms.show import (
19
12
  show_users as show_users,
20
13
  show_warehouses as show_warehouses,
21
14
  )
22
- from fakesnow.variables import Variables
23
-
24
- SUCCESS_NOP = sqlglot.parse_one("SELECT 'Statement executed successfully.' as status")
25
-
26
-
27
- def alias_in_join(expression: exp.Expression) -> exp.Expression:
28
- if (
29
- isinstance(expression, exp.Select)
30
- and (aliases := {e.args.get("alias"): e for e in expression.expressions if isinstance(e, exp.Alias)})
31
- and (joins := expression.args.get("joins"))
32
- ):
33
- j: exp.Join
34
- for j in joins:
35
- if (
36
- (on := j.args.get("on"))
37
- and (col := on.this)
38
- and (isinstance(col, exp.Column))
39
- and (alias := aliases.get(col.this))
40
- # don't rewrite col with table identifier
41
- and not col.table
42
- ):
43
- col.args["this"] = alias.this
44
-
45
- return expression
46
-
47
-
48
- def alter_table_strip_cluster_by(expression: exp.Expression) -> exp.Expression:
49
- """Turn alter table cluster by into a no-op"""
50
- if (
51
- isinstance(expression, exp.Alter)
52
- and (actions := expression.args.get("actions"))
53
- and len(actions) == 1
54
- and (isinstance(actions[0], exp.Cluster))
55
- ):
56
- return SUCCESS_NOP
57
- return expression
58
-
59
-
60
- def array_size(expression: exp.Expression) -> exp.Expression:
61
- if isinstance(expression, exp.ArraySize):
62
- # return null if not json array
63
- jal = exp.Anonymous(this="json_array_length", expressions=[expression.this])
64
- is_json_array = exp.EQ(
65
- this=exp.Anonymous(this="json_type", expressions=[expression.this]),
66
- expression=exp.Literal(this="ARRAY", is_string=True),
67
- )
68
- return exp.Case(ifs=[exp.If(this=is_json_array, true=jal)])
69
-
70
- return expression
71
-
72
-
73
- def array_agg(expression: exp.Expression) -> exp.Expression:
74
- if isinstance(expression, exp.ArrayAgg) and not isinstance(expression.parent, exp.Window):
75
- return exp.Anonymous(this="TO_JSON", expressions=[expression])
76
-
77
- if isinstance(expression, exp.Window) and isinstance(expression.this, exp.ArrayAgg):
78
- return exp.Anonymous(this="TO_JSON", expressions=[expression])
79
-
80
- return expression
81
-
82
-
83
- def array_agg_within_group(expression: exp.Expression) -> exp.Expression:
84
- """Convert ARRAY_AGG(<expr>) WITHIN GROUP (<order-by-clause>) to ARRAY_AGG( <expr> <order-by-clause> )
85
- Snowflake uses ARRAY_AGG(<expr>) WITHIN GROUP (ORDER BY <order-by-clause>)
86
- to order the array, but DuckDB uses ARRAY_AGG( <expr> <order-by-clause> ).
87
- See;
88
- - https://docs.snowflake.com/en/sql-reference/functions/array_agg
89
- - https://duckdb.org/docs/sql/aggregates.html#order-by-clause-in-aggregate-functions
90
- Note; Snowflake has following restriction;
91
- If you specify DISTINCT and WITHIN GROUP, both must refer to the same column.
92
- Transformation does not handle this restriction.
93
- """
94
- if (
95
- isinstance(expression, exp.WithinGroup)
96
- and (agg := expression.find(exp.ArrayAgg))
97
- and (order := expression.expression)
98
- ):
99
- return exp.ArrayAgg(
100
- this=exp.Order(
101
- this=agg.this,
102
- expressions=order.expressions,
103
- )
104
- )
105
-
106
- return expression
107
-
108
-
109
- def create_clone(expression: exp.Expression) -> exp.Expression:
110
- """Transform create table clone to create table as select."""
111
-
112
- if (
113
- isinstance(expression, exp.Create)
114
- and str(expression.args.get("kind")).upper() == "TABLE"
115
- and (clone := expression.find(exp.Clone))
116
- ):
117
- return exp.Create(
118
- this=expression.this,
119
- kind="TABLE",
120
- expression=exp.Select(
121
- expressions=[
122
- exp.Star(),
123
- ],
124
- **{"from": exp.From(this=clone.this)},
125
- ),
126
- )
127
- return expression
128
-
129
-
130
- # TODO: move this into a Dialect as a transpilation
131
- def create_database(expression: exp.Expression, db_path: Path | None = None) -> exp.Expression:
132
- """Transform create database to attach database.
133
-
134
- Example:
135
- >>> import sqlglot
136
- >>> sqlglot.parse_one("CREATE database foo").transform(create_database).sql()
137
- 'ATTACH DATABASE ':memory:' as foo'
138
- Args:
139
- expression (exp.Expression): the expression that will be transformed.
140
-
141
- Returns:
142
- exp.Expression: The transformed expression, with the database name stored in the create_db_name arg.
143
- """
144
-
145
- if isinstance(expression, exp.Create) and str(expression.args.get("kind")).upper() == "DATABASE":
146
- ident = expression.find(exp.Identifier)
147
- assert ident, f"No identifier in {expression.sql}"
148
- db_name = ident.this
149
- db_file = f"{db_path / db_name}.db" if db_path else ":memory:"
150
-
151
- if_not_exists = "IF NOT EXISTS " if expression.args.get("exists") else ""
152
-
153
- return exp.Command(
154
- this="ATTACH",
155
- expression=exp.Literal(this=f"{if_not_exists}DATABASE '{db_file}' AS {db_name}", is_string=True),
156
- create_db_name=db_name,
157
- )
158
-
159
- return expression
160
-
161
-
162
- SQL_DESCRIBE_TABLE = Template(
163
- """
164
- SELECT
165
- column_name AS "name",
166
- CASE WHEN data_type = 'NUMBER' THEN 'NUMBER(' || numeric_precision || ',' || numeric_scale || ')'
167
- WHEN data_type = 'TEXT' THEN 'VARCHAR(' || coalesce(character_maximum_length,16777216) || ')'
168
- WHEN data_type = 'TIMESTAMP_NTZ' THEN 'TIMESTAMP_NTZ(9)'
169
- WHEN data_type = 'TIMESTAMP_TZ' THEN 'TIMESTAMP_TZ(9)'
170
- WHEN data_type = 'TIME' THEN 'TIME(9)'
171
- WHEN data_type = 'BINARY' THEN 'BINARY(8388608)'
172
- ELSE data_type END AS "type",
173
- 'COLUMN' AS "kind",
174
- CASE WHEN is_nullable = 'YES' THEN 'Y' ELSE 'N' END AS "null?",
175
- column_default AS "default",
176
- 'N' AS "primary key",
177
- 'N' AS "unique key",
178
- NULL::VARCHAR AS "check",
179
- NULL::VARCHAR AS "expression",
180
- NULL::VARCHAR AS "comment",
181
- NULL::VARCHAR AS "policy name",
182
- NULL::JSON AS "privacy domain",
183
- FROM _fs_information_schema._fs_columns
184
- WHERE table_catalog = '${catalog}' AND table_schema = '${schema}' AND table_name = '${table}'
185
- ORDER BY ordinal_position
186
- """
187
- )
188
-
189
- SQL_DESCRIBE_INFO_SCHEMA = Template(
190
- """
191
- SELECT
192
- column_name AS "name",
193
- column_type as "type",
194
- 'COLUMN' AS "kind",
195
- CASE WHEN "null" = 'YES' THEN 'Y' ELSE 'N' END AS "null?",
196
- NULL::VARCHAR AS "default",
197
- 'N' AS "primary key",
198
- 'N' AS "unique key",
199
- NULL::VARCHAR AS "check",
200
- NULL::VARCHAR AS "expression",
201
- NULL::VARCHAR AS "comment",
202
- NULL::VARCHAR AS "policy name",
203
- NULL::JSON AS "privacy domain",
204
- FROM (DESCRIBE ${view})
205
- """
15
+ from fakesnow.transforms.transforms import (
16
+ SUCCESS_NOP as SUCCESS_NOP,
17
+ alias_in_join as alias_in_join,
18
+ alter_table_strip_cluster_by as alter_table_strip_cluster_by,
19
+ array_agg as array_agg,
20
+ array_agg_within_group as array_agg_within_group,
21
+ array_size as array_size,
22
+ create_clone as create_clone,
23
+ create_database as create_database,
24
+ create_user as create_user,
25
+ dateadd_date_cast as dateadd_date_cast,
26
+ dateadd_string_literal_timestamp_cast as dateadd_string_literal_timestamp_cast,
27
+ datediff_string_literal_timestamp_cast as datediff_string_literal_timestamp_cast,
28
+ describe_table as describe_table,
29
+ drop_schema_cascade as drop_schema_cascade,
30
+ extract_comment_on_columns as extract_comment_on_columns,
31
+ extract_comment_on_table as extract_comment_on_table,
32
+ extract_text_length as extract_text_length,
33
+ flatten as flatten,
34
+ flatten_value_cast_as_varchar as flatten_value_cast_as_varchar,
35
+ float_to_double as float_to_double,
36
+ identifier as identifier,
37
+ indices_to_json_extract as indices_to_json_extract,
38
+ information_schema_databases as information_schema_databases,
39
+ information_schema_fs_columns as information_schema_fs_columns,
40
+ information_schema_fs_tables as information_schema_fs_tables,
41
+ information_schema_fs_views as information_schema_fs_views,
42
+ integer_precision as integer_precision,
43
+ json_extract_cased_as_varchar as json_extract_cased_as_varchar,
44
+ json_extract_cast_as_varchar as json_extract_cast_as_varchar,
45
+ json_extract_precedence as json_extract_precedence,
46
+ object_construct as object_construct,
47
+ random as random,
48
+ regex_replace as regex_replace,
49
+ regex_substr as regex_substr,
50
+ sample as sample,
51
+ semi_structured_types as semi_structured_types,
52
+ set_schema as set_schema,
53
+ sha256 as sha256,
54
+ split as split,
55
+ tag as tag,
56
+ timestamp_ntz as timestamp_ntz,
57
+ to_date as to_date,
58
+ to_decimal as to_decimal,
59
+ to_timestamp as to_timestamp,
60
+ to_timestamp_ntz as to_timestamp_ntz,
61
+ trim_cast_varchar as trim_cast_varchar,
62
+ try_parse_json as try_parse_json,
63
+ try_to_decimal as try_to_decimal,
64
+ update_variables as update_variables,
65
+ upper_case_unquoted_identifiers as upper_case_unquoted_identifiers,
66
+ values_columns as values_columns,
206
67
  )
207
-
208
-
209
- def describe_table(
210
- expression: exp.Expression, current_database: str | None = None, current_schema: str | None = None
211
- ) -> exp.Expression:
212
- """Redirect to the information_schema._fs_columns to match snowflake.
213
-
214
- See https://docs.snowflake.com/en/sql-reference/sql/desc-table
215
- """
216
-
217
- if (
218
- isinstance(expression, exp.Describe)
219
- and (kind := expression.args.get("kind"))
220
- and isinstance(kind, str)
221
- and kind.upper() in ("TABLE", "VIEW")
222
- and (table := expression.find(exp.Table))
223
- ):
224
- catalog = table.catalog or current_database
225
- schema = table.db or current_schema
226
-
227
- if schema and schema.upper() == "_FS_INFORMATION_SCHEMA":
228
- # describing an information_schema view
229
- # (schema already transformed from information_schema -> _fs_information_schema)
230
- return sqlglot.parse_one(SQL_DESCRIBE_INFO_SCHEMA.substitute(view=f"{schema}.{table.name}"), read="duckdb")
231
-
232
- return sqlglot.parse_one(
233
- SQL_DESCRIBE_TABLE.substitute(catalog=catalog, schema=schema, table=table.name),
234
- read="duckdb",
235
- )
236
-
237
- return expression
238
-
239
-
240
- def drop_schema_cascade(expression: exp.Expression) -> exp.Expression: #
241
- """Drop schema cascade.
242
-
243
- By default duckdb won't delete a schema if it contains tables, whereas snowflake will.
244
- So we add the cascade keyword to mimic snowflake's behaviour.
245
-
246
- Example:
247
- >>> import sqlglot
248
- >>> sqlglot.parse_one("DROP SCHEMA schema1").transform(remove_comment).sql()
249
- 'DROP SCHEMA schema1 cascade'
250
- Args:
251
- expression (exp.Expression): the expression that will be transformed.
252
-
253
- Returns:
254
- exp.Expression: The transformed expression.
255
- """
256
-
257
- if (
258
- not isinstance(expression, exp.Drop)
259
- or not (kind := expression.args.get("kind"))
260
- or not isinstance(kind, str)
261
- or kind.upper() != "SCHEMA"
262
- ):
263
- return expression
264
-
265
- new = expression.copy()
266
- new.args["cascade"] = True
267
- return new
268
-
269
-
270
- def dateadd_date_cast(expression: exp.Expression) -> exp.Expression:
271
- """Cast result of DATEADD to DATE if the given expression is a cast to DATE
272
- and unit is either DAY, WEEK, MONTH or YEAR to mimic Snowflake's DATEADD
273
- behaviour.
274
-
275
- Snowflake;
276
- SELECT DATEADD(DAY, 3, '2023-03-03'::DATE) as D;
277
- D: 2023-03-06 (DATE)
278
- DuckDB;
279
- SELECT CAST('2023-03-03' AS DATE) + INTERVAL 3 DAY AS D
280
- D: 2023-03-06 00:00:00 (TIMESTAMP)
281
- """
282
-
283
- if not isinstance(expression, exp.DateAdd):
284
- return expression
285
-
286
- if expression.unit is None:
287
- return expression
288
-
289
- if not isinstance(expression.unit.this, str):
290
- return expression
291
-
292
- if (unit := expression.unit.this.upper()) and unit.upper() not in {"DAY", "WEEK", "MONTH", "YEAR"}:
293
- return expression
294
-
295
- if not isinstance(expression.this, exp.Cast):
296
- return expression
297
-
298
- if expression.this.to.this != exp.DataType.Type.DATE:
299
- return expression
300
-
301
- return exp.Cast(
302
- this=expression,
303
- to=exp.DataType(this=exp.DataType.Type.DATE, nested=False, prefix=False),
304
- )
305
-
306
-
307
- def dateadd_string_literal_timestamp_cast(expression: exp.Expression) -> exp.Expression:
308
- """Snowflake's DATEADD function implicitly casts string literals to
309
- timestamps regardless of unit.
310
- """
311
- if not isinstance(expression, exp.DateAdd):
312
- return expression
313
-
314
- if not isinstance(expression.this, exp.Literal) or not expression.this.is_string:
315
- return expression
316
-
317
- new_dateadd = expression.copy()
318
- new_dateadd.set(
319
- "this",
320
- exp.Cast(
321
- this=expression.this,
322
- # TODO: support TIMESTAMP_TYPE_MAPPING of TIMESTAMP_LTZ/TZ
323
- to=exp.DataType(this=exp.DataType.Type.TIMESTAMP, nested=False, prefix=False),
324
- ),
325
- )
326
-
327
- return new_dateadd
328
-
329
-
330
- def datediff_string_literal_timestamp_cast(expression: exp.Expression) -> exp.Expression:
331
- """Snowflake's DATEDIFF function implicitly casts string literals to
332
- timestamps regardless of unit.
333
- """
334
-
335
- if not isinstance(expression, exp.DateDiff):
336
- return expression
337
-
338
- op1 = expression.this.copy()
339
- op2 = expression.expression.copy()
340
-
341
- if isinstance(op1, exp.Literal) and op1.is_string:
342
- op1 = exp.Cast(
343
- this=op1,
344
- # TODO: support TIMESTAMP_TYPE_MAPPING of TIMESTAMP_LTZ/TZ
345
- to=exp.DataType(this=exp.DataType.Type.TIMESTAMP, nested=False, prefix=False),
346
- )
347
-
348
- if isinstance(op2, exp.Literal) and op2.is_string:
349
- op2 = exp.Cast(
350
- this=op2,
351
- # TODO: support TIMESTAMP_TYPE_MAPPING of TIMESTAMP_LTZ/TZ
352
- to=exp.DataType(this=exp.DataType.Type.TIMESTAMP, nested=False, prefix=False),
353
- )
354
-
355
- new_datediff = expression.copy()
356
- new_datediff.set("this", op1)
357
- new_datediff.set("expression", op2)
358
-
359
- return new_datediff
360
-
361
-
362
- def extract_comment_on_columns(expression: exp.Expression) -> exp.Expression:
363
- """Extract column comments, removing it from the Expression.
364
-
365
- duckdb doesn't support comments. So we remove them from the expression and store them in the column_comment arg.
366
- We also replace the transform the expression to NOP if the statement can't be executed by duckdb.
367
-
368
- Args:
369
- expression (exp.Expression): the expression that will be transformed.
370
-
371
- Returns:
372
- exp.Expression: The transformed expression, with any comment stored in the new 'table_comment' arg.
373
- """
374
-
375
- if isinstance(expression, exp.Alter) and (actions := expression.args.get("actions")):
376
- new_actions: list[exp.Expression] = []
377
- col_comments: list[tuple[str, str]] = []
378
- for a in actions:
379
- if isinstance(a, exp.AlterColumn) and (comment := a.args.get("comment")):
380
- col_comments.append((a.name, comment.this))
381
- else:
382
- new_actions.append(a)
383
- if not new_actions:
384
- expression = SUCCESS_NOP.copy()
385
- else:
386
- expression.set("actions", new_actions)
387
- expression.args["col_comments"] = col_comments
388
-
389
- return expression
390
-
391
-
392
- def extract_comment_on_table(expression: exp.Expression) -> exp.Expression:
393
- """Extract table comment, removing it from the Expression.
394
-
395
- duckdb doesn't support comments. So we remove them from the expression and store them in the table_comment arg.
396
- We also replace the transform the expression to NOP if the statement can't be executed by duckdb.
397
-
398
- Args:
399
- expression (exp.Expression): the expression that will be transformed.
400
-
401
- Returns:
402
- exp.Expression: The transformed expression, with any comment stored in the new 'table_comment' arg.
403
- """
404
-
405
- if isinstance(expression, exp.Create) and (table := expression.find(exp.Table)):
406
- comment = None
407
- if props := cast(exp.Properties, expression.args.get("properties")):
408
- other_props = []
409
- for p in props.expressions:
410
- if isinstance(p, exp.SchemaCommentProperty) and (isinstance(p.this, (exp.Literal, exp.Var))):
411
- comment = p.this.this
412
- else:
413
- other_props.append(p)
414
-
415
- new = expression.copy()
416
- new_props: exp.Properties = new.args["properties"]
417
- new_props.set("expressions", other_props)
418
- new.args["table_comment"] = (table, comment)
419
- return new
420
- elif (
421
- isinstance(expression, exp.Comment)
422
- and (cexp := expression.args.get("expression"))
423
- and (table := expression.find(exp.Table))
424
- ):
425
- new = SUCCESS_NOP.copy()
426
- new.args["table_comment"] = (table, cexp.this)
427
- return new
428
- elif (
429
- isinstance(expression, exp.Alter)
430
- and (sexp := expression.find(exp.AlterSet))
431
- and (scp := sexp.find(exp.SchemaCommentProperty))
432
- and isinstance(scp.this, exp.Literal)
433
- and (table := expression.find(exp.Table))
434
- ):
435
- new = SUCCESS_NOP.copy()
436
- new.args["table_comment"] = (table, scp.this.this)
437
- return new
438
-
439
- return expression
440
-
441
-
442
- def extract_text_length(expression: exp.Expression) -> exp.Expression:
443
- """Extract length of text columns.
444
-
445
- duckdb doesn't have fixed-sized text types. So we capture the size of text types and store that in the
446
- character_maximum_length arg.
447
-
448
- Args:
449
- expression (exp.Expression): the expression that will be transformed.
450
-
451
- Returns:
452
- exp.Expression: The original expression, with any text lengths stored in the new 'text_lengths' arg.
453
- """
454
-
455
- if isinstance(expression, (exp.Create, exp.Alter)):
456
- text_lengths = []
457
-
458
- # exp.Select is for a ctas, exp.Schema is a plain definition
459
- if cols := expression.find(exp.Select, exp.Schema):
460
- expressions = cols.expressions
461
- else:
462
- # alter table
463
- expressions = expression.args.get("actions") or []
464
- for e in expressions:
465
- if dts := [
466
- dt for dt in e.find_all(exp.DataType) if dt.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.TEXT)
467
- ]:
468
- col_name = e.alias if isinstance(e, exp.Alias) else e.name
469
- if len(dts) == 1 and (dt_size := dts[0].find(exp.DataTypeParam)):
470
- size = (
471
- isinstance(dt_size.this, exp.Literal)
472
- and isinstance(dt_size.this.this, str)
473
- and int(dt_size.this.this)
474
- )
475
- else:
476
- size = 16777216
477
- text_lengths.append((col_name, size))
478
-
479
- if text_lengths:
480
- expression.args["text_lengths"] = text_lengths
481
-
482
- return expression
483
-
484
-
485
- def flatten(expression: exp.Expression) -> exp.Expression:
486
- """Flatten an array.
487
-
488
- See https://docs.snowflake.com/en/sql-reference/functions/flatten
489
-
490
- TODO: support objects.
491
- """
492
- if (
493
- isinstance(expression, exp.Lateral)
494
- and isinstance(expression.this, exp.Explode)
495
- and (alias := expression.args.get("alias"))
496
- # always true; when no explicit alias provided this will be flattened
497
- and isinstance(alias, exp.TableAlias)
498
- ):
499
- explode_expression = expression.this.this.expression
500
-
501
- value = exp.Cast(
502
- this=explode_expression,
503
- to=exp.DataType(
504
- this=exp.DataType.Type.ARRAY,
505
- expressions=[exp.DataType(this=exp.DataType.Type.JSON, nested=False, prefix=False)],
506
- nested=True,
507
- ),
508
- )
509
-
510
- return exp.Subquery(
511
- this=exp.Select(
512
- expressions=[
513
- exp.Unnest(
514
- expressions=[value],
515
- alias=exp.Identifier(this="VALUE", quoted=False),
516
- ),
517
- exp.Alias(
518
- this=exp.Sub(
519
- this=exp.Anonymous(
520
- this="generate_subscripts", expressions=[value, exp.Literal(this="1", is_string=False)]
521
- ),
522
- expression=exp.Literal(this="1", is_string=False),
523
- ),
524
- alias=exp.Identifier(this="INDEX", quoted=False),
525
- ),
526
- ],
527
- ),
528
- alias=exp.TableAlias(this=alias.this),
529
- )
530
-
531
- return expression
532
-
533
-
534
- def flatten_value_cast_as_varchar(expression: exp.Expression) -> exp.Expression:
535
- """Return raw unquoted string when flatten VALUE is cast to varchar.
536
-
537
- Returns a raw string using the Duckdb ->> operator, aka the json_extract_string function, see
538
- https://duckdb.org/docs/extensions/json#json-extraction-functions
539
- """
540
- if (
541
- isinstance(expression, exp.Cast)
542
- and isinstance(expression.this, exp.Column)
543
- and expression.this.name.upper() == "VALUE"
544
- and expression.to.this in [exp.DataType.Type.VARCHAR, exp.DataType.Type.TEXT]
545
- and (select := expression.find_ancestor(exp.Select))
546
- and select.find(exp.Explode)
547
- ):
548
- return exp.JSONExtractScalar(this=expression.this, expression=exp.JSONPath(expressions=[exp.JSONPathRoot()]))
549
-
550
- return expression
551
-
552
-
553
- def float_to_double(expression: exp.Expression) -> exp.Expression:
554
- """Convert float to double for 64 bit precision.
555
-
556
- Snowflake floats are all 64 bit (ie: double)
557
- see https://docs.snowflake.com/en/sql-reference/data-types-numeric#float-float4-float8
558
- """
559
-
560
- if isinstance(expression, exp.DataType) and expression.this == exp.DataType.Type.FLOAT:
561
- expression.args["this"] = exp.DataType.Type.DOUBLE
562
-
563
- return expression
564
-
565
-
566
- def identifier(expression: exp.Expression) -> exp.Expression:
567
- """Convert identifier function to an identifier.
568
-
569
- See https://docs.snowflake.com/en/sql-reference/identifier-literal
570
- """
571
-
572
- if (
573
- isinstance(expression, exp.Anonymous)
574
- and isinstance(expression.this, str)
575
- and expression.this.upper() == "IDENTIFIER"
576
- ):
577
- expression = exp.Identifier(this=expression.expressions[0].this, quoted=False)
578
-
579
- return expression
580
-
581
-
582
- def indices_to_json_extract(expression: exp.Expression) -> exp.Expression:
583
- """Convert indices on objects and arrays to json_extract or json_extract_string
584
-
585
- Supports Snowflake array indices, see
586
- https://docs.snowflake.com/en/sql-reference/data-types-semistructured#accessing-elements-of-an-array-by-index-or-by-slice
587
- and object indices, see
588
- https://docs.snowflake.com/en/sql-reference/data-types-semistructured#accessing-elements-of-an-object-by-key
589
-
590
- Duckdb uses the -> operator, aka the json_extract function, see
591
- https://duckdb.org/docs/extensions/json#json-extraction-functions
592
-
593
- This works for Snowflake arrays too because we convert them to JSON in duckdb.
594
- """
595
- if (
596
- isinstance(expression, exp.Bracket)
597
- and len(expression.expressions) == 1
598
- and (index := expression.expressions[0])
599
- and isinstance(index, exp.Literal)
600
- and index.this
601
- ):
602
- if isinstance(expression.parent, exp.Cast) and expression.parent.to.this == exp.DataType.Type.VARCHAR:
603
- # If the parent is a cast to varchar, we need to use JSONExtractScalar
604
- # to get the unquoted string value.
605
- klass = exp.JSONExtractScalar
606
- else:
607
- klass = exp.JSONExtract
608
- if index.is_string:
609
- return klass(this=expression.this, expression=exp.Literal(this=f"$.{index.this}", is_string=True))
610
- else:
611
- return klass(this=expression.this, expression=exp.Literal(this=f"$[{index.this}]", is_string=True))
612
-
613
- return expression
614
-
615
-
616
- def information_schema_fs_columns(expression: exp.Expression) -> exp.Expression:
617
- """Redirect to the _FS_COLUMNS view which has metadata that matches snowflake.
618
-
619
- Because duckdb doesn't store character_maximum_length or character_octet_length.
620
- """
621
-
622
- if (
623
- isinstance(expression, exp.Table)
624
- and expression.db
625
- and expression.db.upper() == "INFORMATION_SCHEMA"
626
- and expression.name
627
- and expression.name.upper() == "COLUMNS"
628
- ):
629
- expression.set("this", exp.Identifier(this="_FS_COLUMNS", quoted=False))
630
- expression.set("db", exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False))
631
-
632
- return expression
633
-
634
-
635
- def information_schema_databases(
636
- expression: exp.Expression,
637
- current_schema: str | None = None,
638
- ) -> exp.Expression:
639
- if (
640
- isinstance(expression, exp.Table)
641
- and (
642
- expression.db.upper() == "INFORMATION_SCHEMA"
643
- or (current_schema and current_schema.upper() == "INFORMATION_SCHEMA")
644
- )
645
- and expression.name.upper() == "DATABASES"
646
- ):
647
- return exp.Table(
648
- this=exp.Identifier(this="DATABASES", quoted=False),
649
- db=exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False),
650
- )
651
- return expression
652
-
653
-
654
- def information_schema_fs_tables(
655
- expression: exp.Expression,
656
- ) -> exp.Expression:
657
- """Use _FS_TABLES to access additional metadata columns (eg: comment)."""
658
-
659
- if (
660
- isinstance(expression, exp.Select)
661
- and (tbl := expression.find(exp.Table))
662
- and tbl.db.upper() == "INFORMATION_SCHEMA"
663
- and tbl.name.upper() == "TABLES"
664
- ):
665
- tbl.set("this", exp.Identifier(this="_FS_TABLES", quoted=False))
666
- tbl.set("db", exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False))
667
-
668
- return expression
669
-
670
-
671
- def information_schema_fs_views(expression: exp.Expression) -> exp.Expression:
672
- """Use _FS_VIEWS to return Snowflake's version instead of duckdb's."""
673
-
674
- if (
675
- isinstance(expression, exp.Select)
676
- and (tbl := expression.find(exp.Table))
677
- and tbl.db.upper() == "INFORMATION_SCHEMA"
678
- and tbl.name.upper() == "VIEWS"
679
- ):
680
- tbl.set("this", exp.Identifier(this="_FS_VIEWS", quoted=False))
681
- tbl.set("db", exp.Identifier(this="_FS_INFORMATION_SCHEMA", quoted=False))
682
-
683
- return expression
684
-
685
-
686
- NUMBER_38_0 = [
687
- exp.DataTypeParam(this=exp.Literal(this="38", is_string=False)),
688
- exp.DataTypeParam(this=exp.Literal(this="0", is_string=False)),
689
- ]
690
-
691
-
692
- def integer_precision(expression: exp.Expression) -> exp.Expression:
693
- """Convert integers and number(38,0) to bigint.
694
-
695
- So fetch_all will return int and dataframes will return them with a dtype of int64.
696
- """
697
- if (
698
- isinstance(expression, exp.DataType)
699
- and expression.this == exp.DataType.Type.DECIMAL
700
- and (not expression.expressions or expression.expressions == NUMBER_38_0)
701
- ) or expression.this in (exp.DataType.Type.INT, exp.DataType.Type.SMALLINT, exp.DataType.Type.TINYINT):
702
- return exp.DataType(
703
- this=exp.DataType.Type.BIGINT,
704
- nested=False,
705
- prefix=False,
706
- )
707
-
708
- return expression
709
-
710
-
711
- def json_extract_cased_as_varchar(expression: exp.Expression) -> exp.Expression:
712
- """Convert json to varchar inside JSONExtract.
713
-
714
- Snowflake case conversion (upper/lower) turns variant into varchar. This
715
- mimics that behaviour within get_path.
716
-
717
- TODO: a generic version that works on any variant, not just JSONExtract
718
-
719
- Returns a raw string using the Duckdb ->> operator, aka the json_extract_string function, see
720
- https://duckdb.org/docs/extensions/json#json-extraction-functions
721
- """
722
- if (
723
- isinstance(expression, (exp.Upper, exp.Lower))
724
- and (gp := expression.this)
725
- and isinstance(gp, exp.JSONExtract)
726
- and (path := gp.expression)
727
- and isinstance(path, exp.JSONPath)
728
- ):
729
- expression.set("this", exp.JSONExtractScalar(this=gp.this, expression=path))
730
-
731
- return expression
732
-
733
-
734
- def json_extract_cast_as_varchar(expression: exp.Expression) -> exp.Expression:
735
- """Return raw unquoted string when casting json extraction to varchar.
736
-
737
- Returns a raw string using the Duckdb ->> operator, aka the json_extract_string function, see
738
- https://duckdb.org/docs/extensions/json#json-extraction-functions
739
- """
740
- if (
741
- isinstance(expression, exp.Cast)
742
- and (je := expression.this)
743
- and isinstance(je, exp.JSONExtract)
744
- and (path := je.expression)
745
- and isinstance(path, exp.JSONPath)
746
- ):
747
- je.replace(exp.JSONExtractScalar(this=je.this, expression=path))
748
- return expression
749
-
750
-
751
- def json_extract_precedence(expression: exp.Expression) -> exp.Expression:
752
- """Associate json extract operands to avoid duckdb operators of higher precedence transforming the expression.
753
-
754
- See https://github.com/tekumara/fakesnow/issues/53
755
- """
756
- if isinstance(expression, (exp.JSONExtract, exp.JSONExtractScalar)):
757
- return exp.Paren(this=expression)
758
- return expression
759
-
760
-
761
- def random(expression: exp.Expression) -> exp.Expression:
762
- """Convert random() and random(seed).
763
-
764
- Snowflake random() is an signed 64 bit integer.
765
- Duckdb random() is a double between 0 and 1 and uses setseed() to set the seed.
766
- """
767
- if isinstance(expression, exp.Select) and (rand := expression.find(exp.Rand)):
768
- # shift result to between min and max signed 64bit integer
769
- new_rand = exp.Cast(
770
- this=exp.Paren(
771
- this=exp.Mul(
772
- this=exp.Paren(this=exp.Sub(this=exp.Rand(), expression=exp.Literal(this="0.5", is_string=False))),
773
- expression=exp.Literal(this="9223372036854775807", is_string=False),
774
- )
775
- ),
776
- to=exp.DataType(this=exp.DataType.Type.BIGINT, nested=False, prefix=False),
777
- )
778
-
779
- rand.replace(new_rand)
780
-
781
- # convert seed to double between 0 and 1 by dividing by max INTEGER (int32)
782
- # (not max BIGINT (int64) because we don't have enough floating point precision to distinguish seeds)
783
- # then attach to SELECT as the seed arg
784
- # (we can't attach it to exp.Rand because it will be rendered in the sql)
785
- if rand.this and isinstance(rand.this, exp.Literal):
786
- expression.args["seed"] = f"{rand.this}/2147483647-0.5"
787
-
788
- return expression
789
-
790
-
791
- def sample(expression: exp.Expression) -> exp.Expression:
792
- if isinstance(expression, exp.TableSample) and not expression.args.get("method"):
793
- # set snowflake default (bernoulli) rather than use the duckdb default (system)
794
- # because bernoulli works better at small row sizes like we have in tests
795
- expression.set("method", exp.Var(this="BERNOULLI"))
796
-
797
- return expression
798
-
799
-
800
def object_construct(expression: exp.Expression) -> exp.Expression:
    """Rewrite OBJECT_CONSTRUCT (a struct expression) as a TO_JSON call.

    Snowflake stores OBJECT values internally as a json string, so the duckdb JSON type
    is the closest match. Key/value pairs in which either side is a NULL literal are
    dropped before the struct is wrapped.

    See https://docs.snowflake.com/en/sql-reference/functions/object_construct
    """
    if not isinstance(expression, exp.Struct):
        return expression

    def _has_null_side(entry: exp.Expression) -> bool:
        # only key = value pairs are candidates for removal
        return isinstance(entry, exp.PropertyEQ) and (
            isinstance(entry.left, exp.Null) or isinstance(entry.right, exp.Null)
        )

    kept_entries = [entry for entry in expression.expressions if not _has_null_side(entry)]

    json_source = expression.copy()
    json_source.set("expressions", kept_entries)
    return exp.Anonymous(this="TO_JSON", expressions=[json_source])
831
-
832
-
833
def regex_replace(expression: exp.Expression) -> exp.Expression:
    """Adapt a snowflake REGEXP_REPLACE with a literal pattern for duckdb.

    Raises:
        NotImplementedError: if more than three arguments are set on the expression.
    """
    if not isinstance(expression, exp.RegexpReplace):
        return expression

    pattern = expression.expression
    if not isinstance(pattern, exp.Literal):
        return expression

    if len(expression.args) > 3:
        # see https://docs.snowflake.com/en/sql-reference/functions/regexp_replace
        raise NotImplementedError(
            "REGEXP_REPLACE with additional parameters (eg: <position>, <occurrence>, <parameters>)"
        )

    args = expression.args

    # snowflake requires escaping backslashes in single-quoted string constants, but duckdb doesn't
    # see https://docs.snowflake.com/en/sql-reference/functions-regexp#label-regexp-escape-character-caveats
    unescaped = pattern.this.replace("\\\\", "\\")
    args["expression"] = exp.Literal(this=unescaped, is_string=True)

    # snowflake defaults the replacement string to ''
    if not args.get("replacement"):
        args["replacement"] = exp.Literal(this="", is_string=True)

    # snowflake replaces every match, so ask duckdb for a global replace
    args["modifiers"] = exp.Literal(this="g", is_string=True)

    return expression
857
-
858
-
859
def regex_substr(expression: exp.Expression) -> exp.Expression:
    """Transform regex_substr expressions from snowflake to duckdb.

    The call is rewritten as an index into
    ``regexp_extract_all(<subject sliced from position>, <pattern>, <group>, <options>)``.

    See https://docs.snowflake.com/en/sql-reference/functions/regexp_substr

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: A bracket (index) expression over regexp_extract_all if the input
            is a RegexpExtract, otherwise expression is returned as-is.
    """

    if not isinstance(expression, exp.RegexpExtract):
        return expression

    subject = expression.this

    # pattern: snowflake requires escaping backslashes in single-quoted string constants, but duckdb doesn't
    # see https://docs.snowflake.com/en/sql-reference/functions-regexp#label-regexp-escape-character-caveats
    # Only string literals are rewritten; for any other node `.this` is not the pattern text.
    pattern = expression.expression
    if isinstance(pattern, exp.Literal) and pattern.is_string:
        pattern.args["this"] = pattern.this.replace("\\\\", "\\")

    # number of characters from the beginning of the string where the function starts searching for matches
    position = expression.args.get("position") or exp.Literal(this="1", is_string=False)

    # which occurrence of the pattern to match
    occurrence = expression.args.get("occurrence")
    occurrence_num = int(occurrence.this) if occurrence else 1

    # the duckdb dialect increments bracket (ie: index) expressions by 1 because duckdb is 1-indexed,
    # so we need to compensate by subtracting 1
    occurrence_index = exp.Literal(this=str(occurrence_num - 1), is_string=False)

    if parameters := expression.args.get("parameters"):
        flags = parameters.this
        # remember whether 'e' (extract sub-match) was requested BEFORE stripping it below,
        # otherwise the group default can never see it
        extract_submatch = isinstance(flags, str) and "e" in flags
        # 'e' parameter doesn't make sense for duckdb
        regex_parameters = exp.Literal(this=flags.replace("e", ""), is_string=True)
    else:
        extract_submatch = False
        regex_parameters = exp.Literal(is_string=True)

    group_num = expression.args.get("group")
    if not group_num:
        # snowflake: 'e' without an explicit group_num defaults to the first group, otherwise the whole match
        group_num = exp.Literal(this="1" if extract_submatch else "0", is_string=False)

    return exp.Bracket(
        this=exp.Anonymous(
            this="regexp_extract_all",
            expressions=[
                # slice subject from position onwards
                exp.Bracket(this=subject, expressions=[exp.Slice(this=position)]),
                pattern,
                group_num,
                regex_parameters,
            ],
        ),
        # select index of occurrence
        expressions=[occurrence_index],
    )
913
-
914
-
915
- # TODO: move this into a Dialect as a transpilation
916
def set_schema(expression: exp.Expression, current_database: str | None = None) -> exp.Expression:
    """Transform USE SCHEMA/DATABASE to SET schema.

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("USE SCHEMA bar").transform(set_schema, current_database="foo").sql()
        "SET schema = 'foo.bar'"
        >>> sqlglot.parse_one("USE SCHEMA foo.bar").transform(set_schema).sql()
        "SET schema = 'foo.bar'"
        >>> sqlglot.parse_one("USE DATABASE marts").transform(set_schema).sql()
        "SET schema = 'marts.main'"

        See tests for more examples.
    Args:
        expression (exp.Expression): the expression that will be transformed.
        current_database (str | None): database used to qualify a USE SCHEMA whose
            identifier does not name a database. Not consulted for USE DATABASE or for a
            database-qualified schema, so it may be omitted in those cases.

    Returns:
        exp.Expression: A SET schema expression if the input is a USE
            expression, otherwise expression is returned as-is.
    """

    if not isinstance(expression, exp.Use):
        return expression

    kind = expression.args.get("kind")
    if not (kind and isinstance(kind, exp.Var) and kind.name):
        return expression

    kind_name = kind.name.upper()
    if kind_name not in ["SCHEMA", "DATABASE"]:
        return expression

    assert expression.this, f"No identifier for USE expression {expression}"

    if kind_name == "DATABASE":
        # duckdb's default schema is main
        database = expression.this.name
        return exp.Command(
            this="SET", expression=exp.Literal.string(f"schema = '{database}.main'"), set_database=database
        )

    # SCHEMA
    if db := expression.this.args.get("db"):  # noqa: SIM108
        db_name = db.name
    else:
        # isn't qualified with a database
        db_name = current_database

    # assertion always true because check_db_schema is called before this
    assert db_name

    schema = expression.this.name
    return exp.Command(this="SET", expression=exp.Literal.string(f"schema = '{db_name}.{schema}'"), set_schema=schema)
969
-
970
-
971
def split(expression: exp.Expression) -> exp.Expression:
    """Wrap SPLIT in to_json.

    duckdb's str_split yields a varchar[] whereas Snowflake returns a JSON array, so the
    result is converted to match.
    """
    if not isinstance(expression, exp.Split):
        return expression

    return exp.Anonymous(this="to_json", expressions=[expression])
979
-
980
-
981
def tag(expression: exp.Expression) -> exp.Expression:
    """Turn tag statements into a no-op.

    duckdb doesn't support tags. In lieu of a full implementation the following are
    replaced by the SUCCESS_NOP statement:

    - ALTER statements with a SET TAG action
    - commands whose text contains SET TAG (eg: alter table modify column set tag)
    - CREATE TAG

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: SUCCESS_NOP for tag statements, otherwise expression as-is.
    """
    if isinstance(expression, exp.Alter):
        actions = expression.args.get("actions") or []
        sets_tag = any(isinstance(action, exp.AlterSet) and action.args.get("tag") for action in actions)
        return SUCCESS_NOP if sets_tag else expression

    if isinstance(expression, exp.Command):
        command_text = expression.args.get("expression")
        if isinstance(command_text, str) and "SET TAG" in command_text.upper():
            # alter table modify column set tag
            return SUCCESS_NOP
        return expression

    if isinstance(expression, exp.Create):
        kind = expression.args.get("kind")
        if isinstance(kind, str) and kind.upper() == "TAG":
            return SUCCESS_NOP

    return expression
1018
-
1019
-
1020
def to_date(expression: exp.Expression) -> exp.Expression:
    """Convert an anonymous TO_DATE(...) call into a cast of its first argument to DATE.

    See https://docs.snowflake.com/en/sql-reference/functions/to_date

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: CAST(<first argument> AS DATE) for TO_DATE calls, otherwise
            expression is returned as-is.
    """
    if not isinstance(expression, exp.Anonymous):
        return expression

    func_name = expression.this
    if not (isinstance(func_name, str) and func_name.upper() == "TO_DATE"):
        return expression

    # only the first argument is used
    value = expression.expressions[0]
    date_type = exp.DataType(this=exp.DataType.Type.DATE, nested=False, prefix=False)
    return exp.Cast(this=value, to=date_type)
1046
-
1047
-
1048
def _get_to_number_args(e: exp.ToNumber) -> tuple[exp.Expression | None, exp.Expression | None, exp.Expression | None]:
    """Work out the (format, precision, scale) arguments of a TO_NUMBER call.

    The arguments are stored under the "format", "precision" and "scale" keys by
    position, ie: to_number(value, <format>, <precision>, <scale>). When the value in the
    format slot isn't a string it is really the precision, and the value in the
    precision slot is then the scale.

    Returns:
        A (format, precision, scale) tuple with None for each argument not supplied.
    """
    in_format_slot = e.args.get("format")
    in_precision_slot = e.args.get("precision")
    in_scale_slot = e.args.get("scale")

    if in_format_slot:
        if in_format_slot.is_string:
            # to_number('100', 'TM9'), to_number('100', 'TM9', 10), to_number('100', 'TM9', 10, 2)
            return in_format_slot, in_precision_slot or None, in_scale_slot or None

        # to_number('100', 10) or to_number('100', 10, 2): every argument is shifted one slot left
        return None, in_format_slot, in_precision_slot or None

    if in_precision_slot:
        return None, in_precision_slot, in_scale_slot or None

    return None, None, None
1085
-
1086
-
1087
def _to_decimal(expression: exp.Expression, cast_node: type[exp.Cast]) -> exp.Expression:
    """Build a DECIMAL cast from the positional arguments (value, precision, scale) of expression.

    Args:
        expression: function call whose `expressions` hold the arguments.
        cast_node: the cast class to instantiate, eg: exp.Cast or exp.TryCast.

    Raises:
        NotImplementedError: if the second argument is a string, ie: a format.
    """
    call_args: list[exp.Expression] = expression.expressions
    has_second_arg = len(call_args) > 1

    if has_second_arg and call_args[1].is_string:
        # see https://docs.snowflake.com/en/sql-reference/functions/to_decimal#arguments
        raise NotImplementedError(f"{expression.this} with format argument")

    # precision falls back to 38 and scale to 0 when not given
    if has_second_arg:
        precision = call_args[1]
    else:
        precision = exp.Literal(this="38", is_string=False)

    if len(call_args) > 2:
        scale = call_args[2]
    else:
        scale = exp.Literal(this="0", is_string=False)

    value = call_args[0]
    decimal_type = exp.DataType(
        this=exp.DataType.Type.DECIMAL, expressions=[precision, scale], nested=False, prefix=False
    )
    return cast_node(this=value, to=decimal_type)
1101
-
1102
-
1103
def to_decimal(expression: exp.Expression) -> exp.Expression:
    """Rewrite snowflake's to_decimal, to_number and to_numeric as a duckdb DECIMAL cast.

    See https://docs.snowflake.com/en/sql-reference/functions/to_decimal

    Raises:
        NotImplementedError: if a format argument is supplied.
    """
    if isinstance(expression, exp.ToNumber):
        format_, precision, scale = _get_to_number_args(expression)
        if format_:
            raise NotImplementedError(f"{expression.this} with format argument")

        # precision falls back to 38 and scale to 0 when not given
        decimal_type = exp.DataType(
            this=exp.DataType.Type.DECIMAL,
            expressions=[
                precision or exp.Literal(this="38", is_string=False),
                scale or exp.Literal(this="0", is_string=False),
            ],
            nested=False,
            prefix=False,
        )
        return exp.Cast(this=expression.this, to=decimal_type)

    if not isinstance(expression, exp.Anonymous):
        return expression

    func_name = expression.this
    if isinstance(func_name, str) and func_name.upper() in ["TO_DECIMAL", "TO_NUMERIC"]:
        return _to_decimal(expression, exp.Cast)

    return expression
1132
-
1133
-
1134
def try_to_decimal(expression: exp.Expression) -> exp.Expression:
    """Rewrite snowflake's try_to_decimal, try_to_number and try_to_numeric as a duckdb TRY_CAST to DECIMAL.

    See https://docs.snowflake.com/en/sql-reference/functions/try_to_decimal
    """
    if not isinstance(expression, exp.Anonymous):
        return expression

    func_name = expression.this
    if isinstance(func_name, str) and func_name.upper() in ["TRY_TO_DECIMAL", "TRY_TO_NUMBER", "TRY_TO_NUMERIC"]:
        return _to_decimal(expression, exp.TryCast)

    return expression
1147
-
1148
-
1149
def to_timestamp(expression: exp.Expression) -> exp.Expression:
    """Cast to_timestamp(seconds) to a timestamp without timezone (ie: TIMESTAMP_NTZ).

    See https://docs.snowflake.com/en/sql-reference/functions/to_timestamp
    """
    if not isinstance(expression, exp.UnixToTime):
        return expression

    timestamp_type = exp.DataType(this=exp.DataType.Type.TIMESTAMP, nested=False, prefix=False)
    # the UnixToTime node itself becomes the operand of the cast
    return exp.Cast(this=expression, to=timestamp_type)
1161
-
1162
-
1163
def to_timestamp_ntz(expression: exp.Expression) -> exp.Expression:
    """Rewrite an anonymous TO_TIMESTAMP_NTZ call as to_timestamp (StrToTime) of its first argument.

    Needed because sqlglot doesn't support it yet, see https://github.com/tobymao/sqlglot/issues/2748
    """
    if not isinstance(expression, exp.Anonymous):
        return expression

    func_name = expression.this
    if not (isinstance(func_name, str) and func_name.upper() == "TO_TIMESTAMP_NTZ"):
        return expression

    value = expression.expressions[0]
    time_format = exp.Literal(this="%Y-%m-%d %H:%M:%S", is_string=True)
    return exp.StrToTime(this=value, format=time_format)
1177
-
1178
-
1179
def timestamp_ntz(expression: exp.Expression) -> exp.Expression:
    """Map the snowflake TIMESTAMP_NTZ data type onto duckdb's TIMESTAMP.

    NB: timestamp_ntz defaults to nanosecond precision (ie: NTZ(9)), for which the duckdb
    equivalent is TIMESTAMP_NS. TIMESTAMP (ie: microsecond precision) is used instead to avoid
    https://github.com/duckdb/duckdb/issues/7980 in test_write_pandas_timestamp_ntz.
    """
    if not isinstance(expression, exp.DataType):
        return expression

    if expression.this != exp.DataType.Type.TIMESTAMPNTZ:
        return expression

    return exp.DataType(this=exp.DataType.Type.TIMESTAMP)
1191
-
1192
-
1193
def trim_cast_varchar(expression: exp.Expression) -> exp.Expression:
    """Snowflake's TRIM casts input to VARCHAR implicitly.

    Wraps the operand of TRIM in an explicit cast to VARCHAR unless it is already cast
    to VARCHAR or TEXT. Every other argument of the TRIM (eg: the characters to trim or
    the position) is carried over unchanged.

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: A copy of the TRIM with its operand cast to VARCHAR, otherwise
            expression is returned as-is.
    """

    if not isinstance(expression, exp.Trim):
        return expression

    operand = expression.this
    if isinstance(operand, exp.Cast) and operand.to.this in [exp.DataType.Type.VARCHAR, exp.DataType.Type.TEXT]:
        return expression

    # copy rather than build a bare exp.Trim so that no other argument of the original call is lost
    trimmed = expression.copy()
    trimmed.set(
        "this",
        exp.Cast(this=trimmed.this, to=exp.DataType(this=exp.DataType.Type.VARCHAR, nested=False, prefix=False)),
    )
    return trimmed
1206
-
1207
-
1208
def try_parse_json(expression: exp.Expression) -> exp.Expression:
    """Convert TRY_PARSE_JSON() to TRY_CAST(... as JSON).

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("select try_parse_json('{}')").transform(try_parse_json).sql()
        "SELECT TRY_CAST('{}' AS JSON)"
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: The transformed expression.
    """
    if not isinstance(expression, exp.Anonymous):
        return expression

    func_name = expression.this
    if not (isinstance(func_name, str) and func_name.upper() == "TRY_PARSE_JSON"):
        return expression

    call_args = expression.expressions
    json_type = exp.DataType(this=exp.DataType.Type.JSON, nested=False)
    return exp.TryCast(this=call_args[0], to=json_type)
1234
-
1235
-
1236
def semi_structured_types(expression: exp.Expression) -> exp.Expression:
    """Replace the OBJECT, ARRAY and VARIANT data types with JSON for duckdb.

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("CREATE TABLE table1 (name object)").transform(semi_structured_types).sql()
        "CREATE TABLE table1 (name JSON)"
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: The transformed expression.
    """
    if not isinstance(expression, exp.DataType):
        return expression

    semi_structured = [
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.OBJECT,
        exp.DataType.Type.VARIANT,
    ]
    if expression.this not in semi_structured:
        return expression

    # work on a copy so the original data type node is left untouched
    json_type = expression.copy()
    json_type.args["this"] = exp.DataType.Type.JSON
    return json_type
1260
-
1261
-
1262
def upper_case_unquoted_identifiers(expression: exp.Expression) -> exp.Expression:
    """Convert unquoted identifiers to upper case.

    Snowflake represents case-insensitivity using upper-case identifiers in cursor results,
    whereas duckdb uses lowercase. Upper-casing every unquoted identifier matches snowflake.

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("select name, name as fname from table1").transform(upper_case_unquoted_identifiers).sql()
        'SELECT NAME, NAME AS FNAME FROM TABLE1'
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: The transformed expression.
    """
    if not isinstance(expression, exp.Identifier):
        return expression

    # quoted identifiers are left as written
    if expression.quoted or not isinstance(expression.this, str):
        return expression

    upper_cased = expression.copy()
    upper_cased.set("this", expression.this.upper())
    return upper_cased
1285
-
1286
-
1287
def values_columns(expression: exp.Expression) -> exp.Expression:
    """Name the columns of an un-aliased VALUES inside a SELECT as COLUMN1, COLUMN2, ..

    Snowflake uses column1, column2 .. for unnamed columns in VALUES, whereas duckdb uses col0, col1 ..
    See https://docs.snowflake.com/en/sql-reference/constructs/values#examples
    """
    if not isinstance(expression, exp.Values) or expression.alias:
        return expression

    if not expression.find_ancestor(exp.Select):
        return expression

    first_row = expression.find(exp.Tuple)
    if not first_row:
        return expression

    # snowflake numbers the columns from 1
    column_names = [
        exp.Identifier(this=f"COLUMN{position}", quoted=True)
        for position in range(1, len(first_row.expressions) + 1)
    ]
    table_alias = exp.TableAlias(this=exp.Identifier(this="_", quoted=False), columns=column_names)
    expression.set("alias", table_alias)

    return expression
1305
-
1306
-
1307
def create_user(expression: exp.Expression) -> exp.Expression:
    """Transform CREATE USER to a query against the global database's information_schema._fs_users table.

    https://docs.snowflake.com/en/sql-reference/sql/create-user

    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: An INSERT into the _fs_users table for CREATE USER commands,
            otherwise expression is returned as-is.

    Raises:
        ValueError: if no user name follows CREATE USER.
        NotImplementedError: if anything follows the user name.
    """
    # XXX: this is a placeholder. We need to implement the full CREATE USER syntax, but
    # sqlglot doesnt yet support Create for snowflake.
    if not (isinstance(expression, exp.Command) and expression.this == "CREATE"):
        return expression

    command_text = expression.expression
    if not isinstance(command_text, str):
        return expression

    # split on any run of whitespace so repeated spaces, tabs or newlines don't produce empty tokens
    tokens = command_text.split()
    if not tokens or tokens[0].upper() != "USER":
        return expression

    if len(tokens) < 2:
        raise ValueError("`CREATE USER` requires a user name")

    name, ignored = tokens[1], tokens[2:]
    if ignored:
        raise NotImplementedError(f"`CREATE USER` with {ignored}")

    # render the name as a SQL string literal so that embedded quotes are escaped
    name_sql = exp.Literal.string(name).sql(dialect="duckdb")
    return sqlglot.parse_one(
        f"INSERT INTO _fs_global._fs_information_schema._fs_users (name) VALUES ({name_sql})", read="duckdb"
    )
1325
-
1326
-
1327
def update_variables(
    expression: exp.Expression,
    variables: Variables,
) -> exp.Expression:
    """Apply a variable-modifying statement to variables and replace it with a no-op.

    Any expression that Variables doesn't recognise as a variable modifier is returned as-is.
    """
    if not Variables.is_variable_modifier(expression):
        return expression

    variables.update_variables(expression)
    # Nothing further to do if its a SET/UNSET operation.
    return SUCCESS_NOP
1335
-
1336
-
1337
class SHA256(exp.Func):
    """Function expression for SHA256 taking a single required argument."""

    # the only argument, `this`, is required
    arg_types: ClassVar = {"this": True}
    # SQL function name(s) for this expression
    _sql_names: ClassVar = ["SHA256"]
1340
-
1341
-
1342
def sha256(expression: exp.Expression) -> exp.Expression:
    """Convert sha2() or sha2_hex() to sha256().

    Convert sha2_binary() to unhex(sha256()).

    Only a digest length of 256 (given explicitly or omitted) is converted; any other
    length is returned untouched.

    Example:
        >>> import sqlglot
        >>> sqlglot.parse_one("insert into table1 (name) select sha2('foo')").transform(sha256).sql()
        "INSERT INTO table1 (name) SELECT SHA256('foo')"
    Args:
        expression (exp.Expression): the expression that will be transformed.

    Returns:
        exp.Expression: The transformed expression.
    """

    if isinstance(expression, exp.SHA2):
        # `or` (rather than a .get default) also covers a length key that is present but None
        length = expression.args.get("length") or exp.Literal.number(256)
        if length.this == "256":
            return SHA256(this=expression.this)
        return expression

    if not (isinstance(expression, exp.Anonymous) and isinstance(expression.this, str)):
        return expression

    func_name = expression.this.upper()
    if func_name not in ("SHA2_HEX", "SHA2_BINARY"):
        return expression

    call_args = expression.expressions
    wants_256 = len(call_args) == 1 or (len(call_args) == 2 and call_args[1].this == "256")
    if not wants_256:
        return expression

    digest = SHA256(this=call_args[0])
    # sha2_binary returns the raw bytes rather than the hex string
    return digest if func_name == "SHA2_HEX" else exp.Unhex(this=digest)