fakesnow 0.8.2__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fakesnow/__init__.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import contextlib
 import importlib
+import os
 import sys
 import unittest.mock as mock
 from collections.abc import Iterator, Sequence
@@ -19,6 +20,7 @@ def patch(
     extra_targets: str | Sequence[str] = [],
     create_database_on_connect: bool = True,
     create_schema_on_connect: bool = True,
+    db_path: str | os.PathLike | None = None,
 ) -> Iterator[None]:
     """Patch snowflake targets with fakes.
 
@@ -28,12 +30,15 @@ def patch(
 
     Args:
         extra_targets (str | Sequence[str], optional): Extra targets to patch. Defaults to [].
-        create_database_on_connect (bool, optional): Create database if provided in connection. Defaults to True.
-        create_schema_on_connect (bool, optional): Create schema if provided in connection. Defaults to True.
 
            Allows extra targets beyond the standard snowflake.connector targets to be patched. Needed because we cannot
            patch definitions, only usages, see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
 
+        create_database_on_connect (bool, optional): Create database if provided in connection. Defaults to True.
+        create_schema_on_connect (bool, optional): Create schema if provided in connection. Defaults to True.
+        db_path (str | os.PathLike | None, optional): _description_. Use existing database files from this path
+            or create them here if they don't already exist. If None databases are in-memory. Defaults to None.
+
     Yields:
         Iterator[None]: None.
     """
@@ -51,6 +56,7 @@ def patch(
             duck_conn.cursor(),
             create_database=create_database_on_connect,
             create_schema=create_schema_on_connect,
+            db_path=db_path,
             **kwargs,
         ),
         snowflake.connector.pandas_tools.write_pandas: fakes.write_pandas,
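
The new db_path parameter flows from patch() through FakeSnowflakeConnection to duckdb's ATTACH (see fakes.py below). A minimal usage sketch, assuming only snowflake-connector-python is installed; the directory and database names are illustrative:

    import fakesnow
    import snowflake.connector

    with fakesnow.patch(db_path="databases/"):
        # creates databases/DB1.db on first use and reuses it on later runs;
        # with db_path=None (the default) databases stay in-memory as before
        conn = snowflake.connector.connect(database="db1", schema="s1")
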
fakesnow/checks.py CHANGED
@@ -37,10 +37,10 @@ def is_unqualified_table_expression(expression: exp.Expression) -> tuple[bool, bool]:
         no_schema = False
     elif parent_kind.upper() == "SCHEMA":
         # "CREATE/DROP SCHEMA"
-        no_database = not node.args.get("db")
+        no_database = not node.args.get("catalog")
         no_schema = False
-    elif parent_kind.upper() == "TABLE":
-        # "DROP TABLE"
+    elif parent_kind.upper() in {"TABLE", "VIEW"}:
+        # "CREATE/DROP TABLE/VIEW"
         no_database = not node.args.get("catalog")
         no_schema = not node.args.get("db")
     else:
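
The switch from db to catalog tracks how sqlglot qualifies names: in a three-part table name the database is the catalog arg, so the database qualifier of a schema name also lands in catalog. A hedged illustration; the exact parse shape depends on the sqlglot version this release pins:

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("CREATE SCHEMA marts.sales", read="snowflake").find(exp.Table)
    print(node.args.get("catalog"))  # marts - the database qualifier
    print(node.args.get("db"))       # sales - the schema itself
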
fakesnow/cli.py CHANGED
@@ -1,28 +1,70 @@
+import argparse
 import runpy
 import sys
 from collections.abc import Sequence
 
 import fakesnow
 
-USAGE = "Usage: fakesnow <path> | -m <module> [<arg>]..."
 
+def arg_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="""eg: fakesnow script.py OR fakesnow -m pytest""",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "-d",
+        "--db_path",
+        help="databases path. Use existing database files from this path or create them here if they don't already "
+        "exist. If None databases are in-memory.",
+    )
+    parser.add_argument("-m", "--module", help="target module")
+    parser.add_argument("path", type=str, nargs="?", help="target path")
+    parser.add_argument("targs", nargs="*", help="target args")
+    return parser
 
-def main(args: Sequence[str] = sys.argv) -> int:
-    if len(args) < 2 or (len(args) == 2 and args[1] == "-m"):
-        print(USAGE, file=sys.stderr)
-        return 42
 
-    with fakesnow.patch():
-        if args[1] == "-m":
-            module = args[2]
-            sys.argv = args[2:]
+def split(args: Sequence[str]) -> tuple[Sequence[str], Sequence[str]]:
+    # split the arguments into two lists either:
+    # 1) after the first -m flag, or
+    # 2) after the first positional arg
+    in_flag = False
+    i = 0
+    for i in range(len(args)):
+        a = args[i]
+        if a in ["-m", "--module"]:
+            i = min(i + 1, len(args) - 1)
+            break
+        elif a.startswith("-"):
+            in_flag = True
+        elif not in_flag:
+            break
+        else:
+            in_flag = False
+
+    return args[: i + 1], args[i + 1 :]
+
+
+def main(args: Sequence[str] = sys.argv[1:]) -> int:
+    parser = arg_parser()
+    # split args so the fakesnow cli doesn't consume from the target's args (eg: -m and -d)
+    fsargs, targs = split(args)
+    pargs = parser.parse_args(fsargs)
+
+    with fakesnow.patch(db_path=pargs.db_path):
+        if module := pargs.module:
+            # NB: pargs.path and pargs.targs are consumed by targs
+            sys.argv = [module, *targs]
 
             # add current directory to path to mimic python -m
             sys.path.insert(0, "")
             runpy.run_module(module, run_name="__main__", alter_sys=True)
-        else:
-            path = args[1]
-            sys.argv = args[1:]
+        elif path := pargs.path:
+            # NB: pargs.targs is consumed by targs
+            sys.argv = [path, *targs]
+
             runpy.run_path(path, run_name="__main__")
+        else:
+            parser.print_usage()
+            return 42
 
     return 0
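
split() exists so the fakesnow CLI doesn't swallow flags that belong to the target program. A hand-run of the function as written above, with hypothetical argv values:

    from fakesnow.cli import split

    # everything up to and including the -m target goes to the fakesnow parser,
    # the rest is forwarded to the target via sys.argv
    assert split(["-d", "databases", "-m", "pytest", "-k", "test_foo"]) == (
        ["-d", "databases", "-m", "pytest"],
        ["-k", "test_foo"],
    )
    # with a path target, the split happens after the first positional arg
    assert split(["script.py", "arg1"]) == (["script.py"], ["arg1"])
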
fakesnow/fakes.py CHANGED
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+import json
 import os
 import re
 import sys
 from collections.abc import Iterable, Iterator, Sequence
+from pathlib import Path
 from string import Template
 from types import TracebackType
 from typing import TYPE_CHECKING, Any, Literal, Optional, cast
@@ -26,15 +28,18 @@ from typing_extensions import Self
 import fakesnow.checks as checks
 import fakesnow.expr as expr
 import fakesnow.info_schema as info_schema
+import fakesnow.macros as macros
 import fakesnow.transforms as transforms
 
 SCHEMA_UNSET = "schema_unset"
-SUCCESS_SQL = "SELECT 'Statement executed successfully.' as status"
-DATABASE_CREATED_SQL = Template("SELECT 'Database ${name} successfully created.' as status")
-TABLE_CREATED_SQL = Template("SELECT 'Table ${name} successfully created.' as status")
-DROPPED_SQL = Template("SELECT '${name} successfully dropped.' as status")
-SCHEMA_CREATED_SQL = Template("SELECT 'Schema ${name} successfully created.' as status")
-INSERTED_SQL = Template("SELECT ${count} as 'number of rows inserted'")
+SQL_SUCCESS = "SELECT 'Statement executed successfully.' as 'status'"
+SQL_CREATED_DATABASE = Template("SELECT 'Database ${name} successfully created.' as 'status'")
+SQL_CREATED_SCHEMA = Template("SELECT 'Schema ${name} successfully created.' as 'status'")
+SQL_CREATED_TABLE = Template("SELECT 'Table ${name} successfully created.' as 'status'")
+SQL_DROPPED = Template("SELECT '${name} successfully dropped.' as 'status'")
+SQL_INSERTED_ROWS = Template("SELECT ${count} as 'number of rows inserted'")
+SQL_UPDATED_ROWS = Template("SELECT ${count} as 'number of rows updated', 0 as 'number of multi-joined rows updated'")
+SQL_DELETED_ROWS = Template("SELECT ${count} as 'number of rows deleted'")
 
 
 class FakeSnowflakeCursor:
@@ -59,6 +64,9 @@ class FakeSnowflakeCursor:
         self._last_params = None
         self._sqlstate = None
         self._arraysize = 1
+        self._arrow_table = None
+        self._arrow_table_fetch_index = None
+        self._rowcount = None
         self._converter = snowflake.connector.converter.SnowflakeConverter()
 
     def __enter__(self) -> Self:
@@ -69,8 +77,8 @@ class FakeSnowflakeCursor:
         exc_type: type[BaseException] | None,
         exc_value: BaseException | None,
         traceback: TracebackType | None,
-    ) -> bool:
-        return False
+    ) -> None:
+        pass
 
     @property
     def arraysize(self) -> int:
@@ -96,22 +104,16 @@ class FakeSnowflakeCursor:
 
         describe = f"DESCRIBE {command}"
         self.execute(describe, *args, **kwargs)
-        return FakeSnowflakeCursor._describe_as_result_metadata(self._duck_conn.fetchall())
+        return FakeSnowflakeCursor._describe_as_result_metadata(self.fetchall())
 
     @property
     def description(self) -> list[ResultMetadata]:
-        # use a cursor to avoid destroying an unfetched result on the main connection
-        with self._duck_conn.cursor() as cur:
-            # TODO: allow sql alchemy connection with no database or schema
-            assert self._conn.database, ".description not implemented when database is None"
-            assert self._conn.schema, ".description not implemented when schema is None"
-
-            # match database and schema used on the main connection
-            cur.execute(f"SET SCHEMA = '{self._conn.database}.{self._conn.schema}'")
+        # use a separate cursor to avoid consuming the result set on this cursor
+        with self._conn.cursor() as cur:
             cur.execute(f"DESCRIBE {self._last_sql}", self._last_params)
             meta = FakeSnowflakeCursor._describe_as_result_metadata(cur.fetchall())
 
-        return meta  # type: ignore see https://github.com/duckdb/duckdb/issues/7816
+        return meta
 
     def execute(
         self,
@@ -135,6 +137,8 @@ class FakeSnowflakeCursor:
         **kwargs: Any,
     ) -> FakeSnowflakeCursor:
         self._arrow_table = None
+        self._arrow_table_fetch_index = None
+        self._rowcount = None
 
         command, params = self._rewrite_with_params(command, params)
         expression = parse_one(command, read="snowflake")
@@ -159,10 +163,11 @@ class FakeSnowflakeCursor:
         transformed = (
             expression.transform(transforms.upper_case_unquoted_identifiers)
             .transform(transforms.set_schema, current_database=self._conn.database)
-            .transform(transforms.create_database)
-            .transform(transforms.extract_comment)
-            .transform(transforms.information_schema_columns_snowflake)
-            .transform(transforms.information_schema_tables_ext)
+            .transform(transforms.create_database, db_path=self._conn.db_path)
+            .transform(transforms.extract_comment_on_table)
+            .transform(transforms.extract_comment_on_columns)
+            .transform(transforms.information_schema_fs_columns_snowflake)
+            .transform(transforms.information_schema_fs_tables_ext)
             .transform(transforms.drop_schema_cascade)
             .transform(transforms.tag)
             .transform(transforms.semi_structured_types)
@@ -188,20 +193,20 @@ class FakeSnowflakeCursor:
             .transform(transforms.array_size)
             .transform(transforms.random)
             .transform(transforms.identifier)
+            .transform(lambda e: transforms.show_schemas(e, self._conn.database))
+            .transform(lambda e: transforms.show_objects_tables(e, self._conn.database))
         )
         sql = transformed.sql(dialect="duckdb")
+        result_sql = None
 
         if transformed.find(exp.Select) and (seed := transformed.args.get("seed")):
            sql = f"SELECT setseed({seed}); {sql}"
 
-        if os.environ.get("FAKESNOW_DEBUG") == "snowflake":
-            print(f"{command};", file=sys.stderr)
-        elif os.environ.get("FAKESNOW_DEBUG"):
-            print(f"{sql};", file=sys.stderr)
+        if fs_debug := os.environ.get("FAKESNOW_DEBUG"):
+            debug = command if fs_debug == "snowflake" else sql
+            print(f"{debug};{params=}" if params else f"{debug};", file=sys.stderr)
 
         try:
-            self._last_sql = sql
-            self._last_params = params
             self._duck_conn.execute(sql, params)
         except duckdb.BinderException as e:
             msg = e.args[0]
@@ -215,49 +220,62 @@ class FakeSnowflakeCursor:
                 e
             ) or "cannot commit - no transaction is active" in str(e):
                 # snowflake doesn't error on rollback or commit outside a tx
-                self._duck_conn.execute(SUCCESS_SQL)
-                self._last_sql = SUCCESS_SQL
+                result_sql = SQL_SUCCESS
             else:
                 raise e
 
+        affected_count = None
         if cmd == "USE DATABASE" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             self._conn.database = ident.this.upper()
             self._conn.database_set = True
 
-        if cmd == "USE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
+        elif cmd == "USE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             self._conn.schema = ident.this.upper()
             self._conn.schema_set = True
 
-        if create_db_name := transformed.args.get("create_db_name"):
+        elif create_db_name := transformed.args.get("create_db_name"):
             # we created a new database, so create the info schema extensions
             self._duck_conn.execute(info_schema.creation_sql(create_db_name))
-            created_sql = DATABASE_CREATED_SQL.substitute(name=create_db_name)
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_DATABASE.substitute(name=create_db_name)
 
-        if cmd == "CREATE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
+        elif cmd == "CREATE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-            created_sql = SCHEMA_CREATED_SQL.substitute(name=name)
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_SCHEMA.substitute(name=name)
 
-        if cmd == "CREATE TABLE" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
+        elif cmd == "CREATE TABLE" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-            created_sql = TABLE_CREATED_SQL.substitute(name=name)
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_TABLE.substitute(name=name)
 
-        if cmd.startswith("DROP") and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
+        elif cmd.startswith("DROP") and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-            dropped_sql = DROPPED_SQL.substitute(name=name)
-            self._duck_conn.execute(dropped_sql)
-            self._last_sql = dropped_sql
+            result_sql = SQL_DROPPED.substitute(name=name)
+
+            # if dropping the current database/schema then reset conn metadata
+            if cmd == "DROP DATABASE" and name == self._conn.database:
+                self._conn.database = None
+                self._conn.schema = None
+
+            elif cmd == "DROP SCHEMA" and name == self._conn.schema:
+                self._conn.schema = None
+
+        elif cmd == "INSERT":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_INSERTED_ROWS.substitute(count=affected_count)
 
-        if cmd == "INSERT":
-            (count,) = self._duck_conn.fetchall()[0]
-            inserted_sql = INSERTED_SQL.substitute(count=count)
-            self._duck_conn.execute(inserted_sql)
-            self._last_sql = inserted_sql
+        elif cmd == "UPDATE":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_UPDATED_ROWS.substitute(count=affected_count)
+
+        elif cmd == "DELETE":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_DELETED_ROWS.substitute(count=affected_count)
+
+        elif cmd == "DESCRIBE TABLE":
+            # DESCRIBE TABLE has already been run above to detect and error if the table exists
+            # We now rerun DESCRIBE TABLE but transformed with columns to match Snowflake
+            result_sql = transformed.transform(
+                lambda e: transforms.describe_table(e, self._conn.database, self._conn.schema)
+            ).sql(dialect="duckdb")
 
         if table_comment := cast(tuple[exp.Table, str], transformed.args.get("table_comment")):
             # record table comment
@@ -276,6 +294,15 @@ class FakeSnowflakeCursor:
             assert catalog and schema
             self._duck_conn.execute(info_schema.insert_text_lengths_sql(catalog, schema, table.name, text_lengths))
 
+        if result_sql:
+            self._duck_conn.execute(result_sql)
+
+        self._arrow_table = self._duck_conn.fetch_arrow_table()
+        self._rowcount = affected_count or self._arrow_table.num_rows
+
+        self._last_sql = result_sql or sql
+        self._last_params = params
+
         return self
 
     def executemany(
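
With results staged in result_sql and materialised once as an Arrow table, DDL and DML now report status and counts the way Snowflake does. A hedged sketch, reusing conn from the patch() example earlier:

    cur = conn.cursor()
    cur.execute("CREATE TABLE t (i INT)")
    print(cur.fetchall())  # [('Table T successfully created.',)]
    cur.execute("INSERT INTO t VALUES (1), (2)")
    print(cur.rowcount)    # 2 - the affected row count
    print(cur.fetchall())  # [(2,)] - snowflake's 'number of rows inserted' result
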
@@ -298,13 +325,16 @@ class FakeSnowflakeCursor:
         return self
 
     def fetchall(self) -> list[tuple] | list[dict]:
-        if self._use_dict_result:
-            return self._duck_conn.fetch_arrow_table().to_pylist()
-        else:
-            return self._duck_conn.fetchall()
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise TypeError("No open result set")
+        return self.fetchmany(self._arrow_table.num_rows)
 
     def fetch_pandas_all(self, **kwargs: dict[str, Any]) -> pd.DataFrame:
-        return self._duck_conn.fetch_df()
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise snowflake.connector.NotSupportedError("No open result set")
+        return self._arrow_table.to_pandas()
 
     def fetchone(self) -> dict | tuple | None:
         result = self.fetchmany(1)
@@ -313,35 +343,26 @@ class FakeSnowflakeCursor:
     def fetchmany(self, size: int | None = None) -> list[tuple] | list[dict]:
         # https://peps.python.org/pep-0249/#fetchmany
         size = size or self._arraysize
-        if not self._use_dict_result:
-            return cast(list[tuple], self._duck_conn.fetchmany(size))
-
-        if not self._arrow_table:
-            self._arrow_table = self._duck_conn.fetch_arrow_table()
-            self._arrow_table_fetch_index = -size
 
-        self._arrow_table_fetch_index += size
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise TypeError("No open result set")
+        if self._arrow_table_fetch_index is None:
+            self._arrow_table_fetch_index = 0
+        else:
+            self._arrow_table_fetch_index += size
 
-        return self._arrow_table.slice(offset=self._arrow_table_fetch_index, length=size).to_pylist()
+        tslice = self._arrow_table.slice(offset=self._arrow_table_fetch_index, length=size).to_pylist()
+        return tslice if self._use_dict_result else [tuple(d.values()) for d in tslice]
 
     def get_result_batches(self) -> list[ResultBatch] | None:
-        # rows_per_batch is approximate
-        # see https://github.com/duckdb/duckdb/issues/4755
-        reader = self._duck_conn.fetch_record_batch(rows_per_batch=1000)
-
-        batches = []
-        try:
-            while True:
-                batches.append(FakeResultBatch(self._use_dict_result, reader.read_next_batch()))
-        except StopIteration:
-            pass
-
-        return batches
+        if self._arrow_table is None:
+            return None
+        return [FakeResultBatch(self._use_dict_result, b) for b in self._arrow_table.to_batches(max_chunksize=1000)]
 
     @property
     def rowcount(self) -> int | None:
-        # TODO: return number of rows updated/inserted (using returning)
-        return None
+        return self._rowcount
 
     @property
     def sfqid(self) -> str | None:
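
fetchmany now pages through the cached Arrow table, so dict- and tuple-style cursors share one code path. Hand-derived behaviour from the code above, continuing with table t from the earlier sketch:

    cur.execute("SELECT i FROM t ORDER BY i")
    print(cur.fetchmany(1))  # [(1,)] - fetch index starts at 0
    print(cur.fetchmany(1))  # [(2,)] - index advances by size on later calls
    print(cur.fetchmany(1))  # []    - past the end of the arrow table
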
@@ -392,6 +413,10 @@ class FakeSnowflakeCursor:
             return ResultMetadata(
                 name=column_name, type_code=8, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
             )
+        elif column_type == "TIMESTAMP WITH TIME ZONE":
+            return ResultMetadata(
+                name=column_name, type_code=7, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
+            )
         elif column_type == "BLOB":
             return ResultMetadata(
                 name=column_name, type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=True  # noqa: E501
@@ -446,15 +471,18 @@ class FakeSnowflakeConnection:
         schema: str | None = None,
         create_database: bool = True,
         create_schema: bool = True,
+        db_path: str | os.PathLike | None = None,
         *args: Any,
         **kwargs: Any,
     ):
         self._duck_conn = duck_conn
-        # upper case database and schema like snowflake
+        # upper case database and schema like snowflake unquoted identifiers
+        # NB: catalog names are not case-sensitive in duckdb but stored as cased in information_schema.schemata
         self.database = database and database.upper()
         self.schema = schema and schema.upper()
         self.database_set = False
         self.schema_set = False
+        self.db_path = db_path
         self._paramstyle = "pyformat"
 
         # create database if needed
@@ -466,8 +494,10 @@ class FakeSnowflakeConnection:
                 where catalog_name = '{self.database}'"""
             ).fetchone()
         ):
-            duck_conn.execute(f"ATTACH DATABASE ':memory:' AS {self.database}")
+            db_file = f"{Path(db_path)/self.database}.db" if db_path else ":memory:"
+            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
             duck_conn.execute(info_schema.creation_sql(self.database))
+            duck_conn.execute(macros.creation_sql(self.database))
 
         # create schema if needed
         if (
@@ -505,7 +535,7 @@ class FakeSnowflakeConnection:
             self.database_set = True
 
         # use UTC instead of local time zone for consistent testing
-        duck_conn.execute("SET TimeZone = 'UTC'")
+        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
 
     def __enter__(self) -> Self:
         return self
@@ -515,8 +545,8 @@ class FakeSnowflakeConnection:
         exc_type: type[BaseException] | None,
         exc_value: BaseException | None,
         traceback: TracebackType | None,
-    ) -> bool:
-        return False
+    ) -> None:
+        pass
 
     def commit(self) -> None:
         self.cursor().execute("COMMIT")
@@ -545,12 +575,27 @@ class FakeSnowflakeConnection:
     def _insert_df(
         self, df: pd.DataFrame, table_name: str, database: str | None = None, schema: str | None = None
     ) -> int:
-        # dicts in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings
-        # whereas duckdb loads them as a struct, so we convert them to json here
-        cols = [f"TO_JSON({c})" if isinstance(df[c][0], dict) else c for c in df.columns]
-        cols = ",".join(cols)
-
-        self._duck_conn.execute(f"INSERT INTO {table_name}({','.join(df.columns.to_list())}) SELECT {cols} FROM df")
+        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
+        # Whereas duckdb analyses a dataframe, see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
+        # and converts an object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST.
+        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
+        #
+        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
+        # varchar and the value a string containing a struct representation. In order to support dicts with different
+        # keys we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the
+        # dataframe copy and transform in python.
+
+        df = df.copy()
+
+        # Identify columns of type object
+        object_cols = df.select_dtypes(include=["object"]).columns
+
+        # Apply json.dumps to these columns
+        for col in object_cols:
+            # don't jsonify strings
+            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
+
+        self._duck_conn.execute(f"INSERT INTO {table_name}({','.join(df.columns.to_list())}) SELECT * FROM df")
         return self._duck_conn.fetchall()[0][0]
 
fakesnow/fixtures.py CHANGED
@@ -7,17 +7,17 @@ import fakesnow
 
 @pytest.fixture
 def _fakesnow() -> Iterator[None]:
-    with fakesnow.patch() as fake_fns:
-        yield fake_fns
+    with fakesnow.patch():
+        yield
 
 
 @pytest.fixture
 def _fakesnow_no_auto_create() -> Iterator[None]:
-    with fakesnow.patch(create_database_on_connect=False, create_schema_on_connect=False) as fake_fns:
-        yield fake_fns
+    with fakesnow.patch(create_database_on_connect=False, create_schema_on_connect=False):
+        yield
 
 
 @pytest.fixture(scope="session")
 def _fakesnow_session() -> Iterator[None]:
-    with fakesnow.patch() as fake_fns:
-        yield fake_fns
+    with fakesnow.patch():
+        yield
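
The fixtures now yield None, matching their Iterator[None] annotation, since patch() is itself annotated to yield nothing usable. Usage is unchanged; a minimal test sketch, assuming fakesnow.fixtures is registered as a pytest plugin:

    import snowflake.connector

    def test_connect(_fakesnow: None) -> None:
        # snowflake.connector is patched for the duration of the test
        conn = snowflake.connector.connect(database="db1", schema="s1")
        assert conn.cursor().execute("SELECT 1").fetchone() == (1,)
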
fakesnow/info_schema.py CHANGED
@@ -1,11 +1,12 @@
 """Info schema extension tables/views used for storing snowflake metadata not captured by duckdb."""
+from __future__ import annotations
 
 from string import Template
 
 # use ext prefix in columns to disambiguate when joining with information_schema.tables
 SQL_CREATE_INFORMATION_SCHEMA_TABLES_EXT = Template(
     """
-create table ${catalog}.information_schema.tables_ext (
+create table if not exists ${catalog}.information_schema._fs_tables_ext (
     ext_table_catalog varchar,
     ext_table_schema varchar,
     ext_table_name varchar,
@@ -17,7 +18,7 @@ create table ${catalog}.information_schema.tables_ext (
 
 SQL_CREATE_INFORMATION_SCHEMA_COLUMNS_EXT = Template(
     """
-create table ${catalog}.information_schema.columns_ext (
+create table if not exists ${catalog}.information_schema._fs_columns_ext (
     ext_table_catalog varchar,
     ext_table_schema varchar,
     ext_table_name varchar,
@@ -33,13 +34,14 @@ create table ${catalog}.information_schema.columns_ext (
 # snowflake integers are 38 digits, base 10, See https://docs.snowflake.com/en/sql-reference/data-types-numeric
 SQL_CREATE_INFORMATION_SCHEMA_COLUMNS_VIEW = Template(
     """
-create view ${catalog}.information_schema.columns_snowflake AS
+create view if not exists ${catalog}.information_schema._fs_columns_snowflake AS
 select table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable,
 case when starts_with(data_type, 'DECIMAL') or data_type='BIGINT' then 'NUMBER'
     when data_type='VARCHAR' then 'TEXT'
     when data_type='DOUBLE' then 'FLOAT'
     when data_type='BLOB' then 'BINARY'
     when data_type='TIMESTAMP' then 'TIMESTAMP_NTZ'
+    when data_type='TIMESTAMP WITH TIME ZONE' then 'TIMESTAMP_TZ'
     when data_type='JSON' then 'VARIANT'
     else data_type end as data_type,
 ext_character_maximum_length as character_maximum_length, ext_character_octet_length as character_octet_length,
@@ -52,7 +54,7 @@ case when data_type='BIGINT' then 10
 case when data_type='DOUBLE' then NULL else numeric_scale end as numeric_scale,
 collation_name, is_identity, identity_generation, identity_cycle
 from ${catalog}.information_schema.columns
-left join ${catalog}.information_schema.columns_ext ext
+left join ${catalog}.information_schema._fs_columns_ext ext
 on ext_table_catalog = table_catalog AND ext_table_schema = table_schema
 AND ext_table_name = table_name AND ext_column_name = column_name
 """
@@ -61,7 +63,7 @@ AND ext_table_name = table_name AND ext_column_name = column_name
 # replicates https://docs.snowflake.com/sql-reference/info-schema/databases
 SQL_CREATE_INFORMATION_SCHEMA_DATABASES_VIEW = Template(
     """
-create view ${catalog}.information_schema.databases AS
+create view if not exists ${catalog}.information_schema.databases AS
 select
     catalog_name as database_name,
     'SYSADMIN' as database_owner,
@@ -88,7 +90,7 @@ def creation_sql(catalog: str) -> str:
 
 def insert_table_comment_sql(catalog: str, schema: str, table: str, comment: str) -> str:
     return f"""
-        INSERT INTO {catalog}.information_schema.tables_ext
+        INSERT INTO {catalog}.information_schema._fs_tables_ext
         values ('{catalog}', '{schema}', '{table}', '{comment}')
         ON CONFLICT (ext_table_catalog, ext_table_schema, ext_table_name)
         DO UPDATE SET comment = excluded.comment
@@ -102,7 +104,7 @@ def insert_text_lengths_sql(catalog: str, schema: str, table: str, text_lengths:
     )
 
     return f"""
-        INSERT INTO {catalog}.information_schema.columns_ext
+        INSERT INTO {catalog}.information_schema._fs_columns_ext
         values {values}
         ON CONFLICT (ext_table_catalog, ext_table_schema, ext_table_name, ext_column_name)
         DO UPDATE SET ext_character_maximum_length = excluded.ext_character_maximum_length,
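
The _fs_ prefix marks these objects as fakesnow-internal; the renamed transforms in fakes.py (information_schema_fs_columns_snowflake, information_schema_fs_tables_ext) redirect information_schema queries to them. A hedged sketch of the visible effect:

    cur.execute("CREATE TABLE t3 (s VARCHAR)")
    cur.execute("SELECT data_type FROM information_schema.columns WHERE table_name = 'T3'")
    print(cur.fetchall())  # [('TEXT',)] - duckdb's VARCHAR reported as snowflake's TEXT
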
fakesnow/macros.py ADDED
@@ -0,0 +1,13 @@
+from string import Template
+
+EQUAL_NULL = Template(
+    """
+CREATE MACRO IF NOT EXISTS ${catalog}.equal_null(a, b) AS a IS NOT DISTINCT FROM b;
+"""
+)
+
+
+def creation_sql(catalog: str) -> str:
+    return f"""
+    {EQUAL_NULL.substitute(catalog=catalog)};
+    """