fakesnow 0.8.2__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fakesnow/__init__.py +8 -2
- fakesnow/checks.py +3 -3
- fakesnow/cli.py +54 -12
- fakesnow/fakes.py +135 -90
- fakesnow/fixtures.py +6 -6
- fakesnow/info_schema.py +9 -7
- fakesnow/macros.py +13 -0
- fakesnow/transforms.py +195 -32
- fakesnow-0.9.1.dist-info/LICENSE +201 -0
- fakesnow-0.9.1.dist-info/METADATA +362 -0
- fakesnow-0.9.1.dist-info/RECORD +17 -0
- fakesnow-0.8.2.dist-info/LICENSE +0 -20
- fakesnow-0.8.2.dist-info/METADATA +0 -174
- fakesnow-0.8.2.dist-info/RECORD +0 -16
- {fakesnow-0.8.2.dist-info → fakesnow-0.9.1.dist-info}/WHEEL +0 -0
- {fakesnow-0.8.2.dist-info → fakesnow-0.9.1.dist-info}/entry_points.txt +0 -0
- {fakesnow-0.8.2.dist-info → fakesnow-0.9.1.dist-info}/top_level.txt +0 -0
fakesnow/__init__.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations

 import contextlib
 import importlib
+import os
 import sys
 import unittest.mock as mock
 from collections.abc import Iterator, Sequence
@@ -19,6 +20,7 @@ def patch(
     extra_targets: str | Sequence[str] = [],
     create_database_on_connect: bool = True,
     create_schema_on_connect: bool = True,
+    db_path: str | os.PathLike | None = None,
 ) -> Iterator[None]:
     """Patch snowflake targets with fakes.

@@ -28,12 +30,15 @@ def patch(

     Args:
         extra_targets (str | Sequence[str], optional): Extra targets to patch. Defaults to [].
-        create_database_on_connect (bool, optional): Create database if provided in connection. Defaults to True.
-        create_schema_on_connect (bool, optional): Create schema if provided in connection. Defaults to True.

     Allows extra targets beyond the standard snowflake.connector targets to be patched. Needed because we cannot
     patch definitions, only usages, see https://docs.python.org/3/library/unittest.mock.html#where-to-patch

+        create_database_on_connect (bool, optional): Create database if provided in connection. Defaults to True.
+        create_schema_on_connect (bool, optional): Create schema if provided in connection. Defaults to True.
+        db_path (str | os.PathLike | None, optional): _description_. Use existing database files from this path
+            or create them here if they don't already exist. If None databases are in-memory. Defaults to None.
+
     Yields:
         Iterator[None]: None.
     """
@@ -51,6 +56,7 @@ def patch(
             duck_conn.cursor(),
             create_database=create_database_on_connect,
             create_schema=create_schema_on_connect,
+            db_path=db_path,
             **kwargs,
         ),
         snowflake.connector.pandas_tools.write_pandas: fakes.write_pandas,
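The new db_path parameter persists each fake database as a duckdb file instead of keeping it in-memory. A minimal sketch of intended usage (the directory and table names are illustrative, and the databases/ directory is assumed to exist):

import fakesnow
import snowflake.connector

with fakesnow.patch(db_path="databases/"):
    conn = snowflake.connector.connect(database="marts", schema="jaffles")
    conn.cursor().execute("CREATE TABLE IF NOT EXISTS customers (id int)")

# a later patch(db_path="databases/") reattaches databases/MARTS.db rather than recreating it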
fakesnow/checks.py
CHANGED
@@ -37,10 +37,10 @@ def is_unqualified_table_expression(expression: exp.Expression) -> tuple[bool, bool]:
         no_schema = False
     elif parent_kind.upper() == "SCHEMA":
         # "CREATE/DROP SCHEMA"
-        no_database = not node.args.get("db")
+        no_database = not node.args.get("catalog")
         no_schema = False
-    elif parent_kind.upper() == "TABLE":
-        # "DROP TABLE"
+    elif parent_kind.upper() in {"TABLE", "VIEW"}:
+        # "CREATE/DROP TABLE/VIEW"
         no_database = not node.args.get("catalog")
         no_schema = not node.args.get("db")
     else:
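The practical effect of the broadened check: CREATE/DROP VIEW is now validated like tables, so an unqualified view name with no current database or schema fails the way it would on Snowflake. A sketch (the error class and message are what fakesnow mimics from Snowflake; exact wording may differ):

import fakesnow
import snowflake.connector

with fakesnow.patch():
    conn = snowflake.connector.connect()  # no database or schema set
    try:
        conn.cursor().execute("CREATE VIEW v1 AS SELECT 1")
    except snowflake.connector.errors.ProgrammingError as e:
        print(e.msg)  # eg: Cannot perform CREATE VIEW. This session does not have a current database. ...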
fakesnow/cli.py
CHANGED
@@ -1,28 +1,70 @@
+import argparse
 import runpy
 import sys
 from collections.abc import Sequence

 import fakesnow

-USAGE = "Usage: fakesnow <path> | -m <module> [<arg>]..."

+def arg_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="""eg: fakesnow script.py OR fakesnow -m pytest""",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "-d",
+        "--db_path",
+        help="databases path. Use existing database files from this path or create them here if they don't already "
+        "exist. If None databases are in-memory.",
+    )
+    parser.add_argument("-m", "--module", help="target module")
+    parser.add_argument("path", type=str, nargs="?", help="target path")
+    parser.add_argument("targs", nargs="*", help="target args")
+    return parser

-def main(args: Sequence[str] = sys.argv) -> int:
-    if len(args) < 2 or (len(args) == 2 and args[1] == "-m"):
-        print(USAGE, file=sys.stderr)
-        return 42

-    with fakesnow.patch():
-        if args[1] == "-m":
-            module = args[2]
-            sys.argv = args[2:]
+def split(args: Sequence[str]) -> tuple[Sequence[str], Sequence[str]]:
+    # split the arguments into two lists either:
+    # 1) after the first -m flag, or
+    # 2) after the first positional arg
+    in_flag = False
+    i = 0
+    for i in range(len(args)):
+        a = args[i]
+        if a in ["-m", "--module"]:
+            i = min(i + 1, len(args) - 1)
+            break
+        elif a.startswith("-"):
+            in_flag = True
+        elif not in_flag:
+            break
+        else:
+            in_flag = False
+
+    return args[: i + 1], args[i + 1 :]
+
+
+def main(args: Sequence[str] = sys.argv[1:]) -> int:
+    parser = arg_parser()
+    # split args so the fakesnow cli doesn't consume from the target's args (eg: -m and -d)
+    fsargs, targs = split(args)
+    pargs = parser.parse_args(fsargs)
+
+    with fakesnow.patch(db_path=pargs.db_path):
+        if module := pargs.module:
+            # NB: pargs.path and pargs.args are consumed by targs
+            sys.argv = [module, *targs]

             # add current directory to path to mimic python -m
             sys.path.insert(0, "")
             runpy.run_module(module, run_name="__main__", alter_sys=True)
-        else:
-            path = args[1]
-            sys.argv = args[1:]
+        elif path := pargs.path:
+            # NB: pargs.args is consumed by targs
+            sys.argv = [path, *targs]

             runpy.run_path(path, run_name="__main__")
+        else:
+            parser.print_usage()
+            return 42

     return 0
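split is what lets flags after the module or script flow through to the target untouched. Its behaviour, calling the function directly (argument values illustrative):

from fakesnow.cli import split

# everything after the first -m <module> belongs to the target
print(split(["-d", "databases/", "-m", "pytest", "-k", "test_foo"]))
# (['-d', 'databases/', '-m', 'pytest'], ['-k', 'test_foo'])

# everything after the first positional arg belongs to the target
print(split(["-d", "databases/", "script.py", "--verbose"]))
# (['-d', 'databases/', 'script.py'], ['--verbose'])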
fakesnow/fakes.py
CHANGED
@@ -1,9 +1,11 @@
 from __future__ import annotations

+import json
 import os
 import re
 import sys
 from collections.abc import Iterable, Iterator, Sequence
+from pathlib import Path
 from string import Template
 from types import TracebackType
 from typing import TYPE_CHECKING, Any, Literal, Optional, cast
@@ -26,15 +28,18 @@ from typing_extensions import Self
 import fakesnow.checks as checks
 import fakesnow.expr as expr
 import fakesnow.info_schema as info_schema
+import fakesnow.macros as macros
 import fakesnow.transforms as transforms

 SCHEMA_UNSET = "schema_unset"
-SUCCESS_SQL = …
-…
+SQL_SUCCESS = "SELECT 'Statement executed successfully.' as 'status'"
+SQL_CREATED_DATABASE = Template("SELECT 'Database ${name} successfully created.' as 'status'")
+SQL_CREATED_SCHEMA = Template("SELECT 'Schema ${name} successfully created.' as 'status'")
+SQL_CREATED_TABLE = Template("SELECT 'Table ${name} successfully created.' as 'status'")
+SQL_DROPPED = Template("SELECT '${name} successfully dropped.' as 'status'")
+SQL_INSERTED_ROWS = Template("SELECT ${count} as 'number of rows inserted'")
+SQL_UPDATED_ROWS = Template("SELECT ${count} as 'number of rows updated', 0 as 'number of multi-joined rows updated'")
+SQL_DELETED_ROWS = Template("SELECT ${count} as 'number of rows deleted'")


 class FakeSnowflakeCursor:
@@ -59,6 +64,9 @@ class FakeSnowflakeCursor:
         self._last_params = None
         self._sqlstate = None
         self._arraysize = 1
+        self._arrow_table = None
+        self._arrow_table_fetch_index = None
+        self._rowcount = None
         self._converter = snowflake.connector.converter.SnowflakeConverter()

     def __enter__(self) -> Self:
@@ -69,8 +77,8 @@ class FakeSnowflakeCursor:
         exc_type: type[BaseException] | None,
         exc_value: BaseException | None,
         traceback: TracebackType | None,
-    ) -> …
-…
+    ) -> None:
+        pass

     @property
     def arraysize(self) -> int:
@@ -96,22 +104,16 @@ class FakeSnowflakeCursor:

         describe = f"DESCRIBE {command}"
         self.execute(describe, *args, **kwargs)
-        return FakeSnowflakeCursor._describe_as_result_metadata(self.…
+        return FakeSnowflakeCursor._describe_as_result_metadata(self.fetchall())

     @property
     def description(self) -> list[ResultMetadata]:
-        # use a cursor to avoid …
-        with self.…
-            # TODO: allow sql alchemy connection with no database or schema
-            assert self._conn.database, ".description not implemented when database is None"
-            assert self._conn.schema, ".description not implemented when schema is None"
-
-            # match database and schema used on the main connection
-            cur.execute(f"SET SCHEMA = '{self._conn.database}.{self._conn.schema}'")
+        # use a separate cursor to avoid consuming the result set on this cursor
+        with self._conn.cursor() as cur:
             cur.execute(f"DESCRIBE {self._last_sql}", self._last_params)
             meta = FakeSnowflakeCursor._describe_as_result_metadata(cur.fetchall())

-            return meta
+        return meta

     def execute(
         self,
@@ -135,6 +137,8 @@ class FakeSnowflakeCursor:
         **kwargs: Any,
     ) -> FakeSnowflakeCursor:
         self._arrow_table = None
+        self._arrow_table_fetch_index = None
+        self._rowcount = None

         command, params = self._rewrite_with_params(command, params)
         expression = parse_one(command, read="snowflake")
@@ -159,10 +163,11 @@ class FakeSnowflakeCursor:
         transformed = (
             expression.transform(transforms.upper_case_unquoted_identifiers)
             .transform(transforms.set_schema, current_database=self._conn.database)
-            .transform(transforms.create_database)
-            .transform(transforms.…
-            .transform(transforms.…
-            .transform(transforms.…
+            .transform(transforms.create_database, db_path=self._conn.db_path)
+            .transform(transforms.extract_comment_on_table)
+            .transform(transforms.extract_comment_on_columns)
+            .transform(transforms.information_schema_fs_columns_snowflake)
+            .transform(transforms.information_schema_fs_tables_ext)
             .transform(transforms.drop_schema_cascade)
             .transform(transforms.tag)
             .transform(transforms.semi_structured_types)
@@ -188,20 +193,20 @@ class FakeSnowflakeCursor:
             .transform(transforms.array_size)
             .transform(transforms.random)
             .transform(transforms.identifier)
+            .transform(lambda e: transforms.show_schemas(e, self._conn.database))
+            .transform(lambda e: transforms.show_objects_tables(e, self._conn.database))
         )
         sql = transformed.sql(dialect="duckdb")
+        result_sql = None

         if transformed.find(exp.Select) and (seed := transformed.args.get("seed")):
             sql = f"SELECT setseed({seed}); {sql}"

-        if os.environ.get("FAKESNOW_DEBUG")…
-…
-            print(f"{sql};", file=sys.stderr)
+        if fs_debug := os.environ.get("FAKESNOW_DEBUG"):
+            debug = command if fs_debug == "snowflake" else sql
+            print(f"{debug};{params=}" if params else f"{debug};", file=sys.stderr)

         try:
-            self._last_sql = sql
-            self._last_params = params
             self._duck_conn.execute(sql, params)
         except duckdb.BinderException as e:
             msg = e.args[0]
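FAKESNOW_DEBUG now has two modes: the value snowflake echoes the original Snowflake command, while any other truthy value echoes the transformed duckdb SQL, plus params when given. A sketch (database/schema names illustrative):

import os

os.environ["FAKESNOW_DEBUG"] = "snowflake"  # or eg "1" to see the duckdb SQL instead

import fakesnow
import snowflake.connector

with fakesnow.patch():
    with snowflake.connector.connect(database="db1", schema="s1") as conn:
        conn.cursor().execute("SELECT 1")  # statement is echoed to stderr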
@@ -215,49 +220,62 @@ class FakeSnowflakeCursor:
                 e
             ) or "cannot commit - no transaction is active" in str(e):
                 # snowflake doesn't error on rollback or commit outside a tx
-…
-                self._last_sql = SUCCESS_SQL
+                result_sql = SQL_SUCCESS
             else:
                 raise e

+        affected_count = None
         if cmd == "USE DATABASE" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             self._conn.database = ident.this.upper()
             self._conn.database_set = True

-…
+        elif cmd == "USE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             self._conn.schema = ident.this.upper()
             self._conn.schema_set = True

-…
+        elif create_db_name := transformed.args.get("create_db_name"):
             # we created a new database, so create the info schema extensions
             self._duck_conn.execute(info_schema.creation_sql(create_db_name))
-…
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_DATABASE.substitute(name=create_db_name)

-…
+        elif cmd == "CREATE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-…
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_SCHEMA.substitute(name=name)

-…
+        elif cmd == "CREATE TABLE" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-…
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_TABLE.substitute(name=name)

-…
+        elif cmd.startswith("DROP") and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-…
-…
-…
+            result_sql = SQL_DROPPED.substitute(name=name)
+
+            # if dropping the current database/schema then reset conn metadata
+            if cmd == "DROP DATABASE" and name == self._conn.database:
+                self._conn.database = None
+                self._conn.schema = None
+
+            elif cmd == "DROP SCHEMA" and name == self._conn.schema:
+                self._conn.schema = None
+
+        elif cmd == "INSERT":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_INSERTED_ROWS.substitute(count=affected_count)

-…
-            (…
-…
+        elif cmd == "UPDATE":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_UPDATED_ROWS.substitute(count=affected_count)
+
+        elif cmd == "DELETE":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_DELETED_ROWS.substitute(count=affected_count)
+
+        elif cmd == "DESCRIBE TABLE":
+            # DESCRIBE TABLE has already been run above to detect and error if the table exists
+            # We now rerun DESCRIBE TABLE but transformed with columns to match Snowflake
+            result_sql = transformed.transform(
+                lambda e: transforms.describe_table(e, self._conn.database, self._conn.schema)
+            ).sql(dialect="duckdb")

         if table_comment := cast(tuple[exp.Table, str], transformed.args.get("table_comment")):
             # record table comment
@@ -276,6 +294,15 @@ class FakeSnowflakeCursor:
         assert catalog and schema
         self._duck_conn.execute(info_schema.insert_text_lengths_sql(catalog, schema, table.name, text_lengths))

+        if result_sql:
+            self._duck_conn.execute(result_sql)
+
+        self._arrow_table = self._duck_conn.fetch_arrow_table()
+        self._rowcount = affected_count or self._arrow_table.num_rows
+
+        self._last_sql = result_sql or sql
+        self._last_params = params
+
         return self

     def executemany(
@@ -298,13 +325,16 @@ class FakeSnowflakeCursor:
         return self

     def fetchall(self) -> list[tuple] | list[dict]:
-        if self.…
-…
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise TypeError("No open result set")
+        return self.fetchmany(self._arrow_table.num_rows)

     def fetch_pandas_all(self, **kwargs: dict[str, Any]) -> pd.DataFrame:
-…
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise snowflake.connector.NotSupportedError("No open result set")
+        return self._arrow_table.to_pandas()

     def fetchone(self) -> dict | tuple | None:
         result = self.fetchmany(1)
@@ -313,35 +343,26 @@ class FakeSnowflakeCursor:
     def fetchmany(self, size: int | None = None) -> list[tuple] | list[dict]:
         # https://peps.python.org/pep-0249/#fetchmany
         size = size or self._arraysize
-        if not self._use_dict_result:
-            return cast(list[tuple], self._duck_conn.fetchmany(size))
-
-        if not self._arrow_table:
-            self._arrow_table = self._duck_conn.fetch_arrow_table()
-            self._arrow_table_fetch_index = -size

-        self._arrow_table_fetch_index += size
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise TypeError("No open result set")
+        if self._arrow_table_fetch_index is None:
+            self._arrow_table_fetch_index = 0
+        else:
+            self._arrow_table_fetch_index += size

-…
+        tslice = self._arrow_table.slice(offset=self._arrow_table_fetch_index, length=size).to_pylist()
+        return tslice if self._use_dict_result else [tuple(d.values()) for d in tslice]

     def get_result_batches(self) -> list[ResultBatch] | None:
-…
-        reader = self._duck_conn.fetch_record_batch(…
-
-        batches = []
-        try:
-            while True:
-                batches.append(FakeResultBatch(self._use_dict_result, reader.read_next_batch()))
-        except StopIteration:
-            pass
-
-        return batches
+        if self._arrow_table is None:
+            return None
+        return [FakeResultBatch(self._use_dict_result, b) for b in self._arrow_table.to_batches(max_chunksize=1000)]

     @property
     def rowcount(self) -> int | None:
-…
-        return None
+        return self._rowcount

     @property
     def sfqid(self) -> str | None:
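Fetching is reworked around a single pyarrow Table: execute materialises the result once, and fetchmany pages through it by slicing, with dict vs tuple rows decided at fetch time. Note the fetch index advances by the current call's size, so uniform sizes page cleanly. A sketch of the resulting behaviour (row order assumed stable for this small insert):

import fakesnow
import snowflake.connector

with fakesnow.patch():
    conn = snowflake.connector.connect(database="db1", schema="s1")
    cur = conn.cursor()
    cur.execute("CREATE TABLE customers (id int)")
    cur.execute("INSERT INTO customers VALUES (1), (2), (3)")
    print(cur.rowcount)      # 3 - the insert count, now surfaced via _rowcount
    cur.execute("SELECT id FROM customers")
    print(cur.fetchmany(2))  # [(1,), (2,)]
    print(cur.fetchmany(2))  # [(3,)]
    print(cur.fetchmany(2))  # []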
@@ -392,6 +413,10 @@ class FakeSnowflakeCursor:
             return ResultMetadata(
                 name=column_name, type_code=8, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
             )
+        elif column_type == "TIMESTAMP WITH TIME ZONE":
+            return ResultMetadata(
+                name=column_name, type_code=7, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
+            )
         elif column_type == "BLOB":
             return ResultMetadata(
                 name=column_name, type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=True  # noqa: E501
@@ -446,15 +471,18 @@ class FakeSnowflakeConnection:
         schema: str | None = None,
         create_database: bool = True,
         create_schema: bool = True,
+        db_path: str | os.PathLike | None = None,
         *args: Any,
         **kwargs: Any,
     ):
         self._duck_conn = duck_conn
-        # upper case database and schema like snowflake
+        # upper case database and schema like snowflake unquoted identifiers
+        # NB: catalog names are not case-sensitive in duckdb but stored as cased in information_schema.schemata
         self.database = database and database.upper()
         self.schema = schema and schema.upper()
         self.database_set = False
         self.schema_set = False
+        self.db_path = db_path
         self._paramstyle = "pyformat"
@@ -466,8 +494,10 @@ class FakeSnowflakeConnection:
                 where catalog_name = '{self.database}'"""
             ).fetchone()
         ):
-…
+            db_file = f"{Path(db_path)/self.database}.db" if db_path else ":memory:"
+            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
             duck_conn.execute(info_schema.creation_sql(self.database))
+            duck_conn.execute(macros.creation_sql(self.database))

         # create schema if needed
         if (
@@ -505,7 +535,7 @@ class FakeSnowflakeConnection:
             self.database_set = True

         # use UTC instead of local time zone for consistent testing
-        duck_conn.execute("SET TimeZone = 'UTC'")
+        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")

     def __enter__(self) -> Self:
         return self
@@ -515,8 +545,8 @@ class FakeSnowflakeConnection:
         exc_type: type[BaseException] | None,
         exc_value: BaseException | None,
         traceback: TracebackType | None,
-    ) -> …
-…
+    ) -> None:
+        pass

     def commit(self) -> None:
         self.cursor().execute("COMMIT")
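For reference, each fake database is a separate duckdb catalog, attached either in-memory or file-backed. Roughly the raw duckdb equivalent of what the connection now does (a sketch, not fakesnow API; assumes a databases/ directory exists):

import duckdb

conn = duckdb.connect()
conn.execute("ATTACH DATABASE 'databases/MARTS.db' AS MARTS")  # file-backed; ':memory:' when no db_path
conn.execute("SET GLOBAL TimeZone = 'UTC'")  # GLOBAL so every cursor sees UTC
print(conn.execute("SELECT current_setting('TimeZone')").fetchone())  # ('UTC',)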
@@ -545,12 +575,27 @@ class FakeSnowflakeConnection:
     def _insert_df(
         self, df: pd.DataFrame, table_name: str, database: str | None = None, schema: str | None = None
     ) -> int:
-        # …
-        # …
-…
+        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
+        # Whereas duckdb analyses a dataframe see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
+        # and converts a object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST
+        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
+        #
+        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
+        # varchar and value a string containing a struct representation. In order to support dicts with different keys
+        # we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the dataframe
+        # copy and transform in python.
+
+        df = df.copy()
+
+        # Identify columns of type object
+        object_cols = df.select_dtypes(include=["object"]).columns
+
+        # Apply json.dumps to these columns
+        for col in object_cols:
+            # don't jsonify string
+            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
+
+        self._duck_conn.execute(f"INSERT INTO {table_name}({','.join(df.columns.to_list())}) SELECT * FROM df")
         return self._duck_conn.fetchall()[0][0]
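The effect of the json.dumps conversion: a column mixing dicts with different keys (or lists) no longer trips duckdb's struct inference, mirroring how snowflake lands such values as VARIANT. A sketch using write_pandas (table and column names illustrative):

import pandas as pd

import fakesnow
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas

with fakesnow.patch():
    conn = snowflake.connector.connect(database="db1", schema="s1")
    conn.cursor().execute("CREATE TABLE events (payload variant)")
    # rows with different keys: no single duckdb STRUCT covers both, so they are inserted as json strings
    df = pd.DataFrame({"PAYLOAD": [{"a": 1}, {"b": 2, "c": 3}]})
    write_pandas(conn, df, "EVENTS")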
fakesnow/fixtures.py
CHANGED
@@ -7,17 +7,17 @@ import fakesnow

 @pytest.fixture
 def _fakesnow() -> Iterator[None]:
-    with fakesnow.patch()…
-        yield…
+    with fakesnow.patch():
+        yield


 @pytest.fixture
 def _fakesnow_no_auto_create() -> Iterator[None]:
-    with fakesnow.patch(create_database_on_connect=False, create_schema_on_connect=False)…
-        yield…
+    with fakesnow.patch(create_database_on_connect=False, create_schema_on_connect=False):
+        yield


 @pytest.fixture(scope="session")
 def _fakesnow_session() -> Iterator[None]:
-    with fakesnow.patch()…
-        yield…
+    with fakesnow.patch():
+        yield
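The fixtures are consumed as a pytest plugin; requesting one patches snowflake.connector for the duration of the test. Typical usage (test body illustrative):

# conftest.py
pytest_plugins = "fakesnow.fixtures"

# test_connect.py
import snowflake.connector

def test_connect(_fakesnow: None) -> None:
    conn = snowflake.connector.connect(database="db1", schema="s1")
    assert conn.cursor().execute("SELECT 1").fetchone() == (1,)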
fakesnow/info_schema.py
CHANGED
@@ -1,11 +1,12 @@
 """Info schema extension tables/views used for storing snowflake metadata not captured by duckdb."""
+from __future__ import annotations

 from string import Template

 # use ext prefix in columns to disambiguate when joining with information_schema.tables
 SQL_CREATE_INFORMATION_SCHEMA_TABLES_EXT = Template(
     """
-create table ${catalog}.information_schema.tables_ext (
+create table if not exists ${catalog}.information_schema._fs_tables_ext (
     ext_table_catalog varchar,
     ext_table_schema varchar,
     ext_table_name varchar,
@@ -17,7 +18,7 @@ create table ${catalog}.information_schema.tables_ext (

 SQL_CREATE_INFORMATION_SCHEMA_COLUMNS_EXT = Template(
     """
-create table ${catalog}.information_schema.columns_ext (
+create table if not exists ${catalog}.information_schema._fs_columns_ext (
     ext_table_catalog varchar,
     ext_table_schema varchar,
     ext_table_name varchar,
@@ -33,13 +34,14 @@ create table ${catalog}.information_schema.columns_ext (

 # snowflake integers are 38 digits, base 10, See https://docs.snowflake.com/en/sql-reference/data-types-numeric
 SQL_CREATE_INFORMATION_SCHEMA_COLUMNS_VIEW = Template(
     """
-create view ${catalog}.information_schema.columns_snowflake AS
+create view if not exists ${catalog}.information_schema._fs_columns_snowflake AS
 select table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable,
 case when starts_with(data_type, 'DECIMAL') or data_type='BIGINT' then 'NUMBER'
     when data_type='VARCHAR' then 'TEXT'
     when data_type='DOUBLE' then 'FLOAT'
     when data_type='BLOB' then 'BINARY'
     when data_type='TIMESTAMP' then 'TIMESTAMP_NTZ'
+    when data_type='TIMESTAMP WITH TIME ZONE' then 'TIMESTAMP_TZ'
     when data_type='JSON' then 'VARIANT'
     else data_type end as data_type,
 ext_character_maximum_length as character_maximum_length, ext_character_octet_length as character_octet_length,
@@ -52,7 +54,7 @@ case when data_type='BIGINT' then 10
 case when data_type='DOUBLE' then NULL else numeric_scale end as numeric_scale,
 collation_name, is_identity, identity_generation, identity_cycle
 from ${catalog}.information_schema.columns
-left join ${catalog}.information_schema.columns_ext ext
+left join ${catalog}.information_schema._fs_columns_ext ext
 on ext_table_catalog = table_catalog AND ext_table_schema = table_schema
 AND ext_table_name = table_name AND ext_column_name = column_name
 """
@@ -61,7 +63,7 @@ AND ext_table_name = table_name AND ext_column_name = column_name
 # replicates https://docs.snowflake.com/sql-reference/info-schema/databases
 SQL_CREATE_INFORMATION_SCHEMA_DATABASES_VIEW = Template(
     """
-create view ${catalog}.information_schema.databases AS
+create view if not exists ${catalog}.information_schema.databases AS
 select
     catalog_name as database_name,
     'SYSADMIN' as database_owner,
@@ -88,7 +90,7 @@ def creation_sql(catalog: str) -> str:

 def insert_table_comment_sql(catalog: str, schema: str, table: str, comment: str) -> str:
     return f"""
-        INSERT INTO {catalog}.information_schema.tables_ext
+        INSERT INTO {catalog}.information_schema._fs_tables_ext
         values ('{catalog}', '{schema}', '{table}', '{comment}')
         ON CONFLICT (ext_table_catalog, ext_table_schema, ext_table_name)
         DO UPDATE SET comment = excluded.comment
@@ -102,7 +104,7 @@ def insert_text_lengths_sql(catalog: str, schema: str, table: str, text_lengths:
     )

     return f"""
-        INSERT INTO {catalog}.information_schema.columns_ext
+        INSERT INTO {catalog}.information_schema._fs_columns_ext
         values {values}
         ON CONFLICT (ext_table_catalog, ext_table_schema, ext_table_name, ext_column_name)
         DO UPDATE SET ext_character_maximum_length = excluded.ext_character_maximum_length,
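Queries against information_schema.columns are redirected (via the information_schema_fs_columns_snowflake transform) to this view, so duckdb types come back under their Snowflake names. A sketch:

import fakesnow
import snowflake.connector

with fakesnow.patch():
    conn = snowflake.connector.connect(database="db1", schema="s1")
    cur = conn.cursor()
    cur.execute("CREATE TABLE t1 (name varchar(50), amount number(10,2))")
    cur.execute("SELECT column_name, data_type FROM information_schema.columns WHERE table_name = 'T1'")
    print(cur.fetchall())  # eg: [('NAME', 'TEXT'), ('AMOUNT', 'NUMBER')] rather than VARCHAR/DECIMAL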
fakesnow/macros.py
ADDED
@@ -0,0 +1,13 @@
+from string import Template
+
+EQUAL_NULL = Template(
+    """
+CREATE MACRO IF NOT EXISTS ${catalog}.equal_null(a, b) AS a IS NOT DISTINCT FROM b;
+"""
+)
+
+
+def creation_sql(catalog: str) -> str:
+    return f"""
+        {EQUAL_NULL.substitute(catalog=catalog)};
+    """
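EQUAL_NULL is Snowflake's null-safe equality; the macro maps it onto duckdb's IS NOT DISTINCT FROM, created once per attached catalog. Expected behaviour per Snowflake's semantics (a sketch; assumes the macro resolves from the connection's current catalog):

import fakesnow
import snowflake.connector

with fakesnow.patch():
    conn = snowflake.connector.connect(database="db1", schema="s1")
    cur = conn.cursor()
    print(cur.execute("SELECT EQUAL_NULL(NULL, NULL)").fetchone())  # (True,) - unlike =, NULLs compare equal
    print(cur.execute("SELECT EQUAL_NULL(1, 2)").fetchone())        # (False,)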