fakesnow 0.9.22.tar.gz → 0.9.23.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fakesnow-0.9.22 → fakesnow-0.9.23}/PKG-INFO +2 -1
- fakesnow-0.9.23/fakesnow/arrow.py +67 -0
- fakesnow-0.9.23/fakesnow/conn.py +175 -0
- fakesnow-0.9.22/fakesnow/fakes.py → fakesnow-0.9.23/fakesnow/cursor.py +14 -301
- fakesnow-0.9.23/fakesnow/fakes.py +3 -0
- fakesnow-0.9.23/fakesnow/pandas_tools.py +77 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/server.py +5 -11
- fakesnow-0.9.23/fakesnow/types.py +89 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/PKG-INFO +2 -1
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/SOURCES.txt +4 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/requires.txt +1 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/pyproject.toml +2 -1
- fakesnow-0.9.23/tests/test_arrow.py +99 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_fakes.py +6 -1
- fakesnow-0.9.23/tests/test_server.py +98 -0
- fakesnow-0.9.22/fakesnow/arrow.py +0 -32
- fakesnow-0.9.22/tests/test_arrow.py +0 -53
- fakesnow-0.9.22/tests/test_server.py +0 -67
- {fakesnow-0.9.22 → fakesnow-0.9.23}/LICENSE +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/README.md +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/__init__.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/__main__.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/checks.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/cli.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/expr.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/fixtures.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/info_schema.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/instance.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/macros.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/py.typed +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/transforms.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/variables.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/dependency_links.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/entry_points.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/top_level.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/setup.cfg +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_checks.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_cli.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_connect.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_expr.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_info_schema.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_patch.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_sqlalchemy.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_transforms.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_users.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_write_pandas.py +0 -0
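The headline change in 0.9.23 is a refactor: the monolithic fakesnow/fakes.py is split into cursor.py, conn.py, pandas_tools.py and types.py, with a three-line fakes.py kept for backwards compatibility. A sketch of what that shim plausibly re-exports, inferred from where the classes move below (the fakes.py diff body is not shown here, so the exact lines are an assumption):

```python
# fakesnow/fakes.py: hypothetical compatibility shim (3 added lines per the summary)
from fakesnow.conn import FakeSnowflakeConnection as FakeSnowflakeConnection
from fakesnow.cursor import FakeSnowflakeCursor as FakeSnowflakeCursor
from fakesnow.pandas_tools import write_pandas as write_pandas
```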
{fakesnow-0.9.22 → fakesnow-0.9.23}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fakesnow
-Version: 0.9.22
+Version: 0.9.23
 Summary: Fake Snowflake Connector for Python. Run, mock and test Snowflake DB locally.
 License: Apache License
                                  Version 2.0, January 2004

@@ -216,6 +216,7 @@ Requires-Dist: snowflake-connector-python
 Requires-Dist: sqlglot~=25.9.0
 Provides-Extra: dev
 Requires-Dist: build~=1.0; extra == "dev"
+Requires-Dist: dirty-equals; extra == "dev"
 Requires-Dist: pandas-stubs; extra == "dev"
 Requires-Dist: snowflake-connector-python[pandas,secure-local-storage]; extra == "dev"
 Requires-Dist: pre-commit~=3.4; extra == "dev"
fakesnow-0.9.23/fakesnow/arrow.py (new file)

@@ -0,0 +1,67 @@
+from typing import Any
+
+import pyarrow as pa
+
+
+def with_sf_metadata(schema: pa.Schema) -> pa.Schema:
+    # see https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp#L32
+    # and https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/SnowflakeType.cpp#L10
+    fms = []
+    for i, t in enumerate(schema.types):
+        f = schema.field(i)
+
+        # TODO: precision, scale, charLength etc. for all types
+
+        if t == pa.bool_():
+            fm = f.with_metadata({"logicalType": "BOOLEAN"})
+        elif t == pa.int64():
+            # scale and precision required, see here
+            # https://github.com/snowflakedb/snowflake-connector-python/blob/416ff57/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowChunkIterator.cpp#L147
+            fm = f.with_metadata({"logicalType": "FIXED", "precision": "38", "scale": "0"})
+        elif t == pa.float64():
+            fm = f.with_metadata({"logicalType": "REAL"})
+        elif isinstance(t, pa.Decimal128Type):
+            fm = f.with_metadata({"logicalType": "FIXED", "precision": str(t.precision), "scale": str(t.scale)})
+        elif t == pa.string():
+            # TODO: set charLength to size of column
+            fm = f.with_metadata({"logicalType": "TEXT", "charLength": "16777216"})
+        else:
+            raise NotImplementedError(f"Unsupported Arrow type: {t}")
+        fms.append(fm)
+    return pa.schema(fms)
+
+
+def to_ipc(table: pa.Table) -> pa.Buffer:
+    batches = table.to_batches()
+    if len(batches) != 1:
+        raise NotImplementedError(f"{len(batches)} batches")
+    batch = batches[0]
+
+    sink = pa.BufferOutputStream()
+
+    with pa.ipc.new_stream(sink, with_sf_metadata(table.schema)) as writer:
+        writer.write_batch(batch)
+
+    return sink.getvalue()
+
+
+# TODO: should this be derived before with_schema?
+def to_rowtype(schema: pa.Schema) -> list[dict[str, Any]]:
+    return [
+        {
+            "name": f.name,
+            # TODO
+            # "database": "",
+            # "schema": "",
+            # "table": "",
+            "nullable": f.nullable,
+            "type": f.metadata.get(b"logicalType").decode("utf-8").lower(),  # type: ignore
+            # TODO
+            # "byteLength": 20,
+            "length": int(f.metadata.get(b"charLength")) if f.metadata.get(b"charLength") else None,  # type: ignore
+            "scale": int(f.metadata.get(b"scale")) if f.metadata.get(b"scale") else None,  # type: ignore
+            "precision": int(f.metadata.get(b"precision")) if f.metadata.get(b"precision") else None,  # type: ignore
+            "collation": None,
+        }
+        for f in schema
+    ]
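The new arrow.py serialises a result set the way the Snowflake connector's nanoarrow iterator expects: an Arrow IPC stream whose schema fields carry Snowflake logicalType metadata. A minimal usage sketch (the sample table is illustrative):

```python
import pyarrow as pa

from fakesnow.arrow import to_ipc, to_rowtype

# a single-batch result set: one FIXED column, one TEXT column
table = pa.table({"ID": pa.array([1, 2], type=pa.int64()), "NAME": ["a", "b"]})

buf = to_ipc(table)  # IPC stream; the schema now carries logicalType/precision/scale

# the metadata survives a round-trip through the stream
schema = pa.ipc.open_stream(buf).schema
print(to_rowtype(schema))
# [{'name': 'ID', ..., 'type': 'fixed', 'scale': 0, 'precision': 38, ...},
#  {'name': 'NAME', ..., 'type': 'text', 'length': 16777216, ...}]
```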
fakesnow-0.9.23/fakesnow/conn.py (new file)

@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+import json
+import os
+from collections.abc import Iterable
+from pathlib import Path
+from types import TracebackType
+from typing import Any
+
+import pandas as pd
+import snowflake.connector.converter
+import snowflake.connector.errors
+import sqlglot
+from duckdb import DuckDBPyConnection
+from snowflake.connector.cursor import DictCursor, SnowflakeCursor
+from sqlglot import exp
+from typing_extensions import Self
+
+import fakesnow.info_schema as info_schema
+import fakesnow.macros as macros
+from fakesnow.cursor import FakeSnowflakeCursor
+from fakesnow.variables import Variables
+
+
+class FakeSnowflakeConnection:
+    def __init__(
+        self,
+        duck_conn: DuckDBPyConnection,
+        database: str | None = None,
+        schema: str | None = None,
+        create_database: bool = True,
+        create_schema: bool = True,
+        db_path: str | os.PathLike | None = None,
+        nop_regexes: list[str] | None = None,
+        *args: Any,
+        **kwargs: Any,
+    ):
+        self._duck_conn = duck_conn
+        self._is_closed = False
+        # upper case database and schema like snowflake unquoted identifiers
+        # so they appear as upper-cased in information_schema
+        # catalog and schema names are not actually case-sensitive in duckdb even though
+        # they are as cased in information_schema.schemata, so when selecting from
+        # information_schema.schemata below we use upper-case to match any existing duckdb
+        # catalog or schemas like "information_schema"
+        self.database = database and database.upper()
+        self.schema = schema and schema.upper()
+
+        self.database_set = False
+        self.schema_set = False
+        self.db_path = Path(db_path) if db_path else None
+        self.nop_regexes = nop_regexes
+        self._paramstyle = snowflake.connector.paramstyle
+        self.variables = Variables()
+
+        # create database if needed
+        if (
+            create_database
+            and self.database
+            and not duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}'"""
+            ).fetchone()
+        ):
+            db_file = f"{self.db_path/self.database}.db" if self.db_path else ":memory:"
+            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
+            duck_conn.execute(info_schema.creation_sql(self.database))
+            duck_conn.execute(macros.creation_sql(self.database))
+
+        # create schema if needed
+        if (
+            create_schema
+            and self.database
+            and self.schema
+            and not duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"CREATE SCHEMA {self.database}.{self.schema}")
+
+        # set database and schema if both exist
+        if (
+            self.database
+            and self.schema
+            and duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"SET schema='{self.database}.{self.schema}'")
+            self.database_set = True
+            self.schema_set = True
+        # set database if only that exists
+        elif (
+            self.database
+            and duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"SET schema='{self.database}.main'")
+            self.database_set = True
+
+        # use UTC instead of local time zone for consistent testing
+        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        pass
+
+    def close(self, retry: bool = True) -> None:
+        self._duck_conn.close()
+        self._is_closed = True
+
+    def commit(self) -> None:
+        self.cursor().execute("COMMIT")
+
+    def cursor(self, cursor_class: type[SnowflakeCursor] = SnowflakeCursor) -> FakeSnowflakeCursor:
+        # TODO: use duck_conn cursor for thread-safety
+        return FakeSnowflakeCursor(conn=self, duck_conn=self._duck_conn, use_dict_result=cursor_class == DictCursor)
+
+    def execute_string(
+        self,
+        sql_text: str,
+        remove_comments: bool = False,
+        return_cursors: bool = True,
+        cursor_class: type[SnowflakeCursor] = SnowflakeCursor,
+        **kwargs: dict[str, Any],
+    ) -> Iterable[FakeSnowflakeCursor]:
+        cursors = [
+            self.cursor(cursor_class).execute(e.sql(dialect="snowflake"))
+            for e in sqlglot.parse(sql_text, read="snowflake")
+            if e and not isinstance(e, exp.Semicolon)  # ignore comments
+        ]
+        return cursors if return_cursors else []
+
+    def is_closed(self) -> bool:
+        return self._is_closed
+
+    def rollback(self) -> None:
+        self.cursor().execute("ROLLBACK")
+
+    def _insert_df(self, df: pd.DataFrame, table_name: str) -> int:
+        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
+        # Whereas duckdb analyses a dataframe see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
+        # and converts a object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST
+        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
+        #
+        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
+        # varchar and value a string containing a struct representation. In order to support dicts with different keys
+        # we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the dataframe
+        # copy and transform in python.
+
+        df = df.copy()
+
+        # Identify columns of type object
+        object_cols = df.select_dtypes(include=["object"]).columns
+
+        # Apply json.dumps to these columns
+        for col in object_cols:
+            # don't jsonify string
+            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
+
+        escaped_cols = ",".join(f'"{col}"' for col in df.columns.to_list())
+        self._duck_conn.execute(f"INSERT INTO {table_name}({escaped_cols}) SELECT * FROM df")
+
+        return self._duck_conn.fetchall()[0][0]
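FakeSnowflakeConnection moves here from fakes.py; relative to the copy removed further down, it gains `_is_closed` and an `is_closed()` method. A sketch of driving it directly (in normal use fakesnow.patch() wires it up behind snowflake.connector; the in-memory DuckDB connection and identifiers are illustrative):

```python
import duckdb

from fakesnow.conn import FakeSnowflakeConnection

conn = FakeSnowflakeConnection(duckdb.connect(), database="DB1", schema="SCHEMA1")
with conn.cursor() as cur:
    cur.execute("CREATE TABLE example (id INT)")

# execute_string splits statements with sqlglot and returns one cursor per statement
cursors = conn.execute_string("INSERT INTO example VALUES (1); SELECT * FROM example")
print(list(cursors)[-1].fetchall())  # [(1,)]

conn.close()
assert conn.is_closed()
```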
fakesnow-0.9.22/fakesnow/fakes.py → fakesnow-0.9.23/fakesnow/cursor.py

@@ -1,38 +1,36 @@
 from __future__ import annotations
 
-import json
 import os
 import re
 import sys
-from collections.abc import Iterable, Iterator, Sequence
-from pathlib import Path
+from collections.abc import Iterator, Sequence
 from string import Template
 from types import TracebackType
-from typing import TYPE_CHECKING, Any, Literal, Optional, cast
+from typing import TYPE_CHECKING, Any, cast
 
 import duckdb
-from sqlglot import exp
-
-if TYPE_CHECKING:
-    import pandas as pd
-    import pyarrow.lib
-import numpy as np
 import pyarrow
 import snowflake.connector.converter
 import snowflake.connector.errors
 import sqlglot
 from duckdb import DuckDBPyConnection
-from snowflake.connector.cursor import DictCursor, ResultMetadata, SnowflakeCursor
+from snowflake.connector.cursor import ResultMetadata
 from snowflake.connector.result_batch import ResultBatch
-from sqlglot import parse_one
+from sqlglot import exp, parse_one
 from typing_extensions import Self
 
 import fakesnow.checks as checks
 import fakesnow.expr as expr
 import fakesnow.info_schema as info_schema
-import fakesnow.macros as macros
 import fakesnow.transforms as transforms
-from fakesnow.variables import Variables
+from fakesnow.types import describe_as_result_metadata
+
+if TYPE_CHECKING:
+    import pandas as pd
+    import pyarrow.lib
+
+    from fakesnow.conn import FakeSnowflakeConnection
+
 
 SCHEMA_UNSET = "schema_unset"
 SQL_SUCCESS = "SELECT 'Statement executed successfully.' as 'status'"
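The import reshuffle above also shows how the new two-module layout avoids a circular import: conn.py imports FakeSnowflakeCursor at runtime, so cursor.py pulls FakeSnowflakeConnection in under TYPE_CHECKING only. A minimal sketch of that pattern, assuming this is the motivation:

```python
from __future__ import annotations  # annotations are strings, evaluated lazily

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # visible to type checkers only; never executed, so no import cycle at runtime
    from fakesnow.conn import FakeSnowflakeConnection


class FakeSnowflakeCursor:
    def __init__(self, conn: FakeSnowflakeConnection) -> None:
        self._conn = conn
```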
fakesnow/cursor.py (continued)

@@ -108,7 +106,7 @@ class FakeSnowflakeCursor:
 
         describe = f"DESCRIBE {command}"
         self.execute(describe, *args, **kwargs)
-        return self._describe_as_result_metadata(self.fetchall())
+        return describe_as_result_metadata(self.fetchall())
 
     @property
     def description(self) -> list[ResultMetadata]:

@@ -116,7 +114,7 @@ class FakeSnowflakeCursor:
         with self._conn.cursor() as cur:
            expression = sqlglot.parse_one(f"DESCRIBE {self._last_sql}", read="duckdb")
             cur._execute(expression, self._last_params)  # noqa: SLF001
-            meta = self._describe_as_result_metadata(cur.fetchall())
+            meta = describe_as_result_metadata(cur.fetchall())
 
         return meta
 
@@ -417,76 +415,6 @@ class FakeSnowflakeCursor:
     def sqlstate(self) -> str | None:
         return self._sqlstate
 
-    @staticmethod
-    def _describe_as_result_metadata(describe_results: list) -> list[ResultMetadata]:
-        # fmt: off
-        def as_result_metadata(column_name: str, column_type: str, _: str) -> ResultMetadata:
-            # see https://docs.snowflake.com/en/user-guide/python-connector-api.html#type-codes
-            # and https://arrow.apache.org/docs/python/api/datatypes.html#type-checking
-            if column_type in {"BIGINT", "INTEGER"}:
-                return ResultMetadata(
-                    name=column_name, type_code=0, display_size=None, internal_size=None, precision=38, scale=0, is_nullable=True  # noqa: E501
-                )
-            elif column_type.startswith("DECIMAL"):
-                match = re.search(r'\((\d+),(\d+)\)', column_type)
-                if match:
-                    precision = int(match[1])
-                    scale = int(match[2])
-                else:
-                    precision = scale = None
-                return ResultMetadata(
-                    name=column_name, type_code=0, display_size=None, internal_size=None, precision=precision, scale=scale, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "VARCHAR":
-                # TODO: fetch internal_size from varchar size
-                return ResultMetadata(
-                    name=column_name, type_code=2, display_size=None, internal_size=16777216, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "DOUBLE":
-                return ResultMetadata(
-                    name=column_name, type_code=1, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "BOOLEAN":
-                return ResultMetadata(
-                    name=column_name, type_code=13, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "DATE":
-                return ResultMetadata(
-                    name=column_name, type_code=3, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type in {"TIMESTAMP", "TIMESTAMP_NS"}:
-                return ResultMetadata(
-                    name=column_name, type_code=8, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "TIMESTAMP WITH TIME ZONE":
-                return ResultMetadata(
-                    name=column_name, type_code=7, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "BLOB":
-                return ResultMetadata(
-                    name=column_name, type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "TIME":
-                return ResultMetadata(
-                    name=column_name, type_code=12, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "JSON":
-                # TODO: correctly map OBJECT and ARRAY see https://github.com/tekumara/fakesnow/issues/26
-                return ResultMetadata(
-                    name=column_name, type_code=5, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            else:
-                # TODO handle more types
-                raise NotImplementedError(f"for column type {column_type}")
-
-        # fmt: on
-
-        meta = [
-            as_result_metadata(column_name, column_type, null)
-            for (column_name, column_type, null, _, _, _) in describe_results
-        ]
-        return meta
-
     def _rewrite_with_params(
         self,
         command: str,
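Both call sites above now delegate to describe_as_result_metadata in the new fakesnow/types.py (+89 lines per the summary). Assuming the move preserves the mapping removed here, the duckdb-DESCRIBE-to-ResultMetadata translation is unchanged; a small sketch:

```python
from snowflake.connector.cursor import ResultMetadata

from fakesnow.types import describe_as_result_metadata

# rows as returned by duckdb's DESCRIBE: (name, type, null, key, default, extra)
rows = [("ID", "BIGINT", "YES", None, None, None)]

assert describe_as_result_metadata(rows) == [
    ResultMetadata(name="ID", type_code=0, display_size=None, internal_size=None,
                   precision=38, scale=0, is_nullable=True)
]
```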
fakesnow/cursor.py (continued)

@@ -511,154 +439,6 @@ class FakeSnowflakeCursor:
         return self._conn.variables.inline_variables(sql)
 
 
-class FakeSnowflakeConnection:
-    def __init__(
-        self,
-        duck_conn: DuckDBPyConnection,
-        database: str | None = None,
-        schema: str | None = None,
-        create_database: bool = True,
-        create_schema: bool = True,
-        db_path: str | os.PathLike | None = None,
-        nop_regexes: list[str] | None = None,
-        *args: Any,
-        **kwargs: Any,
-    ):
-        self._duck_conn = duck_conn
-        # upper case database and schema like snowflake unquoted identifiers
-        # so they appear as upper-cased in information_schema
-        # catalog and schema names are not actually case-sensitive in duckdb even though
-        # they are as cased in information_schema.schemata, so when selecting from
-        # information_schema.schemata below we use upper-case to match any existing duckdb
-        # catalog or schemas like "information_schema"
-        self.database = database and database.upper()
-        self.schema = schema and schema.upper()
-
-        self.database_set = False
-        self.schema_set = False
-        self.db_path = Path(db_path) if db_path else None
-        self.nop_regexes = nop_regexes
-        self._paramstyle = snowflake.connector.paramstyle
-        self.variables = Variables()
-
-        # create database if needed
-        if (
-            create_database
-            and self.database
-            and not duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}'"""
-            ).fetchone()
-        ):
-            db_file = f"{self.db_path/self.database}.db" if self.db_path else ":memory:"
-            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
-            duck_conn.execute(info_schema.creation_sql(self.database))
-            duck_conn.execute(macros.creation_sql(self.database))
-
-        # create schema if needed
-        if (
-            create_schema
-            and self.database
-            and self.schema
-            and not duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"CREATE SCHEMA {self.database}.{self.schema}")
-
-        # set database and schema if both exist
-        if (
-            self.database
-            and self.schema
-            and duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"SET schema='{self.database}.{self.schema}'")
-            self.database_set = True
-            self.schema_set = True
-        # set database if only that exists
-        elif (
-            self.database
-            and duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"SET schema='{self.database}.main'")
-            self.database_set = True
-
-        # use UTC instead of local time zone for consistent testing
-        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
-
-    def __enter__(self) -> Self:
-        return self
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc_value: BaseException | None,
-        traceback: TracebackType | None,
-    ) -> None:
-        pass
-
-    def close(self, retry: bool = True) -> None:
-        self._duck_conn.close()
-
-    def commit(self) -> None:
-        self.cursor().execute("COMMIT")
-
-    def cursor(self, cursor_class: type[SnowflakeCursor] = SnowflakeCursor) -> FakeSnowflakeCursor:
-        # TODO: use duck_conn cursor for thread-safety
-        return FakeSnowflakeCursor(conn=self, duck_conn=self._duck_conn, use_dict_result=cursor_class == DictCursor)
-
-    def execute_string(
-        self,
-        sql_text: str,
-        remove_comments: bool = False,
-        return_cursors: bool = True,
-        cursor_class: type[SnowflakeCursor] = SnowflakeCursor,
-        **kwargs: dict[str, Any],
-    ) -> Iterable[FakeSnowflakeCursor]:
-        cursors = [
-            self.cursor(cursor_class).execute(e.sql(dialect="snowflake"))
-            for e in sqlglot.parse(sql_text, read="snowflake")
-            if e and not isinstance(e, exp.Semicolon)  # ignore comments
-        ]
-        return cursors if return_cursors else []
-
-    def rollback(self) -> None:
-        self.cursor().execute("ROLLBACK")
-
-    def _insert_df(self, df: pd.DataFrame, table_name: str) -> int:
-        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
-        # Whereas duckdb analyses a dataframe see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
-        # and converts a object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST
-        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
-        #
-        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
-        # varchar and value a string containing a struct representation. In order to support dicts with different keys
-        # we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the dataframe
-        # copy and transform in python.
-
-        df = df.copy()
-
-        # Identify columns of type object
-        object_cols = df.select_dtypes(include=["object"]).columns
-
-        # Apply json.dumps to these columns
-        for col in object_cols:
-            # don't jsonify string
-            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
-
-        escaped_cols = ",".join(f'"{col}"' for col in df.columns.to_list())
-        self._duck_conn.execute(f"INSERT INTO {table_name}({escaped_cols}) SELECT * FROM df")
-
-        return self._duck_conn.fetchall()[0][0]
-
-
 class FakeResultBatch(ResultBatch):
     def __init__(self, use_dict_result: bool, batch: pyarrow.RecordBatch):
         self._use_dict_result = use_dict_result
@@ -681,70 +461,3 @@ class FakeResultBatch(ResultBatch):
 
     def to_arrow(self) -> pyarrow.Table:
         raise NotImplementedError()
-
-
-CopyResult = tuple[
-    str,
-    str,
-    int,
-    int,
-    int,
-    int,
-    Optional[str],
-    Optional[int],
-    Optional[int],
-    Optional[str],
-]
-
-WritePandasResult = tuple[
-    bool,
-    int,
-    int,
-    Sequence[CopyResult],
-]
-
-
-def sql_type(dtype: np.dtype) -> str:
-    if str(dtype) == "int64":
-        return "NUMBER"
-    elif str(dtype) == "object":
-        return "VARCHAR"
-    else:
-        raise NotImplementedError(f"sql_type {dtype=}")
-
-
-def write_pandas(
-    conn: FakeSnowflakeConnection,
-    df: pd.DataFrame,
-    table_name: str,
-    database: str | None = None,
-    schema: str | None = None,
-    chunk_size: int | None = None,
-    compression: str = "gzip",
-    on_error: str = "abort_statement",
-    parallel: int = 4,
-    quote_identifiers: bool = True,
-    auto_create_table: bool = False,
-    create_temp_table: bool = False,
-    overwrite: bool = False,
-    table_type: Literal["", "temp", "temporary", "transient"] = "",
-    **kwargs: Any,
-) -> WritePandasResult:
-    name = table_name
-    if schema:
-        name = f"{schema}.{name}"
-    if database:
-        name = f"{database}.{name}"
-
-    if auto_create_table:
-        cols = [f"{c} {sql_type(t)}" for c, t in df.dtypes.to_dict().items()]
-
-        conn.cursor().execute(f"CREATE TABLE IF NOT EXISTS {name} ({','.join(cols)})")
-
-    count = conn._insert_df(df, name)  # noqa: SLF001
-
-    # mocks https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#output
-    mock_copy_results = [("fakesnow/file0.txt", "LOADED", count, count, 1, 0, None, None, None, None)]
-
-    # return success
-    return (True, len(mock_copy_results), count, mock_copy_results)