fakesnow 0.9.22__tar.gz → 0.9.24__tar.gz
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- {fakesnow-0.9.22 → fakesnow-0.9.24}/PKG-INFO +2 -1
- fakesnow-0.9.24/fakesnow/arrow.py +67 -0
- fakesnow-0.9.24/fakesnow/conn.py +147 -0
- fakesnow-0.9.22/fakesnow/fakes.py → fakesnow-0.9.24/fakesnow/cursor.py +16 -301
- fakesnow-0.9.24/fakesnow/fakes.py +3 -0
- fakesnow-0.9.24/fakesnow/pandas_tools.py +108 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/server.py +5 -11
- fakesnow-0.9.24/fakesnow/types.py +89 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/PKG-INFO +2 -1
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/SOURCES.txt +4 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/requires.txt +1 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/pyproject.toml +2 -1
- fakesnow-0.9.24/tests/test_arrow.py +99 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_fakes.py +6 -1
- fakesnow-0.9.24/tests/test_server.py +98 -0
- fakesnow-0.9.22/fakesnow/arrow.py +0 -32
- fakesnow-0.9.22/tests/test_arrow.py +0 -53
- fakesnow-0.9.22/tests/test_server.py +0 -67
- {fakesnow-0.9.22 → fakesnow-0.9.24}/LICENSE +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/README.md +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/__init__.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/__main__.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/checks.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/cli.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/expr.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/fixtures.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/info_schema.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/instance.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/macros.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/py.typed +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/transforms.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/variables.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/dependency_links.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/entry_points.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/top_level.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/setup.cfg +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_checks.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_cli.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_connect.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_expr.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_info_schema.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_patch.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_sqlalchemy.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_transforms.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_users.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_write_pandas.py +0 -0
{fakesnow-0.9.22 → fakesnow-0.9.24}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fakesnow
-Version: 0.9.22
+Version: 0.9.24
 Summary: Fake Snowflake Connector for Python. Run, mock and test Snowflake DB locally.
 License: Apache License
                            Version 2.0, January 2004
@@ -216,6 +216,7 @@ Requires-Dist: snowflake-connector-python
 Requires-Dist: sqlglot~=25.9.0
 Provides-Extra: dev
 Requires-Dist: build~=1.0; extra == "dev"
+Requires-Dist: dirty-equals; extra == "dev"
 Requires-Dist: pandas-stubs; extra == "dev"
 Requires-Dist: snowflake-connector-python[pandas,secure-local-storage]; extra == "dev"
 Requires-Dist: pre-commit~=3.4; extra == "dev"
fakesnow-0.9.24/fakesnow/arrow.py (new)

@@ -0,0 +1,67 @@
+from typing import Any
+
+import pyarrow as pa
+
+
+def with_sf_metadata(schema: pa.Schema) -> pa.Schema:
+    # see https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp#L32
+    # and https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/SnowflakeType.cpp#L10
+    fms = []
+    for i, t in enumerate(schema.types):
+        f = schema.field(i)
+
+        # TODO: precision, scale, charLength etc. for all types
+
+        if t == pa.bool_():
+            fm = f.with_metadata({"logicalType": "BOOLEAN"})
+        elif t == pa.int64():
+            # scale and precision required, see here
+            # https://github.com/snowflakedb/snowflake-connector-python/blob/416ff57/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowChunkIterator.cpp#L147
+            fm = f.with_metadata({"logicalType": "FIXED", "precision": "38", "scale": "0"})
+        elif t == pa.float64():
+            fm = f.with_metadata({"logicalType": "REAL"})
+        elif isinstance(t, pa.Decimal128Type):
+            fm = f.with_metadata({"logicalType": "FIXED", "precision": str(t.precision), "scale": str(t.scale)})
+        elif t == pa.string():
+            # TODO: set charLength to size of column
+            fm = f.with_metadata({"logicalType": "TEXT", "charLength": "16777216"})
+        else:
+            raise NotImplementedError(f"Unsupported Arrow type: {t}")
+        fms.append(fm)
+    return pa.schema(fms)
+
+
+def to_ipc(table: pa.Table) -> pa.Buffer:
+    batches = table.to_batches()
+    if len(batches) != 1:
+        raise NotImplementedError(f"{len(batches)} batches")
+    batch = batches[0]
+
+    sink = pa.BufferOutputStream()
+
+    with pa.ipc.new_stream(sink, with_sf_metadata(table.schema)) as writer:
+        writer.write_batch(batch)
+
+    return sink.getvalue()
+
+
+# TODO: should this be derived before with_schema?
+def to_rowtype(schema: pa.Schema) -> list[dict[str, Any]]:
+    return [
+        {
+            "name": f.name,
+            # TODO
+            # "database": "",
+            # "schema": "",
+            # "table": "",
+            "nullable": f.nullable,
+            "type": f.metadata.get(b"logicalType").decode("utf-8").lower(),  # type: ignore
+            # TODO
+            # "byteLength": 20,
+            "length": int(f.metadata.get(b"charLength")) if f.metadata.get(b"charLength") else None,  # type: ignore
+            "scale": int(f.metadata.get(b"scale")) if f.metadata.get(b"scale") else None,  # type: ignore
+            "precision": int(f.metadata.get(b"precision")) if f.metadata.get(b"precision") else None,  # type: ignore
+            "collation": None,
+        }
+        for f in schema
+    ]
fakesnow-0.9.24/fakesnow/conn.py (new)

@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+import os
+from collections.abc import Iterable
+from pathlib import Path
+from types import TracebackType
+from typing import Any
+
+import snowflake.connector.converter
+import snowflake.connector.errors
+import sqlglot
+from duckdb import DuckDBPyConnection
+from snowflake.connector.cursor import DictCursor, SnowflakeCursor
+from sqlglot import exp
+from typing_extensions import Self
+
+import fakesnow.info_schema as info_schema
+import fakesnow.macros as macros
+from fakesnow.cursor import FakeSnowflakeCursor
+from fakesnow.variables import Variables
+
+
+class FakeSnowflakeConnection:
+    def __init__(
+        self,
+        duck_conn: DuckDBPyConnection,
+        database: str | None = None,
+        schema: str | None = None,
+        create_database: bool = True,
+        create_schema: bool = True,
+        db_path: str | os.PathLike | None = None,
+        nop_regexes: list[str] | None = None,
+        *args: Any,
+        **kwargs: Any,
+    ):
+        self._duck_conn = duck_conn
+        self._is_closed = False
+        # upper case database and schema like snowflake unquoted identifiers
+        # so they appear as upper-cased in information_schema
+        # catalog and schema names are not actually case-sensitive in duckdb even though
+        # they are as cased in information_schema.schemata, so when selecting from
+        # information_schema.schemata below we use upper-case to match any existing duckdb
+        # catalog or schemas like "information_schema"
+        self.database = database and database.upper()
+        self.schema = schema and schema.upper()
+
+        self.database_set = False
+        self.schema_set = False
+        self.db_path = Path(db_path) if db_path else None
+        self.nop_regexes = nop_regexes
+        self._paramstyle = snowflake.connector.paramstyle
+        self.variables = Variables()
+
+        # create database if needed
+        if (
+            create_database
+            and self.database
+            and not duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}'"""
+            ).fetchone()
+        ):
+            db_file = f"{self.db_path/self.database}.db" if self.db_path else ":memory:"
+            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
+            duck_conn.execute(info_schema.creation_sql(self.database))
+            duck_conn.execute(macros.creation_sql(self.database))
+
+        # create schema if needed
+        if (
+            create_schema
+            and self.database
+            and self.schema
+            and not duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"CREATE SCHEMA {self.database}.{self.schema}")
+
+        # set database and schema if both exist
+        if (
+            self.database
+            and self.schema
+            and duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"SET schema='{self.database}.{self.schema}'")
+            self.database_set = True
+            self.schema_set = True
+        # set database if only that exists
+        elif (
+            self.database
+            and duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"SET schema='{self.database}.main'")
+            self.database_set = True
+
+        # use UTC instead of local time zone for consistent testing
+        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        pass
+
+    def close(self, retry: bool = True) -> None:
+        self._duck_conn.close()
+        self._is_closed = True
+
+    def commit(self) -> None:
+        self.cursor().execute("COMMIT")
+
+    def cursor(self, cursor_class: type[SnowflakeCursor] = SnowflakeCursor) -> FakeSnowflakeCursor:
+        # TODO: use duck_conn cursor for thread-safety
+        return FakeSnowflakeCursor(conn=self, duck_conn=self._duck_conn, use_dict_result=cursor_class == DictCursor)
+
+    def execute_string(
+        self,
+        sql_text: str,
+        remove_comments: bool = False,
+        return_cursors: bool = True,
+        cursor_class: type[SnowflakeCursor] = SnowflakeCursor,
+        **kwargs: dict[str, Any],
+    ) -> Iterable[FakeSnowflakeCursor]:
+        cursors = [
+            self.cursor(cursor_class).execute(e.sql(dialect="snowflake"))
+            for e in sqlglot.parse(sql_text, read="snowflake")
+            if e and not isinstance(e, exp.Semicolon)  # ignore comments
+        ]
+        return cursors if return_cursors else []
+
+    def is_closed(self) -> bool:
+        return self._is_closed
+
+    def rollback(self) -> None:
+        self.cursor().execute("ROLLBACK")
+
fakesnow-0.9.22/fakesnow/fakes.py → fakesnow-0.9.24/fakesnow/cursor.py

@@ -1,38 +1,38 @@
 from __future__ import annotations

-import json
 import os
 import re
 import sys
-from collections.abc import Iterable, Iterator, Sequence
-from pathlib import Path
+from collections.abc import Iterator, Sequence
 from string import Template
 from types import TracebackType
-from typing import TYPE_CHECKING, Any, Literal, Optional, cast
+from typing import TYPE_CHECKING, Any, cast

 import duckdb
-from sqlglot import exp
-
-if TYPE_CHECKING:
-    import pandas as pd
-    import pyarrow.lib
-import numpy as np
 import pyarrow
 import snowflake.connector.converter
 import snowflake.connector.errors
 import sqlglot
 from duckdb import DuckDBPyConnection
-from snowflake.connector.cursor import DictCursor, ResultMetadata, SnowflakeCursor
+from snowflake.connector.cursor import ResultMetadata
 from snowflake.connector.result_batch import ResultBatch
-from sqlglot import parse_one
+from sqlglot import exp, parse_one
 from typing_extensions import Self

 import fakesnow.checks as checks
 import fakesnow.expr as expr
 import fakesnow.info_schema as info_schema
-import fakesnow.macros as macros
 import fakesnow.transforms as transforms
-from fakesnow.variables import Variables
+from fakesnow.types import describe_as_result_metadata
+
+if TYPE_CHECKING:
+    # don't require pandas at import time
+    import pandas as pd
+    import pyarrow.lib
+
+    # avoid circular import
+    from fakesnow.conn import FakeSnowflakeConnection
+

 SCHEMA_UNSET = "schema_unset"
 SQL_SUCCESS = "SELECT 'Statement executed successfully.' as 'status'"
@@ -108,7 +108,7 @@ class FakeSnowflakeCursor:

         describe = f"DESCRIBE {command}"
         self.execute(describe, *args, **kwargs)
-        return self._describe_as_result_metadata(self.fetchall())
+        return describe_as_result_metadata(self.fetchall())

     @property
     def description(self) -> list[ResultMetadata]:
@@ -116,7 +116,7 @@ class FakeSnowflakeCursor:
         with self._conn.cursor() as cur:
             expression = sqlglot.parse_one(f"DESCRIBE {self._last_sql}", read="duckdb")
             cur._execute(expression, self._last_params)  # noqa: SLF001
-            meta = self._describe_as_result_metadata(cur.fetchall())
+            meta = describe_as_result_metadata(cur.fetchall())

         return meta

@@ -417,76 +417,6 @@ class FakeSnowflakeCursor:
     def sqlstate(self) -> str | None:
        return self._sqlstate

-    @staticmethod
-    def _describe_as_result_metadata(describe_results: list) -> list[ResultMetadata]:
-        # fmt: off
-        def as_result_metadata(column_name: str, column_type: str, _: str) -> ResultMetadata:
-            # see https://docs.snowflake.com/en/user-guide/python-connector-api.html#type-codes
-            # and https://arrow.apache.org/docs/python/api/datatypes.html#type-checking
-            if column_type in {"BIGINT", "INTEGER"}:
-                return ResultMetadata(
-                    name=column_name, type_code=0, display_size=None, internal_size=None, precision=38, scale=0, is_nullable=True  # noqa: E501
-                )
-            elif column_type.startswith("DECIMAL"):
-                match = re.search(r'\((\d+),(\d+)\)', column_type)
-                if match:
-                    precision = int(match[1])
-                    scale = int(match[2])
-                else:
-                    precision = scale = None
-                return ResultMetadata(
-                    name=column_name, type_code=0, display_size=None, internal_size=None, precision=precision, scale=scale, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "VARCHAR":
-                # TODO: fetch internal_size from varchar size
-                return ResultMetadata(
-                    name=column_name, type_code=2, display_size=None, internal_size=16777216, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "DOUBLE":
-                return ResultMetadata(
-                    name=column_name, type_code=1, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "BOOLEAN":
-                return ResultMetadata(
-                    name=column_name, type_code=13, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "DATE":
-                return ResultMetadata(
-                    name=column_name, type_code=3, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type in {"TIMESTAMP", "TIMESTAMP_NS"}:
-                return ResultMetadata(
-                    name=column_name, type_code=8, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "TIMESTAMP WITH TIME ZONE":
-                return ResultMetadata(
-                    name=column_name, type_code=7, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "BLOB":
-                return ResultMetadata(
-                    name=column_name, type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "TIME":
-                return ResultMetadata(
-                    name=column_name, type_code=12, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "JSON":
-                # TODO: correctly map OBJECT and ARRAY see https://github.com/tekumara/fakesnow/issues/26
-                return ResultMetadata(
-                    name=column_name, type_code=5, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            else:
-                # TODO handle more types
-                raise NotImplementedError(f"for column type {column_type}")
-
-        # fmt: on
-
-        meta = [
-            as_result_metadata(column_name, column_type, null)
-            for (column_name, column_type, null, _, _, _) in describe_results
-        ]
-        return meta
-
     def _rewrite_with_params(
         self,
         command: str,
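
For reference, a sketch of what this mapping yields, assuming the module-level `describe_as_result_metadata` in the new `fakesnow/types.py` preserves the behaviour removed above. Describing a table with a BIGINT and a VARCHAR column would produce something like:

```python
from snowflake.connector.cursor import ResultMetadata

# DESCRIBE output for (ID BIGINT, NAME VARCHAR) mapped per the branches above
expected = [
    ResultMetadata(name="ID", type_code=0, display_size=None, internal_size=None,
                   precision=38, scale=0, is_nullable=True),
    ResultMetadata(name="NAME", type_code=2, display_size=None, internal_size=16777216,
                   precision=None, scale=None, is_nullable=True),
]
```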
@@ -511,154 +441,6 @@ class FakeSnowflakeCursor:
         return self._conn.variables.inline_variables(sql)


-class FakeSnowflakeConnection:
-    def __init__(
-        self,
-        duck_conn: DuckDBPyConnection,
-        database: str | None = None,
-        schema: str | None = None,
-        create_database: bool = True,
-        create_schema: bool = True,
-        db_path: str | os.PathLike | None = None,
-        nop_regexes: list[str] | None = None,
-        *args: Any,
-        **kwargs: Any,
-    ):
-        self._duck_conn = duck_conn
-        # upper case database and schema like snowflake unquoted identifiers
-        # so they appear as upper-cased in information_schema
-        # catalog and schema names are not actually case-sensitive in duckdb even though
-        # they are as cased in information_schema.schemata, so when selecting from
-        # information_schema.schemata below we use upper-case to match any existing duckdb
-        # catalog or schemas like "information_schema"
-        self.database = database and database.upper()
-        self.schema = schema and schema.upper()
-
-        self.database_set = False
-        self.schema_set = False
-        self.db_path = Path(db_path) if db_path else None
-        self.nop_regexes = nop_regexes
-        self._paramstyle = snowflake.connector.paramstyle
-        self.variables = Variables()
-
-        # create database if needed
-        if (
-            create_database
-            and self.database
-            and not duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}'"""
-            ).fetchone()
-        ):
-            db_file = f"{self.db_path/self.database}.db" if self.db_path else ":memory:"
-            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
-            duck_conn.execute(info_schema.creation_sql(self.database))
-            duck_conn.execute(macros.creation_sql(self.database))
-
-        # create schema if needed
-        if (
-            create_schema
-            and self.database
-            and self.schema
-            and not duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"CREATE SCHEMA {self.database}.{self.schema}")
-
-        # set database and schema if both exist
-        if (
-            self.database
-            and self.schema
-            and duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"SET schema='{self.database}.{self.schema}'")
-            self.database_set = True
-            self.schema_set = True
-        # set database if only that exists
-        elif (
-            self.database
-            and duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"SET schema='{self.database}.main'")
-            self.database_set = True
-
-        # use UTC instead of local time zone for consistent testing
-        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
-
-    def __enter__(self) -> Self:
-        return self
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc_value: BaseException | None,
-        traceback: TracebackType | None,
-    ) -> None:
-        pass
-
-    def close(self, retry: bool = True) -> None:
-        self._duck_conn.close()
-
-    def commit(self) -> None:
-        self.cursor().execute("COMMIT")
-
-    def cursor(self, cursor_class: type[SnowflakeCursor] = SnowflakeCursor) -> FakeSnowflakeCursor:
-        # TODO: use duck_conn cursor for thread-safety
-        return FakeSnowflakeCursor(conn=self, duck_conn=self._duck_conn, use_dict_result=cursor_class == DictCursor)
-
-    def execute_string(
-        self,
-        sql_text: str,
-        remove_comments: bool = False,
-        return_cursors: bool = True,
-        cursor_class: type[SnowflakeCursor] = SnowflakeCursor,
-        **kwargs: dict[str, Any],
-    ) -> Iterable[FakeSnowflakeCursor]:
-        cursors = [
-            self.cursor(cursor_class).execute(e.sql(dialect="snowflake"))
-            for e in sqlglot.parse(sql_text, read="snowflake")
-            if e and not isinstance(e, exp.Semicolon)  # ignore comments
-        ]
-        return cursors if return_cursors else []
-
-    def rollback(self) -> None:
-        self.cursor().execute("ROLLBACK")
-
-    def _insert_df(self, df: pd.DataFrame, table_name: str) -> int:
-        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
-        # Whereas duckdb analyses a dataframe see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
-        # and converts a object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST
-        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
-        #
-        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
-        # varchar and value a string containing a struct representation. In order to support dicts with different keys
-        # we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the dataframe
-        # copy and transform in python.
-
-        df = df.copy()
-
-        # Identify columns of type object
-        object_cols = df.select_dtypes(include=["object"]).columns
-
-        # Apply json.dumps to these columns
-        for col in object_cols:
-            # don't jsonify string
-            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
-
-        escaped_cols = ",".join(f'"{col}"' for col in df.columns.to_list())
-        self._duck_conn.execute(f"INSERT INTO {table_name}({escaped_cols}) SELECT * FROM df")
-
-        return self._duck_conn.fetchall()[0][0]
-
-
 class FakeResultBatch(ResultBatch):
     def __init__(self, use_dict_result: bool, batch: pyarrow.RecordBatch):
         self._use_dict_result = use_dict_result
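
The object-column handling in `_insert_df` is the subtle part; here is a standalone sketch of just that transform, showing why dicts with differing keys survive the DuckDB insert as JSON strings rather than a STRUCT:

```python
import json

import pandas as pd

df = pd.DataFrame({"v": [{"a": 1}, {"b": 2}]})  # differing keys: no single STRUCT fits

# JSON-encode dicts/lists so duckdb sees plain varchar values, as in _insert_df above
for col in df.select_dtypes(include=["object"]).columns:
    df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)

print(df["v"].tolist())  # ['{"a": 1}', '{"b": 2}']
```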
@@ -681,70 +463,3 @@ class FakeResultBatch(ResultBatch):

     def to_arrow(self) -> pyarrow.Table:
         raise NotImplementedError()
-
-
-CopyResult = tuple[
-    str,
-    str,
-    int,
-    int,
-    int,
-    int,
-    Optional[str],
-    Optional[int],
-    Optional[int],
-    Optional[str],
-]
-
-WritePandasResult = tuple[
-    bool,
-    int,
-    int,
-    Sequence[CopyResult],
-]
-
-
-def sql_type(dtype: np.dtype) -> str:
-    if str(dtype) == "int64":
-        return "NUMBER"
-    elif str(dtype) == "object":
-        return "VARCHAR"
-    else:
-        raise NotImplementedError(f"sql_type {dtype=}")
-
-
-def write_pandas(
-    conn: FakeSnowflakeConnection,
-    df: pd.DataFrame,
-    table_name: str,
-    database: str | None = None,
-    schema: str | None = None,
-    chunk_size: int | None = None,
-    compression: str = "gzip",
-    on_error: str = "abort_statement",
-    parallel: int = 4,
-    quote_identifiers: bool = True,
-    auto_create_table: bool = False,
-    create_temp_table: bool = False,
-    overwrite: bool = False,
-    table_type: Literal["", "temp", "temporary", "transient"] = "",
-    **kwargs: Any,
-) -> WritePandasResult:
-    name = table_name
-    if schema:
-        name = f"{schema}.{name}"
-    if database:
-        name = f"{database}.{name}"
-
-    if auto_create_table:
-        cols = [f"{c} {sql_type(t)}" for c, t in df.dtypes.to_dict().items()]
-
-        conn.cursor().execute(f"CREATE TABLE IF NOT EXISTS {name} ({','.join(cols)})")
-
-    count = conn._insert_df(df, name)  # noqa: SLF001
-
-    # mocks https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#output
-    mock_copy_results = [("fakesnow/file0.txt", "LOADED", count, count, 1, 0, None, None, None, None)]
-
-    # return success
-    return (True, len(mock_copy_results), count, mock_copy_results)