fakesnow 0.9.22__py3-none-any.whl → 0.9.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fakesnow/arrow.py +38 -3
- fakesnow/conn.py +147 -0
- fakesnow/cursor.py +465 -0
- fakesnow/fakes.py +3 -750
- fakesnow/pandas_tools.py +108 -0
- fakesnow/server.py +5 -11
- fakesnow/types.py +89 -0
- {fakesnow-0.9.22.dist-info → fakesnow-0.9.24.dist-info}/METADATA +2 -1
- {fakesnow-0.9.22.dist-info → fakesnow-0.9.24.dist-info}/RECORD +13 -9
- {fakesnow-0.9.22.dist-info → fakesnow-0.9.24.dist-info}/LICENSE +0 -0
- {fakesnow-0.9.22.dist-info → fakesnow-0.9.24.dist-info}/WHEEL +0 -0
- {fakesnow-0.9.22.dist-info → fakesnow-0.9.24.dist-info}/entry_points.txt +0 -0
- {fakesnow-0.9.22.dist-info → fakesnow-0.9.24.dist-info}/top_level.txt +0 -0
fakesnow/pandas_tools.py
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import json
|
4
|
+
from collections.abc import Sequence
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, Optional
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
from duckdb import DuckDBPyConnection
|
9
|
+
|
10
|
+
from fakesnow.conn import FakeSnowflakeConnection
|
11
|
+
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
# don't require pandas at import time
|
14
|
+
import pandas as pd
|
15
|
+
|
16
|
+
|
17
|
+
# One row of mocked COPY INTO <table> output, as built by write_pandas.
# Field names below follow the Snowflake COPY INTO output columns
# (NOTE(review): order taken from the Snowflake docs -- confirm).
CopyResult = tuple[
    str,  # file
    str,  # status
    int,  # rows_parsed
    int,  # rows_loaded
    int,  # error_limit
    int,  # errors_seen
    Optional[str],  # first_error
    Optional[int],  # first_error_line
    Optional[int],  # first_error_character
    Optional[str],  # first_error_column_name
]

# Return shape of write_pandas: (success, number of copy results, row count, copy results).
WritePandasResult = tuple[
    bool,
    int,
    int,
    Sequence[CopyResult],
]
|
36
|
+
|
37
|
+
|
38
|
+
def sql_type(dtype: np.dtype) -> str:
    """Return the SQL column type to use for a dataframe column dtype.

    Only two dtypes are recognised, matched on their string form:
    ``int64`` maps to ``NUMBER`` and ``object`` maps to ``VARCHAR``.

    Raises:
        NotImplementedError: if the dtype is anything else.
    """
    supported = {"int64": "NUMBER", "object": "VARCHAR"}
    mapped = supported.get(str(dtype))
    if mapped is None:
        raise NotImplementedError(f"sql_type {dtype=}")
    return mapped
|
45
|
+
|
46
|
+
|
47
|
+
def write_pandas(
    conn: FakeSnowflakeConnection,
    df: pd.DataFrame,
    table_name: str,
    database: str | None = None,
    schema: str | None = None,
    chunk_size: int | None = None,
    compression: str = "gzip",
    on_error: str = "abort_statement",
    parallel: int = 4,
    quote_identifiers: bool = True,
    auto_create_table: bool = False,
    create_temp_table: bool = False,
    overwrite: bool = False,
    table_type: Literal["", "temp", "temporary", "transient"] = "",
    **kwargs: Any,
) -> WritePandasResult:
    """Insert a dataframe into a table and return a mocked write_pandas result.

    Args:
        conn: connection whose underlying duckdb connection receives the rows.
        df: dataframe to insert.
        table_name: target table name.
        database: optional qualifier, prepended to the name when truthy.
        schema: optional qualifier, prepended to the name when truthy.
        auto_create_table: when true, issue ``CREATE TABLE IF NOT EXISTS`` first,
            with column types derived from the dataframe dtypes via ``sql_type``.

    The remaining parameters (``chunk_size``, ``compression``, ``on_error``,
    ``parallel``, ``quote_identifiers``, ``create_temp_table``, ``overwrite``,
    ``table_type`` and ``**kwargs``) are accepted but not read by this function.

    Returns:
        ``(True, 1, row_count, copy_results)`` where ``copy_results`` holds a single
        mocked COPY INTO row reporting ``row_count`` rows parsed and loaded.
    """
    # Qualify the table name with whichever of database/schema were supplied.
    qualifiers = [part for part in (database, schema) if part]
    name = ".".join([*qualifiers, table_name])

    if auto_create_table:
        column_defs = ",".join(f"{column} {sql_type(dtype)}" for column, dtype in df.dtypes.to_dict().items())
        conn.cursor().execute(f"CREATE TABLE IF NOT EXISTS {name} ({column_defs})")

    count = _insert_df(conn._duck_conn, df, name)  # noqa: SLF001

    # mocks https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#output
    copy_rows = [("fakesnow/file0.txt", "LOADED", count, count, 1, 0, None, None, None, None)]

    # report success
    return (True, len(copy_rows), count, copy_rows)
|
82
|
+
|
83
|
+
|
84
|
+
def _insert_df(duck_conn: DuckDBPyConnection, df: pd.DataFrame, table_name: str) -> int:
    """Insert every row of ``df`` into ``table_name`` and return the first value of the first result row.

    dict and list values in object columns are serialised to JSON strings before
    the insert; all other values are passed through unchanged. The caller's
    dataframe is not modified because the work is done on a copy.
    """
    # Why the JSON conversion:
    # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as
    # json strings. duckdb instead analyses the dataframe (see
    # https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns) and picks
    # the most specific type it can, eg: dict -> STRUCT, MAP or varchar, and list -> LIST.
    # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
    #
    # When rows hold dicts with different keys no single STRUCT covers them, so duckdb falls back to a
    # varchar holding a struct representation. Converting to json strings up front supports that case,
    # at the cost of copying and transforming the dataframe in python.

    # The local must stay named ``df``: the SQL below refers to it by that name
    # (presumably resolved by duckdb from the Python variable -- verify against the duckdb docs).
    df = df.copy()

    def _jsonify(value: Any) -> Any:
        # strings and other scalars are left alone; only containers become json
        if isinstance(value, (dict, list)):
            return json.dumps(value)
        return value

    for col in df.select_dtypes(include=["object"]).columns:
        df[col] = df[col].apply(_jsonify)

    escaped_cols = ",".join(f'"{col}"' for col in df.columns.to_list())
    duck_conn.execute(f"INSERT INTO {table_name}({escaped_cols}) SELECT * FROM df")

    first_row = duck_conn.fetchall()[0]
    return first_row[0]
|
fakesnow/server.py
CHANGED
@@ -12,7 +12,7 @@ from starlette.requests import Request
|
|
12
12
|
from starlette.responses import JSONResponse
|
13
13
|
from starlette.routing import Route
|
14
14
|
|
15
|
-
from fakesnow.arrow import to_ipc
|
15
|
+
from fakesnow.arrow import to_ipc, to_rowtype, with_sf_metadata
|
16
16
|
from fakesnow.fakes import FakeSnowflakeConnection
|
17
17
|
from fakesnow.instance import FakeSnow
|
18
18
|
|
@@ -52,19 +52,13 @@ async def query_request(request: Request) -> JSONResponse:
|
|
52
52
|
batch_bytes = to_ipc(cur._arrow_table) # noqa: SLF001
|
53
53
|
rowset_b64 = b64encode(batch_bytes).decode("utf-8")
|
54
54
|
|
55
|
+
# TODO: avoid calling with_sf_metadata twice
|
56
|
+
rowtype = to_rowtype(with_sf_metadata(cur._arrow_table.schema)) # noqa: SLF001
|
57
|
+
|
55
58
|
return JSONResponse(
|
56
59
|
{
|
57
60
|
"data": {
|
58
|
-
"rowtype":
|
59
|
-
{
|
60
|
-
"name": "'HELLO WORLD'",
|
61
|
-
"nullable": False,
|
62
|
-
"type": "text",
|
63
|
-
"length": 11,
|
64
|
-
"scale": None,
|
65
|
-
"precision": None,
|
66
|
-
}
|
67
|
-
],
|
61
|
+
"rowtype": rowtype,
|
68
62
|
"rowsetBase64": rowset_b64,
|
69
63
|
"total": 1,
|
70
64
|
"queryResultFormat": "arrow",
|
fakesnow/types.py
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
import re
|
2
|
+
from typing import Optional, TypedDict
|
3
|
+
|
4
|
+
from snowflake.connector.cursor import ResultMetadata
|
5
|
+
|
6
|
+
|
7
|
+
class ColumnInfo(TypedDict):
    """Description of a single result column, one entry per column in a rowtype list."""

    # column identity
    name: str
    database: str
    schema: str
    table: str

    # type information
    nullable: bool
    type: str

    # size / numeric attributes; None when not set for the column's type
    byteLength: Optional[int]
    length: Optional[int]
    scale: Optional[int]
    precision: Optional[int]
    collation: Optional[str]
|
19
|
+
|
20
|
+
|
21
|
+
# Maps a duckdb column type name to the snowflake rowtype "type" string.
# Parameterised decimals are looked up under the bare "DECIMAL" key by describe_as_rowtype.
duckdb_to_sf_type = {
    # numeric
    "BIGINT": "fixed",
    # binary / boolean / date
    "BLOB": "binary",
    "BOOLEAN": "boolean",
    "DATE": "date",
    # numeric (continued; original key order is preserved)
    "DECIMAL": "fixed",
    "DOUBLE": "real",
    "INTEGER": "fixed",
    # semi-structured
    "JSON": "variant",
    # time and timestamps
    "TIME": "time",
    "TIMESTAMP WITH TIME ZONE": "timestamp_tz",
    "TIMESTAMP_NS": "timestamp_ntz",
    "TIMESTAMP": "timestamp_ntz",
    # text
    "VARCHAR": "text",
}
|
36
|
+
|
37
|
+
|
38
|
+
def describe_as_rowtype(describe_results: list) -> list[ColumnInfo]:
    """Convert duckdb describe rows into the snowflake rowtype returned by the API.

    Each row must be a 6-item sequence whose first two items are the column name
    and the duckdb column type; the remaining four items are ignored.

    Raises:
        NotImplementedError: if a column type has no entry in ``duckdb_to_sf_type``.
    """

    def as_column_info(column_name: str, column_type: str) -> ColumnInfo:
        # Parameterised decimals, eg: DECIMAL(18,3), share the single "DECIMAL" mapping.
        is_decimal = column_type.startswith("DECIMAL")
        sf_type = duckdb_to_sf_type.get("DECIMAL" if is_decimal else column_type)
        if not sf_type:
            raise NotImplementedError(f"for column type {column_type}")

        byte_length = length = scale = precision = None

        if is_decimal:
            # Fall back to (38, 0) when no "(precision,scale)" suffix is present.
            params = re.search(r"\((\d+),(\d+)\)", column_type)
            precision = int(params[1]) if params else 38
            scale = int(params[2]) if params else 0
        elif sf_type == "fixed":
            precision, scale = 38, 0
        elif sf_type == "text":
            # TODO: fetch actual varchar size
            byte_length = length = 16777216
        elif sf_type.startswith("time"):
            # covers "time" as well as the "timestamp_*" types
            precision, scale = 0, 9
        elif sf_type == "binary":
            byte_length = length = 8388608

        return {
            "name": column_name,
            # TODO
            "database": "",
            "schema": "",
            "table": "",
            # TODO
            "nullable": True,
            "type": sf_type,
            "byteLength": byte_length,
            "length": length,
            "scale": scale,
            "precision": precision,
            "collation": None,
        }

    rowtype = []
    for column_name, column_type, _null, _key, _default, _extra in describe_results:
        rowtype.append(as_column_info(column_name, column_type))
    return rowtype
|
86
|
+
|
87
|
+
|
88
|
+
def describe_as_result_metadata(describe_results: list) -> list[ResultMetadata]:
    """Convert duckdb describe rows into ``ResultMetadata`` objects.

    The rows are first converted with ``describe_as_rowtype``; each resulting
    column description is then passed to ``ResultMetadata.from_column``.
    """
    rowtype = describe_as_rowtype(describe_results)
    return [ResultMetadata.from_column(column) for column in rowtype]  # pyright: ignore[reportArgumentType]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: fakesnow
|
3
|
-
Version: 0.9.22
|
3
|
+
Version: 0.9.24
|
4
4
|
Summary: Fake Snowflake Connector for Python. Run, mock and test Snowflake DB locally.
|
5
5
|
License: Apache License
|
6
6
|
Version 2.0, January 2004
|
@@ -216,6 +216,7 @@ Requires-Dist: snowflake-connector-python
|
|
216
216
|
Requires-Dist: sqlglot~=25.9.0
|
217
217
|
Provides-Extra: dev
|
218
218
|
Requires-Dist: build~=1.0; extra == "dev"
|
219
|
+
Requires-Dist: dirty-equals; extra == "dev"
|
219
220
|
Requires-Dist: pandas-stubs; extra == "dev"
|
220
221
|
Requires-Dist: snowflake-connector-python[pandas,secure-local-storage]; extra == "dev"
|
221
222
|
Requires-Dist: pre-commit~=3.4; extra == "dev"
|
@@ -1,21 +1,25 @@
|
|
1
1
|
fakesnow/__init__.py,sha256=9tFJJKvowKNW3vfnlmza6hOLN1I52DwChgNc5Ew6CcA,3499
|
2
2
|
fakesnow/__main__.py,sha256=GDrGyNTvBFuqn_UfDjKs7b3LPtU6gDv1KwosVDrukIM,76
|
3
|
-
fakesnow/arrow.py,sha256=
|
3
|
+
fakesnow/arrow.py,sha256=WLkr1nEiNxUcPdzadKSM33sRAiQJsN6LvuzTVIsi3D0,2766
|
4
4
|
fakesnow/checks.py,sha256=-QMvdcrRbhN60rnzxLBJ0IkUBWyLR8gGGKKmCS0w9mA,2383
|
5
5
|
fakesnow/cli.py,sha256=9qfI-Ssr6mo8UmIlXkUAOz2z2YPBgDsrEVaZv9FjGFs,2201
|
6
|
+
fakesnow/conn.py,sha256=Gy_Z7BZRm5yMjV3x6hR4iegDQFdG9aJBjqWdc3iWYFU,5353
|
7
|
+
fakesnow/cursor.py,sha256=2PtW9hzfXs3mzv6BBxXLoS-pPtD4otrfQ2KnPNNanGI,19441
|
6
8
|
fakesnow/expr.py,sha256=CAxuYIUkwI339DQIBzvFF0F-m1tcVGKEPA5rDTzmH9A,892
|
7
|
-
fakesnow/fakes.py,sha256=
|
9
|
+
fakesnow/fakes.py,sha256=JQTiUkkwPeQrJ8FDWhPFPK6pGwd_aR2oiOrNzCWznlM,187
|
8
10
|
fakesnow/fixtures.py,sha256=G-NkVeruSQAJ7fvSS2fR2oysUn0Yra1pohHlOvacKEk,455
|
9
11
|
fakesnow/info_schema.py,sha256=DObVOrhzppAFHsdtj4YI9oRISn9SkJUG6ONjVleQQ_Y,6303
|
10
12
|
fakesnow/instance.py,sha256=3cJvPRuFy19dMKXbtBLl6imzO48pEw8uTYhZyFDuwhk,3133
|
11
13
|
fakesnow/macros.py,sha256=pX1YJDnQOkFJSHYUjQ6ErEkYIKvFI6Ncz_au0vv1csA,265
|
14
|
+
fakesnow/pandas_tools.py,sha256=WjyjTV8QUCQQaCGboaEOvx2uo4BkknpWYjtLwkeCY6U,3468
|
12
15
|
fakesnow/py.typed,sha256=B-DLSjYBi7pkKjwxCSdpVj2J02wgfJr-E7B1wOUyxYU,80
|
13
|
-
fakesnow/server.py,sha256=
|
16
|
+
fakesnow/server.py,sha256=8dzaLUUXPzCMm6-ESn0CBws6XSwwOpnUuHQAZJ-4SwU,3011
|
14
17
|
fakesnow/transforms.py,sha256=ellcY5OBc7mqgL9ChNolrqcCLWXF9RH21Jt88FcFl-I,54419
|
18
|
+
fakesnow/types.py,sha256=9Tt83Z7ctc9_v6SYyayXYz4MEI4RZo4zq_uqdj4g3Dk,2681
|
15
19
|
fakesnow/variables.py,sha256=WXyPnkeNwD08gy52yF66CVe2twiYC50tztNfgXV4q1k,3032
|
16
|
-
fakesnow-0.9.
|
17
|
-
fakesnow-0.9.
|
18
|
-
fakesnow-0.9.
|
19
|
-
fakesnow-0.9.
|
20
|
-
fakesnow-0.9.
|
21
|
-
fakesnow-0.9.
|
20
|
+
fakesnow-0.9.24.dist-info/LICENSE,sha256=kW-7NWIyaRMQiDpryfSmF2DObDZHGR1cJZ39s6B1Svg,11344
|
21
|
+
fakesnow-0.9.24.dist-info/METADATA,sha256=LHKc6JYn9sxxFh6_i7kqlWz1fmloFv2CCmpalwPVFrE,18064
|
22
|
+
fakesnow-0.9.24.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
23
|
+
fakesnow-0.9.24.dist-info/entry_points.txt,sha256=2riAUgu928ZIHawtO8EsfrMEJhi-EH-z_Vq7Q44xKPM,47
|
24
|
+
fakesnow-0.9.24.dist-info/top_level.txt,sha256=500evXI1IFX9so82cizGIEMHAb_dJNPaZvd2H9dcKTA,24
|
25
|
+
fakesnow-0.9.24.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|