fakesnow 0.9.22__tar.gz → 0.9.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {fakesnow-0.9.22 → fakesnow-0.9.23}/PKG-INFO +2 -1
  2. fakesnow-0.9.23/fakesnow/arrow.py +67 -0
  3. fakesnow-0.9.23/fakesnow/conn.py +175 -0
  4. fakesnow-0.9.22/fakesnow/fakes.py → fakesnow-0.9.23/fakesnow/cursor.py +14 -301
  5. fakesnow-0.9.23/fakesnow/fakes.py +3 -0
  6. fakesnow-0.9.23/fakesnow/pandas_tools.py +77 -0
  7. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/server.py +5 -11
  8. fakesnow-0.9.23/fakesnow/types.py +89 -0
  9. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/PKG-INFO +2 -1
  10. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/SOURCES.txt +4 -0
  11. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/requires.txt +1 -0
  12. {fakesnow-0.9.22 → fakesnow-0.9.23}/pyproject.toml +2 -1
  13. fakesnow-0.9.23/tests/test_arrow.py +99 -0
  14. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_fakes.py +6 -1
  15. fakesnow-0.9.23/tests/test_server.py +98 -0
  16. fakesnow-0.9.22/fakesnow/arrow.py +0 -32
  17. fakesnow-0.9.22/tests/test_arrow.py +0 -53
  18. fakesnow-0.9.22/tests/test_server.py +0 -67
  19. {fakesnow-0.9.22 → fakesnow-0.9.23}/LICENSE +0 -0
  20. {fakesnow-0.9.22 → fakesnow-0.9.23}/README.md +0 -0
  21. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/__init__.py +0 -0
  22. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/__main__.py +0 -0
  23. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/checks.py +0 -0
  24. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/cli.py +0 -0
  25. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/expr.py +0 -0
  26. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/fixtures.py +0 -0
  27. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/info_schema.py +0 -0
  28. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/instance.py +0 -0
  29. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/macros.py +0 -0
  30. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/py.typed +0 -0
  31. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/transforms.py +0 -0
  32. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow/variables.py +0 -0
  33. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/dependency_links.txt +0 -0
  34. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/entry_points.txt +0 -0
  35. {fakesnow-0.9.22 → fakesnow-0.9.23}/fakesnow.egg-info/top_level.txt +0 -0
  36. {fakesnow-0.9.22 → fakesnow-0.9.23}/setup.cfg +0 -0
  37. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_checks.py +0 -0
  38. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_cli.py +0 -0
  39. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_connect.py +0 -0
  40. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_expr.py +0 -0
  41. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_info_schema.py +0 -0
  42. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_patch.py +0 -0
  43. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_sqlalchemy.py +0 -0
  44. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_transforms.py +0 -0
  45. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_users.py +0 -0
  46. {fakesnow-0.9.22 → fakesnow-0.9.23}/tests/test_write_pandas.py +0 -0
{fakesnow-0.9.22 → fakesnow-0.9.23}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fakesnow
-Version: 0.9.22
+Version: 0.9.23
 Summary: Fake Snowflake Connector for Python. Run, mock and test Snowflake DB locally.
 License: Apache License
 Version 2.0, January 2004
@@ -216,6 +216,7 @@ Requires-Dist: snowflake-connector-python
 Requires-Dist: sqlglot~=25.9.0
 Provides-Extra: dev
 Requires-Dist: build~=1.0; extra == "dev"
+Requires-Dist: dirty-equals; extra == "dev"
 Requires-Dist: pandas-stubs; extra == "dev"
 Requires-Dist: snowflake-connector-python[pandas,secure-local-storage]; extra == "dev"
 Requires-Dist: pre-commit~=3.4; extra == "dev"
fakesnow-0.9.23/fakesnow/arrow.py
@@ -0,0 +1,67 @@
+from typing import Any
+
+import pyarrow as pa
+
+
+def with_sf_metadata(schema: pa.Schema) -> pa.Schema:
+    # see https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp#L32
+    # and https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/SnowflakeType.cpp#L10
+    fms = []
+    for i, t in enumerate(schema.types):
+        f = schema.field(i)
+
+        # TODO: precision, scale, charLength etc. for all types
+
+        if t == pa.bool_():
+            fm = f.with_metadata({"logicalType": "BOOLEAN"})
+        elif t == pa.int64():
+            # scale and precision required, see here
+            # https://github.com/snowflakedb/snowflake-connector-python/blob/416ff57/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowChunkIterator.cpp#L147
+            fm = f.with_metadata({"logicalType": "FIXED", "precision": "38", "scale": "0"})
+        elif t == pa.float64():
+            fm = f.with_metadata({"logicalType": "REAL"})
+        elif isinstance(t, pa.Decimal128Type):
+            fm = f.with_metadata({"logicalType": "FIXED", "precision": str(t.precision), "scale": str(t.scale)})
+        elif t == pa.string():
+            # TODO: set charLength to size of column
+            fm = f.with_metadata({"logicalType": "TEXT", "charLength": "16777216"})
+        else:
+            raise NotImplementedError(f"Unsupported Arrow type: {t}")
+        fms.append(fm)
+    return pa.schema(fms)
+
+
+def to_ipc(table: pa.Table) -> pa.Buffer:
+    batches = table.to_batches()
+    if len(batches) != 1:
+        raise NotImplementedError(f"{len(batches)} batches")
+    batch = batches[0]
+
+    sink = pa.BufferOutputStream()
+
+    with pa.ipc.new_stream(sink, with_sf_metadata(table.schema)) as writer:
+        writer.write_batch(batch)
+
+    return sink.getvalue()
+
+
+# TODO: should this be derived before with_schema?
+def to_rowtype(schema: pa.Schema) -> list[dict[str, Any]]:
+    return [
+        {
+            "name": f.name,
+            # TODO
+            # "database": "",
+            # "schema": "",
+            # "table": "",
+            "nullable": f.nullable,
+            "type": f.metadata.get(b"logicalType").decode("utf-8").lower(),  # type: ignore
+            # TODO
+            # "byteLength": 20,
+            "length": int(f.metadata.get(b"charLength")) if f.metadata.get(b"charLength") else None,  # type: ignore
+            "scale": int(f.metadata.get(b"scale")) if f.metadata.get(b"scale") else None,  # type: ignore
+            "precision": int(f.metadata.get(b"precision")) if f.metadata.get(b"precision") else None,  # type: ignore
+            "collation": None,
+        }
+        for f in schema
+    ]
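The new module only handles the Arrow types the fake currently emits (bool, int64, float64, decimal128, string). A rough round-trip sketch, not code from the package, showing how the three helpers fit together:

import pyarrow as pa

from fakesnow.arrow import to_ipc, to_rowtype, with_sf_metadata

# build a single-batch table (to_ipc raises on multi-batch tables)
table = pa.table({"ID": pa.array([1, 2], type=pa.int64()), "NAME": ["a", "b"]})

# each field gains Snowflake logicalType metadata
annotated = with_sf_metadata(table.schema)
assert annotated.field(0).metadata[b"logicalType"] == b"FIXED"

# serialize to an Arrow IPC stream buffer, the wire format Snowflake clients expect
buf = to_ipc(table)

# field metadata survives the IPC round trip, so the rowtype dicts can be rebuilt from it
schema = pa.ipc.open_stream(buf).read_all().schema
print(to_rowtype(schema)[1]["type"])  # text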
fakesnow-0.9.23/fakesnow/conn.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+import json
+import os
+from collections.abc import Iterable
+from pathlib import Path
+from types import TracebackType
+from typing import Any
+
+import pandas as pd
+import snowflake.connector.converter
+import snowflake.connector.errors
+import sqlglot
+from duckdb import DuckDBPyConnection
+from snowflake.connector.cursor import DictCursor, SnowflakeCursor
+from sqlglot import exp
+from typing_extensions import Self
+
+import fakesnow.info_schema as info_schema
+import fakesnow.macros as macros
+from fakesnow.cursor import FakeSnowflakeCursor
+from fakesnow.variables import Variables
+
+
+class FakeSnowflakeConnection:
+    def __init__(
+        self,
+        duck_conn: DuckDBPyConnection,
+        database: str | None = None,
+        schema: str | None = None,
+        create_database: bool = True,
+        create_schema: bool = True,
+        db_path: str | os.PathLike | None = None,
+        nop_regexes: list[str] | None = None,
+        *args: Any,
+        **kwargs: Any,
+    ):
+        self._duck_conn = duck_conn
+        self._is_closed = False
+        # upper case database and schema like snowflake unquoted identifiers
+        # so they appear as upper-cased in information_schema
+        # catalog and schema names are not actually case-sensitive in duckdb even though
+        # they are as cased in information_schema.schemata, so when selecting from
+        # information_schema.schemata below we use upper-case to match any existing duckdb
+        # catalog or schemas like "information_schema"
+        self.database = database and database.upper()
+        self.schema = schema and schema.upper()
+
+        self.database_set = False
+        self.schema_set = False
+        self.db_path = Path(db_path) if db_path else None
+        self.nop_regexes = nop_regexes
+        self._paramstyle = snowflake.connector.paramstyle
+        self.variables = Variables()
+
+        # create database if needed
+        if (
+            create_database
+            and self.database
+            and not duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}'"""
+            ).fetchone()
+        ):
+            db_file = f"{self.db_path/self.database}.db" if self.db_path else ":memory:"
+            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
+            duck_conn.execute(info_schema.creation_sql(self.database))
+            duck_conn.execute(macros.creation_sql(self.database))
+
+        # create schema if needed
+        if (
+            create_schema
+            and self.database
+            and self.schema
+            and not duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"CREATE SCHEMA {self.database}.{self.schema}")
+
+        # set database and schema if both exist
+        if (
+            self.database
+            and self.schema
+            and duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"SET schema='{self.database}.{self.schema}'")
+            self.database_set = True
+            self.schema_set = True
+        # set database if only that exists
+        elif (
+            self.database
+            and duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"SET schema='{self.database}.main'")
+            self.database_set = True
+
+        # use UTC instead of local time zone for consistent testing
+        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        pass
+
+    def close(self, retry: bool = True) -> None:
+        self._duck_conn.close()
+        self._is_closed = True
+
+    def commit(self) -> None:
+        self.cursor().execute("COMMIT")
+
+    def cursor(self, cursor_class: type[SnowflakeCursor] = SnowflakeCursor) -> FakeSnowflakeCursor:
+        # TODO: use duck_conn cursor for thread-safety
+        return FakeSnowflakeCursor(conn=self, duck_conn=self._duck_conn, use_dict_result=cursor_class == DictCursor)
+
+    def execute_string(
+        self,
+        sql_text: str,
+        remove_comments: bool = False,
+        return_cursors: bool = True,
+        cursor_class: type[SnowflakeCursor] = SnowflakeCursor,
+        **kwargs: dict[str, Any],
+    ) -> Iterable[FakeSnowflakeCursor]:
+        cursors = [
+            self.cursor(cursor_class).execute(e.sql(dialect="snowflake"))
+            for e in sqlglot.parse(sql_text, read="snowflake")
+            if e and not isinstance(e, exp.Semicolon)  # ignore comments
+        ]
+        return cursors if return_cursors else []
+
+    def is_closed(self) -> bool:
+        return self._is_closed
+
+    def rollback(self) -> None:
+        self.cursor().execute("ROLLBACK")
+
+    def _insert_df(self, df: pd.DataFrame, table_name: str) -> int:
+        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
+        # Whereas duckdb analyses a dataframe see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
+        # and converts a object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST
+        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
+        #
+        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
+        # varchar and value a string containing a struct representation. In order to support dicts with different keys
+        # we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the dataframe
+        # copy and transform in python.
+
+        df = df.copy()
+
+        # Identify columns of type object
+        object_cols = df.select_dtypes(include=["object"]).columns
+
+        # Apply json.dumps to these columns
+        for col in object_cols:
+            # don't jsonify string
+            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
+
+        escaped_cols = ",".join(f'"{col}"' for col in df.columns.to_list())
+        self._duck_conn.execute(f"INSERT INTO {table_name}({escaped_cols}) SELECT * FROM df")
+
+        return self._duck_conn.fetchall()[0][0]
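FakeSnowflakeConnection is normally constructed for you by fakesnow.patch(), but as an illustrative sketch (not code from the diff) it can be driven directly against an in-memory duckdb connection, which also exercises the is_closed() method added in this release:

import duckdb

from fakesnow.conn import FakeSnowflakeConnection

conn = FakeSnowflakeConnection(duckdb.connect(":memory:"), database="DB1", schema="S1")
cur = conn.cursor()
cur.execute("CREATE TABLE t (id INT, name VARCHAR)")
cur.execute("INSERT INTO t VALUES (1, 'a')")
print(cur.execute("SELECT * FROM t").fetchall())  # [(1, 'a')]
print(conn.is_closed())  # False; True after close(), new in 0.9.23
conn.close()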
fakesnow-0.9.22/fakesnow/fakes.py → fakesnow-0.9.23/fakesnow/cursor.py
@@ -1,38 +1,36 @@
 from __future__ import annotations
 
-import json
 import os
 import re
 import sys
-from collections.abc import Iterable, Iterator, Sequence
-from pathlib import Path
+from collections.abc import Iterator, Sequence
 from string import Template
 from types import TracebackType
-from typing import TYPE_CHECKING, Any, Literal, Optional, cast
+from typing import TYPE_CHECKING, Any, cast
 
 import duckdb
-from sqlglot import exp
-
-if TYPE_CHECKING:
-    import pandas as pd
-    import pyarrow.lib
-import numpy as np
 import pyarrow
 import snowflake.connector.converter
 import snowflake.connector.errors
 import sqlglot
 from duckdb import DuckDBPyConnection
-from snowflake.connector.cursor import DictCursor, ResultMetadata, SnowflakeCursor
+from snowflake.connector.cursor import ResultMetadata
 from snowflake.connector.result_batch import ResultBatch
-from sqlglot import parse_one
+from sqlglot import exp, parse_one
 from typing_extensions import Self
 
 import fakesnow.checks as checks
 import fakesnow.expr as expr
 import fakesnow.info_schema as info_schema
-import fakesnow.macros as macros
 import fakesnow.transforms as transforms
-from fakesnow.variables import Variables
+from fakesnow.types import describe_as_result_metadata
+
+if TYPE_CHECKING:
+    import pandas as pd
+    import pyarrow.lib
+
+    from fakesnow.conn import FakeSnowflakeConnection
+
 
 SCHEMA_UNSET = "schema_unset"
 SQL_SUCCESS = "SELECT 'Statement executed successfully.' as 'status'"
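Note the TYPE_CHECKING import of FakeSnowflakeConnection is what breaks the cycle introduced by the split: fakesnow/conn.py imports FakeSnowflakeCursor at runtime, so fakesnow/cursor.py can only reference the connection class as an annotation-time import.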
@@ -108,7 +106,7 @@ class FakeSnowflakeCursor:
 
         describe = f"DESCRIBE {command}"
         self.execute(describe, *args, **kwargs)
-        return FakeSnowflakeCursor._describe_as_result_metadata(self.fetchall())
+        return describe_as_result_metadata(self.fetchall())
 
     @property
     def description(self) -> list[ResultMetadata]:
@@ -116,7 +114,7 @@ class FakeSnowflakeCursor:
         with self._conn.cursor() as cur:
             expression = sqlglot.parse_one(f"DESCRIBE {self._last_sql}", read="duckdb")
             cur._execute(expression, self._last_params)  # noqa: SLF001
-            meta = FakeSnowflakeCursor._describe_as_result_metadata(cur.fetchall())
+            meta = describe_as_result_metadata(cur.fetchall())
 
         return meta
 
@@ -417,76 +415,6 @@ class FakeSnowflakeCursor:
     def sqlstate(self) -> str | None:
         return self._sqlstate
 
-    @staticmethod
-    def _describe_as_result_metadata(describe_results: list) -> list[ResultMetadata]:
-        # fmt: off
-        def as_result_metadata(column_name: str, column_type: str, _: str) -> ResultMetadata:
-            # see https://docs.snowflake.com/en/user-guide/python-connector-api.html#type-codes
-            # and https://arrow.apache.org/docs/python/api/datatypes.html#type-checking
-            if column_type in {"BIGINT", "INTEGER"}:
-                return ResultMetadata(
-                    name=column_name, type_code=0, display_size=None, internal_size=None, precision=38, scale=0, is_nullable=True  # noqa: E501
-                )
-            elif column_type.startswith("DECIMAL"):
-                match = re.search(r'\((\d+),(\d+)\)', column_type)
-                if match:
-                    precision = int(match[1])
-                    scale = int(match[2])
-                else:
-                    precision = scale = None
-                return ResultMetadata(
-                    name=column_name, type_code=0, display_size=None, internal_size=None, precision=precision, scale=scale, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "VARCHAR":
-                # TODO: fetch internal_size from varchar size
-                return ResultMetadata(
-                    name=column_name, type_code=2, display_size=None, internal_size=16777216, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "DOUBLE":
-                return ResultMetadata(
-                    name=column_name, type_code=1, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "BOOLEAN":
-                return ResultMetadata(
-                    name=column_name, type_code=13, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "DATE":
-                return ResultMetadata(
-                    name=column_name, type_code=3, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type in {"TIMESTAMP", "TIMESTAMP_NS"}:
-                return ResultMetadata(
-                    name=column_name, type_code=8, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "TIMESTAMP WITH TIME ZONE":
-                return ResultMetadata(
-                    name=column_name, type_code=7, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "BLOB":
-                return ResultMetadata(
-                    name=column_name, type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "TIME":
-                return ResultMetadata(
-                    name=column_name, type_code=12, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "JSON":
-                # TODO: correctly map OBJECT and ARRAY see https://github.com/tekumara/fakesnow/issues/26
-                return ResultMetadata(
-                    name=column_name, type_code=5, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            else:
-                # TODO handle more types
-                raise NotImplementedError(f"for column type {column_type}")
-
-        # fmt: on
-
-        meta = [
-            as_result_metadata(column_name, column_type, null)
-            for (column_name, column_type, null, _, _, _) in describe_results
-        ]
-        return meta
-
     def _rewrite_with_params(
         self,
         command: str,
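The removed static method now lives in the new fakesnow/types.py as describe_as_result_metadata (see the call-site changes above). A minimal sketch of the mapping, assuming the relocated function keeps the behaviour deleted here:

from fakesnow.types import describe_as_result_metadata

# duckdb DESCRIBE rows are (column_name, column_type, null, key, default, extra)
(meta,) = describe_as_result_metadata([("ID", "BIGINT", "YES", None, None, None)])
print(meta.name, meta.type_code, meta.precision, meta.scale)  # ID 0 38 0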
@@ -511,154 +439,6 @@ class FakeSnowflakeCursor:
         return self._conn.variables.inline_variables(sql)
 
 
-class FakeSnowflakeConnection:
-    def __init__(
-        self,
-        duck_conn: DuckDBPyConnection,
-        database: str | None = None,
-        schema: str | None = None,
-        create_database: bool = True,
-        create_schema: bool = True,
-        db_path: str | os.PathLike | None = None,
-        nop_regexes: list[str] | None = None,
-        *args: Any,
-        **kwargs: Any,
-    ):
-        self._duck_conn = duck_conn
-        # upper case database and schema like snowflake unquoted identifiers
-        # so they appear as upper-cased in information_schema
-        # catalog and schema names are not actually case-sensitive in duckdb even though
-        # they are as cased in information_schema.schemata, so when selecting from
-        # information_schema.schemata below we use upper-case to match any existing duckdb
-        # catalog or schemas like "information_schema"
-        self.database = database and database.upper()
-        self.schema = schema and schema.upper()
-
-        self.database_set = False
-        self.schema_set = False
-        self.db_path = Path(db_path) if db_path else None
-        self.nop_regexes = nop_regexes
-        self._paramstyle = snowflake.connector.paramstyle
-        self.variables = Variables()
-
-        # create database if needed
-        if (
-            create_database
-            and self.database
-            and not duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}'"""
-            ).fetchone()
-        ):
-            db_file = f"{self.db_path/self.database}.db" if self.db_path else ":memory:"
-            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
-            duck_conn.execute(info_schema.creation_sql(self.database))
-            duck_conn.execute(macros.creation_sql(self.database))
-
-        # create schema if needed
-        if (
-            create_schema
-            and self.database
-            and self.schema
-            and not duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"CREATE SCHEMA {self.database}.{self.schema}")
-
-        # set database and schema if both exist
-        if (
-            self.database
-            and self.schema
-            and duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"SET schema='{self.database}.{self.schema}'")
-            self.database_set = True
-            self.schema_set = True
-        # set database if only that exists
-        elif (
-            self.database
-            and duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"SET schema='{self.database}.main'")
-            self.database_set = True
-
-        # use UTC instead of local time zone for consistent testing
-        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
-
-    def __enter__(self) -> Self:
-        return self
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc_value: BaseException | None,
-        traceback: TracebackType | None,
-    ) -> None:
-        pass
-
-    def close(self, retry: bool = True) -> None:
-        self._duck_conn.close()
-
-    def commit(self) -> None:
-        self.cursor().execute("COMMIT")
-
-    def cursor(self, cursor_class: type[SnowflakeCursor] = SnowflakeCursor) -> FakeSnowflakeCursor:
-        # TODO: use duck_conn cursor for thread-safety
-        return FakeSnowflakeCursor(conn=self, duck_conn=self._duck_conn, use_dict_result=cursor_class == DictCursor)
-
-    def execute_string(
-        self,
-        sql_text: str,
-        remove_comments: bool = False,
-        return_cursors: bool = True,
-        cursor_class: type[SnowflakeCursor] = SnowflakeCursor,
-        **kwargs: dict[str, Any],
-    ) -> Iterable[FakeSnowflakeCursor]:
-        cursors = [
-            self.cursor(cursor_class).execute(e.sql(dialect="snowflake"))
-            for e in sqlglot.parse(sql_text, read="snowflake")
-            if e and not isinstance(e, exp.Semicolon)  # ignore comments
-        ]
-        return cursors if return_cursors else []
-
-    def rollback(self) -> None:
-        self.cursor().execute("ROLLBACK")
-
-    def _insert_df(self, df: pd.DataFrame, table_name: str) -> int:
-        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
-        # Whereas duckdb analyses a dataframe see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
-        # and converts a object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST
-        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
-        #
-        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
-        # varchar and value a string containing a struct representation. In order to support dicts with different keys
-        # we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the dataframe
-        # copy and transform in python.
-
-        df = df.copy()
-
-        # Identify columns of type object
-        object_cols = df.select_dtypes(include=["object"]).columns
-
-        # Apply json.dumps to these columns
-        for col in object_cols:
-            # don't jsonify string
-            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
-
-        escaped_cols = ",".join(f'"{col}"' for col in df.columns.to_list())
-        self._duck_conn.execute(f"INSERT INTO {table_name}({escaped_cols}) SELECT * FROM df")
-
-        return self._duck_conn.fetchall()[0][0]
-
-
 class FakeResultBatch(ResultBatch):
     def __init__(self, use_dict_result: bool, batch: pyarrow.RecordBatch):
         self._use_dict_result = use_dict_result
@@ -681,70 +461,3 @@ class FakeResultBatch(ResultBatch):
 
     def to_arrow(self) -> pyarrow.Table:
         raise NotImplementedError()
-
-
-CopyResult = tuple[
-    str,
-    str,
-    int,
-    int,
-    int,
-    int,
-    Optional[str],
-    Optional[int],
-    Optional[int],
-    Optional[str],
-]
-
-WritePandasResult = tuple[
-    bool,
-    int,
-    int,
-    Sequence[CopyResult],
-]
-
-
-def sql_type(dtype: np.dtype) -> str:
-    if str(dtype) == "int64":
-        return "NUMBER"
-    elif str(dtype) == "object":
-        return "VARCHAR"
-    else:
-        raise NotImplementedError(f"sql_type {dtype=}")
-
-
-def write_pandas(
-    conn: FakeSnowflakeConnection,
-    df: pd.DataFrame,
-    table_name: str,
-    database: str | None = None,
-    schema: str | None = None,
-    chunk_size: int | None = None,
-    compression: str = "gzip",
-    on_error: str = "abort_statement",
-    parallel: int = 4,
-    quote_identifiers: bool = True,
-    auto_create_table: bool = False,
-    create_temp_table: bool = False,
-    overwrite: bool = False,
-    table_type: Literal["", "temp", "temporary", "transient"] = "",
-    **kwargs: Any,
-) -> WritePandasResult:
-    name = table_name
-    if schema:
-        name = f"{schema}.{name}"
-    if database:
-        name = f"{database}.{name}"
-
-    if auto_create_table:
-        cols = [f"{c} {sql_type(t)}" for c, t in df.dtypes.to_dict().items()]
-
-        conn.cursor().execute(f"CREATE TABLE IF NOT EXISTS {name} ({','.join(cols)})")
-
-    count = conn._insert_df(df, name)  # noqa: SLF001
-
-    # mocks https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#output
-    mock_copy_results = [("fakesnow/file0.txt", "LOADED", count, count, 1, 0, None, None, None, None)]
-
-    # return success
-    return (True, len(mock_copy_results), count, mock_copy_results)
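write_pandas, along with sql_type, CopyResult and WritePandasResult, moves to the new fakesnow/pandas_tools.py (+77 lines in the file list above). A usage sketch, assuming the relocated function keeps the signature and mock COPY result deleted here:

import duckdb
import pandas as pd

from fakesnow.conn import FakeSnowflakeConnection
from fakesnow.pandas_tools import write_pandas

conn = FakeSnowflakeConnection(duckdb.connect(":memory:"), database="DB1", schema="S1")
df = pd.DataFrame({"ID": [1, 2], "NAME": ["a", "b"]})
# auto_create_table maps int64 -> NUMBER and object -> VARCHAR via sql_type
success, nchunks, nrows, _ = write_pandas(conn, df, "T1", auto_create_table=True)
print(success, nrows)  # True 2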
fakesnow-0.9.23/fakesnow/fakes.py
@@ -0,0 +1,3 @@
+from .conn import FakeSnowflakeConnection as FakeSnowflakeConnection
+from .cursor import FakeSnowflakeCursor as FakeSnowflakeCursor
+from .pandas_tools import write_pandas as write_pandas
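The rewritten fakesnow/fakes.py is now a compatibility shim: the explicit `X as X` aliases mark the names as deliberate re-exports for type checkers, so existing imports keep working unchanged:

from fakesnow.fakes import FakeSnowflakeConnection, FakeSnowflakeCursor, write_pandas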