fakesnow 0.9.22__tar.gz → 0.9.24__tar.gz
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- {fakesnow-0.9.22 → fakesnow-0.9.24}/PKG-INFO +2 -1
- fakesnow-0.9.24/fakesnow/arrow.py +67 -0
- fakesnow-0.9.24/fakesnow/conn.py +147 -0
- fakesnow-0.9.22/fakesnow/fakes.py → fakesnow-0.9.24/fakesnow/cursor.py +16 -301
- fakesnow-0.9.24/fakesnow/fakes.py +3 -0
- fakesnow-0.9.24/fakesnow/pandas_tools.py +108 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/server.py +5 -11
- fakesnow-0.9.24/fakesnow/types.py +89 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/PKG-INFO +2 -1
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/SOURCES.txt +4 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/requires.txt +1 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/pyproject.toml +2 -1
- fakesnow-0.9.24/tests/test_arrow.py +99 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_fakes.py +6 -1
- fakesnow-0.9.24/tests/test_server.py +98 -0
- fakesnow-0.9.22/fakesnow/arrow.py +0 -32
- fakesnow-0.9.22/tests/test_arrow.py +0 -53
- fakesnow-0.9.22/tests/test_server.py +0 -67
- {fakesnow-0.9.22 → fakesnow-0.9.24}/LICENSE +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/README.md +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/__init__.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/__main__.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/checks.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/cli.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/expr.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/fixtures.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/info_schema.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/instance.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/macros.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/py.typed +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/transforms.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow/variables.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/dependency_links.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/entry_points.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/fakesnow.egg-info/top_level.txt +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/setup.cfg +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_checks.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_cli.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_connect.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_expr.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_info_schema.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_patch.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_sqlalchemy.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_transforms.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_users.py +0 -0
- {fakesnow-0.9.22 → fakesnow-0.9.24}/tests/test_write_pandas.py +0 -0
{fakesnow-0.9.22 → fakesnow-0.9.24}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fakesnow
-Version: 0.9.22
+Version: 0.9.24
 Summary: Fake Snowflake Connector for Python. Run, mock and test Snowflake DB locally.
 License: Apache License
                            Version 2.0, January 2004
@@ -216,6 +216,7 @@ Requires-Dist: snowflake-connector-python
 Requires-Dist: sqlglot~=25.9.0
 Provides-Extra: dev
 Requires-Dist: build~=1.0; extra == "dev"
+Requires-Dist: dirty-equals; extra == "dev"
 Requires-Dist: pandas-stubs; extra == "dev"
 Requires-Dist: snowflake-connector-python[pandas,secure-local-storage]; extra == "dev"
 Requires-Dist: pre-commit~=3.4; extra == "dev"
fakesnow-0.9.24/fakesnow/arrow.py (new)

@@ -0,0 +1,67 @@
+from typing import Any
+
+import pyarrow as pa
+
+
+def with_sf_metadata(schema: pa.Schema) -> pa.Schema:
+    # see https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp#L32
+    # and https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/SnowflakeType.cpp#L10
+    fms = []
+    for i, t in enumerate(schema.types):
+        f = schema.field(i)
+
+        # TODO: precision, scale, charLength etc. for all types
+
+        if t == pa.bool_():
+            fm = f.with_metadata({"logicalType": "BOOLEAN"})
+        elif t == pa.int64():
+            # scale and precision required, see here
+            # https://github.com/snowflakedb/snowflake-connector-python/blob/416ff57/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowChunkIterator.cpp#L147
+            fm = f.with_metadata({"logicalType": "FIXED", "precision": "38", "scale": "0"})
+        elif t == pa.float64():
+            fm = f.with_metadata({"logicalType": "REAL"})
+        elif isinstance(t, pa.Decimal128Type):
+            fm = f.with_metadata({"logicalType": "FIXED", "precision": str(t.precision), "scale": str(t.scale)})
+        elif t == pa.string():
+            # TODO: set charLength to size of column
+            fm = f.with_metadata({"logicalType": "TEXT", "charLength": "16777216"})
+        else:
+            raise NotImplementedError(f"Unsupported Arrow type: {t}")
+        fms.append(fm)
+    return pa.schema(fms)
+
+
+def to_ipc(table: pa.Table) -> pa.Buffer:
+    batches = table.to_batches()
+    if len(batches) != 1:
+        raise NotImplementedError(f"{len(batches)} batches")
+    batch = batches[0]
+
+    sink = pa.BufferOutputStream()
+
+    with pa.ipc.new_stream(sink, with_sf_metadata(table.schema)) as writer:
+        writer.write_batch(batch)
+
+    return sink.getvalue()
+
+
+# TODO: should this be derived before with_schema?
+def to_rowtype(schema: pa.Schema) -> list[dict[str, Any]]:
+    return [
+        {
+            "name": f.name,
+            # TODO
+            # "database": "",
+            # "schema": "",
+            # "table": "",
+            "nullable": f.nullable,
+            "type": f.metadata.get(b"logicalType").decode("utf-8").lower(),  # type: ignore
+            # TODO
+            # "byteLength": 20,
+            "length": int(f.metadata.get(b"charLength")) if f.metadata.get(b"charLength") else None,  # type: ignore
+            "scale": int(f.metadata.get(b"scale")) if f.metadata.get(b"scale") else None,  # type: ignore
+            "precision": int(f.metadata.get(b"precision")) if f.metadata.get(b"precision") else None,  # type: ignore
+            "collation": None,
+        }
+        for f in schema
+    ]
fakesnow-0.9.24/fakesnow/conn.py (new)

@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+import os
+from collections.abc import Iterable
+from pathlib import Path
+from types import TracebackType
+from typing import Any
+
+import snowflake.connector.converter
+import snowflake.connector.errors
+import sqlglot
+from duckdb import DuckDBPyConnection
+from snowflake.connector.cursor import DictCursor, SnowflakeCursor
+from sqlglot import exp
+from typing_extensions import Self
+
+import fakesnow.info_schema as info_schema
+import fakesnow.macros as macros
+from fakesnow.cursor import FakeSnowflakeCursor
+from fakesnow.variables import Variables
+
+
+class FakeSnowflakeConnection:
+    def __init__(
+        self,
+        duck_conn: DuckDBPyConnection,
+        database: str | None = None,
+        schema: str | None = None,
+        create_database: bool = True,
+        create_schema: bool = True,
+        db_path: str | os.PathLike | None = None,
+        nop_regexes: list[str] | None = None,
+        *args: Any,
+        **kwargs: Any,
+    ):
+        self._duck_conn = duck_conn
+        self._is_closed = False
+        # upper case database and schema like snowflake unquoted identifiers
+        # so they appear as upper-cased in information_schema
+        # catalog and schema names are not actually case-sensitive in duckdb even though
+        # they are as cased in information_schema.schemata, so when selecting from
+        # information_schema.schemata below we use upper-case to match any existing duckdb
+        # catalog or schemas like "information_schema"
+        self.database = database and database.upper()
+        self.schema = schema and schema.upper()
+
+        self.database_set = False
+        self.schema_set = False
+        self.db_path = Path(db_path) if db_path else None
+        self.nop_regexes = nop_regexes
+        self._paramstyle = snowflake.connector.paramstyle
+        self.variables = Variables()
+
+        # create database if needed
+        if (
+            create_database
+            and self.database
+            and not duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}'"""
+            ).fetchone()
+        ):
+            db_file = f"{self.db_path/self.database}.db" if self.db_path else ":memory:"
+            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
+            duck_conn.execute(info_schema.creation_sql(self.database))
+            duck_conn.execute(macros.creation_sql(self.database))
+
+        # create schema if needed
+        if (
+            create_schema
+            and self.database
+            and self.schema
+            and not duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"CREATE SCHEMA {self.database}.{self.schema}")
+
+        # set database and schema if both exist
+        if (
+            self.database
+            and self.schema
+            and duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"SET schema='{self.database}.{self.schema}'")
+            self.database_set = True
+            self.schema_set = True
+        # set database if only that exists
+        elif (
+            self.database
+            and duck_conn.execute(
+                f"""select * from information_schema.schemata
+                where upper(catalog_name) = '{self.database}'"""
+            ).fetchone()
+        ):
+            duck_conn.execute(f"SET schema='{self.database}.main'")
+            self.database_set = True
+
+        # use UTC instead of local time zone for consistent testing
+        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        pass
+
+    def close(self, retry: bool = True) -> None:
+        self._duck_conn.close()
+        self._is_closed = True
+
+    def commit(self) -> None:
+        self.cursor().execute("COMMIT")
+
+    def cursor(self, cursor_class: type[SnowflakeCursor] = SnowflakeCursor) -> FakeSnowflakeCursor:
+        # TODO: use duck_conn cursor for thread-safety
+        return FakeSnowflakeCursor(conn=self, duck_conn=self._duck_conn, use_dict_result=cursor_class == DictCursor)
+
+    def execute_string(
+        self,
+        sql_text: str,
+        remove_comments: bool = False,
+        return_cursors: bool = True,
+        cursor_class: type[SnowflakeCursor] = SnowflakeCursor,
+        **kwargs: dict[str, Any],
+    ) -> Iterable[FakeSnowflakeCursor]:
+        cursors = [
+            self.cursor(cursor_class).execute(e.sql(dialect="snowflake"))
+            for e in sqlglot.parse(sql_text, read="snowflake")
+            if e and not isinstance(e, exp.Semicolon)  # ignore comments
+        ]
+        return cursors if return_cursors else []
+
+    def is_closed(self) -> bool:
+        return self._is_closed
+
+    def rollback(self) -> None:
+        self.cursor().execute("ROLLBACK")
+
fakesnow-0.9.22/fakesnow/fakes.py → fakesnow-0.9.24/fakesnow/cursor.py

@@ -1,38 +1,38 @@
 from __future__ import annotations

-import json
 import os
 import re
 import sys
-from collections.abc import Iterable, Iterator, Sequence
-from pathlib import Path
+from collections.abc import Iterator, Sequence
 from string import Template
 from types import TracebackType
-from typing import TYPE_CHECKING, Any, Literal, Optional, cast
+from typing import TYPE_CHECKING, Any, cast

 import duckdb
-from sqlglot import exp
-
-if TYPE_CHECKING:
-    import pandas as pd
-    import pyarrow.lib
-import numpy as np
 import pyarrow
 import snowflake.connector.converter
 import snowflake.connector.errors
 import sqlglot
 from duckdb import DuckDBPyConnection
-from snowflake.connector.cursor import DictCursor, ResultMetadata, SnowflakeCursor
+from snowflake.connector.cursor import ResultMetadata
 from snowflake.connector.result_batch import ResultBatch
-from sqlglot import parse_one
+from sqlglot import exp, parse_one
 from typing_extensions import Self

 import fakesnow.checks as checks
 import fakesnow.expr as expr
 import fakesnow.info_schema as info_schema
-import fakesnow.macros as macros
 import fakesnow.transforms as transforms
-from fakesnow.variables import Variables
+from fakesnow.types import describe_as_result_metadata
+
+if TYPE_CHECKING:
+    # don't require pandas at import time
+    import pandas as pd
+    import pyarrow.lib
+
+    # avoid circular import
+    from fakesnow.conn import FakeSnowflakeConnection
+

 SCHEMA_UNSET = "schema_unset"
 SQL_SUCCESS = "SELECT 'Statement executed successfully.' as 'status'"
@@ -108,7 +108,7 @@ class FakeSnowflakeCursor:

         describe = f"DESCRIBE {command}"
         self.execute(describe, *args, **kwargs)
-        return self._describe_as_result_metadata(self.fetchall())
+        return describe_as_result_metadata(self.fetchall())

     @property
     def description(self) -> list[ResultMetadata]:
@@ -116,7 +116,7 @@ class FakeSnowflakeCursor:
         with self._conn.cursor() as cur:
             expression = sqlglot.parse_one(f"DESCRIBE {self._last_sql}", read="duckdb")
             cur._execute(expression, self._last_params)  # noqa: SLF001
-            meta = self._describe_as_result_metadata(cur.fetchall())
+            meta = describe_as_result_metadata(cur.fetchall())

         return meta

@@ -417,76 +417,6 @@ class FakeSnowflakeCursor:
     def sqlstate(self) -> str | None:
        return self._sqlstate

-    @staticmethod
-    def _describe_as_result_metadata(describe_results: list) -> list[ResultMetadata]:
-        # fmt: off
-        def as_result_metadata(column_name: str, column_type: str, _: str) -> ResultMetadata:
-            # see https://docs.snowflake.com/en/user-guide/python-connector-api.html#type-codes
-            # and https://arrow.apache.org/docs/python/api/datatypes.html#type-checking
-            if column_type in {"BIGINT", "INTEGER"}:
-                return ResultMetadata(
-                    name=column_name, type_code=0, display_size=None, internal_size=None, precision=38, scale=0, is_nullable=True  # noqa: E501
-                )
-            elif column_type.startswith("DECIMAL"):
-                match = re.search(r'\((\d+),(\d+)\)', column_type)
-                if match:
-                    precision = int(match[1])
-                    scale = int(match[2])
-                else:
-                    precision = scale = None
-                return ResultMetadata(
-                    name=column_name, type_code=0, display_size=None, internal_size=None, precision=precision, scale=scale, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "VARCHAR":
-                # TODO: fetch internal_size from varchar size
-                return ResultMetadata(
-                    name=column_name, type_code=2, display_size=None, internal_size=16777216, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "DOUBLE":
-                return ResultMetadata(
-                    name=column_name, type_code=1, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "BOOLEAN":
-                return ResultMetadata(
-                    name=column_name, type_code=13, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "DATE":
-                return ResultMetadata(
-                    name=column_name, type_code=3, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type in {"TIMESTAMP", "TIMESTAMP_NS"}:
-                return ResultMetadata(
-                    name=column_name, type_code=8, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "TIMESTAMP WITH TIME ZONE":
-                return ResultMetadata(
-                    name=column_name, type_code=7, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "BLOB":
-                return ResultMetadata(
-                    name=column_name, type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "TIME":
-                return ResultMetadata(
-                    name=column_name, type_code=12, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
-                )
-            elif column_type == "JSON":
-                # TODO: correctly map OBJECT and ARRAY see https://github.com/tekumara/fakesnow/issues/26
-                return ResultMetadata(
-                    name=column_name, type_code=5, display_size=None, internal_size=None, precision=None, scale=None, is_nullable=True  # noqa: E501
-                )
-            else:
-                # TODO handle more types
-                raise NotImplementedError(f"for column type {column_type}")
-
-        # fmt: on
-
-        meta = [
-            as_result_metadata(column_name, column_type, null)
-            for (column_name, column_type, null, _, _, _) in describe_results
-        ]
-        return meta
-
     def _rewrite_with_params(
         self,
         command: str,
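
For reference, a sketch of what this mapping yields, assuming the module-level `describe_as_result_metadata` in the new `fakesnow/types.py` preserves the behaviour removed above. Describing a table with a BIGINT and a VARCHAR column would produce something like:

```python
from snowflake.connector.cursor import ResultMetadata

# DESCRIBE output for (ID BIGINT, NAME VARCHAR) mapped per the branches above
expected = [
    ResultMetadata(name="ID", type_code=0, display_size=None, internal_size=None,
                   precision=38, scale=0, is_nullable=True),
    ResultMetadata(name="NAME", type_code=2, display_size=None, internal_size=16777216,
                   precision=None, scale=None, is_nullable=True),
]
```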
@@ -511,154 +441,6 @@ class FakeSnowflakeCursor:
         return self._conn.variables.inline_variables(sql)


-class FakeSnowflakeConnection:
-    def __init__(
-        self,
-        duck_conn: DuckDBPyConnection,
-        database: str | None = None,
-        schema: str | None = None,
-        create_database: bool = True,
-        create_schema: bool = True,
-        db_path: str | os.PathLike | None = None,
-        nop_regexes: list[str] | None = None,
-        *args: Any,
-        **kwargs: Any,
-    ):
-        self._duck_conn = duck_conn
-        # upper case database and schema like snowflake unquoted identifiers
-        # so they appear as upper-cased in information_schema
-        # catalog and schema names are not actually case-sensitive in duckdb even though
-        # they are as cased in information_schema.schemata, so when selecting from
-        # information_schema.schemata below we use upper-case to match any existing duckdb
-        # catalog or schemas like "information_schema"
-        self.database = database and database.upper()
-        self.schema = schema and schema.upper()
-
-        self.database_set = False
-        self.schema_set = False
-        self.db_path = Path(db_path) if db_path else None
-        self.nop_regexes = nop_regexes
-        self._paramstyle = snowflake.connector.paramstyle
-        self.variables = Variables()
-
-        # create database if needed
-        if (
-            create_database
-            and self.database
-            and not duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}'"""
-            ).fetchone()
-        ):
-            db_file = f"{self.db_path/self.database}.db" if self.db_path else ":memory:"
-            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
-            duck_conn.execute(info_schema.creation_sql(self.database))
-            duck_conn.execute(macros.creation_sql(self.database))
-
-        # create schema if needed
-        if (
-            create_schema
-            and self.database
-            and self.schema
-            and not duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"CREATE SCHEMA {self.database}.{self.schema}")
-
-        # set database and schema if both exist
-        if (
-            self.database
-            and self.schema
-            and duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}' and upper(schema_name) = '{self.schema}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"SET schema='{self.database}.{self.schema}'")
-            self.database_set = True
-            self.schema_set = True
-        # set database if only that exists
-        elif (
-            self.database
-            and duck_conn.execute(
-                f"""select * from information_schema.schemata
-                where upper(catalog_name) = '{self.database}'"""
-            ).fetchone()
-        ):
-            duck_conn.execute(f"SET schema='{self.database}.main'")
-            self.database_set = True
-
-        # use UTC instead of local time zone for consistent testing
-        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
-
-    def __enter__(self) -> Self:
-        return self
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc_value: BaseException | None,
-        traceback: TracebackType | None,
-    ) -> None:
-        pass
-
-    def close(self, retry: bool = True) -> None:
-        self._duck_conn.close()
-
-    def commit(self) -> None:
-        self.cursor().execute("COMMIT")
-
-    def cursor(self, cursor_class: type[SnowflakeCursor] = SnowflakeCursor) -> FakeSnowflakeCursor:
-        # TODO: use duck_conn cursor for thread-safety
-        return FakeSnowflakeCursor(conn=self, duck_conn=self._duck_conn, use_dict_result=cursor_class == DictCursor)
-
-    def execute_string(
-        self,
-        sql_text: str,
-        remove_comments: bool = False,
-        return_cursors: bool = True,
-        cursor_class: type[SnowflakeCursor] = SnowflakeCursor,
-        **kwargs: dict[str, Any],
-    ) -> Iterable[FakeSnowflakeCursor]:
-        cursors = [
-            self.cursor(cursor_class).execute(e.sql(dialect="snowflake"))
-            for e in sqlglot.parse(sql_text, read="snowflake")
-            if e and not isinstance(e, exp.Semicolon)  # ignore comments
-        ]
-        return cursors if return_cursors else []
-
-    def rollback(self) -> None:
-        self.cursor().execute("ROLLBACK")
-
-    def _insert_df(self, df: pd.DataFrame, table_name: str) -> int:
-        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
-        # Whereas duckdb analyses a dataframe see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
-        # and converts a object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST
-        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
-        #
-        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
-        # varchar and value a string containing a struct representation. In order to support dicts with different keys
-        # we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the dataframe
-        # copy and transform in python.
-
-        df = df.copy()
-
-        # Identify columns of type object
-        object_cols = df.select_dtypes(include=["object"]).columns
-
-        # Apply json.dumps to these columns
-        for col in object_cols:
-            # don't jsonify string
-            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
-
-        escaped_cols = ",".join(f'"{col}"' for col in df.columns.to_list())
-        self._duck_conn.execute(f"INSERT INTO {table_name}({escaped_cols}) SELECT * FROM df")
-
-        return self._duck_conn.fetchall()[0][0]
-
-
 class FakeResultBatch(ResultBatch):
     def __init__(self, use_dict_result: bool, batch: pyarrow.RecordBatch):
         self._use_dict_result = use_dict_result
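
The object-column handling in `_insert_df` is the subtle part; here is a standalone sketch of just that transform, showing why dicts with differing keys survive the DuckDB insert as JSON strings rather than a STRUCT:

```python
import json

import pandas as pd

df = pd.DataFrame({"v": [{"a": 1}, {"b": 2}]})  # differing keys: no single STRUCT fits

# JSON-encode dicts/lists so duckdb sees plain varchar values, as in _insert_df above
for col in df.select_dtypes(include=["object"]).columns:
    df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)

print(df["v"].tolist())  # ['{"a": 1}', '{"b": 2}']
```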
@@ -681,70 +463,3 @@ class FakeResultBatch(ResultBatch):

     def to_arrow(self) -> pyarrow.Table:
         raise NotImplementedError()
-
-
-CopyResult = tuple[
-    str,
-    str,
-    int,
-    int,
-    int,
-    int,
-    Optional[str],
-    Optional[int],
-    Optional[int],
-    Optional[str],
-]
-
-WritePandasResult = tuple[
-    bool,
-    int,
-    int,
-    Sequence[CopyResult],
-]
-
-
-def sql_type(dtype: np.dtype) -> str:
-    if str(dtype) == "int64":
-        return "NUMBER"
-    elif str(dtype) == "object":
-        return "VARCHAR"
-    else:
-        raise NotImplementedError(f"sql_type {dtype=}")
-
-
-def write_pandas(
-    conn: FakeSnowflakeConnection,
-    df: pd.DataFrame,
-    table_name: str,
-    database: str | None = None,
-    schema: str | None = None,
-    chunk_size: int | None = None,
-    compression: str = "gzip",
-    on_error: str = "abort_statement",
-    parallel: int = 4,
-    quote_identifiers: bool = True,
-    auto_create_table: bool = False,
-    create_temp_table: bool = False,
-    overwrite: bool = False,
-    table_type: Literal["", "temp", "temporary", "transient"] = "",
-    **kwargs: Any,
-) -> WritePandasResult:
-    name = table_name
-    if schema:
-        name = f"{schema}.{name}"
-    if database:
-        name = f"{database}.{name}"
-
-    if auto_create_table:
-        cols = [f"{c} {sql_type(t)}" for c, t in df.dtypes.to_dict().items()]
-
-        conn.cursor().execute(f"CREATE TABLE IF NOT EXISTS {name} ({','.join(cols)})")
-
-    count = conn._insert_df(df, name)  # noqa: SLF001
-
-    # mocks https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#output
-    mock_copy_results = [("fakesnow/file0.txt", "LOADED", count, count, 1, 0, None, None, None, None)]
-
-    # return success
-    return (True, len(mock_copy_results), count, mock_copy_results)