fakesnow 0.8.2__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fakesnow/__init__.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import contextlib
 import importlib
+import os
 import sys
 import unittest.mock as mock
 from collections.abc import Iterator, Sequence
@@ -19,6 +20,7 @@ def patch(
     extra_targets: str | Sequence[str] = [],
     create_database_on_connect: bool = True,
     create_schema_on_connect: bool = True,
+    db_path: str | os.PathLike | None = None,
 ) -> Iterator[None]:
     """Patch snowflake targets with fakes.
 
@@ -28,12 +30,15 @@ def patch(
 
     Args:
         extra_targets (str | Sequence[str], optional): Extra targets to patch. Defaults to [].
-        create_database_on_connect (bool, optional): Create database if provided in connection. Defaults to True.
-        create_schema_on_connect (bool, optional): Create schema if provided in connection. Defaults to True.
 
            Allows extra targets beyond the standard snowflake.connector targets to be patched. Needed because we cannot
            patch definitions, only usages, see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
 
+        create_database_on_connect (bool, optional): Create database if provided in connection. Defaults to True.
+        create_schema_on_connect (bool, optional): Create schema if provided in connection. Defaults to True.
+        db_path (str | os.PathLike | None, optional): _description_. Use existing database files from this path
+            or create them here if they don't already exist. If None databases are in-memory. Defaults to None.
+
     Yields:
         Iterator[None]: None.
     """
@@ -51,6 +56,7 @@ def patch(
             duck_conn.cursor(),
             create_database=create_database_on_connect,
             create_schema=create_schema_on_connect,
+            db_path=db_path,
             **kwargs,
         ),
         snowflake.connector.pandas_tools.write_pandas: fakes.write_pandas,
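
The new db_path parameter flows from patch() through FakeSnowflakeConnection to duckdb's ATTACH (see fakes.py below). A minimal usage sketch, assuming only snowflake-connector-python is installed; the directory and database names are illustrative:

    import fakesnow
    import snowflake.connector

    with fakesnow.patch(db_path="databases/"):
        # creates databases/DB1.db on first use and reuses it on later runs;
        # with db_path=None (the default) databases stay in-memory as before
        conn = snowflake.connector.connect(database="db1", schema="s1")
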
fakesnow/checks.py CHANGED
@@ -37,10 +37,10 @@ def is_unqualified_table_expression(expression: exp.Expression) -> tuple[bool, bool]:
         no_schema = False
     elif parent_kind.upper() == "SCHEMA":
         # "CREATE/DROP SCHEMA"
-        no_database = not node.args.get("db")
+        no_database = not node.args.get("catalog")
         no_schema = False
-    elif parent_kind.upper() == "TABLE":
-        # "DROP TABLE"
+    elif parent_kind.upper() in {"TABLE", "VIEW"}:
+        # "CREATE/DROP TABLE/VIEW"
         no_database = not node.args.get("catalog")
         no_schema = not node.args.get("db")
     else:
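
The switch from db to catalog tracks how sqlglot qualifies names: in a three-part table name the database is the catalog arg, so the database qualifier of a schema name also lands in catalog. A hedged illustration; the exact parse shape depends on the sqlglot version this release pins:

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("CREATE SCHEMA marts.sales", read="snowflake").find(exp.Table)
    print(node.args.get("catalog"))  # marts - the database qualifier
    print(node.args.get("db"))       # sales - the schema itself
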
fakesnow/cli.py CHANGED
@@ -1,28 +1,70 @@
+import argparse
 import runpy
 import sys
 from collections.abc import Sequence
 
 import fakesnow
 
-USAGE = "Usage: fakesnow <path> | -m <module> [<arg>]..."
 
+def arg_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="""eg: fakesnow script.py OR fakesnow -m pytest""",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "-d",
+        "--db_path",
+        help="databases path. Use existing database files from this path or create them here if they don't already "
+        "exist. If None databases are in-memory.",
+    )
+    parser.add_argument("-m", "--module", help="target module")
+    parser.add_argument("path", type=str, nargs="?", help="target path")
+    parser.add_argument("targs", nargs="*", help="target args")
+    return parser
 
-def main(args: Sequence[str] = sys.argv) -> int:
-    if len(args) < 2 or (len(args) == 2 and args[1] == "-m"):
-        print(USAGE, file=sys.stderr)
-        return 42
 
-    with fakesnow.patch():
-        if args[1] == "-m":
-            module = args[2]
-            sys.argv = args[2:]
+def split(args: Sequence[str]) -> tuple[Sequence[str], Sequence[str]]:
+    # split the arguments into two lists either:
+    # 1) after the first -m flag, or
+    # 2) after the first positional arg
+    in_flag = False
+    i = 0
+    for i in range(len(args)):
+        a = args[i]
+        if a in ["-m", "--module"]:
+            i = min(i + 1, len(args) - 1)
+            break
+        elif a.startswith("-"):
+            in_flag = True
+        elif not in_flag:
+            break
+        else:
+            in_flag = False
+
+    return args[: i + 1], args[i + 1 :]
+
+
+def main(args: Sequence[str] = sys.argv[1:]) -> int:
+    parser = arg_parser()
+    # split args so the fakesnow cli doesn't consume from the target's args (eg: -m and -d)
+    fsargs, targs = split(args)
+    pargs = parser.parse_args(fsargs)
+
+    with fakesnow.patch(db_path=pargs.db_path):
+        if module := pargs.module:
+            # NB: pargs.path and pargs.targs are consumed by targs
+            sys.argv = [module, *targs]
 
             # add current directory to path to mimic python -m
             sys.path.insert(0, "")
             runpy.run_module(module, run_name="__main__", alter_sys=True)
-        else:
-            path = args[1]
-            sys.argv = args[1:]
+        elif path := pargs.path:
+            # NB: pargs.targs is consumed by targs
+            sys.argv = [path, *targs]
+
             runpy.run_path(path, run_name="__main__")
+        else:
+            parser.print_usage()
+            return 42
 
     return 0
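
split() exists so the fakesnow CLI doesn't swallow flags that belong to the target program. A hand-run of the function as written above, with hypothetical argv values:

    from fakesnow.cli import split

    # everything up to and including the -m target goes to the fakesnow parser,
    # the rest is forwarded to the target via sys.argv
    assert split(["-d", "databases", "-m", "pytest", "-k", "test_foo"]) == (
        ["-d", "databases", "-m", "pytest"],
        ["-k", "test_foo"],
    )
    # with a path target, the split happens after the first positional arg
    assert split(["script.py", "arg1"]) == (["script.py"], ["arg1"])
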
fakesnow/fakes.py CHANGED
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+import json
 import os
 import re
 import sys
 from collections.abc import Iterable, Iterator, Sequence
+from pathlib import Path
 from string import Template
 from types import TracebackType
 from typing import TYPE_CHECKING, Any, Literal, Optional, cast
@@ -26,15 +28,18 @@ from typing_extensions import Self
 import fakesnow.checks as checks
 import fakesnow.expr as expr
 import fakesnow.info_schema as info_schema
+import fakesnow.macros as macros
 import fakesnow.transforms as transforms
 
 SCHEMA_UNSET = "schema_unset"
-SUCCESS_SQL = "SELECT 'Statement executed successfully.' as status"
-DATABASE_CREATED_SQL = Template("SELECT 'Database ${name} successfully created.' as status")
-TABLE_CREATED_SQL = Template("SELECT 'Table ${name} successfully created.' as status")
-DROPPED_SQL = Template("SELECT '${name} successfully dropped.' as status")
-SCHEMA_CREATED_SQL = Template("SELECT 'Schema ${name} successfully created.' as status")
-INSERTED_SQL = Template("SELECT ${count} as 'number of rows inserted'")
+SQL_SUCCESS = "SELECT 'Statement executed successfully.' as 'status'"
+SQL_CREATED_DATABASE = Template("SELECT 'Database ${name} successfully created.' as 'status'")
+SQL_CREATED_SCHEMA = Template("SELECT 'Schema ${name} successfully created.' as 'status'")
+SQL_CREATED_TABLE = Template("SELECT 'Table ${name} successfully created.' as 'status'")
+SQL_DROPPED = Template("SELECT '${name} successfully dropped.' as 'status'")
+SQL_INSERTED_ROWS = Template("SELECT ${count} as 'number of rows inserted'")
+SQL_UPDATED_ROWS = Template("SELECT ${count} as 'number of rows updated', 0 as 'number of multi-joined rows updated'")
+SQL_DELETED_ROWS = Template("SELECT ${count} as 'number of rows deleted'")
 
 
 class FakeSnowflakeCursor:
@@ -59,6 +64,9 @@ class FakeSnowflakeCursor:
         self._last_params = None
         self._sqlstate = None
         self._arraysize = 1
+        self._arrow_table = None
+        self._arrow_table_fetch_index = None
+        self._rowcount = None
         self._converter = snowflake.connector.converter.SnowflakeConverter()
 
     def __enter__(self) -> Self:
@@ -69,8 +77,8 @@ class FakeSnowflakeCursor:
         exc_type: type[BaseException] | None,
         exc_value: BaseException | None,
         traceback: TracebackType | None,
-    ) -> bool:
-        return False
+    ) -> None:
+        pass
 
     @property
     def arraysize(self) -> int:
@@ -96,22 +104,16 @@ class FakeSnowflakeCursor:
 
         describe = f"DESCRIBE {command}"
         self.execute(describe, *args, **kwargs)
-        return FakeSnowflakeCursor._describe_as_result_metadata(self._duck_conn.fetchall())
+        return FakeSnowflakeCursor._describe_as_result_metadata(self.fetchall())
 
     @property
     def description(self) -> list[ResultMetadata]:
-        # use a cursor to avoid destroying an unfetched result on the main connection
-        with self._duck_conn.cursor() as cur:
-            # TODO: allow sql alchemy connection with no database or schema
-            assert self._conn.database, ".description not implemented when database is None"
-            assert self._conn.schema, ".description not implemented when schema is None"
-
-            # match database and schema used on the main connection
-            cur.execute(f"SET SCHEMA = '{self._conn.database}.{self._conn.schema}'")
+        # use a separate cursor to avoid consuming the result set on this cursor
+        with self._conn.cursor() as cur:
             cur.execute(f"DESCRIBE {self._last_sql}", self._last_params)
             meta = FakeSnowflakeCursor._describe_as_result_metadata(cur.fetchall())
 
-        return meta  # type: ignore see https://github.com/duckdb/duckdb/issues/7816
+        return meta
 
     def execute(
         self,
@@ -135,6 +137,8 @@ class FakeSnowflakeCursor:
         **kwargs: Any,
     ) -> FakeSnowflakeCursor:
         self._arrow_table = None
+        self._arrow_table_fetch_index = None
+        self._rowcount = None
 
         command, params = self._rewrite_with_params(command, params)
         expression = parse_one(command, read="snowflake")
@@ -159,10 +163,11 @@ class FakeSnowflakeCursor:
         transformed = (
             expression.transform(transforms.upper_case_unquoted_identifiers)
             .transform(transforms.set_schema, current_database=self._conn.database)
-            .transform(transforms.create_database)
-            .transform(transforms.extract_comment)
-            .transform(transforms.information_schema_columns_snowflake)
-            .transform(transforms.information_schema_tables_ext)
+            .transform(transforms.create_database, db_path=self._conn.db_path)
+            .transform(transforms.extract_comment_on_table)
+            .transform(transforms.extract_comment_on_columns)
+            .transform(transforms.information_schema_fs_columns_snowflake)
+            .transform(transforms.information_schema_fs_tables_ext)
             .transform(transforms.drop_schema_cascade)
             .transform(transforms.tag)
             .transform(transforms.semi_structured_types)
@@ -188,20 +193,20 @@ class FakeSnowflakeCursor:
             .transform(transforms.array_size)
             .transform(transforms.random)
             .transform(transforms.identifier)
+            .transform(lambda e: transforms.show_schemas(e, self._conn.database))
+            .transform(lambda e: transforms.show_objects_tables(e, self._conn.database))
         )
         sql = transformed.sql(dialect="duckdb")
+        result_sql = None
 
         if transformed.find(exp.Select) and (seed := transformed.args.get("seed")):
            sql = f"SELECT setseed({seed}); {sql}"
 
-        if os.environ.get("FAKESNOW_DEBUG") == "snowflake":
-            print(f"{command};", file=sys.stderr)
-        elif os.environ.get("FAKESNOW_DEBUG"):
-            print(f"{sql};", file=sys.stderr)
+        if fs_debug := os.environ.get("FAKESNOW_DEBUG"):
+            debug = command if fs_debug == "snowflake" else sql
+            print(f"{debug};{params=}" if params else f"{debug};", file=sys.stderr)
 
         try:
-            self._last_sql = sql
-            self._last_params = params
             self._duck_conn.execute(sql, params)
         except duckdb.BinderException as e:
             msg = e.args[0]
@@ -215,49 +220,62 @@ class FakeSnowflakeCursor:
                 e
             ) or "cannot commit - no transaction is active" in str(e):
                 # snowflake doesn't error on rollback or commit outside a tx
-                self._duck_conn.execute(SUCCESS_SQL)
-                self._last_sql = SUCCESS_SQL
+                result_sql = SQL_SUCCESS
             else:
                 raise e
 
+        affected_count = None
         if cmd == "USE DATABASE" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             self._conn.database = ident.this.upper()
             self._conn.database_set = True
 
-        if cmd == "USE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
+        elif cmd == "USE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             self._conn.schema = ident.this.upper()
             self._conn.schema_set = True
 
-        if create_db_name := transformed.args.get("create_db_name"):
+        elif create_db_name := transformed.args.get("create_db_name"):
             # we created a new database, so create the info schema extensions
             self._duck_conn.execute(info_schema.creation_sql(create_db_name))
-            created_sql = DATABASE_CREATED_SQL.substitute(name=create_db_name)
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_DATABASE.substitute(name=create_db_name)
 
-        if cmd == "CREATE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
+        elif cmd == "CREATE SCHEMA" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-            created_sql = SCHEMA_CREATED_SQL.substitute(name=name)
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_SCHEMA.substitute(name=name)
 
-        if cmd == "CREATE TABLE" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
+        elif cmd == "CREATE TABLE" and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-            created_sql = TABLE_CREATED_SQL.substitute(name=name)
-            self._duck_conn.execute(created_sql)
-            self._last_sql = created_sql
+            result_sql = SQL_CREATED_TABLE.substitute(name=name)
 
-        if cmd.startswith("DROP") and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
+        elif cmd.startswith("DROP") and (ident := expression.find(exp.Identifier)) and isinstance(ident.this, str):
             name = ident.this if ident.quoted else ident.this.upper()
-            dropped_sql = DROPPED_SQL.substitute(name=name)
-            self._duck_conn.execute(dropped_sql)
-            self._last_sql = dropped_sql
+            result_sql = SQL_DROPPED.substitute(name=name)
+
+            # if dropping the current database/schema then reset conn metadata
+            if cmd == "DROP DATABASE" and name == self._conn.database:
+                self._conn.database = None
+                self._conn.schema = None
+
+            elif cmd == "DROP SCHEMA" and name == self._conn.schema:
+                self._conn.schema = None
+
+        elif cmd == "INSERT":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_INSERTED_ROWS.substitute(count=affected_count)
 
-        if cmd == "INSERT":
-            (count,) = self._duck_conn.fetchall()[0]
-            inserted_sql = INSERTED_SQL.substitute(count=count)
-            self._duck_conn.execute(inserted_sql)
-            self._last_sql = inserted_sql
+        elif cmd == "UPDATE":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_UPDATED_ROWS.substitute(count=affected_count)
+
+        elif cmd == "DELETE":
+            (affected_count,) = self._duck_conn.fetchall()[0]
+            result_sql = SQL_DELETED_ROWS.substitute(count=affected_count)
+
+        elif cmd == "DESCRIBE TABLE":
+            # DESCRIBE TABLE has already been run above to detect and error if the table exists
+            # We now rerun DESCRIBE TABLE but transformed with columns to match Snowflake
+            result_sql = transformed.transform(
+                lambda e: transforms.describe_table(e, self._conn.database, self._conn.schema)
+            ).sql(dialect="duckdb")
 
         if table_comment := cast(tuple[exp.Table, str], transformed.args.get("table_comment")):
             # record table comment
@@ -276,6 +294,15 @@ class FakeSnowflakeCursor:
             assert catalog and schema
             self._duck_conn.execute(info_schema.insert_text_lengths_sql(catalog, schema, table.name, text_lengths))
 
+        if result_sql:
+            self._duck_conn.execute(result_sql)
+
+        self._arrow_table = self._duck_conn.fetch_arrow_table()
+        self._rowcount = affected_count or self._arrow_table.num_rows
+
+        self._last_sql = result_sql or sql
+        self._last_params = params
+
         return self
 
     def executemany(
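
With results staged in result_sql and materialised once as an Arrow table, DDL and DML now report status and counts the way Snowflake does. A hedged sketch, reusing conn from the patch() example earlier:

    cur = conn.cursor()
    cur.execute("CREATE TABLE t (i INT)")
    print(cur.fetchall())  # [('Table T successfully created.',)]
    cur.execute("INSERT INTO t VALUES (1), (2)")
    print(cur.rowcount)    # 2 - the affected row count
    print(cur.fetchall())  # [(2,)] - snowflake's 'number of rows inserted' result
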
@@ -298,13 +325,16 @@ class FakeSnowflakeCursor:
         return self
 
     def fetchall(self) -> list[tuple] | list[dict]:
-        if self._use_dict_result:
-            return self._duck_conn.fetch_arrow_table().to_pylist()
-        else:
-            return self._duck_conn.fetchall()
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise TypeError("No open result set")
+        return self.fetchmany(self._arrow_table.num_rows)
 
     def fetch_pandas_all(self, **kwargs: dict[str, Any]) -> pd.DataFrame:
-        return self._duck_conn.fetch_df()
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise snowflake.connector.NotSupportedError("No open result set")
+        return self._arrow_table.to_pandas()
 
     def fetchone(self) -> dict | tuple | None:
         result = self.fetchmany(1)
@@ -313,35 +343,26 @@ class FakeSnowflakeCursor:
     def fetchmany(self, size: int | None = None) -> list[tuple] | list[dict]:
         # https://peps.python.org/pep-0249/#fetchmany
         size = size or self._arraysize
-        if not self._use_dict_result:
-            return cast(list[tuple], self._duck_conn.fetchmany(size))
-
-        if not self._arrow_table:
-            self._arrow_table = self._duck_conn.fetch_arrow_table()
-            self._arrow_table_fetch_index = -size
 
-        self._arrow_table_fetch_index += size
+        if self._arrow_table is None:
+            # mimic snowflake python connector error type
+            raise TypeError("No open result set")
+        if self._arrow_table_fetch_index is None:
+            self._arrow_table_fetch_index = 0
+        else:
+            self._arrow_table_fetch_index += size
 
-        return self._arrow_table.slice(offset=self._arrow_table_fetch_index, length=size).to_pylist()
+        tslice = self._arrow_table.slice(offset=self._arrow_table_fetch_index, length=size).to_pylist()
+        return tslice if self._use_dict_result else [tuple(d.values()) for d in tslice]
 
     def get_result_batches(self) -> list[ResultBatch] | None:
-        # rows_per_batch is approximate
-        # see https://github.com/duckdb/duckdb/issues/4755
-        reader = self._duck_conn.fetch_record_batch(rows_per_batch=1000)
-
-        batches = []
-        try:
-            while True:
-                batches.append(FakeResultBatch(self._use_dict_result, reader.read_next_batch()))
-        except StopIteration:
-            pass
-
-        return batches
+        if self._arrow_table is None:
+            return None
+        return [FakeResultBatch(self._use_dict_result, b) for b in self._arrow_table.to_batches(max_chunksize=1000)]
 
     @property
     def rowcount(self) -> int | None:
-        # TODO: return number of rows updated/inserted (using returning)
-        return None
+        return self._rowcount
 
     @property
     def sfqid(self) -> str | None:
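
fetchmany now pages through the cached Arrow table, so dict- and tuple-style cursors share one code path. Hand-derived behaviour from the code above, continuing with table t from the earlier sketch:

    cur.execute("SELECT i FROM t ORDER BY i")
    print(cur.fetchmany(1))  # [(1,)] - fetch index starts at 0
    print(cur.fetchmany(1))  # [(2,)] - index advances by size on later calls
    print(cur.fetchmany(1))  # []    - past the end of the arrow table
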
@@ -392,6 +413,10 @@ class FakeSnowflakeCursor:
             return ResultMetadata(
                 name=column_name, type_code=8, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
             )
+        elif column_type == "TIMESTAMP WITH TIME ZONE":
+            return ResultMetadata(
+                name=column_name, type_code=7, display_size=None, internal_size=None, precision=0, scale=9, is_nullable=True  # noqa: E501
+            )
         elif column_type == "BLOB":
             return ResultMetadata(
                 name=column_name, type_code=11, display_size=None, internal_size=8388608, precision=None, scale=None, is_nullable=True  # noqa: E501
@@ -446,15 +471,18 @@ class FakeSnowflakeConnection:
         schema: str | None = None,
         create_database: bool = True,
         create_schema: bool = True,
+        db_path: str | os.PathLike | None = None,
         *args: Any,
         **kwargs: Any,
     ):
         self._duck_conn = duck_conn
-        # upper case database and schema like snowflake
+        # upper case database and schema like snowflake unquoted identifiers
+        # NB: catalog names are not case-sensitive in duckdb but stored as cased in information_schema.schemata
         self.database = database and database.upper()
         self.schema = schema and schema.upper()
         self.database_set = False
         self.schema_set = False
+        self.db_path = db_path
         self._paramstyle = "pyformat"
 
         # create database if needed
@@ -466,8 +494,10 @@ class FakeSnowflakeConnection:
                 where catalog_name = '{self.database}'"""
             ).fetchone()
         ):
-            duck_conn.execute(f"ATTACH DATABASE ':memory:' AS {self.database}")
+            db_file = f"{Path(db_path)/self.database}.db" if db_path else ":memory:"
+            duck_conn.execute(f"ATTACH DATABASE '{db_file}' AS {self.database}")
             duck_conn.execute(info_schema.creation_sql(self.database))
+            duck_conn.execute(macros.creation_sql(self.database))
 
         # create schema if needed
         if (
@@ -505,7 +535,7 @@ class FakeSnowflakeConnection:
             self.database_set = True
 
         # use UTC instead of local time zone for consistent testing
-        duck_conn.execute("SET TimeZone = 'UTC'")
+        duck_conn.execute("SET GLOBAL TimeZone = 'UTC'")
 
     def __enter__(self) -> Self:
         return self
@@ -515,8 +545,8 @@ class FakeSnowflakeConnection:
         exc_type: type[BaseException] | None,
         exc_value: BaseException | None,
         traceback: TracebackType | None,
-    ) -> bool:
-        return False
+    ) -> None:
+        pass
 
     def commit(self) -> None:
         self.cursor().execute("COMMIT")
@@ -545,12 +575,27 @@ class FakeSnowflakeConnection:
     def _insert_df(
         self, df: pd.DataFrame, table_name: str, database: str | None = None, schema: str | None = None
     ) -> int:
-        # dicts in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings
-        # whereas duckdb loads them as a struct, so we convert them to json here
-        cols = [f"TO_JSON({c})" if isinstance(df[c][0], dict) else c for c in df.columns]
-        cols = ",".join(cols)
-
-        self._duck_conn.execute(f"INSERT INTO {table_name}({','.join(df.columns.to_list())}) SELECT {cols} FROM df")
+        # Objects in dataframes are written as parquet structs, and snowflake loads parquet structs as json strings.
+        # Whereas duckdb analyses a dataframe, see https://duckdb.org/docs/api/python/data_ingestion.html#pandas-dataframes--object-columns
+        # and converts an object to the most specific type possible, eg: dict -> STRUCT, MAP or varchar, and list -> LIST.
+        # For dicts see https://github.com/duckdb/duckdb/pull/3985 and https://github.com/duckdb/duckdb/issues/9510
+        #
+        # When the rows have dicts with different keys there isn't a single STRUCT that can cover them, so the type is
+        # varchar and the value a string containing a struct representation. In order to support dicts with different
+        # keys we first convert the dicts to json strings. A pity we can't do something inside duckdb and avoid the
+        # dataframe copy and transform in python.
+
+        df = df.copy()
+
+        # Identify columns of type object
+        object_cols = df.select_dtypes(include=["object"]).columns
+
+        # Apply json.dumps to these columns
+        for col in object_cols:
+            # don't jsonify strings
+            df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, (dict, list)) else x)
+
+        self._duck_conn.execute(f"INSERT INTO {table_name}({','.join(df.columns.to_list())}) SELECT * FROM df")
         return self._duck_conn.fetchall()[0][0]
 
fakesnow/fixtures.py CHANGED
@@ -7,17 +7,17 @@ import fakesnow
 
 @pytest.fixture
 def _fakesnow() -> Iterator[None]:
-    with fakesnow.patch() as fake_fns:
-        yield fake_fns
+    with fakesnow.patch():
+        yield
 
 
 @pytest.fixture
 def _fakesnow_no_auto_create() -> Iterator[None]:
-    with fakesnow.patch(create_database_on_connect=False, create_schema_on_connect=False) as fake_fns:
-        yield fake_fns
+    with fakesnow.patch(create_database_on_connect=False, create_schema_on_connect=False):
+        yield
 
 
 @pytest.fixture(scope="session")
 def _fakesnow_session() -> Iterator[None]:
-    with fakesnow.patch() as fake_fns:
-        yield fake_fns
+    with fakesnow.patch():
+        yield
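
The fixtures now yield None, matching their Iterator[None] annotation, since patch() is itself annotated to yield nothing usable. Usage is unchanged; a minimal test sketch, assuming fakesnow.fixtures is registered as a pytest plugin:

    import snowflake.connector

    def test_connect(_fakesnow: None) -> None:
        # snowflake.connector is patched for the duration of the test
        conn = snowflake.connector.connect(database="db1", schema="s1")
        assert conn.cursor().execute("SELECT 1").fetchone() == (1,)
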
fakesnow/info_schema.py CHANGED
@@ -1,11 +1,12 @@
 """Info schema extension tables/views used for storing snowflake metadata not captured by duckdb."""
+from __future__ import annotations
 
 from string import Template
 
 # use ext prefix in columns to disambiguate when joining with information_schema.tables
 SQL_CREATE_INFORMATION_SCHEMA_TABLES_EXT = Template(
     """
-create table ${catalog}.information_schema.tables_ext (
+create table if not exists ${catalog}.information_schema._fs_tables_ext (
     ext_table_catalog varchar,
     ext_table_schema varchar,
     ext_table_name varchar,
@@ -17,7 +18,7 @@ create table ${catalog}.information_schema.tables_ext (
 
 SQL_CREATE_INFORMATION_SCHEMA_COLUMNS_EXT = Template(
     """
-create table ${catalog}.information_schema.columns_ext (
+create table if not exists ${catalog}.information_schema._fs_columns_ext (
     ext_table_catalog varchar,
     ext_table_schema varchar,
     ext_table_name varchar,
@@ -33,13 +34,14 @@ create table ${catalog}.information_schema.columns_ext (
 # snowflake integers are 38 digits, base 10, See https://docs.snowflake.com/en/sql-reference/data-types-numeric
 SQL_CREATE_INFORMATION_SCHEMA_COLUMNS_VIEW = Template(
     """
-create view ${catalog}.information_schema.columns_snowflake AS
+create view if not exists ${catalog}.information_schema._fs_columns_snowflake AS
 select table_catalog, table_schema, table_name, column_name, ordinal_position, column_default, is_nullable,
 case when starts_with(data_type, 'DECIMAL') or data_type='BIGINT' then 'NUMBER'
     when data_type='VARCHAR' then 'TEXT'
     when data_type='DOUBLE' then 'FLOAT'
     when data_type='BLOB' then 'BINARY'
     when data_type='TIMESTAMP' then 'TIMESTAMP_NTZ'
+    when data_type='TIMESTAMP WITH TIME ZONE' then 'TIMESTAMP_TZ'
     when data_type='JSON' then 'VARIANT'
     else data_type end as data_type,
 ext_character_maximum_length as character_maximum_length, ext_character_octet_length as character_octet_length,
@@ -52,7 +54,7 @@ case when data_type='BIGINT' then 10
 case when data_type='DOUBLE' then NULL else numeric_scale end as numeric_scale,
 collation_name, is_identity, identity_generation, identity_cycle
 from ${catalog}.information_schema.columns
-left join ${catalog}.information_schema.columns_ext ext
+left join ${catalog}.information_schema._fs_columns_ext ext
 on ext_table_catalog = table_catalog AND ext_table_schema = table_schema
 AND ext_table_name = table_name AND ext_column_name = column_name
 """
@@ -61,7 +63,7 @@ AND ext_table_name = table_name AND ext_column_name = column_name
 # replicates https://docs.snowflake.com/sql-reference/info-schema/databases
 SQL_CREATE_INFORMATION_SCHEMA_DATABASES_VIEW = Template(
     """
-create view ${catalog}.information_schema.databases AS
+create view if not exists ${catalog}.information_schema.databases AS
 select
     catalog_name as database_name,
     'SYSADMIN' as database_owner,
@@ -88,7 +90,7 @@ def creation_sql(catalog: str) -> str:
 
 def insert_table_comment_sql(catalog: str, schema: str, table: str, comment: str) -> str:
     return f"""
-        INSERT INTO {catalog}.information_schema.tables_ext
+        INSERT INTO {catalog}.information_schema._fs_tables_ext
         values ('{catalog}', '{schema}', '{table}', '{comment}')
         ON CONFLICT (ext_table_catalog, ext_table_schema, ext_table_name)
         DO UPDATE SET comment = excluded.comment
@@ -102,7 +104,7 @@ def insert_text_lengths_sql(catalog: str, schema: str, table: str, text_lengths:
     )
 
     return f"""
-        INSERT INTO {catalog}.information_schema.columns_ext
+        INSERT INTO {catalog}.information_schema._fs_columns_ext
         values {values}
         ON CONFLICT (ext_table_catalog, ext_table_schema, ext_table_name, ext_column_name)
         DO UPDATE SET ext_character_maximum_length = excluded.ext_character_maximum_length,
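
The _fs_ prefix marks these objects as fakesnow-internal; the renamed transforms in fakes.py (information_schema_fs_columns_snowflake, information_schema_fs_tables_ext) redirect information_schema queries to them. A hedged sketch of the visible effect:

    cur.execute("CREATE TABLE t3 (s VARCHAR)")
    cur.execute("SELECT data_type FROM information_schema.columns WHERE table_name = 'T3'")
    print(cur.fetchall())  # [('TEXT',)] - duckdb's VARCHAR reported as snowflake's TEXT
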
fakesnow/macros.py ADDED
@@ -0,0 +1,13 @@
+from string import Template
+
+EQUAL_NULL = Template(
+    """
+CREATE MACRO IF NOT EXISTS ${catalog}.equal_null(a, b) AS a IS NOT DISTINCT FROM b;
+"""
+)
+
+
+def creation_sql(catalog: str) -> str:
+    return f"""
+    {EQUAL_NULL.substitute(catalog=catalog)};
+    """