fakesnow 0.9.24__py3-none-any.whl → 0.9.26__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their registry.
fakesnow/__init__.py CHANGED
@@ -90,3 +90,4 @@ def patch(
         yield None
     finally:
         stack.close()
+        fs.duck_conn.close()
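
Not part of the diff: a minimal usage sketch of the behavior this fixes. When the `patch()` context exits, the patch stack unwinds and, with this change, the shared DuckDB connection is closed as well.

```python
# Sketch of typical patch() usage; fakesnow.patch is the package's public API.
import fakesnow
import snowflake.connector

with fakesnow.patch():
    conn = snowflake.connector.connect()
    conn.cursor().execute("SELECT 1")
# on exit: patches are removed and fs.duck_conn.close() (the added line) runs
```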
fakesnow/arrow.py CHANGED
@@ -1,33 +1,45 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import cast
 
 import pyarrow as pa
+import pyarrow.compute as pc
+
+from fakesnow.types import ColumnInfo
+
+
+def to_sf_schema(schema: pa.Schema, rowtype: list[ColumnInfo]) -> pa.Schema:
+    # expected by the snowflake connector
+    # uses rowtype to populate metadata, rather than the arrow schema type, for consistency with
+    # rowtype returned in the response
 
+    assert len(schema) == len(rowtype), f"schema and rowtype must be same length but f{len(schema)=} f{len(rowtype)=}"
 
-def with_sf_metadata(schema: pa.Schema) -> pa.Schema:
     # see https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp#L32
     # and https://github.com/snowflakedb/snowflake-connector-python/blob/e9393a6/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/SnowflakeType.cpp#L10
-    fms = []
-    for i, t in enumerate(schema.types):
-        f = schema.field(i)
-
-        # TODO: precision, scale, charLength etc. for all types
-
-        if t == pa.bool_():
-            fm = f.with_metadata({"logicalType": "BOOLEAN"})
-        elif t == pa.int64():
-            # scale and precision required, see here
-            # https://github.com/snowflakedb/snowflake-connector-python/blob/416ff57/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowChunkIterator.cpp#L147
-            fm = f.with_metadata({"logicalType": "FIXED", "precision": "38", "scale": "0"})
-        elif t == pa.float64():
-            fm = f.with_metadata({"logicalType": "REAL"})
-        elif isinstance(t, pa.Decimal128Type):
-            fm = f.with_metadata({"logicalType": "FIXED", "precision": str(t.precision), "scale": str(t.scale)})
-        elif t == pa.string():
-            # TODO: set charLength to size of column
-            fm = f.with_metadata({"logicalType": "TEXT", "charLength": "16777216"})
-        else:
-            raise NotImplementedError(f"Unsupported Arrow type: {t}")
-        fms.append(fm)
+
+    def sf_field(field: pa.Field, c: ColumnInfo) -> pa.Field:
+        if isinstance(field.type, pa.TimestampType):
+            # snowflake uses a struct to represent timestamps, see timestamp_to_sf_struct
+            fields = [pa.field("epoch", pa.int64(), nullable=False), pa.field("fraction", pa.int32(), nullable=False)]
+            if field.type.tz:
+                fields.append(pa.field("timezone", nullable=False, type=pa.int32()))
+            field = field.with_type(pa.struct(fields))
+        elif isinstance(field.type, pa.Time64Type):
+            field = field.with_type(pa.int64())
+
+        return field.with_metadata(
+            {
+                "logicalType": c["type"].upper(),
+                # required for FIXED type see
+                # https://github.com/snowflakedb/snowflake-connector-python/blob/416ff57/src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowChunkIterator.cpp#L147
+                "precision": str(c["precision"] or 38),
+                "scale": str(c["scale"] or 0),
+                "charLength": str(c["length"] or 0),
+            }
+        )
+
+    fms = [sf_field(schema.field(i), c) for i, c in enumerate(rowtype)]
     return pa.schema(fms)
 
 
@@ -39,29 +51,56 @@ def to_ipc(table: pa.Table) -> pa.Buffer:
 
     sink = pa.BufferOutputStream()
 
-    with pa.ipc.new_stream(sink, with_sf_metadata(table.schema)) as writer:
+    with pa.ipc.new_stream(sink, table.schema) as writer:
         writer.write_batch(batch)
 
     return sink.getvalue()
 
 
-# TODO: should this be derived before with_schema?
-def to_rowtype(schema: pa.Schema) -> list[dict[str, Any]]:
-    return [
-        {
-            "name": f.name,
-            # TODO
-            # "database": "",
-            # "schema": "",
-            # "table": "",
-            "nullable": f.nullable,
-            "type": f.metadata.get(b"logicalType").decode("utf-8").lower(),  # type: ignore
-            # TODO
-            # "byteLength": 20,
-            "length": int(f.metadata.get(b"charLength")) if f.metadata.get(b"charLength") else None,  # type: ignore
-            "scale": int(f.metadata.get(b"scale")) if f.metadata.get(b"scale") else None,  # type: ignore
-            "precision": int(f.metadata.get(b"precision")) if f.metadata.get(b"precision") else None,  # type: ignore
-            "collation": None,
-        }
-        for f in schema
-    ]
+def to_sf(table: pa.Table, rowtype: list[ColumnInfo]) -> pa.Table:
+    def to_sf_col(col: pa.Array) -> pa.Array:
+        if pa.types.is_timestamp(col.type):
+            return timestamp_to_sf_struct(col)
+        elif pa.types.is_time(col.type):
+            # as nanoseconds
+            return pc.multiply(col.cast(pa.int64()), 1000)  # type: ignore https://github.com/zen-xu/pyarrow-stubs/issues/44
+        return col
+
+    return pa.Table.from_arrays([to_sf_col(c) for c in table.columns], schema=to_sf_schema(table.schema, rowtype))
+
+
+def timestamp_to_sf_struct(ts: pa.Array | pa.ChunkedArray) -> pa.Array:
+    if isinstance(ts, pa.ChunkedArray):
+        # combine because pa.StructArray.from_arrays doesn't support ChunkedArray
+        ts = cast(pa.Array, ts.combine_chunks())  # see https://github.com/zen-xu/pyarrow-stubs/issues/46
+
+    if not isinstance(ts.type, pa.TimestampType):
+        raise ValueError(f"Expected TimestampArray, got {type(ts)}")
+
+    # Round to seconds, ie: strip subseconds
+    tsa_without_us = pc.floor_temporal(ts, unit="second")  # type: ignore https://github.com/zen-xu/pyarrow-stubs/issues/45
+    epoch = pc.divide(tsa_without_us.cast(pa.int64()), 1_000_000)  # type: ignore https://github.com/zen-xu/pyarrow-stubs/issues/44
+
+    # Calculate fractional part as nanoseconds
+    fraction = pc.multiply(pc.subsecond(ts), 1_000_000_000).cast(pa.int32())  # type: ignore
+
+    if ts.type.tz:
+        assert ts.type.tz == "UTC", f"Timezone {ts.type.tz} not yet supported"
+        timezone = pa.array([1440] * len(ts), type=pa.int32())
+
+        return pa.StructArray.from_arrays(
+            arrays=[epoch, fraction, timezone],  # type: ignore https://github.com/zen-xu/pyarrow-stubs/issues/42
+            fields=[
+                pa.field("epoch", nullable=False, type=pa.int64()),
+                pa.field("fraction", nullable=False, type=pa.int32()),
+                pa.field("timezone", nullable=False, type=pa.int32()),
+            ],
+        )
+    else:
+        return pa.StructArray.from_arrays(
+            arrays=[epoch, fraction],  # type: ignore https://github.com/zen-xu/pyarrow-stubs/issues/42
+            fields=[
+                pa.field("epoch", nullable=False, type=pa.int64()),
+                pa.field("fraction", nullable=False, type=pa.int32()),
+            ],
+        )
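
Editor's sketch (not from the package) of the struct encoding used above: `timestamp_to_sf_struct` splits a timestamp into whole seconds (`epoch`) and a nanosecond remainder (`fraction`); tz-aware UTC values gain a constant `timezone` child.

```python
# Assumes pyarrow >= 8 (pc.floor_temporal / pc.subsecond, used above).
from datetime import datetime

import pyarrow as pa

from fakesnow.arrow import timestamp_to_sf_struct

ts = pa.array([datetime(2024, 1, 2, 3, 4, 5, 123456)], type=pa.timestamp("us"))
struct = timestamp_to_sf_struct(ts)
print(struct.field("epoch"))     # [1704164645] -- whole seconds since the epoch
print(struct.field("fraction"))  # [123456000] -- subsecond part in nanoseconds
```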
fakesnow/checks.py CHANGED
@@ -68,3 +68,11 @@ def is_unqualified_table_expression(expression: exp.Expression) -> tuple[bool, b
     no_schema = not node.args.get("db")
 
     return no_database, no_schema
+
+
+def equal(left: exp.Identifier, right: exp.Identifier) -> bool:
+    # as per https://docs.snowflake.com/en/sql-reference/identifiers-syntax#label-identifier-casing
+    lid = left.this if left.quoted else left.this.upper()
+    rid = right.this if right.quoted else right.this.upper()
+
+    return lid == rid
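
The new helper encodes Snowflake's identifier casing rules. A quick sketch of its semantics:

```python
# Unquoted identifiers fold to upper case before comparison; quoted ones
# compare verbatim, per the Snowflake doc linked in the code above.
from sqlglot import exp

from fakesnow import checks

assert checks.equal(exp.Identifier(this="t1", quoted=False), exp.Identifier(this="T1", quoted=False))
assert not checks.equal(exp.Identifier(this="t1", quoted=True), exp.Identifier(this="T1", quoted=False))
```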
fakesnow/cursor.py CHANGED
@@ -112,13 +112,15 @@ class FakeSnowflakeCursor:
 
     @property
     def description(self) -> list[ResultMetadata]:
+        return describe_as_result_metadata(self._describe_last_sql())
+
+    def _describe_last_sql(self) -> list:
         # use a separate cursor to avoid consuming the result set on this cursor
         with self._conn.cursor() as cur:
+            # TODO: can we replace with self._duck_conn.description?
             expression = sqlglot.parse_one(f"DESCRIBE {self._last_sql}", read="duckdb")
             cur._execute(expression, self._last_params)  # noqa: SLF001
-            meta = describe_as_result_metadata(cur.fetchall())
-
-        return meta
+            return cur.fetchall()
 
     def execute(
         self,
@@ -137,10 +139,15 @@ class FakeSnowflakeCursor:
             command, params = self._rewrite_with_params(command, params)
             if self._conn.nop_regexes and any(re.match(p, command, re.IGNORECASE) for p in self._conn.nop_regexes):
                 transformed = transforms.SUCCESS_NOP
-            else:
-                expression = parse_one(command, read="snowflake")
-                transformed = self._transform(expression)
-            return self._execute(transformed, params)
+                self._execute(transformed, params)
+                return self
+
+            expression = parse_one(command, read="snowflake")
+            for exp in self._transform_explode(expression):
+                transformed = self._transform(exp)
+                self._execute(transformed, params)
+
+            return self
         except snowflake.connector.errors.ProgrammingError as e:
             self._sqlstate = e.sqlstate
             raise e
@@ -155,6 +162,7 @@ class FakeSnowflakeCursor:
             .transform(transforms.extract_comment_on_columns)
             .transform(transforms.information_schema_fs_columns_snowflake)
             .transform(transforms.information_schema_fs_tables_ext)
+            .transform(transforms.information_schema_fs_views)
             .transform(transforms.drop_schema_cascade)
             .transform(transforms.tag)
             .transform(transforms.semi_structured_types)
@@ -205,9 +213,12 @@ class FakeSnowflakeCursor:
             .transform(transforms.alter_table_strip_cluster_by)
         )
 
-    def _execute(
-        self, transformed: exp.Expression, params: Sequence[Any] | dict[Any, Any] | None = None
-    ) -> FakeSnowflakeCursor:
+    def _transform_explode(self, expression: exp.Expression) -> list[exp.Expression]:
+        # Applies transformations that require splitting the expression into multiple expressions
+        # Split transforms have limited support at the moment.
+        return transforms.merge(expression)
+
+    def _execute(self, transformed: exp.Expression, params: Sequence[Any] | dict[Any, Any] | None = None) -> None:
         self._arrow_table = None
         self._arrow_table_fetch_index = None
         self._rowcount = None
@@ -284,6 +295,9 @@ class FakeSnowflakeCursor:
             (affected_count,) = self._duck_conn.fetchall()[0]
             result_sql = SQL_DELETED_ROWS.substitute(count=affected_count)
 
+        elif cmd == "TRUNCATETABLE":
+            result_sql = SQL_SUCCESS
+
         elif cmd in ("DESCRIBE TABLE", "DESCRIBE VIEW"):
             # DESCRIBE TABLE/VIEW has already been run above to detect and error if the table exists
             # We now rerun DESCRIBE TABLE/VIEW but transformed with columns to match Snowflake
@@ -343,8 +357,6 @@ class FakeSnowflakeCursor:
         self._last_sql = result_sql or sql
         self._last_params = params
 
-        return self
-
     def _log_sql(self, sql: str, params: Sequence[Any] | dict[Any, Any] | None = None) -> None:
         if (fs_debug := os.environ.get("FAKESNOW_DEBUG")) and fs_debug != "snowflake":
             print(f"{sql};{params=}" if params else f"{sql};", file=sys.stderr)
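
The observable effect of the `description` refactor, sketched under the usual `fakesnow.patch()` setup: reading `description` runs DESCRIBE on a separate cursor, so it does not consume this cursor's result set.

```python
import fakesnow
import snowflake.connector

with fakesnow.patch():
    conn = snowflake.connector.connect()
    cur = conn.cursor()
    cur.execute("SELECT 1 AS a")
    print([c.name for c in cur.description])  # expected ['A'] (unquoted names fold to upper case)
    print(cur.fetchall())                     # [(1,)] -- result set still intact
```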
fakesnow/info_schema.py CHANGED
@@ -102,7 +102,7 @@ where catalog_name not in ('memory', 'system', 'temp', '_fs_global')
 # replicates https://docs.snowflake.com/sql-reference/info-schema/views
 SQL_CREATE_INFORMATION_SCHEMA_VIEWS_VIEW = Template(
     """
-create view if not exists ${catalog}.information_schema.views AS
+create view if not exists ${catalog}.information_schema._fs_views AS
 select
     database_name as table_catalog,
     schema_name as table_schema,
fakesnow/server.py CHANGED
@@ -5,19 +5,22 @@ import json
 import secrets
 from base64 import b64encode
 from dataclasses import dataclass
+from typing import Any
 
+import snowflake.connector.errors
 from starlette.applications import Starlette
 from starlette.concurrency import run_in_threadpool
 from starlette.requests import Request
 from starlette.responses import JSONResponse
 from starlette.routing import Route
 
-from fakesnow.arrow import to_ipc, to_rowtype, with_sf_metadata
+from fakesnow.arrow import to_ipc, to_sf
 from fakesnow.fakes import FakeSnowflakeConnection
 from fakesnow.instance import FakeSnow
+from fakesnow.types import describe_as_rowtype
 
-fs = FakeSnow()
-sessions = {}
+shared_fs = FakeSnow()
+sessions: dict[str, FakeSnowflakeConnection] = {}
 
 
 @dataclass
@@ -27,9 +30,19 @@ class ServerError(Exception):
     message: str
 
 
-def login_request(request: Request) -> JSONResponse:
+async def login_request(request: Request) -> JSONResponse:
     database = request.query_params.get("databaseName")
     schema = request.query_params.get("schemaName")
+    body = await request.body()
+    body_json = json.loads(gzip.decompress(body))
+    session_params: dict[str, Any] = body_json["data"]["SESSION_PARAMETERS"]
+    if db_path := session_params.get("FAKESNOW_DB_PATH"):
+        # isolated creates a new in-memory database, rather than using the shared in-memory database
+        # so this connection won't share any tables with other connections
+        fs = FakeSnow() if db_path == ":isolated:" else FakeSnow(db_path=db_path)
+    else:
+        # share the in-memory database across connections
+        fs = shared_fs
     token = secrets.token_urlsafe(32)
     sessions[token] = fs.connect(database, schema)
     return JSONResponse({"data": {"token": token}, "success": True})
@@ -44,16 +57,30 @@ async def query_request(request: Request) -> JSONResponse:
 
     sql_text = body_json["sqlText"]
 
-    # only a single sql statement is sent at a time by the python snowflake connector
-    cur = await run_in_threadpool(conn.cursor().execute, sql_text)
-
-    assert cur._arrow_table, "No result set"  # noqa: SLF001
-
-    batch_bytes = to_ipc(cur._arrow_table)  # noqa: SLF001
-    rowset_b64 = b64encode(batch_bytes).decode("utf-8")
-
-    # TODO: avoid calling with_sf_metadata twice
-    rowtype = to_rowtype(with_sf_metadata(cur._arrow_table.schema))  # noqa: SLF001
+    try:
+        # only a single sql statement is sent at a time by the python snowflake connector
+        cur = await run_in_threadpool(conn.cursor().execute, sql_text)
+    except snowflake.connector.errors.ProgrammingError as e:
+        code = f"{e.errno:06d}"
+        return JSONResponse(
+            {
+                "data": {
+                    "errorCode": code,
+                    "sqlState": e.sqlstate,
+                },
+                "code": code,
+                "message": e.msg,
+                "success": False,
+            }
+        )
+
+    rowtype = describe_as_rowtype(cur._describe_last_sql())  # noqa: SLF001
+
+    if cur._arrow_table:  # noqa: SLF001
+        batch_bytes = to_ipc(to_sf(cur._arrow_table, rowtype))  # noqa: SLF001
+        rowset_b64 = b64encode(batch_bytes).decode("utf-8")
+    else:
+        rowset_b64 = ""
 
     return JSONResponse(
         {
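
A hedged connection sketch for the new `FAKESNOW_DB_PATH` session parameter; host, port, and credentials are illustrative and assume a fakesnow server running locally.

```python
import snowflake.connector

# ":isolated:" asks the server for a fresh in-memory database for this
# connection; any other value is treated as a path to an on-disk database.
conn = snowflake.connector.connect(
    user="fake", password="fake", account="fake",
    host="localhost", port=8000, protocol="http",
    session_parameters={"FAKESNOW_DB_PATH": ":isolated:"},
)
```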
fakesnow/transforms.py CHANGED
@@ -7,6 +7,7 @@ from typing import ClassVar, Literal, cast
 import sqlglot
 from sqlglot import exp
 
+from fakesnow import transforms_merge
 from fakesnow.instance import USERS_TABLE_FQ_NAME
 from fakesnow.variables import Variables
 
@@ -36,7 +37,7 @@ def alias_in_join(expression: exp.Expression) -> exp.Expression:
 def alter_table_strip_cluster_by(expression: exp.Expression) -> exp.Expression:
     """Turn alter table cluster by into a no-op"""
     if (
-        isinstance(expression, exp.AlterTable)
+        isinstance(expression, exp.Alter)
         and (actions := expression.args.get("actions"))
         and len(actions) == 1
         and (isinstance(actions[0], exp.Cluster))
@@ -355,7 +356,7 @@ def extract_comment_on_columns(expression: exp.Expression) -> exp.Expression:
         exp.Expression: The transformed expression, with any comment stored in the new 'table_comment' arg.
     """
 
-    if isinstance(expression, exp.AlterTable) and (actions := expression.args.get("actions")):
+    if isinstance(expression, exp.Alter) and (actions := expression.args.get("actions")):
         new_actions: list[exp.Expression] = []
         col_comments: list[tuple[str, str]] = []
         for a in actions:
@@ -409,7 +410,7 @@ def extract_comment_on_table(expression: exp.Expression) -> exp.Expression:
             new.args["table_comment"] = (table, cexp.this)
             return new
     elif (
-        isinstance(expression, exp.AlterTable)
+        isinstance(expression, exp.Alter)
        and (sexp := expression.find(exp.AlterSet))
        and (scp := sexp.find(exp.SchemaCommentProperty))
        and isinstance(scp.this, exp.Literal)
@@ -435,7 +436,7 @@ def extract_text_length(expression: exp.Expression) -> exp.Expression:
         exp.Expression: The original expression, with any text lengths stored in the new 'text_lengths' arg.
     """
 
-    if isinstance(expression, (exp.Create, exp.AlterTable)):
+    if isinstance(expression, (exp.Create, exp.Alter)):
         text_lengths = []
 
         # exp.Select is for a ctas, exp.Schema is a plain definition
@@ -470,7 +471,6 @@ def flatten(expression: exp.Expression) -> exp.Expression:
 
     See https://docs.snowflake.com/en/sql-reference/functions/flatten
 
-    TODO: return index.
     TODO: support objects.
     """
     if (
@@ -482,20 +482,34 @@
     ):
         explode_expression = expression.this.this.expression
 
-        return exp.Lateral(
-            this=exp.Unnest(
+        value = exp.Cast(
+            this=explode_expression,
+            to=exp.DataType(
+                this=exp.DataType.Type.ARRAY,
+                expressions=[exp.DataType(this=exp.DataType.Type.JSON, nested=False, prefix=False)],
+                nested=True,
+            ),
+        )
+
+        return exp.Subquery(
+            this=exp.Select(
                 expressions=[
-                    exp.Cast(
-                        this=explode_expression,
-                        to=exp.DataType(
-                            this=exp.DataType.Type.ARRAY,
-                            expressions=[exp.DataType(this=exp.DataType.Type.JSON, nested=False, prefix=False)],
-                            nested=True,
+                    exp.Unnest(
+                        expressions=[value],
+                        alias=exp.Identifier(this="VALUE", quoted=False),
+                    ),
+                    exp.Alias(
+                        this=exp.Sub(
+                            this=exp.Anonymous(
+                                this="generate_subscripts", expressions=[value, exp.Literal(this="1", is_string=False)]
+                            ),
+                            expression=exp.Literal(this="1", is_string=False),
                         ),
-                    )
+                        alias=exp.Identifier(this="INDEX", quoted=False),
+                    ),
                 ],
             ),
-            alias=exp.TableAlias(this=alias.this, columns=[exp.Identifier(this="VALUE", quoted=False)]),
+            alias=exp.TableAlias(this=alias.this),
         )
 
     return expression
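
An editor's sketch of the rewrite's intent, assuming the transform matches a lateral FLATTEN of this shape: FLATTEN now exposes a zero-based INDEX alongside VALUE, built from duckdb's generate_subscripts.

```python
import sqlglot

from fakesnow import transforms

expr = sqlglot.parse_one(
    "SELECT f.value, f.index FROM LATERAL FLATTEN(input => col1) AS f",
    read="snowflake",
)
# expected shape, roughly:
#   SELECT f.value, f.index FROM (
#     SELECT UNNEST(CAST(col1 AS JSON[])) AS VALUE,
#            GENERATE_SUBSCRIPTS(CAST(col1 AS JSON[]), 1) - 1 AS INDEX
#   ) AS f
print(expr.transform(transforms.flatten).sql(dialect="duckdb"))
```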
@@ -621,6 +635,20 @@ def information_schema_fs_tables_ext(expression: exp.Expression) -> exp.Expressi
     return expression
 
 
+def information_schema_fs_views(expression: exp.Expression) -> exp.Expression:
+    """Use information_schema._fs_views to return Snowflake's version instead of duckdb's."""
+
+    if (
+        isinstance(expression, exp.Select)
+        and (tbl_exp := expression.find(exp.Table))
+        and tbl_exp.name.upper() == "VIEWS"
+        and tbl_exp.db.upper() == "INFORMATION_SCHEMA"
+    ):
+        tbl_exp.set("this", exp.Identifier(this="_FS_VIEWS", quoted=False))
+
+    return expression
+
+
 def integer_precision(expression: exp.Expression) -> exp.Expression:
     """Convert integers to bigint.
 
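
A small sketch of the new rewrite, pairing with the `_fs_views` rename in info_schema.py above:

```python
import sqlglot

from fakesnow import transforms

expr = sqlglot.parse_one("SELECT table_name FROM information_schema.views", read="snowflake")
print(transforms.information_schema_fs_views(expr).sql(dialect="duckdb"))
# SELECT table_name FROM information_schema._FS_VIEWS
```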
@@ -691,6 +719,10 @@ def json_extract_precedence(expression: exp.Expression) -> exp.Expression:
     return expression
 
 
+def merge(expression: exp.Expression) -> list[exp.Expression]:
+    return transforms_merge.merge(expression)
+
+
 def random(expression: exp.Expression) -> exp.Expression:
     """Convert random() and random(seed).
 
@@ -702,8 +734,8 @@ def random(expression: exp.Expression) -> exp.Expression:
     new_rand = exp.Cast(
         this=exp.Paren(
             this=exp.Mul(
-                this=exp.Paren(this=exp.Sub(this=exp.Rand(), expression=exp.Literal(this=0.5, is_string=False))),
-                expression=exp.Literal(this=9223372036854775807, is_string=False),
+                this=exp.Paren(this=exp.Sub(this=exp.Rand(), expression=exp.Literal(this="0.5", is_string=False))),
+                expression=exp.Literal(this="9223372036854775807", is_string=False),
             )
         ),
         to=exp.DataType(this=exp.DataType.Type.BIGINT, nested=False, prefix=False),
@@ -804,31 +836,24 @@ def regex_substr(expression: exp.Expression) -> exp.Expression:
         pattern.args["this"] = pattern.this.replace("\\\\", "\\")
 
     # number of characters from the beginning of the string where the function starts searching for matches
-    try:
-        position = expression.args["position"]
-    except KeyError:
-        position = exp.Literal(this="1", is_string=False)
+    position = expression.args["position"] or exp.Literal(this="1", is_string=False)
 
     # which occurrence of the pattern to match
-    try:
-        occurrence = int(expression.args["occurrence"].this)
-    except KeyError:
-        occurrence = 1
+    occurrence = expression.args["occurrence"]
+    occurrence = int(occurrence.this) if occurrence else 1
 
     # the duckdb dialect increments bracket (ie: index) expressions by 1 because duckdb is 1-indexed,
     # so we need to compensate by subtracting 1
     occurrence = exp.Literal(this=str(occurrence - 1), is_string=False)
 
-    try:
-        regex_parameters_value = str(expression.args["parameters"].this)
+    if parameters := expression.args["parameters"]:
         # 'e' parameter doesn't make sense for duckdb
-        regex_parameters = exp.Literal(this=regex_parameters_value.replace("e", ""), is_string=True)
-    except KeyError:
+        regex_parameters = exp.Literal(this=parameters.this.replace("e", ""), is_string=True)
+    else:
         regex_parameters = exp.Literal(is_string=True)
 
-    try:
-        group_num = expression.args["group"]
-    except KeyError:
+    group_num = expression.args["group"]
+    if not group_num:
         if isinstance(regex_parameters.this, str) and "e" in regex_parameters.this:
             group_num = exp.Literal(this="1", is_string=False)
         else:
@@ -1018,7 +1043,7 @@ def tag(expression: exp.Expression) -> exp.Expression:
         exp.Expression: The transformed expression.
     """
 
-    if isinstance(expression, exp.AlterTable) and (actions := expression.args.get("actions")):
+    if isinstance(expression, exp.Alter) and (actions := expression.args.get("actions")):
         for a in actions:
             if isinstance(a, exp.AlterSet) and a.args.get("tag"):
                 return SUCCESS_NOP
fakesnow/transforms_merge.py ADDED
@@ -0,0 +1,203 @@
+import sqlglot
+from sqlglot import exp
+
+from fakesnow import checks
+
+# Implements snowflake's MERGE INTO functionality in duckdb (https://docs.snowflake.com/en/sql-reference/sql/merge).
+
+
+def merge(merge_expr: exp.Expression) -> list[exp.Expression]:
+    if not isinstance(merge_expr, exp.Merge):
+        return [merge_expr]
+
+    return [_create_merge_candidates(merge_expr), *_mutations(merge_expr), _counts(merge_expr)]
+
+
+def _create_merge_candidates(merge_expr: exp.Merge) -> exp.Expression:
+    """
+    Given a merge statement, produce a temporary table that joins together the target and source tables.
+    The merge_op column identifies which merge clause applies to the row.
+    """
+    target_tbl = merge_expr.this
+
+    source = merge_expr.args.get("using")
+    assert isinstance(source, exp.Expression)
+    source_id = (alias := source.args.get("alias")) and alias.this if isinstance(source, exp.Subquery) else source.this
+    assert isinstance(source_id, exp.Identifier)
+
+    join_expr = merge_expr.args.get("on")
+    assert isinstance(join_expr, exp.Binary)
+
+    case_when_clauses: list[str] = []
+    values: set[str] = set()
+
+    # extract keys that reference the source table from the join expression
+    # so they can be used by the mutation statements for joining
+    # will include the source table identifier
+    values.update(
+        map(
+            str,
+            {
+                c
+                for c in join_expr.find_all(exp.Column)
+                if (table := c.args.get("table"))
+                and isinstance(table, exp.Identifier)
+                and checks.equal(table, source_id)
+            },
+        )
+    )
+
+    # Iterate through the WHEN clauses to build up the CASE WHEN clauses
+    for w_idx, w in enumerate(merge_expr.expressions):
+        assert isinstance(w, exp.When), f"Expected When expression, got {w}"
+
+        predicate = join_expr.copy()
+        matched = w.args.get("matched")
+        then = w.args.get("then")
+        condition = w.args.get("condition")
+
+        if matched:
+            # matchedClause see https://docs.snowflake.com/en/sql-reference/sql/merge#matchedclause-for-updates-or-deletes
+            if condition:
+                # Combine the top level ON expression with the AND condition
+                # from this specific WHEN into a subquery, which we use to target rows.
+                # Eg. MERGE INTO t1 USING t2 ON t1.t1Key = t2.t2Key
+                #     WHEN MATCHED AND t2.marked = 1 THEN DELETE
+                predicate = exp.And(this=predicate, expression=condition)
+
+            if isinstance(then, exp.Update):
+                case_when_clauses.append(f"WHEN {predicate} THEN {w_idx}")
+                values.update([str(c.expression) for c in then.expressions if isinstance(c.expression, exp.Column)])
+            elif isinstance(then, exp.Var) and then.args.get("this") == "DELETE":
+                case_when_clauses.append(f"WHEN {predicate} THEN {w_idx}")
+            else:
+                raise AssertionError(f"Expected 'Update' or 'Delete', got {then}")
+        else:
+            # notMatchedClause see https://docs.snowflake.com/en/sql-reference/sql/merge#notmatchedclause-for-inserts
+            assert isinstance(then, exp.Insert), f"Expected 'Insert', got {then}"
+            insert_values = then.expression.expressions
+            values.update([str(c) for c in insert_values if isinstance(c, exp.Column)])
+            predicate = f"AND {condition}" if condition else ""
+            case_when_clauses.append(f"WHEN {target_tbl}.rowid is NULL {predicate} THEN {w_idx}")
+
+    sql = f"""
+    CREATE OR REPLACE TEMPORARY TABLE merge_candidates AS
+    SELECT
+        {', '.join(sorted(values))},
+        CASE
+            {' '.join(case_when_clauses)}
+            ELSE NULL
+        END AS MERGE_OP
+    FROM {target_tbl}
+    FULL OUTER JOIN {source} ON {join_expr.sql()}
+    WHERE MERGE_OP IS NOT NULL
+    """
+
+    return sqlglot.parse_one(sql)
+
+
+def _mutations(merge_expr: exp.Merge) -> list[exp.Expression]:
+    """
+    Given a merge statement, produce a list of delete, update and insert statements that use the
+    merge_candidates and source table to update the target table.
+    """
+    target_tbl = merge_expr.this
+    source = merge_expr.args.get("using")
+    source_tbl = source.alias if isinstance(source, exp.Subquery) else source
+    join_expr = merge_expr.args.get("on")
+
+    statements: list[exp.Expression] = []
+
+    # Iterate through the WHEN clauses to generate delete/update/insert statements
+    for w_idx, w in enumerate(merge_expr.expressions):
+        assert isinstance(w, exp.When), f"Expected When expression, got {w}"
+
+        matched = w.args.get("matched")
+        then = w.args.get("then")
+
+        if matched:
+            if isinstance(then, exp.Var) and then.args.get("this") == "DELETE":
+                delete_sql = f"""
+                DELETE FROM {target_tbl}
+                USING merge_candidates AS {source_tbl}
+                WHERE {join_expr}
+                AND {source_tbl}.merge_op = {w_idx}
+                """
+                statements.append(sqlglot.parse_one(delete_sql))
+            elif isinstance(then, exp.Update):
+                # when the update statement has a table alias, duckdb doesn't support the alias in the set
+                # column name, so we use e.this.this to get just the column name without its table prefix
+                set_clauses = ", ".join(
+                    [f"{e.this.this} = {e.expression.sql()}" for e in then.args.get("expressions", [])]
+                )
+                update_sql = f"""
+                UPDATE {target_tbl}
+                SET {set_clauses}
+                FROM merge_candidates AS {source_tbl}
+                WHERE {join_expr}
+                AND {source_tbl}.merge_op = {w_idx}
+                """
+                statements.append(sqlglot.parse_one(update_sql))
+            else:
+                raise AssertionError(f"Expected 'Update' or 'Delete', got {then}")
+        else:
+            assert isinstance(then, exp.Insert), f"Expected 'Insert', got {then}"
+            cols = [str(c) for c in then.this.expressions] if then.this else []
+            columns = f"({', '.join(cols)})" if cols else ""
+            values = ", ".join(map(str, then.expression.expressions))
+            insert_sql = f"""
+            INSERT INTO {target_tbl} {columns}
+            SELECT {values}
+            FROM merge_candidates AS {source_tbl}
+            WHERE {source_tbl}.merge_op = {w_idx}
+            """
+            statements.append(sqlglot.parse_one(insert_sql))
+
+    return statements
+
+
+def _counts(merge_expr: exp.Merge) -> exp.Expression:
+    """
+    Given a merge statement, derive a SQL statement which produces the following columns using the merge_candidates
+    table:
+
+    - "number of rows inserted"
+    - "number of rows updated"
+    - "number of rows deleted"
+
+    Only columns relevant to the merge operation are included, eg: if no rows are deleted, the "number of rows deleted"
+    column is not included.
+    """
+
+    # Initialize dictionaries to store operation types and their corresponding indices
+    operations = {"inserted": [], "updated": [], "deleted": []}
+
+    # Iterate through the WHEN clauses to categorize operations
+    for w_idx, w in enumerate(merge_expr.expressions):
+        assert isinstance(w, exp.When), f"Expected When expression, got {w}"
+
+        matched = w.args.get("matched")
+        then = w.args.get("then")
+
+        if matched:
+            if isinstance(then, exp.Update):
+                operations["updated"].append(w_idx)
+            elif isinstance(then, exp.Var) and then.args.get("this") == "DELETE":
+                operations["deleted"].append(w_idx)
+            else:
+                raise AssertionError(f"Expected 'Update' or 'Delete', got {then}")
+        else:
+            assert isinstance(then, exp.Insert), f"Expected 'Insert', got {then}"
+            operations["inserted"].append(w_idx)
+
+    count_statements = [
+        f"""COUNT_IF(merge_op in ({','.join(map(str, indices))})) as \"number of rows {op}\""""
+        for op, indices in operations.items()
+        if indices
+    ]
+    sql = f"""
+    SELECT {', '.join(count_statements)}
+    FROM merge_candidates
+    """
+
+    return sqlglot.parse_one(sql)
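
How the pieces fit together, as an editor's sketch: one MERGE becomes a merge_candidates temp table, one mutation statement per WHEN clause, and a final counts query.

```python
import sqlglot

from fakesnow import transforms_merge

merge_expr = sqlglot.parse_one(
    """
    MERGE INTO t1 USING t2 ON t1.t1key = t2.t2key
    WHEN MATCHED THEN UPDATE SET val = t2.newval
    WHEN NOT MATCHED THEN INSERT (t1key, val) VALUES (t2.t2key, t2.newval)
    """,
    read="snowflake",
)
for statement in transforms_merge.merge(merge_expr):
    print(statement.sql(dialect="duckdb"), end="\n\n")
# roughly: CREATE OR REPLACE TEMPORARY TABLE merge_candidates AS ...;
# an UPDATE and an INSERT joined against merge_candidates;
# then SELECT COUNT_IF(...) AS "number of rows updated", ... FROM merge_candidates
```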
fakesnow-0.9.24.dist-info/METADATA → fakesnow-0.9.26.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fakesnow
-Version: 0.9.24
+Version: 0.9.26
 Summary: Fake Snowflake Connector for Python. Run, mock and test Snowflake DB locally.
 License: Apache License
                                  Version 2.0, January 2004
@@ -210,22 +210,22 @@ Classifier: License :: OSI Approved :: MIT License
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: duckdb~=1.0.0
+Requires-Dist: duckdb~=1.1.3
 Requires-Dist: pyarrow
 Requires-Dist: snowflake-connector-python
-Requires-Dist: sqlglot~=25.9.0
+Requires-Dist: sqlglot~=25.24.1
 Provides-Extra: dev
 Requires-Dist: build~=1.0; extra == "dev"
 Requires-Dist: dirty-equals; extra == "dev"
 Requires-Dist: pandas-stubs; extra == "dev"
 Requires-Dist: snowflake-connector-python[pandas,secure-local-storage]; extra == "dev"
-Requires-Dist: pre-commit~=3.4; extra == "dev"
-Requires-Dist: pyarrow-stubs; extra == "dev"
+Requires-Dist: pre-commit~=4.0; extra == "dev"
+Requires-Dist: pyarrow-stubs==10.0.1.9; extra == "dev"
 Requires-Dist: pytest~=8.0; extra == "dev"
 Requires-Dist: pytest-asyncio; extra == "dev"
-Requires-Dist: ruff~=0.5.1; extra == "dev"
+Requires-Dist: ruff~=0.7.2; extra == "dev"
 Requires-Dist: twine~=5.0; extra == "dev"
-Requires-Dist: snowflake-sqlalchemy~=1.5.0; extra == "dev"
+Requires-Dist: snowflake-sqlalchemy~=1.6.1; extra == "dev"
 Provides-Extra: notebook
 Requires-Dist: duckdb-engine; extra == "notebook"
 Requires-Dist: ipykernel; extra == "notebook"
fakesnow-0.9.26.dist-info/RECORD ADDED
@@ -0,0 +1,26 @@
+fakesnow/__init__.py,sha256=qUfgucQYPdELrJaxczalhJgWAWQ6cfTCUAHx6nUqRaI,3528
+fakesnow/__main__.py,sha256=GDrGyNTvBFuqn_UfDjKs7b3LPtU6gDv1KwosVDrukIM,76
+fakesnow/arrow.py,sha256=EGAYeuCnRuvmWBEGqw2YOcgQR4zcCsZBu85kSRl70dQ,4698
+fakesnow/checks.py,sha256=N8sXldhS3u1gG32qvZ4VFlsKgavRKrQrxLiQU8am1lw,2691
+fakesnow/cli.py,sha256=9qfI-Ssr6mo8UmIlXkUAOz2z2YPBgDsrEVaZv9FjGFs,2201
+fakesnow/conn.py,sha256=Gy_Z7BZRm5yMjV3x6hR4iegDQFdG9aJBjqWdc3iWYFU,5353
+fakesnow/cursor.py,sha256=8wWtRCxzrM1yiHmH2C-9CT0b98nTzr23ygeaEAkumRE,20086
+fakesnow/expr.py,sha256=CAxuYIUkwI339DQIBzvFF0F-m1tcVGKEPA5rDTzmH9A,892
+fakesnow/fakes.py,sha256=JQTiUkkwPeQrJ8FDWhPFPK6pGwd_aR2oiOrNzCWznlM,187
+fakesnow/fixtures.py,sha256=G-NkVeruSQAJ7fvSS2fR2oysUn0Yra1pohHlOvacKEk,455
+fakesnow/info_schema.py,sha256=nsDceFtjiSXrvkksKziVvqrefskaSyOmAspBwMAsaDg,6307
+fakesnow/instance.py,sha256=3cJvPRuFy19dMKXbtBLl6imzO48pEw8uTYhZyFDuwhk,3133
+fakesnow/macros.py,sha256=pX1YJDnQOkFJSHYUjQ6ErEkYIKvFI6Ncz_au0vv1csA,265
+fakesnow/pandas_tools.py,sha256=WjyjTV8QUCQQaCGboaEOvx2uo4BkknpWYjtLwkeCY6U,3468
+fakesnow/py.typed,sha256=B-DLSjYBi7pkKjwxCSdpVj2J02wgfJr-E7B1wOUyxYU,80
+fakesnow/server.py,sha256=SO5xKZ4rvySsuKDsoSPSCZcFuIX_K7d1XJYhRRJ-7Bk,4150
+fakesnow/transforms.py,sha256=VFLA5Fc1i4FuiVdvUuDrK-kA2caqiT8Gw9btMDPJhRA,55367
+fakesnow/transforms_merge.py,sha256=7rq-UPjfFNRrFsqR8xx3otwP6-k4eslLVLhfuqSXq1A,8314
+fakesnow/types.py,sha256=9Tt83Z7ctc9_v6SYyayXYz4MEI4RZo4zq_uqdj4g3Dk,2681
+fakesnow/variables.py,sha256=WXyPnkeNwD08gy52yF66CVe2twiYC50tztNfgXV4q1k,3032
+fakesnow-0.9.26.dist-info/LICENSE,sha256=kW-7NWIyaRMQiDpryfSmF2DObDZHGR1cJZ39s6B1Svg,11344
+fakesnow-0.9.26.dist-info/METADATA,sha256=92zIwzq7FP-BrfhUcKbdbqYs0eqN9TCKvT_NVdEKZTI,18075
+fakesnow-0.9.26.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
+fakesnow-0.9.26.dist-info/entry_points.txt,sha256=2riAUgu928ZIHawtO8EsfrMEJhi-EH-z_Vq7Q44xKPM,47
+fakesnow-0.9.26.dist-info/top_level.txt,sha256=500evXI1IFX9so82cizGIEMHAb_dJNPaZvd2H9dcKTA,24
+fakesnow-0.9.26.dist-info/RECORD,,
fakesnow-0.9.24.dist-info/WHEEL → fakesnow-0.9.26.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.44.0)
+Generator: bdist_wheel (0.45.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
@@ -1,25 +0,0 @@
1
- fakesnow/__init__.py,sha256=9tFJJKvowKNW3vfnlmza6hOLN1I52DwChgNc5Ew6CcA,3499
2
- fakesnow/__main__.py,sha256=GDrGyNTvBFuqn_UfDjKs7b3LPtU6gDv1KwosVDrukIM,76
3
- fakesnow/arrow.py,sha256=WLkr1nEiNxUcPdzadKSM33sRAiQJsN6LvuzTVIsi3D0,2766
4
- fakesnow/checks.py,sha256=-QMvdcrRbhN60rnzxLBJ0IkUBWyLR8gGGKKmCS0w9mA,2383
5
- fakesnow/cli.py,sha256=9qfI-Ssr6mo8UmIlXkUAOz2z2YPBgDsrEVaZv9FjGFs,2201
6
- fakesnow/conn.py,sha256=Gy_Z7BZRm5yMjV3x6hR4iegDQFdG9aJBjqWdc3iWYFU,5353
7
- fakesnow/cursor.py,sha256=2PtW9hzfXs3mzv6BBxXLoS-pPtD4otrfQ2KnPNNanGI,19441
8
- fakesnow/expr.py,sha256=CAxuYIUkwI339DQIBzvFF0F-m1tcVGKEPA5rDTzmH9A,892
9
- fakesnow/fakes.py,sha256=JQTiUkkwPeQrJ8FDWhPFPK6pGwd_aR2oiOrNzCWznlM,187
10
- fakesnow/fixtures.py,sha256=G-NkVeruSQAJ7fvSS2fR2oysUn0Yra1pohHlOvacKEk,455
11
- fakesnow/info_schema.py,sha256=DObVOrhzppAFHsdtj4YI9oRISn9SkJUG6ONjVleQQ_Y,6303
12
- fakesnow/instance.py,sha256=3cJvPRuFy19dMKXbtBLl6imzO48pEw8uTYhZyFDuwhk,3133
13
- fakesnow/macros.py,sha256=pX1YJDnQOkFJSHYUjQ6ErEkYIKvFI6Ncz_au0vv1csA,265
14
- fakesnow/pandas_tools.py,sha256=WjyjTV8QUCQQaCGboaEOvx2uo4BkknpWYjtLwkeCY6U,3468
15
- fakesnow/py.typed,sha256=B-DLSjYBi7pkKjwxCSdpVj2J02wgfJr-E7B1wOUyxYU,80
16
- fakesnow/server.py,sha256=8dzaLUUXPzCMm6-ESn0CBws6XSwwOpnUuHQAZJ-4SwU,3011
17
- fakesnow/transforms.py,sha256=ellcY5OBc7mqgL9ChNolrqcCLWXF9RH21Jt88FcFl-I,54419
18
- fakesnow/types.py,sha256=9Tt83Z7ctc9_v6SYyayXYz4MEI4RZo4zq_uqdj4g3Dk,2681
19
- fakesnow/variables.py,sha256=WXyPnkeNwD08gy52yF66CVe2twiYC50tztNfgXV4q1k,3032
20
- fakesnow-0.9.24.dist-info/LICENSE,sha256=kW-7NWIyaRMQiDpryfSmF2DObDZHGR1cJZ39s6B1Svg,11344
21
- fakesnow-0.9.24.dist-info/METADATA,sha256=LHKc6JYn9sxxFh6_i7kqlWz1fmloFv2CCmpalwPVFrE,18064
22
- fakesnow-0.9.24.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
23
- fakesnow-0.9.24.dist-info/entry_points.txt,sha256=2riAUgu928ZIHawtO8EsfrMEJhi-EH-z_Vq7Q44xKPM,47
24
- fakesnow-0.9.24.dist-info/top_level.txt,sha256=500evXI1IFX9so82cizGIEMHAb_dJNPaZvd2H9dcKTA,24
25
- fakesnow-0.9.24.dist-info/RECORD,,