duckdb-1.4.1.dev125-cp313-cp313-macosx_10_13_universal2.whl → duckdb-1.5.0.dev37-cp313-cp313-macosx_10_13_universal2.whl
This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in those registries.
Potentially problematic release: this version of duckdb has been flagged as potentially problematic.
- _duckdb.cpython-313-darwin.so +0 -0
- duckdb/__init__.py +374 -373
- duckdb/__init__.pyi +180 -604
- duckdb/bytes_io_wrapper.py +7 -6
- duckdb/experimental/__init__.py +1 -2
- duckdb/experimental/spark/__init__.py +4 -3
- duckdb/experimental/spark/_globals.py +8 -8
- duckdb/experimental/spark/_typing.py +9 -7
- duckdb/experimental/spark/conf.py +15 -16
- duckdb/experimental/spark/context.py +44 -60
- duckdb/experimental/spark/errors/__init__.py +35 -33
- duckdb/experimental/spark/errors/error_classes.py +1 -1
- duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
- duckdb/experimental/spark/errors/exceptions/base.py +88 -39
- duckdb/experimental/spark/errors/utils.py +16 -11
- duckdb/experimental/spark/exception.py +6 -9
- duckdb/experimental/spark/sql/__init__.py +5 -5
- duckdb/experimental/spark/sql/_typing.py +15 -8
- duckdb/experimental/spark/sql/catalog.py +20 -21
- duckdb/experimental/spark/sql/column.py +54 -47
- duckdb/experimental/spark/sql/conf.py +8 -9
- duckdb/experimental/spark/sql/dataframe.py +233 -185
- duckdb/experimental/spark/sql/functions.py +1248 -1222
- duckdb/experimental/spark/sql/group.py +52 -56
- duckdb/experimental/spark/sql/readwriter.py +94 -80
- duckdb/experimental/spark/sql/session.py +59 -64
- duckdb/experimental/spark/sql/streaming.py +10 -9
- duckdb/experimental/spark/sql/type_utils.py +64 -66
- duckdb/experimental/spark/sql/types.py +344 -308
- duckdb/experimental/spark/sql/udf.py +6 -6
- duckdb/filesystem.py +8 -13
- duckdb/functional/__init__.py +16 -2
- duckdb/polars_io.py +57 -66
- duckdb/query_graph/__main__.py +96 -91
- duckdb/typing/__init__.py +8 -8
- duckdb/typing/__init__.pyi +2 -4
- duckdb/udf.py +5 -10
- duckdb/value/__init__.py +0 -1
- duckdb/value/constant/__init__.py +59 -61
- duckdb/value/constant/__init__.pyi +4 -3
- duckdb-1.5.0.dev37.dist-info/METADATA +80 -0
- duckdb-1.5.0.dev37.dist-info/RECORD +47 -0
- adbc_driver_duckdb/__init__.py +0 -50
- adbc_driver_duckdb/dbapi.py +0 -115
- duckdb-1.4.1.dev125.dist-info/METADATA +0 -326
- duckdb-1.4.1.dev125.dist-info/RECORD +0 -49
- {duckdb-1.4.1.dev125.dist-info → duckdb-1.5.0.dev37.dist-info}/WHEEL +0 -0
- {duckdb-1.4.1.dev125.dist-info → duckdb-1.5.0.dev37.dist-info}/licenses/LICENSE +0 -0
duckdb/experimental/spark/sql/udf.py CHANGED

@@ -1,4 +1,4 @@
-# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
+# https://sparkbyexamples.com/pyspark/pyspark-udf-user-defined-function/
 from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union
 
 from .types import DataType
@@ -10,11 +10,11 @@ DataTypeOrString = Union[DataType, str]
 UserDefinedFunctionLike = TypeVar("UserDefinedFunctionLike")
 
 
-class UDFRegistration:
-    def __init__(self, sparkSession: "SparkSession")
+class UDFRegistration:
+    def __init__(self, sparkSession: "SparkSession"):
         self.sparkSession = sparkSession
 
-    def register(
+    def register(
         self,
         name: str,
         f: Union[Callable[..., Any], "UserDefinedFunctionLike"],
@@ -22,7 +22,7 @@ class UDFRegistration: # noqa: D101
     ) -> "UserDefinedFunctionLike":
         self.sparkSession.conn.create_function(name, f, return_type=returnType)
 
-    def registerJavaFunction(
+    def registerJavaFunction(
         self,
         name: str,
         javaClassName: str,
@@ -30,7 +30,7 @@ class UDFRegistration: # noqa: D101
     ) -> None:
         raise NotImplementedError
 
-    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
+    def registerJavaUDAF(self, name: str, javaClassName: str) -> None:
         raise NotImplementedError
 
 
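For orientation, a minimal sketch (not part of the diff) of the underlying DuckDB call that UDFRegistration.register forwards to, per the hunk above; the function and query are illustrative only.

import duckdb

def plus_one(x: int) -> int:
    return x + 1

con = duckdb.connect()
# Equivalent to what register() does on the session's connection:
# DuckDB infers the SQL parameter/return types from the Python annotations.
con.create_function("plus_one", plus_one)
print(con.sql("SELECT plus_one(41)").fetchall())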
duckdb/filesystem.py CHANGED

@@ -1,26 +1,21 @@
-from
-from
-
-from fsspec import AbstractFileSystem
-from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
-
+from fsspec import filesystem, AbstractFileSystem
+from fsspec.implementations.memory import MemoryFileSystem, MemoryFile
 from .bytes_io_wrapper import BytesIOWrapper
+from io import TextIOBase
 
-
-def is_file_like(obj) -> bool: # noqa: D103, ANN001
+def is_file_like(obj):
     # We only care that we can read from the file
     return hasattr(obj, "read") and hasattr(obj, "seek")
 
 
-class ModifiedMemoryFileSystem(MemoryFileSystem):
-    protocol = (
+class ModifiedMemoryFileSystem(MemoryFileSystem):
+    protocol = ('DUCKDB_INTERNAL_OBJECTSTORE',)
     # defer to the original implementation that doesn't hardcode the protocol
     _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
 
-    def add_file(self, object
+    def add_file(self, object, path):
         if not is_file_like(object):
-
-            raise ValueError(msg)
+            raise ValueError("Can not read from a non file-like object")
         path = self._strip_protocol(path)
         if isinstance(object, TextIOBase):
             # Wrap this so that we can return a bytes object from 'read'
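For context, a minimal sketch (not part of the diff) of the public path that exercises this module: passing a file-like object to read_csv. Per add_file above, a text stream (TextIOBase) is wrapped so that reads return bytes; the sample data is illustrative.

import io
import duckdb

buf = io.StringIO("a,b\n1,2\n3,4\n")  # text stream, gets wrapped by BytesIOWrapper
print(duckdb.read_csv(buf).fetchall())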
duckdb/functional/__init__.py CHANGED

@@ -1,3 +1,17 @@
-from _duckdb.functional import
+from _duckdb.functional import (
+    FunctionNullHandling,
+    PythonUDFType,
+    SPECIAL,
+    DEFAULT,
+    NATIVE,
+    ARROW
+)
 
-__all__ = [
+__all__ = [
+    "FunctionNullHandling",
+    "PythonUDFType",
+    "SPECIAL",
+    "DEFAULT",
+    "NATIVE",
+    "ARROW"
+]
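For context, a hedged sketch (not part of the diff) of where the re-exported names are used: they are the UDF type and null-handling options accepted by create_function. Assumes pyarrow is installed; the UDF itself is illustrative.

import duckdb
import pyarrow.compute as pc
from duckdb.functional import PythonUDFType, FunctionNullHandling
from duckdb.typing import BIGINT

def add_one(col):
    # With type=PythonUDFType.ARROW the UDF receives and returns Arrow arrays.
    return pc.add(col, 1)

con = duckdb.connect()
con.create_function(
    "add_one",
    add_one,
    [BIGINT],
    BIGINT,
    type=PythonUDFType.ARROW,
    null_handling=FunctionNullHandling.SPECIAL,  # the UDF handles NULLs itself
)
print(con.sql("SELECT add_one(41)").fetchall())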
duckdb/polars_io.py CHANGED

@@ -1,22 +1,20 @@
-import
-import json
-from collections.abc import Iterator
-from decimal import Decimal
-from typing import Optional
-
+import duckdb
 import polars as pl
-from
+from typing import Iterator, Optional
 
-import
+from polars.io.plugins import register_io_source
 from duckdb import SQLExpression
-
+import json
+from decimal import Decimal
+import datetime
 
 def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
-    """
-
+    """
+    Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
+
     Parameters:
         predicate (pl.Expr): A Polars expression (e.g., col("foo") > 5)
-
+
     Returns:
         SQLExpression: A DuckDB SQL expression string equivalent.
         None: If conversion fails.
@@ -27,19 +25,20 @@ def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
     """
     # Serialize the Polars expression tree to JSON
     tree = json.loads(predicate.meta.serialize(format="json"))
-
+
     try:
         # Convert the tree to SQL
         sql_filter = _pl_tree_to_sql(tree)
         return SQLExpression(sql_filter)
-    except
+    except:
        # If the conversion fails, we return None
        return None
 
 
 def _pl_operation_to_sql(op: str) -> str:
-    """
-
+    """
+    Map Polars binary operation strings to SQL equivalents.
+
     Example:
         >>> _pl_operation_to_sql("Eq")
         '='
@@ -56,11 +55,12 @@ def _pl_operation_to_sql(op: str) -> str:
             "Or": "OR",
         }[op]
     except KeyError:
-        raise NotImplementedError(op)
+        raise NotImplementedError(op)
 
 
 def _escape_sql_identifier(identifier: str) -> str:
-    """
+    """
+    Escape SQL identifiers by doubling any double quotes and wrapping in double quotes.
 
     Example:
         >>> _escape_sql_identifier('column"name')
@@ -71,14 +71,15 @@ def _escape_sql_identifier(identifier: str) -> str:
 
 
 def _pl_tree_to_sql(tree: dict) -> str:
-    """
-
+    """
+    Recursively convert a Polars expression tree (as JSON) to a SQL string.
+
     Parameters:
         tree (dict): JSON-deserialized expression tree from Polars
-
+
     Returns:
         str: SQL expression string
-
+
     Example:
         Input tree:
         {
@@ -96,15 +97,13 @@ def _pl_tree_to_sql(tree: dict) -> str:
     if node_type == "BinaryExpr":
         # Binary expressions: left OP right
         return (
-
-
-
-
-
-
-            )
-        )
-        + ")"
+            "(" +
+            " ".join((
+                _pl_tree_to_sql(subtree['left']),
+                _pl_operation_to_sql(subtree['op']),
+                _pl_tree_to_sql(subtree['right'])
+            )) +
+            ")"
         )
     if node_type == "Column":
         # A reference to a column name
@@ -132,11 +131,9 @@ def _pl_tree_to_sql(tree: dict) -> str:
                 return f"({arg_sql} IS NULL)"
             if func == "IsNotNull":
                 return f"({arg_sql} IS NOT NULL)"
-
-            raise NotImplementedError(msg)
+            raise NotImplementedError(f"Boolean function not supported: {func}")
 
-
-        raise NotImplementedError(msg)
+        raise NotImplementedError(f"Unsupported function type: {func_dict}")
 
     if node_type == "Scalar":
         # Detect format: old style (dtype/value) or new style (direct type key)
@@ -150,30 +147,20 @@ def _pl_tree_to_sql(tree: dict) -> str:
 
         # Decimal support
         if dtype.startswith("{'Decimal'") or dtype == "Decimal":
-            decimal_value = value[
+            decimal_value = value['Decimal']
             decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])
             return str(decimal_value)
 
         # Datetime with microseconds since epoch
         if dtype.startswith("{'Datetime'") or dtype == "Datetime":
-            micros = value[
+            micros = value['Datetime'][0]
             dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC)
-            return f"'{dt_timestamp
+            return f"'{str(dt_timestamp)}'::TIMESTAMP"
 
         # Match simple numeric/boolean types
-        if dtype in (
-
-
-            "Int32",
-            "Int64",
-            "UInt8",
-            "UInt16",
-            "UInt32",
-            "UInt64",
-            "Float32",
-            "Float64",
-            "Boolean",
-        ):
+        if dtype in ("Int8", "Int16", "Int32", "Int64",
+                     "UInt8", "UInt16", "UInt32", "UInt64",
+                     "Float32", "Float64", "Boolean"):
             return str(value[dtype])
 
         # Time type
@@ -181,7 +168,9 @@ def _pl_tree_to_sql(tree: dict) -> str:
             nanoseconds = value["Time"]
             seconds = nanoseconds // 1_000_000_000
             microseconds = (nanoseconds % 1_000_000_000) // 1_000
-            dt_time = (datetime.datetime.min + datetime.timedelta(
+            dt_time = (datetime.datetime.min + datetime.timedelta(
+                seconds=seconds, microseconds=microseconds
+            )).time()
             return f"'{dt_time}'::TIME"
 
         # Date type
@@ -193,7 +182,7 @@ def _pl_tree_to_sql(tree: dict) -> str:
         # Binary type
         if dtype == "Binary":
             binary_data = bytes(value["Binary"])
-            escaped =
+            escaped = ''.join(f'\\x{b:02x}' for b in binary_data)
             return f"'{escaped}'::BLOB"
 
         # String type
@@ -202,16 +191,15 @@ def _pl_tree_to_sql(tree: dict) -> str:
             string_val = value.get("StringOwned", value.get("String", None))
             return f"'{string_val}'"
 
-        msg = f"Unsupported scalar type {dtype!s}, with value {value}"
-        raise NotImplementedError(msg)
 
-
-        raise NotImplementedError(msg)
+        raise NotImplementedError(f"Unsupported scalar type {str(dtype)}, with value {value}")
 
+    raise NotImplementedError(f"Node type: {node_type} is not implemented. {subtree}")
 
 def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
-    """
-
+    """
+    A polars IO plugin for DuckDB.
+    """
     def source_generator(
         with_columns: Optional[list[str]],
         predicate: Optional[pl.Expr],
@@ -235,12 +223,15 @@ def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -
             results = relation_final.fetch_arrow_reader()
         else:
             results = relation_final.fetch_arrow_reader(batch_size)
-
-
-
-
-
-
-
+        while True:
+            try:
+                record_batch = results.read_next_batch()
+                if predicate is not None and duck_predicate is None:
+                    # We have a predicate, but did not manage to push it down, we fallback here
+                    yield pl.from_arrow(record_batch).filter(predicate)
+                else:
+                    yield pl.from_arrow(record_batch)
+            except StopIteration:
+                break
 
     return register_io_source(source_generator, schema=schema)