duckdb-1.5.0.dev44-cp313-cp313-win_amd64.whl → duckdb-1.5.0.dev94-cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (56)
  1. _duckdb-stubs/__init__.pyi +1443 -0
  2. _duckdb-stubs/_func.pyi +46 -0
  3. _duckdb-stubs/_sqltypes.pyi +75 -0
  4. _duckdb.cp313-win_amd64.pyd +0 -0
  5. adbc_driver_duckdb/__init__.py +49 -0
  6. adbc_driver_duckdb/dbapi.py +115 -0
  7. duckdb/__init__.py +341 -435
  8. duckdb/_dbapi_type_object.py +231 -0
  9. duckdb/_version.py +22 -0
  10. duckdb/bytes_io_wrapper.py +12 -9
  11. duckdb/experimental/__init__.py +2 -1
  12. duckdb/experimental/spark/__init__.py +3 -4
  13. duckdb/experimental/spark/_globals.py +8 -8
  14. duckdb/experimental/spark/_typing.py +7 -9
  15. duckdb/experimental/spark/conf.py +16 -15
  16. duckdb/experimental/spark/context.py +60 -44
  17. duckdb/experimental/spark/errors/__init__.py +33 -35
  18. duckdb/experimental/spark/errors/error_classes.py +1 -1
  19. duckdb/experimental/spark/errors/exceptions/__init__.py +1 -1
  20. duckdb/experimental/spark/errors/exceptions/base.py +39 -88
  21. duckdb/experimental/spark/errors/utils.py +11 -16
  22. duckdb/experimental/spark/exception.py +9 -6
  23. duckdb/experimental/spark/sql/__init__.py +5 -5
  24. duckdb/experimental/spark/sql/_typing.py +8 -15
  25. duckdb/experimental/spark/sql/catalog.py +21 -20
  26. duckdb/experimental/spark/sql/column.py +48 -55
  27. duckdb/experimental/spark/sql/conf.py +9 -8
  28. duckdb/experimental/spark/sql/dataframe.py +185 -233
  29. duckdb/experimental/spark/sql/functions.py +1222 -1248
  30. duckdb/experimental/spark/sql/group.py +56 -52
  31. duckdb/experimental/spark/sql/readwriter.py +80 -94
  32. duckdb/experimental/spark/sql/session.py +64 -59
  33. duckdb/experimental/spark/sql/streaming.py +9 -10
  34. duckdb/experimental/spark/sql/type_utils.py +67 -65
  35. duckdb/experimental/spark/sql/types.py +309 -345
  36. duckdb/experimental/spark/sql/udf.py +6 -6
  37. duckdb/filesystem.py +26 -16
  38. duckdb/func/__init__.py +3 -0
  39. duckdb/functional/__init__.py +12 -16
  40. duckdb/polars_io.py +130 -83
  41. duckdb/query_graph/__main__.py +91 -96
  42. duckdb/sqltypes/__init__.py +63 -0
  43. duckdb/typing/__init__.py +18 -8
  44. duckdb/udf.py +10 -5
  45. duckdb/value/__init__.py +1 -0
  46. duckdb/value/constant/__init__.py +62 -60
  47. {duckdb-1.5.0.dev44.dist-info → duckdb-1.5.0.dev94.dist-info}/METADATA +12 -4
  48. duckdb-1.5.0.dev94.dist-info/RECORD +52 -0
  49. duckdb/__init__.pyi +0 -713
  50. duckdb/functional/__init__.pyi +0 -31
  51. duckdb/typing/__init__.pyi +0 -36
  52. duckdb/value/constant/__init__.pyi +0 -115
  53. duckdb-1.5.0.dev44.dist-info/RECORD +0 -47
  54. /duckdb/{value/__init__.pyi → py.typed} +0 -0
  55. {duckdb-1.5.0.dev44.dist-info → duckdb-1.5.0.dev94.dist-info}/WHEEL +0 -0
  56. {duckdb-1.5.0.dev44.dist-info → duckdb-1.5.0.dev94.dist-info}/licenses/LICENSE +0 -0

duckdb/experimental/spark/sql/session.py

@@ -1,32 +1,31 @@
-from typing import Optional, List, Any, Union, Iterable, TYPE_CHECKING
-import uuid
+import uuid  # noqa: D100
+from collections.abc import Iterable, Sized
+from typing import TYPE_CHECKING, Any, NoReturn, Optional, Union
+
+import duckdb
 
 if TYPE_CHECKING:
-    from .catalog import Catalog
     from pandas.core.frame import DataFrame as PandasDataFrame
 
-from ..exception import ContributionsAcceptedError
-from .types import StructType, AtomicType, DataType
+    from .catalog import Catalog
+
+
 from ..conf import SparkConf
-from .dataframe import DataFrame
+from ..context import SparkContext
+from ..errors import PySparkTypeError
+from ..exception import ContributionsAcceptedError
 from .conf import RuntimeConfig
+from .dataframe import DataFrame
 from .readwriter import DataFrameReader
-from ..context import SparkContext
-from .udf import UDFRegistration
 from .streaming import DataStreamReader
-import duckdb
-
-from ..errors import (
-    PySparkTypeError,
-    PySparkValueError
-)
-
-from ..errors.error_classes import *
+from .types import StructType
+from .udf import UDFRegistration
 
 # In spark:
 # SparkSession holds a SparkContext
 # SparkContext gets created from SparkConf
-# At this level the check is made to determine whether the instance already exists and just needs to be retrieved or it needs to be created
+# At this level the check is made to determine whether the instance already exists and just needs
+# to be retrieved or it needs to be created.
 
 # For us this is done inside of `duckdb.connect`, based on the passed in path + configuration
 # SparkContext can be compared to our Connection class, and SparkConf to our ClientContext class
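
As an aside (not part of the diff), a minimal sketch of how that mapping is exercised from user code, wired through the `Builder` and `SparkContext` calls shown further down in this file; the import path is assumed from the package layout in the file list above:

```python
# Illustrative sketch only; import path assumed from the file list above.
from duckdb.experimental.spark.sql.session import SparkSession

# Builder.getOrCreate() constructs SparkContext("__ignored__"), which owns a DuckDB
# connection (SparkContext ~ Connection, SparkConf ~ ClientContext).
spark = SparkSession.Builder().getOrCreate()

# SparkSession.sql delegates straight to the underlying DuckDB connection.
spark.sql("select 42 as answer").show()
```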
@@ -34,7 +33,7 @@ from ..errors.error_classes import *
 
 # data is a List of rows
 # every value in each row needs to be turned into a Value
-def _combine_data_and_schema(data: Iterable[Any], schema: StructType):
+def _combine_data_and_schema(data: Iterable[Any], schema: StructType) -> list[duckdb.Value]:
     from duckdb import Value
 
     new_data = []
@@ -44,8 +43,8 @@ def _combine_data_and_schema(data: Iterable[Any], schema: StructType):
     return new_data
 
 
-class SparkSession:
-    def __init__(self, context: SparkContext):
+class SparkSession:  # noqa: D101
+    def __init__(self, context: SparkContext) -> None:  # noqa: D107
         self.conn = context.connection
         self._context = context
         self._conf = RuntimeConfig(self.conn)
@@ -53,15 +52,16 @@ class SparkSession:
     def _create_dataframe(self, data: Union[Iterable[Any], "PandasDataFrame"]) -> DataFrame:
         try:
             import pandas
+
             has_pandas = True
         except ImportError:
             has_pandas = False
         if has_pandas and isinstance(data, pandas.DataFrame):
-            unique_name = f'pyspark_pandas_df_{uuid.uuid1()}'
+            unique_name = f"pyspark_pandas_df_{uuid.uuid1()}"
             self.conn.register(unique_name, data)
             return DataFrame(self.conn.sql(f'select * from "{unique_name}"'), self)
 
-        def verify_tuple_integrity(tuples):
+        def verify_tuple_integrity(tuples: list[tuple]) -> None:
             if len(tuples) <= 1:
                 return
             expected_length = len(tuples[0])
@@ -73,9 +73,9 @@ class SparkSession:
                     error_class="LENGTH_SHOULD_BE_THE_SAME",
                     message_parameters={
                         "arg1": f"data{i}",
-                        "arg2": f"data{i+1}",
+                        "arg2": f"data{i + 1}",
                         "arg1_length": str(expected_length),
-                        "arg2_length": str(actual_length)
+                        "arg2_length": str(actual_length),
                     },
                 )
 
@@ -83,16 +83,16 @@ class SparkSession:
         data = list(data)
         verify_tuple_integrity(data)
 
-        def construct_query(tuples) -> str:
-            def construct_values_list(row, start_param_idx):
+        def construct_query(tuples: Iterable) -> str:
+            def construct_values_list(row: Sized, start_param_idx: int) -> str:
                 parameter_count = len(row)
-                parameters = [f'${x+start_param_idx}' for x in range(parameter_count)]
-                parameters = '(' + ', '.join(parameters) + ')'
+                parameters = [f"${x + start_param_idx}" for x in range(parameter_count)]
+                parameters = "(" + ", ".join(parameters) + ")"
                 return parameters
 
             row_size = len(tuples[0])
             values_list = [construct_values_list(x, 1 + (i * row_size)) for i, x in enumerate(tuples)]
-            values_list = ', '.join(values_list)
+            values_list = ", ".join(values_list)
 
             query = f"""
                 select * from (values {values_list})
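
For reference, a standalone sketch (not from the package) of the query these helpers build: every row becomes a parenthesised group of positional placeholders, numbered from 1 + i * row_size:

```python
# Standalone illustration of the placeholder query built above (hypothetical data).
rows = [(1, "a"), (2, "b")]
row_size = len(rows[0])

def values_group(row, start):
    # Mirrors construct_values_list: "($<start>, $<start+1>, ...)"
    return "(" + ", ".join(f"${start + i}" for i in range(len(row))) + ")"

values_list = ", ".join(values_group(r, 1 + i * row_size) for i, r in enumerate(rows))
print(f"select * from (values {values_list})")
# -> select * from (values ($1, $2), ($3, $4))
# construct_parameters then flattens the rows to [1, 'a', 2, 'b'] for conn.sql(query, params=...).
```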
@@ -101,7 +101,7 @@ class SparkSession:
 
         query = construct_query(data)
 
-        def construct_parameters(tuples):
+        def construct_parameters(tuples: Iterable) -> list[list]:
             parameters = []
             for row in tuples:
                 parameters.extend(list(row))
@@ -112,7 +112,9 @@ class SparkSession:
         rel = self.conn.sql(query, params=parameters)
         return DataFrame(rel, self)
 
-    def _createDataFrameFromPandas(self, data: "PandasDataFrame", types, names) -> DataFrame:
+    def _createDataFrameFromPandas(
+        self, data: "PandasDataFrame", types: Union[list[str], None], names: Union[list[str], None]
+    ) -> DataFrame:
         df = self._create_dataframe(data)
 
         # Cast to types
@@ -123,10 +125,10 @@ class SparkSession:
             df = df.toDF(*names)
         return df
 
-    def createDataFrame(
+    def createDataFrame(  # noqa: D102
         self,
         data: Union["PandasDataFrame", Iterable[Any]],
-        schema: Optional[Union[StructType, List[str]]] = None,
+        schema: Optional[Union[StructType, list[str]]] = None,
         samplingRatio: Optional[float] = None,
         verifySchema: bool = True,
     ) -> DataFrame:
@@ -175,7 +177,7 @@ class SparkSession:
         if is_empty:
             rel = df.relation
             # Add impossible where clause
-            rel = rel.filter('1=0')
+            rel = rel.filter("1=0")
             df = DataFrame(rel, self)
 
         # Cast to types
@@ -186,10 +188,10 @@ class SparkSession:
             df = df.toDF(*names)
         return df
 
-    def newSession(self) -> "SparkSession":
+    def newSession(self) -> "SparkSession":  # noqa: D102
         return SparkSession(self._context)
 
-    def range(
+    def range(  # noqa: D102
         self,
         start: int,
         end: Optional[int] = None,
@@ -203,26 +205,26 @@ class SparkSession:
             end = start
             start = 0
 
-        return DataFrame(self.conn.table_function("range", parameters=[start, end, step]),self)
+        return DataFrame(self.conn.table_function("range", parameters=[start, end, step]), self)
 
-    def sql(self, sqlQuery: str, **kwargs: Any) -> DataFrame:
+    def sql(self, sqlQuery: str, **kwargs: Any) -> DataFrame:  # noqa: D102, ANN401
         if kwargs:
             raise NotImplementedError
         relation = self.conn.sql(sqlQuery)
         return DataFrame(relation, self)
 
-    def stop(self) -> None:
+    def stop(self) -> None:  # noqa: D102
         self._context.stop()
 
-    def table(self, tableName: str) -> DataFrame:
+    def table(self, tableName: str) -> DataFrame:  # noqa: D102
         relation = self.conn.table(tableName)
         return DataFrame(relation, self)
 
-    def getActiveSession(self) -> "SparkSession":
+    def getActiveSession(self) -> "SparkSession":  # noqa: D102
         return self
 
     @property
-    def catalog(self) -> "Catalog":
+    def catalog(self) -> "Catalog":  # noqa: D102
         if not hasattr(self, "_catalog"):
             from duckdb.experimental.spark.sql.catalog import Catalog
 
@@ -230,59 +232,62 @@ class SparkSession:
         return self._catalog
 
     @property
-    def conf(self) -> RuntimeConfig:
+    def conf(self) -> RuntimeConfig:  # noqa: D102
         return self._conf
 
     @property
-    def read(self) -> DataFrameReader:
+    def read(self) -> DataFrameReader:  # noqa: D102
         return DataFrameReader(self)
 
     @property
-    def readStream(self) -> DataStreamReader:
+    def readStream(self) -> DataStreamReader:  # noqa: D102
         return DataStreamReader(self)
 
     @property
-    def sparkContext(self) -> SparkContext:
+    def sparkContext(self) -> SparkContext:  # noqa: D102
         return self._context
 
     @property
-    def streams(self) -> Any:
+    def streams(self) -> NoReturn:  # noqa: D102
         raise ContributionsAcceptedError
 
     @property
-    def udf(self) -> UDFRegistration:
+    def udf(self) -> UDFRegistration:  # noqa: D102
         return UDFRegistration(self)
 
     @property
-    def version(self) -> str:
-        return '1.0.0'
+    def version(self) -> str:  # noqa: D102
+        return "1.0.0"
 
-    class Builder:
-        def __init__(self):
+    class Builder:  # noqa: D106
+        def __init__(self) -> None:  # noqa: D107
            pass
 
-        def master(self, name: str) -> "SparkSession.Builder":
+        def master(self, name: str) -> "SparkSession.Builder":  # noqa: D102
            # no-op
            return self
 
-        def appName(self, name: str) -> "SparkSession.Builder":
+        def appName(self, name: str) -> "SparkSession.Builder":  # noqa: D102
            # no-op
            return self
 
-        def remote(self, url: str) -> "SparkSession.Builder":
+        def remote(self, url: str) -> "SparkSession.Builder":  # noqa: D102
            # no-op
            return self
 
-        def getOrCreate(self) -> "SparkSession":
+        def getOrCreate(self) -> "SparkSession":  # noqa: D102
            context = SparkContext("__ignored__")
            return SparkSession(context)
 
-        def config(
-            self, key: Optional[str] = None, value: Optional[Any] = None, conf: Optional[SparkConf] = None
+        def config(  # noqa: D102
+            self,
+            key: Optional[str] = None,
+            value: Optional[Any] = None,  # noqa: ANN401
+            conf: Optional[SparkConf] = None,
        ) -> "SparkSession.Builder":
            return self
 
-        def enableHiveSupport(self) -> "SparkSession.Builder":
+        def enableHiveSupport(self) -> "SparkSession.Builder":  # noqa: D102
            # no-op
            return self
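
A short usage sketch (illustrative only, not taken from the package) of the public surface changed above; passing a plain list of column names exercises the toDF(*names) path in createDataFrame:

```python
# Illustrative only; import path assumed from the package layout in the file list.
from duckdb.experimental.spark.sql.session import SparkSession

spark = SparkSession.Builder().getOrCreate()

# Tuples are turned into a parameterised VALUES query by the helpers above.
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "name"])
df.show()

# range() wraps DuckDB's range table function.
spark.range(5).show()
```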

duckdb/experimental/spark/sql/streaming.py

@@ -1,4 +1,5 @@
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Optional, Union  # noqa: D100
+
 from .types import StructType
 
 if TYPE_CHECKING:
@@ -9,28 +10,26 @@ PrimitiveType = Union[bool, float, int, str]
 OptionalPrimitiveType = Optional[PrimitiveType]
 
 
-class DataStreamWriter:
-    def __init__(self, dataframe: "DataFrame"):
+class DataStreamWriter:  # noqa: D101
+    def __init__(self, dataframe: "DataFrame") -> None:  # noqa: D107
         self.dataframe = dataframe
 
-    def toTable(self, table_name: str) -> None:
+    def toTable(self, table_name: str) -> None:  # noqa: D102
         # Should we register the dataframe or create a table from the contents?
         raise NotImplementedError
 
 
-class DataStreamReader:
-    def __init__(self, session: "SparkSession"):
+class DataStreamReader:  # noqa: D101
+    def __init__(self, session: "SparkSession") -> None:  # noqa: D107
         self.session = session
 
-    def load(
+    def load(  # noqa: D102
         self,
         path: Optional[str] = None,
         format: Optional[str] = None,
         schema: Union[StructType, str, None] = None,
-        **options: OptionalPrimitiveType
+        **options: OptionalPrimitiveType,
     ) -> "DataFrame":
-        from duckdb.experimental.spark.sql.dataframe import DataFrame
-
         raise NotImplementedError
 
 

duckdb/experimental/spark/sql/type_utils.py

@@ -1,105 +1,107 @@
-from duckdb.typing import DuckDBPyType
-from typing import List, Tuple, cast
+from typing import cast  # noqa: D100
+
+from duckdb.sqltypes import DuckDBPyType
+
 from .types import (
-    DataType,
-    StringType,
+    ArrayType,
     BinaryType,
     BitstringType,
-    UUIDType,
     BooleanType,
+    ByteType,
+    DataType,
     DateType,
-    TimestampType,
-    TimestampNTZType,
-    TimeType,
-    TimeNTZType,
-    TimestampNanosecondNTZType,
-    TimestampMilisecondNTZType,
-    TimestampSecondNTZType,
+    DayTimeIntervalType,
     DecimalType,
     DoubleType,
     FloatType,
-    ByteType,
-    UnsignedByteType,
-    ShortType,
-    UnsignedShortType,
+    HugeIntegerType,
     IntegerType,
-    UnsignedIntegerType,
     LongType,
-    UnsignedLongType,
-    HugeIntegerType,
-    UnsignedHugeIntegerType,
-    DayTimeIntervalType,
-    ArrayType,
     MapType,
+    ShortType,
+    StringType,
     StructField,
     StructType,
+    TimeNTZType,
+    TimestampMilisecondNTZType,
+    TimestampNanosecondNTZType,
+    TimestampNTZType,
+    TimestampSecondNTZType,
+    TimestampType,
+    TimeType,
+    UnsignedByteType,
+    UnsignedHugeIntegerType,
+    UnsignedIntegerType,
+    UnsignedLongType,
+    UnsignedShortType,
+    UUIDType,
 )
 
 _sqltype_to_spark_class = {
-    'boolean': BooleanType,
-    'utinyint': UnsignedByteType,
-    'tinyint': ByteType,
-    'usmallint': UnsignedShortType,
-    'smallint': ShortType,
-    'uinteger': UnsignedIntegerType,
-    'integer': IntegerType,
-    'ubigint': UnsignedLongType,
-    'bigint': LongType,
-    'hugeint': HugeIntegerType,
-    'uhugeint': UnsignedHugeIntegerType,
-    'varchar': StringType,
-    'blob': BinaryType,
-    'bit': BitstringType,
-    'uuid': UUIDType,
-    'date': DateType,
-    'time': TimeNTZType,
-    'time with time zone': TimeType,
-    'timestamp': TimestampNTZType,
-    'timestamp with time zone': TimestampType,
-    'timestamp_ms': TimestampNanosecondNTZType,
-    'timestamp_ns': TimestampMilisecondNTZType,
-    'timestamp_s': TimestampSecondNTZType,
-    'interval': DayTimeIntervalType,
-    'list': ArrayType,
-    'struct': StructType,
-    'map': MapType,
+    "boolean": BooleanType,
+    "utinyint": UnsignedByteType,
+    "tinyint": ByteType,
+    "usmallint": UnsignedShortType,
+    "smallint": ShortType,
+    "uinteger": UnsignedIntegerType,
+    "integer": IntegerType,
+    "ubigint": UnsignedLongType,
+    "bigint": LongType,
+    "hugeint": HugeIntegerType,
+    "uhugeint": UnsignedHugeIntegerType,
+    "varchar": StringType,
+    "blob": BinaryType,
+    "bit": BitstringType,
+    "uuid": UUIDType,
+    "date": DateType,
+    "time": TimeNTZType,
+    "time with time zone": TimeType,
+    "timestamp": TimestampNTZType,
+    "timestamp with time zone": TimestampType,
+    "timestamp_ms": TimestampNanosecondNTZType,
+    "timestamp_ns": TimestampMilisecondNTZType,
+    "timestamp_s": TimestampSecondNTZType,
+    "interval": DayTimeIntervalType,
+    "list": ArrayType,
+    "struct": StructType,
+    "map": MapType,
     # union
     # enum
     # null (???)
-    'float': FloatType,
-    'double': DoubleType,
-    'decimal': DecimalType,
+    "float": FloatType,
+    "double": DoubleType,
+    "decimal": DecimalType,
 }
 
 
-def convert_nested_type(dtype: DuckDBPyType) -> DataType:
+def convert_nested_type(dtype: DuckDBPyType) -> DataType:  # noqa: D103
     id = dtype.id
-    if id == 'list' or id == 'array':
+    if id == "list" or id == "array":
         children = dtype.children
         return ArrayType(convert_type(children[0][1]))
-    # TODO: add support for 'union'
-    if id == 'struct':
-        children: List[Tuple[str, DuckDBPyType]] = dtype.children
+    # TODO: add support for 'union'  # noqa: TD002, TD003
+    if id == "struct":
+        children: list[tuple[str, DuckDBPyType]] = dtype.children
         fields = [StructField(x[0], convert_type(x[1])) for x in children]
         return StructType(fields)
-    if id == 'map':
+    if id == "map":
         return MapType(convert_type(dtype.key), convert_type(dtype.value))
     raise NotImplementedError
 
 
-def convert_type(dtype: DuckDBPyType) -> DataType:
+def convert_type(dtype: DuckDBPyType) -> DataType:  # noqa: D103
     id = dtype.id
-    if id in ['list', 'struct', 'map', 'array']:
+    if id in ["list", "struct", "map", "array"]:
         return convert_nested_type(dtype)
-    if id == 'decimal':
-        children: List[Tuple[str, DuckDBPyType]] = dtype.children
-        precision = cast(int, children[0][1])
-        scale = cast(int, children[1][1])
+    if id == "decimal":
+        children: list[tuple[str, DuckDBPyType]] = dtype.children
+        precision = cast("int", children[0][1])
+        scale = cast("int", children[1][1])
         return DecimalType(precision, scale)
     spark_type = _sqltype_to_spark_class[id]
     return spark_type()
 
 
-def duckdb_to_spark_schema(names: List[str], types: List[DuckDBPyType]) -> StructType:
+def duckdb_to_spark_schema(names: list[str], types: list[DuckDBPyType]) -> StructType:  # noqa: D103
     fields = [StructField(name, dtype) for name, dtype in zip(names, [convert_type(x) for x in types])]
     return StructType(fields)
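
To illustrate how these converters are meant to be used (sketch only, not taken from the diff), a DuckDB relation's column names and types can be fed straight into duckdb_to_spark_schema; the import path is assumed from the package layout in the file list above:

```python
# Illustrative sketch; relation.columns / relation.types are standard DuckDB relation attributes.
import duckdb
from duckdb.experimental.spark.sql.type_utils import duckdb_to_spark_schema

conn = duckdb.connect()
rel = conn.sql("select 1 as id, 'x' as name, [1, 2] as xs")
schema = duckdb_to_spark_schema(rel.columns, rel.types)
print(schema)
# Roughly: StructType with id -> IntegerType, name -> StringType, xs -> ArrayType(IntegerType)
```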