duckdb-1.5.0.dev32-cp314-cp314-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of duckdb might be problematic.

Files changed (47)
  1. _duckdb.cpython-314-darwin.so +0 -0
  2. duckdb/__init__.py +475 -0
  3. duckdb/__init__.pyi +713 -0
  4. duckdb/bytes_io_wrapper.py +66 -0
  5. duckdb/experimental/__init__.py +2 -0
  6. duckdb/experimental/spark/LICENSE +260 -0
  7. duckdb/experimental/spark/__init__.py +7 -0
  8. duckdb/experimental/spark/_globals.py +77 -0
  9. duckdb/experimental/spark/_typing.py +48 -0
  10. duckdb/experimental/spark/conf.py +45 -0
  11. duckdb/experimental/spark/context.py +164 -0
  12. duckdb/experimental/spark/errors/__init__.py +72 -0
  13. duckdb/experimental/spark/errors/error_classes.py +918 -0
  14. duckdb/experimental/spark/errors/exceptions/__init__.py +16 -0
  15. duckdb/experimental/spark/errors/exceptions/base.py +217 -0
  16. duckdb/experimental/spark/errors/utils.py +116 -0
  17. duckdb/experimental/spark/exception.py +15 -0
  18. duckdb/experimental/spark/sql/__init__.py +7 -0
  19. duckdb/experimental/spark/sql/_typing.py +93 -0
  20. duckdb/experimental/spark/sql/catalog.py +78 -0
  21. duckdb/experimental/spark/sql/column.py +368 -0
  22. duckdb/experimental/spark/sql/conf.py +23 -0
  23. duckdb/experimental/spark/sql/dataframe.py +1437 -0
  24. duckdb/experimental/spark/sql/functions.py +6221 -0
  25. duckdb/experimental/spark/sql/group.py +420 -0
  26. duckdb/experimental/spark/sql/readwriter.py +449 -0
  27. duckdb/experimental/spark/sql/session.py +292 -0
  28. duckdb/experimental/spark/sql/streaming.py +37 -0
  29. duckdb/experimental/spark/sql/type_utils.py +105 -0
  30. duckdb/experimental/spark/sql/types.py +1275 -0
  31. duckdb/experimental/spark/sql/udf.py +37 -0
  32. duckdb/filesystem.py +23 -0
  33. duckdb/functional/__init__.py +17 -0
  34. duckdb/functional/__init__.pyi +31 -0
  35. duckdb/polars_io.py +237 -0
  36. duckdb/query_graph/__main__.py +363 -0
  37. duckdb/typing/__init__.py +61 -0
  38. duckdb/typing/__init__.pyi +36 -0
  39. duckdb/udf.py +19 -0
  40. duckdb/value/__init__.py +0 -0
  41. duckdb/value/__init__.pyi +0 -0
  42. duckdb/value/constant/__init__.py +268 -0
  43. duckdb/value/constant/__init__.pyi +115 -0
  44. duckdb-1.5.0.dev32.dist-info/METADATA +326 -0
  45. duckdb-1.5.0.dev32.dist-info/RECORD +47 -0
  46. duckdb-1.5.0.dev32.dist-info/WHEEL +6 -0
  47. duckdb-1.5.0.dev32.dist-info/licenses/LICENSE +7 -0
duckdb/experimental/spark/sql/column.py
@@ -0,0 +1,368 @@
+ from typing import Union, TYPE_CHECKING, Any, cast, Callable, Tuple
+ from ..exception import ContributionsAcceptedError
+
+ from .types import DataType
+
+ if TYPE_CHECKING:
+     from ._typing import ColumnOrName, LiteralType, DecimalLiteral, DateTimeLiteral
+
+ from duckdb import ConstantExpression, ColumnExpression, FunctionExpression, Expression
+
+ from duckdb.typing import DuckDBPyType
+
+ __all__ = ["Column"]
+
+
+ def _get_expr(x) -> Expression:
+     return x.expr if isinstance(x, Column) else ConstantExpression(x)
+
+
+ def _func_op(name: str, doc: str = "") -> Callable[["Column"], "Column"]:
+     def _(self: "Column") -> "Column":
+         njc = getattr(self.expr, name)()
+         return Column(njc)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _unary_op(
+     name: str,
+     doc: str = "unary operator",
+ ) -> Callable[["Column"], "Column"]:
+     """Create a method for the given unary operator"""
+
+     def _(self: "Column") -> "Column":
+         # Call the function identified by 'name' on the internal Expression object
+         expr = getattr(self.expr, name)()
+         return Column(expr)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _bin_op(
+     name: str,
+     doc: str = "binary operator",
+ ) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"]:
+     """Create a method for the given binary operator"""
+
+     def _(
+         self: "Column",
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         jc = _get_expr(other)
+         njc = getattr(self.expr, name)(jc)
+         return Column(njc)
+
+     _.__doc__ = doc
+     return _
+
+
+ def _bin_func(
+     name: str,
+     doc: str = "binary function",
+ ) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column"]:
+     """Create a function expression for the given binary function"""
+
+     def _(
+         self: "Column",
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         other = _get_expr(other)
+         func = FunctionExpression(name, self.expr, other)
+         return Column(func)
+
+     _.__doc__ = doc
+     return _
+
+
+ class Column:
+     """
+     A column in a DataFrame.
+
+     :class:`Column` instances can be created by::
+
+         # 1. Select a column out of a DataFrame
+
+         df.colName
+         df["colName"]
+
+         # 2. Create from an expression
+         df.colName + 1
+         1 / df.colName
+
+     .. versionadded:: 1.3.0
+     """
+
+     def __init__(self, expr: Expression):
+         self.expr = expr
+
+     # arithmetic operators
+     def __neg__(self):
+         return Column(-self.expr)
+
+     # `and`, `or`, `not` cannot be overloaded in Python,
+     # so use bitwise operators as boolean operators
+     __and__ = _bin_op("__and__")
+     __or__ = _bin_op("__or__")
+     __invert__ = _func_op("__invert__")
+     __rand__ = _bin_op("__rand__")
+     __ror__ = _bin_op("__ror__")
+
+     __add__ = _bin_op("__add__")
+
+     __sub__ = _bin_op("__sub__")
+
+     __mul__ = _bin_op("__mul__")
+
+     __div__ = _bin_op("__div__")
+
+     __truediv__ = _bin_op("__truediv__")
+
+     __mod__ = _bin_op("__mod__")
+
+     __pow__ = _bin_op("__pow__")
+
+     __radd__ = _bin_op("__radd__")
+
+     __rsub__ = _bin_op("__rsub__")
+
+     __rmul__ = _bin_op("__rmul__")
+
+     __rdiv__ = _bin_op("__rdiv__")
+
+     __rtruediv__ = _bin_op("__rtruediv__")
+
+     __rmod__ = _bin_op("__rmod__")
+
+     __rpow__ = _bin_op("__rpow__")
+
+     def __getitem__(self, k: Any) -> "Column":
+         """
+         An expression that gets an item at position ``ordinal`` out of a list,
+         or gets an item by key out of a dict.
+
+         .. versionadded:: 1.3.0
+
+         .. versionchanged:: 3.4.0
+             Supports Spark Connect.
+
+         Parameters
+         ----------
+         k
+             a literal value, or a slice object without step.
+
+         Returns
+         -------
+         :class:`Column`
+             Column representing the item got by key out of a dict, or substrings sliced by
+             the given slice object.
+
+         Examples
+         --------
+         >>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])
+         >>> df.select(df.l[slice(1, 3)], df.d['key']).show()
+         +------------------+------+
+         |substring(l, 1, 3)|d[key]|
+         +------------------+------+
+         |               abc| value|
+         +------------------+------+
+         """
+         if isinstance(k, slice):
+             raise ContributionsAcceptedError
+             # if k.step is not None:
+             #     raise ValueError("Using a slice with a step value is not supported")
+             # return self.substr(k.start, k.stop)
+         else:
+             # FIXME: this is super hacky
+             expr_str = str(self.expr) + "." + str(k)
+             return Column(ColumnExpression(expr_str))
+
+     def __getattr__(self, item: Any) -> "Column":
+         """
+         An expression that gets an item at position ``ordinal`` out of a list,
+         or gets an item by key out of a dict.
+
+         Parameters
+         ----------
+         item
+             a literal value.
+
+         Returns
+         -------
+         :class:`Column`
+             Column representing the item got by key out of a dict.
+
+         Examples
+         --------
+         >>> df = spark.createDataFrame([('abcedfg', {"key": "value"})], ["l", "d"])
+         >>> df.select(df.d.key).show()
+         +------+
+         |d[key]|
+         +------+
+         | value|
+         +------+
+         """
+         if item.startswith("__"):
+             raise AttributeError("Cannot access __ (dunder) methods")
+         return self[item]
+
+     def alias(self, alias: str):
+         return Column(self.expr.alias(alias))
+
+     def when(self, condition: "Column", value: Any):
+         if not isinstance(condition, Column):
+             raise TypeError("condition should be a Column")
+         v = _get_expr(value)
+         expr = self.expr.when(condition.expr, v)
+         return Column(expr)
+
+     def otherwise(self, value: Any):
+         v = _get_expr(value)
+         expr = self.expr.otherwise(v)
+         return Column(expr)
+
+     def cast(self, dataType: Union[DataType, str]) -> "Column":
+         if isinstance(dataType, str):
+             # Try to construct a default DuckDBPyType from it
+             internal_type = DuckDBPyType(dataType)
+         else:
+             internal_type = dataType.duckdb_type
+         return Column(self.expr.cast(internal_type))
+
+     def isin(self, *cols: Any) -> "Column":
+         if len(cols) == 1 and isinstance(cols[0], (list, set)):
+             # Only one argument supplied, it's a list
+             cols = cast(Tuple, cols[0])
+
+         cols = cast(
+             Tuple,
+             [_get_expr(c) for c in cols],
+         )
+         return Column(self.expr.isin(*cols))
+
+     # comparison operators
+     def __eq__(  # type: ignore[override]
+         self,
+         other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"],
+     ) -> "Column":
+         """binary function"""
+         return Column(self.expr == (_get_expr(other)))
+
+     def __ne__(  # type: ignore[override]
+         self,
+         other: Any,
+     ) -> "Column":
+         """binary function"""
+         return Column(self.expr != (_get_expr(other)))
+
+     __lt__ = _bin_op("__lt__")
+
+     __le__ = _bin_op("__le__")
+
+     __ge__ = _bin_op("__ge__")
+
+     __gt__ = _bin_op("__gt__")
+
+     # String interrogation methods
+
+     contains = _bin_func("contains")
+     rlike = _bin_func("regexp_matches")
+     like = _bin_func("~~")
+     ilike = _bin_func("~~*")
+     startswith = _bin_func("starts_with")
+     endswith = _bin_func("suffix")
+
+     # order
+     _asc_doc = """
+     Returns a sort expression based on the ascending order of the column.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc()).collect()
+     [Row(name='Alice'), Row(name='Tom')]
+     """
+
+     _asc_nulls_first_doc = """
+     Returns a sort expression based on ascending order of the column, and null values
+     appear before non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect()
+     [Row(name=None), Row(name='Alice'), Row(name='Tom')]
+     """
+
+     _asc_nulls_last_doc = """
+     Returns a sort expression based on ascending order of the column, and null values
+     appear after non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect()
+     [Row(name='Alice'), Row(name='Tom'), Row(name=None)]
+     """
+
+     _desc_doc = """
+     Returns a sort expression based on the descending order of the column.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc()).collect()
+     [Row(name='Tom'), Row(name='Alice')]
+     """
+
+     _desc_nulls_first_doc = """
+     Returns a sort expression based on the descending order of the column, and null values
+     appear before non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect()
+     [Row(name=None), Row(name='Tom'), Row(name='Alice')]
+     """
+
+     _desc_nulls_last_doc = """
+     Returns a sort expression based on the descending order of the column, and null values
+     appear after non-null values.
+
+     Examples
+     --------
+     >>> from pyspark.sql import Row
+     >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+     >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect()
+     [Row(name='Tom'), Row(name='Alice'), Row(name=None)]
+     """
+
+     asc = _unary_op("asc", _asc_doc)
+     desc = _unary_op("desc", _desc_doc)
+     nulls_first = _unary_op("nulls_first")
+     nulls_last = _unary_op("nulls_last")
+
+     def asc_nulls_first(self) -> "Column":
+         return self.asc().nulls_first()
+
+     def asc_nulls_last(self) -> "Column":
+         return self.asc().nulls_last()
+
+     def desc_nulls_first(self) -> "Column":
+         return self.desc().nulls_first()
+
+     def desc_nulls_last(self) -> "Column":
+         return self.desc().nulls_last()
+
+     def isNull(self) -> "Column":
+         return Column(self.expr.isnull())
+
+     def isNotNull(self) -> "Column":
+         return Column(self.expr.isnotnull())
+
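Of note in column.py: Column is a thin wrapper around duckdb's Expression API, and the _func_op/_bin_op/_bin_func factories turn Expression methods and DuckDB SQL functions into Column methods. A minimal sketch of how the pieces compose (illustrative only; the relation and column names below are invented for the example, not part of the wheel):

    import duckdb
    from duckdb import ColumnExpression
    from duckdb.experimental.spark.sql.column import Column

    rel = duckdb.sql("SELECT 'Tom' AS name, 80 AS height")

    # __add__ is generated by _bin_op: the literal 1 is wrapped in a
    # ConstantExpression and forwarded to Expression.__add__.
    c = Column(ColumnExpression("height"))
    taller = (c + 1).alias("height_plus_one")

    # Column.expr is a plain duckdb Expression, so it can be handed
    # straight to the relational API.
    rel.select(taller.expr).show()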
duckdb/experimental/spark/sql/conf.py
@@ -0,0 +1,23 @@
+ from typing import Optional, Union
+ from duckdb.experimental.spark._globals import _NoValueType, _NoValue
+ from duckdb import DuckDBPyConnection
+
+
+ class RuntimeConfig:
+     def __init__(self, connection: DuckDBPyConnection):
+         self._connection = connection
+
+     def set(self, key: str, value: str) -> None:
+         raise NotImplementedError
+
+     def isModifiable(self, key: str) -> bool:
+         raise NotImplementedError
+
+     def unset(self, key: str) -> None:
+         raise NotImplementedError
+
+     def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str:
+         raise NotImplementedError
+
+
+ __all__ = ["RuntimeConfig"]
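RuntimeConfig in this build is stubs only: every method raises NotImplementedError. For orientation, here is a sketch of what set/get could look like on top of DuckDB's own settings machinery; NaiveRuntimeConfig is a hypothetical name, it assumes every key maps directly to a DuckDB setting, and the SET interpolation does no quoting or validation:

    import duckdb

    class NaiveRuntimeConfig:
        """Hypothetical sketch: back Spark-style conf calls with DuckDB settings."""

        def __init__(self, connection: duckdb.DuckDBPyConnection):
            self._connection = connection

        def set(self, key: str, value: str) -> None:
            # DuckDB's SET statement changes a setting for the current session.
            # Naive string interpolation: fine for a sketch, unsafe for real input.
            self._connection.execute(f"SET {key} = '{value}'")

        def get(self, key: str) -> str:
            # duckdb_settings() is a table function listing every setting
            # together with its current value.
            row = self._connection.execute(
                "SELECT value FROM duckdb_settings() WHERE name = ?", [key]
            ).fetchone()
            if row is None:
                raise KeyError(key)
            return row[0]

    conf = NaiveRuntimeConfig(duckdb.connect())
    conf.set("threads", "4")
    print(conf.get("threads"))  # prints the session's thread setting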