cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
Files changed (76)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +60 -15
  4. cudf_polars/containers/column.py +137 -77
  5. cudf_polars/containers/dataframe.py +123 -34
  6. cudf_polars/containers/datatype.py +134 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/binaryop.py +34 -14
  10. cudf_polars/dsl/expressions/boolean.py +110 -37
  11. cudf_polars/dsl/expressions/datetime.py +59 -30
  12. cudf_polars/dsl/expressions/literal.py +11 -5
  13. cudf_polars/dsl/expressions/rolling.py +460 -119
  14. cudf_polars/dsl/expressions/selection.py +9 -8
  15. cudf_polars/dsl/expressions/slicing.py +1 -1
  16. cudf_polars/dsl/expressions/string.py +256 -114
  17. cudf_polars/dsl/expressions/struct.py +19 -7
  18. cudf_polars/dsl/expressions/ternary.py +33 -3
  19. cudf_polars/dsl/expressions/unary.py +126 -64
  20. cudf_polars/dsl/ir.py +1053 -350
  21. cudf_polars/dsl/to_ast.py +30 -13
  22. cudf_polars/dsl/tracing.py +194 -0
  23. cudf_polars/dsl/translate.py +307 -107
  24. cudf_polars/dsl/utils/aggregations.py +43 -30
  25. cudf_polars/dsl/utils/reshape.py +14 -2
  26. cudf_polars/dsl/utils/rolling.py +12 -8
  27. cudf_polars/dsl/utils/windows.py +35 -20
  28. cudf_polars/experimental/base.py +55 -2
  29. cudf_polars/experimental/benchmarks/pdsds.py +12 -126
  30. cudf_polars/experimental/benchmarks/pdsh.py +792 -2
  31. cudf_polars/experimental/benchmarks/utils.py +596 -39
  32. cudf_polars/experimental/dask_registers.py +47 -20
  33. cudf_polars/experimental/dispatch.py +9 -3
  34. cudf_polars/experimental/distinct.py +2 -0
  35. cudf_polars/experimental/explain.py +15 -2
  36. cudf_polars/experimental/expressions.py +30 -15
  37. cudf_polars/experimental/groupby.py +25 -4
  38. cudf_polars/experimental/io.py +156 -124
  39. cudf_polars/experimental/join.py +53 -23
  40. cudf_polars/experimental/parallel.py +68 -19
  41. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  42. cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
  43. cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
  44. cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
  45. cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
  46. cudf_polars/experimental/rapidsmpf/core.py +488 -0
  47. cudf_polars/experimental/rapidsmpf/dask.py +172 -0
  48. cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
  49. cudf_polars/experimental/rapidsmpf/io.py +696 -0
  50. cudf_polars/experimental/rapidsmpf/join.py +322 -0
  51. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  52. cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
  53. cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
  54. cudf_polars/experimental/rapidsmpf/union.py +115 -0
  55. cudf_polars/experimental/rapidsmpf/utils.py +374 -0
  56. cudf_polars/experimental/repartition.py +9 -2
  57. cudf_polars/experimental/select.py +177 -14
  58. cudf_polars/experimental/shuffle.py +46 -12
  59. cudf_polars/experimental/sort.py +100 -26
  60. cudf_polars/experimental/spilling.py +1 -1
  61. cudf_polars/experimental/statistics.py +24 -5
  62. cudf_polars/experimental/utils.py +25 -7
  63. cudf_polars/testing/asserts.py +13 -8
  64. cudf_polars/testing/io.py +2 -1
  65. cudf_polars/testing/plugin.py +93 -17
  66. cudf_polars/typing/__init__.py +86 -32
  67. cudf_polars/utils/config.py +473 -58
  68. cudf_polars/utils/cuda_stream.py +70 -0
  69. cudf_polars/utils/versions.py +5 -4
  70. cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
  71. cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
  72. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
  73. cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
  74. cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
  75. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/dsl/expressions/aggregation.py
@@ -6,6 +6,7 @@
 
 from __future__ import annotations
 
+from decimal import Decimal
 from functools import partial
 from typing import TYPE_CHECKING, Any, ClassVar
 
@@ -16,23 +17,31 @@ from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 from cudf_polars.dsl.expressions.literal import Literal
 
 if TYPE_CHECKING:
+    from rmm.pylibrmm.stream import Stream
+
     from cudf_polars.containers import DataFrame, DataType
 
 __all__ = ["Agg"]
 
 
 class Agg(Expr):
-    __slots__ = ("name", "op", "options", "request")
-    _non_child = ("dtype", "name", "options")
+    __slots__ = ("context", "name", "op", "options", "request")
+    _non_child = ("dtype", "name", "options", "context")
 
     def __init__(
-        self, dtype: DataType, name: str, options: Any, *children: Expr
+        self,
+        dtype: DataType,
+        name: str,
+        options: Any,
+        context: ExecutionContext,
+        *children: Expr,
     ) -> None:
         self.dtype = dtype
         self.name = name
         self.options = options
         self.is_pointwise = False
         self.children = children
+        self.context = context
        if name not in Agg._SUPPORTED:
             raise NotImplementedError(
                 f"Unsupported aggregation {name=}"
@@ -71,7 +80,7 @@ class Agg(Expr):
             raise NotImplementedError("Only support literal quantile values")
         if options == "equiprobable":
             raise NotImplementedError("Quantile with equiprobable interpolation")
-        if plc.traits.is_duration(child.dtype.plc):
+        if plc.traits.is_duration(child.dtype.plc_type):
             raise NotImplementedError("Quantile with duration data type")
         req = plc.aggregation.quantile(
             quantiles=[quantile.value], interp=Agg.interp_mapping[options]
@@ -80,9 +89,19 @@ class Agg(Expr):
             raise NotImplementedError(
                 f"Unreachable, {name=} is incorrectly listed in _SUPPORTED"
             )  # pragma: no cover
+        if (
+            context == ExecutionContext.FRAME
+            and req is not None
+            and not plc.aggregation.is_valid_aggregation(dtype.plc_type, req)
+        ):
+            # TODO: Check which cases polars raises vs returns all-NULL column.
+            # For the all-NULL column cases, we could build it using Column.all_null_like
+            # at evaluation time.
+            raise NotImplementedError(f"Invalid aggregation {req} with dtype {dtype}")
         self.request = req
         op = getattr(self, f"_{name}", None)
         if op is None:
+            assert req is not None  # Ensure req is not None for _reduce
             op = partial(self._reduce, request=req)
         elif name in {"min", "max"}:
             op = partial(op, propagate_nans=options)
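The `getattr(self, f"_{name}", None)` lookup that follows the new validity guard is plain name-based dispatch: a hand-written `_<name>` method wins, and anything else falls back to the generic `_reduce` with the pylibcudf request bound in. A toy model of that pattern (not the real class):

```python
from functools import partial

class MiniAgg:
    """Toy model of Agg's name-based dispatch."""

    def _reduce(self, value, *, request):
        return f"generic reduce({value}) via {request!r}"

    def _sum(self, value):
        return f"specialised sum({value})"

    def op_for(self, name, request):
        # Prefer a hand-written _<name>; otherwise bind the request
        # into the generic reduction, as Agg.__init__ does above.
        op = getattr(self, f"_{name}", None)
        return op if op is not None else partial(self._reduce, request=request)

m = MiniAgg()
print(m.op_for("sum", None)(3))      # specialised sum(3)
print(m.op_for("median", "req")(3))  # generic reduce(3) via 'req'
```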
@@ -136,77 +155,110 @@ class Agg(Expr):
         return self.request
 
     def _reduce(
-        self, column: Column, *, request: plc.aggregation.Aggregation
+        self, column: Column, *, request: plc.aggregation.Aggregation, stream: Stream
     ) -> Column:
+        if (
+            # For sum, this condition can only pass
+            # after expression decomposition in the streaming
+            # engine
+            self.name in {"sum", "mean", "median"}
+            and plc.traits.is_fixed_point(column.dtype.plc_type)
+            and self.dtype.plc_type.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
+        ):
+            column = column.astype(self.dtype, stream=stream)
         return Column(
             plc.Column.from_scalar(
-                plc.reduce.reduce(column.obj, request, self.dtype.plc),
+                plc.reduce.reduce(
+                    column.obj, request, self.dtype.plc_type, stream=stream
+                ),
                 1,
+                stream=stream,
             ),
             name=column.name,
             dtype=self.dtype,
         )
 
-    def _count(self, column: Column, *, include_nulls: bool) -> Column:
+    def _count(self, column: Column, *, include_nulls: bool, stream: Stream) -> Column:
         null_count = column.null_count if not include_nulls else 0
         return Column(
             plc.Column.from_scalar(
-                plc.Scalar.from_py(column.size - null_count, self.dtype.plc),
+                plc.Scalar.from_py(
+                    column.size - null_count, self.dtype.plc_type, stream=stream
+                ),
                 1,
+                stream=stream,
             ),
             name=column.name,
             dtype=self.dtype,
         )
 
-    def _sum(self, column: Column) -> Column:
+    def _sum(self, column: Column, stream: Stream) -> Column:
         if column.size == 0 or column.null_count == column.size:
+            dtype = self.dtype.plc_type
             return Column(
                 plc.Column.from_scalar(
-                    plc.Scalar.from_py(0, self.dtype.plc),
+                    plc.Scalar.from_py(
+                        Decimal(0).scaleb(dtype.scale())
+                        if plc.traits.is_fixed_point(dtype)
+                        else 0,
+                        dtype,
+                        stream=stream,
+                    ),
                     1,
+                    stream=stream,
                 ),
                 name=column.name,
                 dtype=self.dtype,
             )
-        return self._reduce(column, request=plc.aggregation.sum())
+        return self._reduce(column, request=plc.aggregation.sum(), stream=stream)
 
-    def _min(self, column: Column, *, propagate_nans: bool) -> Column:
-        if propagate_nans and column.nan_count > 0:
+    def _min(self, column: Column, *, propagate_nans: bool, stream: Stream) -> Column:
+        nan_count = column.nan_count(stream=stream)
+        if propagate_nans and nan_count > 0:
             return Column(
                 plc.Column.from_scalar(
-                    plc.Scalar.from_py(float("nan"), self.dtype.plc),
+                    plc.Scalar.from_py(
+                        float("nan"), self.dtype.plc_type, stream=stream
+                    ),
                     1,
+                    stream=stream,
                 ),
                 name=column.name,
                 dtype=self.dtype,
             )
-        if column.nan_count > 0:
-            column = column.mask_nans()
-        return self._reduce(column, request=plc.aggregation.min())
+        if nan_count > 0:
+            column = column.mask_nans(stream=stream)
+        return self._reduce(column, request=plc.aggregation.min(), stream=stream)
 
-    def _max(self, column: Column, *, propagate_nans: bool) -> Column:
-        if propagate_nans and column.nan_count > 0:
+    def _max(self, column: Column, *, propagate_nans: bool, stream: Stream) -> Column:
+        nan_count = column.nan_count(stream=stream)
+        if propagate_nans and nan_count > 0:
             return Column(
                 plc.Column.from_scalar(
-                    plc.Scalar.from_py(float("nan"), self.dtype.plc),
+                    plc.Scalar.from_py(
+                        float("nan"), self.dtype.plc_type, stream=stream
+                    ),
                     1,
+                    stream=stream,
                 ),
                 name=column.name,
                 dtype=self.dtype,
             )
-        if column.nan_count > 0:
-            column = column.mask_nans()
-        return self._reduce(column, request=plc.aggregation.max())
+        if nan_count > 0:
+            column = column.mask_nans(stream=stream)
+        return self._reduce(column, request=plc.aggregation.max(), stream=stream)
 
-    def _first(self, column: Column) -> Column:
+    def _first(self, column: Column, stream: Stream) -> Column:
         return Column(
-            plc.copying.slice(column.obj, [0, 1])[0], name=column.name, dtype=self.dtype
+            plc.copying.slice(column.obj, [0, 1], stream=stream)[0],
+            name=column.name,
+            dtype=self.dtype,
         )
 
-    def _last(self, column: Column) -> Column:
+    def _last(self, column: Column, stream: Stream) -> Column:
         n = column.size
         return Column(
-            plc.copying.slice(column.obj, [n - 1, n])[0],
+            plc.copying.slice(column.obj, [n - 1, n], stream=stream)[0],
             name=column.name,
             dtype=self.dtype,
         )
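Worth noting in `_sum`: the identity value for an empty or all-null decimal column is no longer a bare integer `0` but a `Decimal` zero carrying the column's scale, since pylibcudf fixed-point types encode fractional digits as a negative scale. A quick CPU-side illustration of what `Decimal(0).scaleb(scale)` produces:

```python
from decimal import Decimal

# pylibcudf fixed-point scales are powers of ten;
# scale() == -2 means two fractional digits.
scale = -2
zero = Decimal(0).scaleb(scale)
print(zero)             # 0.00
print(zero.as_tuple())  # DecimalTuple(sign=0, digits=(0,), exponent=-2)
```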
@@ -223,4 +275,4 @@ class Agg(Expr):
         # Aggregations like quantiles may have additional children that were
         # preprocessed into pylibcudf requests.
         child = self.children[0]
-        return self.op(child.evaluate(df, context=context))
+        return self.op(child.evaluate(df, context=context), stream=df.stream)
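This hunk shows the pattern repeated throughout the release: every pylibcudf call that touches device memory now receives the evaluating DataFrame's CUDA stream explicitly rather than relying on an implicit default. Schematically, using the names from the diff above:

```python
# 25.10.0: device work ran on an implicit default stream.
scalar = plc.reduce.reduce(column.obj, request, self.dtype.plc)

# 26.2.0: the stream is threaded from DataFrame.stream through every call,
# which keeps all work for one evaluation ordered on one stream.
scalar = plc.reduce.reduce(
    column.obj, request, self.dtype.plc_type, stream=df.stream
)
```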
cudf_polars/dsl/expressions/binaryop.py
@@ -8,13 +8,15 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING, ClassVar
 
-from polars.polars import _expr_nodes as pl_expr
+from polars import polars  # type: ignore[attr-defined]
 
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
+pl_expr = polars._expr_nodes
+
 if TYPE_CHECKING:
     from cudf_polars.containers import DataFrame, DataType
 
@@ -33,7 +35,7 @@ class BinOp(Expr):
         right: Expr,
     ) -> None:
         self.dtype = dtype
-        if plc.traits.is_boolean(self.dtype.plc):
+        if plc.traits.is_boolean(self.dtype.plc_type):
             # For boolean output types, bitand and bitor implement
             # boolean logic, so translate. bitxor also does, but the
             # default behaviour is correct.
@@ -42,7 +44,7 @@ class BinOp(Expr):
         self.children = (left, right)
         self.is_pointwise = True
         if not plc.binaryop.is_supported_operation(
-            self.dtype.plc, left.dtype.plc, right.dtype.plc, op
+            self.dtype.plc_type, left.dtype.plc_type, right.dtype.plc_type, op
         ):
             raise NotImplementedError(
                 f"Operation {op.name} not supported "
@@ -59,7 +61,9 @@ class BinOp(Expr):
         plc.binaryop.BinaryOperator.LOGICAL_OR: plc.binaryop.BinaryOperator.NULL_LOGICAL_OR,
     }
 
-    _MAPPING: ClassVar[dict[pl_expr.Operator, plc.binaryop.BinaryOperator]] = {
+    _MAPPING: ClassVar[
+        dict[polars._expr_nodes.Operator, plc.binaryop.BinaryOperator]
+    ] = {
         pl_expr.Operator.Eq: plc.binaryop.BinaryOperator.EQUAL,
         pl_expr.Operator.EqValidity: plc.binaryop.BinaryOperator.NULL_EQUALS,
         pl_expr.Operator.NotEq: plc.binaryop.BinaryOperator.NOT_EQUAL,
@@ -87,20 +91,25 @@ class BinOp(Expr):
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         left, right = (child.evaluate(df, context=context) for child in self.children)
-        lop = left.obj
-        rop = right.obj
+        lop: plc.Column | plc.Scalar = left.obj
+        rop: plc.Column | plc.Scalar = right.obj
         if left.size != right.size:
             if left.is_scalar:
-                lop = left.obj_scalar
+                lop = left.obj_scalar(stream=df.stream)
             elif right.is_scalar:
-                rop = right.obj_scalar
-        if plc.traits.is_integral_not_bool(self.dtype.plc) and self.op in {
+                rop = right.obj_scalar(stream=df.stream)
+        if plc.traits.is_integral_not_bool(self.dtype.plc_type) and self.op in {
             plc.binaryop.BinaryOperator.FLOOR_DIV,
             plc.binaryop.BinaryOperator.PYMOD,
         }:
-            if right.obj.size() == 1 and right.obj.to_scalar().to_py() == 0:
+            if (
+                right.obj.size() == 1
+                and right.obj.to_scalar(stream=df.stream).to_py(stream=df.stream) == 0
+            ):
                 return Column(
-                    plc.Column.all_null_like(left.obj, left.obj.size()),
+                    plc.Column.all_null_like(
+                        left.obj, left.obj.size(), stream=df.stream
+                    ),
                     dtype=self.dtype,
                 )
 
@@ -108,13 +117,24 @@ class BinOp(Expr):
             rop = plc.replace.find_and_replace_all(
                 right.obj,
                 plc.Column.from_scalar(
-                    plc.Scalar.from_py(0, dtype=self.dtype.plc), 1
+                    plc.Scalar.from_py(
+                        0, dtype=self.dtype.plc_type, stream=df.stream
+                    ),
+                    1,
+                    stream=df.stream,
                 ),
                 plc.Column.from_scalar(
-                    plc.Scalar.from_py(None, dtype=self.dtype.plc), 1
+                    plc.Scalar.from_py(
+                        None, dtype=self.dtype.plc_type, stream=df.stream
+                    ),
+                    1,
+                    stream=df.stream,
                 ),
+                stream=df.stream,
             )
         return Column(
-            plc.binaryop.binary_operation(lop, rop, self.op, self.dtype.plc),
+            plc.binaryop.binary_operation(
+                lop, rop, self.op, self.dtype.plc_type, stream=df.stream
+            ),
             dtype=self.dtype,
         )
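The `FLOOR_DIV`/`PYMOD` special-casing mirrors Polars, where integer division or modulo by zero produces null instead of raising: a scalar zero divisor short-circuits to an all-null column, and a column divisor has its zeros replaced with nulls before libcudf sees them. The CPU engine behaves the same way (checked against recent Polars; exact output formatting may vary):

```python
import polars as pl

df = pl.DataFrame({"a": [7, 8, 9], "b": [2, 0, 3]})
out = df.select(
    floordiv=pl.col("a") // pl.col("b"),
    mod=pl.col("a") % pl.col("b"),
)
print(out)  # floordiv: [3, null, 3], mod: [1, null, 0]
```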
cudf_polars/dsl/expressions/boolean.py
@@ -8,7 +8,9 @@ from __future__ import annotations
 
 from enum import IntEnum, auto
 from functools import partial, reduce
-from typing import TYPE_CHECKING, Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar, cast
+
+import polars as pl
 
 import pylibcudf as plc
 
@@ -22,7 +24,9 @@ if TYPE_CHECKING:
     from typing_extensions import Self
 
     import polars.type_aliases as pl_types
-    from polars.polars import _expr_nodes as pl_expr
+    from polars import polars  # type: ignore[attr-defined]
+
+    from rmm.pylibrmm.stream import Stream
 
     from cudf_polars.containers import DataFrame
 
@@ -53,7 +57,7 @@ class BooleanFunction(Expr):
         Not = auto()
 
     @classmethod
-    def from_polars(cls, obj: pl_expr.BooleanFunction) -> Self:
+    def from_polars(cls, obj: polars._expr_nodes.BooleanFunction) -> Self:
         """Convert from polars' `BooleanFunction`."""
         try:
             function, name = str(obj).split(".", maxsplit=1)
@@ -101,6 +105,7 @@ class BooleanFunction(Expr):
         keep: plc.stream_compaction.DuplicateKeepOption,
         source_value: plc.Scalar,
         target_value: plc.Scalar,
+        stream: Stream,
     ) -> Column:
         table = plc.Table([column.obj])
         indices = plc.stream_compaction.distinct_indices(
@@ -109,12 +114,20 @@ class BooleanFunction(Expr):
             # TODO: polars doesn't expose options for these
             plc.types.NullEquality.EQUAL,
             plc.types.NanEquality.ALL_EQUAL,
+            stream=stream,
         )
         return Column(
             plc.copying.scatter(
                 [source_value],
                 indices,
-                plc.Table([plc.Column.from_scalar(target_value, table.num_rows())]),
+                plc.Table(
+                    [
+                        plc.Column.from_scalar(
+                            target_value, table.num_rows(), stream=stream
+                        )
+                    ]
+                ),
+                stream=stream,
             ).columns()[0],
             dtype=dtype,
         )
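`_distinct` backs four predicates (IsFirstDistinct, IsLastDistinct, IsUnique, IsDuplicated) with one primitive: `distinct_indices` yields the row positions kept under the given policy, and those positions get `source_value` scattered into a column pre-filled with `target_value`. A pure-Python sketch of the same idea for the KEEP_FIRST case (i.e. `is_first_distinct`):

```python
def is_first_distinct(values):
    # Positions distinct_indices would keep under KEEP_FIRST.
    seen, keep = set(), []
    for i, v in enumerate(values):
        if v not in seen:
            seen.add(v)
            keep.append(i)
    # Scatter: start from target_value (False), write source_value (True).
    out = [False] * len(values)
    for i in keep:
        out[i] = True
    return out

print(is_first_distinct([1, 2, 1, 3, 2]))  # [True, True, False, True, False]
```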
@@ -153,31 +166,36 @@ class BooleanFunction(Expr):
         ):
             # Avoid evaluating the child if the dtype tells us it's unnecessary.
             (child,) = self.children
-            needles = child.evaluate(df, context=context)
-            is_float = needles.obj.type().id() in (
+            values = child.evaluate(df, context=context)
+            is_float = values.obj.type().id() in (
                 plc.TypeId.FLOAT32,
                 plc.TypeId.FLOAT64,
             )
             is_finite = self.name is BooleanFunction.Name.IsFinite
             if not is_float:
                 base = plc.Column.from_scalar(
-                    plc.Scalar.from_py(py_val=is_finite), needles.size
+                    plc.Scalar.from_py(py_val=is_finite, stream=df.stream),
+                    values.size,
+                    stream=df.stream,
                 )
-                out = base.with_mask(needles.obj.null_mask(), needles.null_count)
+                out = base.with_mask(values.obj.null_mask(), values.null_count)
                 return Column(out, dtype=self.dtype)
             to_search = [-float("inf"), float("inf")]
             if is_finite:
                 # NaN is neither finite not infinite
                 to_search.append(float("nan"))
-            haystack = plc.Column.from_iterable_of_py(
+            nonfinite_values = plc.Column.from_iterable_of_py(
                 to_search,
-                dtype=needles.obj.type(),
+                dtype=values.obj.type(),
+                stream=df.stream,
             )
-            result = plc.search.contains(haystack, needles.obj)
+            result = plc.search.contains(nonfinite_values, values.obj, stream=df.stream)
             if is_finite:
-                result = plc.unary.unary_operation(result, plc.unary.UnaryOperator.NOT)
+                result = plc.unary.unary_operation(
+                    result, plc.unary.UnaryOperator.NOT, stream=df.stream
+                )
             return Column(
-                result.with_mask(needles.obj.null_mask(), needles.null_count),
+                result.with_mask(values.obj.null_mask(), values.null_count),
                 dtype=self.dtype,
             )
         columns = [child.evaluate(df, context=context) for child in self.children]
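The IsFinite/IsInfinite path avoids per-element classification: it builds the two-or-three element set of non-finite values for the column's dtype and runs a membership test, negating the result for IsFinite. The scalar equivalent of that logic (NaN handled explicitly here because `nan != nan` in Python):

```python
import math

NONFINITE = (float("-inf"), float("inf"))

def is_infinite(x):
    return x in NONFINITE

def is_finite(x):
    # NaN is neither finite nor infinite, so IsFinite must also
    # exclude it, matching the extra float("nan") in to_search.
    return not (x in NONFINITE or math.isnan(x))

print([is_finite(v) for v in (1.0, float("inf"), float("nan"))])
# [True, False, False]
print([is_infinite(v) for v in (1.0, float("inf"), float("nan"))])
# [False, True, False]
```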
@@ -188,7 +206,9 @@ class BooleanFunction(Expr):
             (column,) = columns
             is_any = self.name is BooleanFunction.Name.Any
             agg = plc.aggregation.any() if is_any else plc.aggregation.all()
-            result = plc.reduce.reduce(column.obj, agg, self.dtype.plc)
+            scalar_result = plc.reduce.reduce(
+                column.obj, agg, self.dtype.plc_type, stream=df.stream
+            )
             if not ignore_nulls and column.null_count > 0:
                 # Truth tables
                 #               Any         All
@@ -200,20 +220,28 @@ class BooleanFunction(Expr):
                 #
                 # If the input null count was non-zero, we must
                 # post-process the result to insert the correct value.
-                h_result = result.to_py()
+                h_result = scalar_result.to_py(stream=df.stream)
                 if (is_any and not h_result) or (not is_any and h_result):
                     # Any                    All
                     # False || Null => Null  True && Null => Null
                     return Column(
-                        plc.Column.all_null_like(column.obj, 1), dtype=self.dtype
+                        plc.Column.all_null_like(column.obj, 1, stream=df.stream),
+                        dtype=self.dtype,
                     )
-            return Column(plc.Column.from_scalar(result, 1), dtype=self.dtype)
+            return Column(
+                plc.Column.from_scalar(scalar_result, 1, stream=df.stream),
+                dtype=self.dtype,
+            )
         if self.name is BooleanFunction.Name.IsNull:
             (column,) = columns
-            return Column(plc.unary.is_null(column.obj), dtype=self.dtype)
+            return Column(
+                plc.unary.is_null(column.obj, stream=df.stream), dtype=self.dtype
+            )
         elif self.name is BooleanFunction.Name.IsNotNull:
             (column,) = columns
-            return Column(plc.unary.is_valid(column.obj), dtype=self.dtype)
+            return Column(
+                plc.unary.is_valid(column.obj, stream=df.stream), dtype=self.dtype
+            )
         elif self.name in (BooleanFunction.Name.IsNan, BooleanFunction.Name.IsNotNan):
             (column,) = columns
             is_float = column.obj.type().id() in (
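The truth-table handling above reproduces Polars' Kleene logic for `any`/`all` when `ignore_nulls=False`: a null only changes the outcome when the null-free reduction lands on the identity element (False for any, True for all). Observable on the CPU engine with recent Polars:

```python
import polars as pl

s = pl.Series([False, None])
print(s.any(ignore_nulls=True))   # False  (null skipped)
print(s.any(ignore_nulls=False))  # None   (False || null => null)

t = pl.Series([True, None])
print(t.all(ignore_nulls=False))  # None   (True && null => null)
print(t.any(ignore_nulls=False))  # True   (short-circuits on True)
```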
@@ -230,9 +258,11 @@ class BooleanFunction(Expr):
             else:
                 base = plc.Column.from_scalar(
                     plc.Scalar.from_py(
-                        py_val=self.name is not BooleanFunction.Name.IsNan
+                        py_val=self.name is not BooleanFunction.Name.IsNan,
+                        stream=df.stream,
                     ),
                     column.size,
+                    stream=df.stream,
                 )
                 out = base.with_mask(column.obj.null_mask(), column.null_count)
                 return Column(out, dtype=self.dtype)
@@ -242,8 +272,13 @@ class BooleanFunction(Expr):
                 column,
                 dtype=self.dtype,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST,
-                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype.plc),
-                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype.plc),
+                source_value=plc.Scalar.from_py(
+                    py_val=True, dtype=self.dtype.plc_type, stream=df.stream
+                ),
+                target_value=plc.Scalar.from_py(
+                    py_val=False, dtype=self.dtype.plc_type, stream=df.stream
+                ),
+                stream=df.stream,
             )
         elif self.name is BooleanFunction.Name.IsLastDistinct:
             (column,) = columns
@@ -251,8 +286,15 @@ class BooleanFunction(Expr):
                 column,
                 dtype=self.dtype,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_LAST,
-                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype.plc),
-                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype.plc),
+                source_value=plc.Scalar.from_py(
+                    py_val=True, dtype=self.dtype.plc_type, stream=df.stream
+                ),
+                target_value=plc.Scalar.from_py(
+                    py_val=False,
+                    dtype=self.dtype.plc_type,
+                    stream=df.stream,
+                ),
+                stream=df.stream,
             )
         elif self.name is BooleanFunction.Name.IsUnique:
             (column,) = columns
@@ -260,8 +302,13 @@ class BooleanFunction(Expr):
                 column,
                 dtype=self.dtype,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_NONE,
-                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype.plc),
-                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype.plc),
+                source_value=plc.Scalar.from_py(
+                    py_val=True, dtype=self.dtype.plc_type, stream=df.stream
+                ),
+                target_value=plc.Scalar.from_py(
+                    py_val=False, dtype=self.dtype.plc_type, stream=df.stream
+                ),
+                stream=df.stream,
             )
         elif self.name is BooleanFunction.Name.IsDuplicated:
             (column,) = columns
@@ -269,8 +316,13 @@ class BooleanFunction(Expr):
                 column,
                 dtype=self.dtype,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_NONE,
-                source_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype.plc),
-                target_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype.plc),
+                source_value=plc.Scalar.from_py(
+                    py_val=False, dtype=self.dtype.plc_type, stream=df.stream
+                ),
+                target_value=plc.Scalar.from_py(
+                    py_val=True, dtype=self.dtype.plc_type, stream=df.stream
+                ),
+                stream=df.stream,
             )
         elif self.name is BooleanFunction.Name.AllHorizontal:
             return Column(
@@ -278,7 +330,7 @@ class BooleanFunction(Expr):
                 reduce(
                     partial(
                         plc.binaryop.binary_operation,
                         op=plc.binaryop.BinaryOperator.NULL_LOGICAL_AND,
-                        output_type=self.dtype.plc,
+                        output_type=self.dtype.plc_type,
                     ),
                     (c.obj for c in columns),
@@ -290,7 +342,7 @@ class BooleanFunction(Expr):
                 partial(
                     plc.binaryop.binary_operation,
                     op=plc.binaryop.BinaryOperator.NULL_LOGICAL_OR,
-                    output_type=self.dtype.plc,
+                    output_type=self.dtype.plc_type,
                 ),
                 (c.obj for c in columns),
             ),
@@ -300,24 +352,45 @@ class BooleanFunction(Expr):
             needles, haystack = columns
             if haystack.obj.type().id() == plc.TypeId.LIST:
                 # Unwrap values from the list column
-                # the type: ignore is safe because we know that the type ID is LIST,
-                # which always has an inner attribute.
+                # .inner returns DataTypeClass | DataType, need to cast to DataType
                 haystack = Column(
                     haystack.obj.children()[1],
-                    dtype=DataType(haystack.dtype.polars.inner),  # type: ignore[attr-defined]
-                ).astype(needles.dtype)
+                    dtype=DataType(
+                        cast(
+                            pl.DataType, cast(pl.List, haystack.dtype.polars_type).inner
+                        )
+                    ),
+                ).astype(needles.dtype, stream=df.stream)
             if haystack.size:
                 return Column(
-                    plc.search.contains(haystack.obj, needles.obj), dtype=self.dtype
+                    plc.search.contains(
+                        haystack.obj,
+                        needles.obj,
+                        stream=df.stream,
+                    ),
+                    dtype=self.dtype,
                 )
             return Column(
-                plc.Column.from_scalar(plc.Scalar.from_py(py_val=False), needles.size),
+                plc.Column.from_scalar(
+                    plc.Scalar.from_py(py_val=False, stream=df.stream),
+                    needles.size,
+                    stream=df.stream,
+                ),
                 dtype=self.dtype,
             )
         elif self.name is BooleanFunction.Name.Not:
             (column,) = columns
+            # Polars semantics:
+            # integer input: NOT => bitwise invert.
+            # boolean input: NOT => logical NOT.
             return Column(
-                plc.unary.unary_operation(column.obj, plc.unary.UnaryOperator.NOT),
+                plc.unary.unary_operation(
+                    column.obj,
+                    plc.unary.UnaryOperator.NOT
+                    if column.obj.type().id() == plc.TypeId.BOOL8
+                    else plc.unary.UnaryOperator.BIT_INVERT,
+                    stream=df.stream,
+                ),
                 dtype=self.dtype,
             )
         else:
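The final change fixes `Not` for integer inputs to match the Polars semantics spelled out in the new comment: logical NOT only for BOOL8 columns, bitwise invert otherwise, so in two's complement `~1 == -2`. On the CPU engine with recent Polars:

```python
import polars as pl

print(pl.select(~pl.lit(True)).item())              # False (logical NOT)
print(pl.select(~pl.lit(1, dtype=pl.Int8)).item())  # -2    (bitwise invert)
```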