PyPI - cudf-polars-cu13 - Versions diffs - 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl - Mend

cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

cudf_polars/GIT_COMMIT +1 -1
cudf_polars/VERSION +1 -1
cudf_polars/callback.py +60 -15
cudf_polars/containers/column.py +137 -77
cudf_polars/containers/dataframe.py +123 -34
cudf_polars/containers/datatype.py +134 -13
cudf_polars/dsl/expr.py +0 -2
cudf_polars/dsl/expressions/aggregation.py +80 -28
cudf_polars/dsl/expressions/binaryop.py +34 -14
cudf_polars/dsl/expressions/boolean.py +110 -37
cudf_polars/dsl/expressions/datetime.py +59 -30
cudf_polars/dsl/expressions/literal.py +11 -5
cudf_polars/dsl/expressions/rolling.py +460 -119
cudf_polars/dsl/expressions/selection.py +9 -8
cudf_polars/dsl/expressions/slicing.py +1 -1
cudf_polars/dsl/expressions/string.py +256 -114
cudf_polars/dsl/expressions/struct.py +19 -7
cudf_polars/dsl/expressions/ternary.py +33 -3
cudf_polars/dsl/expressions/unary.py +126 -64
cudf_polars/dsl/ir.py +1053 -350
cudf_polars/dsl/to_ast.py +30 -13
cudf_polars/dsl/tracing.py +194 -0
cudf_polars/dsl/translate.py +307 -107
cudf_polars/dsl/utils/aggregations.py +43 -30
cudf_polars/dsl/utils/reshape.py +14 -2
cudf_polars/dsl/utils/rolling.py +12 -8
cudf_polars/dsl/utils/windows.py +35 -20
cudf_polars/experimental/base.py +55 -2
cudf_polars/experimental/benchmarks/pdsds.py +12 -126
cudf_polars/experimental/benchmarks/pdsh.py +792 -2
cudf_polars/experimental/benchmarks/utils.py +596 -39
cudf_polars/experimental/dask_registers.py +47 -20
cudf_polars/experimental/dispatch.py +9 -3
cudf_polars/experimental/distinct.py +2 -0
cudf_polars/experimental/explain.py +15 -2
cudf_polars/experimental/expressions.py +30 -15
cudf_polars/experimental/groupby.py +25 -4
cudf_polars/experimental/io.py +156 -124
cudf_polars/experimental/join.py +53 -23
cudf_polars/experimental/parallel.py +68 -19
cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
cudf_polars/experimental/rapidsmpf/core.py +488 -0
cudf_polars/experimental/rapidsmpf/dask.py +172 -0
cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
cudf_polars/experimental/rapidsmpf/io.py +696 -0
cudf_polars/experimental/rapidsmpf/join.py +322 -0
cudf_polars/experimental/rapidsmpf/lower.py +74 -0
cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
cudf_polars/experimental/rapidsmpf/union.py +115 -0
cudf_polars/experimental/rapidsmpf/utils.py +374 -0
cudf_polars/experimental/repartition.py +9 -2
cudf_polars/experimental/select.py +177 -14
cudf_polars/experimental/shuffle.py +46 -12
cudf_polars/experimental/sort.py +100 -26
cudf_polars/experimental/spilling.py +1 -1
cudf_polars/experimental/statistics.py +24 -5
cudf_polars/experimental/utils.py +25 -7
cudf_polars/testing/asserts.py +13 -8
cudf_polars/testing/io.py +2 -1
cudf_polars/testing/plugin.py +93 -17
cudf_polars/typing/__init__.py +86 -32
cudf_polars/utils/config.py +473 -58
cudf_polars/utils/cuda_stream.py +70 -0
cudf_polars/utils/versions.py +5 -4
cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
{cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
{cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
{cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0

cudf_polars/GIT_COMMIT CHANGED Viewed

	@@ -1 +1 @@
1	- ~~f4e35ca02118eada383e7417273c6cb1857ec66e~~
1	+ 9782a269e689140d2b00b5172a93056bdf19e8c2

cudf_polars/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 25.10.00
1	+ 26.02.000

cudf_polars/callback.py CHANGED Viewed

@@ -11,6 +11,7 @@ import textwrap
 import time
 import warnings
 from functools import cache, partial
+from threading import Lock
 from typing import TYPE_CHECKING, Literal, overload
 import nvtx
@@ -22,9 +23,14 @@ import pylibcudf
 import rmm
 from rmm._cuda import gpu
+import cudf_polars.dsl.tracing
+from cudf_polars.dsl.ir import IRExecutionContext
 from cudf_polars.dsl.tracing import CUDF_POLARS_NVTX_DOMAIN
 from cudf_polars.dsl.translate import Translator
-from cudf_polars.utils.config import _env_get_int, get_total_device_memory
+from cudf_polars.utils.config import (
+    _env_get_int,
+    get_total_device_memory,
+)
 from cudf_polars.utils.timer import Timer
 if TYPE_CHECKING:
@@ -35,7 +41,7 @@ if TYPE_CHECKING:
     from cudf_polars.dsl.ir import IR
     from cudf_polars.typing import NodeTraverser
-    from cudf_polars.utils.config import ConfigOptions
+    from cudf_polars.utils.config import ConfigOptions, MemoryResourceConfig
 __all__: list[str] = ["execute_with_cudf"]
@@ -44,6 +50,7 @@ __all__: list[str] = ["execute_with_cudf"]
 def default_memory_resource(
     device: int,
     cuda_managed_memory: bool,  # noqa: FBT001
+    memory_resource_config: MemoryResourceConfig | None,
 ) -> rmm.mr.DeviceMemoryResource:
     """
     Return the default memory resource for cudf-polars.
@@ -55,6 +62,9 @@ def default_memory_resource(
         the active device when this function is called.
     cuda_managed_memory
         Whether to use managed memory or not.
+    memory_resource_config
+        Memory resource configuration to use. If ``None``, the default
+        memory resource is used.
     Returns
     -------
@@ -64,7 +74,9 @@ def default_memory_resource(
         else, an async pool resource is returned.
     """
     try:
-        if (
+        if memory_resource_config is not None:
+            mr = memory_resource_config.create_memory_resource()
+        elif (
             cuda_managed_memory
             and pylibcudf.utils._is_concurrent_managed_access_supported()
         ):
@@ -89,7 +101,7 @@ def default_memory_resource(
         ):
             raise ComputeError(
                 "GPU engine requested, but incorrect cudf-polars package installed. "
-                "cudf-polars requires CUDA 12.0+ to installed."
+                "cudf-polars requires CUDA 12.2+ to installed."
             ) from None
         else:
             raise
@@ -100,6 +112,7 @@ def default_memory_resource(
 @contextlib.contextmanager
 def set_memory_resource(
     mr: rmm.mr.DeviceMemoryResource | None,
+    memory_resource_config: MemoryResourceConfig | None,
 ) -> Generator[rmm.mr.DeviceMemoryResource, None, None]:
     """
     Set the current memory resource for an execution block.
@@ -109,6 +122,9 @@ def set_memory_resource(
     mr
         Memory resource to use. If `None`, calls :func:`default_memory_resource`
         to obtain an mr on the currently active device.
+    memory_resource_config
+        Memory resource configuration to use when a concrete memory resource.
+        is not provided. If ``None``, the default memory resource is used.
     Returns
     -------
@@ -132,7 +148,14 @@ def set_memory_resource(
                 )
                 != 0
             ),
+            memory_resource_config=memory_resource_config,
         )
+    if (
+        cudf_polars.dsl.tracing.LOG_TRACES
+    ):  # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
+        mr = rmm.mr.StatisticsResourceAdaptor(mr)
     rmm.mr.set_current_device_resource(mr)
     try:
         yield mr
@@ -140,6 +163,11 @@ def set_memory_resource(
         rmm.mr.set_current_device_resource(previous)
+# libcudf doesn't support executing on multiple devices from within the same process.
+SEEN_DEVICE = None
+SEEN_DEVICE_LOCK = Lock()
 @contextlib.contextmanager
 def set_device(device: int | None) -> Generator[int, None, None]:
     """
@@ -158,13 +186,28 @@ def set_device(device: int | None) -> Generator[int, None, None]:
     -----
     At exit, the device is restored to whatever was current at entry.
     """
-    previous: int = gpu.getDevice()
-    if device is not None:
-        gpu.setDevice(device)
-    try:
-        yield previous
-    finally:
-        gpu.setDevice(previous)
+    global SEEN_DEVICE  # noqa: PLW0603
+    current: int = gpu.getDevice()
+    to_use = device if device is not None else current
+    with SEEN_DEVICE_LOCK:
+        if (
+            SEEN_DEVICE is not None and to_use != SEEN_DEVICE
+        ):  # pragma: no cover; requires multiple GPUs in CI
+            raise RuntimeError(
+                "cudf-polars does not support running queries on "
+                "multiple devices in the same process. "
+                f"A previous query used device-{SEEN_DEVICE}, "
+                f"the current query is using device-{to_use}."
+            )
+        SEEN_DEVICE = to_use
+    if to_use != current:
+        gpu.setDevice(to_use)
+        try:
+            yield to_use
+        finally:
+            gpu.setDevice(current)
+    else:
+        yield to_use
 @overload
@@ -211,14 +254,16 @@ def _callback(
     assert n_rows is None
     if timer is not None:
         assert should_time
     with (
         nvtx.annotate(message="ExecuteIR", domain=CUDF_POLARS_NVTX_DOMAIN),
         # Device must be set before memory resource is obtained.
         set_device(config_options.device),
-        set_memory_resource(memory_resource),
+        set_memory_resource(memory_resource, config_options.memory_resource_config),
     ):
         if config_options.executor.name == "in-memory":
-            df = ir.evaluate(cache={}, timer=timer).to_polars()
+            context = IRExecutionContext.from_config_options(config_options)
+            df = ir.evaluate(cache={}, timer=timer, context=context).to_polars()
             if timer is None:
                 return df
             else:
@@ -236,7 +281,7 @@ def _callback(
                     """)
                 raise NotImplementedError(msg)
-            return evaluate_streaming(ir, config_options).to_polars()
+            return evaluate_streaming(ir, config_options)
         assert_never(f"Unknown executor '{config_options.executor}'")
@@ -287,7 +332,7 @@ def execute_with_cudf(
         if (
             memory_resource is None
             and translator.config_options.executor.name == "streaming"
-            and translator.config_options.executor.scheduler == "distributed"
+            and translator.config_options.executor.cluster == "distributed"
         ):  # pragma: no cover; Requires distributed cluster
             memory_resource = rmm.mr.get_current_device_resource()
         if len(ir_translation_errors):

cudf_polars/containers/column.py CHANGED Viewed

@@ -5,11 +5,8 @@
 from __future__ import annotations
-import functools
 from typing import TYPE_CHECKING
-import polars as pl
-import polars.datatypes.convert
 from polars.exceptions import InvalidOperationError
 import pylibcudf as plc
@@ -19,15 +16,21 @@ from pylibcudf.strings.convert.convert_integers import (
     is_integer,
     to_integers,
 )
-from pylibcudf.traits import is_floating_point
 from cudf_polars.containers import DataType
+from cudf_polars.containers.datatype import _dtype_from_header, _dtype_to_header
 from cudf_polars.utils import conversion
 from cudf_polars.utils.dtypes import is_order_preserving_cast
 if TYPE_CHECKING:
+    from collections.abc import Callable
     from typing_extensions import Self
+    from polars import Series as pl_Series
+    from rmm.pylibrmm.stream import Stream
     from cudf_polars.typing import (
         ColumnHeader,
         ColumnOptions,
@@ -38,22 +41,6 @@ if TYPE_CHECKING:
 __all__: list[str] = ["Column"]
-def _dtype_short_repr_to_dtype(dtype_str: str) -> pl.DataType:
-    """Convert a Polars dtype short repr to a Polars dtype."""
-    # limitations of dtype_short_repr_to_dtype described in
-    # py-polars/polars/datatypes/convert.py#L299
-    if dtype_str.startswith("list["):
-        stripped = dtype_str.removeprefix("list[").removesuffix("]")
-        return pl.List(_dtype_short_repr_to_dtype(stripped))
-    pl_type = polars.datatypes.convert.dtype_short_repr_to_dtype(dtype_str)
-    if pl_type is None:
-        raise ValueError(f"{dtype_str} was not able to be parsed by Polars.")
-    if isinstance(pl_type, polars.datatypes.DataTypeClass):
-        return pl_type()
-    else:
-        return pl_type
 class Column:
     """An immutable column with sortedness metadata."""
@@ -85,7 +72,10 @@ class Column:
     @classmethod
     def deserialize(
-        cls, header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]
+        cls,
+        header: ColumnHeader,
+        frames: tuple[memoryview[bytes], plc.gpumemoryview],
+        stream: Stream,
     ) -> Self:
         """
         Create a Column from a serialized representation returned by `.serialize()`.
@@ -96,6 +86,10 @@ class Column:
             The (unpickled) metadata required to reconstruct the object.
         frames
             Two-tuple of frames (a memoryview and a gpumemoryview).
+        stream
+            CUDA stream used for device memory operations and kernel launches
+            on this column. The caller is responsible for ensuring that
+            the data in ``frames`` is valid on ``stream``.
         Returns
         -------
@@ -104,7 +98,7 @@ class Column:
         """
         packed_metadata, packed_gpu_data = frames
         (plc_column,) = plc.contiguous_split.unpack_from_memoryviews(
-            packed_metadata, packed_gpu_data
+            packed_metadata, packed_gpu_data, stream
         ).columns()
         return cls(plc_column, **cls.deserialize_ctor_kwargs(header["column_kwargs"]))
@@ -113,20 +107,18 @@ class Column:
         column_kwargs: ColumnOptions,
     ) -> DeserializedColumnOptions:
         """Deserialize the constructor kwargs for a Column."""
-        dtype = DataType(  # pragma: no cover
-            _dtype_short_repr_to_dtype(column_kwargs["dtype"])
-        )
         return {
             "is_sorted": column_kwargs["is_sorted"],
             "order": column_kwargs["order"],
             "null_order": column_kwargs["null_order"],
             "name": column_kwargs["name"],
-            "dtype": dtype,
+            "dtype": DataType(_dtype_from_header(column_kwargs["dtype"])),
         }
     def serialize(
         self,
-    ) -> tuple[ColumnHeader, tuple[memoryview, plc.gpumemoryview]]:
+        stream: Stream,
+    ) -> tuple[ColumnHeader, tuple[memoryview[bytes], plc.gpumemoryview]]:
         """
         Serialize the Column into header and frames.
@@ -145,7 +137,7 @@ class Column:
         frames
             Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
         """
-        packed = plc.contiguous_split.pack(plc.Table([self.obj]))
+        packed = plc.contiguous_split.pack(plc.Table([self.obj]), stream=stream)
         header: ColumnHeader = {
             "column_kwargs": self.serialize_ctor_kwargs(),
             "frame_count": 2,
@@ -159,14 +151,20 @@ class Column:
             "order": self.order,
             "null_order": self.null_order,
             "name": self.name,
-            "dtype": pl.polars.dtype_str_repr(self.dtype.polars),
+            "dtype": _dtype_to_header(self.dtype.polars_type),
         }
-    @functools.cached_property
-    def obj_scalar(self) -> plc.Scalar:
+    def obj_scalar(self, stream: Stream) -> plc.Scalar:
         """
         A copy of the column object as a pylibcudf Scalar.
+        Parameters
+        ----------
+        stream
+            CUDA stream used for device memory operations and kernel launches.
+            ``self.obj`` must be valid on this stream, and the result will be
+            valid on this stream.
         Returns
         -------
         pylibcudf Scalar object.
@@ -178,7 +176,7 @@ class Column:
         """
         if not self.is_scalar:
             raise ValueError(f"Cannot convert a column of length {self.size} to scalar")
-        return plc.copying.get_element(self.obj, 0)
+        return plc.copying.get_element(self.obj, 0, stream=stream)
     def rename(self, name: str | None, /) -> Self:
         """
@@ -228,6 +226,7 @@ class Column:
         *,
         order: plc.types.Order,
         null_order: plc.types.NullOrder,
+        stream: Stream,
     ) -> bool:
         """
         Check if the column is sorted.
@@ -238,6 +237,9 @@ class Column:
             The requested sort order.
         null_order
             Where nulls sort to.
+        stream
+            CUDA stream used for device memory operations and kernel launches
+            on this Column. The data in ``self.obj`` must be valid on this stream.
         Returns
         -------
@@ -254,14 +256,16 @@ class Column:
             return self.order == order and (
                 self.null_count == 0 or self.null_order == null_order
             )
-        if plc.sorting.is_sorted(plc.Table([self.obj]), [order], [null_order]):
+        if plc.sorting.is_sorted(
+            plc.Table([self.obj]), [order], [null_order], stream=stream
+        ):
             self.sorted = plc.types.Sorted.YES
             self.order = order
             self.null_order = null_order
             return True
         return False
-    def astype(self, dtype: DataType) -> Column:
+    def astype(self, dtype: DataType, stream: Stream, *, strict: bool = True) -> Column:
         """
         Cast the column to as the requested dtype.
@@ -269,6 +273,12 @@ class Column:
         ----------
         dtype
             Datatype to cast to.
+        stream
+            CUDA stream used for device memory operations and kernel launches
+            on this Column. The data in ``self.obj`` must be valid on this stream.
+        strict
+            If True, raise an error if the cast is unsupported.
+            If False, return nulls for unsupported casts.
         Returns
         -------
@@ -284,7 +294,7 @@ class Column:
         This only produces a copy if the requested dtype doesn't match
         the current one.
         """
-        plc_dtype = dtype.plc
+        plc_dtype = dtype.plc_type
         if self.obj.type() == plc_dtype:
             return self
@@ -292,12 +302,17 @@ class Column:
             plc_dtype.id() == plc.TypeId.STRING
             or self.obj.type().id() == plc.TypeId.STRING
         ):
-            return Column(self._handle_string_cast(plc_dtype), dtype=dtype)
+            return Column(
+                self._handle_string_cast(plc_dtype, stream=stream, strict=strict),
+                dtype=dtype,
+            )
         elif plc.traits.is_integral_not_bool(
             self.obj.type()
         ) and plc.traits.is_timestamp(plc_dtype):
-            upcasted = plc.unary.cast(self.obj, plc.DataType(plc.TypeId.INT64))
-            result = plc.column.Column(
+            upcasted = plc.unary.cast(
+                self.obj, plc.DataType(plc.TypeId.INT64), stream=stream
+            )
+            plc_col = plc.column.Column(
                 plc_dtype,
                 upcasted.size(),
                 upcasted.data(),
@@ -306,11 +321,11 @@ class Column:
                 upcasted.offset(),
                 upcasted.children(),
             )
-            return Column(result, dtype=dtype).sorted_like(self)
+            return Column(plc_col, dtype=dtype).sorted_like(self)
         elif plc.traits.is_integral_not_bool(plc_dtype) and plc.traits.is_timestamp(
             self.obj.type()
         ):
-            result = plc.column.Column(
+            plc_col = plc.column.Column(
                 plc.DataType(plc.TypeId.INT64),
                 self.obj.size(),
                 self.obj.data(),
@@ -319,42 +334,66 @@ class Column:
                 self.obj.offset(),
                 self.obj.children(),
             )
-            return Column(plc.unary.cast(result, plc_dtype), dtype=dtype).sorted_like(
-                self
-            )
+            return Column(
+                plc.unary.cast(plc_col, plc_dtype, stream=stream), dtype=dtype
+            ).sorted_like(self)
         else:
-            result = Column(plc.unary.cast(self.obj, plc_dtype), dtype=dtype)
+            result = Column(
+                plc.unary.cast(self.obj, plc_dtype, stream=stream), dtype=dtype
+            )
             if is_order_preserving_cast(self.obj.type(), plc_dtype):
                 return result.sorted_like(self)
             return result
-    def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
+    def _handle_string_cast(
+        self, dtype: plc.DataType, stream: Stream, *, strict: bool
+    ) -> plc.Column:
         if dtype.id() == plc.TypeId.STRING:
-            if is_floating_point(self.obj.type()):
-                return from_floats(self.obj)
+            if plc.traits.is_floating_point(self.obj.type()):
+                return from_floats(self.obj, stream=stream)
+            elif plc.traits.is_integral_not_bool(self.obj.type()):
+                return from_integers(self.obj, stream=stream)
             else:
-                return from_integers(self.obj)
+                raise InvalidOperationError(
+                    f"Unsupported casting from {self.dtype.id()} to {dtype.id()}."
+                )
+        type_checker: Callable[[plc.Column, Stream], plc.Column]
+        type_caster: Callable[[plc.Column, plc.DataType, Stream], plc.Column]
+        if plc.traits.is_floating_point(dtype):
+            type_checker = is_float
+            type_caster = to_floats
+        elif plc.traits.is_integral_not_bool(dtype):
+            # is_integer has a second optional int_type: plc.DataType | None = None argument
+            # we do not use
+            # unused-ignore for if RMM is missing
+            type_checker = is_integer  # type: ignore[assignment,unused-ignore]
+            type_caster = to_integers
         else:
-            if is_floating_point(dtype):
-                floats = is_float(self.obj)
-                if not plc.reduce.reduce(
-                    floats,
-                    plc.aggregation.all(),
-                    plc.DataType(plc.TypeId.BOOL8),
-                ).to_py():
-                    raise InvalidOperationError("Conversion from `str` failed.")
-                return to_floats(self.obj, dtype)
+            raise InvalidOperationError(
+                f"Unsupported casting from {self.dtype.id()} to {dtype.id()}."
+            )
+        castable = type_checker(self.obj, stream=stream)  # type: ignore[call-arg]
+        if not plc.reduce.reduce(
+            castable,
+            plc.aggregation.all(),
+            plc.DataType(plc.TypeId.BOOL8),
+            stream=stream,
+        ).to_py(stream=stream):
+            if strict:
+                raise InvalidOperationError(
+                    f"Conversion from {self.dtype.id()} to {dtype.id()} failed."
+                )
             else:
-                integers = is_integer(self.obj)
-                if not plc.reduce.reduce(
-                    integers,
-                    plc.aggregation.all(),
-                    plc.DataType(plc.TypeId.BOOL8),
-                ).to_py():
-                    raise InvalidOperationError("Conversion from `str` failed.")
-                return to_integers(self.obj, dtype)
-    def copy_metadata(self, from_: pl.Series, /) -> Self:
+                values = self.obj.with_mask(
+                    *plc.transform.bools_to_mask(castable, stream=stream)
+                )
+        else:
+            values = self.obj
+        return type_caster(values, dtype, stream=stream)
+    def copy_metadata(self, from_: pl_Series, /) -> Self:
         """
         Copy metadata from a host series onto self.
@@ -439,27 +478,44 @@ class Column:
             dtype=self.dtype,
         )
-    def mask_nans(self) -> Self:
+    def mask_nans(self, stream: Stream) -> Self:
         """Return a shallow copy of self with nans masked out."""
         if plc.traits.is_floating_point(self.obj.type()):
             old_count = self.null_count
-            mask, new_count = plc.transform.nans_to_nulls(self.obj)
+            mask, new_count = plc.transform.nans_to_nulls(self.obj, stream=stream)
             result = type(self)(self.obj.with_mask(mask, new_count), self.dtype)
             if old_count == new_count:
                 return result.sorted_like(self)
             return result
         return self.copy()
-    @functools.cached_property
-    def nan_count(self) -> int:
-        """Return the number of NaN values in the column."""
+    def nan_count(self, stream: Stream) -> int:
+        """
+        Return the number of NaN values in the column.
+        Parameters
+        ----------
+        stream
+            CUDA stream used for device memory operations and kernel launches.
+            ``self.obj`` must be valid on this stream, and the result will be
+            valid on this stream.
+        Returns
+        -------
+        Number of NaN values in the column.
+        """
+        result: int
         if self.size > 0 and plc.traits.is_floating_point(self.obj.type()):
-            return plc.reduce.reduce(
-                plc.unary.is_nan(self.obj),
+            # See https://github.com/rapidsai/cudf/issues/20202 for we type ignore
+            result = plc.reduce.reduce(  # type: ignore[assignment]
+                plc.unary.is_nan(self.obj, stream=stream),
                 plc.aggregation.sum(),
                 plc.types.SIZE_TYPE,
-            ).to_py()
-        return 0
+                stream=stream,
+            ).to_py(stream=stream)
+        else:
+            result = 0
+        return result
     @property
     def size(self) -> int:
@@ -471,7 +527,7 @@ class Column:
         """Return the number of Null values in the column."""
         return self.obj.null_count()
-    def slice(self, zlice: Slice | None) -> Self:
+    def slice(self, zlice: Slice | None, stream: Stream) -> Self:
         """
         Slice a column.
@@ -480,6 +536,9 @@ class Column:
         zlice
             optional, tuple of start and length, negative values of start
             treated as for python indexing. If not provided, returns self.
+        stream
+            CUDA stream used for device memory operations and kernel launches
+            on this Column. The data in ``self.obj`` must be valid on this stream.
         Returns
         -------
@@ -490,6 +549,7 @@ class Column:
         (table,) = plc.copying.slice(
             plc.Table([self.obj]),
             conversion.from_polars_slice(zlice, num_rows=self.size),
+            stream=stream,
         )
         (column,) = table.columns()
         return type(self)(column, name=self.name, dtype=self.dtype).sorted_like(self)

cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl