cudf-polars-cu13 25.10.0__py3-none-any.whl → 25.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +32 -8
  4. cudf_polars/containers/column.py +94 -59
  5. cudf_polars/containers/dataframe.py +123 -34
  6. cudf_polars/containers/datatype.py +134 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/binaryop.py +34 -14
  10. cudf_polars/dsl/expressions/boolean.py +110 -37
  11. cudf_polars/dsl/expressions/datetime.py +59 -30
  12. cudf_polars/dsl/expressions/literal.py +11 -5
  13. cudf_polars/dsl/expressions/rolling.py +460 -119
  14. cudf_polars/dsl/expressions/selection.py +9 -8
  15. cudf_polars/dsl/expressions/slicing.py +1 -1
  16. cudf_polars/dsl/expressions/string.py +235 -102
  17. cudf_polars/dsl/expressions/struct.py +19 -7
  18. cudf_polars/dsl/expressions/ternary.py +9 -3
  19. cudf_polars/dsl/expressions/unary.py +117 -58
  20. cudf_polars/dsl/ir.py +923 -290
  21. cudf_polars/dsl/to_ast.py +30 -13
  22. cudf_polars/dsl/tracing.py +194 -0
  23. cudf_polars/dsl/translate.py +294 -97
  24. cudf_polars/dsl/utils/aggregations.py +34 -26
  25. cudf_polars/dsl/utils/reshape.py +14 -2
  26. cudf_polars/dsl/utils/rolling.py +12 -8
  27. cudf_polars/dsl/utils/windows.py +35 -20
  28. cudf_polars/experimental/base.py +45 -2
  29. cudf_polars/experimental/benchmarks/pdsds.py +12 -126
  30. cudf_polars/experimental/benchmarks/pdsh.py +791 -1
  31. cudf_polars/experimental/benchmarks/utils.py +515 -39
  32. cudf_polars/experimental/dask_registers.py +47 -20
  33. cudf_polars/experimental/dispatch.py +9 -3
  34. cudf_polars/experimental/explain.py +15 -2
  35. cudf_polars/experimental/expressions.py +22 -10
  36. cudf_polars/experimental/groupby.py +23 -4
  37. cudf_polars/experimental/io.py +93 -83
  38. cudf_polars/experimental/join.py +39 -22
  39. cudf_polars/experimental/parallel.py +60 -14
  40. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  41. cudf_polars/experimental/rapidsmpf/core.py +361 -0
  42. cudf_polars/experimental/rapidsmpf/dispatch.py +150 -0
  43. cudf_polars/experimental/rapidsmpf/io.py +604 -0
  44. cudf_polars/experimental/rapidsmpf/join.py +237 -0
  45. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  46. cudf_polars/experimental/rapidsmpf/nodes.py +494 -0
  47. cudf_polars/experimental/rapidsmpf/repartition.py +151 -0
  48. cudf_polars/experimental/rapidsmpf/shuffle.py +277 -0
  49. cudf_polars/experimental/rapidsmpf/union.py +96 -0
  50. cudf_polars/experimental/rapidsmpf/utils.py +162 -0
  51. cudf_polars/experimental/repartition.py +9 -2
  52. cudf_polars/experimental/select.py +177 -14
  53. cudf_polars/experimental/shuffle.py +28 -8
  54. cudf_polars/experimental/sort.py +92 -25
  55. cudf_polars/experimental/statistics.py +24 -5
  56. cudf_polars/experimental/utils.py +25 -7
  57. cudf_polars/testing/asserts.py +13 -8
  58. cudf_polars/testing/io.py +2 -1
  59. cudf_polars/testing/plugin.py +88 -15
  60. cudf_polars/typing/__init__.py +86 -32
  61. cudf_polars/utils/config.py +406 -58
  62. cudf_polars/utils/cuda_stream.py +70 -0
  63. cudf_polars/utils/versions.py +3 -2
  64. cudf_polars_cu13-25.12.0.dist-info/METADATA +182 -0
  65. cudf_polars_cu13-25.12.0.dist-info/RECORD +104 -0
  66. cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
  67. cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
  68. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-25.12.0.dist-info}/WHEEL +0 -0
  69. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-25.12.0.dist-info}/licenses/LICENSE +0 -0
  70. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-25.12.0.dist-info}/top_level.txt +0 -0
cudf_polars/GIT_COMMIT CHANGED
@@ -1 +1 @@
- f4e35ca02118eada383e7417273c6cb1857ec66e
+ 580975be72b3516c2c18da149b62de557b28fb67
cudf_polars/VERSION CHANGED
@@ -1 +1 @@
- 25.10.00
+ 25.12.00
cudf_polars/callback.py CHANGED
@@ -22,9 +22,14 @@ import pylibcudf
  import rmm
  from rmm._cuda import gpu

+ import cudf_polars.dsl.tracing
+ from cudf_polars.dsl.ir import IRExecutionContext
  from cudf_polars.dsl.tracing import CUDF_POLARS_NVTX_DOMAIN
  from cudf_polars.dsl.translate import Translator
- from cudf_polars.utils.config import _env_get_int, get_total_device_memory
+ from cudf_polars.utils.config import (
+ _env_get_int,
+ get_total_device_memory,
+ )
  from cudf_polars.utils.timer import Timer

  if TYPE_CHECKING:
@@ -35,7 +40,7 @@ if TYPE_CHECKING:

  from cudf_polars.dsl.ir import IR
  from cudf_polars.typing import NodeTraverser
- from cudf_polars.utils.config import ConfigOptions
+ from cudf_polars.utils.config import ConfigOptions, MemoryResourceConfig

  __all__: list[str] = ["execute_with_cudf"]

@@ -44,6 +49,7 @@ __all__: list[str] = ["execute_with_cudf"]
  def default_memory_resource(
  device: int,
  cuda_managed_memory: bool, # noqa: FBT001
+ memory_resource_config: MemoryResourceConfig | None,
  ) -> rmm.mr.DeviceMemoryResource:
  """
  Return the default memory resource for cudf-polars.
@@ -55,6 +61,9 @@
  the active device when this function is called.
  cuda_managed_memory
  Whether to use managed memory or not.
+ memory_resource_config
+ Memory resource configuration to use. If ``None``, the default
+ memory resource is used.

  Returns
  -------
@@ -64,7 +73,9 @@
  else, an async pool resource is returned.
  """
  try:
- if (
+ if memory_resource_config is not None:
+ mr = memory_resource_config.create_memory_resource()
+ elif (
  cuda_managed_memory
  and pylibcudf.utils._is_concurrent_managed_access_supported()
  ):
@@ -89,7 +100,7 @@
  ):
  raise ComputeError(
  "GPU engine requested, but incorrect cudf-polars package installed. "
- "cudf-polars requires CUDA 12.0+ to installed."
+ "cudf-polars requires CUDA 12.2+ to installed."
  ) from None
  else:
  raise
@@ -100,6 +111,7 @@
  @contextlib.contextmanager
  def set_memory_resource(
  mr: rmm.mr.DeviceMemoryResource | None,
+ memory_resource_config: MemoryResourceConfig | None,
  ) -> Generator[rmm.mr.DeviceMemoryResource, None, None]:
  """
  Set the current memory resource for an execution block.
@@ -109,6 +121,9 @@
  mr
  Memory resource to use. If `None`, calls :func:`default_memory_resource`
  to obtain an mr on the currently active device.
+ memory_resource_config
+ Memory resource configuration to use when a concrete memory resource.
+ is not provided. If ``None``, the default memory resource is used.

  Returns
  -------
@@ -132,7 +147,14 @@
  )
  != 0
  ),
+ memory_resource_config=memory_resource_config,
  )
+
+ if (
+ cudf_polars.dsl.tracing.LOG_TRACES
+ ): # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
+ mr = rmm.mr.StatisticsResourceAdaptor(mr)
+
  rmm.mr.set_current_device_resource(mr)
  try:
  yield mr
@@ -211,14 +233,16 @@ def _callback(
  assert n_rows is None
  if timer is not None:
  assert should_time
+
  with (
  nvtx.annotate(message="ExecuteIR", domain=CUDF_POLARS_NVTX_DOMAIN),
  # Device must be set before memory resource is obtained.
  set_device(config_options.device),
- set_memory_resource(memory_resource),
+ set_memory_resource(memory_resource, config_options.memory_resource_config),
  ):
  if config_options.executor.name == "in-memory":
- df = ir.evaluate(cache={}, timer=timer).to_polars()
+ context = IRExecutionContext.from_config_options(config_options)
+ df = ir.evaluate(cache={}, timer=timer, context=context).to_polars()
  if timer is None:
  return df
  else:
@@ -236,7 +260,7 @@
  """)
  raise NotImplementedError(msg)

- return evaluate_streaming(ir, config_options).to_polars()
+ return evaluate_streaming(ir, config_options)
  assert_never(f"Unknown executor '{config_options.executor}'")


@@ -287,7 +311,7 @@ def execute_with_cudf(
  if (
  memory_resource is None
  and translator.config_options.executor.name == "streaming"
- and translator.config_options.executor.scheduler == "distributed"
+ and translator.config_options.executor.cluster == "distributed"
  ): # pragma: no cover; Requires distributed cluster
  memory_resource = rmm.mr.get_current_device_resource()
  if len(ir_translation_errors):
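
The callback.py hunks above add a third parameter, memory_resource_config, to default_memory_resource, a second parameter to the set_memory_resource context manager, and an optional rmm.mr.StatisticsResourceAdaptor wrapper when trace logging is enabled. A minimal sketch of the new calling convention, inferred only from the signatures shown in this diff (passing None falls back to the previous behaviour; device 0 is an arbitrary example, not from the diff):

    from cudf_polars.callback import default_memory_resource, set_memory_resource

    # Build a resource explicitly; memory_resource_config=None keeps the
    # pre-25.12 selection between a managed pool and an async pool.
    mr = default_memory_resource(
        device=0,
        cuda_managed_memory=False,
        memory_resource_config=None,
    )

    # Or let the context manager construct one for an execution block.
    # With CUDF_POLARS_LOG_TRACES=1 it additionally wraps the resource in
    # rmm.mr.StatisticsResourceAdaptor, per the hunk above.
    with set_memory_resource(None, None) as active_mr:
        ...  # IR evaluation runs with active_mr as the current device resource
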
cudf_polars/containers/column.py CHANGED
@@ -5,11 +5,8 @@

  from __future__ import annotations

- import functools
  from typing import TYPE_CHECKING

- import polars as pl
- import polars.datatypes.convert
  from polars.exceptions import InvalidOperationError

  import pylibcudf as plc
@@ -22,12 +19,17 @@ from pylibcudf.strings.convert.convert_integers import (
  from pylibcudf.traits import is_floating_point

  from cudf_polars.containers import DataType
+ from cudf_polars.containers.datatype import _dtype_from_header, _dtype_to_header
  from cudf_polars.utils import conversion
  from cudf_polars.utils.dtypes import is_order_preserving_cast

  if TYPE_CHECKING:
  from typing_extensions import Self

+ from polars import Series as pl_Series
+
+ from rmm.pylibrmm.stream import Stream
+
  from cudf_polars.typing import (
  ColumnHeader,
  ColumnOptions,
@@ -38,22 +40,6 @@ if TYPE_CHECKING:

  __all__: list[str] = ["Column"]

- def _dtype_short_repr_to_dtype(dtype_str: str) -> pl.DataType:
- """Convert a Polars dtype short repr to a Polars dtype."""
- # limitations of dtype_short_repr_to_dtype described in
- # py-polars/polars/datatypes/convert.py#L299
- if dtype_str.startswith("list["):
- stripped = dtype_str.removeprefix("list[").removesuffix("]")
- return pl.List(_dtype_short_repr_to_dtype(stripped))
- pl_type = polars.datatypes.convert.dtype_short_repr_to_dtype(dtype_str)
- if pl_type is None:
- raise ValueError(f"{dtype_str} was not able to be parsed by Polars.")
- if isinstance(pl_type, polars.datatypes.DataTypeClass):
- return pl_type()
- else:
- return pl_type
-
-
  class Column:
  """An immutable column with sortedness metadata."""

@@ -85,7 +71,10 @@

  @classmethod
  def deserialize(
- cls, header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]
+ cls,
+ header: ColumnHeader,
+ frames: tuple[memoryview[bytes], plc.gpumemoryview],
+ stream: Stream,
  ) -> Self:
  """
  Create a Column from a serialized representation returned by `.serialize()`.
@@ -96,6 +85,10 @@
  The (unpickled) metadata required to reconstruct the object.
  frames
  Two-tuple of frames (a memoryview and a gpumemoryview).
+ stream
+ CUDA stream used for device memory operations and kernel launches
+ on this column. The caller is responsible for ensuring that
+ the data in ``frames`` is valid on ``stream``.

  Returns
  -------
@@ -104,7 +97,7 @@
  """
  packed_metadata, packed_gpu_data = frames
  (plc_column,) = plc.contiguous_split.unpack_from_memoryviews(
- packed_metadata, packed_gpu_data
+ packed_metadata, packed_gpu_data, stream
  ).columns()
  return cls(plc_column, **cls.deserialize_ctor_kwargs(header["column_kwargs"]))

@@ -113,20 +106,18 @@
  column_kwargs: ColumnOptions,
  ) -> DeserializedColumnOptions:
  """Deserialize the constructor kwargs for a Column."""
- dtype = DataType( # pragma: no cover
- _dtype_short_repr_to_dtype(column_kwargs["dtype"])
- )
  return {
  "is_sorted": column_kwargs["is_sorted"],
  "order": column_kwargs["order"],
  "null_order": column_kwargs["null_order"],
  "name": column_kwargs["name"],
- "dtype": dtype,
+ "dtype": DataType(_dtype_from_header(column_kwargs["dtype"])),
  }

  def serialize(
  self,
- ) -> tuple[ColumnHeader, tuple[memoryview, plc.gpumemoryview]]:
+ stream: Stream,
+ ) -> tuple[ColumnHeader, tuple[memoryview[bytes], plc.gpumemoryview]]:
  """
  Serialize the Column into header and frames.

@@ -145,7 +136,7 @@
  frames
  Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
  """
- packed = plc.contiguous_split.pack(plc.Table([self.obj]))
+ packed = plc.contiguous_split.pack(plc.Table([self.obj]), stream=stream)
  header: ColumnHeader = {
  "column_kwargs": self.serialize_ctor_kwargs(),
  "frame_count": 2,
@@ -159,14 +150,20 @@
  "order": self.order,
  "null_order": self.null_order,
  "name": self.name,
- "dtype": pl.polars.dtype_str_repr(self.dtype.polars),
+ "dtype": _dtype_to_header(self.dtype.polars_type),
  }

- @functools.cached_property
- def obj_scalar(self) -> plc.Scalar:
+ def obj_scalar(self, stream: Stream) -> plc.Scalar:
  """
  A copy of the column object as a pylibcudf Scalar.

+ Parameters
+ ----------
+ stream
+ CUDA stream used for device memory operations and kernel launches.
+ ``self.obj`` must be valid on this stream, and the result will be
+ valid on this stream.
+
  Returns
  -------
  pylibcudf Scalar object.
@@ -178,7 +175,7 @@
  """
  if not self.is_scalar:
  raise ValueError(f"Cannot convert a column of length {self.size} to scalar")
- return plc.copying.get_element(self.obj, 0)
+ return plc.copying.get_element(self.obj, 0, stream=stream)

  def rename(self, name: str | None, /) -> Self:
  """
@@ -228,6 +225,7 @@
  *,
  order: plc.types.Order,
  null_order: plc.types.NullOrder,
+ stream: Stream,
  ) -> bool:
  """
  Check if the column is sorted.
@@ -238,6 +236,9 @@
  The requested sort order.
  null_order
  Where nulls sort to.
+ stream
+ CUDA stream used for device memory operations and kernel launches
+ on this Column. The data in ``self.obj`` must be valid on this stream.

  Returns
  -------
@@ -254,14 +255,16 @@
  return self.order == order and (
  self.null_count == 0 or self.null_order == null_order
  )
- if plc.sorting.is_sorted(plc.Table([self.obj]), [order], [null_order]):
+ if plc.sorting.is_sorted(
+ plc.Table([self.obj]), [order], [null_order], stream=stream
+ ):
  self.sorted = plc.types.Sorted.YES
  self.order = order
  self.null_order = null_order
  return True
  return False

- def astype(self, dtype: DataType) -> Column:
+ def astype(self, dtype: DataType, stream: Stream) -> Column:
  """
  Cast the column to as the requested dtype.

@@ -269,6 +272,9 @@
  ----------
  dtype
  Datatype to cast to.
+ stream
+ CUDA stream used for device memory operations and kernel launches
+ on this Column. The data in ``self.obj`` must be valid on this stream.

  Returns
  -------
@@ -284,7 +290,7 @@
  This only produces a copy if the requested dtype doesn't match
  the current one.
  """
- plc_dtype = dtype.plc
+ plc_dtype = dtype.plc_type
  if self.obj.type() == plc_dtype:
  return self

@@ -292,12 +298,16 @@
  plc_dtype.id() == plc.TypeId.STRING
  or self.obj.type().id() == plc.TypeId.STRING
  ):
- return Column(self._handle_string_cast(plc_dtype), dtype=dtype)
+ return Column(
+ self._handle_string_cast(plc_dtype, stream=stream), dtype=dtype
+ )
  elif plc.traits.is_integral_not_bool(
  self.obj.type()
  ) and plc.traits.is_timestamp(plc_dtype):
- upcasted = plc.unary.cast(self.obj, plc.DataType(plc.TypeId.INT64))
- result = plc.column.Column(
+ upcasted = plc.unary.cast(
+ self.obj, plc.DataType(plc.TypeId.INT64), stream=stream
+ )
+ plc_col = plc.column.Column(
  plc_dtype,
  upcasted.size(),
  upcasted.data(),
@@ -306,11 +316,11 @@
  upcasted.offset(),
  upcasted.children(),
  )
- return Column(result, dtype=dtype).sorted_like(self)
+ return Column(plc_col, dtype=dtype).sorted_like(self)
  elif plc.traits.is_integral_not_bool(plc_dtype) and plc.traits.is_timestamp(
  self.obj.type()
  ):
- result = plc.column.Column(
+ plc_col = plc.column.Column(
  plc.DataType(plc.TypeId.INT64),
  self.obj.size(),
  self.obj.data(),
@@ -319,42 +329,46 @@
  self.obj.offset(),
  self.obj.children(),
  )
- return Column(plc.unary.cast(result, plc_dtype), dtype=dtype).sorted_like(
- self
- )
+ return Column(
+ plc.unary.cast(plc_col, plc_dtype, stream=stream), dtype=dtype
+ ).sorted_like(self)
  else:
- result = Column(plc.unary.cast(self.obj, plc_dtype), dtype=dtype)
+ result = Column(
+ plc.unary.cast(self.obj, plc_dtype, stream=stream), dtype=dtype
+ )
  if is_order_preserving_cast(self.obj.type(), plc_dtype):
  return result.sorted_like(self)
  return result

- def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
+ def _handle_string_cast(self, dtype: plc.DataType, stream: Stream) -> plc.Column:
  if dtype.id() == plc.TypeId.STRING:
  if is_floating_point(self.obj.type()):
- return from_floats(self.obj)
+ return from_floats(self.obj, stream=stream)
  else:
- return from_integers(self.obj)
+ return from_integers(self.obj, stream=stream)
  else:
  if is_floating_point(dtype):
- floats = is_float(self.obj)
+ floats = is_float(self.obj, stream=stream)
  if not plc.reduce.reduce(
  floats,
  plc.aggregation.all(),
  plc.DataType(plc.TypeId.BOOL8),
+ stream=stream,
  ).to_py():
  raise InvalidOperationError("Conversion from `str` failed.")
  return to_floats(self.obj, dtype)
  else:
- integers = is_integer(self.obj)
+ integers = is_integer(self.obj, stream=stream)
  if not plc.reduce.reduce(
  integers,
  plc.aggregation.all(),
  plc.DataType(plc.TypeId.BOOL8),
+ stream=stream,
  ).to_py():
  raise InvalidOperationError("Conversion from `str` failed.")
- return to_integers(self.obj, dtype)
+ return to_integers(self.obj, dtype, stream=stream)

- def copy_metadata(self, from_: pl.Series, /) -> Self:
+ def copy_metadata(self, from_: pl_Series, /) -> Self:
  """
  Copy metadata from a host series onto self.

@@ -439,27 +453,44 @@
  dtype=self.dtype,
  )

- def mask_nans(self) -> Self:
+ def mask_nans(self, stream: Stream) -> Self:
  """Return a shallow copy of self with nans masked out."""
  if plc.traits.is_floating_point(self.obj.type()):
  old_count = self.null_count
- mask, new_count = plc.transform.nans_to_nulls(self.obj)
+ mask, new_count = plc.transform.nans_to_nulls(self.obj, stream=stream)
  result = type(self)(self.obj.with_mask(mask, new_count), self.dtype)
  if old_count == new_count:
  return result.sorted_like(self)
  return result
  return self.copy()

- @functools.cached_property
- def nan_count(self) -> int:
- """Return the number of NaN values in the column."""
+ def nan_count(self, stream: Stream) -> int:
+ """
+ Return the number of NaN values in the column.
+
+ Parameters
+ ----------
+ stream
+ CUDA stream used for device memory operations and kernel launches.
+ ``self.obj`` must be valid on this stream, and the result will be
+ valid on this stream.
+
+ Returns
+ -------
+ Number of NaN values in the column.
+ """
+ result: int
  if self.size > 0 and plc.traits.is_floating_point(self.obj.type()):
- return plc.reduce.reduce(
- plc.unary.is_nan(self.obj),
+ # See https://github.com/rapidsai/cudf/issues/20202 for we type ignore
+ result = plc.reduce.reduce( # type: ignore[assignment]
+ plc.unary.is_nan(self.obj, stream=stream),
  plc.aggregation.sum(),
  plc.types.SIZE_TYPE,
+ stream=stream,
  ).to_py()
- return 0
+ else:
+ result = 0
+ return result

  @property
  def size(self) -> int:
@@ -471,7 +502,7 @@
  """Return the number of Null values in the column."""
  return self.obj.null_count()

- def slice(self, zlice: Slice | None) -> Self:
+ def slice(self, zlice: Slice | None, stream: Stream) -> Self:
  """
  Slice a column.

@@ -480,6 +511,9 @@
  zlice
  optional, tuple of start and length, negative values of start
  treated as for python indexing. If not provided, returns self.
+ stream
+ CUDA stream used for device memory operations and kernel launches
+ on this Column. The data in ``self.obj`` must be valid on this stream.

  Returns
  -------
@@ -490,6 +524,7 @@
  (table,) = plc.copying.slice(
  plc.Table([self.obj]),
  conversion.from_polars_slice(zlice, num_rows=self.size),
+ stream=stream,
  )
  (column,) = table.columns()
  return type(self)(column, name=self.name, dtype=self.dtype).sorted_like(self)
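
Throughout the containers/column.py hunks, Column methods that previously took no stream argument (serialize, deserialize, obj_scalar, check_sorted, astype, mask_nans, nan_count, slice) now require an explicit rmm Stream. A minimal round-trip sketch under the new signatures; the DEFAULT_STREAM import and the Column re-export from cudf_polars.containers are assumptions about the package layout, not something shown in this diff:

    from rmm.pylibrmm.stream import DEFAULT_STREAM  # assumed location of rmm's default stream

    from cudf_polars.containers import Column  # assumed re-export of containers/column.py


    def roundtrip(col: Column) -> Column:
        # The caller chooses the stream; the data in col.obj must be valid on it.
        stream = DEFAULT_STREAM
        header, frames = col.serialize(stream)  # previously col.serialize()
        # deserialize() now also requires the stream the frames are valid on.
        return Column.deserialize(header, frames, stream)

The other methods follow the same pattern, e.g. col.astype(dtype, stream) and col.nan_count(stream), matching the signatures in the hunks above.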