cudf-polars-cu12 25.8.0__py3-none-any.whl → 25.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +33 -18
  4. cudf_polars/containers/column.py +116 -52
  5. cudf_polars/containers/dataframe.py +137 -39
  6. cudf_polars/containers/datatype.py +136 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/base.py +4 -0
  10. cudf_polars/dsl/expressions/binaryop.py +34 -14
  11. cudf_polars/dsl/expressions/boolean.py +143 -49
  12. cudf_polars/dsl/expressions/datetime.py +60 -30
  13. cudf_polars/dsl/expressions/literal.py +11 -5
  14. cudf_polars/dsl/expressions/rolling.py +839 -18
  15. cudf_polars/dsl/expressions/selection.py +9 -8
  16. cudf_polars/dsl/expressions/slicing.py +1 -1
  17. cudf_polars/dsl/expressions/string.py +490 -103
  18. cudf_polars/dsl/expressions/struct.py +22 -11
  19. cudf_polars/dsl/expressions/ternary.py +9 -3
  20. cudf_polars/dsl/expressions/unary.py +236 -61
  21. cudf_polars/dsl/ir.py +1123 -307
  22. cudf_polars/dsl/to_ast.py +30 -13
  23. cudf_polars/dsl/tracing.py +194 -0
  24. cudf_polars/dsl/translate.py +357 -113
  25. cudf_polars/dsl/utils/aggregations.py +211 -23
  26. cudf_polars/dsl/utils/groupby.py +6 -1
  27. cudf_polars/dsl/utils/reshape.py +14 -2
  28. cudf_polars/dsl/utils/rolling.py +19 -9
  29. cudf_polars/dsl/utils/windows.py +41 -20
  30. cudf_polars/experimental/base.py +302 -26
  31. cudf_polars/experimental/benchmarks/pdsds.py +13 -123
  32. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  33. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  34. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  35. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  36. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  37. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  38. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  39. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  40. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  41. cudf_polars/experimental/benchmarks/pdsh.py +793 -1
  42. cudf_polars/experimental/benchmarks/utils.py +646 -63
  43. cudf_polars/experimental/dask_registers.py +47 -20
  44. cudf_polars/experimental/dispatch.py +66 -3
  45. cudf_polars/experimental/distinct.py +3 -0
  46. cudf_polars/experimental/explain.py +48 -5
  47. cudf_polars/experimental/expressions.py +67 -12
  48. cudf_polars/experimental/groupby.py +26 -4
  49. cudf_polars/experimental/io.py +164 -113
  50. cudf_polars/experimental/join.py +44 -25
  51. cudf_polars/experimental/parallel.py +99 -20
  52. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  53. cudf_polars/experimental/rapidsmpf/core.py +361 -0
  54. cudf_polars/experimental/rapidsmpf/dispatch.py +150 -0
  55. cudf_polars/experimental/rapidsmpf/io.py +604 -0
  56. cudf_polars/experimental/rapidsmpf/join.py +237 -0
  57. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  58. cudf_polars/experimental/rapidsmpf/nodes.py +494 -0
  59. cudf_polars/experimental/rapidsmpf/repartition.py +151 -0
  60. cudf_polars/experimental/rapidsmpf/shuffle.py +277 -0
  61. cudf_polars/experimental/rapidsmpf/union.py +96 -0
  62. cudf_polars/experimental/rapidsmpf/utils.py +162 -0
  63. cudf_polars/experimental/repartition.py +9 -2
  64. cudf_polars/experimental/select.py +208 -18
  65. cudf_polars/experimental/shuffle.py +107 -35
  66. cudf_polars/experimental/sort.py +642 -11
  67. cudf_polars/experimental/statistics.py +814 -0
  68. cudf_polars/experimental/utils.py +90 -15
  69. cudf_polars/testing/asserts.py +35 -19
  70. cudf_polars/testing/io.py +53 -2
  71. cudf_polars/testing/plugin.py +98 -34
  72. cudf_polars/typing/__init__.py +86 -32
  73. cudf_polars/utils/config.py +550 -66
  74. cudf_polars/utils/cuda_stream.py +70 -0
  75. cudf_polars/utils/dtypes.py +17 -4
  76. cudf_polars/utils/versions.py +6 -3
  77. cudf_polars_cu12-25.12.0.dist-info/METADATA +182 -0
  78. cudf_polars_cu12-25.12.0.dist-info/RECORD +104 -0
  79. cudf_polars_cu12-25.8.0.dist-info/METADATA +0 -125
  80. cudf_polars_cu12-25.8.0.dist-info/RECORD +0 -81
  81. {cudf_polars_cu12-25.8.0.dist-info → cudf_polars_cu12-25.12.0.dist-info}/WHEEL +0 -0
  82. {cudf_polars_cu12-25.8.0.dist-info → cudf_polars_cu12-25.12.0.dist-info}/licenses/LICENSE +0 -0
  83. {cudf_polars_cu12-25.8.0.dist-info → cudf_polars_cu12-25.12.0.dist-info}/top_level.txt +0 -0
cudf_polars/GIT_COMMIT ADDED
@@ -0,0 +1 @@
1
+ 580975be72b3516c2c18da149b62de557b28fb67
cudf_polars/VERSION CHANGED
@@ -1 +1 @@
1
- 25.08.00
1
+ 25.12.00
cudf_polars/callback.py CHANGED
@@ -22,9 +22,14 @@ import pylibcudf
22
22
  import rmm
23
23
  from rmm._cuda import gpu
24
24
 
25
+ import cudf_polars.dsl.tracing
26
+ from cudf_polars.dsl.ir import IRExecutionContext
25
27
  from cudf_polars.dsl.tracing import CUDF_POLARS_NVTX_DOMAIN
26
28
  from cudf_polars.dsl.translate import Translator
27
- from cudf_polars.utils.config import _env_get_int, get_total_device_memory
29
+ from cudf_polars.utils.config import (
30
+ _env_get_int,
31
+ get_total_device_memory,
32
+ )
28
33
  from cudf_polars.utils.timer import Timer
29
34
 
30
35
  if TYPE_CHECKING:
@@ -35,23 +40,16 @@ if TYPE_CHECKING:
35
40
 
36
41
  from cudf_polars.dsl.ir import IR
37
42
  from cudf_polars.typing import NodeTraverser
38
- from cudf_polars.utils.config import ConfigOptions
43
+ from cudf_polars.utils.config import ConfigOptions, MemoryResourceConfig
39
44
 
40
45
  __all__: list[str] = ["execute_with_cudf"]
41
46
 
42
47
 
43
- _SUPPORTED_PREFETCHES = {
44
- "column_view::get_data",
45
- "mutable_column_view::get_data",
46
- "gather",
47
- "hash_join",
48
- }
49
-
50
-
51
48
  @cache
52
49
  def default_memory_resource(
53
50
  device: int,
54
51
  cuda_managed_memory: bool, # noqa: FBT001
52
+ memory_resource_config: MemoryResourceConfig | None,
55
53
  ) -> rmm.mr.DeviceMemoryResource:
56
54
  """
57
55
  Return the default memory resource for cudf-polars.
@@ -63,6 +61,9 @@ def default_memory_resource(
63
61
  the active device when this function is called.
64
62
  cuda_managed_memory
65
63
  Whether to use managed memory or not.
64
+ memory_resource_config
65
+ Memory resource configuration to use. If ``None``, the default
66
+ memory resource is used.
66
67
 
67
68
  Returns
68
69
  -------
@@ -72,7 +73,9 @@ def default_memory_resource(
72
73
  else, an async pool resource is returned.
73
74
  """
74
75
  try:
75
- if (
76
+ if memory_resource_config is not None:
77
+ mr = memory_resource_config.create_memory_resource()
78
+ elif (
76
79
  cuda_managed_memory
77
80
  and pylibcudf.utils._is_concurrent_managed_access_supported()
78
81
  ):
@@ -80,8 +83,7 @@ def default_memory_resource(
80
83
  # Leaving a 20% headroom to avoid OOM errors.
81
84
  free_memory, _ = rmm.mr.available_device_memory()
82
85
  free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
83
- for key in _SUPPORTED_PREFETCHES:
84
- pylibcudf.experimental.enable_prefetching(key)
86
+ pylibcudf.prefetch.enable()
85
87
  mr = rmm.mr.PrefetchResourceAdaptor(
86
88
  rmm.mr.PoolMemoryResource(
87
89
  rmm.mr.ManagedMemoryResource(),
@@ -98,7 +100,7 @@ def default_memory_resource(
98
100
  ):
99
101
  raise ComputeError(
100
102
  "GPU engine requested, but incorrect cudf-polars package installed. "
101
- "cudf-polars requires CUDA 12.0+ to installed."
103
+ "cudf-polars requires CUDA 12.2+ to installed."
102
104
  ) from None
103
105
  else:
104
106
  raise
@@ -109,6 +111,7 @@ def default_memory_resource(
109
111
  @contextlib.contextmanager
110
112
  def set_memory_resource(
111
113
  mr: rmm.mr.DeviceMemoryResource | None,
114
+ memory_resource_config: MemoryResourceConfig | None,
112
115
  ) -> Generator[rmm.mr.DeviceMemoryResource, None, None]:
113
116
  """
114
117
  Set the current memory resource for an execution block.
@@ -118,6 +121,9 @@ def set_memory_resource(
118
121
  mr
119
122
  Memory resource to use. If `None`, calls :func:`default_memory_resource`
120
123
  to obtain an mr on the currently active device.
124
+ memory_resource_config
125
+ Memory resource configuration to use when a concrete memory resource
126
+ is not provided. If ``None``, the default memory resource is used.
121
127
 
122
128
  Returns
123
129
  -------
@@ -141,7 +147,14 @@ def set_memory_resource(
141
147
  )
142
148
  != 0
143
149
  ),
150
+ memory_resource_config=memory_resource_config,
144
151
  )
152
+
153
+ if (
154
+ cudf_polars.dsl.tracing.LOG_TRACES
155
+ ): # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
156
+ mr = rmm.mr.StatisticsResourceAdaptor(mr)
157
+
145
158
  rmm.mr.set_current_device_resource(mr)
146
159
  try:
147
160
  yield mr
@@ -220,14 +233,16 @@ def _callback(
220
233
  assert n_rows is None
221
234
  if timer is not None:
222
235
  assert should_time
236
+
223
237
  with (
224
238
  nvtx.annotate(message="ExecuteIR", domain=CUDF_POLARS_NVTX_DOMAIN),
225
239
  # Device must be set before memory resource is obtained.
226
240
  set_device(config_options.device),
227
- set_memory_resource(memory_resource),
241
+ set_memory_resource(memory_resource, config_options.memory_resource_config),
228
242
  ):
229
243
  if config_options.executor.name == "in-memory":
230
- df = ir.evaluate(cache={}, timer=timer).to_polars()
244
+ context = IRExecutionContext.from_config_options(config_options)
245
+ df = ir.evaluate(cache={}, timer=timer, context=context).to_polars()
231
246
  if timer is None:
232
247
  return df
233
248
  else:
@@ -245,7 +260,7 @@ def _callback(
245
260
  """)
246
261
  raise NotImplementedError(msg)
247
262
 
248
- return evaluate_streaming(ir, config_options).to_polars()
263
+ return evaluate_streaming(ir, config_options)
249
264
  assert_never(f"Unknown executor '{config_options.executor}'")
250
265
 
251
266
 
@@ -296,7 +311,7 @@ def execute_with_cudf(
296
311
  if (
297
312
  memory_resource is None
298
313
  and translator.config_options.executor.name == "streaming"
299
- and translator.config_options.executor.scheduler == "distributed"
314
+ and translator.config_options.executor.cluster == "distributed"
300
315
  ): # pragma: no cover; Requires distributed cluster
301
316
  memory_resource = rmm.mr.get_current_device_resource()
302
317
  if len(ir_translation_errors):
@@ -5,11 +5,8 @@
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- import functools
9
8
  from typing import TYPE_CHECKING
10
9
 
11
- import polars as pl
12
- import polars.datatypes.convert
13
10
  from polars.exceptions import InvalidOperationError
14
11
 
15
12
  import pylibcudf as plc
@@ -22,12 +19,17 @@ from pylibcudf.strings.convert.convert_integers import (
22
19
  from pylibcudf.traits import is_floating_point
23
20
 
24
21
  from cudf_polars.containers import DataType
22
+ from cudf_polars.containers.datatype import _dtype_from_header, _dtype_to_header
25
23
  from cudf_polars.utils import conversion
26
24
  from cudf_polars.utils.dtypes import is_order_preserving_cast
27
25
 
28
26
  if TYPE_CHECKING:
29
27
  from typing_extensions import Self
30
28
 
29
+ from polars import Series as pl_Series
30
+
31
+ from rmm.pylibrmm.stream import Stream
32
+
31
33
  from cudf_polars.typing import (
32
34
  ColumnHeader,
33
35
  ColumnOptions,
@@ -38,22 +40,6 @@ if TYPE_CHECKING:
38
40
  __all__: list[str] = ["Column"]
39
41
 
40
42
 
41
- def _dtype_short_repr_to_dtype(dtype_str: str) -> pl.DataType:
42
- """Convert a Polars dtype short repr to a Polars dtype."""
43
- # limitations of dtype_short_repr_to_dtype described in
44
- # py-polars/polars/datatypes/convert.py#L299
45
- if dtype_str.startswith("list["):
46
- stripped = dtype_str.removeprefix("list[").removesuffix("]")
47
- return pl.List(_dtype_short_repr_to_dtype(stripped))
48
- pl_type = polars.datatypes.convert.dtype_short_repr_to_dtype(dtype_str)
49
- if pl_type is None:
50
- raise ValueError(f"{dtype_str} was not able to be parsed by Polars.")
51
- if isinstance(pl_type, polars.datatypes.DataTypeClass):
52
- return pl_type()
53
- else:
54
- return pl_type
55
-
56
-
57
43
  class Column:
58
44
  """An immutable column with sortedness metadata."""
59
45
 
@@ -85,7 +71,10 @@ class Column:
85
71
 
86
72
  @classmethod
87
73
  def deserialize(
88
- cls, header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]
74
+ cls,
75
+ header: ColumnHeader,
76
+ frames: tuple[memoryview[bytes], plc.gpumemoryview],
77
+ stream: Stream,
89
78
  ) -> Self:
90
79
  """
91
80
  Create a Column from a serialized representation returned by `.serialize()`.
@@ -96,6 +85,10 @@ class Column:
96
85
  The (unpickled) metadata required to reconstruct the object.
97
86
  frames
98
87
  Two-tuple of frames (a memoryview and a gpumemoryview).
88
+ stream
89
+ CUDA stream used for device memory operations and kernel launches
90
+ on this column. The caller is responsible for ensuring that
91
+ the data in ``frames`` is valid on ``stream``.
99
92
 
100
93
  Returns
101
94
  -------
@@ -104,7 +97,7 @@ class Column:
104
97
  """
105
98
  packed_metadata, packed_gpu_data = frames
106
99
  (plc_column,) = plc.contiguous_split.unpack_from_memoryviews(
107
- packed_metadata, packed_gpu_data
100
+ packed_metadata, packed_gpu_data, stream
108
101
  ).columns()
109
102
  return cls(plc_column, **cls.deserialize_ctor_kwargs(header["column_kwargs"]))
110
103
 
@@ -113,20 +106,18 @@ class Column:
113
106
  column_kwargs: ColumnOptions,
114
107
  ) -> DeserializedColumnOptions:
115
108
  """Deserialize the constructor kwargs for a Column."""
116
- dtype = DataType( # pragma: no cover
117
- _dtype_short_repr_to_dtype(column_kwargs["dtype"])
118
- )
119
109
  return {
120
110
  "is_sorted": column_kwargs["is_sorted"],
121
111
  "order": column_kwargs["order"],
122
112
  "null_order": column_kwargs["null_order"],
123
113
  "name": column_kwargs["name"],
124
- "dtype": dtype,
114
+ "dtype": DataType(_dtype_from_header(column_kwargs["dtype"])),
125
115
  }
126
116
 
127
117
  def serialize(
128
118
  self,
129
- ) -> tuple[ColumnHeader, tuple[memoryview, plc.gpumemoryview]]:
119
+ stream: Stream,
120
+ ) -> tuple[ColumnHeader, tuple[memoryview[bytes], plc.gpumemoryview]]:
130
121
  """
131
122
  Serialize the Column into header and frames.
132
123
 
@@ -145,7 +136,7 @@ class Column:
145
136
  frames
146
137
  Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
147
138
  """
148
- packed = plc.contiguous_split.pack(plc.Table([self.obj]))
139
+ packed = plc.contiguous_split.pack(plc.Table([self.obj]), stream=stream)
149
140
  header: ColumnHeader = {
150
141
  "column_kwargs": self.serialize_ctor_kwargs(),
151
142
  "frame_count": 2,
@@ -159,14 +150,20 @@ class Column:
159
150
  "order": self.order,
160
151
  "null_order": self.null_order,
161
152
  "name": self.name,
162
- "dtype": pl.polars.dtype_str_repr(self.dtype.polars),
153
+ "dtype": _dtype_to_header(self.dtype.polars_type),
163
154
  }
164
155
 
165
- @functools.cached_property
166
- def obj_scalar(self) -> plc.Scalar:
156
+ def obj_scalar(self, stream: Stream) -> plc.Scalar:
167
157
  """
168
158
  A copy of the column object as a pylibcudf Scalar.
169
159
 
160
+ Parameters
161
+ ----------
162
+ stream
163
+ CUDA stream used for device memory operations and kernel launches.
164
+ ``self.obj`` must be valid on this stream, and the result will be
165
+ valid on this stream.
166
+
170
167
  Returns
171
168
  -------
172
169
  pylibcudf Scalar object.
@@ -178,7 +175,7 @@ class Column:
178
175
  """
179
176
  if not self.is_scalar:
180
177
  raise ValueError(f"Cannot convert a column of length {self.size} to scalar")
181
- return plc.copying.get_element(self.obj, 0)
178
+ return plc.copying.get_element(self.obj, 0, stream=stream)
182
179
 
183
180
  def rename(self, name: str | None, /) -> Self:
184
181
  """
@@ -228,6 +225,7 @@ class Column:
228
225
  *,
229
226
  order: plc.types.Order,
230
227
  null_order: plc.types.NullOrder,
228
+ stream: Stream,
231
229
  ) -> bool:
232
230
  """
233
231
  Check if the column is sorted.
@@ -238,6 +236,9 @@ class Column:
238
236
  The requested sort order.
239
237
  null_order
240
238
  Where nulls sort to.
239
+ stream
240
+ CUDA stream used for device memory operations and kernel launches
241
+ on this Column. The data in ``self.obj`` must be valid on this stream.
241
242
 
242
243
  Returns
243
244
  -------
@@ -254,14 +255,16 @@ class Column:
254
255
  return self.order == order and (
255
256
  self.null_count == 0 or self.null_order == null_order
256
257
  )
257
- if plc.sorting.is_sorted(plc.Table([self.obj]), [order], [null_order]):
258
+ if plc.sorting.is_sorted(
259
+ plc.Table([self.obj]), [order], [null_order], stream=stream
260
+ ):
258
261
  self.sorted = plc.types.Sorted.YES
259
262
  self.order = order
260
263
  self.null_order = null_order
261
264
  return True
262
265
  return False
263
266
 
264
- def astype(self, dtype: DataType) -> Column:
267
+ def astype(self, dtype: DataType, stream: Stream) -> Column:
265
268
  """
266
269
  Cast the column to the requested dtype.
267
270
 
@@ -269,6 +272,9 @@ class Column:
269
272
  ----------
270
273
  dtype
271
274
  Datatype to cast to.
275
+ stream
276
+ CUDA stream used for device memory operations and kernel launches
277
+ on this Column. The data in ``self.obj`` must be valid on this stream.
272
278
 
273
279
  Returns
274
280
  -------
@@ -284,7 +290,7 @@ class Column:
284
290
  This only produces a copy if the requested dtype doesn't match
285
291
  the current one.
286
292
  """
287
- plc_dtype = dtype.plc
293
+ plc_dtype = dtype.plc_type
288
294
  if self.obj.type() == plc_dtype:
289
295
  return self
290
296
 
@@ -292,40 +298,77 @@ class Column:
292
298
  plc_dtype.id() == plc.TypeId.STRING
293
299
  or self.obj.type().id() == plc.TypeId.STRING
294
300
  ):
295
- return Column(self._handle_string_cast(plc_dtype), dtype=dtype)
301
+ return Column(
302
+ self._handle_string_cast(plc_dtype, stream=stream), dtype=dtype
303
+ )
304
+ elif plc.traits.is_integral_not_bool(
305
+ self.obj.type()
306
+ ) and plc.traits.is_timestamp(plc_dtype):
307
+ upcasted = plc.unary.cast(
308
+ self.obj, plc.DataType(plc.TypeId.INT64), stream=stream
309
+ )
310
+ plc_col = plc.column.Column(
311
+ plc_dtype,
312
+ upcasted.size(),
313
+ upcasted.data(),
314
+ upcasted.null_mask(),
315
+ upcasted.null_count(),
316
+ upcasted.offset(),
317
+ upcasted.children(),
318
+ )
319
+ return Column(plc_col, dtype=dtype).sorted_like(self)
320
+ elif plc.traits.is_integral_not_bool(plc_dtype) and plc.traits.is_timestamp(
321
+ self.obj.type()
322
+ ):
323
+ plc_col = plc.column.Column(
324
+ plc.DataType(plc.TypeId.INT64),
325
+ self.obj.size(),
326
+ self.obj.data(),
327
+ self.obj.null_mask(),
328
+ self.obj.null_count(),
329
+ self.obj.offset(),
330
+ self.obj.children(),
331
+ )
332
+ return Column(
333
+ plc.unary.cast(plc_col, plc_dtype, stream=stream), dtype=dtype
334
+ ).sorted_like(self)
296
335
  else:
297
- result = Column(plc.unary.cast(self.obj, plc_dtype), dtype=dtype)
336
+ result = Column(
337
+ plc.unary.cast(self.obj, plc_dtype, stream=stream), dtype=dtype
338
+ )
298
339
  if is_order_preserving_cast(self.obj.type(), plc_dtype):
299
340
  return result.sorted_like(self)
300
341
  return result
301
342
 
302
- def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
343
+ def _handle_string_cast(self, dtype: plc.DataType, stream: Stream) -> plc.Column:
303
344
  if dtype.id() == plc.TypeId.STRING:
304
345
  if is_floating_point(self.obj.type()):
305
- return from_floats(self.obj)
346
+ return from_floats(self.obj, stream=stream)
306
347
  else:
307
- return from_integers(self.obj)
348
+ return from_integers(self.obj, stream=stream)
308
349
  else:
309
350
  if is_floating_point(dtype):
310
- floats = is_float(self.obj)
351
+ floats = is_float(self.obj, stream=stream)
311
352
  if not plc.reduce.reduce(
312
353
  floats,
313
354
  plc.aggregation.all(),
314
355
  plc.DataType(plc.TypeId.BOOL8),
356
+ stream=stream,
315
357
  ).to_py():
316
358
  raise InvalidOperationError("Conversion from `str` failed.")
317
359
  return to_floats(self.obj, dtype)
318
360
  else:
319
- integers = is_integer(self.obj)
361
+ integers = is_integer(self.obj, stream=stream)
320
362
  if not plc.reduce.reduce(
321
363
  integers,
322
364
  plc.aggregation.all(),
323
365
  plc.DataType(plc.TypeId.BOOL8),
366
+ stream=stream,
324
367
  ).to_py():
325
368
  raise InvalidOperationError("Conversion from `str` failed.")
326
- return to_integers(self.obj, dtype)
369
+ return to_integers(self.obj, dtype, stream=stream)
327
370
 
328
- def copy_metadata(self, from_: pl.Series, /) -> Self:
371
+ def copy_metadata(self, from_: pl_Series, /) -> Self:
329
372
  """
330
373
  Copy metadata from a host series onto self.
331
374
 
@@ -410,27 +453,44 @@ class Column:
410
453
  dtype=self.dtype,
411
454
  )
412
455
 
413
- def mask_nans(self) -> Self:
456
+ def mask_nans(self, stream: Stream) -> Self:
414
457
  """Return a shallow copy of self with nans masked out."""
415
458
  if plc.traits.is_floating_point(self.obj.type()):
416
459
  old_count = self.null_count
417
- mask, new_count = plc.transform.nans_to_nulls(self.obj)
460
+ mask, new_count = plc.transform.nans_to_nulls(self.obj, stream=stream)
418
461
  result = type(self)(self.obj.with_mask(mask, new_count), self.dtype)
419
462
  if old_count == new_count:
420
463
  return result.sorted_like(self)
421
464
  return result
422
465
  return self.copy()
423
466
 
424
- @functools.cached_property
425
- def nan_count(self) -> int:
426
- """Return the number of NaN values in the column."""
467
+ def nan_count(self, stream: Stream) -> int:
468
+ """
469
+ Return the number of NaN values in the column.
470
+
471
+ Parameters
472
+ ----------
473
+ stream
474
+ CUDA stream used for device memory operations and kernel launches.
475
+ ``self.obj`` must be valid on this stream, and the result will be
476
+ valid on this stream.
477
+
478
+ Returns
479
+ -------
480
+ Number of NaN values in the column.
481
+ """
482
+ result: int
427
483
  if self.size > 0 and plc.traits.is_floating_point(self.obj.type()):
428
- return plc.reduce.reduce(
429
- plc.unary.is_nan(self.obj),
484
+ # See https://github.com/rapidsai/cudf/issues/20202 for why we type ignore
485
+ result = plc.reduce.reduce( # type: ignore[assignment]
486
+ plc.unary.is_nan(self.obj, stream=stream),
430
487
  plc.aggregation.sum(),
431
488
  plc.types.SIZE_TYPE,
489
+ stream=stream,
432
490
  ).to_py()
433
- return 0
491
+ else:
492
+ result = 0
493
+ return result
434
494
 
435
495
  @property
436
496
  def size(self) -> int:
@@ -442,7 +502,7 @@ class Column:
442
502
  """Return the number of Null values in the column."""
443
503
  return self.obj.null_count()
444
504
 
445
- def slice(self, zlice: Slice | None) -> Self:
505
+ def slice(self, zlice: Slice | None, stream: Stream) -> Self:
446
506
  """
447
507
  Slice a column.
448
508
 
@@ -451,6 +511,9 @@ class Column:
451
511
  zlice
452
512
  optional, tuple of start and length, negative values of start
453
513
  treated as for python indexing. If not provided, returns self.
514
+ stream
515
+ CUDA stream used for device memory operations and kernel launches
516
+ on this Column. The data in ``self.obj`` must be valid on this stream.
454
517
 
455
518
  Returns
456
519
  -------
@@ -461,6 +524,7 @@ class Column:
461
524
  (table,) = plc.copying.slice(
462
525
  plc.Table([self.obj]),
463
526
  conversion.from_polars_slice(zlice, num_rows=self.size),
527
+ stream=stream,
464
528
  )
465
529
  (column,) = table.columns()
466
530
  return type(self)(column, name=self.name, dtype=self.dtype).sorted_like(self)