cudf-polars-cu12 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +60 -15
  4. cudf_polars/containers/column.py +137 -77
  5. cudf_polars/containers/dataframe.py +123 -34
  6. cudf_polars/containers/datatype.py +134 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/binaryop.py +34 -14
  10. cudf_polars/dsl/expressions/boolean.py +110 -37
  11. cudf_polars/dsl/expressions/datetime.py +59 -30
  12. cudf_polars/dsl/expressions/literal.py +11 -5
  13. cudf_polars/dsl/expressions/rolling.py +460 -119
  14. cudf_polars/dsl/expressions/selection.py +9 -8
  15. cudf_polars/dsl/expressions/slicing.py +1 -1
  16. cudf_polars/dsl/expressions/string.py +256 -114
  17. cudf_polars/dsl/expressions/struct.py +19 -7
  18. cudf_polars/dsl/expressions/ternary.py +33 -3
  19. cudf_polars/dsl/expressions/unary.py +126 -64
  20. cudf_polars/dsl/ir.py +1053 -350
  21. cudf_polars/dsl/to_ast.py +30 -13
  22. cudf_polars/dsl/tracing.py +194 -0
  23. cudf_polars/dsl/translate.py +307 -107
  24. cudf_polars/dsl/utils/aggregations.py +43 -30
  25. cudf_polars/dsl/utils/reshape.py +14 -2
  26. cudf_polars/dsl/utils/rolling.py +12 -8
  27. cudf_polars/dsl/utils/windows.py +35 -20
  28. cudf_polars/experimental/base.py +55 -2
  29. cudf_polars/experimental/benchmarks/pdsds.py +12 -126
  30. cudf_polars/experimental/benchmarks/pdsh.py +792 -2
  31. cudf_polars/experimental/benchmarks/utils.py +596 -39
  32. cudf_polars/experimental/dask_registers.py +47 -20
  33. cudf_polars/experimental/dispatch.py +9 -3
  34. cudf_polars/experimental/distinct.py +2 -0
  35. cudf_polars/experimental/explain.py +15 -2
  36. cudf_polars/experimental/expressions.py +30 -15
  37. cudf_polars/experimental/groupby.py +25 -4
  38. cudf_polars/experimental/io.py +156 -124
  39. cudf_polars/experimental/join.py +53 -23
  40. cudf_polars/experimental/parallel.py +68 -19
  41. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  42. cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
  43. cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
  44. cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
  45. cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
  46. cudf_polars/experimental/rapidsmpf/core.py +488 -0
  47. cudf_polars/experimental/rapidsmpf/dask.py +172 -0
  48. cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
  49. cudf_polars/experimental/rapidsmpf/io.py +696 -0
  50. cudf_polars/experimental/rapidsmpf/join.py +322 -0
  51. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  52. cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
  53. cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
  54. cudf_polars/experimental/rapidsmpf/union.py +115 -0
  55. cudf_polars/experimental/rapidsmpf/utils.py +374 -0
  56. cudf_polars/experimental/repartition.py +9 -2
  57. cudf_polars/experimental/select.py +177 -14
  58. cudf_polars/experimental/shuffle.py +46 -12
  59. cudf_polars/experimental/sort.py +100 -26
  60. cudf_polars/experimental/spilling.py +1 -1
  61. cudf_polars/experimental/statistics.py +24 -5
  62. cudf_polars/experimental/utils.py +25 -7
  63. cudf_polars/testing/asserts.py +13 -8
  64. cudf_polars/testing/io.py +2 -1
  65. cudf_polars/testing/plugin.py +93 -17
  66. cudf_polars/typing/__init__.py +86 -32
  67. cudf_polars/utils/config.py +473 -58
  68. cudf_polars/utils/cuda_stream.py +70 -0
  69. cudf_polars/utils/versions.py +5 -4
  70. cudf_polars_cu12-26.2.0.dist-info/METADATA +181 -0
  71. cudf_polars_cu12-26.2.0.dist-info/RECORD +108 -0
  72. {cudf_polars_cu12-25.10.0.dist-info → cudf_polars_cu12-26.2.0.dist-info}/WHEEL +1 -1
  73. cudf_polars_cu12-25.10.0.dist-info/METADATA +0 -136
  74. cudf_polars_cu12-25.10.0.dist-info/RECORD +0 -92
  75. {cudf_polars_cu12-25.10.0.dist-info → cudf_polars_cu12-26.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {cudf_polars_cu12-25.10.0.dist-info → cudf_polars_cu12-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/GIT_COMMIT CHANGED
@@ -1 +1 @@
1
- f4e35ca02118eada383e7417273c6cb1857ec66e
1
+ 9782a269e689140d2b00b5172a93056bdf19e8c2
cudf_polars/VERSION CHANGED
@@ -1 +1 @@
1
- 25.10.00
1
+ 26.02.000
cudf_polars/callback.py CHANGED
@@ -11,6 +11,7 @@ import textwrap
11
11
  import time
12
12
  import warnings
13
13
  from functools import cache, partial
14
+ from threading import Lock
14
15
  from typing import TYPE_CHECKING, Literal, overload
15
16
 
16
17
  import nvtx
@@ -22,9 +23,14 @@ import pylibcudf
22
23
  import rmm
23
24
  from rmm._cuda import gpu
24
25
 
26
+ import cudf_polars.dsl.tracing
27
+ from cudf_polars.dsl.ir import IRExecutionContext
25
28
  from cudf_polars.dsl.tracing import CUDF_POLARS_NVTX_DOMAIN
26
29
  from cudf_polars.dsl.translate import Translator
27
- from cudf_polars.utils.config import _env_get_int, get_total_device_memory
30
+ from cudf_polars.utils.config import (
31
+ _env_get_int,
32
+ get_total_device_memory,
33
+ )
28
34
  from cudf_polars.utils.timer import Timer
29
35
 
30
36
  if TYPE_CHECKING:
@@ -35,7 +41,7 @@ if TYPE_CHECKING:
35
41
 
36
42
  from cudf_polars.dsl.ir import IR
37
43
  from cudf_polars.typing import NodeTraverser
38
- from cudf_polars.utils.config import ConfigOptions
44
+ from cudf_polars.utils.config import ConfigOptions, MemoryResourceConfig
39
45
 
40
46
  __all__: list[str] = ["execute_with_cudf"]
41
47
 
@@ -44,6 +50,7 @@ __all__: list[str] = ["execute_with_cudf"]
44
50
  def default_memory_resource(
45
51
  device: int,
46
52
  cuda_managed_memory: bool, # noqa: FBT001
53
+ memory_resource_config: MemoryResourceConfig | None,
47
54
  ) -> rmm.mr.DeviceMemoryResource:
48
55
  """
49
56
  Return the default memory resource for cudf-polars.
@@ -55,6 +62,9 @@ def default_memory_resource(
55
62
  the active device when this function is called.
56
63
  cuda_managed_memory
57
64
  Whether to use managed memory or not.
65
+ memory_resource_config
66
+ Memory resource configuration to use. If ``None``, the default
67
+ memory resource is used.
58
68
 
59
69
  Returns
60
70
  -------
@@ -64,7 +74,9 @@ def default_memory_resource(
64
74
  else, an async pool resource is returned.
65
75
  """
66
76
  try:
67
- if (
77
+ if memory_resource_config is not None:
78
+ mr = memory_resource_config.create_memory_resource()
79
+ elif (
68
80
  cuda_managed_memory
69
81
  and pylibcudf.utils._is_concurrent_managed_access_supported()
70
82
  ):
@@ -89,7 +101,7 @@ def default_memory_resource(
89
101
  ):
90
102
  raise ComputeError(
91
103
  "GPU engine requested, but incorrect cudf-polars package installed. "
92
- "cudf-polars requires CUDA 12.0+ to installed."
104
+ "cudf-polars requires CUDA 12.2+ to installed."
93
105
  ) from None
94
106
  else:
95
107
  raise
@@ -100,6 +112,7 @@ def default_memory_resource(
100
112
  @contextlib.contextmanager
101
113
  def set_memory_resource(
102
114
  mr: rmm.mr.DeviceMemoryResource | None,
115
+ memory_resource_config: MemoryResourceConfig | None,
103
116
  ) -> Generator[rmm.mr.DeviceMemoryResource, None, None]:
104
117
  """
105
118
  Set the current memory resource for an execution block.
@@ -109,6 +122,9 @@ def set_memory_resource(
109
122
  mr
110
123
  Memory resource to use. If `None`, calls :func:`default_memory_resource`
111
124
  to obtain an mr on the currently active device.
125
+ memory_resource_config
126
+ Memory resource configuration to use when a concrete memory resource.
127
+ is not provided. If ``None``, the default memory resource is used.
112
128
 
113
129
  Returns
114
130
  -------
@@ -132,7 +148,14 @@ def set_memory_resource(
132
148
  )
133
149
  != 0
134
150
  ),
151
+ memory_resource_config=memory_resource_config,
135
152
  )
153
+
154
+ if (
155
+ cudf_polars.dsl.tracing.LOG_TRACES
156
+ ): # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
157
+ mr = rmm.mr.StatisticsResourceAdaptor(mr)
158
+
136
159
  rmm.mr.set_current_device_resource(mr)
137
160
  try:
138
161
  yield mr
@@ -140,6 +163,11 @@ def set_memory_resource(
140
163
  rmm.mr.set_current_device_resource(previous)
141
164
 
142
165
 
166
+ # libcudf doesn't support executing on multiple devices from within the same process.
167
+ SEEN_DEVICE = None
168
+ SEEN_DEVICE_LOCK = Lock()
169
+
170
+
143
171
  @contextlib.contextmanager
144
172
  def set_device(device: int | None) -> Generator[int, None, None]:
145
173
  """
@@ -158,13 +186,28 @@ def set_device(device: int | None) -> Generator[int, None, None]:
158
186
  -----
159
187
  At exit, the device is restored to whatever was current at entry.
160
188
  """
161
- previous: int = gpu.getDevice()
162
- if device is not None:
163
- gpu.setDevice(device)
164
- try:
165
- yield previous
166
- finally:
167
- gpu.setDevice(previous)
189
+ global SEEN_DEVICE # noqa: PLW0603
190
+ current: int = gpu.getDevice()
191
+ to_use = device if device is not None else current
192
+ with SEEN_DEVICE_LOCK:
193
+ if (
194
+ SEEN_DEVICE is not None and to_use != SEEN_DEVICE
195
+ ): # pragma: no cover; requires multiple GPUs in CI
196
+ raise RuntimeError(
197
+ "cudf-polars does not support running queries on "
198
+ "multiple devices in the same process. "
199
+ f"A previous query used device-{SEEN_DEVICE}, "
200
+ f"the current query is using device-{to_use}."
201
+ )
202
+ SEEN_DEVICE = to_use
203
+ if to_use != current:
204
+ gpu.setDevice(to_use)
205
+ try:
206
+ yield to_use
207
+ finally:
208
+ gpu.setDevice(current)
209
+ else:
210
+ yield to_use
168
211
 
169
212
 
170
213
  @overload
@@ -211,14 +254,16 @@ def _callback(
211
254
  assert n_rows is None
212
255
  if timer is not None:
213
256
  assert should_time
257
+
214
258
  with (
215
259
  nvtx.annotate(message="ExecuteIR", domain=CUDF_POLARS_NVTX_DOMAIN),
216
260
  # Device must be set before memory resource is obtained.
217
261
  set_device(config_options.device),
218
- set_memory_resource(memory_resource),
262
+ set_memory_resource(memory_resource, config_options.memory_resource_config),
219
263
  ):
220
264
  if config_options.executor.name == "in-memory":
221
- df = ir.evaluate(cache={}, timer=timer).to_polars()
265
+ context = IRExecutionContext.from_config_options(config_options)
266
+ df = ir.evaluate(cache={}, timer=timer, context=context).to_polars()
222
267
  if timer is None:
223
268
  return df
224
269
  else:
@@ -236,7 +281,7 @@ def _callback(
236
281
  """)
237
282
  raise NotImplementedError(msg)
238
283
 
239
- return evaluate_streaming(ir, config_options).to_polars()
284
+ return evaluate_streaming(ir, config_options)
240
285
  assert_never(f"Unknown executor '{config_options.executor}'")
241
286
 
242
287
 
@@ -287,7 +332,7 @@ def execute_with_cudf(
287
332
  if (
288
333
  memory_resource is None
289
334
  and translator.config_options.executor.name == "streaming"
290
- and translator.config_options.executor.scheduler == "distributed"
335
+ and translator.config_options.executor.cluster == "distributed"
291
336
  ): # pragma: no cover; Requires distributed cluster
292
337
  memory_resource = rmm.mr.get_current_device_resource()
293
338
  if len(ir_translation_errors):
@@ -5,11 +5,8 @@
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- import functools
9
8
  from typing import TYPE_CHECKING
10
9
 
11
- import polars as pl
12
- import polars.datatypes.convert
13
10
  from polars.exceptions import InvalidOperationError
14
11
 
15
12
  import pylibcudf as plc
@@ -19,15 +16,21 @@ from pylibcudf.strings.convert.convert_integers import (
19
16
  is_integer,
20
17
  to_integers,
21
18
  )
22
- from pylibcudf.traits import is_floating_point
23
19
 
24
20
  from cudf_polars.containers import DataType
21
+ from cudf_polars.containers.datatype import _dtype_from_header, _dtype_to_header
25
22
  from cudf_polars.utils import conversion
26
23
  from cudf_polars.utils.dtypes import is_order_preserving_cast
27
24
 
28
25
  if TYPE_CHECKING:
26
+ from collections.abc import Callable
27
+
29
28
  from typing_extensions import Self
30
29
 
30
+ from polars import Series as pl_Series
31
+
32
+ from rmm.pylibrmm.stream import Stream
33
+
31
34
  from cudf_polars.typing import (
32
35
  ColumnHeader,
33
36
  ColumnOptions,
@@ -38,22 +41,6 @@ if TYPE_CHECKING:
38
41
  __all__: list[str] = ["Column"]
39
42
 
40
43
 
41
- def _dtype_short_repr_to_dtype(dtype_str: str) -> pl.DataType:
42
- """Convert a Polars dtype short repr to a Polars dtype."""
43
- # limitations of dtype_short_repr_to_dtype described in
44
- # py-polars/polars/datatypes/convert.py#L299
45
- if dtype_str.startswith("list["):
46
- stripped = dtype_str.removeprefix("list[").removesuffix("]")
47
- return pl.List(_dtype_short_repr_to_dtype(stripped))
48
- pl_type = polars.datatypes.convert.dtype_short_repr_to_dtype(dtype_str)
49
- if pl_type is None:
50
- raise ValueError(f"{dtype_str} was not able to be parsed by Polars.")
51
- if isinstance(pl_type, polars.datatypes.DataTypeClass):
52
- return pl_type()
53
- else:
54
- return pl_type
55
-
56
-
57
44
  class Column:
58
45
  """An immutable column with sortedness metadata."""
59
46
 
@@ -85,7 +72,10 @@ class Column:
85
72
 
86
73
  @classmethod
87
74
  def deserialize(
88
- cls, header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]
75
+ cls,
76
+ header: ColumnHeader,
77
+ frames: tuple[memoryview[bytes], plc.gpumemoryview],
78
+ stream: Stream,
89
79
  ) -> Self:
90
80
  """
91
81
  Create a Column from a serialized representation returned by `.serialize()`.
@@ -96,6 +86,10 @@ class Column:
96
86
  The (unpickled) metadata required to reconstruct the object.
97
87
  frames
98
88
  Two-tuple of frames (a memoryview and a gpumemoryview).
89
+ stream
90
+ CUDA stream used for device memory operations and kernel launches
91
+ on this column. The caller is responsible for ensuring that
92
+ the data in ``frames`` is valid on ``stream``.
99
93
 
100
94
  Returns
101
95
  -------
@@ -104,7 +98,7 @@ class Column:
104
98
  """
105
99
  packed_metadata, packed_gpu_data = frames
106
100
  (plc_column,) = plc.contiguous_split.unpack_from_memoryviews(
107
- packed_metadata, packed_gpu_data
101
+ packed_metadata, packed_gpu_data, stream
108
102
  ).columns()
109
103
  return cls(plc_column, **cls.deserialize_ctor_kwargs(header["column_kwargs"]))
110
104
 
@@ -113,20 +107,18 @@ class Column:
113
107
  column_kwargs: ColumnOptions,
114
108
  ) -> DeserializedColumnOptions:
115
109
  """Deserialize the constructor kwargs for a Column."""
116
- dtype = DataType( # pragma: no cover
117
- _dtype_short_repr_to_dtype(column_kwargs["dtype"])
118
- )
119
110
  return {
120
111
  "is_sorted": column_kwargs["is_sorted"],
121
112
  "order": column_kwargs["order"],
122
113
  "null_order": column_kwargs["null_order"],
123
114
  "name": column_kwargs["name"],
124
- "dtype": dtype,
115
+ "dtype": DataType(_dtype_from_header(column_kwargs["dtype"])),
125
116
  }
126
117
 
127
118
  def serialize(
128
119
  self,
129
- ) -> tuple[ColumnHeader, tuple[memoryview, plc.gpumemoryview]]:
120
+ stream: Stream,
121
+ ) -> tuple[ColumnHeader, tuple[memoryview[bytes], plc.gpumemoryview]]:
130
122
  """
131
123
  Serialize the Column into header and frames.
132
124
 
@@ -145,7 +137,7 @@ class Column:
145
137
  frames
146
138
  Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
147
139
  """
148
- packed = plc.contiguous_split.pack(plc.Table([self.obj]))
140
+ packed = plc.contiguous_split.pack(plc.Table([self.obj]), stream=stream)
149
141
  header: ColumnHeader = {
150
142
  "column_kwargs": self.serialize_ctor_kwargs(),
151
143
  "frame_count": 2,
@@ -159,14 +151,20 @@ class Column:
159
151
  "order": self.order,
160
152
  "null_order": self.null_order,
161
153
  "name": self.name,
162
- "dtype": pl.polars.dtype_str_repr(self.dtype.polars),
154
+ "dtype": _dtype_to_header(self.dtype.polars_type),
163
155
  }
164
156
 
165
- @functools.cached_property
166
- def obj_scalar(self) -> plc.Scalar:
157
+ def obj_scalar(self, stream: Stream) -> plc.Scalar:
167
158
  """
168
159
  A copy of the column object as a pylibcudf Scalar.
169
160
 
161
+ Parameters
162
+ ----------
163
+ stream
164
+ CUDA stream used for device memory operations and kernel launches.
165
+ ``self.obj`` must be valid on this stream, and the result will be
166
+ valid on this stream.
167
+
170
168
  Returns
171
169
  -------
172
170
  pylibcudf Scalar object.
@@ -178,7 +176,7 @@ class Column:
178
176
  """
179
177
  if not self.is_scalar:
180
178
  raise ValueError(f"Cannot convert a column of length {self.size} to scalar")
181
- return plc.copying.get_element(self.obj, 0)
179
+ return plc.copying.get_element(self.obj, 0, stream=stream)
182
180
 
183
181
  def rename(self, name: str | None, /) -> Self:
184
182
  """
@@ -228,6 +226,7 @@ class Column:
228
226
  *,
229
227
  order: plc.types.Order,
230
228
  null_order: plc.types.NullOrder,
229
+ stream: Stream,
231
230
  ) -> bool:
232
231
  """
233
232
  Check if the column is sorted.
@@ -238,6 +237,9 @@ class Column:
238
237
  The requested sort order.
239
238
  null_order
240
239
  Where nulls sort to.
240
+ stream
241
+ CUDA stream used for device memory operations and kernel launches
242
+ on this Column. The data in ``self.obj`` must be valid on this stream.
241
243
 
242
244
  Returns
243
245
  -------
@@ -254,14 +256,16 @@ class Column:
254
256
  return self.order == order and (
255
257
  self.null_count == 0 or self.null_order == null_order
256
258
  )
257
- if plc.sorting.is_sorted(plc.Table([self.obj]), [order], [null_order]):
259
+ if plc.sorting.is_sorted(
260
+ plc.Table([self.obj]), [order], [null_order], stream=stream
261
+ ):
258
262
  self.sorted = plc.types.Sorted.YES
259
263
  self.order = order
260
264
  self.null_order = null_order
261
265
  return True
262
266
  return False
263
267
 
264
- def astype(self, dtype: DataType) -> Column:
268
+ def astype(self, dtype: DataType, stream: Stream, *, strict: bool = True) -> Column:
265
269
  """
266
270
  Cast the column to as the requested dtype.
267
271
 
@@ -269,6 +273,12 @@ class Column:
269
273
  ----------
270
274
  dtype
271
275
  Datatype to cast to.
276
+ stream
277
+ CUDA stream used for device memory operations and kernel launches
278
+ on this Column. The data in ``self.obj`` must be valid on this stream.
279
+ strict
280
+ If True, raise an error if the cast is unsupported.
281
+ If False, return nulls for unsupported casts.
272
282
 
273
283
  Returns
274
284
  -------
@@ -284,7 +294,7 @@ class Column:
284
294
  This only produces a copy if the requested dtype doesn't match
285
295
  the current one.
286
296
  """
287
- plc_dtype = dtype.plc
297
+ plc_dtype = dtype.plc_type
288
298
  if self.obj.type() == plc_dtype:
289
299
  return self
290
300
 
@@ -292,12 +302,17 @@ class Column:
292
302
  plc_dtype.id() == plc.TypeId.STRING
293
303
  or self.obj.type().id() == plc.TypeId.STRING
294
304
  ):
295
- return Column(self._handle_string_cast(plc_dtype), dtype=dtype)
305
+ return Column(
306
+ self._handle_string_cast(plc_dtype, stream=stream, strict=strict),
307
+ dtype=dtype,
308
+ )
296
309
  elif plc.traits.is_integral_not_bool(
297
310
  self.obj.type()
298
311
  ) and plc.traits.is_timestamp(plc_dtype):
299
- upcasted = plc.unary.cast(self.obj, plc.DataType(plc.TypeId.INT64))
300
- result = plc.column.Column(
312
+ upcasted = plc.unary.cast(
313
+ self.obj, plc.DataType(plc.TypeId.INT64), stream=stream
314
+ )
315
+ plc_col = plc.column.Column(
301
316
  plc_dtype,
302
317
  upcasted.size(),
303
318
  upcasted.data(),
@@ -306,11 +321,11 @@ class Column:
306
321
  upcasted.offset(),
307
322
  upcasted.children(),
308
323
  )
309
- return Column(result, dtype=dtype).sorted_like(self)
324
+ return Column(plc_col, dtype=dtype).sorted_like(self)
310
325
  elif plc.traits.is_integral_not_bool(plc_dtype) and plc.traits.is_timestamp(
311
326
  self.obj.type()
312
327
  ):
313
- result = plc.column.Column(
328
+ plc_col = plc.column.Column(
314
329
  plc.DataType(plc.TypeId.INT64),
315
330
  self.obj.size(),
316
331
  self.obj.data(),
@@ -319,42 +334,66 @@ class Column:
319
334
  self.obj.offset(),
320
335
  self.obj.children(),
321
336
  )
322
- return Column(plc.unary.cast(result, plc_dtype), dtype=dtype).sorted_like(
323
- self
324
- )
337
+ return Column(
338
+ plc.unary.cast(plc_col, plc_dtype, stream=stream), dtype=dtype
339
+ ).sorted_like(self)
325
340
  else:
326
- result = Column(plc.unary.cast(self.obj, plc_dtype), dtype=dtype)
341
+ result = Column(
342
+ plc.unary.cast(self.obj, plc_dtype, stream=stream), dtype=dtype
343
+ )
327
344
  if is_order_preserving_cast(self.obj.type(), plc_dtype):
328
345
  return result.sorted_like(self)
329
346
  return result
330
347
 
331
- def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
348
+ def _handle_string_cast(
349
+ self, dtype: plc.DataType, stream: Stream, *, strict: bool
350
+ ) -> plc.Column:
332
351
  if dtype.id() == plc.TypeId.STRING:
333
- if is_floating_point(self.obj.type()):
334
- return from_floats(self.obj)
352
+ if plc.traits.is_floating_point(self.obj.type()):
353
+ return from_floats(self.obj, stream=stream)
354
+ elif plc.traits.is_integral_not_bool(self.obj.type()):
355
+ return from_integers(self.obj, stream=stream)
335
356
  else:
336
- return from_integers(self.obj)
357
+ raise InvalidOperationError(
358
+ f"Unsupported casting from {self.dtype.id()} to {dtype.id()}."
359
+ )
360
+
361
+ type_checker: Callable[[plc.Column, Stream], plc.Column]
362
+ type_caster: Callable[[plc.Column, plc.DataType, Stream], plc.Column]
363
+ if plc.traits.is_floating_point(dtype):
364
+ type_checker = is_float
365
+ type_caster = to_floats
366
+ elif plc.traits.is_integral_not_bool(dtype):
367
+ # is_integer has a second optional int_type: plc.DataType | None = None argument
368
+ # we do not use
369
+ # unused-ignore for if RMM is missing
370
+ type_checker = is_integer # type: ignore[assignment,unused-ignore]
371
+ type_caster = to_integers
337
372
  else:
338
- if is_floating_point(dtype):
339
- floats = is_float(self.obj)
340
- if not plc.reduce.reduce(
341
- floats,
342
- plc.aggregation.all(),
343
- plc.DataType(plc.TypeId.BOOL8),
344
- ).to_py():
345
- raise InvalidOperationError("Conversion from `str` failed.")
346
- return to_floats(self.obj, dtype)
373
+ raise InvalidOperationError(
374
+ f"Unsupported casting from {self.dtype.id()} to {dtype.id()}."
375
+ )
376
+
377
+ castable = type_checker(self.obj, stream=stream) # type: ignore[call-arg]
378
+ if not plc.reduce.reduce(
379
+ castable,
380
+ plc.aggregation.all(),
381
+ plc.DataType(plc.TypeId.BOOL8),
382
+ stream=stream,
383
+ ).to_py(stream=stream):
384
+ if strict:
385
+ raise InvalidOperationError(
386
+ f"Conversion from {self.dtype.id()} to {dtype.id()} failed."
387
+ )
347
388
  else:
348
- integers = is_integer(self.obj)
349
- if not plc.reduce.reduce(
350
- integers,
351
- plc.aggregation.all(),
352
- plc.DataType(plc.TypeId.BOOL8),
353
- ).to_py():
354
- raise InvalidOperationError("Conversion from `str` failed.")
355
- return to_integers(self.obj, dtype)
356
-
357
- def copy_metadata(self, from_: pl.Series, /) -> Self:
389
+ values = self.obj.with_mask(
390
+ *plc.transform.bools_to_mask(castable, stream=stream)
391
+ )
392
+ else:
393
+ values = self.obj
394
+ return type_caster(values, dtype, stream=stream)
395
+
396
+ def copy_metadata(self, from_: pl_Series, /) -> Self:
358
397
  """
359
398
  Copy metadata from a host series onto self.
360
399
 
@@ -439,27 +478,44 @@ class Column:
439
478
  dtype=self.dtype,
440
479
  )
441
480
 
442
- def mask_nans(self) -> Self:
481
+ def mask_nans(self, stream: Stream) -> Self:
443
482
  """Return a shallow copy of self with nans masked out."""
444
483
  if plc.traits.is_floating_point(self.obj.type()):
445
484
  old_count = self.null_count
446
- mask, new_count = plc.transform.nans_to_nulls(self.obj)
485
+ mask, new_count = plc.transform.nans_to_nulls(self.obj, stream=stream)
447
486
  result = type(self)(self.obj.with_mask(mask, new_count), self.dtype)
448
487
  if old_count == new_count:
449
488
  return result.sorted_like(self)
450
489
  return result
451
490
  return self.copy()
452
491
 
453
- @functools.cached_property
454
- def nan_count(self) -> int:
455
- """Return the number of NaN values in the column."""
492
+ def nan_count(self, stream: Stream) -> int:
493
+ """
494
+ Return the number of NaN values in the column.
495
+
496
+ Parameters
497
+ ----------
498
+ stream
499
+ CUDA stream used for device memory operations and kernel launches.
500
+ ``self.obj`` must be valid on this stream, and the result will be
501
+ valid on this stream.
502
+
503
+ Returns
504
+ -------
505
+ Number of NaN values in the column.
506
+ """
507
+ result: int
456
508
  if self.size > 0 and plc.traits.is_floating_point(self.obj.type()):
457
- return plc.reduce.reduce(
458
- plc.unary.is_nan(self.obj),
509
+ # See https://github.com/rapidsai/cudf/issues/20202 for we type ignore
510
+ result = plc.reduce.reduce( # type: ignore[assignment]
511
+ plc.unary.is_nan(self.obj, stream=stream),
459
512
  plc.aggregation.sum(),
460
513
  plc.types.SIZE_TYPE,
461
- ).to_py()
462
- return 0
514
+ stream=stream,
515
+ ).to_py(stream=stream)
516
+ else:
517
+ result = 0
518
+ return result
463
519
 
464
520
  @property
465
521
  def size(self) -> int:
@@ -471,7 +527,7 @@ class Column:
471
527
  """Return the number of Null values in the column."""
472
528
  return self.obj.null_count()
473
529
 
474
- def slice(self, zlice: Slice | None) -> Self:
530
+ def slice(self, zlice: Slice | None, stream: Stream) -> Self:
475
531
  """
476
532
  Slice a column.
477
533
 
@@ -480,6 +536,9 @@ class Column:
480
536
  zlice
481
537
  optional, tuple of start and length, negative values of start
482
538
  treated as for python indexing. If not provided, returns self.
539
+ stream
540
+ CUDA stream used for device memory operations and kernel launches
541
+ on this Column. The data in ``self.obj`` must be valid on this stream.
483
542
 
484
543
  Returns
485
544
  -------
@@ -490,6 +549,7 @@ class Column:
490
549
  (table,) = plc.copying.slice(
491
550
  plc.Table([self.obj]),
492
551
  conversion.from_polars_slice(zlice, num_rows=self.size),
552
+ stream=stream,
493
553
  )
494
554
  (column,) = table.columns()
495
555
  return type(self)(column, name=self.name, dtype=self.dtype).sorted_like(self)