cudf-polars-cu12 25.4.0__py3-none-any.whl → 25.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. cudf_polars/VERSION +1 -1
  2. cudf_polars/callback.py +55 -61
  3. cudf_polars/containers/__init__.py +4 -2
  4. cudf_polars/containers/column.py +123 -40
  5. cudf_polars/containers/dataframe.py +70 -35
  6. cudf_polars/containers/datatype.py +135 -0
  7. cudf_polars/dsl/expr.py +2 -0
  8. cudf_polars/dsl/expressions/aggregation.py +51 -71
  9. cudf_polars/dsl/expressions/base.py +45 -77
  10. cudf_polars/dsl/expressions/binaryop.py +29 -44
  11. cudf_polars/dsl/expressions/boolean.py +64 -71
  12. cudf_polars/dsl/expressions/datetime.py +70 -34
  13. cudf_polars/dsl/expressions/literal.py +45 -33
  14. cudf_polars/dsl/expressions/rolling.py +133 -10
  15. cudf_polars/dsl/expressions/selection.py +13 -31
  16. cudf_polars/dsl/expressions/slicing.py +6 -13
  17. cudf_polars/dsl/expressions/sorting.py +9 -21
  18. cudf_polars/dsl/expressions/string.py +470 -84
  19. cudf_polars/dsl/expressions/struct.py +138 -0
  20. cudf_polars/dsl/expressions/ternary.py +9 -13
  21. cudf_polars/dsl/expressions/unary.py +151 -90
  22. cudf_polars/dsl/ir.py +798 -331
  23. cudf_polars/dsl/nodebase.py +11 -4
  24. cudf_polars/dsl/to_ast.py +61 -20
  25. cudf_polars/dsl/tracing.py +16 -0
  26. cudf_polars/dsl/translate.py +279 -167
  27. cudf_polars/dsl/traversal.py +64 -15
  28. cudf_polars/dsl/utils/__init__.py +8 -0
  29. cudf_polars/dsl/utils/aggregations.py +301 -0
  30. cudf_polars/dsl/utils/groupby.py +93 -0
  31. cudf_polars/dsl/utils/naming.py +34 -0
  32. cudf_polars/dsl/utils/replace.py +61 -0
  33. cudf_polars/dsl/utils/reshape.py +74 -0
  34. cudf_polars/dsl/utils/rolling.py +115 -0
  35. cudf_polars/dsl/utils/windows.py +186 -0
  36. cudf_polars/experimental/base.py +112 -8
  37. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  38. cudf_polars/experimental/benchmarks/pdsds.py +216 -0
  39. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  40. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  41. cudf_polars/experimental/benchmarks/pdsh.py +812 -0
  42. cudf_polars/experimental/benchmarks/utils.py +725 -0
  43. cudf_polars/experimental/dask_registers.py +200 -0
  44. cudf_polars/experimental/dispatch.py +22 -7
  45. cudf_polars/experimental/distinct.py +194 -0
  46. cudf_polars/experimental/explain.py +127 -0
  47. cudf_polars/experimental/expressions.py +547 -0
  48. cudf_polars/experimental/groupby.py +174 -196
  49. cudf_polars/experimental/io.py +626 -51
  50. cudf_polars/experimental/join.py +104 -33
  51. cudf_polars/experimental/parallel.py +219 -133
  52. cudf_polars/experimental/repartition.py +69 -0
  53. cudf_polars/experimental/scheduler.py +155 -0
  54. cudf_polars/experimental/select.py +132 -7
  55. cudf_polars/experimental/shuffle.py +126 -18
  56. cudf_polars/experimental/sort.py +45 -0
  57. cudf_polars/experimental/spilling.py +151 -0
  58. cudf_polars/experimental/utils.py +112 -0
  59. cudf_polars/testing/asserts.py +213 -14
  60. cudf_polars/testing/io.py +72 -0
  61. cudf_polars/testing/plugin.py +77 -67
  62. cudf_polars/typing/__init__.py +63 -22
  63. cudf_polars/utils/config.py +584 -117
  64. cudf_polars/utils/dtypes.py +4 -117
  65. cudf_polars/utils/timer.py +1 -1
  66. cudf_polars/utils/versions.py +7 -5
  67. {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.8.0.dist-info}/METADATA +13 -18
  68. cudf_polars_cu12-25.8.0.dist-info/RECORD +81 -0
  69. {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.8.0.dist-info}/WHEEL +1 -1
  70. cudf_polars/experimental/dask_serialize.py +0 -73
  71. cudf_polars_cu12-25.4.0.dist-info/RECORD +0 -55
  72. {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.8.0.dist-info}/licenses/LICENSE +0 -0
  73. {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.8.0.dist-info}/top_level.txt +0 -0
cudf_polars/VERSION CHANGED
@@ -1 +1 @@
1
- 25.04.00
1
+ 25.08.00
cudf_polars/callback.py CHANGED
@@ -7,12 +7,14 @@ from __future__ import annotations
7
7
 
8
8
  import contextlib
9
9
  import os
10
+ import textwrap
10
11
  import time
11
12
  import warnings
12
13
  from functools import cache, partial
13
14
  from typing import TYPE_CHECKING, Literal, overload
14
15
 
15
16
  import nvtx
17
+ from typing_extensions import assert_never
16
18
 
17
19
  from polars.exceptions import ComputeError, PerformanceWarning
18
20
 
@@ -20,9 +22,10 @@ import pylibcudf
20
22
  import rmm
21
23
  from rmm._cuda import gpu
22
24
 
25
+ from cudf_polars.dsl.tracing import CUDF_POLARS_NVTX_DOMAIN
23
26
  from cudf_polars.dsl.translate import Translator
27
+ from cudf_polars.utils.config import _env_get_int, get_total_device_memory
24
28
  from cudf_polars.utils.timer import Timer
25
- from cudf_polars.utils.versions import POLARS_VERSION_LT_125
26
29
 
27
30
  if TYPE_CHECKING:
28
31
  from collections.abc import Generator
@@ -32,6 +35,7 @@ if TYPE_CHECKING:
32
35
 
33
36
  from cudf_polars.dsl.ir import IR
34
37
  from cudf_polars.typing import NodeTraverser
38
+ from cudf_polars.utils.config import ConfigOptions
35
39
 
36
40
  __all__: list[str] = ["execute_with_cudf"]
37
41
 
@@ -44,13 +48,6 @@ _SUPPORTED_PREFETCHES = {
44
48
  }
45
49
 
46
50
 
47
- def _env_get_int(name, default):
48
- try:
49
- return int(os.getenv(name, default))
50
- except (ValueError, TypeError): # pragma: no cover
51
- return default # pragma: no cover
52
-
53
-
54
51
  @cache
55
52
  def default_memory_resource(
56
53
  device: int,
@@ -101,8 +98,7 @@ def default_memory_resource(
101
98
  ):
102
99
  raise ComputeError(
103
100
  "GPU engine requested, but incorrect cudf-polars package installed. "
104
- "If your system has a CUDA 11 driver, please uninstall `cudf-polars-cu12` "
105
- "and install `cudf-polars-cu11`"
101
+ "cudf-polars requires CUDA 12.0+ to installed."
106
102
  ) from None
107
103
  else:
108
104
  raise
@@ -139,7 +135,11 @@ def set_memory_resource(
139
135
  mr = default_memory_resource(
140
136
  device=device,
141
137
  cuda_managed_memory=bool(
142
- _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) != 0
138
+ _env_get_int(
139
+ "POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY",
140
+ default=1 if get_total_device_memory() is not None else 0,
141
+ )
142
+ != 0
143
143
  ),
144
144
  )
145
145
  rmm.mr.set_current_device_resource(mr)
@@ -184,9 +184,8 @@ def _callback(
184
184
  n_rows: int | None,
185
185
  should_time: Literal[False],
186
186
  *,
187
- device: int | None,
188
- memory_resource: int | None,
189
- executor: Literal["pylibcudf", "dask-experimental"] | None,
187
+ memory_resource: rmm.mr.DeviceMemoryResource | None,
188
+ config_options: ConfigOptions,
190
189
  timer: Timer | None,
191
190
  ) -> pl.DataFrame: ...
192
191
 
@@ -199,9 +198,8 @@ def _callback(
199
198
  n_rows: int | None,
200
199
  should_time: Literal[True],
201
200
  *,
202
- device: int | None,
203
- memory_resource: int | None,
204
- executor: Literal["pylibcudf", "dask-experimental"] | None,
201
+ memory_resource: rmm.mr.DeviceMemoryResource | None,
202
+ config_options: ConfigOptions,
205
203
  timer: Timer | None,
206
204
  ) -> tuple[pl.DataFrame, list[tuple[int, int, str]]]: ...
207
205
 
@@ -213,34 +211,42 @@ def _callback(
213
211
  n_rows: int | None,
214
212
  should_time: bool, # noqa: FBT001
215
213
  *,
216
- device: int | None,
217
- memory_resource: int | None,
218
- executor: Literal["pylibcudf", "dask-experimental"] | None,
214
+ memory_resource: rmm.mr.DeviceMemoryResource | None,
215
+ config_options: ConfigOptions,
219
216
  timer: Timer | None,
220
- ):
217
+ ) -> pl.DataFrame | tuple[pl.DataFrame, list[tuple[int, int, str]]]:
221
218
  assert with_columns is None
222
219
  assert pyarrow_predicate is None
223
220
  assert n_rows is None
224
221
  if timer is not None:
225
222
  assert should_time
226
223
  with (
227
- nvtx.annotate(message="ExecuteIR", domain="cudf_polars"),
224
+ nvtx.annotate(message="ExecuteIR", domain=CUDF_POLARS_NVTX_DOMAIN),
228
225
  # Device must be set before memory resource is obtained.
229
- set_device(device),
226
+ set_device(config_options.device),
230
227
  set_memory_resource(memory_resource),
231
228
  ):
232
- if executor is None or executor == "pylibcudf":
229
+ if config_options.executor.name == "in-memory":
233
230
  df = ir.evaluate(cache={}, timer=timer).to_polars()
234
231
  if timer is None:
235
232
  return df
236
233
  else:
237
234
  return df, timer.timings
238
- elif executor == "dask-experimental":
239
- from cudf_polars.experimental.parallel import evaluate_dask
235
+ elif config_options.executor.name == "streaming":
236
+ from cudf_polars.experimental.parallel import evaluate_streaming
240
237
 
241
- return evaluate_dask(ir).to_polars()
242
- else:
243
- raise ValueError(f"Unknown executor '{executor}'")
238
+ if timer is not None:
239
+ msg = textwrap.dedent("""\
240
+ LazyFrame.profile() is not supported with the streaming executor.
241
+ To profile execution with the streaming executor, use:
242
+
243
+ - NVIDIA NSight Systems with the 'streaming' scheduler.
244
+ - Dask's built-in profiling tools with the 'distributed' scheduler.
245
+ """)
246
+ raise NotImplementedError(msg)
247
+
248
+ return evaluate_streaming(ir, config_options).to_polars()
249
+ assert_never(f"Unknown executor '{config_options.executor}'")
244
250
 
245
251
 
246
252
  def execute_with_cudf(
@@ -259,7 +265,7 @@ def execute_with_cudf(
259
265
  profiling should occur).
260
266
 
261
267
  config
262
- GPUEngine configuration object
268
+ GPUEngine object. Configuration is available as ``engine.config``.
263
269
 
264
270
  Raises
265
271
  ------
@@ -277,16 +283,22 @@ def execute_with_cudf(
277
283
  else:
278
284
  start = time.monotonic_ns()
279
285
  timer = Timer(start - duration_since_start)
280
- device = config.device
286
+
281
287
  memory_resource = config.memory_resource
282
- raise_on_fail = config.config.get("raise_on_fail", False)
283
- executor = config.config.get("executor", None)
284
- with nvtx.annotate(message="ConvertIR", domain="cudf_polars"):
288
+
289
+ with nvtx.annotate(message="ConvertIR", domain=CUDF_POLARS_NVTX_DOMAIN):
285
290
  translator = Translator(nt, config)
286
291
  ir = translator.translate_ir()
287
292
  ir_translation_errors = translator.errors
288
293
  if timer is not None:
289
294
  timer.store(start, time.monotonic_ns(), "gpu-ir-translation")
295
+
296
+ if (
297
+ memory_resource is None
298
+ and translator.config_options.executor.name == "streaming"
299
+ and translator.config_options.executor.scheduler == "distributed"
300
+ ): # pragma: no cover; Requires distributed cluster
301
+ memory_resource = rmm.mr.get_current_device_resource()
290
302
  if len(ir_translation_errors):
291
303
  # TODO: Display these errors in user-friendly way.
292
304
  # tracked in https://github.com/rapidsai/cudf/issues/17051
@@ -301,33 +313,15 @@ def execute_with_cudf(
301
313
  exception = NotImplementedError(error_message, unique_errors)
302
314
  if bool(int(os.environ.get("POLARS_VERBOSE", 0))):
303
315
  warnings.warn(error_message, PerformanceWarning, stacklevel=2)
304
- if raise_on_fail:
316
+ if translator.config_options.raise_on_fail:
305
317
  raise exception
306
318
  else:
307
- if POLARS_VERSION_LT_125: # pragma: no cover
308
- nt.set_udf(
309
- partial(
310
- _callback,
311
- ir,
312
- should_time=False,
313
- device=device,
314
- memory_resource=memory_resource,
315
- executor=executor,
316
- timer=None,
317
- )
318
- )
319
- else:
320
- nt.set_udf(
321
- partial(
322
- _callback,
323
- ir,
324
- device=device,
325
- memory_resource=memory_resource,
326
- executor=executor,
327
- timer=timer,
328
- )
319
+ nt.set_udf(
320
+ partial(
321
+ _callback,
322
+ ir,
323
+ memory_resource=memory_resource,
324
+ config_options=translator.config_options,
325
+ timer=timer,
329
326
  )
330
-
331
-
332
- if POLARS_VERSION_LT_125: # pragma: no cover
333
- execute_with_cudf = partial(execute_with_cudf, duration_since_start=None)
327
+ )
@@ -1,11 +1,13 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Containers of concrete data."""
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- __all__: list[str] = ["Column", "DataFrame"]
8
+ __all__: list[str] = ["Column", "DataFrame", "DataType"]
9
9
 
10
+ # dataframe.py & column.py imports DataType, so import in this order to avoid circular import
11
+ from cudf_polars.containers.datatype import DataType # noqa: I001
10
12
  from cudf_polars.containers.column import Column
11
13
  from cudf_polars.containers.dataframe import DataFrame
@@ -8,6 +8,8 @@ from __future__ import annotations
8
8
  import functools
9
9
  from typing import TYPE_CHECKING
10
10
 
11
+ import polars as pl
12
+ import polars.datatypes.convert
11
13
  from polars.exceptions import InvalidOperationError
12
14
 
13
15
  import pylibcudf as plc
@@ -19,19 +21,39 @@ from pylibcudf.strings.convert.convert_integers import (
19
21
  )
20
22
  from pylibcudf.traits import is_floating_point
21
23
 
24
+ from cudf_polars.containers import DataType
22
25
  from cudf_polars.utils import conversion
23
26
  from cudf_polars.utils.dtypes import is_order_preserving_cast
24
27
 
25
28
  if TYPE_CHECKING:
26
29
  from typing_extensions import Self
27
30
 
28
- import polars as pl
29
-
30
- from cudf_polars.typing import ColumnHeader, ColumnOptions, Slice
31
+ from cudf_polars.typing import (
32
+ ColumnHeader,
33
+ ColumnOptions,
34
+ DeserializedColumnOptions,
35
+ Slice,
36
+ )
31
37
 
32
38
  __all__: list[str] = ["Column"]
33
39
 
34
40
 
41
+ def _dtype_short_repr_to_dtype(dtype_str: str) -> pl.DataType:
42
+ """Convert a Polars dtype short repr to a Polars dtype."""
43
+ # limitations of dtype_short_repr_to_dtype described in
44
+ # py-polars/polars/datatypes/convert.py#L299
45
+ if dtype_str.startswith("list["):
46
+ stripped = dtype_str.removeprefix("list[").removesuffix("]")
47
+ return pl.List(_dtype_short_repr_to_dtype(stripped))
48
+ pl_type = polars.datatypes.convert.dtype_short_repr_to_dtype(dtype_str)
49
+ if pl_type is None:
50
+ raise ValueError(f"{dtype_str} was not able to be parsed by Polars.")
51
+ if isinstance(pl_type, polars.datatypes.DataTypeClass):
52
+ return pl_type()
53
+ else:
54
+ return pl_type
55
+
56
+
35
57
  class Column:
36
58
  """An immutable column with sortedness metadata."""
37
59
 
@@ -43,10 +65,12 @@ class Column:
43
65
  # Optional name, only ever set by evaluation of NamedExpr nodes
44
66
  # The internal evaluation should not care about the name.
45
67
  name: str | None
68
+ dtype: DataType
46
69
 
47
70
  def __init__(
48
71
  self,
49
72
  column: plc.Column,
73
+ dtype: DataType,
50
74
  *,
51
75
  is_sorted: plc.types.Sorted = plc.types.Sorted.NO,
52
76
  order: plc.types.Order = plc.types.Order.ASCENDING,
@@ -56,6 +80,7 @@ class Column:
56
80
  self.obj = column
57
81
  self.is_scalar = self.size == 1
58
82
  self.name = name
83
+ self.dtype = dtype
59
84
  self.set_sorted(is_sorted=is_sorted, order=order, null_order=null_order)
60
85
 
61
86
  @classmethod
@@ -81,7 +106,23 @@ class Column:
81
106
  (plc_column,) = plc.contiguous_split.unpack_from_memoryviews(
82
107
  packed_metadata, packed_gpu_data
83
108
  ).columns()
84
- return cls(plc_column, **header["column_kwargs"])
109
+ return cls(plc_column, **cls.deserialize_ctor_kwargs(header["column_kwargs"]))
110
+
111
+ @staticmethod
112
+ def deserialize_ctor_kwargs(
113
+ column_kwargs: ColumnOptions,
114
+ ) -> DeserializedColumnOptions:
115
+ """Deserialize the constructor kwargs for a Column."""
116
+ dtype = DataType( # pragma: no cover
117
+ _dtype_short_repr_to_dtype(column_kwargs["dtype"])
118
+ )
119
+ return {
120
+ "is_sorted": column_kwargs["is_sorted"],
121
+ "order": column_kwargs["order"],
122
+ "null_order": column_kwargs["null_order"],
123
+ "name": column_kwargs["name"],
124
+ "dtype": dtype,
125
+ }
85
126
 
86
127
  def serialize(
87
128
  self,
@@ -105,17 +146,21 @@ class Column:
105
146
  Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
106
147
  """
107
148
  packed = plc.contiguous_split.pack(plc.Table([self.obj]))
108
- column_kwargs: ColumnOptions = {
149
+ header: ColumnHeader = {
150
+ "column_kwargs": self.serialize_ctor_kwargs(),
151
+ "frame_count": 2,
152
+ }
153
+ return header, packed.release()
154
+
155
+ def serialize_ctor_kwargs(self) -> ColumnOptions:
156
+ """Serialize the constructor kwargs for self."""
157
+ return {
109
158
  "is_sorted": self.is_sorted,
110
159
  "order": self.order,
111
160
  "null_order": self.null_order,
112
161
  "name": self.name,
162
+ "dtype": pl.polars.dtype_str_repr(self.dtype.polars),
113
163
  }
114
- header: ColumnHeader = {
115
- "column_kwargs": column_kwargs,
116
- "frame_count": 2,
117
- }
118
- return header, packed.release()
119
164
 
120
165
  @functools.cached_property
121
166
  def obj_scalar(self) -> plc.Scalar:
@@ -172,12 +217,51 @@ class Column:
172
217
  return type(self)(
173
218
  self.obj,
174
219
  name=self.name,
220
+ dtype=self.dtype,
175
221
  is_sorted=like.is_sorted,
176
222
  order=like.order,
177
223
  null_order=like.null_order,
178
224
  )
179
225
 
180
- def astype(self, dtype: plc.DataType) -> Column:
226
+ def check_sorted(
227
+ self,
228
+ *,
229
+ order: plc.types.Order,
230
+ null_order: plc.types.NullOrder,
231
+ ) -> bool:
232
+ """
233
+ Check if the column is sorted.
234
+
235
+ Parameters
236
+ ----------
237
+ order
238
+ The requested sort order.
239
+ null_order
240
+ Where nulls sort to.
241
+
242
+ Returns
243
+ -------
244
+ True if the column is sorted, false otherwise.
245
+
246
+ Notes
247
+ -----
248
+ If the sortedness flag is not set, this launches a kernel to
249
+ check sortedness.
250
+ """
251
+ if self.size <= 1 or self.size == self.null_count:
252
+ return True
253
+ if self.is_sorted == plc.types.Sorted.YES:
254
+ return self.order == order and (
255
+ self.null_count == 0 or self.null_order == null_order
256
+ )
257
+ if plc.sorting.is_sorted(plc.Table([self.obj]), [order], [null_order]):
258
+ self.sorted = plc.types.Sorted.YES
259
+ self.order = order
260
+ self.null_order = null_order
261
+ return True
262
+ return False
263
+
264
+ def astype(self, dtype: DataType) -> Column:
181
265
  """
182
266
  Cast the column to as the requested dtype.
183
267
 
@@ -200,14 +284,18 @@ class Column:
200
284
  This only produces a copy if the requested dtype doesn't match
201
285
  the current one.
202
286
  """
203
- if self.obj.type() == dtype:
287
+ plc_dtype = dtype.plc
288
+ if self.obj.type() == plc_dtype:
204
289
  return self
205
290
 
206
- if dtype.id() == plc.TypeId.STRING or self.obj.type().id() == plc.TypeId.STRING:
207
- return Column(self._handle_string_cast(dtype))
291
+ if (
292
+ plc_dtype.id() == plc.TypeId.STRING
293
+ or self.obj.type().id() == plc.TypeId.STRING
294
+ ):
295
+ return Column(self._handle_string_cast(plc_dtype), dtype=dtype)
208
296
  else:
209
- result = Column(plc.unary.cast(self.obj, dtype))
210
- if is_order_preserving_cast(self.obj.type(), dtype):
297
+ result = Column(plc.unary.cast(self.obj, plc_dtype), dtype=dtype)
298
+ if is_order_preserving_cast(self.obj.type(), plc_dtype):
211
299
  return result.sorted_like(self)
212
300
  return result
213
301
 
@@ -220,24 +308,20 @@ class Column:
220
308
  else:
221
309
  if is_floating_point(dtype):
222
310
  floats = is_float(self.obj)
223
- if not plc.interop.to_arrow(
224
- plc.reduce.reduce(
225
- floats,
226
- plc.aggregation.all(),
227
- plc.DataType(plc.TypeId.BOOL8),
228
- )
229
- ).as_py():
311
+ if not plc.reduce.reduce(
312
+ floats,
313
+ plc.aggregation.all(),
314
+ plc.DataType(plc.TypeId.BOOL8),
315
+ ).to_py():
230
316
  raise InvalidOperationError("Conversion from `str` failed.")
231
317
  return to_floats(self.obj, dtype)
232
318
  else:
233
319
  integers = is_integer(self.obj)
234
- if not plc.interop.to_arrow(
235
- plc.reduce.reduce(
236
- integers,
237
- plc.aggregation.all(),
238
- plc.DataType(plc.TypeId.BOOL8),
239
- )
240
- ).as_py():
320
+ if not plc.reduce.reduce(
321
+ integers,
322
+ plc.aggregation.all(),
323
+ plc.DataType(plc.TypeId.BOOL8),
324
+ ).to_py():
241
325
  raise InvalidOperationError("Conversion from `str` failed.")
242
326
  return to_integers(self.obj, dtype)
243
327
 
@@ -323,6 +407,7 @@ class Column:
323
407
  order=self.order,
324
408
  null_order=self.null_order,
325
409
  name=self.name,
410
+ dtype=self.dtype,
326
411
  )
327
412
 
328
413
  def mask_nans(self) -> Self:
@@ -330,7 +415,7 @@ class Column:
330
415
  if plc.traits.is_floating_point(self.obj.type()):
331
416
  old_count = self.null_count
332
417
  mask, new_count = plc.transform.nans_to_nulls(self.obj)
333
- result = type(self)(self.obj.with_mask(mask, new_count))
418
+ result = type(self)(self.obj.with_mask(mask, new_count), self.dtype)
334
419
  if old_count == new_count:
335
420
  return result.sorted_like(self)
336
421
  return result
@@ -339,14 +424,12 @@ class Column:
339
424
  @functools.cached_property
340
425
  def nan_count(self) -> int:
341
426
  """Return the number of NaN values in the column."""
342
- if plc.traits.is_floating_point(self.obj.type()):
343
- return plc.interop.to_arrow(
344
- plc.reduce.reduce(
345
- plc.unary.is_nan(self.obj),
346
- plc.aggregation.sum(),
347
- plc.types.SIZE_TYPE,
348
- )
349
- ).as_py()
427
+ if self.size > 0 and plc.traits.is_floating_point(self.obj.type()):
428
+ return plc.reduce.reduce(
429
+ plc.unary.is_nan(self.obj),
430
+ plc.aggregation.sum(),
431
+ plc.types.SIZE_TYPE,
432
+ ).to_py()
350
433
  return 0
351
434
 
352
435
  @property
@@ -380,4 +463,4 @@ class Column:
380
463
  conversion.from_polars_slice(zlice, num_rows=self.size),
381
464
  )
382
465
  (column,) = table.columns()
383
- return type(self)(column, name=self.name).sorted_like(self)
466
+ return type(self)(column, name=self.name, dtype=self.dtype).sorted_like(self)