cudf-polars-cu12 24.8.0a281__py3-none-any.whl → 25.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. cudf_polars/VERSION +1 -1
  2. cudf_polars/__init__.py +9 -3
  3. cudf_polars/callback.py +258 -23
  4. cudf_polars/containers/__init__.py +2 -2
  5. cudf_polars/containers/column.py +167 -66
  6. cudf_polars/containers/dataframe.py +157 -58
  7. cudf_polars/dsl/expr.py +37 -1397
  8. cudf_polars/dsl/expressions/__init__.py +8 -0
  9. cudf_polars/dsl/expressions/aggregation.py +246 -0
  10. cudf_polars/dsl/expressions/base.py +300 -0
  11. cudf_polars/dsl/expressions/binaryop.py +135 -0
  12. cudf_polars/dsl/expressions/boolean.py +312 -0
  13. cudf_polars/dsl/expressions/datetime.py +196 -0
  14. cudf_polars/dsl/expressions/literal.py +91 -0
  15. cudf_polars/dsl/expressions/rolling.py +40 -0
  16. cudf_polars/dsl/expressions/selection.py +92 -0
  17. cudf_polars/dsl/expressions/sorting.py +97 -0
  18. cudf_polars/dsl/expressions/string.py +362 -0
  19. cudf_polars/dsl/expressions/ternary.py +53 -0
  20. cudf_polars/dsl/expressions/unary.py +339 -0
  21. cudf_polars/dsl/ir.py +1202 -427
  22. cudf_polars/dsl/nodebase.py +150 -0
  23. cudf_polars/dsl/to_ast.py +318 -0
  24. cudf_polars/dsl/translate.py +398 -181
  25. cudf_polars/dsl/traversal.py +175 -0
  26. cudf_polars/experimental/__init__.py +8 -0
  27. cudf_polars/experimental/base.py +43 -0
  28. cudf_polars/experimental/dask_serialize.py +59 -0
  29. cudf_polars/experimental/dispatch.py +84 -0
  30. cudf_polars/experimental/io.py +325 -0
  31. cudf_polars/experimental/parallel.py +253 -0
  32. cudf_polars/experimental/select.py +36 -0
  33. cudf_polars/testing/asserts.py +139 -19
  34. cudf_polars/testing/plugin.py +242 -0
  35. cudf_polars/typing/__init__.py +51 -10
  36. cudf_polars/utils/dtypes.py +88 -39
  37. cudf_polars/utils/sorting.py +2 -2
  38. cudf_polars/utils/versions.py +22 -0
  39. {cudf_polars_cu12-24.8.0a281.dist-info → cudf_polars_cu12-25.2.0.dist-info}/METADATA +15 -12
  40. cudf_polars_cu12-25.2.0.dist-info/RECORD +48 -0
  41. {cudf_polars_cu12-24.8.0a281.dist-info → cudf_polars_cu12-25.2.0.dist-info}/WHEEL +1 -1
  42. cudf_polars_cu12-24.8.0a281.dist-info/RECORD +0 -23
  43. {cudf_polars_cu12-24.8.0a281.dist-info → cudf_polars_cu12-25.2.0.dist-info}/LICENSE +0 -0
  44. {cudf_polars_cu12-24.8.0a281.dist-info → cudf_polars_cu12-25.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/VERSION CHANGED
@@ -1 +1 @@
1
- 24.08.00a281
1
+ 25.02.00
cudf_polars/__init__.py CHANGED
@@ -12,11 +12,17 @@ from __future__ import annotations
12
12
 
13
13
  from cudf_polars._version import __git_commit__, __version__
14
14
  from cudf_polars.callback import execute_with_cudf
15
- from cudf_polars.dsl.translate import translate_ir
15
+ from cudf_polars.dsl.translate import Translator
16
+
17
+ # Check we have a supported polars version
18
+ from cudf_polars.utils.versions import _ensure_polars_version
19
+
20
+ _ensure_polars_version()
21
+ del _ensure_polars_version
16
22
 
17
23
  __all__: list[str] = [
18
- "execute_with_cudf",
19
- "translate_ir",
24
+ "Translator",
20
25
  "__git_commit__",
21
26
  "__version__",
27
+ "execute_with_cudf",
22
28
  ]
cudf_polars/callback.py CHANGED
@@ -5,15 +5,27 @@
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- from functools import partial
9
- from typing import TYPE_CHECKING
8
+ import contextlib
9
+ import os
10
+ import warnings
11
+ from functools import cache, partial
12
+ from typing import TYPE_CHECKING, Literal
10
13
 
11
14
  import nvtx
12
15
 
13
- from cudf_polars.dsl.translate import translate_ir
16
+ from polars.exceptions import ComputeError, PerformanceWarning
17
+
18
+ import pylibcudf
19
+ import rmm
20
+ from rmm._cuda import gpu
21
+
22
+ from cudf_polars.dsl.translate import Translator
14
23
 
15
24
  if TYPE_CHECKING:
25
+ from collections.abc import Generator
26
+
16
27
  import polars as pl
28
+ from polars import GPUEngine
17
29
 
18
30
  from cudf_polars.dsl.ir import IR
19
31
  from cudf_polars.typing import NodeTraverser
@@ -21,25 +33,214 @@ if TYPE_CHECKING:
21
33
  __all__: list[str] = ["execute_with_cudf"]
22
34
 
23
35
 
36
+ _SUPPORTED_PREFETCHES = {
37
+ "column_view::get_data",
38
+ "mutable_column_view::get_data",
39
+ "gather",
40
+ "hash_join",
41
+ }
42
+
43
+
44
+ def _env_get_int(name, default):
45
+ try:
46
+ return int(os.getenv(name, default))
47
+ except (ValueError, TypeError): # pragma: no cover
48
+ return default # pragma: no cover
49
+
50
+
51
+ @cache
52
+ def default_memory_resource(
53
+ device: int,
54
+ cuda_managed_memory: bool, # noqa: FBT001
55
+ ) -> rmm.mr.DeviceMemoryResource:
56
+ """
57
+ Return the default memory resource for cudf-polars.
58
+
59
+ Parameters
60
+ ----------
61
+ device
62
+ Disambiguating device id when selecting the device. Must be
63
+ the active device when this function is called.
64
+ cuda_managed_memory
65
+ Whether to use managed memory or not.
66
+
67
+ Returns
68
+ -------
69
+ rmm.mr.DeviceMemoryResource
70
+ The default memory resource that cudf-polars uses. Currently
71
+ a managed memory resource, if `cuda_managed_memory` is `True`.
72
+ else, an async pool resource is returned.
73
+ """
74
+ try:
75
+ if (
76
+ cuda_managed_memory
77
+ and pylibcudf.utils._is_concurrent_managed_access_supported()
78
+ ):
79
+ # Allocating 80% of the available memory for the pool.
80
+ # Leaving a 20% headroom to avoid OOM errors.
81
+ free_memory, _ = rmm.mr.available_device_memory()
82
+ free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
83
+ for key in _SUPPORTED_PREFETCHES:
84
+ pylibcudf.experimental.enable_prefetching(key)
85
+ mr = rmm.mr.PrefetchResourceAdaptor(
86
+ rmm.mr.PoolMemoryResource(
87
+ rmm.mr.ManagedMemoryResource(),
88
+ initial_pool_size=free_memory,
89
+ )
90
+ )
91
+ else:
92
+ mr = rmm.mr.CudaAsyncMemoryResource()
93
+ except RuntimeError as e: # pragma: no cover
94
+ msg, *_ = e.args
95
+ if (
96
+ msg.startswith("RMM failure")
97
+ and msg.find("not supported with this CUDA driver/runtime version") > -1
98
+ ):
99
+ raise ComputeError(
100
+ "GPU engine requested, but incorrect cudf-polars package installed. "
101
+ "If your system has a CUDA 11 driver, please uninstall `cudf-polars-cu12` "
102
+ "and install `cudf-polars-cu11`"
103
+ ) from None
104
+ else:
105
+ raise
106
+ else:
107
+ return mr
108
+
109
+
110
+ @contextlib.contextmanager
111
+ def set_memory_resource(
112
+ mr: rmm.mr.DeviceMemoryResource | None,
113
+ ) -> Generator[rmm.mr.DeviceMemoryResource, None, None]:
114
+ """
115
+ Set the current memory resource for an execution block.
116
+
117
+ Parameters
118
+ ----------
119
+ mr
120
+ Memory resource to use. If `None`, calls :func:`default_memory_resource`
121
+ to obtain an mr on the currently active device.
122
+
123
+ Returns
124
+ -------
125
+ Memory resource used.
126
+
127
+ Notes
128
+ -----
129
+ At exit, the memory resource is restored to whatever was current
130
+ at entry. If a memory resource is provided, it must be valid to
131
+ use with the currently active device.
132
+ """
133
+ previous = rmm.mr.get_current_device_resource()
134
+ if mr is None:
135
+ device: int = gpu.getDevice()
136
+ mr = default_memory_resource(
137
+ device=device,
138
+ cuda_managed_memory=bool(
139
+ _env_get_int("POLARS_GPU_ENABLE_CUDA_MANAGED_MEMORY", default=1) != 0
140
+ ),
141
+ )
142
+ rmm.mr.set_current_device_resource(mr)
143
+ try:
144
+ yield mr
145
+ finally:
146
+ rmm.mr.set_current_device_resource(previous)
147
+
148
+
149
+ @contextlib.contextmanager
150
+ def set_device(device: int | None) -> Generator[int, None, None]:
151
+ """
152
+ Set the device the query is executed on.
153
+
154
+ Parameters
155
+ ----------
156
+ device
157
+ Device to use. If `None`, uses the current device.
158
+
159
+ Returns
160
+ -------
161
+ Device active for the execution of the block.
162
+
163
+ Notes
164
+ -----
165
+ At exit, the device is restored to whatever was current at entry.
166
+ """
167
+ previous: int = gpu.getDevice()
168
+ if device is not None:
169
+ gpu.setDevice(device)
170
+ try:
171
+ yield previous
172
+ finally:
173
+ gpu.setDevice(previous)
174
+
175
+
24
176
  def _callback(
25
177
  ir: IR,
26
178
  with_columns: list[str] | None,
27
179
  pyarrow_predicate: str | None,
28
180
  n_rows: int | None,
181
+ *,
182
+ device: int | None,
183
+ memory_resource: int | None,
184
+ executor: Literal["pylibcudf", "dask-experimental"] | None,
29
185
  ) -> pl.DataFrame:
30
186
  assert with_columns is None
31
187
  assert pyarrow_predicate is None
32
188
  assert n_rows is None
33
- with nvtx.annotate(message="ExecuteIR", domain="cudf_polars"):
34
- return ir.evaluate(cache={}).to_polars()
189
+ with (
190
+ nvtx.annotate(message="ExecuteIR", domain="cudf_polars"),
191
+ # Device must be set before memory resource is obtained.
192
+ set_device(device),
193
+ set_memory_resource(memory_resource),
194
+ ):
195
+ if executor is None or executor == "pylibcudf":
196
+ return ir.evaluate(cache={}).to_polars()
197
+ elif executor == "dask-experimental":
198
+ from cudf_polars.experimental.parallel import evaluate_dask
35
199
 
200
+ return evaluate_dask(ir).to_polars()
201
+ else:
202
+ raise ValueError(f"Unknown executor '{executor}'")
36
203
 
37
- def execute_with_cudf(
38
- nt: NodeTraverser,
39
- *,
40
- raise_on_fail: bool = False,
41
- exception: type[Exception] | tuple[type[Exception], ...] = Exception,
42
- ) -> None:
204
+
205
+ def validate_config_options(config: dict) -> None:
206
+ """
207
+ Validate the configuration options for the GPU engine.
208
+
209
+ Parameters
210
+ ----------
211
+ config
212
+ Configuration options to validate.
213
+
214
+ Raises
215
+ ------
216
+ ValueError
217
+ If the configuration contains unsupported options.
218
+ """
219
+ if unsupported := (
220
+ config.keys()
221
+ - {"raise_on_fail", "parquet_options", "executor", "executor_options"}
222
+ ):
223
+ raise ValueError(
224
+ f"Engine configuration contains unsupported settings: {unsupported}"
225
+ )
226
+ assert {"chunked", "chunk_read_limit", "pass_read_limit"}.issuperset(
227
+ config.get("parquet_options", {})
228
+ )
229
+
230
+ # Validate executor_options
231
+ executor = config.get("executor", "pylibcudf")
232
+ if executor == "dask-experimental":
233
+ unsupported = config.get("executor_options", {}).keys() - {
234
+ "max_rows_per_partition",
235
+ "parquet_blocksize",
236
+ }
237
+ else:
238
+ unsupported = config.get("executor_options", {}).keys()
239
+ if unsupported:
240
+ raise ValueError(f"Unsupported executor_options for {executor}: {unsupported}")
241
+
242
+
243
+ def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None:
43
244
  """
44
245
  A post optimization callback that attempts to execute the plan with cudf.
45
246
 
@@ -48,19 +249,53 @@ def execute_with_cudf(
48
249
  nt
49
250
  NodeTraverser
50
251
 
51
- raise_on_fail
52
- Should conversion raise an exception rather than continuing
53
- without setting a callback.
252
+ config
253
+ GPUEngine configuration object
54
254
 
55
- exception
56
- Optional exception, or tuple of exceptions, to catch during
57
- translation. Defaults to ``Exception``.
255
+ Raises
256
+ ------
257
+ ValueError
258
+ If the config contains unsupported keys.
259
+ NotImplementedError
260
+ If translation of the plan is unsupported.
58
261
 
262
+ Notes
263
+ -----
59
264
  The NodeTraverser is mutated if the libcudf executor can handle the plan.
60
265
  """
61
- try:
62
- with nvtx.annotate(message="ConvertIR", domain="cudf_polars"):
63
- nt.set_udf(partial(_callback, translate_ir(nt)))
64
- except exception:
65
- if raise_on_fail:
66
- raise
266
+ device = config.device
267
+ memory_resource = config.memory_resource
268
+ raise_on_fail = config.config.get("raise_on_fail", False)
269
+ executor = config.config.get("executor", None)
270
+ validate_config_options(config.config)
271
+
272
+ with nvtx.annotate(message="ConvertIR", domain="cudf_polars"):
273
+ translator = Translator(nt, config)
274
+ ir = translator.translate_ir()
275
+ ir_translation_errors = translator.errors
276
+ if len(ir_translation_errors):
277
+ # TODO: Display these errors in user-friendly way.
278
+ # tracked in https://github.com/rapidsai/cudf/issues/17051
279
+ unique_errors = sorted(set(ir_translation_errors), key=str)
280
+ formatted_errors = "\n".join(
281
+ f"- {e.__class__.__name__}: {e}" for e in unique_errors
282
+ )
283
+ error_message = (
284
+ "Query execution with GPU not possible: unsupported operations."
285
+ f"\nThe errors were:\n{formatted_errors}"
286
+ )
287
+ exception = NotImplementedError(error_message, unique_errors)
288
+ if bool(int(os.environ.get("POLARS_VERBOSE", 0))):
289
+ warnings.warn(error_message, PerformanceWarning, stacklevel=2)
290
+ if raise_on_fail:
291
+ raise exception
292
+ else:
293
+ nt.set_udf(
294
+ partial(
295
+ _callback,
296
+ ir,
297
+ device=device,
298
+ memory_resource=memory_resource,
299
+ executor=executor,
300
+ )
301
+ )
@@ -5,7 +5,7 @@
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- __all__: list[str] = ["DataFrame", "Column", "NamedColumn"]
8
+ __all__: list[str] = ["Column", "DataFrame"]
9
9
 
10
- from cudf_polars.containers.column import Column, NamedColumn
10
+ from cudf_polars.containers.column import Column
11
11
  from cudf_polars.containers.dataframe import DataFrame
@@ -8,12 +8,25 @@ from __future__ import annotations
8
8
  import functools
9
9
  from typing import TYPE_CHECKING
10
10
 
11
- import cudf._lib.pylibcudf as plc
11
+ from polars.exceptions import InvalidOperationError
12
+
13
+ import pylibcudf as plc
14
+ from pylibcudf.strings.convert.convert_floats import from_floats, is_float, to_floats
15
+ from pylibcudf.strings.convert.convert_integers import (
16
+ from_integers,
17
+ is_integer,
18
+ to_integers,
19
+ )
20
+ from pylibcudf.traits import is_floating_point
21
+
22
+ from cudf_polars.utils.dtypes import is_order_preserving_cast
12
23
 
13
24
  if TYPE_CHECKING:
14
25
  from typing_extensions import Self
15
26
 
16
- __all__: list[str] = ["Column", "NamedColumn"]
27
+ import polars as pl
28
+
29
+ __all__: list[str] = ["Column"]
17
30
 
18
31
 
19
32
  class Column:
@@ -24,6 +37,9 @@ class Column:
24
37
  order: plc.types.Order
25
38
  null_order: plc.types.NullOrder
26
39
  is_scalar: bool
40
+ # Optional name, only ever set by evaluation of NamedExpr nodes
41
+ # The internal evaluation should not care about the name.
42
+ name: str | None
27
43
 
28
44
  def __init__(
29
45
  self,
@@ -32,14 +48,12 @@ class Column:
32
48
  is_sorted: plc.types.Sorted = plc.types.Sorted.NO,
33
49
  order: plc.types.Order = plc.types.Order.ASCENDING,
34
50
  null_order: plc.types.NullOrder = plc.types.NullOrder.BEFORE,
51
+ name: str | None = None,
35
52
  ):
36
53
  self.obj = column
37
54
  self.is_scalar = self.obj.size() == 1
38
- if self.obj.size() <= 1:
39
- is_sorted = plc.types.Sorted.YES
40
- self.is_sorted = is_sorted
41
- self.order = order
42
- self.null_order = null_order
55
+ self.name = name
56
+ self.set_sorted(is_sorted=is_sorted, order=order, null_order=null_order)
43
57
 
44
58
  @functools.cached_property
45
59
  def obj_scalar(self) -> plc.Scalar:
@@ -61,9 +75,26 @@ class Column:
61
75
  )
62
76
  return plc.copying.get_element(self.obj, 0)
63
77
 
78
+ def rename(self, name: str | None, /) -> Self:
79
+ """
80
+ Return a shallow copy with a new name.
81
+
82
+ Parameters
83
+ ----------
84
+ name
85
+ New name
86
+
87
+ Returns
88
+ -------
89
+ Shallow copy of self with new name set.
90
+ """
91
+ new = self.copy()
92
+ new.name = name
93
+ return new
94
+
64
95
  def sorted_like(self, like: Column, /) -> Self:
65
96
  """
66
- Copy sortedness properties from a column onto self.
97
+ Return a shallow copy with sortedness from like.
67
98
 
68
99
  Parameters
69
100
  ----------
@@ -72,16 +103,122 @@ class Column:
72
103
 
73
104
  Returns
74
105
  -------
75
- Self with metadata set.
106
+ Shallow copy of self with metadata set.
76
107
 
77
108
  See Also
78
109
  --------
79
- set_sorted
110
+ set_sorted, copy_metadata
80
111
  """
81
- return self.set_sorted(
82
- is_sorted=like.is_sorted, order=like.order, null_order=like.null_order
112
+ return type(self)(
113
+ self.obj,
114
+ name=self.name,
115
+ is_sorted=like.is_sorted,
116
+ order=like.order,
117
+ null_order=like.null_order,
83
118
  )
84
119
 
120
+ def astype(self, dtype: plc.DataType) -> Column:
121
+ """
122
+ Cast the column to as the requested dtype.
123
+
124
+ Parameters
125
+ ----------
126
+ dtype
127
+ Datatype to cast to.
128
+
129
+ Returns
130
+ -------
131
+ Column of requested type.
132
+
133
+ Raises
134
+ ------
135
+ RuntimeError
136
+ If the cast is unsupported.
137
+
138
+ Notes
139
+ -----
140
+ This only produces a copy if the requested dtype doesn't match
141
+ the current one.
142
+ """
143
+ if self.obj.type() == dtype:
144
+ return self
145
+
146
+ if dtype.id() == plc.TypeId.STRING or self.obj.type().id() == plc.TypeId.STRING:
147
+ return Column(self._handle_string_cast(dtype))
148
+ else:
149
+ result = Column(plc.unary.cast(self.obj, dtype))
150
+ if is_order_preserving_cast(self.obj.type(), dtype):
151
+ return result.sorted_like(self)
152
+ return result
153
+
154
+ def _handle_string_cast(self, dtype: plc.DataType) -> plc.Column:
155
+ if dtype.id() == plc.TypeId.STRING:
156
+ if is_floating_point(self.obj.type()):
157
+ return from_floats(self.obj)
158
+ else:
159
+ return from_integers(self.obj)
160
+ else:
161
+ if is_floating_point(dtype):
162
+ floats = is_float(self.obj)
163
+ if not plc.interop.to_arrow(
164
+ plc.reduce.reduce(
165
+ floats,
166
+ plc.aggregation.all(),
167
+ plc.DataType(plc.TypeId.BOOL8),
168
+ )
169
+ ).as_py():
170
+ raise InvalidOperationError("Conversion from `str` failed.")
171
+ return to_floats(self.obj, dtype)
172
+ else:
173
+ integers = is_integer(self.obj)
174
+ if not plc.interop.to_arrow(
175
+ plc.reduce.reduce(
176
+ integers,
177
+ plc.aggregation.all(),
178
+ plc.DataType(plc.TypeId.BOOL8),
179
+ )
180
+ ).as_py():
181
+ raise InvalidOperationError("Conversion from `str` failed.")
182
+ return to_integers(self.obj, dtype)
183
+
184
+ def copy_metadata(self, from_: pl.Series, /) -> Self:
185
+ """
186
+ Copy metadata from a host series onto self.
187
+
188
+ Parameters
189
+ ----------
190
+ from_
191
+ Polars series to copy metadata from
192
+
193
+ Returns
194
+ -------
195
+ Self with metadata set.
196
+
197
+ See Also
198
+ --------
199
+ set_sorted, sorted_like
200
+ """
201
+ self.name = from_.name
202
+ if len(from_) <= 1:
203
+ return self
204
+ ascending = from_.flags["SORTED_ASC"]
205
+ descending = from_.flags["SORTED_DESC"]
206
+ if ascending or descending:
207
+ has_null_first = from_.item(0) is None
208
+ has_null_last = from_.item(-1) is None
209
+ order = (
210
+ plc.types.Order.ASCENDING if ascending else plc.types.Order.DESCENDING
211
+ )
212
+ null_order = plc.types.NullOrder.BEFORE
213
+ if (descending and has_null_first) or (ascending and has_null_last):
214
+ null_order = plc.types.NullOrder.AFTER
215
+ return self.set_sorted(
216
+ is_sorted=plc.types.Sorted.YES,
217
+ order=order,
218
+ null_order=null_order,
219
+ )
220
+ return self
221
+
85
222
  def set_sorted(
86
223
  self,
87
224
  *,
@@ -125,65 +262,29 @@ class Column:
125
262
  is_sorted=self.is_sorted,
126
263
  order=self.order,
127
264
  null_order=self.null_order,
265
+ name=self.name,
128
266
  )
129
267
 
130
268
  def mask_nans(self) -> Self:
131
- """Return a copy of self with nans masked out."""
132
- if self.nan_count > 0:
133
- raise NotImplementedError("Need to port transform.hpp to pylibcudf")
269
+ """Return a shallow copy of self with nans masked out."""
270
+ if plc.traits.is_floating_point(self.obj.type()):
271
+ old_count = self.obj.null_count()
272
+ mask, new_count = plc.transform.nans_to_nulls(self.obj)
273
+ result = type(self)(self.obj.with_mask(mask, new_count))
274
+ if old_count == new_count:
275
+ return result.sorted_like(self)
276
+ return result
134
277
  return self.copy()
135
278
 
136
279
  @functools.cached_property
137
280
  def nan_count(self) -> int:
138
281
  """Return the number of NaN values in the column."""
139
- if self.obj.type().id() not in (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64):
140
- return 0
141
- return plc.interop.to_arrow(
142
- plc.reduce.reduce(
143
- plc.unary.is_nan(self.obj),
144
- plc.aggregation.sum(),
145
- # TODO: pylibcudf needs to have a SizeType DataType singleton
146
- plc.DataType(plc.TypeId.INT32),
147
- )
148
- ).as_py()
149
-
150
-
151
- class NamedColumn(Column):
152
- """A column with a name."""
153
-
154
- name: str
155
-
156
- def __init__(
157
- self,
158
- column: plc.Column,
159
- name: str,
160
- *,
161
- is_sorted: plc.types.Sorted = plc.types.Sorted.NO,
162
- order: plc.types.Order = plc.types.Order.ASCENDING,
163
- null_order: plc.types.NullOrder = plc.types.NullOrder.BEFORE,
164
- ) -> None:
165
- super().__init__(
166
- column, is_sorted=is_sorted, order=order, null_order=null_order
167
- )
168
- self.name = name
169
-
170
- def copy(self, *, new_name: str | None = None) -> Self:
171
- """
172
- A shallow copy of the column.
173
-
174
- Parameters
175
- ----------
176
- new_name
177
- Optional new name for the copied column.
178
-
179
- Returns
180
- -------
181
- New column sharing data with self.
182
- """
183
- return type(self)(
184
- self.obj,
185
- self.name if new_name is None else new_name,
186
- is_sorted=self.is_sorted,
187
- order=self.order,
188
- null_order=self.null_order,
189
- )
282
+ if plc.traits.is_floating_point(self.obj.type()):
283
+ return plc.interop.to_arrow(
284
+ plc.reduce.reduce(
285
+ plc.unary.is_nan(self.obj),
286
+ plc.aggregation.sum(),
287
+ plc.types.SIZE_TYPE,
288
+ )
289
+ ).as_py()
290
+ return 0