cudf-polars-cu13 25.12.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +28 -7
- cudf_polars/containers/column.py +51 -26
- cudf_polars/dsl/expressions/binaryop.py +1 -1
- cudf_polars/dsl/expressions/boolean.py +1 -1
- cudf_polars/dsl/expressions/selection.py +1 -1
- cudf_polars/dsl/expressions/string.py +29 -20
- cudf_polars/dsl/expressions/ternary.py +25 -1
- cudf_polars/dsl/expressions/unary.py +11 -8
- cudf_polars/dsl/ir.py +351 -281
- cudf_polars/dsl/translate.py +18 -15
- cudf_polars/dsl/utils/aggregations.py +10 -5
- cudf_polars/experimental/base.py +10 -0
- cudf_polars/experimental/benchmarks/pdsh.py +1 -1
- cudf_polars/experimental/benchmarks/utils.py +83 -2
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +1 -1
- cudf_polars/experimental/expressions.py +8 -5
- cudf_polars/experimental/groupby.py +2 -0
- cudf_polars/experimental/io.py +64 -42
- cudf_polars/experimental/join.py +15 -2
- cudf_polars/experimental/parallel.py +10 -7
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/{shuffle.py → collectives/shuffle.py} +90 -114
- cudf_polars/experimental/rapidsmpf/core.py +194 -67
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +6 -3
- cudf_polars/experimental/rapidsmpf/io.py +162 -70
- cudf_polars/experimental/rapidsmpf/join.py +162 -77
- cudf_polars/experimental/rapidsmpf/nodes.py +421 -180
- cudf_polars/experimental/rapidsmpf/repartition.py +130 -65
- cudf_polars/experimental/rapidsmpf/union.py +24 -5
- cudf_polars/experimental/rapidsmpf/utils.py +228 -16
- cudf_polars/experimental/shuffle.py +18 -4
- cudf_polars/experimental/sort.py +13 -6
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/testing/plugin.py +6 -3
- cudf_polars/utils/config.py +67 -0
- cudf_polars/utils/versions.py +3 -3
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/METADATA +9 -10
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/RECORD +47 -43
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/GIT_COMMIT
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
9782a269e689140d2b00b5172a93056bdf19e8c2
|
cudf_polars/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
26.02.000
|
cudf_polars/callback.py
CHANGED
|
@@ -11,6 +11,7 @@ import textwrap
|
|
|
11
11
|
import time
|
|
12
12
|
import warnings
|
|
13
13
|
from functools import cache, partial
|
|
14
|
+
from threading import Lock
|
|
14
15
|
from typing import TYPE_CHECKING, Literal, overload
|
|
15
16
|
|
|
16
17
|
import nvtx
|
|
@@ -162,6 +163,11 @@ def set_memory_resource(
|
|
|
162
163
|
rmm.mr.set_current_device_resource(previous)
|
|
163
164
|
|
|
164
165
|
|
|
166
|
+
# libcudf doesn't support executing on multiple devices from within the same process.
|
|
167
|
+
SEEN_DEVICE = None
|
|
168
|
+
SEEN_DEVICE_LOCK = Lock()
|
|
169
|
+
|
|
170
|
+
|
|
165
171
|
@contextlib.contextmanager
|
|
166
172
|
def set_device(device: int | None) -> Generator[int, None, None]:
|
|
167
173
|
"""
|
|
@@ -180,13 +186,28 @@ def set_device(device: int | None) -> Generator[int, None, None]:
|
|
|
180
186
|
-----
|
|
181
187
|
At exit, the device is restored to whatever was current at entry.
|
|
182
188
|
"""
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
189
|
+
global SEEN_DEVICE # noqa: PLW0603
|
|
190
|
+
current: int = gpu.getDevice()
|
|
191
|
+
to_use = device if device is not None else current
|
|
192
|
+
with SEEN_DEVICE_LOCK:
|
|
193
|
+
if (
|
|
194
|
+
SEEN_DEVICE is not None and to_use != SEEN_DEVICE
|
|
195
|
+
): # pragma: no cover; requires multiple GPUs in CI
|
|
196
|
+
raise RuntimeError(
|
|
197
|
+
"cudf-polars does not support running queries on "
|
|
198
|
+
"multiple devices in the same process. "
|
|
199
|
+
f"A previous query used device-{SEEN_DEVICE}, "
|
|
200
|
+
f"the current query is using device-{to_use}."
|
|
201
|
+
)
|
|
202
|
+
SEEN_DEVICE = to_use
|
|
203
|
+
if to_use != current:
|
|
204
|
+
gpu.setDevice(to_use)
|
|
205
|
+
try:
|
|
206
|
+
yield to_use
|
|
207
|
+
finally:
|
|
208
|
+
gpu.setDevice(current)
|
|
209
|
+
else:
|
|
210
|
+
yield to_use
|
|
190
211
|
|
|
191
212
|
|
|
192
213
|
@overload
|
cudf_polars/containers/column.py
CHANGED
|
@@ -16,7 +16,6 @@ from pylibcudf.strings.convert.convert_integers import (
|
|
|
16
16
|
is_integer,
|
|
17
17
|
to_integers,
|
|
18
18
|
)
|
|
19
|
-
from pylibcudf.traits import is_floating_point
|
|
20
19
|
|
|
21
20
|
from cudf_polars.containers import DataType
|
|
22
21
|
from cudf_polars.containers.datatype import _dtype_from_header, _dtype_to_header
|
|
@@ -24,6 +23,8 @@ from cudf_polars.utils import conversion
|
|
|
24
23
|
from cudf_polars.utils.dtypes import is_order_preserving_cast
|
|
25
24
|
|
|
26
25
|
if TYPE_CHECKING:
|
|
26
|
+
from collections.abc import Callable
|
|
27
|
+
|
|
27
28
|
from typing_extensions import Self
|
|
28
29
|
|
|
29
30
|
from polars import Series as pl_Series
|
|
@@ -264,7 +265,7 @@ class Column:
|
|
|
264
265
|
return True
|
|
265
266
|
return False
|
|
266
267
|
|
|
267
|
-
def astype(self, dtype: DataType, stream: Stream) -> Column:
|
|
268
|
+
def astype(self, dtype: DataType, stream: Stream, *, strict: bool = True) -> Column:
|
|
268
269
|
"""
|
|
269
270
|
Cast the column to the requested dtype.
|
|
270
271
|
|
|
@@ -275,6 +276,9 @@ class Column:
|
|
|
275
276
|
stream
|
|
276
277
|
CUDA stream used for device memory operations and kernel launches
|
|
277
278
|
on this Column. The data in ``self.obj`` must be valid on this stream.
|
|
279
|
+
strict
|
|
280
|
+
If True, raise an error if the cast is unsupported.
|
|
281
|
+
If False, return nulls for unsupported casts.
|
|
278
282
|
|
|
279
283
|
Returns
|
|
280
284
|
-------
|
|
@@ -299,7 +303,8 @@ class Column:
|
|
|
299
303
|
or self.obj.type().id() == plc.TypeId.STRING
|
|
300
304
|
):
|
|
301
305
|
return Column(
|
|
302
|
-
self._handle_string_cast(plc_dtype, stream=stream
|
|
306
|
+
self._handle_string_cast(plc_dtype, stream=stream, strict=strict),
|
|
307
|
+
dtype=dtype,
|
|
303
308
|
)
|
|
304
309
|
elif plc.traits.is_integral_not_bool(
|
|
305
310
|
self.obj.type()
|
|
@@ -340,33 +345,53 @@ class Column:
|
|
|
340
345
|
return result.sorted_like(self)
|
|
341
346
|
return result
|
|
342
347
|
|
|
343
|
-
def _handle_string_cast(
|
|
348
|
+
def _handle_string_cast(
|
|
349
|
+
self, dtype: plc.DataType, stream: Stream, *, strict: bool
|
|
350
|
+
) -> plc.Column:
|
|
344
351
|
if dtype.id() == plc.TypeId.STRING:
|
|
345
|
-
if is_floating_point(self.obj.type()):
|
|
352
|
+
if plc.traits.is_floating_point(self.obj.type()):
|
|
346
353
|
return from_floats(self.obj, stream=stream)
|
|
347
|
-
|
|
354
|
+
elif plc.traits.is_integral_not_bool(self.obj.type()):
|
|
348
355
|
return from_integers(self.obj, stream=stream)
|
|
356
|
+
else:
|
|
357
|
+
raise InvalidOperationError(
|
|
358
|
+
f"Unsupported casting from {self.dtype.id()} to {dtype.id()}."
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
type_checker: Callable[[plc.Column, Stream], plc.Column]
|
|
362
|
+
type_caster: Callable[[plc.Column, plc.DataType, Stream], plc.Column]
|
|
363
|
+
if plc.traits.is_floating_point(dtype):
|
|
364
|
+
type_checker = is_float
|
|
365
|
+
type_caster = to_floats
|
|
366
|
+
elif plc.traits.is_integral_not_bool(dtype):
|
|
367
|
+
# is_integer has a second optional int_type: plc.DataType | None = None argument
|
|
368
|
+
# we do not use
|
|
369
|
+
# unused-ignore for if RMM is missing
|
|
370
|
+
type_checker = is_integer # type: ignore[assignment,unused-ignore]
|
|
371
|
+
type_caster = to_integers
|
|
349
372
|
else:
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
373
|
+
raise InvalidOperationError(
|
|
374
|
+
f"Unsupported casting from {self.dtype.id()} to {dtype.id()}."
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
castable = type_checker(self.obj, stream=stream) # type: ignore[call-arg]
|
|
378
|
+
if not plc.reduce.reduce(
|
|
379
|
+
castable,
|
|
380
|
+
plc.aggregation.all(),
|
|
381
|
+
plc.DataType(plc.TypeId.BOOL8),
|
|
382
|
+
stream=stream,
|
|
383
|
+
).to_py(stream=stream):
|
|
384
|
+
if strict:
|
|
385
|
+
raise InvalidOperationError(
|
|
386
|
+
f"Conversion from {self.dtype.id()} to {dtype.id()} failed."
|
|
387
|
+
)
|
|
360
388
|
else:
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
).to_py():
|
|
368
|
-
raise InvalidOperationError("Conversion from `str` failed.")
|
|
369
|
-
return to_integers(self.obj, dtype, stream=stream)
|
|
389
|
+
values = self.obj.with_mask(
|
|
390
|
+
*plc.transform.bools_to_mask(castable, stream=stream)
|
|
391
|
+
)
|
|
392
|
+
else:
|
|
393
|
+
values = self.obj
|
|
394
|
+
return type_caster(values, dtype, stream=stream)
|
|
370
395
|
|
|
371
396
|
def copy_metadata(self, from_: pl_Series, /) -> Self:
|
|
372
397
|
"""
|
|
@@ -487,7 +512,7 @@ class Column:
|
|
|
487
512
|
plc.aggregation.sum(),
|
|
488
513
|
plc.types.SIZE_TYPE,
|
|
489
514
|
stream=stream,
|
|
490
|
-
).to_py()
|
|
515
|
+
).to_py(stream=stream)
|
|
491
516
|
else:
|
|
492
517
|
result = 0
|
|
493
518
|
return result
|
|
@@ -220,7 +220,7 @@ class BooleanFunction(Expr):
|
|
|
220
220
|
#
|
|
221
221
|
# If the input null count was non-zero, we must
|
|
222
222
|
# post-process the result to insert the correct value.
|
|
223
|
-
h_result = scalar_result.to_py()
|
|
223
|
+
h_result = scalar_result.to_py(stream=df.stream)
|
|
224
224
|
if (is_any and not h_result) or (not is_any and h_result):
|
|
225
225
|
# Any All
|
|
226
226
|
# False || Null => Null True && Null => Null
|
|
@@ -37,7 +37,7 @@ class Gather(Expr):
|
|
|
37
37
|
)
|
|
38
38
|
n = values.size
|
|
39
39
|
lo, hi = plc.reduce.minmax(indices.obj, stream=df.stream)
|
|
40
|
-
if hi.to_py() >= n or lo.to_py() < -n: # type: ignore[operator]
|
|
40
|
+
if hi.to_py(stream=df.stream) >= n or lo.to_py(stream=df.stream) < -n: # type: ignore[operator]
|
|
41
41
|
raise ValueError("gather indices are out of bounds")
|
|
42
42
|
if indices.null_count:
|
|
43
43
|
bounds_policy = plc.copying.OutOfBoundsPolicy.NULLIFY
|
|
@@ -390,7 +390,7 @@ class StringFunction(Expr):
|
|
|
390
390
|
plc.aggregation.all(),
|
|
391
391
|
plc.DataType(plc.TypeId.BOOL8),
|
|
392
392
|
stream=df.stream,
|
|
393
|
-
).to_py():
|
|
393
|
+
).to_py(stream=df.stream):
|
|
394
394
|
raise InvalidOperationError(
|
|
395
395
|
"zfill only supports ascii strings with no unicode characters"
|
|
396
396
|
)
|
|
@@ -427,15 +427,12 @@ class StringFunction(Expr):
|
|
|
427
427
|
stream=df.stream,
|
|
428
428
|
)
|
|
429
429
|
|
|
430
|
-
if (
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
stream=df.stream,
|
|
437
|
-
).to_py()
|
|
438
|
-
): # pragma: no cover
|
|
430
|
+
if POLARS_VERSION_LT_132 and not plc.reduce.reduce(
|
|
431
|
+
all_gt_0,
|
|
432
|
+
plc.aggregation.all(),
|
|
433
|
+
plc.DataType(plc.TypeId.BOOL8),
|
|
434
|
+
stream=df.stream,
|
|
435
|
+
).to_py(stream=df.stream): # pragma: no cover
|
|
439
436
|
raise InvalidOperationError("fill conversion failed.")
|
|
440
437
|
|
|
441
438
|
return Column(
|
|
@@ -887,11 +884,11 @@ class StringFunction(Expr):
|
|
|
887
884
|
filtered = table.columns()[0]
|
|
888
885
|
first_valid_data = plc.copying.get_element(
|
|
889
886
|
filtered, 0, stream=df.stream
|
|
890
|
-
).to_py()
|
|
887
|
+
).to_py(stream=df.stream)
|
|
891
888
|
else:
|
|
892
889
|
first_valid_data = plc.copying.get_element(
|
|
893
890
|
plc_col, 0, stream=df.stream
|
|
894
|
-
).to_py()
|
|
891
|
+
).to_py(stream=df.stream)
|
|
895
892
|
|
|
896
893
|
# See https://github.com/rapidsai/cudf/issues/20202 for why we type ignore
|
|
897
894
|
format = _infer_datetime_format(first_valid_data) # type: ignore[arg-type]
|
|
@@ -909,7 +906,7 @@ class StringFunction(Expr):
|
|
|
909
906
|
plc.aggregation.all(),
|
|
910
907
|
plc.DataType(plc.TypeId.BOOL8),
|
|
911
908
|
stream=df.stream,
|
|
912
|
-
).to_py():
|
|
909
|
+
).to_py(stream=df.stream):
|
|
913
910
|
raise InvalidOperationError("conversion from `str` failed.")
|
|
914
911
|
else:
|
|
915
912
|
not_timestamps = plc.unary.unary_operation(
|
|
@@ -950,18 +947,24 @@ class StringFunction(Expr):
|
|
|
950
947
|
elif self.name is StringFunction.Name.PadStart:
|
|
951
948
|
if POLARS_VERSION_LT_132: # pragma: no cover
|
|
952
949
|
(column,) = columns
|
|
953
|
-
|
|
950
|
+
width_arg, char = self.options
|
|
951
|
+
pad_width = cast(int, width_arg)
|
|
954
952
|
else:
|
|
955
953
|
(column, width_col) = columns
|
|
956
954
|
(char,) = self.options
|
|
957
955
|
# TODO: Maybe accept a string scalar in
|
|
958
956
|
# cudf::strings::pad to avoid DtoH transfer
|
|
959
|
-
# See https://github.com/rapidsai/cudf/issues/20202
|
|
960
|
-
|
|
957
|
+
# See https://github.com/rapidsai/cudf/issues/20202
|
|
958
|
+
width_py = width_col.obj.to_scalar(stream=df.stream).to_py(
|
|
959
|
+
stream=df.stream
|
|
960
|
+
)
|
|
961
|
+
assert width_py is not None
|
|
962
|
+
pad_width = int(width_py)
|
|
963
|
+
|
|
961
964
|
return Column(
|
|
962
965
|
plc.strings.padding.pad(
|
|
963
966
|
column.obj,
|
|
964
|
-
|
|
967
|
+
pad_width,
|
|
965
968
|
plc.strings.SideType.LEFT,
|
|
966
969
|
char,
|
|
967
970
|
stream=df.stream,
|
|
@@ -971,17 +974,23 @@ class StringFunction(Expr):
|
|
|
971
974
|
elif self.name is StringFunction.Name.PadEnd:
|
|
972
975
|
if POLARS_VERSION_LT_132: # pragma: no cover
|
|
973
976
|
(column,) = columns
|
|
974
|
-
|
|
977
|
+
width_arg, char = self.options
|
|
978
|
+
pad_width = cast(int, width_arg)
|
|
975
979
|
else:
|
|
976
980
|
(column, width_col) = columns
|
|
977
981
|
(char,) = self.options
|
|
978
982
|
# TODO: Maybe accept a string scalar in
|
|
979
983
|
# cudf::strings::pad to avoid DtoH transfer
|
|
980
|
-
|
|
984
|
+
width_py = width_col.obj.to_scalar(stream=df.stream).to_py(
|
|
985
|
+
stream=df.stream
|
|
986
|
+
)
|
|
987
|
+
assert width_py is not None
|
|
988
|
+
pad_width = int(width_py)
|
|
989
|
+
|
|
981
990
|
return Column(
|
|
982
991
|
plc.strings.padding.pad(
|
|
983
992
|
column.obj,
|
|
984
|
-
|
|
993
|
+
pad_width,
|
|
985
994
|
plc.strings.SideType.RIGHT,
|
|
986
995
|
char,
|
|
987
996
|
stream=df.stream,
|
|
@@ -15,6 +15,7 @@ from cudf_polars.dsl.expressions.base import (
|
|
|
15
15
|
ExecutionContext,
|
|
16
16
|
Expr,
|
|
17
17
|
)
|
|
18
|
+
from cudf_polars.dsl.utils.reshape import broadcast
|
|
18
19
|
|
|
19
20
|
if TYPE_CHECKING:
|
|
20
21
|
from cudf_polars.containers import DataFrame, DataType
|
|
@@ -41,15 +42,38 @@ class Ternary(Expr):
|
|
|
41
42
|
when, then, otherwise = (
|
|
42
43
|
child.evaluate(df, context=context) for child in self.children
|
|
43
44
|
)
|
|
45
|
+
|
|
46
|
+
if when.is_scalar:
|
|
47
|
+
# For scalar predicates: lowering to copy_if_else would require
|
|
48
|
+
# materializing an all true/false mask column. Instead, just pick
|
|
49
|
+
# the correct branch.
|
|
50
|
+
when_predicate = when.obj_scalar(stream=df.stream).to_py(stream=df.stream)
|
|
51
|
+
pick, other = (then, otherwise) if when_predicate else (otherwise, then)
|
|
52
|
+
|
|
53
|
+
pick_col = (
|
|
54
|
+
broadcast(
|
|
55
|
+
pick,
|
|
56
|
+
target_length=1 if other.is_scalar else other.size,
|
|
57
|
+
stream=df.stream,
|
|
58
|
+
)[0]
|
|
59
|
+
if pick.is_scalar
|
|
60
|
+
else pick
|
|
61
|
+
)
|
|
62
|
+
return Column(pick_col.obj, dtype=self.dtype)
|
|
63
|
+
|
|
44
64
|
then_obj = then.obj_scalar(stream=df.stream) if then.is_scalar else then.obj
|
|
45
65
|
otherwise_obj = (
|
|
46
66
|
otherwise.obj_scalar(stream=df.stream)
|
|
47
67
|
if otherwise.is_scalar
|
|
48
68
|
else otherwise.obj
|
|
49
69
|
)
|
|
70
|
+
|
|
50
71
|
return Column(
|
|
51
72
|
plc.copying.copy_if_else(
|
|
52
|
-
then_obj,
|
|
73
|
+
then_obj,
|
|
74
|
+
otherwise_obj,
|
|
75
|
+
when.obj,
|
|
76
|
+
stream=df.stream,
|
|
53
77
|
),
|
|
54
78
|
dtype=self.dtype,
|
|
55
79
|
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
# TODO: remove need for this
|
|
4
4
|
"""DSL nodes for unary operations."""
|
|
@@ -25,11 +25,12 @@ __all__ = ["Cast", "Len", "UnaryFunction"]
|
|
|
25
25
|
class Cast(Expr):
|
|
26
26
|
"""Class representing a cast of an expression."""
|
|
27
27
|
|
|
28
|
-
__slots__ = ()
|
|
29
|
-
_non_child = ("dtype",)
|
|
28
|
+
__slots__ = ("strict",)
|
|
29
|
+
_non_child = ("dtype", "strict")
|
|
30
30
|
|
|
31
|
-
def __init__(self, dtype: DataType, value: Expr) -> None:
|
|
31
|
+
def __init__(self, dtype: DataType, strict: bool, value: Expr) -> None: # noqa: FBT001
|
|
32
32
|
self.dtype = dtype
|
|
33
|
+
self.strict = strict
|
|
33
34
|
self.children = (value,)
|
|
34
35
|
self.is_pointwise = True
|
|
35
36
|
if not dtypes.can_cast(value.dtype.plc_type, self.dtype.plc_type):
|
|
@@ -43,7 +44,7 @@ class Cast(Expr):
|
|
|
43
44
|
"""Evaluate this expression given a dataframe for context."""
|
|
44
45
|
(child,) = self.children
|
|
45
46
|
column = child.evaluate(df, context=context)
|
|
46
|
-
return column.astype(self.dtype, stream=df.stream)
|
|
47
|
+
return column.astype(self.dtype, stream=df.stream, strict=self.strict)
|
|
47
48
|
|
|
48
49
|
|
|
49
50
|
class Len(Expr):
|
|
@@ -240,7 +241,9 @@ class UnaryFunction(Expr):
|
|
|
240
241
|
if maintain_order:
|
|
241
242
|
column = column.sorted_like(values)
|
|
242
243
|
return column
|
|
243
|
-
elif self.name == "set_sorted":
|
|
244
|
+
elif self.name == "set_sorted": # pragma: no cover
|
|
245
|
+
# TODO: LazyFrame.set_sorted is a proper IR concept (i.e. FunctionIR::Hint)
|
|
246
|
+
# and is currently not implemented. We should reimplement it as a MapFunction.
|
|
244
247
|
(column,) = (child.evaluate(df, context=context) for child in self.children)
|
|
245
248
|
(asc,) = self.options
|
|
246
249
|
order = (
|
|
@@ -253,10 +256,10 @@ class UnaryFunction(Expr):
|
|
|
253
256
|
# PERF: This invokes four stream synchronisations!
|
|
254
257
|
has_nulls_first = not plc.copying.get_element(
|
|
255
258
|
column.obj, 0, stream=df.stream
|
|
256
|
-
).is_valid()
|
|
259
|
+
).is_valid(df.stream)
|
|
257
260
|
has_nulls_last = not plc.copying.get_element(
|
|
258
261
|
column.obj, n - 1, stream=df.stream
|
|
259
|
-
).is_valid()
|
|
262
|
+
).is_valid(df.stream)
|
|
260
263
|
if (order == plc.types.Order.DESCENDING and has_nulls_first) or (
|
|
261
264
|
order == plc.types.Order.ASCENDING and has_nulls_last
|
|
262
265
|
):
|