cudf-polars-cu12 25.2.1__py3-none-any.whl → 25.4.0__py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +85 -53
- cudf_polars/containers/column.py +100 -7
- cudf_polars/containers/dataframe.py +16 -24
- cudf_polars/dsl/expr.py +3 -1
- cudf_polars/dsl/expressions/aggregation.py +3 -3
- cudf_polars/dsl/expressions/binaryop.py +2 -2
- cudf_polars/dsl/expressions/boolean.py +4 -4
- cudf_polars/dsl/expressions/datetime.py +39 -1
- cudf_polars/dsl/expressions/literal.py +3 -9
- cudf_polars/dsl/expressions/selection.py +2 -2
- cudf_polars/dsl/expressions/slicing.py +53 -0
- cudf_polars/dsl/expressions/sorting.py +1 -1
- cudf_polars/dsl/expressions/string.py +4 -4
- cudf_polars/dsl/expressions/unary.py +3 -2
- cudf_polars/dsl/ir.py +222 -93
- cudf_polars/dsl/nodebase.py +8 -1
- cudf_polars/dsl/translate.py +66 -38
- cudf_polars/experimental/base.py +18 -12
- cudf_polars/experimental/dask_serialize.py +22 -8
- cudf_polars/experimental/groupby.py +346 -0
- cudf_polars/experimental/io.py +13 -11
- cudf_polars/experimental/join.py +318 -0
- cudf_polars/experimental/parallel.py +57 -6
- cudf_polars/experimental/shuffle.py +194 -0
- cudf_polars/testing/plugin.py +23 -34
- cudf_polars/typing/__init__.py +33 -2
- cudf_polars/utils/config.py +138 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +14 -4
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +4 -3
- {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/METADATA +8 -7
- cudf_polars_cu12-25.4.0.dist-info/RECORD +55 -0
- {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu12-25.2.1.dist-info/RECORD +0 -48
- {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info/licenses}/LICENSE +0 -0
- {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
# TODO: remove need for this
|
|
4
4
|
# ruff: noqa: D101
|
|
@@ -50,7 +50,7 @@ class Gather(Expr):
|
|
|
50
50
|
n = df.num_rows
|
|
51
51
|
if hi >= n or lo < -n:
|
|
52
52
|
raise ValueError("gather indices are out of bounds")
|
|
53
|
-
if indices.obj.null_count():
|
|
53
|
+
if indices.null_count:
|
|
54
54
|
bounds_policy = plc.copying.OutOfBoundsPolicy.NULLIFY
|
|
55
55
|
obj = plc.replace.replace_nulls(
|
|
56
56
|
indices.obj,
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
# TODO: remove need for this
|
|
4
|
+
# ruff: noqa: D101
|
|
5
|
+
"""Slicing DSL nodes."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from cudf_polars.dsl.expressions.base import (
|
|
12
|
+
ExecutionContext,
|
|
13
|
+
Expr,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from collections.abc import Mapping
|
|
18
|
+
|
|
19
|
+
import pylibcudf as plc
|
|
20
|
+
|
|
21
|
+
from cudf_polars.containers import Column, DataFrame
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
__all__ = ["Slice"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Slice(Expr):
|
|
28
|
+
__slots__ = ("length", "offset")
|
|
29
|
+
_non_child = ("dtype", "offset", "length")
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
dtype: plc.DataType,
|
|
34
|
+
offset: int,
|
|
35
|
+
length: int,
|
|
36
|
+
column: Expr,
|
|
37
|
+
) -> None:
|
|
38
|
+
self.dtype = dtype
|
|
39
|
+
self.offset = offset
|
|
40
|
+
self.length = length
|
|
41
|
+
self.children = (column,)
|
|
42
|
+
|
|
43
|
+
def do_evaluate(
|
|
44
|
+
self,
|
|
45
|
+
df: DataFrame,
|
|
46
|
+
*,
|
|
47
|
+
context: ExecutionContext = ExecutionContext.FRAME,
|
|
48
|
+
mapping: Mapping[Expr, Column] | None = None,
|
|
49
|
+
) -> Column:
|
|
50
|
+
"""Evaluate this expression given a dataframe for context."""
|
|
51
|
+
(child,) = self.children
|
|
52
|
+
column = child.evaluate(df, context=context, mapping=mapping)
|
|
53
|
+
return column.slice((self.offset, self.length))
|
|
@@ -210,7 +210,7 @@ class StringFunction(Expr):
|
|
|
210
210
|
(child,) = self.children
|
|
211
211
|
column = child.evaluate(df, context=context, mapping=mapping)
|
|
212
212
|
delimiter, ignore_nulls = self.options
|
|
213
|
-
if column.obj.null_count() > 0 and not ignore_nulls:
|
|
213
|
+
if column.null_count > 0 and not ignore_nulls:
|
|
214
214
|
return Column(plc.Column.all_null_like(column.obj, 1))
|
|
215
215
|
return Column(
|
|
216
216
|
plc.strings.combine.join_strings(
|
|
@@ -228,7 +228,7 @@ class StringFunction(Expr):
|
|
|
228
228
|
pat = arg.evaluate(df, context=context, mapping=mapping)
|
|
229
229
|
pattern = (
|
|
230
230
|
pat.obj_scalar
|
|
231
|
-
if pat.is_scalar and pat.obj.size() != column.obj.size()
|
|
231
|
+
if pat.is_scalar and pat.size != column.size
|
|
232
232
|
else pat.obj
|
|
233
233
|
)
|
|
234
234
|
return Column(plc.strings.find.contains(column.obj, pattern))
|
|
@@ -298,7 +298,7 @@ class StringFunction(Expr):
|
|
|
298
298
|
plc.strings.find.ends_with(
|
|
299
299
|
column.obj,
|
|
300
300
|
suffix.obj_scalar
|
|
301
|
-
if column.obj.size() != suffix.obj.size() and suffix.is_scalar
|
|
301
|
+
if column.size != suffix.size and suffix.is_scalar
|
|
302
302
|
else suffix.obj,
|
|
303
303
|
)
|
|
304
304
|
)
|
|
@@ -308,7 +308,7 @@ class StringFunction(Expr):
|
|
|
308
308
|
plc.strings.find.starts_with(
|
|
309
309
|
column.obj,
|
|
310
310
|
prefix.obj_scalar
|
|
311
|
-
if column.
|
|
311
|
+
if column.size != prefix.size and prefix.is_scalar
|
|
312
312
|
else prefix.obj,
|
|
313
313
|
)
|
|
314
314
|
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
# TODO: remove need for this
|
|
4
4
|
"""DSL nodes for unary operations."""
|
|
@@ -119,6 +119,7 @@ class UnaryFunction(Expr):
|
|
|
119
119
|
"abs": plc.unary.UnaryOperator.ABS,
|
|
120
120
|
"bit_invert": plc.unary.UnaryOperator.BIT_INVERT,
|
|
121
121
|
"not": plc.unary.UnaryOperator.NOT,
|
|
122
|
+
"negate": plc.unary.UnaryOperator.NEGATE,
|
|
122
123
|
}
|
|
123
124
|
_supported_misc_fns = frozenset(
|
|
124
125
|
{
|
|
@@ -235,7 +236,7 @@ class UnaryFunction(Expr):
|
|
|
235
236
|
else plc.types.Order.DESCENDING
|
|
236
237
|
)
|
|
237
238
|
null_order = plc.types.NullOrder.BEFORE
|
|
238
|
-
if column.obj.null_count() > 0 and (n := column.obj.size()) > 1:
|
|
239
|
+
if column.null_count > 0 and (n := column.size) > 1:
|
|
239
240
|
# PERF: This invokes four stream synchronisations!
|
|
240
241
|
has_nulls_first = not plc.copying.get_element(column.obj, 0).is_valid()
|
|
241
242
|
has_nulls_last = not plc.copying.get_element(
|