cudf-polars-cu12 25.2.1__py3-none-any.whl → 25.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. cudf_polars/VERSION +1 -1
  2. cudf_polars/callback.py +85 -53
  3. cudf_polars/containers/column.py +100 -7
  4. cudf_polars/containers/dataframe.py +16 -24
  5. cudf_polars/dsl/expr.py +3 -1
  6. cudf_polars/dsl/expressions/aggregation.py +3 -3
  7. cudf_polars/dsl/expressions/binaryop.py +2 -2
  8. cudf_polars/dsl/expressions/boolean.py +4 -4
  9. cudf_polars/dsl/expressions/datetime.py +39 -1
  10. cudf_polars/dsl/expressions/literal.py +3 -9
  11. cudf_polars/dsl/expressions/selection.py +2 -2
  12. cudf_polars/dsl/expressions/slicing.py +53 -0
  13. cudf_polars/dsl/expressions/sorting.py +1 -1
  14. cudf_polars/dsl/expressions/string.py +4 -4
  15. cudf_polars/dsl/expressions/unary.py +3 -2
  16. cudf_polars/dsl/ir.py +222 -93
  17. cudf_polars/dsl/nodebase.py +8 -1
  18. cudf_polars/dsl/translate.py +66 -38
  19. cudf_polars/experimental/base.py +18 -12
  20. cudf_polars/experimental/dask_serialize.py +22 -8
  21. cudf_polars/experimental/groupby.py +346 -0
  22. cudf_polars/experimental/io.py +13 -11
  23. cudf_polars/experimental/join.py +318 -0
  24. cudf_polars/experimental/parallel.py +57 -6
  25. cudf_polars/experimental/shuffle.py +194 -0
  26. cudf_polars/testing/plugin.py +23 -34
  27. cudf_polars/typing/__init__.py +33 -2
  28. cudf_polars/utils/config.py +138 -0
  29. cudf_polars/utils/conversion.py +40 -0
  30. cudf_polars/utils/dtypes.py +14 -4
  31. cudf_polars/utils/timer.py +39 -0
  32. cudf_polars/utils/versions.py +4 -3
  33. {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/METADATA +8 -7
  34. cudf_polars_cu12-25.4.0.dist-info/RECORD +55 -0
  35. {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/WHEEL +1 -1
  36. cudf_polars_cu12-25.2.1.dist-info/RECORD +0 -48
  37. {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info/licenses}/LICENSE +0 -0
  38. {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  # TODO: remove need for this
4
4
  # ruff: noqa: D101
@@ -50,7 +50,7 @@ class Gather(Expr):
50
50
  n = df.num_rows
51
51
  if hi >= n or lo < -n:
52
52
  raise ValueError("gather indices are out of bounds")
53
- if indices.obj.null_count():
53
+ if indices.null_count:
54
54
  bounds_policy = plc.copying.OutOfBoundsPolicy.NULLIFY
55
55
  obj = plc.replace.replace_nulls(
56
56
  indices.obj,
@@ -0,0 +1,53 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ # TODO: remove need for this
4
+ # ruff: noqa: D101
5
+ """Slicing DSL nodes."""
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ from cudf_polars.dsl.expressions.base import (
12
+ ExecutionContext,
13
+ Expr,
14
+ )
15
+
16
+ if TYPE_CHECKING:
17
+ from collections.abc import Mapping
18
+
19
+ import pylibcudf as plc
20
+
21
+ from cudf_polars.containers import Column, DataFrame
22
+
23
+
24
+ __all__ = ["Slice"]
25
+
26
+
27
+ class Slice(Expr):
28
+ __slots__ = ("length", "offset")
29
+ _non_child = ("dtype", "offset", "length")
30
+
31
+ def __init__(
32
+ self,
33
+ dtype: plc.DataType,
34
+ offset: int,
35
+ length: int,
36
+ column: Expr,
37
+ ) -> None:
38
+ self.dtype = dtype
39
+ self.offset = offset
40
+ self.length = length
41
+ self.children = (column,)
42
+
43
+ def do_evaluate(
44
+ self,
45
+ df: DataFrame,
46
+ *,
47
+ context: ExecutionContext = ExecutionContext.FRAME,
48
+ mapping: Mapping[Expr, Column] | None = None,
49
+ ) -> Column:
50
+ """Evaluate this expression given a dataframe for context."""
51
+ (child,) = self.children
52
+ column = child.evaluate(df, context=context, mapping=mapping)
53
+ return column.slice((self.offset, self.length))
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  # TODO: remove need for this
4
4
  # ruff: noqa: D101
@@ -210,7 +210,7 @@ class StringFunction(Expr):
210
210
  (child,) = self.children
211
211
  column = child.evaluate(df, context=context, mapping=mapping)
212
212
  delimiter, ignore_nulls = self.options
213
- if column.obj.null_count() > 0 and not ignore_nulls:
213
+ if column.null_count > 0 and not ignore_nulls:
214
214
  return Column(plc.Column.all_null_like(column.obj, 1))
215
215
  return Column(
216
216
  plc.strings.combine.join_strings(
@@ -228,7 +228,7 @@ class StringFunction(Expr):
228
228
  pat = arg.evaluate(df, context=context, mapping=mapping)
229
229
  pattern = (
230
230
  pat.obj_scalar
231
- if pat.is_scalar and pat.obj.size() != column.obj.size()
231
+ if pat.is_scalar and pat.size != column.size
232
232
  else pat.obj
233
233
  )
234
234
  return Column(plc.strings.find.contains(column.obj, pattern))
@@ -298,7 +298,7 @@ class StringFunction(Expr):
298
298
  plc.strings.find.ends_with(
299
299
  column.obj,
300
300
  suffix.obj_scalar
301
- if column.obj.size() != suffix.obj.size() and suffix.is_scalar
301
+ if column.size != suffix.size and suffix.is_scalar
302
302
  else suffix.obj,
303
303
  )
304
304
  )
@@ -308,7 +308,7 @@ class StringFunction(Expr):
308
308
  plc.strings.find.starts_with(
309
309
  column.obj,
310
310
  prefix.obj_scalar
311
- if column.obj.size() != prefix.obj.size() and prefix.is_scalar
311
+ if column.size != prefix.size and prefix.is_scalar
312
312
  else prefix.obj,
313
313
  )
314
314
  )
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  # TODO: remove need for this
4
4
  """DSL nodes for unary operations."""
@@ -119,6 +119,7 @@ class UnaryFunction(Expr):
119
119
  "abs": plc.unary.UnaryOperator.ABS,
120
120
  "bit_invert": plc.unary.UnaryOperator.BIT_INVERT,
121
121
  "not": plc.unary.UnaryOperator.NOT,
122
+ "negate": plc.unary.UnaryOperator.NEGATE,
122
123
  }
123
124
  _supported_misc_fns = frozenset(
124
125
  {
@@ -235,7 +236,7 @@ class UnaryFunction(Expr):
235
236
  else plc.types.Order.DESCENDING
236
237
  )
237
238
  null_order = plc.types.NullOrder.BEFORE
238
- if column.obj.null_count() > 0 and (n := column.obj.size()) > 1:
239
+ if column.null_count > 0 and (n := column.size) > 1:
239
240
  # PERF: This invokes four stream synchronisations!
240
241
  has_nulls_first = not plc.copying.get_element(column.obj, 0).is_valid()
241
242
  has_nulls_last = not plc.copying.get_element(