cudf-polars-cu12 24.12.0__py3-none-any.whl → 25.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. cudf_polars/VERSION +1 -1
  2. cudf_polars/__init__.py +1 -1
  3. cudf_polars/callback.py +28 -3
  4. cudf_polars/containers/__init__.py +1 -1
  5. cudf_polars/dsl/expr.py +16 -16
  6. cudf_polars/dsl/expressions/aggregation.py +21 -4
  7. cudf_polars/dsl/expressions/base.py +7 -2
  8. cudf_polars/dsl/expressions/binaryop.py +1 -0
  9. cudf_polars/dsl/expressions/boolean.py +65 -22
  10. cudf_polars/dsl/expressions/datetime.py +82 -20
  11. cudf_polars/dsl/expressions/literal.py +2 -0
  12. cudf_polars/dsl/expressions/rolling.py +3 -1
  13. cudf_polars/dsl/expressions/selection.py +3 -1
  14. cudf_polars/dsl/expressions/sorting.py +2 -0
  15. cudf_polars/dsl/expressions/string.py +118 -39
  16. cudf_polars/dsl/expressions/ternary.py +1 -0
  17. cudf_polars/dsl/expressions/unary.py +11 -1
  18. cudf_polars/dsl/ir.py +173 -122
  19. cudf_polars/dsl/to_ast.py +4 -6
  20. cudf_polars/dsl/translate.py +53 -21
  21. cudf_polars/dsl/traversal.py +10 -10
  22. cudf_polars/experimental/base.py +43 -0
  23. cudf_polars/experimental/dispatch.py +84 -0
  24. cudf_polars/experimental/io.py +325 -0
  25. cudf_polars/experimental/parallel.py +253 -0
  26. cudf_polars/experimental/select.py +36 -0
  27. cudf_polars/testing/asserts.py +14 -5
  28. cudf_polars/testing/plugin.py +60 -4
  29. cudf_polars/typing/__init__.py +5 -5
  30. cudf_polars/utils/dtypes.py +9 -7
  31. cudf_polars/utils/versions.py +4 -7
  32. {cudf_polars_cu12-24.12.0.dist-info → cudf_polars_cu12-25.2.0.dist-info}/METADATA +6 -6
  33. cudf_polars_cu12-25.2.0.dist-info/RECORD +48 -0
  34. {cudf_polars_cu12-24.12.0.dist-info → cudf_polars_cu12-25.2.0.dist-info}/WHEEL +1 -1
  35. cudf_polars_cu12-24.12.0.dist-info/RECORD +0 -43
  36. {cudf_polars_cu12-24.12.0.dist-info → cudf_polars_cu12-25.2.0.dist-info}/LICENSE +0 -0
  37. {cudf_polars_cu12-24.12.0.dist-info → cudf_polars_cu12-25.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,253 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Multi-partition Dask execution."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import itertools
8
+ import operator
9
+ from functools import reduce
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ import cudf_polars.experimental.io
13
+ import cudf_polars.experimental.select # noqa: F401
14
+ from cudf_polars.dsl.ir import IR, Cache, Filter, HStack, Projection, Select, Union
15
+ from cudf_polars.dsl.traversal import CachingVisitor, traversal
16
+ from cudf_polars.experimental.base import PartitionInfo, _concat, get_key_name
17
+ from cudf_polars.experimental.dispatch import (
18
+ generate_ir_tasks,
19
+ lower_ir_node,
20
+ )
21
+
22
+ if TYPE_CHECKING:
23
+ from collections.abc import MutableMapping
24
+
25
+ from cudf_polars.containers import DataFrame
26
+ from cudf_polars.experimental.dispatch import LowerIRTransformer
27
+
28
+
29
+ @lower_ir_node.register(IR)
30
+ def _(ir: IR, rec: LowerIRTransformer) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
31
+ # Default logic - Requires single partition
32
+
33
+ if len(ir.children) == 0:
34
+ # Default leaf node has single partition
35
+ return ir, {
36
+ ir: PartitionInfo(count=1)
37
+ } # pragma: no cover; Missed by pylibcudf executor
38
+
39
+ # Lower children
40
+ children, _partition_info = zip(*(rec(c) for c in ir.children), strict=True)
41
+ partition_info = reduce(operator.or_, _partition_info)
42
+
43
+ # Check that child partitioning is supported
44
+ if any(partition_info[c].count > 1 for c in children):
45
+ raise NotImplementedError(
46
+ f"Class {type(ir)} does not support multiple partitions."
47
+ ) # pragma: no cover
48
+
49
+ # Return reconstructed node and partition-info dict
50
+ partition = PartitionInfo(count=1)
51
+ new_node = ir.reconstruct(children)
52
+ partition_info[new_node] = partition
53
+ return new_node, partition_info
54
+
55
+
56
+ def lower_ir_graph(ir: IR) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
57
+ """
58
+ Rewrite an IR graph and extract partitioning information.
59
+
60
+ Parameters
61
+ ----------
62
+ ir
63
+ Root of the graph to rewrite.
64
+
65
+ Returns
66
+ -------
67
+ new_ir, partition_info
68
+ The rewritten graph, and a mapping from unique nodes
69
+ in the new graph to associated partitioning information.
70
+
71
+ Notes
72
+ -----
73
+ This function traverses the unique nodes of the graph with
74
+ root `ir`, and applies :func:`lower_ir_node` to each node.
75
+
76
+ See Also
77
+ --------
78
+ lower_ir_node
79
+ """
80
+ mapper = CachingVisitor(lower_ir_node)
81
+ return mapper(ir)
82
+
83
+
84
+ def task_graph(
85
+ ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
86
+ ) -> tuple[MutableMapping[Any, Any], str | tuple[str, int]]:
87
+ """
88
+ Construct a task graph for evaluation of an IR graph.
89
+
90
+ Parameters
91
+ ----------
92
+ ir
93
+ Root of the graph to rewrite.
94
+ partition_info
95
+ A mapping from all unique IR nodes to the
96
+ associated partitioning information.
97
+
98
+ Returns
99
+ -------
100
+ graph
101
+ A Dask-compatible task graph for the entire
102
+ IR graph with root `ir`.
103
+
104
+ Notes
105
+ -----
106
+ This function traverses the unique nodes of the
107
+ graph with root `ir`, and extracts the tasks for
108
+ each node with :func:`generate_ir_tasks`.
109
+
110
+ See Also
111
+ --------
112
+ generate_ir_tasks
113
+ """
114
+ graph = reduce(
115
+ operator.or_,
116
+ (generate_ir_tasks(node, partition_info) for node in traversal([ir])),
117
+ )
118
+
119
+ key_name = get_key_name(ir)
120
+ partition_count = partition_info[ir].count
121
+ if partition_count > 1:
122
+ graph[key_name] = (_concat, list(partition_info[ir].keys(ir)))
123
+ return graph, key_name
124
+ else:
125
+ return graph, (key_name, 0)
126
+
127
+
128
+ def evaluate_dask(ir: IR) -> DataFrame:
129
+ """Evaluate an IR graph with Dask."""
130
+ from dask import get
131
+
132
+ ir, partition_info = lower_ir_graph(ir)
133
+
134
+ graph, key = task_graph(ir, partition_info)
135
+ return get(graph, key)
136
+
137
+
138
+ @generate_ir_tasks.register(IR)
139
+ def _(
140
+ ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
141
+ ) -> MutableMapping[Any, Any]:
142
+ # Single-partition default behavior.
143
+ # This is used by `generate_ir_tasks` for all unregistered IR sub-types.
144
+ if partition_info[ir].count > 1:
145
+ raise NotImplementedError(
146
+ f"Failed to generate multiple output tasks for {ir}."
147
+ ) # pragma: no cover
148
+
149
+ child_names = []
150
+ for child in ir.children:
151
+ child_names.append(get_key_name(child))
152
+ if partition_info[child].count > 1:
153
+ raise NotImplementedError(
154
+ f"Failed to generate tasks for {ir} with child {child}."
155
+ ) # pragma: no cover
156
+
157
+ key_name = get_key_name(ir)
158
+ return {
159
+ (key_name, 0): (
160
+ ir.do_evaluate,
161
+ *ir._non_child_args,
162
+ *((child_name, 0) for child_name in child_names),
163
+ )
164
+ }
165
+
166
+
167
+ @lower_ir_node.register(Union)
168
+ def _(
169
+ ir: Union, rec: LowerIRTransformer
170
+ ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
171
+ # Lower children
172
+ children, _partition_info = zip(*(rec(c) for c in ir.children), strict=True)
173
+ partition_info = reduce(operator.or_, _partition_info)
174
+
175
+ # Check zlice
176
+ if ir.zlice is not None: # pragma: no cover
177
+ if any(p[c].count > 1 for p, c in zip(children, _partition_info, strict=False)):
178
+ raise NotImplementedError("zlice is not supported for multiple partitions.")
179
+ new_node = ir.reconstruct(children)
180
+ partition_info[new_node] = PartitionInfo(count=1)
181
+ return new_node, partition_info
182
+
183
+ # Partition count is the sum of all child partitions
184
+ count = sum(partition_info[c].count for c in children)
185
+
186
+ # Return reconstructed node and partition-info dict
187
+ new_node = ir.reconstruct(children)
188
+ partition_info[new_node] = PartitionInfo(count=count)
189
+ return new_node, partition_info
190
+
191
+
192
+ @generate_ir_tasks.register(Union)
193
+ def _(
194
+ ir: Union, partition_info: MutableMapping[IR, PartitionInfo]
195
+ ) -> MutableMapping[Any, Any]:
196
+ key_name = get_key_name(ir)
197
+ partition = itertools.count()
198
+ return {
199
+ (key_name, next(partition)): child_key
200
+ for child in ir.children
201
+ for child_key in partition_info[child].keys(child)
202
+ }
203
+
204
+
205
+ def _lower_ir_pwise(
206
+ ir: IR, rec: LowerIRTransformer
207
+ ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
208
+ # Lower a partition-wise (i.e. embarrassingly-parallel) IR node
209
+
210
+ # Lower children
211
+ children, _partition_info = zip(*(rec(c) for c in ir.children), strict=True)
212
+ partition_info = reduce(operator.or_, _partition_info)
213
+ counts = {partition_info[c].count for c in children}
214
+
215
+ # Check that child partitioning is supported
216
+ if len(counts) > 1:
217
+ raise NotImplementedError(
218
+ f"Class {type(ir)} does not support unbalanced partitions."
219
+ ) # pragma: no cover
220
+
221
+ # Return reconstructed node and partition-info dict
222
+ partition = PartitionInfo(count=max(counts))
223
+ new_node = ir.reconstruct(children)
224
+ partition_info[new_node] = partition
225
+ return new_node, partition_info
226
+
227
+
228
+ lower_ir_node.register(Projection, _lower_ir_pwise)
229
+ lower_ir_node.register(Cache, _lower_ir_pwise)
230
+ lower_ir_node.register(Filter, _lower_ir_pwise)
231
+ lower_ir_node.register(HStack, _lower_ir_pwise)
232
+
233
+
234
+ def _generate_ir_tasks_pwise(
235
+ ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
236
+ ) -> MutableMapping[Any, Any]:
237
+ # Generate partition-wise (i.e. embarrassingly-parallel) tasks
238
+ child_names = [get_key_name(c) for c in ir.children]
239
+ return {
240
+ key: (
241
+ ir.do_evaluate,
242
+ *ir._non_child_args,
243
+ *[(child_name, i) for child_name in child_names],
244
+ )
245
+ for i, key in enumerate(partition_info[ir].keys(ir))
246
+ }
247
+
248
+
249
+ generate_ir_tasks.register(Projection, _generate_ir_tasks_pwise)
250
+ generate_ir_tasks.register(Cache, _generate_ir_tasks_pwise)
251
+ generate_ir_tasks.register(Filter, _generate_ir_tasks_pwise)
252
+ generate_ir_tasks.register(HStack, _generate_ir_tasks_pwise)
253
+ generate_ir_tasks.register(Select, _generate_ir_tasks_pwise)
@@ -0,0 +1,36 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ """Parallel Select Logic."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import TYPE_CHECKING
8
+
9
+ from cudf_polars.dsl.ir import Select
10
+ from cudf_polars.dsl.traversal import traversal
11
+ from cudf_polars.experimental.dispatch import lower_ir_node
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import MutableMapping
15
+
16
+ from cudf_polars.dsl.ir import IR
17
+ from cudf_polars.experimental.base import PartitionInfo
18
+ from cudf_polars.experimental.parallel import LowerIRTransformer
19
+
20
+
21
+ @lower_ir_node.register(Select)
22
+ def _(
23
+ ir: Select, rec: LowerIRTransformer
24
+ ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
25
+ child, partition_info = rec(ir.children[0])
26
+ pi = partition_info[child]
27
+ if pi.count > 1 and not all(
28
+ expr.is_pointwise for expr in traversal([e.value for e in ir.exprs])
29
+ ):
30
+ # TODO: Handle non-pointwise expressions.
31
+ raise NotImplementedError(
32
+ f"Selection {ir} does not support multiple partitions."
33
+ )
34
+ new_node = ir.reconstruct([child])
35
+ partition_info[new_node] = pi
36
+ return new_node, partition_info
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Device-aware assertions."""
@@ -20,6 +20,11 @@ if TYPE_CHECKING:
20
20
  __all__: list[str] = ["assert_gpu_result_equal", "assert_ir_translation_raises"]
21
21
 
22
22
 
23
+ # Will be overridden by `conftest.py` with the value from the `--executor`
24
+ # command-line argument
25
+ Executor = None
26
+
27
+
23
28
  def assert_gpu_result_equal(
24
29
  lazydf: pl.LazyFrame,
25
30
  *,
@@ -34,6 +39,7 @@ def assert_gpu_result_equal(
34
39
  rtol: float = 1e-05,
35
40
  atol: float = 1e-08,
36
41
  categorical_as_str: bool = False,
42
+ executor: str | None = None,
37
43
  ) -> None:
38
44
  """
39
45
  Assert that collection of a lazyframe on GPU produces correct results.
@@ -71,6 +77,9 @@ def assert_gpu_result_equal(
71
77
  Absolute tolerance for float comparisons
72
78
  categorical_as_str
73
79
  Decat categoricals to strings before comparing
80
+ executor
81
+ The executor configuration to pass to `GPUEngine`. If not specified,
82
+ uses the module level `Executor` attribute.
74
83
 
75
84
  Raises
76
85
  ------
@@ -80,7 +89,7 @@ def assert_gpu_result_equal(
80
89
  If GPU collection failed in some way.
81
90
  """
82
91
  if engine is None:
83
- engine = GPUEngine(raise_on_fail=True)
92
+ engine = GPUEngine(raise_on_fail=True, executor=executor or Executor)
84
93
 
85
94
  final_polars_collect_kwargs, final_cudf_collect_kwargs = _process_kwargs(
86
95
  collect_kwargs, polars_collect_kwargs, cudf_collect_kwargs
@@ -126,9 +135,9 @@ def assert_ir_translation_raises(q: pl.LazyFrame, *exceptions: type[Exception])
126
135
  translator.translate_ir()
127
136
  if errors := translator.errors:
128
137
  for err in errors:
129
- assert any(
130
- isinstance(err, err_type) for err_type in exceptions
131
- ), f"Translation DID NOT RAISE {exceptions}"
138
+ assert any(isinstance(err, err_type) for err_type in exceptions), (
139
+ f"Translation DID NOT RAISE {exceptions}"
140
+ )
132
141
  return
133
142
  else:
134
143
  raise AssertionError(f"Translation DID NOT RAISE {exceptions}")
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Plugin for running polars test suite setting GPU engine as default."""
@@ -44,7 +44,7 @@ def pytest_configure(config: pytest.Config) -> None:
44
44
  )
45
45
 
46
46
 
47
- EXPECTED_FAILURES: Mapping[str, str] = {
47
+ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
48
48
  "tests/unit/io/test_csv.py::test_compressed_csv": "Need to determine if file is compressed",
49
49
  "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
50
50
  "tests/unit/io/test_delta.py::test_scan_delta_version": "Need to expose hive partitioning",
@@ -122,7 +122,15 @@ EXPECTED_FAILURES: Mapping[str, str] = {
122
122
  "tests/unit/io/test_scan.py::test_scan_with_row_index_filter_and_limit[single-parquet-async]": "Debug output on stderr doesn't match",
123
123
  "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_parquet-write_parquet]": "Need to add include_file_path to IR",
124
124
  "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_csv-write_csv]": "Need to add include_file_path to IR",
125
+ "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_parquet-write_parquet]": "Debug output on stderr doesn't match",
126
+ "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_csv-write_csv]": "Debug output on stderr doesn't match",
127
+ "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_ndjson-write_ndjson]": "Debug output on stderr doesn't match",
125
128
  "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_ndjson-write_ndjson]": "Need to add include_file_path to IR",
129
+ "tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR",
130
+ "tests/unit/io/test_write.py::test_write_async[<lambda>-write_csv]": "Need to add include_file_path to IR",
131
+ "tests/unit/io/test_write.py::test_write_async[read_parquet-<lambda>]": "Need to add include_file_path to IR",
132
+ "tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>0]": "Need to add include_file_path to IR",
133
+ "tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>2]": "Need to add include_file_path to IR",
126
134
  "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed",
127
135
  "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed",
128
136
  "tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly",
@@ -140,6 +148,22 @@ EXPECTED_FAILURES: Mapping[str, str] = {
140
148
  "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero",
141
149
  "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero",
142
150
  "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero",
151
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
152
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
153
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
154
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
155
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
156
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
157
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
158
+ "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
159
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
160
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
161
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
162
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
163
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
164
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
165
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
166
+ "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
143
167
  "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values",
144
168
  "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852",
145
169
  "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype",
@@ -155,6 +179,7 @@ EXPECTED_FAILURES: Mapping[str, str] = {
155
179
  "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input15-expected15-input_dtype15-output_dtype15]": "Unsupported groupby-agg for a particular dtype",
156
180
  "tests/unit/operations/test_group_by.py::test_group_by_median_by_dtype[input16-expected16-input_dtype16-output_dtype16]": "Unsupported groupby-agg for a particular dtype",
157
181
  "tests/unit/operations/test_group_by.py::test_group_by_binary_agg_with_literal": "Incorrect broadcasting of literals in groupby-agg",
182
+ "tests/unit/operations/test_group_by.py::test_group_by_lit_series": "Incorrect broadcasting of literals in groupby-agg",
158
183
  "tests/unit/operations/test_group_by.py::test_aggregated_scalar_elementwise_15602": "Unsupported boolean function/dtype combination in groupby-agg",
159
184
  "tests/unit/operations/test_group_by.py::test_schemas[data1-expr1-expected_select1-expected_gb1]": "Mismatching dtypes, needs cudf#15852",
160
185
  "tests/unit/operations/test_join.py::test_cross_join_slice_pushdown": "Need to implement slice pushdown for cross joins",
@@ -174,6 +199,24 @@ EXPECTED_FAILURES: Mapping[str, str] = {
174
199
  }
175
200
 
176
201
 
202
+ TESTS_TO_SKIP: Mapping[str, str] = {
203
+ # On Ubuntu 20.04, the tzdata package contains a bunch of symlinks
204
+ # for obsolete timezone names. However, the chrono_tz package that
205
+ # polars uses doesn't read /usr/share/zoneinfo, instead packaging
206
+ # the current zoneinfo database from IANA. Consequently, when this
207
+ # hypothesis-generated test runs and generates timezones from the
208
+ # available zoneinfo-reported timezones, we can get an error from
209
+ # polars that the requested timezone is unknown.
210
+ # Since this is random, just skip it, rather than xfailing.
211
+ "tests/unit/lazyframe/test_serde.py::test_lf_serde_roundtrip_binary": "chrono_tz doesn't have all tzdata symlink names",
212
+ # The test may segfault with the legacy streaming engine. We should
213
+ # remove this skip when all polars tests use the new streaming engine.
214
+ "tests/unit/streaming/test_streaming_group_by.py::test_streaming_group_by_literal[1]": "May segfault w/the legacy streaming engine",
215
+ # Fails in CI, but passes locally
216
+ "tests/unit/streaming/test_streaming.py::test_streaming_streamable_functions": "RuntimeError: polars_python::sql::PySQLContext is unsendable, but is being dropped on another thread",
217
+ }
218
+
219
+
177
220
  def pytest_collection_modifyitems(
178
221
  session: pytest.Session, config: pytest.Config, items: list[pytest.Item]
179
222
  ) -> None:
@@ -182,5 +225,18 @@ def pytest_collection_modifyitems(
182
225
  # Don't xfail tests if running without fallback
183
226
  return
184
227
  for item in items:
185
- if item.nodeid in EXPECTED_FAILURES:
186
- item.add_marker(pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid]))
228
+ if item.nodeid in TESTS_TO_SKIP:
229
+ item.add_marker(pytest.mark.skip(reason=TESTS_TO_SKIP[item.nodeid]))
230
+ elif item.nodeid in EXPECTED_FAILURES:
231
+ if isinstance(EXPECTED_FAILURES[item.nodeid], tuple):
232
+ # the second entry in the tuple is the condition to xfail on
233
+ item.add_marker(
234
+ pytest.mark.xfail(
235
+ condition=EXPECTED_FAILURES[item.nodeid][1],
236
+ reason=EXPECTED_FAILURES[item.nodeid][0],
237
+ ),
238
+ )
239
+ else:
240
+ item.add_marker(
241
+ pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid])
242
+ )
@@ -21,13 +21,13 @@ if TYPE_CHECKING:
21
21
  from cudf_polars.dsl import expr, ir, nodebase
22
22
 
23
23
  __all__: list[str] = [
24
- "PolarsIR",
25
- "PolarsExpr",
26
- "NodeTraverser",
27
- "OptimizationArgs",
28
- "GenericTransformer",
29
24
  "ExprTransformer",
25
+ "GenericTransformer",
30
26
  "IRTransformer",
27
+ "NodeTraverser",
28
+ "OptimizationArgs",
29
+ "PolarsExpr",
30
+ "PolarsIR",
31
31
  ]
32
32
 
33
33
  PolarsIR: TypeAlias = Union[
@@ -19,9 +19,9 @@ from pylibcudf.traits import (
19
19
  )
20
20
 
21
21
  __all__ = [
22
- "from_polars",
23
- "downcast_arrow_lists",
24
22
  "can_cast",
23
+ "downcast_arrow_lists",
24
+ "from_polars",
25
25
  "is_order_preserving_cast",
26
26
  ]
27
27
  import pylibcudf as plc
@@ -75,11 +75,13 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
75
75
  return (
76
76
  (
77
77
  from_ == to
78
- or not has_empty
79
- and (
80
- plc.traits.is_fixed_width(to)
81
- and plc.traits.is_fixed_width(from_)
82
- and plc.unary.is_supported_cast(from_, to)
78
+ or (
79
+ not has_empty
80
+ and (
81
+ plc.traits.is_fixed_width(to)
82
+ and plc.traits.is_fixed_width(from_)
83
+ and plc.unary.is_supported_cast(from_, to)
84
+ )
83
85
  )
84
86
  )
85
87
  or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to))
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Version utilities so that cudf_polars supports a range of polars versions."""
@@ -12,14 +12,11 @@ from polars import __version__
12
12
 
13
13
  POLARS_VERSION = parse(__version__)
14
14
 
15
- POLARS_VERSION_LT_111 = POLARS_VERSION < parse("1.11")
16
- POLARS_VERSION_LT_112 = POLARS_VERSION < parse("1.12")
17
- POLARS_VERSION_GT_112 = POLARS_VERSION > parse("1.12")
18
- POLARS_VERSION_LT_113 = POLARS_VERSION < parse("1.13")
15
+ POLARS_VERSION_LT_120 = POLARS_VERSION < parse("1.20")
19
16
 
20
17
 
21
18
  def _ensure_polars_version():
22
- if POLARS_VERSION_LT_111:
19
+ if POLARS_VERSION_LT_120:
23
20
  raise ImportError(
24
- "cudf_polars requires py-polars v1.11 or greater."
21
+ "cudf_polars requires py-polars v1.20 or greater."
25
22
  ) # pragma: no cover
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: cudf-polars-cu12
3
- Version: 24.12.0
3
+ Version: 25.2.0
4
4
  Summary: Executor for polars using cudf
5
5
  Author: NVIDIA Corporation
6
6
  License: Apache 2.0
@@ -16,15 +16,15 @@ Classifier: Programming Language :: Python :: 3.12
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: polars<1.15,>=1.11
20
- Requires-Dist: pylibcudf-cu12==24.12.*
19
+ Requires-Dist: polars<1.22,>=1.20
20
+ Requires-Dist: pylibcudf-cu12==25.2.*
21
21
  Provides-Extra: test
22
22
  Requires-Dist: numpy<3.0a0,>=1.23; extra == "test"
23
23
  Requires-Dist: pytest-cov; extra == "test"
24
24
  Requires-Dist: pytest-xdist; extra == "test"
25
25
  Requires-Dist: pytest<8; extra == "test"
26
26
  Provides-Extra: experimental
27
- Requires-Dist: rapids-dask-dependency==24.12.*; extra == "experimental"
27
+ Requires-Dist: rapids-dask-dependency==25.2.*; extra == "experimental"
28
28
 
29
29
  # <div align="left"><img src="img/rapids_logo.png" width="90px"/>&nbsp;cuDF - GPU DataFrames</div>
30
30
 
@@ -111,7 +111,7 @@ cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge
111
111
 
112
112
  ```bash
113
113
  conda install -c rapidsai -c conda-forge -c nvidia \
114
- cudf=24.12 python=3.12 cuda-version=12.5
114
+ cudf=25.02 python=3.12 cuda-version=12.8
115
115
  ```
116
116
 
117
117
  We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
@@ -0,0 +1,48 @@
1
+ cudf_polars/VERSION,sha256=gWnOsR7j8lHNsXJO_balY3FJzbDTto6xlQk1ItvppEY,8
2
+ cudf_polars/__init__.py,sha256=fSTx5nmqajdwp7qvP4PnYL6wZN9-k1fKB43NkcZlHwk,740
3
+ cudf_polars/_version.py,sha256=kj5Ir4dxZRR-k2k8mWUDJHiGpE8_ZcTNzt_kMZxcFRA,528
4
+ cudf_polars/callback.py,sha256=u8CyAFgb9f8fL4eOBqGRQWb0BuoX6tIDaig4gJeAlLw,9023
5
+ cudf_polars/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ cudf_polars/containers/__init__.py,sha256=He8qCk9m37k7KOWC6s4QuL8LYF11KhP2vluH9s8EeYI,339
7
+ cudf_polars/containers/column.py,sha256=O4Sjnjs2gUoo50oaL2OZ45BFePjG1oo3yO163KIKY0E,8371
8
+ cudf_polars/containers/dataframe.py,sha256=Szzv4jc8cxux5MVII9mmEFwZdYn6jCNcVJ9EKSNLpb4,10637
9
+ cudf_polars/dsl/__init__.py,sha256=bYwYnqmqINMgwnkJ22EnXMlHviLolPaMgQ8QqoZL3YE,244
10
+ cudf_polars/dsl/expr.py,sha256=L6nmP4zsBITrzvTfR_QvO1NLnbTMlB-KUeFvXbyG26A,1795
11
+ cudf_polars/dsl/ir.py,sha256=6o7VLhfnNeP50YQzL-Jf9qxa55isjb6zmY2Fyntd2ik,64874
12
+ cudf_polars/dsl/nodebase.py,sha256=_ffrQJcsE0_Nwa6K8LG0N5DmgHHEFBnTE1oz8Ugx4N4,4352
13
+ cudf_polars/dsl/to_ast.py,sha256=q_lLko1AI9fYStzqEzASDqlRZrjBnoMfNu6MHAaI3d0,12309
14
+ cudf_polars/dsl/translate.py,sha256=Ex5uVL1sVpDAZrqfrC_xUVnR5SQ51n5v9-KnL5Q7pdU,24915
15
+ cudf_polars/dsl/traversal.py,sha256=dVH37YoLx-wo61lUofaOM77eji99yIaC62PSb3QYKHM,4610
16
+ cudf_polars/dsl/expressions/__init__.py,sha256=uj1a4BzrDVAjOgP6FKKtnvR5elF4ksUj1RhkLTZoS1k,224
17
+ cudf_polars/dsl/expressions/aggregation.py,sha256=BdKCdKj4DNxkYjmRNLER-7zYCo-PfXVY3ZFqPPoPeNM,8722
18
+ cudf_polars/dsl/expressions/base.py,sha256=a7GRXNRUeheR4haHLGtgC8j796D9p8yolF5tKKxT5do,9038
19
+ cudf_polars/dsl/expressions/binaryop.py,sha256=EU3YF0d5OQMMwIuZVsQgtHaWyJ1Qc09HBLuSpMma_xg,5640
20
+ cudf_polars/dsl/expressions/boolean.py,sha256=GCJhinhgGpKKO1RsBAWEwFdyQiFIvDDrWsnSMSoAEWU,11756
21
+ cudf_polars/dsl/expressions/datetime.py,sha256=H6Nmz_cHFthzlb8bpD5gDJSMv0BWcdTvPm-OTcXTPoQ,6861
22
+ cudf_polars/dsl/expressions/literal.py,sha256=BTNamYPm6fHxuMlH7Knr7pCWbqYZURf6DmDRZHPeklc,2864
23
+ cudf_polars/dsl/expressions/rolling.py,sha256=zeGjO-FqnQF7buxRrWdtuJe9x_CpCPjUhL6nJtk8jgU,1177
24
+ cudf_polars/dsl/expressions/selection.py,sha256=OK3dT9aP_-hpHzKhjVbPQ-uUe7YCcXelpMUZP-vxNOk,2900
25
+ cudf_polars/dsl/expressions/sorting.py,sha256=djIyJ_FXeJOSKGRIF7uKwa9uefgCVwLMaf4pQNMraUU,3000
26
+ cudf_polars/dsl/expressions/string.py,sha256=px-6_J46XXuS3JcM3zvV3O9QQCGhDy5t8ntkQl4TJeg,14220
27
+ cudf_polars/dsl/expressions/ternary.py,sha256=iF7g9XvZNXwNqFDjgKMGR-uWat6G3vJEZudesnQ4KUs,1499
28
+ cudf_polars/dsl/expressions/unary.py,sha256=H24itZGMoDv63y57pBSsnCPOJ5IrEuZxM5gNNQtfwh4,12788
29
+ cudf_polars/experimental/__init__.py,sha256=S2oI2K__woyPQAAlFMOo6RTMtdfIZxmzzAO92VtJgP4,256
30
+ cudf_polars/experimental/base.py,sha256=mW8A3DBK4S7VpD7k4UjGynWp6kDmV8uWQvj0EsWYQhg,1149
31
+ cudf_polars/experimental/dask_serialize.py,sha256=9-qTd04I9nRA-ijyv8daGLjXo5W6YbVnY8Z1ugyinCY,2029
32
+ cudf_polars/experimental/dispatch.py,sha256=pw6KyC_irRcQgKb8S0o1KKvRHQPFOikyv8kfMQtgxjc,2094
33
+ cudf_polars/experimental/io.py,sha256=ejOPjn5WEK7KiPoo4N_KRMTT82qROhFi12wRpKt9Bw8,11431
34
+ cudf_polars/experimental/parallel.py,sha256=M6vECqlpaOVpbm_MSw84EUxC7TgwO8YHrfEwzZgF-Bw,7981
35
+ cudf_polars/experimental/select.py,sha256=TMU-7DKv8e-F0Crqn811HDiBAHzyZxE68kTnYmuZYjk,1196
36
+ cudf_polars/testing/__init__.py,sha256=0MnlTjkTEqkSpL5GdMhQf4uXOaQwNrzgEJCZKa5FnL4,219
37
+ cudf_polars/testing/asserts.py,sha256=-D9ZNojBZk75Dz8c8IZUMF_sPOJc9bIsX2cybqX2zK8,7883
38
+ cudf_polars/testing/plugin.py,sha256=a6gxdQoC1Hgx1wSjm_nrwvs5W4AUdhpaH0qC8Of8jW4,24557
39
+ cudf_polars/typing/__init__.py,sha256=7A2xD8w62xG-jrx9ujxWkQnEGTt8WN-s_S_VT8pNP7M,3555
40
+ cudf_polars/utils/__init__.py,sha256=urdV5MUIneU8Dn6pt1db5GkDG0oY4NsFD0Uhl3j98l8,195
41
+ cudf_polars/utils/dtypes.py,sha256=6ygA_y-wTdv0WelNnWGZDnzgA2GKHfkhQ_hfP-jr8mk,6570
42
+ cudf_polars/utils/sorting.py,sha256=Mqb_KLsYnKU8p1dDan2mtlIQl65RqwM78OlUi-_Jj0k,1725
43
+ cudf_polars/utils/versions.py,sha256=4txZKAD5Ql-fEeW9HAomHgePOnNN1pj1e-XXPE0HSAQ,605
44
+ cudf_polars_cu12-25.2.0.dist-info/LICENSE,sha256=4YCpjWCbYMkMQFW47JXsorZLOaP957HwmP6oHW2_ngM,11348
45
+ cudf_polars_cu12-25.2.0.dist-info/METADATA,sha256=_NnRcF4Gzeq_9vIMkV2VUW1MNh8krORxnyNKCxbR-TU,4545
46
+ cudf_polars_cu12-25.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
47
+ cudf_polars_cu12-25.2.0.dist-info/top_level.txt,sha256=w2bOa7MpuyapYgZh480Znh4UzX7rSWlFcYR1Yo6QIPs,12
48
+ cudf_polars_cu12-25.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5