cudf-polars-cu12 25.2.2__py3-none-any.whl → 25.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. cudf_polars/VERSION +1 -1
  2. cudf_polars/callback.py +85 -53
  3. cudf_polars/containers/column.py +100 -7
  4. cudf_polars/containers/dataframe.py +16 -24
  5. cudf_polars/dsl/expr.py +3 -1
  6. cudf_polars/dsl/expressions/aggregation.py +3 -3
  7. cudf_polars/dsl/expressions/binaryop.py +2 -2
  8. cudf_polars/dsl/expressions/boolean.py +4 -4
  9. cudf_polars/dsl/expressions/datetime.py +39 -1
  10. cudf_polars/dsl/expressions/literal.py +3 -9
  11. cudf_polars/dsl/expressions/selection.py +2 -2
  12. cudf_polars/dsl/expressions/slicing.py +53 -0
  13. cudf_polars/dsl/expressions/sorting.py +1 -1
  14. cudf_polars/dsl/expressions/string.py +4 -4
  15. cudf_polars/dsl/expressions/unary.py +3 -2
  16. cudf_polars/dsl/ir.py +222 -93
  17. cudf_polars/dsl/nodebase.py +8 -1
  18. cudf_polars/dsl/translate.py +66 -38
  19. cudf_polars/experimental/base.py +18 -12
  20. cudf_polars/experimental/dask_serialize.py +22 -8
  21. cudf_polars/experimental/groupby.py +346 -0
  22. cudf_polars/experimental/io.py +13 -11
  23. cudf_polars/experimental/join.py +318 -0
  24. cudf_polars/experimental/parallel.py +57 -6
  25. cudf_polars/experimental/shuffle.py +194 -0
  26. cudf_polars/testing/plugin.py +23 -34
  27. cudf_polars/typing/__init__.py +33 -2
  28. cudf_polars/utils/config.py +138 -0
  29. cudf_polars/utils/conversion.py +40 -0
  30. cudf_polars/utils/dtypes.py +14 -4
  31. cudf_polars/utils/timer.py +39 -0
  32. cudf_polars/utils/versions.py +4 -3
  33. {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.4.0.dist-info}/METADATA +8 -7
  34. cudf_polars_cu12-25.4.0.dist-info/RECORD +55 -0
  35. {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.4.0.dist-info}/WHEEL +1 -1
  36. cudf_polars_cu12-25.2.2.dist-info/RECORD +0 -48
  37. {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.4.0.dist-info/licenses}/LICENSE +0 -0
  38. {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.4.0.dist-info}/top_level.txt +0 -0
@@ -31,9 +31,12 @@ def pytest_addoption(parser: pytest.Parser) -> None:
31
31
  def pytest_configure(config: pytest.Config) -> None:
32
32
  """Enable use of this module as a pytest plugin to enable GPU collection."""
33
33
  no_fallback = config.getoption("--cudf-polars-no-fallback")
34
- collect = polars.LazyFrame.collect
35
- engine = polars.GPUEngine(raise_on_fail=no_fallback)
36
- polars.LazyFrame.collect = partialmethod(collect, engine=engine)
34
+ if no_fallback:
35
+ collect = polars.LazyFrame.collect
36
+ engine = polars.GPUEngine(raise_on_fail=no_fallback)
37
+ polars.LazyFrame.collect = partialmethod(collect, engine=engine)
38
+ else:
39
+ polars.Config.set_engine_affinity("gpu")
37
40
  config.addinivalue_line(
38
41
  "filterwarnings",
39
42
  "ignore:.*GPU engine does not support streaming or background collection",
@@ -52,21 +55,11 @@ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
52
55
  "tests/unit/io/test_delta.py::test_read_delta_version": "Need to expose hive partitioning",
53
56
  "tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed",
54
57
  "tests/unit/io/test_lazy_csv.py::test_scan_csv_slice_offset_zero": "Integer overflow in sliced read",
55
- "tests/unit/io/test_lazy_parquet.py::test_dsl2ir_cached_metadata[False]": "cudf-polars doesn't use metadata read by rust preprocessing",
56
58
  "tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match",
57
59
  "tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match",
58
- "tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394",
59
- "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-columns]": "Correctly raises but different error",
60
- "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-row_groups]": "Correctly raises but different error",
61
- "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-prefiltered]": "Correctly raises but different error",
62
- "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-none]": "Correctly raises but different error",
63
- "tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394",
64
60
  "tests/unit/io/test_lazy_parquet.py::test_scan_parquet_ignores_dtype_mismatch_for_non_projected_columns_19249[False-False]": "Needs some variant of cudf#16394",
65
61
  "tests/unit/io/test_lazy_parquet.py::test_scan_parquet_ignores_dtype_mismatch_for_non_projected_columns_19249[True-False]": "Needs some variant of cudf#16394",
66
- "tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?",
67
62
  "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394",
68
- "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception",
69
- "tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception",
70
63
  "tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU",
71
64
  "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-False-none]": "Mismatching column read cudf#16394",
72
65
  "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-False-none]": "Mismatching column read cudf#16394",
@@ -84,6 +77,7 @@ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
84
77
  "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-row_groups]": "Mismatching column read cudf#16394",
85
78
  "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-columns]": "Mismatching column read cudf#16394",
86
79
  "tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-columns]": "Mismatching column read cudf#16394",
80
+ "tests/unit/io/test_parquet.py::test_scan_parquet_filter_statistics_load_missing_column_21391": "Mismatching column read cudf#16394",
87
81
  "tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match",
88
82
  "tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match",
89
83
  "tests/unit/io/test_scan.py::test_scan_with_filter[single-csv-async]": "Debug output on stderr doesn't match",
@@ -120,12 +114,6 @@ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
120
114
  "tests/unit/io/test_scan.py::test_scan_with_row_index_limit_and_filter[single-parquet-async]": "Debug output on stderr doesn't match",
121
115
  "tests/unit/io/test_scan.py::test_scan_with_row_index_projected_out[single-parquet-async]": "Debug output on stderr doesn't match",
122
116
  "tests/unit/io/test_scan.py::test_scan_with_row_index_filter_and_limit[single-parquet-async]": "Debug output on stderr doesn't match",
123
- "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_parquet-write_parquet]": "Need to add include_file_path to IR",
124
- "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_csv-write_csv]": "Need to add include_file_path to IR",
125
- "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_parquet-write_parquet]": "Debug output on stderr doesn't match",
126
- "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_csv-write_csv]": "Debug output on stderr doesn't match",
127
- "tests/unit/io/test_scan.py::test_scan_include_file_paths[False-scan_ndjson-write_ndjson]": "Debug output on stderr doesn't match",
128
- "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_ndjson-write_ndjson]": "Need to add include_file_path to IR",
129
117
  "tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR",
130
118
  "tests/unit/io/test_write.py::test_write_async[<lambda>-write_csv]": "Need to add include_file_path to IR",
131
119
  "tests/unit/io/test_write.py::test_write_async[read_parquet-<lambda>]": "Need to add include_file_path to IR",
@@ -189,17 +177,16 @@ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
189
177
  "tests/unit/sql/test_cast.py::test_cast_errors[values5-values::int4-conversion from `str` to `i32` failed]": "Cast raises, but error user receives is wrong",
190
178
  "tests/unit/sql/test_miscellaneous.py::test_read_csv": "Incorrect handling of missing_is_null in read_csv",
191
179
  "tests/unit/sql/test_wildcard_opts.py::test_select_wildcard_errors": "Raises correctly but with different exception",
192
- "tests/unit/streaming/test_streaming_io.py::test_parquet_eq_statistics": "Debug output on stderr doesn't match",
193
180
  "tests/unit/test_cse.py::test_cse_predicate_self_join": "Debug output on stderr doesn't match",
194
181
  "tests/unit/test_empty.py::test_empty_9137": "Mismatching dtypes, needs cudf#15852",
195
182
  "tests/unit/test_errors.py::test_error_on_empty_group_by": "Incorrect exception raised",
196
183
  # Maybe flaky, order-dependent?
197
- "tests/unit/test_projections.py::test_schema_full_outer_join_projection_pd_13287": "Order-specific result check, query is correct but in different order",
198
184
  "tests/unit/test_queries.py::test_group_by_agg_equals_zero_3535": "libcudf sums all nulls to null, not zero",
199
185
  }
200
186
 
201
187
 
202
188
  TESTS_TO_SKIP: Mapping[str, str] = {
189
+ "tests/unit/operations/test_profile.py::test_profile_with_cse": "Shape assertion won't match",
203
190
  # On Ubuntu 20.04, the tzdata package contains a bunch of symlinks
204
191
  # for obsolete timezone names. However, the chrono_tz package that
205
192
  # polars uses doesn't read /usr/share/zoneinfo, instead packaging
@@ -209,15 +196,18 @@ TESTS_TO_SKIP: Mapping[str, str] = {
209
196
  # polars that the requested timezone is unknown.
210
197
  # Since this is random, just skip it, rather than xfailing.
211
198
  "tests/unit/lazyframe/test_serde.py::test_lf_serde_roundtrip_binary": "chrono_tz doesn't have all tzdata symlink names",
199
+ # Tests performance difference of CPU engine
200
+ "tests/unit/operations/test_join.py::test_join_where_eager_perf_21145": "Tests performance bug in CPU engine",
212
201
  # The test may segfault with the legacy streaming engine. We should
213
202
  # remove this skip when all polars tests use the new streaming engine.
214
203
  "tests/unit/streaming/test_streaming_group_by.py::test_streaming_group_by_literal[1]": "May segfault w/the legacy streaming engine",
215
204
  # Fails in CI, but passes locally
216
205
  "tests/unit/streaming/test_streaming.py::test_streaming_streamable_functions": "RuntimeError: polars_python::sql::PySQLContext is unsendable, but is being dropped on another thread",
217
- # TODO: Remove once when we support polars 1.23
218
- "tests/unit/io/database/test_read.py::test_read_database[uri: connectorx]": "ValueError: arrow2",
219
- "tests/unit/io/database/test_read.py::test_read_database_cx_credentials[fakedb://123:456@account/database/schema?warehouse=warehouse&role=role]": "ValueError: arrow2",
220
- "tests/unit/io/database/test_read.py::test_read_database_cx_credentials[fakedb://my#%us3r:p433w0rd@not_a_real_host:9999/database]": "ValueError: arrow2",
206
+ # Remove when polars supports Pydantic V3
207
+ "tests/unit/constructors/test_constructors.py::test_init_structured_objects": "pydantic deprecation warning",
208
+ "tests/unit/constructors/test_constructors.py::test_init_pydantic_2x": "pydantic deprecation warning",
209
+ "tests/unit/constructors/test_constructors.py::test_init_structured_objects_nested[_TestFooPD-_TestBarPD-_TestBazPD]": "pydantic deprecation warning",
210
+ "tests/unit/series/test_series.py::test_init_structured_objects": "pydantic deprecation warning",
221
211
  }
222
212
 
223
213
 
@@ -229,18 +219,17 @@ def pytest_collection_modifyitems(
229
219
  # Don't xfail tests if running without fallback
230
220
  return
231
221
  for item in items:
232
- if item.nodeid in TESTS_TO_SKIP:
233
- item.add_marker(pytest.mark.skip(reason=TESTS_TO_SKIP[item.nodeid]))
234
- elif item.nodeid in EXPECTED_FAILURES:
235
- if isinstance(EXPECTED_FAILURES[item.nodeid], tuple):
222
+ if (reason := TESTS_TO_SKIP.get(item.nodeid, None)) is not None:
223
+ item.add_marker(pytest.mark.skip(reason=reason))
224
+ elif (entry := EXPECTED_FAILURES.get(item.nodeid, None)) is not None:
225
+ if isinstance(entry, tuple):
236
226
  # the second entry in the tuple is the condition to xfail on
227
+ reason, condition = entry
237
228
  item.add_marker(
238
229
  pytest.mark.xfail(
239
- condition=EXPECTED_FAILURES[item.nodeid][1],
240
- reason=EXPECTED_FAILURES[item.nodeid][0],
230
+ condition=condition,
231
+ reason=reason,
241
232
  ),
242
233
  )
243
234
  else:
244
- item.add_marker(
245
- pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid])
246
- )
235
+ item.add_marker(pytest.mark.xfail(reason=entry))
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Typing utilities for cudf_polars."""
@@ -6,7 +6,7 @@
6
6
  from __future__ import annotations
7
7
 
8
8
  from collections.abc import Hashable, Mapping
9
- from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, Union
9
+ from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar, TypedDict, Union
10
10
 
11
11
  from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir
12
12
 
@@ -68,6 +68,8 @@ PolarsExpr: TypeAlias = Union[
68
68
 
69
69
  Schema: TypeAlias = Mapping[str, plc.DataType]
70
70
 
71
+ Slice: TypeAlias = tuple[int, int | None]
72
+
71
73
 
72
74
  class NodeTraverser(Protocol):
73
75
  """Abstract protocol for polars NodeTraverser."""
@@ -145,3 +147,32 @@ ExprTransformer: TypeAlias = GenericTransformer["expr.Expr", "expr.Expr"]
145
147
 
146
148
  IRTransformer: TypeAlias = GenericTransformer["ir.IR", "ir.IR"]
147
149
  """Protocol for transformation of IR nodes."""
150
+
151
+
152
+ class ColumnOptions(TypedDict):
153
+ """
154
+ Column constructor options.
155
+
156
+ Notes
157
+ -----
158
+ Used to serialize Column and DataFrame containers.
159
+ """
160
+
161
+ is_sorted: plc.types.Sorted
162
+ order: plc.types.Order
163
+ null_order: plc.types.NullOrder
164
+ name: str | None
165
+
166
+
167
+ class ColumnHeader(TypedDict):
168
+ """Column serialization header."""
169
+
170
+ column_kwargs: ColumnOptions
171
+ frame_count: int
172
+
173
+
174
+ class DataFrameHeader(TypedDict):
175
+ """DataFrame serialization header."""
176
+
177
+ columns_kwargs: list[ColumnOptions]
178
+ frame_count: int
@@ -0,0 +1,138 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Config utilities."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import copy
9
+ import json
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ if TYPE_CHECKING:
13
+ from typing_extensions import Self
14
+
15
+ __all__ = ["ConfigOptions"]
16
+
17
+
18
+ class ConfigOptions:
19
+ """
20
+ GPUEngine configuration-option manager.
21
+
22
+ This is a convenience class to help manage the nested
23
+ dictionary of user-accessible `GPUEngine` options.
24
+ """
25
+
26
+ __slots__ = ("_hash_value", "config_options")
27
+ _hash_value: int
28
+ config_options: dict[str, Any]
29
+ """The underlying (nested) config-option dictionary."""
30
+
31
+ def __init__(self, options: dict[str, Any]):
32
+ self.validate(options)
33
+ self.config_options = options
34
+
35
+ def set(self, name: str, value: Any) -> Self:
36
+ """
37
+ Set a user config option.
38
+
39
+ Nested dictionary keys should be separated by periods.
40
+ For example::
41
+
42
+ >>> options = options.set("parquet_options.chunked", False)
43
+
44
+ Parameters
45
+ ----------
46
+ name
47
+ Period-separated config name.
48
+ value
49
+ New config value.
50
+ """
51
+ options = config_options = copy.deepcopy(self.config_options)
52
+ keys = name.split(".")
53
+ for k in keys[:-1]:
54
+ assert isinstance(options, dict)
55
+ if k not in options:
56
+ options[k] = {}
57
+ options = options[k]
58
+ options[keys[-1]] = value
59
+ return type(self)(config_options)
60
+
61
+ def get(self, name: str, *, default: Any = None) -> Any:
62
+ """
63
+ Get a user config option.
64
+
65
+ Nested dictionary keys should be separated by periods.
66
+ For example::
67
+
68
+ >>> chunked = config_options.get("parquet_options.chunked")
69
+
70
+ Parameters
71
+ ----------
72
+ name
73
+ Period-separated config name.
74
+ default
75
+ Default return value.
76
+
77
+ Returns
78
+ -------
79
+ The user-specified config value, or `default`
80
+ if the config is not found.
81
+ """
82
+ options = self.config_options
83
+ keys = name.split(".")
84
+ for k in keys[:-1]:
85
+ assert isinstance(options, dict)
86
+ options = options.get(k, {})
87
+ return options.get(keys[-1], default)
88
+
89
+ def __hash__(self) -> int:
90
+ """Hash a ConfigOptions object."""
91
+ try:
92
+ return self._hash_value
93
+ except AttributeError:
94
+ self._hash_value = hash(json.dumps(self.config_options))
95
+ return self._hash_value
96
+
97
+ @staticmethod
98
+ def validate(config: dict) -> None:
99
+ """
100
+ Validate a configuration-option dictionary.
101
+
102
+ Parameters
103
+ ----------
104
+ config
105
+ GPUEngine configuration options to validate.
106
+
107
+ Raises
108
+ ------
109
+ ValueError
110
+ If the configuration contains unsupported options.
111
+ """
112
+ if unsupported := (
113
+ config.keys()
114
+ - {"raise_on_fail", "parquet_options", "executor", "executor_options"}
115
+ ):
116
+ raise ValueError(
117
+ f"Engine configuration contains unsupported settings: {unsupported}"
118
+ )
119
+ assert {"chunked", "chunk_read_limit", "pass_read_limit"}.issuperset(
120
+ config.get("parquet_options", {})
121
+ )
122
+
123
+ # Validate executor_options
124
+ executor = config.get("executor", "pylibcudf")
125
+ if executor == "dask-experimental":
126
+ unsupported = config.get("executor_options", {}).keys() - {
127
+ "max_rows_per_partition",
128
+ "parquet_blocksize",
129
+ "cardinality_factor",
130
+ "groupby_n_ary",
131
+ "broadcast_join_limit",
132
+ }
133
+ else:
134
+ unsupported = config.get("executor_options", {}).keys()
135
+ if unsupported:
136
+ raise ValueError(
137
+ f"Unsupported executor_options for {executor}: {unsupported}"
138
+ )
@@ -0,0 +1,40 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Miscellaneous conversion functions."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING:
11
+ from cudf_polars.typing import Slice
12
+
13
+
14
+ def from_polars_slice(zlice: Slice, *, num_rows: int) -> list[int]:
15
+ """
16
+ Convert a Polars slice into something pylibcudf handles.
17
+
18
+ Parameters
19
+ ----------
20
+ zlice
21
+ The slice to convert
22
+ num_rows
23
+ The number of rows in the object being sliced.
24
+
25
+ Returns
26
+ -------
27
+ List of start and end slice bounds.
28
+ """
29
+ start, length = zlice
30
+ if length is None:
31
+ length = num_rows
32
+ if start < 0:
33
+ start += num_rows
34
+ # Polars implementation wraps negative start by num_rows, then
35
+ # adds length to start to get the end, then clamps both to
36
+ # [0, num_rows]
37
+ end = start + length
38
+ start = max(min(start, num_rows), 0)
39
+ end = max(min(end, num_rows), 0)
40
+ return [start, end]
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  """Datatype utilities."""
@@ -71,7 +71,9 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
71
71
  -------
72
72
  True if casting is supported, False otherwise
73
73
  """
74
- has_empty = from_.id() == plc.TypeId.EMPTY or to.id() == plc.TypeId.EMPTY
74
+ to_is_empty = to.id() == plc.TypeId.EMPTY
75
+ from_is_empty = from_.id() == plc.TypeId.EMPTY
76
+ has_empty = to_is_empty or from_is_empty
75
77
  return (
76
78
  (
77
79
  from_ == to
@@ -84,8 +86,16 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
84
86
  )
85
87
  )
86
88
  )
87
- or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to))
88
- or (to.id() == plc.TypeId.STRING and is_numeric_not_bool(from_))
89
+ or (
90
+ from_.id() == plc.TypeId.STRING
91
+ and not to_is_empty
92
+ and is_numeric_not_bool(to)
93
+ )
94
+ or (
95
+ to.id() == plc.TypeId.STRING
96
+ and not from_is_empty
97
+ and is_numeric_not_bool(from_)
98
+ )
89
99
  )
90
100
 
91
101
 
@@ -0,0 +1,39 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Timing utilities."""
5
+
6
+ from __future__ import annotations
7
+
8
+ __all__: list[str] = ["Timer"]
9
+
10
+
11
+ class Timer:
12
+ """
13
+ A timer for recording execution times of nodes.
14
+
15
+ Parameters
16
+ ----------
17
+ query_start
18
+ Duration in nanoseconds since the query was started on the
19
+ Polars side
20
+ """
21
+
22
+ def __init__(self, query_start: int):
23
+ self.query_start = query_start
24
+ self.timings: list[tuple[int, int, str]] = []
25
+
26
+ def store(self, start: int, end: int, name: str):
27
+ """
28
+ Store timing for a node.
29
+
30
+ Parameters
31
+ ----------
32
+ start
33
+ Start of the execution for this node (use time.monotonic_ns).
34
+ end
35
+ End of the execution for this node.
36
+ name
37
+ The name for this node.
38
+ """
39
+ self.timings.append((start - self.query_start, end - self.query_start, name))
@@ -12,11 +12,12 @@ from polars import __version__
12
12
 
13
13
  POLARS_VERSION = parse(__version__)
14
14
 
15
- POLARS_VERSION_LT_120 = POLARS_VERSION < parse("1.20")
15
+ POLARS_VERSION_LT_125 = POLARS_VERSION < parse("1.25")
16
+ POLARS_VERSION_LT_124 = POLARS_VERSION < parse("1.24")
16
17
 
17
18
 
18
19
  def _ensure_polars_version():
19
- if POLARS_VERSION_LT_120:
20
+ if POLARS_VERSION_LT_124:
20
21
  raise ImportError(
21
- "cudf_polars requires py-polars v1.20 or greater."
22
+ "cudf_polars requires py-polars v1.24 or greater."
22
23
  ) # pragma: no cover
@@ -1,9 +1,9 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: cudf-polars-cu12
3
- Version: 25.2.2
3
+ Version: 25.4.0
4
4
  Summary: Executor for polars using cudf
5
5
  Author: NVIDIA Corporation
6
- License: Apache 2.0
6
+ License: Apache-2.0
7
7
  Project-URL: Homepage, https://github.com/rapidsai/cudf
8
8
  Classifier: Intended Audience :: Developers
9
9
  Classifier: Topic :: Database
@@ -16,15 +16,16 @@ Classifier: Programming Language :: Python :: 3.12
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: polars<1.22,>=1.20
20
- Requires-Dist: pylibcudf-cu12==25.2.*
19
+ Requires-Dist: polars<1.26,>=1.24
20
+ Requires-Dist: pylibcudf-cu12==25.4.*
21
21
  Provides-Extra: test
22
22
  Requires-Dist: numpy<3.0a0,>=1.23; extra == "test"
23
23
  Requires-Dist: pytest-cov; extra == "test"
24
24
  Requires-Dist: pytest-xdist; extra == "test"
25
25
  Requires-Dist: pytest<8; extra == "test"
26
26
  Provides-Extra: experimental
27
- Requires-Dist: rapids-dask-dependency==25.2.*; extra == "experimental"
27
+ Requires-Dist: rapids-dask-dependency==25.4.*; extra == "experimental"
28
+ Dynamic: license-file
28
29
 
29
30
  # <div align="left"><img src="img/rapids_logo.png" width="90px"/>&nbsp;cuDF - GPU DataFrames</div>
30
31
 
@@ -111,7 +112,7 @@ cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge
111
112
 
112
113
  ```bash
113
114
  conda install -c rapidsai -c conda-forge -c nvidia \
114
- cudf=25.02 python=3.12 cuda-version=12.8
115
+ cudf=25.04 python=3.12 cuda-version=12.8
115
116
  ```
116
117
 
117
118
  We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
@@ -0,0 +1,55 @@
1
+ cudf_polars/VERSION,sha256=EM36MPurzJgotElKb8R7ZaIOF2woBA69gsVnmiyf-LY,8
2
+ cudf_polars/__init__.py,sha256=fSTx5nmqajdwp7qvP4PnYL6wZN9-k1fKB43NkcZlHwk,740
3
+ cudf_polars/_version.py,sha256=kj5Ir4dxZRR-k2k8mWUDJHiGpE8_ZcTNzt_kMZxcFRA,528
4
+ cudf_polars/callback.py,sha256=I95-f82LKuAPN3eW_0H5QjCC6Vw9-G6omVVdFLovj2c,10004
5
+ cudf_polars/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ cudf_polars/containers/__init__.py,sha256=He8qCk9m37k7KOWC6s4QuL8LYF11KhP2vluH9s8EeYI,339
7
+ cudf_polars/containers/column.py,sha256=6u11AmXbO-BQGD0tLqG8WcmQVwm0dwE-PTjo0Fmubfc,11379
8
+ cudf_polars/containers/dataframe.py,sha256=LbVr-1a3sbisanX3CP9wLeNG2jIAJCFl_GVb8sbcTa4,10319
9
+ cudf_polars/dsl/__init__.py,sha256=bYwYnqmqINMgwnkJ22EnXMlHviLolPaMgQ8QqoZL3YE,244
10
+ cudf_polars/dsl/expr.py,sha256=PgzO75CP-n5ZggpT-XqpUdjAwEpGP04G5N_aiX3VEqU,1868
11
+ cudf_polars/dsl/ir.py,sha256=9yqvcKZvAWaBOyrlSU1KGOmnlPrSYPv_ikXSKLsbeWc,69030
12
+ cudf_polars/dsl/nodebase.py,sha256=s9a4uU4c-Zn-DSPbi0x6zPElyjZgigmktaSqF1OedLw,4521
13
+ cudf_polars/dsl/to_ast.py,sha256=q_lLko1AI9fYStzqEzASDqlRZrjBnoMfNu6MHAaI3d0,12309
14
+ cudf_polars/dsl/translate.py,sha256=hfD-iN33DmwmDbMKPpqEEmgcZWNdUDWty9uzG_SwE3M,25258
15
+ cudf_polars/dsl/traversal.py,sha256=dVH37YoLx-wo61lUofaOM77eji99yIaC62PSb3QYKHM,4610
16
+ cudf_polars/dsl/expressions/__init__.py,sha256=uj1a4BzrDVAjOgP6FKKtnvR5elF4ksUj1RhkLTZoS1k,224
17
+ cudf_polars/dsl/expressions/aggregation.py,sha256=O2idMrs5QFCBfwfegBCYB09RHWqLXQ3UjeZMyzfI1Eg,8734
18
+ cudf_polars/dsl/expressions/base.py,sha256=a7GRXNRUeheR4haHLGtgC8j796D9p8yolF5tKKxT5do,9038
19
+ cudf_polars/dsl/expressions/binaryop.py,sha256=AcnIYEcQycfRtT1sbauftFRch_lzR496oXBB5NmxYu4,5634
20
+ cudf_polars/dsl/expressions/boolean.py,sha256=ygG9LpL-cA7CUGGivO2q18JxcIFEUV4wnxW1CnsR5X8,11744
21
+ cudf_polars/dsl/expressions/datetime.py,sha256=wQoXgwrYF0yvzUcSjSQm0NQznlhA9xX1V7i13f9QotA,8330
22
+ cudf_polars/dsl/expressions/literal.py,sha256=MEW1VYrKfBXDkn8a65TTAWbxZ4dyzxAv2_S8NdBkgJ4,2709
23
+ cudf_polars/dsl/expressions/rolling.py,sha256=zeGjO-FqnQF7buxRrWdtuJe9x_CpCPjUhL6nJtk8jgU,1177
24
+ cudf_polars/dsl/expressions/selection.py,sha256=Hz8loXlpksXurT818kBKhAr02r0tYeDnyux6k1AuB1o,2900
25
+ cudf_polars/dsl/expressions/slicing.py,sha256=8nfPhL_SgneQIv5M5JdztUce_aJHIjOlzYT8zJZNYHw,1304
26
+ cudf_polars/dsl/expressions/sorting.py,sha256=W7CBwvfEp9T1BqmuoWEVJfsbuHr8OXfoyasnsgdWxpM,3006
27
+ cudf_polars/dsl/expressions/string.py,sha256=PPb-PCpkPB9xQlW3l1xmGeDLfuBLHt-aKNAagbv0Wbo,14178
28
+ cudf_polars/dsl/expressions/ternary.py,sha256=iF7g9XvZNXwNqFDjgKMGR-uWat6G3vJEZudesnQ4KUs,1499
29
+ cudf_polars/dsl/expressions/unary.py,sha256=tc9EXYR4nvg2GGNHnEOjmye8L5hzZSc1E-bpFDIYad0,12832
30
+ cudf_polars/experimental/__init__.py,sha256=S2oI2K__woyPQAAlFMOo6RTMtdfIZxmzzAO92VtJgP4,256
31
+ cudf_polars/experimental/base.py,sha256=Pl_6-LFWTSm9X7AjVF6FHUIivtCrveOCK8iYESUlUUs,1385
32
+ cudf_polars/experimental/dask_serialize.py,sha256=Jb0CWanuVhW90A8omzNnMfo-YHg4IGohZeIuUrUnO30,2692
33
+ cudf_polars/experimental/dispatch.py,sha256=pw6KyC_irRcQgKb8S0o1KKvRHQPFOikyv8kfMQtgxjc,2094
34
+ cudf_polars/experimental/groupby.py,sha256=LZUj2kEtVVUFqLeAgZ2MD4ORXEeAuXqA7sK_t_2p41Q,11487
35
+ cudf_polars/experimental/io.py,sha256=4XfTpbQTwiGwfFGAWwzgqD4xwbZC7sKThytiYAP0b4s,11387
36
+ cudf_polars/experimental/join.py,sha256=tHtmva6v-FVec0G4j8nST1Qog0OUuaBsrv2OSCbF4tQ,10075
37
+ cudf_polars/experimental/parallel.py,sha256=UA-ovotBPQqnmr5wH97jAse-2fhMlKn3F4068ejZMbA,9700
38
+ cudf_polars/experimental/select.py,sha256=TMU-7DKv8e-F0Crqn811HDiBAHzyZxE68kTnYmuZYjk,1196
39
+ cudf_polars/experimental/shuffle.py,sha256=q0pfRzOhqc_qHv7kwzYNCUA3TrnpUvkWXL26H0bh550,5358
40
+ cudf_polars/testing/__init__.py,sha256=0MnlTjkTEqkSpL5GdMhQf4uXOaQwNrzgEJCZKa5FnL4,219
41
+ cudf_polars/testing/asserts.py,sha256=-D9ZNojBZk75Dz8c8IZUMF_sPOJc9bIsX2cybqX2zK8,7883
42
+ cudf_polars/testing/plugin.py,sha256=UuTyZCUDf-BqOGvpaLwwR_T_k-ggjtBdcFbBv-KN9Hc,23105
43
+ cudf_polars/typing/__init__.py,sha256=ZNdcZoHC-_RuNdZGHk71qqHv7Rh20jLWwZHGPI2pF80,4157
44
+ cudf_polars/utils/__init__.py,sha256=urdV5MUIneU8Dn6pt1db5GkDG0oY4NsFD0Uhl3j98l8,195
45
+ cudf_polars/utils/config.py,sha256=WFc14fSdDXEJi_nebb8iNJrksXcHn3PvaFJJBEYJnrY,4015
46
+ cudf_polars/utils/conversion.py,sha256=k_apLbSR-MiYYlQBGrzYOInuvcbfSi-il-o9nkovdXQ,1042
47
+ cudf_polars/utils/dtypes.py,sha256=KKxCIId3w-xDA1iaY-QOcqrr3sHO3vzJmrcocXwMkPk,6774
48
+ cudf_polars/utils/sorting.py,sha256=Mqb_KLsYnKU8p1dDan2mtlIQl65RqwM78OlUi-_Jj0k,1725
49
+ cudf_polars/utils/timer.py,sha256=NWTOFJn6ULrEQu7J0NRj2S5g5VUNAHIautJxS-wVOBk,997
50
+ cudf_polars/utils/versions.py,sha256=XFwPc4NorW0FQLK7tkODeoNyZa04xp4bgmgtavRBo4E,660
51
+ cudf_polars_cu12-25.4.0.dist-info/licenses/LICENSE,sha256=4YCpjWCbYMkMQFW47JXsorZLOaP957HwmP6oHW2_ngM,11348
52
+ cudf_polars_cu12-25.4.0.dist-info/METADATA,sha256=gr3hPO-k4wMnnkCIqe9rHH6SiGbZCENq_YYzGg9sYMM,4567
53
+ cudf_polars_cu12-25.4.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
54
+ cudf_polars_cu12-25.4.0.dist-info/top_level.txt,sha256=w2bOa7MpuyapYgZh480Znh4UzX7rSWlFcYR1Yo6QIPs,12
55
+ cudf_polars_cu12-25.4.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.2)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,48 +0,0 @@
1
- cudf_polars/VERSION,sha256=plYJffSq5Sjl-h4X3Vi1vv8w_9VtpoafQ7qHYheQUKQ,8
2
- cudf_polars/__init__.py,sha256=fSTx5nmqajdwp7qvP4PnYL6wZN9-k1fKB43NkcZlHwk,740
3
- cudf_polars/_version.py,sha256=kj5Ir4dxZRR-k2k8mWUDJHiGpE8_ZcTNzt_kMZxcFRA,528
4
- cudf_polars/callback.py,sha256=u8CyAFgb9f8fL4eOBqGRQWb0BuoX6tIDaig4gJeAlLw,9023
5
- cudf_polars/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- cudf_polars/containers/__init__.py,sha256=He8qCk9m37k7KOWC6s4QuL8LYF11KhP2vluH9s8EeYI,339
7
- cudf_polars/containers/column.py,sha256=O4Sjnjs2gUoo50oaL2OZ45BFePjG1oo3yO163KIKY0E,8371
8
- cudf_polars/containers/dataframe.py,sha256=Szzv4jc8cxux5MVII9mmEFwZdYn6jCNcVJ9EKSNLpb4,10637
9
- cudf_polars/dsl/__init__.py,sha256=bYwYnqmqINMgwnkJ22EnXMlHviLolPaMgQ8QqoZL3YE,244
10
- cudf_polars/dsl/expr.py,sha256=L6nmP4zsBITrzvTfR_QvO1NLnbTMlB-KUeFvXbyG26A,1795
11
- cudf_polars/dsl/ir.py,sha256=6o7VLhfnNeP50YQzL-Jf9qxa55isjb6zmY2Fyntd2ik,64874
12
- cudf_polars/dsl/nodebase.py,sha256=_ffrQJcsE0_Nwa6K8LG0N5DmgHHEFBnTE1oz8Ugx4N4,4352
13
- cudf_polars/dsl/to_ast.py,sha256=q_lLko1AI9fYStzqEzASDqlRZrjBnoMfNu6MHAaI3d0,12309
14
- cudf_polars/dsl/translate.py,sha256=Ex5uVL1sVpDAZrqfrC_xUVnR5SQ51n5v9-KnL5Q7pdU,24915
15
- cudf_polars/dsl/traversal.py,sha256=dVH37YoLx-wo61lUofaOM77eji99yIaC62PSb3QYKHM,4610
16
- cudf_polars/dsl/expressions/__init__.py,sha256=uj1a4BzrDVAjOgP6FKKtnvR5elF4ksUj1RhkLTZoS1k,224
17
- cudf_polars/dsl/expressions/aggregation.py,sha256=BdKCdKj4DNxkYjmRNLER-7zYCo-PfXVY3ZFqPPoPeNM,8722
18
- cudf_polars/dsl/expressions/base.py,sha256=a7GRXNRUeheR4haHLGtgC8j796D9p8yolF5tKKxT5do,9038
19
- cudf_polars/dsl/expressions/binaryop.py,sha256=EU3YF0d5OQMMwIuZVsQgtHaWyJ1Qc09HBLuSpMma_xg,5640
20
- cudf_polars/dsl/expressions/boolean.py,sha256=GCJhinhgGpKKO1RsBAWEwFdyQiFIvDDrWsnSMSoAEWU,11756
21
- cudf_polars/dsl/expressions/datetime.py,sha256=H6Nmz_cHFthzlb8bpD5gDJSMv0BWcdTvPm-OTcXTPoQ,6861
22
- cudf_polars/dsl/expressions/literal.py,sha256=BTNamYPm6fHxuMlH7Knr7pCWbqYZURf6DmDRZHPeklc,2864
23
- cudf_polars/dsl/expressions/rolling.py,sha256=zeGjO-FqnQF7buxRrWdtuJe9x_CpCPjUhL6nJtk8jgU,1177
24
- cudf_polars/dsl/expressions/selection.py,sha256=OK3dT9aP_-hpHzKhjVbPQ-uUe7YCcXelpMUZP-vxNOk,2900
25
- cudf_polars/dsl/expressions/sorting.py,sha256=djIyJ_FXeJOSKGRIF7uKwa9uefgCVwLMaf4pQNMraUU,3000
26
- cudf_polars/dsl/expressions/string.py,sha256=px-6_J46XXuS3JcM3zvV3O9QQCGhDy5t8ntkQl4TJeg,14220
27
- cudf_polars/dsl/expressions/ternary.py,sha256=iF7g9XvZNXwNqFDjgKMGR-uWat6G3vJEZudesnQ4KUs,1499
28
- cudf_polars/dsl/expressions/unary.py,sha256=H24itZGMoDv63y57pBSsnCPOJ5IrEuZxM5gNNQtfwh4,12788
29
- cudf_polars/experimental/__init__.py,sha256=S2oI2K__woyPQAAlFMOo6RTMtdfIZxmzzAO92VtJgP4,256
30
- cudf_polars/experimental/base.py,sha256=mW8A3DBK4S7VpD7k4UjGynWp6kDmV8uWQvj0EsWYQhg,1149
31
- cudf_polars/experimental/dask_serialize.py,sha256=9-qTd04I9nRA-ijyv8daGLjXo5W6YbVnY8Z1ugyinCY,2029
32
- cudf_polars/experimental/dispatch.py,sha256=pw6KyC_irRcQgKb8S0o1KKvRHQPFOikyv8kfMQtgxjc,2094
33
- cudf_polars/experimental/io.py,sha256=ejOPjn5WEK7KiPoo4N_KRMTT82qROhFi12wRpKt9Bw8,11431
34
- cudf_polars/experimental/parallel.py,sha256=M6vECqlpaOVpbm_MSw84EUxC7TgwO8YHrfEwzZgF-Bw,7981
35
- cudf_polars/experimental/select.py,sha256=TMU-7DKv8e-F0Crqn811HDiBAHzyZxE68kTnYmuZYjk,1196
36
- cudf_polars/testing/__init__.py,sha256=0MnlTjkTEqkSpL5GdMhQf4uXOaQwNrzgEJCZKa5FnL4,219
37
- cudf_polars/testing/asserts.py,sha256=-D9ZNojBZk75Dz8c8IZUMF_sPOJc9bIsX2cybqX2zK8,7883
38
- cudf_polars/testing/plugin.py,sha256=TiBV5WTTUfGk9fHikWLr6qfDZCqiJ8jRULF0QccOLQ4,25041
39
- cudf_polars/typing/__init__.py,sha256=7A2xD8w62xG-jrx9ujxWkQnEGTt8WN-s_S_VT8pNP7M,3555
40
- cudf_polars/utils/__init__.py,sha256=urdV5MUIneU8Dn6pt1db5GkDG0oY4NsFD0Uhl3j98l8,195
41
- cudf_polars/utils/dtypes.py,sha256=6ygA_y-wTdv0WelNnWGZDnzgA2GKHfkhQ_hfP-jr8mk,6570
42
- cudf_polars/utils/sorting.py,sha256=Mqb_KLsYnKU8p1dDan2mtlIQl65RqwM78OlUi-_Jj0k,1725
43
- cudf_polars/utils/versions.py,sha256=4txZKAD5Ql-fEeW9HAomHgePOnNN1pj1e-XXPE0HSAQ,605
44
- cudf_polars_cu12-25.2.2.dist-info/LICENSE,sha256=4YCpjWCbYMkMQFW47JXsorZLOaP957HwmP6oHW2_ngM,11348
45
- cudf_polars_cu12-25.2.2.dist-info/METADATA,sha256=Z2C4e5YANZEpMDsdM81ToSKvsNYS_0JLue7MjBMRI7Q,4545
46
- cudf_polars_cu12-25.2.2.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
47
- cudf_polars_cu12-25.2.2.dist-info/top_level.txt,sha256=w2bOa7MpuyapYgZh480Znh4UzX7rSWlFcYR1Yo6QIPs,12
48
- cudf_polars_cu12-25.2.2.dist-info/RECORD,,