cudf-polars-cu12 25.2.2__py3-none-any.whl → 25.6.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (63)
  1. cudf_polars/VERSION +1 -1
  2. cudf_polars/callback.py +82 -65
  3. cudf_polars/containers/column.py +138 -7
  4. cudf_polars/containers/dataframe.py +26 -39
  5. cudf_polars/dsl/expr.py +3 -1
  6. cudf_polars/dsl/expressions/aggregation.py +27 -63
  7. cudf_polars/dsl/expressions/base.py +40 -72
  8. cudf_polars/dsl/expressions/binaryop.py +5 -41
  9. cudf_polars/dsl/expressions/boolean.py +25 -53
  10. cudf_polars/dsl/expressions/datetime.py +97 -17
  11. cudf_polars/dsl/expressions/literal.py +27 -33
  12. cudf_polars/dsl/expressions/rolling.py +110 -9
  13. cudf_polars/dsl/expressions/selection.py +8 -26
  14. cudf_polars/dsl/expressions/slicing.py +47 -0
  15. cudf_polars/dsl/expressions/sorting.py +5 -18
  16. cudf_polars/dsl/expressions/string.py +33 -36
  17. cudf_polars/dsl/expressions/ternary.py +3 -10
  18. cudf_polars/dsl/expressions/unary.py +35 -75
  19. cudf_polars/dsl/ir.py +749 -212
  20. cudf_polars/dsl/nodebase.py +8 -1
  21. cudf_polars/dsl/to_ast.py +5 -3
  22. cudf_polars/dsl/translate.py +319 -171
  23. cudf_polars/dsl/utils/__init__.py +8 -0
  24. cudf_polars/dsl/utils/aggregations.py +292 -0
  25. cudf_polars/dsl/utils/groupby.py +97 -0
  26. cudf_polars/dsl/utils/naming.py +34 -0
  27. cudf_polars/dsl/utils/replace.py +46 -0
  28. cudf_polars/dsl/utils/rolling.py +113 -0
  29. cudf_polars/dsl/utils/windows.py +186 -0
  30. cudf_polars/experimental/base.py +17 -19
  31. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  32. cudf_polars/experimental/benchmarks/pdsh.py +1279 -0
  33. cudf_polars/experimental/dask_registers.py +196 -0
  34. cudf_polars/experimental/distinct.py +174 -0
  35. cudf_polars/experimental/explain.py +127 -0
  36. cudf_polars/experimental/expressions.py +521 -0
  37. cudf_polars/experimental/groupby.py +288 -0
  38. cudf_polars/experimental/io.py +58 -29
  39. cudf_polars/experimental/join.py +353 -0
  40. cudf_polars/experimental/parallel.py +166 -93
  41. cudf_polars/experimental/repartition.py +69 -0
  42. cudf_polars/experimental/scheduler.py +155 -0
  43. cudf_polars/experimental/select.py +92 -7
  44. cudf_polars/experimental/shuffle.py +294 -0
  45. cudf_polars/experimental/sort.py +45 -0
  46. cudf_polars/experimental/spilling.py +151 -0
  47. cudf_polars/experimental/utils.py +100 -0
  48. cudf_polars/testing/asserts.py +146 -6
  49. cudf_polars/testing/io.py +72 -0
  50. cudf_polars/testing/plugin.py +78 -76
  51. cudf_polars/typing/__init__.py +59 -6
  52. cudf_polars/utils/config.py +353 -0
  53. cudf_polars/utils/conversion.py +40 -0
  54. cudf_polars/utils/dtypes.py +22 -5
  55. cudf_polars/utils/timer.py +39 -0
  56. cudf_polars/utils/versions.py +5 -4
  57. {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/METADATA +10 -7
  58. cudf_polars_cu12-25.6.0.dist-info/RECORD +73 -0
  59. {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/WHEEL +1 -1
  60. cudf_polars/experimental/dask_serialize.py +0 -59
  61. cudf_polars_cu12-25.2.2.dist-info/RECORD +0 -48
  62. {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info/licenses}/LICENSE +0 -0
  63. {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/top_level.txt +0 -0
cudf_polars/utils/config.py
@@ -0,0 +1,353 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Config utilities."""
+
+from __future__ import annotations
+
+import dataclasses
+import enum
+import json
+import os
+import warnings
+from typing import TYPE_CHECKING, Literal
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+    import polars as pl
+
+
+__all__ = ["ConfigOptions"]
+
+
+# TODO: Use enum.StrEnum when we drop Python 3.10
+
+
+class StreamingFallbackMode(str, enum.Enum):
+    """
+    How the streaming executor handles operations that don't support multiple partitions.
+
+    Upon encountering an unsupported operation, the streaming executor will fall
+    back to using a single partition, which might use a large amount of memory.
+
+    * ``StreamingFallbackMode.WARN`` : Emit a warning and fall back to a single partition.
+    * ``StreamingFallbackMode.SILENT``: Silently fall back to a single partition.
+    * ``StreamingFallbackMode.RAISE`` : Raise an exception.
+    """
+
+    WARN = "warn"
+    RAISE = "raise"
+    SILENT = "silent"
+
+
+class Scheduler(str, enum.Enum):
+    """
+    The scheduler to use for the streaming executor.
+
+    * ``Scheduler.SYNCHRONOUS`` : Use the synchronous scheduler.
+    * ``Scheduler.DISTRIBUTED`` : Use the distributed scheduler.
+    """
+
+    SYNCHRONOUS = "synchronous"
+    DISTRIBUTED = "distributed"
+
+
+class ShuffleMethod(str, enum.Enum):
+    """
+    The method to use for shuffling data between workers with the streaming executor.
+
+    * ``ShuffleMethod.TASKS`` : Use the task-based shuffler.
+    * ``ShuffleMethod.RAPIDSMPF`` : Use the rapidsmpf scheduler.
+
+    In :class:`StreamingExecutor`, the default of ``None`` will attempt to use
+    ``ShuffleMethod.RAPIDSMPF``, but will fall back to ``ShuffleMethod.TASKS``
+    if rapidsmpf is not installed.
+    """
+
+    TASKS = "tasks"
+    RAPIDSMPF = "rapidsmpf"
+
+
+@dataclasses.dataclass(frozen=True)
+class ParquetOptions:
+    """
+    Configuration for the cudf-polars Parquet engine.
+
+    Parameters
+    ----------
+    chunked
+        Whether to use libcudf's ``ChunkedParquetReader`` to read the parquet
+        dataset in chunks. This is useful when reading very large parquet
+        files.
+    chunk_read_limit
+        Limit on total number of bytes to be returned per read, or 0 if
+        there is no limit.
+    pass_read_limit
+        Limit on the amount of memory used for reading and decompressing data
+        or 0 if there is no limit.
+    """
+
+    chunked: bool = True
+    chunk_read_limit: int = 0
+    pass_read_limit: int = 0
+
+    def __post_init__(self) -> None:
+        if not isinstance(self.chunked, bool):
+            raise TypeError("chunked must be a bool")
+        if not isinstance(self.chunk_read_limit, int):
+            raise TypeError("chunk_read_limit must be an int")
+        if not isinstance(self.pass_read_limit, int):
+            raise TypeError("pass_read_limit must be an int")
+
+
+def default_blocksize(scheduler: str) -> int:
+    """Return the default blocksize."""
+    try:
+        # Use PyNVML to find the worker device size.
+        import pynvml
+
+        pynvml.nvmlInit()
+        index = os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0]
+        if index and not index.isnumeric():  # pragma: no cover
+            # This means device_index is UUID.
+            # This works for both MIG and non-MIG device UUIDs.
+            handle = pynvml.nvmlDeviceGetHandleByUUID(str.encode(index))
+            if pynvml.nvmlDeviceIsMigDeviceHandle(handle):
+                # Additionally get parent device handle
+                # if the device itself is a MIG instance
+                handle = pynvml.nvmlDeviceGetDeviceHandleFromMigDeviceHandle(handle)
+        else:
+            handle = pynvml.nvmlDeviceGetHandleByIndex(int(index))
+
+        device_size = pynvml.nvmlDeviceGetMemoryInfo(handle).total
+
+    except (ImportError, ValueError, pynvml.NVMLError) as err:  # pragma: no cover
+        # Fall back to a conservative 12GiB default
+        warnings.warn(
+            "Failed to query the device size with NVML. Please "
+            "set 'target_partition_size' to a literal byte size to "
+            f"silence this warning. Original error: {err}",
+            stacklevel=1,
+        )
+        device_size = 12 * 1024**3
+
+    if scheduler == "distributed":
+        # Distributed execution requires a conservative
+        # blocksize for now.
+        blocksize = int(device_size * 0.025)
+    else:
+        # Single-GPU execution can lean on UVM to
+        # support a much larger blocksize.
+        blocksize = int(device_size * 0.0625)
+
+    return max(blocksize, 256_000_000)
+
+
+@dataclasses.dataclass(frozen=True, eq=True)
+class StreamingExecutor:
+    """
+    Configuration for the cudf-polars streaming executor.
+
+    Parameters
+    ----------
+    scheduler
+        The scheduler to use for the streaming executor. ``Scheduler.SYNCHRONOUS``
+        by default.
+
+        Note ``scheduler="distributed"`` requires a Dask cluster to be running.
+    fallback_mode
+        How to handle errors when the GPU engine fails to execute a query.
+        ``StreamingFallbackMode.WARN`` by default.
+    max_rows_per_partition
+        The maximum number of rows to process per partition. 1_000_000 by default.
+        When the number of rows exceeds this value, the query will be split into
+        multiple partitions and executed in parallel.
+    cardinality_factor
+        A dictionary mapping column names to floats between 0 and 1 (inclusive
+        on the right).
+
+        Each factor estimates the fractional number of unique values in the
+        column. By default, ``1.0`` is used for any column not included in
+        ``cardinality_factor``.
+    target_partition_size
+        Target partition size for IO tasks. This configuration currently
+        controls how large parquet files are split into multiple partitions.
+        Files larger than ``target_partition_size`` bytes are split into multiple
+        partitions.
+    groupby_n_ary
+        The factor by which the number of partitions is decreased when performing
+        a groupby on a partitioned column. For example, if a column has 64 partitions,
+        it will first be reduced to ``ceil(64 / 32) = 2`` partitions.
+
+        This is useful when the absolute number of partitions is large.
+    broadcast_join_limit
+        The maximum number of partitions to allow for the smaller table in
+        a broadcast join.
+    shuffle_method
+        The method to use for shuffling data between workers. ``None``
+        by default, which will use 'rapidsmpf' if installed and fall back to
+        'tasks' if not.
+    rapidsmpf_spill
+        Whether to wrap task arguments and output in objects that are
+        spillable by 'rapidsmpf'.
+    """
+
+    name: Literal["streaming"] = dataclasses.field(default="streaming", init=False)
+    scheduler: Scheduler = Scheduler.SYNCHRONOUS
+    fallback_mode: StreamingFallbackMode = StreamingFallbackMode.WARN
+    max_rows_per_partition: int = 1_000_000
+    cardinality_factor: dict[str, float] = dataclasses.field(default_factory=dict)
+    target_partition_size: int = 0
+    groupby_n_ary: int = 32
+    broadcast_join_limit: int = 0
+    shuffle_method: ShuffleMethod | None = None
+    rapidsmpf_spill: bool = False
+
+    def __post_init__(self) -> None:
+        if self.scheduler == "synchronous" and self.shuffle_method == "rapidsmpf":
+            raise ValueError(
+                "rapidsmpf shuffle method is not supported for synchronous scheduler"
+            )
+
+        # frozen dataclass, so use object.__setattr__
+        object.__setattr__(
+            self, "fallback_mode", StreamingFallbackMode(self.fallback_mode)
+        )
+        if self.target_partition_size == 0:
+            object.__setattr__(
+                self, "target_partition_size", default_blocksize(self.scheduler)
+            )
+        if self.broadcast_join_limit == 0:
+            object.__setattr__(
+                self,
+                "broadcast_join_limit",
+                # Usually better to avoid shuffling for single gpu
+                2 if self.scheduler == "distributed" else 32,
+            )
+        object.__setattr__(self, "scheduler", Scheduler(self.scheduler))
+        if self.shuffle_method is not None:
+            object.__setattr__(
+                self, "shuffle_method", ShuffleMethod(self.shuffle_method)
+            )
+
+        # Type / value check everything else
+        if not isinstance(self.max_rows_per_partition, int):
+            raise TypeError("max_rows_per_partition must be an int")
+        if not isinstance(self.cardinality_factor, dict):
+            raise TypeError("cardinality_factor must be a dict of column name to float")
+        if not isinstance(self.target_partition_size, int):
+            raise TypeError("target_partition_size must be an int")
+        if not isinstance(self.groupby_n_ary, int):
+            raise TypeError("groupby_n_ary must be an int")
+        if not isinstance(self.broadcast_join_limit, int):
+            raise TypeError("broadcast_join_limit must be an int")
+        if not isinstance(self.rapidsmpf_spill, bool):
+            raise TypeError("rapidsmpf_spill must be bool")
+
+    def __hash__(self) -> int:
+        # cardinality factory, a dict, isn't natively hashable. We'll dump it
+        # to json and hash that.
+        d = dataclasses.asdict(self)
+        d["cardinality_factor"] = json.dumps(d["cardinality_factor"])
+        return hash(tuple(sorted(d.items())))
+
+
+@dataclasses.dataclass(frozen=True, eq=True)
+class InMemoryExecutor:
+    """
+    Configuration for the cudf-polars in-memory executor.
+
+    Parameters
+    ----------
+    scheduler:
+        The scheduler to use for the in-memory executor. Currently
+        only ``Scheduler.SYNCHRONOUS`` is supported for the in-memory executor.
+    """
+
+    name: Literal["in-memory"] = dataclasses.field(default="in-memory", init=False)
+
+
+@dataclasses.dataclass(frozen=True, eq=True)
+class ConfigOptions:
+    """
+    Configuration for the polars GPUEngine.
+
+    Parameters
+    ----------
+    raise_on_fail
+        Whether to raise an exception when the GPU engine cannot execute a
+        query. ``False`` by default.
+    parquet_options
+        Options controlling parquet file reading and writing. See
+        :class:`ParquetOptions` for more.
+    executor
+        The executor to use for the GPU engine. See :class:`StreamingExecutor`
+        and :class:`InMemoryExecutor` for more.
+    device
+        The GPU used to run the query. If not provided, the
+        query uses the current CUDA device.
+    """
+
+    raise_on_fail: bool = False
+    parquet_options: ParquetOptions = dataclasses.field(default_factory=ParquetOptions)
+    executor: StreamingExecutor | InMemoryExecutor = dataclasses.field(
+        default_factory=InMemoryExecutor
+    )
+    device: int | None = None
+
+    @classmethod
+    def from_polars_engine(cls, engine: pl.GPUEngine) -> Self:
+        """
+        Create a `ConfigOptions` object from a `pl.GPUEngine` object.
+
+        This creates our internal, typed, configuration object from the
+        user-provided `polars.GPUEngine` object.
+        """
+        # these are the valid top-level keys in the engine.config that
+        # the user passes as **kwargs to GPUEngine.
+        valid_options = {
+            "executor",
+            "executor_options",
+            "parquet_options",
+            "raise_on_fail",
+        }
+
+        extra_options = set(engine.config.keys()) - valid_options
+        if extra_options:
+            raise TypeError(f"Unsupported executor_options: {extra_options}")
+
+        user_executor = engine.config.get("executor", "in-memory")
+        if user_executor is None:
+            user_executor = "in-memory"
+        user_executor_options = engine.config.get("executor_options", {})
+        user_parquet_options = engine.config.get("parquet_options", {})
+        user_raise_on_fail = engine.config.get("raise_on_fail", False)
+
+        # These are user-provided options, so we need to actually validate
+        # them.
+
+        if user_executor not in {"in-memory", "streaming"}:
+            raise ValueError(f"Unknown executor '{user_executor}'")
+
+        if not isinstance(user_raise_on_fail, bool):
+            raise TypeError("GPUEngine option 'raise_on_fail' must be a boolean.")
+
+        executor: InMemoryExecutor | StreamingExecutor

+        match user_executor:
+            case "in-memory":
+                executor = InMemoryExecutor(**user_executor_options)
+            case "streaming":
+                executor = StreamingExecutor(**user_executor_options)
+                # Update with the streaming defaults, but user options take precedence.
+
+            case _:  # pragma: no cover; Unreachable
+                raise ValueError(f"Unsupported executor: {user_executor}")
+
+        return cls(
+            raise_on_fail=user_raise_on_fail,
+            parquet_options=ParquetOptions(**user_parquet_options),
+            executor=executor,
+            device=engine.device,
+        )
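The new `ConfigOptions` machinery is what turns the keyword arguments of `polars.GPUEngine` into typed executor settings. The following is a minimal usage sketch, not part of the diff: it assumes polars >= 1.25 with GPU support and the module path `cudf_polars.utils.config` listed in the RECORD below; the specific option values are illustrative only.

```python
import polars as pl

from cudf_polars.utils.config import ConfigOptions

# GPUEngine stores extra keyword arguments in engine.config; from_polars_engine()
# validates them against {"executor", "executor_options", "parquet_options",
# "raise_on_fail"} as shown in the diff above.
engine = pl.GPUEngine(
    raise_on_fail=True,
    executor="streaming",
    executor_options={
        "scheduler": "synchronous",
        "fallback_mode": "warn",
        "max_rows_per_partition": 500_000,  # illustrative value
    },
    parquet_options={"chunked": True},
)

options = ConfigOptions.from_polars_engine(engine)
assert options.executor.name == "streaming"
assert options.raise_on_fail

# The same engine object would then be passed to a query, e.g.:
# result = some_lazyframe.collect(engine=engine)
```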
cudf_polars/utils/conversion.py
@@ -0,0 +1,40 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Miscellaneous conversion functions."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from cudf_polars.typing import Slice
+
+
+def from_polars_slice(zlice: Slice, *, num_rows: int) -> list[int]:
+    """
+    Convert a Polar slice into something pylibcudf handles.
+
+    Parameters
+    ----------
+    zlice
+        The slice to convert
+    num_rows
+        The number of rows in the object being sliced.
+
+    Returns
+    -------
+    List of start and end slice bounds.
+    """
+    start, length = zlice
+    if length is None:
+        length = num_rows
+    if start < 0:
+        start += num_rows
+    # Polars implementation wraps negative start by num_rows, then
+    # adds length to start to get the end, then clamps both to
+    # [0, num_rows)
+    end = start + length
+    start = max(min(start, num_rows), 0)
+    end = max(min(end, num_rows), 0)
+    return [start, end]
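As a quick illustration of the wrap-and-clamp behaviour documented above, here is a small sketch (not part of the diff), assuming the `cudf_polars.utils.conversion` module path from the RECORD listing:

```python
from cudf_polars.utils.conversion import from_polars_slice

# (start, length) slices become [start, end) bounds clamped to the frame length.
print(from_polars_slice((2, 3), num_rows=10))      # [2, 5]
print(from_polars_slice((-3, None), num_rows=10))  # [7, 10] - negative start wraps by num_rows
print(from_polars_slice((8, 5), num_rows=10))      # [8, 10] - end clamped to num_rows
```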
cudf_polars/utils/dtypes.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 
 """Datatype utilities."""
@@ -12,6 +12,7 @@ from typing_extensions import assert_never
 
 import polars as pl
 
+import pylibcudf as plc
 from pylibcudf.traits import (
     is_floating_point,
     is_integral_not_bool,
@@ -19,12 +20,18 @@ from pylibcudf.traits import (
 )
 
 __all__ = [
+    "TO_ARROW_COMPAT_LEVEL",
     "can_cast",
     "downcast_arrow_lists",
     "from_polars",
     "is_order_preserving_cast",
 ]
-import pylibcudf as plc
+
+TO_ARROW_COMPAT_LEVEL = (
+    pl.CompatLevel.newest()
+    if hasattr(pa.lib, "Type_STRING_VIEW")
+    else pl.CompatLevel.oldest()
+)
 
 
 def downcast_arrow_lists(typ: pa.DataType) -> pa.DataType:
@@ -71,7 +78,9 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
     -------
     True if casting is supported, False otherwise
     """
-    has_empty = from_.id() == plc.TypeId.EMPTY or to.id() == plc.TypeId.EMPTY
+    to_is_empty = to.id() == plc.TypeId.EMPTY
+    from_is_empty = from_.id() == plc.TypeId.EMPTY
+    has_empty = to_is_empty or from_is_empty
     return (
         (
             from_ == to
@@ -84,8 +93,16 @@ def can_cast(from_: plc.DataType, to: plc.DataType) -> bool:
                 )
             )
         )
-        or (from_.id() == plc.TypeId.STRING and is_numeric_not_bool(to))
-        or (to.id() == plc.TypeId.STRING and is_numeric_not_bool(from_))
+        or (
+            from_.id() == plc.TypeId.STRING
+            and not to_is_empty
+            and is_numeric_not_bool(to)
+        )
+        or (
+            to.id() == plc.TypeId.STRING
+            and not from_is_empty
+            and is_numeric_not_bool(from_)
+        )
     )
 
 
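The `can_cast` change above threads the new `to_is_empty`/`from_is_empty` flags into the string-cast branches, so each branch now also checks that the other side is not the EMPTY type. A hedged sketch of how this helper is called (not part of the diff), using `pylibcudf` DataType objects and the `cudf_polars.utils.dtypes` module path from the RECORD listing:

```python
import pylibcudf as plc

from cudf_polars.utils.dtypes import can_cast

int64 = plc.DataType(plc.TypeId.INT64)
float64 = plc.DataType(plc.TypeId.FLOAT64)
string = plc.DataType(plc.TypeId.STRING)

print(can_cast(int64, float64))  # fixed-width numeric cast path
print(can_cast(string, int64))   # string -> numeric branch shown in the hunk above
```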
cudf_polars/utils/timer.py
@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Timing utilities."""
+
+from __future__ import annotations
+
+__all__: list[str] = ["Timer"]
+
+
+class Timer:
+    """
+    A timer for recording execution times of nodes.
+
+    Parameters
+    ----------
+    query_start
+        Duration in nanoseconds since the query was started on the
+        Polars side
+    """
+
+    def __init__(self, query_start: int):
+        self.query_start = query_start
+        self.timings: list[tuple[int, int, str]] = []
+
+    def store(self, start: int, end: int, name: str) -> None:
+        """
+        Store timing for a node.
+
+        Parameters
+        ----------
+        start
+            Start of the execution for this node (use time.monotonic_ns).
+        end
+            End of the execution for this node.
+        name
+            The name for this node.
+        """
+        self.timings.append((start - self.query_start, end - self.query_start, name))
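The new `Timer` is a thin container for per-node timings relative to the query start. The sketch below is not from the diff; it only illustrates the call pattern implied by the `store` docstring (monotonic nanosecond timestamps), and the node name `"Scan"` is purely illustrative.

```python
import time

from cudf_polars.utils.timer import Timer

timer = Timer(query_start=time.monotonic_ns())

start = time.monotonic_ns()
# ... evaluate an IR node here ...
end = time.monotonic_ns()
timer.store(start, end, name="Scan")  # "Scan" is a hypothetical node name

# Timings are stored as (start, end, name) tuples, offset from query_start.
print(timer.timings)
```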
cudf_polars/utils/versions.py
@@ -12,11 +12,12 @@ from polars import __version__
 
 POLARS_VERSION = parse(__version__)
 
-POLARS_VERSION_LT_120 = POLARS_VERSION < parse("1.20")
+POLARS_VERSION_LT_125 = POLARS_VERSION < parse("1.25")
+POLARS_VERSION_LT_128 = POLARS_VERSION < parse("1.28")
 
 
-def _ensure_polars_version():
-    if POLARS_VERSION_LT_120:
+def _ensure_polars_version() -> None:
+    if POLARS_VERSION_LT_125:
         raise ImportError(
-            "cudf_polars requires py-polars v1.20 or greater."
+            "cudf_polars requires py-polars v1.25 or greater."
         )  # pragma: no cover
{cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/METADATA
@@ -1,9 +1,9 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: cudf-polars-cu12
-Version: 25.2.2
+Version: 25.6.0
 Summary: Executor for polars using cudf
 Author: NVIDIA Corporation
-License: Apache 2.0
+License: Apache-2.0
 Project-URL: Homepage, https://github.com/rapidsai/cudf
 Classifier: Intended Audience :: Developers
 Classifier: Topic :: Database
@@ -13,18 +13,21 @@ Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: polars<1.22,>=1.20
-Requires-Dist: pylibcudf-cu12==25.2.*
+Requires-Dist: polars<1.29,>=1.25
+Requires-Dist: pylibcudf-cu12==25.6.*
 Provides-Extra: test
+Requires-Dist: dask-cuda==25.6.*; extra == "test"
 Requires-Dist: numpy<3.0a0,>=1.23; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
 Requires-Dist: pytest-xdist; extra == "test"
 Requires-Dist: pytest<8; extra == "test"
 Provides-Extra: experimental
-Requires-Dist: rapids-dask-dependency==25.2.*; extra == "experimental"
+Requires-Dist: rapids-dask-dependency==25.6.*; extra == "experimental"
+Dynamic: license-file
 
 # <div align="left"><img src="img/rapids_logo.png" width="90px"/>&nbsp;cuDF - GPU DataFrames</div>
 
@@ -111,7 +114,7 @@ cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge
 
 ```bash
 conda install -c rapidsai -c conda-forge -c nvidia \
-    cudf=25.02 python=3.12 cuda-version=12.8
+    cudf=25.06 python=3.13 cuda-version=12.8
 ```
 
 We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
cudf_polars_cu12-25.6.0.dist-info/RECORD
@@ -0,0 +1,73 @@
+cudf_polars/VERSION,sha256=mkkPLCPxib-wy79AMMpM4Bq103DbRbHiXhZFFnGa_sk,8
+cudf_polars/__init__.py,sha256=fSTx5nmqajdwp7qvP4PnYL6wZN9-k1fKB43NkcZlHwk,740
+cudf_polars/_version.py,sha256=kj5Ir4dxZRR-k2k8mWUDJHiGpE8_ZcTNzt_kMZxcFRA,528
+cudf_polars/callback.py,sha256=Pq8_oG_C1QCxaoTyqYZEmQnXXkF8RnkdOT4S1sv9wIQ,9702
+cudf_polars/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cudf_polars/containers/__init__.py,sha256=He8qCk9m37k7KOWC6s4QuL8LYF11KhP2vluH9s8EeYI,339
+cudf_polars/containers/column.py,sha256=a-gK48MPqz92b1gkG6EqrSxr3_vK2I3efqaQsbp9FLs,12464
+cudf_polars/containers/dataframe.py,sha256=6f-fVJ3KZT9VWaqd0DH9nO0tZGaT30B0mketsz9iW18,10093
+cudf_polars/dsl/__init__.py,sha256=bYwYnqmqINMgwnkJ22EnXMlHviLolPaMgQ8QqoZL3YE,244
+cudf_polars/dsl/expr.py,sha256=PgzO75CP-n5ZggpT-XqpUdjAwEpGP04G5N_aiX3VEqU,1868
+cudf_polars/dsl/ir.py,sha256=W_qD6K8UMrByU-kTD-3EjEfaELmjB-XNV4RLHpg2Z8w,83936
+cudf_polars/dsl/nodebase.py,sha256=4lE2WAekzCG_ut0lYor8iPI_7hJZsy_lLTEh-54nbUk,4540
+cudf_polars/dsl/to_ast.py,sha256=MUwFANApCSDgayK6hwlIcro2sMpE-nanqGqUsbR9sLk,12426
+cudf_polars/dsl/translate.py,sha256=Ghr3i0f6X2TlMVqc-qZkf4jvfkZ_sF7tSGPhrjhXrFw,29847
+cudf_polars/dsl/traversal.py,sha256=dVH37YoLx-wo61lUofaOM77eji99yIaC62PSb3QYKHM,4610
+cudf_polars/dsl/expressions/__init__.py,sha256=uj1a4BzrDVAjOgP6FKKtnvR5elF4ksUj1RhkLTZoS1k,224
+cudf_polars/dsl/expressions/aggregation.py,sha256=5EmwA_sXAZj4sRnM_YjGuOkxKVNHigzxNTDolVpttqw,7450
+cudf_polars/dsl/expressions/base.py,sha256=Of-J3BgtRUaUAeNUiw0MzZq4jHlCJtfZ6opt8HZkqrQ,7822
+cudf_polars/dsl/expressions/binaryop.py,sha256=sRYXXgIwX9Cn8uJFJuLCWQgKgG2nuMiv9W8JREr3xx0,4170
+cudf_polars/dsl/expressions/boolean.py,sha256=pBEWN8DOSG9V3RISQZTsztnNuNbuW9vKfka0Fj7Szfs,10920
+cudf_polars/dsl/expressions/datetime.py,sha256=bkJ5qAcy43E49Ye6Nrw83YKScdbgvz6HBqQ49QjASDU,10366
+cudf_polars/dsl/expressions/literal.py,sha256=2fiTu0NikVyLRQei2tN3wcgZFD2OweevdN_8_ASJXQY,2755
+cudf_polars/dsl/expressions/rolling.py,sha256=veHY728JI7Qy9UyuUDMsqVw69udQDz0dXHrr_LFGIvA,5106
+cudf_polars/dsl/expressions/selection.py,sha256=QfkV7hRpJWv9GfX45WSyCHct--1EMWRC05aTBsbhvyA,2526
+cudf_polars/dsl/expressions/slicing.py,sha256=I7KcvMOwSDd740wrMUW_gX8Jny5hQVDP8mdfy_dJwDA,1167
+cudf_polars/dsl/expressions/sorting.py,sha256=nnuBBWofgSHjGQY9QONDn_w4eMPMU2fGu9d445zfGmc,2739
+cudf_polars/dsl/expressions/string.py,sha256=oheWLRvd0sOyNF_yr7nKg0v7NVkEy8Z05Ku5xG-dukY,14396
+cudf_polars/dsl/expressions/ternary.py,sha256=PQ4nKRqG7dZggZ9dxGJJRCIAsrSBqS0RzAS61WiVSYw,1356
+cudf_polars/dsl/expressions/unary.py,sha256=cL1y4IjzhW6RLsGlof6bEXGHklFgGjsiISy8Y36DXuo,11542
+cudf_polars/dsl/utils/__init__.py,sha256=JL26nlMAbcdL8ZE4iXRrMOEVSTEZU1P5y9WvxTEDdnY,199
+cudf_polars/dsl/utils/aggregations.py,sha256=pv007Uu9h--Hj2pvU6l5_IpPZmShLpYgibbloXadWOY,10067
+cudf_polars/dsl/utils/groupby.py,sha256=aEnJOpEBw6sus6PXM1qeOzOO2kVYIFmmc21xCS2b7CY,2632
+cudf_polars/dsl/utils/naming.py,sha256=ydp_BYYAt3mG7JHfi9Snp3dDNzdQZD6F2sAMEmT4OYA,737
+cudf_polars/dsl/utils/replace.py,sha256=ZAz6htTBSPkkrnv6faRh9_xPnnzin9kuI9PcrGtaX0Q,1224
+cudf_polars/dsl/utils/rolling.py,sha256=S6Lzs7rpBxFP8exHSrEHDr6tWHdwW3V42jp40ZNauKk,3405
+cudf_polars/dsl/utils/windows.py,sha256=vPEzlwYs1ogri7ly3gQPrLPFsfWhLBygsnIROqgwgAk,5155
+cudf_polars/experimental/__init__.py,sha256=S2oI2K__woyPQAAlFMOo6RTMtdfIZxmzzAO92VtJgP4,256
+cudf_polars/experimental/base.py,sha256=neb7H4uqn5Yt3lL6ZTpFsRB7xQc0-muOE3meq8DLMuE,1158
+cudf_polars/experimental/dask_registers.py,sha256=xJh6wW7rq78fd17gLsOCmQ0cZQVx2yXRnH7z8a54Kvw,7507
+cudf_polars/experimental/dispatch.py,sha256=pw6KyC_irRcQgKb8S0o1KKvRHQPFOikyv8kfMQtgxjc,2094
+cudf_polars/experimental/distinct.py,sha256=6-EkCtgA7Qukjb3f7ERWPs6y9fd0xggbSOK9ZjJOaSY,6351
+cudf_polars/experimental/explain.py,sha256=o2HNzZr1X0P-Q68B7iPgE52WMf_G1XfiZX9xQ1aFeUM,3646
+cudf_polars/experimental/expressions.py,sha256=8ug6lnGj1rz6bkXi3Y9UIdZmXczlRQiErsj1aBqWBhQ,16227
+cudf_polars/experimental/groupby.py,sha256=HHB38kUW4u_QlkIyRqtdPxlvBtryOB_Oi9hlRDYEuas,9811
+cudf_polars/experimental/io.py,sha256=kfNz1kjcskP5ubH-T-adWzizLntGXd10HgtPeRFQvCw,12475
+cudf_polars/experimental/join.py,sha256=HZ-dIzHhvuisBb-zw9BV-dxSPsU-FGKUBOxBY43Vu3E,11644
+cudf_polars/experimental/parallel.py,sha256=6ckDfuw3MSWnTRr7WgqpHEIoUw3RD9QdOlrIX7bXR78,9636
+cudf_polars/experimental/repartition.py,sha256=o1qtstaB_dBaYjkmsmfnveZ6T66A9XGwzFEBUucfyrk,2124
+cudf_polars/experimental/scheduler.py,sha256=ieL7bdxTqlmd8MO37JCaCoqhyDRZNTLnPFUme3hv6SQ,4203
+cudf_polars/experimental/select.py,sha256=Zrdlnw426Ul8DE8WzST7ggLW74yTMXHp-enMTog9tp8,3829
+cudf_polars/experimental/shuffle.py,sha256=lv33GaR0w5h8p7z9NfwNgXw5DazjCohsy_B7ZUEUQIo,8975
+cudf_polars/experimental/sort.py,sha256=OAY_Us0IXfYPy6NRwqH3JsSE00lg63o83uuve6KLFiY,1633
+cudf_polars/experimental/spilling.py,sha256=OVpH9PHYNJcYL-PAB0CvoAil_nJW0VepLvcIrrAUdlc,4255
+cudf_polars/experimental/utils.py,sha256=l6OWbWsdn8AfFiNSXZwvL9Gm9HP_P9cVXLXfIqizinY,3303
+cudf_polars/experimental/benchmarks/__init__.py,sha256=XiT8hL6V9Ns_SSXDXkzoSWlXIo6aFLDXUHLLWch23Ok,149
+cudf_polars/experimental/benchmarks/pdsh.py,sha256=OCQJeek-hz2nsctHO93oDr9BgQc97mdCCtcpAVodomo,45230
+cudf_polars/testing/__init__.py,sha256=0MnlTjkTEqkSpL5GdMhQf4uXOaQwNrzgEJCZKa5FnL4,219
+cudf_polars/testing/asserts.py,sha256=bzzS5ZalRAxDMvGncHvem7xuzljPJxeRFMT5KLCye7Q,11926
+cudf_polars/testing/io.py,sha256=sw9aT4wXLRMKxuH2rCwy7wV3c8pqEdqZdXn2jQSb-jM,2221
+cudf_polars/testing/plugin.py,sha256=AkJRQZhhCSk-vLJr2WAOIZCvR-iQ24ZZQ61IHZuPPY8,25913
+cudf_polars/typing/__init__.py,sha256=cEMEtn5bJTS0mY9U2E7fO15ya8CsOVUE442bqVubWzI,4568
+cudf_polars/utils/__init__.py,sha256=urdV5MUIneU8Dn6pt1db5GkDG0oY4NsFD0Uhl3j98l8,195
+cudf_polars/utils/config.py,sha256=X8yOjJW2lab-FFPma_IEbSKghUiKE_57BTrY3ixj3Do,13017
+cudf_polars/utils/conversion.py,sha256=k_apLbSR-MiYYlQBGrzYOInuvcbfSi-il-o9nkovdXQ,1042
+cudf_polars/utils/dtypes.py,sha256=t5pslaZo320URlKramQDLrTbAbS21RUHvVzuLe1PIhA,6936
+cudf_polars/utils/sorting.py,sha256=Mqb_KLsYnKU8p1dDan2mtlIQl65RqwM78OlUi-_Jj0k,1725
+cudf_polars/utils/timer.py,sha256=KqcXqOcbovsj6KDCwaxl70baQXjuod43rABrpQkE78M,1005
+cudf_polars/utils/versions.py,sha256=wOXN56WwDFeQvzX1-8EXYiGNxq1pYtKPHxGQ1bttZLo,668
+cudf_polars_cu12-25.6.0.dist-info/licenses/LICENSE,sha256=4YCpjWCbYMkMQFW47JXsorZLOaP957HwmP6oHW2_ngM,11348
+cudf_polars_cu12-25.6.0.dist-info/METADATA,sha256=RBlwRQKUIYjoAE7LWZuwtApcvyyxy3ImlXLF_1HqWA8,4668
+cudf_polars_cu12-25.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cudf_polars_cu12-25.6.0.dist-info/top_level.txt,sha256=w2bOa7MpuyapYgZh480Znh4UzX7rSWlFcYR1Yo6QIPs,12
+cudf_polars_cu12-25.6.0.dist-info/RECORD,,
{cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 