cudf-polars-cu13 25.12.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +28 -7
- cudf_polars/containers/column.py +51 -26
- cudf_polars/dsl/expressions/binaryop.py +1 -1
- cudf_polars/dsl/expressions/boolean.py +1 -1
- cudf_polars/dsl/expressions/selection.py +1 -1
- cudf_polars/dsl/expressions/string.py +29 -20
- cudf_polars/dsl/expressions/ternary.py +25 -1
- cudf_polars/dsl/expressions/unary.py +11 -8
- cudf_polars/dsl/ir.py +351 -281
- cudf_polars/dsl/translate.py +18 -15
- cudf_polars/dsl/utils/aggregations.py +10 -5
- cudf_polars/experimental/base.py +10 -0
- cudf_polars/experimental/benchmarks/pdsh.py +1 -1
- cudf_polars/experimental/benchmarks/utils.py +83 -2
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +1 -1
- cudf_polars/experimental/expressions.py +8 -5
- cudf_polars/experimental/groupby.py +2 -0
- cudf_polars/experimental/io.py +64 -42
- cudf_polars/experimental/join.py +15 -2
- cudf_polars/experimental/parallel.py +10 -7
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/{shuffle.py → collectives/shuffle.py} +90 -114
- cudf_polars/experimental/rapidsmpf/core.py +194 -67
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +6 -3
- cudf_polars/experimental/rapidsmpf/io.py +162 -70
- cudf_polars/experimental/rapidsmpf/join.py +162 -77
- cudf_polars/experimental/rapidsmpf/nodes.py +421 -180
- cudf_polars/experimental/rapidsmpf/repartition.py +130 -65
- cudf_polars/experimental/rapidsmpf/union.py +24 -5
- cudf_polars/experimental/rapidsmpf/utils.py +228 -16
- cudf_polars/experimental/shuffle.py +18 -4
- cudf_polars/experimental/sort.py +13 -6
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/testing/plugin.py +6 -3
- cudf_polars/utils/config.py +67 -0
- cudf_polars/utils/versions.py +3 -3
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/METADATA +9 -10
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/RECORD +47 -43
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
"""IO logic for the RapidsMPF streaming runtime."""
|
|
4
4
|
|
|
@@ -34,9 +34,14 @@ from cudf_polars.experimental.rapidsmpf.dispatch import (
|
|
|
34
34
|
)
|
|
35
35
|
from cudf_polars.experimental.rapidsmpf.nodes import (
|
|
36
36
|
define_py_node,
|
|
37
|
+
metadata_feeder_node,
|
|
37
38
|
shutdown_on_error,
|
|
38
39
|
)
|
|
39
|
-
from cudf_polars.experimental.rapidsmpf.utils import
|
|
40
|
+
from cudf_polars.experimental.rapidsmpf.utils import (
|
|
41
|
+
ChannelManager,
|
|
42
|
+
Metadata,
|
|
43
|
+
opaque_reservation,
|
|
44
|
+
)
|
|
40
45
|
|
|
41
46
|
if TYPE_CHECKING:
|
|
42
47
|
from collections.abc import MutableMapping
|
|
@@ -103,7 +108,7 @@ class Lineariser:
|
|
|
103
108
|
|
|
104
109
|
# Forward any remaining buffered messages
|
|
105
110
|
for seq in sorted(buffer.keys()):
|
|
106
|
-
await self.ch_out.send(self.context, buffer
|
|
111
|
+
await self.ch_out.send(self.context, buffer.pop(seq))
|
|
107
112
|
|
|
108
113
|
await self.ch_out.drain(self.context)
|
|
109
114
|
|
|
@@ -138,6 +143,7 @@ async def dataframescan_node(
|
|
|
138
143
|
*,
|
|
139
144
|
num_producers: int,
|
|
140
145
|
rows_per_partition: int,
|
|
146
|
+
estimated_chunk_bytes: int,
|
|
141
147
|
) -> None:
|
|
142
148
|
"""
|
|
143
149
|
DataFrameScan node for rapidsmpf.
|
|
@@ -156,19 +162,26 @@ async def dataframescan_node(
|
|
|
156
162
|
The number of producers to use for the DataFrameScan node.
|
|
157
163
|
rows_per_partition
|
|
158
164
|
The number of rows per partition.
|
|
165
|
+
estimated_chunk_bytes
|
|
166
|
+
Estimated size of each chunk in bytes. Used for memory reservation
|
|
167
|
+
with block spilling to avoid thrashing.
|
|
159
168
|
"""
|
|
160
|
-
|
|
161
|
-
|
|
169
|
+
async with shutdown_on_error(context, ch_out.metadata, ch_out.data):
|
|
170
|
+
# Find local partition count.
|
|
171
|
+
nrows = ir.df.shape()[0]
|
|
172
|
+
global_count = math.ceil(nrows / rows_per_partition) if nrows > 0 else 0
|
|
173
|
+
|
|
174
|
+
# For single rank, simplify the logic
|
|
175
|
+
if context.comm().nranks == 1:
|
|
176
|
+
local_count = global_count
|
|
177
|
+
local_offset = 0
|
|
178
|
+
else:
|
|
179
|
+
local_count = math.ceil(global_count / context.comm().nranks)
|
|
180
|
+
local_offset = local_count * context.comm().rank
|
|
162
181
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
local_count = global_count
|
|
166
|
-
local_offset = 0
|
|
167
|
-
else:
|
|
168
|
-
local_count = math.ceil(global_count / context.comm().nranks)
|
|
169
|
-
local_offset = local_count * context.comm().rank
|
|
182
|
+
# Send basic metadata
|
|
183
|
+
await ch_out.send_metadata(context, Metadata(max(1, local_count)))
|
|
170
184
|
|
|
171
|
-
async with shutdown_on_error(context, ch_out.data):
|
|
172
185
|
# Build list of IR slices to read
|
|
173
186
|
ir_slices = []
|
|
174
187
|
for seq_num in range(local_count):
|
|
@@ -183,6 +196,26 @@ async def dataframescan_node(
|
|
|
183
196
|
)
|
|
184
197
|
)
|
|
185
198
|
|
|
199
|
+
# If there are no slices, drain the channel and return
|
|
200
|
+
if len(ir_slices) == 0:
|
|
201
|
+
await ch_out.data.drain(context)
|
|
202
|
+
return
|
|
203
|
+
|
|
204
|
+
# If there is only one ir_slices or one producer, we can
|
|
205
|
+
# skip the lineariser and read the chunks directly
|
|
206
|
+
if len(ir_slices) == 1 or num_producers == 1:
|
|
207
|
+
for seq_num, ir_slice in enumerate(ir_slices):
|
|
208
|
+
await read_chunk(
|
|
209
|
+
context,
|
|
210
|
+
ir_slice,
|
|
211
|
+
seq_num,
|
|
212
|
+
ch_out.data,
|
|
213
|
+
ir_context,
|
|
214
|
+
estimated_chunk_bytes,
|
|
215
|
+
)
|
|
216
|
+
await ch_out.data.drain(context)
|
|
217
|
+
return
|
|
218
|
+
|
|
186
219
|
# Use Lineariser to ensure ordered delivery
|
|
187
220
|
num_producers = min(num_producers, len(ir_slices))
|
|
188
221
|
lineariser = Lineariser(context, ch_out.data, num_producers)
|
|
@@ -203,6 +236,7 @@ async def dataframescan_node(
|
|
|
203
236
|
task_idx,
|
|
204
237
|
ch_out,
|
|
205
238
|
ir_context,
|
|
239
|
+
estimated_chunk_bytes,
|
|
206
240
|
)
|
|
207
241
|
await ch_out.drain(context)
|
|
208
242
|
|
|
@@ -216,27 +250,32 @@ async def dataframescan_node(
|
|
|
216
250
|
@generate_ir_sub_network.register(DataFrameScan)
|
|
217
251
|
def _(
|
|
218
252
|
ir: DataFrameScan, rec: SubNetGenerator
|
|
219
|
-
) -> tuple[list[Any], dict[IR, ChannelManager]]:
|
|
253
|
+
) -> tuple[dict[IR, list[Any]], dict[IR, ChannelManager]]:
|
|
220
254
|
config_options = rec.state["config_options"]
|
|
221
255
|
assert config_options.executor.name == "streaming", (
|
|
222
256
|
"'in-memory' executor not supported in 'generate_ir_sub_network'"
|
|
223
257
|
)
|
|
224
258
|
rows_per_partition = config_options.executor.max_rows_per_partition
|
|
225
259
|
num_producers = rec.state["max_io_threads"]
|
|
260
|
+
# Use target_partition_size as the estimated chunk size
|
|
261
|
+
estimated_chunk_bytes = config_options.executor.target_partition_size
|
|
226
262
|
|
|
227
263
|
context = rec.state["context"]
|
|
228
264
|
ir_context = rec.state["ir_context"]
|
|
229
265
|
channels: dict[IR, ChannelManager] = {ir: ChannelManager(rec.state["context"])}
|
|
230
|
-
nodes: list[Any] =
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
266
|
+
nodes: dict[IR, list[Any]] = {
|
|
267
|
+
ir: [
|
|
268
|
+
dataframescan_node(
|
|
269
|
+
context,
|
|
270
|
+
ir,
|
|
271
|
+
ir_context,
|
|
272
|
+
channels[ir].reserve_input_slot(),
|
|
273
|
+
num_producers=num_producers,
|
|
274
|
+
rows_per_partition=rows_per_partition,
|
|
275
|
+
estimated_chunk_bytes=estimated_chunk_bytes,
|
|
276
|
+
)
|
|
277
|
+
]
|
|
278
|
+
}
|
|
240
279
|
|
|
241
280
|
return nodes, channels
|
|
242
281
|
|
|
@@ -278,6 +317,7 @@ async def read_chunk(
|
|
|
278
317
|
seq_num: int,
|
|
279
318
|
ch_out: Channel[TableChunk],
|
|
280
319
|
ir_context: IRExecutionContext,
|
|
320
|
+
estimated_chunk_bytes: int,
|
|
281
321
|
) -> None:
|
|
282
322
|
"""
|
|
283
323
|
Read a chunk from disk and send it to the output channel.
|
|
@@ -294,24 +334,27 @@ async def read_chunk(
|
|
|
294
334
|
The output channel.
|
|
295
335
|
ir_context
|
|
296
336
|
The execution context for the IR node.
|
|
337
|
+
estimated_chunk_bytes
|
|
338
|
+
Estimated size of the chunk in bytes. Used for memory reservation
|
|
339
|
+
with block spilling to avoid thrashing.
|
|
297
340
|
"""
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
341
|
+
with opaque_reservation(context, estimated_chunk_bytes):
|
|
342
|
+
df = await asyncio.to_thread(
|
|
343
|
+
scan.do_evaluate,
|
|
344
|
+
*scan._non_child_args,
|
|
345
|
+
context=ir_context,
|
|
346
|
+
)
|
|
347
|
+
await ch_out.send(
|
|
348
|
+
context,
|
|
349
|
+
Message(
|
|
350
|
+
seq_num,
|
|
351
|
+
TableChunk.from_pylibcudf_table(
|
|
352
|
+
df.table,
|
|
353
|
+
df.stream,
|
|
354
|
+
exclusive_view=True,
|
|
355
|
+
),
|
|
312
356
|
),
|
|
313
|
-
)
|
|
314
|
-
)
|
|
357
|
+
)
|
|
315
358
|
|
|
316
359
|
|
|
317
360
|
@define_py_node()
|
|
@@ -324,6 +367,7 @@ async def scan_node(
|
|
|
324
367
|
num_producers: int,
|
|
325
368
|
plan: IOPartitionPlan,
|
|
326
369
|
parquet_options: ParquetOptions,
|
|
370
|
+
estimated_chunk_bytes: int,
|
|
327
371
|
) -> None:
|
|
328
372
|
"""
|
|
329
373
|
Scan node for rapidsmpf.
|
|
@@ -344,8 +388,11 @@ async def scan_node(
|
|
|
344
388
|
The partitioning plan.
|
|
345
389
|
parquet_options
|
|
346
390
|
The Parquet options.
|
|
391
|
+
estimated_chunk_bytes
|
|
392
|
+
Estimated size of each chunk in bytes. Used for memory reservation
|
|
393
|
+
with block spilling to avoid thrashing.
|
|
347
394
|
"""
|
|
348
|
-
async with shutdown_on_error(context, ch_out.data):
|
|
395
|
+
async with shutdown_on_error(context, ch_out.metadata, ch_out.data):
|
|
349
396
|
# Build a list of local Scan operations
|
|
350
397
|
scans: list[Scan | SplitScan] = []
|
|
351
398
|
if plan.flavor == IOPartitionFlavor.SPLIT_FILES:
|
|
@@ -353,9 +400,11 @@ async def scan_node(
|
|
|
353
400
|
local_count = math.ceil(count / context.comm().nranks)
|
|
354
401
|
local_offset = local_count * context.comm().rank
|
|
355
402
|
path_offset = local_offset // plan.factor
|
|
356
|
-
|
|
403
|
+
path_end = math.ceil((local_offset + local_count) / plan.factor)
|
|
404
|
+
path_count = path_end - path_offset
|
|
357
405
|
local_paths = ir.paths[path_offset : path_offset + path_count]
|
|
358
406
|
sindex = local_offset % plan.factor
|
|
407
|
+
splits_created = 0
|
|
359
408
|
for path in local_paths:
|
|
360
409
|
base_scan = Scan(
|
|
361
410
|
ir.schema,
|
|
@@ -371,7 +420,7 @@ async def scan_node(
|
|
|
371
420
|
ir.predicate,
|
|
372
421
|
parquet_options,
|
|
373
422
|
)
|
|
374
|
-
while sindex < plan.factor:
|
|
423
|
+
while sindex < plan.factor and splits_created < local_count:
|
|
375
424
|
scans.append(
|
|
376
425
|
SplitScan(
|
|
377
426
|
ir.schema,
|
|
@@ -382,6 +431,7 @@ async def scan_node(
|
|
|
382
431
|
)
|
|
383
432
|
)
|
|
384
433
|
sindex += 1
|
|
434
|
+
splits_created += 1
|
|
385
435
|
sindex = 0
|
|
386
436
|
|
|
387
437
|
else:
|
|
@@ -392,22 +442,46 @@ async def scan_node(
|
|
|
392
442
|
paths_offset_end = paths_offset_start + plan.factor * local_count
|
|
393
443
|
for offset in range(paths_offset_start, paths_offset_end, plan.factor):
|
|
394
444
|
local_paths = ir.paths[offset : offset + plan.factor]
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
445
|
+
if len(local_paths) > 0: # Only add scan if there are paths
|
|
446
|
+
scans.append(
|
|
447
|
+
Scan(
|
|
448
|
+
ir.schema,
|
|
449
|
+
ir.typ,
|
|
450
|
+
ir.reader_options,
|
|
451
|
+
ir.cloud_options,
|
|
452
|
+
local_paths,
|
|
453
|
+
ir.with_columns,
|
|
454
|
+
ir.skip_rows,
|
|
455
|
+
ir.n_rows,
|
|
456
|
+
ir.row_index,
|
|
457
|
+
ir.include_file_paths,
|
|
458
|
+
ir.predicate,
|
|
459
|
+
parquet_options,
|
|
460
|
+
)
|
|
409
461
|
)
|
|
462
|
+
|
|
463
|
+
# Send basic metadata
|
|
464
|
+
await ch_out.send_metadata(context, Metadata(max(1, len(scans))))
|
|
465
|
+
|
|
466
|
+
# If there is nothing to scan, drain the channel and return
|
|
467
|
+
if len(scans) == 0:
|
|
468
|
+
await ch_out.data.drain(context)
|
|
469
|
+
return
|
|
470
|
+
|
|
471
|
+
# If there is only one scan or one producer, we can
|
|
472
|
+
# skip the lineariser and read the chunks directly
|
|
473
|
+
if len(scans) == 1 or num_producers == 1:
|
|
474
|
+
for seq_num, scan in enumerate(scans):
|
|
475
|
+
await read_chunk(
|
|
476
|
+
context,
|
|
477
|
+
scan,
|
|
478
|
+
seq_num,
|
|
479
|
+
ch_out.data,
|
|
480
|
+
ir_context,
|
|
481
|
+
estimated_chunk_bytes,
|
|
410
482
|
)
|
|
483
|
+
await ch_out.data.drain(context)
|
|
484
|
+
return
|
|
411
485
|
|
|
412
486
|
# Use Lineariser to ensure ordered delivery
|
|
413
487
|
num_producers = min(num_producers, len(scans))
|
|
@@ -429,6 +503,7 @@ async def scan_node(
|
|
|
429
503
|
task_idx,
|
|
430
504
|
ch_out,
|
|
431
505
|
ir_context,
|
|
506
|
+
estimated_chunk_bytes,
|
|
432
507
|
)
|
|
433
508
|
await ch_out.drain(context)
|
|
434
509
|
|
|
@@ -548,9 +623,12 @@ def make_rapidsmpf_read_parquet_node(
|
|
|
548
623
|
|
|
549
624
|
|
|
550
625
|
@generate_ir_sub_network.register(Scan)
|
|
551
|
-
def _(
|
|
626
|
+
def _(
|
|
627
|
+
ir: Scan, rec: SubNetGenerator
|
|
628
|
+
) -> tuple[dict[IR, list[Any]], dict[IR, ChannelManager]]:
|
|
552
629
|
config_options = rec.state["config_options"]
|
|
553
|
-
|
|
630
|
+
executor = rec.state["config_options"].executor
|
|
631
|
+
assert executor.name == "streaming", (
|
|
554
632
|
"'in-memory' executor not supported in 'generate_ir_sub_network'"
|
|
555
633
|
)
|
|
556
634
|
parquet_options = config_options.parquet_options
|
|
@@ -558,17 +636,28 @@ def _(ir: Scan, rec: SubNetGenerator) -> tuple[list[Any], dict[IR, ChannelManage
|
|
|
558
636
|
num_producers = rec.state["max_io_threads"]
|
|
559
637
|
channels: dict[IR, ChannelManager] = {ir: ChannelManager(rec.state["context"])}
|
|
560
638
|
|
|
561
|
-
|
|
639
|
+
assert partition_info.io_plan is not None, "Scan node must have a partition plan"
|
|
640
|
+
plan: IOPartitionPlan = partition_info.io_plan
|
|
641
|
+
|
|
642
|
+
# Native node cannot split large files in distributed mode yet
|
|
643
|
+
distributed_split_files = (
|
|
644
|
+
plan.flavor == IOPartitionFlavor.SPLIT_FILES
|
|
645
|
+
and rec.state["context"].comm().nranks > 1
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
# Use rapidsmpf native read_parquet node if possible
|
|
562
649
|
ch_pair = channels[ir].reserve_input_slot()
|
|
563
|
-
nodes: list[Any]
|
|
650
|
+
nodes: dict[IR, list[Any]] = {}
|
|
564
651
|
native_node: Any = None
|
|
565
652
|
if (
|
|
566
|
-
|
|
653
|
+
parquet_options.use_rapidsmpf_native
|
|
654
|
+
and partition_info.count > 1
|
|
567
655
|
and ir.typ == "parquet"
|
|
568
656
|
and ir.row_index is None
|
|
569
657
|
and ir.include_file_paths is None
|
|
570
658
|
and ir.n_rows == -1
|
|
571
659
|
and ir.skip_rows == 0
|
|
660
|
+
and not distributed_split_files
|
|
572
661
|
):
|
|
573
662
|
native_node = make_rapidsmpf_read_parquet_node(
|
|
574
663
|
rec.state["context"],
|
|
@@ -580,17 +669,19 @@ def _(ir: Scan, rec: SubNetGenerator) -> tuple[list[Any], dict[IR, ChannelManage
|
|
|
580
669
|
)
|
|
581
670
|
|
|
582
671
|
if native_node is not None:
|
|
583
|
-
|
|
672
|
+
# Need metadata node, because the native read_parquet
|
|
673
|
+
# node does not send metadata.
|
|
674
|
+
metadata_node = metadata_feeder_node(
|
|
675
|
+
rec.state["context"],
|
|
676
|
+
ch_pair,
|
|
677
|
+
Metadata(partition_info.count),
|
|
678
|
+
)
|
|
679
|
+
nodes[ir] = [metadata_node, native_node]
|
|
584
680
|
else:
|
|
585
681
|
# Fall back to scan_node (predicate not convertible, or other constraint)
|
|
586
|
-
|
|
587
|
-
"Scan node must have a partition plan"
|
|
588
|
-
)
|
|
589
|
-
plan: IOPartitionPlan = partition_info.io_plan
|
|
590
|
-
if plan.flavor == IOPartitionFlavor.SPLIT_FILES:
|
|
591
|
-
parquet_options = dataclasses.replace(parquet_options, chunked=False)
|
|
682
|
+
parquet_options = dataclasses.replace(parquet_options, chunked=False)
|
|
592
683
|
|
|
593
|
-
nodes = [
|
|
684
|
+
nodes[ir] = [
|
|
594
685
|
scan_node(
|
|
595
686
|
rec.state["context"],
|
|
596
687
|
ir,
|
|
@@ -599,6 +690,7 @@ def _(ir: Scan, rec: SubNetGenerator) -> tuple[list[Any], dict[IR, ChannelManage
|
|
|
599
690
|
num_producers=num_producers,
|
|
600
691
|
plan=plan,
|
|
601
692
|
parquet_options=parquet_options,
|
|
693
|
+
estimated_chunk_bytes=executor.target_partition_size,
|
|
602
694
|
)
|
|
603
695
|
]
|
|
604
696
|
return nodes, channels
|