cudf-polars-cu13 25.12.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +28 -7
- cudf_polars/containers/column.py +51 -26
- cudf_polars/dsl/expressions/binaryop.py +1 -1
- cudf_polars/dsl/expressions/boolean.py +1 -1
- cudf_polars/dsl/expressions/selection.py +1 -1
- cudf_polars/dsl/expressions/string.py +29 -20
- cudf_polars/dsl/expressions/ternary.py +25 -1
- cudf_polars/dsl/expressions/unary.py +11 -8
- cudf_polars/dsl/ir.py +351 -281
- cudf_polars/dsl/translate.py +18 -15
- cudf_polars/dsl/utils/aggregations.py +10 -5
- cudf_polars/experimental/base.py +10 -0
- cudf_polars/experimental/benchmarks/pdsh.py +1 -1
- cudf_polars/experimental/benchmarks/utils.py +83 -2
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +1 -1
- cudf_polars/experimental/expressions.py +8 -5
- cudf_polars/experimental/groupby.py +2 -0
- cudf_polars/experimental/io.py +64 -42
- cudf_polars/experimental/join.py +15 -2
- cudf_polars/experimental/parallel.py +10 -7
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/{shuffle.py → collectives/shuffle.py} +90 -114
- cudf_polars/experimental/rapidsmpf/core.py +194 -67
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +6 -3
- cudf_polars/experimental/rapidsmpf/io.py +162 -70
- cudf_polars/experimental/rapidsmpf/join.py +162 -77
- cudf_polars/experimental/rapidsmpf/nodes.py +421 -180
- cudf_polars/experimental/rapidsmpf/repartition.py +130 -65
- cudf_polars/experimental/rapidsmpf/union.py +24 -5
- cudf_polars/experimental/rapidsmpf/utils.py +228 -16
- cudf_polars/experimental/shuffle.py +18 -4
- cudf_polars/experimental/sort.py +13 -6
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/testing/plugin.py +6 -3
- cudf_polars/utils/config.py +67 -0
- cudf_polars/utils/versions.py +3 -3
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/METADATA +9 -10
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/RECORD +47 -43
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
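Most of the changes in this release land in the experimental RapidsMPF streaming executor (the cudf_polars/experimental/rapidsmpf/ modules above) and its configuration (cudf_polars/utils/config.py); the diff below shows one representative file, cudf_polars/experimental/rapidsmpf/join.py. For orientation, here is a minimal sketch of how that executor is typically enabled from Polars. It is not part of this diff, and the executor/executor_options keywords and the target_partition_size option are assumptions based on the cudf-polars configuration surface referenced in the diff, not something the diff introduces.

# Sketch only: enable the cudf-polars GPU engine with the (experimental)
# streaming executor whose internals the rapidsmpf modules implement.
# The keyword names below are assumptions; check the cudf-polars docs
# for the release you actually install.
import polars as pl

q = (
    pl.scan_parquet("lineitem.parquet")  # hypothetical input file
    .filter(pl.col("l_quantity") > 24)
    .group_by("l_returnflag")
    .agg(pl.col("l_extendedprice").sum())
)

result = q.collect(
    engine=pl.GPUEngine(
        executor="streaming",
        # Assumed option; mirrors the target_partition_size used by the
        # streaming join node in the diff below (value in bytes).
        executor_options={"target_partition_size": 256 * 1024**2},
    )
)
print(result)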
cudf_polars/experimental/rapidsmpf/join.py

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 """Join logic for the RapidsMPF streaming runtime."""

@@ -7,11 +7,13 @@ from __future__ import annotations
 import asyncio
 from typing import TYPE_CHECKING, Any, Literal

+from rapidsmpf.memory.buffer import MemoryType
 from rapidsmpf.streaming.core.message import Message
 from rapidsmpf.streaming.cudf.table_chunk import TableChunk

 from cudf_polars.containers import DataFrame
 from cudf_polars.dsl.ir import IR, Join
+from cudf_polars.experimental.rapidsmpf.collectives.allgather import AllGatherManager
 from cudf_polars.experimental.rapidsmpf.dispatch import (
     generate_ir_sub_network,
 )
@@ -22,6 +24,10 @@ from cudf_polars.experimental.rapidsmpf.nodes import (
 )
 from cudf_polars.experimental.rapidsmpf.utils import (
     ChannelManager,
+    Metadata,
+    chunk_to_frame,
+    empty_table_chunk,
+    opaque_reservation,
     process_children,
 )
 from cudf_polars.experimental.utils import _concat
@@ -34,48 +40,6 @@ if TYPE_CHECKING:
     from cudf_polars.experimental.rapidsmpf.utils import ChannelPair


-async def get_small_table(
-    context: Context,
-    small_child: IR,
-    ch_small: ChannelPair,
-) -> list[DataFrame]:
-    """
-    Get the small-table DataFrame partitions from the small-table ChannelPair.
-
-    Parameters
-    ----------
-    context
-        The rapidsmpf context.
-    small_child
-        The small-table child IR node.
-    ch_small
-        The small-table ChannelPair.
-
-    Returns
-    -------
-    list[DataFrame]
-        The small-table DataFrame partitions.
-    """
-    small_chunks = []
-    while (msg := await ch_small.data.recv(context)) is not None:
-        small_chunks.append(
-            TableChunk.from_message(msg).make_available_and_spill(
-                context.br(), allow_overbooking=True
-            )
-        )
-    assert small_chunks, "Empty small side"
-
-    return [
-        DataFrame.from_table(
-            small_chunk.table_view(),
-            list(small_child.schema.keys()),
-            list(small_child.schema.values()),
-            small_chunk.stream,
-        )
-        for small_chunk in small_chunks
-    ]
-
-
 @define_py_node()
 async def broadcast_join_node(
     context: Context,
@@ -85,6 +49,8 @@ async def broadcast_join_node(
     ch_left: ChannelPair,
     ch_right: ChannelPair,
     broadcast_side: Literal["left", "right"],
+    collective_id: int,
+    target_partition_size: int,
 ) -> None:
     """
     Join node for rapidsmpf.
@@ -105,33 +71,132 @@ async def broadcast_join_node(
         The right input ChannelPair.
     broadcast_side
         The side to broadcast.
+    collective_id
+        Pre-allocated collective ID for this operation.
+    target_partition_size
+        The target partition size in bytes.
     """
-    async with shutdown_on_error(
+    async with shutdown_on_error(
+        context,
+        ch_left.metadata,
+        ch_left.data,
+        ch_right.metadata,
+        ch_right.data,
+        ch_out.metadata,
+        ch_out.data,
+    ):
+        # Receive metadata.
+        left_metadata, right_metadata = await asyncio.gather(
+            ch_left.recv_metadata(context),
+            ch_right.recv_metadata(context),
+        )
+
+        partitioned_on: tuple[str, ...] = ()
         if broadcast_side == "right":
             # Broadcast right, stream left
             small_ch = ch_right
             large_ch = ch_left
             small_child = ir.children[1]
             large_child = ir.children[0]
+            chunk_count = left_metadata.count
+            partitioned_on = left_metadata.partitioned_on
+            small_duplicated = right_metadata.duplicated
         else:
             # Broadcast left, stream right
            small_ch = ch_left
             large_ch = ch_right
             small_child = ir.children[0]
             large_child = ir.children[1]
+            chunk_count = right_metadata.count
+            small_duplicated = left_metadata.duplicated
+            if ir.options[0] == "Right":
+                partitioned_on = right_metadata.partitioned_on

-        #
-
-
-
-
+        # Send metadata.
+        output_metadata = Metadata(
+            chunk_count,
+            partitioned_on=partitioned_on,
+            duplicated=left_metadata.duplicated and right_metadata.duplicated,
+        )
+        await ch_out.send_metadata(context, output_metadata)

-        #
-
-
-
+        # Collect small-side (may be empty if no data received)
+        small_chunks: list[TableChunk] = []
+        small_size = 0
+        while (msg := await small_ch.data.recv(context)) is not None:
+            small_chunks.append(
+                TableChunk.from_message(msg).make_available_and_spill(
+                    context.br(), allow_overbooking=True
+                )
             )
-
+            del msg
+            small_size += small_chunks[-1].data_alloc_size(MemoryType.DEVICE)
+
+        # Allgather is a collective - all ranks must participate even with no local data
+        need_allgather = context.comm().nranks > 1 and not small_duplicated
+        if need_allgather:
+            allgather = AllGatherManager(context, collective_id)
+            for s_id in range(len(small_chunks)):
+                allgather.insert(s_id, small_chunks.pop(0))
+            allgather.insert_finished()
+            stream = ir_context.get_cuda_stream()
+            # extract_concatenated returns a plc.Table, not a TableChunk
+            small_dfs = [
+                DataFrame.from_table(
+                    await allgather.extract_concatenated(stream),
+                    list(small_child.schema.keys()),
+                    list(small_child.schema.values()),
+                    stream,
+                )
+            ]
+        elif len(small_chunks) > 1 and (
+            ir.options[0] != "Inner" or small_size < target_partition_size
+        ):
+            # Pre-concat for non-inner joins, otherwise
+            # we need a local shuffle, and face additional
+            # memory pressure anyway.
+            small_dfs = [
+                _concat(
+                    *[chunk_to_frame(chunk, small_child) for chunk in small_chunks],
+                    context=ir_context,
+                )
+            ]
+            small_chunks.clear()  # small_dfs is not a view of small_chunks anymore
+        else:
+            small_dfs = [
+                chunk_to_frame(small_chunk, small_child) for small_chunk in small_chunks
+            ]
+
+        # Stream through large side, joining with the small-side
+        seq_num = 0
+        large_chunk_processed = False
+        receiving_large_chunks = True
+        while receiving_large_chunks:
+            msg = await large_ch.data.recv(context)
+            if msg is None:
+                receiving_large_chunks = False
+                if large_chunk_processed:
+                    # Normal exit - We've processed all large-table data
+                    break
+                elif small_dfs:
+                    # We received small-table data, but no large-table data.
+                    # This may never happen, but we can handle it by generating
+                    # an empty large-table chunk
+                    stream = ir_context.get_cuda_stream()
+                    large_chunk = empty_table_chunk(large_child, context, stream)
+                else:
+                    # We received no data for either the small or large table.
+                    # Drain the output channel and return
+                    await ch_out.data.drain(context)
+                    return
+            else:
+                large_chunk_processed = True
+                large_chunk = TableChunk.from_message(msg).make_available_and_spill(
+                    context.br(), allow_overbooking=True
+                )
+                seq_num = msg.sequence_number
+                del msg
+
             large_df = DataFrame.from_table(
                 large_chunk.table_view(),
                 list(large_child.schema.keys()),
@@ -139,10 +204,17 @@ async def broadcast_join_node(
                 large_chunk.stream,
             )

-            #
-
-
-
+            # Lazily create empty small table if small_dfs is empty
+            if not small_dfs:
+                stream = ir_context.get_cuda_stream()
+                empty_small_chunk = empty_table_chunk(small_child, context, stream)
+                small_dfs = [chunk_to_frame(empty_small_chunk, small_child)]
+
+            large_chunk_size = large_chunk.data_alloc_size(MemoryType.DEVICE)
+            input_bytes = large_chunk_size + small_size
+            with opaque_reservation(context, input_bytes):
+                df = _concat(
+                    *[
                         await asyncio.to_thread(
                             ir.do_evaluate,
                             *ir._non_child_args,
@@ -153,28 +225,31 @@ async def broadcast_join_node(
                             ),
                             context=ir_context,
                         )
-
-
-
-
-            )
+                        for small_df in small_dfs
+                    ],
+                    context=ir_context,
+                )

-
-
-
-
-
-
-
+            # Send output chunk
+            await ch_out.data.send(
+                context,
+                Message(
+                    seq_num,
+                    TableChunk.from_pylibcudf_table(
+                        df.table, df.stream, exclusive_view=True
+                    ),
                 ),
-            )
-
+            )
+            del df, large_df, large_chunk

+        del small_dfs, small_chunks
         await ch_out.data.drain(context)


 @generate_ir_sub_network.register(Join)
-def _(
+def _(
+    ir: Join, rec: SubNetGenerator
+) -> tuple[dict[IR, list[Any]], dict[IR, ChannelManager]]:
     # Join operation.
     left, right = ir.children
     partition_info = rec.state["partition_info"]
@@ -200,7 +275,8 @@ def _(ir: Join, rec: SubNetGenerator) -> tuple[list[Any], dict[IR, ChannelManage

     if pwise_join:
         # Partition-wise join (use default_node_multi)
-
+        partitioning_index = 1 if ir.options[0] == "Right" else 0
+        nodes[ir] = [
             default_node_multi(
                 rec.state["context"],
                 ir,
@@ -210,8 +286,9 @@ def _(ir: Join, rec: SubNetGenerator) -> tuple[list[Any], dict[IR, ChannelManage
                     channels[left].reserve_output_slot(),
                     channels[right].reserve_output_slot(),
                 ),
+                partitioning_index=partitioning_index,
             )
-
+        ]
         return nodes, channels

     else:
@@ -223,7 +300,13 @@ def _(ir: Join, rec: SubNetGenerator) -> tuple[list[Any], dict[IR, ChannelManage
         else:
             broadcast_side = "left"

-
+        # Get target partition size
+        config_options = rec.state["config_options"]
+        executor = config_options.executor
+        assert executor.name == "streaming", "Join node requires streaming executor"
+        target_partition_size = executor.target_partition_size
+
+        nodes[ir] = [
             broadcast_join_node(
                 rec.state["context"],
                 ir,
@@ -232,6 +315,8 @@ def _(ir: Join, rec: SubNetGenerator) -> tuple[list[Any], dict[IR, ChannelManage
                 channels[left].reserve_output_slot(),
                 channels[right].reserve_output_slot(),
                 broadcast_side=broadcast_side,
+                collective_id=rec.state["collective_id_map"][ir],
+                target_partition_size=target_partition_size,
             )
-
+        ]
         return nodes, channels
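The core of the rewrite above is a streaming broadcast join: the node now exchanges channel metadata first, collects the broadcast (small) side once, replicates it across ranks with an allgather when needed, and then joins each large-side chunk as it arrives instead of materializing both inputs. The following is an illustration only, a CPU stand-in using asyncio queues and lists of dict rows; none of the names below are rapidsmpf or cudf-polars APIs.

# Illustration only: a CPU-only stand-in for the broadcast-join pattern in
# broadcast_join_node. Channels, TableChunks, the allgather collective, and
# memory reservations are replaced with plain asyncio.Queue objects and lists
# of dict rows.
import asyncio


async def broadcast_join(small_ch: asyncio.Queue, large_ch: asyncio.Queue,
                         out_ch: asyncio.Queue, key: str) -> None:
    # 1) Collect the (small) broadcast side fully before streaming starts.
    #    In the multi-rank case this is where an allgather would replicate
    #    the small side on every worker.
    small_rows = []
    while (chunk := await small_ch.get()) is not None:
        small_rows.extend(chunk)
    index: dict = {}
    for row in small_rows:
        index.setdefault(row[key], []).append(row)

    # 2) Stream the large side one chunk at a time, joining each chunk
    #    against the collected small side and emitting results eagerly.
    while (chunk := await large_ch.get()) is not None:
        joined = [
            {**left, **right}
            for left in chunk
            for right in index.get(left[key], [])
        ]
        await out_ch.put(joined)
    await out_ch.put(None)  # end-of-stream, akin to draining ch_out


async def main() -> None:
    small_ch, large_ch, out_ch = asyncio.Queue(), asyncio.Queue(), asyncio.Queue()
    await small_ch.put([{"id": 1, "name": "a"}, {"id": 2, "name": "b"}])
    await small_ch.put(None)
    await large_ch.put([{"id": 1, "x": 10}, {"id": 2, "x": 20}])
    await large_ch.put([{"id": 1, "x": 30}])
    await large_ch.put(None)

    task = asyncio.create_task(broadcast_join(small_ch, large_ch, out_ch, "id"))
    while (chunk := await out_ch.get()) is not None:
        print(chunk)
    await task


asyncio.run(main())

The real node additionally handles the corner cases visible in the diff: it synthesizes empty chunks when one side produces no data, chooses between concatenating or keeping the small side chunked based on the join type and target_partition_size, and runs the actual join via ir.do_evaluate in a worker thread under an opaque_reservation to account for memory use.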