cudf_polars_cu13-25.10.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -0
- cudf_polars/VERSION +1 -0
- cudf_polars/__init__.py +28 -0
- cudf_polars/_version.py +21 -0
- cudf_polars/callback.py +318 -0
- cudf_polars/containers/__init__.py +13 -0
- cudf_polars/containers/column.py +495 -0
- cudf_polars/containers/dataframe.py +361 -0
- cudf_polars/containers/datatype.py +137 -0
- cudf_polars/dsl/__init__.py +8 -0
- cudf_polars/dsl/expr.py +66 -0
- cudf_polars/dsl/expressions/__init__.py +8 -0
- cudf_polars/dsl/expressions/aggregation.py +226 -0
- cudf_polars/dsl/expressions/base.py +272 -0
- cudf_polars/dsl/expressions/binaryop.py +120 -0
- cudf_polars/dsl/expressions/boolean.py +326 -0
- cudf_polars/dsl/expressions/datetime.py +271 -0
- cudf_polars/dsl/expressions/literal.py +97 -0
- cudf_polars/dsl/expressions/rolling.py +643 -0
- cudf_polars/dsl/expressions/selection.py +74 -0
- cudf_polars/dsl/expressions/slicing.py +46 -0
- cudf_polars/dsl/expressions/sorting.py +85 -0
- cudf_polars/dsl/expressions/string.py +1002 -0
- cudf_polars/dsl/expressions/struct.py +137 -0
- cudf_polars/dsl/expressions/ternary.py +49 -0
- cudf_polars/dsl/expressions/unary.py +517 -0
- cudf_polars/dsl/ir.py +2607 -0
- cudf_polars/dsl/nodebase.py +164 -0
- cudf_polars/dsl/to_ast.py +359 -0
- cudf_polars/dsl/tracing.py +16 -0
- cudf_polars/dsl/translate.py +939 -0
- cudf_polars/dsl/traversal.py +224 -0
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +481 -0
- cudf_polars/dsl/utils/groupby.py +98 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +61 -0
- cudf_polars/dsl/utils/reshape.py +74 -0
- cudf_polars/dsl/utils/rolling.py +121 -0
- cudf_polars/dsl/utils/windows.py +192 -0
- cudf_polars/experimental/__init__.py +8 -0
- cudf_polars/experimental/base.py +386 -0
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds.py +220 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
- cudf_polars/experimental/benchmarks/pdsh.py +814 -0
- cudf_polars/experimental/benchmarks/utils.py +832 -0
- cudf_polars/experimental/dask_registers.py +200 -0
- cudf_polars/experimental/dispatch.py +156 -0
- cudf_polars/experimental/distinct.py +197 -0
- cudf_polars/experimental/explain.py +157 -0
- cudf_polars/experimental/expressions.py +590 -0
- cudf_polars/experimental/groupby.py +327 -0
- cudf_polars/experimental/io.py +943 -0
- cudf_polars/experimental/join.py +391 -0
- cudf_polars/experimental/parallel.py +423 -0
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +188 -0
- cudf_polars/experimental/shuffle.py +354 -0
- cudf_polars/experimental/sort.py +609 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/statistics.py +795 -0
- cudf_polars/experimental/utils.py +169 -0
- cudf_polars/py.typed +0 -0
- cudf_polars/testing/__init__.py +8 -0
- cudf_polars/testing/asserts.py +448 -0
- cudf_polars/testing/io.py +122 -0
- cudf_polars/testing/plugin.py +236 -0
- cudf_polars/typing/__init__.py +219 -0
- cudf_polars/utils/__init__.py +8 -0
- cudf_polars/utils/config.py +741 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +118 -0
- cudf_polars/utils/sorting.py +53 -0
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +27 -0
- cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
- cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
- cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
- cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
- cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
cudf_polars/experimental/join.py
@@ -0,0 +1,391 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
"""Parallel Join Logic."""

from __future__ import annotations

import operator
from functools import reduce
from typing import TYPE_CHECKING, Any

from cudf_polars.dsl.ir import ConditionalJoin, Join, Slice
from cudf_polars.experimental.base import PartitionInfo, get_key_name
from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node
from cudf_polars.experimental.repartition import Repartition
from cudf_polars.experimental.shuffle import Shuffle, _hash_partition_dataframe
from cudf_polars.experimental.utils import _concat, _fallback_inform, _lower_ir_fallback

if TYPE_CHECKING:
    from collections.abc import MutableMapping

    from cudf_polars.dsl.expr import NamedExpr
    from cudf_polars.dsl.ir import IR
    from cudf_polars.experimental.parallel import LowerIRTransformer
    from cudf_polars.utils.config import ShuffleMethod


def _maybe_shuffle_frame(
    frame: IR,
    on: tuple[NamedExpr, ...],
    partition_info: MutableMapping[IR, PartitionInfo],
    shuffle_method: ShuffleMethod,
    output_count: int,
) -> IR:
    # Shuffle `frame` if it isn't already shuffled.
    if (
        partition_info[frame].partitioned_on == on
        and partition_info[frame].count == output_count
    ):
        # Already shuffled
        return frame
    else:
        # Insert new Shuffle node
        frame = Shuffle(
            frame.schema,
            on,
            shuffle_method,
            frame,
        )
        partition_info[frame] = PartitionInfo(
            count=output_count,
            partitioned_on=on,
        )
        return frame


def _make_hash_join(
    ir: Join,
    output_count: int,
    partition_info: MutableMapping[IR, PartitionInfo],
    left: IR,
    right: IR,
    shuffle_method: ShuffleMethod,
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    # Shuffle left and right dataframes (if necessary)
    new_left = _maybe_shuffle_frame(
        left,
        ir.left_on,
        partition_info,
        shuffle_method,
        output_count,
    )
    new_right = _maybe_shuffle_frame(
        right,
        ir.right_on,
        partition_info,
        shuffle_method,
        output_count,
    )
    if left != new_left or right != new_right:
        ir = ir.reconstruct([new_left, new_right])
        left = new_left
        right = new_right

    # Record new partitioning info
    partitioned_on: tuple[NamedExpr, ...] = ()
    if ir.left_on == ir.right_on or (ir.options[0] in ("Left", "Semi", "Anti")):
        partitioned_on = ir.left_on
    elif ir.options[0] == "Right":
        partitioned_on = ir.right_on
    partition_info[ir] = PartitionInfo(
        count=output_count,
        partitioned_on=partitioned_on,
    )

    return ir, partition_info


def _should_bcast_join(
    ir: Join,
    left: IR,
    right: IR,
    partition_info: MutableMapping[IR, PartitionInfo],
    output_count: int,
    broadcast_join_limit: int,
) -> bool:
    # Decide if a broadcast join is appropriate.
    if partition_info[left].count >= partition_info[right].count:
        small_count = partition_info[right].count
        large = left
        large_on = ir.left_on
    else:
        small_count = partition_info[left].count
        large = right
        large_on = ir.right_on

    # Avoid the broadcast if the "large" table is already shuffled
    large_shuffled = (
        partition_info[large].partitioned_on == large_on
        and partition_info[large].count == output_count
    )

    # Broadcast-Join Criteria:
    # 1. The large dataframe isn't already shuffled.
    # 2. The small dataframe has `broadcast_join_limit` partitions
    #    (or fewer). TODO: This heuristic may also want to account
    #    for the number of workers.
    # 3. The "kind" of join is compatible with a broadcast join.
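    # For example (hypothetical sizes, not from the original source):
    # an Inner join between a 100-partition left table and a
    # 4-partition right table broadcasts the right side, provided
    # broadcast_join_limit >= 4 and the left table is not already
    # shuffled on ir.left_on.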

    return (
        not large_shuffled
        and small_count <= broadcast_join_limit
        and (
            ir.options[0] == "Inner"
            or (ir.options[0] in ("Left", "Semi", "Anti") and large == left)
            or (ir.options[0] == "Right" and large == right)
        )
    )


def _make_bcast_join(
    ir: Join,
    output_count: int,
    partition_info: MutableMapping[IR, PartitionInfo],
    left: IR,
    right: IR,
    shuffle_method: ShuffleMethod,
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    if ir.options[0] != "Inner":
        left_count = partition_info[left].count
        right_count = partition_info[right].count

        # Shuffle the smaller table (if necessary) - Notes:
        # - We need to shuffle the smaller table if (1) we are not
        #   doing an "inner" join, and (2) the small table contains
        #   multiple partitions.
        # - We cannot simply join a large-table partition to each
        #   small-table partition, and then concatenate the
        #   partial-join results, because a non-"inner" join does
        #   NOT commute with concatenation.
        # - In some cases, we can perform the partial joins
        #   sequentially. However, we are starting with a
        #   catch-all algorithm that works for all cases.
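        # Illustration (example values, not from the original source):
        # with left keys {1, 2} and the right table split into
        # r1 = {1: "a"} and r2 = {2: "b"}, a left join against r1
        # yields (1, "a") and (2, null), while a left join against r2
        # yields (1, null) and (2, "b"). Concatenating the two results
        # keeps both null rows, which the unsplit left join would not
        # produce.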
        if left_count >= right_count:
            right = _maybe_shuffle_frame(
                right,
                ir.right_on,
                partition_info,
                shuffle_method,
                right_count,
            )
        else:
            left = _maybe_shuffle_frame(
                left,
                ir.left_on,
                partition_info,
                shuffle_method,
                left_count,
            )

    new_node = ir.reconstruct([left, right])
    partition_info[new_node] = PartitionInfo(count=output_count)
    return new_node, partition_info


@lower_ir_node.register(ConditionalJoin)
def _(
    ir: ConditionalJoin, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    if ir.options[2]:  # pragma: no cover
        return _lower_ir_fallback(
            ir,
            rec,
            msg="Slice not supported in ConditionalJoin for multiple partitions.",
        )

    # Lower children
    left, right = ir.children
    left, pi_left = rec(left)
    right, pi_right = rec(right)

    # Fall back to a single partition on the smaller table
    left_count = pi_left[left].count
    right_count = pi_right[right].count
    output_count = max(left_count, right_count)
    fallback_msg = "ConditionalJoin not supported for multiple partitions."
    if left_count < right_count:
        if left_count > 1:
            left = Repartition(left.schema, left)
            pi_left[left] = PartitionInfo(count=1)
            _fallback_inform(fallback_msg, rec.state["config_options"])
    elif right_count > 1:
        right = Repartition(right.schema, right)
        pi_right[right] = PartitionInfo(count=1)
        _fallback_inform(fallback_msg, rec.state["config_options"])

    # Reconstruct and return
    new_node = ir.reconstruct([left, right])
    partition_info = reduce(operator.or_, (pi_left, pi_right))
    partition_info[new_node] = PartitionInfo(count=output_count)
    return new_node, partition_info


@lower_ir_node.register(Join)
def _(
    ir: Join, rec: LowerIRTransformer
) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
    # Pull slice operations out of the Join before lowering
    if (zlice := ir.options[2]) is not None:
        offset, length = zlice
        if length is None:  # pragma: no cover
            return _lower_ir_fallback(
                ir,
                rec,
                msg="This slice is not supported for multiple partitions.",
            )
        new_join = Join(
            ir.schema,
            ir.left_on,
            ir.right_on,
            (*ir.options[:2], None, *ir.options[3:]),
            *ir.children,
        )
        return rec(Slice(ir.schema, offset, length, new_join))

    # Lower children
    children, _partition_info = zip(*(rec(c) for c in ir.children), strict=True)
    partition_info = reduce(operator.or_, _partition_info)

    left, right = children
    output_count = max(partition_info[left].count, partition_info[right].count)
    if output_count == 1:
        new_node = ir.reconstruct(children)
        partition_info[new_node] = PartitionInfo(count=1)
        return new_node, partition_info
    elif ir.options[0] == "Cross":  # pragma: no cover
        return _lower_ir_fallback(
            ir, rec, msg="Cross join not supported for multiple partitions."
        )

    config_options = rec.state["config_options"]
    assert config_options.executor.name == "streaming", (
        "'in-memory' executor not supported in 'lower_join'"
    )
    if _should_bcast_join(
        ir,
        left,
        right,
        partition_info,
        output_count,
        config_options.executor.broadcast_join_limit,
    ):
        # Create a broadcast join
        return _make_bcast_join(
            ir,
            output_count,
            partition_info,
            left,
            right,
            config_options.executor.shuffle_method,
        )
    else:
        # Create a hash join
        return _make_hash_join(
            ir,
            output_count,
            partition_info,
            left,
            right,
            config_options.executor.shuffle_method,
        )


@generate_ir_tasks.register(Join)
def _(
    ir: Join, partition_info: MutableMapping[IR, PartitionInfo]
) -> MutableMapping[Any, Any]:
    left, right = ir.children
    output_count = partition_info[ir].count

    left_partitioned = (
        partition_info[left].partitioned_on == ir.left_on
        and partition_info[left].count == output_count
    )
    right_partitioned = (
        partition_info[right].partitioned_on == ir.right_on
        and partition_info[right].count == output_count
    )

    if output_count == 1 or (left_partitioned and right_partitioned):
        # Partition-wise join
        left_name = get_key_name(left)
        right_name = get_key_name(right)
        return {
            key: (
                ir.do_evaluate,
                *ir._non_child_args,
                (left_name, i),
                (right_name, i),
            )
            for i, key in enumerate(partition_info[ir].keys(ir))
        }
    else:
        # Broadcast join
        left_parts = partition_info[left]
        right_parts = partition_info[right]
        if left_parts.count >= right_parts.count:
            small_side = "Right"
            small_name = get_key_name(right)
            small_size = partition_info[right].count
            large_name = get_key_name(left)
            large_on = ir.left_on
        else:
            small_side = "Left"
            small_name = get_key_name(left)
            small_size = partition_info[left].count
            large_name = get_key_name(right)
            large_on = ir.right_on

        graph: MutableMapping[Any, Any] = {}

        out_name = get_key_name(ir)
        out_size = partition_info[ir].count
        split_name = f"split-{out_name}"
        getit_name = f"getit-{out_name}"
        inter_name = f"inter-{out_name}"

        # Split each large partition if we have multiple small
        # partitions (unless this is an inner join)
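        # An inner join commutes with concatenation, so each large
        # partition can be joined against every small partition as-is
        # and the partial results concatenated. For other join types,
        # each large partition is first hash-partitioned on the join
        # keys so that piece j meets only the matching (shuffled)
        # small partition j.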
        split_large = ir.options[0] != "Inner" and small_size > 1

        for part_out in range(out_size):
            if split_large:
                graph[(split_name, part_out)] = (
                    _hash_partition_dataframe,
                    (large_name, part_out),
                    part_out,
                    small_size,
                    None,
                    large_on,
                )

            _concat_list = []
            for j in range(small_size):
                left_key: tuple[str, int] | tuple[str, int, int]
                if split_large:
                    left_key = (getit_name, part_out, j)
                    graph[left_key] = (operator.getitem, (split_name, part_out), j)
                else:
                    left_key = (large_name, part_out)
                join_children = [left_key, (small_name, j)]
                if small_side == "Left":
                    join_children.reverse()

                inter_key = (inter_name, part_out, j)
                graph[inter_key] = (
                    ir.do_evaluate,
                    ir.left_on,
                    ir.right_on,
                    ir.options,
                    *join_children,
                )
                _concat_list.append(inter_key)
            if len(_concat_list) == 1:
                graph[(out_name, part_out)] = graph.pop(_concat_list[0])
            else:
                graph[(out_name, part_out)] = (_concat, *_concat_list)

        return graph
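
To make the broadcast-join wiring above concrete, the following is a minimal, self-contained sketch of the task-graph shape that the `generate_ir_tasks` handler builds for a non-inner join with two output (large) partitions and two small partitions. All names here (`hash_partition`, `partial_join`, `concat`, the `"large"`/`"small"` keys) are illustrative stand-ins, not identifiers from the package:

```python
import operator

def hash_partition(frame, part, n, seed, on):
    # Stand-in for _hash_partition_dataframe: split one large partition
    # into `n` pieces by hashing the join keys.
    return [f"{frame}.piece{j}" for j in range(n)]

def partial_join(left_on, right_on, options, left, right):
    # Stand-in for ir.do_evaluate (one partial join).
    return f"join({left}, {right})"

def concat(*frames):
    # Stand-in for _concat.
    return f"concat({', '.join(frames)})"

out_size = small_size = 2
graph = {}
for i in range(out_size):
    # Split large partition i into one piece per small partition.
    graph[("split", i)] = (hash_partition, ("large", i), i, small_size, None, "key")
    inter = []
    for j in range(small_size):
        # Piece j of large partition i ...
        graph[("getit", i, j)] = (operator.getitem, ("split", i), j)
        # ... joined against (shuffled) small partition j.
        graph[("inter", i, j)] = (
            partial_join, "left_on", "right_on", "options",
            ("getit", i, j), ("small", j),
        )
        inter.append(("inter", i, j))
    # Output partition i concatenates the partial joins.
    graph[("out", i)] = (concat, *inter)

print(len(graph))  # 12 tasks for this 2x2 example
```

Each graph value is a task tuple (a callable followed by its arguments, where tuple keys refer to other tasks), so any Dask-style scheduler can execute the split, partial-join, and concatenate stages that the handler wires together.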