cudf-polars-cu13 25.12.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +28 -7
  4. cudf_polars/containers/column.py +51 -26
  5. cudf_polars/dsl/expressions/binaryop.py +1 -1
  6. cudf_polars/dsl/expressions/boolean.py +1 -1
  7. cudf_polars/dsl/expressions/selection.py +1 -1
  8. cudf_polars/dsl/expressions/string.py +29 -20
  9. cudf_polars/dsl/expressions/ternary.py +25 -1
  10. cudf_polars/dsl/expressions/unary.py +11 -8
  11. cudf_polars/dsl/ir.py +351 -281
  12. cudf_polars/dsl/translate.py +18 -15
  13. cudf_polars/dsl/utils/aggregations.py +10 -5
  14. cudf_polars/experimental/base.py +10 -0
  15. cudf_polars/experimental/benchmarks/pdsh.py +1 -1
  16. cudf_polars/experimental/benchmarks/utils.py +83 -2
  17. cudf_polars/experimental/distinct.py +2 -0
  18. cudf_polars/experimental/explain.py +1 -1
  19. cudf_polars/experimental/expressions.py +8 -5
  20. cudf_polars/experimental/groupby.py +2 -0
  21. cudf_polars/experimental/io.py +64 -42
  22. cudf_polars/experimental/join.py +15 -2
  23. cudf_polars/experimental/parallel.py +10 -7
  24. cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
  25. cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
  26. cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
  27. cudf_polars/experimental/rapidsmpf/{shuffle.py → collectives/shuffle.py} +90 -114
  28. cudf_polars/experimental/rapidsmpf/core.py +194 -67
  29. cudf_polars/experimental/rapidsmpf/dask.py +172 -0
  30. cudf_polars/experimental/rapidsmpf/dispatch.py +6 -3
  31. cudf_polars/experimental/rapidsmpf/io.py +162 -70
  32. cudf_polars/experimental/rapidsmpf/join.py +162 -77
  33. cudf_polars/experimental/rapidsmpf/nodes.py +421 -180
  34. cudf_polars/experimental/rapidsmpf/repartition.py +130 -65
  35. cudf_polars/experimental/rapidsmpf/union.py +24 -5
  36. cudf_polars/experimental/rapidsmpf/utils.py +228 -16
  37. cudf_polars/experimental/shuffle.py +18 -4
  38. cudf_polars/experimental/sort.py +13 -6
  39. cudf_polars/experimental/spilling.py +1 -1
  40. cudf_polars/testing/plugin.py +6 -3
  41. cudf_polars/utils/config.py +67 -0
  42. cudf_polars/utils/versions.py +3 -3
  43. {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/METADATA +9 -10
  44. {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/RECORD +47 -43
  45. {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
  46. {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
  47. {cudf_polars_cu13-25.12.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/dsl/ir.py CHANGED
@@ -1,4 +1,4 @@
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES.
  # SPDX-License-Identifier: Apache-2.0
  """
  DSL nodes for the LogicalPlan of polars.
@@ -13,6 +13,7 @@ can be considered as functions:
 
  from __future__ import annotations
 
+ import contextlib
  import itertools
  import json
  import random
@@ -53,7 +54,7 @@ from cudf_polars.utils.cuda_stream import (
  from cudf_polars.utils.versions import POLARS_VERSION_LT_131, POLARS_VERSION_LT_134
 
  if TYPE_CHECKING:
-     from collections.abc import Callable, Hashable, Iterable, Sequence
+     from collections.abc import Callable, Generator, Hashable, Iterable, Sequence
      from typing import Literal
 
      from typing_extensions import Self
@@ -125,6 +126,50 @@ class IRExecutionContext:
              f"Invalid CUDA stream policy: {config_options.cuda_stream_policy}"
          )
 
+     @contextlib.contextmanager
+     def stream_ordered_after(self, *dfs: DataFrame) -> Generator[Stream, None, None]:
+         """
+         Get a joined CUDA stream with safe stream ordering for deallocation of inputs.
+ 
+         Parameters
+         ----------
+         dfs
+             The dataframes being provided to stream-ordered operations.
+ 
+         Yields
+         ------
+         A CUDA stream that is downstream of the given dataframes.
+ 
+         Notes
+         -----
+         This context manager provides two useful guarantees when working with
+         objects holding references to stream-ordered objects:
+ 
+         1. The stream yielded upon entering the context manager is *downstream* of
+            all the input dataframes. This ensures that you can safely perform
+            stream-ordered operations on any input using the yielded stream.
+         2. The stream-ordered CUDA deallocation of the inputs happens *after* the
+            context manager exits. This ensures that all stream-ordered operations
+            submitted inside the context manager can complete before the memory
+            referenced by the inputs is deallocated.
+ 
+         Note that this does (deliberately) disconnect the dropping of the Python
+         object (by its refcount dropping to 0) from the actual stream-ordered
+         deallocation of the CUDA memory. This is precisely what we need to ensure
+         that the inputs are valid long enough for the stream-ordered operations to
+         complete.
+         """
+         result_stream = get_joined_cuda_stream(
+             self.get_cuda_stream, upstreams=[df.stream for df in dfs]
+         )
+ 
+         yield result_stream
+ 
+         # Ensure the inputs are downstream of result_stream, so that their
+         # deallocation happens after the result is ready.
+         join_cuda_streams(
+             downstreams=[df.stream for df in dfs], upstreams=[result_stream]
+         )
 
 
  _BINOPS = {
      plc.binaryop.BinaryOperator.EQUAL,
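The context manager above packages a pattern that every multi-input IR node previously spelled out by hand (compare the ConditionalJoin, Join, MergeSorted, Union, and HConcat hunks below). A minimal sketch of how an evaluation method consumes it — an illustrative composite, not a node from this diff:

    @classmethod
    def do_evaluate(cls, *dfs: DataFrame, context: IRExecutionContext) -> DataFrame:
        """Evaluate and return a dataframe."""
        with context.stream_ordered_after(*dfs) as stream:
            # `stream` is ordered after every input's stream, so stream-ordered
            # reads of the inputs are safe here.
            result = DataFrame.from_table(
                plc.concatenate.concatenate([df.table for df in dfs], stream=stream),
                dfs[0].column_names,
                dfs[0].dtypes,
                stream=stream,
            )
        # On exit, the inputs' streams are joined downstream of `stream`, so
        # dropping the last Python reference to an input cannot deallocate its
        # CUDA memory before the work submitted above completes.
        return result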
@@ -320,7 +365,11 @@ def _cast_literal_to_decimal(
      name = side.name
      if (type_ := phys_type_map[name]).id() in _DECIMAL_IDS:
          scale = abs(type_.scale())
-         return expr.Cast(side.dtype, expr.Cast(DataType(pl.Decimal(38, scale)), lit))
+         return expr.Cast(
+             side.dtype,
+             True,  # noqa: FBT003
+             expr.Cast(DataType(pl.Decimal(38, scale)), True, lit),  # noqa: FBT003
+         )
      return lit
 
 
@@ -1291,6 +1340,42 @@ class DataFrameScan(IR):
          self.children = ()
          self._id_for_hash = random.randint(0, 2**64 - 1)
 
+     @staticmethod
+     def _reconstruct(
+         schema: Schema,
+         pl_df: pl.DataFrame,
+         projection: Sequence[str] | None,
+         id_for_hash: int,
+     ) -> DataFrameScan:  # pragma: no cover
+         """
+         Reconstruct a DataFrameScan from pickled data.
+ 
+         Parameters
+         ----------
+         schema: Schema
+             The schema of the DataFrameScan.
+         pl_df: pl.DataFrame
+             The underlying polars DataFrame.
+         projection: Sequence[str] | None
+             The projection of the DataFrameScan.
+         id_for_hash: int
+             The id for hash of the DataFrameScan.
+ 
+         Returns
+         -------
+         The reconstructed DataFrameScan.
+         """
+         node = DataFrameScan(schema, pl_df._df, projection)
+         node._id_for_hash = id_for_hash
+         return node
+ 
+     def __reduce__(self) -> tuple[Any, ...]:  # pragma: no cover
+         """Pickle a DataFrameScan object."""
+         return (
+             self._reconstruct,
+             (*self._non_child_args, self._id_for_hash),
+         )
+ 
      def get_hashable(self) -> Hashable:
          """
          Hashable representation of the node.
@@ -1307,6 +1392,17 @@ class DataFrameScan(IR):
              self.projection,
          )
 
+     def is_equal(self, other: Self) -> bool:
+         """Equality of DataFrameScan nodes."""
+         return self is other or (
+             self._id_for_hash == other._id_for_hash
+             and self.schema == other.schema
+             and self.projection == other.projection
+             and pl.DataFrame._from_pydf(self.df).equals(
+                 pl.DataFrame._from_pydf(other.df)
+             )
+         )
+ 
      @classmethod
      @log_do_evaluate
      @nvtx_annotate_cudf_polars(message="DataFrameScan")
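Together, `__reduce__`/`_reconstruct` and `is_equal` make DataFrameScan nodes stable across process boundaries: the random `_id_for_hash` drawn in `__init__` now travels with the pickle instead of being regenerated on unpickling. A sketch of the property this buys (assumes an existing DataFrameScan instance `scan`; not code from this diff):

    import pickle

    clone = pickle.loads(pickle.dumps(scan))
    # _reconstruct re-runs __init__ (which draws a fresh random id) and then
    # overwrites _id_for_hash with the pickled value...
    assert clone._id_for_hash == scan._id_for_hash
    # ...and since schema, projection, and the underlying polars frame also
    # round-trip, the copy compares equal under the new is_equal.
    assert scan.is_equal(clone)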
@@ -1761,7 +1857,12 @@ class GroupBy(IR):
                  col = child.evaluate(df, context=ExecutionContext.GROUPBY).obj
              else:
                  # Anything else, we pre-evaluate
-                 col = value.evaluate(df, context=ExecutionContext.GROUPBY).obj
+                 column = value.evaluate(df, context=ExecutionContext.GROUPBY)
+                 if column.size != keys[0].size:
+                     column = broadcast(
+                         column, target_length=keys[0].size, stream=df.stream
+                     )[0]
+                 col = column.obj
              requests.append(plc.groupby.GroupByRequest(col, [value.agg_request]))
              names.append(name)
          group_keys, raw_tables = grouper.aggregate(requests, stream=df.stream)
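The new branch guards against pre-evaluated aggregation inputs whose length does not match the broadcast key columns: they are now broadcast to the key length before being wrapped in a `plc.groupby.GroupByRequest`. One plausible query shape that exercises this path (a hypothetical example, assuming the GPU engine is enabled):

    import polars as pl

    lf = pl.LazyFrame({"k": [1, 1, 2], "v": [10, 20, 30]})
    # pl.lit(1) is not an aggregation, so cudf-polars pre-evaluates it against
    # the input frame, yielding a length-1 column that must be broadcast to
    # the length of the key columns before the groupby request is built.
    out = lf.group_by("k").agg(pl.lit(1)).collect(engine="gpu")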
@@ -2030,54 +2131,48 @@ class ConditionalJoin(IR):
          context: IRExecutionContext,
      ) -> DataFrame:
          """Evaluate and return a dataframe."""
-         stream = get_joined_cuda_stream(
-             context.get_cuda_stream,
-             upstreams=(
-                 left.stream,
-                 right.stream,
-             ),
-         )
-         left_casts, right_casts = _collect_decimal_binop_casts(
-             predicate_wrapper.predicate
-         )
-         _, _, zlice, suffix, _, _ = options
- 
-         lg, rg = plc.join.conditional_inner_join(
-             _apply_casts(left, left_casts).table,
-             _apply_casts(right, right_casts).table,
-             predicate_wrapper.ast,
-             stream=stream,
-         )
-         left_result = DataFrame.from_table(
-             plc.copying.gather(
-                 left.table, lg, plc.copying.OutOfBoundsPolicy.DONT_CHECK, stream=stream
-             ),
-             left.column_names,
-             left.dtypes,
-             stream=stream,
-         )
-         right_result = DataFrame.from_table(
-             plc.copying.gather(
-                 right.table, rg, plc.copying.OutOfBoundsPolicy.DONT_CHECK, stream=stream
-             ),
-             right.column_names,
-             right.dtypes,
-             stream=stream,
-         )
-         right_result = right_result.rename_columns(
-             {
-                 name: f"{name}{suffix}"
-                 for name in right.column_names
-                 if name in left.column_names_set
-             }
-         )
-         result = left_result.with_columns(right_result.columns, stream=stream)
+         with context.stream_ordered_after(left, right) as stream:
+             left_casts, right_casts = _collect_decimal_binop_casts(
+                 predicate_wrapper.predicate
+             )
+             _, _, zlice, suffix, _, _ = options
 
-         # Join the original streams back into the result stream to ensure that the
-         # deallocations (on the original streams) happen after the result is ready
-         join_cuda_streams(
-             downstreams=(left.stream, right.stream), upstreams=(result.stream,)
-         )
+             lg, rg = plc.join.conditional_inner_join(
+                 _apply_casts(left, left_casts).table,
+                 _apply_casts(right, right_casts).table,
+                 predicate_wrapper.ast,
+                 stream=stream,
+             )
+             left_result = DataFrame.from_table(
+                 plc.copying.gather(
+                     left.table,
+                     lg,
+                     plc.copying.OutOfBoundsPolicy.DONT_CHECK,
+                     stream=stream,
+                 ),
+                 left.column_names,
+                 left.dtypes,
+                 stream=stream,
+             )
+             right_result = DataFrame.from_table(
+                 plc.copying.gather(
+                     right.table,
+                     rg,
+                     plc.copying.OutOfBoundsPolicy.DONT_CHECK,
+                     stream=stream,
+                 ),
+                 right.column_names,
+                 right.dtypes,
+                 stream=stream,
+             )
+             right_result = right_result.rename_columns(
+                 {
+                     name: f"{name}{suffix}"
+                     for name in right.column_names
+                     if name in left.column_names_set
+                 }
+             )
+             result = left_result.with_columns(right_result.columns, stream=stream)
 
          return result.slice(zlice)
 
@@ -2328,162 +2423,162 @@ class Join(IR):
          context: IRExecutionContext,
      ) -> DataFrame:
          """Evaluate and return a dataframe."""
-         stream = get_joined_cuda_stream(
-             context.get_cuda_stream, upstreams=(left.stream, right.stream)
-         )
-         how, nulls_equal, zlice, suffix, coalesce, maintain_order = options
-         if how == "Cross":
-             # Separate implementation, since cross_join returns the
-             # result, not the gather maps
-             if right.num_rows == 0:
-                 left_cols = Join._build_columns(
-                     [], left.columns, empty=True, stream=stream
-                 )
-                 right_cols = Join._build_columns(
-                     [],
-                     right.columns,
-                     left=False,
-                     empty=True,
-                     rename=lambda name: name
-                     if name not in left.column_names_set
-                     else f"{name}{suffix}",
-                     stream=stream,
-                 )
-                 result = DataFrame([*left_cols, *right_cols], stream=stream)
+         with context.stream_ordered_after(left, right) as stream:
+             how, nulls_equal, zlice, suffix, coalesce, maintain_order = options
+             if how == "Cross":
+                 # Separate implementation, since cross_join returns the
+                 # result, not the gather maps
+                 if right.num_rows == 0:
+                     left_cols = Join._build_columns(
+                         [], left.columns, empty=True, stream=stream
+                     )
+                     right_cols = Join._build_columns(
+                         [],
+                         right.columns,
+                         left=False,
+                         empty=True,
+                         rename=lambda name: name
+                         if name not in left.column_names_set
+                         else f"{name}{suffix}",
+                         stream=stream,
+                     )
+                     result = DataFrame([*left_cols, *right_cols], stream=stream)
+                 else:
+                     columns = plc.join.cross_join(
+                         left.table, right.table, stream=stream
+                     ).columns()
+                     left_cols = Join._build_columns(
+                         columns[: left.num_columns], left.columns, stream=stream
+                     )
+                     right_cols = Join._build_columns(
+                         columns[left.num_columns :],
+                         right.columns,
+                         rename=lambda name: name
+                         if name not in left.column_names_set
+                         else f"{name}{suffix}",
+                         left=False,
+                         stream=stream,
+                     )
+                     result = DataFrame([*left_cols, *right_cols], stream=stream).slice(
+                         zlice
+                     )
+ 
              else:
-             columns = plc.join.cross_join(
-                 left.table, right.table, stream=stream
-             ).columns()
-             left_cols = Join._build_columns(
-                 columns[: left.num_columns], left.columns, stream=stream
-             )
-             right_cols = Join._build_columns(
-                 columns[left.num_columns :],
-                 right.columns,
-                 rename=lambda name: name
-                 if name not in left.column_names_set
-                 else f"{name}{suffix}",
-                 left=False,
+                 # how != "Cross"
+                 # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184
+                 left_on = DataFrame(
+                     broadcast(
+                         *(e.evaluate(left) for e in left_on_exprs), stream=stream
+                     ),
                      stream=stream,
                  )
-             result = DataFrame([*left_cols, *right_cols], stream=stream).slice(
-                 zlice
-             )
- 
-         else:
-             # how != "Cross"
-             # TODO: Waiting on clarity based on https://github.com/pola-rs/polars/issues/17184
-             left_on = DataFrame(
-                 broadcast(*(e.evaluate(left) for e in left_on_exprs), stream=stream),
-                 stream=stream,
-             )
-             right_on = DataFrame(
-                 broadcast(*(e.evaluate(right) for e in right_on_exprs), stream=stream),
-                 stream=stream,
-             )
-             null_equality = (
-                 plc.types.NullEquality.EQUAL
-                 if nulls_equal
-                 else plc.types.NullEquality.UNEQUAL
-             )
-             join_fn, left_policy, right_policy = cls._joiners(how)
-             if right_policy is None:
-                 # Semi join
-                 lg = join_fn(left_on.table, right_on.table, null_equality, stream)
-                 table = plc.copying.gather(left.table, lg, left_policy, stream=stream)
-                 result = DataFrame.from_table(
-                     table, left.column_names, left.dtypes, stream=stream
+                 right_on = DataFrame(
+                     broadcast(
+                         *(e.evaluate(right) for e in right_on_exprs), stream=stream
+                     ),
+                     stream=stream,
                  )
-             else:
-                 if how == "Right":
-                     # Right join is a left join with the tables swapped
-                     left, right = right, left
-                     left_on, right_on = right_on, left_on
-                     maintain_order = Join.SWAPPED_ORDER[maintain_order]
- 
-                 lg, rg = join_fn(
-                     left_on.table, right_on.table, null_equality, stream=stream
+                 null_equality = (
+                     plc.types.NullEquality.EQUAL
+                     if nulls_equal
+                     else plc.types.NullEquality.UNEQUAL
                  )
-                 if (
-                     how in ("Inner", "Left", "Right", "Full")
-                     and maintain_order != "none"
-                 ):
-                     lg, rg = cls._reorder_maps(
-                         left.num_rows,
-                         lg,
-                         left_policy,
-                         right.num_rows,
-                         rg,
-                         right_policy,
-                         left_primary=maintain_order.startswith("left"),
-                         stream=stream,
+                 join_fn, left_policy, right_policy = cls._joiners(how)
+                 if right_policy is None:
+                     # Semi join
+                     lg = join_fn(left_on.table, right_on.table, null_equality, stream)
+                     table = plc.copying.gather(
+                         left.table, lg, left_policy, stream=stream
                      )
-                 if coalesce:
-                     if how == "Full":
-                         # In this case, keys must be column references,
-                         # possibly with dtype casting. We should use them in
-                         # preference to the columns from the original tables.
- 
-                         # We need to specify `stream` here. We know that `{left,right}_on`
-                         # is valid on `stream`, which is ordered after `{left,right}.stream`.
-                         left = left.with_columns(
-                             left_on.columns, replace_only=True, stream=stream
-                         )
-                         right = right.with_columns(
-                             right_on.columns, replace_only=True, stream=stream
+                     result = DataFrame.from_table(
+                         table, left.column_names, left.dtypes, stream=stream
+                     )
+                 else:
+                     if how == "Right":
+                         # Right join is a left join with the tables swapped
+                         left, right = right, left
+                         left_on, right_on = right_on, left_on
+                         maintain_order = Join.SWAPPED_ORDER[maintain_order]
+ 
+                     lg, rg = join_fn(
+                         left_on.table, right_on.table, null_equality, stream=stream
+                     )
+                     if (
+                         how in ("Inner", "Left", "Right", "Full")
+                         and maintain_order != "none"
+                     ):
+                         lg, rg = cls._reorder_maps(
+                             left.num_rows,
+                             lg,
+                             left_policy,
+                             right.num_rows,
+                             rg,
+                             right_policy,
+                             left_primary=maintain_order.startswith("left"),
+                             stream=stream,
                          )
-                     else:
-                         right = right.discard_columns(right_on.column_names_set)
-                 left = DataFrame.from_table(
-                     plc.copying.gather(left.table, lg, left_policy, stream=stream),
-                     left.column_names,
-                     left.dtypes,
-                     stream=stream,
-                 )
-                 right = DataFrame.from_table(
-                     plc.copying.gather(right.table, rg, right_policy, stream=stream),
-                     right.column_names,
-                     right.dtypes,
-                     stream=stream,
-                 )
-                 if coalesce and how == "Full":
-                     left = left.with_columns(
-                         (
-                             Column(
-                                 plc.replace.replace_nulls(
-                                     left_col.obj, right_col.obj, stream=stream
-                                 ),
-                                 name=left_col.name,
-                                 dtype=left_col.dtype,
+                     if coalesce:
+                         if how == "Full":
+                             # In this case, keys must be column references,
+                             # possibly with dtype casting. We should use them in
+                             # preference to the columns from the original tables.
+ 
+                             # We need to specify `stream` here. We know that `{left,right}_on`
+                             # is valid on `stream`, which is ordered after `{left,right}.stream`.
+                             left = left.with_columns(
+                                 left_on.columns, replace_only=True, stream=stream
                              )
-                             for left_col, right_col in zip(
-                                 left.select_columns(left_on.column_names_set),
-                                 right.select_columns(right_on.column_names_set),
-                                 strict=True,
+                             right = right.with_columns(
+                                 right_on.columns, replace_only=True, stream=stream
                              )
+                         else:
+                             right = right.discard_columns(right_on.column_names_set)
+                     left = DataFrame.from_table(
+                         plc.copying.gather(left.table, lg, left_policy, stream=stream),
+                         left.column_names,
+                         left.dtypes,
+                         stream=stream,
+                     )
+                     right = DataFrame.from_table(
+                         plc.copying.gather(
+                             right.table, rg, right_policy, stream=stream
                          ),
-                         replace_only=True,
+                         right.column_names,
+                         right.dtypes,
                          stream=stream,
                      )
-                 right = right.discard_columns(right_on.column_names_set)
-                 if how == "Right":
-                     # Undo the swap for right join before gluing together.
-                     left, right = right, left
-                 right = right.rename_columns(
-                     {
-                         name: f"{name}{suffix}"
-                         for name in right.column_names
-                         if name in left.column_names_set
-                     }
-                 )
-                 result = left.with_columns(right.columns, stream=stream)
-                 result = result.slice(zlice)
- 
-         # Join the original streams back into the result stream to ensure that the
-         # deallocations (on the original streams) happen after the result is ready
-         join_cuda_streams(
-             downstreams=(left.stream, right.stream), upstreams=(result.stream,)
-         )
+                     if coalesce and how == "Full":
+                         left = left.with_columns(
+                             (
+                                 Column(
+                                     plc.replace.replace_nulls(
+                                         left_col.obj, right_col.obj, stream=stream
+                                     ),
+                                     name=left_col.name,
+                                     dtype=left_col.dtype,
+                                 )
+                                 for left_col, right_col in zip(
+                                     left.select_columns(left_on.column_names_set),
+                                     right.select_columns(right_on.column_names_set),
+                                     strict=True,
+                                 )
+                             ),
+                             replace_only=True,
+                             stream=stream,
+                         )
+                     right = right.discard_columns(right_on.column_names_set)
+                     if how == "Right":
+                         # Undo the swap for right join before gluing together.
+                         left, right = right, left
+                     right = right.rename_columns(
+                         {
+                             name: f"{name}{suffix}"
+                             for name in right.column_names
+                             if name in left.column_names_set
+                         }
+                     )
+                     result = left.with_columns(right.columns, stream=stream)
+                 result = result.slice(zlice)
 
          return result
 
@@ -2826,33 +2921,25 @@ class MergeSorted(IR):
          cls, key: str, *dfs: DataFrame, context: IRExecutionContext
      ) -> DataFrame:
          """Evaluate and return a dataframe."""
-         stream = get_joined_cuda_stream(
-             context.get_cuda_stream, upstreams=[df.stream for df in dfs]
-         )
-         left, right = dfs
-         right = right.discard_columns(right.column_names_set - left.column_names_set)
-         on_col_left = left.select_columns({key})[0]
-         on_col_right = right.select_columns({key})[0]
-         result = DataFrame.from_table(
-             plc.merge.merge(
-                 [right.table, left.table],
-                 [left.column_names.index(key), right.column_names.index(key)],
-                 [on_col_left.order, on_col_right.order],
-                 [on_col_left.null_order, on_col_right.null_order],
+         with context.stream_ordered_after(*dfs) as stream:
+             left, right = dfs
+             right = right.discard_columns(
+                 right.column_names_set - left.column_names_set
+             )
+             on_col_left = left.select_columns({key})[0]
+             on_col_right = right.select_columns({key})[0]
+             return DataFrame.from_table(
+                 plc.merge.merge(
+                     [right.table, left.table],
+                     [left.column_names.index(key), right.column_names.index(key)],
+                     [on_col_left.order, on_col_right.order],
+                     [on_col_left.null_order, on_col_right.null_order],
+                     stream=stream,
+                 ),
+                 left.column_names,
+                 left.dtypes,
                  stream=stream,
-             ),
-             left.column_names,
-             left.dtypes,
-             stream=stream,
-         )
- 
-         # Join the original streams back into the result stream to ensure that the
-         # deallocations (on the original streams) happen after the result is ready
-         join_cuda_streams(
-             downstreams=[df.stream for df in dfs], upstreams=(result.stream,)
-         )
- 
-         return result
+             )
 
 
  class MapFunction(IR):
@@ -2928,6 +3015,8 @@ class MapFunction(IR):
              )
          elif self.name == "row_index":
              col_name, offset = options
+             if col_name in df.schema:
+                 raise NotImplementedError("Duplicate row index name")
              self.options = (col_name, offset)
          elif self.name == "fast_count":
              # TODO: Remove this once all scan types support projections
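The new `row_index` guard rejects a row-index name that collides with a column already present in the child schema. Because the check raises NotImplementedError while the GPU plan is being built, cudf-polars will (under its default fallback configuration) hand such a query back to polars' CPU engine, which reports the duplicate itself. A hypothetical query of the rejected shape:

    import polars as pl

    lf = pl.LazyFrame({"index": [1, 2], "v": [3, 4]})
    # "index" already exists, so at collect(engine="gpu") time the GPU engine
    # now refuses this plan with NotImplementedError("Duplicate row index name")
    # instead of building a frame containing two columns with the same name.
    q = lf.with_row_index("index")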
@@ -3078,26 +3167,14 @@ class Union(IR):
          cls, zlice: Zlice | None, *dfs: DataFrame, context: IRExecutionContext
      ) -> DataFrame:
          """Evaluate and return a dataframe."""
-         stream = get_joined_cuda_stream(
-             context.get_cuda_stream, upstreams=[df.stream for df in dfs]
-         )
- 
-         # TODO: only evaluate what we need if we have a slice?
-         result = DataFrame.from_table(
-             plc.concatenate.concatenate([df.table for df in dfs], stream=stream),
-             dfs[0].column_names,
-             dfs[0].dtypes,
-             stream=stream,
-         ).slice(zlice)
- 
-         # now join the original streams *back* to the new result stream
-         # to ensure that the deallocations (on the original streams)
-         # happen after the result is ready
-         join_cuda_streams(
-             downstreams=[df.stream for df in dfs], upstreams=(result.stream,)
-         )
- 
-         return result
+         with context.stream_ordered_after(*dfs) as stream:
+             # TODO: only evaluate what we need if we have a slice?
+             return DataFrame.from_table(
+                 plc.concatenate.concatenate([df.table for df in dfs], stream=stream),
+                 dfs[0].column_names,
+                 dfs[0].dtypes,
+                 stream=stream,
+             ).slice(zlice)
 
 
  class HConcat(IR):
@@ -3160,48 +3237,41 @@ class HConcat(IR):
          context: IRExecutionContext,
      ) -> DataFrame:
          """Evaluate and return a dataframe."""
-         stream = get_joined_cuda_stream(
-             context.get_cuda_stream, upstreams=[df.stream for df in dfs]
-         )
- 
-         # Special should_broadcast case.
-         # Used to recombine decomposed expressions
-         if should_broadcast:
-             result = DataFrame(
-                 broadcast(
-                     *itertools.chain.from_iterable(df.columns for df in dfs),
+         with context.stream_ordered_after(*dfs) as stream:
+             # Special should_broadcast case.
+             # Used to recombine decomposed expressions
+             if should_broadcast:
+                 result = DataFrame(
+                     broadcast(
+                         *itertools.chain.from_iterable(df.columns for df in dfs),
+                         stream=stream,
+                     ),
                      stream=stream,
-                 ),
-                 stream=stream,
-             )
-         else:
-             max_rows = max(df.num_rows for df in dfs)
-             # Horizontal concatenation extends shorter tables with nulls
-             result = DataFrame(
-                 itertools.chain.from_iterable(
-                     df.columns
-                     for df in (
-                         df
-                         if df.num_rows == max_rows
-                         else DataFrame.from_table(
-                             cls._extend_with_nulls(
-                                 df.table, nrows=max_rows - df.num_rows, stream=stream
-                             ),
-                             df.column_names,
-                             df.dtypes,
-                             stream=stream,
+                 )
+             else:
+                 max_rows = max(df.num_rows for df in dfs)
+                 # Horizontal concatenation extends shorter tables with nulls
+                 result = DataFrame(
+                     itertools.chain.from_iterable(
+                         df.columns
+                         for df in (
+                             df
+                             if df.num_rows == max_rows
+                             else DataFrame.from_table(
+                                 cls._extend_with_nulls(
+                                     df.table,
+                                     nrows=max_rows - df.num_rows,
+                                     stream=stream,
+                                 ),
+                                 df.column_names,
+                                 df.dtypes,
+                                 stream=stream,
+                             )
+                             for df in dfs
                          )
-                     for df in dfs
-                 )
-             ),
-             stream=stream,
-         )
- 
-         # Join the original streams back into the result stream to ensure that the
-         # deallocations (on the original streams) happen after the result is ready
-         join_cuda_streams(
-             downstreams=[df.stream for df in dfs], upstreams=(result.stream,)
-         )
+                     ),
+                     stream=stream,
+                 )
 
          return result