cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +60 -15
- cudf_polars/containers/column.py +137 -77
- cudf_polars/containers/dataframe.py +123 -34
- cudf_polars/containers/datatype.py +134 -13
- cudf_polars/dsl/expr.py +0 -2
- cudf_polars/dsl/expressions/aggregation.py +80 -28
- cudf_polars/dsl/expressions/binaryop.py +34 -14
- cudf_polars/dsl/expressions/boolean.py +110 -37
- cudf_polars/dsl/expressions/datetime.py +59 -30
- cudf_polars/dsl/expressions/literal.py +11 -5
- cudf_polars/dsl/expressions/rolling.py +460 -119
- cudf_polars/dsl/expressions/selection.py +9 -8
- cudf_polars/dsl/expressions/slicing.py +1 -1
- cudf_polars/dsl/expressions/string.py +256 -114
- cudf_polars/dsl/expressions/struct.py +19 -7
- cudf_polars/dsl/expressions/ternary.py +33 -3
- cudf_polars/dsl/expressions/unary.py +126 -64
- cudf_polars/dsl/ir.py +1053 -350
- cudf_polars/dsl/to_ast.py +30 -13
- cudf_polars/dsl/tracing.py +194 -0
- cudf_polars/dsl/translate.py +307 -107
- cudf_polars/dsl/utils/aggregations.py +43 -30
- cudf_polars/dsl/utils/reshape.py +14 -2
- cudf_polars/dsl/utils/rolling.py +12 -8
- cudf_polars/dsl/utils/windows.py +35 -20
- cudf_polars/experimental/base.py +55 -2
- cudf_polars/experimental/benchmarks/pdsds.py +12 -126
- cudf_polars/experimental/benchmarks/pdsh.py +792 -2
- cudf_polars/experimental/benchmarks/utils.py +596 -39
- cudf_polars/experimental/dask_registers.py +47 -20
- cudf_polars/experimental/dispatch.py +9 -3
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +15 -2
- cudf_polars/experimental/expressions.py +30 -15
- cudf_polars/experimental/groupby.py +25 -4
- cudf_polars/experimental/io.py +156 -124
- cudf_polars/experimental/join.py +53 -23
- cudf_polars/experimental/parallel.py +68 -19
- cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
- cudf_polars/experimental/rapidsmpf/core.py +488 -0
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
- cudf_polars/experimental/rapidsmpf/io.py +696 -0
- cudf_polars/experimental/rapidsmpf/join.py +322 -0
- cudf_polars/experimental/rapidsmpf/lower.py +74 -0
- cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
- cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
- cudf_polars/experimental/rapidsmpf/union.py +115 -0
- cudf_polars/experimental/rapidsmpf/utils.py +374 -0
- cudf_polars/experimental/repartition.py +9 -2
- cudf_polars/experimental/select.py +177 -14
- cudf_polars/experimental/shuffle.py +46 -12
- cudf_polars/experimental/sort.py +100 -26
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/experimental/statistics.py +24 -5
- cudf_polars/experimental/utils.py +25 -7
- cudf_polars/testing/asserts.py +13 -8
- cudf_polars/testing/io.py +2 -1
- cudf_polars/testing/plugin.py +93 -17
- cudf_polars/typing/__init__.py +86 -32
- cudf_polars/utils/config.py +473 -58
- cudf_polars/utils/cuda_stream.py +70 -0
- cudf_polars/utils/versions.py +5 -4
- cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
- cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
- cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/experimental/parallel.py:

```diff
@@ -22,6 +22,7 @@ from cudf_polars.dsl.ir import (
     Filter,
     HConcat,
     HStack,
+    IRExecutionContext,
     MapFunction,
     Projection,
     Slice,
@@ -42,7 +43,9 @@ if TYPE_CHECKING:
     from collections.abc import MutableMapping
     from typing import Any
 
-
+    import polars as pl
+
+    from cudf_polars.experimental.base import StatsCollector
     from cudf_polars.experimental.dispatch import LowerIRTransformer, State
     from cudf_polars.utils.config import ConfigOptions
 
@@ -59,7 +62,7 @@ def _(
 
 def lower_ir_graph(
     ir: IR, config_options: ConfigOptions
-) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
+) -> tuple[IR, MutableMapping[IR, PartitionInfo], StatsCollector]:
     """
     Rewrite an IR graph and extract partitioning information.
 
@@ -72,9 +75,10 @@ def lower_ir_graph(
 
     Returns
     -------
-    new_ir, partition_info
-        The rewritten graph,
-        in the new graph to associated partitioning information
+    new_ir, partition_info, stats
+        The rewritten graph, a mapping from unique nodes
+        in the new graph to associated partitioning information,
+        and the statistics collector.
 
     Notes
     -----
@@ -90,7 +94,7 @@ def lower_ir_graph(
         "stats": collect_statistics(ir, config_options),
     }
     mapper: LowerIRTransformer = CachingVisitor(lower_ir_node, state=state)
-    return mapper(ir)
+    return *mapper(ir), state["stats"]
 
 
 def task_graph(
```
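For downstream code, the practical change is the widened return value of `lower_ir_graph`. A minimal sketch of the new calling convention (the wrapper function is illustrative, not part of this diff):

```python
from cudf_polars.experimental.parallel import lower_ir_graph

def lower(ir, config_options):
    # 26.2.0: a StatsCollector now rides along with the rewritten graph
    # and the per-node PartitionInfo mapping.
    new_ir, partition_info, stats = lower_ir_graph(ir, config_options)
    # 25.10.0 equivalent was: new_ir, partition_info = lower_ir_graph(...)
    return new_ir, partition_info, stats
```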
```diff
@@ -110,6 +114,8 @@ def task_graph(
         associated partitioning information.
     config_options
         GPUEngine configuration options.
+    context
+        Runtime context for IR node execution.
 
     Returns
     -------
@@ -130,9 +136,13 @@ def task_graph(
     --------
     generate_ir_tasks
     """
+    context = IRExecutionContext.from_config_options(config_options)
     graph = reduce(
         operator.or_,
-        (
+        (
+            generate_ir_tasks(node, partition_info, context=context)
+            for node in traversal([ir])
+        ),
     )
 
     key_name = get_key_name(ir)
@@ -140,7 +150,10 @@
 
     key: str | tuple[str, int]
     if partition_count > 1:
-        graph[key_name] = (
+        graph[key_name] = (
+            partial(_concat, context=context),
+            *partition_info[ir].keys(ir),
+        )
         key = key_name
     else:
         key = (key_name, 0)
@@ -158,10 +171,10 @@ def get_scheduler(config_options: ConfigOptions) -> Any:
             "'in-memory' executor not supported in 'generate_ir_tasks'"
         )
 
-
+    cluster = config_options.executor.cluster
 
     if (
-
+        cluster == "distributed"
     ):  # pragma: no cover; block depends on executor type and Distributed cluster
         from distributed import get_client
 
@@ -171,12 +184,12 @@ def get_scheduler(config_options: ConfigOptions) -> Any:
         DaskRegisterManager.register_once()
         DaskRegisterManager.run_on_cluster(client)
         return client.get
-    elif
+    elif cluster == "single":
        from cudf_polars.experimental.scheduler import synchronous_scheduler
 
         return synchronous_scheduler
     else:  # pragma: no cover
-        raise ValueError(f"{
+        raise ValueError(f"{cluster} not a supported cluster option.")
 
 
 def post_process_task_graph(
```
```diff
@@ -214,10 +227,34 @@ def post_process_task_graph(
     return graph
 
 
+def evaluate_rapidsmpf(
+    ir: IR,
+    config_options: ConfigOptions,
+) -> pl.DataFrame:  # pragma: no cover; rapidsmpf runtime not tested in CI yet
+    """
+    Evaluate with the RapidsMPF streaming runtime.
+
+    Parameters
+    ----------
+    ir
+        Logical plan to evaluate.
+    config_options
+        GPUEngine configuration options.
+
+    Returns
+    -------
+    A cudf-polars DataFrame object.
+    """
+    from cudf_polars.experimental.rapidsmpf.core import evaluate_logical_plan
+
+    result, _ = evaluate_logical_plan(ir, config_options, collect_metadata=False)
+    return result
+
+
 def evaluate_streaming(
     ir: IR,
     config_options: ConfigOptions,
-) -> DataFrame:
+) -> pl.DataFrame:
     """
     Evaluate an IR graph with partitioning.
 
@@ -235,16 +272,26 @@ def evaluate_streaming(
     # Clear source info cache in case data was overwritten
     _clear_source_info_cache()
 
-
+    assert config_options.executor.name == "streaming", "Executor must be streaming"
+    if (
+        config_options.executor.runtime == "rapidsmpf"
+    ):  # pragma: no cover; rapidsmpf runtime not tested in CI yet
+        # Using the RapidsMPF streaming runtime.
+        return evaluate_rapidsmpf(ir, config_options)
+    else:
+        # Using the default task engine.
+        ir, partition_info, _ = lower_ir_graph(ir, config_options)
 
-
+        graph, key = task_graph(ir, partition_info, config_options)
 
-
+        return get_scheduler(config_options)(graph, key).to_polars()
 
 
 @generate_ir_tasks.register(IR)
 def _(
-    ir: IR,
+    ir: IR,
+    partition_info: MutableMapping[IR, PartitionInfo],
+    context: IRExecutionContext,
 ) -> MutableMapping[Any, Any]:
     # Generate pointwise (embarrassingly-parallel) tasks by default
     child_names = [get_key_name(c) for c in ir.children]
```
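How a user opts into the new runtime is not shown in this diff, but the `config_options.executor.runtime` check suggests it is a streaming-executor option. A hedged sketch, assuming the option is spelled `"runtime"` inside `executor_options` (the exact user-facing spelling is an assumption):

```python
import polars as pl

# Assumption: the runtime is selected via the streaming executor's options,
# mirroring the config_options.executor.runtime attribute checked above.
engine = pl.GPUEngine(
    executor="streaming",
    executor_options={"runtime": "rapidsmpf"},  # anything else falls back to the task engine
)
# result = lazy_frame.collect(engine=engine)
```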
```diff
@@ -252,7 +299,7 @@ def _(
 
     return {
         key: (
-            ir.do_evaluate,
+            partial(ir.do_evaluate, context=context),
             *ir._non_child_args,
             *[
                 (child_name, 0 if bcast_child[j] else i)
```
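The graph values stay in dask-style task-tuple form, which is why `functools.partial` is used to bind `context` rather than changing the tuple shape. A toy illustration of how a scheduler consumes such a tuple (stand-in functions, not cudf-polars code):

```python
from functools import partial

def do_evaluate(a, b, *, context=None):
    # Stand-in for IR.do_evaluate: a trivial pointwise computation.
    return (a + b, context)

context = {"stream": "default"}  # stand-in for IRExecutionContext
task = (partial(do_evaluate, context=context), 1, 2)

# A minimal scheduler step: first element is the callable, the rest are args.
func, *args = task
print(func(*args))  # (3, {'stream': 'default'})
```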
```diff
@@ -292,7 +339,9 @@ def _(
 
 @generate_ir_tasks.register(Union)
 def _(
-    ir: Union,
+    ir: Union,
+    partition_info: MutableMapping[IR, PartitionInfo],
+    context: IRExecutionContext,
 ) -> MutableMapping[Any, Any]:
     key_name = get_key_name(ir)
     partition = itertools.count()
```
cudf_polars/experimental/rapidsmpf/collectives/__init__.py (new file):

```diff
@@ -0,0 +1,9 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+"""Collective operations for the RapidsMPF streaming runtime."""
+
+from __future__ import annotations
+
+from cudf_polars.experimental.rapidsmpf.collectives.common import ReserveOpIDs
+
+__all__ = ["ReserveOpIDs"]
```
cudf_polars/experimental/rapidsmpf/collectives/allgather.py (new file):

```diff
@@ -0,0 +1,90 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+"""AllGather logic for the RapidsMPF streaming runtime."""
+
+from __future__ import annotations
+
+import asyncio
+from typing import TYPE_CHECKING
+
+from rapidsmpf.integrations.cudf.partition import unpack_and_concat
+from rapidsmpf.memory.packed_data import PackedData
+from rapidsmpf.streaming.coll.allgather import AllGather
+
+from pylibcudf.contiguous_split import pack
+
+if TYPE_CHECKING:
+    from rapidsmpf.streaming.core.context import Context
+    from rapidsmpf.streaming.cudf.table_chunk import TableChunk
+
+    import pylibcudf as plc
+    from rmm.pylibrmm.stream import Stream
+
+
+class AllGatherManager:
+    """
+    AllGather manager.
+
+    Parameters
+    ----------
+    context: Context
+        The streaming context.
+    op_id: int
+        Pre-allocated operation ID for this operation.
+    """
+
+    def __init__(self, context: Context, op_id: int):
+        self.context = context
+        self.allgather = AllGather(self.context, op_id)
+
+    def insert(self, sequence_number: int, chunk: TableChunk) -> None:
+        """
+        Insert a chunk into the AllGatherContext.
+
+        Parameters
+        ----------
+        sequence_number: int
+            The sequence number of the chunk to insert.
+        chunk: TableChunk
+            The table chunk to insert.
+        """
+        self.allgather.insert(
+            sequence_number,
+            PackedData.from_cudf_packed_columns(
+                pack(
+                    chunk.table_view(),
+                    chunk.stream,
+                ),
+                chunk.stream,
+                self.context.br(),
+            ),
+        )
+        del chunk
+
+    def insert_finished(self) -> None:
+        """Insert finished into the AllGatherManager."""
+        self.allgather.insert_finished()
+
+    async def extract_concatenated(
+        self, stream: Stream, *, ordered: bool = True
+    ) -> plc.Table:
+        """
+        Extract the concatenated result.
+
+        Parameters
+        ----------
+        stream: Stream
+            The stream to use for chunk extraction.
+        ordered: bool
+            Whether to extract the data in ordered or unordered fashion.
+
+        Returns
+        -------
+        The concatenated AllGather result.
+        """
+        return await asyncio.to_thread(
+            unpack_and_concat,
+            partitions=await self.allgather.extract_all(self.context, ordered=ordered),
+            stream=stream,
+            br=self.context.br(),
+        )
```
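A sketch of the intended call sequence, assuming a rapidsmpf streaming `context`, a reserved `op_id`, an iterable of `TableChunk`s, and a CUDA `stream` already exist (that setup is not part of this file):

```python
# Hedged sketch of the AllGatherManager lifecycle; only the manager's own
# methods come from the file above, the driver loop is illustrative.
async def gather_all(context, op_id, chunks, stream):
    manager = AllGatherManager(context, op_id)
    for seq, chunk in enumerate(chunks):
        manager.insert(seq, chunk)   # pack + hand off to the collective
    manager.insert_finished()        # signal: no more local chunks
    # Waits for every rank's chunks, then unpacks and concatenates them.
    return await manager.extract_concatenated(stream, ordered=True)
```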
cudf_polars/experimental/rapidsmpf/collectives/common.py (new file):

```diff
@@ -0,0 +1,96 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+"""Common utilities for collective operations."""
+
+from __future__ import annotations
+
+import threading
+from typing import TYPE_CHECKING, Literal
+
+from rapidsmpf.shuffler import Shuffler
+
+from cudf_polars.dsl.traversal import traversal
+from cudf_polars.experimental.join import Join
+from cudf_polars.experimental.repartition import Repartition
+from cudf_polars.experimental.shuffle import Shuffle
+
+if TYPE_CHECKING:
+    from types import TracebackType
+
+    from cudf_polars.dsl.ir import IR
+
+
+# Set of available collective IDs
+_collective_id_vacancy: set[int] = set(range(Shuffler.max_concurrent_shuffles))
+_collective_id_vacancy_lock: threading.Lock = threading.Lock()
+
+
+def _get_new_collective_id() -> int:
+    with _collective_id_vacancy_lock:
+        if not _collective_id_vacancy:
+            raise ValueError(
+                f"Cannot shuffle more than {Shuffler.max_concurrent_shuffles} "
+                "times in a single query."
+            )
+
+        return _collective_id_vacancy.pop()
+
+
+def _release_collective_id(collective_id: int) -> None:
+    """Release a collective ID back to the vacancy set."""
+    with _collective_id_vacancy_lock:
+        _collective_id_vacancy.add(collective_id)
+
+
+class ReserveOpIDs:
+    """
+    Context manager to reserve collective IDs for pipeline execution.
+
+    Parameters
+    ----------
+    ir : IR
+        The root IR node of the pipeline.
+
+    Notes
+    -----
+    This context manager:
+    1. Identifies all Shuffle nodes in the IR
+    2. Reserves collective IDs from the vacancy pool
+    3. Creates a mapping from IR nodes to their reserved IDs
+    4. Releases all IDs back to the pool on __exit__
+    """
+
+    def __init__(self, ir: IR):
+        # Find all collective IR nodes.
+        self.collective_nodes: list[IR] = [
+            node
+            for node in traversal([ir])
+            if isinstance(node, (Shuffle, Join, Repartition))
+        ]
+        self.collective_id_map: dict[IR, int] = {}
+
+    def __enter__(self) -> dict[IR, int]:
+        """
+        Reserve collective IDs and return the mapping.
+
+        Returns
+        -------
+        collective_id_map : dict[IR, int]
+            Mapping from IR nodes to their reserved collective IDs.
+        """
+        # Reserve IDs and map nodes directly to their IDs
+        for node in self.collective_nodes:
+            self.collective_id_map[node] = _get_new_collective_id()
+
+        return self.collective_id_map
+
+    def __exit__(
+        self,
+        exc_type: type | None,
+        exc_val: Exception | None,
+        exc_tb: TracebackType | None,
+    ) -> Literal[False]:
+        """Release all reserved collective IDs back to the vacancy pool."""
+        for collective_id in self.collective_id_map.values():
+            _release_collective_id(collective_id)
+        return False
```
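Usage follows the standard context-manager pattern; in the sketch below, `build_and_run_pipeline` is a hypothetical placeholder for whatever consumes the ID mapping:

```python
from cudf_polars.experimental.rapidsmpf.collectives import ReserveOpIDs

def run_with_reserved_ids(ir):
    # IDs are released back to the vacancy pool when the block exits,
    # even if pipeline execution raises.
    with ReserveOpIDs(ir) as collective_id_map:
        # Every Shuffle/Join/Repartition node in `ir` now has a unique
        # collective ID, bounded by Shuffler.max_concurrent_shuffles.
        return build_and_run_pipeline(ir, collective_id_map)  # placeholder
```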
cudf_polars/experimental/rapidsmpf/collectives/shuffle.py (new file):

```diff
@@ -0,0 +1,253 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+"""Shuffle logic for the RapidsMPF streaming runtime."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from rapidsmpf.integrations.cudf.partition import (
+    partition_and_pack as py_partition_and_pack,
+    unpack_and_concat as py_unpack_and_concat,
+)
+from rapidsmpf.streaming.coll.shuffler import ShufflerAsync
+from rapidsmpf.streaming.core.message import Message
+from rapidsmpf.streaming.core.node import define_py_node
+from rapidsmpf.streaming.cudf.table_chunk import TableChunk
+
+from cudf_polars.dsl.expr import Col
+from cudf_polars.experimental.rapidsmpf.dispatch import (
+    generate_ir_sub_network,
+)
+from cudf_polars.experimental.rapidsmpf.nodes import shutdown_on_error
+from cudf_polars.experimental.rapidsmpf.utils import (
+    ChannelManager,
+    Metadata,
+)
+from cudf_polars.experimental.shuffle import Shuffle
+
+if TYPE_CHECKING:
+    from rapidsmpf.streaming.core.context import Context
+
+    import pylibcudf as plc
+    from rmm.pylibrmm.stream import Stream
+
+    from cudf_polars.dsl.ir import IR, IRExecutionContext
+    from cudf_polars.experimental.rapidsmpf.core import SubNetGenerator
+    from cudf_polars.experimental.rapidsmpf.utils import ChannelPair
+
+
+class ShuffleManager:
+    """
+    ShufflerAsync manager.
+
+    Parameters
+    ----------
+    context: Context
+        The streaming context.
+    num_partitions: int
+        The number of partitions to shuffle into.
+    columns_to_hash: tuple[int, ...]
+        The columns to hash.
+    collective_id: int
+        The collective ID.
+    """
+
+    def __init__(
+        self,
+        context: Context,
+        num_partitions: int,
+        columns_to_hash: tuple[int, ...],
+        collective_id: int,
+    ):
+        self.context = context
+        self.num_partitions = num_partitions
+        self.columns_to_hash = columns_to_hash
+        self.shuffler = ShufflerAsync(
+            context,
+            collective_id,
+            num_partitions,
+        )
+
+    def insert_chunk(self, chunk: TableChunk) -> None:
+        """
+        Insert a chunk into the ShuffleContext.
+
+        Parameters
+        ----------
+        chunk: TableChunk
+            The table chunk to insert.
+        """
+        # Partition and pack using the Python function
+        partitioned_chunks = py_partition_and_pack(
+            table=chunk.table_view(),
+            columns_to_hash=self.columns_to_hash,
+            num_partitions=self.num_partitions,
+            stream=chunk.stream,
+            br=self.context.br(),
+        )
+
+        # Insert into shuffler
+        self.shuffler.insert(partitioned_chunks)
+
+    async def insert_finished(self) -> None:
+        """Insert finished into the ShuffleManager."""
+        await self.shuffler.insert_finished(self.context)
+
+    async def extract_chunk(self, sequence_number: int, stream: Stream) -> plc.Table:
+        """
+        Extract a chunk from the ShuffleManager.
+
+        Parameters
+        ----------
+        sequence_number: int
+            The sequence number of the chunk to extract.
+        stream: Stream
+            The stream to use for chunk extraction.
+
+        Returns
+        -------
+        The extracted table.
+        """
+        partition_chunks = await self.shuffler.extract_async(
+            self.context, sequence_number
+        )
+        return py_unpack_and_concat(
+            partitions=partition_chunks,
+            stream=stream,
+            br=self.context.br(),
+        )
+
+
+@define_py_node()
+async def shuffle_node(
+    context: Context,
+    ir: Shuffle,
+    ir_context: IRExecutionContext,
+    ch_in: ChannelPair,
+    ch_out: ChannelPair,
+    columns_to_hash: tuple[int, ...],
+    num_partitions: int,
+    collective_id: int,
+) -> None:
+    """
+    Execute a local shuffle pipeline in a single node.
+
+    This node combines partition_and_pack, shuffler, and unpack_and_concat
+    into a single Python node using rapidsmpf.shuffler.Shuffler and utilities
+    from rapidsmpf.integrations.cudf.partition.
+
+    Parameters
+    ----------
+    context
+        The rapidsmpf context.
+    ir
+        The Shuffle IR node.
+    ir_context
+        The execution context for the IR node.
+    ch_in
+        Input ChannelPair with metadata and data channels.
+    ch_out
+        Output ChannelPair with metadata and data channels.
+    columns_to_hash
+        Tuple of column indices to use for hashing.
+    num_partitions
+        Number of partitions to shuffle into.
+    collective_id
+        The collective ID.
+    """
+    async with shutdown_on_error(
+        context, ch_in.metadata, ch_in.data, ch_out.metadata, ch_out.data
+    ):
+        # Receive and send updated metadata.
+        _ = await ch_in.recv_metadata(context)
+        column_names = list(ir.schema.keys())
+        partitioned_on = tuple(column_names[i] for i in columns_to_hash)
+        output_metadata = Metadata(
+            max(1, num_partitions // context.comm().nranks),
+            partitioned_on=partitioned_on,
+        )
+        await ch_out.send_metadata(context, output_metadata)
+
+        # Create ShuffleManager instance
+        shuffle = ShuffleManager(
+            context, num_partitions, columns_to_hash, collective_id
+        )
+
+        # Process input chunks
+        while (msg := await ch_in.data.recv(context)) is not None:
+            # Extract TableChunk from message and insert into shuffler
+            shuffle.insert_chunk(
+                TableChunk.from_message(msg).make_available_and_spill(
+                    context.br(), allow_overbooking=True
+                )
+            )
+            del msg
+
+        # Insert finished
+        await shuffle.insert_finished()
+
+        # Extract shuffled partitions and send them out
+        stream = ir_context.get_cuda_stream()
+        for partition_id in range(
+            # Round-robin partition assignment
+            context.comm().rank,
+            num_partitions,
+            context.comm().nranks,
+        ):
+            # Extract and send the output chunk
+            await ch_out.data.send(
+                context,
+                Message(
+                    partition_id,
+                    TableChunk.from_pylibcudf_table(
+                        table=await shuffle.extract_chunk(partition_id, stream),
+                        stream=stream,
+                        exclusive_view=True,
+                    ),
+                ),
+            )
+
+        await ch_out.data.drain(context)
+
+
+@generate_ir_sub_network.register(Shuffle)
+def _(
+    ir: Shuffle, rec: SubNetGenerator
+) -> tuple[dict[IR, list[Any]], dict[IR, ChannelManager]]:
+    # Local shuffle operation.
+
+    # Process children
+    (child,) = ir.children
+    nodes, channels = rec(child)
+
+    keys: list[Col] = [ne.value for ne in ir.keys if isinstance(ne.value, Col)]
+    if len(keys) != len(ir.keys):  # pragma: no cover
+        raise NotImplementedError("Shuffle requires simple keys.")
+    column_names = list(ir.schema.keys())
+
+    context = rec.state["context"]
+    columns_to_hash = tuple(column_names.index(k.name) for k in keys)
+    num_partitions = rec.state["partition_info"][ir].count
+
+    # Look up the reserved collective ID for this operation
+    collective_id = rec.state["collective_id_map"][ir]
+
+    # Create output ChannelManager
+    channels[ir] = ChannelManager(rec.state["context"])
+
+    # Complete shuffle node
+    nodes[ir] = [
+        shuffle_node(
+            context,
+            ir,
+            rec.state["ir_context"],
+            ch_in=channels[child].reserve_output_slot(),
+            ch_out=channels[ir].reserve_input_slot(),
+            columns_to_hash=columns_to_hash,
+            num_partitions=num_partitions,
+            collective_id=collective_id,
+        )
+    ]
+
+    return nodes, channels
```
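Note the round-robin partition ownership in `shuffle_node`: rank `r` extracts partitions `r, r + nranks, r + 2*nranks, ...`. A self-contained toy illustration of that striding:

```python
# Toy illustration of the round-robin assignment used in shuffle_node;
# the numbers are arbitrary, not taken from this diff.
num_partitions, nranks = 10, 3
for rank in range(nranks):
    owned = list(range(rank, num_partitions, nranks))
    print(rank, owned)
# 0 [0, 3, 6, 9]
# 1 [1, 4, 7]
# 2 [2, 5, 8]
```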