cudf-polars-cu12 25.2.1-py3-none-any.whl → 25.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +85 -53
- cudf_polars/containers/column.py +100 -7
- cudf_polars/containers/dataframe.py +16 -24
- cudf_polars/dsl/expr.py +3 -1
- cudf_polars/dsl/expressions/aggregation.py +3 -3
- cudf_polars/dsl/expressions/binaryop.py +2 -2
- cudf_polars/dsl/expressions/boolean.py +4 -4
- cudf_polars/dsl/expressions/datetime.py +39 -1
- cudf_polars/dsl/expressions/literal.py +3 -9
- cudf_polars/dsl/expressions/selection.py +2 -2
- cudf_polars/dsl/expressions/slicing.py +53 -0
- cudf_polars/dsl/expressions/sorting.py +1 -1
- cudf_polars/dsl/expressions/string.py +4 -4
- cudf_polars/dsl/expressions/unary.py +3 -2
- cudf_polars/dsl/ir.py +222 -93
- cudf_polars/dsl/nodebase.py +8 -1
- cudf_polars/dsl/translate.py +66 -38
- cudf_polars/experimental/base.py +18 -12
- cudf_polars/experimental/dask_serialize.py +22 -8
- cudf_polars/experimental/groupby.py +346 -0
- cudf_polars/experimental/io.py +13 -11
- cudf_polars/experimental/join.py +318 -0
- cudf_polars/experimental/parallel.py +57 -6
- cudf_polars/experimental/shuffle.py +194 -0
- cudf_polars/testing/plugin.py +23 -34
- cudf_polars/typing/__init__.py +33 -2
- cudf_polars/utils/config.py +138 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +14 -4
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +4 -3
- {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/METADATA +8 -7
- cudf_polars_cu12-25.4.0.dist-info/RECORD +55 -0
- {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu12-25.2.1.dist-info/RECORD +0 -48
- {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info/licenses}/LICENSE +0 -0
- {cudf_polars_cu12-25.2.1.dist-info → cudf_polars_cu12-25.4.0.dist-info}/top_level.txt +0 -0
cudf_polars/dsl/nodebase.py
CHANGED
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0

 """Base class for IR nodes, and utilities."""
@@ -58,6 +58,13 @@ class Node(Generic[T]):
         """
         return type(self)(*self._ctor_arguments(children))

+    def __reduce__(self):
+        """Pickle a Node object."""
+        return (
+            type(self),
+            self._ctor_arguments(self.children),
+        )
+
     def get_hashable(self) -> Hashable:
         """
         Return a hashable object for the node.
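The new `__reduce__` hook makes IR nodes picklable by rebuilding them from their constructor arguments, which is what lets plans and expressions be shipped to Dask workers by the experimental executor. A minimal standalone sketch of the same pattern (the `SimpleNode` class below is illustrative, not a cudf-polars type):

import pickle


class SimpleNode:
    """Toy stand-in for cudf_polars.dsl.nodebase.Node."""

    def __init__(self, *args):
        self.args = args  # non-child constructor arguments
        self.children = ()

    def _ctor_arguments(self, children):
        # Constructor arguments followed by the children (none here).
        return (*self.args, *children)

    def __reduce__(self):
        # Unpickling calls type(self)(*ctor_arguments) to rebuild the node.
        return (type(self), self._ctor_arguments(self.children))


node = SimpleNode("sum", 42)
clone = pickle.loads(pickle.dumps(node))
assert clone.args == node.args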
cudf_polars/dsl/translate.py
CHANGED
@@ -5,6 +5,7 @@

 from __future__ import annotations

+import copy
 import functools
 import json
 from contextlib import AbstractContextManager, nullcontext
@@ -23,7 +24,7 @@ import pylibcudf as plc
 from cudf_polars.dsl import expr, ir
 from cudf_polars.dsl.to_ast import insert_colrefs
 from cudf_polars.typing import NodeTraverser
-from cudf_polars.utils import dtypes, sorting
+from cudf_polars.utils import config, dtypes, sorting

 if TYPE_CHECKING:
     from polars import GPUEngine
@@ -41,13 +42,13 @@ class Translator:
     ----------
     visitor
         Polars NodeTraverser object
-
+    engine
         GPU engine configuration.
     """

-    def __init__(self, visitor: NodeTraverser,
+    def __init__(self, visitor: NodeTraverser, engine: GPUEngine):
         self.visitor = visitor
-        self.
+        self.config_options = config.ConfigOptions(copy.deepcopy(engine.config))
         self.errors: list[Exception] = []

     def translate_ir(self, *, n: int | None = None) -> ir.IR:
@@ -84,7 +85,7 @@ class Translator:
         # IR is versioned with major.minor, minor is bumped for backwards
         # compatible changes (e.g. adding new nodes), major is bumped for
         # incompatible changes (e.g. renaming nodes).
-        if (version := self.visitor.version()) >= (
+        if (version := self.visitor.version()) >= (6, 1):
             e = NotImplementedError(
                 f"No support for polars IR {version=}"
             )  # pragma: no cover; no such version for now.
@@ -227,13 +228,15 @@ def _(
     # TODO: with versioning, rename on the rust side
     skip_rows, n_rows = n_rows

+    if file_options.include_file_paths is not None:
+        raise NotImplementedError("No support for including file path in scan")
     row_index = file_options.row_index
     return ir.Scan(
         schema,
         typ,
         reader_options,
         cloud_options,
-        translator.
+        translator.config_options,
         node.paths,
         with_columns,
         skip_rows,
@@ -260,7 +263,7 @@ def _(
         schema,
         node.df,
         node.projection,
-        translator.
+        translator.config_options,
     )


@@ -288,6 +291,7 @@ def _(
         aggs,
         node.maintain_order,
         node.options,
+        translator.config_options,
         inp,
     )

@@ -299,38 +303,12 @@ def _(
     # Join key dtypes are dependent on the schema of the left and
     # right inputs, so these must be translated with the relevant
     # input active.
-    def adjust_literal_dtype(literal: expr.Literal) -> expr.Literal:
-        if literal.dtype.id() == plc.types.TypeId.INT32:
-            plc_int64 = plc.types.DataType(plc.types.TypeId.INT64)
-            return expr.Literal(
-                plc_int64,
-                pa.scalar(literal.value.as_py(), type=plc.interop.to_arrow(plc_int64)),
-            )
-        return literal
-
-    def maybe_adjust_binop(e) -> expr.Expr:
-        if isinstance(e.value, expr.BinOp):
-            left, right = e.value.children
-            if isinstance(left, expr.Col) and isinstance(right, expr.Literal):
-                e.value.children = (left, adjust_literal_dtype(right))
-            elif isinstance(left, expr.Literal) and isinstance(right, expr.Col):
-                e.value.children = (adjust_literal_dtype(left), right)
-        return e
-
-    def translate_expr_and_maybe_fix_binop_args(translator, exprs):
-        return [
-            maybe_adjust_binop(translate_named_expr(translator, n=e)) for e in exprs
-        ]
-
     with set_node(translator.visitor, node.input_left):
         inp_left = translator.translate_ir(n=None)
-
-    # translate_named_expr directly once it is resolved.
-    # Tracking issue: https://github.com/pola-rs/polars/issues/20935
-    left_on = translate_expr_and_maybe_fix_binop_args(translator, node.left_on)
+    left_on = [translate_named_expr(translator, n=e) for e in node.left_on]
     with set_node(translator.visitor, node.input_right):
         inp_right = translator.translate_ir(n=None)
-    right_on =
+    right_on = [translate_named_expr(translator, n=e) for e in node.right_on]

     if (how := node.options[0]) in {
         "Inner",
@@ -341,7 +319,15 @@ def _(
         "Semi",
         "Anti",
     }:
-        return ir.Join(
+        return ir.Join(
+            schema,
+            left_on,
+            right_on,
+            node.options,
+            translator.config_options,
+            inp_left,
+            inp_right,
+        )
     else:
         how, op1, op2 = node.options[0]
         if how != "IEJoin":
@@ -463,6 +449,21 @@ def _(
     return ir.Projection(schema, translator.translate_ir(n=node.input))


+@_translate_ir.register
+def _(
+    node: pl_ir.MergeSorted, translator: Translator, schema: dict[str, plc.DataType]
+) -> ir.IR:
+    key = node.key
+    inp_left = translator.translate_ir(n=node.input_left)
+    inp_right = translator.translate_ir(n=node.input_right)
+    return ir.MergeSorted(
+        schema,
+        key,
+        inp_left,
+        inp_right,
+    )
+
+
 @_translate_ir.register
 def _(
     node: pl_ir.MapFunction, translator: Translator, schema: dict[str, plc.DataType]
@@ -472,7 +473,6 @@ def _(
         schema,
         name,
         options,
-        # TODO: merge_sorted breaks this pattern
         translator.translate_ir(n=node.input),
     )

@@ -623,6 +623,17 @@ def _(node: pl_expr.Function, translator: Translator, dtype: plc.DataType) -> ex
         )
     elif name == "pow":
         return expr.BinOp(dtype, plc.binaryop.BinaryOperator.POW, *children)
+    elif name in "top_k":
+        (col, k) = children
+        assert isinstance(k, expr.Literal)
+        (descending,) = options
+        return expr.Slice(
+            dtype,
+            0,
+            k.value.as_py(),
+            expr.Sort(dtype, (False, True, not descending), col),
+        )
+
         return expr.UnaryFunction(dtype, name, options, *children)
     raise NotImplementedError(
         f"No handler for Expr function node with {name=}"
@@ -651,7 +662,10 @@ def _(node: pl_expr.Window, translator: Translator, dtype: plc.DataType) -> expr
 @_translate_expr.register
 def _(node: pl_expr.Literal, translator: Translator, dtype: plc.DataType) -> expr.Expr:
     if isinstance(node.value, plrs.PySeries):
-
+        data = pl.Series._from_pyseries(node.value).to_arrow()
+        return expr.LiteralColumn(
+            dtype, data.cast(dtypes.downcast_arrow_lists(data.type))
+        )
     value = pa.scalar(node.value, type=plc.interop.to_arrow(dtype))
     return expr.Literal(dtype, value)
@@ -673,6 +687,20 @@ def _(node: pl_expr.SortBy, translator: Translator, dtype: plc.DataType) -> expr
     )


+@_translate_expr.register
+def _(node: pl_expr.Slice, translator: Translator, dtype: plc.DataType) -> expr.Expr:
+    offset = translator.translate_expr(n=node.offset)
+    length = translator.translate_expr(n=node.length)
+    assert isinstance(offset, expr.Literal)
+    assert isinstance(length, expr.Literal)
+    return expr.Slice(
+        dtype,
+        offset.value.as_py(),
+        length.value.as_py(),
+        translator.translate_expr(n=node.input),
+    )
+
+
 @_translate_expr.register
 def _(node: pl_expr.Gather, translator: Translator, dtype: plc.DataType) -> expr.Expr:
     return expr.Gather(
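`Translator` now receives the whole `GPUEngine` and snapshots `engine.config` into a `ConfigOptions` wrapper, which is then threaded into `Scan`, `DataFrameScan`, `GroupBy`, and `Join` nodes. Below is a hedged sketch of how such options could be supplied from the polars side; the `executor` name and whether this cudf-polars release accepts these exact keyword arguments are assumptions, only the `executor_options.cardinality_factor` and `executor_options.groupby_n_ary` lookups are confirmed by this diff:

import polars as pl

# Keyword arguments passed to GPUEngine end up in `engine.config`;
# cudf-polars deep-copies that dict into its ConfigOptions during translation.
engine = pl.GPUEngine(
    raise_on_fail=True,  # surface translation failures instead of CPU fallback
    executor="dask-experimental",  # assumed name of the multi-partition executor
    executor_options={
        # Estimated fraction of unique values per group-by key column,
        # read via config_options.get("executor_options.cardinality_factor").
        "cardinality_factor": {"id": 0.01},
        # Fan-in of the group-by tree reduction,
        # read via config_options.get("executor_options.groupby_n_ary").
        "groupby_n_ary": 32,
    },
)

lf = pl.LazyFrame({"id": [1, 1, 2], "x": [1.0, 2.0, 3.0]})
print(lf.group_by("id").agg(pl.col("x").mean()).collect(engine=engine))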
cudf_polars/experimental/base.py
CHANGED
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 """Multi-partition base classes."""

@@ -9,23 +9,29 @@ from typing import TYPE_CHECKING
 from cudf_polars.dsl.ir import Union

 if TYPE_CHECKING:
-    from collections.abc import Iterator
+    from collections.abc import Iterator

     from cudf_polars.containers import DataFrame
+    from cudf_polars.dsl.expr import NamedExpr
     from cudf_polars.dsl.nodebase import Node


 class PartitionInfo:
-    """
-
-
-
-    """
-
-
-
-    def __init__(
+    """Partitioning information."""
+
+    __slots__ = ("count", "partitioned_on")
+    count: int
+    """Partition count."""
+    partitioned_on: tuple[NamedExpr, ...]
+    """Columns the data is hash-partitioned on."""
+
+    def __init__(
+        self,
+        count: int,
+        partitioned_on: tuple[NamedExpr, ...] = (),
+    ):
         self.count = count
+        self.partitioned_on = partitioned_on

     def keys(self, node: Node) -> Iterator[tuple[str, int]]:
         """Return the partitioned keys for a given node."""
@@ -38,6 +44,6 @@ def get_key_name(node: Node) -> str:
     return f"{type(node).__name__.lower()}-{hash(node)}"


-def _concat(dfs:
+def _concat(*dfs: DataFrame) -> DataFrame:
     # Concatenate a sequence of DataFrames vertically
     return Union.do_evaluate(None, *dfs)
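`PartitionInfo` now tracks both the partition count and the key columns a node is hash-partitioned on, and `keys(node)` expands that into the `(name, index)` task keys used when building the task graph. A small self-contained sketch of that naming scheme (the `FakeNode` and `PartitionInfo` classes here are stand-ins, not the cudf-polars implementations):

class FakeNode:
    """Stand-in for an IR node; only used to derive a task-name prefix."""


def get_key_name(node) -> str:
    # One name per node: lowercase class name plus a hash.
    return f"{type(node).__name__.lower()}-{hash(node)}"


class PartitionInfo:
    """Partition count plus the columns the data is hash-partitioned on."""

    def __init__(self, count: int, partitioned_on: tuple[str, ...] = ()):
        self.count = count
        self.partitioned_on = partitioned_on

    def keys(self, node):
        # Task keys are (task-name, partition-index) tuples.
        name = get_key_name(node)
        return ((name, i) for i in range(self.count))


node = FakeNode()
info = PartitionInfo(count=3, partitioned_on=("id",))
print(list(info.keys(node)))  # [('fakenode-…', 0), ('fakenode-…', 1), ('fakenode-…', 2)]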
cudf_polars/experimental/dask_serialize.py
CHANGED
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0

 """Dask serialization."""
@@ -12,7 +12,7 @@ from distributed.utils import log_errors
 import pylibcudf as plc
 import rmm

-from cudf_polars.containers import DataFrame
+from cudf_polars.containers import Column, DataFrame

 __all__ = ["register"]

@@ -20,8 +20,8 @@ __all__ = ["register"]
 def register() -> None:
     """Register dask serialization routines for DataFrames."""

-    @cuda_serialize.register(DataFrame)
-    def _(x: DataFrame):
+    @cuda_serialize.register((Column, DataFrame))
+    def _(x: DataFrame | Column):
         with log_errors():
             header, frames = x.serialize()
             return header, list(frames)  # Dask expect a list of frames
@@ -29,11 +29,17 @@ def register() -> None:
     @cuda_deserialize.register(DataFrame)
     def _(header, frames):
         with log_errors():
-
-            return DataFrame.deserialize(header,
+            metadata, gpudata = frames
+            return DataFrame.deserialize(header, (metadata, plc.gpumemoryview(gpudata)))

-    @
-    def _(
+    @cuda_deserialize.register(Column)
+    def _(header, frames):
+        with log_errors():
+            metadata, gpudata = frames
+            return Column.deserialize(header, (metadata, plc.gpumemoryview(gpudata)))
+
+    @dask_serialize.register((Column, DataFrame))
+    def _(x: DataFrame | Column):
         with log_errors():
             header, (metadata, gpudata) = x.serialize()

@@ -57,3 +63,11 @@ def register() -> None:
             # Copy the second frame (the gpudata in host memory) back to the gpu
             frames = frames[0], plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1]))
             return DataFrame.deserialize(header, frames)
+
+    @dask_deserialize.register(Column)
+    def _(header, frames) -> Column:
+        with log_errors():
+            assert len(frames) == 2
+            # Copy the second frame (the gpudata in host memory) back to the gpu
+            frames = frames[0], plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1]))
+            return Column.deserialize(header, frames)
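Both `Column` and `DataFrame` are now registered with Dask's CUDA and pickle-based serialization families; each object is split into a header plus a `(metadata, gpudata)` pair of frames, and the host-memory path copies the data frame back to the GPU on deserialize. A rough standard-library-only sketch of the header/frames pattern (the `Blob` type and its frame layout are illustrative, not the cudf-polars containers):

import pickle


class Blob:
    """Toy container serialized as a header plus two frames."""

    def __init__(self, metadata: dict, payload: bytes):
        self.metadata = metadata
        self.payload = payload

    def serialize(self):
        # The header describes the object; the frames carry the bytes.
        header = {"type": "Blob"}
        frames = (pickle.dumps(self.metadata), self.payload)
        return header, frames

    @classmethod
    def deserialize(cls, header, frames):
        assert header["type"] == "Blob"
        metadata_frame, payload = frames
        return cls(pickle.loads(metadata_frame), payload)


blob = Blob({"name": "x"}, b"\x00\x01\x02")
header, frames = blob.serialize()
restored = Blob.deserialize(header, frames)
assert restored.metadata == blob.metadata and restored.payload == blob.payload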
cudf_polars/experimental/groupby.py
ADDED
@@ -0,0 +1,346 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+"""Parallel GroupBy Logic."""
+
+from __future__ import annotations
+
+import itertools
+import uuid
+from typing import TYPE_CHECKING, Any
+
+import pylibcudf as plc
+
+from cudf_polars.dsl.expr import (
+    Agg,
+    BinOp,
+    Cast,
+    Col,
+    Len,
+    Literal,
+    NamedExpr,
+    UnaryFunction,
+)
+from cudf_polars.dsl.ir import GroupBy, Select
+from cudf_polars.dsl.traversal import traversal
+from cudf_polars.experimental.base import PartitionInfo, _concat, get_key_name
+from cudf_polars.experimental.dispatch import generate_ir_tasks, lower_ir_node
+from cudf_polars.experimental.shuffle import Shuffle
+
+if TYPE_CHECKING:
+    from collections.abc import MutableMapping
+
+    from cudf_polars.dsl.expr import Expr
+    from cudf_polars.dsl.ir import IR
+    from cudf_polars.experimental.parallel import LowerIRTransformer
+
+
+# Supported multi-partition aggregations
+_GB_AGG_SUPPORTED = ("sum", "count", "mean", "min", "max")
+
+
+def combine(
+    *decompositions: tuple[NamedExpr, list[NamedExpr], list[NamedExpr]],
+) -> tuple[list[NamedExpr], list[NamedExpr], list[NamedExpr]]:
+    """
+    Combine multiple groupby-aggregation decompositions.
+
+    Parameters
+    ----------
+    decompositions
+        Packed sequence of `decompose` results.
+
+    Returns
+    -------
+    Unified groupby-aggregation decomposition.
+    """
+    selections, aggregations, reductions = zip(*decompositions, strict=True)
+    assert all(isinstance(ne, NamedExpr) for ne in selections)
+    return (
+        list(selections),
+        list(itertools.chain.from_iterable(aggregations)),
+        list(itertools.chain.from_iterable(reductions)),
+    )
+
+
+def decompose(
+    name: str, expr: Expr
+) -> tuple[NamedExpr, list[NamedExpr], list[NamedExpr]]:
+    """
+    Decompose a groupby-aggregation expression.
+
+    Parameters
+    ----------
+    name
+        Output schema name.
+    expr
+        The aggregation expression for a single column.
+
+    Returns
+    -------
+    NamedExpr
+        The expression selecting the *output* column or columns.
+    list[NamedExpr]
+        The initial aggregation expressions.
+    list[NamedExpr]
+        The reduction expressions.
+    """
+    dtype = expr.dtype
+    expr = expr.children[0] if isinstance(expr, Cast) else expr
+
+    unary_op: list[Any] = []
+    if isinstance(expr, UnaryFunction) and expr.is_pointwise:
+        # TODO: Handle multiple/sequential unary ops
+        unary_op = [expr.name, expr.options]
+        expr = expr.children[0]
+
+    def _wrap_unary(select):
+        # Helper function to wrap the final selection
+        # in a UnaryFunction (when necessary)
+        if unary_op:
+            return UnaryFunction(select.dtype, *unary_op, select)
+        return select
+
+    if isinstance(expr, Len):
+        selection = NamedExpr(name, _wrap_unary(Col(dtype, name)))
+        aggregation = [NamedExpr(name, expr)]
+        reduction = [
+            NamedExpr(
+                name,
+                # Sum reduction may require casting.
+                # Do it for all cases to be safe (for now)
+                Cast(dtype, Agg(dtype, "sum", None, Col(dtype, name))),
+            )
+        ]
+        return selection, aggregation, reduction
+    if isinstance(expr, Agg):
+        if expr.name in ("sum", "count", "min", "max"):
+            if expr.name in ("sum", "count"):
+                aggfunc = "sum"
+            else:
+                aggfunc = expr.name
+            selection = NamedExpr(name, _wrap_unary(Col(dtype, name)))
+            aggregation = [NamedExpr(name, expr)]
+            reduction = [
+                NamedExpr(
+                    name,
+                    # Sum reduction may require casting.
+                    # Do it for all cases to be safe (for now)
+                    Cast(dtype, Agg(dtype, aggfunc, None, Col(dtype, name))),
+                )
+            ]
+            return selection, aggregation, reduction
+        elif expr.name == "mean":
+            (child,) = expr.children
+            token = str(uuid.uuid4().hex)  # prevent collisions with user's names
+            (sum, count), aggregations, reductions = combine(
+                decompose(f"{name}__mean_sum_{token}", Agg(dtype, "sum", None, child)),
+                decompose(f"{name}__mean_count_{token}", Len(dtype)),
+            )
+            selection = NamedExpr(
+                name,
+                _wrap_unary(
+                    BinOp(
+                        dtype, plc.binaryop.BinaryOperator.DIV, sum.value, count.value
+                    )
+                ),
+            )
+            return selection, aggregations, reductions
+        else:
+            raise NotImplementedError(
+                "GroupBy does not support multiple partitions "
+                f"for this aggregation type:\n{type(expr)}\n"
+                f"Only {_GB_AGG_SUPPORTED} are supported."
+            )
+    elif isinstance(expr, BinOp):
+        # The expectation is that each operand of the BinOp is decomposable.
+        # We can then combine the decompositions of the operands to form the
+        # decomposition of the BinOp.
+        (left, right) = expr.children
+        token = str(uuid.uuid4().hex)  # prevent collisions with user's names
+        (left_selection, right_selection), aggregations, reductions = combine(
+            decompose(f"{name}__left_{token}", left),
+            decompose(f"{name}__right_{token}", right),
+        )
+
+        selection = NamedExpr(
+            name,
+            _wrap_unary(
+                BinOp(dtype, expr.op, left_selection.value, right_selection.value)
+            ),
+        )
+        return selection, aggregations, reductions
+
+    elif isinstance(expr, Literal):
+        selection = NamedExpr(name, _wrap_unary(Col(dtype, name)))
+        aggregation = []
+        reduction = [NamedExpr(name, expr)]
+        return selection, aggregation, reduction
+
+    else:  # pragma: no cover
+        # Unsupported expression
+        raise NotImplementedError(
+            f"GroupBy does not support multiple partitions for this expression:\n{expr}"
+        )
+
+
+@lower_ir_node.register(GroupBy)
+def _(
+    ir: GroupBy, rec: LowerIRTransformer
+) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
+    # Extract child partitioning
+    child, partition_info = rec(ir.children[0])
+
+    # Handle single-partition case
+    if partition_info[child].count == 1:
+        single_part_node = ir.reconstruct([child])
+        partition_info[single_part_node] = partition_info[child]
+        return single_part_node, partition_info
+
+    # Check group-by keys
+    if not all(expr.is_pointwise for expr in traversal([e.value for e in ir.keys])):
+        raise NotImplementedError(
+            f"GroupBy does not support multiple partitions for keys:\n{ir.keys}"
+        )  # pragma: no cover
+
+    # Check if we are dealing with any high-cardinality columns
+    post_aggregation_count = 1  # Default tree reduction
+    groupby_key_columns = [ne.name for ne in ir.keys]
+    cardinality_factor = {
+        c: min(f, 1.0)
+        for c, f in ir.config_options.get(
+            "executor_options.cardinality_factor", default={}
+        ).items()
+        if c in groupby_key_columns
+    }
+    if cardinality_factor:
+        # The `cardinality_factor` dictionary can be used
+        # to specify a mapping between column names and
+        # cardinality "factors". Each factor estimates the
+        # fractional number of unique values in the column.
+        # Each value should be in the range (0, 1].
+        child_count = partition_info[child].count
+        post_aggregation_count = max(
+            int(max(cardinality_factor.values()) * child_count),
+            1,
+        )
+
+    # Decompose the aggregation requests into three distinct phases
+    selection_exprs, piecewise_exprs, reduction_exprs = combine(
+        *(decompose(agg.name, agg.value) for agg in ir.agg_requests)
+    )
+
+    # Partition-wise groupby operation
+    pwise_schema = {k.name: k.value.dtype for k in ir.keys} | {
+        k.name: k.value.dtype for k in piecewise_exprs
+    }
+    gb_pwise = GroupBy(
+        pwise_schema,
+        ir.keys,
+        piecewise_exprs,
+        ir.maintain_order,
+        ir.options,
+        ir.config_options,
+        child,
+    )
+    child_count = partition_info[child].count
+    partition_info[gb_pwise] = PartitionInfo(count=child_count)
+
+    # Add Shuffle node if necessary
+    gb_inter: GroupBy | Shuffle = gb_pwise
+    if post_aggregation_count > 1:
+        if ir.maintain_order:  # pragma: no cover
+            raise NotImplementedError(
+                "maintain_order not supported for multiple output partitions."
+            )
+
+        gb_inter = Shuffle(
+            pwise_schema,
+            ir.keys,
+            ir.config_options,
+            gb_pwise,
+        )
+        partition_info[gb_inter] = PartitionInfo(count=post_aggregation_count)
+
+    # Tree reduction if post_aggregation_count==1
+    # (Otherwise, this is another partition-wise op)
+    gb_reduce = GroupBy(
+        {k.name: k.value.dtype for k in ir.keys}
+        | {k.name: k.value.dtype for k in reduction_exprs},
+        ir.keys,
+        reduction_exprs,
+        ir.maintain_order,
+        ir.options,
+        ir.config_options,
+        gb_inter,
+    )
+    partition_info[gb_reduce] = PartitionInfo(count=post_aggregation_count)
+
+    # Final Select phase
+    aggregated = {ne.name: ne for ne in selection_exprs}
+    new_node = Select(
+        ir.schema,
+        [
+            # Select the aggregated data or the original column
+            aggregated.get(name, NamedExpr(name, Col(dtype, name)))
+            for name, dtype in ir.schema.items()
+        ],
+        False,  # noqa: FBT003
+        gb_reduce,
+    )
+    partition_info[new_node] = PartitionInfo(count=post_aggregation_count)
+    return new_node, partition_info
+
+
+def _tree_node(do_evaluate, nbatch, *args):
+    return do_evaluate(*args[nbatch:], _concat(*args[:nbatch]))
+
+
+@generate_ir_tasks.register(GroupBy)
+def _(
+    ir: GroupBy, partition_info: MutableMapping[IR, PartitionInfo]
+) -> MutableMapping[Any, Any]:
+    (child,) = ir.children
+    child_count = partition_info[child].count
+    child_name = get_key_name(child)
+    output_count = partition_info[ir].count
+
+    if output_count == child_count:
+        return {
+            key: (
+                ir.do_evaluate,
+                *ir._non_child_args,
+                (child_name, i),
+            )
+            for i, key in enumerate(partition_info[ir].keys(ir))
+        }
+    elif output_count != 1:  # pragma: no cover
+        raise ValueError(f"Expected single partition, got {output_count}")
+
+    # Simple N-ary tree reduction
+    j = 0
+    n_ary = ir.config_options.get("executor_options.groupby_n_ary", default=32)
+    graph: MutableMapping[Any, Any] = {}
+    name = get_key_name(ir)
+    keys: list[Any] = [(child_name, i) for i in range(child_count)]
+    while len(keys) > n_ary:
+        new_keys: list[Any] = []
+        for i, k in enumerate(range(0, len(keys), n_ary)):
+            batch = keys[k : k + n_ary]
+            graph[(name, j, i)] = (
+                _tree_node,
+                ir.do_evaluate,
+                len(batch),
+                *batch,
+                *ir._non_child_args,
+            )
+            new_keys.append((name, j, i))
+        j += 1
+        keys = new_keys
+    graph[(name, 0)] = (
+        _tree_node,
+        ir.do_evaluate,
+        len(keys),
+        *keys,
+        *ir._non_child_args,
+    )
+    return graph
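The new module's core idea is to split an aggregation into a partition-wise phase, a cross-partition reduction, and a final selection: `mean`, for instance, becomes per-partition `sum` and `count` columns, a global sum of both, and a closing divide (shuffled or tree-reduced as shown above). A minimal pure-Python sketch of that three-phase plan (illustrative only; the real implementation builds cudf-polars IR nodes and task graphs):

# Three-phase mean over "partitions" of a single column.
partitions = [[1.0, 2.0, 3.0], [4.0, 5.0], [6.0]]

# Phase 1: partition-wise aggregation (sum and count per partition).
piecewise = [(sum(p), len(p)) for p in partitions]

# Phase 2: reduction across partitions (sum of sums, sum of counts).
total_sum = sum(s for s, _ in piecewise)
total_count = sum(c for _, c in piecewise)

# Phase 3: final selection combines the reduced columns (sum / count).
mean = total_sum / total_count
print(mean)  # 3.5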