cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +60 -15
  4. cudf_polars/containers/column.py +137 -77
  5. cudf_polars/containers/dataframe.py +123 -34
  6. cudf_polars/containers/datatype.py +134 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/binaryop.py +34 -14
  10. cudf_polars/dsl/expressions/boolean.py +110 -37
  11. cudf_polars/dsl/expressions/datetime.py +59 -30
  12. cudf_polars/dsl/expressions/literal.py +11 -5
  13. cudf_polars/dsl/expressions/rolling.py +460 -119
  14. cudf_polars/dsl/expressions/selection.py +9 -8
  15. cudf_polars/dsl/expressions/slicing.py +1 -1
  16. cudf_polars/dsl/expressions/string.py +256 -114
  17. cudf_polars/dsl/expressions/struct.py +19 -7
  18. cudf_polars/dsl/expressions/ternary.py +33 -3
  19. cudf_polars/dsl/expressions/unary.py +126 -64
  20. cudf_polars/dsl/ir.py +1053 -350
  21. cudf_polars/dsl/to_ast.py +30 -13
  22. cudf_polars/dsl/tracing.py +194 -0
  23. cudf_polars/dsl/translate.py +307 -107
  24. cudf_polars/dsl/utils/aggregations.py +43 -30
  25. cudf_polars/dsl/utils/reshape.py +14 -2
  26. cudf_polars/dsl/utils/rolling.py +12 -8
  27. cudf_polars/dsl/utils/windows.py +35 -20
  28. cudf_polars/experimental/base.py +55 -2
  29. cudf_polars/experimental/benchmarks/pdsds.py +12 -126
  30. cudf_polars/experimental/benchmarks/pdsh.py +792 -2
  31. cudf_polars/experimental/benchmarks/utils.py +596 -39
  32. cudf_polars/experimental/dask_registers.py +47 -20
  33. cudf_polars/experimental/dispatch.py +9 -3
  34. cudf_polars/experimental/distinct.py +2 -0
  35. cudf_polars/experimental/explain.py +15 -2
  36. cudf_polars/experimental/expressions.py +30 -15
  37. cudf_polars/experimental/groupby.py +25 -4
  38. cudf_polars/experimental/io.py +156 -124
  39. cudf_polars/experimental/join.py +53 -23
  40. cudf_polars/experimental/parallel.py +68 -19
  41. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  42. cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
  43. cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
  44. cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
  45. cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
  46. cudf_polars/experimental/rapidsmpf/core.py +488 -0
  47. cudf_polars/experimental/rapidsmpf/dask.py +172 -0
  48. cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
  49. cudf_polars/experimental/rapidsmpf/io.py +696 -0
  50. cudf_polars/experimental/rapidsmpf/join.py +322 -0
  51. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  52. cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
  53. cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
  54. cudf_polars/experimental/rapidsmpf/union.py +115 -0
  55. cudf_polars/experimental/rapidsmpf/utils.py +374 -0
  56. cudf_polars/experimental/repartition.py +9 -2
  57. cudf_polars/experimental/select.py +177 -14
  58. cudf_polars/experimental/shuffle.py +46 -12
  59. cudf_polars/experimental/sort.py +100 -26
  60. cudf_polars/experimental/spilling.py +1 -1
  61. cudf_polars/experimental/statistics.py +24 -5
  62. cudf_polars/experimental/utils.py +25 -7
  63. cudf_polars/testing/asserts.py +13 -8
  64. cudf_polars/testing/io.py +2 -1
  65. cudf_polars/testing/plugin.py +93 -17
  66. cudf_polars/typing/__init__.py +86 -32
  67. cudf_polars/utils/config.py +473 -58
  68. cudf_polars/utils/cuda_stream.py +70 -0
  69. cudf_polars/utils/versions.py +5 -4
  70. cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
  71. cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
  72. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
  73. cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
  74. cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
  75. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
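The hunks reproduced below cover cudf_polars/dsl/expressions/rolling.py (entry 13 above, +460 -119); the diffs for the remaining files are omitted from this excerpt.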
@@ -6,23 +6,27 @@

 from __future__ import annotations

-import itertools
+from collections import defaultdict
 from dataclasses import dataclass
 from functools import singledispatchmethod
 from typing import TYPE_CHECKING, Any

-import polars as pl
-
 import pylibcudf as plc

 from cudf_polars.containers import Column, DataFrame, DataType
 from cudf_polars.dsl import expr
 from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 from cudf_polars.dsl.utils.reshape import broadcast
-from cudf_polars.dsl.utils.windows import offsets_to_windows, range_window_bounds
+from cudf_polars.dsl.utils.windows import (
+    duration_to_int,
+    offsets_to_windows,
+    range_window_bounds,
+)

 if TYPE_CHECKING:
-    from collections.abc import Generator, Sequence
+    from collections.abc import Sequence
+
+    from rmm.pylibrmm.stream import Stream

     from cudf_polars.typing import ClosedInterval, Duration

@@ -42,6 +46,16 @@ class RankOp(UnaryOp):
     pass


+@dataclass(frozen=True)
+class FillNullWithStrategyOp(UnaryOp):
+    policy: plc.replace.ReplacePolicy = plc.replace.ReplacePolicy.PRECEDING
+
+
+@dataclass(frozen=True)
+class CumSumOp(UnaryOp):
+    pass
+
+
 def to_request(
     value: expr.Expr, orderby: Column, df: DataFrame
 ) -> plc.rolling.RollingRequest:
@@ -78,26 +92,15 @@ def to_request(
     return plc.rolling.RollingRequest(col.obj, min_periods, value.agg_request)


-def _by_exprs(b: Expr | tuple) -> Generator[Expr]:
-    if isinstance(b, Expr):
-        yield b
-    elif isinstance(b, tuple):  # pragma: no cover; tests cover this path when
-        # run with the distributed scheduler only
-        for item in b:
-            yield from _by_exprs(item)
-    else:
-        yield expr.Literal(DataType(pl.Int64()), b)  # pragma: no cover
-
-
 class RollingWindow(Expr):
     __slots__ = (
         "closed_window",
-        "following",
+        "following_ordinal",
         "offset",
         "orderby",
         "orderby_dtype",
         "period",
-        "preceding",
+        "preceding_ordinal",
     )
     _non_child = (
         "dtype",
@@ -111,7 +114,7 @@ class RollingWindow(Expr):
     def __init__(
         self,
         dtype: DataType,
-        orderby_dtype: DataType,
+        orderby_dtype: plc.DataType,
         offset: Duration,
         period: Duration,
         closed_window: ClosedInterval,
@@ -126,9 +129,11 @@
         # within `__init__`).
         self.offset = offset
         self.period = period
-        self.preceding, self.following = offsets_to_windows(
-            orderby_dtype, offset, period
-        )
+        self.orderby_dtype = orderby_dtype
+        self.offset = offset
+        self.period = period
+        self.preceding_ordinal = duration_to_int(orderby_dtype, *offset)
+        self.following_ordinal = duration_to_int(orderby_dtype, *period)
         self.closed_window = closed_window
         self.orderby = orderby
         self.children = (agg,)
@@ -137,7 +142,9 @@
             raise NotImplementedError(
                 "Incorrect handling of empty groups for list collection"
            )
-        if not plc.rolling.is_valid_rolling_aggregation(agg.dtype.plc, agg.agg_request):
+        if not plc.rolling.is_valid_rolling_aggregation(
+            agg.dtype.plc_type, agg.agg_request
+        ):
             raise NotImplementedError(f"Unsupported rolling aggregation {agg}")

     def do_evaluate(  # noqa: D102
@@ -154,18 +161,28 @@ class RollingWindow(Expr):
             plc.traits.is_integral(orderby.obj.type())
             and orderby.obj.type().id() != plc.TypeId.INT64
         ):
-            orderby_obj = plc.unary.cast(orderby.obj, plc.DataType(plc.TypeId.INT64))
+            orderby_obj = plc.unary.cast(
+                orderby.obj, plc.DataType(plc.TypeId.INT64), stream=df.stream
+            )
         else:
             orderby_obj = orderby.obj
+        preceding_scalar, following_scalar = offsets_to_windows(
+            self.orderby_dtype,
+            self.preceding_ordinal,
+            self.following_ordinal,
+            stream=df.stream,
+        )
         preceding, following = range_window_bounds(
-            self.preceding, self.following, self.closed_window
+            preceding_scalar, following_scalar, self.closed_window
         )
         if orderby.obj.null_count() != 0:
             raise RuntimeError(
                 f"Index column '{self.orderby}' in rolling may not contain nulls"
             )
         if not orderby.check_sorted(
-            order=plc.types.Order.ASCENDING, null_order=plc.types.NullOrder.BEFORE
+            order=plc.types.Order.ASCENDING,
+            null_order=plc.types.NullOrder.BEFORE,
+            stream=df.stream,
         ):
             raise RuntimeError(
                 f"Index column '{self.orderby}' in rolling is not sorted, please sort first"
@@ -178,6 +195,7 @@ class RollingWindow(Expr):
             preceding,
             following,
             [to_request(agg, orderby, df)],
+            stream=df.stream,
         ).columns()
         return Column(result, dtype=self.dtype)

@@ -207,7 +225,6 @@ class GroupedRollingWindow(Expr):
         "named_aggs",
         "post",
         "by_count",
-        "_order_by_expr",
     )

     def __init__(
@@ -216,15 +233,18 @@
         options: Any,
         named_aggs: Sequence[expr.NamedExpr],
         post: expr.NamedExpr,
-        *by: Expr,
-        _order_by_expr: Expr | None = None,
+        by_count: int,
+        *children: Expr,
     ) -> None:
         self.dtype = dtype
         self.options = options
         self.named_aggs = tuple(named_aggs)
         self.post = post
+        self.by_count = by_count
+        has_order_by = self.options[1]
         self.is_pointwise = False
-        self._order_by_expr = _order_by_expr
+        self.children = tuple(children)
+        self._order_by_expr = children[by_count] if has_order_by else None

         unsupported = [
             type(named_expr.value).__name__
@@ -233,7 +253,8 @@
                 isinstance(named_expr.value, (expr.Len, expr.Agg))
                 or (
                     isinstance(named_expr.value, expr.UnaryFunction)
-                    and named_expr.value.name in {"rank"}
+                    and named_expr.value.name
+                    in {"rank", "fill_null_with_strategy", "cum_sum"}
                 )
             )
         ]
@@ -242,29 +263,22 @@
             raise NotImplementedError(
                 f"Unsupported over(...) only expression: {kinds}="
             )
-
-        # Ensures every partition-by is an Expr
-        # Fixes over(1) cases with the streaming
-        # executor and a small blocksize
-        by_expr = [e for b in by for e in _by_exprs(b)]
-
-        # Expose agg dependencies as children so the streaming
-        # executor retains required source columns
-        child_deps = [
-            v.children[0]
-            for ne in self.named_aggs
-            for v in (ne.value,)
-            if isinstance(v, expr.Agg)
-            or (isinstance(v, expr.UnaryFunction) and v.name in {"rank"})
-        ]
-        self.by_count = len(by_expr)
-        self.children = tuple(
-            itertools.chain(
-                by_expr,
-                (() if self._order_by_expr is None else (self._order_by_expr,)),
-                child_deps,
+        if has_order_by:
+            ob = self._order_by_expr
+            is_multi_order_by = (
+                isinstance(ob, expr.UnaryFunction)
+                and ob.name == "as_struct"
+                and len(ob.children) > 1
             )
-        )
+            has_order_sensitive_agg = any(
+                isinstance(ne.value, expr.Agg)
+                and ne.value.agg_request.kind() == plc.aggregation.Kind.NTH_ELEMENT
+                for ne in self.named_aggs
+            )
+            if is_multi_order_by and has_order_sensitive_agg:
+                raise NotImplementedError(
+                    "Multiple order_by keys with order-sensitive aggregations"
+                )

     @staticmethod
     def _sorted_grouper(by_cols_for_scan: list[Column]) -> plc.groupby.GroupBy:
@@ -311,6 +325,7 @@ class GroupedRollingWindow(Expr):
             plc.Table([val_col]),
             order_index,
             plc.copying.OutOfBoundsPolicy.NULLIFY,
+            stream=df.stream,
         ).columns()[0]
         assert isinstance(rank_expr, expr.UnaryFunction)
         method_str, descending, _ = rank_expr.options
@@ -351,6 +366,85 @@
         _, rank_tables = grouper.scan(rank_requests)
         return rank_out_names, rank_out_dtypes, rank_tables

+    @_apply_unary_op.register
+    def _(  # type: ignore[no-untyped-def]
+        self,
+        op: FillNullWithStrategyOp,
+        df: DataFrame,
+        _,
+    ) -> tuple[list[str], list[DataType], list[plc.Table]]:
+        named_exprs = op.named_exprs
+
+        plc_cols = [
+            ne.value.children[0].evaluate(df, context=ExecutionContext.FRAME).obj
+            for ne in named_exprs
+        ]
+        if op.order_index is not None:
+            vals_tbl = plc.copying.gather(
+                plc.Table(plc_cols),
+                op.order_index,
+                plc.copying.OutOfBoundsPolicy.NULLIFY,
+                stream=df.stream,
+            )
+        else:
+            vals_tbl = plc.Table(plc_cols)
+        local_grouper = op.local_grouper
+        assert isinstance(local_grouper, plc.groupby.GroupBy)
+        _, filled_tbl = local_grouper.replace_nulls(
+            vals_tbl,
+            [op.policy] * len(plc_cols),
+        )
+
+        tables = [plc.Table([column]) for column in filled_tbl.columns()]
+        names = [ne.name for ne in named_exprs]
+        dtypes = [ne.value.dtype for ne in named_exprs]
+        return names, dtypes, tables
+
+    @_apply_unary_op.register
+    def _(  # type: ignore[no-untyped-def]
+        self,
+        op: CumSumOp,
+        df: DataFrame,
+        _,
+    ) -> tuple[list[str], list[DataType], list[plc.Table]]:
+        cum_named = op.named_exprs
+        order_index = op.order_index
+
+        requests: list[plc.groupby.GroupByRequest] = []
+        out_names: list[str] = []
+        out_dtypes: list[DataType] = []
+
+        # Instead of calling self._gather_columns, let's call plc.copying.gather directly
+        # since we need plc.Column objects, not cudf_polars Column objects
+        if order_index is not None:
+            plc_cols = [
+                ne.value.children[0].evaluate(df, context=ExecutionContext.FRAME).obj
+                for ne in cum_named
+            ]
+            val_cols = plc.copying.gather(
+                plc.Table(plc_cols),
+                order_index,
+                plc.copying.OutOfBoundsPolicy.NULLIFY,
+                stream=df.stream,
+            ).columns()
+        else:
+            val_cols = [
+                ne.value.children[0].evaluate(df, context=ExecutionContext.FRAME).obj
+                for ne in cum_named
+            ]
+        agg = plc.aggregation.sum()
+
+        for ne, val_col in zip(cum_named, val_cols, strict=True):
+            requests.append(plc.groupby.GroupByRequest(val_col, [agg]))
+            out_names.append(ne.name)
+            out_dtypes.append(ne.value.dtype)
+
+        local_grouper = op.local_grouper
+        assert isinstance(local_grouper, plc.groupby.GroupBy)
+        _, tables = local_grouper.scan(requests)
+
+        return out_names, out_dtypes, tables
+
     def _reorder_to_input(
         self,
         row_id: plc.Column,
@@ -361,6 +455,7 @@
         rank_out_dtypes: list[DataType],
         *,
         order_index: plc.Column | None = None,
+        stream: Stream,
     ) -> list[Column]:
         # Reorder scan results from grouped-order back to input row order
         if order_index is None:
@@ -370,6 +465,7 @@ class GroupedRollingWindow(Expr):
                 plc.Table([*(c.obj for c in by_cols), row_id]),
                 [*key_orders, plc.types.Order.ASCENDING],
                 [*key_nulls, plc.types.NullOrder.AFTER],
+                stream=stream,
             )

         return [
@@ -380,11 +476,15 @@
                 plc.Table(
                     [
                         plc.Column.from_scalar(
-                            plc.Scalar.from_py(None, tbl.columns()[0].type()),
+                            plc.Scalar.from_py(
+                                None, tbl.columns()[0].type(), stream=stream
+                            ),
                             n_rows,
+                            stream=stream,
                         )
                     ]
                 ),
+                stream=stream,
             ).columns()[0],
             name=name,
             dtype=dtype,
@@ -401,6 +501,8 @@
         reductions: list[expr.NamedExpr] = []
         unary_window_ops: dict[str, list[expr.NamedExpr]] = {
             "rank": [],
+            "fill_null_with_strategy": [],
+            "cum_sum": [],
         }

         for ne in self.named_aggs:
@@ -421,6 +523,7 @@
         ob_nulls_last: bool,
         value_col: plc.Column | None = None,
         value_desc: bool = False,
+        stream: Stream,
     ) -> plc.Column:
         """Compute a stable row ordering for unary operations in a grouped context."""
         cols: list[plc.Column] = [c.obj for c in by_cols]
@@ -444,7 +547,7 @@
             )
             nulls.append(
                 plc.types.NullOrder.AFTER
-                if ob_nulls_last
+                if ob_desc ^ ob_nulls_last
                 else plc.types.NullOrder.BEFORE
             )

@@ -453,31 +556,153 @@
             orders.append(plc.types.Order.ASCENDING)
             nulls.append(plc.types.NullOrder.AFTER)

-        return plc.sorting.stable_sorted_order(plc.Table(cols), orders, nulls)
+        return plc.sorting.stable_sorted_order(
+            plc.Table(cols), orders, nulls, stream=stream
+        )

     def _gather_columns(
-        self,
-        cols: list[Column],
-        order_index: plc.Column,
-    ) -> list[plc.Column] | list[Column]:
+        self, cols: Sequence[Column], order_index: plc.Column, stream: Stream
+    ) -> list[Column]:
         gathered_tbl = plc.copying.gather(
             plc.Table([c.obj for c in cols]),
             order_index,
             plc.copying.OutOfBoundsPolicy.NULLIFY,
+            stream=stream,
         )

         return [
             Column(
-                gathered_tbl.columns()[i],
+                gathered,
                 name=c.name,
                 dtype=c.dtype,
                 order=c.order,
                 null_order=c.null_order,
-                is_sorted=True,
+                is_sorted=c.is_sorted,
             )
-            for i, c in enumerate(cols)
+            for gathered, c in zip(gathered_tbl.columns(), cols, strict=True)
         ]

+    def _grouped_window_scan_setup(
+        self,
+        by_cols: list[Column],
+        *,
+        row_id: plc.Column,
+        order_by_col: Column | None,
+        ob_desc: bool,
+        ob_nulls_last: bool,
+        grouper: plc.groupby.GroupBy,
+        stream: Stream,
+    ) -> tuple[plc.Column | None, list[Column] | None, plc.groupby.GroupBy]:
+        if order_by_col is None:
+            # keep the original ordering
+            return None, None, grouper
+        order_index = self._build_window_order_index(
+            by_cols,
+            row_id=row_id,
+            order_by_col=order_by_col,
+            ob_desc=ob_desc,
+            ob_nulls_last=ob_nulls_last,
+            stream=stream,
+        )
+        by_cols_for_scan = self._gather_columns(by_cols, order_index, stream=stream)
+        assert by_cols_for_scan is not None
+        local = self._sorted_grouper(by_cols_for_scan)
+        return order_index, by_cols_for_scan, local
+
+    def _broadcast_agg_results(
+        self,
+        by_tbl: plc.Table,
+        group_keys_tbl: plc.Table,
+        value_tbls: list[plc.Table],
+        names: list[str],
+        dtypes: list[DataType],
+        stream: Stream,
+    ) -> list[Column]:
+        # We do a left-join between the input keys to group-keys
+        # so every input row appears exactly once. left_order is
+        # returned un-ordered by libcudf.
+        left_order, right_order = plc.join.left_join(
+            by_tbl, group_keys_tbl, plc.types.NullEquality.EQUAL, stream
+        )
+
+        # Scatter the right order indices into an all-null table
+        # and at the position of the index in left order. Now we
+        # have the map between rows and groups with the correct ordering.
+        left_rows = left_order.size()
+        target = plc.Column.from_scalar(
+            plc.Scalar.from_py(None, plc.types.SIZE_TYPE, stream), left_rows, stream
+        )
+        aligned_map = plc.copying.scatter(
+            plc.Table([right_order]),
+            left_order,
+            plc.Table([target]),
+            stream,
+        ).columns()[0]
+
+        # Broadcast each scalar aggregated result back to row-shape using
+        # the aligned mapping between row indices and group indices.
+        out_cols = (t.columns()[0] for t in value_tbls)
+        return [
+            Column(
+                plc.copying.gather(
+                    plc.Table([col]),
+                    aligned_map,
+                    plc.copying.OutOfBoundsPolicy.NULLIFY,
+                    stream,
+                ).columns()[0],
+                name=name,
+                dtype=dtype,
+            )
+            for name, dtype, col in zip(names, dtypes, out_cols, strict=True)
+        ]
+
+    def _build_groupby_requests(
+        self,
+        named_exprs: list[expr.NamedExpr],
+        df: DataFrame,
+        order_index: plc.Column | None = None,
+        by_cols: list[Column] | None = None,
+    ) -> tuple[list[plc.groupby.GroupByRequest], list[str], list[DataType]]:
+        assert by_cols is not None
+        gb_requests: list[plc.groupby.GroupByRequest] = []
+        out_names: list[str] = []
+        out_dtypes: list[DataType] = []
+
+        eval_cols: list[plc.Column] = []
+        val_nodes: list[tuple[expr.NamedExpr, expr.Agg]] = []
+
+        for ne in named_exprs:
+            val = ne.value
+            out_names.append(ne.name)
+            out_dtypes.append(val.dtype)
+            if isinstance(val, expr.Agg):
+                (child,) = (
+                    val.children
+                    if getattr(val, "name", None) != "quantile"
+                    else (val.children[0],)
+                )
+                eval_cols.append(child.evaluate(df, context=ExecutionContext.FRAME).obj)
+                val_nodes.append((ne, val))
+
+        if order_index is not None and eval_cols:
+            eval_cols = plc.copying.gather(
+                plc.Table(eval_cols),
+                order_index,
+                plc.copying.OutOfBoundsPolicy.NULLIFY,
+                stream=df.stream,
+            ).columns()
+
+        gathered_iter = iter(eval_cols)
+        for ne in named_exprs:
+            val = ne.value
+            if isinstance(val, expr.Len):
+                col = by_cols[0].obj
+            else:
+                col = next(gathered_iter)
+            gb_requests.append(plc.groupby.GroupByRequest(col, [val.agg_request]))
+
+        return gb_requests, out_names, out_dtypes
+
     def do_evaluate(  # noqa: D102
         self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
@@ -493,9 +718,12 @@
         by_cols = broadcast(
             *(b.evaluate(df) for b in by_exprs),
             target_length=df.num_rows,
+            stream=df.stream,
         )
         order_by_col = (
-            broadcast(order_by_expr.evaluate(df), target_length=df.num_rows)[0]
+            broadcast(
+                order_by_expr.evaluate(df), target_length=df.num_rows, stream=df.stream
+            )[0]
             if order_by_expr is not None
             else None
         )
@@ -518,67 +746,73 @@
         scalar_named, unary_window_ops = self._split_named_expr()

         # Build GroupByRequests for scalar aggregations
-        gb_requests: list[plc.groupby.GroupByRequest] = []
-        out_names: list[str] = []
-        out_dtypes: list[DataType] = []
+        order_sensitive: list[expr.NamedExpr] = []
+        other_scalars: list[expr.NamedExpr] = []
         for ne in scalar_named:
             val = ne.value
-            out_names.append(ne.name)
-            out_dtypes.append(val.dtype)
+            if (
+                self._order_by_expr is not None
+                and isinstance(val, expr.Agg)
+                and val.agg_request.kind() == plc.aggregation.Kind.NTH_ELEMENT
+            ):
+                order_sensitive.append(ne)
+            else:
+                other_scalars.append(ne)

-            if isinstance(val, expr.Len):
-                # A count aggregation, we need a column so use a key column
-                col = by_cols[0].obj
-                gb_requests.append(plc.groupby.GroupByRequest(col, [val.agg_request]))
-            elif isinstance(val, expr.Agg):
-                (child,) = (
-                    val.children if val.name != "quantile" else (val.children[0],)
-                )
-                col = child.evaluate(df, context=ExecutionContext.FRAME).obj
-                gb_requests.append(plc.groupby.GroupByRequest(col, [val.agg_request]))
+        gb_requests, out_names, out_dtypes = self._build_groupby_requests(
+            other_scalars, df, by_cols=by_cols
+        )

         group_keys_tbl, value_tables = grouper.aggregate(gb_requests)
-        out_cols = (t.columns()[0] for t in value_tables)
-
-        # We do a left-join between the input keys to group-keys
-        # so every input row appears exactly once. left_order is
-        # returned un-ordered by libcudf.
-        left_order, right_order = plc.join.left_join(
-            by_tbl, group_keys_tbl, plc.types.NullEquality.EQUAL
+        broadcasted_cols = self._broadcast_agg_results(
+            by_tbl,
+            group_keys_tbl,
+            value_tables,
+            out_names,
+            out_dtypes,
+            df.stream,
         )

-        # Scatter the right order indices into an all-null table
-        # and at the position of the index in left order. Now we
-        # have the map between rows and groups with the correct ordering.
-        left_rows = left_order.size()
-        target = plc.Column.from_scalar(
-            plc.Scalar.from_py(None, plc.types.SIZE_TYPE), left_rows
-        )
-        aligned_map = plc.copying.scatter(
-            plc.Table([right_order]),
-            left_order,
-            plc.Table([target]),
-        ).columns()[0]
+        if order_sensitive:
+            row_id = plc.filling.sequence(
+                df.num_rows,
+                plc.Scalar.from_py(0, plc.types.SIZE_TYPE, stream=df.stream),
+                plc.Scalar.from_py(1, plc.types.SIZE_TYPE, stream=df.stream),
+                stream=df.stream,
+            )
+            _, _, ob_desc, ob_nulls_last = self.options
+            order_index, _, local = self._grouped_window_scan_setup(
+                by_cols,
+                row_id=row_id,
+                order_by_col=order_by_col,
+                ob_desc=ob_desc,
+                ob_nulls_last=ob_nulls_last,
+                grouper=grouper,
+                stream=df.stream,
+            )
+            assert order_index is not None

-        # Broadcast each scalar aggregated result back to row-shape using
-        # the aligned mapping between row indices and group indices.
-        broadcasted_cols = [
-            Column(
-                plc.copying.gather(
-                    plc.Table([col]), aligned_map, plc.copying.OutOfBoundsPolicy.NULLIFY
-                ).columns()[0],
-                name=named_expr.name,
-                dtype=dtype,
+            gb_requests, out_names, out_dtypes = self._build_groupby_requests(
+                order_sensitive, df, order_index=order_index, by_cols=by_cols
             )
-            for named_expr, dtype, col in zip(
-                scalar_named, out_dtypes, out_cols, strict=True
+
+            group_keys_tbl_local, value_tables_local = local.aggregate(gb_requests)
+            broadcasted_cols.extend(
+                self._broadcast_agg_results(
+                    by_tbl,
+                    group_keys_tbl_local,
+                    value_tables_local,
+                    out_names,
+                    out_dtypes,
+                    df.stream,
+                )
             )
-        ]

         row_id = plc.filling.sequence(
             df.num_rows,
-            plc.Scalar.from_py(0, plc.types.SIZE_TYPE),
-            plc.Scalar.from_py(1, plc.types.SIZE_TYPE),
+            plc.Scalar.from_py(0, plc.types.SIZE_TYPE, stream=df.stream),
+            plc.Scalar.from_py(1, plc.types.SIZE_TYPE, stream=df.stream),
+            stream=df.stream,
         )

         if rank_named := unary_window_ops["rank"]:
@@ -588,7 +822,6 @@ class GroupedRollingWindow(Expr):
                 rank_expr = ne.value
                 assert isinstance(rank_expr, expr.UnaryFunction)
                 (child,) = rank_expr.children
-                val = child.evaluate(df, context=ExecutionContext.FRAME).obj
                 desc = rank_expr.options[1]

                 order_index = self._build_window_order_index(
@@ -597,16 +830,21 @@
                     order_by_col=order_by_col,
                     ob_desc=ob_desc,
                     ob_nulls_last=ob_nulls_last,
-                    value_col=val,
+                    value_col=child.evaluate(
+                        df, context=ExecutionContext.FRAME
+                    ).obj,
                     value_desc=desc,
+                    stream=df.stream,
                 )
-                by_cols_for_scan = self._gather_columns(by_cols, order_index)
-                local = GroupedRollingWindow._sorted_grouper(by_cols_for_scan)
+                rank_by_cols_for_scan = self._gather_columns(
+                    by_cols, order_index, stream=df.stream
+                )
+                local = GroupedRollingWindow._sorted_grouper(rank_by_cols_for_scan)
                 names, dtypes, tables = self._apply_unary_op(
                     RankOp(
                         named_exprs=[ne],
                         order_index=order_index,
-                        by_cols_for_scan=by_cols_for_scan,
+                        by_cols_for_scan=rank_by_cols_for_scan,
                         local_grouper=local,
                     ),
                     df,
621
859
  names,
622
860
  dtypes,
623
861
  order_index=order_index,
862
+ stream=df.stream,
624
863
  )
625
864
  )
626
865
  else:
@@ -633,11 +872,113 @@
                 )
                 broadcasted_cols.extend(
                     self._reorder_to_input(
-                        row_id, by_cols, df.num_rows, tables, names, dtypes
+                        row_id,
+                        by_cols,
+                        df.num_rows,
+                        tables,
+                        names,
+                        dtypes,
+                        stream=df.stream,
+                    )
+                )
+
+        if fill_named := unary_window_ops["fill_null_with_strategy"]:
+            order_index, fill_null_by_cols_for_scan, local = (
+                self._grouped_window_scan_setup(
+                    by_cols,
+                    row_id=row_id,
+                    order_by_col=order_by_col
+                    if self._order_by_expr is not None
+                    else None,
+                    ob_desc=self.options[2]
+                    if self._order_by_expr is not None
+                    else False,
+                    ob_nulls_last=self.options[3]
+                    if self._order_by_expr is not None
+                    else False,
+                    grouper=grouper,
+                    stream=df.stream,
+                )
+            )
+
+            strategy_exprs: dict[str, list[expr.NamedExpr]] = defaultdict(list)
+            for ne in fill_named:
+                fill_null_expr = ne.value
+                assert isinstance(fill_null_expr, expr.UnaryFunction)
+                strategy_exprs[fill_null_expr.options[0]].append(ne)
+
+            replace_policy = {
+                "forward": plc.replace.ReplacePolicy.PRECEDING,
+                "backward": plc.replace.ReplacePolicy.FOLLOWING,
+            }
+
+            for strategy, fill_exprs in strategy_exprs.items():
+                names, dtypes, tables = self._apply_unary_op(
+                    FillNullWithStrategyOp(
+                        named_exprs=fill_exprs,
+                        order_index=order_index,
+                        by_cols_for_scan=fill_null_by_cols_for_scan,
+                        local_grouper=local,
+                        policy=replace_policy[strategy],
+                    ),
+                    df,
+                    grouper,
+                )
+                broadcasted_cols.extend(
+                    self._reorder_to_input(
+                        row_id,
+                        by_cols,
+                        df.num_rows,
+                        tables,
+                        names,
+                        dtypes,
+                        order_index=order_index,
+                        stream=df.stream,
                     )
                 )

+        if cum_named := unary_window_ops["cum_sum"]:
+            order_index, cum_sum_by_cols_for_scan, local = (
+                self._grouped_window_scan_setup(
+                    by_cols,
+                    row_id=row_id,
+                    order_by_col=order_by_col
+                    if self._order_by_expr is not None
+                    else None,
+                    ob_desc=self.options[2]
+                    if self._order_by_expr is not None
+                    else False,
+                    ob_nulls_last=self.options[3]
+                    if self._order_by_expr is not None
+                    else False,
+                    grouper=grouper,
+                    stream=df.stream,
+                )
+            )
+            names, dtypes, tables = self._apply_unary_op(
+                CumSumOp(
+                    named_exprs=cum_named,
+                    order_index=order_index,
+                    by_cols_for_scan=cum_sum_by_cols_for_scan,
+                    local_grouper=local,
+                ),
+                df,
+                grouper,
+            )
+            broadcasted_cols.extend(
+                self._reorder_to_input(
+                    row_id,
+                    by_cols,
+                    df.num_rows,
+                    tables,
+                    names,
+                    dtypes,
+                    order_index=order_index,
+                    stream=df.stream,
+                )
+            )
+
         # Create a temporary DataFrame with the broadcasted columns named by their
         # placeholder names from agg decomposition, then evaluate the post-expression.
-        df = DataFrame(broadcasted_cols)
+        df = DataFrame(broadcasted_cols, stream=df.stream)
         return self.post.value.evaluate(df, context=ExecutionContext.FRAME)
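
Taken together, the rolling.py changes shown above do two things: they thread an explicit CUDA stream (df.stream) through every pylibcudf call, and they extend GroupedRollingWindow so that over(...) windows accept fill_null_with_strategy and cum_sum in addition to rank. As a rough orientation only — the frame, column names, and aliases below are invented for this sketch and are not part of the package — these are the polars query shapes that exercise the new RankOp, FillNullWithStrategyOp, and CumSumOp paths:

import polars as pl

# Illustrative data; any frame with group keys and nullable values works.
lf = pl.LazyFrame({"g": [1, 1, 2, 2], "x": [1.0, None, 3.0, 4.0]})
out = lf.select(
    pl.col("x").rank().over("g").alias("x_rank"),                          # RankOp
    pl.col("x").fill_null(strategy="forward").over("g").alias("x_ffill"),  # FillNullWithStrategyOp
    pl.col("x").cum_sum().over("g").alias("x_cumsum"),                     # CumSumOp
).collect(engine="gpu")

Note that the new __init__ still raises NotImplementedError in the unsupported case it checks for (multiple order_by keys combined with order-sensitive aggregations), so such queries are expected to fall back to polars' CPU engine rather than run on the GPU.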