cudf-polars-cu12 25.4.0-py3-none-any.whl → 25.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. cudf_polars/VERSION +1 -1
  2. cudf_polars/callback.py +35 -50
  3. cudf_polars/containers/column.py +38 -0
  4. cudf_polars/containers/dataframe.py +11 -16
  5. cudf_polars/dsl/expressions/aggregation.py +25 -61
  6. cudf_polars/dsl/expressions/base.py +40 -72
  7. cudf_polars/dsl/expressions/binaryop.py +3 -39
  8. cudf_polars/dsl/expressions/boolean.py +21 -49
  9. cudf_polars/dsl/expressions/datetime.py +59 -17
  10. cudf_polars/dsl/expressions/literal.py +24 -24
  11. cudf_polars/dsl/expressions/rolling.py +110 -9
  12. cudf_polars/dsl/expressions/selection.py +6 -24
  13. cudf_polars/dsl/expressions/slicing.py +2 -8
  14. cudf_polars/dsl/expressions/sorting.py +4 -17
  15. cudf_polars/dsl/expressions/string.py +29 -32
  16. cudf_polars/dsl/expressions/ternary.py +3 -10
  17. cudf_polars/dsl/expressions/unary.py +32 -73
  18. cudf_polars/dsl/ir.py +575 -167
  19. cudf_polars/dsl/nodebase.py +1 -1
  20. cudf_polars/dsl/to_ast.py +5 -3
  21. cudf_polars/dsl/translate.py +272 -152
  22. cudf_polars/dsl/utils/__init__.py +8 -0
  23. cudf_polars/dsl/utils/aggregations.py +292 -0
  24. cudf_polars/dsl/utils/groupby.py +97 -0
  25. cudf_polars/dsl/utils/naming.py +34 -0
  26. cudf_polars/dsl/utils/replace.py +46 -0
  27. cudf_polars/dsl/utils/rolling.py +113 -0
  28. cudf_polars/dsl/utils/windows.py +186 -0
  29. cudf_polars/experimental/base.py +0 -8
  30. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  31. cudf_polars/experimental/benchmarks/pdsh.py +1279 -0
  32. cudf_polars/experimental/dask_registers.py +196 -0
  33. cudf_polars/experimental/distinct.py +174 -0
  34. cudf_polars/experimental/explain.py +127 -0
  35. cudf_polars/experimental/expressions.py +521 -0
  36. cudf_polars/experimental/groupby.py +109 -167
  37. cudf_polars/experimental/io.py +53 -26
  38. cudf_polars/experimental/join.py +59 -24
  39. cudf_polars/experimental/parallel.py +155 -133
  40. cudf_polars/experimental/repartition.py +69 -0
  41. cudf_polars/experimental/scheduler.py +155 -0
  42. cudf_polars/experimental/select.py +92 -7
  43. cudf_polars/experimental/shuffle.py +109 -9
  44. cudf_polars/experimental/sort.py +45 -0
  45. cudf_polars/experimental/spilling.py +151 -0
  46. cudf_polars/experimental/utils.py +100 -0
  47. cudf_polars/testing/asserts.py +146 -6
  48. cudf_polars/testing/io.py +72 -0
  49. cudf_polars/testing/plugin.py +55 -42
  50. cudf_polars/typing/__init__.py +27 -5
  51. cudf_polars/utils/config.py +317 -102
  52. cudf_polars/utils/dtypes.py +8 -1
  53. cudf_polars/utils/timer.py +1 -1
  54. cudf_polars/utils/versions.py +4 -4
  55. {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.6.0.dist-info}/METADATA +7 -5
  56. cudf_polars_cu12-25.6.0.dist-info/RECORD +73 -0
  57. {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.6.0.dist-info}/WHEEL +1 -1
  58. cudf_polars/experimental/dask_serialize.py +0 -73
  59. cudf_polars_cu12-25.4.0.dist-info/RECORD +0 -55
  60. {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.6.0.dist-info}/licenses/LICENSE +0 -0
  61. {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.6.0.dist-info}/top_level.txt +0 -0
cudf_polars/dsl/expressions/binaryop.py
@@ -13,11 +13,9 @@ from polars.polars import _expr_nodes as pl_expr
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["BinOp"]
@@ -85,17 +83,10 @@ class BinOp(Expr):
     }
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        left, right = (
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        )
+        left, right = (child.evaluate(df, context=context) for child in self.children)
         lop = left.obj
         rop = right.obj
         if left.size != right.size:
@@ -106,30 +97,3 @@ class BinOp(Expr):
         return Column(
             plc.binaryop.binary_operation(lop, rop, self.op, self.dtype),
         )
-
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        if depth == 1:
-            # inside aggregation, need to pre-evaluate,
-            # groupby construction has checked that we don't have
-            # nested aggs, so stop the recursion and return ourselves
-            # for pre-eval
-            return AggInfo([(self, plc.aggregation.collect_list(), self)])
-        else:
-            left_info, right_info = (
-                child.collect_agg(depth=depth) for child in self.children
-            )
-            requests = [*left_info.requests, *right_info.requests]
-            # TODO: Hack, if there were no reductions inside this
-            # binary expression then we want to pre-evaluate and
-            # collect ourselves. Otherwise we want to collect the
-            # aggregations inside and post-evaluate. This is a bad way
-            # of checking that we are in case 1.
-            if all(
-                agg.kind() == plc.aggregation.Kind.COLLECT_LIST
-                for _, agg, _ in requests
-            ):
-                return AggInfo([(self, plc.aggregation.collect_list(), self)])
-            return AggInfo(
-                [*left_info.requests, *right_info.requests],
-            )
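Editor's note: the `collect_agg` protocol removed here is replaced by a per-expression `agg_request` property (visible in the literal.py hunks below), with group-by decomposition moving to the new `cudf_polars/dsl/utils/aggregations.py` (+292 lines in the file list). A minimal sketch of the new shape; `SumAgg` is a hypothetical leaf for illustration — only the raising `Literal.agg_request` actually appears in this diff:

```python
import pylibcudf as plc

class SumAgg:
    """Hypothetical leaf: expressions now expose their pylibcudf aggregation directly."""

    @property
    def agg_request(self) -> plc.aggregation.Aggregation:
        # Group-by/rolling translation collects these requests instead of
        # recursing through collect_agg(depth=...).
        return plc.aggregation.sum()
```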
cudf_polars/dsl/expressions/boolean.py
@@ -10,8 +10,6 @@ from enum import IntEnum, auto
 from functools import partial, reduce
 from typing import TYPE_CHECKING, Any, ClassVar
 
-import pyarrow as pa
-
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
@@ -19,10 +17,9 @@ from cudf_polars.dsl.expressions.base import (
     ExecutionContext,
     Expr,
 )
+from cudf_polars.utils.versions import POLARS_VERSION_LT_128
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from typing_extensions import Self
 
     import polars.type_aliases as pl_types
@@ -89,9 +86,11 @@ class BooleanFunction(Expr):
             BooleanFunction.Name.IsLastDistinct,
             BooleanFunction.Name.IsUnique,
         )
-        if self.name is BooleanFunction.Name.IsIn and not all(
-            c.dtype == self.children[0].dtype for c in self.children
-        ):
+        if (
+            POLARS_VERSION_LT_128
+            and self.name is BooleanFunction.Name.IsIn
+            and not all(c.dtype == self.children[0].dtype for c in self.children)
+        ):  # pragma: no cover
             # TODO: If polars IR doesn't put the casts in, we need to
             # mimic the supertype promotion rules.
             raise NotImplementedError("IsIn doesn't support supertype casting")
@@ -145,11 +144,7 @@ class BooleanFunction(Expr):
     }
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         if self.name in (
@@ -160,29 +155,22 @@ class BooleanFunction(Expr):
             (child,) = self.children
             is_finite = self.name is BooleanFunction.Name.IsFinite
             if child.dtype.id() not in (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64):
-                value = plc.interop.from_arrow(
-                    pa.scalar(value=is_finite, type=plc.interop.to_arrow(self.dtype))
-                )
+                value = plc.Scalar.from_py(is_finite)
                 return Column(plc.Column.from_scalar(value, df.num_rows))
-            needles = child.evaluate(df, context=context, mapping=mapping)
+            needles = child.evaluate(df, context=context)
             to_search = [-float("inf"), float("inf")]
             if is_finite:
                 # NaN is neither finite not infinite
                 to_search.append(float("nan"))
-            haystack = plc.interop.from_arrow(
-                pa.array(
-                    to_search,
-                    type=plc.interop.to_arrow(needles.obj.type()),
-                )
+            haystack = plc.Column.from_iterable_of_py(
+                to_search,
+                dtype=needles.obj.type(),
             )
             result = plc.search.contains(haystack, needles.obj)
             if is_finite:
                 result = plc.unary.unary_operation(result, plc.unary.UnaryOperator.NOT)
             return Column(result)
-        columns = [
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        ]
+        columns = [child.evaluate(df, context=context) for child in self.children]
         # Kleene logic for Any (OR) and All (AND) if ignore_nulls is
         # False
         if self.name in (BooleanFunction.Name.Any, BooleanFunction.Name.All):
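Editor's note: a self-contained sketch of the IsFinite/IsInfinite path above, assuming a pylibcudf build matching this release — the haystack is now built directly from Python floats and membership is tested with `plc.search.contains`:

```python
import pylibcudf as plc

dtype = plc.DataType(plc.TypeId.FLOAT64)
needles = plc.Column.from_iterable_of_py(
    [1.0, float("inf"), float("nan")], dtype=dtype
)
# IsFinite searches for +/-inf and NaN, then negates the containment result.
haystack = plc.Column.from_iterable_of_py(
    [-float("inf"), float("inf"), float("nan")], dtype=dtype
)
contained = plc.search.contains(haystack, needles)
is_finite = plc.unary.unary_operation(contained, plc.unary.UnaryOperator.NOT)
```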
@@ -233,48 +221,32 @@ class BooleanFunction(Expr):
             return self._distinct(
                 column,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST,
-                source_value=plc.interop.from_arrow(
-                    pa.scalar(value=True, type=plc.interop.to_arrow(self.dtype))
-                ),
-                target_value=plc.interop.from_arrow(
-                    pa.scalar(value=False, type=plc.interop.to_arrow(self.dtype))
-                ),
+                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype),
+                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype),
             )
         elif self.name is BooleanFunction.Name.IsLastDistinct:
             (column,) = columns
             return self._distinct(
                 column,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_LAST,
-                source_value=plc.interop.from_arrow(
-                    pa.scalar(value=True, type=plc.interop.to_arrow(self.dtype))
-                ),
-                target_value=plc.interop.from_arrow(
-                    pa.scalar(value=False, type=plc.interop.to_arrow(self.dtype))
-                ),
+                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype),
+                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype),
             )
         elif self.name is BooleanFunction.Name.IsUnique:
             (column,) = columns
             return self._distinct(
                 column,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_NONE,
-                source_value=plc.interop.from_arrow(
-                    pa.scalar(value=True, type=plc.interop.to_arrow(self.dtype))
-                ),
-                target_value=plc.interop.from_arrow(
-                    pa.scalar(value=False, type=plc.interop.to_arrow(self.dtype))
-                ),
+                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype),
+                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype),
             )
         elif self.name is BooleanFunction.Name.IsDuplicated:
             (column,) = columns
             return self._distinct(
                 column,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_NONE,
-                source_value=plc.interop.from_arrow(
-                    pa.scalar(value=False, type=plc.interop.to_arrow(self.dtype))
-                ),
-                target_value=plc.interop.from_arrow(
-                    pa.scalar(value=True, type=plc.interop.to_arrow(self.dtype))
-                ),
+                source_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype),
+                target_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype),
             )
         elif self.name is BooleanFunction.Name.AllHorizontal:
             return Column(
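Editor's note: the recurring migration in this file (and in datetime.py and selection.py below) is that scalar construction no longer round-trips through pyarrow. Before/after, as a sketch using calls taken from the hunks above:

```python
import pylibcudf as plc

dtype = plc.DataType(plc.TypeId.BOOL8)

# 25.4.0 style (pyarrow round-trip):
#   import pyarrow as pa
#   s = plc.interop.from_arrow(
#       pa.scalar(value=True, type=plc.interop.to_arrow(dtype))
#   )

# 25.6.0 style:
s = plc.Scalar.from_py(py_val=True, dtype=dtype)
```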
cudf_polars/dsl/expressions/datetime.py
@@ -17,8 +17,6 @@ from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from typing_extensions import Self
 
     from polars.polars import _expr_nodes as pl_expr
@@ -108,8 +106,12 @@ class TemporalFunction(Expr):
         *_COMPONENT_MAP.keys(),
         Name.IsLeapYear,
         Name.OrdinalDay,
+        Name.ToString,
+        Name.Week,
+        Name.IsoYear,
         Name.MonthStart,
         Name.MonthEnd,
+        Name.CastTimeUnit,
     }
 
     def __init__(
@@ -127,26 +129,66 @@ class TemporalFunction(Expr):
         if self.name not in self._valid_ops:
             raise NotImplementedError(f"Temporal function {self.name}")
 
+        if self.name is TemporalFunction.Name.ToString and plc.traits.is_duration(
+            self.children[0].dtype
+        ):
+            raise NotImplementedError("ToString is not supported on duration types")
+
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        columns = [
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        ]
+        columns = [child.evaluate(df, context=context) for child in self.children]
         (column,) = columns
+        if self.name is TemporalFunction.Name.CastTimeUnit:
+            (unit,) = self.options
+            if plc.traits.is_timestamp(column.obj.type()):
+                dtype = plc.interop.from_arrow(pa.timestamp(unit))
+            elif plc.traits.is_duration(column.obj.type()):
+                dtype = plc.interop.from_arrow(pa.duration(unit))
+            result = plc.unary.cast(column.obj, dtype)
+            return Column(result)
+        if self.name == TemporalFunction.Name.ToString:
+            return Column(
+                plc.strings.convert.convert_datetime.from_timestamps(
+                    column.obj,
+                    self.options[0],
+                    plc.Column.from_iterable_of_py(
+                        [], dtype=plc.DataType(plc.TypeId.STRING)
+                    ),
+                )
+            )
+        if self.name is TemporalFunction.Name.Week:
+            result = plc.strings.convert.convert_integers.to_integers(
+                plc.strings.convert.convert_datetime.from_timestamps(
+                    column.obj,
+                    format="%V",
+                    input_strings_names=plc.Column.from_iterable_of_py(
+                        [], dtype=plc.DataType(plc.TypeId.STRING)
+                    ),
+                ),
+                plc.types.DataType(plc.types.TypeId.INT8),
+            )
+            return Column(result)
+        if self.name is TemporalFunction.Name.IsoYear:
+            result = plc.strings.convert.convert_integers.to_integers(
+                plc.strings.convert.convert_datetime.from_timestamps(
+                    column.obj,
+                    format="%G",
+                    input_strings_names=plc.Column.from_iterable_of_py(
+                        [], dtype=plc.DataType(plc.TypeId.STRING)
+                    ),
+                ),
+                plc.types.DataType(plc.types.TypeId.INT32),
+            )
+            return Column(result)
         if self.name is TemporalFunction.Name.MonthStart:
             ends = plc.datetime.last_day_of_month(column.obj)
             days_to_subtract = plc.datetime.days_in_month(column.obj)
             # must subtract 1 to avoid rolling over to the previous month
             days_to_subtract = plc.binaryop.binary_operation(
                 days_to_subtract,
-                plc.interop.from_arrow(pa.scalar(1, type=pa.int32())),
+                plc.Scalar.from_py(1, plc.DataType(plc.TypeId.INT32)),
                 plc.binaryop.BinaryOperator.SUB,
                 plc.DataType(plc.TypeId.DURATION_DAYS),
             )
@@ -179,7 +221,7 @@ class TemporalFunction(Expr):
             )
             millis_as_micros = plc.binaryop.binary_operation(
                 millis,
-                plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())),
+                plc.Scalar.from_py(1_000, plc.DataType(plc.TypeId.INT32)),
                 plc.binaryop.BinaryOperator.MUL,
                 plc.DataType(plc.TypeId.INT32),
             )
@@ -202,15 +244,15 @@ class TemporalFunction(Expr):
             )
             millis_as_nanos = plc.binaryop.binary_operation(
                 millis,
-                plc.interop.from_arrow(pa.scalar(1_000_000, type=pa.int32())),
+                plc.Scalar.from_py(1_000_000, plc.DataType(plc.TypeId.INT32)),
                 plc.binaryop.BinaryOperator.MUL,
-                plc.types.DataType(plc.types.TypeId.INT32),
+                plc.DataType(plc.TypeId.INT32),
             )
             micros_as_nanos = plc.binaryop.binary_operation(
                 micros,
-                plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())),
+                plc.Scalar.from_py(1_000, plc.DataType(plc.TypeId.INT32)),
                 plc.binaryop.BinaryOperator.MUL,
-                plc.types.DataType(plc.types.TypeId.INT32),
+                plc.DataType(plc.TypeId.INT32),
             )
             total_nanos = plc.binaryop.binary_operation(
                 nanos,
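Editor's note: the Week and IsoYear branches added above share one technique — format timestamps with the ISO strftime codes (%V for week number, %G for ISO year) and parse the resulting strings back to integers. Extracted as a sketch, using the same calls as the hunk:

```python
import pylibcudf as plc

def iso_week(timestamps: plc.Column) -> plc.Column:
    # Render each timestamp as its ISO week number ("01".."53")...
    strings = plc.strings.convert.convert_datetime.from_timestamps(
        timestamps,
        format="%V",
        input_strings_names=plc.Column.from_iterable_of_py(
            [], dtype=plc.DataType(plc.TypeId.STRING)
        ),
    )
    # ...then parse the digits back into an integer column.
    return plc.strings.convert.convert_integers.to_integers(
        strings, plc.DataType(plc.TypeId.INT8)
    )
```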
cudf_polars/dsl/expressions/literal.py
@@ -6,15 +6,15 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, NoReturn
 
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
 if TYPE_CHECKING:
-    from collections.abc import Hashable, Mapping
+    from collections.abc import Hashable
 
     import pyarrow as pa
 
@@ -26,29 +26,31 @@ __all__ = ["Literal", "LiteralColumn"]
 class Literal(Expr):
     __slots__ = ("value",)
     _non_child = ("dtype", "value")
-    value: pa.Scalar[Any]
+    value: Any  # Python scalar
 
-    def __init__(self, dtype: plc.DataType, value: pa.Scalar[Any]) -> None:
+    def __init__(self, dtype: plc.DataType, value: Any) -> None:
+        if value is None and dtype.id() == plc.TypeId.EMPTY:
+            # TypeId.EMPTY not supported by libcudf
+            # cuDF Python also maps EMPTY to INT8
+            dtype = plc.DataType(plc.TypeId.INT8)
         self.dtype = dtype
-        assert value.type == plc.interop.to_arrow(dtype)
         self.value = value
         self.children = ()
         self.is_pointwise = True
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        # datatype of pyarrow scalar is correct by construction.
-        return Column(plc.Column.from_scalar(plc.interop.from_arrow(self.value), 1))
+        return Column(
+            plc.Column.from_scalar(plc.Scalar.from_py(self.value, self.dtype), 1)
+        )
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        return AggInfo([])
+    @property
+    def agg_request(self) -> NoReturn:  # noqa: D102
+        raise NotImplementedError(
+            "Not expecting to require agg request of literal"
+        )  # pragma: no cover
 
 
 class LiteralColumn(Expr):
@@ -70,16 +72,14 @@ class LiteralColumn(Expr):
         return (type(self), self.dtype, id(self.value))
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         # datatype of pyarrow array is correct by construction.
         return Column(plc.interop.from_arrow(self.value))
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        return AggInfo([])
+    @property
+    def agg_request(self) -> NoReturn:  # noqa: D102
+        raise NotImplementedError(
+            "Not expecting to require agg request of literal"
+        )  # pragma: no cover
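Editor's note: Literal now stores a plain Python value instead of a pyarrow scalar; a None carrying TypeId.EMPTY is remapped to INT8 at construction, since libcudf has no EMPTY type. The new evaluation path in isolation, as a sketch:

```python
import pylibcudf as plc

value, dtype = 42, plc.DataType(plc.TypeId.INT64)
# One-row column broadcast from a Python scalar; previously this required
# plc.interop.from_arrow(pa.scalar(...)).
col = plc.Column.from_scalar(plc.Scalar.from_py(value, dtype), 1)
```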
cudf_polars/dsl/expressions/rolling.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -8,24 +8,125 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING, Any
 
-from cudf_polars.dsl.expressions.base import Expr
+import pylibcudf as plc
+
+from cudf_polars.containers import Column
+from cudf_polars.dsl import expr
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
+from cudf_polars.dsl.utils.windows import range_window_bounds
 
 if TYPE_CHECKING:
-    import pylibcudf as plc
+    import pyarrow as pa
+
+    from cudf_polars.containers import DataFrame
+    from cudf_polars.typing import ClosedInterval
+
+__all__ = ["GroupedRollingWindow", "RollingWindow", "to_request"]
+
+
+def to_request(
+    value: expr.Expr, orderby: Column, df: DataFrame
+) -> plc.rolling.RollingRequest:
+    """
+    Produce a rolling request for evaluation with pylibcudf.
 
-__all__ = ["GroupedRollingWindow", "RollingWindow"]
+    Parameters
+    ----------
+    value
+        The expression to perform the rolling aggregation on.
+    orderby
+        Orderby column, used as input to the request when the aggregation is Len.
+    df
+        DataFrame used to evaluate the inputs to the aggregation.
+    """
+    min_periods = 1
+    if isinstance(value, expr.Len):
+        # A count aggregation, we need a column so use the orderby column
+        col = orderby
+    elif isinstance(value, expr.Agg):
+        child = value.children[0]
+        col = child.evaluate(df, context=ExecutionContext.ROLLING)
+        if value.name == "var":
+            # Polars variance produces null if nvalues <= ddof
+            # libcudf produces NaN. However, we can get the polars
+            # behaviour by setting the minimum window size to ddof +
+            # 1.
+            min_periods = value.options + 1
+    else:
+        col = value.evaluate(
+            df, context=ExecutionContext.ROLLING
+        )  # pragma: no cover; raise before we get here because we
+        # don't do correct handling of empty groups
+    return plc.rolling.RollingRequest(col.obj, min_periods, value.agg_request)
 
 
 class RollingWindow(Expr):
-    __slots__ = ("options",)
-    _non_child = ("dtype", "options")
+    __slots__ = ("closed_window", "following", "orderby", "preceding")
+    _non_child = ("dtype", "preceding", "following", "closed_window", "orderby")
 
-    def __init__(self, dtype: plc.DataType, options: Any, agg: Expr) -> None:
+    def __init__(
+        self,
+        dtype: plc.DataType,
+        preceding: pa.Scalar,
+        following: pa.Scalar,
+        closed_window: ClosedInterval,
+        orderby: str,
+        agg: Expr,
+    ) -> None:
         self.dtype = dtype
-        self.options = options
+        self.preceding = preceding
+        self.following = following
+        self.closed_window = closed_window
+        self.orderby = orderby
         self.children = (agg,)
         self.is_pointwise = False
-        raise NotImplementedError("Rolling window not implemented")
+        if agg.agg_request.kind() == plc.aggregation.Kind.COLLECT_LIST:
+            raise NotImplementedError(
+                "Incorrect handling of empty groups for list collection"
+            )
+        if not plc.rolling.is_valid_rolling_aggregation(agg.dtype, agg.agg_request):
+            raise NotImplementedError(f"Unsupported rolling aggregation {agg}")
+
+    def do_evaluate(  # noqa: D102
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
+    ) -> Column:
+        if context != ExecutionContext.FRAME:
+            raise RuntimeError(
+                "Rolling aggregation inside groupby/over/rolling"
+            )  # pragma: no cover; translation raises first
+        (agg,) = self.children
+        orderby = df.column_map[self.orderby]
+        # Polars casts integral orderby to int64, but only for calculating window bounds
+        if (
+            plc.traits.is_integral(orderby.obj.type())
+            and orderby.obj.type().id() != plc.TypeId.INT64
+        ):
+            orderby_obj = plc.unary.cast(orderby.obj, plc.DataType(plc.TypeId.INT64))
+        else:
+            orderby_obj = orderby.obj
+        preceding, following = range_window_bounds(
+            self.preceding, self.following, self.closed_window
+        )
+        if orderby.obj.null_count() != 0:
+            raise RuntimeError(
+                f"Index column '{self.orderby}' in rolling may not contain nulls"
+            )
+        if not orderby.check_sorted(
+            order=plc.types.Order.ASCENDING, null_order=plc.types.NullOrder.BEFORE
+        ):
+            raise RuntimeError(
+                f"Index column '{self.orderby}' in rolling is not sorted, please sort first"
+            )
+        (result,) = plc.rolling.grouped_range_rolling_window(
+            plc.Table([]),
+            orderby_obj,
+            plc.types.Order.ASCENDING,
+            plc.types.NullOrder.BEFORE,
+            preceding,
+            following,
+            [to_request(agg, orderby, df)],
+        ).columns()
+        return Column(result)
 
 
 class GroupedRollingWindow(Expr):
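Editor's note: at the user level, the newly implemented RollingWindow backs polars' by-column rolling aggregations. A usage sketch, assuming a polars version paired with this release and a working GPU engine:

```python
import polars as pl

lf = pl.LazyFrame({"t": [1, 2, 4, 7], "x": [1.0, 2.0, 3.0, 4.0]})
# The orderby column ("t" here) must be sorted and null-free, per the
# RuntimeErrors raised in do_evaluate above.
out = lf.select(
    pl.col("x").rolling_sum_by("t", window_size="3i")
).collect(engine="gpu")
```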
cudf_polars/dsl/expressions/selection.py
@@ -8,16 +8,12 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
-import pyarrow as pa
-
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["Filter", "Gather"]
@@ -33,16 +29,11 @@ class Gather(Expr):
         self.is_pointwise = False
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         values, indices = (
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
+            child.evaluate(df, context=context) for child in self.children
         )
         lo, hi = plc.reduce.minmax(indices.obj)
         lo = plc.interop.to_arrow(lo).as_py()
@@ -54,9 +45,7 @@ class Gather(Expr):
             bounds_policy = plc.copying.OutOfBoundsPolicy.NULLIFY
             obj = plc.replace.replace_nulls(
                 indices.obj,
-                plc.interop.from_arrow(
-                    pa.scalar(n, type=plc.interop.to_arrow(indices.obj.type()))
-                ),
+                plc.Scalar.from_py(n, dtype=indices.obj.type()),
             )
         else:
             bounds_policy = plc.copying.OutOfBoundsPolicy.DONT_CHECK
@@ -72,20 +61,13 @@ class Filter(Expr):
     def __init__(self, dtype: plc.DataType, values: Expr, indices: Expr):
         self.dtype = dtype
         self.children = (values, indices)
-        self.is_pointwise = True
+        self.is_pointwise = False
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        values, mask = (
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        )
+        values, mask = (child.evaluate(df, context=context) for child in self.children)
         table = plc.stream_compaction.apply_boolean_mask(
             plc.Table([values.obj]), mask.obj
         )
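Editor's note: the Filter hunk also fixes a bug — `is_pointwise` flips to False, since masking changes the row count and is therefore not a pointwise operation. The stream-compaction call in isolation, assuming a matching pylibcudf build:

```python
import pylibcudf as plc

values = plc.Column.from_iterable_of_py(
    [10, 20, 30], dtype=plc.DataType(plc.TypeId.INT64)
)
mask = plc.Column.from_iterable_of_py(
    [True, False, True], dtype=plc.DataType(plc.TypeId.BOOL8)
)
# Keep only rows where the mask is true: yields [10, 30].
(filtered,) = plc.stream_compaction.apply_boolean_mask(
    plc.Table([values]), mask
).columns()
```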
cudf_polars/dsl/expressions/slicing.py
@@ -14,8 +14,6 @@ from cudf_polars.dsl.expressions.base import (
 )
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     import pylibcudf as plc
 
     from cudf_polars.containers import Column, DataFrame
@@ -41,13 +39,9 @@ class Slice(Expr):
         self.children = (column,)
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         (child,) = self.children
-        column = child.evaluate(df, context=context, mapping=mapping)
+        column = child.evaluate(df, context=context)
         return column.slice((self.offset, self.length))