cudf-polars-cu12 25.2.2__py3-none-any.whl → 25.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +82 -65
- cudf_polars/containers/column.py +138 -7
- cudf_polars/containers/dataframe.py +26 -39
- cudf_polars/dsl/expr.py +3 -1
- cudf_polars/dsl/expressions/aggregation.py +27 -63
- cudf_polars/dsl/expressions/base.py +40 -72
- cudf_polars/dsl/expressions/binaryop.py +5 -41
- cudf_polars/dsl/expressions/boolean.py +25 -53
- cudf_polars/dsl/expressions/datetime.py +97 -17
- cudf_polars/dsl/expressions/literal.py +27 -33
- cudf_polars/dsl/expressions/rolling.py +110 -9
- cudf_polars/dsl/expressions/selection.py +8 -26
- cudf_polars/dsl/expressions/slicing.py +47 -0
- cudf_polars/dsl/expressions/sorting.py +5 -18
- cudf_polars/dsl/expressions/string.py +33 -36
- cudf_polars/dsl/expressions/ternary.py +3 -10
- cudf_polars/dsl/expressions/unary.py +35 -75
- cudf_polars/dsl/ir.py +749 -212
- cudf_polars/dsl/nodebase.py +8 -1
- cudf_polars/dsl/to_ast.py +5 -3
- cudf_polars/dsl/translate.py +319 -171
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +292 -0
- cudf_polars/dsl/utils/groupby.py +97 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +46 -0
- cudf_polars/dsl/utils/rolling.py +113 -0
- cudf_polars/dsl/utils/windows.py +186 -0
- cudf_polars/experimental/base.py +17 -19
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsh.py +1279 -0
- cudf_polars/experimental/dask_registers.py +196 -0
- cudf_polars/experimental/distinct.py +174 -0
- cudf_polars/experimental/explain.py +127 -0
- cudf_polars/experimental/expressions.py +521 -0
- cudf_polars/experimental/groupby.py +288 -0
- cudf_polars/experimental/io.py +58 -29
- cudf_polars/experimental/join.py +353 -0
- cudf_polars/experimental/parallel.py +166 -93
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +92 -7
- cudf_polars/experimental/shuffle.py +294 -0
- cudf_polars/experimental/sort.py +45 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/utils.py +100 -0
- cudf_polars/testing/asserts.py +146 -6
- cudf_polars/testing/io.py +72 -0
- cudf_polars/testing/plugin.py +78 -76
- cudf_polars/typing/__init__.py +59 -6
- cudf_polars/utils/config.py +353 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +22 -5
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +5 -4
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/METADATA +10 -7
- cudf_polars_cu12-25.6.0.dist-info/RECORD +73 -0
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/WHEEL +1 -1
- cudf_polars/experimental/dask_serialize.py +0 -59
- cudf_polars_cu12-25.2.2.dist-info/RECORD +0 -48
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info/licenses}/LICENSE +0 -0
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/top_level.txt +0 -0
```diff
--- a/cudf_polars/dsl/expressions/datetime.py
+++ b/cudf_polars/dsl/expressions/datetime.py
@@ -17,8 +17,6 @@ from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from typing_extensions import Self
 
     from polars.polars import _expr_nodes as pl_expr
```
```diff
@@ -104,6 +102,18 @@ class TemporalFunction(Expr):
         Name.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND,
     }
 
+    _valid_ops: ClassVar[set[Name]] = {
+        *_COMPONENT_MAP.keys(),
+        Name.IsLeapYear,
+        Name.OrdinalDay,
+        Name.ToString,
+        Name.Week,
+        Name.IsoYear,
+        Name.MonthStart,
+        Name.MonthEnd,
+        Name.CastTimeUnit,
+    }
+
     def __init__(
         self,
         dtype: plc.DataType,
```
```diff
@@ -116,22 +126,92 @@ class TemporalFunction(Expr):
         self.name = name
         self.children = children
         self.is_pointwise = True
-        if self.name not in self._COMPONENT_MAP:
+        if self.name not in self._valid_ops:
             raise NotImplementedError(f"Temporal function {self.name}")
 
+        if self.name is TemporalFunction.Name.ToString and plc.traits.is_duration(
+            self.children[0].dtype
+        ):
+            raise NotImplementedError("ToString is not supported on duration types")
+
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        columns = [
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        ]
+        columns = [child.evaluate(df, context=context) for child in self.children]
         (column,) = columns
+        if self.name is TemporalFunction.Name.CastTimeUnit:
+            (unit,) = self.options
+            if plc.traits.is_timestamp(column.obj.type()):
+                dtype = plc.interop.from_arrow(pa.timestamp(unit))
+            elif plc.traits.is_duration(column.obj.type()):
+                dtype = plc.interop.from_arrow(pa.duration(unit))
+            result = plc.unary.cast(column.obj, dtype)
+            return Column(result)
+        if self.name == TemporalFunction.Name.ToString:
+            return Column(
+                plc.strings.convert.convert_datetime.from_timestamps(
+                    column.obj,
+                    self.options[0],
+                    plc.Column.from_iterable_of_py(
+                        [], dtype=plc.DataType(plc.TypeId.STRING)
+                    ),
+                )
+            )
+        if self.name is TemporalFunction.Name.Week:
+            result = plc.strings.convert.convert_integers.to_integers(
+                plc.strings.convert.convert_datetime.from_timestamps(
+                    column.obj,
+                    format="%V",
+                    input_strings_names=plc.Column.from_iterable_of_py(
+                        [], dtype=plc.DataType(plc.TypeId.STRING)
+                    ),
+                ),
+                plc.types.DataType(plc.types.TypeId.INT8),
+            )
+            return Column(result)
+        if self.name is TemporalFunction.Name.IsoYear:
+            result = plc.strings.convert.convert_integers.to_integers(
+                plc.strings.convert.convert_datetime.from_timestamps(
+                    column.obj,
+                    format="%G",
+                    input_strings_names=plc.Column.from_iterable_of_py(
+                        [], dtype=plc.DataType(plc.TypeId.STRING)
+                    ),
+                ),
+                plc.types.DataType(plc.types.TypeId.INT32),
+            )
+            return Column(result)
+        if self.name is TemporalFunction.Name.MonthStart:
+            ends = plc.datetime.last_day_of_month(column.obj)
+            days_to_subtract = plc.datetime.days_in_month(column.obj)
+            # must subtract 1 to avoid rolling over to the previous month
+            days_to_subtract = plc.binaryop.binary_operation(
+                days_to_subtract,
+                plc.Scalar.from_py(1, plc.DataType(plc.TypeId.INT32)),
+                plc.binaryop.BinaryOperator.SUB,
+                plc.DataType(plc.TypeId.DURATION_DAYS),
+            )
+            result = plc.binaryop.binary_operation(
+                ends,
+                days_to_subtract,
+                plc.binaryop.BinaryOperator.SUB,
+                column.obj.type(),
+            )
+
+            return Column(result)
+        if self.name is TemporalFunction.Name.MonthEnd:
+            return Column(
+                plc.unary.cast(
+                    plc.datetime.last_day_of_month(column.obj), column.obj.type()
+                )
+            )
+        if self.name is TemporalFunction.Name.IsLeapYear:
+            return Column(
+                plc.datetime.is_leap_year(column.obj),
+            )
+        if self.name is TemporalFunction.Name.OrdinalDay:
+            return Column(plc.datetime.day_of_year(column.obj))
         if self.name is TemporalFunction.Name.Microsecond:
             millis = plc.datetime.extract_datetime_component(
                 column.obj, plc.datetime.DatetimeComponent.MILLISECOND
```
```diff
@@ -141,7 +221,7 @@ class TemporalFunction(Expr):
             )
             millis_as_micros = plc.binaryop.binary_operation(
                 millis,
-                plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())),
+                plc.Scalar.from_py(1_000, plc.DataType(plc.TypeId.INT32)),
                 plc.binaryop.BinaryOperator.MUL,
                 plc.DataType(plc.TypeId.INT32),
             )
```
```diff
@@ -164,15 +244,15 @@ class TemporalFunction(Expr):
             )
             millis_as_nanos = plc.binaryop.binary_operation(
                 millis,
-                plc.interop.from_arrow(pa.scalar(1_000_000, type=pa.int32())),
+                plc.Scalar.from_py(1_000_000, plc.DataType(plc.TypeId.INT32)),
                 plc.binaryop.BinaryOperator.MUL,
-                plc.types.DataType(plc.types.TypeId.INT32),
+                plc.DataType(plc.TypeId.INT32),
             )
             micros_as_nanos = plc.binaryop.binary_operation(
                 micros,
-                plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())),
+                plc.Scalar.from_py(1_000, plc.DataType(plc.TypeId.INT32)),
                 plc.binaryop.BinaryOperator.MUL,
-                plc.types.DataType(plc.types.TypeId.INT32),
+                plc.DataType(plc.TypeId.INT32),
             )
             total_nanos = plc.binaryop.binary_operation(
                 nanos,
```
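
The net effect of the datetime.py changes is that `TemporalFunction` now dispatches many more temporal operations on the GPU. A minimal sketch of what this enables from the polars side, assuming a working cudf-polars 25.6.0 install; data and column names are illustrative, not from the diff:

```python
# Illustrative: temporal expressions mapping onto the new
# TemporalFunction._valid_ops entries above.
import datetime

import polars as pl

lf = pl.LazyFrame(
    {"ts": [datetime.datetime(2024, 2, 29), datetime.datetime(2025, 1, 15)]}
)
out = lf.select(
    pl.col("ts").dt.month_start().alias("month_start"),  # Name.MonthStart
    pl.col("ts").dt.month_end().alias("month_end"),      # Name.MonthEnd
    pl.col("ts").dt.is_leap_year().alias("leap"),        # Name.IsLeapYear
    pl.col("ts").dt.ordinal_day().alias("doy"),          # Name.OrdinalDay
    pl.col("ts").dt.week().alias("week"),                # Name.Week
    pl.col("ts").dt.iso_year().alias("iso_year"),        # Name.IsoYear
    pl.col("ts").dt.cast_time_unit("ms").alias("ts_ms"), # Name.CastTimeUnit
    pl.col("ts").dt.to_string("%Y-%m-%d").alias("str"),  # Name.ToString
).collect(engine="gpu")
```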
```diff
--- a/cudf_polars/dsl/expressions/literal.py
+++ b/cudf_polars/dsl/expressions/literal.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
```
```diff
@@ -6,23 +6,18 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
-
-import pyarrow as pa
+from typing import TYPE_CHECKING, Any, NoReturn
 
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
-from cudf_polars.utils import dtypes
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
 if TYPE_CHECKING:
-    from collections.abc import Hashable, Mapping
+    from collections.abc import Hashable
 
     import pyarrow as pa
 
-    import polars as pl
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["Literal", "LiteralColumn"]
```
```diff
@@ -31,29 +26,31 @@ __all__ = ["Literal", "LiteralColumn"]
 class Literal(Expr):
     __slots__ = ("value",)
     _non_child = ("dtype", "value")
-    value: pa.Scalar[Any]
+    value: Any  # Python scalar
 
-    def __init__(self, dtype: plc.DataType, value: pa.Scalar[Any]) -> None:
+    def __init__(self, dtype: plc.DataType, value: Any) -> None:
+        if value is None and dtype.id() == plc.TypeId.EMPTY:
+            # TypeId.EMPTY not supported by libcudf
+            # cuDF Python also maps EMPTY to INT8
+            dtype = plc.DataType(plc.TypeId.INT8)
         self.dtype = dtype
-        assert value.type == plc.interop.to_arrow(dtype)
         self.value = value
         self.children = ()
         self.is_pointwise = True
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        # datatype of pyarrow scalar is correct by construction.
-        return Column(plc.Column.from_scalar(plc.interop.from_arrow(self.value), 1))
+        return Column(
+            plc.Column.from_scalar(plc.Scalar.from_py(self.value, self.dtype), 1)
+        )
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        return AggInfo([(None, plc.aggregation.min(), self)])
+    @property
+    def agg_request(self) -> NoReturn:  # noqa: D102
+        raise NotImplementedError(
+            "Not expecting to require agg request of literal"
+        )  # pragma: no cover
 
 
 class LiteralColumn(Expr):
```
```diff
@@ -61,10 +58,9 @@ class LiteralColumn(Expr):
     _non_child = ("dtype", "value")
     value: pa.Array[Any]
 
-    def __init__(self, dtype: plc.DataType, value: pl.Series) -> None:
+    def __init__(self, dtype: plc.DataType, value: pa.Array) -> None:
         self.dtype = dtype
-        data = value.to_arrow()
-        self.value = data.cast(dtypes.downcast_arrow_lists(data.type))
+        self.value = value
         self.children = ()
         self.is_pointwise = True
 
```
```diff
@@ -76,16 +72,14 @@ class LiteralColumn(Expr):
         return (type(self), self.dtype, id(self.value))
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         # datatype of pyarrow array is correct by construction.
         return Column(plc.interop.from_arrow(self.value))
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        raise NotImplementedError("Not expecting to require aggregation of literal")
+    @property
+    def agg_request(self) -> NoReturn:  # noqa: D102
+        raise NotImplementedError(
+            "Not expecting to require agg request of literal"
+        )  # pragma: no cover
```
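
The recurring pattern in literal.py (and in datetime.py above) is replacing the pyarrow round trip `plc.interop.from_arrow(pa.scalar(...))` with direct construction via `plc.Scalar.from_py`. A minimal before/after sketch, assuming pylibcudf 25.6; the value and dtype are illustrative:

```python
import pyarrow as pa
import pylibcudf as plc

dtype = plc.DataType(plc.TypeId.INT64)

# 25.2.2 style: build a host-side pyarrow scalar, then convert it.
old_scalar = plc.interop.from_arrow(pa.scalar(42, type=pa.int64()))

# 25.6.0 style: construct the device scalar directly from a Python value,
# then broadcast it to a one-row column as Literal.do_evaluate does above.
new_scalar = plc.Scalar.from_py(42, dtype)
column = plc.Column.from_scalar(new_scalar, 1)
```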
```diff
--- a/cudf_polars/dsl/expressions/rolling.py
+++ b/cudf_polars/dsl/expressions/rolling.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
```
```diff
@@ -8,24 +8,125 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING, Any
 
-from cudf_polars.dsl.expressions.base import Expr
+import pylibcudf as plc
+
+from cudf_polars.containers import Column
+from cudf_polars.dsl import expr
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
+from cudf_polars.dsl.utils.windows import range_window_bounds
 
 if TYPE_CHECKING:
-    import pylibcudf as plc
+    import pyarrow as pa
+
+    from cudf_polars.containers import DataFrame
+    from cudf_polars.typing import ClosedInterval
+
+__all__ = ["GroupedRollingWindow", "RollingWindow", "to_request"]
+
+
+def to_request(
+    value: expr.Expr, orderby: Column, df: DataFrame
+) -> plc.rolling.RollingRequest:
+    """
+    Produce a rolling request for evaluation with pylibcudf.
 
-__all__ = ["GroupedRollingWindow", "RollingWindow"]
+    Parameters
+    ----------
+    value
+        The expression to perform the rolling aggregation on.
+    orderby
+        Orderby column, used as input to the request when the aggregation is Len.
+    df
+        DataFrame used to evaluate the inputs to the aggregation.
+    """
+    min_periods = 1
+    if isinstance(value, expr.Len):
+        # A count aggregation, we need a column so use the orderby column
+        col = orderby
+    elif isinstance(value, expr.Agg):
+        child = value.children[0]
+        col = child.evaluate(df, context=ExecutionContext.ROLLING)
+        if value.name == "var":
+            # Polars variance produces null if nvalues <= ddof
+            # libcudf produces NaN. However, we can get the polars
+            # behaviour by setting the minimum window size to ddof +
+            # 1.
+            min_periods = value.options + 1
+    else:
+        col = value.evaluate(
+            df, context=ExecutionContext.ROLLING
+        )  # pragma: no cover; raise before we get here because we
+        # don't do correct handling of empty groups
+    return plc.rolling.RollingRequest(col.obj, min_periods, value.agg_request)
 
 
 class RollingWindow(Expr):
-    __slots__ = ("options",)
-    _non_child = ("dtype", "options")
+    __slots__ = ("closed_window", "following", "orderby", "preceding")
+    _non_child = ("dtype", "preceding", "following", "closed_window", "orderby")
 
-    def __init__(self, dtype: plc.DataType, options: Any, agg: Expr) -> None:
+    def __init__(
+        self,
+        dtype: plc.DataType,
+        preceding: pa.Scalar,
+        following: pa.Scalar,
+        closed_window: ClosedInterval,
+        orderby: str,
+        agg: Expr,
+    ) -> None:
         self.dtype = dtype
-        self.options = options
+        self.preceding = preceding
+        self.following = following
+        self.closed_window = closed_window
+        self.orderby = orderby
         self.children = (agg,)
         self.is_pointwise = False
-        raise NotImplementedError("Rolling window not implemented")
+        if agg.agg_request.kind() == plc.aggregation.Kind.COLLECT_LIST:
+            raise NotImplementedError(
+                "Incorrect handling of empty groups for list collection"
+            )
+        if not plc.rolling.is_valid_rolling_aggregation(agg.dtype, agg.agg_request):
+            raise NotImplementedError(f"Unsupported rolling aggregation {agg}")
+
+    def do_evaluate(  # noqa: D102
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
+    ) -> Column:
+        if context != ExecutionContext.FRAME:
+            raise RuntimeError(
+                "Rolling aggregation inside groupby/over/rolling"
+            )  # pragma: no cover; translation raises first
+        (agg,) = self.children
+        orderby = df.column_map[self.orderby]
+        # Polars casts integral orderby to int64, but only for calculating window bounds
+        if (
+            plc.traits.is_integral(orderby.obj.type())
+            and orderby.obj.type().id() != plc.TypeId.INT64
+        ):
+            orderby_obj = plc.unary.cast(orderby.obj, plc.DataType(plc.TypeId.INT64))
+        else:
+            orderby_obj = orderby.obj
+        preceding, following = range_window_bounds(
+            self.preceding, self.following, self.closed_window
+        )
+        if orderby.obj.null_count() != 0:
+            raise RuntimeError(
+                f"Index column '{self.orderby}' in rolling may not contain nulls"
+            )
+        if not orderby.check_sorted(
+            order=plc.types.Order.ASCENDING, null_order=plc.types.NullOrder.BEFORE
+        ):
+            raise RuntimeError(
+                f"Index column '{self.orderby}' in rolling is not sorted, please sort first"
+            )
+        (result,) = plc.rolling.grouped_range_rolling_window(
+            plc.Table([]),
+            orderby_obj,
+            plc.types.Order.ASCENDING,
+            plc.types.NullOrder.BEFORE,
+            preceding,
+            following,
+            [to_request(agg, orderby, df)],
+        ).columns()
+        return Column(result)
 
 
 class GroupedRollingWindow(Expr):
```
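
`RollingWindow` thus goes from an unconditional `NotImplementedError` in 25.2.2 to a working `grouped_range_rolling_window` evaluation. A sketch of the polars-level operation it now serves, with illustrative data; as the `RuntimeError`s above require, the index column must be sorted and null-free:

```python
import datetime

import polars as pl

lf = pl.LazyFrame(
    {
        "ts": pl.datetime_range(
            datetime.datetime(2025, 1, 1),
            datetime.datetime(2025, 1, 5),
            interval="1d",
            eager=True,
        ),
        "x": [1, 2, 3, 4, 5],
    }
)
out = (
    lf.rolling(index_column="ts", period="2d")
    .agg(pl.col("x").sum().alias("rolling_sum"))
    .collect(engine="gpu")
)
```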
```diff
--- a/cudf_polars/dsl/expressions/selection.py
+++ b/cudf_polars/dsl/expressions/selection.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
```
```diff
@@ -8,16 +8,12 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING
 
-import pyarrow as pa
-
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["Filter", "Gather"]
```
```diff
@@ -33,16 +29,11 @@ class Gather(Expr):
         self.is_pointwise = False
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         values, indices = (
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
+            child.evaluate(df, context=context) for child in self.children
         )
         lo, hi = plc.reduce.minmax(indices.obj)
         lo = plc.interop.to_arrow(lo).as_py()
```
```diff
@@ -50,13 +41,11 @@ class Gather(Expr):
         n = df.num_rows
         if hi >= n or lo < -n:
             raise ValueError("gather indices are out of bounds")
-        if indices.obj.null_count():
+        if indices.null_count:
             bounds_policy = plc.copying.OutOfBoundsPolicy.NULLIFY
             obj = plc.replace.replace_nulls(
                 indices.obj,
-                plc.interop.from_arrow(
-                    pa.scalar(n, type=plc.interop.to_arrow(indices.obj.type()))
-                ),
+                plc.Scalar.from_py(n, dtype=indices.obj.type()),
             )
         else:
             bounds_policy = plc.copying.OutOfBoundsPolicy.DONT_CHECK
```
```diff
@@ -72,20 +61,13 @@ class Filter(Expr):
     def __init__(self, dtype: plc.DataType, values: Expr, indices: Expr):
         self.dtype = dtype
         self.children = (values, indices)
-        self.is_pointwise = True
+        self.is_pointwise = False
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        values, mask = (
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        )
+        values, mask = (child.evaluate(df, context=context) for child in self.children)
         table = plc.stream_compaction.apply_boolean_mask(
             plc.Table([values.obj]), mask.obj
         )
```
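
The `Gather` change above is behaviour-preserving: null gather indices are first replaced with the out-of-range value `n`, and the `NULLIFY` policy then turns those rows into nulls. From the polars side, an illustrative sketch:

```python
import polars as pl

lf = pl.LazyFrame({"a": [10, 20, 30], "idx": [2, None, 0]})
# A null index yields a null output row: [30, null, 10]
out = lf.select(pl.col("a").gather(pl.col("idx"))).collect(engine="gpu")
```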
```diff
--- /dev/null
+++ b/cudf_polars/dsl/expressions/slicing.py
@@ -0,0 +1,47 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+# TODO: remove need for this
+# ruff: noqa: D101
+"""Slicing DSL nodes."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from cudf_polars.dsl.expressions.base import (
+    ExecutionContext,
+    Expr,
+)
+
+if TYPE_CHECKING:
+    import pylibcudf as plc
+
+    from cudf_polars.containers import Column, DataFrame
+
+
+__all__ = ["Slice"]
+
+
+class Slice(Expr):
+    __slots__ = ("length", "offset")
+    _non_child = ("dtype", "offset", "length")
+
+    def __init__(
+        self,
+        dtype: plc.DataType,
+        offset: int,
+        length: int,
+        column: Expr,
+    ) -> None:
+        self.dtype = dtype
+        self.offset = offset
+        self.length = length
+        self.children = (column,)
+
+    def do_evaluate(
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
+    ) -> Column:
+        """Evaluate this expression given a dataframe for context."""
+        (child,) = self.children
+        column = child.evaluate(df, context=context)
+        return column.slice((self.offset, self.length))
```
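
This new node gives the GPU engine a direct implementation of expression-level slicing, delegating to `Column.slice`. An illustrative sketch of the polars expression it translates:

```python
import polars as pl

lf = pl.LazyFrame({"a": [1, 2, 3, 4, 5]})
# Expression-level slice: take 3 elements starting at offset 1 -> [2, 3, 4]
out = lf.select(pl.col("a").slice(1, 3)).collect(engine="gpu")
```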
```diff
--- a/cudf_polars/dsl/expressions/sorting.py
+++ b/cudf_polars/dsl/expressions/sorting.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
```
```diff
@@ -15,8 +15,6 @@ from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 from cudf_polars.utils import sorting
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["Sort", "SortBy"]
```
```diff
@@ -35,15 +33,11 @@ class Sort(Expr):
         self.is_pointwise = False
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         (child,) = self.children
-        column = child.evaluate(df, context=context, mapping=mapping)
+        column = child.evaluate(df, context=context)
         (stable, nulls_last, descending) = self.options
         order, null_order = sorting.sort_order(
             [descending], nulls_last=[nulls_last], num_keys=1
```
```diff
@@ -75,17 +69,10 @@ class SortBy(Expr):
         self.is_pointwise = False
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        column, *by = (
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        )
+        column, *by = (child.evaluate(df, context=context) for child in self.children)
         (stable, nulls_last, descending) = self.options
         order, null_order = sorting.sort_order(
             descending, nulls_last=nulls_last, num_keys=len(by)
```