cudf-polars-cu12 25.4.0__py3-none-any.whl → 25.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +35 -50
- cudf_polars/containers/column.py +38 -0
- cudf_polars/containers/dataframe.py +11 -16
- cudf_polars/dsl/expressions/aggregation.py +25 -61
- cudf_polars/dsl/expressions/base.py +40 -72
- cudf_polars/dsl/expressions/binaryop.py +3 -39
- cudf_polars/dsl/expressions/boolean.py +21 -49
- cudf_polars/dsl/expressions/datetime.py +59 -17
- cudf_polars/dsl/expressions/literal.py +24 -24
- cudf_polars/dsl/expressions/rolling.py +110 -9
- cudf_polars/dsl/expressions/selection.py +6 -24
- cudf_polars/dsl/expressions/slicing.py +2 -8
- cudf_polars/dsl/expressions/sorting.py +4 -17
- cudf_polars/dsl/expressions/string.py +29 -32
- cudf_polars/dsl/expressions/ternary.py +3 -10
- cudf_polars/dsl/expressions/unary.py +32 -73
- cudf_polars/dsl/ir.py +575 -167
- cudf_polars/dsl/nodebase.py +1 -1
- cudf_polars/dsl/to_ast.py +5 -3
- cudf_polars/dsl/translate.py +272 -152
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +292 -0
- cudf_polars/dsl/utils/groupby.py +97 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +46 -0
- cudf_polars/dsl/utils/rolling.py +113 -0
- cudf_polars/dsl/utils/windows.py +186 -0
- cudf_polars/experimental/base.py +0 -8
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsh.py +1279 -0
- cudf_polars/experimental/dask_registers.py +196 -0
- cudf_polars/experimental/distinct.py +174 -0
- cudf_polars/experimental/explain.py +127 -0
- cudf_polars/experimental/expressions.py +521 -0
- cudf_polars/experimental/groupby.py +109 -167
- cudf_polars/experimental/io.py +53 -26
- cudf_polars/experimental/join.py +59 -24
- cudf_polars/experimental/parallel.py +155 -133
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +92 -7
- cudf_polars/experimental/shuffle.py +109 -9
- cudf_polars/experimental/sort.py +45 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/utils.py +100 -0
- cudf_polars/testing/asserts.py +146 -6
- cudf_polars/testing/io.py +72 -0
- cudf_polars/testing/plugin.py +55 -42
- cudf_polars/typing/__init__.py +27 -5
- cudf_polars/utils/config.py +317 -102
- cudf_polars/utils/dtypes.py +8 -1
- cudf_polars/utils/timer.py +1 -1
- cudf_polars/utils/versions.py +4 -4
- {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.6.0.dist-info}/METADATA +7 -5
- cudf_polars_cu12-25.6.0.dist-info/RECORD +73 -0
- {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.6.0.dist-info}/WHEEL +1 -1
- cudf_polars/experimental/dask_serialize.py +0 -73
- cudf_polars_cu12-25.4.0.dist-info/RECORD +0 -55
- {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.6.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.6.0.dist-info}/top_level.txt +0 -0
cudf_polars/VERSION
CHANGED
@@ -1 +1 @@
-25.04.00
+25.06.00
cudf_polars/callback.py
CHANGED
@@ -13,6 +13,7 @@ from functools import cache, partial
 from typing import TYPE_CHECKING, Literal, overload
 
 import nvtx
+from typing_extensions import assert_never
 
 from polars.exceptions import ComputeError, PerformanceWarning
 
@@ -22,7 +23,6 @@ from rmm._cuda import gpu
 
 from cudf_polars.dsl.translate import Translator
 from cudf_polars.utils.timer import Timer
-from cudf_polars.utils.versions import POLARS_VERSION_LT_125
 
 if TYPE_CHECKING:
     from collections.abc import Generator
@@ -32,6 +32,7 @@ if TYPE_CHECKING:
 
     from cudf_polars.dsl.ir import IR
     from cudf_polars.typing import NodeTraverser
+    from cudf_polars.utils.config import ConfigOptions
 
 __all__: list[str] = ["execute_with_cudf"]
 
@@ -44,7 +45,7 @@ _SUPPORTED_PREFETCHES = {
 }
 
 
-def _env_get_int(name, default):
+def _env_get_int(name: str, default: int) -> int:
     try:
         return int(os.getenv(name, default))
     except (ValueError, TypeError):  # pragma: no cover
@@ -184,9 +185,8 @@ def _callback(
     n_rows: int | None,
     should_time: Literal[False],
     *,
-    device: int | None,
-    memory_resource: rmm.mr.DeviceMemoryResource | None,
-    executor: Literal["pylibcudf", "dask-experimental"] | None,
+    memory_resource: rmm.mr.DeviceMemoryResource | None,
+    config_options: ConfigOptions,
     timer: Timer | None,
 ) -> pl.DataFrame: ...
 
@@ -199,9 +199,8 @@ def _callback(
     n_rows: int | None,
     should_time: Literal[True],
     *,
-    device: int | None,
-    memory_resource: rmm.mr.DeviceMemoryResource | None,
-    executor: Literal["pylibcudf", "dask-experimental"] | None,
+    memory_resource: rmm.mr.DeviceMemoryResource | None,
+    config_options: ConfigOptions,
     timer: Timer | None,
 ) -> tuple[pl.DataFrame, list[tuple[int, int, str]]]: ...
 
@@ -213,11 +212,10 @@ def _callback(
     n_rows: int | None,
     should_time: bool,  # noqa: FBT001
     *,
-    device: int | None,
-    memory_resource: rmm.mr.DeviceMemoryResource | None,
-    executor: Literal["pylibcudf", "dask-experimental"] | None,
+    memory_resource: rmm.mr.DeviceMemoryResource | None,
+    config_options: ConfigOptions,
     timer: Timer | None,
-):
+) -> pl.DataFrame | tuple[pl.DataFrame, list[tuple[int, int, str]]]:
     assert with_columns is None
     assert pyarrow_predicate is None
     assert n_rows is None
@@ -226,21 +224,20 @@ def _callback(
     with (
         nvtx.annotate(message="ExecuteIR", domain="cudf_polars"),
         # Device must be set before memory resource is obtained.
-        set_device(device),
+        set_device(config_options.device),
         set_memory_resource(memory_resource),
     ):
-        if executor is None or executor == "pylibcudf":
+        if config_options.executor.name == "in-memory":
             df = ir.evaluate(cache={}, timer=timer).to_polars()
             if timer is None:
                 return df
             else:
                 return df, timer.timings
-        elif executor == "dask-experimental":
-            from cudf_polars.experimental.parallel import evaluate_dask
+        elif config_options.executor.name == "streaming":
+            from cudf_polars.experimental.parallel import evaluate_streaming
 
-            return evaluate_dask(ir).to_polars()
-        else:
-            raise ValueError(f"Unknown executor '{executor}'")
+            return evaluate_streaming(ir, config_options).to_polars()
+        assert_never(f"Unknown executor '{config_options.executor}'")
 
 
 def execute_with_cudf(
@@ -259,7 +256,7 @@ def execute_with_cudf(
         profiling should occur).
 
     config
-        GPUEngine configuration object
+        GPUEngine object. Configuration is available as ``engine.config``.
 
     Raises
     ------
@@ -277,16 +274,22 @@ def execute_with_cudf(
     else:
         start = time.monotonic_ns()
         timer = Timer(start - duration_since_start)
-
+
     memory_resource = config.memory_resource
-    device = config.device
-    executor = config.config.get("executor", None)
+
     with nvtx.annotate(message="ConvertIR", domain="cudf_polars"):
         translator = Translator(nt, config)
         ir = translator.translate_ir()
         ir_translation_errors = translator.errors
     if timer is not None:
         timer.store(start, time.monotonic_ns(), "gpu-ir-translation")
+
+    if (
+        memory_resource is None
+        and translator.config_options.executor.name == "streaming"
+        and translator.config_options.executor.scheduler == "distributed"
+    ):  # pragma: no cover; Requires distributed cluster
+        memory_resource = rmm.mr.get_current_device_resource()
     if len(ir_translation_errors):
         # TODO: Display these errors in user-friendly way.
         # tracked in https://github.com/rapidsai/cudf/issues/17051
@@ -301,33 +304,15 @@ def execute_with_cudf(
         exception = NotImplementedError(error_message, unique_errors)
         if bool(int(os.environ.get("POLARS_VERBOSE", 0))):
             warnings.warn(error_message, PerformanceWarning, stacklevel=2)
-        if raise_on_fail:
+        if translator.config_options.raise_on_fail:
             raise exception
     else:
-        if POLARS_VERSION_LT_125:  # pragma: no cover
-
-            nt.set_udf(
-                partial(
-                    _callback,
-                    ir,
-                    device=device,
-                    memory_resource=memory_resource,
-                    executor=executor,
-                    timer=None,
-                )
+        nt.set_udf(
+            partial(
+                _callback,
+                ir,
+                memory_resource=memory_resource,
+                config_options=translator.config_options,
+                timer=timer,
             )
-            return
-        nt.set_udf(
-            partial(
-                _callback,
-                ir,
-                device=device,
-                memory_resource=memory_resource,
-                executor=executor,
-                timer=timer,
-            )
-        )
-
-
-if POLARS_VERSION_LT_125:  # pragma: no cover
-    execute_with_cudf = partial(execute_with_cudf, duration_since_start=None)
+        )
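The net effect of the callback.py changes is that executor selection moves from loose string checks ("pylibcudf", "dask-experimental") into a typed ConfigOptions object whose executors are named "in-memory" and "streaming". A sketch of how this is expected to surface from user code; GPUEngine forwards extra keyword arguments into engine.config, from which the translator builds ConfigOptions, but the option names here are illustrative and should be checked against the cudf-polars 25.06 documentation:

import polars as pl

q = pl.LazyFrame({"a": [1, 2, 3]}).select(pl.col("a").sum())

# "raise_on_fail" maps to translator.config_options.raise_on_fail;
# "executor" selects evaluate_streaming instead of in-memory evaluation.
engine = pl.GPUEngine(raise_on_fail=True, executor="streaming")
result = q.collect(engine=engine)  # requires a CUDA-capable GPU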
cudf_polars/containers/column.py
CHANGED
@@ -177,6 +177,44 @@ class Column:
             null_order=like.null_order,
         )
 
+    def check_sorted(
+        self,
+        *,
+        order: plc.types.Order,
+        null_order: plc.types.NullOrder,
+    ) -> bool:
+        """
+        Check if the column is sorted.
+
+        Parameters
+        ----------
+        order
+            The requested sort order.
+        null_order
+            Where nulls sort to.
+
+        Returns
+        -------
+        True if the column is sorted, false otherwise.
+
+        Notes
+        -----
+        If the sortedness flag is not set, this launches a kernel to
+        check sortedness.
+        """
+        if self.obj.size() <= 1 or self.obj.size() == self.obj.null_count():
+            return True
+        if self.is_sorted == plc.types.Sorted.YES:
+            return self.order == order and (
+                self.obj.null_count() == 0 or self.null_order == null_order
+            )
+        if plc.sorting.is_sorted(plc.Table([self.obj]), [order], [null_order]):
+            self.sorted = plc.types.Sorted.YES
+            self.order = order
+            self.null_order = null_order
+            return True
+        return False
+
     def astype(self, dtype: plc.DataType) -> Column:
         """
         Cast the column to as the requested dtype.
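For orientation, the new Column.check_sorted short-circuits on the cached sortedness flag and only launches a comparison kernel when the flag is unset. A minimal standalone sketch of that kernel call, assuming a CUDA-capable environment:

import pyarrow as pa
import pylibcudf as plc

# Build a device column and ask whether it is ascending with nulls first.
col = plc.interop.from_arrow(pa.array([1, 2, 2, 5]))
result = plc.sorting.is_sorted(
    plc.Table([col]),              # single-column table
    [plc.types.Order.ASCENDING],   # requested order per column
    [plc.types.NullOrder.BEFORE],  # requested null placement per column
)
# check_sorted caches a True result back onto the Column, so later
# checks with the same order and null placement can skip the kernel.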
cudf_polars/containers/dataframe.py
CHANGED
@@ -8,19 +8,17 @@ from __future__ import annotations
 from functools import cached_property
 from typing import TYPE_CHECKING, cast
 
-import pyarrow as pa
-
 import polars as pl
 
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.utils import conversion, dtypes
+from cudf_polars.utils import conversion
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Mapping, Sequence, Set
 
-    from typing_extensions import Self
+    from typing_extensions import Any, Self
 
     from cudf_polars.typing import ColumnOptions, DataFrameHeader, Slice
 
@@ -108,17 +106,12 @@ class DataFrame:
         -------
         New dataframe representing the input.
         """
-        table = df.to_arrow()
-        schema = table.schema
-        for i, field in enumerate(schema):
-            schema = schema.set(
-                i, pa.field(field.name, dtypes.downcast_arrow_lists(field.type))
-            )
-        # No-op if the schema is unchanged.
-        d_table = plc.interop.from_arrow(table.cast(schema))
+        plc_table = plc.Table(df)
         return cls(
-            Column(column).copy_metadata(h_col)
-            for column, h_col in zip(d_table.columns(), df.iter_columns(), strict=True)
+            Column(d_col, name=name).copy_metadata(h_col)
+            for d_col, h_col, name in zip(
+                plc_table.columns(), df.iter_columns(), df.columns, strict=True
+            )
         )
 
     @classmethod
@@ -246,7 +239,9 @@ class DataFrame:
             for c, other in zip(self.columns, like.columns, strict=True)
         )
 
-    def with_columns(self, columns: Iterable[Column], *, replace_only: bool = False) -> Self:
+    def with_columns(
+        self, columns: Iterable[Column], *, replace_only: bool = False
+    ) -> Self:
         """
         Return a new dataframe with extra columns.
 
@@ -275,7 +270,7 @@ class DataFrame:
         """Drop columns by name."""
         return type(self)(column for column in self.columns if column.name not in names)
 
-    def select(self, names: Sequence[str]) -> Self:
+    def select(self, names: Sequence[str] | Mapping[str, Any]) -> Self:
         """Select columns by name returning DataFrame."""
         try:
            return type(self)(self.column_map[name] for name in names)
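A note on the select signature change above: accepting a Mapping works because iterating a mapping yields its keys, so a schema dict can be passed directly where a list of names was previously required. In plain Python:

names = ["a", "b"]
schema = {"a": "int64", "b": "float64"}
# Both iterate to the same column names, which is all select() needs.
assert list(names) == list(schema)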
cudf_polars/dsl/expressions/aggregation.py
CHANGED
@@ -9,22 +9,13 @@ from __future__ import annotations
 from functools import partial
 from typing import TYPE_CHECKING, Any, ClassVar
 
-import pyarrow as pa
-
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.dsl.expressions.base import (
-    AggInfo,
-    ExecutionContext,
-    Expr,
-)
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 from cudf_polars.dsl.expressions.literal import Literal
-from cudf_polars.dsl.expressions.unary import UnaryFunction
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["Agg"]
@@ -75,11 +66,15 @@ class Agg(Expr):
                 else plc.types.NullPolicy.INCLUDE
             )
         elif name == "quantile":
-            _, quantile = self.children
+            child, quantile = self.children
             if not isinstance(quantile, Literal):
                 raise NotImplementedError("Only support literal quantile values")
+            if options == "equiprobable":
+                raise NotImplementedError("Quantile with equiprobable interpolation")
+            if plc.traits.is_duration(child.dtype):
+                raise NotImplementedError("Quantile with duration data type")
             req = plc.aggregation.quantile(
-                quantiles=[quantile.value.as_py()], interp=Agg.interp_mapping[options]
+                quantiles=[quantile.value], interp=Agg.interp_mapping[options]
             )
         else:
             raise NotImplementedError(
@@ -91,7 +86,9 @@ class Agg(Expr):
             op = partial(self._reduce, request=req)
         elif name in {"min", "max"}:
             op = partial(op, propagate_nans=options)
-        elif name in {"count", "sum", "first", "last"}:
+        elif name == "count":
+            op = partial(op, include_nulls=options)
+        elif name in {"sum", "first", "last"}:
             pass
         else:
             raise NotImplementedError(
@@ -124,38 +121,19 @@ class Agg(Expr):
         "linear": plc.types.Interpolation.LINEAR,
     }
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        if depth >= 1:
-            raise NotImplementedError(
-                "Nested aggregations in groupby"
-            )  # pragma: no cover; check_agg trips first
-        if (isminmax := self.name in {"min", "max"}) and self.options:
-            raise NotImplementedError("Nan propagation in groupby for min/max")
-        (child,) = self.children
-        ((expr, _, _),) = child.collect_agg(depth=depth + 1).requests
-        request = self.request
-        # These are handled specially here because we don't set up the
-        # request for the whole-frame agg because we can avoid a
-        # reduce for these.
+    @property
+    def agg_request(self) -> plc.aggregation.Aggregation:  # noqa: D102
         if self.name == "first":
-            request = plc.aggregation.nth_element(
+            return plc.aggregation.nth_element(
                 0, null_handling=plc.types.NullPolicy.INCLUDE
             )
         elif self.name == "last":
-            request = plc.aggregation.nth_element(
+            return plc.aggregation.nth_element(
                 -1, null_handling=plc.types.NullPolicy.INCLUDE
             )
-        if request is None:
-            raise NotImplementedError(
-                f"Aggregation {self.name} in groupby"
-            )  # pragma: no cover; __init__ trips first
-        if isminmax and plc.traits.is_floating_point(self.dtype):
-            assert expr is not None
-            # Ignore nans in these groupby aggs, do this by masking
-            # nans in the input
-            expr = UnaryFunction(self.dtype, "mask_nans", (), expr)
-        return AggInfo([(expr, request, self)])
+        else:
+            assert self.request is not None, "Init should have raised"
+            return self.request
 
     def _reduce(
         self, column: Column, *, request: plc.aggregation.Aggregation
@@ -167,15 +145,11 @@ class Agg(Expr):
             )
         )
 
-    def _count(self, column: Column) -> Column:
+    def _count(self, column: Column, *, include_nulls: bool) -> Column:
+        null_count = column.null_count if not include_nulls else 0
         return Column(
             plc.Column.from_scalar(
-                plc.interop.from_arrow(
-                    pa.scalar(
-                        column.size - column.null_count,
-                        type=plc.interop.to_arrow(self.dtype),
-                    ),
-                ),
+                plc.Scalar.from_py(column.size - null_count, self.dtype),
                 1,
             )
         )
@@ -184,9 +158,7 @@ class Agg(Expr):
         if column.size == 0 or column.null_count == column.size:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(0, type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(0, self.dtype),
                     1,
                 )
             )
@@ -196,9 +168,7 @@ class Agg(Expr):
         if propagate_nans and column.nan_count > 0:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(float("nan"), type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(float("nan"), self.dtype),
                     1,
                 )
             )
@@ -210,9 +180,7 @@ class Agg(Expr):
         if propagate_nans and column.nan_count > 0:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(float("nan"), type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(float("nan"), self.dtype),
                     1,
                 )
             )
@@ -228,11 +196,7 @@ class Agg(Expr):
             return Column(plc.copying.slice(column.obj, [n - 1, n])[0])
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         if context is not ExecutionContext.FRAME:
@@ -243,4 +207,4 @@ class Agg(Expr):
         # Aggregations like quantiles may have additional children that were
         # preprocessed into pylibcudf requests.
         child = self.children[0]
-        return self.op(child.evaluate(df, context=context, mapping=mapping))
+        return self.op(child.evaluate(df, context=context))
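Several hunks above replace a host-side pyarrow scalar round-trip with pylibcudf's direct constructor. A minimal sketch of the two spellings, assuming a CUDA-capable environment:

import pyarrow as pa
import pylibcudf as plc

dtype = plc.DataType(plc.TypeId.INT64)

# 25.4.0 spelling: build a pyarrow scalar on the host, then convert it.
old_scalar = plc.interop.from_arrow(
    pa.scalar(0, type=plc.interop.to_arrow(dtype))
)

# 25.6.0 spelling: construct the device scalar from a Python value.
new_scalar = plc.Scalar.from_py(0, dtype)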
cudf_polars/dsl/expressions/base.py
CHANGED
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -16,7 +16,7 @@ from cudf_polars.containers import Column
 from cudf_polars.dsl.nodebase import Node
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
+    from typing_extensions import Self
 
     from cudf_polars.containers import Column, DataFrame
 
@@ -46,11 +46,7 @@ class Expr(Node["Expr"]):
     """Names of non-child data (not Exprs) for reconstruction."""
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """
         Evaluate this expression given a dataframe for context.
@@ -61,15 +57,10 @@ class Expr(Node["Expr"]):
             DataFrame that will provide columns.
         context
             What context are we performing this evaluation in?
-        mapping
-            Substitution mapping from expressions to Columns, used to
-            override the evaluation of a given expression if we're
-            performing a simple rewritten evaluation.
 
         Notes
         -----
-        Do not call this function directly, but rather
-        :meth:`evaluate` which handles the mapping lookups.
+        Do not call this function directly, but rather :meth:`evaluate`.
 
         Returns
         -------
@@ -87,11 +78,7 @@ class Expr(Node["Expr"]):
        )  # pragma: no cover; translation of unimplemented nodes trips first
 
     def evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """
         Evaluate this expression given a dataframe for context.
@@ -102,10 +89,6 @@ class Expr(Node["Expr"]):
             DataFrame that will provide columns.
         context
             What context are we performing this evaluation in?
-        mapping
-            Substitution mapping from expressions to Columns, used to
-            override the evaluation of a given expression if we're
-            performing a simple rewritten evaluation.
 
         Notes
         -----
@@ -124,37 +107,28 @@ class Expr(Node["Expr"]):
         are returned during translation to the IR, but for now we
         are not perfect.
         """
-        if mapping is None:
-            return self.do_evaluate(df, context=context, mapping=mapping)
-        try:
-            return mapping[self]
-        except KeyError:
-            return self.do_evaluate(df, context=context, mapping=mapping)
-
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """
-        Collect information about aggregations in groupbys.
+        return self.do_evaluate(df, context=context)
 
-        Parameters
-        ----------
-        depth
-            The depth of aggregating (reduction or sampling)
-            expressions we are currently at.
+    @property
+    def agg_request(self) -> plc.aggregation.Aggregation:
+        """
+        The aggregation for this expression in a grouped aggregation.
 
         Returns
         -------
-        Aggregation info describing the expression to aggregate in the
-        groupby.
+        Aggregation request. Default is to collect the expression.
+
+        Notes
+        -----
+        This presumes that the IR translation has decomposed groupby
+        reductions only into cases we can handle.
 
         Raises
         ------
         NotImplementedError
-            If we can't currently perform the aggregation request, for
-            example nested aggregations like ``a.max().min()``.
+            If requesting an aggregation from an unexpected expression.
         """
-        raise NotImplementedError(
-            f"Collecting aggregation info for {type(self).__name__}"
-        )  # pragma: no cover; check_agg trips first
+        return plc.aggregation.collect_list()
 
 
 class ErrorExpr(Expr):
@@ -166,7 +140,7 @@ class ErrorExpr(Expr):
         self.dtype = dtype
         self.error = error
         self.children = ()
-        self.is_pointwise = True
+        self.is_pointwise = False
 
 
 class NamedExpr:
@@ -202,11 +176,7 @@ class NamedExpr:
         return not self.__eq__(other)
 
     def evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """
         Evaluate this expression given a dataframe for context.
@@ -217,8 +187,6 @@ class NamedExpr:
             DataFrame providing context
         context
             Execution context
-        mapping
-            Substitution mapping
 
         Returns
         -------
@@ -229,13 +197,25 @@ class NamedExpr:
         :meth:`Expr.evaluate` for details, this function just adds the
         name to a column produced from an expression.
         """
-        return self.value.evaluate(df, context=context, mapping=mapping).rename(
-            self.name
-        )
+        return self.value.evaluate(df, context=context).rename(self.name)
+
+    def reconstruct(self, expr: Expr) -> Self:
+        """
+        Rebuild with a new `Expr` value.
+
+        Parameters
+        ----------
+        expr
+            New `Expr` value
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        return self.value.collect_agg(depth=depth)
+        Returns
+        -------
+        New `NamedExpr` with `expr` as the underlying expression.
+        The name of the original `NamedExpr` is preserved.
+        """
+        if expr is self.value:
+            return self
+        return type(self)(self.name, expr)
 
 
 class Col(Expr):
@@ -250,21 +230,13 @@ class Col(Expr):
         self.children = ()
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         # Deliberately remove the name here so that we guarantee
         # evaluation of the IR produces names.
         return df.column_map[self.name].rename(None)
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        return AggInfo([(self, plc.aggregation.collect_list(), self)])
-
 
 class ColRef(Expr):
     __slots__ = ("index", "table_ref")
@@ -288,11 +260,7 @@ class ColRef(Expr):
         self.children = (column,)
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         raise NotImplementedError(