cudf-polars-cu12 25.4.0-py3-none-any.whl → 25.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +55 -61
- cudf_polars/containers/__init__.py +4 -2
- cudf_polars/containers/column.py +123 -40
- cudf_polars/containers/dataframe.py +70 -35
- cudf_polars/containers/datatype.py +135 -0
- cudf_polars/dsl/expr.py +2 -0
- cudf_polars/dsl/expressions/aggregation.py +51 -71
- cudf_polars/dsl/expressions/base.py +45 -77
- cudf_polars/dsl/expressions/binaryop.py +29 -44
- cudf_polars/dsl/expressions/boolean.py +64 -71
- cudf_polars/dsl/expressions/datetime.py +70 -34
- cudf_polars/dsl/expressions/literal.py +45 -33
- cudf_polars/dsl/expressions/rolling.py +133 -10
- cudf_polars/dsl/expressions/selection.py +13 -31
- cudf_polars/dsl/expressions/slicing.py +6 -13
- cudf_polars/dsl/expressions/sorting.py +9 -21
- cudf_polars/dsl/expressions/string.py +470 -84
- cudf_polars/dsl/expressions/struct.py +138 -0
- cudf_polars/dsl/expressions/ternary.py +9 -13
- cudf_polars/dsl/expressions/unary.py +151 -90
- cudf_polars/dsl/ir.py +798 -331
- cudf_polars/dsl/nodebase.py +11 -4
- cudf_polars/dsl/to_ast.py +61 -20
- cudf_polars/dsl/tracing.py +16 -0
- cudf_polars/dsl/translate.py +279 -167
- cudf_polars/dsl/traversal.py +64 -15
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +301 -0
- cudf_polars/dsl/utils/groupby.py +93 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +61 -0
- cudf_polars/dsl/utils/reshape.py +74 -0
- cudf_polars/dsl/utils/rolling.py +115 -0
- cudf_polars/dsl/utils/windows.py +186 -0
- cudf_polars/experimental/base.py +112 -8
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds.py +216 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
- cudf_polars/experimental/benchmarks/pdsh.py +812 -0
- cudf_polars/experimental/benchmarks/utils.py +725 -0
- cudf_polars/experimental/dask_registers.py +200 -0
- cudf_polars/experimental/dispatch.py +22 -7
- cudf_polars/experimental/distinct.py +194 -0
- cudf_polars/experimental/explain.py +127 -0
- cudf_polars/experimental/expressions.py +547 -0
- cudf_polars/experimental/groupby.py +174 -196
- cudf_polars/experimental/io.py +626 -51
- cudf_polars/experimental/join.py +104 -33
- cudf_polars/experimental/parallel.py +219 -133
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +132 -7
- cudf_polars/experimental/shuffle.py +126 -18
- cudf_polars/experimental/sort.py +45 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/utils.py +112 -0
- cudf_polars/testing/asserts.py +213 -14
- cudf_polars/testing/io.py +72 -0
- cudf_polars/testing/plugin.py +77 -67
- cudf_polars/typing/__init__.py +63 -22
- cudf_polars/utils/config.py +584 -117
- cudf_polars/utils/dtypes.py +4 -117
- cudf_polars/utils/timer.py +1 -1
- cudf_polars/utils/versions.py +7 -5
- {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.8.0.dist-info}/METADATA +13 -18
- cudf_polars_cu12-25.8.0.dist-info/RECORD +81 -0
- {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.8.0.dist-info}/WHEEL +1 -1
- cudf_polars/experimental/dask_serialize.py +0 -73
- cudf_polars_cu12-25.4.0.dist-info/RECORD +0 -55
- {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.8.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu12-25.4.0.dist-info → cudf_polars_cu12-25.8.0.dist-info}/top_level.txt +0 -0
cudf_polars/containers/dataframe.py
CHANGED

```diff
@@ -8,26 +8,55 @@ from __future__ import annotations
 from functools import cached_property
 from typing import TYPE_CHECKING, cast
 
-import pyarrow as pa
-
 import polars as pl
 
 import pylibcudf as plc
 
-from cudf_polars.containers import Column
-from cudf_polars.utils import conversion
+from cudf_polars.containers import Column, DataType
+from cudf_polars.utils import conversion
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Mapping, Sequence, Set
 
-    from typing_extensions import Self
+    from typing_extensions import Any, CapsuleType, Self
 
-    from cudf_polars.typing import ColumnOptions, DataFrameHeader, Slice
+    from cudf_polars.typing import ColumnOptions, DataFrameHeader, PolarsDataType, Slice
 
 
 __all__: list[str] = ["DataFrame"]
 
 
+def _create_polars_column_metadata(
+    name: str, dtype: PolarsDataType
+) -> plc.interop.ColumnMetadata:
+    """Create ColumnMetadata preserving pl.Struct field names."""
+    if isinstance(dtype, pl.Struct):
+        children_meta = [
+            _create_polars_column_metadata(field.name, field.dtype)
+            for field in dtype.fields
+        ]
+    else:
+        children_meta = []
+    timezone = dtype.time_zone if isinstance(dtype, pl.Datetime) else None
+    return plc.interop.ColumnMetadata(
+        name=name, timezone=timezone or "", children_meta=children_meta
+    )
+
+
+# This is also defined in pylibcudf.interop
+class _ObjectWithArrowMetadata:
+    def __init__(
+        self, obj: plc.Table, metadata: list[plc.interop.ColumnMetadata]
+    ) -> None:
+        self.obj = obj
+        self.metadata = metadata
+
+    def __arrow_c_array__(
+        self, requested_schema: None = None
+    ) -> tuple[CapsuleType, CapsuleType]:
+        return self.obj._to_schema(self.metadata), self.obj._to_host_array()
+
+
 # Pacify the type checker. DataFrame init asserts that all the columns
 # have a string name, so let's narrow the type.
 class NamedColumn(Column):
```
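The new helper recurses through struct fields and carries datetime time zones on each node, so nested metadata survives the device-to-host handoff. A hedged sketch of what it yields for a hypothetical nested column (illustrative names; this only runs inside the module above, since the helper is private):

```python
import polars as pl

# Hypothetical nested dtype: a struct holding a zoned datetime and an inner struct.
dtype = pl.Struct(
    {"ts": pl.Datetime("us", "UTC"), "inner": pl.Struct({"a": pl.Int8})}
)
meta = _create_polars_column_metadata("s", dtype)
# meta.name == "s"; meta.children_meta has one ColumnMetadata per field:
#   - name="ts", timezone="UTC"  (picked up from the pl.Datetime)
#   - name="inner", whose own children_meta carries name="a"
```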
```diff
@@ -46,6 +75,7 @@ class DataFrame:
         if any(c.name is None for c in columns):
             raise ValueError("All columns must have a name")
         self.columns = [cast(NamedColumn, c) for c in columns]
+        self.dtypes = [c.dtype for c in self.columns]
         self.column_map = {c.name: c for c in self.columns}
         self.table = plc.Table([c.obj for c in self.columns])
 
@@ -62,11 +92,12 @@ class DataFrame:
         # To guarantee we produce correct names, we therefore
         # serialise with names we control and rename with that map.
         name_map = {f"column_{i}": name for i, name in enumerate(self.column_map)}
-
-
-
-
-
+        metadata = [
+            _create_polars_column_metadata(name, dtype.polars)
+            for name, dtype in zip(name_map, self.dtypes, strict=True)
+        ]
+        table_with_metadata = _ObjectWithArrowMetadata(self.table, metadata)
+        df = pl.DataFrame(table_with_metadata)
         return df.rename(name_map).with_columns(
             pl.col(c.name).set_sorted(descending=c.order == plc.types.Order.DESCENDING)
             if c.is_sorted
@@ -108,21 +139,18 @@ class DataFrame:
         -------
         New dataframe representing the input.
         """
-
-        schema = table.schema
-        for i, field in enumerate(schema):
-            schema = schema.set(
-                i, pa.field(field.name, dtypes.downcast_arrow_lists(field.type))
-            )
-        # No-op if the schema is unchanged.
-        d_table = plc.interop.from_arrow(table.cast(schema))
+        plc_table = plc.Table.from_arrow(df)
         return cls(
-            Column(
-            for
+            Column(d_col, name=name, dtype=DataType(h_col.dtype)).copy_metadata(h_col)
+            for d_col, h_col, name in zip(
+                plc_table.columns(), df.iter_columns(), df.columns, strict=True
+            )
         )
 
     @classmethod
-    def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self:
+    def from_table(
+        cls, table: plc.Table, names: Sequence[str], dtypes: Sequence[DataType]
+    ) -> Self:
         """
         Create from a pylibcudf table.
 
@@ -132,6 +160,8 @@ class DataFrame:
             Pylibcudf table to obtain columns from
         names
             Names for the columns
+        dtypes
+            Dtypes for the columns
 
         Returns
         -------
@@ -146,7 +176,8 @@ class DataFrame:
         if table.num_columns() != len(names):
             raise ValueError("Mismatching name and table length.")
         return cls(
-            Column(c, name=name) for c, name in zip(table.columns(), names, strict=True)
+            Column(c, name=name, dtype=dtype)
+            for c, name, dtype in zip(table.columns(), names, dtypes, strict=True)
         )
 
     @classmethod
@@ -173,7 +204,7 @@ class DataFrame:
             packed_metadata, packed_gpu_data
         )
         return cls(
-            Column(c, **kw)
+            Column(c, **Column.deserialize_ctor_kwargs(kw))
             for c, kw in zip(table.columns(), header["columns_kwargs"], strict=True)
         )
 
@@ -202,13 +233,7 @@ class DataFrame:
 
         # Keyword arguments for `Column.__init__`.
         columns_kwargs: list[ColumnOptions] = [
-            {
-                "is_sorted": col.is_sorted,
-                "order": col.order,
-                "null_order": col.null_order,
-                "name": col.name,
-            }
-            for col in self.columns
+            col.serialize_ctor_kwargs() for col in self.columns
         ]
         header: DataFrameHeader = {
             "columns_kwargs": columns_kwargs,
@@ -246,7 +271,9 @@ class DataFrame:
             for c, other in zip(self.columns, like.columns, strict=True)
         )
 
-    def with_columns(
+    def with_columns(
+        self, columns: Iterable[Column], *, replace_only: bool = False
+    ) -> Self:
         """
         Return a new dataframe with extra columns.
 
@@ -275,7 +302,7 @@ class DataFrame:
         """Drop columns by name."""
         return type(self)(column for column in self.columns if column.name not in names)
 
-    def select(self, names: Sequence[str]) -> Self:
+    def select(self, names: Sequence[str] | Mapping[str, Any]) -> Self:
         """Select columns by name returning DataFrame."""
         try:
             return type(self)(self.column_map[name] for name in names)
@@ -293,7 +320,11 @@ class DataFrame:
     def filter(self, mask: Column) -> Self:
         """Return a filtered table given a mask."""
         table = plc.stream_compaction.apply_boolean_mask(self.table, mask.obj)
-        return type(self).from_table(table, self.column_names).sorted_like(self)
+        return (
+            type(self)
+            .from_table(table, self.column_names, self.dtypes)
+            .sorted_like(self)
+        )
 
     def slice(self, zlice: Slice | None) -> Self:
         """
@@ -314,4 +345,8 @@ class DataFrame:
         (table,) = plc.copying.slice(
             self.table, conversion.from_polars_slice(zlice, num_rows=self.num_rows)
         )
-        return type(self).from_table(table, self.column_names).sorted_like(self)
+        return (
+            type(self)
+            .from_table(table, self.column_names, self.dtypes)
+            .sorted_like(self)
+        )
```
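With this change, `to_polars` hands the device table to polars through the Arrow PyCapsule interface (`__arrow_c_array__`) rather than materialising a `pyarrow.Table` first, which is why the `pyarrow` import disappears. A minimal host-side sketch of the same protocol, using a `pyarrow.RecordBatch` as a stand-in for the `pylibcudf.Table` (the wrapper class and names here are illustrative):

```python
import polars as pl
import pyarrow as pa


class ArrowCArrayWrapper:
    """Anything exposing __arrow_c_array__ can seed pl.DataFrame directly."""

    def __init__(self, batch: pa.RecordBatch) -> None:
        self.batch = batch

    def __arrow_c_array__(self, requested_schema=None):
        # Delegate to pyarrow's capsule export: (schema capsule, array capsule).
        return self.batch.__arrow_c_array__(requested_schema)


batch = pa.RecordBatch.from_pydict({"column_0": [1, 2, 3]})
df = pl.DataFrame(ArrowCArrayWrapper(batch))
print(df.rename({"column_0": "x"}))  # names applied afterwards, as in to_polars
```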
cudf_polars/containers/datatype.py
ADDED

```diff
@@ -0,0 +1,135 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+"""A datatype, preserving polars metadata."""
+
+from __future__ import annotations
+
+from functools import cache
+
+from typing_extensions import assert_never
+
+import polars as pl
+
+import pylibcudf as plc
+
+__all__ = ["DataType"]
+
+
+@cache
+def _from_polars(dtype: pl.DataType) -> plc.DataType:
+    """
+    Convert a polars datatype to a pylibcudf one.
+
+    Parameters
+    ----------
+    dtype
+        Polars dtype to convert
+
+    Returns
+    -------
+    Matching pylibcudf DataType object.
+
+    Raises
+    ------
+    NotImplementedError
+        For unsupported conversions.
+    """
+    if isinstance(dtype, pl.Boolean):
+        return plc.DataType(plc.TypeId.BOOL8)
+    elif isinstance(dtype, pl.Int8):
+        return plc.DataType(plc.TypeId.INT8)
+    elif isinstance(dtype, pl.Int16):
+        return plc.DataType(plc.TypeId.INT16)
+    elif isinstance(dtype, pl.Int32):
+        return plc.DataType(plc.TypeId.INT32)
+    elif isinstance(dtype, pl.Int64):
+        return plc.DataType(plc.TypeId.INT64)
+    if isinstance(dtype, pl.UInt8):
+        return plc.DataType(plc.TypeId.UINT8)
+    elif isinstance(dtype, pl.UInt16):
+        return plc.DataType(plc.TypeId.UINT16)
+    elif isinstance(dtype, pl.UInt32):
+        return plc.DataType(plc.TypeId.UINT32)
+    elif isinstance(dtype, pl.UInt64):
+        return plc.DataType(plc.TypeId.UINT64)
+    elif isinstance(dtype, pl.Float32):
+        return plc.DataType(plc.TypeId.FLOAT32)
+    elif isinstance(dtype, pl.Float64):
+        return plc.DataType(plc.TypeId.FLOAT64)
+    elif isinstance(dtype, pl.Date):
+        return plc.DataType(plc.TypeId.TIMESTAMP_DAYS)
+    elif isinstance(dtype, pl.Time):
+        raise NotImplementedError("Time of day dtype not implemented")
+    elif isinstance(dtype, pl.Datetime):
+        if dtype.time_unit == "ms":
+            return plc.DataType(plc.TypeId.TIMESTAMP_MILLISECONDS)
+        elif dtype.time_unit == "us":
+            return plc.DataType(plc.TypeId.TIMESTAMP_MICROSECONDS)
+        elif dtype.time_unit == "ns":
+            return plc.DataType(plc.TypeId.TIMESTAMP_NANOSECONDS)
+        assert dtype.time_unit is not None  # pragma: no cover
+        assert_never(dtype.time_unit)
+    elif isinstance(dtype, pl.Duration):
+        if dtype.time_unit == "ms":
+            return plc.DataType(plc.TypeId.DURATION_MILLISECONDS)
+        elif dtype.time_unit == "us":
+            return plc.DataType(plc.TypeId.DURATION_MICROSECONDS)
+        elif dtype.time_unit == "ns":
+            return plc.DataType(plc.TypeId.DURATION_NANOSECONDS)
+        assert dtype.time_unit is not None  # pragma: no cover
+        assert_never(dtype.time_unit)
+    elif isinstance(dtype, pl.String):
+        return plc.DataType(plc.TypeId.STRING)
+    elif isinstance(dtype, pl.Null):
+        # TODO: Hopefully
+        return plc.DataType(plc.TypeId.EMPTY)
+    elif isinstance(dtype, pl.List):
+        # Recurse to catch unsupported inner types
+        _ = _from_polars(dtype.inner)
+        return plc.DataType(plc.TypeId.LIST)
+    elif isinstance(dtype, pl.Struct):
+        # Recurse to catch unsupported field types
+        for field in dtype.fields:
+            _ = _from_polars(field.dtype)
+        return plc.DataType(plc.TypeId.STRUCT)
+    else:
+        raise NotImplementedError(f"{dtype=} conversion not supported")
+
+
+class DataType:
+    """A datatype, preserving polars metadata."""
+
+    polars: pl.datatypes.DataType
+    plc: plc.DataType
+
+    def __init__(self, polars_dtype: pl.DataType) -> None:
+        self.polars = polars_dtype
+        self.plc = _from_polars(polars_dtype)
+
+    def id(self) -> plc.TypeId:
+        """The pylibcudf.TypeId of this DataType."""
+        return self.plc.id()
+
+    @property
+    def children(self) -> list[DataType]:
+        """The children types of this DataType."""
+        if self.plc.id() == plc.TypeId.STRUCT:
+            return [DataType(field.dtype) for field in self.polars.fields]
+        elif self.plc.id() == plc.TypeId.LIST:
+            return [DataType(self.polars.inner)]
+        return []
+
+    def __eq__(self, other: object) -> bool:
+        """Equality of DataTypes."""
+        if not isinstance(other, DataType):
+            return False
+        return self.polars == other.polars
+
+    def __hash__(self) -> int:
+        """Hash of the DataType."""
+        return hash(self.polars)
+
+    def __repr__(self) -> str:
+        """Representation of the DataType."""
+        return f"<DataType(polars={self.polars}, plc={self.id()!r})>"
```
cudf_polars/dsl/expr.py
CHANGED

```diff
@@ -33,6 +33,7 @@ from cudf_polars.dsl.expressions.selection import Filter, Gather
 from cudf_polars.dsl.expressions.slicing import Slice
 from cudf_polars.dsl.expressions.sorting import Sort, SortBy
 from cudf_polars.dsl.expressions.string import StringFunction
+from cudf_polars.dsl.expressions.struct import StructFunction
 from cudf_polars.dsl.expressions.ternary import Ternary
 from cudf_polars.dsl.expressions.unary import Cast, Len, UnaryFunction
 
@@ -58,6 +59,7 @@ __all__ = [
     "Sort",
     "SortBy",
     "StringFunction",
+    "StructFunction",
     "TemporalFunction",
     "Ternary",
     "UnaryFunction",
```
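Registering `StructFunction` here is what makes struct expressions translatable; the node itself is defined in the new `cudf_polars/dsl/expressions/struct.py` (+138 lines, see the file list). A hedged sketch of the kind of polars query this enables, with exact operation coverage depending on the 25.8 implementation:

```python
import polars as pl

lf = pl.LazyFrame({"s": [{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]})
# Struct field access is translated to a StructFunction expression node
# when the query is offloaded to the GPU engine.
out = lf.select(pl.col("s").struct.field("a")).collect(engine="gpu")
```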
cudf_polars/dsl/expressions/aggregation.py
CHANGED

```diff
@@ -9,23 +9,14 @@ from __future__ import annotations
 from functools import partial
 from typing import TYPE_CHECKING, Any, ClassVar
 
-import pyarrow as pa
-
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.dsl.expressions.base import (
-    AggInfo,
-    ExecutionContext,
-    Expr,
-)
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 from cudf_polars.dsl.expressions.literal import Literal
-from cudf_polars.dsl.expressions.unary import UnaryFunction
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
-    from cudf_polars.containers import DataFrame
+    from cudf_polars.containers import DataFrame, DataType
 
 __all__ = ["Agg"]
 
@@ -35,7 +26,7 @@ class Agg(Expr):
     _non_child = ("dtype", "name", "options")
 
     def __init__(
-        self, dtype: plc.DataType, name: str, options: Any, *children: Expr
+        self, dtype: DataType, name: str, options: Any, *children: Expr
     ) -> None:
         self.dtype = dtype
         self.name = name
@@ -75,11 +66,15 @@ class Agg(Expr):
                 else plc.types.NullPolicy.INCLUDE
             )
         elif name == "quantile":
-            _, quantile = self.children
+            child, quantile = self.children
             if not isinstance(quantile, Literal):
                 raise NotImplementedError("Only support literal quantile values")
+            if options == "equiprobable":
+                raise NotImplementedError("Quantile with equiprobable interpolation")
+            if plc.traits.is_duration(child.dtype.plc):
+                raise NotImplementedError("Quantile with duration data type")
             req = plc.aggregation.quantile(
-                quantiles=[quantile.value
+                quantiles=[quantile.value], interp=Agg.interp_mapping[options]
             )
         else:
             raise NotImplementedError(
@@ -91,7 +86,9 @@ class Agg(Expr):
             op = partial(self._reduce, request=req)
         elif name in {"min", "max"}:
             op = partial(op, propagate_nans=options)
-        elif name in {"count", "sum", "first", "last"}:
+        elif name == "count":
+            op = partial(op, include_nulls=options)
+        elif name in {"sum", "first", "last"}:
             pass
         else:
             raise NotImplementedError(
```
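`count` moves out of the pass-through set and gains an `include_nulls` option, mirroring the flag polars carries on its count aggregation. A sketch of the two user-facing spellings this distinguishes:

```python
import polars as pl

df = pl.DataFrame({"g": [1, 1, 2], "x": [1.0, None, 3.0]})
df.group_by("g").agg(
    pl.col("x").count().alias("non_null"),  # include_nulls=False -> 1 for g=1
    pl.col("x").len().alias("with_null"),   # include_nulls=True  -> 2 for g=1
)
```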
```diff
@@ -124,71 +121,52 @@ class Agg(Expr):
         "linear": plc.types.Interpolation.LINEAR,
     }
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-
-        if depth >= 1:
-            raise NotImplementedError(
-                "Nested aggregations in groupby"
-            )  # pragma: no cover; check_agg trips first
-        if (isminmax := self.name in {"min", "max"}) and self.options:
-            raise NotImplementedError("Nan propagation in groupby for min/max")
-        (child,) = self.children
-        ((expr, _, _),) = child.collect_agg(depth=depth + 1).requests
-        request = self.request
-        # These are handled specially here because we don't set up the
-        # request for the whole-frame agg because we can avoid a
-        # reduce for these.
+    @property
+    def agg_request(self) -> plc.aggregation.Aggregation:  # noqa: D102
         if self.name == "first":
-            request = plc.aggregation.nth_element(
+            return plc.aggregation.nth_element(
                 0, null_handling=plc.types.NullPolicy.INCLUDE
             )
         elif self.name == "last":
-            request = plc.aggregation.nth_element(
+            return plc.aggregation.nth_element(
                 -1, null_handling=plc.types.NullPolicy.INCLUDE
             )
-
-
-
-            )  # pragma: no cover; __init__ trips first
-        if isminmax and plc.traits.is_floating_point(self.dtype):
-            assert expr is not None
-            # Ignore nans in these groupby aggs, do this by masking
-            # nans in the input
-            expr = UnaryFunction(self.dtype, "mask_nans", (), expr)
-        return AggInfo([(expr, request, self)])
+        else:
+            assert self.request is not None, "Init should have raised"
+            return self.request
 
     def _reduce(
         self, column: Column, *, request: plc.aggregation.Aggregation
     ) -> Column:
         return Column(
             plc.Column.from_scalar(
-                plc.reduce.reduce(column.obj, request, self.dtype),
+                plc.reduce.reduce(column.obj, request, self.dtype.plc),
                 1,
-            )
+            ),
+            name=column.name,
+            dtype=self.dtype,
         )
 
-    def _count(self, column: Column) -> Column:
+    def _count(self, column: Column, *, include_nulls: bool) -> Column:
+        null_count = column.null_count if not include_nulls else 0
         return Column(
             plc.Column.from_scalar(
-                plc.interop.from_arrow(
-                    pa.scalar(
-                        column.size - column.null_count,
-                        type=plc.interop.to_arrow(self.dtype),
-                    ),
-                ),
+                plc.Scalar.from_py(column.size - null_count, self.dtype.plc),
                 1,
-            )
+            ),
+            name=column.name,
+            dtype=self.dtype,
        )
 
     def _sum(self, column: Column) -> Column:
         if column.size == 0 or column.null_count == column.size:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(0, type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(0, self.dtype.plc),
                     1,
-                )
+                ),
+                name=column.name,
+                dtype=self.dtype,
             )
         return self._reduce(column, request=plc.aggregation.sum())
 
@@ -196,11 +174,11 @@ class Agg(Expr):
         if propagate_nans and column.nan_count > 0:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(float("nan"), type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(float("nan"), self.dtype.plc),
                     1,
-                )
+                ),
+                name=column.name,
+                dtype=self.dtype,
             )
         if column.nan_count > 0:
             column = column.mask_nans()
@@ -210,29 +188,31 @@ class Agg(Expr):
         if propagate_nans and column.nan_count > 0:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(float("nan"), type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(float("nan"), self.dtype.plc),
                     1,
-                )
+                ),
+                name=column.name,
+                dtype=self.dtype,
             )
         if column.nan_count > 0:
             column = column.mask_nans()
         return self._reduce(column, request=plc.aggregation.max())
 
     def _first(self, column: Column) -> Column:
-        return Column(
+        return Column(
+            plc.copying.slice(column.obj, [0, 1])[0], name=column.name, dtype=self.dtype
+        )
 
     def _last(self, column: Column) -> Column:
         n = column.size
-        return Column(
+        return Column(
+            plc.copying.slice(column.obj, [n - 1, n])[0],
+            name=column.name,
+            dtype=self.dtype,
+        )
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         if context is not ExecutionContext.FRAME:
@@ -243,4 +223,4 @@ class Agg(Expr):
         # Aggregations like quantiles may have additional children that were
         # preprocessed into pylibcudf requests.
         child = self.children[0]
-        return self.op(child.evaluate(df, context=context, mapping=mapping))
+        return self.op(child.evaluate(df, context=context))
```