cudf-polars-cu12 25.2.2__py3-none-any.whl → 25.4.0__py3-none-any.whl
This diff compares the contents of two publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +85 -53
- cudf_polars/containers/column.py +100 -7
- cudf_polars/containers/dataframe.py +16 -24
- cudf_polars/dsl/expr.py +3 -1
- cudf_polars/dsl/expressions/aggregation.py +3 -3
- cudf_polars/dsl/expressions/binaryop.py +2 -2
- cudf_polars/dsl/expressions/boolean.py +4 -4
- cudf_polars/dsl/expressions/datetime.py +39 -1
- cudf_polars/dsl/expressions/literal.py +3 -9
- cudf_polars/dsl/expressions/selection.py +2 -2
- cudf_polars/dsl/expressions/slicing.py +53 -0
- cudf_polars/dsl/expressions/sorting.py +1 -1
- cudf_polars/dsl/expressions/string.py +4 -4
- cudf_polars/dsl/expressions/unary.py +3 -2
- cudf_polars/dsl/ir.py +222 -93
- cudf_polars/dsl/nodebase.py +8 -1
- cudf_polars/dsl/translate.py +66 -38
- cudf_polars/experimental/base.py +18 -12
- cudf_polars/experimental/dask_serialize.py +22 -8
- cudf_polars/experimental/groupby.py +346 -0
- cudf_polars/experimental/io.py +13 -11
- cudf_polars/experimental/join.py +318 -0
- cudf_polars/experimental/parallel.py +57 -6
- cudf_polars/experimental/shuffle.py +194 -0
- cudf_polars/testing/plugin.py +23 -34
- cudf_polars/typing/__init__.py +33 -2
- cudf_polars/utils/config.py +138 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +14 -4
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +4 -3
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.4.0.dist-info}/METADATA +8 -7
- cudf_polars_cu12-25.4.0.dist-info/RECORD +55 -0
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.4.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu12-25.2.2.dist-info/RECORD +0 -48
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.4.0.dist-info/licenses}/LICENSE +0 -0
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.4.0.dist-info}/top_level.txt +0 -0
cudf_polars/VERSION
CHANGED

@@ -1 +1 @@
-25.02.02
+25.04.00
cudf_polars/callback.py
CHANGED

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 
 """Callback for the polars collect function to execute on device."""
@@ -7,9 +7,10 @@ from __future__ import annotations
 
 import contextlib
 import os
+import time
 import warnings
 from functools import cache, partial
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, Literal, overload
 
 import nvtx
 
@@ -20,6 +21,8 @@ import rmm
 from rmm._cuda import gpu
 
 from cudf_polars.dsl.translate import Translator
+from cudf_polars.utils.timer import Timer
+from cudf_polars.utils.versions import POLARS_VERSION_LT_125
 
 if TYPE_CHECKING:
     from collections.abc import Generator
@@ -173,19 +176,53 @@ def set_device(device: int | None) -> Generator[int, None, None]:
         gpu.setDevice(previous)
 
 
+@overload
 def _callback(
     ir: IR,
     with_columns: list[str] | None,
     pyarrow_predicate: str | None,
     n_rows: int | None,
+    should_time: Literal[False],
     *,
     device: int | None,
     memory_resource: int | None,
     executor: Literal["pylibcudf", "dask-experimental"] | None,
-) -> pl.DataFrame:
+    timer: Timer | None,
+) -> pl.DataFrame: ...
+
+
+@overload
+def _callback(
+    ir: IR,
+    with_columns: list[str] | None,
+    pyarrow_predicate: str | None,
+    n_rows: int | None,
+    should_time: Literal[True],
+    *,
+    device: int | None,
+    memory_resource: int | None,
+    executor: Literal["pylibcudf", "dask-experimental"] | None,
+    timer: Timer | None,
+) -> tuple[pl.DataFrame, list[tuple[int, int, str]]]: ...
+
+
+def _callback(
+    ir: IR,
+    with_columns: list[str] | None,
+    pyarrow_predicate: str | None,
+    n_rows: int | None,
+    should_time: bool,  # noqa: FBT001
+    *,
+    device: int | None,
+    memory_resource: int | None,
+    executor: Literal["pylibcudf", "dask-experimental"] | None,
+    timer: Timer | None,
+):
     assert with_columns is None
     assert pyarrow_predicate is None
     assert n_rows is None
+    if timer is not None:
+        assert should_time
     with (
         nvtx.annotate(message="ExecuteIR", domain="cudf_polars"),
         # Device must be set before memory resource is obtained.
@@ -193,7 +230,11 @@ def _callback(
         set_memory_resource(memory_resource),
     ):
         if executor is None or executor == "pylibcudf":
-            return ir.evaluate(cache={}).to_polars()
+            df = ir.evaluate(cache={}, timer=timer).to_polars()
+            if timer is None:
+                return df
+            else:
+                return df, timer.timings
         elif executor == "dask-experimental":
             from cudf_polars.experimental.parallel import evaluate_dask
 
@@ -202,45 +243,9 @@ def _callback(
     raise ValueError(f"Unknown executor '{executor}'")
 
 
-def validate_config_options(config: dict) -> None:
-    """
-    Validate the configuration options for the GPU engine.
-
-    Parameters
-    ----------
-    config
-        Configuration options to validate.
-
-    Raises
-    ------
-    ValueError
-        If the configuration contains unsupported options.
-    """
-    if unsupported := (
-        config.keys()
-        - {"raise_on_fail", "parquet_options", "executor", "executor_options"}
-    ):
-        raise ValueError(
-            f"Engine configuration contains unsupported settings: {unsupported}"
-        )
-    assert {"chunked", "chunk_read_limit", "pass_read_limit"}.issuperset(
-        config.get("parquet_options", {})
-    )
-
-    # Validate executor_options
-    executor = config.get("executor", "pylibcudf")
-    if executor == "dask-experimental":
-        unsupported = config.get("executor_options", {}).keys() - {
-            "max_rows_per_partition",
-            "parquet_blocksize",
-        }
-    else:
-        unsupported = config.get("executor_options", {}).keys()
-    if unsupported:
-        raise ValueError(f"Unsupported executor_options for {executor}: {unsupported}")
-
-
-def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None:
+def execute_with_cudf(
+    nt: NodeTraverser, duration_since_start: int | None, *, config: GPUEngine
+) -> None:
     """
     A post optimization callback that attempts to execute the plan with cudf.
 
@@ -249,6 +254,10 @@ def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None:
     nt
         NodeTraverser
 
+    duration_since_start
+        Time since the user started executing the query (or None if no
+        profiling should occur).
+
     config
         GPUEngine configuration object
 
@@ -263,16 +272,21 @@ def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None:
     -----
    The NodeTraverser is mutated if the libcudf executor can handle the plan.
     """
+    if duration_since_start is None:
+        timer = None
+    else:
+        start = time.monotonic_ns()
+        timer = Timer(start - duration_since_start)
    device = config.device
    memory_resource = config.memory_resource
    raise_on_fail = config.config.get("raise_on_fail", False)
    executor = config.config.get("executor", None)
-    validate_config_options(config.config)
-
    with nvtx.annotate(message="ConvertIR", domain="cudf_polars"):
        translator = Translator(nt, config)
        ir = translator.translate_ir()
        ir_translation_errors = translator.errors
+        if timer is not None:
+            timer.store(start, time.monotonic_ns(), "gpu-ir-translation")
    if len(ir_translation_errors):
        # TODO: Display these errors in user-friendly way.
        # tracked in https://github.com/rapidsai/cudf/issues/17051
@@ -290,12 +304,30 @@ def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None:
        if raise_on_fail:
            raise exception
        else:
-            nt.set_udf(
-                partial(
-                    _callback,
-                    ir,
-                    device=device,
-                    memory_resource=memory_resource,
-                    executor=executor,
+            if POLARS_VERSION_LT_125:  # pragma: no cover
+                nt.set_udf(
+                    partial(
+                        _callback,
+                        ir,
+                        should_time=False,
+                        device=device,
+                        memory_resource=memory_resource,
+                        executor=executor,
+                        timer=None,
+                    )
                )
-            )
+            else:
+                nt.set_udf(
+                    partial(
+                        _callback,
+                        ir,
+                        device=device,
+                        memory_resource=memory_resource,
+                        executor=executor,
+                        timer=timer,
+                    )
+                )
+
+
+if POLARS_VERSION_LT_125:  # pragma: no cover
+    execute_with_cudf = partial(execute_with_cudf, duration_since_start=None)
cudf_polars/containers/column.py
CHANGED

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 
 """A column, with some properties."""
@@ -19,6 +19,7 @@ from pylibcudf.strings.convert.convert_integers import (
 )
 from pylibcudf.traits import is_floating_point
 
+from cudf_polars.utils import conversion
 from cudf_polars.utils.dtypes import is_order_preserving_cast
 
 if TYPE_CHECKING:
@@ -26,6 +27,8 @@ if TYPE_CHECKING:
 
     import polars as pl
 
+    from cudf_polars.typing import ColumnHeader, ColumnOptions, Slice
+
 __all__: list[str] = ["Column"]
 
 
@@ -51,10 +54,69 @@ class Column:
         name: str | None = None,
     ):
         self.obj = column
-        self.is_scalar = self.obj.size() == 1
+        self.is_scalar = self.size == 1
         self.name = name
         self.set_sorted(is_sorted=is_sorted, order=order, null_order=null_order)
 
+    @classmethod
+    def deserialize(
+        cls, header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]
+    ) -> Self:
+        """
+        Create a Column from a serialized representation returned by `.serialize()`.
+
+        Parameters
+        ----------
+        header
+            The (unpickled) metadata required to reconstruct the object.
+        frames
+            Two-tuple of frames (a memoryview and a gpumemoryview).
+
+        Returns
+        -------
+        Column
+            The deserialized Column.
+        """
+        packed_metadata, packed_gpu_data = frames
+        (plc_column,) = plc.contiguous_split.unpack_from_memoryviews(
+            packed_metadata, packed_gpu_data
+        ).columns()
+        return cls(plc_column, **header["column_kwargs"])
+
+    def serialize(
+        self,
+    ) -> tuple[ColumnHeader, tuple[memoryview, plc.gpumemoryview]]:
+        """
+        Serialize the Column into header and frames.
+
+        Follows the Dask serialization scheme with a picklable header (dict) and
+        a tuple of frames (in this case a contiguous host and device buffer).
+
+        To enable dask support, dask serializers must be registered
+
+            >>> from cudf_polars.experimental.dask_serialize import register
+            >>> register()
+
+        Returns
+        -------
+        header
+            A dict containing any picklable metadata required to reconstruct the object.
+        frames
+            Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
+        """
+        packed = plc.contiguous_split.pack(plc.Table([self.obj]))
+        column_kwargs: ColumnOptions = {
+            "is_sorted": self.is_sorted,
+            "order": self.order,
+            "null_order": self.null_order,
+            "name": self.name,
+        }
+        header: ColumnHeader = {
+            "column_kwargs": column_kwargs,
+            "frame_count": 2,
+        }
+        return header, packed.release()
+
     @functools.cached_property
     def obj_scalar(self) -> plc.Scalar:
         """
@@ -70,9 +132,7 @@ class Column:
             If the column is not length-1.
         """
         if not self.is_scalar:
-            raise ValueError(
-                f"Cannot convert a column of length {self.obj.size()} to scalar"
-            )
+            raise ValueError(f"Cannot convert a column of length {self.size} to scalar")
         return plc.copying.get_element(self.obj, 0)
 
     def rename(self, name: str | None, /) -> Self:
@@ -242,7 +302,7 @@ class Column:
        -------
        Self with metadata set.
        """
-        if self.obj.size() <= 1:
+        if self.size <= 1:
            is_sorted = plc.types.Sorted.YES
        self.is_sorted = is_sorted
        self.order = order
@@ -268,7 +328,7 @@ class Column:
    def mask_nans(self) -> Self:
        """Return a shallow copy of self with nans masked out."""
        if plc.traits.is_floating_point(self.obj.type()):
-            old_count = self.obj.null_count()
+            old_count = self.null_count
            mask, new_count = plc.transform.nans_to_nulls(self.obj)
            result = type(self)(self.obj.with_mask(mask, new_count))
            if old_count == new_count:
@@ -288,3 +348,36 @@ class Column:
            )
        ).as_py()
        return 0
+
+    @property
+    def size(self) -> int:
+        """Return the size of the column."""
+        return self.obj.size()
+
+    @property
+    def null_count(self) -> int:
+        """Return the number of Null values in the column."""
+        return self.obj.null_count()
+
+    def slice(self, zlice: Slice | None) -> Self:
+        """
+        Slice a column.
+
+        Parameters
+        ----------
+        zlice
+            optional, tuple of start and length, negative values of start
+            treated as for python indexing. If not provided, returns self.
+
+        Returns
+        -------
+        New column (if zlice is not None) otherwise self (if it is)
+        """
+        if zlice is None:
+            return self
+        (table,) = plc.copying.slice(
+            plc.Table([self.obj]),
+            conversion.from_polars_slice(zlice, num_rows=self.size),
+        )
+        (column,) = table.columns()
+        return type(self)(column, name=self.name).sorted_like(self)
cudf_polars/containers/dataframe.py
CHANGED

@@ -1,13 +1,12 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 
 """A dataframe, with some properties."""
 
 from __future__ import annotations
 
-import pickle
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, cast
 
 import pyarrow as pa
 
@@ -16,13 +15,15 @@ import polars as pl
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.utils import dtypes
+from cudf_polars.utils import conversion, dtypes
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Mapping, Sequence, Set
 
     from typing_extensions import Self
 
+    from cudf_polars.typing import ColumnOptions, DataFrameHeader, Slice
+
 
 __all__: list[str] = ["DataFrame"]
 
@@ -150,7 +151,7 @@ class DataFrame:
 
     @classmethod
     def deserialize(
-        cls, header: dict[str, Any], frames: tuple[memoryview, plc.gpumemoryview]
+        cls, header: DataFrameHeader, frames: tuple[memoryview, plc.gpumemoryview]
     ) -> Self:
         """
         Create a DataFrame from a serialized representation returned by `.serialize()`.
@@ -178,7 +179,7 @@ class DataFrame:
 
     def serialize(
         self,
-    ) -> tuple[dict[str, Any], tuple[memoryview, plc.gpumemoryview]]:
+    ) -> tuple[DataFrameHeader, tuple[memoryview, plc.gpumemoryview]]:
         """
         Serialize the table into header and frames.
 
@@ -187,20 +188,20 @@ class DataFrame:
 
         To enable dask support, dask serializers must be registered
 
-        >>> from cudf_polars.experimental.dask_serialize import register
-        >>> register()
+            >>> from cudf_polars.experimental.dask_serialize import register
+            >>> register()
 
         Returns
         -------
         header
             A dict containing any picklable metadata required to reconstruct the object.
         frames
-            Two-tuple of frames suitable for passing to `unpack_from_memoryviews`
+            Two-tuple of frames suitable for passing to `plc.contiguous_split.unpack_from_memoryviews`
         """
         packed = plc.contiguous_split.pack(self.table)
 
         # Keyword arguments for `Column.__init__`.
-        columns_kwargs = [
+        columns_kwargs: list[ColumnOptions] = [
             {
                 "is_sorted": col.is_sorted,
                 "order": col.order,
@@ -209,10 +210,8 @@ class DataFrame:
             }
             for col in self.columns
         ]
-        header = {
+        header: DataFrameHeader = {
             "columns_kwargs": columns_kwargs,
-            # Dask Distributed uses "type-serialized" to dispatch deserialization
-            "type-serialized": pickle.dumps(type(self)),
             "frame_count": 2,
         }
         return header, packed.release()
@@ -296,7 +295,7 @@ class DataFrame:
         table = plc.stream_compaction.apply_boolean_mask(self.table, mask.obj)
         return type(self).from_table(table, self.column_names).sorted_like(self)
 
-    def slice(self, zlice: tuple[int, int] | None) -> Self:
+    def slice(self, zlice: Slice | None) -> Self:
         """
         Slice a dataframe.
 
@@ -312,14 +311,7 @@ class DataFrame:
         """
         if zlice is None:
             return self
-        start, length = zlice
-        if start < 0:
-            start += self.num_rows
-        # Polars implementation wraps negative start by num_rows, then
-        # adds length to start to get the end, then clamps both to
-        # [0, num_rows)
-        end = start + length
-        start = max(min(start, self.num_rows), 0)
-        end = max(min(end, self.num_rows), 0)
-        (table,) = plc.copying.slice(self.table, [start, end])
+        (table,) = plc.copying.slice(
+            self.table, conversion.from_polars_slice(zlice, num_rows=self.num_rows)
+        )
         return type(self).from_table(table, self.column_names).sorted_like(self)
cudf_polars/dsl/expr.py
CHANGED

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -30,6 +30,7 @@ from cudf_polars.dsl.expressions.datetime import TemporalFunction
 from cudf_polars.dsl.expressions.literal import Literal, LiteralColumn
 from cudf_polars.dsl.expressions.rolling import GroupedRollingWindow, RollingWindow
 from cudf_polars.dsl.expressions.selection import Filter, Gather
+from cudf_polars.dsl.expressions.slicing import Slice
 from cudf_polars.dsl.expressions.sorting import Sort, SortBy
 from cudf_polars.dsl.expressions.string import StringFunction
 from cudf_polars.dsl.expressions.ternary import Ternary
@@ -53,6 +54,7 @@ __all__ = [
     "LiteralColumn",
     "NamedExpr",
     "RollingWindow",
+    "Slice",
     "Sort",
     "SortBy",
     "StringFunction",
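The new `Slice` export corresponds to the expression added in `cudf_polars/dsl/expressions/slicing.py` (+53 lines). A hedged guess at what this enables from the polars side, assuming expression-level slices previously fell back to the CPU engine:

```python
import polars as pl

lf = pl.LazyFrame({"a": [1, 2, 3, 4, 5]})

# Expression-level slicing (Expr.slice and friends) presumably lowers to
# the new dsl Slice node and can now execute on the GPU engine.
out = lf.select(pl.col("a").slice(1, 3)).collect(
    engine=pl.GPUEngine(raise_on_fail=True)
)
```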
cudf_polars/dsl/expressions/aggregation.py
CHANGED

@@ -172,7 +172,7 @@ class Agg(Expr):
             plc.Column.from_scalar(
                 plc.interop.from_arrow(
                     pa.scalar(
-                        column.obj.size() - column.obj.null_count(),
+                        column.size - column.null_count,
                         type=plc.interop.to_arrow(self.dtype),
                     ),
                 ),
@@ -181,7 +181,7 @@ class Agg(Expr):
         )
 
     def _sum(self, column: Column) -> Column:
-        if column.obj.size() == 0 or column.obj.null_count() == column.obj.size():
+        if column.size == 0 or column.null_count == column.size:
             return Column(
                 plc.Column.from_scalar(
                     plc.interop.from_arrow(
@@ -224,7 +224,7 @@ class Agg(Expr):
         return Column(plc.copying.slice(column.obj, [0, 1])[0])
 
     def _last(self, column: Column) -> Column:
-        n = column.obj.size()
+        n = column.size
         return Column(plc.copying.slice(column.obj, [n - 1, n])[0])
 
     def do_evaluate(
cudf_polars/dsl/expressions/binaryop.py
CHANGED

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -98,7 +98,7 @@ class BinOp(Expr):
         )
         lop = left.obj
         rop = right.obj
-        if left.obj.size() != right.obj.size():
+        if left.size != right.size:
             if left.is_scalar:
                 lop = left.obj_scalar
             elif right.is_scalar:
cudf_polars/dsl/expressions/boolean.py
CHANGED

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -191,7 +191,7 @@ class BooleanFunction(Expr):
         is_any = self.name is BooleanFunction.Name.Any
         agg = plc.aggregation.any() if is_any else plc.aggregation.all()
         result = plc.reduce.reduce(column.obj, agg, self.dtype)
-        if not ignore_nulls and column.obj.null_count() > 0:
+        if not ignore_nulls and column.null_count > 0:
             # Truth tables
             #   Any         All
             #   | F U T     | F U T
@@ -218,14 +218,14 @@ class BooleanFunction(Expr):
             (column,) = columns
             return Column(
                 plc.unary.is_nan(column.obj).with_mask(
-                    column.obj.null_mask(), column.obj.null_count()
+                    column.obj.null_mask(), column.null_count
                 )
             )
         elif self.name is BooleanFunction.Name.IsNotNan:
             (column,) = columns
             return Column(
                 plc.unary.is_not_nan(column.obj).with_mask(
-                    column.obj.null_mask(), column.obj.null_count()
+                    column.obj.null_mask(), column.null_count
                 )
             )
         elif self.name is BooleanFunction.Name.IsFirstDistinct:
cudf_polars/dsl/expressions/datetime.py
CHANGED

@@ -104,6 +104,14 @@ class TemporalFunction(Expr):
         Name.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND,
     }
 
+    _valid_ops: ClassVar[set[Name]] = {
+        *_COMPONENT_MAP.keys(),
+        Name.IsLeapYear,
+        Name.OrdinalDay,
+        Name.MonthStart,
+        Name.MonthEnd,
+    }
+
     def __init__(
         self,
         dtype: plc.DataType,
@@ -116,7 +124,7 @@ class TemporalFunction(Expr):
         self.name = name
         self.children = children
         self.is_pointwise = True
-        if self.name not in self._COMPONENT_MAP:
+        if self.name not in self._valid_ops:
             raise NotImplementedError(f"Temporal function {self.name}")
 
     def do_evaluate(
@@ -132,6 +140,36 @@ class TemporalFunction(Expr):
             for child in self.children
         ]
         (column,) = columns
+        if self.name is TemporalFunction.Name.MonthStart:
+            ends = plc.datetime.last_day_of_month(column.obj)
+            days_to_subtract = plc.datetime.days_in_month(column.obj)
+            # must subtract 1 to avoid rolling over to the previous month
+            days_to_subtract = plc.binaryop.binary_operation(
+                days_to_subtract,
+                plc.interop.from_arrow(pa.scalar(1, type=pa.int32())),
+                plc.binaryop.BinaryOperator.SUB,
+                plc.DataType(plc.TypeId.DURATION_DAYS),
+            )
+            result = plc.binaryop.binary_operation(
+                ends,
+                days_to_subtract,
+                plc.binaryop.BinaryOperator.SUB,
+                column.obj.type(),
+            )
+
+            return Column(result)
+        if self.name is TemporalFunction.Name.MonthEnd:
+            return Column(
+                plc.unary.cast(
+                    plc.datetime.last_day_of_month(column.obj), column.obj.type()
+                )
+            )
+        if self.name is TemporalFunction.Name.IsLeapYear:
+            return Column(
+                plc.datetime.is_leap_year(column.obj),
+            )
+        if self.name is TemporalFunction.Name.OrdinalDay:
+            return Column(plc.datetime.day_of_year(column.obj))
         if self.name is TemporalFunction.Name.Microsecond:
             millis = plc.datetime.extract_datetime_component(
                 column.obj, plc.datetime.DatetimeComponent.MILLISECOND
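These four newly supported operations correspond to polars' `dt.month_start`, `dt.month_end`, `dt.is_leap_year`, and `dt.ordinal_day`. Note how `MonthStart` is derived: the last day of the month minus `days_in_month - 1`, with the subtrahend built as a `DURATION_DAYS` value so the result stays in the column's timestamp type. A small end-to-end sketch, assuming a working GPU engine:

```python
from datetime import datetime

import polars as pl

lf = pl.LazyFrame({"d": [datetime(2024, 2, 15), datetime(2025, 3, 31)]})

out = lf.select(
    start=pl.col("d").dt.month_start(),  # MonthStart: last_day_of_month - (days_in_month - 1)
    end=pl.col("d").dt.month_end(),      # MonthEnd: last_day_of_month, cast to d's type
    leap=pl.col("d").dt.is_leap_year(),  # IsLeapYear
    doy=pl.col("d").dt.ordinal_day(),    # OrdinalDay: day_of_year
).collect(engine=pl.GPUEngine(raise_on_fail=True))
```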
cudf_polars/dsl/expressions/literal.py
CHANGED

@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -8,21 +8,16 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING, Any
 
-import pyarrow as pa
-
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
 from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
-from cudf_polars.utils import dtypes
 
 if TYPE_CHECKING:
     from collections.abc import Hashable, Mapping
 
     import pyarrow as pa
 
-    import polars as pl
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["Literal", "LiteralColumn"]
@@ -61,10 +56,9 @@ class LiteralColumn(Expr):
     _non_child = ("dtype", "value")
     value: pa.Array[Any]
 
-    def __init__(self, dtype: plc.DataType, value: pl.Series) -> None:
+    def __init__(self, dtype: plc.DataType, value: pa.Array) -> None:
         self.dtype = dtype
-        data = value.to_arrow()
-        self.value = data.cast(dtypes.downcast_arrow_lists(data.type))
+        self.value = value
         self.children = ()
         self.is_pointwise = True
 
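`LiteralColumn` no longer converts a `pl.Series` itself; it now expects a ready-made `pa.Array`, so the `to_arrow()` call and the large-list downcast move to the caller (presumably the translator, given the `translate.py` changes in this release). A sketch of the new contract, with illustrative dtype and values:

```python
import polars as pl
import pylibcudf as plc

from cudf_polars.dsl.expressions.literal import LiteralColumn
from cudf_polars.utils import dtypes

# The conversion that used to live in LiteralColumn.__init__: turn the
# Series into arrow and downcast any large-list types first.
series = pl.Series("lit", [1, 2, 3])
data = series.to_arrow()
value = data.cast(dtypes.downcast_arrow_lists(data.type))

expr = LiteralColumn(plc.DataType(plc.TypeId.INT64), value)
```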