cudf-polars-cu12 25.2.2__py3-none-any.whl → 25.6.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +82 -65
- cudf_polars/containers/column.py +138 -7
- cudf_polars/containers/dataframe.py +26 -39
- cudf_polars/dsl/expr.py +3 -1
- cudf_polars/dsl/expressions/aggregation.py +27 -63
- cudf_polars/dsl/expressions/base.py +40 -72
- cudf_polars/dsl/expressions/binaryop.py +5 -41
- cudf_polars/dsl/expressions/boolean.py +25 -53
- cudf_polars/dsl/expressions/datetime.py +97 -17
- cudf_polars/dsl/expressions/literal.py +27 -33
- cudf_polars/dsl/expressions/rolling.py +110 -9
- cudf_polars/dsl/expressions/selection.py +8 -26
- cudf_polars/dsl/expressions/slicing.py +47 -0
- cudf_polars/dsl/expressions/sorting.py +5 -18
- cudf_polars/dsl/expressions/string.py +33 -36
- cudf_polars/dsl/expressions/ternary.py +3 -10
- cudf_polars/dsl/expressions/unary.py +35 -75
- cudf_polars/dsl/ir.py +749 -212
- cudf_polars/dsl/nodebase.py +8 -1
- cudf_polars/dsl/to_ast.py +5 -3
- cudf_polars/dsl/translate.py +319 -171
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +292 -0
- cudf_polars/dsl/utils/groupby.py +97 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +46 -0
- cudf_polars/dsl/utils/rolling.py +113 -0
- cudf_polars/dsl/utils/windows.py +186 -0
- cudf_polars/experimental/base.py +17 -19
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsh.py +1279 -0
- cudf_polars/experimental/dask_registers.py +196 -0
- cudf_polars/experimental/distinct.py +174 -0
- cudf_polars/experimental/explain.py +127 -0
- cudf_polars/experimental/expressions.py +521 -0
- cudf_polars/experimental/groupby.py +288 -0
- cudf_polars/experimental/io.py +58 -29
- cudf_polars/experimental/join.py +353 -0
- cudf_polars/experimental/parallel.py +166 -93
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +92 -7
- cudf_polars/experimental/shuffle.py +294 -0
- cudf_polars/experimental/sort.py +45 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/utils.py +100 -0
- cudf_polars/testing/asserts.py +146 -6
- cudf_polars/testing/io.py +72 -0
- cudf_polars/testing/plugin.py +78 -76
- cudf_polars/typing/__init__.py +59 -6
- cudf_polars/utils/config.py +353 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +22 -5
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +5 -4
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/METADATA +10 -7
- cudf_polars_cu12-25.6.0.dist-info/RECORD +73 -0
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/WHEEL +1 -1
- cudf_polars/experimental/dask_serialize.py +0 -59
- cudf_polars_cu12-25.2.2.dist-info/RECORD +0 -48
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info/licenses}/LICENSE +0 -0
- {cudf_polars_cu12-25.2.2.dist-info → cudf_polars_cu12-25.6.0.dist-info}/top_level.txt +0 -0
--- cudf_polars/dsl/expressions/aggregation.py (25.2.2)
+++ cudf_polars/dsl/expressions/aggregation.py (25.6.0)
@@ -9,22 +9,13 @@ from __future__ import annotations
 from functools import partial
 from typing import TYPE_CHECKING, Any, ClassVar
 
-import pyarrow as pa
-
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.dsl.expressions.base import (
-    AggInfo,
-    ExecutionContext,
-    Expr,
-)
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 from cudf_polars.dsl.expressions.literal import Literal
-from cudf_polars.dsl.expressions.unary import UnaryFunction
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["Agg"]
@@ -75,11 +66,15 @@ class Agg(Expr):
                 else plc.types.NullPolicy.INCLUDE
             )
         elif name == "quantile":
-            _, quantile = self.children
+            child, quantile = self.children
             if not isinstance(quantile, Literal):
                 raise NotImplementedError("Only support literal quantile values")
+            if options == "equiprobable":
+                raise NotImplementedError("Quantile with equiprobable interpolation")
+            if plc.traits.is_duration(child.dtype):
+                raise NotImplementedError("Quantile with duration data type")
             req = plc.aggregation.quantile(
-                quantiles=[quantile.value.as_py()], interp=Agg.interp_mapping[options]
+                quantiles=[quantile.value], interp=Agg.interp_mapping[options]
             )
         else:
             raise NotImplementedError(
@@ -91,7 +86,9 @@ class Agg(Expr):
             op = partial(self._reduce, request=req)
         elif name in {"min", "max"}:
             op = partial(op, propagate_nans=options)
-        elif name in {"count", "sum", "first", "last"}:
+        elif name == "count":
+            op = partial(op, include_nulls=options)
+        elif name in {"sum", "first", "last"}:
             pass
         else:
             raise NotImplementedError(
@@ -124,38 +121,19 @@ class Agg(Expr):
         "linear": plc.types.Interpolation.LINEAR,
     }
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        if depth >= 1:
-            raise NotImplementedError(
-                "Nested aggregations in groupby"
-            )  # pragma: no cover; check_agg trips first
-        if (isminmax := self.name in {"min", "max"}) and self.options:
-            raise NotImplementedError("Nan propagation in groupby for min/max")
-        (child,) = self.children
-        ((expr, _, _),) = child.collect_agg(depth=depth + 1).requests
-        request = self.request
-        # These are handled specially here because we don't set up the
-        # request for the whole-frame agg because we can avoid a
-        # reduce for these.
+    @property
+    def agg_request(self) -> plc.aggregation.Aggregation:  # noqa: D102
         if self.name == "first":
-            request = plc.aggregation.nth_element(
+            return plc.aggregation.nth_element(
                 0, null_handling=plc.types.NullPolicy.INCLUDE
             )
         elif self.name == "last":
-            request = plc.aggregation.nth_element(
+            return plc.aggregation.nth_element(
                 -1, null_handling=plc.types.NullPolicy.INCLUDE
             )
-        if request is None:
-            raise NotImplementedError(
-                f"Aggregation {self.name} in groupby"
-            )  # pragma: no cover; __init__ trips first
-        if isminmax and plc.traits.is_floating_point(self.dtype):
-            assert expr is not None
-            # Ignore nans in these groupby aggs, do this by masking
-            # nans in the input
-            expr = UnaryFunction(self.dtype, "mask_nans", (), expr)
-        return AggInfo([(expr, request, self)])
+        else:
+            assert self.request is not None, "Init should have raised"
+            return self.request
 
     def _reduce(
         self, column: Column, *, request: plc.aggregation.Aggregation
@@ -167,26 +145,20 @@ class Agg(Expr):
             )
         )
 
-    def _count(self, column: Column) -> Column:
+    def _count(self, column: Column, *, include_nulls: bool) -> Column:
+        null_count = column.null_count if not include_nulls else 0
         return Column(
             plc.Column.from_scalar(
-                plc.interop.from_arrow(
-                    pa.scalar(
-                        column.obj.size() - column.obj.null_count(),
-                        type=plc.interop.to_arrow(self.dtype),
-                    ),
-                ),
+                plc.Scalar.from_py(column.size - null_count, self.dtype),
                 1,
             )
         )
 
     def _sum(self, column: Column) -> Column:
-        if column.obj.size() == 0 or column.obj.null_count() == column.obj.size():
+        if column.size == 0 or column.null_count == column.size:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(0, type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(0, self.dtype),
                     1,
                 )
             )
@@ -196,9 +168,7 @@ class Agg(Expr):
         if propagate_nans and column.nan_count > 0:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(float("nan"), type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(float("nan"), self.dtype),
                     1,
                 )
             )
@@ -210,9 +180,7 @@ class Agg(Expr):
         if propagate_nans and column.nan_count > 0:
             return Column(
                 plc.Column.from_scalar(
-                    plc.interop.from_arrow(
-                        pa.scalar(float("nan"), type=plc.interop.to_arrow(self.dtype))
-                    ),
+                    plc.Scalar.from_py(float("nan"), self.dtype),
                     1,
                 )
             )
@@ -224,15 +192,11 @@ class Agg(Expr):
         return Column(plc.copying.slice(column.obj, [0, 1])[0])
 
     def _last(self, column: Column) -> Column:
-        n = column.obj.size()
+        n = column.size
         return Column(plc.copying.slice(column.obj, [n - 1, n])[0])
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
    ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         if context is not ExecutionContext.FRAME:
@@ -243,4 +207,4 @@ class Agg(Expr):
         # Aggregations like quantiles may have additional children that were
         # preprocessed into pylibcudf requests.
         child = self.children[0]
-        return self.op(child.evaluate(df, context=context, mapping=mapping))
+        return self.op(child.evaluate(df, context=context))
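The dominant pattern in this file is the removal of the pyarrow round-trip when building device scalars. A minimal before/after sketch, using only the calls that appear in the hunks above (the INT64 dtype is an illustrative choice, not from the diff):

    # Scalar construction, 25.2.2 vs 25.6.0.
    import pylibcudf as plc

    dtype = plc.DataType(plc.TypeId.INT64)

    # 25.2.2: build a host-side pyarrow scalar, then convert it.
    # import pyarrow as pa
    # scalar = plc.interop.from_arrow(
    #     pa.scalar(0, type=plc.interop.to_arrow(dtype))
    # )

    # 25.6.0: construct the scalar directly from a Python value.
    scalar = plc.Scalar.from_py(0, dtype)

    # Either form can then back a length-1 broadcast column.
    column = plc.Column.from_scalar(scalar, 1)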
--- cudf_polars/dsl/expressions/base.py (25.2.2)
+++ cudf_polars/dsl/expressions/base.py (25.6.0)
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -16,7 +16,7 @@ from cudf_polars.containers import Column
 from cudf_polars.dsl.nodebase import Node
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
+    from typing_extensions import Self
 
     from cudf_polars.containers import Column, DataFrame
 
@@ -46,11 +46,7 @@ class Expr(Node["Expr"]):
     """Names of non-child data (not Exprs) for reconstruction."""
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """
         Evaluate this expression given a dataframe for context.
@@ -61,15 +57,10 @@ class Expr(Node["Expr"]):
             DataFrame that will provide columns.
         context
             What context are we performing this evaluation in?
-        mapping
-            Substitution mapping from expressions to Columns, used to
-            override the evaluation of a given expression if we're
-            performing a simple rewritten evaluation.
 
         Notes
         -----
-        Do not call this function directly, but rather
-        :meth:`evaluate` which handles the mapping lookups.
+        Do not call this function directly, but rather :meth:`evaluate`.
 
         Returns
         -------
@@ -87,11 +78,7 @@ class Expr(Node["Expr"]):
         )  # pragma: no cover; translation of unimplemented nodes trips first
 
     def evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """
         Evaluate this expression given a dataframe for context.
@@ -102,10 +89,6 @@ class Expr(Node["Expr"]):
             DataFrame that will provide columns.
         context
             What context are we performing this evaluation in?
-        mapping
-            Substitution mapping from expressions to Columns, used to
-            override the evaluation of a given expression if we're
-            performing a simple rewritten evaluation.
 
         Notes
         -----
@@ -124,37 +107,28 @@ class Expr(Node["Expr"]):
         are returned during translation to the IR, but for now we
         are not perfect.
         """
-        if mapping is None:
-            return self.do_evaluate(df, context=context, mapping=mapping)
-        try:
-            return mapping[self]
-        except KeyError:
-            return self.do_evaluate(df, context=context, mapping=mapping)
-
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """
-        Collect information about aggregations in groupbys.
+        return self.do_evaluate(df, context=context)
 
-        Parameters
-        ----------
-        depth
-            The depth of aggregating (`agg` or `implode`)
-            expressions we are currently at.
+    @property
+    def agg_request(self) -> plc.aggregation.Aggregation:
+        """
+        The aggregation for this expression in a grouped aggregation.
 
         Returns
         -------
-        Aggregation info describing the expression to aggregate in the
-        groupby.
+        Aggregation request. Default is to collect the expression.
+
+        Notes
+        -----
+        This presumes that the IR translation has decomposed groupby
+        reductions only into cases we can handle.
 
         Raises
         ------
         NotImplementedError
-            If we can't currently perform the aggregation request, for
-            example nested aggregations like ``a.max().min()``.
+            If requesting an aggregation from an unexpected expression.
         """
-        raise NotImplementedError(
-            f"Collecting aggregation info for {type(self).__name__}"
-        )  # pragma: no cover; check_agg trips first
+        return plc.aggregation.collect_list()
 
 
 class ErrorExpr(Expr):
@@ -166,7 +140,7 @@ class ErrorExpr(Expr):
         self.dtype = dtype
         self.error = error
         self.children = ()
-        self.is_pointwise = True
+        self.is_pointwise = False
 
 
 class NamedExpr:
@@ -202,11 +176,7 @@ class NamedExpr:
         return not self.__eq__(other)
 
     def evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """
         Evaluate this expression given a dataframe for context.
@@ -217,8 +187,6 @@ class NamedExpr:
             DataFrame providing context
         context
             Execution context
-        mapping
-            Substitution mapping
 
         Returns
         -------
@@ -229,13 +197,25 @@ class NamedExpr:
         :meth:`Expr.evaluate` for details, this function just adds the
         name to a column produced from an expression.
         """
-        return self.value.evaluate(df, context=context, mapping=mapping).rename(
-            self.name
-        )
+        return self.value.evaluate(df, context=context).rename(self.name)
+
+    def reconstruct(self, expr: Expr) -> Self:
+        """
+        Rebuild with a new `Expr` value.
+
+        Parameters
+        ----------
+        expr
+            New `Expr` value
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        return self.value.collect_agg(depth=depth)
+        Returns
+        -------
+        New `NamedExpr` with `expr` as the underlying expression.
+        The name of the original `NamedExpr` is preserved.
+        """
+        if expr is self.value:
+            return self
+        return type(self)(self.name, expr)
 
 
 class Col(Expr):
@@ -250,21 +230,13 @@ class Col(Expr):
         self.children = ()
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         # Deliberately remove the name here so that we guarantee
         # evaluation of the IR produces names.
         return df.column_map[self.name].rename(None)
 
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        return AggInfo([(self, plc.aggregation.collect_list(), self)])
-
 
 class ColRef(Expr):
     __slots__ = ("index", "table_ref")
@@ -288,11 +260,7 @@ class ColRef(Expr):
         self.children = (column,)
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         raise NotImplementedError(
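Here the per-expression `collect_agg` walkers give way to an `agg_request` property, and `NamedExpr` gains `reconstruct` for rebuilding nodes during translation. A small sketch of the `reconstruct` contract as defined above; the `Col(dtype, name)` constructor signature is an assumption, not shown in this diff:

    import pylibcudf as plc
    from cudf_polars.dsl.expressions.base import Col, NamedExpr

    dtype = plc.DataType(plc.TypeId.INT64)
    named = NamedExpr("a", Col(dtype, "a"))  # Col(dtype, name) assumed

    # Rebuilding with the identical Expr is a no-op that returns self...
    assert named.reconstruct(named.value) is named

    # ...while a different Expr yields a new NamedExpr keeping the old name.
    rebuilt = named.reconstruct(Col(dtype, "b"))
    assert rebuilt.name == "a"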
--- cudf_polars/dsl/expressions/binaryop.py (25.2.2)
+++ cudf_polars/dsl/expressions/binaryop.py (25.6.0)
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -13,11 +13,9 @@ from polars.polars import _expr_nodes as pl_expr
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
-from cudf_polars.dsl.expressions.base import AggInfo, ExecutionContext, Expr
+from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from cudf_polars.containers import DataFrame
 
 __all__ = ["BinOp"]
@@ -85,20 +83,13 @@ class BinOp(Expr):
     }
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        left, right = (
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        )
+        left, right = (child.evaluate(df, context=context) for child in self.children)
         lop = left.obj
         rop = right.obj
-        if left.obj.size() != right.obj.size():
+        if left.size != right.size:
             if left.is_scalar:
                 lop = left.obj_scalar
             elif right.is_scalar:
@@ -106,30 +97,3 @@ class BinOp(Expr):
         return Column(
             plc.binaryop.binary_operation(lop, rop, self.op, self.dtype),
         )
-
-    def collect_agg(self, *, depth: int) -> AggInfo:
-        """Collect information about aggregations in groupbys."""
-        if depth == 1:
-            # inside aggregation, need to pre-evaluate,
-            # groupby construction has checked that we don't have
-            # nested aggs, so stop the recursion and return ourselves
-            # for pre-eval
-            return AggInfo([(self, plc.aggregation.collect_list(), self)])
-        else:
-            left_info, right_info = (
-                child.collect_agg(depth=depth) for child in self.children
-            )
-            requests = [*left_info.requests, *right_info.requests]
-            # TODO: Hack, if there were no reductions inside this
-            # binary expression then we want to pre-evaluate and
-            # collect ourselves. Otherwise we want to collect the
-            # aggregations inside and post-evaluate. This is a bad way
-            # of checking that we are in case 1.
-            if all(
-                agg.kind() == plc.aggregation.Kind.COLLECT_LIST
-                for _, agg, _ in requests
-            ):
-                return AggInfo([(self, plc.aggregation.collect_list(), self)])
-            return AggInfo(
-                [*left_info.requests, *right_info.requests],
-            )
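Alongside the deleted `collect_agg` recursion (per the file list, groupby decomposition helpers land in the new cudf_polars/dsl/utils/aggregations.py), the hunks above also swap raw pylibcudf calls such as `column.obj.size()` for container properties. A sketch of that property shift, assuming the 25.6 `Column` container accepts a bare `plc.Column` as the hunks show:

    import pylibcudf as plc
    from cudf_polars.containers import Column

    dtype = plc.DataType(plc.TypeId.FLOAT64)
    col = Column(plc.Column.from_scalar(plc.Scalar.from_py(1.5, dtype), 4))

    # The container now exposes what 25.2.2 fetched from the raw column.
    assert col.size == col.obj.size()
    assert col.null_count == col.obj.null_count()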
--- cudf_polars/dsl/expressions/boolean.py (25.2.2)
+++ cudf_polars/dsl/expressions/boolean.py (25.6.0)
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
 # SPDX-License-Identifier: Apache-2.0
 # TODO: remove need for this
 # ruff: noqa: D101
@@ -10,8 +10,6 @@ from enum import IntEnum, auto
 from functools import partial, reduce
 from typing import TYPE_CHECKING, Any, ClassVar
 
-import pyarrow as pa
-
 import pylibcudf as plc
 
 from cudf_polars.containers import Column
@@ -19,10 +17,9 @@ from cudf_polars.dsl.expressions.base import (
     ExecutionContext,
     Expr,
 )
+from cudf_polars.utils.versions import POLARS_VERSION_LT_128
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
     from typing_extensions import Self
 
     import polars.type_aliases as pl_types
@@ -89,9 +86,11 @@ class BooleanFunction(Expr):
             BooleanFunction.Name.IsLastDistinct,
             BooleanFunction.Name.IsUnique,
         )
-        if self.name is BooleanFunction.Name.IsIn and not all(
-            c.dtype == self.children[0].dtype for c in self.children
-        ):
+        if (
+            POLARS_VERSION_LT_128
+            and self.name is BooleanFunction.Name.IsIn
+            and not all(c.dtype == self.children[0].dtype for c in self.children)
+        ):  # pragma: no cover
             # TODO: If polars IR doesn't put the casts in, we need to
             # mimic the supertype promotion rules.
             raise NotImplementedError("IsIn doesn't support supertype casting")
@@ -145,11 +144,7 @@ class BooleanFunction(Expr):
     }
 
     def do_evaluate(
-        self,
-        df: DataFrame,
-        *,
-        context: ExecutionContext = ExecutionContext.FRAME,
-        mapping: Mapping[Expr, Column] | None = None,
+        self, df: DataFrame, *, context: ExecutionContext = ExecutionContext.FRAME
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         if self.name in (
@@ -160,29 +155,22 @@ class BooleanFunction(Expr):
             (child,) = self.children
             is_finite = self.name is BooleanFunction.Name.IsFinite
             if child.dtype.id() not in (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64):
-                value = plc.interop.from_arrow(
-                    pa.scalar(value=is_finite, type=plc.interop.to_arrow(self.dtype))
-                )
+                value = plc.Scalar.from_py(is_finite)
                 return Column(plc.Column.from_scalar(value, df.num_rows))
-            needles = child.evaluate(df, context=context, mapping=mapping)
+            needles = child.evaluate(df, context=context)
            to_search = [-float("inf"), float("inf")]
             if is_finite:
                 # NaN is neither finite not infinite
                 to_search.append(float("nan"))
-            haystack = plc.interop.from_arrow(
-                pa.array(
-                    to_search,
-                    type=plc.interop.to_arrow(needles.obj.type()),
-                )
+            haystack = plc.Column.from_iterable_of_py(
+                to_search,
+                dtype=needles.obj.type(),
             )
             result = plc.search.contains(haystack, needles.obj)
             if is_finite:
                 result = plc.unary.unary_operation(result, plc.unary.UnaryOperator.NOT)
             return Column(result)
-        columns = [
-            child.evaluate(df, context=context, mapping=mapping)
-            for child in self.children
-        ]
+        columns = [child.evaluate(df, context=context) for child in self.children]
         # Kleene logic for Any (OR) and All (AND) if ignore_nulls is
         # False
         if self.name in (BooleanFunction.Name.Any, BooleanFunction.Name.All):
@@ -191,7 +179,7 @@ class BooleanFunction(Expr):
             is_any = self.name is BooleanFunction.Name.Any
             agg = plc.aggregation.any() if is_any else plc.aggregation.all()
             result = plc.reduce.reduce(column.obj, agg, self.dtype)
-            if not ignore_nulls and column.obj.null_count() > 0:
+            if not ignore_nulls and column.null_count > 0:
                 # Truth tables
                 #   Any         All
                 #   | F U T   | F U T
@@ -218,14 +206,14 @@ class BooleanFunction(Expr):
             (column,) = columns
             return Column(
                 plc.unary.is_nan(column.obj).with_mask(
-                    column.obj.null_mask(), column.obj.null_count()
+                    column.obj.null_mask(), column.null_count
                 )
             )
         elif self.name is BooleanFunction.Name.IsNotNan:
             (column,) = columns
             return Column(
                 plc.unary.is_not_nan(column.obj).with_mask(
-                    column.obj.null_mask(), column.obj.null_count()
+                    column.obj.null_mask(), column.null_count
                 )
             )
         elif self.name is BooleanFunction.Name.IsFirstDistinct:
@@ -233,48 +221,32 @@ class BooleanFunction(Expr):
             return self._distinct(
                 column,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST,
-                source_value=plc.interop.from_arrow(
-                    pa.scalar(value=True, type=plc.interop.to_arrow(self.dtype))
-                ),
-                target_value=plc.interop.from_arrow(
-                    pa.scalar(value=False, type=plc.interop.to_arrow(self.dtype))
-                ),
+                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype),
+                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype),
             )
         elif self.name is BooleanFunction.Name.IsLastDistinct:
             (column,) = columns
             return self._distinct(
                 column,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_LAST,
-                source_value=plc.interop.from_arrow(
-                    pa.scalar(value=True, type=plc.interop.to_arrow(self.dtype))
-                ),
-                target_value=plc.interop.from_arrow(
-                    pa.scalar(value=False, type=plc.interop.to_arrow(self.dtype))
-                ),
+                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype),
+                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype),
             )
         elif self.name is BooleanFunction.Name.IsUnique:
             (column,) = columns
             return self._distinct(
                 column,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_NONE,
-                source_value=plc.interop.from_arrow(
-                    pa.scalar(value=True, type=plc.interop.to_arrow(self.dtype))
-                ),
-                target_value=plc.interop.from_arrow(
-                    pa.scalar(value=False, type=plc.interop.to_arrow(self.dtype))
-                ),
+                source_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype),
+                target_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype),
            )
         elif self.name is BooleanFunction.Name.IsDuplicated:
             (column,) = columns
             return self._distinct(
                 column,
                 keep=plc.stream_compaction.DuplicateKeepOption.KEEP_NONE,
-                source_value=plc.interop.from_arrow(
-                    pa.scalar(value=False, type=plc.interop.to_arrow(self.dtype))
-                ),
-                target_value=plc.interop.from_arrow(
-                    pa.scalar(value=True, type=plc.interop.to_arrow(self.dtype))
-                ),
+                source_value=plc.Scalar.from_py(py_val=False, dtype=self.dtype),
+                target_value=plc.Scalar.from_py(py_val=True, dtype=self.dtype),
             )
         elif self.name is BooleanFunction.Name.AllHorizontal:
             return Column(