cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff compares the contents of publicly released versions of this package as published to their public registry. It is provided for informational purposes only.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +60 -15
- cudf_polars/containers/column.py +137 -77
- cudf_polars/containers/dataframe.py +123 -34
- cudf_polars/containers/datatype.py +134 -13
- cudf_polars/dsl/expr.py +0 -2
- cudf_polars/dsl/expressions/aggregation.py +80 -28
- cudf_polars/dsl/expressions/binaryop.py +34 -14
- cudf_polars/dsl/expressions/boolean.py +110 -37
- cudf_polars/dsl/expressions/datetime.py +59 -30
- cudf_polars/dsl/expressions/literal.py +11 -5
- cudf_polars/dsl/expressions/rolling.py +460 -119
- cudf_polars/dsl/expressions/selection.py +9 -8
- cudf_polars/dsl/expressions/slicing.py +1 -1
- cudf_polars/dsl/expressions/string.py +256 -114
- cudf_polars/dsl/expressions/struct.py +19 -7
- cudf_polars/dsl/expressions/ternary.py +33 -3
- cudf_polars/dsl/expressions/unary.py +126 -64
- cudf_polars/dsl/ir.py +1053 -350
- cudf_polars/dsl/to_ast.py +30 -13
- cudf_polars/dsl/tracing.py +194 -0
- cudf_polars/dsl/translate.py +307 -107
- cudf_polars/dsl/utils/aggregations.py +43 -30
- cudf_polars/dsl/utils/reshape.py +14 -2
- cudf_polars/dsl/utils/rolling.py +12 -8
- cudf_polars/dsl/utils/windows.py +35 -20
- cudf_polars/experimental/base.py +55 -2
- cudf_polars/experimental/benchmarks/pdsds.py +12 -126
- cudf_polars/experimental/benchmarks/pdsh.py +792 -2
- cudf_polars/experimental/benchmarks/utils.py +596 -39
- cudf_polars/experimental/dask_registers.py +47 -20
- cudf_polars/experimental/dispatch.py +9 -3
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +15 -2
- cudf_polars/experimental/expressions.py +30 -15
- cudf_polars/experimental/groupby.py +25 -4
- cudf_polars/experimental/io.py +156 -124
- cudf_polars/experimental/join.py +53 -23
- cudf_polars/experimental/parallel.py +68 -19
- cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
- cudf_polars/experimental/rapidsmpf/core.py +488 -0
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
- cudf_polars/experimental/rapidsmpf/io.py +696 -0
- cudf_polars/experimental/rapidsmpf/join.py +322 -0
- cudf_polars/experimental/rapidsmpf/lower.py +74 -0
- cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
- cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
- cudf_polars/experimental/rapidsmpf/union.py +115 -0
- cudf_polars/experimental/rapidsmpf/utils.py +374 -0
- cudf_polars/experimental/repartition.py +9 -2
- cudf_polars/experimental/select.py +177 -14
- cudf_polars/experimental/shuffle.py +46 -12
- cudf_polars/experimental/sort.py +100 -26
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/experimental/statistics.py +24 -5
- cudf_polars/experimental/utils.py +25 -7
- cudf_polars/testing/asserts.py +13 -8
- cudf_polars/testing/io.py +2 -1
- cudf_polars/testing/plugin.py +93 -17
- cudf_polars/typing/__init__.py +86 -32
- cudf_polars/utils/config.py +473 -58
- cudf_polars/utils/cuda_stream.py +70 -0
- cudf_polars/utils/versions.py +5 -4
- cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
- cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
- cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
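
Most of the churn in the `cudf_polars/dsl/expressions/string.py` hunks reproduced below follows two mechanical patterns rather than behavioral changes: a rename of the `DataType` accessors (`.plc` to `.plc_type`, `.polars` to `.polars_type`) and the threading of an explicit CUDA stream through every pylibcudf call, which the new `cudf_polars/utils/cuda_stream.py` helper and the `df.stream` attribute visible in the hunks appear to support. A minimal sketch of the stream-threading pattern, using a call taken from the hunks below (`query_stream` is a hypothetical stand-in for the stream owned by the evaluating `DataFrame`; this is not the package's actual helper code):

```python
import pylibcudf as plc

def lowercase(strings: plc.Column, query_stream) -> plc.Column:
    # 25.10.0 launched this kernel on the default CUDA stream implicitly:
    #     plc.strings.case.to_lower(strings)
    # 26.2.0 passes the query's stream to every pylibcudf call, keeping
    # all work for one DataFrame ordered on one explicit stream:
    return plc.strings.case.to_lower(strings, stream=query_stream)
```

The same `stream=df.stream` argument shows up on scalar construction (`plc.Scalar.from_py(..., stream=...)`) and on device-to-host transfers (`.to_py(stream=...)`) throughout the file.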
cudf_polars/dsl/expressions/string.py

@@ -10,10 +10,10 @@ import functools
 import re
 from datetime import datetime
 from enum import IntEnum, auto
-from typing import TYPE_CHECKING, Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar, cast

+from polars import Struct as pl_Struct, polars  # type: ignore[attr-defined]
 from polars.exceptions import InvalidOperationError
-from polars.polars import dtype_str_repr

 import pylibcudf as plc

@@ -26,8 +26,6 @@ from cudf_polars.utils.versions import POLARS_VERSION_LT_132
 if TYPE_CHECKING:
     from typing_extensions import Self

-    from polars.polars import _expr_nodes as pl_expr
-
     from cudf_polars.containers import DataFrame, DataType

 __all__ = ["StringFunction"]
@@ -37,10 +35,15 @@ JsonDecodeType = list[tuple[str, plc.DataType, "JsonDecodeType"]]

 def _dtypes_for_json_decode(dtype: DataType) -> JsonDecodeType:
     """Get the dtypes for json decode."""
+    # Type checker doesn't narrow polars_type through dtype.id() check
     if dtype.id() == plc.TypeId.STRUCT:
         return [
-            (field.name, child.plc, _dtypes_for_json_decode(child))
-            for field, child in zip(dtype.polars.fields, dtype.children, strict=True)
+            (field.name, child.plc_type, _dtypes_for_json_decode(child))
+            for field, child in zip(
+                cast(pl_Struct, dtype.polars_type).fields,
+                dtype.children,
+                strict=True,
+            )
         ]
     else:
         return []
@@ -96,7 +99,7 @@ class StringFunction(Expr):
         ZFill = auto()

         @classmethod
-        def from_polars(cls, obj: pl_expr.StringFunction) -> Self:
+        def from_polars(cls, obj: polars._expr_nodes.StringFunction) -> Self:
             """Convert from polars' `StringFunction`."""
             try:
                 function, name = str(obj).split(".", maxsplit=1)
@@ -278,7 +281,7 @@ class StringFunction(Expr):
                 and width.value is not None
                 and width.value < 0
             ):  # pragma: no cover
-                dtypestr = dtype_str_repr(width.dtype.polars)
+                dtypestr = polars.dtype_str_repr(width.dtype.polars_type)
                 raise InvalidOperationError(
                     f"conversion from `{dtypestr}` to `u64` "
                     f"failed in column 'literal' for 1 out of "
@@ -310,14 +313,17 @@ class StringFunction(Expr):
            columns = [
                Column(
                    child.evaluate(df, context=context).obj, dtype=child.dtype
-               ).astype(self.dtype)
+               ).astype(self.dtype, stream=df.stream)
                for child in self.children
            ]
+           if len(columns) == 1:
+               return columns[0]

            non_unit_sizes = [c.size for c in columns if c.size != 1]
            broadcasted = broadcast(
                *columns,
                target_length=max(non_unit_sizes) if non_unit_sizes else None,
+               stream=df.stream,
            )

            delimiter, ignore_nulls = self.options
@@ -325,24 +331,39 @@ class StringFunction(Expr):
            return Column(
                plc.strings.combine.concatenate(
                    plc.Table([col.obj for col in broadcasted]),
-                   plc.Scalar.from_py(delimiter, self.dtype.plc),
-                   None if ignore_nulls else plc.Scalar.from_py(None, self.dtype.plc),
+                   plc.Scalar.from_py(
+                       delimiter, self.dtype.plc_type, stream=df.stream
+                   ),
+                   None
+                   if ignore_nulls
+                   else plc.Scalar.from_py(
+                       None, self.dtype.plc_type, stream=df.stream
+                   ),
                    None,
                    plc.strings.combine.SeparatorOnNulls.NO,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.ConcatVertical:
            (child,) = self.children
-           column = child.evaluate(df, context=context).astype(self.dtype)
+           column = child.evaluate(df, context=context).astype(
+               self.dtype, stream=df.stream
+           )
            delimiter, ignore_nulls = self.options
            if column.null_count > 0 and not ignore_nulls:
-               return Column(plc.Column.all_null_like(column.obj, 1), dtype=self.dtype)
+               return Column(
+                   plc.Column.all_null_like(column.obj, 1, stream=df.stream),
+                   dtype=self.dtype,
+               )
            return Column(
                plc.strings.combine.join_strings(
                    column.obj,
-                   plc.Scalar.from_py(delimiter, self.dtype.plc),
-                   plc.Scalar.from_py(None, self.dtype.plc),
+                   plc.Scalar.from_py(
+                       delimiter, self.dtype.plc_type, stream=df.stream
+                   ),
+                   plc.Scalar.from_py(None, self.dtype.plc_type, stream=df.stream),
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
@@ -351,19 +372,25 @@ class StringFunction(Expr):
            # polars pads based on bytes, libcudf by visual width
            # only pass chars if the visual width matches the byte length
            column = self.children[0].evaluate(df, context=context)
-           col_len_bytes = plc.strings.attributes.count_bytes(column.obj)
-           col_len_chars = plc.strings.attributes.count_characters(column.obj)
+           col_len_bytes = plc.strings.attributes.count_bytes(
+               column.obj, stream=df.stream
+           )
+           col_len_chars = plc.strings.attributes.count_characters(
+               column.obj, stream=df.stream
+           )
            equal = plc.binaryop.binary_operation(
                col_len_bytes,
                col_len_chars,
                plc.binaryop.BinaryOperator.NULL_EQUALS,
                plc.DataType(plc.TypeId.BOOL8),
+               stream=df.stream,
            )
            if not plc.reduce.reduce(
                equal,
                plc.aggregation.all(),
                plc.DataType(plc.TypeId.BOOL8),
-           ).to_py():
+               stream=df.stream,
+           ).to_py(stream=df.stream):
                raise InvalidOperationError(
                    "zfill only supports ascii strings with no unicode characters"
                )
@@ -373,36 +400,45 @@ class StringFunction(Expr):
                if width.value is None:
                    return Column(
                        plc.Column.from_scalar(
-                           plc.Scalar.from_py(None, self.dtype.plc),
+                           plc.Scalar.from_py(
+                               None, self.dtype.plc_type, stream=df.stream
+                           ),
                            column.size,
+                           stream=df.stream,
                        ),
                        self.dtype,
                    )
                return Column(
-                   plc.strings.padding.zfill(column.obj, width.value), self.dtype
+                   plc.strings.padding.zfill(
+                       column.obj, width.value, stream=df.stream
+                   ),
+                   self.dtype,
                )
            else:
                col_width = self.children[1].evaluate(df, context=context)
                assert isinstance(col_width, Column)
                all_gt_0 = plc.binaryop.binary_operation(
                    col_width.obj,
-                   plc.Scalar.from_py(0, plc.DataType(plc.TypeId.INT64)),
+                   plc.Scalar.from_py(
+                       0, plc.DataType(plc.TypeId.INT64), stream=df.stream
+                   ),
                    plc.binaryop.BinaryOperator.GREATER_EQUAL,
                    plc.DataType(plc.TypeId.BOOL8),
+                   stream=df.stream,
                )

-               if (
-                   POLARS_VERSION_LT_132
-                   and not plc.reduce.reduce(
-                       all_gt_0,
-                       plc.aggregation.all(),
-                       plc.DataType(plc.TypeId.BOOL8),
-                   ).to_py()
-               ):  # pragma: no cover
+               if POLARS_VERSION_LT_132 and not plc.reduce.reduce(
+                   all_gt_0,
+                   plc.aggregation.all(),
+                   plc.DataType(plc.TypeId.BOOL8),
+                   stream=df.stream,
+               ).to_py(stream=df.stream):  # pragma: no cover
                    raise InvalidOperationError("fill conversion failed.")

                return Column(
-                   plc.strings.padding.zfill_by_widths(column.obj, col_width.obj),
+                   plc.strings.padding.zfill_by_widths(
+                       column.obj, col_width.obj, stream=df.stream
+                   ),
                    self.dtype,
                )

@@ -414,34 +450,39 @@ class StringFunction(Expr):
            if literal:
                pat = arg.evaluate(df, context=context)
                pattern = (
-                   pat.obj_scalar
+                   pat.obj_scalar(stream=df.stream)
                    if pat.is_scalar and pat.size != column.size
                    else pat.obj
                )
                return Column(
-                   plc.strings.find.contains(column.obj, pattern), dtype=self.dtype
+                   plc.strings.find.contains(column.obj, pattern, stream=df.stream),
+                   dtype=self.dtype,
                )
            else:
                return Column(
-                   plc.strings.contains.contains_re(column.obj, self._regex_program),
+                   plc.strings.contains.contains_re(
+                       column.obj, self._regex_program, stream=df.stream
+                   ),
                    dtype=self.dtype,
                )
        elif self.name is StringFunction.Name.ContainsAny:
            (ascii_case_insensitive,) = self.options
            child, arg = self.children
-
-
+           plc_column = child.evaluate(df, context=context).obj
+           plc_targets = arg.evaluate(df, context=context).obj
            if ascii_case_insensitive:
-
-
+               plc_column = plc.strings.case.to_lower(plc_column, stream=df.stream)
+               plc_targets = plc.strings.case.to_lower(plc_targets, stream=df.stream)
            contains = plc.strings.find_multiple.contains_multiple(
-
-
+               plc_column,
+               plc_targets,
+               stream=df.stream,
            )
            binary_or = functools.partial(
                plc.binaryop.binary_operation,
                op=plc.binaryop.BinaryOperator.BITWISE_OR,
-               output_type=self.dtype.plc,
+               output_type=self.dtype.plc_type,
+               stream=df.stream,
            )
            return Column(
                functools.reduce(binary_or, contains.columns()),
@@ -449,28 +490,30 @@ class StringFunction(Expr):
            )
        elif self.name is StringFunction.Name.CountMatches:
            (child, _) = self.children
-
+           plc_column = child.evaluate(df, context=context).obj
            return Column(
                plc.unary.cast(
-                   plc.strings.contains.count_re(
-
+                   plc.strings.contains.count_re(
+                       plc_column, self._regex_program, stream=df.stream
+                   ),
+                   self.dtype.plc_type,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.Extract:
            (group_index,) = self.options
-
+           plc_column = self.children[0].evaluate(df, context=context).obj
            return Column(
                plc.strings.extract.extract_single(
-
+                   plc_column, self._regex_program, group_index - 1, stream=df.stream
                ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.ExtractGroups:
-
+           plc_column = self.children[0].evaluate(df, context=context).obj
            plc_table = plc.strings.extract.extract(
-
-               self._regex_program,
+               plc_column, self._regex_program, stream=df.stream
            )
            return Column(
                plc.Column.struct_from_children(plc_table.columns()),
@@ -479,38 +522,45 @@ class StringFunction(Expr):
        elif self.name is StringFunction.Name.Find:
            literal, _ = self.options
            (child, expr) = self.children
-
+           plc_column = child.evaluate(df, context=context).obj
            if literal:
                assert isinstance(expr, Literal)
                plc_column = plc.strings.find.find(
-
-                   plc.Scalar.from_py(
+                   plc_column,
+                   plc.Scalar.from_py(
+                       expr.value, expr.dtype.plc_type, stream=df.stream
+                   ),
+                   stream=df.stream,
                )
            else:
                plc_column = plc.strings.findall.find_re(
-
-                   self._regex_program,
+                   plc_column, self._regex_program, stream=df.stream
                )
            # Polars returns None for not found, libcudf returns -1
            new_mask, null_count = plc.transform.bools_to_mask(
                plc.binaryop.binary_operation(
                    plc_column,
-                   plc.Scalar.from_py(-1, plc_column.type()),
+                   plc.Scalar.from_py(-1, plc_column.type(), stream=df.stream),
                    plc.binaryop.BinaryOperator.NOT_EQUAL,
                    plc.DataType(plc.TypeId.BOOL8),
-               ),
+                   stream=df.stream,
+               ),
+               stream=df.stream,
            )
            plc_column = plc.unary.cast(
-               plc_column.with_mask(new_mask, null_count), self.dtype.plc
+               plc_column.with_mask(new_mask, null_count),
+               self.dtype.plc_type,
+               stream=df.stream,
            )
            return Column(plc_column, dtype=self.dtype)
        elif self.name is StringFunction.Name.JsonDecode:
            plc_column = self.children[0].evaluate(df, context=context).obj
            plc_table_with_metadata = plc.io.json.read_json_from_string_column(
                plc_column,
-               plc.Scalar.from_py("\n"),
-               plc.Scalar.from_py("NULL"),
+               plc.Scalar.from_py("\n", stream=df.stream),
+               plc.Scalar.from_py("NULL", stream=df.stream),
                _dtypes_for_json_decode(self.dtype),
+               stream=df.stream,
            )
            return Column(
                plc.Column.struct_from_children(plc_table_with_metadata.columns),
@@ -518,26 +568,34 @@ class StringFunction(Expr):
            )
        elif self.name is StringFunction.Name.JsonPathMatch:
            (child, expr) = self.children
-
+           plc_column = child.evaluate(df, context=context).obj
            assert isinstance(expr, Literal)
-           json_path = plc.Scalar.from_py(expr.value, expr.dtype.plc)
+           json_path = plc.Scalar.from_py(
+               expr.value, expr.dtype.plc_type, stream=df.stream
+           )
            return Column(
-               plc.json.get_json_object(
+               plc.json.get_json_object(plc_column, json_path, stream=df.stream),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.LenBytes:
-
+           plc_column = self.children[0].evaluate(df, context=context).obj
            return Column(
                plc.unary.cast(
-                   plc.strings.attributes.count_bytes(
+                   plc.strings.attributes.count_bytes(plc_column, stream=df.stream),
+                   self.dtype.plc_type,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.LenChars:
-
+           plc_column = self.children[0].evaluate(df, context=context).obj
            return Column(
                plc.unary.cast(
-                   plc.strings.attributes.count_characters(
+                   plc.strings.attributes.count_characters(
+                       plc_column, stream=df.stream
+                   ),
+                   self.dtype.plc_type,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
@@ -567,8 +625,13 @@ class StringFunction(Expr):
            return Column(
                plc.strings.slice.slice_strings(
                    column.obj,
-                   plc.Scalar.from_py(start, plc.DataType(plc.TypeId.INT32)),
-                   plc.Scalar.from_py(stop, plc.DataType(plc.TypeId.INT32)),
+                   plc.Scalar.from_py(
+                       start, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                   ),
+                   plc.Scalar.from_py(
+                       stop, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                   ),
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
@@ -582,7 +645,7 @@ class StringFunction(Expr):
            column = child.evaluate(df, context=context)
            if n == 1 and self.name is StringFunction.Name.SplitN:
                plc_column = plc.Column(
-                   self.dtype.plc,
+                   self.dtype.plc_type,
                    column.obj.size(),
                    None,
                    None,
@@ -592,7 +655,9 @@ class StringFunction(Expr):
                )
            else:
                assert isinstance(expr, Literal)
-               by = plc.Scalar.from_py(expr.value, expr.dtype.plc)
+               by = plc.Scalar.from_py(
+                   expr.value, expr.dtype.plc_type, stream=df.stream
+               )
                # See https://github.com/pola-rs/polars/issues/11640
                # for SplitN vs SplitExact edge case behaviors
                max_splits = n if is_split_n else 0
@@ -600,13 +665,16 @@ class StringFunction(Expr):
                column.obj,
                by,
                max_splits - 1,
+               stream=df.stream,
            )
            children = plc_table.columns()
            ref_column = children[0]
            if (remainder := n - len(children)) > 0:
                # Reach expected number of splits by padding with nulls
                children.extend(
-                   plc.Column.all_null_like(ref_column, ref_column.size())
+                   plc.Column.all_null_like(
+                       ref_column, ref_column.size(), stream=df.stream
+                   )
                    for _ in range(remainder + int(not is_split_n))
                )
            if not is_split_n:
@@ -614,7 +682,7 @@ class StringFunction(Expr):
                # TODO: Use plc.Column.struct_from_children once it is generalized
                # to handle columns that don't share the same null_mask/null_count
                plc_column = plc.Column(
-                   self.dtype.plc,
+                   self.dtype.plc_type,
                    ref_column.size(),
                    None,
                    None,
@@ -628,9 +696,11 @@ class StringFunction(Expr):
            StringFunction.Name.StripSuffix,
        }:
            child, expr = self.children
-
+           plc_column = child.evaluate(df, context=context).obj
            assert isinstance(expr, Literal)
-           target = plc.Scalar.from_py(expr.value, expr.dtype.plc)
+           target = plc.Scalar.from_py(
+               expr.value, expr.dtype.plc_type, stream=df.stream
+           )
            if self.name == StringFunction.Name.StripPrefix:
                find = plc.strings.find.starts_with
                start = len(expr.value)
@@ -640,17 +710,23 @@ class StringFunction(Expr):
                start = 0
                end = -len(expr.value)

-           mask = find(
+           mask = find(plc_column, target, stream=df.stream)
            sliced = plc.strings.slice.slice_strings(
-
-               plc.Scalar.from_py(
-
+               plc_column,
+               plc.Scalar.from_py(
+                   start, plc.DataType(plc.TypeId.INT32), stream=df.stream
+               ),
+               plc.Scalar.from_py(
+                   end, plc.DataType(plc.TypeId.INT32), stream=df.stream
+               ),
+               stream=df.stream,
            )
            return Column(
                plc.copying.copy_if_else(
                    sliced,
-
+                   plc_column,
                    mask,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
@@ -667,7 +743,12 @@ class StringFunction(Expr):
            else:
                side = plc.strings.SideType.BOTH
            return Column(
-               plc.strings.strip.strip(column.obj, side, chars.obj_scalar),
+               plc.strings.strip.strip(
+                   column.obj,
+                   side,
+                   chars.obj_scalar(stream=df.stream),
+                   stream=df.stream,
+               ),
                dtype=self.dtype,
            )

@@ -678,15 +759,17 @@ class StringFunction(Expr):
            if self.children[1].value is None:
                return Column(
                    plc.Column.from_scalar(
-                       plc.Scalar.from_py(None, self.dtype.plc),
+                       plc.Scalar.from_py(None, self.dtype.plc_type, stream=df.stream),
                        column.size,
+                       stream=df.stream,
                    ),
                    self.dtype,
                )
            elif self.children[1].value == 0:
                result = plc.Column.from_scalar(
-                   plc.Scalar.from_py("", self.dtype.plc),
+                   plc.Scalar.from_py("", self.dtype.plc_type, stream=df.stream),
                    column.size,
+                   stream=df.stream,
                )
                if column.obj.null_mask():
                    result = result.with_mask(
@@ -700,9 +783,14 @@ class StringFunction(Expr):
            return Column(
                plc.strings.slice.slice_strings(
                    column.obj,
-                   plc.Scalar.from_py(start, plc.DataType(plc.TypeId.INT32)),
-                   plc.Scalar.from_py(end, plc.DataType(plc.TypeId.INT32)),
+                   plc.Scalar.from_py(
+                       start, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                   ),
+                   plc.Scalar.from_py(
+                       end, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                   ),
                    None,
+                   stream=df.stream,
                ),
                self.dtype,
            )
@@ -715,16 +803,22 @@ class StringFunction(Expr):
            if end is None:
                return Column(
                    plc.Column.from_scalar(
-                       plc.Scalar.from_py(None, self.dtype.plc),
+                       plc.Scalar.from_py(None, self.dtype.plc_type, stream=df.stream),
                        column.size,
+                       stream=df.stream,
                    ),
                    self.dtype,
                )
            return Column(
                plc.strings.slice.slice_strings(
                    column.obj,
-                   plc.Scalar.from_py(0, plc.DataType(plc.TypeId.INT32)),
-                   plc.Scalar.from_py(end, plc.DataType(plc.TypeId.INT32)),
+                   plc.Scalar.from_py(
+                       0, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                   ),
+                   plc.Scalar.from_py(
+                       end, plc.DataType(plc.TypeId.INT32), stream=df.stream
+                   ),
+                   stream=df.stream,
                ),
                self.dtype,
            )
@@ -732,18 +826,25 @@ class StringFunction(Expr):
        columns = [child.evaluate(df, context=context) for child in self.children]
        if self.name is StringFunction.Name.Lowercase:
            (column,) = columns
-           return Column(plc.strings.case.to_lower(column.obj), dtype=self.dtype)
+           return Column(
+               plc.strings.case.to_lower(column.obj, stream=df.stream),
+               dtype=self.dtype,
+           )
        elif self.name is StringFunction.Name.Uppercase:
            (column,) = columns
-           return Column(plc.strings.case.to_upper(column.obj), dtype=self.dtype)
+           return Column(
+               plc.strings.case.to_upper(column.obj, stream=df.stream),
+               dtype=self.dtype,
+           )
        elif self.name is StringFunction.Name.EndsWith:
            column, suffix = columns
            return Column(
                plc.strings.find.ends_with(
                    column.obj,
-                   suffix.obj_scalar
+                   suffix.obj_scalar(stream=df.stream)
                    if column.size != suffix.size and suffix.is_scalar
                    else suffix.obj,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
@@ -752,9 +853,10 @@ class StringFunction(Expr):
            return Column(
                plc.strings.find.starts_with(
                    column.obj,
-                   prefix.obj_scalar
+                   prefix.obj_scalar(stream=df.stream)
                    if column.size != prefix.size and prefix.is_scalar
                    else prefix.obj,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
@@ -766,107 +868,147 @@ class StringFunction(Expr):
            if plc_col.null_count() == plc_col.size():
                return Column(
                    plc.Column.from_scalar(
-                       plc.Scalar.from_py(None, self.dtype.plc),
+                       plc.Scalar.from_py(None, self.dtype.plc_type, stream=df.stream),
                        plc_col.size(),
+                       stream=df.stream,
                    ),
                    self.dtype,
                )
            if format is None:
                # Polars begins inference with the first non null value
                if plc_col.null_mask() is not None:
-                   boolmask = plc.unary.is_valid(plc_col)
+                   boolmask = plc.unary.is_valid(plc_col, stream=df.stream)
                    table = plc.stream_compaction.apply_boolean_mask(
-                       plc.Table([plc_col]), boolmask
+                       plc.Table([plc_col]), boolmask, stream=df.stream
                    )
                    filtered = table.columns()[0]
-                   first_valid_data = plc.copying.get_element(filtered, 0).to_py()
+                   first_valid_data = plc.copying.get_element(
+                       filtered, 0, stream=df.stream
+                   ).to_py(stream=df.stream)
                else:
-                   first_valid_data = plc.copying.get_element(plc_col, 0).to_py()
+                   first_valid_data = plc.copying.get_element(
+                       plc_col, 0, stream=df.stream
+                   ).to_py(stream=df.stream)

-               format = _infer_datetime_format(first_valid_data)
+               # See https://github.com/rapidsai/cudf/issues/20202 for we type ignore
+               format = _infer_datetime_format(first_valid_data)  # type: ignore[arg-type]
                if not format:
                    raise InvalidOperationError(
                        "Unable to infer datetime format from data"
                    )

            is_timestamps = plc.strings.convert.convert_datetime.is_timestamp(
-               plc_col, format
+               plc_col, format, stream=df.stream
            )
            if strict:
                if not plc.reduce.reduce(
                    is_timestamps,
                    plc.aggregation.all(),
                    plc.DataType(plc.TypeId.BOOL8),
-               ).to_py():
+                   stream=df.stream,
+               ).to_py(stream=df.stream):
                    raise InvalidOperationError("conversion from `str` failed.")
            else:
                not_timestamps = plc.unary.unary_operation(
-                   is_timestamps, plc.unary.UnaryOperator.NOT
+                   is_timestamps, plc.unary.UnaryOperator.NOT, stream=df.stream
                )
-               null = plc.Scalar.from_py(None, plc_col.type())
+               null = plc.Scalar.from_py(None, plc_col.type(), stream=df.stream)
                plc_col = plc.copying.boolean_mask_scatter(
-                   [null], plc.Table([plc_col]), not_timestamps
+                   [null], plc.Table([plc_col]), not_timestamps, stream=df.stream
                ).columns()[0]

            return Column(
                plc.strings.convert.convert_datetime.to_timestamps(
-                   plc_col, self.dtype.plc, format
+                   plc_col, self.dtype.plc_type, format, stream=df.stream
                ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.Replace:
-
+           col_column, col_target, col_repl = columns
            n, _ = self.options
            return Column(
                plc.strings.replace.replace(
-
+                   col_column.obj,
+                   col_target.obj_scalar(stream=df.stream),
+                   col_repl.obj_scalar(stream=df.stream),
+                   maxrepl=n,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.ReplaceMany:
-
+           col_column, col_target, col_repl = columns
            return Column(
-               plc.strings.replace.replace_multiple(
+               plc.strings.replace.replace_multiple(
+                   col_column.obj, col_target.obj, col_repl.obj, stream=df.stream
+               ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.PadStart:
            if POLARS_VERSION_LT_132:  # pragma: no cover
                (column,) = columns
-
+               width_arg, char = self.options
+               pad_width = cast(int, width_arg)
            else:
                (column, width_col) = columns
                (char,) = self.options
                # TODO: Maybe accept a string scalar in
                # cudf::strings::pad to avoid DtoH transfer
-
+               # See https://github.com/rapidsai/cudf/issues/20202
+               width_py = width_col.obj.to_scalar(stream=df.stream).to_py(
+                   stream=df.stream
+               )
+               assert width_py is not None
+               pad_width = int(width_py)
+
            return Column(
                plc.strings.padding.pad(
-                   column.obj,
+                   column.obj,
+                   pad_width,
+                   plc.strings.SideType.LEFT,
+                   char,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.PadEnd:
            if POLARS_VERSION_LT_132:  # pragma: no cover
                (column,) = columns
-
+               width_arg, char = self.options
+               pad_width = cast(int, width_arg)
            else:
                (column, width_col) = columns
                (char,) = self.options
                # TODO: Maybe accept a string scalar in
                # cudf::strings::pad to avoid DtoH transfer
-
+               width_py = width_col.obj.to_scalar(stream=df.stream).to_py(
+                   stream=df.stream
+               )
+               assert width_py is not None
+               pad_width = int(width_py)
+
            return Column(
                plc.strings.padding.pad(
-                   column.obj,
+                   column.obj,
+                   pad_width,
+                   plc.strings.SideType.RIGHT,
+                   char,
+                   stream=df.stream,
                ),
                dtype=self.dtype,
            )
        elif self.name is StringFunction.Name.Reverse:
            (column,) = columns
-           return Column(plc.strings.reverse.reverse(column.obj), dtype=self.dtype)
+           return Column(
+               plc.strings.reverse.reverse(column.obj, stream=df.stream),
+               dtype=self.dtype,
+           )
        elif self.name is StringFunction.Name.Titlecase:
            (column,) = columns
-           return Column(plc.strings.capitalize.title(column.obj), dtype=self.dtype)
+           return Column(
+               plc.strings.capitalize.title(column.obj, stream=df.stream),
+               dtype=self.dtype,
+           )
        raise NotImplementedError(
            f"StringFunction {self.name}"
        )  # pragma: no cover; handled by init raising
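
The other recurring edit in these hunks is the container-API rename: `DataType.plc` becomes `DataType.plc_type`, `DataType.polars` becomes `DataType.polars_type`, and `Column.obj_scalar` changes from a property to a method taking `stream=`. A hedged compatibility sketch for code that touches these internals (the helper name is invented for illustration; only the two attribute names come from the hunks above):

```python
def plc_type_of(dtype):
    # cudf-polars 26.2.0 exposes the pylibcudf type as `.plc_type`;
    # 25.10.0 named the same attribute `.plc`.
    try:
        return dtype.plc_type
    except AttributeError:
        return dtype.plc
```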
|