cudf-polars-cu13: 25.10.0 → 26.2.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +60 -15
  4. cudf_polars/containers/column.py +137 -77
  5. cudf_polars/containers/dataframe.py +123 -34
  6. cudf_polars/containers/datatype.py +134 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/binaryop.py +34 -14
  10. cudf_polars/dsl/expressions/boolean.py +110 -37
  11. cudf_polars/dsl/expressions/datetime.py +59 -30
  12. cudf_polars/dsl/expressions/literal.py +11 -5
  13. cudf_polars/dsl/expressions/rolling.py +460 -119
  14. cudf_polars/dsl/expressions/selection.py +9 -8
  15. cudf_polars/dsl/expressions/slicing.py +1 -1
  16. cudf_polars/dsl/expressions/string.py +256 -114
  17. cudf_polars/dsl/expressions/struct.py +19 -7
  18. cudf_polars/dsl/expressions/ternary.py +33 -3
  19. cudf_polars/dsl/expressions/unary.py +126 -64
  20. cudf_polars/dsl/ir.py +1053 -350
  21. cudf_polars/dsl/to_ast.py +30 -13
  22. cudf_polars/dsl/tracing.py +194 -0
  23. cudf_polars/dsl/translate.py +307 -107
  24. cudf_polars/dsl/utils/aggregations.py +43 -30
  25. cudf_polars/dsl/utils/reshape.py +14 -2
  26. cudf_polars/dsl/utils/rolling.py +12 -8
  27. cudf_polars/dsl/utils/windows.py +35 -20
  28. cudf_polars/experimental/base.py +55 -2
  29. cudf_polars/experimental/benchmarks/pdsds.py +12 -126
  30. cudf_polars/experimental/benchmarks/pdsh.py +792 -2
  31. cudf_polars/experimental/benchmarks/utils.py +596 -39
  32. cudf_polars/experimental/dask_registers.py +47 -20
  33. cudf_polars/experimental/dispatch.py +9 -3
  34. cudf_polars/experimental/distinct.py +2 -0
  35. cudf_polars/experimental/explain.py +15 -2
  36. cudf_polars/experimental/expressions.py +30 -15
  37. cudf_polars/experimental/groupby.py +25 -4
  38. cudf_polars/experimental/io.py +156 -124
  39. cudf_polars/experimental/join.py +53 -23
  40. cudf_polars/experimental/parallel.py +68 -19
  41. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  42. cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
  43. cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
  44. cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
  45. cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
  46. cudf_polars/experimental/rapidsmpf/core.py +488 -0
  47. cudf_polars/experimental/rapidsmpf/dask.py +172 -0
  48. cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
  49. cudf_polars/experimental/rapidsmpf/io.py +696 -0
  50. cudf_polars/experimental/rapidsmpf/join.py +322 -0
  51. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  52. cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
  53. cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
  54. cudf_polars/experimental/rapidsmpf/union.py +115 -0
  55. cudf_polars/experimental/rapidsmpf/utils.py +374 -0
  56. cudf_polars/experimental/repartition.py +9 -2
  57. cudf_polars/experimental/select.py +177 -14
  58. cudf_polars/experimental/shuffle.py +46 -12
  59. cudf_polars/experimental/sort.py +100 -26
  60. cudf_polars/experimental/spilling.py +1 -1
  61. cudf_polars/experimental/statistics.py +24 -5
  62. cudf_polars/experimental/utils.py +25 -7
  63. cudf_polars/testing/asserts.py +13 -8
  64. cudf_polars/testing/io.py +2 -1
  65. cudf_polars/testing/plugin.py +93 -17
  66. cudf_polars/typing/__init__.py +86 -32
  67. cudf_polars/utils/config.py +473 -58
  68. cudf_polars/utils/cuda_stream.py +70 -0
  69. cudf_polars/utils/versions.py +5 -4
  70. cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
  71. cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
  72. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
  73. cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
  74. cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
  75. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/experimental/dask_registers.py

@@ -18,6 +18,7 @@ import rmm
 
 from cudf_polars.containers import Column, DataFrame, DataType
 from cudf_polars.dsl.expressions.base import NamedExpr
+from cudf_polars.utils.cuda_stream import get_dask_cuda_stream
 
 if TYPE_CHECKING:
     from collections.abc import Hashable, Mapping
@@ -33,7 +34,7 @@ if TYPE_CHECKING:
 __all__ = ["DaskRegisterManager", "register"]
 
 
-class DaskRegisterManager:  # pragma: no cover; Only used with Distributed scheduler
+class DaskRegisterManager:  # pragma: no cover; Only used with Distributed cluster
     """Manager to ensure ensure serializer is only registered once."""
 
     _registered: bool = False
@@ -73,41 +74,57 @@ def register() -> None:
     @cuda_serialize.register((Column, DataFrame))
     def serialize_column_or_frame(
         x: DataFrame | Column,
-    ) -> tuple[DataFrameHeader | ColumnHeader, list[memoryview]]:
+    ) -> tuple[
+        DataFrameHeader | ColumnHeader, list[memoryview[bytes] | plc.gpumemoryview]
+    ]:
         with log_errors():
-            header, frames = x.serialize()
-            return header, list(frames)  # Dask expect a list of frames
+            header, frames = x.serialize(stream=get_dask_cuda_stream())
+            # Dask expect a list of frames
+            return header, list(frames)
 
     @cuda_deserialize.register(DataFrame)
     def _(
-        header: DataFrameHeader, frames: tuple[memoryview, plc.gpumemoryview]
+        header: DataFrameHeader, frames: tuple[memoryview[bytes], plc.gpumemoryview]
     ) -> DataFrame:
         with log_errors():
             metadata, gpudata = frames  # TODO: check if this is a length-2 list...
-            return DataFrame.deserialize(header, (metadata, plc.gpumemoryview(gpudata)))
+            return DataFrame.deserialize(
+                header,
+                (metadata, plc.gpumemoryview(gpudata)),
+                stream=get_dask_cuda_stream(),
+            )
 
     @cuda_deserialize.register(Column)
-    def _(header: ColumnHeader, frames: tuple[memoryview, plc.gpumemoryview]) -> Column:
+    def _(
+        header: ColumnHeader, frames: tuple[memoryview[bytes], plc.gpumemoryview]
+    ) -> Column:
         with log_errors():
             metadata, gpudata = frames
-            return Column.deserialize(header, (metadata, plc.gpumemoryview(gpudata)))
+            return Column.deserialize(
+                header,
+                (metadata, plc.gpumemoryview(gpudata)),
+                stream=get_dask_cuda_stream(),
+            )
 
     @overload
     def dask_serialize_column_or_frame(
         x: DataFrame,
-    ) -> tuple[DataFrameHeader, tuple[memoryview, memoryview]]: ...
+    ) -> tuple[DataFrameHeader, tuple[memoryview[bytes], memoryview[bytes]]]: ...
 
     @overload
     def dask_serialize_column_or_frame(
         x: Column,
-    ) -> tuple[ColumnHeader, tuple[memoryview, memoryview]]: ...
+    ) -> tuple[ColumnHeader, tuple[memoryview[bytes], memoryview[bytes]]]: ...
 
     @dask_serialize.register(Column)
     def dask_serialize_column_or_frame(
         x: DataFrame | Column,
-    ) -> tuple[DataFrameHeader | ColumnHeader, tuple[memoryview, memoryview]]:
+    ) -> tuple[
+        DataFrameHeader | ColumnHeader, tuple[memoryview[bytes], memoryview[bytes]]
+    ]:
+        stream = get_dask_cuda_stream()
         with log_errors():
-            header, (metadata, gpudata) = x.serialize()
+            header, (metadata, gpudata) = x.serialize(stream=stream)
 
             # For robustness, we check that the gpu data is contiguous
             cai = gpudata.__cuda_array_interface__
@@ -117,23 +134,26 @@ def register() -> None:
             nbytes = cai["shape"][0]
 
             # Copy the gpudata to host memory
-            gpudata_on_host = memoryview(
+            gpudata_on_host: memoryview[bytes] = memoryview(
                 rmm.DeviceBuffer(ptr=gpudata.ptr, size=nbytes).copy_to_host()
             )
             return header, (metadata, gpudata_on_host)
 
     @dask_deserialize.register(Column)
-    def _(header: ColumnHeader, frames: tuple[memoryview, memoryview]) -> Column:
+    def _(header: ColumnHeader, frames: tuple[memoryview[bytes], memoryview]) -> Column:
         with log_errors():
             assert len(frames) == 2
             # Copy the second frame (the gpudata in host memory) back to the gpu
-            frames = frames[0], plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1]))
-            return Column.deserialize(header, frames)
+            new_frames = (
+                frames[0],
+                plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1])),
+            )
+            return Column.deserialize(header, new_frames, stream=get_dask_cuda_stream())
 
     @dask_serialize.register(DataFrame)
     def _(
         x: DataFrame, context: Mapping[str, Any] | None = None
-    ) -> tuple[DataFrameHeader, tuple[memoryview, memoryview]]:
+    ) -> tuple[DataFrameHeader, tuple[memoryview[bytes], memoryview[bytes]]]:
         # Do regular serialization if no staging buffer is provided.
         if context is None or "staging_device_buffer" not in context:
             return dask_serialize_column_or_frame(x)
@@ -166,12 +186,19 @@ def register() -> None:
             return header, frame
 
     @dask_deserialize.register(DataFrame)
-    def _(header: DataFrameHeader, frames: tuple[memoryview, memoryview]) -> DataFrame:
+    def _(
+        header: DataFrameHeader, frames: tuple[memoryview[bytes], memoryview]
+    ) -> DataFrame:
         with log_errors():
             assert len(frames) == 2
             # Copy the second frame (the gpudata in host memory) back to the gpu
-            frames = frames[0], plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1]))
-            return DataFrame.deserialize(header, frames)
+            new_frames = (
+                frames[0],
+                plc.gpumemoryview(rmm.DeviceBuffer.to_device(frames[1])),
+            )
+            return DataFrame.deserialize(
+                header, new_frames, stream=get_dask_cuda_stream()
+            )
 
     @sizeof_dispatch.register(Column)
     def _(x: Column) -> int:
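Note: the functional change throughout this file is that `serialize`/`deserialize` now thread an explicit CUDA stream obtained from the new `cudf_polars.utils.cuda_stream.get_dask_cuda_stream` helper. A minimal sketch of the device-to-host round trip the `dask_serialize`/`dask_deserialize` handlers above perform, using only calls that appear in the diff (`host_roundtrip` itself is illustrative, not part of the package):

```python
import rmm
import pylibcudf as plc

from cudf_polars.containers import Column
from cudf_polars.utils.cuda_stream import get_dask_cuda_stream


def host_roundtrip(col: Column) -> Column:
    # Serialize on an explicit CUDA stream (new in 26.2.0).
    header, (metadata, gpudata) = col.serialize(stream=get_dask_cuda_stream())

    # Device -> host: stage the GPU frame in host memory for Dask transport.
    nbytes = gpudata.__cuda_array_interface__["shape"][0]
    host = memoryview(rmm.DeviceBuffer(ptr=gpudata.ptr, size=nbytes).copy_to_host())

    # Host -> device: rebuild the GPU frame and deserialize on the same stream.
    frames = (metadata, plc.gpumemoryview(rmm.DeviceBuffer.to_device(host)))
    return Column.deserialize(header, frames, stream=get_dask_cuda_stream())
```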
cudf_polars/experimental/dispatch.py

@@ -13,7 +13,7 @@ if TYPE_CHECKING:
     from collections.abc import MutableMapping
 
     from cudf_polars.dsl import ir
-    from cudf_polars.dsl.ir import IR
+    from cudf_polars.dsl.ir import IR, IRExecutionContext
     from cudf_polars.experimental.base import (
         ColumnStats,
         PartitionInfo,
@@ -77,7 +77,9 @@ def lower_ir_node(
 
 @singledispatch
 def generate_ir_tasks(
-    ir: IR, partition_info: MutableMapping[IR, PartitionInfo]
+    ir: IR,
+    partition_info: MutableMapping[IR, PartitionInfo],
+    context: IRExecutionContext,
 ) -> MutableMapping[Any, Any]:
     """
     Generate a task graph for evaluation of an IR node.
@@ -88,6 +90,8 @@ def generate_ir_tasks(
         IR node to generate tasks for.
     partition_info
         Partitioning information, obtained from :func:`lower_ir_graph`.
+    context
+        Runtime context for IR node execution.
 
     Returns
     -------
@@ -139,7 +143,9 @@ def initialize_column_stats(
 
 @singledispatch
 def update_column_stats(
-    ir: IR, stats: StatsCollector, config_options: ConfigOptions
+    ir: IR,
+    stats: StatsCollector,
+    config_options: ConfigOptions,
 ) -> None:
     """
     Finalize local column statistics for an IR node.
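Note: `generate_ir_tasks` implementations now receive a third `context` argument. A hedged sketch of what a conforming `singledispatch` registration looks like after this change; `MyIR` is a hypothetical IR subclass used only for illustration:

```python
from collections.abc import MutableMapping
from typing import Any

from cudf_polars.dsl.ir import IR, IRExecutionContext
from cudf_polars.experimental.base import PartitionInfo
from cudf_polars.experimental.dispatch import generate_ir_tasks


class MyIR(IR):  # placeholder IR subclass, for illustration only
    pass


@generate_ir_tasks.register(MyIR)
def _(
    ir: MyIR,
    partition_info: MutableMapping[IR, PartitionInfo],
    context: IRExecutionContext,  # runtime execution context, new in 26.2.0
) -> MutableMapping[Any, Any]:
    # Build and return the task graph for this node; body elided here.
    return {}
```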
cudf_polars/experimental/distinct.py

@@ -97,6 +97,7 @@ def lower_distinct(
             child.schema,
             shuffle_keys,
             config_options.executor.shuffle_method,
+            config_options.executor.shuffler_insertion_method,
             child,
         )
         partition_info[child] = PartitionInfo(
@@ -150,6 +151,7 @@ def lower_distinct(
             new_node.schema,
             shuffle_keys,
             config_options.executor.shuffle_method,
+            config_options.executor.shuffler_insertion_method,
             new_node,
         )
         partition_info[new_node] = PartitionInfo(count=output_count)
cudf_polars/experimental/explain.py

@@ -34,7 +34,10 @@ if TYPE_CHECKING:
 
 
 def explain_query(
-    q: pl.LazyFrame, engine: pl.GPUEngine, *, physical: bool = True
+    q: pl.LazyFrame,
+    engine: pl.GPUEngine,
+    *,
+    physical: bool = True,
 ) -> str:
     """
     Return a formatted string representation of the IR plan.
@@ -58,7 +61,17 @@ def explain_query(
     ir = Translator(q._ldf.visit(), engine).translate_ir()
 
     if physical:
-        lowered_ir, partition_info = lower_ir_graph(ir, config)
+        if (
+            config.executor.name == "streaming"
+            and config.executor.runtime == "rapidsmpf"
+        ):  # pragma: no cover; rapidsmpf runtime not tested in CI yet
+            from cudf_polars.experimental.rapidsmpf.core import (
+                lower_ir_graph as rapidsmpf_lower_ir_graph,
+            )
+
+            lowered_ir, partition_info, _ = rapidsmpf_lower_ir_graph(ir, config)
+        else:
+            lowered_ir, partition_info, _ = lower_ir_graph(ir, config)
         return _repr_ir_tree(lowered_ir, partition_info)
     else:
         if config.executor.name == "streaming":
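Note: `explain_query` now routes physical-plan lowering through the rapidsmpf path when the streaming executor is configured with `runtime="rapidsmpf"`. A usage sketch, assuming the `executor` keyword that cudf-polars reads from `pl.GPUEngine`:

```python
import polars as pl

from cudf_polars.experimental.explain import explain_query

q = pl.LazyFrame({"a": [1, 2, 2, 3]}).group_by("a").len()
engine = pl.GPUEngine(executor="streaming")

# Physical plan: the lowered IR tree with per-node partitioning info.
print(explain_query(q, engine, physical=True))
# Logical plan: the translated IR before lowering.
print(explain_query(q, engine, physical=False))
```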
cudf_polars/experimental/expressions.py

@@ -38,15 +38,14 @@ from typing import TYPE_CHECKING, TypeAlias, TypedDict
 import pylibcudf as plc
 
 from cudf_polars.dsl.expressions.aggregation import Agg
-from cudf_polars.dsl.expressions.base import Col, Expr, NamedExpr
+from cudf_polars.dsl.expressions.base import Col, ExecutionContext, Expr, NamedExpr
 from cudf_polars.dsl.expressions.binaryop import BinOp
 from cudf_polars.dsl.expressions.literal import Literal
-from cudf_polars.dsl.expressions.unary import Cast, UnaryFunction
+from cudf_polars.dsl.expressions.unary import Cast, Len, UnaryFunction
 from cudf_polars.dsl.ir import IR, Distinct, Empty, HConcat, Select
 from cudf_polars.dsl.traversal import (
     CachingVisitor,
 )
-from cudf_polars.dsl.utils.naming import unique_names
 from cudf_polars.experimental.base import PartitionInfo
 from cudf_polars.experimental.repartition import Repartition
 from cudf_polars.experimental.utils import _get_unique_fractions, _leaf_column_names
@@ -237,7 +236,7 @@
 
 
 def _decompose_agg_node(
-    agg: Agg,
+    agg: Agg | Len,
     input_ir: IR,
     partition_info: MutableMapping[IR, PartitionInfo],
     config_options: ConfigOptions,
@@ -273,7 +272,7 @@
     """
     expr: Expr
    exprs: list[Expr]
-    if agg.name == "count":
+    if isinstance(agg, Len) or agg.name == "count":
         # Chunkwise stage
         columns, input_ir, partition_info = select(
             [agg],
@@ -286,7 +285,7 @@
         # Combined stage
         (column,) = columns
         columns, input_ir, partition_info = select(
-            [Agg(agg.dtype, "sum", None, column)],
+            [Agg(agg.dtype, "sum", None, ExecutionContext.FRAME, column)],
             input_ir,
             partition_info,
             names=names,
@@ -295,8 +294,8 @@
     elif agg.name == "mean":
         # Chunkwise stage
         exprs = [
-            Agg(agg.dtype, "sum", None, *agg.children),
-            Agg(agg.dtype, "count", None, *agg.children),
+            Agg(agg.dtype, "sum", None, ExecutionContext.FRAME, *agg.children),
+            Agg(agg.dtype, "count", None, ExecutionContext.FRAME, *agg.children),
         ]
         columns, input_ir, partition_info = select(
             exprs,
@@ -311,7 +310,10 @@
             BinOp(
                 agg.dtype,
                 plc.binaryop.BinaryOperator.DIV,
-                *(Agg(agg.dtype, "sum", None, column) for column in columns),
+                *(
+                    Agg(agg.dtype, "sum", None, ExecutionContext.FRAME, column)
+                    for column in columns
+                ),
             )
         ]
         columns, input_ir, partition_info = select(
@@ -348,6 +350,7 @@
             input_ir.schema,
             shuffle_on,
             config_options.executor.shuffle_method,
+            config_options.executor.shuffler_insertion_method,
             input_ir,
         )
         partition_info[input_ir] = PartitionInfo(
@@ -357,7 +360,7 @@
 
         # Chunkwise stage
         columns, input_ir, partition_info = select(
-            [Cast(agg.dtype, agg)],
+            [Cast(agg.dtype, True, agg)],  # noqa: FBT003
             input_ir,
             partition_info,
             names=names,
@@ -367,7 +370,7 @@
         # Combined stage
         (column,) = columns
         columns, input_ir, partition_info = select(
-            [Agg(agg.dtype, "sum", None, column)],
+            [Agg(agg.dtype, "sum", None, ExecutionContext.FRAME, column)],
             input_ir,
             partition_info,
             names=names,
@@ -386,7 +389,7 @@
         # Combined stage
         (column,) = columns
         columns, input_ir, partition_info = select(
-            [Agg(agg.dtype, agg.name, agg.options, column)],
+            [Agg(agg.dtype, agg.name, agg.options, ExecutionContext.FRAME, column)],
             input_ir,
             partition_info,
             names=names,
@@ -451,7 +454,9 @@ def _decompose_expr_node(
     if partition_count == 1 or expr.is_pointwise:
         # Single-partition and pointwise expressions are always supported.
         return expr, input_ir, partition_info
-    elif isinstance(expr, Agg) and expr.name in _SUPPORTED_AGGS:
+    elif isinstance(expr, Len) or (
+        isinstance(expr, Agg) and expr.name in _SUPPORTED_AGGS
+    ):
         # This is a supported Agg expression.
         return _decompose_agg_node(
             expr, input_ir, partition_info, config_options, names=names
@@ -515,8 +520,15 @@ def _decompose(
             *unique_input_irs,
         )
         partition_info[input_ir] = PartitionInfo(count=partition_count)
-    else:
+    elif len(unique_input_irs) == 1:
         input_ir = unique_input_irs[0]
+    else:
+        # All child IRs were Empty. Use an Empty({}) with
+        # count=1 to ensure that scalar expressions still
+        # produce one output partition with a single row
+        # See: https://github.com/rapidsai/cudf/pull/20409
+        input_ir = Empty({})
+        partition_info[input_ir] = PartitionInfo(count=1)
 
     # Call into class-specific logic to decompose ``expr``
     return _decompose_expr_node(
@@ -537,6 +549,7 @@ def decompose_expr_graph(
     config_options: ConfigOptions,
     row_count_estimate: ColumnStat[int],
     column_stats: dict[str, ColumnStats],
+    unique_names: Generator[str, None, None],
 ) -> tuple[NamedExpr, IR, MutableMapping[IR, PartitionInfo]]:
     """
     Decompose a NamedExpr into stages.
@@ -557,6 +570,8 @@
         Row-count estimate for the input IR.
     column_stats
         Column statistics for the input IR.
+    unique_names
+        Generator of unique names for temporaries.
 
     Returns
     -------
@@ -581,7 +596,7 @@
             "input_ir": input_ir,
             "input_partition_info": partition_info[input_ir],
             "config_options": config_options,
-            "unique_names": unique_names((named_expr.name, *input_ir.schema.keys())),
+            "unique_names": unique_names,
             "row_count_estimate": row_count_estimate,
             "column_stats": column_stats,
         },
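Note: the recurring mechanical change in this file is the `Agg` constructor, which now takes an `ExecutionContext` member between the aggregation options and the child expressions, i.e. `Agg(dtype, name, options, context, *children)` as inferred from the call sites above. A minimal sketch of the combined-stage reduction built in several branches (`combine_partials` is illustrative, not part of the package):

```python
from cudf_polars.containers import DataType
from cudf_polars.dsl.expressions.aggregation import Agg
from cudf_polars.dsl.expressions.base import ExecutionContext, Expr


def combine_partials(dtype: DataType, column: Expr) -> Agg:
    # Combined stage of the multi-partition decomposition: sum the
    # per-partition partial results in the FRAME execution context.
    return Agg(dtype, "sum", None, ExecutionContext.FRAME, column)
```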
cudf_polars/experimental/groupby.py

@@ -14,6 +14,7 @@ import pylibcudf as plc
 
 from cudf_polars.containers import DataType
 from cudf_polars.dsl.expr import Agg, BinOp, Col, Len, NamedExpr
+from cudf_polars.dsl.expressions.base import ExecutionContext
 from cudf_polars.dsl.ir import GroupBy, Select, Slice
 from cudf_polars.dsl.traversal import traversal
 from cudf_polars.dsl.utils.naming import unique_names
@@ -95,7 +96,12 @@ def decompose(
     if isinstance(expr, Len):
         selection = NamedExpr(name, Col(dtype, name))
         aggregation = [NamedExpr(name, expr)]
-        reduction = [NamedExpr(name, Agg(dtype, "sum", None, Col(dtype, name)))]
+        reduction = [
+            NamedExpr(
+                name,
+                Agg(dtype, "sum", None, ExecutionContext.GROUPBY, Col(dtype, name)),
+            )
+        ]
         return selection, aggregation, reduction, False
     if isinstance(expr, Agg):
         if expr.name in ("sum", "count", "min", "max", "n_unique"):
@@ -105,19 +111,32 @@
             aggfunc = expr.name
             selection = NamedExpr(name, Col(dtype, name))
             aggregation = [NamedExpr(name, expr)]
-            reduction = [NamedExpr(name, Agg(dtype, aggfunc, None, Col(dtype, name)))]
+            reduction = [
+                NamedExpr(
+                    name,
+                    Agg(
+                        dtype, aggfunc, None, ExecutionContext.GROUPBY, Col(dtype, name)
+                    ),
+                )
+            ]
             return selection, aggregation, reduction, expr.name == "n_unique"
         elif expr.name == "mean":
             (child,) = expr.children
             (sum, count), aggregations, reductions, need_preshuffle = combine(
                 decompose(
                     f"{next(names)}__mean_sum",
-                    Agg(dtype, "sum", None, child),
+                    Agg(dtype, "sum", None, ExecutionContext.GROUPBY, child),
                     names=names,
                 ),
                 decompose(
                     f"{next(names)}__mean_count",
-                    Agg(DataType(pl.Int32()), "count", False, child),  # noqa: FBT003
+                    Agg(
+                        DataType(pl.Int32()),
+                        "count",
+                        False,  # noqa: FBT003
+                        ExecutionContext.GROUPBY,
+                        child,
+                    ),
                     names=names,
                 ),
             )
@@ -230,6 +249,7 @@ def _(
             child.schema,
             ir.keys,
             config_options.executor.shuffle_method,
+            config_options.executor.shuffler_insertion_method,
             child,
         )
         partition_info[child] = PartitionInfo(
@@ -272,6 +292,7 @@ def _(
             gb_pwise.schema,
             grouped_keys,
             config_options.executor.shuffle_method,
+            config_options.executor.shuffler_insertion_method,
             gb_pwise,
         )
         partition_info[gb_inter] = PartitionInfo(count=post_aggregation_count)
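Note: grouped decompositions thread `ExecutionContext.GROUPBY` where the frame-level decompositions above use `ExecutionContext.FRAME`. A sketch mirroring the `Len` branch of `decompose` shown above, where a per-partition `len` is reduced by a grouped `sum` of the partial counts (`decompose_len` is illustrative; the assumed `expr.dtype` attribute supplies the count dtype):

```python
from cudf_polars.dsl.expr import Agg, Col, Len, NamedExpr
from cudf_polars.dsl.expressions.base import ExecutionContext


def decompose_len(name: str, expr: Len):
    dtype = expr.dtype
    selection = NamedExpr(name, Col(dtype, name))  # pass the count column through
    aggregation = [NamedExpr(name, expr)]          # per-partition len()
    reduction = [                                  # grouped sum of partial counts
        NamedExpr(
            name,
            Agg(dtype, "sum", None, ExecutionContext.GROUPBY, Col(dtype, name)),
        )
    ]
    # Final False: this aggregation needs no pre-shuffle of the input.
    return selection, aggregation, reduction, False
```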