cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
Files changed (76)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +60 -15
  4. cudf_polars/containers/column.py +137 -77
  5. cudf_polars/containers/dataframe.py +123 -34
  6. cudf_polars/containers/datatype.py +134 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/binaryop.py +34 -14
  10. cudf_polars/dsl/expressions/boolean.py +110 -37
  11. cudf_polars/dsl/expressions/datetime.py +59 -30
  12. cudf_polars/dsl/expressions/literal.py +11 -5
  13. cudf_polars/dsl/expressions/rolling.py +460 -119
  14. cudf_polars/dsl/expressions/selection.py +9 -8
  15. cudf_polars/dsl/expressions/slicing.py +1 -1
  16. cudf_polars/dsl/expressions/string.py +256 -114
  17. cudf_polars/dsl/expressions/struct.py +19 -7
  18. cudf_polars/dsl/expressions/ternary.py +33 -3
  19. cudf_polars/dsl/expressions/unary.py +126 -64
  20. cudf_polars/dsl/ir.py +1053 -350
  21. cudf_polars/dsl/to_ast.py +30 -13
  22. cudf_polars/dsl/tracing.py +194 -0
  23. cudf_polars/dsl/translate.py +307 -107
  24. cudf_polars/dsl/utils/aggregations.py +43 -30
  25. cudf_polars/dsl/utils/reshape.py +14 -2
  26. cudf_polars/dsl/utils/rolling.py +12 -8
  27. cudf_polars/dsl/utils/windows.py +35 -20
  28. cudf_polars/experimental/base.py +55 -2
  29. cudf_polars/experimental/benchmarks/pdsds.py +12 -126
  30. cudf_polars/experimental/benchmarks/pdsh.py +792 -2
  31. cudf_polars/experimental/benchmarks/utils.py +596 -39
  32. cudf_polars/experimental/dask_registers.py +47 -20
  33. cudf_polars/experimental/dispatch.py +9 -3
  34. cudf_polars/experimental/distinct.py +2 -0
  35. cudf_polars/experimental/explain.py +15 -2
  36. cudf_polars/experimental/expressions.py +30 -15
  37. cudf_polars/experimental/groupby.py +25 -4
  38. cudf_polars/experimental/io.py +156 -124
  39. cudf_polars/experimental/join.py +53 -23
  40. cudf_polars/experimental/parallel.py +68 -19
  41. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  42. cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
  43. cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
  44. cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
  45. cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
  46. cudf_polars/experimental/rapidsmpf/core.py +488 -0
  47. cudf_polars/experimental/rapidsmpf/dask.py +172 -0
  48. cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
  49. cudf_polars/experimental/rapidsmpf/io.py +696 -0
  50. cudf_polars/experimental/rapidsmpf/join.py +322 -0
  51. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  52. cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
  53. cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
  54. cudf_polars/experimental/rapidsmpf/union.py +115 -0
  55. cudf_polars/experimental/rapidsmpf/utils.py +374 -0
  56. cudf_polars/experimental/repartition.py +9 -2
  57. cudf_polars/experimental/select.py +177 -14
  58. cudf_polars/experimental/shuffle.py +46 -12
  59. cudf_polars/experimental/sort.py +100 -26
  60. cudf_polars/experimental/spilling.py +1 -1
  61. cudf_polars/experimental/statistics.py +24 -5
  62. cudf_polars/experimental/utils.py +25 -7
  63. cudf_polars/testing/asserts.py +13 -8
  64. cudf_polars/testing/io.py +2 -1
  65. cudf_polars/testing/plugin.py +93 -17
  66. cudf_polars/typing/__init__.py +86 -32
  67. cudf_polars/utils/config.py +473 -58
  68. cudf_polars/utils/cuda_stream.py +70 -0
  69. cudf_polars/utils/versions.py +5 -4
  70. cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
  71. cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
  72. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
  73. cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
  74. cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
  75. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/dsl/to_ast.py CHANGED
@@ -6,7 +6,9 @@
 from __future__ import annotations
 
 from functools import partial, reduce, singledispatch
-from typing import TYPE_CHECKING, TypeAlias, TypedDict
+from typing import TYPE_CHECKING, TypeAlias, TypedDict, cast
+
+import polars as pl
 
 import pylibcudf as plc
 from pylibcudf import expressions as plc_expr
@@ -19,6 +21,8 @@ from cudf_polars.typing import GenericTransformer
 if TYPE_CHECKING:
     from collections.abc import Mapping
 
+    from rmm.pylibrmm.stream import Stream
+
 
 # Can't merge these op-mapping dictionaries because scoped enum values
 # are exposed by cython with equality/hash based one their underlying
@@ -103,6 +107,7 @@ class ASTState(TypedDict):
     """
 
     for_parquet: bool
+    stream: Stream
 
 
 class ExprTransformerState(TypedDict):
@@ -170,7 +175,9 @@ def _(node: expr.ColRef, self: Transformer) -> plc_expr.Expression:
 
 @_to_ast.register
 def _(node: expr.Literal, self: Transformer) -> plc_expr.Expression:
-    return plc_expr.Literal(plc.Scalar.from_py(node.value, node.dtype.plc))
+    return plc_expr.Literal(
+        plc.Scalar.from_py(node.value, node.dtype.plc_type, stream=self.state["stream"])
+    )
 
 
 @_to_ast.register
@@ -190,7 +197,7 @@ def _(node: expr.BinOp, self: Transformer) -> plc_expr.Expression:
     if self.state["for_parquet"]:
         op1_col, op2_col = (isinstance(op, expr.Col) for op in node.children)
         if op1_col ^ op2_col:
-            op = node.op
+            op: plc.binaryop.BinaryOperator = node.op
             if op not in SUPPORTED_STATISTICS_BINOPS:
                 raise NotImplementedError(
                     f"Parquet filter binop with column doesn't support {node.op!r}"
@@ -221,14 +228,16 @@ def _(node: expr.BooleanFunction, self: Transformer) -> plc_expr.Expression:
             if haystack.dtype.id() == plc.TypeId.LIST:
                 # Because we originally translated pl_expr.Literal with a list scalar
                 # to a expr.LiteralColumn, so the actual type is in the inner type
-                #
-                # the type-ignore is safe because the for plc.TypeID.LIST, we know
-                # we have a polars.List type, which has an inner attribute.
-                plc_dtype = DataType(haystack.dtype.polars.inner).plc  # type: ignore[attr-defined]
+                # .inner returns DataTypeClass | DataType, need to cast to DataType
+                plc_dtype = DataType(
+                    cast(pl.DataType, cast(pl.List, haystack.dtype.polars_type).inner)
+                ).plc_type
             else:
-                plc_dtype = haystack.dtype.plc  # pragma: no cover
+                plc_dtype = haystack.dtype.plc_type  # pragma: no cover
             values = (
-                plc_expr.Literal(plc.Scalar.from_py(val, plc_dtype))
+                plc_expr.Literal(
+                    plc.Scalar.from_py(val, plc_dtype, stream=self.state["stream"])
+                )
                 for val in haystack.value
             )
             return reduce(
@@ -265,7 +274,7 @@ def _(node: expr.UnaryFunction, self: Transformer) -> plc_expr.Expression:
     )
 
 
-def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None:
+def to_parquet_filter(node: expr.Expr, stream: Stream) -> plc_expr.Expression | None:
     """
     Convert an expression to libcudf AST nodes suitable for parquet filtering.
 
@@ -273,19 +282,23 @@ def to_parquet_filter(node: expr.Expr) -> plc_expr.Expression | None:
     ----------
     node
         Expression to convert.
+    stream
+        CUDA stream used for device memory operations and kernel launches.
 
     Returns
     -------
     pylibcudf Expression if conversion is possible, otherwise None.
     """
-    mapper: Transformer = CachingVisitor(_to_ast, state={"for_parquet": True})
+    mapper: Transformer = CachingVisitor(
+        _to_ast, state={"for_parquet": True, "stream": stream}
+    )
     try:
         return mapper(node)
     except (KeyError, NotImplementedError):
         return None
 
 
-def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
+def to_ast(node: expr.Expr, stream: Stream) -> plc_expr.Expression | None:
     """
     Convert an expression to libcudf AST nodes suitable for compute_column.
 
@@ -293,6 +306,8 @@ def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
     ----------
     node
         Expression to convert.
+    stream
+        CUDA stream used for device memory operations and kernel launches.
 
     Notes
     -----
@@ -304,7 +319,9 @@ def to_ast(node: expr.Expr) -> plc_expr.Expression | None:
     -------
     pylibcudf Expression if conversion is possible, otherwise None.
     """
-    mapper: Transformer = CachingVisitor(_to_ast, state={"for_parquet": False})
+    mapper: Transformer = CachingVisitor(
+        _to_ast, state={"for_parquet": False, "stream": stream}
+    )
     try:
         return mapper(node)
     except (KeyError, NotImplementedError):
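
Both public entry points in this module now require the caller to supply the CUDA stream on which literal scalars are materialized. Below is a minimal sketch (not code from the package) of driving the new signatures; `node` is a placeholder for a translated cudf_polars `expr.Expr`, and rmm's `DEFAULT_STREAM` is assumed here to be an acceptable stream choice.

```python
# Hedged sketch of the new stream-aware API. `node` stands in for a
# cudf_polars expr.Expr built by translation; DEFAULT_STREAM is one valid
# rmm stream, not necessarily the stream the engine itself would pass.
from rmm.pylibrmm.stream import DEFAULT_STREAM

from cudf_polars.dsl.to_ast import to_ast, to_parquet_filter


def try_pushdown(node):
    """Return (compute AST, parquet filter AST); either may be None."""
    compute_expr = to_ast(node, DEFAULT_STREAM)
    parquet_expr = to_parquet_filter(node, DEFAULT_STREAM)
    return compute_expr, parquet_expr
```

Both helpers swallow `KeyError` and `NotImplementedError` and return `None` when an expression has no AST equivalent, so callers can probe for pushdown support without any try/except of their own.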
cudf_polars/dsl/tracing.py CHANGED
@@ -6,11 +6,205 @@
 from __future__ import annotations
 
 import functools
+import os
+import time
+from typing import TYPE_CHECKING, Any, Concatenate, Literal
 
 import nvtx
+import pynvml
+from typing_extensions import ParamSpec
+
+import rmm
+import rmm.statistics
+
+from cudf_polars.utils.config import _bool_converter, get_device_handle
+
+try:
+    import structlog
+except ImportError:
+    _HAS_STRUCTLOG = False
+else:
+    _HAS_STRUCTLOG = True
+
+
+LOG_TRACES = _HAS_STRUCTLOG and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES", "0")
+)
+LOG_MEMORY = LOG_TRACES and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES_MEMORY", "1")
+)
+LOG_DATAFRAMES = LOG_TRACES and _bool_converter(
+    os.environ.get("CUDF_POLARS_LOG_TRACES_DATAFRAMES", "1")
+)
 
 CUDF_POLARS_NVTX_DOMAIN = "cudf_polars"
 
 nvtx_annotate_cudf_polars = functools.partial(
     nvtx.annotate, domain=CUDF_POLARS_NVTX_DOMAIN
 )
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Sequence
+
+    import cudf_polars.containers
+    from cudf_polars.dsl import ir
+
+
+@functools.cache
+def _getpid() -> int:  # pragma: no cover
+    # Gets called for each IR.do_evaluate node, so we'll cache it.
+    return os.getpid()
+
+
+def make_snapshot(
+    node_type: type[ir.IR],
+    frames: Sequence[cudf_polars.containers.DataFrame],
+    extra: dict[str, Any] | None = None,
+    *,
+    pid: int,
+    device_handle: Any | None = None,
+    phase: Literal["input", "output"] = "input",
+) -> dict:  # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
+    """
+    Collect statistics about the evaluation of an IR node.
+
+    Parameters
+    ----------
+    node_type
+        The type of the IR node.
+    frames
+        The list of DataFrames to capture information for. For ``phase="input"``,
+        this is typically the dataframes passed to ``IR.do_evaluate``. For
+        ``phase="output"``, this is typically the DataFrame returned from
+        ``IR.do_evaluate``.
+    extra
+        Extra information to log.
+    pid
+        The ID of the current process. Used for NVML memory usage.
+    device_handle
+        The pynvml device handle. Used for NVML memory usage.
+    phase
+        The phase of the evaluation. Either "input" or "output".
+    """
+    ir_name = node_type.__name__
+
+    d: dict[str, Any] = {
+        "type": ir_name,
+    }
+
+    if LOG_DATAFRAMES:
+        d.update(
+            {
+                f"count_frames_{phase}": len(frames),
+                f"frames_{phase}": [
+                    {
+                        "shape": frame.table.shape(),
+                        "size": sum(
+                            col.device_buffer_size() for col in frame.table.columns()
+                        ),
+                    }
+                    for frame in frames
+                ],
+            }
+        )
+        d[f"total_bytes_{phase}"] = sum(x["size"] for x in d[f"frames_{phase}"])
+
+    if LOG_MEMORY:
+        stats = rmm.statistics.get_statistics()
+        if stats:
+            d.update(
+                {
+                    f"rmm_current_bytes_{phase}": stats.current_bytes,
+                    f"rmm_current_count_{phase}": stats.current_count,
+                    f"rmm_peak_bytes_{phase}": stats.peak_bytes,
+                    f"rmm_peak_count_{phase}": stats.peak_count,
+                    f"rmm_total_bytes_{phase}": stats.total_bytes,
+                    f"rmm_total_count_{phase}": stats.total_count,
+                }
+            )
+
+        if device_handle is not None:
+            processes = pynvml.nvmlDeviceGetComputeRunningProcesses(device_handle)
+            for proc in processes:
+                if proc.pid == pid:
+                    d[f"nvml_current_bytes_{phase}"] = proc.usedGpuMemory
+                    break
+    if extra:
+        d.update(extra)
+
+    return d
+
+
+P = ParamSpec("P")
+
+
+def log_do_evaluate(
+    func: Callable[Concatenate[type[ir.IR], P], cudf_polars.containers.DataFrame],
+) -> Callable[Concatenate[type[ir.IR], P], cudf_polars.containers.DataFrame]:
+    """
+    Decorator for an ``IR.do_evaluate`` method that logs information before and after evaluation.
+
+    Parameters
+    ----------
+    func
+        The ``IR.do_evaluate`` method to wrap.
+    """
+    if not LOG_TRACES:
+        return func
+    else:  # pragma: no cover; requires CUDF_POLARS_LOG_TRACES=1
+
+        @functools.wraps(func)
+        def wrapper(
+            cls: type[ir.IR],
+            *args: P.args,
+            **kwargs: P.kwargs,
+        ) -> cudf_polars.containers.DataFrame:
+            # do this just once
+            pynvml.nvmlInit()
+            maybe_handle = get_device_handle()
+            pid = _getpid()
+            log = structlog.get_logger()
+
+            # By convention, all non-dataframe arguments (non_child) come first.
+            # Anything remaining is a dataframe, except for 'context' kwarg.
+            frames: list[cudf_polars.containers.DataFrame] = (
+                list(args) + [v for k, v in kwargs.items() if k != "context"]
+            )[len(cls._non_child) :]  # type: ignore[assignment]
+
+            before_start = time.monotonic_ns()
+            before = make_snapshot(
+                cls, frames, phase="input", device_handle=maybe_handle, pid=pid
+            )
+            before_end = time.monotonic_ns()
+
+            # The decorator preserves the exact signature of the original do_evaluate method.
+            # Each IR.do_evaluate method is a classmethod that takes the IR class as first
+            # argument, followed by the method-specific arguments, and returns a DataFrame.
+
+            start = time.monotonic_ns()
+            result = func(cls, *args, **kwargs)
+            stop = time.monotonic_ns()
+
+            after_start = time.monotonic_ns()
+            after = make_snapshot(
+                cls,
+                [result],
+                phase="output",
+                extra={"start": start, "stop": stop},
+                device_handle=maybe_handle,
+                pid=pid,
+            )
+            after_end = time.monotonic_ns()
+            record = (
+                before
+                | after
+                | {
+                    "overhead_duration": (before_end - before_start)
+                    + (after_end - after_start)
+                }
+            )
+            log.info("Execute IR", **record)
+
+            return result
+
+        return wrapper
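
The tracing added here is entirely opt-in: `log_do_evaluate` returns the wrapped function unchanged unless structlog is importable and `CUDF_POLARS_LOG_TRACES` is truthy when the module is first imported, and the two companion variables control how much detail each snapshot carries. A hedged sketch of enabling it, assuming structlog is installed and a polars build with the GPU engine is available:

```python
# Hedged sketch: the LOG_* flags are evaluated at import time, so the env vars
# must be set before cudf_polars is first imported (e.g. via the GPU engine).
import os

os.environ["CUDF_POLARS_LOG_TRACES"] = "1"
os.environ["CUDF_POLARS_LOG_TRACES_MEMORY"] = "1"      # RMM/NVML stats (default on)
os.environ["CUDF_POLARS_LOG_TRACES_DATAFRAMES"] = "0"  # skip per-frame shape/size info

import polars as pl

q = pl.LazyFrame({"a": [1, 2, 3]}).select(pl.col("a").sum())
q.collect(engine="gpu")  # each IR.do_evaluate emits one "Execute IR" structlog event
```

Each record merges the input and output snapshots, so keys such as `total_bytes_input`, `rmm_peak_bytes_output`, the `start`/`stop` timestamps (monotonic nanoseconds), and `overhead_duration` arrive in a single structlog event per IR node. Note that the `rmm_*` fields only populate when an RMM statistics resource adaptor is active, since `rmm.statistics.get_statistics()` otherwise returns nothing.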