PyPI - dask-cuda - Versions diffs - 25.2.0__py3-none-any.whl → 25.6.0__py3-none-any.whl - Mend

dask-cuda 25.2.0__py3-none-any.whl → 25.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

dask_cuda/GIT_COMMIT +1 -0
dask_cuda/VERSION +1 -1
dask_cuda/__init__.py +38 -27
dask_cuda/_compat.py +18 -0
dask_cuda/explicit_comms/comms.py +34 -7
dask_cuda/explicit_comms/dataframe/shuffle.py +127 -35
dask_cuda/get_device_memory_objects.py +15 -0
dask_cuda/is_device_object.py +4 -1
dask_cuda/is_spillable_object.py +4 -1
dask_cuda/proxify_device_objects.py +4 -1
dask_cuda/proxy_object.py +55 -35
dask_cuda/tests/test_dask_cuda_worker.py +5 -2
dask_cuda/tests/test_explicit_comms.py +136 -6
dask_cuda/tests/test_initialize.py +36 -0
dask_cuda/tests/test_local_cuda_cluster.py +5 -2
dask_cuda/tests/test_proxify_host_file.py +15 -2
dask_cuda/tests/test_spill.py +100 -27
dask_cuda/utils.py +61 -33
{dask_cuda-25.2.0.dist-info → dask_cuda-25.6.0.dist-info}/METADATA +7 -5
{dask_cuda-25.2.0.dist-info → dask_cuda-25.6.0.dist-info}/RECORD +24 -22
{dask_cuda-25.2.0.dist-info → dask_cuda-25.6.0.dist-info}/WHEEL +1 -1
{dask_cuda-25.2.0.dist-info → dask_cuda-25.6.0.dist-info}/top_level.txt +0 -1
{dask_cuda-25.2.0.dist-info → dask_cuda-25.6.0.dist-info}/entry_points.txt +0 -0
{dask_cuda-25.2.0.dist-info → dask_cuda-25.6.0.dist-info/licenses}/LICENSE +0 -0

dask_cuda/GIT_COMMIT ADDED Viewed

	@@ -0,0 +1 @@
1	+ 1f834655ecc6286b9e3082f037594f70dcb74062

dask_cuda/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 25.02.00
1	+ 25.06.00

dask_cuda/__init__.py CHANGED Viewed

@@ -5,8 +5,6 @@ if sys.platform != "linux":
 import dask
 import dask.utils
-import dask.dataframe.shuffle
-from .explicit_comms.dataframe.shuffle import patch_shuffle_expression
 from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
 from distributed.protocol.serialize import dask_deserialize, dask_serialize
@@ -14,30 +12,43 @@ from ._version import __git_commit__, __version__
 from .cuda_worker import CUDAWorker
 from .local_cuda_cluster import LocalCUDACluster
-from .proxify_device_objects import proxify_decorator, unproxify_decorator
-# Monkey patching Dask to make use of explicit-comms when `DASK_EXPLICIT_COMMS=True`
-patch_shuffle_expression()
-# Monkey patching Dask to make use of proxify and unproxify in compatibility mode
-dask.dataframe.shuffle.shuffle_group = proxify_decorator(
-    dask.dataframe.shuffle.shuffle_group
-)
-dask.dataframe.core._concat = unproxify_decorator(dask.dataframe.core._concat)
-def _register_cudf_spill_aware():
-    import cudf
-    # Only enable Dask/cuDF spilling if cuDF spilling is disabled, see
-    # https://github.com/rapidsai/dask-cuda/issues/1363
-    if not cudf.get_option("spill"):
-        # This reproduces the implementation of `_register_cudf`, see
-        # https://github.com/dask/distributed/blob/40fcd65e991382a956c3b879e438be1b100dff97/distributed/protocol/__init__.py#L106-L115
-        from cudf.comm import serialize
-for registry in [cuda_serialize, cuda_deserialize, dask_serialize, dask_deserialize]:
-    for lib in ["cudf", "dask_cudf"]:
-        if lib in registry._lazy:
-            registry._lazy[lib] = _register_cudf_spill_aware
+try:
+    import dask.dataframe as dask_dataframe
+except ImportError:
+    # Dask DataFrame (optional) isn't installed
+    dask_dataframe = None
+if dask_dataframe is not None:
+    from .explicit_comms.dataframe.shuffle import patch_shuffle_expression
+    from .proxify_device_objects import proxify_decorator, unproxify_decorator
+    # Monkey patching Dask to make use of explicit-comms when `DASK_EXPLICIT_COMMS=True`
+    patch_shuffle_expression()
+    # Monkey patching Dask to make use of proxify and unproxify in compatibility mode
+    dask_dataframe.shuffle.shuffle_group = proxify_decorator(
+        dask.dataframe.shuffle.shuffle_group
+    )
+    dask_dataframe.core._concat = unproxify_decorator(dask.dataframe.core._concat)
+    def _register_cudf_spill_aware():
+        import cudf
+        # Only enable Dask/cuDF spilling if cuDF spilling is disabled, see
+        # https://github.com/rapidsai/dask-cuda/issues/1363
+        if not cudf.get_option("spill"):
+            # This reproduces the implementation of `_register_cudf`, see
+            # https://github.com/dask/distributed/blob/40fcd65e991382a956c3b879e438be1b100dff97/distributed/protocol/__init__.py#L106-L115
+            from cudf.comm import serialize
+    for registry in [
+        cuda_serialize,
+        cuda_deserialize,
+        dask_serialize,
+        dask_deserialize,
+    ]:
+        for lib in ["cudf", "dask_cudf"]:
+            if lib in registry._lazy:
+                registry._lazy[lib] = _register_cudf_spill_aware

dask_cuda/_compat.py ADDED Viewed

@@ -0,0 +1,18 @@
+# Copyright (c) 2025 NVIDIA CORPORATION.
+import functools
+import importlib.metadata
+import packaging.version
+@functools.lru_cache(maxsize=None)
+def get_dask_version() -> packaging.version.Version:
+    return packaging.version.parse(importlib.metadata.version("dask"))
+@functools.lru_cache(maxsize=None)
+def DASK_2025_4_0():
+    # dask 2025.4.0 isn't currently released, so we're relying
+    # on strictly greater than here.
+    return get_dask_version() > packaging.version.parse("2025.3.0")

dask_cuda/explicit_comms/comms.py CHANGED Viewed

@@ -1,15 +1,21 @@
+# Copyright (c) 2021-2025 NVIDIA CORPORATION.
 import asyncio
 import concurrent.futures
 import contextlib
 import time
 import uuid
+import weakref
 from typing import Any, Dict, Hashable, Iterable, List, Optional
 import distributed.comm
+from dask.tokenize import tokenize
 from distributed import Client, Worker, default_client, get_worker
 from distributed.comm.addressing import parse_address, parse_host_port, unparse_address
-_default_comms = None
+# Mapping tokenize(client ID, [worker addresses]) to CommsContext
+_comms_cache: weakref.WeakValueDictionary[
+    str, "CommsContext"
+] = weakref.WeakValueDictionary()
 def get_multi_lock_or_null_context(multi_lock_context, *args, **kwargs):
@@ -38,9 +44,10 @@ def get_multi_lock_or_null_context(multi_lock_context, *args, **kwargs):
 def default_comms(client: Optional[Client] = None) -> "CommsContext":
-    """Return the default comms object
+    """Return the default comms object for ``client``.
-    Creates a new default comms object if no one exist.
+    Creates a new default comms object if one does not already exist
+    for ``client``.
     Parameters
     ----------
@@ -52,11 +59,31 @@ def default_comms(client: Optional[Client] = None) -> "CommsContext":
     -------
     comms: CommsContext
         The default comms object
+    Notes
+    -----
+    There are some subtle points around explicit-comms and the lifecycle
+    of a Dask Cluster.
+    A :class:`CommsContext` establishes explicit communication channels
+    between the workers *at the time it's created*. If workers are added
+    or removed, they will not be included in the communication channels
+    with the other workers.
+    If you need to refresh the explicit communications channels, then
+    create a new :class:`CommsContext` object or call ``default_comms``
+    again after workers have been added to or removed from the cluster.
     """
-    global _default_comms
-    if _default_comms is None:
-        _default_comms = CommsContext(client=client)
-    return _default_comms
+    # Comms are unique to a {client, [workers]} pair, so we key our
+    # cache by the token of that.
+    client = client or default_client()
+    token = tokenize(client.id, list(client.scheduler_info()["workers"].keys()))
+    maybe_comms = _comms_cache.get(token)
+    if maybe_comms is None:
+        maybe_comms = CommsContext(client=client)
+        _comms_cache[token] = maybe_comms
+    return maybe_comms
 def worker_state(sessionId: Optional[int] = None) -> dict:

dask_cuda/explicit_comms/dataframe/shuffle.py CHANGED Viewed

@@ -1,6 +1,9 @@
+# Copyright (c) 2021-2025 NVIDIA CORPORATION.
 from __future__ import annotations
 import asyncio
+import functools
 from collections import defaultdict
 from math import ceil
 from operator import getitem
@@ -23,6 +26,7 @@ from distributed import wait
 from distributed.protocol import nested_deserialize, to_serialize
 from distributed.worker import Worker
+from ..._compat import DASK_2025_4_0
 from .. import comms
 T = TypeVar("T")
@@ -582,6 +586,128 @@ def _use_explicit_comms() -> bool:
     return False
+_base_lower = dask_expr._shuffle.Shuffle._lower
+_base_compute = dask.base.compute
+def _contains_shuffle_expr(*args) -> bool:
+    """
+    Check whether any of the arguments is a Shuffle expression.
+    This is called by `compute`, which is given a sequence of Dask Collections
+    to process. For each of those, we'll check whether the expresion contains a
+    Shuffle operation.
+    """
+    for collection in args:
+        if isinstance(collection, dask.dataframe.DataFrame):
+            shuffle_ops = list(
+                collection.expr.find_operations(
+                    (
+                        dask_expr._shuffle.RearrangeByColumn,
+                        dask_expr.SetIndex,
+                        dask_expr._shuffle.Shuffle,
+                    )
+                )
+            )
+            if len(shuffle_ops) > 0:
+                return True
+    return False
+@functools.wraps(_base_compute)
+def _patched_compute(
+    *args,
+    traverse=True,
+    optimize_graph=True,
+    scheduler=None,
+    get=None,
+    **kwargs,
+):
+    # A patched version of dask.compute that explicitly materializes the task
+    # graph when we're using explicit-comms and the expression contains a
+    # Shuffle operation.
+    # https://github.com/rapidsai/dask-upstream-testing/issues/37#issuecomment-2779798670
+    # contains more details on the issue.
+    if DASK_2025_4_0() and _use_explicit_comms() and _contains_shuffle_expr(*args):
+        from dask.base import (
+            collections_to_expr,
+            flatten,
+            get_scheduler,
+            shorten_traceback,
+            unpack_collections,
+        )
+        collections, repack = unpack_collections(*args, traverse=traverse)
+        if not collections:
+            return args
+        schedule = get_scheduler(
+            scheduler=scheduler,
+            collections=collections,
+            get=get,
+        )
+        from dask._expr import FinalizeCompute
+        expr = collections_to_expr(collections, optimize_graph)
+        expr = FinalizeCompute(expr)
+        with shorten_traceback():
+            expr = expr.optimize()
+            keys = list(flatten(expr.__dask_keys__()))
+            # materialize the HLG here
+            expr = dict(expr.__dask_graph__())
+            results = schedule(expr, keys, **kwargs)
+            return repack(results)
+    else:
+        return _base_compute(
+            *args,
+            traverse=traverse,
+            optimize_graph=optimize_graph,
+            scheduler=scheduler,
+            get=get,
+            **kwargs,
+        )
+class ECShuffle(dask_expr._shuffle.TaskShuffle):
+    """Explicit-Comms Shuffle Expression."""
+    def _layer(self):
+        # Execute an explicit-comms shuffle
+        if not hasattr(self, "_ec_shuffled"):
+            on = self.partitioning_index
+            df = dask_expr.new_collection(self.frame)
+            ec_shuffled = shuffle(
+                df,
+                [on] if isinstance(on, str) else on,
+                self.npartitions_out,
+                self.ignore_index,
+            )
+            object.__setattr__(self, "_ec_shuffled", ec_shuffled)
+        graph = self._ec_shuffled.dask.copy()
+        shuffled_name = self._ec_shuffled._name
+        for i in range(self.npartitions_out):
+            graph[(self._name, i)] = graph[(shuffled_name, i)]
+        return graph
+def _patched_lower(self):
+    if self.method in (None, "tasks") and _use_explicit_comms():
+        return ECShuffle(
+            self.frame,
+            self.partitioning_index,
+            self.npartitions_out,
+            self.ignore_index,
+            self.options,
+            self.original_partitioning_index,
+        )
+    else:
+        return _base_lower(self)
 def patch_shuffle_expression() -> None:
     """Patch Dasks Shuffle expression.
@@ -590,40 +716,6 @@ def patch_shuffle_expression() -> None:
     an `ECShuffle` expression when the 'explicit-comms'
     config is set to `True`.
     """
-    class ECShuffle(dask_expr._shuffle.TaskShuffle):
-        """Explicit-Comms Shuffle Expression."""
-        def _layer(self):
-            # Execute an explicit-comms shuffle
-            if not hasattr(self, "_ec_shuffled"):
-                on = self.partitioning_index
-                df = dask_expr.new_collection(self.frame)
-                self._ec_shuffled = shuffle(
-                    df,
-                    [on] if isinstance(on, str) else on,
-                    self.npartitions_out,
-                    self.ignore_index,
-                )
-            graph = self._ec_shuffled.dask.copy()
-            shuffled_name = self._ec_shuffled._name
-            for i in range(self.npartitions_out):
-                graph[(self._name, i)] = graph[(shuffled_name, i)]
-            return graph
-    _base_lower = dask_expr._shuffle.Shuffle._lower
-    def _patched_lower(self):
-        if self.method in (None, "tasks") and _use_explicit_comms():
-            return ECShuffle(
-                self.frame,
-                self.partitioning_index,
-                self.npartitions_out,
-                self.ignore_index,
-                self.options,
-                self.original_partitioning_index,
-            )
-        else:
-            return _base_lower(self)
+    dask.base.compute = _patched_compute
     dask_expr._shuffle.Shuffle._lower = _patched_lower

dask_cuda/get_device_memory_objects.py CHANGED Viewed

@@ -1,3 +1,5 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.
 from typing import Set
 from dask.sizeof import sizeof
@@ -140,3 +142,16 @@ def register_cupy():  # NB: this overwrites dask.sizeof.register_cupy()
     @sizeof.register(cupy.ndarray)
     def sizeof_cupy_ndarray(x):
         return int(x.nbytes)
+@sizeof.register_lazy("pylibcudf")
+def register_pylibcudf():
+    import pylibcudf
+    @sizeof.register(pylibcudf.column.OwnerWithCAI)
+    def sizeof_owner_with_cai(x):
+        # OwnerWithCAI implements __cuda_array_interface__ so this should always
+        # be zero-copy
+        col = pylibcudf.column.Column.from_cuda_array_interface(x)
+        # col.data() returns a gpumemoryview, which knows the size in bytes
+        return col.data().nbytes

dask_cuda/is_device_object.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# Copyright (c) 2025 NVIDIA CORPORATION.
 from __future__ import absolute_import, division, print_function
 from dask.utils import Dispatch
@@ -35,6 +36,8 @@ def register_cudf():
     def is_device_object_cudf_series(s):
         return True
-    @is_device_object.register(cudf.BaseIndex)
+    @is_device_object.register(cudf.Index)
+    @is_device_object.register(cudf.RangeIndex)
+    @is_device_object.register(cudf.MultiIndex)
     def is_device_object_cudf_index(s):
         return True

dask_cuda/is_spillable_object.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# Copyright (c) 2025 NVIDIA CORPORATION.
 from __future__ import absolute_import, division, print_function
 from typing import Optional
@@ -34,7 +35,9 @@ def register_cudf():
     def is_device_object_cudf_dataframe(df):
         return cudf_spilling_status()
-    @is_spillable_object.register(cudf.BaseIndex)
+    @is_spillable_object.register(cudf.Index)
+    @is_spillable_object.register(cudf.RangeIndex)
+    @is_spillable_object.register(cudf.MultiIndex)
     def is_device_object_cudf_index(s):
         return cudf_spilling_status()

dask_cuda/proxify_device_objects.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# Copyright (c) 2025 NVIDIA CORPORATION.
 import functools
 import pydoc
 from collections import defaultdict
@@ -242,7 +243,9 @@ def _register_cudf():
     @dispatch.register(cudf.DataFrame)
     @dispatch.register(cudf.Series)
-    @dispatch.register(cudf.BaseIndex)
+    @dispatch.register(cudf.Index)
+    @dispatch.register(cudf.MultiIndex)
+    @dispatch.register(cudf.RangeIndex)
     def proxify_device_object_cudf_dataframe(
         obj, proxied_id_to_proxy, found_proxies, excl_proxies
     ):

dask_cuda/proxy_object.py CHANGED Viewed

@@ -11,10 +11,6 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Tuple, Type, Un
 import pandas
 import dask
-import dask.array.core
-import dask.dataframe.backends
-import dask.dataframe.dispatch
-import dask.dataframe.utils
 import dask.utils
 import distributed.protocol
 import distributed.utils
@@ -30,6 +26,22 @@ if TYPE_CHECKING:
     from .proxify_host_file import ProxyManager
+try:
+    import dask.dataframe as dask_dataframe
+    import dask.dataframe.backends
+    import dask.dataframe.dispatch
+    import dask.dataframe.utils
+except ImportError:
+    dask_dataframe = None
+try:
+    import dask.array as dask_array
+    import dask.array.core
+except ImportError:
+    dask_array = None
 # List of attributes that should be copied to the proxy at creation, which makes
 # them accessible without deserialization of the proxied object
 _FIXED_ATTRS = ["name", "__len__"]
@@ -884,14 +896,6 @@ def obj_pxy_dask_deserialize(header, frames):
     return subclass(pxy)
-@dask.dataframe.dispatch.get_parallel_type.register(ProxyObject)
-def get_parallel_type_proxy_object(obj: ProxyObject):
-    # Notice, `get_parallel_type()` needs a instance not a type object
-    return dask.dataframe.dispatch.get_parallel_type(
-        obj.__class__.__new__(obj.__class__)
-    )
 def unproxify_input_wrapper(func):
     """Unproxify the input of `func`"""
@@ -904,26 +908,42 @@ def unproxify_input_wrapper(func):
     return wrapper
-# Register dispatch of ProxyObject on all known dispatch objects
-for dispatch in (
-    dask.dataframe.dispatch.hash_object_dispatch,
-    dask.dataframe.dispatch.make_meta_dispatch,
-    dask.dataframe.utils.make_scalar,
-    dask.dataframe.dispatch.group_split_dispatch,
-    dask.array.core.tensordot_lookup,
-    dask.array.core.einsum_lookup,
-    dask.array.core.concatenate_lookup,
-):
-    dispatch.register(ProxyObject, unproxify_input_wrapper(dispatch))
-dask.dataframe.dispatch.concat_dispatch.register(
-    ProxyObject, unproxify_input_wrapper(dask.dataframe.dispatch.concat)
-)
-# We overwrite the Dask dispatch of Pandas objects in order to
-# deserialize all ProxyObjects before concatenating
-dask.dataframe.dispatch.concat_dispatch.register(
-    (pandas.DataFrame, pandas.Series, pandas.Index),
-    unproxify_input_wrapper(dask.dataframe.backends.concat_pandas),
-)
+if dask_array is not None:
+    # Register dispatch of ProxyObject on all known dispatch objects
+    for dispatch in (
+        dask.array.core.tensordot_lookup,
+        dask.array.core.einsum_lookup,
+        dask.array.core.concatenate_lookup,
+    ):
+        dispatch.register(ProxyObject, unproxify_input_wrapper(dispatch))
+if dask_dataframe is not None:
+    @dask.dataframe.dispatch.get_parallel_type.register(ProxyObject)
+    def get_parallel_type_proxy_object(obj: ProxyObject):
+        # Notice, `get_parallel_type()` needs a instance not a type object
+        return dask.dataframe.dispatch.get_parallel_type(
+            obj.__class__.__new__(obj.__class__)
+        )
+    # Register dispatch of ProxyObject on all known dispatch objects
+    for dispatch in (
+        dask.dataframe.dispatch.hash_object_dispatch,
+        dask.dataframe.dispatch.make_meta_dispatch,
+        dask.dataframe.utils.make_scalar,
+        dask.dataframe.dispatch.group_split_dispatch,
+    ):
+        dispatch.register(ProxyObject, unproxify_input_wrapper(dispatch))
+    dask.dataframe.dispatch.concat_dispatch.register(
+        ProxyObject, unproxify_input_wrapper(dask.dataframe.dispatch.concat)
+    )
+    # We overwrite the Dask dispatch of Pandas objects in order to
+    # deserialize all ProxyObjects before concatenating
+    dask.dataframe.dispatch.concat_dispatch.register(
+        (pandas.DataFrame, pandas.Series, pandas.Index),
+        unproxify_input_wrapper(dask.dataframe.backends.concat_pandas),
+    )

dask_cuda/tests/test_dask_cuda_worker.py CHANGED Viewed

@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
 from __future__ import absolute_import, division, print_function
 import os
@@ -16,7 +19,7 @@ from dask_cuda.utils import (
     get_cluster_configuration,
     get_device_total_memory,
     get_gpu_count_mig,
-    get_gpu_uuid_from_index,
+    get_gpu_uuid,
     get_n_gpus,
     wait_workers,
 )
@@ -409,7 +412,7 @@ def test_cuda_mig_visible_devices_and_memory_limit_and_nthreads(loop):  # noqa:
 def test_cuda_visible_devices_uuid(loop):  # noqa: F811
-    gpu_uuid = get_gpu_uuid_from_index(0)
+    gpu_uuid = get_gpu_uuid(0)
     with patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": gpu_uuid}):
         with popen(["dask", "scheduler", "--port", "9359", "--no-dashboard"]):

dask-cuda 25.2.0__py3-none-any.whl → 25.6.0__py3-none-any.whl

dask-cuda 25.2.0__py3-none-any.whl → 25.6.0__py3-none-any.whl