dask-cuda 24.12.0__py3-none-any.whl → 25.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_cuda/GIT_COMMIT +1 -0
- dask_cuda/VERSION +1 -1
- dask_cuda/__init__.py +36 -51
- dask_cuda/benchmarks/local_cudf_shuffle.py +0 -1
- dask_cuda/benchmarks/read_parquet.py +0 -1
- dask_cuda/benchmarks/utils.py +0 -20
- dask_cuda/explicit_comms/comms.py +34 -7
- dask_cuda/explicit_comms/dataframe/shuffle.py +56 -36
- dask_cuda/plugins.py +10 -1
- dask_cuda/proxy_object.py +55 -42
- dask_cuda/tests/test_dask_cuda_worker.py +1 -0
- dask_cuda/tests/test_explicit_comms.py +116 -14
- dask_cuda/tests/test_initialize.py +36 -0
- dask_cuda/tests/test_proxy.py +8 -8
- dask_cuda/tests/test_utils.py +1 -2
- {dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/METADATA +6 -12
- {dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/RECORD +21 -20
- {dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/WHEEL +1 -1
- {dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info/licenses}/LICENSE +0 -0
- {dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/top_level.txt +0 -0
dask_cuda/GIT_COMMIT
ADDED
@@ -0,0 +1 @@
+e9ebd92886e6f518af02faf8a2cdadeb700b25a9
dask_cuda/VERSION
CHANGED
@@ -1 +1 @@
-
+25.04.00
dask_cuda/__init__.py
CHANGED
@@ -5,65 +5,50 @@ if sys.platform != "linux":
 
 import dask
 import dask.utils
-import dask.dataframe.core
-import dask.dataframe.shuffle
-import dask.dataframe.multi
-import dask.bag.core
 from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
 from distributed.protocol.serialize import dask_deserialize, dask_serialize
 
 from ._version import __git_commit__, __version__
 from .cuda_worker import CUDAWorker
-from .explicit_comms.dataframe.shuffle import (
-    get_rearrange_by_column_wrapper,
-    get_default_shuffle_method,
-)
-from .local_cuda_cluster import LocalCUDACluster
-from .proxify_device_objects import proxify_decorator, unproxify_decorator
-
-
-if dask.config.get("dataframe.query-planning", None) is not False and dask.config.get(
-    "explicit-comms", False
-):
-    raise NotImplementedError(
-        "The 'explicit-comms' config is not yet supported when "
-        "query-planning is enabled in dask. Please use the shuffle "
-        "API directly, or use the legacy dask-dataframe API "
-        "(set the 'dataframe.query-planning' config to `False`"
-        "before importing `dask.dataframe`).",
-    )
-
 
-
-dask.dataframe.shuffle.rearrange_by_column = get_rearrange_by_column_wrapper(
-    dask.dataframe.shuffle.rearrange_by_column
-)
-# We have to replace all modules that imports Dask's `get_default_shuffle_method()`
-# TODO: introduce a shuffle-algorithm dispatcher in Dask so we don't need this hack
-dask.dataframe.shuffle.get_default_shuffle_method = get_default_shuffle_method
-dask.dataframe.multi.get_default_shuffle_method = get_default_shuffle_method
-dask.bag.core.get_default_shuffle_method = get_default_shuffle_method
-
-
-# Monkey patching Dask to make use of proxify and unproxify in compatibility mode
-dask.dataframe.shuffle.shuffle_group = proxify_decorator(
-    dask.dataframe.shuffle.shuffle_group
-)
-dask.dataframe.core._concat = unproxify_decorator(dask.dataframe.core._concat)
+from .local_cuda_cluster import LocalCUDACluster
 
 
-
-import
+try:
+    import dask.dataframe as dask_dataframe
+except ImportError:
+    # Dask DataFrame (optional) isn't installed
+    dask_dataframe = None
 
-# Only enable Dask/cuDF spilling if cuDF spilling is disabled, see
-# https://github.com/rapidsai/dask-cuda/issues/1363
-if not cudf.get_option("spill"):
-    # This reproduces the implementation of `_register_cudf`, see
-    # https://github.com/dask/distributed/blob/40fcd65e991382a956c3b879e438be1b100dff97/distributed/protocol/__init__.py#L106-L115
-    from cudf.comm import serialize
 
+if dask_dataframe is not None:
+    from .explicit_comms.dataframe.shuffle import patch_shuffle_expression
+    from .proxify_device_objects import proxify_decorator, unproxify_decorator
 
-
-
-
-
+    # Monkey patching Dask to make use of explicit-comms when `DASK_EXPLICIT_COMMS=True`
+    patch_shuffle_expression()
+    # Monkey patching Dask to make use of proxify and unproxify in compatibility mode
+    dask_dataframe.shuffle.shuffle_group = proxify_decorator(
+        dask.dataframe.shuffle.shuffle_group
+    )
+    dask_dataframe.core._concat = unproxify_decorator(dask.dataframe.core._concat)
+
+
+def _register_cudf_spill_aware():
+    import cudf
+
+    # Only enable Dask/cuDF spilling if cuDF spilling is disabled, see
+    # https://github.com/rapidsai/dask-cuda/issues/1363
+    if not cudf.get_option("spill"):
+        # This reproduces the implementation of `_register_cudf`, see
+        # https://github.com/dask/distributed/blob/40fcd65e991382a956c3b879e438be1b100dff97/distributed/protocol/__init__.py#L106-L115
+        from cudf.comm import serialize
+
+
+for registry in [
+    cuda_serialize,
+    cuda_deserialize,
+    dask_serialize,
+    dask_deserialize,
+]:
+    for lib in ["cudf", "dask_cudf"]:
+        if lib in registry._lazy:
+            registry._lazy[lib] = _register_cudf_spill_aware
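For reference, a minimal usage sketch of the new import behaviour (an illustration, not part of the package): dask_cuda now imports cleanly even when dask.dataframe is absent, and when it is present the explicit-comms shuffle stays opt-in through the Dask config (the "explicit-comms" key, or the DASK_EXPLICIT_COMMS environment variable mentioned in the comment above).

    import dask
    import dask_cuda  # succeeds even if dask.dataframe is not installed

    print(dask_cuda.__version__)

    # When dask.dataframe is installed, opt in to the explicit-comms shuffle:
    dask.config.set({"explicit-comms": True})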
dask_cuda/benchmarks/utils.py
CHANGED
@@ -11,7 +11,6 @@ from typing import Any, Callable, Mapping, NamedTuple, Optional, Tuple
 import numpy as np
 import pandas as pd
 
-from dask import config
 from dask.distributed import Client, SSHCluster
 from dask.utils import format_bytes, format_time, parse_bytes
 from distributed.comm.addressing import get_address_host
@@ -52,7 +51,6 @@ def as_noop(dsk):
 def parse_benchmark_args(
     description="Generic dask-cuda Benchmark",
     args_list=[],
-    check_explicit_comms=True,
 ):
     parser = argparse.ArgumentParser(description=description)
     worker_args = parser.add_argument_group(description="Worker configuration")
@@ -377,24 +375,6 @@ def parse_benchmark_args(
     if args.multi_node and len(args.hosts.split(",")) < 2:
         raise ValueError("--multi-node requires at least 2 hosts")
 
-    # Raise error early if "explicit-comms" is not allowed
-    if (
-        check_explicit_comms
-        and args.backend == "explicit-comms"
-        and config.get(
-            "dataframe.query-planning",
-            None,
-        )
-        is not False
-    ):
-        raise NotImplementedError(
-            "The 'explicit-comms' config is not yet supported when "
-            "query-planning is enabled in dask. Please use the legacy "
-            "dask-dataframe API by setting the following environment "
-            "variable before executing:",
-            " DASK_DATAFRAME__QUERY_PLANNING=False",
-        )
-
     return args
 
 
dask_cuda/explicit_comms/comms.py
CHANGED

@@ -1,15 +1,21 @@
+# Copyright (c) 2021-2025 NVIDIA CORPORATION.
 import asyncio
 import concurrent.futures
 import contextlib
 import time
 import uuid
+import weakref
 from typing import Any, Dict, Hashable, Iterable, List, Optional
 
 import distributed.comm
+from dask.tokenize import tokenize
 from distributed import Client, Worker, default_client, get_worker
 from distributed.comm.addressing import parse_address, parse_host_port, unparse_address
 
-
+# Mapping tokenize(client ID, [worker addresses]) to CommsContext
+_comms_cache: weakref.WeakValueDictionary[
+    str, "CommsContext"
+] = weakref.WeakValueDictionary()
 
 
 def get_multi_lock_or_null_context(multi_lock_context, *args, **kwargs):
@@ -38,9 +44,10 @@ def get_multi_lock_or_null_context(multi_lock_context, *args, **kwargs):
 
 
 def default_comms(client: Optional[Client] = None) -> "CommsContext":
-    """Return the default comms object
+    """Return the default comms object for ``client``.
 
-    Creates a new default comms object if
+    Creates a new default comms object if one does not already exist
+    for ``client``.
 
     Parameters
     ----------
@@ -52,11 +59,31 @@ def default_comms(client: Optional[Client] = None) -> "CommsContext":
     -------
     comms: CommsContext
         The default comms object
+
+    Notes
+    -----
+    There are some subtle points around explicit-comms and the lifecycle
+    of a Dask Cluster.
+
+    A :class:`CommsContext` establishes explicit communication channels
+    between the workers *at the time it's created*. If workers are added
+    or removed, they will not be included in the communication channels
+    with the other workers.
+
+    If you need to refresh the explicit communications channels, then
+    create a new :class:`CommsContext` object or call ``default_comms``
+    again after workers have been added to or removed from the cluster.
     """
-
-
-
-
+    # Comms are unique to a {client, [workers]} pair, so we key our
+    # cache by the token of that.
+    client = client or default_client()
+    token = tokenize(client.id, list(client.scheduler_info()["workers"].keys()))
+    maybe_comms = _comms_cache.get(token)
+    if maybe_comms is None:
+        maybe_comms = CommsContext(client=client)
+        _comms_cache[token] = maybe_comms
+
+    return maybe_comms
 
 
 def worker_state(sessionId: Optional[int] = None) -> dict:
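The new cache can be exercised directly. The sketch below mirrors the tests added later in this diff (a CPU-only LocalCluster is assumed to be enough for illustration): default_comms() returns the same CommsContext while the worker set is unchanged, and a fresh one after the cluster is scaled.

    from distributed import Client, LocalCluster

    from dask_cuda.explicit_comms import comms

    if __name__ == "__main__":
        with LocalCluster(n_workers=1) as cluster, Client(cluster) as client:
            ctx1 = comms.default_comms()          # created and cached
            assert comms.default_comms() is ctx1  # same workers -> cached object

            cluster.scale(2)
            client.wait_for_workers(2)
            ctx2 = comms.default_comms()          # worker set changed -> new context
            assert ctx2 is not ctx1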
dask_cuda/explicit_comms/dataframe/shuffle.py
CHANGED

@@ -1,8 +1,6 @@
 from __future__ import annotations
 
 import asyncio
-import functools
-import inspect
 from collections import defaultdict
 from math import ceil
 from operator import getitem
@@ -20,7 +18,7 @@ import distributed.worker
 from dask.base import tokenize
 from dask.dataframe import DataFrame, Series
 from dask.dataframe.core import _concat as dd_concat
-from dask.dataframe.
+from dask.dataframe.dispatch import group_split_dispatch, hash_object_dispatch
 from distributed import wait
 from distributed.protocol import nested_deserialize, to_serialize
 from distributed.worker import Worker
@@ -33,6 +31,20 @@ T = TypeVar("T")
 Proxify = Callable[[T], T]
 
 
+try:
+    from dask.dataframe import dask_expr
+
+except ImportError:
+    # TODO: Remove when pinned to dask>2024.12.1
+    import dask_expr
+
+    if not dd._dask_expr_enabled():
+        raise ValueError(
+            "The legacy DataFrame API is not supported in dask_cudf>24.12. "
+            "Please enable query-planning, or downgrade to dask_cudf<=24.12"
+        )
+
+
 def get_proxify(worker: Worker) -> Proxify:
     """Get function to proxify objects"""
     from dask_cuda.proxify_host_file import ProxifyHostFile
@@ -570,40 +582,48 @@ def _use_explicit_comms() -> bool:
     return False
 
 
-def
-"""
+def patch_shuffle_expression() -> None:
+    """Patch Dasks Shuffle expression.
 
-    Notice, this is monkey patched into Dask at dask_cuda
+    Notice, this is monkey patched into Dask at dask_cuda
+    import, and it changes `Shuffle._layer` to lower into
+    an `ECShuffle` expression when the 'explicit-comms'
+    config is set to `True`.
     """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    class ECShuffle(dask_expr._shuffle.TaskShuffle):
+        """Explicit-Comms Shuffle Expression."""
+
+        def _layer(self):
+            # Execute an explicit-comms shuffle
+            if not hasattr(self, "_ec_shuffled"):
+                on = self.partitioning_index
+                df = dask_expr.new_collection(self.frame)
+                self._ec_shuffled = shuffle(
+                    df,
+                    [on] if isinstance(on, str) else on,
+                    self.npartitions_out,
+                    self.ignore_index,
+                )
+            graph = self._ec_shuffled.dask.copy()
+            shuffled_name = self._ec_shuffled._name
+            for i in range(self.npartitions_out):
+                graph[(self._name, i)] = graph[(shuffled_name, i)]
+            return graph
+
+    _base_lower = dask_expr._shuffle.Shuffle._lower
+
+    def _patched_lower(self):
+        if self.method in (None, "tasks") and _use_explicit_comms():
+            return ECShuffle(
+                self.frame,
+                self.partitioning_index,
+                self.npartitions_out,
+                self.ignore_index,
+                self.options,
+                self.original_partitioning_index,
+            )
+        else:
+            return _base_lower(self)
 
-
-    when explicit comms is enabled.
-    """
-    ret = dask.config.get("dataframe.shuffle.algorithm", None)
-    if ret is None and _use_explicit_comms():
-        return "tasks"
-    return dask.utils.get_default_shuffle_method()
+    dask_expr._shuffle.Shuffle._lower = _patched_lower
dask_cuda/plugins.py
CHANGED
@@ -1,4 +1,5 @@
 import importlib
+import logging
 import os
 from typing import Callable, Dict
 
@@ -12,7 +13,15 @@ class CPUAffinity(WorkerPlugin):
         self.cores = cores
 
     def setup(self, worker=None):
-
+        try:
+            os.sched_setaffinity(0, self.cores)
+        except Exception:
+            logger = logging.getLogger("distributed.worker")
+            logger.warning(
+                "Setting CPU affinity for GPU failed. Please refer to the following "
+                "link for troubleshooting information: "
+                "https://docs.rapids.ai/api/dask-cuda/nightly/troubleshooting/#setting-cpu-affinity-failure"  # noqa: E501
+            )
 
 
 class CUDFSetup(WorkerPlugin):
dask_cuda/proxy_object.py
CHANGED
@@ -11,9 +11,6 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Tuple, Type, Un
 import pandas
 
 import dask
-import dask.array.core
-import dask.dataframe.methods
-import dask.dataframe.utils
 import dask.utils
 import distributed.protocol
 import distributed.utils
@@ -22,16 +19,6 @@ from distributed.protocol.compression import decompress
 
 from dask_cuda.disk_io import disk_read
 
-try:
-    from dask.dataframe.backends import concat_pandas
-except ImportError:
-    from dask.dataframe.methods import concat_pandas
-
-try:
-    from dask.dataframe.dispatch import make_meta_dispatch as make_meta_dispatch
-except ImportError:
-    from dask.dataframe.utils import make_meta as make_meta_dispatch
-
 from .disk_io import SpillToDiskFile
 from .is_device_object import is_device_object
 
@@ -39,6 +26,22 @@ if TYPE_CHECKING:
     from .proxify_host_file import ProxyManager
 
 
+try:
+    import dask.dataframe as dask_dataframe
+    import dask.dataframe.backends
+    import dask.dataframe.dispatch
+    import dask.dataframe.utils
+except ImportError:
+    dask_dataframe = None
+
+
+try:
+    import dask.array as dask_array
+    import dask.array.core
+except ImportError:
+    dask_array = None
+
+
 # List of attributes that should be copied to the proxy at creation, which makes
 # them accessible without deserialization of the proxied object
 _FIXED_ATTRS = ["name", "__len__"]
@@ -893,12 +896,6 @@ def obj_pxy_dask_deserialize(header, frames):
     return subclass(pxy)
 
 
-@dask.dataframe.core.get_parallel_type.register(ProxyObject)
-def get_parallel_type_proxy_object(obj: ProxyObject):
-    # Notice, `get_parallel_type()` needs a instance not a type object
-    return dask.dataframe.core.get_parallel_type(obj.__class__.__new__(obj.__class__))
-
-
 def unproxify_input_wrapper(func):
     """Unproxify the input of `func`"""
 
@@ -911,26 +908,42 @@ def unproxify_input_wrapper(func):
     return wrapper
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-)
-
-
-
-
-
-
-
+if dask_array is not None:
+
+    # Register dispatch of ProxyObject on all known dispatch objects
+    for dispatch in (
+        dask.array.core.tensordot_lookup,
+        dask.array.core.einsum_lookup,
+        dask.array.core.concatenate_lookup,
+    ):
+        dispatch.register(ProxyObject, unproxify_input_wrapper(dispatch))
+
+
+if dask_dataframe is not None:
+
+    @dask.dataframe.dispatch.get_parallel_type.register(ProxyObject)
+    def get_parallel_type_proxy_object(obj: ProxyObject):
+        # Notice, `get_parallel_type()` needs a instance not a type object
+        return dask.dataframe.dispatch.get_parallel_type(
+            obj.__class__.__new__(obj.__class__)
+        )
+
+    # Register dispatch of ProxyObject on all known dispatch objects
+    for dispatch in (
+        dask.dataframe.dispatch.hash_object_dispatch,
+        dask.dataframe.dispatch.make_meta_dispatch,
+        dask.dataframe.utils.make_scalar,
+        dask.dataframe.dispatch.group_split_dispatch,
+    ):
+        dispatch.register(ProxyObject, unproxify_input_wrapper(dispatch))
+
+    dask.dataframe.dispatch.concat_dispatch.register(
+        ProxyObject, unproxify_input_wrapper(dask.dataframe.dispatch.concat)
+    )
+
+    # We overwrite the Dask dispatch of Pandas objects in order to
+    # deserialize all ProxyObjects before concatenating
+    dask.dataframe.dispatch.concat_dispatch.register(
+        (pandas.DataFrame, pandas.Series, pandas.Index),
+        unproxify_input_wrapper(dask.dataframe.backends.concat_pandas),
+    )
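These registrations are what make a ProxyObject transparent to the dispatch-based concat. A small sketch of the supported call path, mirroring the updated test_pandas in test_proxy.py further down:

    import pandas
    from dask.dataframe.dispatch import concat

    from dask_cuda import proxy_object

    df1 = pandas.DataFrame({"a": range(10)})
    df2 = pandas.DataFrame({"a": range(10)})

    plain = concat([df1, df2])
    mixed = concat([proxy_object.asproxy(df1), df2])  # proxy is unproxified first
    pandas.testing.assert_frame_equal(plain, mixed)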
dask_cuda/tests/test_dask_cuda_worker.py
CHANGED

@@ -320,6 +320,7 @@ def test_unknown_argument():
     assert b"Scheduler address: --my-argument" in ret.stderr
 
 
+@pytest.mark.xfail(reason="https://github.com/rapidsai/dask-cuda/issues/1441")
 @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
 def test_pre_import(loop):  # noqa: F811
     module = None
dask_cuda/tests/test_explicit_comms.py
CHANGED

@@ -1,3 +1,5 @@
+# Copyright (c) 2021-2025 NVIDIA CORPORATION.
+
 import asyncio
 import multiprocessing as mp
 import os
@@ -25,16 +27,6 @@ from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
 mp = mp.get_context("spawn")  # type: ignore
 ucp = pytest.importorskip("ucp")
 
-QUERY_PLANNING_ON = dask.config.get("dataframe.query-planning", None) is not False
-
-# Skip these tests when dask-expr is active (for now)
-query_planning_skip = pytest.mark.skipif(
-    QUERY_PLANNING_ON,
-    reason=(
-        "The 'explicit-comms' config is not supported "
-        "when query planning is enabled."
-    ),
-)
 
 # Set default shuffle method to "tasks"
 if dask.config.get("dataframe.shuffle.method", None) is None:
@@ -98,7 +90,6 @@ def _test_dataframe_merge_empty_partitions(nrows, npartitions):
     pd.testing.assert_frame_equal(got, expected)
 
 
-@query_planning_skip
 def test_dataframe_merge_empty_partitions():
     # Notice, we use more partitions than rows
     p = mp.Process(target=_test_dataframe_merge_empty_partitions, args=(2, 4))
@@ -250,7 +241,7 @@ def _test_dask_use_explicit_comms(in_cluster):
     ):
         dask.config.refresh()  # Trigger re-read of the environment variables
         with pytest.raises(ValueError, match="explicit-comms-batchsize"):
-            ddf.shuffle(on="key", npartitions=4)
+            ddf.shuffle(on="key", npartitions=4).dask
 
     if in_cluster:
         with LocalCluster(
@@ -267,7 +258,6 @@ def _test_dask_use_explicit_comms(in_cluster):
         check_shuffle()
 
 
-@query_planning_skip
 @pytest.mark.parametrize("in_cluster", [True, False])
 def test_dask_use_explicit_comms(in_cluster):
     def _timeout(process, function, timeout):
@@ -330,7 +320,6 @@ def _test_dataframe_shuffle_merge(backend, protocol, n_workers):
     assert_eq(got, expected)
 
 
-@query_planning_skip
 @pytest.mark.parametrize("nworkers", [1, 2, 4])
 @pytest.mark.parametrize("backend", ["pandas", "cudf"])
 @pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
@@ -428,3 +417,116 @@ def test_lock_workers():
         p.join()
 
     assert all(p.exitcode == 0 for p in ps)
+
+
+def test_create_destroy_create():
+    # https://github.com/rapidsai/dask-cuda/issues/1450
+    assert len(comms._comms_cache) == 0
+    with LocalCluster(n_workers=1) as cluster:
+        with Client(cluster) as client:
+            context = comms.default_comms()
+            scheduler_addresses_old = list(client.scheduler_info()["workers"].keys())
+            comms_addresses_old = list(comms.default_comms().worker_addresses)
+            assert comms.default_comms() is context
+            assert len(comms._comms_cache) == 1
+
+            # Add a worker, which should have a new comms object
+            cluster.scale(2)
+            client.wait_for_workers(2, timeout=5)
+            context2 = comms.default_comms()
+            assert context is not context2
+            assert len(comms._comms_cache) == 2
+
+            del context
+            del context2
+            assert len(comms._comms_cache) == 0
+
+    assert scheduler_addresses_old == comms_addresses_old
+
+    # A new cluster should have a new comms object. Previously, this failed
+    # because we referenced the old cluster's addresses.
+    with LocalCluster(n_workers=1) as cluster:
+        with Client(cluster) as client:
+            scheduler_addresses_new = list(client.scheduler_info()["workers"].keys())
+            comms_addresses_new = list(comms.default_comms().worker_addresses)
+
+            assert scheduler_addresses_new == comms_addresses_new
+
+
+def test_scaled_cluster_gets_new_comms_context():
+    # Ensure that if we create a CommsContext, scale the cluster,
+    # and create a new CommsContext, then the new CommsContext
+    # should include the new worker.
+    # https://github.com/rapidsai/dask-cuda/issues/1450
+
+    name = "explicit-comms-shuffle"
+    ddf = dd.from_pandas(pd.DataFrame({"key": np.arange(10)}), npartitions=2)
+
+    with LocalCluster(n_workers=2) as cluster:
+        with Client(cluster) as client:
+            context_1 = comms.default_comms()
+
+            def check(dask_worker, session_id: int):
+                has_state = hasattr(dask_worker, "_explicit_comm_state")
+                has_state_for_session = (
+                    has_state and session_id in dask_worker._explicit_comm_state
+                )
+                if has_state_for_session:
+                    n_workers = dask_worker._explicit_comm_state[session_id]["nworkers"]
+                else:
+                    n_workers = None
+                return {
+                    "has_state": has_state,
+                    "has_state_for_session": has_state_for_session,
+                    "n_workers": n_workers,
+                }
+
+            result_1 = client.run(check, session_id=context_1.sessionId)
+            expected_values = {
+                "has_state": True,
+                "has_state_for_session": True,
+                "n_workers": 2,
+            }
+            expected_1 = {
+                k: expected_values for k in client.scheduler_info()["workers"]
+            }
+            assert result_1 == expected_1
+
+            # Run a shuffle with the initial setup as a sanity test
+            with dask.config.set(explicit_comms=True):
+                shuffled = ddf.shuffle(on="key", npartitions=4)
+                assert any(name in str(key) for key in shuffled.dask)
+                result = shuffled.compute()
+
+            with dask.config.set(explicit_comms=False):
+                shuffled = ddf.shuffle(on="key", npartitions=4)
+                expected = shuffled.compute()
+
+            assert_eq(result, expected)
+
+            # --- Scale the cluster ---
+            cluster.scale(3)
+            client.wait_for_workers(3, timeout=5)
+
+            context_2 = comms.default_comms()
+            result_2 = client.run(check, session_id=context_2.sessionId)
+            expected_values = {
+                "has_state": True,
+                "has_state_for_session": True,
+                "n_workers": 3,
+            }
+            expected_2 = {
+                k: expected_values for k in client.scheduler_info()["workers"]
+            }
+            assert result_2 == expected_2
+
+            # Run a shuffle with the new setup
+            with dask.config.set(explicit_comms=True):
+                shuffled = ddf.shuffle(on="key", npartitions=4)
+                assert any(name in str(key) for key in shuffled.dask)
+                result = shuffled.compute()
+
+            with dask.config.set(explicit_comms=False):
+                shuffled = ddf.shuffle(on="key", npartitions=4)
+                expected = shuffled.compute()
+
+            assert_eq(result, expected)
dask_cuda/tests/test_initialize.py
CHANGED

@@ -1,4 +1,5 @@
 import multiprocessing as mp
+import sys
 
 import numpy
 import psutil
@@ -214,3 +215,38 @@ def test_initialize_ucx_all(protocol):
     p.start()
     p.join()
     assert not p.exitcode
+
+
+def _test_dask_cuda_import():
+    # Check that importing `dask_cuda` does NOT
+    # require `dask.dataframe` or `dask.array`.
+
+    # Patch sys.modules so that `dask.dataframe`
+    # and `dask.array` cannot be found.
+    with pytest.MonkeyPatch.context() as monkeypatch:
+        for k in list(sys.modules):
+            if k.startswith("dask.dataframe") or k.startswith("dask.array"):
+                monkeypatch.setitem(sys.modules, k, None)
+        monkeypatch.delitem(sys.modules, "dask_cuda")
+
+        # Check that top-level imports still succeed.
+        import dask_cuda  # noqa: F401
+        from dask_cuda import CUDAWorker  # noqa: F401
+        from dask_cuda import LocalCUDACluster
+
+        with LocalCUDACluster(
+            dashboard_address=None,
+            n_workers=1,
+            threads_per_worker=1,
+            processes=True,
+            worker_class=IncreasedCloseTimeoutNanny,
+        ) as cluster:
+            with Client(cluster) as client:
+                client.run(lambda *args: None)
+
+
+def test_dask_cuda_import():
+    p = mp.Process(target=_test_dask_cuda_import)
+    p.start()
+    p.join()
+    assert not p.exitcode
dask_cuda/tests/test_proxy.py
CHANGED
@@ -504,27 +504,27 @@ def test_pandas():
     df1 = pandas.DataFrame({"a": range(10)})
     df2 = pandas.DataFrame({"a": range(10)})
 
-    res = dask.dataframe.
-    got = dask.dataframe.
+    res = dask.dataframe.dispatch.concat([df1, df2])
+    got = dask.dataframe.dispatch.concat([df1, df2])
     assert_frame_equal(res, got)
 
-    got = dask.dataframe.
+    got = dask.dataframe.dispatch.concat([proxy_object.asproxy(df1), df2])
     assert_frame_equal(res, got)
 
-    got = dask.dataframe.
+    got = dask.dataframe.dispatch.concat([df1, proxy_object.asproxy(df2)])
    assert_frame_equal(res, got)
 
     df1 = pandas.Series(range(10))
     df2 = pandas.Series(range(10))
 
-    res = dask.dataframe.
-    got = dask.dataframe.
+    res = dask.dataframe.dispatch.concat([df1, df2])
+    got = dask.dataframe.dispatch.concat([df1, df2])
     assert all(res == got)
 
-    got = dask.dataframe.
+    got = dask.dataframe.dispatch.concat([proxy_object.asproxy(df1), df2])
     assert all(res == got)
 
-    got = dask.dataframe.
+    got = dask.dataframe.dispatch.concat([df1, proxy_object.asproxy(df2)])
     assert all(res == got)
 
 
dask_cuda/tests/test_utils.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 from unittest.mock import patch
 
+import pynvml
 import pytest
 from numba import cuda
 
@@ -197,7 +198,6 @@ def test_get_ucx_config(enable_tcp_over_ucx, enable_infiniband, enable_nvlink):
 
 
 def test_parse_visible_devices():
-    pynvml = pytest.importorskip("pynvml")
     pynvml.nvmlInit()
     indices = []
     uuids = []
@@ -250,7 +250,6 @@ def test_parse_device_memory_limit():
 
 
 def test_parse_visible_mig_devices():
-    pynvml = pytest.importorskip("pynvml")
     pynvml.nvmlInit()
     for index in range(get_gpu_count()):
         handle = pynvml.nvmlDeviceGetHandleByIndex(index)
{dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: dask-cuda
-Version:
+Version: 25.4.0
 Summary: Utilities for Dask and CUDA interactions
 Author: NVIDIA Corporation
 License: Apache 2.0
@@ -19,24 +19,18 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click>=8.1
-Requires-Dist: numba
+Requires-Dist: numba<0.61.0a0,>=0.59.1
 Requires-Dist: numpy<3.0a0,>=1.23
 Requires-Dist: pandas>=1.3
-Requires-Dist: pynvml<
-Requires-Dist: rapids-dask-dependency==
+Requires-Dist: pynvml<13.0.0a0,>=12.0.0
+Requires-Dist: rapids-dask-dependency==25.4.*
 Requires-Dist: zict>=2.0.0
 Provides-Extra: docs
 Requires-Dist: numpydoc>=1.1.0; extra == "docs"
 Requires-Dist: sphinx; extra == "docs"
 Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
 Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
-
-Requires-Dist: cudf==24.12.*; extra == "test"
-Requires-Dist: dask-cudf==24.12.*; extra == "test"
-Requires-Dist: kvikio==24.12.*; extra == "test"
-Requires-Dist: pytest; extra == "test"
-Requires-Dist: pytest-cov; extra == "test"
-Requires-Dist: ucx-py==0.41.*; extra == "test"
+Dynamic: license-file
 
 Dask CUDA
 =========
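A quick way to compare an existing environment against the tightened pins above (the numba, pynvml, and rapids-dask-dependency requirements now track the 25.04 line); this sketch only prints the installed versions and assumes the packages are present:

    from importlib.metadata import version

    for dist in ("numba", "pynvml", "rapids-dask-dependency"):
        print(dist, version(dist))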
{dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/RECORD
CHANGED

@@ -1,5 +1,6 @@
-dask_cuda/
-dask_cuda/
+dask_cuda/GIT_COMMIT,sha256=wbY8QunTBf6nZeA4ulUfzAdQWyE7hoxV330KmJ3VnjA,41
+dask_cuda/VERSION,sha256=EM36MPurzJgotElKb8R7ZaIOF2woBA69gsVnmiyf-LY,8
+dask_cuda/__init__.py,sha256=Wbc7R0voN4vsQkb7SKuVXH0YXuXtfnAxrupxfM4lT10,1933
 dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
 dask_cuda/cli.py,sha256=cScVyNiA_l9uXeDgkIcmbcR4l4cH1_1shqSqsVmuHPE,17053
 dask_cuda/cuda_worker.py,sha256=rZ1ITG_ZCbuaMA9e8uSqCjU8Km4AMphGGrxpBPQG8xU,9477
@@ -10,10 +11,10 @@ dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
 dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
 dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
 dask_cuda/local_cuda_cluster.py,sha256=wqwKVRV6jT13sf9e-XsvbVBlTrnhmcbmHQBFPTFcayw,20335
-dask_cuda/plugins.py,sha256=
+dask_cuda/plugins.py,sha256=A2aT8HA6q_JhIEx6-XKcpbWEbl7aTg1GNoZQH8_vh00,7197
 dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
 dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
-dask_cuda/proxy_object.py,sha256=
+dask_cuda/proxy_object.py,sha256=mrCCGwS-mltcY8oddJEXnPL6rV2dBpGgsFypBVbxRsA,30150
 dask_cuda/utils.py,sha256=Goq-m78rYZ-bcJitg47N1h_PC4PDuzXG0CUVH7V8azU,25515
 dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
 dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
@@ -21,35 +22,35 @@ dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 dask_cuda/benchmarks/common.py,sha256=YFhxBYkoxIV-2mddSbLwTbyg67U4zXDd2_fFq9oP3_A,6922
 dask_cuda/benchmarks/local_cudf_groupby.py,sha256=zrDiF-yBAUxVt9mWOTH5hUm-pb-XnVX-G9gvCEX7_GI,8512
 dask_cuda/benchmarks/local_cudf_merge.py,sha256=Q7lnZ87-O7j28hkS-i_5hMApTX8VsuI4ftZf2XAnp1E,12195
-dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=
+dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=Ied7r_fdGuOJyikBVVkMaIX3niJIlF39C1Xk6IVwgo4,8240
 dask_cuda/benchmarks/local_cupy.py,sha256=RCxQJd88bn3vyMAJDPK3orUpxzvDZY957wOSYkfriq0,10323
 dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=YAllGFuG6MePfPL8gdZ-Ld7a44-G0eEaHZJWB4vFPdY,6017
-dask_cuda/benchmarks/read_parquet.py,sha256=
-dask_cuda/benchmarks/utils.py,sha256=
+dask_cuda/benchmarks/read_parquet.py,sha256=spKu6RLWYngPZq9hnaoU0mz7INIaJnErfqjBG2wH8Zc,7614
+dask_cuda/benchmarks/utils.py,sha256=_x0XXL_F3W-fExpuQfTBwuK3WnrVuXQQepbnvjUqS9o,30075
 dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/explicit_comms/comms.py,sha256=
+dask_cuda/explicit_comms/comms.py,sha256=uq-XPOH38dFcYS_13Vomj2ER6zxQz7DPeSM000mOVmY,11541
 dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=
+dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=g9xDyFKmblEuevZt5Drh66uMLw-LUNOI8CIucDdACmY,21231
 dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
-dask_cuda/tests/test_dask_cuda_worker.py,sha256=
+dask_cuda/tests/test_dask_cuda_worker.py,sha256=C1emlr47yGa3TdSSlAXJRzguY4bcH74htk21x9th7nQ,20556
 dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
 dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
-dask_cuda/tests/test_explicit_comms.py,sha256=
+dask_cuda/tests/test_explicit_comms.py,sha256=xnQjjUrd6RFd9CS99pVuWY1frfiMXzRv_fW4rk9opOk,19465
 dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
 dask_cuda/tests/test_gds.py,sha256=j1Huud6UGm1fbkyRLQEz_ysrVw__5AimwSn_M-2GEvs,1513
-dask_cuda/tests/test_initialize.py,sha256=
+dask_cuda/tests/test_initialize.py,sha256=4Ovv_ClokKibPX6wfuaoQgN4eKCohagRFoE3s3D7Huk,8119
 dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
 dask_cuda/tests/test_proxify_host_file.py,sha256=LC3jjo_gbfhdIy1Zy_ynmgyv31HXFoBINCe1-XXZ4XU,18994
-dask_cuda/tests/test_proxy.py,sha256=
+dask_cuda/tests/test_proxy.py,sha256=U9uE-QesTwquNKzTReEKiYgoRgS_pfGW-A-gJNppHyg,23817
 dask_cuda/tests/test_spill.py,sha256=CYMbp5HDBYlZ7T_n8RfSOZxaWFcAQKjprjRM7Wupcdw,13419
-dask_cuda/tests/test_utils.py,sha256=
+dask_cuda/tests/test_utils.py,sha256=PQI_oTONWnKSKlkQfEeK-vlmYa0-cPpDjDEbm74cNCE,9104
 dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
 dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
+dask_cuda-25.4.0.dist-info/licenses/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
 examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
 examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
-dask_cuda-
-dask_cuda-
-dask_cuda-
-dask_cuda-
-dask_cuda-
-dask_cuda-24.12.0.dist-info/RECORD,,
+dask_cuda-25.4.0.dist-info/METADATA,sha256=udK2maTnpkUBnOOtTvGOwySUtJxnIo4rcIOmySPBuOk,2294
+dask_cuda-25.4.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+dask_cuda-25.4.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+dask_cuda-25.4.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
+dask_cuda-25.4.0.dist-info/RECORD,,
{dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/entry_points.txt
File without changes

{dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info/licenses}/LICENSE
File without changes

{dask_cuda-24.12.0.dist-info → dask_cuda-25.4.0.dist-info}/top_level.txt
File without changes