PyPI - dask-cuda - Versions diffs - 25.6.0__py3-none-any.whl → 25.8.0__py3-none-any.whl - Mend

dask-cuda 25.6.0__py3-none-any.whl → 25.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

dask_cuda/GIT_COMMIT +1 -1
dask_cuda/VERSION +1 -1
dask_cuda/benchmarks/common.py +4 -1
dask_cuda/benchmarks/local_cudf_groupby.py +4 -1
dask_cuda/benchmarks/local_cudf_merge.py +5 -2
dask_cuda/benchmarks/local_cudf_shuffle.py +5 -2
dask_cuda/benchmarks/local_cupy.py +4 -1
dask_cuda/benchmarks/local_cupy_map_overlap.py +4 -1
dask_cuda/benchmarks/utils.py +7 -4
dask_cuda/cli.py +21 -15
dask_cuda/cuda_worker.py +27 -57
dask_cuda/device_host_file.py +31 -15
dask_cuda/disk_io.py +7 -4
dask_cuda/explicit_comms/comms.py +11 -7
dask_cuda/explicit_comms/dataframe/shuffle.py +23 -23
dask_cuda/get_device_memory_objects.py +3 -3
dask_cuda/initialize.py +80 -44
dask_cuda/local_cuda_cluster.py +63 -66
dask_cuda/plugins.py +17 -16
dask_cuda/proxify_device_objects.py +12 -10
dask_cuda/proxify_host_file.py +30 -27
dask_cuda/proxy_object.py +20 -17
dask_cuda/tests/conftest.py +41 -0
dask_cuda/tests/test_dask_cuda_worker.py +109 -25
dask_cuda/tests/test_dgx.py +10 -18
dask_cuda/tests/test_explicit_comms.py +30 -12
dask_cuda/tests/test_from_array.py +7 -5
dask_cuda/tests/test_initialize.py +16 -37
dask_cuda/tests/test_local_cuda_cluster.py +159 -52
dask_cuda/tests/test_proxify_host_file.py +19 -3
dask_cuda/tests/test_proxy.py +18 -16
dask_cuda/tests/test_rdd_ucx.py +160 -0
dask_cuda/tests/test_spill.py +7 -0
dask_cuda/tests/test_utils.py +106 -20
dask_cuda/tests/test_worker_spec.py +5 -2
dask_cuda/utils.py +261 -38
dask_cuda/utils_test.py +23 -7
dask_cuda/worker_common.py +196 -0
dask_cuda/worker_spec.py +12 -5
{dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/METADATA +2 -2
dask_cuda-25.8.0.dist-info/RECORD +63 -0
dask_cuda-25.8.0.dist-info/top_level.txt +6 -0
shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
dask_cuda-25.6.0.dist-info/RECORD +0 -57
dask_cuda-25.6.0.dist-info/top_level.txt +0 -4
{dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/WHEEL +0 -0
{dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/entry_points.txt +0 -0
{dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/licenses/LICENSE +0 -0

dask_cuda/tests/test_local_cuda_cluster.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 import asyncio
+import contextlib
 import os
 import pkgutil
 import sys
@@ -20,15 +21,17 @@ from dask_cuda.utils import (
     get_device_total_memory,
     get_gpu_count_mig,
     get_gpu_uuid,
+    has_device_memory_resource,
     print_cluster_config,
 )
-from dask_cuda.utils_test import MockWorker
+from dask_cuda.utils_test import MockWorker, get_ucx_implementation
 @gen_test(timeout=20)
 async def test_local_cuda_cluster():
     async with LocalCUDACluster(
-        scheduler_port=0, asynchronous=True, device_memory_limit=1
+        scheduler_port=0,
+        asynchronous=True,
     ) as cluster:
         async with Client(cluster, asynchronous=True) as client:
             assert len(cluster.workers) == utils.get_n_gpus()
@@ -68,8 +71,8 @@ async def test_with_subset_of_cuda_visible_devices():
     async with LocalCUDACluster(
         scheduler_port=0,
         asynchronous=True,
-        device_memory_limit=1,
         worker_class=MockWorker,
+        data=dict,
     ) as cluster:
         async with Client(cluster, asynchronous=True) as client:
             assert len(cluster.workers) == 4
@@ -92,14 +95,11 @@ async def test_with_subset_of_cuda_visible_devices():
 @pytest.mark.parametrize(
     "protocol",
-    ["ucx", "ucxx"],
+    ["ucx", "ucx-old"],
 )
 @gen_test(timeout=20)
 async def test_ucx_protocol(protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+    get_ucx_implementation(protocol)
     async with LocalCUDACluster(
         protocol=protocol, asynchronous=True, data=dict
@@ -112,35 +112,32 @@ async def test_ucx_protocol(protocol):
 @pytest.mark.parametrize(
     "protocol",
-    ["ucx", "ucxx"],
+    ["ucx", "ucx-old"],
 )
 @gen_test(timeout=20)
 async def test_explicit_ucx_with_protocol_none(protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+    get_ucx_implementation(protocol)
     initialize(protocol=protocol, enable_tcp_over_ucx=True)
     async with LocalCUDACluster(
-        protocol=None, enable_tcp_over_ucx=True, asynchronous=True, data=dict
+        protocol=None,
+        enable_tcp_over_ucx=True,
+        asynchronous=True,
     ) as cluster:
         assert all(
-            ws.address.startswith("ucx://") for ws in cluster.scheduler.workers.values()
+            ws.address.startswith(f"{protocol}://")
+            for ws in cluster.scheduler.workers.values()
         )
 @pytest.mark.filterwarnings("ignore:Exception ignored in")
 @pytest.mark.parametrize(
     "protocol",
-    ["ucx", "ucxx"],
+    ["ucx", "ucx-old"],
 )
 @gen_test(timeout=20)
 async def test_ucx_protocol_type_error(protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+    get_ucx_implementation(protocol)
     initialize(protocol=protocol, enable_tcp_over_ucx=True)
     with pytest.raises(TypeError):
@@ -153,7 +150,10 @@ async def test_ucx_protocol_type_error(protocol):
 @gen_test(timeout=20)
 async def test_n_workers():
     async with LocalCUDACluster(
-        CUDA_VISIBLE_DEVICES="0,1", worker_class=MockWorker, asynchronous=True
+        CUDA_VISIBLE_DEVICES="0,1",
+        worker_class=MockWorker,
+        asynchronous=True,
+        data=dict,
     ) as cluster:
         assert len(cluster.workers) == 2
         assert len(cluster.worker_spec) == 2
@@ -208,10 +208,13 @@ async def test_no_memory_limits_cudaworker():
 @gen_test(timeout=20)
 async def test_all_to_all():
     async with LocalCUDACluster(
-        CUDA_VISIBLE_DEVICES="0,1", worker_class=MockWorker, asynchronous=True
+        CUDA_VISIBLE_DEVICES="0,1",
+        worker_class=MockWorker,
+        asynchronous=True,
+        data=dict,
     ) as cluster:
         async with Client(cluster, asynchronous=True) as client:
-            workers = list(client.scheduler_info()["workers"])
+            workers = list(client.scheduler_info(n_workers=-1)["workers"])
             n_workers = len(workers)
             await utils.all_to_all(client)
             # assert all to all has resulted in all data on every worker
@@ -263,11 +266,6 @@ async def test_rmm_managed():
 async def test_rmm_async():
     rmm = pytest.importorskip("rmm")
-    driver_version = rmm._cuda.gpu.driverGetVersion()
-    runtime_version = rmm._cuda.gpu.runtimeGetVersion()
-    if driver_version < 11020 or runtime_version < 11020:
-        pytest.skip("cudaMallocAsync not supported")
     async with LocalCUDACluster(
         rmm_async=True,
         rmm_pool_size="2GB",
@@ -290,11 +288,6 @@ async def test_rmm_async():
 async def test_rmm_async_with_maximum_pool_size():
     rmm = pytest.importorskip("rmm")
-    driver_version = rmm._cuda.gpu.driverGetVersion()
-    runtime_version = rmm._cuda.gpu.runtimeGetVersion()
-    if driver_version < 11020 or runtime_version < 11020:
-        pytest.skip("cudaMallocAsync not supported")
     async with LocalCUDACluster(
         rmm_async=True,
         rmm_pool_size="2GB",
@@ -381,7 +374,6 @@ async def test_cluster_worker():
     async with LocalCUDACluster(
         scheduler_port=0,
         asynchronous=True,
-        device_memory_limit=1,
         n_workers=1,
     ) as cluster:
         assert len(cluster.workers) == 1
@@ -464,7 +456,7 @@ async def test_get_cluster_configuration():
     async with LocalCUDACluster(
         rmm_pool_size="2GB",
         rmm_maximum_pool_size="3GB",
-        device_memory_limit="30B",
+        device_memory_limit="30B" if has_device_memory_resource() else None,
         CUDA_VISIBLE_DEVICES="0",
         scheduler_port=0,
         asynchronous=True,
@@ -474,10 +466,14 @@ async def test_get_cluster_configuration():
             assert ret["[plugin] RMMSetup"]["initial_pool_size"] == 2000000000
             assert ret["[plugin] RMMSetup"]["maximum_pool_size"] == 3000000000
             assert ret["jit-unspill"] is False
-            assert ret["device-memory-limit"] == 30
+            if has_device_memory_resource():
+                assert ret["device-memory-limit"] == 30
 @gen_test(timeout=20)
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support fractional limits"
+)
 async def test_worker_fraction_limits():
     async with LocalCUDACluster(
         dashboard_address=None,
@@ -503,6 +499,40 @@ async def test_worker_fraction_limits():
             )
+# Intentionally not using @gen_test to skip cleanup checks
+@pytest.mark.parametrize(
+    "argument", ["pool_size", "maximum_pool_size", "release_threshold"]
+)
+@pytest.mark.xfail(reason="https://github.com/rapidsai/dask-cuda/issues/1265")
+@pytest.mark.skip_if_device_memory(
+    "Devices with dedicated memory resources cannot test error"
+)
+def test_worker_fraction_limits_no_dedicated_memory(argument):
+    async def _test_worker_fraction_limits_no_dedicated_memory():
+        if argument == "pool_size":
+            kwargs = {"rmm_pool_size": "0.1"}
+        elif argument == "maximum_pool_size":
+            kwargs = {"rmm_pool_size": "1 GiB", "rmm_maximum_pool_size": "0.1"}
+        else:
+            kwargs = {"rmm_async": True, "rmm_release_threshold": "0.1"}
+        with raises_with_cause(
+            RuntimeError,
+            "Nanny failed to start",
+            RuntimeError,
+            "Worker failed to start",
+            ValueError,
+            "Fractional of total device memory not supported in devices without a "
+            "dedicated memory resource",
+        ):
+            await LocalCUDACluster(
+                asynchronous=True,
+                **kwargs,
+            )
+    asyncio.run(_test_worker_fraction_limits_no_dedicated_memory())
 @gen_test(timeout=20)
 async def test_cudf_spill_disabled():
     cudf = pytest.importorskip("cudf")
@@ -527,6 +557,9 @@ async def test_cudf_spill_disabled():
 @gen_test(timeout=20)
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources cannot enable cuDF spill"
+)
 async def test_cudf_spill():
     cudf = pytest.importorskip("cudf")
@@ -551,27 +584,101 @@ async def test_cudf_spill():
                 assert v == 2
+@pytest.mark.skip_if_device_memory(
+    "Devices with dedicated memory resources cannot test error"
+)
+@gen_test(timeout=20)
+async def test_cudf_spill_no_dedicated_memory():
+    cudf = pytest.importorskip("cudf")  # noqa: F841
+    with pytest.raises(
+        ValueError,
+        match="cuDF spilling is not supported on devices without dedicated memory",
+    ):
+        await LocalCUDACluster(
+            enable_cudf_spill=True,
+            cudf_spill_stats=2,
+            asynchronous=True,
+        )
 @pytest.mark.parametrize(
     "protocol",
-    ["ucx", "ucxx"],
+    ["ucx", "ucx-old"],
 )
-def test_print_cluster_config(capsys, protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+@pytest.mark.parametrize(
+    "jit_unspill",
+    [False, True],
+)
+@pytest.mark.parametrize(
+    "device_memory_limit",
+    [None, "1B"],
+)
+def test_print_cluster_config(capsys, protocol, jit_unspill, device_memory_limit):
+    get_ucx_implementation(protocol)
     pytest.importorskip("rich")
-    with LocalCUDACluster(
-        n_workers=1, device_memory_limit="1B", jit_unspill=True, protocol=protocol
-    ) as cluster:
-        with Client(cluster) as client:
-            print_cluster_config(client)
-            captured = capsys.readouterr()
-            assert "Dask Cluster Configuration" in captured.out
-            assert protocol in captured.out
-            assert "1 B" in captured.out
-            assert "[plugin]" in captured.out
+    ctx = contextlib.nullcontext()
+    if not has_device_memory_resource():
+        if device_memory_limit:
+            ctx = pytest.raises(
+                ValueError,
+                match="device_memory_limit is set but device has no dedicated memory.",
+            )
+        if jit_unspill:
+            # JIT-Unspill exception has precedence, thus overwrite ctx if both are
+            # enabled
+            ctx = pytest.raises(
+                ValueError,
+                match="JIT-Unspill is not supported on devices without dedicated "
+                "memory",
+            )
+    with ctx:
+        with LocalCUDACluster(
+            n_workers=1,
+            device_memory_limit=device_memory_limit,
+            jit_unspill=jit_unspill,
+            protocol=protocol,
+        ) as cluster:
+            with Client(cluster) as client:
+                print_cluster_config(client)
+                captured = capsys.readouterr()
+                assert "Dask Cluster Configuration" in captured.out
+                assert protocol in captured.out
+                if device_memory_limit == "1B":
+                    assert "1 B" in captured.out
+                assert "[plugin]" in captured.out
+                client.shutdown()
+    def ucxpy_reset(timeout=20):
+        """Reset UCX-Py with a timeout.
+        Attempt to reset UCX-Py, not doing so may cause a deadlock because UCX-Py is
+        not thread-safe and the Dask cluster may still be alive while a new cluster
+        and UCX-Py instances are initalized.
+        """
+        import time
+        import ucp
+        start = time.monotonic()
+        while True:
+            try:
+                ucp.reset()
+            except ucp._libs.exceptions.UCXError as e:
+                if time.monotonic() - start > timeout:
+                    raise RuntimeError(
+                        f"Could not reset UCX-Py in {timeout} seconds, this may result "
+                        f"in a deadlock. Failure:\n{e}"
+                    )
+                continue
+            else:
+                break
+    if protocol == "ucx-old":
+        ucxpy_reset()
 @pytest.mark.xfail(reason="https://github.com/rapidsai/dask-cuda/issues/1265")

dask_cuda/tests/test_proxify_host_file.py CHANGED Viewed

@@ -1,4 +1,5 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
 from typing import Iterable
 from unittest.mock import patch
@@ -219,6 +220,9 @@ def test_one_item_host_limit(capsys, root_dir):
     assert len(dhf.manager) == 0
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support spilling"
+)
 def test_spill_on_demand(root_dir):
     """
     Test spilling on demand by disabling the device_memory_limit
@@ -241,6 +245,9 @@ def test_spill_on_demand(root_dir):
 @pytest.mark.parametrize("jit_unspill", [True, False])
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support spilling"
+)
 @gen_test(timeout=20)
 async def test_local_cuda_cluster(jit_unspill):
     """Testing spilling of a proxied cudf dataframe in a local cuda cluster"""
@@ -377,9 +384,9 @@ def test_externals(root_dir):
 @patch("dask_cuda.proxify_device_objects.incompatible_types", (cupy.ndarray,))
 def test_incompatible_types(root_dir):
-    """Check that ProxifyHostFile unproxifies `cupy.ndarray` on retrieval
+    """Check that ProxifyHostFile unproxifies ``cupy.ndarray`` on retrieval
-    Notice, in this test we add `cupy.ndarray` to the incompatible_types temporarily.
+    Notice, in this test we add ``cupy.ndarray`` to the incompatible_types temporarily.
     """
     cupy = pytest.importorskip("cupy")
     cudf = pytest.importorskip("cudf")
@@ -398,6 +405,9 @@ def test_incompatible_types(root_dir):
 @pytest.mark.parametrize("npartitions", [1, 2, 3])
 @pytest.mark.parametrize("compatibility_mode", [True, False])
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support JIT-Unspill"
+)
 @gen_test(timeout=30)
 async def test_compatibility_mode_dataframe_shuffle(compatibility_mode, npartitions):
     cudf = pytest.importorskip("cudf")
@@ -430,6 +440,9 @@ async def test_compatibility_mode_dataframe_shuffle(compatibility_mode, npartiti
                     assert all(res)  # Only proxy objects
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support JIT-Unspill"
+)
 @gen_test(timeout=60)
 async def test_worker_force_spill_to_disk():
     """Test Dask triggering CPU-to-Disk spilling"""
@@ -465,6 +478,9 @@ async def test_worker_force_spill_to_disk():
                 assert "Unmanaged memory use is high" not in log
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support JIT-Unspill"
+)
 def test_on_demand_debug_info():
     """Test worker logging when on-demand-spilling fails"""
     rmm = pytest.importorskip("rmm")

dask_cuda/tests/test_proxy.py CHANGED Viewed

@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
 import operator
 import os
 import pickle
@@ -23,7 +26,7 @@ from dask_cuda import LocalCUDACluster, proxy_object
 from dask_cuda.disk_io import SpillToDiskFile
 from dask_cuda.proxify_device_objects import proxify_device_objects
 from dask_cuda.proxify_host_file import ProxifyHostFile
-from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
+from dask_cuda.utils_test import IncreasedCloseTimeoutNanny, get_ucx_implementation
 # Make the "disk" serializer available and use a directory that are
 # remove on exit.
@@ -242,7 +245,7 @@ def test_serialize_of_proxied_cudf(proxy_serializers, dask_serializers):
 @pytest.mark.parametrize("backend", ["numpy", "cupy"])
 def test_fixed_attribute_length(backend):
-    """Test fixed attribute `x.__len__` access
+    """Test fixed attribute ``x.__len__`` access
     Notice, accessing fixed attributes shouldn't de-serialize the proxied object
     """
@@ -263,7 +266,7 @@ def test_fixed_attribute_length(backend):
 def test_fixed_attribute_name():
-    """Test fixed attribute `x.name` access
+    """Test fixed attribute ``x.name`` access
     Notice, accessing fixed attributes shouldn't de-serialize the proxied object
     """
@@ -284,6 +287,9 @@ def test_fixed_attribute_name():
 @pytest.mark.parametrize("jit_unspill", [True, False])
+@pytest.mark.skip_if_no_device_memory(
+    "Spilling not supported in devices without dedicated memory resource"
+)
 @gen_test(timeout=20)
 async def test_spilling_local_cuda_cluster(jit_unspill):
     """Testing spilling of a proxied cudf dataframe in a local cuda cluster"""
@@ -386,8 +392,8 @@ def test_serializing_array_to_disk(backend, serializers, size):
 class _PxyObjTest(proxy_object.ProxyObject):
     """
     A class that:
-        - defines `__dask_tokenize__` in order to avoid deserialization when
-          calling `client.scatter()`
+        - defines ``__dask_tokenize__`` in order to avoid deserialization when
+          calling ``client.scatter()``
         - Asserts that no deserialization is performaned when communicating.
     """
@@ -401,14 +407,12 @@ class _PxyObjTest(proxy_object.ProxyObject):
 @pytest.mark.parametrize("send_serializers", [None, ("dask", "pickle"), ("cuda",)])
-@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucx-old"])
 @gen_test(timeout=120)
 async def test_communicating_proxy_objects(protocol, send_serializers):
     """Testing serialization of cuDF dataframe when communicating"""
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+    if protocol.startswith("ucx"):
+        get_ucx_implementation(protocol)
     cudf = pytest.importorskip("cudf")
     def task(x):
@@ -417,7 +421,7 @@ async def test_communicating_proxy_objects(protocol, send_serializers):
         serializers_used = x._pxy_get().serializer
         # Check that `x` is serialized with the expected serializers
-        if protocol in ["ucx", "ucxx"]:
+        if protocol in ["ucx", "ucx-old"]:
             if send_serializers is None:
                 assert serializers_used == "cuda"
             else:
@@ -448,15 +452,13 @@ async def test_communicating_proxy_objects(protocol, send_serializers):
             await client.submit(task, df)
-@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucx-old"])
 @pytest.mark.parametrize("shared_fs", [True, False])
 @gen_test(timeout=20)
 async def test_communicating_disk_objects(protocol, shared_fs):
     """Testing disk serialization of cuDF dataframe when communicating"""
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+    if protocol.startswith("ucx"):
+        get_ucx_implementation(protocol)
     cudf = pytest.importorskip("cudf")
     ProxifyHostFile._spill_to_disk.shared_filesystem = shared_fs

dask_cuda/tests/test_rdd_ucx.py ADDED Viewed

@@ -0,0 +1,160 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+import importlib
+import io
+import multiprocessing as mp
+import sys
+import pytest
+from dask_cuda import LocalCUDACluster
+mp = mp.get_context("spawn")  # type: ignore
+def _has_distributed_ucxx() -> bool:
+    return bool(importlib.util.find_spec("distributed_ucxx"))
+def _test_protocol_ucx():
+    with LocalCUDACluster(protocol="ucx") as cluster:
+        assert cluster.scheduler_comm.address.startswith("ucx://")
+        if _has_distributed_ucxx():
+            import distributed_ucxx
+            assert all(
+                isinstance(batched_send.comm, distributed_ucxx.ucxx.UCXX)
+                for batched_send in cluster.scheduler.stream_comms.values()
+            )
+        else:
+            import rapids_dask_dependency
+            assert all(
+                isinstance(
+                    batched_send.comm,
+                    rapids_dask_dependency.patches.distributed.comm.__rdd_patch_ucx.UCX,
+                )
+                for batched_send in cluster.scheduler.stream_comms.values()
+            )
+def _test_protocol_ucxx():
+    if _has_distributed_ucxx():
+        with LocalCUDACluster(protocol="ucxx") as cluster:
+            assert cluster.scheduler_comm.address.startswith("ucxx://")
+            import distributed_ucxx
+            assert all(
+                isinstance(batched_send.comm, distributed_ucxx.ucxx.UCXX)
+                for batched_send in cluster.scheduler.stream_comms.values()
+            )
+    else:
+        with pytest.raises(RuntimeError, match="Cluster failed to start"):
+            LocalCUDACluster(protocol="ucxx")
+def _test_protocol_ucx_old():
+    with LocalCUDACluster(protocol="ucx-old") as cluster:
+        assert cluster.scheduler_comm.address.startswith("ucx-old://")
+        import rapids_dask_dependency
+        assert all(
+            isinstance(
+                batched_send.comm,
+                rapids_dask_dependency.patches.distributed.comm.__rdd_patch_ucx.UCX,
+            )
+            for batched_send in cluster.scheduler.stream_comms.values()
+        )
+def _run_test_with_output_capture(test_func_name, conn):
+    """Run a test function in a subprocess and capture stdout/stderr."""
+    # Redirect stdout and stderr to capture output
+    old_stdout = sys.stdout
+    old_stderr = sys.stderr
+    captured_output = io.StringIO()
+    sys.stdout = sys.stderr = captured_output
+    try:
+        # Import and run the test function
+        if test_func_name == "_test_protocol_ucx":
+            _test_protocol_ucx()
+        elif test_func_name == "_test_protocol_ucxx":
+            _test_protocol_ucxx()
+        elif test_func_name == "_test_protocol_ucx_old":
+            _test_protocol_ucx_old()
+        else:
+            raise ValueError(f"Unknown test function: {test_func_name}")
+        output = captured_output.getvalue()
+        conn.send((True, output))  # True = success
+    except Exception as e:
+        output = captured_output.getvalue()
+        output += f"\nException: {e}"
+        import traceback
+        output += f"\nTraceback:\n{traceback.format_exc()}"
+        conn.send((False, output))  # False = failure
+    finally:
+        # Restore original stdout/stderr
+        sys.stdout = old_stdout
+        sys.stderr = old_stderr
+        conn.close()
+@pytest.mark.parametrize("protocol", ["ucx", "ucxx", "ucx-old"])
+def test_rdd_protocol(protocol):
+    """Test rapids-dask-dependency protocol selection"""
+    if protocol == "ucx":
+        test_func_name = "_test_protocol_ucx"
+    elif protocol == "ucxx":
+        test_func_name = "_test_protocol_ucxx"
+    else:
+        test_func_name = "_test_protocol_ucx_old"
+    # Create a pipe for communication between parent and child processes
+    parent_conn, child_conn = mp.Pipe()
+    p = mp.Process(
+        target=_run_test_with_output_capture, args=(test_func_name, child_conn)
+    )
+    p.start()
+    p.join(timeout=60)
+    if p.is_alive():
+        p.kill()
+        p.close()
+        raise TimeoutError("Test process timed out")
+    # Get the result from the child process
+    success, output = parent_conn.recv()
+    # Check that the test passed
+    assert success, f"Test failed in subprocess. Output:\n{output}"
+    # For the ucx protocol, check if warnings are printed when distributed_ucxx is not
+    # available
+    if protocol == "ucx" and not _has_distributed_ucxx():
+        # Check if the warning about protocol='ucx' is printed
+        print(f"Output for {protocol} protocol:\n{output}")
+        assert (
+            "you have requested protocol='ucx'" in output
+        ), f"Expected warning not found in output: {output}"
+        assert (
+            "'distributed-ucxx' is not installed" in output
+        ), f"Expected warning about distributed-ucxx not found in output: {output}"
+    elif protocol == "ucx" and _has_distributed_ucxx():
+        # When distributed_ucxx is available, the warning should NOT be printed
+        assert "you have requested protocol='ucx'" not in output, (
+            "Warning should not be printed when distributed_ucxx is available: "
+            f"{output}"
+        )
+    elif protocol == "ucx-old":
+        # The ucx-old protocol should not generate warnings
+        assert (
+            "you have requested protocol='ucx'" not in output
+        ), f"Warning should not be printed for ucx-old protocol: {output}"

dask_cuda/tests/test_spill.py CHANGED Viewed

@@ -20,6 +20,13 @@ import dask_cudf
 from dask_cuda import LocalCUDACluster, utils
 from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
+if not utils.has_device_memory_resource():
+    pytest.skip(
+        "No spilling tests supported for devices without memory resources. "
+        "See https://github.com/rapidsai/dask-cuda/issues/1510",
+        allow_module_level=True,
+    )
 if utils.get_device_total_memory() < 1e10:
     pytest.skip("Not enough GPU memory", allow_module_level=True)

dask-cuda 25.6.0__py3-none-any.whl → 25.8.0__py3-none-any.whl

dask-cuda 25.6.0__py3-none-any.whl → 25.8.0__py3-none-any.whl