dask-cuda 25.6.0__py3-none-any.whl → 25.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_cuda/GIT_COMMIT +1 -1
- dask_cuda/VERSION +1 -1
- dask_cuda/benchmarks/common.py +4 -1
- dask_cuda/benchmarks/local_cudf_groupby.py +4 -1
- dask_cuda/benchmarks/local_cudf_merge.py +5 -2
- dask_cuda/benchmarks/local_cudf_shuffle.py +5 -2
- dask_cuda/benchmarks/local_cupy.py +4 -1
- dask_cuda/benchmarks/local_cupy_map_overlap.py +4 -1
- dask_cuda/benchmarks/utils.py +7 -4
- dask_cuda/cli.py +21 -15
- dask_cuda/cuda_worker.py +27 -57
- dask_cuda/device_host_file.py +31 -15
- dask_cuda/disk_io.py +7 -4
- dask_cuda/explicit_comms/comms.py +11 -7
- dask_cuda/explicit_comms/dataframe/shuffle.py +23 -23
- dask_cuda/get_device_memory_objects.py +3 -3
- dask_cuda/initialize.py +80 -44
- dask_cuda/local_cuda_cluster.py +63 -66
- dask_cuda/plugins.py +17 -16
- dask_cuda/proxify_device_objects.py +12 -10
- dask_cuda/proxify_host_file.py +30 -27
- dask_cuda/proxy_object.py +20 -17
- dask_cuda/tests/conftest.py +41 -0
- dask_cuda/tests/test_dask_cuda_worker.py +109 -25
- dask_cuda/tests/test_dgx.py +10 -18
- dask_cuda/tests/test_explicit_comms.py +30 -12
- dask_cuda/tests/test_from_array.py +7 -5
- dask_cuda/tests/test_initialize.py +16 -37
- dask_cuda/tests/test_local_cuda_cluster.py +159 -52
- dask_cuda/tests/test_proxify_host_file.py +19 -3
- dask_cuda/tests/test_proxy.py +18 -16
- dask_cuda/tests/test_rdd_ucx.py +160 -0
- dask_cuda/tests/test_spill.py +7 -0
- dask_cuda/tests/test_utils.py +106 -20
- dask_cuda/tests/test_worker_spec.py +5 -2
- dask_cuda/utils.py +261 -38
- dask_cuda/utils_test.py +23 -7
- dask_cuda/worker_common.py +196 -0
- dask_cuda/worker_spec.py +12 -5
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/METADATA +2 -2
- dask_cuda-25.8.0.dist-info/RECORD +63 -0
- dask_cuda-25.8.0.dist-info/top_level.txt +6 -0
- shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
- shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
- shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
- dask_cuda-25.6.0.dist-info/RECORD +0 -57
- dask_cuda-25.6.0.dist-info/top_level.txt +0 -4
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/WHEEL +0 -0
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.8.0.dist-info}/licenses/LICENSE +0 -0
dask_cuda/proxify_device_objects.py
CHANGED

@@ -1,4 +1,6 @@
-# Copyright (c) 2025 NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import functools
 import pydoc
 from collections import defaultdict
@@ -59,9 +61,9 @@ def proxify_device_objects(
 ) -> T:
     """Wrap device objects in ProxyObject

-    Search through
-    It uses
-    objects found in
+    Search through ``obj`` and wraps all CUDA device objects in ProxyObject.
+    It uses ``proxied_id_to_proxy`` to make sure that identical CUDA device
+    objects found in ``obj`` are wrapped by the same ProxyObject.

     Parameters
     ----------
@@ -69,11 +71,11 @@ def proxify_device_objects(
         Object to search through or wrap in a ProxyObject.
     proxied_id_to_proxy: MutableMapping[int, ProxyObject]
         Dict mapping the id() of proxied objects (CUDA device objects) to
-        their proxy and is updated with all new proxied objects found in
+        their proxy and is updated with all new proxied objects found in ``obj``.
         If None, use an empty dict.
     found_proxies: List[ProxyObject]
-        List of found proxies in
-        including those already in
+        List of found proxies in ``obj``. Notice, this includes all proxies found,
+        including those already in ``proxied_id_to_proxy``.
         If None, use an empty list.
     excl_proxies: bool
         Don't add found objects that are already ProxyObject to found_proxies.
@@ -84,7 +86,7 @@ def proxify_device_objects(
     Returns
     -------
     ret: Any
-        A copy of
+        A copy of ``obj`` where all CUDA device objects are wrapped in ProxyObject
     """
     _register_incompatible_types()

@@ -103,7 +105,7 @@ def unproxify_device_objects(
 ) -> T:
     """Unproxify device objects

-    Search through
+    Search through ``obj`` and un-wraps all CUDA device objects.

     Parameters
     ----------
@@ -118,7 +120,7 @@ def unproxify_device_objects(
     Returns
     -------
     ret: Any
-        A copy of
+        A copy of ``obj`` where all CUDA device objects are unproxify
     """
     if isinstance(obj, dict):
         return {
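The hunks above only complete truncated docstrings, but they spell out the de-duplication contract of proxify_device_objects(). A minimal sketch of that contract, assuming a GPU environment with CuPy installed (the array and variable names are illustrative, not from the diff):

```python
# Sketch: identical CUDA device objects inside ``obj`` end up wrapped by the
# same ProxyObject, per the docstring above.
import cupy

from dask_cuda.proxify_device_objects import (
    proxify_device_objects,
    unproxify_device_objects,
)

arr = cupy.arange(10)
found = []  # populated with every proxy discovered during the walk
proxied = proxify_device_objects([arr, arr], found_proxies=found)

assert proxied[0] is proxied[1]  # one proxy shared by both list entries

# Returns a copy of the input with all CUDA device objects un-wrapped.
unwrapped = unproxify_device_objects(proxied)
```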
dask_cuda/proxify_host_file.py
CHANGED

@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import abc
 import gc
 import io
@@ -64,29 +67,29 @@ class Proxies(abc.ABC):

     @abc.abstractmethod
     def mem_usage_add(self, proxy: ProxyObject) -> None:
-        """Given a new proxy, update
+        """Given a new proxy, update ``self._mem_usage``"""

     @abc.abstractmethod
     def mem_usage_remove(self, proxy: ProxyObject) -> None:
-        """Removal of proxy, update
+        """Removal of proxy, update ``self._mem_usage``"""

     @abc.abstractmethod
     def buffer_info(self) -> List[Tuple[float, int, List[ProxyObject]]]:
         """Return a list of buffer information

         The returned format is:
-
+            ``[(<access-time>, <size-of-buffer>, <list-of-proxies>), ...]``
         """

     def add(self, proxy: ProxyObject) -> None:
-        """Add a proxy for tracking, calls
+        """Add a proxy for tracking, calls ``self.mem_usage_add``"""
         assert not self.contains_proxy_id(id(proxy))
         with self._lock:
             self._proxy_id_to_proxy[id(proxy)] = weakref.ref(proxy)
             self.mem_usage_add(proxy)

     def remove(self, proxy: ProxyObject) -> None:
-        """Remove proxy from tracking, calls
+        """Remove proxy from tracking, calls ``self.mem_usage_remove``"""
         with self._lock:
             del self._proxy_id_to_proxy[id(proxy)]
             self.mem_usage_remove(proxy)
@@ -323,13 +326,13 @@ class ProxyManager:
         assert header["serializer"] == pxy.serializer

     def proxify(self, obj: T, duplicate_check=True) -> Tuple[T, bool]:
-        """Proxify
+        """Proxify ``obj`` and add found proxies to the ``Proxies`` collections

-        Search through
+        Search through ``obj`` and wrap all CUDA device objects in ProxyObject.
         If duplicate_check is True, identical CUDA device objects found in
-
+        ``obj`` are wrapped by the same ProxyObject.

-        Returns the proxified object and a boolean, which is
+        Returns the proxified object and a boolean, which is ``True`` when one or
         more incompatible-types were found.

         Parameters
@@ -337,7 +340,7 @@ class ProxyManager:
         obj
             Object to search through or wrap in a ProxyObject.
         duplicate_check
-            Make sure that identical CUDA device objects found in
+            Make sure that identical CUDA device objects found in ``obj`` are
             wrapped by the same ProxyObject. This check comes with a significant
             overhead hence it is recommended setting to False when it is known
             that no duplicate exist.
@@ -380,11 +383,11 @@ class ProxyManager:
         proxies_access: Callable[[], List[Tuple[float, int, List[ProxyObject]]]],
         serializer: Callable[[ProxyObject], None],
     ) -> int:
-        """Evict buffers retrieved by calling
+        """Evict buffers retrieved by calling ``proxies_access``

-        Calls
-        enough proxies to free up at a minimum
-        spill a proxy,
+        Calls ``proxies_access`` to retrieve a list of proxies and then spills
+        enough proxies to free up at a minimum ``nbytes`` bytes. In order to
+        spill a proxy, ``serializer`` is called.

         Parameters
         ----------
@@ -392,7 +395,7 @@ class ProxyManager:
             Number of bytes to evict.
         proxies_access: callable
             Function that returns a list of proxies pack in a tuple like:
-
+            ``[(<access-time>, <size-of-buffer>, <list-of-proxies>), ...]``
         serializer: callable
             Function that serialize the given proxy object.

@@ -423,7 +426,7 @@ class ProxyManager:
     def maybe_evict_from_device(self, extra_dev_mem=0) -> None:
         """Evict buffers until total memory usage is below device-memory-limit

-        Adds
+        Adds ``extra_dev_mem`` to the current total memory usage when comparing
         against device-memory-limit.
         """
         mem_over_usage = (
@@ -439,7 +442,7 @@ class ProxyManager:
     def maybe_evict_from_host(self, extra_host_mem=0) -> None:
         """Evict buffers until total memory usage is below host-memory-limit

-        Adds
+        Adds ``extra_host_mem`` to the current total memory usage when comparing
         against device-memory-limit.
         """
         assert self._host_memory_limit is not None
@@ -466,7 +469,7 @@ class ProxifyHostFile(MutableMapping):
     workers in Distributed.

     It wraps all CUDA device objects in a ProxyObject instance and maintains
-
+    ``device_memory_limit`` by spilling ProxyObject on-the-fly. This addresses
     some issues with the default DeviceHostFile host, which tracks device
     memory inaccurately see <https://github.com/rapidsai/dask-cuda/pull/451>

@@ -488,16 +491,16 @@ class ProxifyHostFile(MutableMapping):
     memory_limit: int
         Number of bytes of host memory used before spilling to disk.
     shared_filesystem: bool or None, default None
-        Whether the
+        Whether the ``local_directory`` above is shared between all workers or not.
         If ``None``, the "jit-unspill-shared-fs" config value are used, which
         defaults to False.
-        Notice, a shared filesystem must support the
+        Notice, a shared filesystem must support the ``os.link()`` operation.
     compatibility_mode: bool or None, default None
         Enables compatibility-mode, which means that items are un-proxified before
         retrieval. This makes it possible to get some of the JIT-unspill benefits
         without having to be ProxyObject compatible. In order to still allow specific
-        ProxyObjects, set the
-
+        ProxyObjects, set the ``mark_as_explicit_proxies=True`` when proxifying with
+        ``proxify_device_objects()``. If ``None``, the "jit-unspill-compatibility-mode"
         config value are used, which defaults to False.
     spill_on_demand: bool or None, default None
         Enables spilling when the RMM memory pool goes out of memory. If ``None``,
@@ -639,7 +642,7 @@ class ProxifyHostFile(MutableMapping):
         """Manually evict 1% of host limit.

         Dask uses this to trigger CPU-to-Disk spilling. We don't know how much
-        we need to spill but Dask will call
+        we need to spill but Dask will call ``evict()`` repeatedly until enough
         is spilled. We ask for 1% each time.

         Return
@@ -658,9 +661,9 @@ class ProxifyHostFile(MutableMapping):

     @property
     def fast(self):
-        """Alternative access to
+        """Alternative access to ``.evict()`` used by Dask

-        Dask expects
+        Dask expects ``.fast.evict()`` to be available for manually triggering
         of CPU-to-Disk spilling.
         """
         if len(self.manager._host) == 0:
@@ -758,9 +761,9 @@ class ProxifyHostFile(MutableMapping):

     @classmethod
     def serialize_proxy_to_disk_inplace(cls, proxy: ProxyObject) -> None:
-        """Serialize
+        """Serialize ``proxy`` to disk.

-        Avoid de-serializing if
+        Avoid de-serializing if ``proxy`` is serialized using "dask" or
         "pickle". In this case the already serialized data is written
         directly to disk.

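ProxifyHostFile is only active when JIT-unspill is enabled. A hedged sketch of switching it on through the public API (the limit values below are illustrative, not from the diff):

```python
# Sketch: a local cluster using ProxifyHostFile via jit_unspill=True.
from distributed import Client

from dask_cuda import LocalCUDACluster

if __name__ == "__main__":
    cluster = LocalCUDACluster(
        jit_unspill=True,            # use ProxifyHostFile instead of DeviceHostFile
        device_memory_limit="4GB",   # device-to-host spilling threshold
        memory_limit="8GB",          # host-to-disk spilling threshold
    )
    client = Client(cluster)
```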
dask_cuda/proxy_object.py
CHANGED

@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import copy as _copy
 import functools
 import operator
@@ -52,21 +55,21 @@ def asproxy(
     serializers: Optional[Iterable[str]] = None,
     subclass: Optional[Type["ProxyObject"]] = None,
 ) -> "ProxyObject":
-    """Wrap
+    """Wrap ``obj`` in a ProxyObject object if it isn't already.

     Parameters
     ----------
     obj: object
         Object to wrap in a ProxyObject object.
     serializers: Iterable[str], optional
-        Serializers to use to serialize
+        Serializers to use to serialize ``obj``. If None, no serialization is done.
     subclass: class, optional
         Specify a subclass of ProxyObject to create instead of ProxyObject.
-
+        ``subclass`` must be pickable.

     Returns
     -------
-    The ProxyObject proxying
+    The ProxyObject proxying ``obj``
     """
     if isinstance(obj, ProxyObject):  # Already a proxy object
         ret = obj
@@ -119,7 +122,7 @@ def unproxy(obj):

     Returns
     -------
-    The proxied object or
+    The proxied object or ``obj`` itself if it isn't a ProxyObject
     """
     try:
         obj = obj._pxy_deserialize()
@@ -185,16 +188,16 @@ class ProxyDetail:
         Dictionary of attributes that are accessible without deserializing
         the proxied object.
     type_serialized: bytes
-        Pickled type of
+        Pickled type of ``obj``.
     typename: str
-        Name of the type of
+        Name of the type of ``obj``.
     is_cuda_object: boolean
-        Whether
+        Whether ``obj`` is a CUDA object or not.
     subclass: bytes
         Pickled type to use instead of ProxyObject when deserializing. The type
         must inherit from ProxyObject.
     serializers: str, optional
-        Serializers to use to serialize
+        Serializers to use to serialize ``obj``. If None, no serialization is done.
     explicit_proxy: bool
         Mark the proxy object as "explicit", which means that the user allows it
         as input argument to dask tasks even in compatibility-mode.
@@ -258,7 +261,7 @@ class ProxyDetail:
         return self.serializer is not None

     def serialize(self, serializers: Iterable[str]) -> Tuple[dict, list]:
-        """Inplace serialization of the proxied object using the
+        """Inplace serialization of the proxied object using the ``serializers``

         Parameters
         ----------
@@ -333,7 +336,7 @@ class ProxyObject:
     ProxyObject has some limitations and doesn't mimic the proxied object perfectly.
     Thus, if encountering problems remember that it is always possible to use unproxy()
     to access the proxied object directly or disable JIT deserialization completely
-    with
+    with ``jit_unspill=False``.

     Type checking using instance() works as expected but direct type checking
     doesn't:
@@ -386,7 +389,7 @@ class ProxyObject:
         serializers: Iterable[str],
         proxy_detail: Optional[ProxyDetail] = None,
     ) -> None:
-        """Inplace serialization of the proxied object using the
+        """Inplace serialization of the proxied object using the ``serializers``

         Parameters
         ----------
@@ -787,8 +790,8 @@
 def obj_pxy_is_device_object(obj: ProxyObject):
     """
     In order to avoid de-serializing the proxied object,
-    we check
-
+    we check ``is_cuda_object`` instead of the default
+    ``hasattr(o, "__cuda_array_interface__")`` check.
     """
     return obj._pxy_get().is_cuda_object

@@ -830,7 +833,7 @@ def obj_pxy_dask_serialize(obj: ProxyObject):

     As serializers, it uses "dask" or "pickle", which means that proxied CUDA objects
     are spilled to main memory before communicated. Deserialization is needed, unless
-    obj is serialized to disk on a shared filesystem see
+    obj is serialized to disk on a shared filesystem see ``handle_disk_serialized()``.
     """
     pxy = obj._pxy_get(copy=True)
     if pxy.serializer == "disk":
@@ -851,7 +854,7 @@ def obj_pxy_cuda_serialize(obj: ProxyObject):

     As serializers, it uses "cuda", which means that proxied CUDA objects are _not_
     spilled to main memory before communicated. However, we still have to handle disk
-    serialized proxied like in
+    serialized proxied like in ``obj_pxy_dask_serialize()``
     """
     pxy = obj._pxy_get(copy=True)
     if pxy.serializer in ("dask", "pickle"):
@@ -897,7 +900,7 @@ def obj_pxy_dask_deserialize(header, frames):


 def unproxify_input_wrapper(func):
-    """Unproxify the input of
+    """Unproxify the input of ``func``"""

     @functools.wraps(func)
     def wrapper(*args, **kwargs):
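The asproxy()/unproxy() pair documented above round-trips a device object; a hedged sketch assuming CuPy is available (since no serializers are passed here, unproxy() should hand back the original object):

```python
# Sketch: wrap and unwrap a device object with asproxy()/unproxy().
import cupy

from dask_cuda.proxy_object import ProxyObject, asproxy, unproxy

arr = cupy.arange(3)
proxy = asproxy(arr)  # no-op if ``arr`` were already a ProxyObject

assert isinstance(proxy, ProxyObject)
assert unproxy(proxy) is arr  # returns the proxied object
assert unproxy(arr) is arr    # non-proxies pass through untouched
```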
dask_cuda/tests/conftest.py
ADDED

@@ -0,0 +1,41 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from dask_cuda.utils import has_device_memory_resource
+
+
+def pytest_configure(config):
+    """Register custom markers."""
+    config.addinivalue_line(
+        "markers",
+        "skip_if_no_device_memory: mark test to skip if device has no dedicated memory "
+        "resource",
+    )
+    config.addinivalue_line(
+        "markers",
+        "skip_if_device_memory: mark test to skip if device has dedicated memory "
+        "resource",
+    )
+
+
+def pytest_collection_modifyitems(items):
+    """Handle skip_if_no_device_memory marker."""
+    for item in items:
+        if item.get_closest_marker("skip_if_no_device_memory"):
+            skip_marker = item.get_closest_marker("skip_if_no_device_memory")
+            reason = skip_marker.kwargs.get(
+                "reason", "Test requires device with dedicated memory resource"
+            )
+            item.add_marker(
+                pytest.mark.skipif(not has_device_memory_resource(), reason=reason)
+            )
+        if item.get_closest_marker("skip_if_device_memory"):
+            skip_marker = item.get_closest_marker("skip_if_device_memory")
+            reason = skip_marker.kwargs.get(
+                "reason", "Test requires device without dedicated memory resource"
+            )
+            item.add_marker(
+                pytest.mark.skipif(has_device_memory_resource(), reason=reason)
+            )
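The two markers registered above are translated into skipif conditions at collection time. A hedged sketch of how a test module might use them (the test names are illustrative); note the hooks read the skip message from the ``reason`` keyword, so a positional message falls back to the default text:

```python
# Sketch: opting tests in or out based on the device's memory resource.
import pytest


@pytest.mark.skip_if_no_device_memory(reason="needs a dedicated memory resource")
def test_runs_on_dedicated_memory_devices():
    ...


@pytest.mark.skip_if_device_memory  # the default reason is used
def test_runs_on_integrated_memory_devices():
    ...
```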
dask_cuda/tests/test_dask_cuda_worker.py
CHANGED

@@ -21,13 +21,16 @@ from dask_cuda.utils import (
     get_gpu_count_mig,
     get_gpu_uuid,
     get_n_gpus,
+    has_device_memory_resource,
     wait_workers,
 )


-@patch.dict(
-
-
+@patch.dict(
+    os.environ,
+    {"CUDA_VISIBLE_DEVICES": "0,3,7,8", "DASK_CUDA_TEST_DISABLE_DEVICE_SPECIFIC": "1"},
+)
+def test_cuda_visible_devices(loop):  # noqa: F811
     with popen(["dask", "scheduler", "--port", "9359", "--no-dashboard"]):
         with popen(
             [
@@ -37,14 +40,10 @@ def test_cuda_visible_devices_and_memory_limit_and_nthreads(loop):  # noqa: F811
                 "127.0.0.1:9359",
                 "--host",
                 "127.0.0.1",
-                "--device-memory-limit",
-                "1 MB",
-                "--nthreads",
-                str(nthreads),
                 "--no-dashboard",
                 "--worker-class",
                 "dask_cuda.utils_test.MockWorker",
-            ]
+            ],
         ):
             with Client("127.0.0.1:9359", loop=loop) as client:
                 assert wait_workers(client, n_gpus=4)
@@ -58,12 +57,43 @@ def test_cuda_visible_devices_and_memory_limit_and_nthreads(loop):  # noqa: F811
                 for v in result.values():
                     del expected[v]

-
+                assert len(expected) == 0
+
+
+def test_memory_limit_and_nthreads(loop):  # noqa: F811
+    nthreads = 4
+
+    device_memory_limit_args = []
+    if has_device_memory_resource():
+        device_memory_limit_args = ["--device-memory-limit", "1 MB"]
+
+    with popen(["dask", "scheduler", "--port", "9359", "--no-dashboard"]):
+        with popen(
+            [
+                "dask",
+                "cuda",
+                "worker",
+                "127.0.0.1:9359",
+                "--host",
+                "127.0.0.1",
+                *device_memory_limit_args,
+                "--nthreads",
+                str(nthreads),
+                "--no-dashboard",
+                "--worker-class",
+                "dask_cuda.utils_test.MockWorker",
+            ],
+        ):
+            with Client("127.0.0.1:9359", loop=loop) as client:
+                assert wait_workers(client, n_gpus=get_n_gpus())
+
+                def get_visible_devices():
+                    return os.environ["CUDA_VISIBLE_DEVICES"]
+
+                workers = client.scheduler_info(n_workers=-1)["workers"]
                 for w in workers.values():
                     assert w["memory_limit"] == MEMORY_LIMIT // len(workers)

-                assert len(expected) == 0
-

 def test_rmm_pool(loop):  # noqa: F811
     rmm = pytest.importorskip("rmm")
@@ -119,11 +149,6 @@ def test_rmm_managed(loop):  # noqa: F811
 def test_rmm_async(loop):  # noqa: F811
     rmm = pytest.importorskip("rmm")

-    driver_version = rmm._cuda.gpu.driverGetVersion()
-    runtime_version = rmm._cuda.gpu.runtimeGetVersion()
-    if driver_version < 11020 or runtime_version < 11020:
-        pytest.skip("cudaMallocAsync not supported")
-
     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
         with popen(
             [
@@ -159,11 +184,6 @@ def test_rmm_async(loop):  # noqa: F811
 def test_rmm_async_with_maximum_pool_size(loop):  # noqa: F811
     rmm = pytest.importorskip("rmm")

-    driver_version = rmm._cuda.gpu.driverGetVersion()
-    runtime_version = rmm._cuda.gpu.runtimeGetVersion()
-    if driver_version < 11020 or runtime_version < 11020:
-        pytest.skip("cudaMallocAsync not supported")
-
     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
         with popen(
             [
@@ -263,8 +283,12 @@ def test_cudf_spill_disabled(loop):  # noqa: F811
                     assert v == 0


+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources cannot enable cuDF spill"
+)
 def test_cudf_spill(loop):  # noqa: F811
     cudf = pytest.importorskip("cudf")
+
     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
         with popen(
             [
@@ -292,6 +316,24 @@ def test_cudf_spill(loop):  # noqa: F811
                     assert v == 2


+@pytest.mark.skip_if_device_memory(
+    "Devices with dedicated memory resources cannot test error"
+)
+def test_cudf_spill_no_dedicated_memory_error():
+    pytest.importorskip("cudf")
+
+    ret = subprocess.run(
+        ["dask", "cuda", "worker", "127.0.0.1:9369", "--enable-cudf-spill"],
+        capture_output=True,
+    )
+
+    assert ret.returncode != 0
+    assert (
+        b"cuDF spilling is not supported on devices without dedicated memory"
+        in ret.stderr
+    )
+
+
 @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
 def test_dashboard_address(loop):  # noqa: F811
     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
@@ -472,6 +514,11 @@ def test_rmm_track_allocations(loop):  # noqa: F811
 @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
 def test_get_cluster_configuration(loop):  # noqa: F811
     pytest.importorskip("rmm")
+
+    device_memory_limit_args = []
+    if has_device_memory_resource():
+        device_memory_limit_args += ["--device-memory-limit", "30 B"]
+
     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
         with popen(
             [
@@ -481,8 +528,7 @@ def test_get_cluster_configuration(loop):  # noqa: F811
                 "127.0.0.1:9369",
                 "--host",
                 "127.0.0.1",
-
-                "30 B",
+                *device_memory_limit_args,
                 "--rmm-pool-size",
                 "2 GB",
                 "--rmm-maximum-pool-size",
@@ -499,12 +545,17 @@
             assert ret["[plugin] RMMSetup"]["initial_pool_size"] == 2000000000
             assert ret["[plugin] RMMSetup"]["maximum_pool_size"] == 3000000000
             assert ret["jit-unspill"] is False
-
+            if has_device_memory_resource():
+                assert ret["device-memory-limit"] == 30


 @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support fractional limits"
+)
 def test_worker_fraction_limits(loop):  # noqa: F811
     pytest.importorskip("rmm")
+
     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
         with popen(
             [
@@ -545,6 +596,33 @@ def test_worker_fraction_limits(loop):  # noqa: F811
         )


+@pytest.mark.parametrize(
+    "argument", ["pool_size", "maximum_pool_size", "release_threshold"]
+)
+@pytest.mark.skip_if_device_memory(
+    "Devices with dedicated memory resources cannot test error"
+)
+def test_worker_fraction_limits_no_dedicated_memory(argument):
+    if argument == "pool_size":
+        argument_list = ["--rmm-pool-size", "0.1"]
+    elif argument == "maximum_pool_size":
+        argument_list = ["--rmm-pool-size", "1 GiB", "--rmm-maximum-pool-size", "0.1"]
+    else:
+        argument_list = ["--rmm-async", "--rmm-release-threshold", "0.1"]
+
+    with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
+        ret = subprocess.run(
+            ["dask", "cuda", "worker", "127.0.0.1:9369", *argument_list],
+            capture_output=True,
+        )
+
+        assert ret.returncode != 0
+        assert (
+            b"Fractional of total device memory not supported in devices without a "
+            b"dedicated memory resource" in ret.stderr
+        )
+
+
 @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
 def test_worker_timeout():
     ret = subprocess.run(
@@ -595,6 +673,12 @@ def test_worker_cudf_spill_warning(enable_cudf_spill_warning):  # noqa: F811
         capture_output=True,
     )
     if enable_cudf_spill_warning:
-
+        if has_device_memory_resource():
+            assert b"UserWarning: cuDF spilling is enabled" in ret.stderr
+        else:
+            assert (
+                b"cuDF spilling is not supported on devices without dedicated "
+                b"memory" in ret.stderr
+            )
     else:
         assert b"UserWarning: cuDF spilling is enabled" not in ret.stderr