dask-cuda 25.6.0__py3-none-any.whl → 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. dask_cuda/GIT_COMMIT +1 -1
  2. dask_cuda/VERSION +1 -1
  3. dask_cuda/benchmarks/common.py +4 -1
  4. dask_cuda/benchmarks/local_cudf_groupby.py +3 -0
  5. dask_cuda/benchmarks/local_cudf_merge.py +4 -1
  6. dask_cuda/benchmarks/local_cudf_shuffle.py +4 -1
  7. dask_cuda/benchmarks/local_cupy.py +3 -0
  8. dask_cuda/benchmarks/local_cupy_map_overlap.py +3 -0
  9. dask_cuda/benchmarks/utils.py +6 -3
  10. dask_cuda/cli.py +21 -15
  11. dask_cuda/cuda_worker.py +28 -58
  12. dask_cuda/device_host_file.py +31 -15
  13. dask_cuda/disk_io.py +7 -4
  14. dask_cuda/explicit_comms/comms.py +11 -7
  15. dask_cuda/explicit_comms/dataframe/shuffle.py +23 -23
  16. dask_cuda/get_device_memory_objects.py +4 -7
  17. dask_cuda/initialize.py +149 -94
  18. dask_cuda/local_cuda_cluster.py +52 -70
  19. dask_cuda/plugins.py +17 -16
  20. dask_cuda/proxify_device_objects.py +12 -10
  21. dask_cuda/proxify_host_file.py +30 -27
  22. dask_cuda/proxy_object.py +20 -17
  23. dask_cuda/tests/conftest.py +41 -0
  24. dask_cuda/tests/test_cudf_builtin_spilling.py +3 -1
  25. dask_cuda/tests/test_dask_cuda_worker.py +109 -25
  26. dask_cuda/tests/test_dask_setup.py +193 -0
  27. dask_cuda/tests/test_dgx.py +20 -44
  28. dask_cuda/tests/test_explicit_comms.py +31 -12
  29. dask_cuda/tests/test_from_array.py +4 -6
  30. dask_cuda/tests/test_initialize.py +233 -65
  31. dask_cuda/tests/test_local_cuda_cluster.py +129 -68
  32. dask_cuda/tests/test_proxify_host_file.py +28 -7
  33. dask_cuda/tests/test_proxy.py +15 -13
  34. dask_cuda/tests/test_spill.py +10 -3
  35. dask_cuda/tests/test_utils.py +100 -29
  36. dask_cuda/tests/test_worker_spec.py +6 -0
  37. dask_cuda/utils.py +211 -42
  38. dask_cuda/utils_test.py +10 -7
  39. dask_cuda/worker_common.py +196 -0
  40. dask_cuda/worker_spec.py +6 -1
  41. {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/METADATA +11 -4
  42. dask_cuda-25.10.0.dist-info/RECORD +63 -0
  43. dask_cuda-25.10.0.dist-info/top_level.txt +6 -0
  44. shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
  45. shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
  46. shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
  47. dask_cuda-25.6.0.dist-info/RECORD +0 -57
  48. dask_cuda-25.6.0.dist-info/top_level.txt +0 -4
  49. {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/WHEEL +0 -0
  50. {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/entry_points.txt +0 -0
  51. {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/licenses/LICENSE +0 -0
dask_cuda/tests/test_utils.py CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import os
 from unittest.mock import patch
 
@@ -15,6 +18,7 @@ from dask_cuda.utils import (
     get_n_gpus,
     get_preload_options,
     get_ucx_config,
+    has_device_memory_resource,
     nvml_device_index,
     parse_cuda_visible_device,
     parse_device_memory_limit,
@@ -76,22 +80,18 @@ def test_get_device_total_memory():
     for i in range(get_n_gpus()):
         with cuda.gpus[i]:
             total_mem = get_device_total_memory(i)
-            assert type(total_mem) is int
-            assert total_mem > 0
+            if has_device_memory_resource():
+                assert type(total_mem) is int
+                assert total_mem > 0
+            else:
+                assert total_mem is None
 
 
-@pytest.mark.parametrize(
-    "protocol",
-    ["ucx", "ucxx"],
-)
-def test_get_preload_options_default(protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+def test_get_preload_options_default():
+    pytest.importorskip("distributed_ucxx")
 
     opts = get_preload_options(
-        protocol=protocol,
+        protocol="ucx",
         create_cuda_context=True,
     )
 
@@ -101,21 +101,14 @@ def test_get_preload_options_default(protocol):
     assert opts["preload_argv"] == ["--create-cuda-context"]
 
 
-@pytest.mark.parametrize(
-    "protocol",
-    ["ucx", "ucxx"],
-)
 @pytest.mark.parametrize("enable_tcp", [True, False])
 @pytest.mark.parametrize("enable_infiniband", [True, False])
 @pytest.mark.parametrize("enable_nvlink", [True, False])
-def test_get_preload_options(protocol, enable_tcp, enable_infiniband, enable_nvlink):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+def test_get_preload_options(enable_tcp, enable_infiniband, enable_nvlink):
+    pytest.importorskip("distributed_ucxx")
 
     opts = get_preload_options(
-        protocol=protocol,
+        protocol="ucx",
         create_cuda_context=True,
         enable_tcp_over_ucx=enable_tcp,
         enable_infiniband=enable_infiniband,
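
As context for the assertions above, a minimal sketch of how these preload options are consumed. The flags shown are taken from the assertions in this test file; any dict keys beyond "preload_argv" are an assumption:

    from dask_cuda.utils import get_preload_options

    opts = get_preload_options(
        protocol="ucx",
        create_cuda_context=True,
        enable_tcp_over_ucx=True,
    )
    # Requested features surface as CLI-style arguments handed to the
    # worker preload module.
    assert "--create-cuda-context" in opts["preload_argv"]
    assert "--enable-tcp-over-ucx" in opts["preload_argv"]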
@@ -139,7 +132,7 @@ def test_get_preload_options(protocol, enable_tcp, enable_infiniband, enable_nvl
 @pytest.mark.parametrize("enable_nvlink", [True, False, None])
 @pytest.mark.parametrize("enable_infiniband", [True, False, None])
 def test_get_ucx_config(enable_tcp_over_ucx, enable_infiniband, enable_nvlink):
-    pytest.importorskip("ucp")
+    pytest.importorskip("distributed_ucxx")
 
     kwargs = {
         "enable_tcp_over_ucx": enable_tcp_over_ucx,
@@ -234,20 +227,98 @@ def test_parse_visible_devices():
         parse_cuda_visible_device([])
 
 
+def test_parse_device_bytes():
+    total = get_device_total_memory(0)
+
+    assert parse_device_memory_limit(None) is None
+    assert parse_device_memory_limit(0) is None
+    assert parse_device_memory_limit("0") is None
+    assert parse_device_memory_limit("0.0") is None
+    assert parse_device_memory_limit("0 GiB") is None
+
+    assert parse_device_memory_limit(1) == 1
+    assert parse_device_memory_limit("1") == 1
+
+    assert parse_device_memory_limit(1000000000) == 1000000000
+    assert parse_device_memory_limit("1GB") == 1000000000
+
+    if has_device_memory_resource(0):
+        assert parse_device_memory_limit(1.0) == total
+        assert parse_device_memory_limit("1.0") == total
+
+        assert parse_device_memory_limit(0.8) == int(total * 0.8)
+        assert parse_device_memory_limit(0.8, alignment_size=256) == int(
+            total * 0.8 // 256 * 256
+        )
+
+        assert parse_device_memory_limit("default") == parse_device_memory_limit(0.8)
+    else:
+        assert parse_device_memory_limit("default") is None
+
+        with pytest.raises(ValueError):
+            assert parse_device_memory_limit(1.0) == total
+        with pytest.raises(ValueError):
+            assert parse_device_memory_limit("1.0") == total
+        with pytest.raises(ValueError):
+            assert parse_device_memory_limit(0.8) == int(total * 0.8)
+        with pytest.raises(ValueError):
+            assert parse_device_memory_limit(0.8, alignment_size=256) == int(
+                total * 0.8 // 256 * 256
+            )
+
+
 def test_parse_device_memory_limit():
     total = get_device_total_memory(0)
 
-    assert parse_device_memory_limit(None) == total
-    assert parse_device_memory_limit(0) == total
+    assert parse_device_memory_limit(None) is None
+    assert parse_device_memory_limit(0) is None
+    assert parse_device_memory_limit("0") is None
+    assert parse_device_memory_limit(0.0) is None
+    assert parse_device_memory_limit("0 GiB") is None
+
+    assert parse_device_memory_limit(1) == 1
+    assert parse_device_memory_limit("1") == 1
+
     assert parse_device_memory_limit("auto") == total
 
-    assert parse_device_memory_limit(0.8) == int(total * 0.8)
-    assert parse_device_memory_limit(0.8, alignment_size=256) == int(
-        total * 0.8 // 256 * 256
-    )
     assert parse_device_memory_limit(1000000000) == 1000000000
     assert parse_device_memory_limit("1GB") == 1000000000
 
+    if has_device_memory_resource(0):
+        assert parse_device_memory_limit(1.0) == total
+        assert parse_device_memory_limit("1.0") == total
+
+        assert parse_device_memory_limit(0.8) == int(total * 0.8)
+        assert parse_device_memory_limit(0.8, alignment_size=256) == int(
+            total * 0.8 // 256 * 256
+        )
+        assert parse_device_memory_limit("default") == parse_device_memory_limit(0.8)
+    else:
+        assert parse_device_memory_limit("default") is None
+
+        with pytest.raises(ValueError):
+            assert parse_device_memory_limit(1.0) == total
+        with pytest.raises(ValueError):
+            assert parse_device_memory_limit("1.0") == total
+        with pytest.raises(ValueError):
+            assert parse_device_memory_limit(0.8) == int(total * 0.8)
+        with pytest.raises(ValueError):
+            assert parse_device_memory_limit(0.8, alignment_size=256) == int(
+                total * 0.8 // 256 * 256
+            )
+
+
+def test_has_device_memory_resource():
+    has_memory_resource = has_device_memory_resource()
+    total = get_device_total_memory(0)
+
+    if has_memory_resource:
+        # Tested only in devices with a memory resource
+        assert total == parse_device_memory_limit("auto")
+    else:
+        # Tested only in devices without a memory resource
+        assert total is None
+
 
 def test_parse_visible_mig_devices():
     pynvml.nvmlInit()
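
Taken together, these tests pin down the new device-memory-limit semantics; a short sketch of the behavior they encode (all values mirror the assertions above):

    from dask_cuda.utils import (
        has_device_memory_resource,
        parse_device_memory_limit,
    )

    # 0/None now mean "spilling disabled" rather than "total device memory".
    assert parse_device_memory_limit(None) is None
    assert parse_device_memory_limit("0 GiB") is None

    # Fractions of total memory require a dedicated memory resource.
    if has_device_memory_resource(0):
        limit = parse_device_memory_limit(0.8)  # 80% of total device memory
    else:
        limit = parse_device_memory_limit("default")  # None on SoC-style devices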
dask_cuda/tests/test_worker_spec.py CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import pytest
 
 from distributed import Nanny
@@ -45,6 +48,9 @@ def test_worker_spec(
     enable_infiniband,
     enable_nvlink,
 ):
+    if protocol == "ucx":
+        pytest.importorskip("distributed_ucxx")
+
     def _test():
         return worker_spec(
             CUDA_VISIBLE_DEVICES=list(range(num_devices)),
dask_cuda/utils.py CHANGED
@@ -18,7 +18,6 @@ import pynvml
 import toolz
 
 import dask
-import distributed  # noqa: required for dask.config.get("distributed.comm.ucx")
 from dask.config import canonical_name
 from dask.utils import format_bytes, parse_bytes
 from distributed import wait
@@ -43,7 +42,7 @@ def unpack_bitmask(x, mask_bits=64):
     x: list of int
         A list of integers
     mask_bits: int
-        An integer determining the bitwidth of `x`
+        An integer determining the bitwidth of ``x``
 
     Examples
     --------
@@ -220,9 +219,44 @@ def get_device_total_memory(device_index=0):
     ----------
     device_index: int or str
         The index or UUID of the device from which to obtain the total memory.
+
+    Returns
+    -------
+    The total memory of the CUDA device in bytes, or ``None`` for devices that do not
+    have a dedicated memory resource, as is usually the case for system on a chip (SoC)
+    devices.
+    """
+    handle = get_gpu_handle(device_index)
+
+    try:
+        return pynvml.nvmlDeviceGetMemoryInfo(handle).total
+    except pynvml.NVMLError_NotSupported:
+        return None
+
+
+def has_device_memory_resource(device_index=0):
+    """Determine whether a CUDA device has a dedicated memory resource.
+
+    Certain devices have no dedicated memory resource, such as system on a chip (SoC)
+    devices.
+
+    Parameters
+    ----------
+    device_index: int or str
+        The index or UUID of the device to check.
+
+    Returns
+    -------
+    Whether the device has a dedicated memory resource.
     """
     handle = get_gpu_handle(device_index)
-    return pynvml.nvmlDeviceGetMemoryInfo(handle).total
+
+    try:
+        pynvml.nvmlDeviceGetMemoryInfo(handle).total
+    except pynvml.NVMLError_NotSupported:
+        return False
+    else:
+        return True
 
 
 def get_ucx_config(
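
The relationship between these two helpers, as a small sketch (here "SoC-style" means any device for which NVML raises NVMLError_NotSupported on memory queries):

    from dask_cuda.utils import get_device_total_memory, has_device_memory_resource

    if has_device_memory_resource(0):
        # Dedicated memory resource: NVML reports a positive byte count.
        assert isinstance(get_device_total_memory(0), int)
    else:
        # No dedicated memory resource: total memory is reported as None.
        assert get_device_total_memory(0) is None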
@@ -231,10 +265,15 @@
     enable_nvlink=None,
     enable_rdmacm=None,
 ):
-    ucx_config = dask.config.get("distributed.comm.ucx")
+    try:
+        import distributed_ucxx
+    except ImportError:
+        return None
+
+    distributed_ucxx.config.setup_config()
+    ucx_config = dask.config.get("distributed-ucxx")
 
     ucx_config[canonical_name("create-cuda-context", ucx_config)] = True
-    ucx_config[canonical_name("reuse-endpoints", ucx_config)] = False
 
     # If any transport is explicitly disabled (`False`) by the user, others that
     # are not specified should be enabled (`True`). If transports are explicitly
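
Because get_ucx_config now returns None when distributed-ucxx is not installed, callers need a guard; a hedged sketch of the pattern (the fallback shown is illustrative, not taken from this diff):

    from dask_cuda.utils import get_ucx_config

    ucx_config = get_ucx_config(enable_tcp_over_ucx=True)
    if ucx_config is None:
        # distributed_ucxx is unavailable, so no UCX configuration applies.
        ucx_config = {}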
@@ -316,7 +355,7 @@
     if create_cuda_context:
         preload_options["preload_argv"].append("--create-cuda-context")
 
-    if protocol in ["ucx", "ucxx"]:
+    if protocol in ("ucx", "ucxx"):
         initialize_ucx_argv = []
         if enable_tcp_over_ucx:
             initialize_ucx_argv.append("--enable-tcp-over-ucx")
@@ -365,7 +404,7 @@ def wait_workers(
         Instance of client, used to query for number of workers connected.
     min_timeout: float
         Minimum number of seconds to wait before timeout. This value may be
-        overridden by setting the `DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT` with
+        overridden by setting the ``DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT`` with
         a positive integer.
     seconds_per_gpu: float
         Seconds to wait for each GPU on the system. For example, if its
@@ -374,7 +413,7 @@ def wait_workers(
         used as timeout when larger than min_timeout.
     n_gpus: None or int
         If specified, will wait for that amount of GPUs (i.e., Dask workers)
-        to come online, else waits for a total of `get_n_gpus` workers.
+        to come online, else waits for a total of ``get_n_gpus`` workers.
     timeout_callback: None or callable
         A callback function to be executed if a timeout occurs, ignored if
         None.
@@ -390,7 +429,7 @@
 
     start = time.time()
     while True:
-        if len(client.scheduler_info()["workers"]) == n_gpus:
+        if len(client.scheduler_info(n_workers=-1)["workers"]) == n_gpus:
            return True
        elif time.time() - start > timeout:
            if callable(timeout_callback):
@@ -404,7 +443,7 @@ async def _all_to_all(client):
     """
     Trigger all to all communication between workers and scheduler
     """
-    workers = list(client.scheduler_info()["workers"])
+    workers = list(client.scheduler_info(n_workers=-1)["workers"])
     futs = []
     for w in workers:
         bit_of_data = b"0" * 1
@@ -493,8 +532,8 @@ def nvml_device_index(i, CUDA_VISIBLE_DEVICES):
     """Get the device index for NVML addressing
 
     NVML expects the index of the physical device, unlike CUDA runtime which
-    expects the address relative to `CUDA_VISIBLE_DEVICES`. This function
-    returns the i-th device index from the `CUDA_VISIBLE_DEVICES`
+    expects the address relative to ``CUDA_VISIBLE_DEVICES``. This function
+    returns the i-th device index from the ``CUDA_VISIBLE_DEVICES``
     comma-separated string of devices or list.
 
     Examples
@@ -532,15 +571,125 @@
         raise ValueError("`CUDA_VISIBLE_DEVICES` must be `str` or `list`")
 
 
+def parse_device_bytes(device_bytes, device_index=0, alignment_size=1):
+    """Parse bytes relative to a specific CUDA device.
+
+    Parameters
+    ----------
+    device_bytes: float, int, str or None
+        Can be an integer (bytes), a float (fraction of total device memory), or a
+        string (like ``"5GB"`` or ``"5000M"``); ``0`` and ``None`` are special cases
+        returning ``None``.
+    device_index: int or str
+        The index or UUID of the device from which to obtain the total memory amount.
+        Default: 0.
+    alignment_size: int
+        Number of bytes of alignment to use, i.e., allocation must be a multiple of
+        that size. RMM pool requires 256 bytes alignment.
+
+    Returns
+    -------
+    The parsed bytes value relative to the CUDA device, or ``None`` as convenience if
+    ``device_bytes`` is ``None`` or any value that would evaluate to ``0``.
+
+    Examples
+    --------
+    >>> # On a 32GB CUDA device
+    >>> parse_device_bytes(None)
+    None
+    >>> parse_device_bytes(0)
+    None
+    >>> parse_device_bytes(0.0)
+    None
+    >>> parse_device_bytes("0 MiB")
+    None
+    >>> parse_device_bytes(1.0)
+    34089730048
+    >>> parse_device_bytes(0.8)
+    27271784038
+    >>> parse_device_bytes(1000000000)
+    1000000000
+    >>> parse_device_bytes("1GB")
+    1000000000
+    """
+
+    def _align(size, alignment_size):
+        return size // alignment_size * alignment_size
+
+    def parse_fractional(v):
+        """Parse fractional value.
+
+        Ensures ``int(1)`` and ``str("1")`` are not treated as fractionals, but
+        ``float(1)`` is.
+
+        Fractionals must be represented as a ``float`` within the range
+        ``0.0 < v <= 1.0``.
+
+        Parameters
+        ----------
+        v: int, float or str
+            The value to check if fractional.
+
+        Returns
+        -------
+        The fractional value as a ``float``; raises ``ValueError`` if ``v`` is not
+        fractional.
+        """
+        # Check if `x` matches exactly `int(1)` or `str("1")`, and is not a `float(1)`
+        is_one = lambda x: not isinstance(x, float) and (x == 1 or x == "1")
+
+        if not is_one(v):
+            with suppress(ValueError, TypeError):
+                v = float(v)
+                if 0.0 < v <= 1.0:
+                    return v
+
+        raise ValueError("The value is not fractional")
+
+    # Special case for fractional limit. This comes before `0` special cases because
+    # the `float` may be passed in a `str`, e.g., from `CUDAWorker`.
+    try:
+        fractional_device_bytes = parse_fractional(device_bytes)
+    except ValueError:
+        pass
+    else:
+        if not has_device_memory_resource():
+            raise ValueError(
+                "Fraction of total device memory not supported in devices without "
+                "a dedicated memory resource."
+            )
+        return _align(
+            int(get_device_total_memory(device_index) * fractional_device_bytes),
+            alignment_size,
+        )
+
+    # Special cases that evaluate to `None` or `0`
+    if device_bytes is None:
+        return None
+    elif device_bytes == 0.0:
+        return None
+    elif not isinstance(device_bytes, float) and parse_bytes(device_bytes) == 0:
+        return None
+
+    if isinstance(device_bytes, str):
+        return _align(parse_bytes(device_bytes), alignment_size)
+    else:
+        return _align(int(device_bytes), alignment_size)
+
+
 def parse_device_memory_limit(device_memory_limit, device_index=0, alignment_size=1):
     """Parse memory limit to be used by a CUDA device.
 
     Parameters
     ----------
     device_memory_limit: float, int, str or None
-        This can be a float (fraction of total device memory), an integer (bytes),
-        a string (like 5GB or 5000M), and "auto", 0 or None for the total device
-        size.
+        Can be an integer (bytes), a float (fraction of total device memory), a string
+        (like ``"5GB"`` or ``"5000M"``), or ``"auto"``, ``0`` or ``None`` to disable
+        spilling to host (i.e., allow full device memory usage). The special value
+        ``"default"`` applies Dask-CUDA's recommended defaults: 80% of the total
+        device memory (analogous to ``0.8``), or disabled spilling (analogous to
+        ``"auto"``/``0``/``None``) on devices without a dedicated memory resource,
+        such as system on a chip (SoC) devices.
     device_index: int or str
         The index or UUID of the device from which to obtain the total memory amount.
         Default: 0.
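
The _align helper above floors a size to a multiple of alignment_size; a worked example of the arithmetic (values chosen for illustration):

    def _align(size, alignment_size):
        return size // alignment_size * alignment_size

    assert _align(1000, 256) == 768  # 1000 // 256 == 3, and 3 * 256 == 768
    assert _align(1000000000, 256) == 1000000000  # already a multiple of 256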
@@ -548,10 +697,23 @@ def parse_device_memory_limit(device_memory_limit, device_index=0, alignment_siz
         Number of bytes of alignment to use, i.e., allocation must be a multiple of
         that size. RMM pool requires 256 bytes alignment.
 
+    Returns
+    -------
+    The parsed memory limit in bytes, or ``None`` as convenience if
+    ``device_memory_limit`` is ``None`` or any value that would evaluate to ``0``.
+
     Examples
     --------
     >>> # On a 32GB CUDA device
     >>> parse_device_memory_limit(None)
+    None
+    >>> parse_device_memory_limit(0)
+    None
+    >>> parse_device_memory_limit(0.0)
+    None
+    >>> parse_device_memory_limit("0 MiB")
+    None
+    >>> parse_device_memory_limit(1.0)
     34089730048
     >>> parse_device_memory_limit(0.8)
     27271784038
@@ -559,26 +721,36 @@ def parse_device_memory_limit(device_memory_limit, device_index=0, alignment_siz
     1000000000
     >>> parse_device_memory_limit("1GB")
     1000000000
+    >>> parse_device_memory_limit("auto") == (
+    ...     parse_device_memory_limit(1.0)
+    ...     if has_device_memory_resource()
+    ...     else None
+    ... )
+    True
+    >>> parse_device_memory_limit("default") == (
+    ...     parse_device_memory_limit(0.8)
+    ...     if has_device_memory_resource()
+    ...     else None
+    ... )
+    True
     """
 
-    def _align(size, alignment_size):
-        return size // alignment_size * alignment_size
-
-    if device_memory_limit in {0, "0", None, "auto"}:
-        return _align(get_device_total_memory(device_index), alignment_size)
-
-    with suppress(ValueError, TypeError):
-        device_memory_limit = float(device_memory_limit)
-        if isinstance(device_memory_limit, float) and device_memory_limit <= 1:
-            return _align(
-                int(get_device_total_memory(device_index) * device_memory_limit),
-                alignment_size,
-            )
+    # Special cases for "auto" and "default".
+    if device_memory_limit in ["auto", "default"]:
+        if not has_device_memory_resource():
+            return None
+        if device_memory_limit == "auto":
+            device_memory_limit = get_device_total_memory(device_index)
+        else:
+            device_memory_limit = 0.8
 
-    if isinstance(device_memory_limit, str):
-        return _align(parse_bytes(device_memory_limit), alignment_size)
-    else:
-        return _align(int(device_memory_limit), alignment_size)
+    return parse_device_bytes(
+        device_bytes=device_memory_limit,
+        device_index=device_index,
+        alignment_size=alignment_size,
+    )
 
 
 def get_gpu_uuid(device_index=0):
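
A compact sketch of how the special strings resolve after this change, mirroring the branches above:

    from dask_cuda.utils import (
        get_device_total_memory,
        has_device_memory_resource,
        parse_device_memory_limit,
    )

    if has_device_memory_resource(0):
        # "auto" resolves to total device memory, "default" to 80% of it.
        assert parse_device_memory_limit("auto") == get_device_total_memory(0)
        assert parse_device_memory_limit("default") == parse_device_memory_limit(0.8)
    else:
        # Without a dedicated memory resource, both disable spilling.
        assert parse_device_memory_limit("auto") is None
        assert parse_device_memory_limit("default") is None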
@@ -644,17 +816,14 @@ def get_worker_config(dask_worker):
         ret["device-memory-limit"] = dask_worker.data.manager._device_memory_limit
     else:
         has_device = hasattr(dask_worker.data, "device_buffer")
-        if has_device:
+        if has_device and hasattr(dask_worker.data.device_buffer, "n"):
+            # If `n` is not an attribute, device spilling is disabled/unavailable.
             ret["device-memory-limit"] = dask_worker.data.device_buffer.n
 
     # using ucx ?
-    scheme, loc = parse_address(dask_worker.scheduler.address)
-    ret["protocol"] = scheme
-    if scheme == "ucx":
-        import ucp
-
-        ret["ucx-transports"] = ucp.get_active_transports()
-    elif scheme == "ucxx":
+    protocol, loc = parse_address(dask_worker.scheduler.address)
+    ret["protocol"] = protocol
+    if protocol in ("ucx", "ucxx"):
         import ucxx
 
         ret["ucx-transports"] = ucxx.get_active_transports()
@@ -689,7 +858,7 @@ async def _get_cluster_configuration(client):
     if worker_config:
         w = list(worker_config.values())[0]
         ret.update(w)
-    info = client.scheduler_info()
+    info = client.scheduler_info(n_workers=-1)
     workers = info.get("workers", {})
     ret["nworkers"] = len(workers)
     ret["nthreads"] = sum(w["nthreads"] for w in workers.values())
@@ -767,7 +936,7 @@ def get_rmm_device_memory_usage() -> Optional[int]:
     """Get current bytes allocated on current device through RMM
 
     Check the current RMM resource stack for resources such as
-    `StatisticsResourceAdaptor` and `TrackingResourceAdaptor`
+    ``StatisticsResourceAdaptor`` and ``TrackingResourceAdaptor``
     that can report the current allocated bytes. Returns None,
     if no such resources exist.
 
dask_cuda/utils_test.py CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 from typing import Literal
 
 import distributed
@@ -8,7 +11,7 @@ class MockWorker(Worker):
     """Mock Worker class preventing NVML from getting used by SystemMonitor.
 
     By preventing the Worker from initializing NVML in the SystemMonitor, we can
-    mock test multiple devices in `CUDA_VISIBLE_DEVICES` behavior with single-GPU
+    mock test multiple devices in ``CUDA_VISIBLE_DEVICES`` behavior with single-GPU
     machines.
     """
 
@@ -26,17 +29,17 @@ class MockWorker(Worker):
 
 
 class IncreasedCloseTimeoutNanny(Nanny):
-    """Increase `Nanny`'s close timeout.
+    """Increase ``Nanny``'s close timeout.
 
-    The internal close timeout mechanism of `Nanny` recomputes the time left to kill
-    the `Worker` process based on elapsed time of the close task, which may leave
+    The internal close timeout mechanism of ``Nanny`` recomputes the time left to kill
+    the ``Worker`` process based on elapsed time of the close task, which may leave
     very little time for the subprocess to shutdown cleanly, which may cause tests
     to fail when the system is under higher load. This class increases the default
-    close timeout of 5.0 seconds that `Nanny` sets by default, which can be overriden
+    close timeout of 5.0 seconds that ``Nanny`` sets by default, which can be overridden
     via Distributed's public API.
 
-    This class can be used with the `worker_class` argument of `LocalCluster` or
-    `LocalCUDACluster` to provide a much higher default of 30.0 seconds.
+    This class can be used with the ``worker_class`` argument of ``LocalCluster`` or
+    ``LocalCUDACluster`` to provide a much higher default of 30.0 seconds.
     """
 
     async def close(  # type:ignore[override]
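
As the docstring notes, the class is passed through the worker_class argument; a usage sketch (cluster arguments beyond worker_class are illustrative):

    from dask_cuda import LocalCUDACluster
    from dask_cuda.utils_test import IncreasedCloseTimeoutNanny

    # Workers get up to 30 seconds (instead of Nanny's default 5) to shut down.
    cluster = LocalCUDACluster(worker_class=IncreasedCloseTimeoutNanny)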