dask-cuda 25.4.0__py3-none-any.whl → 25.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_cuda/GIT_COMMIT +1 -1
- dask_cuda/VERSION +1 -1
- dask_cuda/_compat.py +18 -0
- dask_cuda/benchmarks/common.py +4 -1
- dask_cuda/benchmarks/local_cudf_groupby.py +4 -1
- dask_cuda/benchmarks/local_cudf_merge.py +5 -2
- dask_cuda/benchmarks/local_cudf_shuffle.py +5 -2
- dask_cuda/benchmarks/local_cupy.py +4 -1
- dask_cuda/benchmarks/local_cupy_map_overlap.py +4 -1
- dask_cuda/benchmarks/utils.py +7 -4
- dask_cuda/cli.py +21 -15
- dask_cuda/cuda_worker.py +27 -57
- dask_cuda/device_host_file.py +31 -15
- dask_cuda/disk_io.py +7 -4
- dask_cuda/explicit_comms/comms.py +11 -7
- dask_cuda/explicit_comms/dataframe/shuffle.py +147 -55
- dask_cuda/get_device_memory_objects.py +18 -3
- dask_cuda/initialize.py +80 -44
- dask_cuda/is_device_object.py +4 -1
- dask_cuda/is_spillable_object.py +4 -1
- dask_cuda/local_cuda_cluster.py +63 -66
- dask_cuda/plugins.py +17 -16
- dask_cuda/proxify_device_objects.py +15 -10
- dask_cuda/proxify_host_file.py +30 -27
- dask_cuda/proxy_object.py +20 -17
- dask_cuda/tests/conftest.py +41 -0
- dask_cuda/tests/test_dask_cuda_worker.py +114 -27
- dask_cuda/tests/test_dgx.py +10 -18
- dask_cuda/tests/test_explicit_comms.py +51 -18
- dask_cuda/tests/test_from_array.py +7 -5
- dask_cuda/tests/test_initialize.py +16 -37
- dask_cuda/tests/test_local_cuda_cluster.py +164 -54
- dask_cuda/tests/test_proxify_host_file.py +33 -4
- dask_cuda/tests/test_proxy.py +18 -16
- dask_cuda/tests/test_rdd_ucx.py +160 -0
- dask_cuda/tests/test_spill.py +107 -27
- dask_cuda/tests/test_utils.py +106 -20
- dask_cuda/tests/test_worker_spec.py +5 -2
- dask_cuda/utils.py +319 -68
- dask_cuda/utils_test.py +23 -7
- dask_cuda/worker_common.py +196 -0
- dask_cuda/worker_spec.py +12 -5
- {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/METADATA +5 -4
- dask_cuda-25.8.0.dist-info/RECORD +63 -0
- {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/WHEEL +1 -1
- dask_cuda-25.8.0.dist-info/top_level.txt +6 -0
- shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
- shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
- shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
- dask_cuda-25.4.0.dist-info/RECORD +0 -56
- dask_cuda-25.4.0.dist-info/top_level.txt +0 -5
- {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/licenses/LICENSE +0 -0
dask_cuda/local_cuda_cluster.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import copy
 import logging
 import os
@@ -8,18 +11,15 @@ import dask
 from distributed import LocalCluster, Nanny, Worker
 from distributed.worker_memory import parse_memory_limit
 
-from .device_host_file import DeviceHostFile
 from .initialize import initialize
-from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup
-from .proxify_host_file import ProxifyHostFile
 from .utils import (
     cuda_visible_devices,
-    get_cpu_affinity,
     get_ucx_config,
     nvml_device_index,
     parse_cuda_visible_device,
     parse_device_memory_limit,
 )
+from .worker_common import worker_data_function, worker_plugins
 
 
 class LoggedWorker(Worker):
@@ -68,11 +68,16 @@ class LocalCUDACluster(LocalCluster):
         starts spilling to disk (not available if JIT-Unspill is enabled). Can be an
         integer (bytes), float (fraction of total system memory), string (like ``"5GB"``
         or ``"5000M"``), or ``"auto"``, 0, or ``None`` for no memory management.
-    device_memory_limit : int, float, str, or None, default
+    device_memory_limit : int, float, str, or None, default "default"
         Size of the CUDA device LRU cache, which is used to determine when the worker
         starts spilling to host memory. Can be an integer (bytes), float (fraction of
-        total device memory), string (like ``"5GB"`` or ``"5000M"``),
+        total device memory), string (like ``"5GB"`` or ``"5000M"``), ``"auto"``, ``0``
         or ``None`` to disable spilling to host (i.e. allow full device memory usage).
+        Another special value ``"default"`` (which happens to be the default) is also
+        available and uses the recommended Dask-CUDA's defaults and means 80% of the
+        total device memory (analogous to ``0.8``), and disabled spilling (analogous
+        to ``auto``/``0``) on devices without a dedicated memory resource, such as
+        system on a chip (SoC) devices.
     enable_cudf_spill : bool, default False
         Enable automatic cuDF spilling.
@@ -87,7 +92,7 @@ class LocalCUDACluster(LocalCluster):
         ``dask.temporary-directory`` in the local Dask configuration, using the current
         working directory if this is not set.
     shared_filesystem: bool or None, default None
-        Whether the
+        Whether the ``local_directory`` above is shared between all workers or not.
         If ``None``, the "jit-unspill-shared-fs" config value are used, which
         defaults to True. Notice, in all other cases this option defaults to False,
         but on a local cluster it defaults to True -- we assume all workers use the
@@ -100,13 +105,16 @@ class LocalCUDACluster(LocalCluster):
         are not supported or disabled.
     enable_infiniband : bool, default None
         Set environment variables to enable UCX over InfiniBand, requires
-        ``protocol="ucx"``
+        ``protocol="ucx"``, ``protocol="ucxx"`` or ``protocol="ucx-old"``, and implies
+        ``enable_tcp_over_ucx=True`` when ``True``.
     enable_nvlink : bool, default None
-        Set environment variables to enable UCX over NVLink, requires
+        Set environment variables to enable UCX over NVLink, requires
+        ``protocol="ucx"``, ``protocol="ucxx"`` or ``protocol="ucx-old"``, and implies
+        ``enable_tcp_over_ucx=True`` when ``True``.
     enable_rdmacm : bool, default None
         Set environment variables to enable UCX RDMA connection manager support,
-        requires ``protocol="ucx"``
+        requires ``protocol="ucx"``, ``protocol="ucxx"`` or ``protocol="ucx-old"``,
+        and ``enable_infiniband=True``.
     rmm_pool_size : int, str or None, default None
         RMM pool size to initialize each worker with. Can be an integer (bytes), float
         (fraction of total device memory), string (like ``"5GB"`` or ``"5000M"``), or
@@ -123,8 +131,8 @@ class LocalCUDACluster(LocalCluster):
         and to set the maximum pool size.
 
         .. note::
-            When paired with
-            due to fragmentation.
+            When paired with ``--enable-rmm-async`` the maximum size cannot be
+            guaranteed due to fragmentation.
 
         .. note::
             This size is a per-worker configuration, and not cluster-wide.
@@ -140,9 +148,8 @@ class LocalCUDACluster(LocalCluster):
         See ``rmm.mr.CudaAsyncMemoryResource`` for more info.
 
         .. warning::
-            The asynchronous allocator
-            result in an exception.
+            The asynchronous allocator is incompatible with RMM pools and managed
+            memory. Trying to enable both will result in an exception.
     rmm_allocator_external_lib_list: str, list or None, default None
         List of external libraries for which to set RMM as the allocator.
         Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string
@@ -201,7 +208,8 @@ class LocalCUDACluster(LocalCluster):
     Raises
     ------
     TypeError
-        If InfiniBand or NVLink are enabled and
+        If InfiniBand or NVLink are enabled and
+        ``protocol not in ("ucx", "ucxx", "ucx-old")``.
     ValueError
         If RMM pool, RMM managed memory or RMM async allocator are requested but RMM
         cannot be imported.
@@ -221,10 +229,9 @@ class LocalCUDACluster(LocalCluster):
         n_workers=None,
         threads_per_worker=1,
         memory_limit="auto",
-        device_memory_limit=
+        device_memory_limit="default",
         enable_cudf_spill=False,
         cudf_spill_stats=0,
-        data=None,
         local_directory=None,
         shared_filesystem=None,
         protocol=None,
@@ -242,7 +249,6 @@ class LocalCUDACluster(LocalCluster):
         rmm_track_allocations=False,
         jit_unspill=None,
         log_spilling=False,
-        worker_class=None,
         pre_import=None,
         **kwargs,
     ):
@@ -339,40 +345,29 @@ class LocalCUDACluster(LocalCluster):
             jit_unspill = dask.config.get("jit-unspill", default=False)
         data = kwargs.pop("data", None)
         if data is None:
-                data = (
-                    ProxifyHostFile,
-                    {
-                        "device_memory_limit": self.device_memory_limit,
-                        "memory_limit": self.memory_limit,
-                        "shared_filesystem": shared_filesystem,
-                    },
-                )
-            else:
-                data = (
-                    DeviceHostFile,
-                    {
-                        "device_memory_limit": self.device_memory_limit,
-                        "memory_limit": self.memory_limit,
-                        "log_spilling": log_spilling,
-                    },
-                )
+            self.data = worker_data_function(
+                device_memory_limit=self.device_memory_limit,
+                memory_limit=self.memory_limit,
+                jit_unspill=jit_unspill,
+                enable_cudf_spill=enable_cudf_spill,
+                shared_filesystem=shared_filesystem,
+            )
 
         if enable_tcp_over_ucx or enable_infiniband or enable_nvlink:
             if protocol is None:
+                ucx_protocol = dask.config.get(
+                    "distributed.comm.ucx.ucx-protocol", default=None
+                )
+                if ucx_protocol is not None:
+                    # TODO: remove when UCX-Py is removed,
+                    # see https://github.com/rapidsai/dask-cuda/issues/1517
+                    protocol = ucx_protocol
+                else:
+                    protocol = "ucx"
+            elif protocol not in ("ucx", "ucxx", "ucx-old"):
                 raise TypeError(
-                    "Enabling InfiniBand or NVLink requires protocol='ucx'
-                    "protocol='ucxx'"
+                    "Enabling InfiniBand or NVLink requires protocol='ucx', "
+                    "protocol='ucxx' or protocol='ucx-old'"
                 )
 
         self.host = kwargs.get("host", None)
@@ -385,6 +380,7 @@ class LocalCUDACluster(LocalCluster):
             enable_rdmacm=enable_rdmacm,
         )
 
+        worker_class = kwargs.pop("worker_class", None)
        if worker_class is not None:
            if log_spilling is True:
                raise ValueError(
@@ -441,28 +437,29 @@ class LocalCUDACluster(LocalCluster):
         spec = copy.deepcopy(self.new_spec)
         worker_count = self.cuda_visible_devices.index(name)
         visible_devices = cuda_visible_devices(worker_count, self.cuda_visible_devices)
+        device_index = nvml_device_index(0, visible_devices)
         spec["options"].update(
             {
                 "env": {
                     "CUDA_VISIBLE_DEVICES": visible_devices,
                 },
-                external_lib_list=self.rmm_allocator_external_lib_list,
+                **({"data": self.data(device_index)} if hasattr(self, "data") else {}),
+                "plugins": worker_plugins(
+                    device_index=device_index,
+                    rmm_initial_pool_size=self.rmm_pool_size,
+                    rmm_maximum_pool_size=self.rmm_maximum_pool_size,
+                    rmm_managed_memory=self.rmm_managed_memory,
+                    rmm_async_alloc=self.rmm_async,
+                    rmm_release_threshold=self.rmm_release_threshold,
+                    rmm_log_directory=self.rmm_log_directory,
+                    rmm_track_allocations=self.rmm_track_allocations,
+                    rmm_allocator_external_lib_list=(
+                        self.rmm_allocator_external_lib_list
                     ),
+                    pre_import=self.pre_import,
+                    enable_cudf_spill=self.enable_cudf_spill,
+                    cudf_spill_stats=self.cudf_spill_stats,
+                ),
             }
         )
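
The hunks above change how ``LocalCUDACluster`` resolves its spilling threshold (the new ``"default"`` value) and which comm protocols permit InfiniBand/NVLink. As a point of reference, a cluster using these options could be started as in the sketch below; this is not part of the diff and assumes a CUDA-capable host with UCX available.

    # Illustrative sketch (not part of the diff); assumes a CUDA GPU and UCX support.
    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster

    cluster = LocalCUDACluster(
        protocol="ucx",                  # "ucxx" or "ucx-old" are also accepted
        enable_nvlink=True,              # implies enable_tcp_over_ucx=True
        device_memory_limit="default",   # 80% of device memory; spilling disabled on SoCs
        rmm_pool_size="4GB",
    )
    client = Client(cluster)

With ``protocol=None`` and any UCX transport flag set, 25.8.0 now consults the ``distributed.comm.ucx.ucx-protocol`` config value before falling back to ``"ucx"``.
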
dask_cuda/plugins.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import importlib
 import logging
 import os
@@ -5,7 +8,7 @@ from typing import Callable, Dict
 
 from distributed import WorkerPlugin
 
-from .utils import get_rmm_log_file_name,
+from .utils import get_rmm_log_file_name, parse_device_bytes
 
 
 class CPUAffinity(WorkerPlugin):
@@ -75,28 +78,26 @@ class RMMSetup(WorkerPlugin):
         self.external_lib_list = external_lib_list
 
     def setup(self, worker=None):
-            self.initial_pool_size =
-            )
+        self.initial_pool_size = parse_device_bytes(
+            self.initial_pool_size, alignment_size=256
+        )
 
         if self.async_alloc:
             import rmm
 
-            self.release_threshold =
-            )
+            self.release_threshold = parse_device_bytes(
+                self.release_threshold, alignment_size=256
+            )
 
             mr = rmm.mr.CudaAsyncMemoryResource(
                 initial_pool_size=self.initial_pool_size,
                 release_threshold=self.release_threshold,
             )
 
+            self.maximum_pool_size = parse_device_bytes(
+                self.maximum_pool_size, alignment_size=256
+            )
             if self.maximum_pool_size is not None:
-                self.maximum_pool_size = parse_device_memory_limit(
-                    self.maximum_pool_size, alignment_size=256
-                )
                 mr = rmm.mr.LimitingResourceAdaptor(
                     mr, allocation_limit=self.maximum_pool_size
                 )
@@ -114,10 +115,9 @@ class RMMSetup(WorkerPlugin):
             pool_allocator = False if self.initial_pool_size is None else True
 
             if self.initial_pool_size is not None:
-                self.maximum_pool_size =
-                )
+                self.maximum_pool_size = parse_device_bytes(
+                    self.maximum_pool_size, alignment_size=256
+                )
 
             rmm.reinitialize(
                 pool_allocator=pool_allocator,
@@ -129,6 +129,7 @@ class RMMSetup(WorkerPlugin):
                     worker, self.logging, self.log_directory
                 ),
             )
+
             if self.rmm_track_allocations:
                 import rmm
 
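
``RMMSetup`` now routes every size argument through ``parse_device_bytes(..., alignment_size=256)`` instead of ``parse_device_memory_limit``, and for the async allocator it now parses the maximum pool size before the ``None`` check. The helper below is a hypothetical stand-in, not dask-cuda's implementation, meant only to illustrate what parsing a size string and aligning it to 256 bytes looks like.

    # Hypothetical sketch of size parsing with 256-byte alignment; the real
    # dask_cuda.utils.parse_device_bytes may behave differently (e.g. it may also
    # resolve fractions of total device memory).
    from dask.utils import parse_bytes

    def aligned_device_bytes(value, alignment_size=256):
        """Round ``value`` (int or size string) down to a multiple of ``alignment_size``."""
        if value is None:
            return None
        nbytes = parse_bytes(value) if isinstance(value, str) else int(value)
        return (nbytes // alignment_size) * alignment_size

    print(aligned_device_bytes("1GiB"))  # 1073741824 (already a multiple of 256)
    print(aligned_device_bytes(1000))    # 768
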

dask_cuda/proxify_device_objects.py
CHANGED

@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import functools
 import pydoc
 from collections import defaultdict
@@ -58,9 +61,9 @@ def proxify_device_objects(
 ) -> T:
     """Wrap device objects in ProxyObject
 
-    Search through
-    It uses
-    objects found in
+    Search through ``obj`` and wraps all CUDA device objects in ProxyObject.
+    It uses ``proxied_id_to_proxy`` to make sure that identical CUDA device
+    objects found in ``obj`` are wrapped by the same ProxyObject.
 
     Parameters
     ----------
@@ -68,11 +71,11 @@ def proxify_device_objects(
         Object to search through or wrap in a ProxyObject.
     proxied_id_to_proxy: MutableMapping[int, ProxyObject]
         Dict mapping the id() of proxied objects (CUDA device objects) to
-        their proxy and is updated with all new proxied objects found in
+        their proxy and is updated with all new proxied objects found in ``obj``.
         If None, use an empty dict.
     found_proxies: List[ProxyObject]
-        List of found proxies in
-        including those already in
+        List of found proxies in ``obj``. Notice, this includes all proxies found,
+        including those already in ``proxied_id_to_proxy``.
         If None, use an empty list.
     excl_proxies: bool
         Don't add found objects that are already ProxyObject to found_proxies.
@@ -83,7 +86,7 @@ def proxify_device_objects(
     Returns
     -------
     ret: Any
-        A copy of
+        A copy of ``obj`` where all CUDA device objects are wrapped in ProxyObject
     """
     _register_incompatible_types()
 
@@ -102,7 +105,7 @@ def unproxify_device_objects(
 ) -> T:
     """Unproxify device objects
 
-    Search through
+    Search through ``obj`` and un-wraps all CUDA device objects.
 
     Parameters
     ----------
@@ -117,7 +120,7 @@ def unproxify_device_objects(
     Returns
     -------
     ret: Any
-        A copy of
+        A copy of ``obj`` where all CUDA device objects are unproxify
     """
     if isinstance(obj, dict):
         return {
@@ -242,7 +245,9 @@ def _register_cudf():
 
     @dispatch.register(cudf.DataFrame)
     @dispatch.register(cudf.Series)
-    @dispatch.register(cudf.
+    @dispatch.register(cudf.Index)
+    @dispatch.register(cudf.MultiIndex)
+    @dispatch.register(cudf.RangeIndex)
     def proxify_device_object_cudf_dataframe(
         obj, proxied_id_to_proxy, found_proxies, excl_proxies
     ):
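
The docstring fixes above spell out the proxify/unproxify contract. A round trip might look like the sketch below; it is illustrative only, requires a CUDA GPU with cudf installed, and assumes the bookkeeping arguments default to ``None`` as the docstrings ("If None, use an empty dict/list") imply.

    # Illustrative sketch only; requires a CUDA GPU with cudf and dask-cuda installed.
    import cudf
    from dask_cuda.proxify_device_objects import (
        proxify_device_objects,
        unproxify_device_objects,
    )

    df = cudf.DataFrame({"a": [1, 2, 3]})
    found = []
    proxied = proxify_device_objects({"x": df}, found_proxies=found)
    # ``found`` now holds the ProxyObject wrapping ``df``; identical device objects
    # found in the input would share a single proxy.
    restored = unproxify_device_objects(proxied)
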
dask_cuda/proxify_host_file.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import abc
 import gc
 import io
@@ -64,29 +67,29 @@ class Proxies(abc.ABC):
 
     @abc.abstractmethod
     def mem_usage_add(self, proxy: ProxyObject) -> None:
-        """Given a new proxy, update
+        """Given a new proxy, update ``self._mem_usage``"""
 
     @abc.abstractmethod
     def mem_usage_remove(self, proxy: ProxyObject) -> None:
-        """Removal of proxy, update
+        """Removal of proxy, update ``self._mem_usage``"""
 
     @abc.abstractmethod
     def buffer_info(self) -> List[Tuple[float, int, List[ProxyObject]]]:
         """Return a list of buffer information
 
         The returned format is:
+            ``[(<access-time>, <size-of-buffer>, <list-of-proxies>), ...]``
         """
 
     def add(self, proxy: ProxyObject) -> None:
-        """Add a proxy for tracking, calls
+        """Add a proxy for tracking, calls ``self.mem_usage_add``"""
         assert not self.contains_proxy_id(id(proxy))
         with self._lock:
             self._proxy_id_to_proxy[id(proxy)] = weakref.ref(proxy)
             self.mem_usage_add(proxy)
 
     def remove(self, proxy: ProxyObject) -> None:
-        """Remove proxy from tracking, calls
+        """Remove proxy from tracking, calls ``self.mem_usage_remove``"""
         with self._lock:
             del self._proxy_id_to_proxy[id(proxy)]
             self.mem_usage_remove(proxy)
@@ -323,13 +326,13 @@ class ProxyManager:
         assert header["serializer"] == pxy.serializer
 
     def proxify(self, obj: T, duplicate_check=True) -> Tuple[T, bool]:
-        """Proxify
+        """Proxify ``obj`` and add found proxies to the ``Proxies`` collections
 
-        Search through
+        Search through ``obj`` and wrap all CUDA device objects in ProxyObject.
         If duplicate_check is True, identical CUDA device objects found in
+        ``obj`` are wrapped by the same ProxyObject.
 
-        Returns the proxified object and a boolean, which is
+        Returns the proxified object and a boolean, which is ``True`` when one or
         more incompatible-types were found.
 
         Parameters
@@ -337,7 +340,7 @@ class ProxyManager:
         obj
             Object to search through or wrap in a ProxyObject.
         duplicate_check
-            Make sure that identical CUDA device objects found in
+            Make sure that identical CUDA device objects found in ``obj`` are
             wrapped by the same ProxyObject. This check comes with a significant
             overhead hence it is recommended setting to False when it is known
             that no duplicate exist.
@@ -380,11 +383,11 @@ class ProxyManager:
         proxies_access: Callable[[], List[Tuple[float, int, List[ProxyObject]]]],
         serializer: Callable[[ProxyObject], None],
     ) -> int:
-        """Evict buffers retrieved by calling
+        """Evict buffers retrieved by calling ``proxies_access``
 
-        Calls
-        enough proxies to free up at a minimum
-        spill a proxy,
+        Calls ``proxies_access`` to retrieve a list of proxies and then spills
+        enough proxies to free up at a minimum ``nbytes`` bytes. In order to
+        spill a proxy, ``serializer`` is called.
 
         Parameters
         ----------
@@ -392,7 +395,7 @@ class ProxyManager:
             Number of bytes to evict.
         proxies_access: callable
             Function that returns a list of proxies pack in a tuple like:
-
+            ``[(<access-time>, <size-of-buffer>, <list-of-proxies>), ...]``
         serializer: callable
             Function that serialize the given proxy object.
 
@@ -423,7 +426,7 @@ class ProxyManager:
     def maybe_evict_from_device(self, extra_dev_mem=0) -> None:
         """Evict buffers until total memory usage is below device-memory-limit
 
-        Adds
+        Adds ``extra_dev_mem`` to the current total memory usage when comparing
         against device-memory-limit.
         """
         mem_over_usage = (
@@ -439,7 +442,7 @@ class ProxyManager:
     def maybe_evict_from_host(self, extra_host_mem=0) -> None:
         """Evict buffers until total memory usage is below host-memory-limit
 
-        Adds
+        Adds ``extra_host_mem`` to the current total memory usage when comparing
         against device-memory-limit.
         """
         assert self._host_memory_limit is not None
@@ -466,7 +469,7 @@ class ProxifyHostFile(MutableMapping):
     workers in Distributed.
 
     It wraps all CUDA device objects in a ProxyObject instance and maintains
-
+    ``device_memory_limit`` by spilling ProxyObject on-the-fly. This addresses
     some issues with the default DeviceHostFile host, which tracks device
     memory inaccurately see <https://github.com/rapidsai/dask-cuda/pull/451>
 
@@ -488,16 +491,16 @@ class ProxifyHostFile(MutableMapping):
     memory_limit: int
         Number of bytes of host memory used before spilling to disk.
     shared_filesystem: bool or None, default None
-        Whether the
+        Whether the ``local_directory`` above is shared between all workers or not.
         If ``None``, the "jit-unspill-shared-fs" config value are used, which
         defaults to False.
-        Notice, a shared filesystem must support the
+        Notice, a shared filesystem must support the ``os.link()`` operation.
     compatibility_mode: bool or None, default None
         Enables compatibility-mode, which means that items are un-proxified before
         retrieval. This makes it possible to get some of the JIT-unspill benefits
         without having to be ProxyObject compatible. In order to still allow specific
-        ProxyObjects, set the
+        ProxyObjects, set the ``mark_as_explicit_proxies=True`` when proxifying with
+        ``proxify_device_objects()``. If ``None``, the "jit-unspill-compatibility-mode"
         config value are used, which defaults to False.
     spill_on_demand: bool or None, default None
         Enables spilling when the RMM memory pool goes out of memory. If ``None``,
@@ -639,7 +642,7 @@ class ProxifyHostFile(MutableMapping):
         """Manually evict 1% of host limit.
 
         Dask uses this to trigger CPU-to-Disk spilling. We don't know how much
-        we need to spill but Dask will call
+        we need to spill but Dask will call ``evict()`` repeatedly until enough
         is spilled. We ask for 1% each time.
 
         Return
@@ -658,9 +661,9 @@ class ProxifyHostFile(MutableMapping):
 
     @property
     def fast(self):
-        """Alternative access to
+        """Alternative access to ``.evict()`` used by Dask
 
-        Dask expects
+        Dask expects ``.fast.evict()`` to be available for manually triggering
         of CPU-to-Disk spilling.
         """
         if len(self.manager._host) == 0:
@@ -758,9 +761,9 @@ class ProxifyHostFile(MutableMapping):
 
     @classmethod
     def serialize_proxy_to_disk_inplace(cls, proxy: ProxyObject) -> None:
-        """Serialize
+        """Serialize ``proxy`` to disk.
 
-        Avoid de-serializing if
+        Avoid de-serializing if ``proxy`` is serialized using "dask" or
         "pickle". In this case the already serialized data is written
         directly to disk.
 
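
``ProxifyHostFile`` is the JIT-unspill data store: device buffers above ``device_memory_limit`` are spilled to host, host buffers above ``memory_limit`` are spilled to disk, and Dask's ``evict()``/``fast.evict()`` hooks drive the CPU-to-Disk path. The sketch below shows the user-facing way to exercise it; it is illustrative only and assumes a CUDA GPU with cupy installed and enough data to cross the deliberately small limits.

    # Illustrative sketch (not part of the diff); assumes a CUDA GPU and cupy.
    import cupy
    import dask.array as da
    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster

    cluster = LocalCUDACluster(
        n_workers=1,
        jit_unspill=True,             # use ProxifyHostFile as the worker data store
        device_memory_limit="1GB",    # spill device buffers to host past ~1 GB
        memory_limit="4GB",           # spill host buffers to disk past ~4 GB
    )
    client = Client(cluster)

    rs = da.random.RandomState(RandomState=cupy.random.RandomState)
    x = rs.random((20_000, 20_000), chunks="256 MiB")  # ~3.2 GB of float64 on device
    total = x.sum().compute()         # proxies are spilled and unspilled on demand
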