dask-cuda 25.4.0__py3-none-any.whl → 25.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_cuda/GIT_COMMIT +1 -1
- dask_cuda/VERSION +1 -1
- dask_cuda/_compat.py +18 -0
- dask_cuda/benchmarks/common.py +4 -1
- dask_cuda/benchmarks/local_cudf_groupby.py +4 -1
- dask_cuda/benchmarks/local_cudf_merge.py +5 -2
- dask_cuda/benchmarks/local_cudf_shuffle.py +5 -2
- dask_cuda/benchmarks/local_cupy.py +4 -1
- dask_cuda/benchmarks/local_cupy_map_overlap.py +4 -1
- dask_cuda/benchmarks/utils.py +7 -4
- dask_cuda/cli.py +21 -15
- dask_cuda/cuda_worker.py +27 -57
- dask_cuda/device_host_file.py +31 -15
- dask_cuda/disk_io.py +7 -4
- dask_cuda/explicit_comms/comms.py +11 -7
- dask_cuda/explicit_comms/dataframe/shuffle.py +147 -55
- dask_cuda/get_device_memory_objects.py +18 -3
- dask_cuda/initialize.py +80 -44
- dask_cuda/is_device_object.py +4 -1
- dask_cuda/is_spillable_object.py +4 -1
- dask_cuda/local_cuda_cluster.py +63 -66
- dask_cuda/plugins.py +17 -16
- dask_cuda/proxify_device_objects.py +15 -10
- dask_cuda/proxify_host_file.py +30 -27
- dask_cuda/proxy_object.py +20 -17
- dask_cuda/tests/conftest.py +41 -0
- dask_cuda/tests/test_dask_cuda_worker.py +114 -27
- dask_cuda/tests/test_dgx.py +10 -18
- dask_cuda/tests/test_explicit_comms.py +51 -18
- dask_cuda/tests/test_from_array.py +7 -5
- dask_cuda/tests/test_initialize.py +16 -37
- dask_cuda/tests/test_local_cuda_cluster.py +164 -54
- dask_cuda/tests/test_proxify_host_file.py +33 -4
- dask_cuda/tests/test_proxy.py +18 -16
- dask_cuda/tests/test_rdd_ucx.py +160 -0
- dask_cuda/tests/test_spill.py +107 -27
- dask_cuda/tests/test_utils.py +106 -20
- dask_cuda/tests/test_worker_spec.py +5 -2
- dask_cuda/utils.py +319 -68
- dask_cuda/utils_test.py +23 -7
- dask_cuda/worker_common.py +196 -0
- dask_cuda/worker_spec.py +12 -5
- {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/METADATA +5 -4
- dask_cuda-25.8.0.dist-info/RECORD +63 -0
- {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/WHEEL +1 -1
- dask_cuda-25.8.0.dist-info/top_level.txt +6 -0
- shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
- shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
- shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
- dask_cuda-25.4.0.dist-info/RECORD +0 -56
- dask_cuda-25.4.0.dist-info/top_level.txt +0 -5
- {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/licenses/LICENSE +0 -0
dask_cuda/GIT_COMMIT
CHANGED
@@ -1 +1 @@
-
+bde9a4d3ee2c4338f56b3acf919b8e756ecb35b3
dask_cuda/VERSION
CHANGED
@@ -1 +1 @@
-25.
+25.08.00
dask_cuda/_compat.py
ADDED
@@ -0,0 +1,18 @@
+# Copyright (c) 2025 NVIDIA CORPORATION.
+
+import functools
+import importlib.metadata
+
+import packaging.version
+
+
+@functools.lru_cache(maxsize=None)
+def get_dask_version() -> packaging.version.Version:
+    return packaging.version.parse(importlib.metadata.version("dask"))
+
+
+@functools.lru_cache(maxsize=None)
+def DASK_2025_4_0():
+    # dask 2025.4.0 isn't currently released, so we're relying
+    # on strictly greater than here.
+    return get_dask_version() > packaging.version.parse("2025.3.0")
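The new _compat helpers give the rest of the code base a cached way to branch on the installed Dask version. A minimal usage sketch; the call site below is hypothetical and not part of the diff:

# Hypothetical call site for the helpers defined in dask_cuda/_compat.py.
from dask_cuda._compat import DASK_2025_4_0

if DASK_2025_4_0():
    pass  # installed dask is newer than 2025.3.0: take the newer code path
else:
    pass  # older dask: keep the previous behaviour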
dask_cuda/benchmarks/common.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 from argparse import Namespace
 from functools import partial
@@ -124,7 +127,7 @@ def run(client: Client, args: Namespace, config: Config):
     """
 
     wait_for_cluster(client, shutdown_on_failure=True)
-    assert len(client.scheduler_info()["workers"]) > 0
+    assert len(client.scheduler_info(n_workers=-1)["workers"]) > 0
     setup_memory_pools(
         client=client,
         is_gpu=args.type == "gpu",
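A change that repeats throughout the benchmarks is replacing client.scheduler_info() with client.scheduler_info(n_workers=-1). A small sketch of the pattern, assuming (as the change suggests) that newer distributed releases cap the number of workers reported by default and that -1 requests all of them:

from distributed import Client

client = Client()  # hypothetical: connect to (or start) a cluster
# Request the full worker list rather than a default-capped subset.
workers = client.scheduler_info(n_workers=-1)["workers"]
assert len(workers) > 0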
dask_cuda/benchmarks/local_cudf_groupby.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 from collections import ChainMap
 from time import perf_counter as clock
@@ -138,7 +141,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
         key="Device memory limit", value=f"{format_bytes(args.device_memory_limit)}"
     )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
-    if args.protocol in ["ucx", "ucxx"]:
+    if args.protocol in ["ucx", "ucxx", "ucx-old"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")
dask_cuda/benchmarks/local_cudf_merge.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 import math
 from collections import ChainMap
@@ -166,7 +169,7 @@ def merge(args, ddf1, ddf2):
 
 def bench_once(client, args, write_profile=None):
     # Generate random Dask dataframes
-    n_workers = len(client.scheduler_info()["workers"])
+    n_workers = len(client.scheduler_info(n_workers=-1)["workers"])
     # Allow the number of chunks to vary between
     # the "base" and "other" DataFrames
    args.base_chunks = args.base_chunks or n_workers
@@ -224,7 +227,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
     )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
     print_key_value(key="Frac-match", value=f"{args.frac_match}")
-    if args.protocol in ["ucx", "ucxx"]:
+    if args.protocol in ["ucx", "ucxx", "ucx-old"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")
dask_cuda/benchmarks/local_cudf_shuffle.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 from collections import ChainMap
 from time import perf_counter
@@ -70,7 +73,7 @@ def create_data(
     """
     chunksize = args.partition_size // np.float64().nbytes
 
-    workers = list(client.scheduler_info()["workers"].keys())
+    workers = list(client.scheduler_info(n_workers=-1)["workers"].keys())
     assert len(workers) > 0
 
     dist = args.partition_distribution
@@ -149,7 +152,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
         key="Device memory limit", value=f"{format_bytes(args.device_memory_limit)}"
     )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
-    if args.protocol in ["ucx", "ucxx"]:
+    if args.protocol in ["ucx", "ucxx", "ucx-old"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")
dask_cuda/benchmarks/local_cupy.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 from collections import ChainMap
 from time import perf_counter as clock
@@ -192,7 +195,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
     )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
     print_key_value(key="Protocol", value=f"{args.protocol}")
-    if args.protocol in ["ucx", "ucxx"]:
+    if args.protocol in ["ucx", "ucxx", "ucx-old"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")
dask_cuda/benchmarks/local_cupy_map_overlap.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 from collections import ChainMap
 from time import perf_counter as clock
@@ -77,7 +80,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
     )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
     print_key_value(key="Protocol", value=f"{args.protocol}")
-    if args.protocol in ["ucx", "ucxx"]:
+    if args.protocol in ["ucx", "ucxx", "ucx-old"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")
dask_cuda/benchmarks/utils.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 import itertools
 import json
@@ -77,7 +80,7 @@ def parse_benchmark_args(
     cluster_args.add_argument(
         "-p",
         "--protocol",
-        choices=["tcp", "ucx", "ucxx"],
+        choices=["tcp", "ucx", "ucxx", "ucx-old"],
         default="tcp",
         type=str,
         help="The communication protocol to use.",
@@ -122,7 +125,7 @@ def parse_benchmark_args(
         "pool size."
         ""
         ".. note::"
-        " When paired with
+        " When paired with ``--enable-rmm-async`` the maximum size cannot be "
        " guaranteed due to fragmentation."
        ""
        ".. note::"
@@ -641,11 +644,11 @@ def wait_for_cluster(client, timeout=120, shutdown_on_failure=True):
    for _ in range(timeout // 5):
        print(
            "Waiting for workers to come up, "
-            f"have {len(client.scheduler_info().get('workers', []))}, "
+            f"have {len(client.scheduler_info(n_workers=-1).get('workers', []))}, "
            f"want {expected}"
        )
        time.sleep(5)
-        nworkers = len(client.scheduler_info().get("workers", []))
+        nworkers = len(client.scheduler_info(n_workers=-1).get("workers", []))
        if nworkers == expected:
            return
        else:
dask_cuda/cli.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import absolute_import, division, print_function
 
 import logging
@@ -90,16 +93,20 @@ def cuda():
     help="""Size of the host LRU cache, which is used to determine when the worker
     starts spilling to disk (not available if JIT-Unspill is enabled). Can be an
     integer (bytes), float (fraction of total system memory), string (like ``"5GB"``
-    or ``"5000M"``), or ``"auto"
+    or ``"5000M"``), or ``"auto"`` or ``0`` for no memory management.""",
 )
 @click.option(
     "--device-memory-limit",
-    default="
+    default="default",
     show_default=True,
     help="""Size of the CUDA device LRU cache, which is used to determine when the
     worker starts spilling to host memory. Can be an integer (bytes), float (fraction of
-    total device memory), string (like ``"5GB"`` or ``"5000M"``),
-    disable spilling to host (i.e. allow full device memory usage).
+    total device memory), string (like ``"5GB"`` or ``"5000M"``), ``"auto"`` or ``0``
+    to disable spilling to host (i.e. allow full device memory usage). Another special
+    value ``"default"`` (which happens to be the default) is also available and uses the
+    recommended Dask-CUDA's defaults and means 80% of the total device memory (analogous
+    to ``0.8``), and disabled spilling (analogous to ``auto``/``0``) on devices without
+    a dedicated memory resource, such as system on a chip (SoC) devices.""",
 )
 @click.option(
     "--enable-cudf-spill/--disable-cudf-spill",
@@ -113,7 +120,7 @@ def cuda():
     type=int,
     default=0,
     help="""Set the cuDF spilling statistics level. This option has no effect if
-
+    ``--enable-cudf-spill`` is not specified.""",
 )
 @click.option(
     "--rmm-pool-size",
@@ -135,8 +142,8 @@ def cuda():
     to set the maximum pool size.
 
     .. note::
-        When paired with
-        to fragmentation.
+        When paired with ``--enable-rmm-async`` the maximum size cannot be guaranteed
+        due to fragmentation.
 
     .. note::
         This size is a per-worker configuration, and not cluster-wide.""",
@@ -160,9 +167,8 @@ def cuda():
     allocator. See ``rmm.mr.CudaAsyncMemoryResource`` for more info.
 
     .. warning::
-        The asynchronous allocator
-
-        result in failure.""",
+        The asynchronous allocator is incompatible with RMM pools and managed memory,
+        trying to enable both will result in failure.""",
 )
 @click.option(
     "--set-rmm-allocator-for-libs",
@@ -245,12 +251,12 @@ def cuda():
     "--shared-filesystem/--no-shared-filesystem",
     default=None,
     type=bool,
-    help="""If
-
-
+    help="""If ``--shared-filesystem`` is specified, inform JIT-Unspill that
+    ``local_directory`` is a shared filesystem available for all workers, whereas
+    ``--no-shared-filesystem`` informs it may not assume it's a shared filesystem.
     If neither is specified, JIT-Unspill will decide based on the Dask config value
-    specified by
-    Notice, a shared filesystem must support the
+    specified by ``"jit-unspill-shared-fs"``.
+    Notice, a shared filesystem must support the ``os.link()`` operation.""",
 )
 @scheduler_file
 @click.option(
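A hedged Python-level sketch of the new ``"default"`` device memory limit: LocalCUDACluster itself is not shown in this diff (it changed in dask_cuda/local_cuda_cluster.py), but it has long accepted the same memory and RMM keyword arguments as the CLI, so presumably it mirrors these semantics; the values below are illustrative, not taken from the diff.

from dask.distributed import Client
from dask_cuda import LocalCUDACluster

# "default" selects Dask-CUDA's recommended limit: 80% of device memory,
# or no host spilling on devices without a dedicated memory resource (e.g. SoCs).
cluster = LocalCUDACluster(
    device_memory_limit="default",
    rmm_async=True,  # async allocator; incompatible with RMM managed memory
    rmm_release_threshold="10GB",
)
client = Client(cluster)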
dask_cuda/cuda_worker.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import absolute_import, division, print_function
 
 import asyncio
@@ -18,18 +21,9 @@ from distributed.proctitle import (
 )
 from distributed.worker_memory import parse_memory_limit
 
-from .device_host_file import DeviceHostFile
 from .initialize import initialize
-from .
-from .
-from .utils import (
-    cuda_visible_devices,
-    get_cpu_affinity,
-    get_n_gpus,
-    get_ucx_config,
-    nvml_device_index,
-    parse_device_memory_limit,
-)
+from .utils import cuda_visible_devices, get_n_gpus, get_ucx_config, nvml_device_index
+from .worker_common import worker_data_function, worker_plugins
 
 
 class CUDAWorker(Server):
@@ -40,7 +34,7 @@ class CUDAWorker(Server):
         nthreads=1,
         name=None,
         memory_limit="auto",
-        device_memory_limit="
+        device_memory_limit="default",
         enable_cudf_spill=False,
         cudf_spill_stats=0,
         rmm_pool_size=None,
@@ -166,35 +160,14 @@ class CUDAWorker(Server):
 
         if jit_unspill is None:
             jit_unspill = dask.config.get("jit-unspill", default=False)
-        if device_memory_limit is None and memory_limit is None:
-            data = lambda _: {}
-        elif jit_unspill:
-            if enable_cudf_spill:
-                warnings.warn(
-                    "Enabling cuDF spilling and JIT-Unspill together is not "
-                    "safe, consider disabling JIT-Unspill."
-                )
 
-
-
-
-
-
-
-
-                    "shared_filesystem": shared_filesystem,
-                },
-            )
-        else:
-            data = lambda i: (
-                DeviceHostFile,
-                {
-                    "device_memory_limit": parse_device_memory_limit(
-                        device_memory_limit, device_index=i
-                    ),
-                    "memory_limit": memory_limit,
-                },
-            )
+        data = worker_data_function(
+            device_memory_limit=device_memory_limit,
+            memory_limit=memory_limit,
+            jit_unspill=jit_unspill,
+            enable_cudf_spill=enable_cudf_spill,
+            shared_filesystem=shared_filesystem,
+        )
 
         cudf_spill_warning = dask.config.get("cudf-spill-warning", default=True)
         if enable_cudf_spill and cudf_spill_warning:
@@ -220,23 +193,20 @@ class CUDAWorker(Server):
                 preload_argv=(list(preload_argv) or []) + ["--create-cuda-context"],
                 security=security,
                 env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
-                plugins=
-
-
-
-
-
-
-
-
-
-
-
-
-                    PreImport(pre_import),
-                    CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
-                },
+                plugins=worker_plugins(
+                    device_index=nvml_device_index(i, cuda_visible_devices(i)),
+                    rmm_initial_pool_size=rmm_pool_size,
+                    rmm_maximum_pool_size=rmm_maximum_pool_size,
+                    rmm_managed_memory=rmm_managed_memory,
+                    rmm_async_alloc=rmm_async,
+                    rmm_release_threshold=rmm_release_threshold,
+                    rmm_log_directory=rmm_log_directory,
+                    rmm_track_allocations=rmm_track_allocations,
+                    rmm_allocator_external_lib_list=rmm_allocator_external_lib_list,
+                    pre_import=pre_import,
+                    enable_cudf_spill=enable_cudf_spill,
+                    cudf_spill_stats=cudf_spill_stats,
+                ),
                 name=name if nprocs == 1 or name is None else str(name) + "-" + str(i),
                 local_directory=local_directory,
                 config={
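The worker construction logic that used to live inline in CUDAWorker now comes from the new dask_cuda/worker_common.py module added in this release. A hedged sketch of the refactor, using only the keyword arguments visible in the call sites above; the argument values are illustrative assumptions:

from dask_cuda.worker_common import worker_data_function, worker_plugins

# Shared helper that builds the per-worker storage (`data`) factory.
data = worker_data_function(
    device_memory_limit="default",
    memory_limit="auto",
    jit_unspill=False,
    enable_cudf_spill=False,
    shared_filesystem=None,
)
# Shared helper that builds the worker plugin list (RMM setup, pre-imports, cuDF spill).
plugins = worker_plugins(
    device_index=0,
    rmm_initial_pool_size=None,
    rmm_maximum_pool_size=None,
    rmm_managed_memory=False,
    rmm_async_alloc=False,
    rmm_release_threshold=None,
    rmm_log_directory=None,
    rmm_track_allocations=False,
    rmm_allocator_external_lib_list=None,
    pre_import=None,
    enable_cudf_spill=False,
    cudf_spill_stats=0,
)
# `data` and `plugins` are then handed to distributed's Nanny in CUDAWorker.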
dask_cuda/device_host_file.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import itertools
 import logging
 import os
@@ -35,7 +38,7 @@ def _serialize_bytelist(x, **kwargs):
 class LoggedBuffer(Buffer):
     """Extends zict.Buffer with logging capabilities
 
-    Two arguments
+    Two arguments ``fast_name`` and ``slow_name`` are passed to constructor that
     identify a user-friendly name for logging of where spilling is going from/to.
     For example, their names can be "Device" and "Host" to identify that spilling
     is happening from a CUDA device into system memory.
@@ -112,7 +115,7 @@ class DeviceSerialized:
 
     This stores a device-side object as
     1. A msgpack encodable header
-    2. A list of
+    2. A list of ``bytes``-like objects (like NumPy arrays)
        that are in host memory
     """
 
@@ -169,12 +172,13 @@ class DeviceHostFile(ZictBase):
     ----------
     worker_local_directory: path
         Path where to store serialized objects on disk
-    device_memory_limit: int
+    device_memory_limit: int or None
         Number of bytes of CUDA device memory for device LRU cache,
-        spills to host cache once filled.
-
+        spills to host cache once filled. Setting this ``0`` or ``None``
+        means unlimited device memory, implies no spilling to host.
+    memory_limit: int or None
         Number of bytes of host memory for host LRU cache, spills to
-        disk once filled. Setting this to
+        disk once filled. Setting this to ``0`` or ``None`` means unlimited
         host memory, implies no spilling to disk.
     log_spilling: bool
         If True, all spilling operations will be logged directly to
@@ -230,15 +234,22 @@ class DeviceHostFile(ZictBase):
         self.device_keys = set()
         self.device_func = dict()
         self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
-
-        self.device_func
-
-
-
-
-
+        if device_memory_limit is None:
+            self.device_buffer = self.device_func
+        else:
+            self.device_buffer = Buffer(
+                self.device_func,
+                self.device_host_func,
+                device_memory_limit,
+                weight=lambda k, v: safe_sizeof(v),
+                **device_buffer_kwargs,
+            )
 
-        self.device =
+        self.device = (
+            self.device_buffer
+            if device_memory_limit is None
+            else self.device_buffer.fast.d
+        )
         self.host = (
             self.host_buffer if memory_limit is None else self.host_buffer.fast.d
         )
@@ -283,7 +294,12 @@ class DeviceHostFile(ZictBase):
         if key in self.others:
             del self.others[key]
         else:
-
+            if isinstance(self.device_buffer, dict) and key not in self.device_buffer:
+                # If `self.device_buffer` is a dictionary, host `key`s are inserted
+                # directly into `self.host_buffer`.
+                del self.host_buffer[key]
+            else:
+                del self.device_buffer[key]
 
     def evict(self):
         """Evicts least recently used host buffer (aka, CPU or system memory)
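Per the updated docstring and constructor logic above, device_memory_limit=None (or 0) skips the device-side LRU Buffer entirely, so nothing spills from device to host. A short sketch of the two configurations; constructor arguments are taken from the docstring above, other parameters are left at their defaults, and the paths and byte counts are illustrative:

from dask_cuda.device_host_file import DeviceHostFile

# Spilling enabled: device -> host above ~8 GB, host -> disk above ~32 GB.
spilling = DeviceHostFile(
    device_memory_limit=8_000_000_000,
    memory_limit=32_000_000_000,
    worker_local_directory="/tmp/dask-worker-space",  # hypothetical path
)

# No device limit: objects stay on the device and never spill to host.
no_device_spill = DeviceHostFile(
    device_memory_limit=None,
    memory_limit=32_000_000_000,
    worker_local_directory="/tmp/dask-worker-space",
)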
dask_cuda/disk_io.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import itertools
 import os
 import os.path
@@ -106,7 +109,7 @@ class SpillToDiskProperties:
     root_dir : os.PathLike
         Path to the root directory to write serialized data.
     shared_filesystem: bool or None, default None
-        Whether the
+        Whether the ``root_dir`` above is shared between all workers or not.
         If ``None``, the "jit-unspill-shared-fs" config value are used, which
         defaults to False.
     gds: bool
@@ -154,10 +157,10 @@ def disk_write(path: str, frames: Iterable, shared_filesystem: bool, gds=False)
         The frames to write to disk
     shared_filesystem: bool
         Whether the target filesystem is shared between all workers or not.
-        If True, the filesystem must support the
+        If True, the filesystem must support the ``os.link()`` operation.
     gds: bool
         Enable the use of GPUDirect Storage. Notice, the consecutive
-
+        ``disk_read()`` must enable GDS as well.
 
     Returns
     -------
@@ -196,7 +199,7 @@ def disk_read(header: Mapping, gds=False) -> list:
         The metadata of the frames to read
     gds: bool
         Enable the use of GPUDirect Storage. Notice, this must
-        match the GDS option set by the prior
+        match the GDS option set by the prior ``disk_write()`` call.
 
     Returns
    -------
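The disk_io.py docstrings above stress that the gds flag must match between the write and the later read. A hedged sketch, assuming (the Returns section is truncated in this view) that disk_write() returns the header that disk_read() consumes; the path and frame contents are hypothetical:

from dask_cuda.disk_io import disk_read, disk_write

frames = [b"serialized-frame-0", b"serialized-frame-1"]  # hypothetical frames
header = disk_write(
    "/tmp/spill/part-0",  # hypothetical spill path
    frames,
    shared_filesystem=False,
    gds=False,
)
# The read must use the same GDS setting as the prior write.
restored = disk_read(header, gds=False)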
dask_cuda/explicit_comms/comms.py
CHANGED
@@ -33,7 +33,7 @@ def get_multi_lock_or_null_context(multi_lock_context, *args, **kwargs):
     Returns
     -------
     context: context
-        Either
+        Either ``MultiLock(*args, **kwargs)`` or a NULL context
     """
     if multi_lock_context:
         from distributed import MultiLock
@@ -52,7 +52,7 @@ def default_comms(client: Optional[Client] = None) -> "CommsContext":
     Parameters
     ----------
     client: Client, optional
-        If no default comm object exists, create the new comm on
+        If no default comm object exists, create the new comm on ``client``
        are returned.
 
     Returns
@@ -77,7 +77,9 @@ def default_comms(client: Optional[Client] = None) -> "CommsContext":
     # Comms are unique to a {client, [workers]} pair, so we key our
     # cache by the token of that.
     client = client or default_client()
-    token = tokenize(
+    token = tokenize(
+        client.id, list(client.scheduler_info(n_workers=-1)["workers"].keys())
+    )
     maybe_comms = _comms_cache.get(token)
     if maybe_comms is None:
         maybe_comms = CommsContext(client=client)
@@ -206,7 +208,9 @@ class CommsContext:
         self.sessionId = uuid.uuid4().int
 
         # Get address of all workers (not Nanny addresses)
-        self.worker_addresses = list(
+        self.worker_addresses = list(
+            self.client.scheduler_info(n_workers=-1)["workers"].keys()
+        )
 
         # Make all workers listen and get all listen addresses
         self.worker_direct_addresses = []
@@ -248,7 +252,7 @@ class CommsContext:
         Returns
         -------
         ret: object or Future
-            If wait=True, the result of
+            If wait=True, the result of ``coroutine``
            If wait=False, Future that can be waited on later.
         """
         ret = self.client.submit(
@@ -305,7 +309,7 @@ class CommsContext:
     def stage_keys(self, name: str, keys: Iterable[Hashable]) -> Dict[int, set]:
        """Staging keys on workers under the given name
 
-        In an explicit-comms task, use
+        In an explicit-comms task, use ``pop_staging_area(..., name)`` to access
        the staged keys and the associated data.
 
        Notes
@@ -335,7 +339,7 @@ class CommsContext:
 
 
 def pop_staging_area(session_state: dict, name: str) -> Dict[str, Any]:
-    """Pop the staging area called
+    """Pop the staging area called ``name``
 
     This function must be called within a running explicit-comms task.