dask-cuda 25.6.0__py3-none-any.whl → 25.10.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- dask_cuda/GIT_COMMIT +1 -1
- dask_cuda/VERSION +1 -1
- dask_cuda/benchmarks/common.py +4 -1
- dask_cuda/benchmarks/local_cudf_groupby.py +3 -0
- dask_cuda/benchmarks/local_cudf_merge.py +4 -1
- dask_cuda/benchmarks/local_cudf_shuffle.py +4 -1
- dask_cuda/benchmarks/local_cupy.py +3 -0
- dask_cuda/benchmarks/local_cupy_map_overlap.py +3 -0
- dask_cuda/benchmarks/utils.py +6 -3
- dask_cuda/cli.py +21 -15
- dask_cuda/cuda_worker.py +28 -58
- dask_cuda/device_host_file.py +31 -15
- dask_cuda/disk_io.py +7 -4
- dask_cuda/explicit_comms/comms.py +11 -7
- dask_cuda/explicit_comms/dataframe/shuffle.py +23 -23
- dask_cuda/get_device_memory_objects.py +4 -7
- dask_cuda/initialize.py +149 -94
- dask_cuda/local_cuda_cluster.py +52 -70
- dask_cuda/plugins.py +17 -16
- dask_cuda/proxify_device_objects.py +12 -10
- dask_cuda/proxify_host_file.py +30 -27
- dask_cuda/proxy_object.py +20 -17
- dask_cuda/tests/conftest.py +41 -0
- dask_cuda/tests/test_cudf_builtin_spilling.py +3 -1
- dask_cuda/tests/test_dask_cuda_worker.py +109 -25
- dask_cuda/tests/test_dask_setup.py +193 -0
- dask_cuda/tests/test_dgx.py +20 -44
- dask_cuda/tests/test_explicit_comms.py +31 -12
- dask_cuda/tests/test_from_array.py +4 -6
- dask_cuda/tests/test_initialize.py +233 -65
- dask_cuda/tests/test_local_cuda_cluster.py +129 -68
- dask_cuda/tests/test_proxify_host_file.py +28 -7
- dask_cuda/tests/test_proxy.py +15 -13
- dask_cuda/tests/test_spill.py +10 -3
- dask_cuda/tests/test_utils.py +100 -29
- dask_cuda/tests/test_worker_spec.py +6 -0
- dask_cuda/utils.py +211 -42
- dask_cuda/utils_test.py +10 -7
- dask_cuda/worker_common.py +196 -0
- dask_cuda/worker_spec.py +6 -1
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/METADATA +11 -4
- dask_cuda-25.10.0.dist-info/RECORD +63 -0
- dask_cuda-25.10.0.dist-info/top_level.txt +6 -0
- shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
- shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
- shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
- dask_cuda-25.6.0.dist-info/RECORD +0 -57
- dask_cuda-25.6.0.dist-info/top_level.txt +0 -4
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/WHEEL +0 -0
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/licenses/LICENSE +0 -0
dask_cuda/GIT_COMMIT
CHANGED
@@ -1 +1 @@
-
+472ca1ce6d1fe836104a5a4f10b284ca9a828ea9
dask_cuda/VERSION
CHANGED
@@ -1 +1 @@
-25.06.00
+25.10.00
dask_cuda/benchmarks/common.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 from argparse import Namespace
 from functools import partial
@@ -124,7 +127,7 @@ def run(client: Client, args: Namespace, config: Config):
     """

     wait_for_cluster(client, shutdown_on_failure=True)
-    assert len(client.scheduler_info()["workers"]) > 0
+    assert len(client.scheduler_info(n_workers=-1)["workers"]) > 0
     setup_memory_pools(
         client=client,
         is_gpu=args.type == "gpu",
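Note: the only functional change across the benchmark modules is the repeated switch from client.scheduler_info() to client.scheduler_info(n_workers=-1). Recent distributed releases return only a limited sample of workers from scheduler_info() by default, so -1 is needed to enumerate them all. A minimal sketch of the call (the scheduler address is a placeholder):

from distributed import Client

client = Client("tcp://scheduler:8786")  # placeholder address

# n_workers=-1 asks the scheduler for every worker rather than a sample
workers = client.scheduler_info(n_workers=-1)["workers"]
print(f"cluster has {len(workers)} workers")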
dask_cuda/benchmarks/local_cudf_merge.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 import math
 from collections import ChainMap
@@ -166,7 +169,7 @@ def merge(args, ddf1, ddf2):

 def bench_once(client, args, write_profile=None):
     # Generate random Dask dataframes
-    n_workers = len(client.scheduler_info()["workers"])
+    n_workers = len(client.scheduler_info(n_workers=-1)["workers"])
     # Allow the number of chunks to vary between
     # the "base" and "other" DataFrames
     args.base_chunks = args.base_chunks or n_workers
dask_cuda/benchmarks/local_cudf_shuffle.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import contextlib
 from collections import ChainMap
 from time import perf_counter
@@ -70,7 +73,7 @@ def create_data(
     """
     chunksize = args.partition_size // np.float64().nbytes

-    workers = list(client.scheduler_info()["workers"].keys())
+    workers = list(client.scheduler_info(n_workers=-1)["workers"].keys())
    assert len(workers) > 0

     dist = args.partition_distribution
dask_cuda/benchmarks/utils.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 import itertools
 import json
@@ -122,7 +125,7 @@ def parse_benchmark_args(
         "pool size."
         ""
         ".. note::"
-        " When paired with
+        " When paired with ``--enable-rmm-async`` the maximum size cannot be "
         " guaranteed due to fragmentation."
         ""
         ".. note::"
@@ -641,11 +644,11 @@ def wait_for_cluster(client, timeout=120, shutdown_on_failure=True):
     for _ in range(timeout // 5):
         print(
             "Waiting for workers to come up, "
-            f"have {len(client.scheduler_info().get('workers', []))}, "
+            f"have {len(client.scheduler_info(n_workers=-1).get('workers', []))}, "
             f"want {expected}"
         )
         time.sleep(5)
-        nworkers = len(client.scheduler_info().get("workers", []))
+        nworkers = len(client.scheduler_info(n_workers=-1).get("workers", []))
        if nworkers == expected:
             return
         else:
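Note: the restored help text ties the RMM maximum pool size to the async allocator. An illustrative sketch using RMM's Python API directly (sizes are arbitrary; requires a GPU): a pool resource enforces its cap outright, while the async allocator's release threshold is only a soft target, so fragmentation can push usage past any intended maximum.

import rmm

# A fixed pool enforces maximum_pool_size as a hard cap...
pool = rmm.mr.PoolMemoryResource(
    rmm.mr.CudaMemoryResource(),
    initial_pool_size=2**30,  # 1 GiB
    maximum_pool_size=2**31,  # 2 GiB
)
rmm.mr.set_current_device_resource(pool)

# ...whereas the async allocator only releases memory back to the driver
# once usage crosses release_threshold, so no hard maximum is guaranteed.
async_mr = rmm.mr.CudaAsyncMemoryResource(release_threshold=2**30)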
dask_cuda/cli.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import absolute_import, division, print_function

 import logging
@@ -90,16 +93,20 @@ def cuda():
     help="""Size of the host LRU cache, which is used to determine when the worker
     starts spilling to disk (not available if JIT-Unspill is enabled). Can be an
     integer (bytes), float (fraction of total system memory), string (like ``"5GB"``
-    or ``"5000M"``), or ``"auto"
+    or ``"5000M"``), or ``"auto"`` or ``0`` for no memory management.""",
 )
 @click.option(
     "--device-memory-limit",
-    default="
+    default="default",
     show_default=True,
     help="""Size of the CUDA device LRU cache, which is used to determine when the
     worker starts spilling to host memory. Can be an integer (bytes), float (fraction of
-    total device memory), string (like ``"5GB"`` or ``"5000M"``),
-    disable spilling to host (i.e. allow full device memory usage).
+    total device memory), string (like ``"5GB"`` or ``"5000M"``), ``"auto"`` or ``0``
+    to disable spilling to host (i.e. allow full device memory usage). Another special
+    value ``"default"`` (which happens to be the default) is also available and uses the
+    recommended Dask-CUDA's defaults and means 80% of the total device memory (analogous
+    to ``0.8``), and disabled spilling (analogous to ``auto``/``0``) on devices without
+    a dedicated memory resource, such as system on a chip (SoC) devices.""",
 )
 @click.option(
     "--enable-cudf-spill/--disable-cudf-spill",
@@ -113,7 +120,7 @@ def cuda():
     type=int,
     default=0,
     help="""Set the cuDF spilling statistics level. This option has no effect if
-
+    ``--enable-cudf-spill`` is not specified.""",
 )
 @click.option(
     "--rmm-pool-size",
@@ -135,8 +142,8 @@ def cuda():
     to set the maximum pool size.

     .. note::
-        When paired with
-        to fragmentation.
+        When paired with ``--enable-rmm-async`` the maximum size cannot be guaranteed
+        due to fragmentation.

     .. note::
         This size is a per-worker configuration, and not cluster-wide.""",
@@ -160,9 +167,8 @@ def cuda():
     allocator. See ``rmm.mr.CudaAsyncMemoryResource`` for more info.

     .. warning::
-        The asynchronous allocator
-
-        result in failure.""",
+        The asynchronous allocator is incompatible with RMM pools and managed memory,
+        trying to enable both will result in failure.""",
 )
 @click.option(
     "--set-rmm-allocator-for-libs",
@@ -245,12 +251,12 @@ def cuda():
     "--shared-filesystem/--no-shared-filesystem",
     default=None,
     type=bool,
-    help="""If
-
-
+    help="""If ``--shared-filesystem`` is specified, inform JIT-Unspill that
+    ``local_directory`` is a shared filesystem available for all workers, whereas
+    ``--no-shared-filesystem`` informs it may not assume it's a shared filesystem.
     If neither is specified, JIT-Unspill will decide based on the Dask config value
-    specified by
-    Notice, a shared filesystem must support the
+    specified by ``"jit-unspill-shared-fs"``.
+    Notice, a shared filesystem must support the ``os.link()`` operation.""",
 )
 @scheduler_file
 @click.option(
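Note: the headline CLI change is the new ``"default"`` value (now also the default) for --device-memory-limit. A sketch of the same setting through the Python API, assuming dask-cuda 25.10 and that LocalCUDACluster mirrors the CLI semantics above:

from dask_cuda import LocalCUDACluster

cluster = LocalCUDACluster(device_memory_limit="default")  # 80% of device memory,
                                                           # spilling disabled on SoCs
# Other accepted spellings, per the help text above:
#   device_memory_limit=0.8     -> fraction of total device memory
#   device_memory_limit="30GB"  -> explicit size
#   device_memory_limit=0       -> disable spilling to host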
dask_cuda/cuda_worker.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 from __future__ import absolute_import, division, print_function

 import asyncio
@@ -18,18 +21,9 @@ from distributed.proctitle import (
 )
 from distributed.worker_memory import parse_memory_limit

-from .device_host_file import DeviceHostFile
 from .initialize import initialize
-from .
-from .
-from .utils import (
-    cuda_visible_devices,
-    get_cpu_affinity,
-    get_n_gpus,
-    get_ucx_config,
-    nvml_device_index,
-    parse_device_memory_limit,
-)
+from .utils import cuda_visible_devices, get_n_gpus, get_ucx_config, nvml_device_index
+from .worker_common import worker_data_function, worker_plugins
@@ -40,7 +34,7 @@ class CUDAWorker(Server):
         nthreads=1,
         name=None,
         memory_limit="auto",
-        device_memory_limit="
+        device_memory_limit="default",
         enable_cudf_spill=False,
         cudf_spill_stats=0,
         rmm_pool_size=None,
@@ -166,35 +160,14 @@ class CUDAWorker(Server):

         if jit_unspill is None:
             jit_unspill = dask.config.get("jit-unspill", default=False)
-        if device_memory_limit is None and memory_limit is None:
-            data = lambda _: {}
-        elif jit_unspill:
-            if enable_cudf_spill:
-                warnings.warn(
-                    "Enabling cuDF spilling and JIT-Unspill together is not "
-                    "safe, consider disabling JIT-Unspill."
-                )

-
-
-
-
-
-
-
-                    "shared_filesystem": shared_filesystem,
-                },
-            )
-        else:
-            data = lambda i: (
-                DeviceHostFile,
-                {
-                    "device_memory_limit": parse_device_memory_limit(
-                        device_memory_limit, device_index=i
-                    ),
-                    "memory_limit": memory_limit,
-                },
-            )
+        data = worker_data_function(
+            device_memory_limit=device_memory_limit,
+            memory_limit=memory_limit,
+            jit_unspill=jit_unspill,
+            enable_cudf_spill=enable_cudf_spill,
+            shared_filesystem=shared_filesystem,
+        )

         cudf_spill_warning = dask.config.get("cudf-spill-warning", default=True)
         if enable_cudf_spill and cudf_spill_warning:
@@ -220,27 +193,24 @@ class CUDAWorker(Server):
             preload_argv=(list(preload_argv) or []) + ["--create-cuda-context"],
             security=security,
             env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
-            plugins=
-
-
-
-
-
-
-
-
-
-
-
-
-                PreImport(pre_import),
-                CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
-            },
+            plugins=worker_plugins(
+                device_index=nvml_device_index(i, cuda_visible_devices(i)),
+                rmm_initial_pool_size=rmm_pool_size,
+                rmm_maximum_pool_size=rmm_maximum_pool_size,
+                rmm_managed_memory=rmm_managed_memory,
+                rmm_async_alloc=rmm_async,
+                rmm_release_threshold=rmm_release_threshold,
+                rmm_log_directory=rmm_log_directory,
+                rmm_track_allocations=rmm_track_allocations,
+                rmm_allocator_external_lib_list=rmm_allocator_external_lib_list,
+                pre_import=pre_import,
+                enable_cudf_spill=enable_cudf_spill,
+                cudf_spill_stats=cudf_spill_stats,
+            ),
             name=name if nprocs == 1 or name is None else str(name) + "-" + str(i),
             local_directory=local_directory,
             config={
-                "distributed
+                "distributed-ucxx": get_ucx_config(
                     enable_tcp_over_ucx=enable_tcp_over_ucx,
                     enable_infiniband=enable_infiniband,
                     enable_nvlink=enable_nvlink,
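Note: the inline construction of the worker's data argument moved to the new dask_cuda/worker_common.py (worker_data_function and worker_plugins). A sketch of the non-JIT-Unspill logic, reconstructed from the code removed above (the helper's real body lives in worker_common.py; the name below is suffixed _sketch to mark it as an illustration):

from dask_cuda.device_host_file import DeviceHostFile
from dask_cuda.utils import parse_device_memory_limit

def worker_data_function_sketch(
    device_memory_limit, memory_limit, jit_unspill, enable_cudf_spill, shared_filesystem
):
    # No limits at all: hand each worker a plain dict.
    if device_memory_limit is None and memory_limit is None:
        return lambda _: {}
    # (JIT-Unspill branch elided in this view; it returns a ProxifyHostFile spec.)
    # Otherwise map a device index to a DeviceHostFile spec, resolving the
    # device limit per GPU.
    return lambda i: (
        DeviceHostFile,
        {
            "device_memory_limit": parse_device_memory_limit(
                device_memory_limit, device_index=i
            ),
            "memory_limit": memory_limit,
        },
    )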
dask_cuda/device_host_file.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import itertools
 import logging
 import os
@@ -35,7 +38,7 @@ def _serialize_bytelist(x, **kwargs):
 class LoggedBuffer(Buffer):
     """Extends zict.Buffer with logging capabilities

-    Two arguments
+    Two arguments ``fast_name`` and ``slow_name`` are passed to constructor that
     identify a user-friendly name for logging of where spilling is going from/to.
     For example, their names can be "Device" and "Host" to identify that spilling
     is happening from a CUDA device into system memory.
@@ -112,7 +115,7 @@ class DeviceSerialized:

     This stores a device-side object as
     1. A msgpack encodable header
-    2. A list of
+    2. A list of ``bytes``-like objects (like NumPy arrays)
        that are in host memory
     """

@@ -169,12 +172,13 @@ class DeviceHostFile(ZictBase):
     ----------
     worker_local_directory: path
         Path where to store serialized objects on disk
-    device_memory_limit: int
+    device_memory_limit: int or None
         Number of bytes of CUDA device memory for device LRU cache,
-        spills to host cache once filled.
-
+        spills to host cache once filled. Setting this ``0`` or ``None``
+        means unlimited device memory, implies no spilling to host.
+    memory_limit: int or None
         Number of bytes of host memory for host LRU cache, spills to
-        disk once filled. Setting this to
+        disk once filled. Setting this to ``0`` or ``None`` means unlimited
         host memory, implies no spilling to disk.
     log_spilling: bool
         If True, all spilling operations will be logged directly to
@@ -230,15 +234,22 @@ class DeviceHostFile(ZictBase):
         self.device_keys = set()
         self.device_func = dict()
         self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
-
-        self.device_func
-
-
-
-
-
+        if device_memory_limit is None:
+            self.device_buffer = self.device_func
+        else:
+            self.device_buffer = Buffer(
+                self.device_func,
+                self.device_host_func,
+                device_memory_limit,
+                weight=lambda k, v: safe_sizeof(v),
+                **device_buffer_kwargs,
+            )

-        self.device =
+        self.device = (
+            self.device_buffer
+            if device_memory_limit is None
+            else self.device_buffer.fast.d
+        )
         self.host = (
             self.host_buffer if memory_limit is None else self.host_buffer.fast.d
         )
@@ -283,7 +294,12 @@ class DeviceHostFile(ZictBase):
         if key in self.others:
             del self.others[key]
         else:
-
+            if isinstance(self.device_buffer, dict) and key not in self.device_buffer:
+                # If `self.device_buffer` is a dictionary, host `key`s are inserted
+                # directly into `self.host_buffer`.
+                del self.host_buffer[key]
+            else:
+                del self.device_buffer[key]

     def evict(self):
         """Evicts least recently used host buffer (aka, CPU or system memory)
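Note: DeviceHostFile now skips the zict.Buffer entirely when device_memory_limit is None, mirroring the existing memory_limit handling. The zict pattern in isolation (illustrative values; identity functions stand in for the device/host serializers):

from zict import Buffer, Func

fast, slow = {}, {}
identity = lambda x: x
slow_store = Func(identity, identity, slow)  # stand-in for the serializing layer

# Weighted LRU buffer: once total weight exceeds n, least-recently-used
# entries are evicted from `fast` into `slow_store`.
buf = Buffer(fast, slow_store, n=100, weight=lambda k, v: len(v))
buf["a"] = b"x" * 80  # fits within the limit
buf["b"] = b"y" * 50  # total weight 130 > 100, so "a" is evicted to slow
assert "a" in slow and "b" in fast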
dask_cuda/disk_io.py
CHANGED
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import itertools
 import os
 import os.path
@@ -106,7 +109,7 @@ class SpillToDiskProperties:
     root_dir : os.PathLike
         Path to the root directory to write serialized data.
     shared_filesystem: bool or None, default None
-        Whether the
+        Whether the ``root_dir`` above is shared between all workers or not.
         If ``None``, the "jit-unspill-shared-fs" config value are used, which
         defaults to False.
     gds: bool
@@ -154,10 +157,10 @@ def disk_write(path: str, frames: Iterable, shared_filesystem: bool, gds=False)
         The frames to write to disk
     shared_filesystem: bool
         Whether the target filesystem is shared between all workers or not.
-        If True, the filesystem must support the
+        If True, the filesystem must support the ``os.link()`` operation.
     gds: bool
         Enable the use of GPUDirect Storage. Notice, the consecutive
-
+        ``disk_read()`` must enable GDS as well.

     Returns
     -------
@@ -196,7 +199,7 @@ def disk_read(header: Mapping, gds=False) -> list:
         The metadata of the frames to read
     gds: bool
         Enable the use of GPUDirect Storage. Notice, this must
-        match the GDS option set by the prior
+        match the GDS option set by the prior ``disk_write()`` call.

     Returns
     -------
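Note: the docstrings above tie the shared-filesystem mode to os.link() support. Why hard links matter, as a standalone illustration: a spilled file can be exposed under a second name without copying its bytes, and filesystems without the capability raise OSError.

import os
import tempfile

with tempfile.TemporaryDirectory() as root:
    src = os.path.join(root, "part-0.bin")
    with open(src, "wb") as f:
        f.write(b"spilled frames")

    dst = os.path.join(root, "part-0.alias")
    os.link(src, dst)  # hard link; raises OSError where unsupported
    assert os.path.samefile(src, dst)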
dask_cuda/explicit_comms/comms.py
CHANGED
@@ -33,7 +33,7 @@ def get_multi_lock_or_null_context(multi_lock_context, *args, **kwargs):
     Returns
     -------
     context: context
-        Either
+        Either ``MultiLock(*args, **kwargs)`` or a NULL context
     """
     if multi_lock_context:
         from distributed import MultiLock
@@ -52,7 +52,7 @@ def default_comms(client: Optional[Client] = None) -> "CommsContext":
     Parameters
     ----------
     client: Client, optional
-        If no default comm object exists, create the new comm on
+        If no default comm object exists, create the new comm on ``client``
         are returned.

     Returns
@@ -77,7 +77,9 @@ def default_comms(client: Optional[Client] = None) -> "CommsContext":
     # Comms are unique to a {client, [workers]} pair, so we key our
     # cache by the token of that.
     client = client or default_client()
-    token = tokenize(
+    token = tokenize(
+        client.id, list(client.scheduler_info(n_workers=-1)["workers"].keys())
+    )
     maybe_comms = _comms_cache.get(token)
     if maybe_comms is None:
         maybe_comms = CommsContext(client=client)
@@ -206,7 +208,9 @@ class CommsContext:
         self.sessionId = uuid.uuid4().int

         # Get address of all workers (not Nanny addresses)
-        self.worker_addresses = list(
+        self.worker_addresses = list(
+            self.client.scheduler_info(n_workers=-1)["workers"].keys()
+        )

         # Make all workers listen and get all listen addresses
         self.worker_direct_addresses = []
@@ -248,7 +252,7 @@ class CommsContext:
     Returns
     -------
     ret: object or Future
-        If wait=True, the result of
+        If wait=True, the result of ``coroutine``
         If wait=False, Future that can be waited on later.
     """
     ret = self.client.submit(
@@ -305,7 +309,7 @@ class CommsContext:
     def stage_keys(self, name: str, keys: Iterable[Hashable]) -> Dict[int, set]:
         """Staging keys on workers under the given name

-        In an explicit-comms task, use
+        In an explicit-comms task, use ``pop_staging_area(..., name)`` to access
         the staged keys and the associated data.

         Notes
@@ -335,7 +339,7 @@ class CommsContext:


 def pop_staging_area(session_state: dict, name: str) -> Dict[str, Any]:
-    """Pop the staging area called
+    """Pop the staging area called ``name``

     This function must be called within a running explicit-comms task.
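Note: the comms cache key is now built from the client id plus the full worker list, again via n_workers=-1. The keying behavior in isolation (addresses are illustrative):

from dask.base import tokenize

workers = ["tcp://10.0.0.1:40000", "tcp://10.0.0.2:40000"]  # illustrative
key_before = tokenize("client-abc", workers)
key_after = tokenize("client-abc", workers + ["tcp://10.0.0.3:40000"])

# Any change in the worker set (or client) yields a new token, so the cached
# CommsContext is not reused for a different cluster composition.
assert key_before != key_after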
dask_cuda/explicit_comms/dataframe/shuffle.py
CHANGED
@@ -65,13 +65,13 @@ def get_no_comm_postprocess(
 ) -> Callable[[DataFrame], DataFrame]:
     """Get function for post-processing partitions not communicated

-    In cuDF, the
+    In cuDF, the ``group_split_dispatch`` uses ``scatter_by_map`` to create
     the partitions, which is implemented by splitting a single base dataframe
     into multiple partitions. This means that memory are not freed until
     ALL partitions are deleted.

     In order to free memory ASAP, we can deep copy partitions NOT being
-    communicated. We do this when
+    communicated. We do this when ``num_rounds != batchsize``.

     Parameters
     ----------
@@ -116,7 +116,7 @@ async def send(
     rank_to_out_part_ids: Dict[int, Set[int]],
     out_part_id_to_dataframe: Dict[int, DataFrame],
 ) -> None:
-    """Notice, items sent are removed from
+    """Notice, items sent are removed from ``out_part_id_to_dataframe``"""
     futures = []
     for rank, out_part_ids in rank_to_out_part_ids.items():
         if rank != myrank:
@@ -135,7 +135,7 @@ async def recv(
     out_part_id_to_dataframe_list: Dict[int, List[DataFrame]],
     proxify: Proxify,
 ) -> None:
-    """Notice, received items are appended to
+    """Notice, received items are appended to ``out_parts_list``"""

     async def read_msg(rank: int) -> None:
         msg: Dict[int, DataFrame] = nested_deserialize(await eps[rank].read())
@@ -150,11 +150,11 @@ async def recv(
 def compute_map_index(
     df: DataFrame, column_names: List[str], npartitions: int
 ) -> Series:
-    """Return a Series that maps each row
+    """Return a Series that maps each row ``df`` to a partition ID

     The partitions are determined by hashing the columns given by column_names
-    unless if
-
+    unless if ``column_names[0] == "_partitions"``, in which case the values of
+    ``column_names[0]`` are used as index.

     Parameters
     ----------
@@ -168,7 +168,7 @@ def compute_map_index(
     Returns
     -------
     Series
-        Series that maps each row
+        Series that maps each row ``df`` to a partition ID
     """

     if column_names[0] == "_partitions":
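Note: compute_map_index and partition_dataframe (next hunk) implement the hash partitioning these docstrings describe. A minimal standalone version of the idea, assuming cuDF is available (hash_values is cuDF's row-hashing API; the "_partitions" escape hatch is skipped here):

import cudf

df = cudf.DataFrame({"key": [1, 2, 3, 4, 5], "val": [10, 20, 30, 40, 50]})
npartitions = 2

# Hash the shuffle columns and map every row to a partition ID in
# 0..npartitions-1; rows with equal keys land in the same partition.
map_index = df[["key"]].hash_values() % npartitions
partitions = {pid: df[map_index == pid] for pid in range(npartitions)}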
@@ -193,8 +193,8 @@ def partition_dataframe(
     """Partition dataframe to a dict of dataframes

     The partitions are determined by hashing the columns given by column_names
-    unless
-
+    unless ``column_names[0] == "_partitions"``, in which case the values of
+    ``column_names[0]`` are used as index.

     Parameters
     ----------
@@ -301,13 +301,13 @@ async def send_recv_partitions(
     rank_to_out_part_ids
         dict that for each worker rank specifies a set of output partition IDs.
         If the worker shouldn't return any partitions, it is excluded from the
-        dict. Partition IDs are global integers
-        to the dict keys returned by
+        dict. Partition IDs are global integers ``0..npartitions`` and corresponds
+        to the dict keys returned by ``group_split_dispatch``.
     out_part_id_to_dataframe
         Mapping from partition ID to dataframe. This dict is cleared on return.
     no_comm_postprocess
         Function to post-process partitions not communicated.
-        See
+        See ``get_no_comm_postprocess``
     proxify
         Function to proxify object.
     out_part_id_to_dataframe_list
@@ -365,8 +365,8 @@ async def shuffle_task(
     rank_to_out_part_ids: dict
         dict that for each worker rank specifies a set of output partition IDs.
         If the worker shouldn't return any partitions, it is excluded from the
-        dict. Partition IDs are global integers
-        to the dict keys returned by
+        dict. Partition IDs are global integers ``0..npartitions`` and corresponds
+        to the dict keys returned by ``group_split_dispatch``.
     column_names: list of strings
         List of column names on which we want to split.
     npartitions: int
@@ -449,7 +449,7 @@ def shuffle(
         List of column names on which we want to split.
     npartitions: int or None
         The desired number of output partitions. If None, the number of output
-        partitions equals
+        partitions equals ``df.npartitions``
     ignore_index: bool
         Ignore index during shuffle. If True, performance may improve,
         but index values will not be preserved.
@@ -460,7 +460,7 @@ def shuffle(
         If -1, each worker will handle all its partitions in a single round and
         all techniques to reduce memory usage are disabled, which might be faster
         when memory pressure isn't an issue.
-        If None, the value of
+        If None, the value of ``DASK_EXPLICIT_COMMS_BATCHSIZE`` is used or 1 if not
         set thus by default, we prioritize robustness over performance.

     Returns
@@ -471,12 +471,12 @@ def shuffle(
     Developer Notes
     ---------------
     The implementation consist of three steps:
-    (a) Stage the partitions of
+    (a) Stage the partitions of ``df`` on all workers and then cancel them
         thus at this point the Dask Scheduler doesn't know about any of the
         the partitions.
     (b) Submit a task on each worker that shuffle (all-to-all communicate)
         the staged partitions and return a list of dataframe-partitions.
-    (c) Submit a dask graph that extract (using
+    (c) Submit a dask graph that extract (using ``getitem()``) individual
         dataframe-partitions from (b).
     """
     c = comms.default_comms()
@@ -594,7 +594,7 @@ def _contains_shuffle_expr(*args) -> bool:
     """
     Check whether any of the arguments is a Shuffle expression.

-    This is called by
+    This is called by ``compute``, which is given a sequence of Dask Collections
     to process. For each of those, we'll check whether the expresion contains a
     Shuffle operation.
     """
@@ -712,9 +712,9 @@ def patch_shuffle_expression() -> None:
     """Patch Dasks Shuffle expression.

     Notice, this is monkey patched into Dask at dask_cuda
-    import, and it changes
-    an
-    config is set to
+    import, and it changes ``Shuffle._layer`` to lower into
+    an ``ECShuffle`` expression when the 'explicit-comms'
+    config is set to ``True``.
     """
     dask.base.compute = _patched_compute
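Note: the last hunk restores the description of how dask_cuda monkey patches Dask's shuffle. Opting in uses the 'explicit-comms' config mentioned there; a sketch (needs a running dask-cuda cluster to do anything useful):

import dask
import dask_cuda  # noqa: F401  importing applies the patch described above

with dask.config.set({"explicit-comms": True}):
    # Shuffle-producing operations (sorts, merges, shuffles) now lower to the
    # explicit-comms ECShuffle expression instead of Dask's default shuffle.
    ...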