dask-cuda 25.4.0__py3-none-any.whl → 25.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dask_cuda/GIT_COMMIT CHANGED
@@ -1 +1 @@
- e9ebd92886e6f518af02faf8a2cdadeb700b25a9
+ 1f834655ecc6286b9e3082f037594f70dcb74062
dask_cuda/VERSION CHANGED
@@ -1 +1 @@
- 25.04.00
+ 25.06.00
dask_cuda/_compat.py ADDED
@@ -0,0 +1,18 @@
+ # Copyright (c) 2025 NVIDIA CORPORATION.
+
+ import functools
+ import importlib.metadata
+
+ import packaging.version
+
+
+ @functools.lru_cache(maxsize=None)
+ def get_dask_version() -> packaging.version.Version:
+     return packaging.version.parse(importlib.metadata.version("dask"))
+
+
+ @functools.lru_cache(maxsize=None)
+ def DASK_2025_4_0():
+     # dask 2025.4.0 isn't currently released, so we're relying
+     # on strictly greater than here.
+     return get_dask_version() > packaging.version.parse("2025.3.0")
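The new module gives dask-cuda a cached view of the installed Dask version so later code can branch on it. A minimal sketch of how such a gate is consumed (the import path is the one added above; the two branches are illustrative, not taken from the package):

```python
from dask_cuda._compat import DASK_2025_4_0

if DASK_2025_4_0():
    # dask >= 2025.4.x: expression-based graphs, so compute() needs the
    # explicit materialization patch added in shuffle.py below.
    pass
else:
    # Older dask releases: the pre-existing code path is sufficient.
    pass
```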
dask_cuda/explicit_comms/dataframe/shuffle.py CHANGED
@@ -1,6 +1,9 @@
+ # Copyright (c) 2021-2025 NVIDIA CORPORATION.
+
  from __future__ import annotations

  import asyncio
+ import functools
  from collections import defaultdict
  from math import ceil
  from operator import getitem
@@ -23,6 +26,7 @@ from distributed import wait
  from distributed.protocol import nested_deserialize, to_serialize
  from distributed.worker import Worker

+ from ..._compat import DASK_2025_4_0
  from .. import comms

  T = TypeVar("T")
@@ -582,6 +586,128 @@ def _use_explicit_comms() -> bool:
      return False


+ _base_lower = dask_expr._shuffle.Shuffle._lower
+ _base_compute = dask.base.compute
+
+
+ def _contains_shuffle_expr(*args) -> bool:
+     """
+     Check whether any of the arguments is a Shuffle expression.
+
+     This is called by `compute`, which is given a sequence of Dask Collections
+     to process. For each of those, we'll check whether the expresion contains a
+     Shuffle operation.
+     """
+     for collection in args:
+         if isinstance(collection, dask.dataframe.DataFrame):
+             shuffle_ops = list(
+                 collection.expr.find_operations(
+                     (
+                         dask_expr._shuffle.RearrangeByColumn,
+                         dask_expr.SetIndex,
+                         dask_expr._shuffle.Shuffle,
+                     )
+                 )
+             )
+             if len(shuffle_ops) > 0:
+                 return True
+     return False
+
+
+ @functools.wraps(_base_compute)
+ def _patched_compute(
+     *args,
+     traverse=True,
+     optimize_graph=True,
+     scheduler=None,
+     get=None,
+     **kwargs,
+ ):
+     # A patched version of dask.compute that explicitly materializes the task
+     # graph when we're using explicit-comms and the expression contains a
+     # Shuffle operation.
+     # https://github.com/rapidsai/dask-upstream-testing/issues/37#issuecomment-2779798670
+     # contains more details on the issue.
+     if DASK_2025_4_0() and _use_explicit_comms() and _contains_shuffle_expr(*args):
+         from dask.base import (
+             collections_to_expr,
+             flatten,
+             get_scheduler,
+             shorten_traceback,
+             unpack_collections,
+         )
+
+         collections, repack = unpack_collections(*args, traverse=traverse)
+         if not collections:
+             return args
+
+         schedule = get_scheduler(
+             scheduler=scheduler,
+             collections=collections,
+             get=get,
+         )
+         from dask._expr import FinalizeCompute
+
+         expr = collections_to_expr(collections, optimize_graph)
+         expr = FinalizeCompute(expr)
+
+         with shorten_traceback():
+             expr = expr.optimize()
+             keys = list(flatten(expr.__dask_keys__()))
+
+             # materialize the HLG here
+             expr = dict(expr.__dask_graph__())
+
+             results = schedule(expr, keys, **kwargs)
+             return repack(results)
+
+     else:
+         return _base_compute(
+             *args,
+             traverse=traverse,
+             optimize_graph=optimize_graph,
+             scheduler=scheduler,
+             get=get,
+             **kwargs,
+         )
+
+
+ class ECShuffle(dask_expr._shuffle.TaskShuffle):
+     """Explicit-Comms Shuffle Expression."""
+
+     def _layer(self):
+         # Execute an explicit-comms shuffle
+         if not hasattr(self, "_ec_shuffled"):
+             on = self.partitioning_index
+             df = dask_expr.new_collection(self.frame)
+             ec_shuffled = shuffle(
+                 df,
+                 [on] if isinstance(on, str) else on,
+                 self.npartitions_out,
+                 self.ignore_index,
+             )
+             object.__setattr__(self, "_ec_shuffled", ec_shuffled)
+         graph = self._ec_shuffled.dask.copy()
+         shuffled_name = self._ec_shuffled._name
+         for i in range(self.npartitions_out):
+             graph[(self._name, i)] = graph[(shuffled_name, i)]
+         return graph
+
+
+ def _patched_lower(self):
+     if self.method in (None, "tasks") and _use_explicit_comms():
+         return ECShuffle(
+             self.frame,
+             self.partitioning_index,
+             self.npartitions_out,
+             self.ignore_index,
+             self.options,
+             self.original_partitioning_index,
+         )
+     else:
+         return _base_lower(self)
+
+
  def patch_shuffle_expression() -> None:
      """Patch Dasks Shuffle expression.

@@ -590,40 +716,6 @@ def patch_shuffle_expression() -> None:
      an `ECShuffle` expression when the 'explicit-comms'
      config is set to `True`.
      """
-
-     class ECShuffle(dask_expr._shuffle.TaskShuffle):
-         """Explicit-Comms Shuffle Expression."""
-
-         def _layer(self):
-             # Execute an explicit-comms shuffle
-             if not hasattr(self, "_ec_shuffled"):
-                 on = self.partitioning_index
-                 df = dask_expr.new_collection(self.frame)
-                 self._ec_shuffled = shuffle(
-                     df,
-                     [on] if isinstance(on, str) else on,
-                     self.npartitions_out,
-                     self.ignore_index,
-                 )
-             graph = self._ec_shuffled.dask.copy()
-             shuffled_name = self._ec_shuffled._name
-             for i in range(self.npartitions_out):
-                 graph[(self._name, i)] = graph[(shuffled_name, i)]
-             return graph
-
-     _base_lower = dask_expr._shuffle.Shuffle._lower
-
-     def _patched_lower(self):
-         if self.method in (None, "tasks") and _use_explicit_comms():
-             return ECShuffle(
-                 self.frame,
-                 self.partitioning_index,
-                 self.npartitions_out,
-                 self.ignore_index,
-                 self.options,
-                 self.original_partitioning_index,
-             )
-         else:
-             return _base_lower(self)
+     dask.base.compute = _patched_compute

      dask_expr._shuffle.Shuffle._lower = _patched_lower
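These hunks hoist `ECShuffle` and `_patched_lower` to module level and additionally wrap `dask.base.compute`, so `patch_shuffle_expression()` now installs both patches. A rough sketch of how the explicit-comms path is switched on (dask-cuda normally calls `patch_shuffle_expression()` itself when a CUDA cluster or worker starts; calling it directly here and the DataFrame contents are purely illustrative):

```python
import dask
import dask.dataframe as dd
import pandas as pd

from dask_cuda.explicit_comms.dataframe.shuffle import patch_shuffle_expression

# Install the patched Shuffle._lower and the patched dask.base.compute.
patch_shuffle_expression()

df = dd.from_pandas(
    pd.DataFrame({"key": range(100), "value": range(100)}), npartitions=4
)

with dask.config.set(explicit_comms=True):
    # With the config flag set (and a distributed client running), shuffles
    # lower to ECShuffle and compute() materializes the task graph.
    shuffled = df.shuffle(on="key")
```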
dask_cuda/get_device_memory_objects.py CHANGED
@@ -1,3 +1,5 @@
+ # Copyright (c) 2025, NVIDIA CORPORATION.
+
  from typing import Set

  from dask.sizeof import sizeof
@@ -140,3 +142,16 @@ def register_cupy(): # NB: this overwrites dask.sizeof.register_cupy()
      @sizeof.register(cupy.ndarray)
      def sizeof_cupy_ndarray(x):
          return int(x.nbytes)
+
+
+ @sizeof.register_lazy("pylibcudf")
+ def register_pylibcudf():
+     import pylibcudf
+
+     @sizeof.register(pylibcudf.column.OwnerWithCAI)
+     def sizeof_owner_with_cai(x):
+         # OwnerWithCAI implements __cuda_array_interface__ so this should always
+         # be zero-copy
+         col = pylibcudf.column.Column.from_cuda_array_interface(x)
+         # col.data() returns a gpumemoryview, which knows the size in bytes
+         return col.data().nbytes
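The new registration lets `dask.sizeof` report the device footprint of the `pylibcudf` owner objects that back cuDF columns. A small sketch of what it measures, mirroring the `test_sizeof_owner_with_cai` test added further down (assumes cudf/pylibcudf and a visible GPU; the 24 bytes come from three int64 values):

```python
import cudf
import dask.sizeof
import dask_cuda

s = cudf.Series([1, 2, 3])  # three int64 values -> 24 bytes of device memory

# dispatch() extracts the device-memory owner(s) backing the series.
[owner] = dask_cuda.get_device_memory_objects.dispatch(s)
print(dask.sizeof.sizeof(owner))  # 24
```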
dask_cuda/is_device_object.py CHANGED
@@ -1,3 +1,4 @@
+ # Copyright (c) 2025 NVIDIA CORPORATION.
  from __future__ import absolute_import, division, print_function

  from dask.utils import Dispatch
@@ -35,6 +36,8 @@ def register_cudf():
      def is_device_object_cudf_series(s):
          return True

-     @is_device_object.register(cudf.BaseIndex)
+     @is_device_object.register(cudf.Index)
+     @is_device_object.register(cudf.RangeIndex)
+     @is_device_object.register(cudf.MultiIndex)
      def is_device_object_cudf_index(s):
          return True
dask_cuda/is_spillable_object.py CHANGED
@@ -1,3 +1,4 @@
+ # Copyright (c) 2025 NVIDIA CORPORATION.
  from __future__ import absolute_import, division, print_function

  from typing import Optional
@@ -34,7 +35,9 @@ def register_cudf():
      def is_device_object_cudf_dataframe(df):
          return cudf_spilling_status()

-     @is_spillable_object.register(cudf.BaseIndex)
+     @is_spillable_object.register(cudf.Index)
+     @is_spillable_object.register(cudf.RangeIndex)
+     @is_spillable_object.register(cudf.MultiIndex)
      def is_device_object_cudf_index(s):
          return cudf_spilling_status()

dask_cuda/proxify_device_objects.py CHANGED
@@ -1,3 +1,4 @@
+ # Copyright (c) 2025 NVIDIA CORPORATION.
  import functools
  import pydoc
  from collections import defaultdict
@@ -242,7 +243,9 @@ def _register_cudf():

      @dispatch.register(cudf.DataFrame)
      @dispatch.register(cudf.Series)
-     @dispatch.register(cudf.BaseIndex)
+     @dispatch.register(cudf.Index)
+     @dispatch.register(cudf.MultiIndex)
+     @dispatch.register(cudf.RangeIndex)
      def proxify_device_object_cudf_dataframe(
          obj, proxied_id_to_proxy, found_proxies, excl_proxies
      ):
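The same single `cudf.BaseIndex` registration is replaced by explicit `Index`/`RangeIndex`/`MultiIndex` registrations in is_device_object.py, is_spillable_object.py, and this file. A toy sketch of how `dask.utils.Dispatch` resolves handlers may help explain why each concrete index type is listed: lookup walks the argument's method resolution order, so a type is only covered if it or one of its bases was registered (the classes below are made up, and the inference about the cuDF class hierarchy is an assumption, not stated in the diff):

```python
from dask.utils import Dispatch

is_gpu_backed = Dispatch(name="is_gpu_backed")

class Base: ...
class Concrete(Base): ...
class Unregistered: ...

@is_gpu_backed.register(Base)
def _(obj):
    return True

print(is_gpu_backed(Concrete()))  # True: lookup walks the MRO up to Base

try:
    is_gpu_backed(Unregistered())
except TypeError as exc:
    # No registration matches anywhere in Unregistered's MRO.
    print(exc)
```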
dask_cuda/tests/test_dask_cuda_worker.py CHANGED
@@ -1,3 +1,6 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-License-Identifier: Apache-2.0
+
  from __future__ import absolute_import, division, print_function

  import os
@@ -16,7 +19,7 @@ from dask_cuda.utils import (
      get_cluster_configuration,
      get_device_total_memory,
      get_gpu_count_mig,
-     get_gpu_uuid_from_index,
+     get_gpu_uuid,
      get_n_gpus,
      wait_workers,
  )
@@ -409,7 +412,7 @@ def test_cuda_mig_visible_devices_and_memory_limit_and_nthreads(loop): # noqa:


  def test_cuda_visible_devices_uuid(loop): # noqa: F811
-     gpu_uuid = get_gpu_uuid_from_index(0)
+     gpu_uuid = get_gpu_uuid(0)

      with patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": gpu_uuid}):
          with popen(["dask", "scheduler", "--port", "9359", "--no-dashboard"]):
dask_cuda/tests/test_explicit_comms.py CHANGED
@@ -21,18 +21,16 @@ from distributed.deploy.local import LocalCluster

  import dask_cuda
  from dask_cuda.explicit_comms import comms
- from dask_cuda.explicit_comms.dataframe.shuffle import shuffle as explicit_comms_shuffle
+ from dask_cuda.explicit_comms.dataframe.shuffle import (
+     _contains_shuffle_expr,
+     shuffle as explicit_comms_shuffle,
+ )
  from dask_cuda.utils_test import IncreasedCloseTimeoutNanny

  mp = mp.get_context("spawn") # type: ignore
  ucp = pytest.importorskip("ucp")


- # Set default shuffle method to "tasks"
- if dask.config.get("dataframe.shuffle.method", None) is None:
-     dask.config.set({"dataframe.shuffle.method": "tasks"})
-
-
  # Notice, all of the following tests is executed in a new process such
  # that UCX options of the different tests doesn't conflict.

@@ -530,3 +528,20 @@ def test_scaled_cluster_gets_new_comms_context():
      expected = shuffled.compute()

      assert_eq(result, expected)
+
+
+ def test_contains_shuffle_expr():
+     df = dd.from_pandas(pd.DataFrame({"key": np.arange(10)}), npartitions=2)
+     assert not _contains_shuffle_expr(df)
+
+     with dask.config.set(explicit_comms=True):
+         shuffled = df.shuffle(on="key")
+
+     assert _contains_shuffle_expr(shuffled)
+     assert not _contains_shuffle_expr(df)
+
+     # this requires an active client.
+     with LocalCluster(n_workers=1) as cluster:
+         with Client(cluster):
+             explict_shuffled = explicit_comms_shuffle(df, ["key"])
+             assert not _contains_shuffle_expr(explict_shuffled)
dask_cuda/tests/test_local_cuda_cluster.py CHANGED
@@ -1,3 +1,6 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-License-Identifier: Apache-2.0
+
  import asyncio
  import os
  import pkgutil
@@ -16,7 +19,7 @@ from dask_cuda.utils import (
      get_cluster_configuration,
      get_device_total_memory,
      get_gpu_count_mig,
-     get_gpu_uuid_from_index,
+     get_gpu_uuid,
      print_cluster_config,
  )
  from dask_cuda.utils_test import MockWorker
@@ -419,7 +422,7 @@ async def test_available_mig_workers():

  @gen_test(timeout=20)
  async def test_gpu_uuid():
-     gpu_uuid = get_gpu_uuid_from_index(0)
+     gpu_uuid = get_gpu_uuid(0)

      async with LocalCUDACluster(
          CUDA_VISIBLE_DEVICES=gpu_uuid,
dask_cuda/tests/test_proxify_host_file.py CHANGED
@@ -1,3 +1,5 @@
+ # Copyright (c) 2025, NVIDIA CORPORATION.
+
  from typing import Iterable
  from unittest.mock import patch

@@ -414,7 +416,7 @@ async def test_compatibility_mode_dataframe_shuffle(compatibility_mode, npartiti
      ddf = dask.dataframe.from_pandas(
          cudf.DataFrame({"key": np.arange(10)}), npartitions=npartitions
      )
-     res = ddf.shuffle(on="key", shuffle_method="tasks").persist()
+     [res] = client.persist([ddf.shuffle(on="key", shuffle_method="tasks")])

      # With compatibility mode on, we shouldn't encounter any proxy objects
      if compatibility_mode:
@@ -440,7 +442,7 @@ async def test_worker_force_spill_to_disk():
      async with Client(cluster, asynchronous=True) as client:
          # Create a df that are spilled to host memory immediately
          df = cudf.DataFrame({"key": np.arange(10**8)})
-         ddf = dask.dataframe.from_pandas(df, npartitions=1).persist()
+         [ddf] = client.persist([dask.dataframe.from_pandas(df, npartitions=1)])
          await ddf

          async def f(dask_worker):
@@ -498,3 +500,14 @@ def test_on_demand_debug_info():
      assert f"WARNING - RMM allocation of {size} failed" in log
      assert f"RMM allocs: {size}" in log
      assert "traceback:" in log
+
+
+ def test_sizeof_owner_with_cai():
+     cudf = pytest.importorskip("cudf")
+     s = cudf.Series([1, 2, 3])
+
+     items = dask_cuda.get_device_memory_objects.dispatch(s)
+     assert len(items) == 1
+     item = items[0]
+     result = dask.sizeof.sizeof(item)
+     assert result == 24
dask_cuda/tests/test_spill.py CHANGED
@@ -1,14 +1,18 @@
+ # Copyright (c) 2025, NVIDIA CORPORATION.
+
  import gc
  import os
  from time import sleep
+ from typing import TypedDict

  import pytest

  import dask
  from dask import array as da
- from distributed import Client, wait
+ from distributed import Client, Worker, wait
  from distributed.metrics import time
  from distributed.sizeof import sizeof
+ from distributed.utils import Deadline
  from distributed.utils_test import gen_cluster, gen_test, loop # noqa: F401

  import dask_cudf
@@ -72,24 +76,66 @@ def cudf_spill(request):


  def device_host_file_size_matches(
-     dhf, total_bytes, device_chunk_overhead=0, serialized_chunk_overhead=1024
+     dask_worker: Worker,
+     total_bytes,
+     device_chunk_overhead=0,
+     serialized_chunk_overhead=1024,
  ):
-     byte_sum = dhf.device_buffer.fast.total_weight
+     worker_data_sizes = collect_device_host_file_size(
+         dask_worker,
+         device_chunk_overhead=device_chunk_overhead,
+         serialized_chunk_overhead=serialized_chunk_overhead,
+     )
+     byte_sum = (
+         worker_data_sizes["device_fast"]
+         + worker_data_sizes["host_fast"]
+         + worker_data_sizes["host_buffer"]
+         + worker_data_sizes["disk"]
+     )
+     return (
+         byte_sum >= total_bytes
+         and byte_sum
+         <= total_bytes
+         + worker_data_sizes["device_overhead"]
+         + worker_data_sizes["host_overhead"]
+         + worker_data_sizes["disk_overhead"]
+     )
+
+
+ class WorkerDataSizes(TypedDict):
+     device_fast: int
+     host_fast: int
+     host_buffer: int
+     disk: int
+     device_overhead: int
+     host_overhead: int
+     disk_overhead: int
+
+
+ def collect_device_host_file_size(
+     dask_worker: Worker,
+     device_chunk_overhead: int,
+     serialized_chunk_overhead: int,
+ ) -> WorkerDataSizes:
+     dhf = dask_worker.data

-     # `dhf.host_buffer.fast` is only available when Worker's `memory_limit != 0`
+     device_fast = dhf.device_buffer.fast.total_weight or 0
      if hasattr(dhf.host_buffer, "fast"):
-         byte_sum += dhf.host_buffer.fast.total_weight
+         host_fast = dhf.host_buffer.fast.total_weight or 0
+         host_buffer = 0
      else:
-         byte_sum += sum([sizeof(b) for b in dhf.host_buffer.values()])
+         host_buffer = sum([sizeof(b) for b in dhf.host_buffer.values()])
+         host_fast = 0

-     # `dhf.disk` is only available when Worker's `memory_limit != 0`
      if dhf.disk is not None:
          file_path = [
              os.path.join(dhf.disk.directory, fname)
              for fname in dhf.disk.filenames.values()
          ]
          file_size = [os.path.getsize(f) for f in file_path]
-         byte_sum += sum(file_size)
+         disk = sum(file_size)
+     else:
+         disk = 0

      # Allow up to chunk_overhead bytes overhead per chunk
      device_overhead = len(dhf.device) * device_chunk_overhead
@@ -98,17 +144,25 @@ def device_host_file_size_matches(
          len(dhf.disk) * serialized_chunk_overhead if dhf.disk is not None else 0
      )

-     return (
-         byte_sum >= total_bytes
-         and byte_sum <= total_bytes + device_overhead + host_overhead + disk_overhead
+     return WorkerDataSizes(
+         device_fast=device_fast,
+         host_fast=host_fast,
+         host_buffer=host_buffer,
+         disk=disk,
+         device_overhead=device_overhead,
+         host_overhead=host_overhead,
+         disk_overhead=disk_overhead,
      )


  def assert_device_host_file_size(
-     dhf, total_bytes, device_chunk_overhead=0, serialized_chunk_overhead=1024
+     dask_worker: Worker,
+     total_bytes,
+     device_chunk_overhead=0,
+     serialized_chunk_overhead=1024,
  ):
      assert device_host_file_size_matches(
-         dhf, total_bytes, device_chunk_overhead, serialized_chunk_overhead
+         dask_worker, total_bytes, device_chunk_overhead, serialized_chunk_overhead
      )


@@ -119,7 +173,7 @@ def worker_assert(
      dask_worker=None,
  ):
      assert_device_host_file_size(
-         dask_worker.data, total_size, device_chunk_overhead, serialized_chunk_overhead
+         dask_worker, total_size, device_chunk_overhead, serialized_chunk_overhead
      )


@@ -131,12 +185,12 @@ def delayed_worker_assert(
  ):
      start = time()
      while not device_host_file_size_matches(
-         dask_worker.data, total_size, device_chunk_overhead, serialized_chunk_overhead
+         dask_worker, total_size, device_chunk_overhead, serialized_chunk_overhead
      ):
          sleep(0.01)
          if time() < start + 3:
              assert_device_host_file_size(
-                 dask_worker.data,
+                 dask_worker,
                  total_size,
                  device_chunk_overhead,
                  serialized_chunk_overhead,
@@ -224,8 +278,8 @@ async def test_cupy_cluster_device_spill(params):
                  x = rs.random(int(50e6), chunks=2e6)
                  await wait(x)

-                 xx = x.persist()
-                 await wait(xx)
+                 [xx] = client.persist([x])
+                 await xx

                  # Allow up to 1024 bytes overhead per chunk serialized
                  await client.run(
@@ -344,19 +398,38 @@ async def test_cudf_cluster_device_spill(params, cudf_spill):
                  sizes = sizes.to_arrow().to_pylist()
                  nbytes = sum(sizes)

-                 cdf2 = cdf.persist()
-                 await wait(cdf2)
+                 [cdf2] = client.persist([cdf])
+                 await cdf2

                  del cdf
                  gc.collect()

                  if enable_cudf_spill:
-                     await client.run(
-                         worker_assert,
-                         0,
-                         0,
-                         0,
+                     expected_data = WorkerDataSizes(
+                         device_fast=0,
+                         host_fast=0,
+                         host_buffer=0,
+                         disk=0,
+                         device_overhead=0,
+                         host_overhead=0,
+                         disk_overhead=0,
                      )
+
+                     deadline = Deadline.after(duration=3)
+                     while not deadline.expired:
+                         data = await client.run(
+                             collect_device_host_file_size,
+                             device_chunk_overhead=0,
+                             serialized_chunk_overhead=0,
+                         )
+                         expected = {k: expected_data for k in data}
+                         if data == expected:
+                             break
+                         sleep(0.01)
+
+                     # final assertion for pytest to reraise with a nice traceback
+                     assert data == expected
+
                  else:
                      await client.run(
                          assert_host_chunks,
@@ -419,8 +492,8 @@ async def test_cudf_spill_cluster(cudf_spill):
                  }
              )

-             ddf = dask_cudf.from_cudf(cdf, npartitions=2).sum().persist()
-             await wait(ddf)
+             [ddf] = client.persist([dask_cudf.from_cudf(cdf, npartitions=2).sum()])
+             await ddf

              await client.run(_assert_cudf_spill_stats, enable_cudf_spill)
              _assert_cudf_spill_stats(enable_cudf_spill)
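A recurring change in the test hunks above (here and in test_proxify_host_file.py) is replacing `collection.persist()` with `client.persist([collection])`, which routes persistence through an explicit client and unpacks the returned list. A CPU-only sketch of the two equivalent spellings (cluster size and array contents are arbitrary):

```python
from dask import array as da
from distributed import Client, LocalCluster, wait

with LocalCluster(n_workers=1, processes=False) as cluster, Client(cluster) as client:
    x = da.ones(1_000_000, chunks=100_000)

    # Old spelling used by these tests: xx = x.persist(); wait(xx)
    # New spelling: pass a list of collections and unpack the result.
    [xx] = client.persist([x])
    wait(xx)
    print(xx.sum().compute())  # 1000000.0
```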
dask_cuda/utils.py CHANGED
@@ -1,3 +1,6 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-License-Identifier: Apache-2.0
+
  import math
  import operator
  import os
@@ -86,6 +89,45 @@ def get_gpu_count():
      return pynvml.nvmlDeviceGetCount()


+ def get_gpu_handle(device_id=0):
+     """Get GPU handle from device index or UUID.
+
+     Parameters
+     ----------
+     device_id: int or str
+         The index or UUID of the device from which to obtain the handle.
+
+     Raises
+     ------
+     ValueError
+         If acquiring the device handle for the device specified failed.
+     pynvml.NVMLError
+         If any NVML error occurred while initializing.
+
+     Examples
+     --------
+     >>> get_gpu_handle(device_id=0)
+
+     >>> get_gpu_handle(device_id="GPU-9fb42d6f-7d6b-368f-f79c-3c3e784c93f6")
+     """
+     pynvml.nvmlInit()
+
+     try:
+         if device_id and not str(device_id).isnumeric():
+             # This means device_id is UUID.
+             # This works for both MIG and non-MIG device UUIDs.
+             handle = pynvml.nvmlDeviceGetHandleByUUID(str.encode(device_id))
+             if pynvml.nvmlDeviceIsMigDeviceHandle(handle):
+                 # Additionally get parent device handle
+                 # if the device itself is a MIG instance
+                 handle = pynvml.nvmlDeviceGetDeviceHandleFromMigDeviceHandle(handle)
+         else:
+             handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
+         return handle
+     except pynvml.NVMLError:
+         raise ValueError(f"Invalid device index or UUID: {device_id}")
+
+
  @toolz.memoize
  def get_gpu_count_mig(return_uuids=False):
      """Return the number of MIG instances available
@@ -129,7 +171,7 @@ def get_cpu_affinity(device_index=None):
      Parameters
      ----------
      device_index: int or str
-         Index or UUID of the GPU device
+         The index or UUID of the device from which to obtain the CPU affinity.

      Examples
      --------
@@ -148,26 +190,15 @@ def get_cpu_affinity(device_index=None):
      40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
      60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
      """
-     pynvml.nvmlInit()
-
      try:
-         if device_index and not str(device_index).isnumeric():
-             # This means device_index is UUID.
-             # This works for both MIG and non-MIG device UUIDs.
-             handle = pynvml.nvmlDeviceGetHandleByUUID(str.encode(device_index))
-             if pynvml.nvmlDeviceIsMigDeviceHandle(handle):
-                 # Additionally get parent device handle
-                 # if the device itself is a MIG instance
-                 handle = pynvml.nvmlDeviceGetDeviceHandleFromMigDeviceHandle(handle)
-         else:
-             handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
+         handle = get_gpu_handle(device_index)
          # Result is a list of 64-bit integers, thus ceil(get_cpu_count() / 64)
          affinity = pynvml.nvmlDeviceGetCpuAffinity(
              handle,
              math.ceil(get_cpu_count() / 64),
          )
          return unpack_bitmask(affinity)
-     except pynvml.NVMLError:
+     except (pynvml.NVMLError, ValueError):
          warnings.warn(
              "Cannot get CPU affinity for device with index %d, setting default affinity"
              % device_index
@@ -182,18 +213,15 @@ def get_n_gpus():
      return get_gpu_count()


- def get_device_total_memory(index=0):
-     """
-     Return total memory of CUDA device with index or with device identifier UUID
-     """
-     pynvml.nvmlInit()
+ def get_device_total_memory(device_index=0):
+     """Return total memory of CUDA device with index or with device identifier UUID.

-     if index and not str(index).isnumeric():
-         # This means index is UUID. This works for both MIG and non-MIG device UUIDs.
-         handle = pynvml.nvmlDeviceGetHandleByUUID(str.encode(str(index)))
-     else:
-         # This is a device index
-         handle = pynvml.nvmlDeviceGetHandleByIndex(index)
+     Parameters
+     ----------
+     device_index: int or str
+         The index or UUID of the device from which to obtain the CPU affinity.
+     """
+     handle = get_gpu_handle(device_index)
      return pynvml.nvmlDeviceGetMemoryInfo(handle).total


@@ -553,26 +581,26 @@ def parse_device_memory_limit(device_memory_limit, device_index=0, alignment_siz
      return _align(int(device_memory_limit), alignment_size)


- def get_gpu_uuid_from_index(device_index=0):
+ def get_gpu_uuid(device_index=0):
      """Get GPU UUID from CUDA device index.

      Parameters
      ----------
      device_index: int or str
-         The index of the device from which to obtain the UUID. Default: 0.
+         The index or UUID of the device from which to obtain the UUID.

      Examples
      --------
-     >>> get_gpu_uuid_from_index()
+     >>> get_gpu_uuid()
      'GPU-9baca7f5-0f2f-01ac-6b05-8da14d6e9005'

-     >>> get_gpu_uuid_from_index(3)
+     >>> get_gpu_uuid(3)
      'GPU-9fb42d6f-7d6b-368f-f79c-3c3e784c93f6'
-     """
-     import pynvml

-     pynvml.nvmlInit()
-     handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
+     >>> get_gpu_uuid("GPU-9fb42d6f-7d6b-368f-f79c-3c3e784c93f6")
+     'GPU-9fb42d6f-7d6b-368f-f79c-3c3e784c93f6'
+     """
+     handle = get_gpu_handle(device_index)
      try:
          return pynvml.nvmlDeviceGetUUID(handle).decode("utf-8")
      except AttributeError:
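`get_gpu_handle` now centralizes the NVML handle lookup that `get_cpu_affinity`, `get_device_total_memory`, and the renamed `get_gpu_uuid` all share, so each of them accepts either a device index or a UUID. A short sketch (assumes an NVIDIA driver, pynvml, and at least one visible GPU):

```python
from dask_cuda.utils import get_cpu_affinity, get_device_total_memory, get_gpu_uuid

uuid = get_gpu_uuid(0)                # e.g. 'GPU-9baca7f5-0f2f-01ac-6b05-8da14d6e9005'
print(get_device_total_memory(uuid))  # UUIDs now work wherever an index does
print(get_cpu_affinity(uuid))         # falls back to a default affinity on failure
```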
dask_cuda-25.4.0.dist-info/METADATA → dask_cuda-25.6.0.dist-info/METADATA RENAMED
@@ -1,9 +1,9 @@
  Metadata-Version: 2.4
  Name: dask-cuda
- Version: 25.4.0
+ Version: 25.6.0
  Summary: Utilities for Dask and CUDA interactions
  Author: NVIDIA Corporation
- License: Apache 2.0
+ License: Apache-2.0
  Project-URL: Homepage, https://github.com/rapidsai/dask-cuda
  Project-URL: Documentation, https://docs.rapids.ai/api/dask-cuda/stable/
  Project-URL: Source, https://github.com/rapidsai/dask-cuda
@@ -15,15 +15,16 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: click>=8.1
- Requires-Dist: numba<0.61.0a0,>=0.59.1
+ Requires-Dist: numba<0.62.0a0,>=0.59.1
  Requires-Dist: numpy<3.0a0,>=1.23
  Requires-Dist: pandas>=1.3
  Requires-Dist: pynvml<13.0.0a0,>=12.0.0
- Requires-Dist: rapids-dask-dependency==25.4.*
+ Requires-Dist: rapids-dask-dependency==25.6.*
  Requires-Dist: zict>=2.0.0
  Provides-Extra: docs
  Requires-Dist: numpydoc>=1.1.0; extra == "docs"
dask_cuda-25.4.0.dist-info/RECORD → dask_cuda-25.6.0.dist-info/RECORD RENAMED
@@ -1,21 +1,22 @@
- dask_cuda/GIT_COMMIT,sha256=wbY8QunTBf6nZeA4ulUfzAdQWyE7hoxV330KmJ3VnjA,41
- dask_cuda/VERSION,sha256=EM36MPurzJgotElKb8R7ZaIOF2woBA69gsVnmiyf-LY,8
+ dask_cuda/GIT_COMMIT,sha256=TiWUPXNqs5gL3lxRLAbL9S16XUILnjLBQ-tX9pxEkwE,41
+ dask_cuda/VERSION,sha256=mkkPLCPxib-wy79AMMpM4Bq103DbRbHiXhZFFnGa_sk,8
  dask_cuda/__init__.py,sha256=Wbc7R0voN4vsQkb7SKuVXH0YXuXtfnAxrupxfM4lT10,1933
+ dask_cuda/_compat.py,sha256=AG2lKGAtZitDPBjHeFDKLTN_B5HKodrhZ2kHlk1Z-D0,498
  dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
  dask_cuda/cli.py,sha256=cScVyNiA_l9uXeDgkIcmbcR4l4cH1_1shqSqsVmuHPE,17053
  dask_cuda/cuda_worker.py,sha256=rZ1ITG_ZCbuaMA9e8uSqCjU8Km4AMphGGrxpBPQG8xU,9477
  dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
  dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
- dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
+ dask_cuda/get_device_memory_objects.py,sha256=peqXY8nAOtZpo9Pk1innP0rKySB8X4647YYqrwLYPHo,4569
  dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
- dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
- dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
+ dask_cuda/is_device_object.py,sha256=x9klFdeQzLcug7wZMxN3GK2AS121tlDe-LQ2uznm5yo,1179
+ dask_cuda/is_spillable_object.py,sha256=8gj6QgtKcmzrpQwy8rE-pS1R8tjaJOeD-Fzr6LumjJg,1596
  dask_cuda/local_cuda_cluster.py,sha256=wqwKVRV6jT13sf9e-XsvbVBlTrnhmcbmHQBFPTFcayw,20335
  dask_cuda/plugins.py,sha256=A2aT8HA6q_JhIEx6-XKcpbWEbl7aTg1GNoZQH8_vh00,7197
- dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
+ dask_cuda/proxify_device_objects.py,sha256=jWljqWddOT8NksyNKOh_9nFoV70_3P6s8P91oXdCfEk,8225
  dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
  dask_cuda/proxy_object.py,sha256=mrCCGwS-mltcY8oddJEXnPL6rV2dBpGgsFypBVbxRsA,30150
- dask_cuda/utils.py,sha256=Goq-m78rYZ-bcJitg47N1h_PC4PDuzXG0CUVH7V8azU,25515
+ dask_cuda/utils.py,sha256=wJ-oTj6mJHojz7JEMTh_QFnvz5igj4ULCbpI0r_XqMY,26273
  dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
  dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
  dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -30,27 +31,27 @@ dask_cuda/benchmarks/utils.py,sha256=_x0XXL_F3W-fExpuQfTBwuK3WnrVuXQQepbnvjUqS9o
  dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/explicit_comms/comms.py,sha256=uq-XPOH38dFcYS_13Vomj2ER6zxQz7DPeSM000mOVmY,11541
  dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=g9xDyFKmblEuevZt5Drh66uMLw-LUNOI8CIucDdACmY,21231
+ dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=yG9_7BuXSswiZjFfs6kVdHBA2-mlSBKN1i6phgNTJMY,23815
  dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
- dask_cuda/tests/test_dask_cuda_worker.py,sha256=C1emlr47yGa3TdSSlAXJRzguY4bcH74htk21x9th7nQ,20556
+ dask_cuda/tests/test_dask_cuda_worker.py,sha256=yG_RcOTF6vt-LBBVrjEQ_2vRZvVfFgFDMedPMSzkFws,20657
  dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
  dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
- dask_cuda/tests/test_explicit_comms.py,sha256=xnQjjUrd6RFd9CS99pVuWY1frfiMXzRv_fW4rk9opOk,19465
+ dask_cuda/tests/test_explicit_comms.py,sha256=hrNrTKP-pBSohyUqn1hnXKkUttGwRLeYY2bniEXM1FM,19944
  dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
  dask_cuda/tests/test_gds.py,sha256=j1Huud6UGm1fbkyRLQEz_ysrVw__5AimwSn_M-2GEvs,1513
  dask_cuda/tests/test_initialize.py,sha256=4Ovv_ClokKibPX6wfuaoQgN4eKCohagRFoE3s3D7Huk,8119
- dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
- dask_cuda/tests/test_proxify_host_file.py,sha256=LC3jjo_gbfhdIy1Zy_ynmgyv31HXFoBINCe1-XXZ4XU,18994
+ dask_cuda/tests/test_local_cuda_cluster.py,sha256=AiVUx3PkuIeobw1QXdr3mvom_l8DFVvRIvMQE91zAag,19811
+ dask_cuda/tests/test_proxify_host_file.py,sha256=pFORynzqGpe9mz_rPwTVW6O4VoY2E0EmjsT7Ux_c920,19333
  dask_cuda/tests/test_proxy.py,sha256=U9uE-QesTwquNKzTReEKiYgoRgS_pfGW-A-gJNppHyg,23817
- dask_cuda/tests/test_spill.py,sha256=CYMbp5HDBYlZ7T_n8RfSOZxaWFcAQKjprjRM7Wupcdw,13419
+ dask_cuda/tests/test_spill.py,sha256=A4-pJWCfShUaEGKbUdeIpcVL8zCyyPfAjdlJ0As3LDQ,15462
  dask_cuda/tests/test_utils.py,sha256=PQI_oTONWnKSKlkQfEeK-vlmYa0-cPpDjDEbm74cNCE,9104
  dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
  dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
- dask_cuda-25.4.0.dist-info/licenses/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
+ dask_cuda-25.6.0.dist-info/licenses/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
  examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
  examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
- dask_cuda-25.4.0.dist-info/METADATA,sha256=udK2maTnpkUBnOOtTvGOwySUtJxnIo4rcIOmySPBuOk,2294
- dask_cuda-25.4.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- dask_cuda-25.4.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
- dask_cuda-25.4.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
- dask_cuda-25.4.0.dist-info/RECORD,,
+ dask_cuda-25.6.0.dist-info/METADATA,sha256=Eolq3LbkRkU0ukh5enuHzLIK-YYo-Q_PX2bobo-rT1E,2345
+ dask_cuda-25.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dask_cuda-25.6.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+ dask_cuda-25.6.0.dist-info/top_level.txt,sha256=S_m57qClWFTZ9rBMNTPikpBiy9vTn6_4pjGuInt0XE8,28
+ dask_cuda-25.6.0.dist-info/RECORD,,
dask_cuda-25.4.0.dist-info/WHEEL → dask_cuda-25.6.0.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (78.1.0)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

dask_cuda-25.4.0.dist-info/top_level.txt → dask_cuda-25.6.0.dist-info/top_level.txt RENAMED
@@ -1,5 +1,4 @@
  ci
  conda
  dask_cuda
- dist
  examples