dask-cuda 24.8.2__py3-none-any.whl → 24.12.0__py3-none-any.whl

This diff shows the content of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
dask_cuda/VERSION CHANGED
@@ -1 +1 @@
- 24.08.02
+ 24.12.00
dask_cuda/__init__.py CHANGED
@@ -9,6 +9,8 @@ import dask.dataframe.core
  import dask.dataframe.shuffle
  import dask.dataframe.multi
  import dask.bag.core
+ from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
+ from distributed.protocol.serialize import dask_deserialize, dask_serialize

  from ._version import __git_commit__, __version__
  from .cuda_worker import CUDAWorker
@@ -48,3 +50,20 @@ dask.dataframe.shuffle.shuffle_group = proxify_decorator(
      dask.dataframe.shuffle.shuffle_group
  )
  dask.dataframe.core._concat = unproxify_decorator(dask.dataframe.core._concat)
+
+
+ def _register_cudf_spill_aware():
+     import cudf
+
+     # Only enable Dask/cuDF spilling if cuDF spilling is disabled, see
+     # https://github.com/rapidsai/dask-cuda/issues/1363
+     if not cudf.get_option("spill"):
+         # This reproduces the implementation of `_register_cudf`, see
+         # https://github.com/dask/distributed/blob/40fcd65e991382a956c3b879e438be1b100dff97/distributed/protocol/__init__.py#L106-L115
+         from cudf.comm import serialize
+
+
+ for registry in [cuda_serialize, cuda_deserialize, dask_serialize, dask_deserialize]:
+     for lib in ["cudf", "dask_cudf"]:
+         if lib in registry._lazy:
+             registry._lazy[lib] = _register_cudf_spill_aware
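
Note: importing dask_cuda now swaps Distributed's lazy cuDF serializer hooks for the spill-aware variant above. A minimal sketch of how to observe this (assuming cudf and distributed are installed; not part of the package):

    from distributed.protocol.cuda import cuda_serialize

    import dask_cuda  # noqa: F401 -- applies the patch at import time

    # `_lazy` maps module names to callables run on first (de)serialization
    # of that module's objects; "cudf" now points at the spill-aware hook.
    print(cuda_serialize._lazy.get("cudf"))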
dask_cuda/benchmarks/common.py CHANGED
@@ -1,3 +1,4 @@
+ import contextlib
  from argparse import Namespace
  from functools import partial
  from typing import Any, Callable, List, Mapping, NamedTuple, Optional, Tuple
@@ -7,7 +8,7 @@ import numpy as np
  import pandas as pd

  import dask
- from distributed import Client
+ from distributed import Client, performance_report

  from dask_cuda.benchmarks.utils import (
      address_to_index,
@@ -87,12 +88,20 @@ def run_benchmark(client: Client, args: Namespace, config: Config):

      If ``args.profile`` is set, the final run is profiled.
      """
+
      results = []
-     for _ in range(max(1, args.runs) - 1):
-         res = config.bench_once(client, args, write_profile=None)
-         results.append(res)
-     results.append(config.bench_once(client, args, write_profile=args.profile))
-     return results
+     for _ in range(max(0, args.warmup_runs)):
+         config.bench_once(client, args, write_profile=None)
+
+     ctx = contextlib.nullcontext()
+     if args.profile is not None:
+         ctx = performance_report(filename=args.profile)
+     with ctx:
+         for _ in range(max(1, args.runs) - 1):
+             res = config.bench_once(client, args, write_profile=None)
+             results.append(res)
+         results.append(config.bench_once(client, args, write_profile=args.profile_last))
+     return results


  def gather_bench_results(client: Client, args: Namespace, config: Config):
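
Note: ``run_benchmark`` now performs ``args.warmup_runs`` unrecorded warmup iterations, wraps all timed iterations in a single ``performance_report`` when ``--profile`` is given, and passes ``--profile-last`` only into the final ``bench_once`` call. The null-context-or-report idiom recurs in every ``bench_once`` below; an isolated sketch (names here are illustrative, not from the package):

    import contextlib

    from distributed import performance_report

    def report_ctx(path=None):
        # Profile only when a report path is given; otherwise do nothing.
        if path is not None:
            return performance_report(filename=path)
        return contextlib.nullcontext()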
dask_cuda/benchmarks/local_cudf_groupby.py CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
  import dask
  import dask.dataframe as dd
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -98,10 +98,9 @@ def bench_once(client, args, write_profile=None):
          "False": False,
      }.get(args.shuffle, args.shuffle)

-     if write_profile is None:
-         ctx = contextlib.nullcontext()
-     else:
-         ctx = performance_report(filename=args.profile)
+     ctx = contextlib.nullcontext()
+     if write_profile is not None:
+         ctx = performance_report(filename=write_profile)

      with ctx:
          t1 = clock()
@@ -260,19 +259,6 @@ def parse_args():
              "type": str,
              "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB')",
-         },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs",
-         },
      ]

      return parse_benchmark_args(
dask_cuda/benchmarks/local_cudf_merge.py CHANGED
@@ -9,7 +9,7 @@ import pandas as pd
  import dask
  import dask.dataframe as dd
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -190,7 +190,7 @@ def bench_once(client, args, write_profile=None):
      if args.backend == "explicit-comms":
          ctx1 = dask.config.set(explicit_comms=True)
      if write_profile is not None:
-         ctx2 = performance_report(filename=args.profile)
+         ctx2 = performance_report(filename=write_profile)

      with ctx1:
          with ctx2:
@@ -335,13 +335,6 @@ def parse_args():
              "action": "store_true",
              "help": "Use shuffle join (takes precedence over '--broadcast-join').",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB')",
-         },
          {
              "name": "--frac-match",
              "default": 0.3,
@@ -353,12 +346,6 @@ def parse_args():
              "action": "store_true",
              "help": "Don't shuffle the keys of the left (base) dataframe.",
          },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs",
-         },
          {
              "name": [
                  "-s",
dask_cuda/benchmarks/local_cudf_shuffle.py CHANGED
@@ -121,10 +121,9 @@ def create_data(
  def bench_once(client, args, write_profile=None):
      data_processed, df = create_data(client, args)

-     if write_profile is None:
-         ctx = contextlib.nullcontext()
-     else:
-         ctx = performance_report(filename=args.profile)
+     ctx = contextlib.nullcontext()
+     if write_profile is not None:
+         ctx = performance_report(filename=write_profile)

      with ctx:
          if args.backend in {"dask", "dask-noop"}:
@@ -228,19 +227,6 @@ def parse_args():
              "type": str,
              "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB')",
-         },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs",
-         },
          {
              "name": "--ignore-index",
              "action": "store_true",
dask_cuda/benchmarks/local_cupy.py CHANGED
@@ -8,7 +8,7 @@ from nvtx import end_range, start_range

  from dask import array as da
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -141,12 +141,11 @@ def bench_once(client, args, write_profile=None):
      chunksize = x.chunksize
      data_processed = sum(arg.nbytes for arg in func_args)

-     # Execute the operations to benchmark
-     if args.profile is not None and write_profile is not None:
-         ctx = performance_report(filename=args.profile)
-     else:
-         ctx = contextlib.nullcontext()
+     ctx = contextlib.nullcontext()
+     if write_profile is not None:
+         ctx = performance_report(filename=write_profile)

+     # Execute the operations to benchmark
      with ctx:
          rng = start_range(message=args.operation, color="purple")
          result = func(*func_args)
@@ -297,19 +296,6 @@ def parse_args():
              "type": int,
              "help": "Chunk size (default 2500).",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB').",
-         },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs (default 3).",
-         },
          {
              "name": [
                  "-b",
dask_cuda/benchmarks/local_cupy_map_overlap.py CHANGED
@@ -10,7 +10,7 @@ from scipy.ndimage import convolve as sp_convolve

  from dask import array as da
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -42,12 +42,11 @@ def bench_once(client, args, write_profile=None):

      data_processed = x.nbytes

-     # Execute the operations to benchmark
-     if args.profile is not None and write_profile is not None:
-         ctx = performance_report(filename=args.profile)
-     else:
-         ctx = contextlib.nullcontext()
+     ctx = contextlib.nullcontext()
+     if write_profile is not None:
+         ctx = performance_report(filename=write_profile)

+     # Execute the operations to benchmark
      with ctx:
          result = x.map_overlap(mean_filter, args.kernel_size, shape=ks)
          if args.backend == "dask-noop":
@@ -168,19 +167,6 @@ def parse_args():
              "type": int,
              "help": "Kernel size, 2*k+1, in each dimension (default 1)",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB')",
-         },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs",
-         },
          {
              "name": [
                  "-b",
dask_cuda/benchmarks/read_parquet.py ADDED
@@ -0,0 +1,268 @@
+ import contextlib
+ from collections import ChainMap
+ from time import perf_counter as clock
+
+ import fsspec
+ import pandas as pd
+
+ import dask
+ import dask.dataframe as dd
+ from dask.base import tokenize
+ from dask.distributed import performance_report
+ from dask.utils import format_bytes, parse_bytes
+
+ from dask_cuda.benchmarks.common import Config, execute_benchmark
+ from dask_cuda.benchmarks.utils import (
+     parse_benchmark_args,
+     print_key_value,
+     print_separator,
+     print_throughput_bandwidth,
+ )
+
+ DISK_SIZE_CACHE = {}
+ OPTIONS_CACHE = {}
+
+
+ def _noop(df):
+     return df
+
+
+ def read_data(paths, columns, backend, **kwargs):
+     with dask.config.set({"dataframe.backend": backend}):
+         return dd.read_parquet(
+             paths,
+             columns=columns,
+             **kwargs,
+         )
+
+
+ def get_fs_paths_kwargs(args):
+     kwargs = {}
+
+     storage_options = {}
+     if args.key:
+         storage_options["key"] = args.key
+     if args.secret:
+         storage_options["secret"] = args.secret
+
+     if args.filesystem == "arrow":
+         import pyarrow.fs as pa_fs
+         from fsspec.implementations.arrow import ArrowFSWrapper
+
+         _mapping = {
+             "key": "access_key",
+             "secret": "secret_key",
+         }  # See: pyarrow.fs.S3FileSystem docs
+         s3_args = {}
+         for k, v in storage_options.items():
+             s3_args[_mapping[k]] = v
+
+         fs = pa_fs.FileSystem.from_uri(args.path)[0]
+         try:
+             region = {"region": fs.region}
+         except AttributeError:
+             region = {}
+         kwargs["filesystem"] = type(fs)(**region, **s3_args)
+         fsspec_fs = ArrowFSWrapper(kwargs["filesystem"])
+
+         if args.type == "gpu":
+             kwargs["blocksize"] = args.blocksize
+     else:
+         fsspec_fs = fsspec.core.get_fs_token_paths(
+             args.path, mode="rb", storage_options=storage_options
+         )[0]
+         kwargs["filesystem"] = fsspec_fs
+         kwargs["blocksize"] = args.blocksize
+         kwargs["aggregate_files"] = args.aggregate_files
+
+     # Collect list of paths
+     stripped_url_path = fsspec_fs._strip_protocol(args.path)
+     if stripped_url_path.endswith("/"):
+         stripped_url_path = stripped_url_path[:-1]
+     paths = fsspec_fs.glob(f"{stripped_url_path}/*.parquet")
+     if args.file_count:
+         paths = paths[: args.file_count]
+
+     return fsspec_fs, paths, kwargs
+
+
+ def bench_once(client, args, write_profile=None):
+     global OPTIONS_CACHE
+     global DISK_SIZE_CACHE
+
+     # Construct kwargs
+     token = tokenize(args)
+     try:
+         fsspec_fs, paths, kwargs = OPTIONS_CACHE[token]
+     except KeyError:
+         fsspec_fs, paths, kwargs = get_fs_paths_kwargs(args)
+         OPTIONS_CACHE[token] = (fsspec_fs, paths, kwargs)
+
+     if write_profile is None:
+         ctx = contextlib.nullcontext()
+     else:
+         ctx = performance_report(filename=args.profile)
+
+     with ctx:
+         t1 = clock()
+         df = read_data(
+             paths,
+             columns=args.columns,
+             backend="cudf" if args.type == "gpu" else "pandas",
+             **kwargs,
+         )
+         num_rows = len(
+             # Use opaque `map_partitions` call to "block"
+             # dask-expr from using pq metadata to get length
+             df.map_partitions(
+                 _noop,
+                 meta=df._meta,
+                 enforce_metadata=False,
+             )
+         )
+         t2 = clock()
+
+     # Extract total size of files on disk
+     token = tokenize(paths)
+     try:
+         disk_size = DISK_SIZE_CACHE[token]
+     except KeyError:
+         disk_size = sum(fsspec_fs.sizes(paths))
+         DISK_SIZE_CACHE[token] = disk_size
+
+     return (disk_size, num_rows, t2 - t1)
+
+
+ def pretty_print_results(args, address_to_index, p2p_bw, results):
+     if args.markdown:
+         print("```")
+     print("Parquet read benchmark")
+     data_processed, row_count, durations = zip(*results)
+     print_separator(separator="-")
+     backend = "cudf" if args.type == "gpu" else "pandas"
+     print_key_value(key="Path", value=args.path)
+     print_key_value(key="Columns", value=f"{args.columns}")
+     print_key_value(key="Backend", value=f"{backend}")
+     print_key_value(key="Filesystem", value=f"{args.filesystem}")
+     print_key_value(key="Blocksize", value=f"{format_bytes(args.blocksize)}")
+     print_key_value(key="Aggregate files", value=f"{args.aggregate_files}")
+     print_key_value(key="Row count", value=f"{row_count[0]}")
+     print_key_value(key="Size on disk", value=f"{format_bytes(data_processed[0])}")
+     if args.markdown:
+         print("\n```")
+     args.no_show_p2p_bandwidth = True
+     print_throughput_bandwidth(
+         args, durations, data_processed, p2p_bw, address_to_index
+     )
+     print_separator(separator="=")
+
+
+ def create_tidy_results(args, p2p_bw, results):
+     configuration = {
+         "path": args.path,
+         "columns": args.columns,
+         "backend": "cudf" if args.type == "gpu" else "pandas",
+         "filesystem": args.filesystem,
+         "blocksize": args.blocksize,
+         "aggregate_files": args.aggregate_files,
+     }
+     timing_data = pd.DataFrame(
+         [
+             pd.Series(
+                 data=ChainMap(
+                     configuration,
+                     {
+                         "wallclock": duration,
+                         "data_processed": data_processed,
+                         "num_rows": num_rows,
+                     },
+                 )
+             )
+             for data_processed, num_rows, duration in results
+         ]
+     )
+     return timing_data, p2p_bw
+
+
+ def parse_args():
+     special_args = [
+         {
+             "name": "path",
+             "type": str,
+             "help": "Parquet directory to read from (must be a flat directory).",
+         },
+         {
+             "name": "--blocksize",
+             "default": "256MB",
+             "type": parse_bytes,
+             "help": "How to set the blocksize option",
+         },
+         {
+             "name": "--aggregate-files",
+             "default": False,
+             "action": "store_true",
+             "help": "How to set the aggregate_files option",
+         },
+         {
+             "name": "--file-count",
+             "type": int,
+             "help": "Maximum number of files to read.",
+         },
+         {
+             "name": "--columns",
+             "type": str,
+             "help": "Columns to read/select from data.",
+         },
+         {
+             "name": "--key",
+             "type": str,
+             "help": "Public S3 key.",
+         },
+         {
+             "name": "--secret",
+             "type": str,
+             "help": "Secret S3 key.",
+         },
+         {
+             "name": [
+                 "-t",
+                 "--type",
+             ],
+             "choices": ["cpu", "gpu"],
+             "default": "gpu",
+             "type": str,
+             "help": "Use GPU or CPU dataframes (default 'gpu')",
+         },
+         {
+             "name": "--filesystem",
+             "choices": ["arrow", "fsspec"],
+             "default": "fsspec",
+             "type": str,
+             "help": "Filesystem backend",
+         },
+         {
+             "name": "--runs",
+             "default": 3,
+             "type": int,
+             "help": "Number of runs",
+         },
+     ]
+
+     args = parse_benchmark_args(
+         description="Parquet read benchmark",
+         args_list=special_args,
+         check_explicit_comms=False,
+     )
+     args.no_show_p2p_bandwidth = True
+     return args
+
+
+ if __name__ == "__main__":
+     execute_benchmark(
+         Config(
+             args=parse_args(),
+             bench_once=bench_once,
+             create_tidy_results=create_tidy_results,
+             pretty_print_results=pretty_print_results,
+         )
+     )
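
Note: the new benchmark is executable as a module, e.g. something like ``python -m dask_cuda.benchmarks.read_parquet s3://bucket/data --filesystem arrow`` (the bucket path is a placeholder). Its option/path caching relies on ``dask.base.tokenize`` hashing the parsed arguments deterministically; a minimal illustration:

    from dask.base import tokenize

    # Identical argument sets produce identical tokens, so repeated
    # bench_once calls reuse the cached filesystem, paths, and kwargs.
    args_a = {"path": "s3://bucket/data", "blocksize": 256_000_000}
    args_b = {"path": "s3://bucket/data", "blocksize": 256_000_000}
    assert tokenize(args_a) == tokenize(args_b)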
dask_cuda/benchmarks/utils.py CHANGED
@@ -323,7 +323,16 @@ def parse_benchmark_args(
          metavar="PATH",
          default=None,
          type=str,
-         help="Write dask profile report (E.g. dask-report.html)",
+         help="Write dask profile report (E.g. dask-report.html) on all "
+         "iterations (excluding warmup).",
+     )
+     parser.add_argument(
+         "--profile-last",
+         metavar="PATH",
+         default=None,
+         type=str,
+         help="Write dask profile report (E.g. dask-report.html) on last "
+         "iteration only.",
      )
      # See save_benchmark_data for more information
      parser.add_argument(
@@ -337,6 +346,25 @@ def parse_benchmark_args(
          "If the files already exist, new files are created with a uniquified "
          "BASENAME.",
      )
+     parser.add_argument(
+         "--ignore-size",
+         default="1 MiB",
+         metavar="nbytes",
+         type=parse_bytes,
+         help="Bandwidth statistics: ignore messages smaller than this (default '1 MB')",
+     )
+     parser.add_argument(
+         "--runs",
+         default=3,
+         type=int,
+         help="Number of runs",
+     )
+     parser.add_argument(
+         "--warmup-runs",
+         default=1,
+         type=int,
+         help="Number of warmup runs",
+     )

      for args in args_list:
          name = args.pop("name")
@@ -765,7 +793,7 @@ def print_throughput_bandwidth(
      )
      print_key_value(
          key="Wall clock",
-         value=f"{format_time(durations.mean())} +/- {format_time(durations.std()) }",
+         value=f"{format_time(durations.mean())} +/- {format_time(durations.std())}",
      )
      if not args.no_show_p2p_bandwidth:
          print_separator(separator="=")
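
Note: the ``--ignore-size`` and ``--runs`` options deleted from the individual benchmarks above are now registered once here, alongside the new ``--warmup-runs``, so every benchmark inherits all three. A stand-in sketch of the resulting parser behavior (this toy parser is illustrative, not the package's):

    import argparse

    from dask.utils import parse_bytes

    parser = argparse.ArgumentParser()
    parser.add_argument("--ignore-size", default="1 MiB", type=parse_bytes)
    parser.add_argument("--runs", default=3, type=int)
    parser.add_argument("--warmup-runs", default=1, type=int)

    # parse_bytes converts human-readable sizes to bytes: "2 MiB" -> 2097152
    print(parser.parse_args(["--ignore-size", "2 MiB"]).ignore_size)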
dask_cuda/cli.py CHANGED
@@ -13,7 +13,7 @@ from distributed.security import Security
  from distributed.utils import import_term

  from .cuda_worker import CUDAWorker
- from .utils import print_cluster_config
+ from .utils import CommaSeparatedChoice, print_cluster_config

  logger = logging.getLogger(__name__)

@@ -164,13 +164,24 @@ def cuda():
      incompatible with RMM pools and managed memory, trying to enable both will
      result in failure.""",
  )
+ @click.option(
+     "--set-rmm-allocator-for-libs",
+     "rmm_allocator_external_lib_list",
+     type=CommaSeparatedChoice(["cupy", "torch"]),
+     default=None,
+     show_default=True,
+     help="""
+     Set RMM as the allocator for external libraries. Provide a comma-separated
+     list of libraries to set, e.g., "torch,cupy".""",
+ )
  @click.option(
      "--rmm-release-threshold",
      default=None,
-     help="""When ``rmm.async`` is ``True`` and the pool size grows beyond this value, unused
-     memory held by the pool will be released at the next synchronization point. Can be
-     an integer (bytes), float (fraction of total device memory), string (like ``"5GB"``
-     or ``"5000M"``) or ``None``. By default, this feature is disabled.
+     help="""When ``rmm.async`` is ``True`` and the pool size grows beyond this
+     value, unused memory held by the pool will be released at the next
+     synchronization point. Can be an integer (bytes), float (fraction of total
+     device memory), string (like ``"5GB"`` or ``"5000M"``) or ``None``. By
+     default, this feature is disabled.

      .. note::
          This size is a per-worker configuration, and not cluster-wide.""",
@@ -350,6 +361,7 @@ def worker(
      rmm_maximum_pool_size,
      rmm_managed_memory,
      rmm_async,
+     rmm_allocator_external_lib_list,
      rmm_release_threshold,
      rmm_log_directory,
      rmm_track_allocations,
@@ -424,6 +436,7 @@ def worker(
          rmm_maximum_pool_size,
          rmm_managed_memory,
          rmm_async,
+         rmm_allocator_external_lib_list,
          rmm_release_threshold,
          rmm_log_directory,
          rmm_track_allocations,
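
Note: with this change a worker can be started with, for example, ``dask cuda worker <scheduler-address> --set-rmm-allocator-for-libs cupy,torch`` (the scheduler address is a placeholder). The comma-separated value is validated by ``CommaSeparatedChoice`` (see dask_cuda/utils.py below) and forwarded to ``CUDAWorker`` as ``rmm_allocator_external_lib_list``.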
dask_cuda/cuda_worker.py CHANGED
@@ -47,6 +47,7 @@ class CUDAWorker(Server):
          rmm_maximum_pool_size=None,
          rmm_managed_memory=False,
          rmm_async=False,
+         rmm_allocator_external_lib_list=None,
          rmm_release_threshold=None,
          rmm_log_directory=None,
          rmm_track_allocations=False,
@@ -195,6 +196,14 @@ class CUDAWorker(Server):
              },
          )

+         cudf_spill_warning = dask.config.get("cudf-spill-warning", default=True)
+         if enable_cudf_spill and cudf_spill_warning:
+             warnings.warn(
+                 "cuDF spilling is enabled, please ensure the client and scheduler "
+                 "processes set `CUDF_SPILL=on` as well. To disable this warning "
+                 "set `DASK_CUDF_SPILL_WARNING=False`."
+             )
+
          self.nannies = [
              Nanny(
                  scheduler,
@@ -223,6 +232,7 @@ class CUDAWorker(Server):
                      release_threshold=rmm_release_threshold,
                      log_directory=rmm_log_directory,
                      track_allocations=rmm_track_allocations,
+                     external_lib_list=rmm_allocator_external_lib_list,
                  ),
                  PreImport(pre_import),
                  CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
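
Note: the opt-out travels through Dask's config system, which maps ``DASK_``-prefixed environment variables onto config keys (hyphens and underscores are interchangeable). A minimal sketch of the lookup the worker performs:

    import os

    import dask

    # DASK_CUDF_SPILL_WARNING=False surfaces as the "cudf-spill-warning" key;
    # refresh() re-reads the environment into the live config.
    os.environ["DASK_CUDF_SPILL_WARNING"] = "False"
    dask.config.refresh()
    print(dask.config.get("cudf-spill-warning", default=True))  # False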
dask_cuda/local_cuda_cluster.py CHANGED
@@ -143,6 +143,11 @@ class LocalCUDACluster(LocalCluster):
          The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also
          incompatible with RMM pools and managed memory. Trying to enable both will
          result in an exception.
+     rmm_allocator_external_lib_list: str, list or None, default None
+         List of external libraries for which to set RMM as the allocator.
+         Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string
+         (like ``"torch,cupy"``) or a list of strings (like ``["torch", "cupy"]``).
+         If ``None``, no external libraries will use RMM as their allocator.
      rmm_release_threshold: int, str or None, default None
          When ``rmm.async is True`` and the pool size grows beyond this value, unused
          memory held by the pool will be released at the next synchronization point.
@@ -231,6 +236,7 @@ class LocalCUDACluster(LocalCluster):
          rmm_maximum_pool_size=None,
          rmm_managed_memory=False,
          rmm_async=False,
+         rmm_allocator_external_lib_list=None,
          rmm_release_threshold=None,
          rmm_log_directory=None,
          rmm_track_allocations=False,
@@ -244,6 +250,13 @@ class LocalCUDACluster(LocalCluster):
          # initialization happens before we can set CUDA_VISIBLE_DEVICES
          os.environ["RAPIDS_NO_INITIALIZE"] = "True"

+         if enable_cudf_spill:
+             import cudf
+
+             # cuDF spilling must be enabled in the client/scheduler process too.
+             cudf.set_option("spill", enable_cudf_spill)
+             cudf.set_option("spill_stats", cudf_spill_stats)
+
          if threads_per_worker < 1:
              raise ValueError("threads_per_worker must be higher than 0.")

@@ -258,6 +271,19 @@ class LocalCUDACluster(LocalCluster):
              n_workers = len(CUDA_VISIBLE_DEVICES)
          if n_workers < 1:
              raise ValueError("Number of workers cannot be less than 1.")
+
+         if rmm_allocator_external_lib_list is not None:
+             if isinstance(rmm_allocator_external_lib_list, str):
+                 rmm_allocator_external_lib_list = [
+                     v.strip() for v in rmm_allocator_external_lib_list.split(",")
+                 ]
+             elif not isinstance(rmm_allocator_external_lib_list, list):
+                 raise ValueError(
+                     "rmm_allocator_external_lib_list must be either a comma-separated "
+                     "string or a list of strings. Examples: 'torch,cupy' "
+                     "or ['torch', 'cupy']"
+                 )
+
          # Set nthreads=1 when parsing mem_limit since it only depends on n_workers
          logger = logging.getLogger(__name__)
          self.memory_limit = parse_memory_limit(
@@ -277,6 +303,8 @@ class LocalCUDACluster(LocalCluster):
          self.rmm_managed_memory = rmm_managed_memory
          self.rmm_async = rmm_async
          self.rmm_release_threshold = rmm_release_threshold
+         self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list
+
          if rmm_pool_size is not None or rmm_managed_memory or rmm_async:
              try:
                  import rmm  # noqa F401
@@ -430,6 +458,7 @@ class LocalCUDACluster(LocalCluster):
                      release_threshold=self.rmm_release_threshold,
                      log_directory=self.rmm_log_directory,
                      track_allocations=self.rmm_track_allocations,
+                     external_lib_list=self.rmm_allocator_external_lib_list,
                  ),
                  PreImport(self.pre_import),
                  CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats),
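
Note: a hedged usage sketch of the new keyword (requires a CUDA-capable environment with RMM installed; the pool size shown is illustrative):

    from dask_cuda import LocalCUDACluster

    # Either a list or a comma-separated string ("cupy,torch") is accepted;
    # strings are split and stripped as shown in the constructor above.
    cluster = LocalCUDACluster(
        rmm_pool_size="1GB",
        rmm_allocator_external_lib_list=["cupy", "torch"],
    )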
dask_cuda/plugins.py CHANGED
@@ -1,5 +1,6 @@
  import importlib
  import os
+ from typing import Callable, Dict

  from distributed import WorkerPlugin

@@ -39,6 +40,7 @@ class RMMSetup(WorkerPlugin):
          release_threshold,
          log_directory,
          track_allocations,
+         external_lib_list,
      ):
          if initial_pool_size is None and maximum_pool_size is not None:
              raise ValueError(
@@ -61,6 +63,7 @@ class RMMSetup(WorkerPlugin):
          self.logging = log_directory is not None
          self.log_directory = log_directory
          self.rmm_track_allocations = track_allocations
+         self.external_lib_list = external_lib_list

      def setup(self, worker=None):
          if self.initial_pool_size is not None:
@@ -123,6 +126,70 @@ class RMMSetup(WorkerPlugin):
              mr = rmm.mr.get_current_device_resource()
              rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr))

+         if self.external_lib_list is not None:
+             for lib in self.external_lib_list:
+                 enable_rmm_memory_for_library(lib)
+
+
+ def enable_rmm_memory_for_library(lib_name: str) -> None:
+     """Enable RMM memory pool support for a specified third-party library.
+
+     This function allows the given library to utilize RMM's memory pool if it supports
+     integration with RMM. The library name is passed as a string argument, and if the
+     library is compatible, its memory allocator will be configured to use RMM.
+
+     Parameters
+     ----------
+     lib_name : str
+         The name of the third-party library to enable RMM memory pool support for.
+         Supported libraries are "cupy" and "torch".
+
+     Raises
+     ------
+     ValueError
+         If the library name is not supported or does not have RMM integration.
+     ImportError
+         If the required library is not installed.
+     """
+
+     # Mapping of supported libraries to their respective setup functions
+     setup_functions: Dict[str, Callable[[], None]] = {
+         "torch": _setup_rmm_for_torch,
+         "cupy": _setup_rmm_for_cupy,
+     }
+
+     if lib_name not in setup_functions:
+         supported_libs = ", ".join(setup_functions.keys())
+         raise ValueError(
+             f"The library '{lib_name}' is not supported for RMM integration. "
+             f"Supported libraries are: {supported_libs}."
+         )
+
+     # Call the setup function for the specified library
+     setup_functions[lib_name]()
+
+
+ def _setup_rmm_for_torch() -> None:
+     try:
+         import torch
+     except ImportError as e:
+         raise ImportError("PyTorch is not installed.") from e
+
+     from rmm.allocators.torch import rmm_torch_allocator
+
+     torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+
+ def _setup_rmm_for_cupy() -> None:
+     try:
+         import cupy
+     except ImportError as e:
+         raise ImportError("CuPy is not installed.") from e
+
+     from rmm.allocators.cupy import rmm_cupy_allocator
+
+     cupy.cuda.set_allocator(rmm_cupy_allocator)
+

  class PreImport(WorkerPlugin):
      def __init__(self, libraries):
dask_cuda/tests/test_dask_cuda_worker.py CHANGED
@@ -567,3 +567,30 @@ def test_worker_timeout():
      assert "reason: nanny-close" in ret.stderr.lower()

      assert ret.returncode == 0
+
+
+ @pytest.mark.parametrize("enable_cudf_spill_warning", [False, True])
+ def test_worker_cudf_spill_warning(enable_cudf_spill_warning):  # noqa: F811
+     pytest.importorskip("rmm")
+
+     environ = {"CUDA_VISIBLE_DEVICES": "0"}
+     if not enable_cudf_spill_warning:
+         environ["DASK_CUDF_SPILL_WARNING"] = "False"
+
+     with patch.dict(os.environ, environ):
+         ret = subprocess.run(
+             [
+                 "dask",
+                 "cuda",
+                 "worker",
+                 "127.0.0.1:9369",
+                 "--enable-cudf-spill",
+                 "--death-timeout",
+                 "1",
+             ],
+             capture_output=True,
+         )
+         if enable_cudf_spill_warning:
+             assert b"UserWarning: cuDF spilling is enabled" in ret.stderr
+         else:
+             assert b"UserWarning: cuDF spilling is enabled" not in ret.stderr
dask_cuda/tests/test_gds.py CHANGED
@@ -38,7 +38,7 @@ def test_gds(gds_enabled, cuda_lib):
          a = data_create()
          header, frames = serialize(a, serializers=("disk",))
          b = deserialize(header, frames)
-         assert type(a) == type(b)
+         assert type(a) is type(b)
          assert data_compare(a, b)
      finally:
          ProxifyHostFile.register_disk_spilling()  # Reset disk spilling options
dask_cuda/tests/test_proxify_host_file.py CHANGED
@@ -252,7 +252,7 @@ async def test_local_cuda_cluster(jit_unspill):
              assert "ProxyObject" in str(type(x))
              assert x._pxy_get().serializer == "dask"
          else:
-             assert type(x) == cudf.DataFrame
+             assert type(x) is cudf.DataFrame
          assert len(x) == 10  # Trigger deserialization
          return x
 
dask_cuda/tests/test_proxy.py CHANGED
@@ -114,7 +114,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert expect == got

      # Check unary operators
@@ -124,7 +124,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert all(expect == got)

      # Check binary operators that takes a scalar as second argument
@@ -134,7 +134,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org, 2)
          got = op(pxy, 2)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert all(expect == got)

      # Check binary operators
@@ -192,7 +192,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert expect == got

      # Check reflected methods
@@ -297,7 +297,7 @@ async def test_spilling_local_cuda_cluster(jit_unspill):
              assert "ProxyObject" in str(type(x))
              assert x._pxy_get().serializer == "dask"
          else:
-             assert type(x) == cudf.DataFrame
+             assert type(x) is cudf.DataFrame
          assert len(x) == 10  # Trigger deserialization
          return x
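
Note: ``type(x) == T`` is replaced by ``type(x) is T`` throughout the tests; identity comparison is the exact-type check flake8 recommends (E721), since ``==`` can be overridden by the operands while identity of type objects cannot.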
 
dask_cuda/tests/test_spill.py CHANGED
@@ -11,6 +11,8 @@ from distributed.metrics import time
  from distributed.sizeof import sizeof
  from distributed.utils_test import gen_cluster, gen_test, loop  # noqa: F401

+ import dask_cudf
+
  from dask_cuda import LocalCUDACluster, utils
  from dask_cuda.utils_test import IncreasedCloseTimeoutNanny

@@ -18,6 +20,57 @@ if utils.get_device_total_memory() < 1e10:
      pytest.skip("Not enough GPU memory", allow_module_level=True)


+ def _set_cudf_device_limit():
+     """Ensure spilling for objects of all sizes"""
+     import cudf
+
+     cudf.set_option("spill_device_limit", 0)
+
+
+ def _assert_cudf_spill_stats(enable_cudf_spill, dask_worker=None):
+     """Ensure cuDF has spilled data with its internal mechanism"""
+     import cudf
+
+     global_manager = cudf.core.buffer.spill_manager.get_global_manager()
+
+     if enable_cudf_spill:
+         stats = global_manager.statistics
+         buffers = global_manager.buffers()
+         assert stats.spill_totals[("gpu", "cpu")][0] > 1000
+         assert stats.spill_totals[("cpu", "gpu")][0] > 1000
+         assert len(buffers) > 0
+     else:
+         assert global_manager is None
+
+
+ @pytest.fixture(params=[False, True])
+ def cudf_spill(request):
+     """Fixture to enable and clear cuDF spill manager in client process"""
+     cudf = pytest.importorskip("cudf")
+
+     enable_cudf_spill = request.param
+
+     if enable_cudf_spill:
+         # If the global spill manager was previously set, fail.
+         assert cudf.core.buffer.spill_manager._global_manager is None
+
+         cudf.set_option("spill", True)
+         cudf.set_option("spill_stats", True)
+
+         # This change is to prevent changing RMM resource stack in cuDF,
+         # workers do not need this because they are spawned as new
+         # processes for every new test that runs.
+         cudf.set_option("spill_on_demand", False)
+
+         _set_cudf_device_limit()
+
+     yield enable_cudf_spill
+
+     cudf.set_option("spill", False)
+     cudf.core.buffer.spill_manager._global_manager_uninitialized = True
+     cudf.core.buffer.spill_manager._global_manager = None
+
+
  def device_host_file_size_matches(
      dhf, total_bytes, device_chunk_overhead=0, serialized_chunk_overhead=1024
  ):
@@ -244,9 +297,11 @@ async def test_cupy_cluster_device_spill(params):
      ],
  )
  @gen_test(timeout=30)
- async def test_cudf_cluster_device_spill(params):
+ async def test_cudf_cluster_device_spill(params, cudf_spill):
      cudf = pytest.importorskip("cudf")

+     enable_cudf_spill = cudf_spill
+
      with dask.config.set(
          {
              "distributed.comm.compression": False,
@@ -266,6 +321,7 @@ async def test_cudf_cluster_device_spill(params):
              device_memory_limit=params["device_memory_limit"],
              memory_limit=params["memory_limit"],
              worker_class=IncreasedCloseTimeoutNanny,
+             enable_cudf_spill=enable_cudf_spill,
          ) as cluster:
              async with Client(cluster, asynchronous=True) as client:

@@ -294,21 +350,28 @@ async def test_cudf_cluster_device_spill(params):
                  del cdf
                  gc.collect()

-                 await client.run(
-                     assert_host_chunks,
-                     params["spills_to_disk"],
-                 )
-                 await client.run(
-                     assert_disk_chunks,
-                     params["spills_to_disk"],
-                 )
-
-                 await client.run(
-                     worker_assert,
-                     nbytes,
-                     32,
-                     2048,
-                 )
+                 if enable_cudf_spill:
+                     await client.run(
+                         worker_assert,
+                         0,
+                         0,
+                         0,
+                     )
+                 else:
+                     await client.run(
+                         assert_host_chunks,
+                         params["spills_to_disk"],
+                     )
+                     await client.run(
+                         assert_disk_chunks,
+                         params["spills_to_disk"],
+                     )
+                     await client.run(
+                         worker_assert,
+                         nbytes,
+                         32,
+                         2048,
+                     )

                  del cdf2

@@ -324,3 +387,40 @@ async def test_cudf_cluster_device_spill(params):
                  gc.collect()
              else:
                  break
+
+
+ @gen_test(timeout=30)
+ async def test_cudf_spill_cluster(cudf_spill):
+     cudf = pytest.importorskip("cudf")
+     enable_cudf_spill = cudf_spill
+
+     async with LocalCUDACluster(
+         n_workers=1,
+         scheduler_port=0,
+         silence_logs=False,
+         dashboard_address=None,
+         asynchronous=True,
+         device_memory_limit=None,
+         memory_limit=None,
+         worker_class=IncreasedCloseTimeoutNanny,
+         enable_cudf_spill=enable_cudf_spill,
+         cudf_spill_stats=enable_cudf_spill,
+     ) as cluster:
+         async with Client(cluster, asynchronous=True) as client:
+
+             await client.wait_for_workers(1)
+             await client.run(_set_cudf_device_limit)
+
+             cdf = cudf.DataFrame(
+                 {
+                     "a": list(range(200)),
+                     "b": list(reversed(range(200))),
+                     "c": list(range(200)),
+                 }
+             )
+
+             ddf = dask_cudf.from_cudf(cdf, npartitions=2).sum().persist()
+             await wait(ddf)
+
+             await client.run(_assert_cudf_spill_stats, enable_cudf_spill)
+             _assert_cudf_spill_stats(enable_cudf_spill)
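
Note: the spill totals asserted above come from cuDF's own spill manager. A minimal sketch of inspecting them outside the test (assumes a cudf build with built-in spilling; the internal module path mirrors the test code):

    import cudf

    cudf.set_option("spill", True)
    cudf.set_option("spill_stats", True)

    # statistics.spill_totals maps (src, dst) -> (bytes spilled, seconds)
    manager = cudf.core.buffer.spill_manager.get_global_manager()
    print(manager.statistics.spill_totals)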
dask_cuda/utils.py CHANGED
@@ -9,6 +9,7 @@ from functools import singledispatch
  from multiprocessing import cpu_count
  from typing import Optional

+ import click
  import numpy as np
  import pynvml
  import toolz
@@ -764,3 +765,13 @@ def get_rmm_device_memory_usage() -> Optional[int]:
          if isinstance(mr, rmm.mr.StatisticsResourceAdaptor):
              return mr.allocation_counts["current_bytes"]
      return None
+
+
+ class CommaSeparatedChoice(click.Choice):
+     def convert(self, value, param, ctx):
+         values = [v.strip() for v in value.split(",")]
+         for v in values:
+             if v not in self.choices:
+                 choices_str = ", ".join(f"'{c}'" for c in self.choices)
+                 self.fail(f"invalid choice(s): {v}. (choices are: {choices_str})")
+         return values
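
Note: a minimal sketch of the new click type's behavior (the toy command is illustrative, not part of the package):

    import click
    from click.testing import CliRunner

    from dask_cuda.utils import CommaSeparatedChoice

    @click.command()
    @click.option("--libs", type=CommaSeparatedChoice(["cupy", "torch"]))
    def main(libs):
        click.echo(libs)

    # Whitespace around each item is stripped; invalid items fail the parse.
    print(CliRunner().invoke(main, ["--libs", "torch, cupy"]).output)
    # -> ['torch', 'cupy']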
dask_cuda-24.12.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dask-cuda
- Version: 24.8.2
+ Version: 24.12.0
  Summary: Utilities for Dask and CUDA interactions
  Author: NVIDIA Corporation
  License: Apache 2.0
@@ -12,18 +12,18 @@ Classifier: Topic :: Database
  Classifier: Topic :: Scientific/Engineering
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Python: >=3.9
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: click>=8.1
  Requires-Dist: numba>=0.57
- Requires-Dist: numpy<2.0a0,>=1.23
+ Requires-Dist: numpy<3.0a0,>=1.23
  Requires-Dist: pandas>=1.3
- Requires-Dist: pynvml<11.5,>=11.0.0
- Requires-Dist: rapids-dask-dependency==24.8.*
+ Requires-Dist: pynvml<12.0.0a0,>=11.0.0
+ Requires-Dist: rapids-dask-dependency==24.12.*
  Requires-Dist: zict>=2.0.0
  Provides-Extra: docs
  Requires-Dist: numpydoc>=1.1.0; extra == "docs"
@@ -31,12 +31,12 @@ Requires-Dist: sphinx; extra == "docs"
  Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
  Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
  Provides-Extra: test
- Requires-Dist: cudf==24.8.*; extra == "test"
- Requires-Dist: dask-cudf==24.8.*; extra == "test"
- Requires-Dist: kvikio==24.8.*; extra == "test"
+ Requires-Dist: cudf==24.12.*; extra == "test"
+ Requires-Dist: dask-cudf==24.12.*; extra == "test"
+ Requires-Dist: kvikio==24.12.*; extra == "test"
  Requires-Dist: pytest; extra == "test"
  Requires-Dist: pytest-cov; extra == "test"
- Requires-Dist: ucx-py==0.39.*; extra == "test"
+ Requires-Dist: ucx-py==0.41.*; extra == "test"

  Dask CUDA
  =========
dask_cuda-24.12.0.dist-info/RECORD CHANGED
@@ -1,54 +1,55 @@
- dask_cuda/VERSION,sha256=5YtjwV2EoD7E5Ed4K-PvnU0eEtdkkn33JHuNFDy8oKA,8
- dask_cuda/__init__.py,sha256=JLDWev7vI_dPusLgRdOwXBz-xfhlX_hc-DzmLtrEYO0,1918
+ dask_cuda/VERSION,sha256=NltZ4By82NzVjz00LGPhCXfkG4BB0JdUSXqlG8fiVuo,8
+ dask_cuda/__init__.py,sha256=eOCH3Wj0A8X0qbNUoNA15dgxb2O-ZApha4QHq5EEVFw,2748
  dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
- dask_cuda/cli.py,sha256=Y3aObfAyMwOIo0oVz3-NC2InGLShOpeINwW5ROTF2s8,16616
- dask_cuda/cuda_worker.py,sha256=uqyoDKsSe7sKN3StMVyz_971rj0Sjpmwfv7Bj083Wss,8959
+ dask_cuda/cli.py,sha256=cScVyNiA_l9uXeDgkIcmbcR4l4cH1_1shqSqsVmuHPE,17053
+ dask_cuda/cuda_worker.py,sha256=rZ1ITG_ZCbuaMA9e8uSqCjU8Km4AMphGGrxpBPQG8xU,9477
  dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
  dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
  dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
  dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
  dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
  dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
- dask_cuda/local_cuda_cluster.py,sha256=jgXjd6OvEDfQ3iXU8hV_UfULa13GZsli0SGC2PIouZk,18882
- dask_cuda/plugins.py,sha256=DCf7PnIBu_VNjFfrFeb1zCNuEnCaX9oz4Umn76t02Mc,4630
+ dask_cuda/local_cuda_cluster.py,sha256=wqwKVRV6jT13sf9e-XsvbVBlTrnhmcbmHQBFPTFcayw,20335
+ dask_cuda/plugins.py,sha256=yGHEurbYhL4jucQrmsxLfOyE5c3bSJdfs6GVwvDAeEA,6770
  dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
  dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
  dask_cuda/proxy_object.py,sha256=bZq92kjgFB-ad_luSAFT_RItV3nssmiEk4OOSp34laU,29812
- dask_cuda/utils.py,sha256=RWlLK2cPHaCuNNhr8bW8etBeGklwREQJOafQbTydStk,25121
+ dask_cuda/utils.py,sha256=Goq-m78rYZ-bcJitg47N1h_PC4PDuzXG0CUVH7V8azU,25515
  dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
  dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
  dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dask_cuda/benchmarks/common.py,sha256=2MnDdQjvHfGaUWDgiTcTGI_EeKPmVBEwoWfsJUNpOjU,6613
- dask_cuda/benchmarks/local_cudf_groupby.py,sha256=T9lA9nb4Wzu46AH--SJEVCeCm3650J7slapdNR_08FU,8904
- dask_cuda/benchmarks/local_cudf_merge.py,sha256=AsuVnMA3H93sJwjjgi4KaIdYKnnX1OeRMPiXizrwHGk,12577
- dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=2xWJZf3gwDNimXKZN2ivtU3OE_qec1KNOhgL4_AGQZU,8655
- dask_cuda/benchmarks/local_cupy.py,sha256=aUKIYfeR7c77K4kKk697Rxo8tG8kFabQ9jQEVGr-oTs,10762
- dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=_texYmam1K_XbzIvURltui5KRsISGFNylXiGUtgRIz0,6442
- dask_cuda/benchmarks/utils.py,sha256=4k8KnJPOczKDQNBPRWlaGsU2zdEA09BDGgklUXggwMU,30008
+ dask_cuda/benchmarks/common.py,sha256=YFhxBYkoxIV-2mddSbLwTbyg67U4zXDd2_fFq9oP3_A,6922
+ dask_cuda/benchmarks/local_cudf_groupby.py,sha256=zrDiF-yBAUxVt9mWOTH5hUm-pb-XnVX-G9gvCEX7_GI,8512
+ dask_cuda/benchmarks/local_cudf_merge.py,sha256=Q7lnZ87-O7j28hkS-i_5hMApTX8VsuI4ftZf2XAnp1E,12195
+ dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=8FjPFtiC-UqZcdPfocdMuzq_8TURAQWJlmhfcMWdo4w,8276
+ dask_cuda/benchmarks/local_cupy.py,sha256=RCxQJd88bn3vyMAJDPK3orUpxzvDZY957wOSYkfriq0,10323
+ dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=YAllGFuG6MePfPL8gdZ-Ld7a44-G0eEaHZJWB4vFPdY,6017
+ dask_cuda/benchmarks/read_parquet.py,sha256=TARcG-TS1NGcQWJmuAKtfmBmy5LAaLc3xgtKgAd1DaA,7650
+ dask_cuda/benchmarks/utils.py,sha256=_NSWS5e8SzZ6vxDcEFo97Y8gs_e23Qqd-c3r83BA6PU,30748
  dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/explicit_comms/comms.py,sha256=Su6PuNo68IyS-AwoqU4S9TmqWsLvUdNa0jot2hx8jQQ,10400
  dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=4xfhfbTGa36YPs_ex1_fFhzfGMYJq-QkS5q0RwgeHh8,20645
  dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
- dask_cuda/tests/test_dask_cuda_worker.py,sha256=o5g0_t-2M_2lfPeOPTS4NVF4rnQF0ZWAZekXw2h0xPc,19610
+ dask_cuda/tests/test_dask_cuda_worker.py,sha256=6rroHvJAn5R3X9LwIcE8QrPxG1GO3PaxXVjhbdQ90Pw,20477
  dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
  dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
  dask_cuda/tests/test_explicit_comms.py,sha256=Pa5vVx63qWtScnVJuS31WESXIt2FPyTJVFO-0OUbbmU,15276
  dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
- dask_cuda/tests/test_gds.py,sha256=6jf0HPTHAIG8Mp_FC4Ai4zpn-U1K7yk0fSXg8He8-r8,1513
+ dask_cuda/tests/test_gds.py,sha256=j1Huud6UGm1fbkyRLQEz_ysrVw__5AimwSn_M-2GEvs,1513
  dask_cuda/tests/test_initialize.py,sha256=Rba59ZbljEm1yyN94_sWZPEE_f7hWln95aiBVc49pmY,6960
  dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
- dask_cuda/tests/test_proxify_host_file.py,sha256=Yiv0sDcUoWw0d2oiPeHGoHqqSSM4lfQ4rChCiaxb6EU,18994
- dask_cuda/tests/test_proxy.py,sha256=OnGnPkl5ksCb-3hpEKG2z1OfPK9DbnOCtBHOjcUUjhg,23809
- dask_cuda/tests/test_spill.py,sha256=xN9PbVERBYMuZxvscSO0mAM22loq9WT3ltZVBFxlmM4,10239
+ dask_cuda/tests/test_proxify_host_file.py,sha256=LC3jjo_gbfhdIy1Zy_ynmgyv31HXFoBINCe1-XXZ4XU,18994
+ dask_cuda/tests/test_proxy.py,sha256=51qsXGJBg_hwSMRsC_QvJBz4wVM0Bf8fbFmTUFA7HJE,23809
+ dask_cuda/tests/test_spill.py,sha256=CYMbp5HDBYlZ7T_n8RfSOZxaWFcAQKjprjRM7Wupcdw,13419
  dask_cuda/tests/test_utils.py,sha256=JRIwXfemc3lWSzLJX0VcvR1_0wB4yeoOTsw7kB6z6pU,9176
  dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
  dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
  examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
  examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
- dask_cuda-24.8.2.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
- dask_cuda-24.8.2.dist-info/METADATA,sha256=6iMwPI8cWrEYDYz73vm8pw-LkVeEgTQzymJgRxj32VQ,2546
- dask_cuda-24.8.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
- dask_cuda-24.8.2.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
- dask_cuda-24.8.2.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
- dask_cuda-24.8.2.dist-info/RECORD,,
+ dask_cuda-24.12.0.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
+ dask_cuda-24.12.0.dist-info/METADATA,sha256=qFewjmkl67EsxFm9VoMTmw_XOOK3savtnO9hK-Qwx-E,2557
+ dask_cuda-24.12.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ dask_cuda-24.12.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+ dask_cuda-24.12.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
+ dask_cuda-24.12.0.dist-info/RECORD,,
dask_cuda-24.12.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (72.1.0)
+ Generator: setuptools (75.6.0)
  Root-Is-Purelib: true
  Tag: py3-none-any