dask-cuda 24.10.0__py3-none-any.whl → 25.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_cuda/VERSION +1 -1
- dask_cuda/__init__.py +3 -29
- dask_cuda/benchmarks/common.py +15 -6
- dask_cuda/benchmarks/local_cudf_groupby.py +3 -10
- dask_cuda/benchmarks/local_cudf_merge.py +1 -7
- dask_cuda/benchmarks/local_cudf_shuffle.py +3 -11
- dask_cuda/benchmarks/local_cupy.py +4 -11
- dask_cuda/benchmarks/local_cupy_map_overlap.py +4 -11
- dask_cuda/benchmarks/read_parquet.py +0 -1
- dask_cuda/benchmarks/utils.py +22 -21
- dask_cuda/cli.py +13 -1
- dask_cuda/cuda_worker.py +2 -0
- dask_cuda/explicit_comms/dataframe/shuffle.py +56 -36
- dask_cuda/local_cuda_cluster.py +22 -0
- dask_cuda/plugins.py +77 -1
- dask_cuda/proxy_object.py +13 -20
- dask_cuda/tests/test_dask_cuda_worker.py +1 -0
- dask_cuda/tests/test_explicit_comms.py +1 -14
- dask_cuda/tests/test_proxy.py +8 -8
- dask_cuda/tests/test_utils.py +1 -2
- dask_cuda/utils.py +11 -0
- {dask_cuda-24.10.0.dist-info → dask_cuda-25.2.0.dist-info}/METADATA +5 -12
- {dask_cuda-24.10.0.dist-info → dask_cuda-25.2.0.dist-info}/RECORD +27 -27
- {dask_cuda-24.10.0.dist-info → dask_cuda-25.2.0.dist-info}/WHEEL +1 -1
- {dask_cuda-24.10.0.dist-info → dask_cuda-25.2.0.dist-info}/LICENSE +0 -0
- {dask_cuda-24.10.0.dist-info → dask_cuda-25.2.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-24.10.0.dist-info → dask_cuda-25.2.0.dist-info}/top_level.txt +0 -0
dask_cuda/VERSION
CHANGED
@@ -1 +1 @@
-24.10.00
+25.02.00
dask_cuda/__init__.py
CHANGED
@@ -5,46 +5,20 @@ if sys.platform != "linux":
 
 import dask
 import dask.utils
-import dask.dataframe.core
 import dask.dataframe.shuffle
-
-import dask.bag.core
+from .explicit_comms.dataframe.shuffle import patch_shuffle_expression
 from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
 from distributed.protocol.serialize import dask_deserialize, dask_serialize
 
 from ._version import __git_commit__, __version__
 from .cuda_worker import CUDAWorker
-from .explicit_comms.dataframe.shuffle import (
-    get_rearrange_by_column_wrapper,
-    get_default_shuffle_method,
-)
+
 from .local_cuda_cluster import LocalCUDACluster
 from .proxify_device_objects import proxify_decorator, unproxify_decorator
 
 
-if dask.config.get("dataframe.query-planning", None) is not False and dask.config.get(
-    "explicit-comms", False
-):
-    raise NotImplementedError(
-        "The 'explicit-comms' config is not yet supported when "
-        "query-planning is enabled in dask. Please use the shuffle "
-        "API directly, or use the legacy dask-dataframe API "
-        "(set the 'dataframe.query-planning' config to `False`"
-        "before importing `dask.dataframe`).",
-    )
-
-
 # Monkey patching Dask to make use of explicit-comms when `DASK_EXPLICIT_COMMS=True`
-dask.dataframe.shuffle.rearrange_by_column = get_rearrange_by_column_wrapper(
-    dask.dataframe.shuffle.rearrange_by_column
-)
-# We have to replace all modules that imports Dask's `get_default_shuffle_method()`
-# TODO: introduce a shuffle-algorithm dispatcher in Dask so we don't need this hack
-dask.dataframe.shuffle.get_default_shuffle_method = get_default_shuffle_method
-dask.dataframe.multi.get_default_shuffle_method = get_default_shuffle_method
-dask.bag.core.get_default_shuffle_method = get_default_shuffle_method
-
-
+patch_shuffle_expression()
 # Monkey patching Dask to make use of proxify and unproxify in compatibility mode
 dask.dataframe.shuffle.shuffle_group = proxify_decorator(
     dask.dataframe.shuffle.shuffle_group
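Note on the new patching approach: importing dask_cuda now calls patch_shuffle_expression(), so enabling the explicit-comms shuffle only requires setting the 'explicit-comms' config (or DASK_EXPLICIT_COMMS=True in the environment). A minimal usage sketch, assuming a CUDA-capable machine; data sizes and partition counts are illustrative:

import dask
import dask.dataframe as dd
import pandas as pd
from distributed import Client
from dask_cuda import LocalCUDACluster  # importing dask_cuda applies the patch

if __name__ == "__main__":
    with Client(LocalCUDACluster()) as client:
        ddf = dd.from_pandas(
            pd.DataFrame({"key": range(1000), "val": range(1000)}), npartitions=8
        )
        # With the config below set, task-based shuffles lower to the
        # explicit-comms implementation instead of Dask's default path.
        with dask.config.set({"explicit-comms": True}):
            ddf.shuffle(on="key").compute()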
dask_cuda/benchmarks/common.py
CHANGED
@@ -1,3 +1,4 @@
+import contextlib
 from argparse import Namespace
 from functools import partial
 from typing import Any, Callable, List, Mapping, NamedTuple, Optional, Tuple
@@ -7,7 +8,7 @@ import numpy as np
 import pandas as pd
 
 import dask
-from distributed import Client
+from distributed import Client, performance_report
 
 from dask_cuda.benchmarks.utils import (
     address_to_index,
@@ -87,12 +88,20 @@ def run_benchmark(client: Client, args: Namespace, config: Config):
 
     If ``args.profile`` is set, the final run is profiled.
     """
+
     results = []
-    for _ in range(max(1, args.runs) - 1):
-        res = config.bench_once(client, args, write_profile=None)
-        results.append(res)
-    results.append(config.bench_once(client, args, write_profile=args.profile))
-    return results
+    for _ in range(max(0, args.warmup_runs)):
+        config.bench_once(client, args, write_profile=None)
+
+    ctx = contextlib.nullcontext()
+    if args.profile is not None:
+        ctx = performance_report(filename=args.profile)
+    with ctx:
+        for _ in range(max(1, args.runs) - 1):
+            res = config.bench_once(client, args, write_profile=None)
+            results.append(res)
+        results.append(config.bench_once(client, args, write_profile=args.profile_last))
+    return results
 
 
 def gather_bench_results(client: Client, args: Namespace, config: Config):
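How the new flags interact in run_benchmark: warmup iterations (--warmup-runs) execute first and are never profiled, the optional --profile report spans every measured iteration, and --profile-last additionally profiles only the final one. A standalone sketch of that control flow, with bench_once as a stand-in for Config.bench_once:

import contextlib
from distributed import performance_report

def run_all(client, args, bench_once):
    results = []
    for _ in range(max(0, args.warmup_runs)):
        # Warmup iterations: executed but neither recorded nor profiled.
        bench_once(client, args, write_profile=None)

    ctx = contextlib.nullcontext()
    if args.profile is not None:
        # One report covering all measured iterations.
        ctx = performance_report(filename=args.profile)
    with ctx:
        for _ in range(max(1, args.runs) - 1):
            results.append(bench_once(client, args, write_profile=None))
        # The final iteration may also write its own report (--profile-last).
        results.append(bench_once(client, args, write_profile=args.profile_last))
    return results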
dask_cuda/benchmarks/local_cudf_groupby.py
CHANGED
@@ -98,10 +98,9 @@ def bench_once(client, args, write_profile=None):
         "False": False,
     }.get(args.shuffle, args.shuffle)
 
-
-
-
-    ctx = performance_report(filename=args.profile)
+    ctx = contextlib.nullcontext()
+    if write_profile is not None:
+        ctx = performance_report(filename=write_profile)
 
     with ctx:
         t1 = clock()
@@ -260,12 +259,6 @@ def parse_args():
             "type": str,
             "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
         },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs",
-        },
     ]
 
     return parse_benchmark_args(
dask_cuda/benchmarks/local_cudf_merge.py
CHANGED
@@ -190,7 +190,7 @@ def bench_once(client, args, write_profile=None):
     if args.backend == "explicit-comms":
         ctx1 = dask.config.set(explicit_comms=True)
     if write_profile is not None:
-        ctx2 = performance_report(filename=args.profile)
+        ctx2 = performance_report(filename=write_profile)
 
     with ctx1:
         with ctx2:
@@ -346,12 +346,6 @@ def parse_args():
            "action": "store_true",
            "help": "Don't shuffle the keys of the left (base) dataframe.",
        },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs",
-        },
        {
            "name": [
                "-s",
dask_cuda/benchmarks/local_cudf_shuffle.py
CHANGED
@@ -121,10 +121,9 @@ def create_data(
 def bench_once(client, args, write_profile=None):
     data_processed, df = create_data(client, args)
 
-
-
-
-    ctx = performance_report(filename=args.profile)
+    ctx = contextlib.nullcontext()
+    if write_profile is not None:
+        ctx = performance_report(filename=write_profile)
 
     with ctx:
         if args.backend in {"dask", "dask-noop"}:
@@ -228,12 +227,6 @@ def parse_args():
            "type": str,
            "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
        },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs",
-        },
        {
            "name": "--ignore-index",
            "action": "store_true",
@@ -253,7 +246,6 @@ def parse_args():
     return parse_benchmark_args(
         description="Distributed shuffle (dask/cudf) benchmark",
         args_list=special_args,
-        check_explicit_comms=False,
     )
 
 
dask_cuda/benchmarks/local_cupy.py
CHANGED
@@ -141,12 +141,11 @@ def bench_once(client, args, write_profile=None):
     chunksize = x.chunksize
     data_processed = sum(arg.nbytes for arg in func_args)
 
-
-    if
-        ctx = performance_report(filename=args.profile)
-    else:
-        ctx = contextlib.nullcontext()
+    ctx = contextlib.nullcontext()
+    if write_profile is not None:
+        ctx = performance_report(filename=write_profile)
 
+    # Execute the operations to benchmark
     with ctx:
         rng = start_range(message=args.operation, color="purple")
         result = func(*func_args)
@@ -297,12 +296,6 @@ def parse_args():
            "type": int,
            "help": "Chunk size (default 2500).",
        },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs (default 3).",
-        },
        {
            "name": [
                "-b",
dask_cuda/benchmarks/local_cupy_map_overlap.py
CHANGED
@@ -42,12 +42,11 @@ def bench_once(client, args, write_profile=None):
 
     data_processed = x.nbytes
 
-
-    if
-        ctx = performance_report(filename=args.profile)
-    else:
-        ctx = contextlib.nullcontext()
+    ctx = contextlib.nullcontext()
+    if write_profile is not None:
+        ctx = performance_report(filename=write_profile)
 
+    # Execute the operations to benchmark
     with ctx:
         result = x.map_overlap(mean_filter, args.kernel_size, shape=ks)
         if args.backend == "dask-noop":
@@ -168,12 +167,6 @@ def parse_args():
            "type": int,
            "help": "Kernel size, 2*k+1, in each dimension (default 1)",
        },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs",
-        },
        {
            "name": [
                "-b",
dask_cuda/benchmarks/utils.py
CHANGED
@@ -11,7 +11,6 @@ from typing import Any, Callable, Mapping, NamedTuple, Optional, Tuple
 import numpy as np
 import pandas as pd
 
-from dask import config
 from dask.distributed import Client, SSHCluster
 from dask.utils import format_bytes, format_time, parse_bytes
 from distributed.comm.addressing import get_address_host
@@ -52,7 +51,6 @@ def as_noop(dsk):
 def parse_benchmark_args(
     description="Generic dask-cuda Benchmark",
     args_list=[],
-    check_explicit_comms=True,
 ):
     parser = argparse.ArgumentParser(description=description)
     worker_args = parser.add_argument_group(description="Worker configuration")
@@ -323,7 +321,16 @@ def parse_benchmark_args(
         metavar="PATH",
         default=None,
         type=str,
-        help="Write dask profile report (E.g. dask-report.html)"
+        help="Write dask profile report (E.g. dask-report.html) on all "
+        "iterations (excluding warmup).",
+    )
+    parser.add_argument(
+        "--profile-last",
+        metavar="PATH",
+        default=None,
+        type=str,
+        help="Write dask profile report (E.g. dask-report.html) on last "
+        "iteration only.",
     )
     # See save_benchmark_data for more information
     parser.add_argument(
@@ -344,6 +351,18 @@ def parse_benchmark_args(
         type=parse_bytes,
         help="Bandwidth statistics: ignore messages smaller than this (default '1 MB')",
     )
+    parser.add_argument(
+        "--runs",
+        default=3,
+        type=int,
+        help="Number of runs",
+    )
+    parser.add_argument(
+        "--warmup-runs",
+        default=1,
+        type=int,
+        help="Number of warmup runs",
+    )
 
     for args in args_list:
         name = args.pop("name")
@@ -356,24 +375,6 @@ def parse_benchmark_args(
     if args.multi_node and len(args.hosts.split(",")) < 2:
         raise ValueError("--multi-node requires at least 2 hosts")
 
-    # Raise error early if "explicit-comms" is not allowed
-    if (
-        check_explicit_comms
-        and args.backend == "explicit-comms"
-        and config.get(
-            "dataframe.query-planning",
-            None,
-        )
-        is not False
-    ):
-        raise NotImplementedError(
-            "The 'explicit-comms' config is not yet supported when "
-            "query-planning is enabled in dask. Please use the legacy "
-            "dask-dataframe API by setting the following environment "
-            "variable before executing:",
-            "  DASK_DATAFRAME__QUERY_PLANNING=False",
-        )
-
     return args
 
 
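Because --runs and --warmup-runs now live in parse_benchmark_args, individual benchmarks no longer declare them in their own args_list. A hypothetical micro-benchmark picking up the shared flags for free (the "--rows" option is illustrative):

from dask_cuda.benchmarks.utils import parse_benchmark_args

args = parse_benchmark_args(
    description="Toy dask-cuda benchmark",
    args_list=[
        {"name": "--rows", "default": 1_000_000, "type": int, "help": "Number of rows"},
    ],
)
# --runs, --warmup-runs, --profile and --profile-last are parsed by the shared helper.
print(args.runs, args.warmup_runs, args.profile, args.profile_last)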
dask_cuda/cli.py
CHANGED
@@ -13,7 +13,7 @@ from distributed.security import Security
 from distributed.utils import import_term
 
 from .cuda_worker import CUDAWorker
-from .utils import print_cluster_config
+from .utils import CommaSeparatedChoice, print_cluster_config
 
 logger = logging.getLogger(__name__)
 
@@ -164,6 +164,16 @@ def cuda():
     incompatible with RMM pools and managed memory, trying to enable both will
     result in failure.""",
 )
+@click.option(
+    "--set-rmm-allocator-for-libs",
+    "rmm_allocator_external_lib_list",
+    type=CommaSeparatedChoice(["cupy", "torch"]),
+    default=None,
+    show_default=True,
+    help="""
+    Set RMM as the allocator for external libraries. Provide a comma-separated
+    list of libraries to set, e.g., "torch,cupy".""",
+)
 @click.option(
     "--rmm-release-threshold",
     default=None,
@@ -351,6 +361,7 @@ def worker(
     rmm_maximum_pool_size,
     rmm_managed_memory,
     rmm_async,
+    rmm_allocator_external_lib_list,
     rmm_release_threshold,
     rmm_log_directory,
     rmm_track_allocations,
@@ -425,6 +436,7 @@ def worker(
        rmm_maximum_pool_size,
        rmm_managed_memory,
        rmm_async,
+        rmm_allocator_external_lib_list,
        rmm_release_threshold,
        rmm_log_directory,
        rmm_track_allocations,
dask_cuda/cuda_worker.py
CHANGED
@@ -47,6 +47,7 @@ class CUDAWorker(Server):
         rmm_maximum_pool_size=None,
         rmm_managed_memory=False,
         rmm_async=False,
+        rmm_allocator_external_lib_list=None,
         rmm_release_threshold=None,
         rmm_log_directory=None,
         rmm_track_allocations=False,
@@ -231,6 +232,7 @@ class CUDAWorker(Server):
                 release_threshold=rmm_release_threshold,
                 log_directory=rmm_log_directory,
                 track_allocations=rmm_track_allocations,
+                external_lib_list=rmm_allocator_external_lib_list,
             ),
             PreImport(pre_import),
             CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
dask_cuda/explicit_comms/dataframe/shuffle.py
CHANGED
@@ -1,8 +1,6 @@
 from __future__ import annotations
 
 import asyncio
-import functools
-import inspect
 from collections import defaultdict
 from math import ceil
 from operator import getitem
@@ -20,7 +18,7 @@ import distributed.worker
 from dask.base import tokenize
 from dask.dataframe import DataFrame, Series
 from dask.dataframe.core import _concat as dd_concat
-from dask.dataframe.shuffle import group_split_dispatch, hash_object_dispatch
+from dask.dataframe.dispatch import group_split_dispatch, hash_object_dispatch
 from distributed import wait
 from distributed.protocol import nested_deserialize, to_serialize
 from distributed.worker import Worker
@@ -33,6 +31,20 @@ T = TypeVar("T")
 Proxify = Callable[[T], T]
 
 
+try:
+    from dask.dataframe import dask_expr
+
+except ImportError:
+    # TODO: Remove when pinned to dask>2024.12.1
+    import dask_expr
+
+if not dd._dask_expr_enabled():
+    raise ValueError(
+        "The legacy DataFrame API is not supported in dask_cudf>24.12. "
+        "Please enable query-planning, or downgrade to dask_cudf<=24.12"
+    )
+
+
 def get_proxify(worker: Worker) -> Proxify:
     """Get function to proxify objects"""
     from dask_cuda.proxify_host_file import ProxifyHostFile
@@ -570,40 +582,48 @@ def _use_explicit_comms() -> bool:
     return False
 
 
-def
-    """
+def patch_shuffle_expression() -> None:
+    """Patch Dasks Shuffle expression.
 
-    Notice, this is monkey patched into Dask at dask_cuda
+    Notice, this is monkey patched into Dask at dask_cuda
+    import, and it changes `Shuffle._layer` to lower into
+    an `ECShuffle` expression when the 'explicit-comms'
+    config is set to `True`.
     """
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    class ECShuffle(dask_expr._shuffle.TaskShuffle):
+        """Explicit-Comms Shuffle Expression."""
+
+        def _layer(self):
+            # Execute an explicit-comms shuffle
+            if not hasattr(self, "_ec_shuffled"):
+                on = self.partitioning_index
+                df = dask_expr.new_collection(self.frame)
+                self._ec_shuffled = shuffle(
+                    df,
+                    [on] if isinstance(on, str) else on,
+                    self.npartitions_out,
+                    self.ignore_index,
+                )
+            graph = self._ec_shuffled.dask.copy()
+            shuffled_name = self._ec_shuffled._name
+            for i in range(self.npartitions_out):
+                graph[(self._name, i)] = graph[(shuffled_name, i)]
+            return graph
+
+    _base_lower = dask_expr._shuffle.Shuffle._lower
+
+    def _patched_lower(self):
+        if self.method in (None, "tasks") and _use_explicit_comms():
+            return ECShuffle(
+                self.frame,
+                self.partitioning_index,
+                self.npartitions_out,
+                self.ignore_index,
+                self.options,
+                self.original_partitioning_index,
+            )
+        else:
+            return _base_lower(self)
 
-
-    when explicit comms is enabled.
-    """
-    ret = dask.config.get("dataframe.shuffle.algorithm", None)
-    if ret is None and _use_explicit_comms():
-        return "tasks"
-    return dask.utils.get_default_shuffle_method()
+    dask_expr._shuffle.Shuffle._lower = _patched_lower
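A small sketch of exercising the patched lowering: with the 'explicit-comms' config enabled and a distributed client running (explicit-comms requires one), building the graph of a task-based shuffle goes through the patched Shuffle._lower. Data sizes and cluster shape below are illustrative:

import dask
import dask.dataframe as dd
import pandas as pd
from distributed import Client, LocalCluster

if __name__ == "__main__":
    with Client(LocalCluster(n_workers=2)) as client:
        ddf = dd.from_pandas(pd.DataFrame({"key": range(100)}), npartitions=4)
        with dask.config.set({"explicit-comms": True}):
            shuffled = ddf.shuffle(on="key", npartitions=4)
            # Materializing the graph triggers lowering; this is the same step
            # the test suite uses (ddf.shuffle(...).dask) to reach the
            # explicit-comms path.
            graph = shuffled.dask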
dask_cuda/local_cuda_cluster.py
CHANGED
@@ -143,6 +143,11 @@ class LocalCUDACluster(LocalCluster):
         The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also
         incompatible with RMM pools and managed memory. Trying to enable both will
         result in an exception.
+    rmm_allocator_external_lib_list: str, list or None, default None
+        List of external libraries for which to set RMM as the allocator.
+        Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string
+        (like ``"torch,cupy"``) or a list of strings (like ``["torch", "cupy"]``).
+        If ``None``, no external libraries will use RMM as their allocator.
     rmm_release_threshold: int, str or None, default None
         When ``rmm.async is True`` and the pool size grows beyond this value, unused
         memory held by the pool will be released at the next synchronization point.
@@ -231,6 +236,7 @@ class LocalCUDACluster(LocalCluster):
         rmm_maximum_pool_size=None,
         rmm_managed_memory=False,
         rmm_async=False,
+        rmm_allocator_external_lib_list=None,
         rmm_release_threshold=None,
         rmm_log_directory=None,
         rmm_track_allocations=False,
@@ -265,6 +271,19 @@ class LocalCUDACluster(LocalCluster):
             n_workers = len(CUDA_VISIBLE_DEVICES)
         if n_workers < 1:
             raise ValueError("Number of workers cannot be less than 1.")
+
+        if rmm_allocator_external_lib_list is not None:
+            if isinstance(rmm_allocator_external_lib_list, str):
+                rmm_allocator_external_lib_list = [
+                    v.strip() for v in rmm_allocator_external_lib_list.split(",")
+                ]
+            elif not isinstance(rmm_allocator_external_lib_list, list):
+                raise ValueError(
+                    "rmm_allocator_external_lib_list must be either a comma-separated "
+                    "string or a list of strings. Examples: 'torch,cupy' "
+                    "or ['torch', 'cupy']"
+                )
+
         # Set nthreads=1 when parsing mem_limit since it only depends on n_workers
         logger = logging.getLogger(__name__)
         self.memory_limit = parse_memory_limit(
@@ -284,6 +303,8 @@ class LocalCUDACluster(LocalCluster):
         self.rmm_managed_memory = rmm_managed_memory
         self.rmm_async = rmm_async
         self.rmm_release_threshold = rmm_release_threshold
+        self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list
+
         if rmm_pool_size is not None or rmm_managed_memory or rmm_async:
             try:
                 import rmm  # noqa F401
@@ -437,6 +458,7 @@ class LocalCUDACluster(LocalCluster):
                     release_threshold=self.rmm_release_threshold,
                     log_directory=self.rmm_log_directory,
                     track_allocations=self.rmm_track_allocations,
+                    external_lib_list=self.rmm_allocator_external_lib_list,
                 ),
                 PreImport(self.pre_import),
                 CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats),
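A minimal usage sketch for the new parameter, assuming a CUDA-capable machine with RMM plus the listed libraries installed; pool size and the library list are illustrative:

from distributed import Client
from dask_cuda import LocalCUDACluster

if __name__ == "__main__":
    cluster = LocalCUDACluster(
        rmm_pool_size="1GB",
        # Equivalent string form: rmm_allocator_external_lib_list="torch,cupy"
        rmm_allocator_external_lib_list=["torch", "cupy"],
    )
    client = Client(cluster)
    # Worker-side CuPy and PyTorch GPU allocations are now served by RMM.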
dask_cuda/plugins.py
CHANGED
@@ -1,5 +1,7 @@
 import importlib
+import logging
 import os
+from typing import Callable, Dict
 
 from distributed import WorkerPlugin
 
@@ -11,7 +13,15 @@ class CPUAffinity(WorkerPlugin):
         self.cores = cores
 
     def setup(self, worker=None):
-        os.sched_setaffinity(0, self.cores)
+        try:
+            os.sched_setaffinity(0, self.cores)
+        except Exception:
+            logger = logging.getLogger("distributed.worker")
+            logger.warning(
+                "Setting CPU affinity for GPU failed. Please refer to the following "
+                "link for troubleshooting information: "
+                "https://docs.rapids.ai/api/dask-cuda/nightly/troubleshooting/#setting-cpu-affinity-failure"  # noqa: E501
+            )
 
 
 class CUDFSetup(WorkerPlugin):
@@ -39,6 +49,7 @@ class RMMSetup(WorkerPlugin):
         release_threshold,
         log_directory,
         track_allocations,
+        external_lib_list,
     ):
         if initial_pool_size is None and maximum_pool_size is not None:
             raise ValueError(
@@ -61,6 +72,7 @@ class RMMSetup(WorkerPlugin):
         self.logging = log_directory is not None
         self.log_directory = log_directory
         self.rmm_track_allocations = track_allocations
+        self.external_lib_list = external_lib_list
 
     def setup(self, worker=None):
         if self.initial_pool_size is not None:
@@ -123,6 +135,70 @@ class RMMSetup(WorkerPlugin):
             mr = rmm.mr.get_current_device_resource()
             rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr))
 
+        if self.external_lib_list is not None:
+            for lib in self.external_lib_list:
+                enable_rmm_memory_for_library(lib)
+
+
+def enable_rmm_memory_for_library(lib_name: str) -> None:
+    """Enable RMM memory pool support for a specified third-party library.
+
+    This function allows the given library to utilize RMM's memory pool if it supports
+    integration with RMM. The library name is passed as a string argument, and if the
+    library is compatible, its memory allocator will be configured to use RMM.
+
+    Parameters
+    ----------
+    lib_name : str
+        The name of the third-party library to enable RMM memory pool support for.
+        Supported libraries are "cupy" and "torch".
+
+    Raises
+    ------
+    ValueError
+        If the library name is not supported or does not have RMM integration.
+    ImportError
+        If the required library is not installed.
+    """
+
+    # Mapping of supported libraries to their respective setup functions
+    setup_functions: Dict[str, Callable[[], None]] = {
+        "torch": _setup_rmm_for_torch,
+        "cupy": _setup_rmm_for_cupy,
+    }
+
+    if lib_name not in setup_functions:
+        supported_libs = ", ".join(setup_functions.keys())
+        raise ValueError(
+            f"The library '{lib_name}' is not supported for RMM integration. "
+            f"Supported libraries are: {supported_libs}."
+        )
+
+    # Call the setup function for the specified library
+    setup_functions[lib_name]()
+
+
+def _setup_rmm_for_torch() -> None:
+    try:
+        import torch
+    except ImportError as e:
+        raise ImportError("PyTorch is not installed.") from e
+
+    from rmm.allocators.torch import rmm_torch_allocator
+
+    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+
+def _setup_rmm_for_cupy() -> None:
+    try:
+        import cupy
+    except ImportError as e:
+        raise ImportError("CuPy is not installed.") from e
+
+    from rmm.allocators.cupy import rmm_cupy_allocator
+
+    cupy.cuda.set_allocator(rmm_cupy_allocator)
+
 
 class PreImport(WorkerPlugin):
     def __init__(self, libraries):
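The plugin resolves each requested library through enable_rmm_memory_for_library(). A sketch of calling it by hand in a single process, assuming RMM and CuPy are installed and an RMM pool has been initialized first; the pool size is illustrative:

import rmm
from dask_cuda.plugins import enable_rmm_memory_for_library

rmm.reinitialize(pool_allocator=True, initial_pool_size=2**30)
enable_rmm_memory_for_library("cupy")  # CuPy device allocations now come from RMM
# Unsupported names (e.g. "tensorflow") raise ValueError per the mapping above.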
dask_cuda/proxy_object.py
CHANGED
@@ -12,7 +12,8 @@ import pandas
 
 import dask
 import dask.array.core
-import dask.dataframe.
+import dask.dataframe.backends
+import dask.dataframe.dispatch
 import dask.dataframe.utils
 import dask.utils
 import distributed.protocol
@@ -22,16 +23,6 @@ from distributed.protocol.compression import decompress
 
 from dask_cuda.disk_io import disk_read
 
-try:
-    from dask.dataframe.backends import concat_pandas
-except ImportError:
-    from dask.dataframe.methods import concat_pandas
-
-try:
-    from dask.dataframe.dispatch import make_meta_dispatch as make_meta_dispatch
-except ImportError:
-    from dask.dataframe.utils import make_meta as make_meta_dispatch
-
 from .disk_io import SpillToDiskFile
 from .is_device_object import is_device_object
 
@@ -893,10 +884,12 @@ def obj_pxy_dask_deserialize(header, frames):
     return subclass(pxy)
 
 
-@dask.dataframe.
+@dask.dataframe.dispatch.get_parallel_type.register(ProxyObject)
 def get_parallel_type_proxy_object(obj: ProxyObject):
     # Notice, `get_parallel_type()` needs a instance not a type object
-    return dask.dataframe.
+    return dask.dataframe.dispatch.get_parallel_type(
+        obj.__class__.__new__(obj.__class__)
+    )
 
 
 def unproxify_input_wrapper(func):
@@ -913,24 +906,24 @@ def unproxify_input_wrapper(func):
 
 # Register dispatch of ProxyObject on all known dispatch objects
 for dispatch in (
-    dask.dataframe.
-    make_meta_dispatch,
+    dask.dataframe.dispatch.hash_object_dispatch,
+    dask.dataframe.dispatch.make_meta_dispatch,
     dask.dataframe.utils.make_scalar,
-    dask.dataframe.
+    dask.dataframe.dispatch.group_split_dispatch,
     dask.array.core.tensordot_lookup,
     dask.array.core.einsum_lookup,
     dask.array.core.concatenate_lookup,
 ):
     dispatch.register(ProxyObject, unproxify_input_wrapper(dispatch))
 
-dask.dataframe.
-    ProxyObject, unproxify_input_wrapper(dask.dataframe.
+dask.dataframe.dispatch.concat_dispatch.register(
+    ProxyObject, unproxify_input_wrapper(dask.dataframe.dispatch.concat)
 )
 
 # We overwrite the Dask dispatch of Pandas objects in order to
 # deserialize all ProxyObjects before concatenating
-dask.dataframe.
+dask.dataframe.dispatch.concat_dispatch.register(
     (pandas.DataFrame, pandas.Series, pandas.Index),
-    unproxify_input_wrapper(concat_pandas),
+    unproxify_input_wrapper(dask.dataframe.backends.concat_pandas),
 )
dask_cuda/tests/test_dask_cuda_worker.py
CHANGED
@@ -320,6 +320,7 @@ def test_unknown_argument():
     assert b"Scheduler address: --my-argument" in ret.stderr
 
 
+@pytest.mark.xfail(reason="https://github.com/rapidsai/dask-cuda/issues/1441")
 @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
 def test_pre_import(loop):  # noqa: F811
     module = None
dask_cuda/tests/test_explicit_comms.py
CHANGED
@@ -25,16 +25,6 @@ from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
 mp = mp.get_context("spawn")  # type: ignore
 ucp = pytest.importorskip("ucp")
 
-QUERY_PLANNING_ON = dask.config.get("dataframe.query-planning", None) is not False
-
-# Skip these tests when dask-expr is active (for now)
-query_planning_skip = pytest.mark.skipif(
-    QUERY_PLANNING_ON,
-    reason=(
-        "The 'explicit-comms' config is not supported "
-        "when query planning is enabled."
-    ),
-)
 
 # Set default shuffle method to "tasks"
 if dask.config.get("dataframe.shuffle.method", None) is None:
@@ -98,7 +88,6 @@ def _test_dataframe_merge_empty_partitions(nrows, npartitions):
     pd.testing.assert_frame_equal(got, expected)
 
 
-@query_planning_skip
 def test_dataframe_merge_empty_partitions():
     # Notice, we use more partitions than rows
     p = mp.Process(target=_test_dataframe_merge_empty_partitions, args=(2, 4))
@@ -250,7 +239,7 @@ def _test_dask_use_explicit_comms(in_cluster):
     ):
         dask.config.refresh()  # Trigger re-read of the environment variables
         with pytest.raises(ValueError, match="explicit-comms-batchsize"):
-            ddf.shuffle(on="key", npartitions=4)
+            ddf.shuffle(on="key", npartitions=4).dask
 
     if in_cluster:
         with LocalCluster(
@@ -267,7 +256,6 @@ def _test_dask_use_explicit_comms(in_cluster):
         check_shuffle()
 
 
-@query_planning_skip
 @pytest.mark.parametrize("in_cluster", [True, False])
 def test_dask_use_explicit_comms(in_cluster):
     def _timeout(process, function, timeout):
@@ -330,7 +318,6 @@ def _test_dataframe_shuffle_merge(backend, protocol, n_workers):
     assert_eq(got, expected)
 
 
-@query_planning_skip
 @pytest.mark.parametrize("nworkers", [1, 2, 4])
 @pytest.mark.parametrize("backend", ["pandas", "cudf"])
 @pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
dask_cuda/tests/test_proxy.py
CHANGED
@@ -504,27 +504,27 @@ def test_pandas():
     df1 = pandas.DataFrame({"a": range(10)})
     df2 = pandas.DataFrame({"a": range(10)})
 
-    res = dask.dataframe.
-    got = dask.dataframe.
+    res = dask.dataframe.dispatch.concat([df1, df2])
+    got = dask.dataframe.dispatch.concat([df1, df2])
     assert_frame_equal(res, got)
 
-    got = dask.dataframe.
+    got = dask.dataframe.dispatch.concat([proxy_object.asproxy(df1), df2])
     assert_frame_equal(res, got)
 
-    got = dask.dataframe.
+    got = dask.dataframe.dispatch.concat([df1, proxy_object.asproxy(df2)])
     assert_frame_equal(res, got)
 
     df1 = pandas.Series(range(10))
     df2 = pandas.Series(range(10))
 
-    res = dask.dataframe.
-    got = dask.dataframe.
+    res = dask.dataframe.dispatch.concat([df1, df2])
+    got = dask.dataframe.dispatch.concat([df1, df2])
     assert all(res == got)
 
-    got = dask.dataframe.
+    got = dask.dataframe.dispatch.concat([proxy_object.asproxy(df1), df2])
    assert all(res == got)
 
-    got = dask.dataframe.
+    got = dask.dataframe.dispatch.concat([df1, proxy_object.asproxy(df2)])
     assert all(res == got)
 
 
dask_cuda/tests/test_utils.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 from unittest.mock import patch
 
+import pynvml
 import pytest
 from numba import cuda
 
@@ -197,7 +198,6 @@ def test_get_ucx_config(enable_tcp_over_ucx, enable_infiniband, enable_nvlink):
 
 
 def test_parse_visible_devices():
-    pynvml = pytest.importorskip("pynvml")
     pynvml.nvmlInit()
     indices = []
     uuids = []
@@ -250,7 +250,6 @@ def test_parse_device_memory_limit():
 
 
 def test_parse_visible_mig_devices():
-    pynvml = pytest.importorskip("pynvml")
     pynvml.nvmlInit()
     for index in range(get_gpu_count()):
         handle = pynvml.nvmlDeviceGetHandleByIndex(index)
dask_cuda/utils.py
CHANGED
@@ -9,6 +9,7 @@ from functools import singledispatch
 from multiprocessing import cpu_count
 from typing import Optional
 
+import click
 import numpy as np
 import pynvml
 import toolz
@@ -764,3 +765,13 @@ def get_rmm_device_memory_usage() -> Optional[int]:
     if isinstance(mr, rmm.mr.StatisticsResourceAdaptor):
         return mr.allocation_counts["current_bytes"]
     return None
+
+
+class CommaSeparatedChoice(click.Choice):
+    def convert(self, value, param, ctx):
+        values = [v.strip() for v in value.split(",")]
+        for v in values:
+            if v not in self.choices:
+                choices_str = ", ".join(f"'{c}'" for c in self.choices)
+                self.fail(f"invalid choice(s): {v}. (choices are: {choices_str})")
+        return values
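A small sketch of how CommaSeparatedChoice behaves when used as a click option type, mirroring its use for --set-rmm-allocator-for-libs in cli.py (the option name below is illustrative):

import click
from dask_cuda.utils import CommaSeparatedChoice

@click.command()
@click.option("--libs", type=CommaSeparatedChoice(["cupy", "torch"]), default=None)
def show(libs):
    # "--libs torch,cupy" is converted to ["torch", "cupy"];
    # "--libs torch,tensorflow" fails with "invalid choice(s): tensorflow. ..."
    click.echo(libs)

if __name__ == "__main__":
    show()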
{dask_cuda-24.10.0.dist-info → dask_cuda-25.2.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: dask-cuda
-Version: 24.10.0
+Version: 25.2.0
 Summary: Utilities for Dask and CUDA interactions
 Author: NVIDIA Corporation
 License: Apache 2.0
@@ -19,24 +19,17 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click>=8.1
-Requires-Dist: numba
+Requires-Dist: numba<0.61.0a0,>=0.59.1
 Requires-Dist: numpy<3.0a0,>=1.23
 Requires-Dist: pandas>=1.3
-Requires-Dist: pynvml<
-Requires-Dist: rapids-dask-dependency==24.10.*
+Requires-Dist: pynvml<13.0.0a0,>=12.0.0
+Requires-Dist: rapids-dask-dependency==25.2.*
 Requires-Dist: zict>=2.0.0
 Provides-Extra: docs
 Requires-Dist: numpydoc>=1.1.0; extra == "docs"
 Requires-Dist: sphinx; extra == "docs"
 Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
 Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
-Provides-Extra: test
-Requires-Dist: cudf==24.10.*; extra == "test"
-Requires-Dist: dask-cudf==24.10.*; extra == "test"
-Requires-Dist: kvikio==24.10.*; extra == "test"
-Requires-Dist: pytest; extra == "test"
-Requires-Dist: pytest-cov; extra == "test"
-Requires-Dist: ucx-py==0.40.*; extra == "test"
 
 Dask CUDA
 =========
{dask_cuda-24.10.0.dist-info → dask_cuda-25.2.0.dist-info}/RECORD
CHANGED
@@ -1,55 +1,55 @@
-dask_cuda/VERSION,sha256=
-dask_cuda/__init__.py,sha256=
+dask_cuda/VERSION,sha256=gWnOsR7j8lHNsXJO_balY3FJzbDTto6xlQk1ItvppEY,8
+dask_cuda/__init__.py,sha256=YMnNzbZ1pDrsbgBc9ipsxBQyYn72IzoHAei4_imXHok,1665
 dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
-dask_cuda/cli.py,sha256=
-dask_cuda/cuda_worker.py,sha256=
+dask_cuda/cli.py,sha256=cScVyNiA_l9uXeDgkIcmbcR4l4cH1_1shqSqsVmuHPE,17053
+dask_cuda/cuda_worker.py,sha256=rZ1ITG_ZCbuaMA9e8uSqCjU8Km4AMphGGrxpBPQG8xU,9477
 dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
 dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
 dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
 dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
 dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
 dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
-dask_cuda/local_cuda_cluster.py,sha256=
-dask_cuda/plugins.py,sha256=
+dask_cuda/local_cuda_cluster.py,sha256=wqwKVRV6jT13sf9e-XsvbVBlTrnhmcbmHQBFPTFcayw,20335
+dask_cuda/plugins.py,sha256=A2aT8HA6q_JhIEx6-XKcpbWEbl7aTg1GNoZQH8_vh00,7197
 dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
 dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
-dask_cuda/proxy_object.py,sha256=
-dask_cuda/utils.py,sha256=
+dask_cuda/proxy_object.py,sha256=Zwn3mUIS_6NqNvPRTbcu6auXTQpEs8Tc-pc3_kfBBNY,29616
+dask_cuda/utils.py,sha256=Goq-m78rYZ-bcJitg47N1h_PC4PDuzXG0CUVH7V8azU,25515
 dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
 dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
 dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/benchmarks/common.py,sha256=
-dask_cuda/benchmarks/local_cudf_groupby.py,sha256=
-dask_cuda/benchmarks/local_cudf_merge.py,sha256=
-dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=
-dask_cuda/benchmarks/local_cupy.py,sha256=
-dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=
-dask_cuda/benchmarks/read_parquet.py,sha256=
-dask_cuda/benchmarks/utils.py,sha256=
+dask_cuda/benchmarks/common.py,sha256=YFhxBYkoxIV-2mddSbLwTbyg67U4zXDd2_fFq9oP3_A,6922
+dask_cuda/benchmarks/local_cudf_groupby.py,sha256=zrDiF-yBAUxVt9mWOTH5hUm-pb-XnVX-G9gvCEX7_GI,8512
+dask_cuda/benchmarks/local_cudf_merge.py,sha256=Q7lnZ87-O7j28hkS-i_5hMApTX8VsuI4ftZf2XAnp1E,12195
+dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=Ied7r_fdGuOJyikBVVkMaIX3niJIlF39C1Xk6IVwgo4,8240
+dask_cuda/benchmarks/local_cupy.py,sha256=RCxQJd88bn3vyMAJDPK3orUpxzvDZY957wOSYkfriq0,10323
+dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=YAllGFuG6MePfPL8gdZ-Ld7a44-G0eEaHZJWB4vFPdY,6017
+dask_cuda/benchmarks/read_parquet.py,sha256=spKu6RLWYngPZq9hnaoU0mz7INIaJnErfqjBG2wH8Zc,7614
+dask_cuda/benchmarks/utils.py,sha256=_x0XXL_F3W-fExpuQfTBwuK3WnrVuXQQepbnvjUqS9o,30075
 dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dask_cuda/explicit_comms/comms.py,sha256=Su6PuNo68IyS-AwoqU4S9TmqWsLvUdNa0jot2hx8jQQ,10400
 dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=
+dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=g9xDyFKmblEuevZt5Drh66uMLw-LUNOI8CIucDdACmY,21231
 dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
-dask_cuda/tests/test_dask_cuda_worker.py,sha256=
+dask_cuda/tests/test_dask_cuda_worker.py,sha256=C1emlr47yGa3TdSSlAXJRzguY4bcH74htk21x9th7nQ,20556
 dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
 dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
-dask_cuda/tests/test_explicit_comms.py,sha256=
+dask_cuda/tests/test_explicit_comms.py,sha256=F4_84bx3ODO8Q6ERHXFJF31uXj4gehLwKQncT1evbuM,14895
 dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
 dask_cuda/tests/test_gds.py,sha256=j1Huud6UGm1fbkyRLQEz_ysrVw__5AimwSn_M-2GEvs,1513
 dask_cuda/tests/test_initialize.py,sha256=Rba59ZbljEm1yyN94_sWZPEE_f7hWln95aiBVc49pmY,6960
 dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
 dask_cuda/tests/test_proxify_host_file.py,sha256=LC3jjo_gbfhdIy1Zy_ynmgyv31HXFoBINCe1-XXZ4XU,18994
-dask_cuda/tests/test_proxy.py,sha256=
+dask_cuda/tests/test_proxy.py,sha256=U9uE-QesTwquNKzTReEKiYgoRgS_pfGW-A-gJNppHyg,23817
 dask_cuda/tests/test_spill.py,sha256=CYMbp5HDBYlZ7T_n8RfSOZxaWFcAQKjprjRM7Wupcdw,13419
-dask_cuda/tests/test_utils.py,sha256=
+dask_cuda/tests/test_utils.py,sha256=PQI_oTONWnKSKlkQfEeK-vlmYa0-cPpDjDEbm74cNCE,9104
 dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
 dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
 examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
 examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
-dask_cuda-24.10.0.dist-info/LICENSE,sha256=
-dask_cuda-24.10.0.dist-info/METADATA,sha256=
-dask_cuda-24.10.0.dist-info/WHEEL,sha256=
-dask_cuda-24.10.0.dist-info/entry_points.txt,sha256=
-dask_cuda-24.10.0.dist-info/top_level.txt,sha256=
-dask_cuda-24.10.0.dist-info/RECORD,,
+dask_cuda-25.2.0.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
+dask_cuda-25.2.0.dist-info/METADATA,sha256=c6G1F5I_jeDlfXs42b9LSwtVc5HvUtfyEgZ6cLiw7fM,2272
+dask_cuda-25.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+dask_cuda-25.2.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+dask_cuda-25.2.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
+dask_cuda-25.2.0.dist-info/RECORD,,
File without changes
File without changes
File without changes