dask-cuda 24.8.2__py3-none-any.whl → 24.12.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
- dask_cuda/VERSION +1 -1
- dask_cuda/__init__.py +19 -0
- dask_cuda/benchmarks/common.py +15 -6
- dask_cuda/benchmarks/local_cudf_groupby.py +4 -18
- dask_cuda/benchmarks/local_cudf_merge.py +2 -15
- dask_cuda/benchmarks/local_cudf_shuffle.py +3 -17
- dask_cuda/benchmarks/local_cupy.py +5 -19
- dask_cuda/benchmarks/local_cupy_map_overlap.py +5 -19
- dask_cuda/benchmarks/read_parquet.py +268 -0
- dask_cuda/benchmarks/utils.py +30 -2
- dask_cuda/cli.py +18 -5
- dask_cuda/cuda_worker.py +10 -0
- dask_cuda/local_cuda_cluster.py +29 -0
- dask_cuda/plugins.py +67 -0
- dask_cuda/tests/test_dask_cuda_worker.py +27 -0
- dask_cuda/tests/test_gds.py +1 -1
- dask_cuda/tests/test_proxify_host_file.py +1 -1
- dask_cuda/tests/test_proxy.py +5 -5
- dask_cuda/tests/test_spill.py +116 -16
- dask_cuda/utils.py +11 -0
- {dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/METADATA +10 -10
- {dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/RECORD +26 -25
- {dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/WHEEL +1 -1
- {dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/LICENSE +0 -0
- {dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/top_level.txt +0 -0
dask_cuda/VERSION
CHANGED

@@ -1 +1 @@
-24.
+24.12.00
dask_cuda/__init__.py
CHANGED

@@ -9,6 +9,8 @@ import dask.dataframe.core
 import dask.dataframe.shuffle
 import dask.dataframe.multi
 import dask.bag.core
+from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
+from distributed.protocol.serialize import dask_deserialize, dask_serialize

 from ._version import __git_commit__, __version__
 from .cuda_worker import CUDAWorker
@@ -48,3 +50,20 @@ dask.dataframe.shuffle.shuffle_group = proxify_decorator(
     dask.dataframe.shuffle.shuffle_group
 )
 dask.dataframe.core._concat = unproxify_decorator(dask.dataframe.core._concat)
+
+
+def _register_cudf_spill_aware():
+    import cudf
+
+    # Only enable Dask/cuDF spilling if cuDF spilling is disabled, see
+    # https://github.com/rapidsai/dask-cuda/issues/1363
+    if not cudf.get_option("spill"):
+        # This reproduces the implementation of `_register_cudf`, see
+        # https://github.com/dask/distributed/blob/40fcd65e991382a956c3b879e438be1b100dff97/distributed/protocol/__init__.py#L106-L115
+        from cudf.comm import serialize
+
+
+for registry in [cuda_serialize, cuda_deserialize, dask_serialize, dask_deserialize]:
+    for lib in ["cudf", "dask_cudf"]:
+        if lib in registry._lazy:
+            registry._lazy[lib] = _register_cudf_spill_aware
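The new `_register_cudf_spill_aware` hook swaps Dask's lazy cuDF serializer registration so that the Dask-level serializers are only installed when cuDF's built-in spilling is turned off. A minimal sketch of the cuDF option this hook inspects, assuming cuDF is installed (`spill` is a real cuDF option; the surrounding script is illustrative):

```python
import cudf

# Let cuDF manage device-to-host spilling itself; when this option is on,
# the hook above skips registering dask-cuda's Dask-level cuDF serializers.
cudf.set_option("spill", True)
print(cudf.get_option("spill"))  # True
```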
dask_cuda/benchmarks/common.py
CHANGED

@@ -1,3 +1,4 @@
+import contextlib
 from argparse import Namespace
 from functools import partial
 from typing import Any, Callable, List, Mapping, NamedTuple, Optional, Tuple
@@ -7,7 +8,7 @@ import numpy as np
 import pandas as pd

 import dask
-from distributed import Client
+from distributed import Client, performance_report

 from dask_cuda.benchmarks.utils import (
     address_to_index,
@@ -87,12 +88,20 @@ def run_benchmark(client: Client, args: Namespace, config: Config):

     If ``args.profile`` is set, the final run is profiled.
     """
+
     results = []
-    for _ in range(max(
-
-
-
-
+    for _ in range(max(0, args.warmup_runs)):
+        config.bench_once(client, args, write_profile=None)
+
+    ctx = contextlib.nullcontext()
+    if args.profile is not None:
+        ctx = performance_report(filename=args.profile)
+    with ctx:
+        for _ in range(max(1, args.runs) - 1):
+            res = config.bench_once(client, args, write_profile=None)
+            results.append(res)
+        results.append(config.bench_once(client, args, write_profile=args.profile_last))
+    return results


 def gather_bench_results(client: Client, args: Namespace, config: Config):
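The restructured `run_benchmark` separates unprofiled warmup runs from timed runs and only wraps the timed loop in `distributed.performance_report` when a report path was requested. A standalone sketch of that pattern, using placeholder names (`do_run`, `n_runs`, `n_warmup`, `report_path`) rather than the benchmark's own `Config`/`Namespace` objects:

```python
import contextlib

from distributed import performance_report


def run_many(do_run, n_runs, n_warmup, report_path=None):
    # Warmup iterations are never profiled and their results are discarded.
    for _ in range(max(0, n_warmup)):
        do_run()

    # Use a no-op context unless a profile report was requested.
    ctx = contextlib.nullcontext()
    if report_path is not None:
        ctx = performance_report(filename=report_path)

    results = []
    with ctx:
        for _ in range(max(1, n_runs)):
            results.append(do_run())
    return results
```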
dask_cuda/benchmarks/local_cudf_groupby.py
CHANGED

@@ -7,7 +7,7 @@ import pandas as pd
 import dask
 import dask.dataframe as dd
 from dask.distributed import performance_report, wait
-from dask.utils import format_bytes
+from dask.utils import format_bytes

 from dask_cuda.benchmarks.common import Config, execute_benchmark
 from dask_cuda.benchmarks.utils import (
@@ -98,10 +98,9 @@ def bench_once(client, args, write_profile=None):
         "False": False,
     }.get(args.shuffle, args.shuffle)

-
-
-
-    ctx = performance_report(filename=args.profile)
+    ctx = contextlib.nullcontext()
+    if write_profile is not None:
+        ctx = performance_report(filename=write_profile)

     with ctx:
         t1 = clock()
@@ -260,19 +259,6 @@ def parse_args():
             "type": str,
             "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs",
-        },
     ]

     return parse_benchmark_args(
dask_cuda/benchmarks/local_cudf_merge.py
CHANGED

@@ -9,7 +9,7 @@ import pandas as pd
 import dask
 import dask.dataframe as dd
 from dask.distributed import performance_report, wait
-from dask.utils import format_bytes
+from dask.utils import format_bytes

 from dask_cuda.benchmarks.common import Config, execute_benchmark
 from dask_cuda.benchmarks.utils import (
@@ -190,7 +190,7 @@ def bench_once(client, args, write_profile=None):
     if args.backend == "explicit-comms":
         ctx1 = dask.config.set(explicit_comms=True)
     if write_profile is not None:
-        ctx2 = performance_report(filename=
+        ctx2 = performance_report(filename=write_profile)

     with ctx1:
         with ctx2:
@@ -335,13 +335,6 @@ def parse_args():
             "action": "store_true",
             "help": "Use shuffle join (takes precedence over '--broadcast-join').",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
         {
             "name": "--frac-match",
             "default": 0.3,
@@ -353,12 +346,6 @@ def parse_args():
             "action": "store_true",
             "help": "Don't shuffle the keys of the left (base) dataframe.",
         },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs",
-        },
         {
             "name": [
                 "-s",
dask_cuda/benchmarks/local_cudf_shuffle.py
CHANGED

@@ -121,10 +121,9 @@ def create_data(
 def bench_once(client, args, write_profile=None):
     data_processed, df = create_data(client, args)

-
-
-
-    ctx = performance_report(filename=args.profile)
+    ctx = contextlib.nullcontext()
+    if write_profile is not None:
+        ctx = performance_report(filename=write_profile)

     with ctx:
         if args.backend in {"dask", "dask-noop"}:
@@ -228,19 +227,6 @@ def parse_args():
             "type": str,
             "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs",
-        },
         {
             "name": "--ignore-index",
             "action": "store_true",
dask_cuda/benchmarks/local_cupy.py
CHANGED

@@ -8,7 +8,7 @@ from nvtx import end_range, start_range

 from dask import array as da
 from dask.distributed import performance_report, wait
-from dask.utils import format_bytes
+from dask.utils import format_bytes

 from dask_cuda.benchmarks.common import Config, execute_benchmark
 from dask_cuda.benchmarks.utils import (
@@ -141,12 +141,11 @@ def bench_once(client, args, write_profile=None):
     chunksize = x.chunksize
     data_processed = sum(arg.nbytes for arg in func_args)

-
-    if
-        ctx = performance_report(filename=
-    else:
-        ctx = contextlib.nullcontext()
+    ctx = contextlib.nullcontext()
+    if write_profile is not None:
+        ctx = performance_report(filename=write_profile)

+    # Execute the operations to benchmark
     with ctx:
         rng = start_range(message=args.operation, color="purple")
         result = func(*func_args)
@@ -297,19 +296,6 @@ def parse_args():
             "type": int,
             "help": "Chunk size (default 2500).",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB').",
-        },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs (default 3).",
-        },
         {
             "name": [
                 "-b",
dask_cuda/benchmarks/local_cupy_map_overlap.py
CHANGED

@@ -10,7 +10,7 @@ from scipy.ndimage import convolve as sp_convolve

 from dask import array as da
 from dask.distributed import performance_report, wait
-from dask.utils import format_bytes
+from dask.utils import format_bytes

 from dask_cuda.benchmarks.common import Config, execute_benchmark
 from dask_cuda.benchmarks.utils import (
@@ -42,12 +42,11 @@ def bench_once(client, args, write_profile=None):

     data_processed = x.nbytes

-
-    if
-        ctx = performance_report(filename=
-    else:
-        ctx = contextlib.nullcontext()
+    ctx = contextlib.nullcontext()
+    if write_profile is not None:
+        ctx = performance_report(filename=write_profile)

+    # Execute the operations to benchmark
     with ctx:
         result = x.map_overlap(mean_filter, args.kernel_size, shape=ks)
         if args.backend == "dask-noop":
@@ -168,19 +167,6 @@ def parse_args():
             "type": int,
             "help": "Kernel size, 2*k+1, in each dimension (default 1)",
         },
-        {
-            "name": "--ignore-size",
-            "default": "1 MiB",
-            "metavar": "nbytes",
-            "type": parse_bytes,
-            "help": "Ignore messages smaller than this (default '1 MB')",
-        },
-        {
-            "name": "--runs",
-            "default": 3,
-            "type": int,
-            "help": "Number of runs",
-        },
         {
             "name": [
                 "-b",
dask_cuda/benchmarks/read_parquet.py
ADDED

@@ -0,0 +1,268 @@
+import contextlib
+from collections import ChainMap
+from time import perf_counter as clock
+
+import fsspec
+import pandas as pd
+
+import dask
+import dask.dataframe as dd
+from dask.base import tokenize
+from dask.distributed import performance_report
+from dask.utils import format_bytes, parse_bytes
+
+from dask_cuda.benchmarks.common import Config, execute_benchmark
+from dask_cuda.benchmarks.utils import (
+    parse_benchmark_args,
+    print_key_value,
+    print_separator,
+    print_throughput_bandwidth,
+)
+
+DISK_SIZE_CACHE = {}
+OPTIONS_CACHE = {}
+
+
+def _noop(df):
+    return df
+
+
+def read_data(paths, columns, backend, **kwargs):
+    with dask.config.set({"dataframe.backend": backend}):
+        return dd.read_parquet(
+            paths,
+            columns=columns,
+            **kwargs,
+        )
+
+
+def get_fs_paths_kwargs(args):
+    kwargs = {}
+
+    storage_options = {}
+    if args.key:
+        storage_options["key"] = args.key
+    if args.secret:
+        storage_options["secret"] = args.secret
+
+    if args.filesystem == "arrow":
+        import pyarrow.fs as pa_fs
+        from fsspec.implementations.arrow import ArrowFSWrapper
+
+        _mapping = {
+            "key": "access_key",
+            "secret": "secret_key",
+        }  # See: pyarrow.fs.S3FileSystem docs
+        s3_args = {}
+        for k, v in storage_options.items():
+            s3_args[_mapping[k]] = v
+
+        fs = pa_fs.FileSystem.from_uri(args.path)[0]
+        try:
+            region = {"region": fs.region}
+        except AttributeError:
+            region = {}
+        kwargs["filesystem"] = type(fs)(**region, **s3_args)
+        fsspec_fs = ArrowFSWrapper(kwargs["filesystem"])
+
+        if args.type == "gpu":
+            kwargs["blocksize"] = args.blocksize
+    else:
+        fsspec_fs = fsspec.core.get_fs_token_paths(
+            args.path, mode="rb", storage_options=storage_options
+        )[0]
+        kwargs["filesystem"] = fsspec_fs
+        kwargs["blocksize"] = args.blocksize
+        kwargs["aggregate_files"] = args.aggregate_files
+
+    # Collect list of paths
+    stripped_url_path = fsspec_fs._strip_protocol(args.path)
+    if stripped_url_path.endswith("/"):
+        stripped_url_path = stripped_url_path[:-1]
+    paths = fsspec_fs.glob(f"{stripped_url_path}/*.parquet")
+    if args.file_count:
+        paths = paths[: args.file_count]
+
+    return fsspec_fs, paths, kwargs
+
+
+def bench_once(client, args, write_profile=None):
+    global OPTIONS_CACHE
+    global DISK_SIZE_CACHE
+
+    # Construct kwargs
+    token = tokenize(args)
+    try:
+        fsspec_fs, paths, kwargs = OPTIONS_CACHE[token]
+    except KeyError:
+        fsspec_fs, paths, kwargs = get_fs_paths_kwargs(args)
+        OPTIONS_CACHE[token] = (fsspec_fs, paths, kwargs)
+
+    if write_profile is None:
+        ctx = contextlib.nullcontext()
+    else:
+        ctx = performance_report(filename=args.profile)
+
+    with ctx:
+        t1 = clock()
+        df = read_data(
+            paths,
+            columns=args.columns,
+            backend="cudf" if args.type == "gpu" else "pandas",
+            **kwargs,
+        )
+        num_rows = len(
+            # Use opaque `map_partitions` call to "block"
+            # dask-expr from using pq metadata to get length
+            df.map_partitions(
+                _noop,
+                meta=df._meta,
+                enforce_metadata=False,
+            )
+        )
+        t2 = clock()
+
+    # Extract total size of files on disk
+    token = tokenize(paths)
+    try:
+        disk_size = DISK_SIZE_CACHE[token]
+    except KeyError:
+        disk_size = sum(fsspec_fs.sizes(paths))
+        DISK_SIZE_CACHE[token] = disk_size
+
+    return (disk_size, num_rows, t2 - t1)
+
+
+def pretty_print_results(args, address_to_index, p2p_bw, results):
+    if args.markdown:
+        print("```")
+    print("Parquet read benchmark")
+    data_processed, row_count, durations = zip(*results)
+    print_separator(separator="-")
+    backend = "cudf" if args.type == "gpu" else "pandas"
+    print_key_value(key="Path", value=args.path)
+    print_key_value(key="Columns", value=f"{args.columns}")
+    print_key_value(key="Backend", value=f"{backend}")
+    print_key_value(key="Filesystem", value=f"{args.filesystem}")
+    print_key_value(key="Blocksize", value=f"{format_bytes(args.blocksize)}")
+    print_key_value(key="Aggregate files", value=f"{args.aggregate_files}")
+    print_key_value(key="Row count", value=f"{row_count[0]}")
+    print_key_value(key="Size on disk", value=f"{format_bytes(data_processed[0])}")
+    if args.markdown:
+        print("\n```")
+    args.no_show_p2p_bandwidth = True
+    print_throughput_bandwidth(
+        args, durations, data_processed, p2p_bw, address_to_index
+    )
+    print_separator(separator="=")
+
+
+def create_tidy_results(args, p2p_bw, results):
+    configuration = {
+        "path": args.path,
+        "columns": args.columns,
+        "backend": "cudf" if args.type == "gpu" else "pandas",
+        "filesystem": args.filesystem,
+        "blocksize": args.blocksize,
+        "aggregate_files": args.aggregate_files,
+    }
+    timing_data = pd.DataFrame(
+        [
+            pd.Series(
+                data=ChainMap(
+                    configuration,
+                    {
+                        "wallclock": duration,
+                        "data_processed": data_processed,
+                        "num_rows": num_rows,
+                    },
+                )
+            )
+            for data_processed, num_rows, duration in results
+        ]
+    )
+    return timing_data, p2p_bw
+
+
+def parse_args():
+    special_args = [
+        {
+            "name": "path",
+            "type": str,
+            "help": "Parquet directory to read from (must be a flat directory).",
+        },
+        {
+            "name": "--blocksize",
+            "default": "256MB",
+            "type": parse_bytes,
+            "help": "How to set the blocksize option",
+        },
+        {
+            "name": "--aggregate-files",
+            "default": False,
+            "action": "store_true",
+            "help": "How to set the aggregate_files option",
+        },
+        {
+            "name": "--file-count",
+            "type": int,
+            "help": "Maximum number of files to read.",
+        },
+        {
+            "name": "--columns",
+            "type": str,
+            "help": "Columns to read/select from data.",
+        },
+        {
+            "name": "--key",
+            "type": str,
+            "help": "Public S3 key.",
+        },
+        {
+            "name": "--secret",
+            "type": str,
+            "help": "Secret S3 key.",
+        },
+        {
+            "name": [
+                "-t",
+                "--type",
+            ],
+            "choices": ["cpu", "gpu"],
+            "default": "gpu",
+            "type": str,
+            "help": "Use GPU or CPU dataframes (default 'gpu')",
+        },
+        {
+            "name": "--filesystem",
+            "choices": ["arrow", "fsspec"],
+            "default": "fsspec",
+            "type": str,
+            "help": "Filesystem backend",
+        },
+        {
+            "name": "--runs",
+            "default": 3,
+            "type": int,
+            "help": "Number of runs",
+        },
+    ]
+
+    args = parse_benchmark_args(
+        description="Parquet read benchmark",
+        args_list=special_args,
+        check_explicit_comms=False,
+    )
+    args.no_show_p2p_bandwidth = True
+    return args
+
+
+if __name__ == "__main__":
+    execute_benchmark(
+        Config(
+            args=parse_args(),
+            bench_once=bench_once,
+            create_tidy_results=create_tidy_results,
+            pretty_print_results=pretty_print_results,
+        )
+    )
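The new `read_parquet.py` benchmark times a bare `dd.read_parquet` over a flat directory of Parquet files using either an fsspec or a pyarrow filesystem. A much-reduced sketch of the read path it exercises, run outside the benchmark harness; the `./data` path and column names are placeholders, and the keywords mirror the kwargs assembled in `get_fs_paths_kwargs` above:

```python
import dask
import dask.dataframe as dd

# "pandas" exercises the CPU path; "cudf" (with a GPU and dask-cudf installed)
# exercises the GPU path used when the benchmark is run with --type gpu.
with dask.config.set({"dataframe.backend": "pandas"}):
    df = dd.read_parquet("./data", columns=["a", "b"], blocksize="256MB")

# The benchmark times roughly this step: materializing the row count per run.
print(len(df))
```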
dask_cuda/benchmarks/utils.py
CHANGED

@@ -323,7 +323,16 @@ def parse_benchmark_args(
         metavar="PATH",
         default=None,
         type=str,
-        help="Write dask profile report (E.g. dask-report.html)"
+        help="Write dask profile report (E.g. dask-report.html) on all "
+        "iterations (excluding warmup).",
+    )
+    parser.add_argument(
+        "--profile-last",
+        metavar="PATH",
+        default=None,
+        type=str,
+        help="Write dask profile report (E.g. dask-report.html) on last "
+        "iteration only.",
     )
     # See save_benchmark_data for more information
     parser.add_argument(
@@ -337,6 +346,25 @@ def parse_benchmark_args(
         "If the files already exist, new files are created with a uniquified "
         "BASENAME.",
     )
+    parser.add_argument(
+        "--ignore-size",
+        default="1 MiB",
+        metavar="nbytes",
+        type=parse_bytes,
+        help="Bandwidth statistics: ignore messages smaller than this (default '1 MB')",
+    )
+    parser.add_argument(
+        "--runs",
+        default=3,
+        type=int,
+        help="Number of runs",
+    )
+    parser.add_argument(
+        "--warmup-runs",
+        default=1,
+        type=int,
+        help="Number of warmup runs",
+    )

     for args in args_list:
         name = args.pop("name")
@@ -765,7 +793,7 @@ def print_throughput_bandwidth(
     )
     print_key_value(
         key="Wall clock",
-        value=f"{format_time(durations.mean())} +/- {format_time(durations.std())
+        value=f"{format_time(durations.mean())} +/- {format_time(durations.std())}",
     )
     if not args.no_show_p2p_bandwidth:
         print_separator(separator="=")
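With `--ignore-size`, `--runs`, and the new `--warmup-runs` now defined once in `parse_benchmark_args`, every benchmark accepts them. A standalone argparse sketch of the same pattern (the parser here is illustrative, not the dask-cuda implementation); `parse_bytes` turns strings like `"1 MiB"` into byte counts:

```python
import argparse

from dask.utils import parse_bytes

parser = argparse.ArgumentParser(description="benchmark")
parser.add_argument(
    "--ignore-size",
    default="1 MiB",
    metavar="nbytes",
    type=parse_bytes,
    help="Bandwidth statistics: ignore messages smaller than this",
)
parser.add_argument("--runs", default=3, type=int, help="Number of runs")
parser.add_argument("--warmup-runs", default=1, type=int, help="Number of warmup runs")

args = parser.parse_args(["--ignore-size", "2 MiB", "--runs", "5"])
print(args.ignore_size, args.runs, args.warmup_runs)  # 2097152 5 1
```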
dask_cuda/cli.py
CHANGED

@@ -13,7 +13,7 @@ from distributed.security import Security
 from distributed.utils import import_term

 from .cuda_worker import CUDAWorker
-from .utils import print_cluster_config
+from .utils import CommaSeparatedChoice, print_cluster_config

 logger = logging.getLogger(__name__)

@@ -164,13 +164,24 @@ def cuda():
     incompatible with RMM pools and managed memory, trying to enable both will
     result in failure.""",
 )
+@click.option(
+    "--set-rmm-allocator-for-libs",
+    "rmm_allocator_external_lib_list",
+    type=CommaSeparatedChoice(["cupy", "torch"]),
+    default=None,
+    show_default=True,
+    help="""
+    Set RMM as the allocator for external libraries. Provide a comma-separated
+    list of libraries to set, e.g., "torch,cupy".""",
+)
 @click.option(
     "--rmm-release-threshold",
     default=None,
-    help="""When ``rmm.async`` is ``True`` and the pool size grows beyond this
-    memory held by the pool will be released at the next
-    an integer (bytes), float (fraction of total
-    or ``"5000M"``) or ``None``. By
+    help="""When ``rmm.async`` is ``True`` and the pool size grows beyond this
+    value, unused memory held by the pool will be released at the next
+    synchronization point. Can be an integer (bytes), float (fraction of total
+    device memory), string (like ``"5GB"`` or ``"5000M"``) or ``None``. By
+    default, this feature is disabled.

    .. note::
        This size is a per-worker configuration, and not cluster-wide.""",
@@ -350,6 +361,7 @@ def worker(
     rmm_maximum_pool_size,
     rmm_managed_memory,
     rmm_async,
+    rmm_allocator_external_lib_list,
     rmm_release_threshold,
     rmm_log_directory,
     rmm_track_allocations,
@@ -424,6 +436,7 @@ def worker(
         rmm_maximum_pool_size,
         rmm_managed_memory,
         rmm_async,
+        rmm_allocator_external_lib_list,
         rmm_release_threshold,
         rmm_log_directory,
         rmm_track_allocations,
dask_cuda/cuda_worker.py
CHANGED

@@ -47,6 +47,7 @@ class CUDAWorker(Server):
         rmm_maximum_pool_size=None,
         rmm_managed_memory=False,
         rmm_async=False,
+        rmm_allocator_external_lib_list=None,
         rmm_release_threshold=None,
         rmm_log_directory=None,
         rmm_track_allocations=False,
@@ -195,6 +196,14 @@ class CUDAWorker(Server):
             },
         )

+        cudf_spill_warning = dask.config.get("cudf-spill-warning", default=True)
+        if enable_cudf_spill and cudf_spill_warning:
+            warnings.warn(
+                "cuDF spilling is enabled, please ensure the client and scheduler "
+                "processes set `CUDF_SPILL=on` as well. To disable this warning "
+                "set `DASK_CUDF_SPILL_WARNING=False`."
+            )
+
         self.nannies = [
             Nanny(
                 scheduler,
@@ -223,6 +232,7 @@ class CUDAWorker(Server):
                         release_threshold=rmm_release_threshold,
                         log_directory=rmm_log_directory,
                         track_allocations=rmm_track_allocations,
+                        external_lib_list=rmm_allocator_external_lib_list,
                     ),
                     PreImport(pre_import),
                     CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
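The new warning is gated on the Dask config key `cudf-spill-warning`, which Dask maps to the `DASK_CUDF_SPILL_WARNING` environment variable. A small sketch of silencing it from Python rather than the environment (standard `dask.config` usage; nothing here is dask-cuda-specific beyond the key name):

```python
import dask

# Default is True, i.e. warn when --enable-cudf-spill is used.
print(dask.config.get("cudf-spill-warning", default=True))

with dask.config.set({"cudf-spill-warning": False}):
    # A CUDAWorker created inside this block would skip the UserWarning.
    print(dask.config.get("cudf-spill-warning", default=True))  # False
```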
dask_cuda/local_cuda_cluster.py
CHANGED

@@ -143,6 +143,11 @@ class LocalCUDACluster(LocalCluster):
         The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also
         incompatible with RMM pools and managed memory. Trying to enable both will
         result in an exception.
+    rmm_allocator_external_lib_list: str, list or None, default None
+        List of external libraries for which to set RMM as the allocator.
+        Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string
+        (like ``"torch,cupy"``) or a list of strings (like ``["torch", "cupy"]``).
+        If ``None``, no external libraries will use RMM as their allocator.
     rmm_release_threshold: int, str or None, default None
         When ``rmm.async is True`` and the pool size grows beyond this value, unused
         memory held by the pool will be released at the next synchronization point.
@@ -231,6 +236,7 @@ class LocalCUDACluster(LocalCluster):
         rmm_maximum_pool_size=None,
         rmm_managed_memory=False,
         rmm_async=False,
+        rmm_allocator_external_lib_list=None,
         rmm_release_threshold=None,
         rmm_log_directory=None,
         rmm_track_allocations=False,
@@ -244,6 +250,13 @@ class LocalCUDACluster(LocalCluster):
         # initialization happens before we can set CUDA_VISIBLE_DEVICES
         os.environ["RAPIDS_NO_INITIALIZE"] = "True"

+        if enable_cudf_spill:
+            import cudf
+
+            # cuDF spilling must be enabled in the client/scheduler process too.
+            cudf.set_option("spill", enable_cudf_spill)
+            cudf.set_option("spill_stats", cudf_spill_stats)
+
         if threads_per_worker < 1:
             raise ValueError("threads_per_worker must be higher than 0.")

@@ -258,6 +271,19 @@ class LocalCUDACluster(LocalCluster):
             n_workers = len(CUDA_VISIBLE_DEVICES)
         if n_workers < 1:
             raise ValueError("Number of workers cannot be less than 1.")
+
+        if rmm_allocator_external_lib_list is not None:
+            if isinstance(rmm_allocator_external_lib_list, str):
+                rmm_allocator_external_lib_list = [
+                    v.strip() for v in rmm_allocator_external_lib_list.split(",")
+                ]
+            elif not isinstance(rmm_allocator_external_lib_list, list):
+                raise ValueError(
+                    "rmm_allocator_external_lib_list must be either a comma-separated "
+                    "string or a list of strings. Examples: 'torch,cupy' "
+                    "or ['torch', 'cupy']"
+                )
+
         # Set nthreads=1 when parsing mem_limit since it only depends on n_workers
         logger = logging.getLogger(__name__)
         self.memory_limit = parse_memory_limit(
@@ -277,6 +303,8 @@ class LocalCUDACluster(LocalCluster):
         self.rmm_managed_memory = rmm_managed_memory
         self.rmm_async = rmm_async
         self.rmm_release_threshold = rmm_release_threshold
+        self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list
+
         if rmm_pool_size is not None or rmm_managed_memory or rmm_async:
             try:
                 import rmm  # noqa F401
@@ -430,6 +458,7 @@ class LocalCUDACluster(LocalCluster):
                         release_threshold=self.rmm_release_threshold,
                         log_directory=self.rmm_log_directory,
                         track_allocations=self.rmm_track_allocations,
+                        external_lib_list=self.rmm_allocator_external_lib_list,
                     ),
                     PreImport(self.pre_import),
                     CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats),
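From Python, the same behavior is reached through the new `rmm_allocator_external_lib_list` keyword. A hedged usage sketch (requires a GPU with RMM and CuPy installed; the pool size and other keywords are ordinary `LocalCUDACluster` options):

```python
from distributed import Client

from dask_cuda import LocalCUDACluster

if __name__ == "__main__":
    cluster = LocalCUDACluster(
        rmm_pool_size="1GB",
        rmm_allocator_external_lib_list="cupy",  # or ["torch", "cupy"]
        enable_cudf_spill=True,  # also enables cuDF spilling in this client process
    )
    client = Client(cluster)
```

The equivalent CLI spelling is the `--set-rmm-allocator-for-libs` option added to `dask cuda worker` above.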
dask_cuda/plugins.py
CHANGED

@@ -1,5 +1,6 @@
 import importlib
 import os
+from typing import Callable, Dict

 from distributed import WorkerPlugin

@@ -39,6 +40,7 @@ class RMMSetup(WorkerPlugin):
         release_threshold,
         log_directory,
         track_allocations,
+        external_lib_list,
     ):
         if initial_pool_size is None and maximum_pool_size is not None:
             raise ValueError(
@@ -61,6 +63,7 @@ class RMMSetup(WorkerPlugin):
         self.logging = log_directory is not None
         self.log_directory = log_directory
         self.rmm_track_allocations = track_allocations
+        self.external_lib_list = external_lib_list

     def setup(self, worker=None):
         if self.initial_pool_size is not None:
@@ -123,6 +126,70 @@ class RMMSetup(WorkerPlugin):
             mr = rmm.mr.get_current_device_resource()
             rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr))

+        if self.external_lib_list is not None:
+            for lib in self.external_lib_list:
+                enable_rmm_memory_for_library(lib)
+
+
+def enable_rmm_memory_for_library(lib_name: str) -> None:
+    """Enable RMM memory pool support for a specified third-party library.
+
+    This function allows the given library to utilize RMM's memory pool if it supports
+    integration with RMM. The library name is passed as a string argument, and if the
+    library is compatible, its memory allocator will be configured to use RMM.
+
+    Parameters
+    ----------
+    lib_name : str
+        The name of the third-party library to enable RMM memory pool support for.
+        Supported libraries are "cupy" and "torch".
+
+    Raises
+    ------
+    ValueError
+        If the library name is not supported or does not have RMM integration.
+    ImportError
+        If the required library is not installed.
+    """
+
+    # Mapping of supported libraries to their respective setup functions
+    setup_functions: Dict[str, Callable[[], None]] = {
+        "torch": _setup_rmm_for_torch,
+        "cupy": _setup_rmm_for_cupy,
+    }
+
+    if lib_name not in setup_functions:
+        supported_libs = ", ".join(setup_functions.keys())
+        raise ValueError(
+            f"The library '{lib_name}' is not supported for RMM integration. "
+            f"Supported libraries are: {supported_libs}."
+        )
+
+    # Call the setup function for the specified library
+    setup_functions[lib_name]()
+
+
+def _setup_rmm_for_torch() -> None:
+    try:
+        import torch
+    except ImportError as e:
+        raise ImportError("PyTorch is not installed.") from e
+
+    from rmm.allocators.torch import rmm_torch_allocator
+
+    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+
+def _setup_rmm_for_cupy() -> None:
+    try:
+        import cupy
+    except ImportError as e:
+        raise ImportError("CuPy is not installed.") from e
+
+    from rmm.allocators.cupy import rmm_cupy_allocator
+
+    cupy.cuda.set_allocator(rmm_cupy_allocator)
+

 class PreImport(WorkerPlugin):
     def __init__(self, libraries):
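On each worker, `RMMSetup` now routes CuPy and/or PyTorch allocations through RMM. A standalone sketch of the CuPy half of that wiring, outside any worker plugin (requires a GPU with `rmm` and `cupy` installed; the pool size is arbitrary):

```python
import cupy
import rmm
from rmm.allocators.cupy import rmm_cupy_allocator

# Create an RMM pool and make CuPy allocate from it, mirroring what the
# plugin does when external_lib_list includes "cupy".
rmm.reinitialize(pool_allocator=True, initial_pool_size=2**30)  # 1 GiB pool
cupy.cuda.set_allocator(rmm_cupy_allocator)

x = cupy.arange(10_000)  # allocated from the RMM pool
print(float(x.sum()))
```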
dask_cuda/tests/test_dask_cuda_worker.py
CHANGED

@@ -567,3 +567,30 @@ def test_worker_timeout():
     assert "reason: nanny-close" in ret.stderr.lower()

     assert ret.returncode == 0
+
+
+@pytest.mark.parametrize("enable_cudf_spill_warning", [False, True])
+def test_worker_cudf_spill_warning(enable_cudf_spill_warning):  # noqa: F811
+    pytest.importorskip("rmm")
+
+    environ = {"CUDA_VISIBLE_DEVICES": "0"}
+    if not enable_cudf_spill_warning:
+        environ["DASK_CUDF_SPILL_WARNING"] = "False"
+
+    with patch.dict(os.environ, environ):
+        ret = subprocess.run(
+            [
+                "dask",
+                "cuda",
+                "worker",
+                "127.0.0.1:9369",
+                "--enable-cudf-spill",
+                "--death-timeout",
+                "1",
+            ],
+            capture_output=True,
+        )
+        if enable_cudf_spill_warning:
+            assert b"UserWarning: cuDF spilling is enabled" in ret.stderr
+        else:
+            assert b"UserWarning: cuDF spilling is enabled" not in ret.stderr
dask_cuda/tests/test_gds.py
CHANGED

@@ -38,7 +38,7 @@ def test_gds(gds_enabled, cuda_lib):
         a = data_create()
         header, frames = serialize(a, serializers=("disk",))
         b = deserialize(header, frames)
-        assert type(a)
+        assert type(a) is type(b)
         assert data_compare(a, b)
     finally:
         ProxifyHostFile.register_disk_spilling()  # Reset disk spilling options
dask_cuda/tests/test_proxify_host_file.py
CHANGED

@@ -252,7 +252,7 @@ async def test_local_cuda_cluster(jit_unspill):
             assert "ProxyObject" in str(type(x))
             assert x._pxy_get().serializer == "dask"
         else:
-            assert type(x)
+            assert type(x) is cudf.DataFrame
             assert len(x) == 10  # Trigger deserialization
         return x

dask_cuda/tests/test_proxy.py
CHANGED

@@ -114,7 +114,7 @@ def test_proxy_object_of_array(serializers, backend):
         pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
         expect = op(org)
         got = op(pxy)
-        assert type(expect)
+        assert type(expect) is type(got)
         assert expect == got

     # Check unary operators
@@ -124,7 +124,7 @@ def test_proxy_object_of_array(serializers, backend):
         pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
         expect = op(org)
         got = op(pxy)
-        assert type(expect)
+        assert type(expect) is type(got)
         assert all(expect == got)

     # Check binary operators that takes a scalar as second argument
@@ -134,7 +134,7 @@ def test_proxy_object_of_array(serializers, backend):
         pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
         expect = op(org, 2)
         got = op(pxy, 2)
-        assert type(expect)
+        assert type(expect) is type(got)
         assert all(expect == got)

     # Check binary operators
@@ -192,7 +192,7 @@ def test_proxy_object_of_array(serializers, backend):
         pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
         expect = op(org)
         got = op(pxy)
-        assert type(expect)
+        assert type(expect) is type(got)
         assert expect == got

     # Check reflected methods
@@ -297,7 +297,7 @@ async def test_spilling_local_cuda_cluster(jit_unspill):
             assert "ProxyObject" in str(type(x))
             assert x._pxy_get().serializer == "dask"
         else:
-            assert type(x)
+            assert type(x) is cudf.DataFrame
             assert len(x) == 10  # Trigger deserialization
         return x

dask_cuda/tests/test_spill.py
CHANGED

@@ -11,6 +11,8 @@ from distributed.metrics import time
 from distributed.sizeof import sizeof
 from distributed.utils_test import gen_cluster, gen_test, loop  # noqa: F401

+import dask_cudf
+
 from dask_cuda import LocalCUDACluster, utils
 from dask_cuda.utils_test import IncreasedCloseTimeoutNanny

@@ -18,6 +20,57 @@ if utils.get_device_total_memory() < 1e10:
     pytest.skip("Not enough GPU memory", allow_module_level=True)


+def _set_cudf_device_limit():
+    """Ensure spilling for objects of all sizes"""
+    import cudf
+
+    cudf.set_option("spill_device_limit", 0)
+
+
+def _assert_cudf_spill_stats(enable_cudf_spill, dask_worker=None):
+    """Ensure cuDF has spilled data with its internal mechanism"""
+    import cudf
+
+    global_manager = cudf.core.buffer.spill_manager.get_global_manager()
+
+    if enable_cudf_spill:
+        stats = global_manager.statistics
+        buffers = global_manager.buffers()
+        assert stats.spill_totals[("gpu", "cpu")][0] > 1000
+        assert stats.spill_totals[("cpu", "gpu")][0] > 1000
+        assert len(buffers) > 0
+    else:
+        assert global_manager is None
+
+
+@pytest.fixture(params=[False, True])
+def cudf_spill(request):
+    """Fixture to enable and clear cuDF spill manager in client process"""
+    cudf = pytest.importorskip("cudf")
+
+    enable_cudf_spill = request.param
+
+    if enable_cudf_spill:
+        # If the global spill manager was previously set, fail.
+        assert cudf.core.buffer.spill_manager._global_manager is None
+
+        cudf.set_option("spill", True)
+        cudf.set_option("spill_stats", True)
+
+        # This change is to prevent changing RMM resource stack in cuDF,
+        # workers do not need this because they are spawned as new
+        # processes for every new test that runs.
+        cudf.set_option("spill_on_demand", False)
+
+        _set_cudf_device_limit()
+
+    yield enable_cudf_spill
+
+    cudf.set_option("spill", False)
+    cudf.core.buffer.spill_manager._global_manager_uninitialized = True
+    cudf.core.buffer.spill_manager._global_manager = None
+
+
 def device_host_file_size_matches(
     dhf, total_bytes, device_chunk_overhead=0, serialized_chunk_overhead=1024
 ):
@@ -244,9 +297,11 @@ async def test_cupy_cluster_device_spill(params):
     ],
 )
 @gen_test(timeout=30)
-async def test_cudf_cluster_device_spill(params):
+async def test_cudf_cluster_device_spill(params, cudf_spill):
     cudf = pytest.importorskip("cudf")

+    enable_cudf_spill = cudf_spill
+
     with dask.config.set(
         {
             "distributed.comm.compression": False,
@@ -266,6 +321,7 @@ async def test_cudf_cluster_device_spill(params):
            device_memory_limit=params["device_memory_limit"],
            memory_limit=params["memory_limit"],
            worker_class=IncreasedCloseTimeoutNanny,
+            enable_cudf_spill=enable_cudf_spill,
         ) as cluster:
             async with Client(cluster, asynchronous=True) as client:

@@ -294,21 +350,28 @@ async def test_cudf_cluster_device_spill(params):
                 del cdf
                 gc.collect()

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if enable_cudf_spill:
+                    await client.run(
+                        worker_assert,
+                        0,
+                        0,
+                        0,
+                    )
+                else:
+                    await client.run(
+                        assert_host_chunks,
+                        params["spills_to_disk"],
+                    )
+                    await client.run(
+                        assert_disk_chunks,
+                        params["spills_to_disk"],
+                    )
+                    await client.run(
+                        worker_assert,
+                        nbytes,
+                        32,
+                        2048,
+                    )

                 del cdf2

@@ -324,3 +387,40 @@ async def test_cudf_cluster_device_spill(params):
                 gc.collect()
             else:
                 break
+
+
+@gen_test(timeout=30)
+async def test_cudf_spill_cluster(cudf_spill):
+    cudf = pytest.importorskip("cudf")
+    enable_cudf_spill = cudf_spill
+
+    async with LocalCUDACluster(
+        n_workers=1,
+        scheduler_port=0,
+        silence_logs=False,
+        dashboard_address=None,
+        asynchronous=True,
+        device_memory_limit=None,
+        memory_limit=None,
+        worker_class=IncreasedCloseTimeoutNanny,
+        enable_cudf_spill=enable_cudf_spill,
+        cudf_spill_stats=enable_cudf_spill,
+    ) as cluster:
+        async with Client(cluster, asynchronous=True) as client:
+
+            await client.wait_for_workers(1)
+            await client.run(_set_cudf_device_limit)
+
+            cdf = cudf.DataFrame(
+                {
+                    "a": list(range(200)),
+                    "b": list(reversed(range(200))),
+                    "c": list(range(200)),
+                }
+            )
+
+            ddf = dask_cudf.from_cudf(cdf, npartitions=2).sum().persist()
+            await wait(ddf)
+
+            await client.run(_assert_cudf_spill_stats, enable_cudf_spill)
+            _assert_cudf_spill_stats(enable_cudf_spill)
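The new fixture and assertions rely on cuDF's spill-manager statistics. A hedged sketch of inspecting them directly; note that `cudf.core.buffer.spill_manager.get_global_manager()` is an internal cuDF API (used by the test above) and may change between releases:

```python
import cudf

cudf.set_option("spill", True)
cudf.set_option("spill_stats", True)

df = cudf.DataFrame({"a": range(1000)})  # allocates spillable device buffers

manager = cudf.core.buffer.spill_manager.get_global_manager()
if manager is not None:
    # Per-direction totals; index [0] is bytes spilled, as asserted in the test.
    print(manager.statistics.spill_totals)
```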
dask_cuda/utils.py
CHANGED

@@ -9,6 +9,7 @@ from functools import singledispatch
 from multiprocessing import cpu_count
 from typing import Optional

+import click
 import numpy as np
 import pynvml
 import toolz
@@ -764,3 +765,13 @@ def get_rmm_device_memory_usage() -> Optional[int]:
         if isinstance(mr, rmm.mr.StatisticsResourceAdaptor):
             return mr.allocation_counts["current_bytes"]
     return None
+
+
+class CommaSeparatedChoice(click.Choice):
+    def convert(self, value, param, ctx):
+        values = [v.strip() for v in value.split(",")]
+        for v in values:
+            if v not in self.choices:
+                choices_str = ", ".join(f"'{c}'" for c in self.choices)
+                self.fail(f"invalid choice(s): {v}. (choices are: {choices_str})")
+        return values
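`CommaSeparatedChoice` backs the new `--set-rmm-allocator-for-libs` CLI option: it validates each comma-separated token against the allowed choices and returns them as a list. A small click sketch using it (the `demo` command is illustrative; only the option type comes from `dask_cuda.utils`):

```python
import click

from dask_cuda.utils import CommaSeparatedChoice


@click.command()
@click.option("--libs", type=CommaSeparatedChoice(["cupy", "torch"]), default=None)
def demo(libs):
    click.echo(f"parsed: {libs}")


if __name__ == "__main__":
    # `python demo.py --libs cupy,torch` prints: parsed: ['cupy', 'torch']
    # `python demo.py --libs cupy,foo` fails with "invalid choice(s): foo. ..."
    demo()
```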
{dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dask-cuda
-Version: 24.
+Version: 24.12.0
 Summary: Utilities for Dask and CUDA interactions
 Author: NVIDIA Corporation
 License: Apache 2.0
@@ -12,18 +12,18 @@ Classifier: Topic :: Database
 Classifier: Topic :: Scientific/Engineering
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: click>=8.1
 Requires-Dist: numba>=0.57
-Requires-Dist: numpy<
+Requires-Dist: numpy<3.0a0,>=1.23
 Requires-Dist: pandas>=1.3
-Requires-Dist: pynvml<
-Requires-Dist: rapids-dask-dependency==24.
+Requires-Dist: pynvml<12.0.0a0,>=11.0.0
+Requires-Dist: rapids-dask-dependency==24.12.*
 Requires-Dist: zict>=2.0.0
 Provides-Extra: docs
 Requires-Dist: numpydoc>=1.1.0; extra == "docs"
@@ -31,12 +31,12 @@ Requires-Dist: sphinx; extra == "docs"
 Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
 Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
 Provides-Extra: test
-Requires-Dist: cudf==24.
-Requires-Dist: dask-cudf==24.
-Requires-Dist: kvikio==24.
+Requires-Dist: cudf==24.12.*; extra == "test"
+Requires-Dist: dask-cudf==24.12.*; extra == "test"
+Requires-Dist: kvikio==24.12.*; extra == "test"
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
-Requires-Dist: ucx-py==0.
+Requires-Dist: ucx-py==0.41.*; extra == "test"

 Dask CUDA
 =========
{dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/RECORD
CHANGED

@@ -1,54 +1,55 @@
-dask_cuda/VERSION,sha256=
-dask_cuda/__init__.py,sha256=
+dask_cuda/VERSION,sha256=NltZ4By82NzVjz00LGPhCXfkG4BB0JdUSXqlG8fiVuo,8
+dask_cuda/__init__.py,sha256=eOCH3Wj0A8X0qbNUoNA15dgxb2O-ZApha4QHq5EEVFw,2748
 dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
-dask_cuda/cli.py,sha256=
-dask_cuda/cuda_worker.py,sha256=
+dask_cuda/cli.py,sha256=cScVyNiA_l9uXeDgkIcmbcR4l4cH1_1shqSqsVmuHPE,17053
+dask_cuda/cuda_worker.py,sha256=rZ1ITG_ZCbuaMA9e8uSqCjU8Km4AMphGGrxpBPQG8xU,9477
 dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
 dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
 dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
 dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
 dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
 dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
-dask_cuda/local_cuda_cluster.py,sha256=
-dask_cuda/plugins.py,sha256=
+dask_cuda/local_cuda_cluster.py,sha256=wqwKVRV6jT13sf9e-XsvbVBlTrnhmcbmHQBFPTFcayw,20335
+dask_cuda/plugins.py,sha256=yGHEurbYhL4jucQrmsxLfOyE5c3bSJdfs6GVwvDAeEA,6770
 dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
 dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
 dask_cuda/proxy_object.py,sha256=bZq92kjgFB-ad_luSAFT_RItV3nssmiEk4OOSp34laU,29812
-dask_cuda/utils.py,sha256=
+dask_cuda/utils.py,sha256=Goq-m78rYZ-bcJitg47N1h_PC4PDuzXG0CUVH7V8azU,25515
 dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
 dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
 dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/benchmarks/common.py,sha256=
-dask_cuda/benchmarks/local_cudf_groupby.py,sha256=
-dask_cuda/benchmarks/local_cudf_merge.py,sha256=
-dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=
-dask_cuda/benchmarks/local_cupy.py,sha256=
-dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=
-dask_cuda/benchmarks/
+dask_cuda/benchmarks/common.py,sha256=YFhxBYkoxIV-2mddSbLwTbyg67U4zXDd2_fFq9oP3_A,6922
+dask_cuda/benchmarks/local_cudf_groupby.py,sha256=zrDiF-yBAUxVt9mWOTH5hUm-pb-XnVX-G9gvCEX7_GI,8512
+dask_cuda/benchmarks/local_cudf_merge.py,sha256=Q7lnZ87-O7j28hkS-i_5hMApTX8VsuI4ftZf2XAnp1E,12195
+dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=8FjPFtiC-UqZcdPfocdMuzq_8TURAQWJlmhfcMWdo4w,8276
+dask_cuda/benchmarks/local_cupy.py,sha256=RCxQJd88bn3vyMAJDPK3orUpxzvDZY957wOSYkfriq0,10323
+dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=YAllGFuG6MePfPL8gdZ-Ld7a44-G0eEaHZJWB4vFPdY,6017
+dask_cuda/benchmarks/read_parquet.py,sha256=TARcG-TS1NGcQWJmuAKtfmBmy5LAaLc3xgtKgAd1DaA,7650
+dask_cuda/benchmarks/utils.py,sha256=_NSWS5e8SzZ6vxDcEFo97Y8gs_e23Qqd-c3r83BA6PU,30748
 dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dask_cuda/explicit_comms/comms.py,sha256=Su6PuNo68IyS-AwoqU4S9TmqWsLvUdNa0jot2hx8jQQ,10400
 dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=4xfhfbTGa36YPs_ex1_fFhzfGMYJq-QkS5q0RwgeHh8,20645
 dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
-dask_cuda/tests/test_dask_cuda_worker.py,sha256=
+dask_cuda/tests/test_dask_cuda_worker.py,sha256=6rroHvJAn5R3X9LwIcE8QrPxG1GO3PaxXVjhbdQ90Pw,20477
 dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
 dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
 dask_cuda/tests/test_explicit_comms.py,sha256=Pa5vVx63qWtScnVJuS31WESXIt2FPyTJVFO-0OUbbmU,15276
 dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
-dask_cuda/tests/test_gds.py,sha256=
+dask_cuda/tests/test_gds.py,sha256=j1Huud6UGm1fbkyRLQEz_ysrVw__5AimwSn_M-2GEvs,1513
 dask_cuda/tests/test_initialize.py,sha256=Rba59ZbljEm1yyN94_sWZPEE_f7hWln95aiBVc49pmY,6960
 dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
-dask_cuda/tests/test_proxify_host_file.py,sha256=
-dask_cuda/tests/test_proxy.py,sha256=
-dask_cuda/tests/test_spill.py,sha256=
+dask_cuda/tests/test_proxify_host_file.py,sha256=LC3jjo_gbfhdIy1Zy_ynmgyv31HXFoBINCe1-XXZ4XU,18994
+dask_cuda/tests/test_proxy.py,sha256=51qsXGJBg_hwSMRsC_QvJBz4wVM0Bf8fbFmTUFA7HJE,23809
+dask_cuda/tests/test_spill.py,sha256=CYMbp5HDBYlZ7T_n8RfSOZxaWFcAQKjprjRM7Wupcdw,13419
 dask_cuda/tests/test_utils.py,sha256=JRIwXfemc3lWSzLJX0VcvR1_0wB4yeoOTsw7kB6z6pU,9176
 dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
 dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
 examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
 examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
-dask_cuda-24.
-dask_cuda-24.
-dask_cuda-24.
-dask_cuda-24.
-dask_cuda-24.
-dask_cuda-24.
+dask_cuda-24.12.0.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
+dask_cuda-24.12.0.dist-info/METADATA,sha256=qFewjmkl67EsxFm9VoMTmw_XOOK3savtnO9hK-Qwx-E,2557
+dask_cuda-24.12.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+dask_cuda-24.12.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+dask_cuda-24.12.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
+dask_cuda-24.12.0.dist-info/RECORD,,
{dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/LICENSE
File without changes

{dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/entry_points.txt
File without changes

{dask_cuda-24.8.2.dist-info → dask_cuda-24.12.0.dist-info}/top_level.txt
File without changes