dask-cuda 24.10.0__py3-none-any.whl → 24.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_cuda/VERSION +1 -1
- dask_cuda/benchmarks/common.py +15 -6
- dask_cuda/benchmarks/local_cudf_groupby.py +3 -10
- dask_cuda/benchmarks/local_cudf_merge.py +1 -7
- dask_cuda/benchmarks/local_cudf_shuffle.py +3 -10
- dask_cuda/benchmarks/local_cupy.py +4 -11
- dask_cuda/benchmarks/local_cupy_map_overlap.py +4 -11
- dask_cuda/benchmarks/utils.py +22 -1
- dask_cuda/cli.py +13 -1
- dask_cuda/cuda_worker.py +2 -0
- dask_cuda/local_cuda_cluster.py +22 -0
- dask_cuda/plugins.py +67 -0
- dask_cuda/utils.py +11 -0
- {dask_cuda-24.10.0.dist-info → dask_cuda-24.12.0.dist-info}/METADATA +7 -7
- {dask_cuda-24.10.0.dist-info → dask_cuda-24.12.0.dist-info}/RECORD +19 -19
- {dask_cuda-24.10.0.dist-info → dask_cuda-24.12.0.dist-info}/WHEEL +1 -1
- {dask_cuda-24.10.0.dist-info → dask_cuda-24.12.0.dist-info}/LICENSE +0 -0
- {dask_cuda-24.10.0.dist-info → dask_cuda-24.12.0.dist-info}/entry_points.txt +0 -0
- {dask_cuda-24.10.0.dist-info → dask_cuda-24.12.0.dist-info}/top_level.txt +0 -0
dask_cuda/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
24.10.00
|
|
1
|
+
24.12.00
|
dask_cuda/benchmarks/common.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import contextlib
|
|
1
2
|
from argparse import Namespace
|
|
2
3
|
from functools import partial
|
|
3
4
|
from typing import Any, Callable, List, Mapping, NamedTuple, Optional, Tuple
|
|
@@ -7,7 +8,7 @@ import numpy as np
|
|
|
7
8
|
import pandas as pd
|
|
8
9
|
|
|
9
10
|
import dask
|
|
10
|
-
from distributed import Client
|
|
11
|
+
from distributed import Client, performance_report
|
|
11
12
|
|
|
12
13
|
from dask_cuda.benchmarks.utils import (
|
|
13
14
|
address_to_index,
|
|
@@ -87,12 +88,20 @@ def run_benchmark(client: Client, args: Namespace, config: Config):
|
|
|
87
88
|
|
|
88
89
|
If ``args.profile`` is set, the final run is profiled.
|
|
89
90
|
"""
|
|
91
|
+
|
|
90
92
|
results = []
|
|
91
|
-
for _ in range(max(
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
93
|
+
for _ in range(max(0, args.warmup_runs)):
|
|
94
|
+
config.bench_once(client, args, write_profile=None)
|
|
95
|
+
|
|
96
|
+
ctx = contextlib.nullcontext()
|
|
97
|
+
if args.profile is not None:
|
|
98
|
+
ctx = performance_report(filename=args.profile)
|
|
99
|
+
with ctx:
|
|
100
|
+
for _ in range(max(1, args.runs) - 1):
|
|
101
|
+
res = config.bench_once(client, args, write_profile=None)
|
|
102
|
+
results.append(res)
|
|
103
|
+
results.append(config.bench_once(client, args, write_profile=args.profile_last))
|
|
104
|
+
return results
|
|
96
105
|
|
|
97
106
|
|
|
98
107
|
def gather_bench_results(client: Client, args: Namespace, config: Config):
|
|
@@ -98,10 +98,9 @@ def bench_once(client, args, write_profile=None):
|
|
|
98
98
|
"False": False,
|
|
99
99
|
}.get(args.shuffle, args.shuffle)
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
ctx = performance_report(filename=args.profile)
|
|
101
|
+
ctx = contextlib.nullcontext()
|
|
102
|
+
if write_profile is not None:
|
|
103
|
+
ctx = performance_report(filename=write_profile)
|
|
105
104
|
|
|
106
105
|
with ctx:
|
|
107
106
|
t1 = clock()
|
|
@@ -260,12 +259,6 @@ def parse_args():
|
|
|
260
259
|
"type": str,
|
|
261
260
|
"help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
|
|
262
261
|
},
|
|
263
|
-
{
|
|
264
|
-
"name": "--runs",
|
|
265
|
-
"default": 3,
|
|
266
|
-
"type": int,
|
|
267
|
-
"help": "Number of runs",
|
|
268
|
-
},
|
|
269
262
|
]
|
|
270
263
|
|
|
271
264
|
return parse_benchmark_args(
|
|
@@ -190,7 +190,7 @@ def bench_once(client, args, write_profile=None):
|
|
|
190
190
|
if args.backend == "explicit-comms":
|
|
191
191
|
ctx1 = dask.config.set(explicit_comms=True)
|
|
192
192
|
if write_profile is not None:
|
|
193
|
-
ctx2 = performance_report(filename=args.profile)
|
|
193
|
+
ctx2 = performance_report(filename=write_profile)
|
|
194
194
|
|
|
195
195
|
with ctx1:
|
|
196
196
|
with ctx2:
|
|
@@ -346,12 +346,6 @@ def parse_args():
|
|
|
346
346
|
"action": "store_true",
|
|
347
347
|
"help": "Don't shuffle the keys of the left (base) dataframe.",
|
|
348
348
|
},
|
|
349
|
-
{
|
|
350
|
-
"name": "--runs",
|
|
351
|
-
"default": 3,
|
|
352
|
-
"type": int,
|
|
353
|
-
"help": "Number of runs",
|
|
354
|
-
},
|
|
355
349
|
{
|
|
356
350
|
"name": [
|
|
357
351
|
"-s",
|
|
@@ -121,10 +121,9 @@ def create_data(
|
|
|
121
121
|
def bench_once(client, args, write_profile=None):
|
|
122
122
|
data_processed, df = create_data(client, args)
|
|
123
123
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
ctx = performance_report(filename=args.profile)
|
|
124
|
+
ctx = contextlib.nullcontext()
|
|
125
|
+
if write_profile is not None:
|
|
126
|
+
ctx = performance_report(filename=write_profile)
|
|
128
127
|
|
|
129
128
|
with ctx:
|
|
130
129
|
if args.backend in {"dask", "dask-noop"}:
|
|
@@ -228,12 +227,6 @@ def parse_args():
|
|
|
228
227
|
"type": str,
|
|
229
228
|
"help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
|
|
230
229
|
},
|
|
231
|
-
{
|
|
232
|
-
"name": "--runs",
|
|
233
|
-
"default": 3,
|
|
234
|
-
"type": int,
|
|
235
|
-
"help": "Number of runs",
|
|
236
|
-
},
|
|
237
230
|
{
|
|
238
231
|
"name": "--ignore-index",
|
|
239
232
|
"action": "store_true",
|
|
@@ -141,12 +141,11 @@ def bench_once(client, args, write_profile=None):
|
|
|
141
141
|
chunksize = x.chunksize
|
|
142
142
|
data_processed = sum(arg.nbytes for arg in func_args)
|
|
143
143
|
|
|
144
|
-
|
|
145
|
-
if
|
|
146
|
-
ctx = performance_report(filename=args.profile)
|
|
147
|
-
else:
|
|
148
|
-
ctx = contextlib.nullcontext()
|
|
144
|
+
ctx = contextlib.nullcontext()
|
|
145
|
+
if write_profile is not None:
|
|
146
|
+
ctx = performance_report(filename=write_profile)
|
|
149
147
|
|
|
148
|
+
# Execute the operations to benchmark
|
|
150
149
|
with ctx:
|
|
151
150
|
rng = start_range(message=args.operation, color="purple")
|
|
152
151
|
result = func(*func_args)
|
|
@@ -297,12 +296,6 @@ def parse_args():
|
|
|
297
296
|
"type": int,
|
|
298
297
|
"help": "Chunk size (default 2500).",
|
|
299
298
|
},
|
|
300
|
-
{
|
|
301
|
-
"name": "--runs",
|
|
302
|
-
"default": 3,
|
|
303
|
-
"type": int,
|
|
304
|
-
"help": "Number of runs (default 3).",
|
|
305
|
-
},
|
|
306
299
|
{
|
|
307
300
|
"name": [
|
|
308
301
|
"-b",
|
|
@@ -42,12 +42,11 @@ def bench_once(client, args, write_profile=None):
|
|
|
42
42
|
|
|
43
43
|
data_processed = x.nbytes
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
if
|
|
47
|
-
ctx = performance_report(filename=args.profile)
|
|
48
|
-
else:
|
|
49
|
-
ctx = contextlib.nullcontext()
|
|
45
|
+
ctx = contextlib.nullcontext()
|
|
46
|
+
if write_profile is not None:
|
|
47
|
+
ctx = performance_report(filename=write_profile)
|
|
50
48
|
|
|
49
|
+
# Execute the operations to benchmark
|
|
51
50
|
with ctx:
|
|
52
51
|
result = x.map_overlap(mean_filter, args.kernel_size, shape=ks)
|
|
53
52
|
if args.backend == "dask-noop":
|
|
@@ -168,12 +167,6 @@ def parse_args():
|
|
|
168
167
|
"type": int,
|
|
169
168
|
"help": "Kernel size, 2*k+1, in each dimension (default 1)",
|
|
170
169
|
},
|
|
171
|
-
{
|
|
172
|
-
"name": "--runs",
|
|
173
|
-
"default": 3,
|
|
174
|
-
"type": int,
|
|
175
|
-
"help": "Number of runs",
|
|
176
|
-
},
|
|
177
170
|
{
|
|
178
171
|
"name": [
|
|
179
172
|
"-b",
|
dask_cuda/benchmarks/utils.py
CHANGED
|
@@ -323,7 +323,16 @@ def parse_benchmark_args(
|
|
|
323
323
|
metavar="PATH",
|
|
324
324
|
default=None,
|
|
325
325
|
type=str,
|
|
326
|
-
help="Write dask profile report (E.g. dask-report.html)"
|
|
326
|
+
help="Write dask profile report (E.g. dask-report.html) on all "
|
|
327
|
+
"iterations (excluding warmup).",
|
|
328
|
+
)
|
|
329
|
+
parser.add_argument(
|
|
330
|
+
"--profile-last",
|
|
331
|
+
metavar="PATH",
|
|
332
|
+
default=None,
|
|
333
|
+
type=str,
|
|
334
|
+
help="Write dask profile report (E.g. dask-report.html) on last "
|
|
335
|
+
"iteration only.",
|
|
327
336
|
)
|
|
328
337
|
# See save_benchmark_data for more information
|
|
329
338
|
parser.add_argument(
|
|
@@ -344,6 +353,18 @@ def parse_benchmark_args(
|
|
|
344
353
|
type=parse_bytes,
|
|
345
354
|
help="Bandwidth statistics: ignore messages smaller than this (default '1 MB')",
|
|
346
355
|
)
|
|
356
|
+
parser.add_argument(
|
|
357
|
+
"--runs",
|
|
358
|
+
default=3,
|
|
359
|
+
type=int,
|
|
360
|
+
help="Number of runs",
|
|
361
|
+
)
|
|
362
|
+
parser.add_argument(
|
|
363
|
+
"--warmup-runs",
|
|
364
|
+
default=1,
|
|
365
|
+
type=int,
|
|
366
|
+
help="Number of warmup runs",
|
|
367
|
+
)
|
|
347
368
|
|
|
348
369
|
for args in args_list:
|
|
349
370
|
name = args.pop("name")
|
dask_cuda/cli.py
CHANGED
|
@@ -13,7 +13,7 @@ from distributed.security import Security
|
|
|
13
13
|
from distributed.utils import import_term
|
|
14
14
|
|
|
15
15
|
from .cuda_worker import CUDAWorker
|
|
16
|
-
from .utils import print_cluster_config
|
|
16
|
+
from .utils import CommaSeparatedChoice, print_cluster_config
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
@@ -164,6 +164,16 @@ def cuda():
|
|
|
164
164
|
incompatible with RMM pools and managed memory, trying to enable both will
|
|
165
165
|
result in failure.""",
|
|
166
166
|
)
|
|
167
|
+
@click.option(
|
|
168
|
+
"--set-rmm-allocator-for-libs",
|
|
169
|
+
"rmm_allocator_external_lib_list",
|
|
170
|
+
type=CommaSeparatedChoice(["cupy", "torch"]),
|
|
171
|
+
default=None,
|
|
172
|
+
show_default=True,
|
|
173
|
+
help="""
|
|
174
|
+
Set RMM as the allocator for external libraries. Provide a comma-separated
|
|
175
|
+
list of libraries to set, e.g., "torch,cupy".""",
|
|
176
|
+
)
|
|
167
177
|
@click.option(
|
|
168
178
|
"--rmm-release-threshold",
|
|
169
179
|
default=None,
|
|
@@ -351,6 +361,7 @@ def worker(
|
|
|
351
361
|
rmm_maximum_pool_size,
|
|
352
362
|
rmm_managed_memory,
|
|
353
363
|
rmm_async,
|
|
364
|
+
rmm_allocator_external_lib_list,
|
|
354
365
|
rmm_release_threshold,
|
|
355
366
|
rmm_log_directory,
|
|
356
367
|
rmm_track_allocations,
|
|
@@ -425,6 +436,7 @@ def worker(
|
|
|
425
436
|
rmm_maximum_pool_size,
|
|
426
437
|
rmm_managed_memory,
|
|
427
438
|
rmm_async,
|
|
439
|
+
rmm_allocator_external_lib_list,
|
|
428
440
|
rmm_release_threshold,
|
|
429
441
|
rmm_log_directory,
|
|
430
442
|
rmm_track_allocations,
|
dask_cuda/cuda_worker.py
CHANGED
|
@@ -47,6 +47,7 @@ class CUDAWorker(Server):
|
|
|
47
47
|
rmm_maximum_pool_size=None,
|
|
48
48
|
rmm_managed_memory=False,
|
|
49
49
|
rmm_async=False,
|
|
50
|
+
rmm_allocator_external_lib_list=None,
|
|
50
51
|
rmm_release_threshold=None,
|
|
51
52
|
rmm_log_directory=None,
|
|
52
53
|
rmm_track_allocations=False,
|
|
@@ -231,6 +232,7 @@ class CUDAWorker(Server):
|
|
|
231
232
|
release_threshold=rmm_release_threshold,
|
|
232
233
|
log_directory=rmm_log_directory,
|
|
233
234
|
track_allocations=rmm_track_allocations,
|
|
235
|
+
external_lib_list=rmm_allocator_external_lib_list,
|
|
234
236
|
),
|
|
235
237
|
PreImport(pre_import),
|
|
236
238
|
CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
|
dask_cuda/local_cuda_cluster.py
CHANGED
|
@@ -143,6 +143,11 @@ class LocalCUDACluster(LocalCluster):
|
|
|
143
143
|
The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also
|
|
144
144
|
incompatible with RMM pools and managed memory. Trying to enable both will
|
|
145
145
|
result in an exception.
|
|
146
|
+
rmm_allocator_external_lib_list: str, list or None, default None
|
|
147
|
+
List of external libraries for which to set RMM as the allocator.
|
|
148
|
+
Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string
|
|
149
|
+
(like ``"torch,cupy"``) or a list of strings (like ``["torch", "cupy"]``).
|
|
150
|
+
If ``None``, no external libraries will use RMM as their allocator.
|
|
146
151
|
rmm_release_threshold: int, str or None, default None
|
|
147
152
|
When ``rmm.async is True`` and the pool size grows beyond this value, unused
|
|
148
153
|
memory held by the pool will be released at the next synchronization point.
|
|
@@ -231,6 +236,7 @@ class LocalCUDACluster(LocalCluster):
|
|
|
231
236
|
rmm_maximum_pool_size=None,
|
|
232
237
|
rmm_managed_memory=False,
|
|
233
238
|
rmm_async=False,
|
|
239
|
+
rmm_allocator_external_lib_list=None,
|
|
234
240
|
rmm_release_threshold=None,
|
|
235
241
|
rmm_log_directory=None,
|
|
236
242
|
rmm_track_allocations=False,
|
|
@@ -265,6 +271,19 @@ class LocalCUDACluster(LocalCluster):
|
|
|
265
271
|
n_workers = len(CUDA_VISIBLE_DEVICES)
|
|
266
272
|
if n_workers < 1:
|
|
267
273
|
raise ValueError("Number of workers cannot be less than 1.")
|
|
274
|
+
|
|
275
|
+
if rmm_allocator_external_lib_list is not None:
|
|
276
|
+
if isinstance(rmm_allocator_external_lib_list, str):
|
|
277
|
+
rmm_allocator_external_lib_list = [
|
|
278
|
+
v.strip() for v in rmm_allocator_external_lib_list.split(",")
|
|
279
|
+
]
|
|
280
|
+
elif not isinstance(rmm_allocator_external_lib_list, list):
|
|
281
|
+
raise ValueError(
|
|
282
|
+
"rmm_allocator_external_lib_list must be either a comma-separated "
|
|
283
|
+
"string or a list of strings. Examples: 'torch,cupy' "
|
|
284
|
+
"or ['torch', 'cupy']"
|
|
285
|
+
)
|
|
286
|
+
|
|
268
287
|
# Set nthreads=1 when parsing mem_limit since it only depends on n_workers
|
|
269
288
|
logger = logging.getLogger(__name__)
|
|
270
289
|
self.memory_limit = parse_memory_limit(
|
|
@@ -284,6 +303,8 @@ class LocalCUDACluster(LocalCluster):
|
|
|
284
303
|
self.rmm_managed_memory = rmm_managed_memory
|
|
285
304
|
self.rmm_async = rmm_async
|
|
286
305
|
self.rmm_release_threshold = rmm_release_threshold
|
|
306
|
+
self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list
|
|
307
|
+
|
|
287
308
|
if rmm_pool_size is not None or rmm_managed_memory or rmm_async:
|
|
288
309
|
try:
|
|
289
310
|
import rmm # noqa F401
|
|
@@ -437,6 +458,7 @@ class LocalCUDACluster(LocalCluster):
|
|
|
437
458
|
release_threshold=self.rmm_release_threshold,
|
|
438
459
|
log_directory=self.rmm_log_directory,
|
|
439
460
|
track_allocations=self.rmm_track_allocations,
|
|
461
|
+
external_lib_list=self.rmm_allocator_external_lib_list,
|
|
440
462
|
),
|
|
441
463
|
PreImport(self.pre_import),
|
|
442
464
|
CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats),
|
dask_cuda/plugins.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import importlib
|
|
2
2
|
import os
|
|
3
|
+
from typing import Callable, Dict
|
|
3
4
|
|
|
4
5
|
from distributed import WorkerPlugin
|
|
5
6
|
|
|
@@ -39,6 +40,7 @@ class RMMSetup(WorkerPlugin):
|
|
|
39
40
|
release_threshold,
|
|
40
41
|
log_directory,
|
|
41
42
|
track_allocations,
|
|
43
|
+
external_lib_list,
|
|
42
44
|
):
|
|
43
45
|
if initial_pool_size is None and maximum_pool_size is not None:
|
|
44
46
|
raise ValueError(
|
|
@@ -61,6 +63,7 @@ class RMMSetup(WorkerPlugin):
|
|
|
61
63
|
self.logging = log_directory is not None
|
|
62
64
|
self.log_directory = log_directory
|
|
63
65
|
self.rmm_track_allocations = track_allocations
|
|
66
|
+
self.external_lib_list = external_lib_list
|
|
64
67
|
|
|
65
68
|
def setup(self, worker=None):
|
|
66
69
|
if self.initial_pool_size is not None:
|
|
@@ -123,6 +126,70 @@ class RMMSetup(WorkerPlugin):
|
|
|
123
126
|
mr = rmm.mr.get_current_device_resource()
|
|
124
127
|
rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr))
|
|
125
128
|
|
|
129
|
+
if self.external_lib_list is not None:
|
|
130
|
+
for lib in self.external_lib_list:
|
|
131
|
+
enable_rmm_memory_for_library(lib)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def enable_rmm_memory_for_library(lib_name: str) -> None:
|
|
135
|
+
"""Enable RMM memory pool support for a specified third-party library.
|
|
136
|
+
|
|
137
|
+
This function allows the given library to utilize RMM's memory pool if it supports
|
|
138
|
+
integration with RMM. The library name is passed as a string argument, and if the
|
|
139
|
+
library is compatible, its memory allocator will be configured to use RMM.
|
|
140
|
+
|
|
141
|
+
Parameters
|
|
142
|
+
----------
|
|
143
|
+
lib_name : str
|
|
144
|
+
The name of the third-party library to enable RMM memory pool support for.
|
|
145
|
+
Supported libraries are "cupy" and "torch".
|
|
146
|
+
|
|
147
|
+
Raises
|
|
148
|
+
------
|
|
149
|
+
ValueError
|
|
150
|
+
If the library name is not supported or does not have RMM integration.
|
|
151
|
+
ImportError
|
|
152
|
+
If the required library is not installed.
|
|
153
|
+
"""
|
|
154
|
+
|
|
155
|
+
# Mapping of supported libraries to their respective setup functions
|
|
156
|
+
setup_functions: Dict[str, Callable[[], None]] = {
|
|
157
|
+
"torch": _setup_rmm_for_torch,
|
|
158
|
+
"cupy": _setup_rmm_for_cupy,
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
if lib_name not in setup_functions:
|
|
162
|
+
supported_libs = ", ".join(setup_functions.keys())
|
|
163
|
+
raise ValueError(
|
|
164
|
+
f"The library '{lib_name}' is not supported for RMM integration. "
|
|
165
|
+
f"Supported libraries are: {supported_libs}."
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
# Call the setup function for the specified library
|
|
169
|
+
setup_functions[lib_name]()
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _setup_rmm_for_torch() -> None:
|
|
173
|
+
try:
|
|
174
|
+
import torch
|
|
175
|
+
except ImportError as e:
|
|
176
|
+
raise ImportError("PyTorch is not installed.") from e
|
|
177
|
+
|
|
178
|
+
from rmm.allocators.torch import rmm_torch_allocator
|
|
179
|
+
|
|
180
|
+
torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _setup_rmm_for_cupy() -> None:
|
|
184
|
+
try:
|
|
185
|
+
import cupy
|
|
186
|
+
except ImportError as e:
|
|
187
|
+
raise ImportError("CuPy is not installed.") from e
|
|
188
|
+
|
|
189
|
+
from rmm.allocators.cupy import rmm_cupy_allocator
|
|
190
|
+
|
|
191
|
+
cupy.cuda.set_allocator(rmm_cupy_allocator)
|
|
192
|
+
|
|
126
193
|
|
|
127
194
|
class PreImport(WorkerPlugin):
|
|
128
195
|
def __init__(self, libraries):
|
dask_cuda/utils.py
CHANGED
|
@@ -9,6 +9,7 @@ from functools import singledispatch
|
|
|
9
9
|
from multiprocessing import cpu_count
|
|
10
10
|
from typing import Optional
|
|
11
11
|
|
|
12
|
+
import click
|
|
12
13
|
import numpy as np
|
|
13
14
|
import pynvml
|
|
14
15
|
import toolz
|
|
@@ -764,3 +765,13 @@ def get_rmm_device_memory_usage() -> Optional[int]:
|
|
|
764
765
|
if isinstance(mr, rmm.mr.StatisticsResourceAdaptor):
|
|
765
766
|
return mr.allocation_counts["current_bytes"]
|
|
766
767
|
return None
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
class CommaSeparatedChoice(click.Choice):
|
|
771
|
+
def convert(self, value, param, ctx):
|
|
772
|
+
values = [v.strip() for v in value.split(",")]
|
|
773
|
+
for v in values:
|
|
774
|
+
if v not in self.choices:
|
|
775
|
+
choices_str = ", ".join(f"'{c}'" for c in self.choices)
|
|
776
|
+
self.fail(f"invalid choice(s): {v}. (choices are: {choices_str})")
|
|
777
|
+
return values
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dask-cuda
|
|
3
|
-
Version: 24.10.0
|
|
3
|
+
Version: 24.12.0
|
|
4
4
|
Summary: Utilities for Dask and CUDA interactions
|
|
5
5
|
Author: NVIDIA Corporation
|
|
6
6
|
License: Apache 2.0
|
|
@@ -22,8 +22,8 @@ Requires-Dist: click>=8.1
|
|
|
22
22
|
Requires-Dist: numba>=0.57
|
|
23
23
|
Requires-Dist: numpy<3.0a0,>=1.23
|
|
24
24
|
Requires-Dist: pandas>=1.3
|
|
25
|
-
Requires-Dist: pynvml<
|
|
26
|
-
Requires-Dist: rapids-dask-dependency==24.10.*
|
|
25
|
+
Requires-Dist: pynvml<12.0.0a0,>=11.0.0
|
|
26
|
+
Requires-Dist: rapids-dask-dependency==24.12.*
|
|
27
27
|
Requires-Dist: zict>=2.0.0
|
|
28
28
|
Provides-Extra: docs
|
|
29
29
|
Requires-Dist: numpydoc>=1.1.0; extra == "docs"
|
|
@@ -31,12 +31,12 @@ Requires-Dist: sphinx; extra == "docs"
|
|
|
31
31
|
Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
|
|
32
32
|
Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
|
|
33
33
|
Provides-Extra: test
|
|
34
|
-
Requires-Dist: cudf==24.10.*; extra == "test"
|
|
35
|
-
Requires-Dist: dask-cudf==24.10.*; extra == "test"
|
|
36
|
-
Requires-Dist: kvikio==24.10.*; extra == "test"
|
|
34
|
+
Requires-Dist: cudf==24.12.*; extra == "test"
|
|
35
|
+
Requires-Dist: dask-cudf==24.12.*; extra == "test"
|
|
36
|
+
Requires-Dist: kvikio==24.12.*; extra == "test"
|
|
37
37
|
Requires-Dist: pytest; extra == "test"
|
|
38
38
|
Requires-Dist: pytest-cov; extra == "test"
|
|
39
|
-
Requires-Dist: ucx-py==0.
|
|
39
|
+
Requires-Dist: ucx-py==0.41.*; extra == "test"
|
|
40
40
|
|
|
41
41
|
Dask CUDA
|
|
42
42
|
=========
|
|
@@ -1,31 +1,31 @@
|
|
|
1
|
-
dask_cuda/VERSION,sha256=
|
|
1
|
+
dask_cuda/VERSION,sha256=NltZ4By82NzVjz00LGPhCXfkG4BB0JdUSXqlG8fiVuo,8
|
|
2
2
|
dask_cuda/__init__.py,sha256=eOCH3Wj0A8X0qbNUoNA15dgxb2O-ZApha4QHq5EEVFw,2748
|
|
3
3
|
dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
|
|
4
|
-
dask_cuda/cli.py,sha256=
|
|
5
|
-
dask_cuda/cuda_worker.py,sha256=
|
|
4
|
+
dask_cuda/cli.py,sha256=cScVyNiA_l9uXeDgkIcmbcR4l4cH1_1shqSqsVmuHPE,17053
|
|
5
|
+
dask_cuda/cuda_worker.py,sha256=rZ1ITG_ZCbuaMA9e8uSqCjU8Km4AMphGGrxpBPQG8xU,9477
|
|
6
6
|
dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
|
|
7
7
|
dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
|
|
8
8
|
dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
|
|
9
9
|
dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
|
|
10
10
|
dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
|
|
11
11
|
dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
|
|
12
|
-
dask_cuda/local_cuda_cluster.py,sha256=
|
|
13
|
-
dask_cuda/plugins.py,sha256=
|
|
12
|
+
dask_cuda/local_cuda_cluster.py,sha256=wqwKVRV6jT13sf9e-XsvbVBlTrnhmcbmHQBFPTFcayw,20335
|
|
13
|
+
dask_cuda/plugins.py,sha256=yGHEurbYhL4jucQrmsxLfOyE5c3bSJdfs6GVwvDAeEA,6770
|
|
14
14
|
dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
|
|
15
15
|
dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
|
|
16
16
|
dask_cuda/proxy_object.py,sha256=bZq92kjgFB-ad_luSAFT_RItV3nssmiEk4OOSp34laU,29812
|
|
17
|
-
dask_cuda/utils.py,sha256=
|
|
17
|
+
dask_cuda/utils.py,sha256=Goq-m78rYZ-bcJitg47N1h_PC4PDuzXG0CUVH7V8azU,25515
|
|
18
18
|
dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
|
|
19
19
|
dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
|
|
20
20
|
dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
-
dask_cuda/benchmarks/common.py,sha256=
|
|
22
|
-
dask_cuda/benchmarks/local_cudf_groupby.py,sha256=
|
|
23
|
-
dask_cuda/benchmarks/local_cudf_merge.py,sha256=
|
|
24
|
-
dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=
|
|
25
|
-
dask_cuda/benchmarks/local_cupy.py,sha256=
|
|
26
|
-
dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=
|
|
21
|
+
dask_cuda/benchmarks/common.py,sha256=YFhxBYkoxIV-2mddSbLwTbyg67U4zXDd2_fFq9oP3_A,6922
|
|
22
|
+
dask_cuda/benchmarks/local_cudf_groupby.py,sha256=zrDiF-yBAUxVt9mWOTH5hUm-pb-XnVX-G9gvCEX7_GI,8512
|
|
23
|
+
dask_cuda/benchmarks/local_cudf_merge.py,sha256=Q7lnZ87-O7j28hkS-i_5hMApTX8VsuI4ftZf2XAnp1E,12195
|
|
24
|
+
dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=8FjPFtiC-UqZcdPfocdMuzq_8TURAQWJlmhfcMWdo4w,8276
|
|
25
|
+
dask_cuda/benchmarks/local_cupy.py,sha256=RCxQJd88bn3vyMAJDPK3orUpxzvDZY957wOSYkfriq0,10323
|
|
26
|
+
dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=YAllGFuG6MePfPL8gdZ-Ld7a44-G0eEaHZJWB4vFPdY,6017
|
|
27
27
|
dask_cuda/benchmarks/read_parquet.py,sha256=TARcG-TS1NGcQWJmuAKtfmBmy5LAaLc3xgtKgAd1DaA,7650
|
|
28
|
-
dask_cuda/benchmarks/utils.py,sha256=
|
|
28
|
+
dask_cuda/benchmarks/utils.py,sha256=_NSWS5e8SzZ6vxDcEFo97Y8gs_e23Qqd-c3r83BA6PU,30748
|
|
29
29
|
dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
30
|
dask_cuda/explicit_comms/comms.py,sha256=Su6PuNo68IyS-AwoqU4S9TmqWsLvUdNa0jot2hx8jQQ,10400
|
|
31
31
|
dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -47,9 +47,9 @@ dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDx
|
|
|
47
47
|
dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
|
|
48
48
|
examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
|
|
49
49
|
examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
|
|
50
|
-
dask_cuda-24.
|
|
51
|
-
dask_cuda-24.
|
|
52
|
-
dask_cuda-24.
|
|
53
|
-
dask_cuda-24.
|
|
54
|
-
dask_cuda-24.
|
|
55
|
-
dask_cuda-24.
|
|
50
|
+
dask_cuda-24.12.0.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
|
|
51
|
+
dask_cuda-24.12.0.dist-info/METADATA,sha256=qFewjmkl67EsxFm9VoMTmw_XOOK3savtnO9hK-Qwx-E,2557
|
|
52
|
+
dask_cuda-24.12.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
53
|
+
dask_cuda-24.12.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
|
|
54
|
+
dask_cuda-24.12.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
|
|
55
|
+
dask_cuda-24.12.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|