dask-cuda 24.6.0__py3-none-any.whl → 24.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_cuda/VERSION +1 -1
- dask_cuda/_version.py +12 -2
- dask_cuda/benchmarks/common.py +12 -10
- dask_cuda/benchmarks/utils.py +101 -30
- dask_cuda/cli.py +22 -0
- dask_cuda/cuda_worker.py +10 -1
- dask_cuda/explicit_comms/dataframe/shuffle.py +24 -20
- dask_cuda/local_cuda_cluster.py +24 -1
- dask_cuda/plugins.py +15 -0
- dask_cuda/tests/test_cudf_builtin_spilling.py +1 -1
- dask_cuda/tests/test_dask_cuda_worker.py +58 -0
- dask_cuda/tests/test_explicit_comms.py +38 -8
- dask_cuda/tests/test_local_cuda_cluster.py +48 -0
- dask_cuda/tests/test_version.py +12 -0
- {dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/METADATA +18 -18
- {dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/RECORD +20 -19
- {dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/WHEEL +1 -1
- {dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/LICENSE +0 -0
- {dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/entry_points.txt +0 -0
- {dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/top_level.txt +0 -0
dask_cuda/VERSION
CHANGED
@@ -1 +1 @@
-24.06.00
+24.08.02
dask_cuda/_version.py
CHANGED
@@ -15,6 +15,16 @@
 import importlib.resources

 __version__ = (
-    importlib.resources.files("dask_cuda").joinpath("VERSION").read_text().strip()
+    importlib.resources.files(__package__).joinpath("VERSION").read_text().strip()
 )
-__git_commit__ = ""
+try:
+    __git_commit__ = (
+        importlib.resources.files(__package__)
+        .joinpath("GIT_COMMIT")
+        .read_text()
+        .strip()
+    )
+except FileNotFoundError:
+    __git_commit__ = ""
+
+__all__ = ["__git_commit__", "__version__"]
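Both constants are re-exported at the package root (the new test_version.py later in this diff exercises exactly that), so a quick check from an interactive session looks like the sketch below; the printed values depend on how the package was built.

    import dask_cuda

    print(dask_cuda.__version__)     # "24.8.2" for this wheel
    print(dask_cuda.__git_commit__)  # empty string if no GIT_COMMIT file is shipped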
dask_cuda/benchmarks/common.py
CHANGED
@@ -117,16 +117,18 @@ def run(client: Client, args: Namespace, config: Config):
     wait_for_cluster(client, shutdown_on_failure=True)
     assert len(client.scheduler_info()["workers"]) > 0
     setup_memory_pools(
-        client,
-        args.type == "gpu",
-        args.rmm_pool_size,
-        args.disable_rmm_pool,
-        args.enable_rmm_async,
-        args.enable_rmm_managed,
-        args.rmm_release_threshold,
-        args.rmm_log_directory,
-        args.enable_rmm_statistics,
-        args.enable_rmm_track_allocations,
+        client=client,
+        is_gpu=args.type == "gpu",
+        disable_rmm=args.disable_rmm,
+        disable_rmm_pool=args.disable_rmm_pool,
+        pool_size=args.rmm_pool_size,
+        maximum_pool_size=args.rmm_maximum_pool_size,
+        rmm_async=args.enable_rmm_async,
+        rmm_managed=args.enable_rmm_managed,
+        release_threshold=args.rmm_release_threshold,
+        log_directory=args.rmm_log_directory,
+        statistics=args.enable_rmm_statistics,
+        rmm_track_allocations=args.enable_rmm_track_allocations,
     )
     address_to_index, results, message_data = gather_bench_results(client, args, config)
     p2p_bw = peer_to_peer_bandwidths(message_data, address_to_index)
dask_cuda/benchmarks/utils.py
CHANGED
@@ -17,6 +17,7 @@ from dask.utils import format_bytes, format_time, parse_bytes
 from distributed.comm.addressing import get_address_host

 from dask_cuda.local_cuda_cluster import LocalCUDACluster
+from dask_cuda.utils import parse_device_memory_limit


 def as_noop(dsk):
@@ -93,15 +94,41 @@ def parse_benchmark_args(
         "'forkserver' can be used to avoid issues with fork not being allowed "
         "after the networking stack has been initialised.",
     )
+    cluster_args.add_argument(
+        "--disable-rmm",
+        action="store_true",
+        help="Disable RMM.",
+    )
+    cluster_args.add_argument(
+        "--disable-rmm-pool",
+        action="store_true",
+        help="Uses RMM for allocations but without a memory pool.",
+    )
     cluster_args.add_argument(
         "--rmm-pool-size",
         default=None,
         type=parse_bytes,
         help="The size of the RMM memory pool. Can be an integer (bytes) or a string "
-        "(like '4GB' or '5000M'). By default, 1/2 of the total GPU memory is used.",
+        "(like '4GB' or '5000M'). By default, 1/2 of the total GPU memory is used."
+        ""
+        ".. note::"
+        " This size is a per-worker configuration, and not cluster-wide.",
     )
     cluster_args.add_argument(
-        "--disable-rmm-pool", action="store_true", help="Disable the RMM memory pool"
+        "--rmm-maximum-pool-size",
+        default=None,
+        help="When ``--rmm-pool-size`` is specified, this argument indicates the "
+        "maximum pool size. Can be an integer (bytes), or a string (like '4GB' or "
+        "'5000M'). By default, the total available memory on the GPU is used. "
+        "``rmm_pool_size`` must be specified to use RMM pool and to set the maximum "
+        "pool size."
+        ""
+        ".. note::"
+        " When paired with `--enable-rmm-async` the maximum size cannot be "
+        " guaranteed due to fragmentation."
+        ""
+        ".. note::"
+        " This size is a per-worker configuration, and not cluster-wide.",
     )
     cluster_args.add_argument(
         "--enable-rmm-managed",
@@ -407,10 +434,29 @@ def get_worker_device():
     return -1


+def setup_rmm_resources(statistics=False, rmm_track_allocations=False):
+    import cupy
+
+    import rmm
+    from rmm.allocators.cupy import rmm_cupy_allocator
+
+    cupy.cuda.set_allocator(rmm_cupy_allocator)
+    if statistics:
+        rmm.mr.set_current_device_resource(
+            rmm.mr.StatisticsResourceAdaptor(rmm.mr.get_current_device_resource())
+        )
+    if rmm_track_allocations:
+        rmm.mr.set_current_device_resource(
+            rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource())
+        )
+
+
 def setup_memory_pool(
     dask_worker=None,
+    disable_rmm=None,
+    disable_rmm_pool=None,
     pool_size=None,
-    disable_pool=False,
+    maximum_pool_size=None,
     rmm_async=False,
     rmm_managed=False,
     release_threshold=None,
@@ -418,45 +464,66 @@ def setup_memory_pool(
     statistics=False,
     rmm_track_allocations=False,
 ):
-    import cupy
-
     import rmm
-    from rmm.allocators.cupy import rmm_cupy_allocator

     from dask_cuda.utils import get_rmm_log_file_name

     logging = log_directory is not None

-    if rmm_async:
-        rmm.mr.set_current_device_resource(
-            rmm.mr.CudaAsyncMemoryResource(
-                initial_pool_size=pool_size, release_threshold=release_threshold
-            )
-        )
-    else:
-        rmm.reinitialize(
-            pool_allocator=not disable_pool,
-            managed_memory=rmm_managed,
-            initial_pool_size=pool_size,
-            logging=logging,
-            log_file_name=get_rmm_log_file_name(dask_worker, logging, log_directory),
-        )
-    cupy.cuda.set_allocator(rmm_cupy_allocator)
-    if statistics:
-        rmm.mr.set_current_device_resource(
-            rmm.mr.StatisticsResourceAdaptor(rmm.mr.get_current_device_resource())
+    if pool_size is not None:
+        pool_size = parse_device_memory_limit(pool_size, alignment_size=256)
+
+    if maximum_pool_size is not None:
+        maximum_pool_size = parse_device_memory_limit(
+            maximum_pool_size, alignment_size=256
         )
-    if rmm_track_allocations:
-        rmm.mr.set_current_device_resource(
-            rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource())
+
+    if release_threshold is not None:
+        release_threshold = parse_device_memory_limit(
+            release_threshold, alignment_size=256
         )

+    if not disable_rmm:
+        if rmm_async:
+            mr = rmm.mr.CudaAsyncMemoryResource(
+                initial_pool_size=pool_size,
+                release_threshold=release_threshold,
+            )
+
+            if maximum_pool_size is not None:
+                mr = rmm.mr.LimitingResourceAdaptor(
+                    mr, allocation_limit=maximum_pool_size
+                )
+
+            rmm.mr.set_current_device_resource(mr)
+
+            setup_rmm_resources(
+                statistics=statistics, rmm_track_allocations=rmm_track_allocations
+            )
+        else:
+            rmm.reinitialize(
+                pool_allocator=not disable_rmm_pool,
+                managed_memory=rmm_managed,
+                initial_pool_size=pool_size,
+                maximum_pool_size=maximum_pool_size,
+                logging=logging,
+                log_file_name=get_rmm_log_file_name(
+                    dask_worker, logging, log_directory
+                ),
+            )
+
+            setup_rmm_resources(
+                statistics=statistics, rmm_track_allocations=rmm_track_allocations
+            )
+

 def setup_memory_pools(
     client,
     is_gpu,
+    disable_rmm,
+    disable_rmm_pool,
     pool_size,
-    disable_pool,
+    maximum_pool_size,
     rmm_async,
     rmm_managed,
     release_threshold,
@@ -468,8 +535,10 @@ def setup_memory_pools(
         return
     client.run(
         setup_memory_pool,
+        disable_rmm=disable_rmm,
+        disable_rmm_pool=disable_rmm_pool,
         pool_size=pool_size,
-        disable_pool=disable_pool,
+        maximum_pool_size=maximum_pool_size,
         rmm_async=rmm_async,
         rmm_managed=rmm_managed,
         release_threshold=release_threshold,
@@ -482,7 +551,9 @@ def setup_memory_pools(
     client.run_on_scheduler(
         setup_memory_pool,
         pool_size=1e9,
-        disable_pool=disable_pool,
+        disable_rmm=disable_rmm,
+        disable_rmm_pool=disable_rmm_pool,
+        maximum_pool_size=maximum_pool_size,
         rmm_async=rmm_async,
         rmm_managed=rmm_managed,
         release_threshold=release_threshold,
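The behavioural core of this change is the new async branch: with `--enable-rmm-async`, the per-worker pool is now built from `rmm.mr.CudaAsyncMemoryResource` and, when `--rmm-maximum-pool-size` is also given, wrapped in a `LimitingResourceAdaptor`. A standalone sketch of that composition, with illustrative sizes rather than the benchmark defaults:

    import rmm

    initial_pool_size = 1 << 30   # 1 GiB (illustrative)
    maximum_pool_size = 2 << 30   # 2 GiB cap (illustrative)

    # CUDA async pool with a release threshold, as in the rmm_async branch above
    mr = rmm.mr.CudaAsyncMemoryResource(
        initial_pool_size=initial_pool_size,
        release_threshold=initial_pool_size,
    )
    # Cap total allocations when a maximum pool size was requested; as the new
    # help text notes, the cap is best-effort under fragmentation.
    mr = rmm.mr.LimitingResourceAdaptor(mr, allocation_limit=maximum_pool_size)
    rmm.mr.set_current_device_resource(mr)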
dask_cuda/cli.py
CHANGED
@@ -101,6 +101,20 @@ def cuda():
     total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"`` or 0 to
     disable spilling to host (i.e. allow full device memory usage).""",
 )
+@click.option(
+    "--enable-cudf-spill/--disable-cudf-spill",
+    default=False,
+    show_default=True,
+    help="""Enable automatic cuDF spilling. WARNING: This should NOT be used with
+    JIT-Unspill.""",
+)
+@click.option(
+    "--cudf-spill-stats",
+    type=int,
+    default=0,
+    help="""Set the cuDF spilling statistics level. This option has no effect if
+    `--enable-cudf-spill` is not specified.""",
+)
 @click.option(
     "--rmm-pool-size",
     default=None,
@@ -120,6 +134,10 @@ def cuda():
     memory on the GPU is used. ``rmm_pool_size`` must be specified to use RMM pool and
     to set the maximum pool size.

+    .. note::
+        When paired with `--enable-rmm-async` the maximum size cannot be guaranteed due
+        to fragmentation.
+
     .. note::
         This size is a per-worker configuration, and not cluster-wide.""",
 )
@@ -326,6 +344,8 @@ def worker(
     name,
     memory_limit,
     device_memory_limit,
+    enable_cudf_spill,
+    cudf_spill_stats,
     rmm_pool_size,
     rmm_maximum_pool_size,
     rmm_managed_memory,
@@ -398,6 +418,8 @@ def worker(
         name,
         memory_limit,
         device_memory_limit,
+        enable_cudf_spill,
+        cudf_spill_stats,
         rmm_pool_size,
         rmm_maximum_pool_size,
         rmm_managed_memory,
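In practice the two new options are passed on the worker command line, e.g. `dask cuda worker <scheduler-address> --enable-cudf-spill --cudf-spill-stats 2` (an illustrative invocation; the new tests in test_dask_cuda_worker.py later in this diff start workers exactly this way). They map onto the `enable_cudf_spill` and `cudf_spill_stats` parameters added to `CUDAWorker` in the next file.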
dask_cuda/cuda_worker.py
CHANGED
@@ -20,7 +20,7 @@ from distributed.worker_memory import parse_memory_limit

 from .device_host_file import DeviceHostFile
 from .initialize import initialize
-from .plugins import CPUAffinity, PreImport, RMMSetup
+from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup
 from .proxify_host_file import ProxifyHostFile
 from .utils import (
     cuda_visible_devices,
@@ -41,6 +41,8 @@ class CUDAWorker(Server):
         name=None,
         memory_limit="auto",
         device_memory_limit="auto",
+        enable_cudf_spill=False,
+        cudf_spill_stats=0,
         rmm_pool_size=None,
         rmm_maximum_pool_size=None,
         rmm_managed_memory=False,
@@ -166,6 +168,12 @@ class CUDAWorker(Server):
         if device_memory_limit is None and memory_limit is None:
             data = lambda _: {}
         elif jit_unspill:
+            if enable_cudf_spill:
+                warnings.warn(
+                    "Enabling cuDF spilling and JIT-Unspill together is not "
+                    "safe, consider disabling JIT-Unspill."
+                )
+
             data = lambda i: (
                 ProxifyHostFile,
                 {
@@ -217,6 +225,7 @@ class CUDAWorker(Server):
                         track_allocations=rmm_track_allocations,
                     ),
                     PreImport(pre_import),
+                    CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
                 },
                 name=name if nprocs == 1 or name is None else str(name) + "-" + str(i),
                 local_directory=local_directory,
dask_cuda/explicit_comms/dataframe/shuffle.py
CHANGED
@@ -8,6 +8,9 @@ from math import ceil
 from operator import getitem
 from typing import Any, Callable, Dict, List, Optional, Set, TypeVar

+import numpy as np
+import pandas as pd
+
 import dask
 import dask.config
 import dask.dataframe
@@ -155,9 +158,16 @@ def compute_map_index(
     if column_names[0] == "_partitions":
         ind = df[column_names[0]]
     else:
-        ind = hash_object_dispatch(
-            df[column_names] if column_names else df, index=False
-        )
+        # Need to cast numerical dtypes to be consistent
+        # with `dask.dataframe.shuffle.partitioning_index`
+        dtypes = {}
+        index = df[column_names] if column_names else df
+        for col, dtype in index.dtypes.items():
+            if pd.api.types.is_numeric_dtype(dtype):
+                dtypes[col] = np.float64
+        if dtypes:
+            index = index.astype(dtypes, errors="ignore")
+        ind = hash_object_dispatch(index, index=False)
     return ind % npartitions

@@ -187,15 +197,8 @@ def partition_dataframe(
     partitions
         Dict of dataframe-partitions, mapping partition-ID to dataframe
     """
-    if column_names[0] != "_partitions" and hasattr(df, "partition_by_hash"):
-        return dict(
-            zip(
-                range(npartitions),
-                df.partition_by_hash(
-                    column_names, npartitions, keep_index=not ignore_index
-                ),
-            )
-        )
+    # TODO: Use `partition_by_hash` if/when dtype-casting is added
+    # (See: https://github.com/rapidsai/cudf/issues/16221)
     map_index = compute_map_index(df, column_names, npartitions)
     return group_split_dispatch(df, map_index, npartitions, ignore_index=ignore_index)

@@ -529,18 +532,19 @@ def shuffle(
     # TODO: can we do this without using `submit()` to avoid the overhead
     # of creating a Future for each dataframe partition?

-    futures = []
+    _futures = {}
    for rank in ranks:
        for part_id in rank_to_out_part_ids[rank]:
-            futures.append(
-                c.client.submit(
-                    getitem,
-                    shuffle_result[rank],
-                    part_id,
-                    workers=[c.worker_addresses[rank]],
-                )
+            _futures[part_id] = c.client.submit(
+                getitem,
+                shuffle_result[rank],
+                part_id,
+                workers=[c.worker_addresses[rank]],
             )

+    # Make sure partitions are properly ordered
+    futures = [_futures.pop(i) for i in range(npartitions)]
+
     # Create a distributed Dataframe from all the pieces
     divs = [None] * (len(futures) + 1)
     kwargs = {"meta": df_meta, "divisions": divs, "prefix": "explicit-comms-shuffle"}
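The dtype cast in `compute_map_index` is the heart of the fix: numeric key columns are hashed as float64 so the result lines up with `dask.dataframe.shuffle.partitioning_index`. A minimal pandas-only sketch of that casting rule (hypothetical data, not taken from the diff):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"key": [1, 2, 3], "name": ["a", "b", "c"]})
    index = df[["key"]]

    # Cast numeric columns to float64 before hashing, as the new code does
    dtypes = {
        col: np.float64
        for col, dtype in index.dtypes.items()
        if pd.api.types.is_numeric_dtype(dtype)
    }
    if dtypes:
        index = index.astype(dtypes, errors="ignore")
    print(index.dtypes)  # "key" is now float64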
dask_cuda/local_cuda_cluster.py
CHANGED
@@ -10,7 +10,7 @@ from distributed.worker_memory import parse_memory_limit

 from .device_host_file import DeviceHostFile
 from .initialize import initialize
-from .plugins import CPUAffinity, PreImport, RMMSetup
+from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup
 from .proxify_host_file import ProxifyHostFile
 from .utils import (
     cuda_visible_devices,
@@ -73,6 +73,14 @@ class LocalCUDACluster(LocalCluster):
         starts spilling to host memory. Can be an integer (bytes), float (fraction of
         total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"``, 0,
         or ``None`` to disable spilling to host (i.e. allow full device memory usage).
+    enable_cudf_spill : bool, default False
+        Enable automatic cuDF spilling.
+
+        .. warning::
+            This should NOT be used together with JIT-Unspill.
+    cudf_spill_stats : int, default 0
+        Set the cuDF spilling statistics level. This option has no effect if
+        ``enable_cudf_spill=False``.
     local_directory : str or None, default None
         Path on local machine to store temporary files. Can be a string (like
         ``"path/to/files"``) or ``None`` to fall back on the value of
@@ -114,6 +122,10 @@ class LocalCUDACluster(LocalCluster):
         memory on the GPU is used. ``rmm_pool_size`` must be specified to use RMM pool
         and to set the maximum pool size.

+        .. note::
+            When paired with `--enable-rmm-async` the maximum size cannot be guaranteed
+            due to fragmentation.
+
         .. note::
             This size is a per-worker configuration, and not cluster-wide.
     rmm_managed_memory : bool, default False
@@ -205,6 +217,8 @@ class LocalCUDACluster(LocalCluster):
         threads_per_worker=1,
         memory_limit="auto",
         device_memory_limit=0.8,
+        enable_cudf_spill=False,
+        cudf_spill_stats=0,
         data=None,
         local_directory=None,
         shared_filesystem=None,
@@ -255,6 +269,8 @@ class LocalCUDACluster(LocalCluster):
         self.device_memory_limit = parse_device_memory_limit(
             device_memory_limit, device_index=nvml_device_index(0, CUDA_VISIBLE_DEVICES)
         )
+        self.enable_cudf_spill = enable_cudf_spill
+        self.cudf_spill_stats = cudf_spill_stats

         self.rmm_pool_size = rmm_pool_size
         self.rmm_maximum_pool_size = rmm_maximum_pool_size
@@ -298,6 +314,12 @@ class LocalCUDACluster(LocalCluster):
         if device_memory_limit is None and memory_limit is None:
             data = {}
         elif jit_unspill:
+            if enable_cudf_spill:
+                warnings.warn(
+                    "Enabling cuDF spilling and JIT-Unspill together is not "
+                    "safe, consider disabling JIT-Unspill."
+                )
+
             data = (
                 ProxifyHostFile,
                 {
@@ -410,6 +432,7 @@ class LocalCUDACluster(LocalCluster):
                     track_allocations=self.rmm_track_allocations,
                 ),
                 PreImport(self.pre_import),
+                CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats),
             },
         }
     )
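Putting the new keywords together, cuDF's built-in spilling can now be switched on per cluster; a minimal sketch (values illustrative):

    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster

    # cuDF spilling on, with spill statistics; not to be combined with jit_unspill=True
    cluster = LocalCUDACluster(enable_cudf_spill=True, cudf_spill_stats=1)
    client = Client(cluster)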
dask_cuda/plugins.py
CHANGED
@@ -14,6 +14,21 @@ class CPUAffinity(WorkerPlugin):
         os.sched_setaffinity(0, self.cores)


+class CUDFSetup(WorkerPlugin):
+    def __init__(self, spill, spill_stats):
+        self.spill = spill
+        self.spill_stats = spill_stats
+
+    def setup(self, worker=None):
+        try:
+            import cudf
+
+            cudf.set_option("spill", self.spill)
+            cudf.set_option("spill_stats", self.spill_stats)
+        except ImportError:
+            pass
+
+
 class RMMSetup(WorkerPlugin):
     def __init__(
         self,
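`CUDFSetup` is a plain distributed `WorkerPlugin`, so besides the automatic registration shown in cuda_worker.py and local_cuda_cluster.py above, it could also be attached to an already-running cluster by hand. A sketch, assuming an existing `client` and a distributed version that provides `Client.register_plugin` (older releases use `register_worker_plugin` instead):

    from dask_cuda.plugins import CUDFSetup

    # Roughly equivalent to starting workers with --enable-cudf-spill --cudf-spill-stats 1
    client.register_plugin(CUDFSetup(spill=True, spill_stats=1))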
dask_cuda/tests/test_cudf_builtin_spilling.py
CHANGED
@@ -20,7 +20,7 @@ from cudf.core.buffer.spill_manager import (  # noqa: E402
     get_global_manager,
     set_global_manager,
 )
-from cudf.testing._utils import assert_eq  # noqa: E402
+from cudf.testing import assert_eq  # noqa: E402

 if get_global_manager() is not None:
     pytest.skip(
dask_cuda/tests/test_dask_cuda_worker.py
CHANGED
@@ -231,6 +231,64 @@ def test_rmm_logging(loop):  # noqa: F811
             assert v is rmm.mr.LoggingResourceAdaptor


+def test_cudf_spill_disabled(loop):  # noqa: F811
+    cudf = pytest.importorskip("cudf")
+    with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
+        with popen(
+            [
+                "dask",
+                "cuda",
+                "worker",
+                "127.0.0.1:9369",
+                "--host",
+                "127.0.0.1",
+                "--no-dashboard",
+            ]
+        ):
+            with Client("127.0.0.1:9369", loop=loop) as client:
+                assert wait_workers(client, n_gpus=get_n_gpus())
+
+                cudf_spill = client.run(
+                    cudf.get_option,
+                    "spill",
+                )
+                for v in cudf_spill.values():
+                    assert v is False
+
+                cudf_spill_stats = client.run(cudf.get_option, "spill_stats")
+                for v in cudf_spill_stats.values():
+                    assert v == 0
+
+
+def test_cudf_spill(loop):  # noqa: F811
+    cudf = pytest.importorskip("cudf")
+    with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
+        with popen(
+            [
+                "dask",
+                "cuda",
+                "worker",
+                "127.0.0.1:9369",
+                "--host",
+                "127.0.0.1",
+                "--no-dashboard",
+                "--enable-cudf-spill",
+                "--cudf-spill-stats",
+                "2",
+            ]
+        ):
+            with Client("127.0.0.1:9369", loop=loop) as client:
+                assert wait_workers(client, n_gpus=get_n_gpus())
+
+                cudf_spill = client.run(cudf.get_option, "spill")
+                for v in cudf_spill.values():
+                    assert v is True
+
+                cudf_spill_stats = client.run(cudf.get_option, "spill_stats")
+                for v in cudf_spill_stats.values():
+                    assert v == 2
+
+
 @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
 def test_dashboard_address(loop):  # noqa: F811
     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
dask_cuda/tests/test_explicit_comms.py
CHANGED
@@ -109,7 +109,14 @@ def test_dataframe_merge_empty_partitions():

 def check_partitions(df, npartitions):
     """Check that all values in `df` hashes to the same"""
-    hashes = partitioning_index(df, npartitions)
+    dtypes = {}
+    for col, dtype in df.dtypes.items():
+        if pd.api.types.is_numeric_dtype(dtype):
+            dtypes[col] = np.float64
+    if not dtypes:
+        dtypes = None
+
+    hashes = partitioning_index(df, npartitions, cast_dtype=dtypes)
     if len(hashes) > 0:
         return len(hashes.unique()) == 1
     else:
@@ -128,11 +135,10 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions):
         worker_class=IncreasedCloseTimeoutNanny,
         processes=True,
     ) as cluster:
-        with Client(cluster) as client:
-            all_workers = list(client.get_worker_logs().keys())
+        with Client(cluster):
             comms.default_comms()
             np.random.seed(42)
-            df = pd.DataFrame({"key": np.random.
+            df = pd.DataFrame({"key": np.random.randint(0, high=100, size=100)})
             if backend == "cudf":
                 df = cudf.DataFrame.from_pandas(df)

@@ -141,15 +147,13 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions):

             for input_nparts in range(1, 5):
                 for output_nparts in range(1, 5):
-                    ddf = dd.from_pandas(df.copy(), npartitions=input_nparts).persist(
-                        workers=all_workers
-                    )
+                    ddf1 = dd.from_pandas(df.copy(), npartitions=input_nparts)
                     # To reduce test runtime, we change the batchsizes here instead
                     # of using a test parameter.
                     for batchsize in (-1, 1, 2):
                         with dask.config.set(explicit_comms_batchsize=batchsize):
                             ddf = explicit_comms_shuffle(
-                                ddf,
+                                ddf1,
                                 ["_partitions"] if _partitions else ["key"],
                                 npartitions=output_nparts,
                                 batchsize=batchsize,
@@ -177,6 +181,32 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions):
                             got = ddf.compute().sort_values("key")
                             assert_eq(got, expected)

+                            # Check that partitioning is consistent with "tasks"
+                            ddf_tasks = ddf1.shuffle(
+                                ["key"],
+                                npartitions=output_nparts,
+                                shuffle_method="tasks",
+                            )
+                            for i in range(output_nparts):
+                                expected_partition = ddf_tasks.partitions[
+                                    i
+                                ].compute()["key"]
+                                actual_partition = ddf.partitions[i].compute()[
+                                    "key"
+                                ]
+                                if backend == "cudf":
+                                    expected_partition = (
+                                        expected_partition.values_host
+                                    )
+                                    actual_partition = actual_partition.values_host
+                                else:
+                                    expected_partition = expected_partition.values
+                                    actual_partition = actual_partition.values
+                                assert all(
+                                    np.sort(expected_partition)
+                                    == np.sort(actual_partition)
+                                )
+

 @pytest.mark.parametrize("nworkers", [1, 2, 3])
 @pytest.mark.parametrize("backend", ["pandas", "cudf"])
|
@@ -500,6 +500,54 @@ async def test_worker_fraction_limits():
|
|
|
500
500
|
)
|
|
501
501
|
|
|
502
502
|
|
|
503
|
+
@gen_test(timeout=20)
|
|
504
|
+
async def test_cudf_spill_disabled():
|
|
505
|
+
cudf = pytest.importorskip("cudf")
|
|
506
|
+
|
|
507
|
+
async with LocalCUDACluster(
|
|
508
|
+
asynchronous=True,
|
|
509
|
+
) as cluster:
|
|
510
|
+
async with Client(cluster, asynchronous=True) as client:
|
|
511
|
+
cudf_spill = await client.run(
|
|
512
|
+
cudf.get_option,
|
|
513
|
+
"spill",
|
|
514
|
+
)
|
|
515
|
+
for v in cudf_spill.values():
|
|
516
|
+
assert v is False
|
|
517
|
+
|
|
518
|
+
cudf_spill_stats = await client.run(
|
|
519
|
+
cudf.get_option,
|
|
520
|
+
"spill_stats",
|
|
521
|
+
)
|
|
522
|
+
for v in cudf_spill_stats.values():
|
|
523
|
+
assert v == 0
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
@gen_test(timeout=20)
|
|
527
|
+
async def test_cudf_spill():
|
|
528
|
+
cudf = pytest.importorskip("cudf")
|
|
529
|
+
|
|
530
|
+
async with LocalCUDACluster(
|
|
531
|
+
enable_cudf_spill=True,
|
|
532
|
+
cudf_spill_stats=2,
|
|
533
|
+
asynchronous=True,
|
|
534
|
+
) as cluster:
|
|
535
|
+
async with Client(cluster, asynchronous=True) as client:
|
|
536
|
+
cudf_spill = await client.run(
|
|
537
|
+
cudf.get_option,
|
|
538
|
+
"spill",
|
|
539
|
+
)
|
|
540
|
+
for v in cudf_spill.values():
|
|
541
|
+
assert v is True
|
|
542
|
+
|
|
543
|
+
cudf_spill_stats = await client.run(
|
|
544
|
+
cudf.get_option,
|
|
545
|
+
"spill_stats",
|
|
546
|
+
)
|
|
547
|
+
for v in cudf_spill_stats.values():
|
|
548
|
+
assert v == 2
|
|
549
|
+
|
|
550
|
+
|
|
503
551
|
@pytest.mark.parametrize(
|
|
504
552
|
"protocol",
|
|
505
553
|
["ucx", "ucxx"],
|
|
dask_cuda/tests/test_version.py
ADDED
@@ -0,0 +1,12 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import dask_cuda
+
+
+def test_version_constants_are_populated():
+    # __git_commit__ will only be non-empty in a built distribution
+    assert isinstance(dask_cuda.__git_commit__, str)
+
+    # __version__ should always be non-empty
+    assert isinstance(dask_cuda.__version__, str)
+    assert len(dask_cuda.__version__) > 0
{dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dask-cuda
-Version: 24.6.0
+Version: 24.8.2
 Summary: Utilities for Dask and CUDA interactions
 Author: NVIDIA Corporation
 License: Apache 2.0
@@ -18,25 +18,25 @@ Classifier: Programming Language :: Python :: 3.11
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: click
-Requires-Dist: numba
-Requires-Dist: numpy
-Requires-Dist: pandas
-Requires-Dist: pynvml
-Requires-Dist: rapids-dask-dependency
-Requires-Dist: zict
+Requires-Dist: click>=8.1
+Requires-Dist: numba>=0.57
+Requires-Dist: numpy<2.0a0,>=1.23
+Requires-Dist: pandas>=1.3
+Requires-Dist: pynvml<11.5,>=11.0.0
+Requires-Dist: rapids-dask-dependency==24.8.*
+Requires-Dist: zict>=2.0.0
 Provides-Extra: docs
-Requires-Dist: numpydoc
-Requires-Dist: sphinx
-Requires-Dist: sphinx-click
-Requires-Dist: sphinx-rtd-theme
+Requires-Dist: numpydoc>=1.1.0; extra == "docs"
+Requires-Dist: sphinx; extra == "docs"
+Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
+Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
 Provides-Extra: test
-Requires-Dist: cudf
-Requires-Dist: dask-cudf
-Requires-Dist: kvikio
-Requires-Dist: pytest
-Requires-Dist: pytest-cov
-Requires-Dist: ucx-py
+Requires-Dist: cudf==24.8.*; extra == "test"
+Requires-Dist: dask-cudf==24.8.*; extra == "test"
+Requires-Dist: kvikio==24.8.*; extra == "test"
+Requires-Dist: pytest; extra == "test"
+Requires-Dist: pytest-cov; extra == "test"
+Requires-Dist: ucx-py==0.39.*; extra == "test"

 Dask CUDA
 =========
{dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/RECORD
CHANGED
@@ -1,16 +1,16 @@
-dask_cuda/VERSION,sha256=
+dask_cuda/VERSION,sha256=5YtjwV2EoD7E5Ed4K-PvnU0eEtdkkn33JHuNFDy8oKA,8
 dask_cuda/__init__.py,sha256=JLDWev7vI_dPusLgRdOwXBz-xfhlX_hc-DzmLtrEYO0,1918
-dask_cuda/_version.py,sha256=
-dask_cuda/cli.py,sha256=
-dask_cuda/cuda_worker.py,sha256=
+dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
+dask_cuda/cli.py,sha256=Y3aObfAyMwOIo0oVz3-NC2InGLShOpeINwW5ROTF2s8,16616
+dask_cuda/cuda_worker.py,sha256=uqyoDKsSe7sKN3StMVyz_971rj0Sjpmwfv7Bj083Wss,8959
 dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
 dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
 dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
 dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
 dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
 dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
-dask_cuda/local_cuda_cluster.py,sha256=
-dask_cuda/plugins.py,sha256=
+dask_cuda/local_cuda_cluster.py,sha256=jgXjd6OvEDfQ3iXU8hV_UfULa13GZsli0SGC2PIouZk,18882
+dask_cuda/plugins.py,sha256=DCf7PnIBu_VNjFfrFeb1zCNuEnCaX9oz4Umn76t02Mc,4630
 dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
 dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
 dask_cuda/proxy_object.py,sha256=bZq92kjgFB-ad_luSAFT_RItV3nssmiEk4OOSp34laU,29812
@@ -18,36 +18,37 @@ dask_cuda/utils.py,sha256=RWlLK2cPHaCuNNhr8bW8etBeGklwREQJOafQbTydStk,25121
 dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
 dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
 dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/benchmarks/common.py,sha256=
+dask_cuda/benchmarks/common.py,sha256=2MnDdQjvHfGaUWDgiTcTGI_EeKPmVBEwoWfsJUNpOjU,6613
 dask_cuda/benchmarks/local_cudf_groupby.py,sha256=T9lA9nb4Wzu46AH--SJEVCeCm3650J7slapdNR_08FU,8904
 dask_cuda/benchmarks/local_cudf_merge.py,sha256=AsuVnMA3H93sJwjjgi4KaIdYKnnX1OeRMPiXizrwHGk,12577
 dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=2xWJZf3gwDNimXKZN2ivtU3OE_qec1KNOhgL4_AGQZU,8655
 dask_cuda/benchmarks/local_cupy.py,sha256=aUKIYfeR7c77K4kKk697Rxo8tG8kFabQ9jQEVGr-oTs,10762
 dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=_texYmam1K_XbzIvURltui5KRsISGFNylXiGUtgRIz0,6442
-dask_cuda/benchmarks/utils.py,sha256=
+dask_cuda/benchmarks/utils.py,sha256=4k8KnJPOczKDQNBPRWlaGsU2zdEA09BDGgklUXggwMU,30008
 dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dask_cuda/explicit_comms/comms.py,sha256=Su6PuNo68IyS-AwoqU4S9TmqWsLvUdNa0jot2hx8jQQ,10400
 dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=
-dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=
-dask_cuda/tests/test_dask_cuda_worker.py,sha256=
+dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=4xfhfbTGa36YPs_ex1_fFhzfGMYJq-QkS5q0RwgeHh8,20645
+dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
+dask_cuda/tests/test_dask_cuda_worker.py,sha256=o5g0_t-2M_2lfPeOPTS4NVF4rnQF0ZWAZekXw2h0xPc,19610
 dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
 dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
-dask_cuda/tests/test_explicit_comms.py,sha256=
+dask_cuda/tests/test_explicit_comms.py,sha256=Pa5vVx63qWtScnVJuS31WESXIt2FPyTJVFO-0OUbbmU,15276
 dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
 dask_cuda/tests/test_gds.py,sha256=6jf0HPTHAIG8Mp_FC4Ai4zpn-U1K7yk0fSXg8He8-r8,1513
 dask_cuda/tests/test_initialize.py,sha256=Rba59ZbljEm1yyN94_sWZPEE_f7hWln95aiBVc49pmY,6960
-dask_cuda/tests/test_local_cuda_cluster.py,sha256=
+dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
 dask_cuda/tests/test_proxify_host_file.py,sha256=Yiv0sDcUoWw0d2oiPeHGoHqqSSM4lfQ4rChCiaxb6EU,18994
 dask_cuda/tests/test_proxy.py,sha256=OnGnPkl5ksCb-3hpEKG2z1OfPK9DbnOCtBHOjcUUjhg,23809
 dask_cuda/tests/test_spill.py,sha256=xN9PbVERBYMuZxvscSO0mAM22loq9WT3ltZVBFxlmM4,10239
 dask_cuda/tests/test_utils.py,sha256=JRIwXfemc3lWSzLJX0VcvR1_0wB4yeoOTsw7kB6z6pU,9176
+dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
 dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
 examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
 examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
-dask_cuda-24.6.0.dist-info/LICENSE,sha256=
-dask_cuda-24.6.0.dist-info/METADATA,sha256=
-dask_cuda-24.6.0.dist-info/WHEEL,sha256=
-dask_cuda-24.6.0.dist-info/entry_points.txt,sha256=
-dask_cuda-24.6.0.dist-info/top_level.txt,sha256=
-dask_cuda-24.6.0.dist-info/RECORD,,
+dask_cuda-24.8.2.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
+dask_cuda-24.8.2.dist-info/METADATA,sha256=6iMwPI8cWrEYDYz73vm8pw-LkVeEgTQzymJgRxj32VQ,2546
+dask_cuda-24.8.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+dask_cuda-24.8.2.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+dask_cuda-24.8.2.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
+dask_cuda-24.8.2.dist-info/RECORD,,

{dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/LICENSE
File without changes

{dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/entry_points.txt
File without changes

{dask_cuda-24.6.0.dist-info → dask_cuda-24.8.2.dist-info}/top_level.txt
File without changes