dask-cuda 24.4.0__py3-none-any.whl → 24.8.2__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
dask_cuda/VERSION CHANGED
@@ -1 +1 @@
- 24.04.00
+ 24.08.02
dask_cuda/__init__.py CHANGED
@@ -20,6 +20,18 @@ from .local_cuda_cluster import LocalCUDACluster
  from .proxify_device_objects import proxify_decorator, unproxify_decorator


+ if dask.config.get("dataframe.query-planning", None) is not False and dask.config.get(
+     "explicit-comms", False
+ ):
+     raise NotImplementedError(
+         "The 'explicit-comms' config is not yet supported when "
+         "query-planning is enabled in dask. Please use the shuffle "
+         "API directly, or use the legacy dask-dataframe API "
+         "(set the 'dataframe.query-planning' config to `False`"
+         "before importing `dask.dataframe`).",
+     )
+
+
  # Monkey patching Dask to make use of explicit-comms when `DASK_EXPLICIT_COMMS=True`
  dask.dataframe.shuffle.rearrange_by_column = get_rearrange_by_column_wrapper(
      dask.dataframe.shuffle.rearrange_by_column
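
The new guard refuses to combine explicit-comms with dask's query-planning (dask-expr) backend. A minimal sketch of the workaround the error message describes, assuming dask and dask-cuda are installed: disable query planning before `dask.dataframe` is imported, then enable explicit-comms as before.

```python
import dask

# Must run before dask.dataframe is imported anywhere in the process.
dask.config.set({"dataframe.query-planning": False})

import dask.dataframe as dd  # noqa: E402
import dask_cuda  # noqa: E402,F401

# The monkey-patched explicit-comms shuffle path can then be enabled as before.
dask.config.set({"explicit-comms": True})
```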
dask_cuda/_version.py CHANGED
@@ -15,6 +15,16 @@
  import importlib.resources

  __version__ = (
-     importlib.resources.files("dask_cuda").joinpath("VERSION").read_text().strip()
+     importlib.resources.files(__package__).joinpath("VERSION").read_text().strip()
  )
- __git_commit__ = "7ed67359e2264393e97ad57fc39829c3a69aa1f3"
+ try:
+     __git_commit__ = (
+         importlib.resources.files(__package__)
+         .joinpath("GIT_COMMIT")
+         .read_text()
+         .strip()
+     )
+ except FileNotFoundError:
+     __git_commit__ = ""
+
+ __all__ = ["__git_commit__", "__version__"]
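
The version metadata is now read from packaged data files rather than hard-coded. A small sketch of how the rewritten module behaves at runtime, assuming the wheel is installed:

```python
import dask_cuda

print(dask_cuda.__version__)     # contents of the packaged VERSION file, e.g. "24.08.02"
print(dask_cuda.__git_commit__)  # contents of GIT_COMMIT, or "" when that file is absent
```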
dask_cuda/benchmarks/common.py CHANGED
@@ -117,16 +117,18 @@ def run(client: Client, args: Namespace, config: Config):
      wait_for_cluster(client, shutdown_on_failure=True)
      assert len(client.scheduler_info()["workers"]) > 0
      setup_memory_pools(
-         client,
-         args.type == "gpu",
-         args.rmm_pool_size,
-         args.disable_rmm_pool,
-         args.enable_rmm_async,
-         args.enable_rmm_managed,
-         args.rmm_release_threshold,
-         args.rmm_log_directory,
-         args.enable_rmm_statistics,
-         args.enable_rmm_track_allocations,
+         client=client,
+         is_gpu=args.type == "gpu",
+         disable_rmm=args.disable_rmm,
+         disable_rmm_pool=args.disable_rmm_pool,
+         pool_size=args.rmm_pool_size,
+         maximum_pool_size=args.rmm_maximum_pool_size,
+         rmm_async=args.enable_rmm_async,
+         rmm_managed=args.enable_rmm_managed,
+         release_threshold=args.rmm_release_threshold,
+         log_directory=args.rmm_log_directory,
+         statistics=args.enable_rmm_statistics,
+         rmm_track_allocations=args.enable_rmm_track_allocations,
      )
      address_to_index, results, message_data = gather_bench_results(client, args, config)
      p2p_bw = peer_to_peer_bandwidths(message_data, address_to_index)
dask_cuda/benchmarks/local_cudf_merge.py CHANGED
@@ -7,8 +7,7 @@ import numpy as np
  import pandas as pd

  import dask
- from dask.base import tokenize
- from dask.dataframe.core import new_dd_object
+ import dask.dataframe as dd
  from dask.distributed import performance_report, wait
  from dask.utils import format_bytes, parse_bytes

@@ -25,12 +24,20 @@ from dask_cuda.benchmarks.utils import (
  # <https://gist.github.com/rjzamora/0ffc35c19b5180ab04bbf7c793c45955>


- def generate_chunk(i_chunk, local_size, num_chunks, chunk_type, frac_match, gpu):
+ # Set default shuffle method to "tasks"
+ if dask.config.get("dataframe.shuffle.method", None) is None:
+     dask.config.set({"dataframe.shuffle.method": "tasks"})
+
+
+ def generate_chunk(input):
+     i_chunk, local_size, num_chunks, chunk_type, frac_match, gpu = input
+
      # Setting a seed that triggers max amount of comm in the two-GPU case.
      if gpu:
          import cupy as xp

          import cudf as xdf
+         import dask_cudf  # noqa: F401
      else:
          import numpy as xp
          import pandas as xdf
@@ -105,25 +112,25 @@ def get_random_ddf(chunk_size, num_chunks, frac_match, chunk_type, args):

      parts = [chunk_size for _ in range(num_chunks)]
      device_type = True if args.type == "gpu" else False
-     meta = generate_chunk(0, 4, 1, chunk_type, None, device_type)
+     meta = generate_chunk((0, 4, 1, chunk_type, None, device_type))
      divisions = [None] * (len(parts) + 1)

-     name = "generate-data-" + tokenize(chunk_size, num_chunks, frac_match, chunk_type)
-
-     graph = {
-         (name, i): (
-             generate_chunk,
-             i,
-             part,
-             len(parts),
-             chunk_type,
-             frac_match,
-             device_type,
-         )
-         for i, part in enumerate(parts)
-     }
-
-     ddf = new_dd_object(graph, name, meta, divisions)
+     ddf = dd.from_map(
+         generate_chunk,
+         [
+             (
+                 i,
+                 part,
+                 len(parts),
+                 chunk_type,
+                 frac_match,
+                 device_type,
+             )
+             for i, part in enumerate(parts)
+         ],
+         meta=meta,
+         divisions=divisions,
+     )

      if chunk_type == "build":
          if not args.no_shuffle:
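
The hand-built task graph is replaced with `dask.dataframe.from_map`, which creates one partition per element of the input list. A self-contained sketch of the same pattern (the `make_part` helper here is hypothetical, standing in for `generate_chunk`):

```python
import pandas as pd

import dask.dataframe as dd


def make_part(args):
    # One tuple of parameters in, one partition out.
    start, length = args
    return pd.DataFrame({"key": range(start, start + length)})


ddf = dd.from_map(
    make_part,
    [(0, 4), (4, 4), (8, 4)],  # one element per output partition
    meta=make_part((0, 4)),    # schema shared by all partitions
    divisions=[None] * 4,      # unknown divisions: len(parts) + 1 entries
)
```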
dask_cuda/benchmarks/local_cudf_shuffle.py CHANGED
@@ -8,8 +8,6 @@ import pandas as pd

  import dask
  import dask.dataframe
- from dask.dataframe.core import new_dd_object
- from dask.dataframe.shuffle import shuffle
  from dask.distributed import Client, performance_report, wait
  from dask.utils import format_bytes, parse_bytes

@@ -33,7 +31,7 @@ except ImportError:


  def shuffle_dask(df, args):
-     result = shuffle(df, index="data", shuffle="tasks", ignore_index=args.ignore_index)
+     result = df.shuffle("data", shuffle_method="tasks", ignore_index=args.ignore_index)
      if args.backend == "dask-noop":
          result = as_noop(result)
      t1 = perf_counter()
@@ -94,18 +92,24 @@ def create_data(
      )

      # Create partition based to the specified partition distribution
-     dsk = {}
+     futures = []
      for i, part_size in enumerate(dist):
          for _ in range(part_size):
              # We use `client.submit` to control placement of the partition.
-             dsk[(name, len(dsk))] = client.submit(
-                 create_df, chunksize, args.type, workers=[workers[i]], pure=False
+             futures.append(
+                 client.submit(
+                     create_df, chunksize, args.type, workers=[workers[i]], pure=False
+                 )
              )
-     wait(dsk.values())
+     wait(futures)

      df_meta = create_df(0, args.type)
-     divs = [None] * (len(dsk) + 1)
-     ret = new_dd_object(dsk, name, df_meta, divs).persist()
+     divs = [None] * (len(futures) + 1)
+     ret = dask.dataframe.from_delayed(
+         futures,
+         meta=df_meta,
+         divisions=divs,
+     ).persist()
      wait(ret)

      data_processed = args.in_parts * args.partition_size
@@ -254,7 +258,9 @@ def parse_args():
      ]

      return parse_benchmark_args(
-         description="Distributed shuffle (dask/cudf) benchmark", args_list=special_args
+         description="Distributed shuffle (dask/cudf) benchmark",
+         args_list=special_args,
+         check_explicit_comms=False,
      )


dask_cuda/benchmarks/utils.py CHANGED
@@ -11,11 +11,13 @@ from typing import Any, Callable, Mapping, NamedTuple, Optional, Tuple
  import numpy as np
  import pandas as pd

+ from dask import config
  from dask.distributed import Client, SSHCluster
  from dask.utils import format_bytes, format_time, parse_bytes
  from distributed.comm.addressing import get_address_host

  from dask_cuda.local_cuda_cluster import LocalCUDACluster
+ from dask_cuda.utils import parse_device_memory_limit


  def as_noop(dsk):
@@ -47,7 +49,11 @@ def as_noop(dsk):
      raise RuntimeError("Requested noop computation but dask-noop not installed.")


- def parse_benchmark_args(description="Generic dask-cuda Benchmark", args_list=[]):
+ def parse_benchmark_args(
+     description="Generic dask-cuda Benchmark",
+     args_list=[],
+     check_explicit_comms=True,
+ ):
      parser = argparse.ArgumentParser(description=description)
      worker_args = parser.add_argument_group(description="Worker configuration")
      worker_args.add_argument(
@@ -88,15 +94,41 @@ def parse_benchmark_args(description="Generic dask-cuda Benchmark", args_list=[]
          "'forkserver' can be used to avoid issues with fork not being allowed "
          "after the networking stack has been initialised.",
      )
+     cluster_args.add_argument(
+         "--disable-rmm",
+         action="store_true",
+         help="Disable RMM.",
+     )
+     cluster_args.add_argument(
+         "--disable-rmm-pool",
+         action="store_true",
+         help="Uses RMM for allocations but without a memory pool.",
+     )
      cluster_args.add_argument(
          "--rmm-pool-size",
          default=None,
          type=parse_bytes,
          help="The size of the RMM memory pool. Can be an integer (bytes) or a string "
-         "(like '4GB' or '5000M'). By default, 1/2 of the total GPU memory is used.",
+         "(like '4GB' or '5000M'). By default, 1/2 of the total GPU memory is used."
+         ""
+         ".. note::"
+         " This size is a per-worker configuration, and not cluster-wide.",
      )
      cluster_args.add_argument(
-         "--disable-rmm-pool", action="store_true", help="Disable the RMM memory pool"
+         "--rmm-maximum-pool-size",
+         default=None,
+         help="When ``--rmm-pool-size`` is specified, this argument indicates the "
+         "maximum pool size. Can be an integer (bytes), or a string (like '4GB' or "
+         "'5000M'). By default, the total available memory on the GPU is used. "
+         "``rmm_pool_size`` must be specified to use RMM pool and to set the maximum "
+         "pool size."
+         ""
+         ".. note::"
+         " When paired with `--enable-rmm-async` the maximum size cannot be "
+         " guaranteed due to fragmentation."
+         ""
+         ".. note::"
+         " This size is a per-worker configuration, and not cluster-wide.",
      )
      cluster_args.add_argument(
          "--enable-rmm-managed",
@@ -317,6 +349,24 @@ def parse_benchmark_args(description="Generic dask-cuda Benchmark", args_list=[]
      if args.multi_node and len(args.hosts.split(",")) < 2:
          raise ValueError("--multi-node requires at least 2 hosts")

+     # Raise error early if "explicit-comms" is not allowed
+     if (
+         check_explicit_comms
+         and args.backend == "explicit-comms"
+         and config.get(
+             "dataframe.query-planning",
+             None,
+         )
+         is not False
+     ):
+         raise NotImplementedError(
+             "The 'explicit-comms' config is not yet supported when "
+             "query-planning is enabled in dask. Please use the legacy "
+             "dask-dataframe API by setting the following environment "
+             "variable before executing:",
+             " DASK_DATAFRAME__QUERY_PLANNING=False",
+         )
+
      return args


@@ -384,10 +434,29 @@ def get_worker_device():
      return -1


+ def setup_rmm_resources(statistics=False, rmm_track_allocations=False):
+     import cupy
+
+     import rmm
+     from rmm.allocators.cupy import rmm_cupy_allocator
+
+     cupy.cuda.set_allocator(rmm_cupy_allocator)
+     if statistics:
+         rmm.mr.set_current_device_resource(
+             rmm.mr.StatisticsResourceAdaptor(rmm.mr.get_current_device_resource())
+         )
+     if rmm_track_allocations:
+         rmm.mr.set_current_device_resource(
+             rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource())
+         )
+
+
  def setup_memory_pool(
      dask_worker=None,
+     disable_rmm=None,
+     disable_rmm_pool=None,
      pool_size=None,
-     disable_pool=False,
+     maximum_pool_size=None,
      rmm_async=False,
      rmm_managed=False,
      release_threshold=None,
@@ -395,45 +464,66 @@ def setup_memory_pool(
      statistics=False,
      rmm_track_allocations=False,
  ):
-     import cupy
-
      import rmm
-     from rmm.allocators.cupy import rmm_cupy_allocator

      from dask_cuda.utils import get_rmm_log_file_name

      logging = log_directory is not None

-     if rmm_async:
-         rmm.mr.set_current_device_resource(
-             rmm.mr.CudaAsyncMemoryResource(
-                 initial_pool_size=pool_size, release_threshold=release_threshold
-             )
-         )
-     else:
-         rmm.reinitialize(
-             pool_allocator=not disable_pool,
-             managed_memory=rmm_managed,
-             initial_pool_size=pool_size,
-             logging=logging,
-             log_file_name=get_rmm_log_file_name(dask_worker, logging, log_directory),
-         )
-     cupy.cuda.set_allocator(rmm_cupy_allocator)
-     if statistics:
-         rmm.mr.set_current_device_resource(
-             rmm.mr.StatisticsResourceAdaptor(rmm.mr.get_current_device_resource())
+     if pool_size is not None:
+         pool_size = parse_device_memory_limit(pool_size, alignment_size=256)
+
+     if maximum_pool_size is not None:
+         maximum_pool_size = parse_device_memory_limit(
+             maximum_pool_size, alignment_size=256
          )
-     if rmm_track_allocations:
-         rmm.mr.set_current_device_resource(
-             rmm.mr.TrackingResourceAdaptor(rmm.mr.get_current_device_resource())
+
+     if release_threshold is not None:
+         release_threshold = parse_device_memory_limit(
+             release_threshold, alignment_size=256
          )

+     if not disable_rmm:
+         if rmm_async:
+             mr = rmm.mr.CudaAsyncMemoryResource(
+                 initial_pool_size=pool_size,
+                 release_threshold=release_threshold,
+             )
+
+             if maximum_pool_size is not None:
+                 mr = rmm.mr.LimitingResourceAdaptor(
+                     mr, allocation_limit=maximum_pool_size
+                 )
+
+             rmm.mr.set_current_device_resource(mr)
+
+             setup_rmm_resources(
+                 statistics=statistics, rmm_track_allocations=rmm_track_allocations
+             )
+         else:
+             rmm.reinitialize(
+                 pool_allocator=not disable_rmm_pool,
+                 managed_memory=rmm_managed,
+                 initial_pool_size=pool_size,
+                 maximum_pool_size=maximum_pool_size,
+                 logging=logging,
+                 log_file_name=get_rmm_log_file_name(
+                     dask_worker, logging, log_directory
+                 ),
+             )
+
+             setup_rmm_resources(
+                 statistics=statistics, rmm_track_allocations=rmm_track_allocations
+             )
+

  def setup_memory_pools(
      client,
      is_gpu,
+     disable_rmm,
+     disable_rmm_pool,
      pool_size,
-     disable_pool,
+     maximum_pool_size,
      rmm_async,
      rmm_managed,
      release_threshold,
@@ -445,8 +535,10 @@ def setup_memory_pools(
          return
      client.run(
          setup_memory_pool,
+         disable_rmm=disable_rmm,
+         disable_rmm_pool=disable_rmm_pool,
          pool_size=pool_size,
-         disable_pool=disable_pool,
+         maximum_pool_size=maximum_pool_size,
          rmm_async=rmm_async,
          rmm_managed=rmm_managed,
          release_threshold=release_threshold,
@@ -459,7 +551,9 @@ def setup_memory_pools(
      client.run_on_scheduler(
          setup_memory_pool,
          pool_size=1e9,
-         disable_pool=disable_pool,
+         disable_rmm=disable_rmm,
+         disable_rmm_pool=disable_rmm_pool,
+         maximum_pool_size=maximum_pool_size,
          rmm_async=rmm_async,
          rmm_managed=rmm_managed,
          release_threshold=release_threshold,
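
In the rewritten `setup_memory_pool`, the async path builds the memory resource explicitly and only caps it when a maximum pool size was requested. A hedged sketch of that pattern using RMM directly (the sizes are illustrative):

```python
import rmm

# Async pool with an initial size and a release threshold...
mr = rmm.mr.CudaAsyncMemoryResource(
    initial_pool_size=2 * 1024**3,
    release_threshold=4 * 1024**3,
)
# ...optionally capped by a LimitingResourceAdaptor, as done above.
mr = rmm.mr.LimitingResourceAdaptor(mr, allocation_limit=8 * 1024**3)
rmm.mr.set_current_device_resource(mr)
```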
dask_cuda/cli.py CHANGED
@@ -101,6 +101,20 @@ def cuda():
      total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"`` or 0 to
      disable spilling to host (i.e. allow full device memory usage).""",
  )
+ @click.option(
+     "--enable-cudf-spill/--disable-cudf-spill",
+     default=False,
+     show_default=True,
+     help="""Enable automatic cuDF spilling. WARNING: This should NOT be used with
+     JIT-Unspill.""",
+ )
+ @click.option(
+     "--cudf-spill-stats",
+     type=int,
+     default=0,
+     help="""Set the cuDF spilling statistics level. This option has no effect if
+     `--enable-cudf-spill` is not specified.""",
+ )
  @click.option(
      "--rmm-pool-size",
      default=None,
@@ -120,6 +134,10 @@ def cuda():
      memory on the GPU is used. ``rmm_pool_size`` must be specified to use RMM pool and
      to set the maximum pool size.

+     .. note::
+         When paired with `--enable-rmm-async` the maximum size cannot be guaranteed due
+         to fragmentation.
+
      .. note::
          This size is a per-worker configuration, and not cluster-wide.""",
  )
@@ -326,6 +344,8 @@ def worker(
      name,
      memory_limit,
      device_memory_limit,
+     enable_cudf_spill,
+     cudf_spill_stats,
      rmm_pool_size,
      rmm_maximum_pool_size,
      rmm_managed_memory,
@@ -398,6 +418,8 @@ def worker(
          name,
          memory_limit,
          device_memory_limit,
+         enable_cudf_spill,
+         cudf_spill_stats,
          rmm_pool_size,
          rmm_maximum_pool_size,
          rmm_managed_memory,
dask_cuda/cuda_worker.py CHANGED
@@ -20,7 +20,7 @@ from distributed.worker_memory import parse_memory_limit

  from .device_host_file import DeviceHostFile
  from .initialize import initialize
- from .plugins import CPUAffinity, PreImport, RMMSetup
+ from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup
  from .proxify_host_file import ProxifyHostFile
  from .utils import (
      cuda_visible_devices,
@@ -41,6 +41,8 @@ class CUDAWorker(Server):
          name=None,
          memory_limit="auto",
          device_memory_limit="auto",
+         enable_cudf_spill=False,
+         cudf_spill_stats=0,
          rmm_pool_size=None,
          rmm_maximum_pool_size=None,
          rmm_managed_memory=False,
@@ -166,6 +168,12 @@ class CUDAWorker(Server):
          if device_memory_limit is None and memory_limit is None:
              data = lambda _: {}
          elif jit_unspill:
+             if enable_cudf_spill:
+                 warnings.warn(
+                     "Enabling cuDF spilling and JIT-Unspill together is not "
+                     "safe, consider disabling JIT-Unspill."
+                 )
+
              data = lambda i: (
                  ProxifyHostFile,
                  {
@@ -217,6 +225,7 @@ class CUDAWorker(Server):
                          track_allocations=rmm_track_allocations,
                      ),
                      PreImport(pre_import),
+                     CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
                  },
                  name=name if nprocs == 1 or name is None else str(name) + "-" + str(i),
                  local_directory=local_directory,
dask_cuda/explicit_comms/dataframe/shuffle.py CHANGED
@@ -8,13 +8,18 @@ from math import ceil
  from operator import getitem
  from typing import Any, Callable, Dict, List, Optional, Set, TypeVar

+ import numpy as np
+ import pandas as pd
+
  import dask
  import dask.config
  import dask.dataframe
+ import dask.dataframe as dd
  import dask.utils
  import distributed.worker
  from dask.base import tokenize
- from dask.dataframe.core import DataFrame, Series, _concat as dd_concat, new_dd_object
+ from dask.dataframe import DataFrame, Series
+ from dask.dataframe.core import _concat as dd_concat
  from dask.dataframe.shuffle import group_split_dispatch, hash_object_dispatch
  from distributed import wait
  from distributed.protocol import nested_deserialize, to_serialize
@@ -153,9 +158,16 @@ def compute_map_index(
      if column_names[0] == "_partitions":
          ind = df[column_names[0]]
      else:
-         ind = hash_object_dispatch(
-             df[column_names] if column_names else df, index=False
-         )
+         # Need to cast numerical dtypes to be consistent
+         # with `dask.dataframe.shuffle.partitioning_index`
+         dtypes = {}
+         index = df[column_names] if column_names else df
+         for col, dtype in index.dtypes.items():
+             if pd.api.types.is_numeric_dtype(dtype):
+                 dtypes[col] = np.float64
+         if dtypes:
+             index = index.astype(dtypes, errors="ignore")
+         ind = hash_object_dispatch(index, index=False)
      return ind % npartitions


@@ -185,15 +197,8 @@ def partition_dataframe(
      partitions
          Dict of dataframe-partitions, mapping partition-ID to dataframe
      """
-     if column_names[0] != "_partitions" and hasattr(df, "partition_by_hash"):
-         return dict(
-             zip(
-                 range(npartitions),
-                 df.partition_by_hash(
-                     column_names, npartitions, keep_index=not ignore_index
-                 ),
-             )
-         )
+     # TODO: Use `partition_by_hash` if/when dtype-casting is added
+     # (See: https://github.com/rapidsai/cudf/issues/16221)
      map_index = compute_map_index(df, column_names, npartitions)
      return group_split_dispatch(df, map_index, npartitions, ignore_index=ignore_index)


@@ -468,18 +473,19 @@ def shuffle(
      npartitions = df.npartitions

      # Step (a):
-     df = df.persist()  # Make sure optimizations are apply on the existing graph
+     df = df.persist()  # Make sure optimizations are applied on the existing graph
      wait([df])  # Make sure all keys has been materialized on workers
+     persisted_keys = [f.key for f in c.client.futures_of(df)]
      name = (
          "explicit-comms-shuffle-"
-         f"{tokenize(df, column_names, npartitions, ignore_index)}"
+         f"{tokenize(df, column_names, npartitions, ignore_index, batchsize)}"
      )
      df_meta: DataFrame = df._meta

      # Stage all keys of `df` on the workers and cancel them, which makes it possible
      # for the shuffle to free memory as the partitions of `df` are consumed.
      # See CommsContext.stage_keys() for a description of staging.
-     rank_to_inkeys = c.stage_keys(name=name, keys=df.__dask_keys__())
+     rank_to_inkeys = c.stage_keys(name=name, keys=persisted_keys)
      c.client.cancel(df)

      # Get batchsize
@@ -526,23 +532,27 @@ def shuffle(
      # TODO: can we do this without using `submit()` to avoid the overhead
      # of creating a Future for each dataframe partition?

-     dsk = {}
+     _futures = {}
      for rank in ranks:
          for part_id in rank_to_out_part_ids[rank]:
-             dsk[(name, part_id)] = c.client.submit(
+             _futures[part_id] = c.client.submit(
                  getitem,
                  shuffle_result[rank],
                  part_id,
                  workers=[c.worker_addresses[rank]],
              )

+     # Make sure partitions are properly ordered
+     futures = [_futures.pop(i) for i in range(npartitions)]
+
      # Create a distributed Dataframe from all the pieces
-     divs = [None] * (len(dsk) + 1)
-     ret = new_dd_object(dsk, name, df_meta, divs).persist()
+     divs = [None] * (len(futures) + 1)
+     kwargs = {"meta": df_meta, "divisions": divs, "prefix": "explicit-comms-shuffle"}
+     ret = dd.from_delayed(futures, **kwargs).persist()
      wait([ret])

      # Release all temporary dataframes
-     for fut in [*shuffle_result.values(), *dsk.values()]:
+     for fut in [*shuffle_result.values(), *futures]:
          fut.release()
      return ret

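The shuffle now orders its output futures explicitly and assembles the result with `dd.from_delayed`. For orientation, a hedged sketch of calling this API directly; it mirrors the tests further down in this diff and assumes a running distributed cluster with dask-cuda installed:

```python
import pandas as pd

import dask.dataframe as dd
from dask.distributed import Client, LocalCluster

from dask_cuda.explicit_comms import comms
from dask_cuda.explicit_comms.dataframe.shuffle import shuffle as explicit_comms_shuffle

with LocalCluster(n_workers=2, threads_per_worker=1) as cluster, Client(cluster):
    comms.default_comms()  # set up the explicit-comms context on the workers
    ddf = dd.from_pandas(pd.DataFrame({"key": range(100)}), npartitions=4)
    shuffled = explicit_comms_shuffle(ddf, ["key"], npartitions=8)
    result = shuffled.compute()
```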
dask_cuda/local_cuda_cluster.py CHANGED
@@ -10,7 +10,7 @@ from distributed.worker_memory import parse_memory_limit

  from .device_host_file import DeviceHostFile
  from .initialize import initialize
- from .plugins import CPUAffinity, PreImport, RMMSetup
+ from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup
  from .proxify_host_file import ProxifyHostFile
  from .utils import (
      cuda_visible_devices,
@@ -73,6 +73,14 @@ class LocalCUDACluster(LocalCluster):
          starts spilling to host memory. Can be an integer (bytes), float (fraction of
          total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"``, 0,
          or ``None`` to disable spilling to host (i.e. allow full device memory usage).
+     enable_cudf_spill : bool, default False
+         Enable automatic cuDF spilling.
+
+         .. warning::
+             This should NOT be used together with JIT-Unspill.
+     cudf_spill_stats : int, default 0
+         Set the cuDF spilling statistics level. This option has no effect if
+         ``enable_cudf_spill=False``.
      local_directory : str or None, default None
          Path on local machine to store temporary files. Can be a string (like
          ``"path/to/files"``) or ``None`` to fall back on the value of
@@ -114,6 +122,10 @@ class LocalCUDACluster(LocalCluster):
          memory on the GPU is used. ``rmm_pool_size`` must be specified to use RMM pool
          and to set the maximum pool size.

+         .. note::
+             When paired with `--enable-rmm-async` the maximum size cannot be guaranteed
+             due to fragmentation.
+
          .. note::
              This size is a per-worker configuration, and not cluster-wide.
      rmm_managed_memory : bool, default False
@@ -205,6 +217,8 @@ class LocalCUDACluster(LocalCluster):
          threads_per_worker=1,
          memory_limit="auto",
          device_memory_limit=0.8,
+         enable_cudf_spill=False,
+         cudf_spill_stats=0,
          data=None,
          local_directory=None,
          shared_filesystem=None,
@@ -255,6 +269,8 @@ class LocalCUDACluster(LocalCluster):
          self.device_memory_limit = parse_device_memory_limit(
              device_memory_limit, device_index=nvml_device_index(0, CUDA_VISIBLE_DEVICES)
          )
+         self.enable_cudf_spill = enable_cudf_spill
+         self.cudf_spill_stats = cudf_spill_stats

          self.rmm_pool_size = rmm_pool_size
          self.rmm_maximum_pool_size = rmm_maximum_pool_size
@@ -298,6 +314,12 @@ class LocalCUDACluster(LocalCluster):
          if device_memory_limit is None and memory_limit is None:
              data = {}
          elif jit_unspill:
+             if enable_cudf_spill:
+                 warnings.warn(
+                     "Enabling cuDF spilling and JIT-Unspill together is not "
+                     "safe, consider disabling JIT-Unspill."
+                 )
+
              data = (
                  ProxifyHostFile,
                  {
@@ -410,6 +432,7 @@ class LocalCUDACluster(LocalCluster):
                          track_allocations=self.rmm_track_allocations,
                      ),
                      PreImport(self.pre_import),
+                     CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats),
                  },
              }
          )
dask_cuda/plugins.py CHANGED
@@ -14,6 +14,21 @@ class CPUAffinity(WorkerPlugin):
          os.sched_setaffinity(0, self.cores)


+ class CUDFSetup(WorkerPlugin):
+     def __init__(self, spill, spill_stats):
+         self.spill = spill
+         self.spill_stats = spill_stats
+
+     def setup(self, worker=None):
+         try:
+             import cudf
+
+             cudf.set_option("spill", self.spill)
+             cudf.set_option("spill_stats", self.spill_stats)
+         except ImportError:
+             pass
+
+
  class RMMSetup(WorkerPlugin):
      def __init__(
          self,
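
The plugin is normally injected by `CUDAWorker` and `LocalCUDACluster` themselves, but as a sketch it can also be registered manually on an existing cluster (the scheduler address below is a placeholder):

```python
from dask.distributed import Client

from dask_cuda.plugins import CUDFSetup

client = Client("tcp://scheduler-address:8786")
client.register_worker_plugin(CUDFSetup(spill=True, spill_stats=1))
```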
dask_cuda/tests/test_cudf_builtin_spilling.py CHANGED
@@ -20,7 +20,7 @@ from cudf.core.buffer.spill_manager import (  # noqa: E402
      get_global_manager,
      set_global_manager,
  )
- from cudf.testing._utils import assert_eq  # noqa: E402
+ from cudf.testing import assert_eq  # noqa: E402

  if get_global_manager() is not None:
      pytest.skip(
dask_cuda/tests/test_dask_cuda_worker.py CHANGED
@@ -231,6 +231,64 @@ def test_rmm_logging(loop):  # noqa: F811
              assert v is rmm.mr.LoggingResourceAdaptor


+ def test_cudf_spill_disabled(loop):  # noqa: F811
+     cudf = pytest.importorskip("cudf")
+     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
+         with popen(
+             [
+                 "dask",
+                 "cuda",
+                 "worker",
+                 "127.0.0.1:9369",
+                 "--host",
+                 "127.0.0.1",
+                 "--no-dashboard",
+             ]
+         ):
+             with Client("127.0.0.1:9369", loop=loop) as client:
+                 assert wait_workers(client, n_gpus=get_n_gpus())
+
+                 cudf_spill = client.run(
+                     cudf.get_option,
+                     "spill",
+                 )
+                 for v in cudf_spill.values():
+                     assert v is False
+
+                 cudf_spill_stats = client.run(cudf.get_option, "spill_stats")
+                 for v in cudf_spill_stats.values():
+                     assert v == 0
+
+
+ def test_cudf_spill(loop):  # noqa: F811
+     cudf = pytest.importorskip("cudf")
+     with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
+         with popen(
+             [
+                 "dask",
+                 "cuda",
+                 "worker",
+                 "127.0.0.1:9369",
+                 "--host",
+                 "127.0.0.1",
+                 "--no-dashboard",
+                 "--enable-cudf-spill",
+                 "--cudf-spill-stats",
+                 "2",
+             ]
+         ):
+             with Client("127.0.0.1:9369", loop=loop) as client:
+                 assert wait_workers(client, n_gpus=get_n_gpus())
+
+                 cudf_spill = client.run(cudf.get_option, "spill")
+                 for v in cudf_spill.values():
+                     assert v is True
+
+                 cudf_spill_stats = client.run(cudf.get_option, "spill_stats")
+                 for v in cudf_spill_stats.values():
+                     assert v == 2
+
+
  @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
  def test_dashboard_address(loop):  # noqa: F811
      with popen(["dask", "scheduler", "--port", "9369", "--no-dashboard"]):
@@ -15,6 +15,10 @@ mp = mp.get_context("spawn") # type: ignore
15
15
  psutil = pytest.importorskip("psutil")
16
16
 
17
17
 
18
+ def _is_ucx_116(ucp):
19
+ return ucp.get_ucx_version()[:2] == (1, 16)
20
+
21
+
18
22
  class DGXVersion(Enum):
19
23
  DGX_1 = auto()
20
24
  DGX_2 = auto()
@@ -102,9 +106,11 @@ def _test_tcp_over_ucx(protocol):
102
106
  )
103
107
  def test_tcp_over_ucx(protocol):
104
108
  if protocol == "ucx":
105
- pytest.importorskip("ucp")
109
+ ucp = pytest.importorskip("ucp")
106
110
  elif protocol == "ucxx":
107
- pytest.importorskip("ucxx")
111
+ ucp = pytest.importorskip("ucxx")
112
+ if _is_ucx_116(ucp):
113
+ pytest.skip("https://github.com/rapidsai/ucx-py/issues/1037")
108
114
 
109
115
  p = mp.Process(target=_test_tcp_over_ucx, args=(protocol,))
110
116
  p.start()
@@ -217,9 +223,11 @@ def _test_ucx_infiniband_nvlink(
217
223
  )
218
224
  def test_ucx_infiniband_nvlink(protocol, params):
219
225
  if protocol == "ucx":
220
- pytest.importorskip("ucp")
226
+ ucp = pytest.importorskip("ucp")
221
227
  elif protocol == "ucxx":
222
- pytest.importorskip("ucxx")
228
+ ucp = pytest.importorskip("ucxx")
229
+ if _is_ucx_116(ucp) and params["enable_infiniband"] is False:
230
+ pytest.skip("https://github.com/rapidsai/ucx-py/issues/1037")
223
231
 
224
232
  skip_queue = mp.Queue()
225
233
 
dask_cuda/tests/test_explicit_comms.py CHANGED
@@ -25,6 +25,22 @@ from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
  mp = mp.get_context("spawn")  # type: ignore
  ucp = pytest.importorskip("ucp")

+ QUERY_PLANNING_ON = dask.config.get("dataframe.query-planning", None) is not False
+
+ # Skip these tests when dask-expr is active (for now)
+ query_planning_skip = pytest.mark.skipif(
+     QUERY_PLANNING_ON,
+     reason=(
+         "The 'explicit-comms' config is not supported "
+         "when query planning is enabled."
+     ),
+ )
+
+ # Set default shuffle method to "tasks"
+ if dask.config.get("dataframe.shuffle.method", None) is None:
+     dask.config.set({"dataframe.shuffle.method": "tasks"})
+
+
  # Notice, all of the following tests is executed in a new process such
  # that UCX options of the different tests doesn't conflict.

@@ -82,6 +98,7 @@ def _test_dataframe_merge_empty_partitions(nrows, npartitions):
      pd.testing.assert_frame_equal(got, expected)


+ @query_planning_skip
  def test_dataframe_merge_empty_partitions():
      # Notice, we use more partitions than rows
      p = mp.Process(target=_test_dataframe_merge_empty_partitions, args=(2, 4))
@@ -92,7 +109,14 @@ def test_dataframe_merge_empty_partitions():

  def check_partitions(df, npartitions):
      """Check that all values in `df` hashes to the same"""
-     hashes = partitioning_index(df, npartitions)
+     dtypes = {}
+     for col, dtype in df.dtypes.items():
+         if pd.api.types.is_numeric_dtype(dtype):
+             dtypes[col] = np.float64
+     if not dtypes:
+         dtypes = None
+
+     hashes = partitioning_index(df, npartitions, cast_dtype=dtypes)
      if len(hashes) > 0:
          return len(hashes.unique()) == 1
      else:
@@ -111,11 +135,10 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions):
          worker_class=IncreasedCloseTimeoutNanny,
          processes=True,
      ) as cluster:
-         with Client(cluster) as client:
-             all_workers = list(client.get_worker_logs().keys())
+         with Client(cluster):
              comms.default_comms()
              np.random.seed(42)
-             df = pd.DataFrame({"key": np.random.random(100)})
+             df = pd.DataFrame({"key": np.random.randint(0, high=100, size=100)})
              if backend == "cudf":
                  df = cudf.DataFrame.from_pandas(df)

@@ -124,15 +147,13 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions):

              for input_nparts in range(1, 5):
                  for output_nparts in range(1, 5):
-                     ddf = dd.from_pandas(df.copy(), npartitions=input_nparts).persist(
-                         workers=all_workers
-                     )
+                     ddf1 = dd.from_pandas(df.copy(), npartitions=input_nparts)
                      # To reduce test runtime, we change the batchsizes here instead
                      # of using a test parameter.
                      for batchsize in (-1, 1, 2):
                          with dask.config.set(explicit_comms_batchsize=batchsize):
                              ddf = explicit_comms_shuffle(
-                                 ddf,
+                                 ddf1,
                                  ["_partitions"] if _partitions else ["key"],
                                  npartitions=output_nparts,
                                  batchsize=batchsize,
@@ -160,6 +181,32 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions):
                              got = ddf.compute().sort_values("key")
                              assert_eq(got, expected)

+                             # Check that partitioning is consistent with "tasks"
+                             ddf_tasks = ddf1.shuffle(
+                                 ["key"],
+                                 npartitions=output_nparts,
+                                 shuffle_method="tasks",
+                             )
+                             for i in range(output_nparts):
+                                 expected_partition = ddf_tasks.partitions[
+                                     i
+                                 ].compute()["key"]
+                                 actual_partition = ddf.partitions[i].compute()[
+                                     "key"
+                                 ]
+                                 if backend == "cudf":
+                                     expected_partition = (
+                                         expected_partition.values_host
+                                     )
+                                     actual_partition = actual_partition.values_host
+                                 else:
+                                     expected_partition = expected_partition.values
+                                     actual_partition = actual_partition.values
+                                 assert all(
+                                     np.sort(expected_partition)
+                                     == np.sort(actual_partition)
+                                 )
+

  @pytest.mark.parametrize("nworkers", [1, 2, 3])
  @pytest.mark.parametrize("backend", ["pandas", "cudf"])
@@ -220,6 +267,7 @@ def _test_dask_use_explicit_comms(in_cluster):
      check_shuffle()


+ @query_planning_skip
  @pytest.mark.parametrize("in_cluster", [True, False])
  def test_dask_use_explicit_comms(in_cluster):
      def _timeout(process, function, timeout):
@@ -282,6 +330,7 @@ def _test_dataframe_shuffle_merge(backend, protocol, n_workers):
      assert_eq(got, expected)


+ @query_planning_skip
  @pytest.mark.parametrize("nworkers", [1, 2, 4])
  @pytest.mark.parametrize("backend", ["pandas", "cudf"])
  @pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
dask_cuda/tests/test_local_cuda_cluster.py CHANGED
@@ -500,6 +500,54 @@ async def test_worker_fraction_limits():
      )


+ @gen_test(timeout=20)
+ async def test_cudf_spill_disabled():
+     cudf = pytest.importorskip("cudf")
+
+     async with LocalCUDACluster(
+         asynchronous=True,
+     ) as cluster:
+         async with Client(cluster, asynchronous=True) as client:
+             cudf_spill = await client.run(
+                 cudf.get_option,
+                 "spill",
+             )
+             for v in cudf_spill.values():
+                 assert v is False
+
+             cudf_spill_stats = await client.run(
+                 cudf.get_option,
+                 "spill_stats",
+             )
+             for v in cudf_spill_stats.values():
+                 assert v == 0
+
+
+ @gen_test(timeout=20)
+ async def test_cudf_spill():
+     cudf = pytest.importorskip("cudf")
+
+     async with LocalCUDACluster(
+         enable_cudf_spill=True,
+         cudf_spill_stats=2,
+         asynchronous=True,
+     ) as cluster:
+         async with Client(cluster, asynchronous=True) as client:
+             cudf_spill = await client.run(
+                 cudf.get_option,
+                 "spill",
+             )
+             for v in cudf_spill.values():
+                 assert v is True
+
+             cudf_spill_stats = await client.run(
+                 cudf.get_option,
+                 "spill_stats",
+             )
+             for v in cudf_spill_stats.values():
+                 assert v == 2
+
+
  @pytest.mark.parametrize(
      "protocol",
      ["ucx", "ucxx"],
dask_cuda/tests/test_proxy.py CHANGED
@@ -537,10 +537,10 @@ def test_from_cudf_of_proxy_object():
      assert has_parallel_type(df)

      ddf = dask_cudf.from_cudf(df, npartitions=1)
-     assert has_parallel_type(ddf)
+     assert has_parallel_type(ddf._meta)

      # Notice, the output is a dask-cudf dataframe and not a proxy object
-     assert type(ddf) is dask_cudf.core.DataFrame
+     assert type(ddf._meta) is cudf.DataFrame


  def test_proxy_object_parquet(tmp_path):
dask_cuda/tests/test_version.py ADDED
@@ -0,0 +1,12 @@
+ # Copyright (c) 2024, NVIDIA CORPORATION.
+
+ import dask_cuda
+
+
+ def test_version_constants_are_populated():
+     # __git_commit__ will only be non-empty in a built distribution
+     assert isinstance(dask_cuda.__git_commit__, str)
+
+     # __version__ should always be non-empty
+     assert isinstance(dask_cuda.__version__, str)
+     assert len(dask_cuda.__version__) > 0
@@ -1,9 +1,9 @@
  Metadata-Version: 2.1
  Name: dask-cuda
- Version: 24.4.0
+ Version: 24.8.2
  Summary: Utilities for Dask and CUDA interactions
  Author: NVIDIA Corporation
- License: Apache-2.0
+ License: Apache 2.0
  Project-URL: Homepage, https://github.com/rapidsai/dask-cuda
  Project-URL: Documentation, https://docs.rapids.ai/api/dask-cuda/stable/
  Project-URL: Source, https://github.com/rapidsai/dask-cuda
@@ -18,25 +18,25 @@ Classifier: Programming Language :: Python :: 3.11
  Requires-Python: >=3.9
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: click >=8.1
- Requires-Dist: numba >=0.57
- Requires-Dist: numpy <2.0a0,>=1.23
- Requires-Dist: pandas >=1.3
- Requires-Dist: pynvml <11.5,>=11.0.0
- Requires-Dist: rapids-dask-dependency ==24.4.*
- Requires-Dist: zict >=2.0.0
+ Requires-Dist: click>=8.1
+ Requires-Dist: numba>=0.57
+ Requires-Dist: numpy<2.0a0,>=1.23
+ Requires-Dist: pandas>=1.3
+ Requires-Dist: pynvml<11.5,>=11.0.0
+ Requires-Dist: rapids-dask-dependency==24.8.*
+ Requires-Dist: zict>=2.0.0
  Provides-Extra: docs
- Requires-Dist: numpydoc >=1.1.0 ; extra == 'docs'
- Requires-Dist: sphinx ; extra == 'docs'
- Requires-Dist: sphinx-click >=2.7.1 ; extra == 'docs'
- Requires-Dist: sphinx-rtd-theme >=0.5.1 ; extra == 'docs'
+ Requires-Dist: numpydoc>=1.1.0; extra == "docs"
+ Requires-Dist: sphinx; extra == "docs"
+ Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
+ Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
  Provides-Extra: test
- Requires-Dist: cudf ==24.4.* ; extra == 'test'
- Requires-Dist: dask-cudf ==24.4.* ; extra == 'test'
- Requires-Dist: kvikio ==24.4.* ; extra == 'test'
- Requires-Dist: pytest ; extra == 'test'
- Requires-Dist: pytest-cov ; extra == 'test'
- Requires-Dist: ucx-py ==0.37.* ; extra == 'test'
+ Requires-Dist: cudf==24.8.*; extra == "test"
+ Requires-Dist: dask-cudf==24.8.*; extra == "test"
+ Requires-Dist: kvikio==24.8.*; extra == "test"
+ Requires-Dist: pytest; extra == "test"
+ Requires-Dist: pytest-cov; extra == "test"
+ Requires-Dist: ucx-py==0.39.*; extra == "test"

  Dask CUDA
  =========
@@ -1,16 +1,16 @@
- dask_cuda/VERSION,sha256=cS6_wRwcl4ZhhE4gk3mZbC_4dD-r28YRdEQeaKdKu1U,9
- dask_cuda/__init__.py,sha256=Vt42yCT1WhjZgehiNLDLw2uvfxjqLThD8uKDMyKQYsw,1454
- dask_cuda/_version.py,sha256=OcuA1fDRuukb6OC8LzTwSO2aJ3ZtB1N2pm08Nrga8y8,778
- dask_cuda/cli.py,sha256=XNRH0bu-6jzRoyWJB5qSWuzePJSh3z_5Ng6rDCnz7lg,15970
- dask_cuda/cuda_worker.py,sha256=bIu-ESeIpJG_WaTYrv0z9z5juJ1qR5i_5Ng3CN1WK8s,8579
+ dask_cuda/VERSION,sha256=5YtjwV2EoD7E5Ed4K-PvnU0eEtdkkn33JHuNFDy8oKA,8
+ dask_cuda/__init__.py,sha256=JLDWev7vI_dPusLgRdOwXBz-xfhlX_hc-DzmLtrEYO0,1918
+ dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
+ dask_cuda/cli.py,sha256=Y3aObfAyMwOIo0oVz3-NC2InGLShOpeINwW5ROTF2s8,16616
+ dask_cuda/cuda_worker.py,sha256=uqyoDKsSe7sKN3StMVyz_971rj0Sjpmwfv7Bj083Wss,8959
  dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
  dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
  dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
  dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
  dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
  dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
- dask_cuda/local_cuda_cluster.py,sha256=hoEiEfJqAQrRS7N632VatSl1245GiWMT5B77Wc-i5C0,17928
- dask_cuda/plugins.py,sha256=cnHsdrXx7PBPmrzHX6YEkCH5byCsUk8LE2FeTeu8ZLU,4259
+ dask_cuda/local_cuda_cluster.py,sha256=jgXjd6OvEDfQ3iXU8hV_UfULa13GZsli0SGC2PIouZk,18882
+ dask_cuda/plugins.py,sha256=DCf7PnIBu_VNjFfrFeb1zCNuEnCaX9oz4Umn76t02Mc,4630
  dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
  dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
  dask_cuda/proxy_object.py,sha256=bZq92kjgFB-ad_luSAFT_RItV3nssmiEk4OOSp34laU,29812
@@ -18,36 +18,37 @@ dask_cuda/utils.py,sha256=RWlLK2cPHaCuNNhr8bW8etBeGklwREQJOafQbTydStk,25121
  dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
  dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
  dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dask_cuda/benchmarks/common.py,sha256=sEIFnRZS6wbyKCQyB4fDclYLc2YqC0PolurR5qzuRxw,6393
+ dask_cuda/benchmarks/common.py,sha256=2MnDdQjvHfGaUWDgiTcTGI_EeKPmVBEwoWfsJUNpOjU,6613
  dask_cuda/benchmarks/local_cudf_groupby.py,sha256=T9lA9nb4Wzu46AH--SJEVCeCm3650J7slapdNR_08FU,8904
- dask_cuda/benchmarks/local_cudf_merge.py,sha256=POjxoPx4zY1TjG2S_anElL6rDtC5Jhn3nF4HABlnwZg,12447
- dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=M-Lp3O3q8uyY50imQqMKZYwkAmyR0NApjx2ipGxDkXw,8608
+ dask_cuda/benchmarks/local_cudf_merge.py,sha256=AsuVnMA3H93sJwjjgi4KaIdYKnnX1OeRMPiXizrwHGk,12577
+ dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=2xWJZf3gwDNimXKZN2ivtU3OE_qec1KNOhgL4_AGQZU,8655
  dask_cuda/benchmarks/local_cupy.py,sha256=aUKIYfeR7c77K4kKk697Rxo8tG8kFabQ9jQEVGr-oTs,10762
  dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=_texYmam1K_XbzIvURltui5KRsISGFNylXiGUtgRIz0,6442
- dask_cuda/benchmarks/utils.py,sha256=baL5zK6VS6Mw_M4x9zJe8vMLUd2SZd1lS78JrL-h6oo,26896
+ dask_cuda/benchmarks/utils.py,sha256=4k8KnJPOczKDQNBPRWlaGsU2zdEA09BDGgklUXggwMU,30008
  dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/explicit_comms/comms.py,sha256=Su6PuNo68IyS-AwoqU4S9TmqWsLvUdNa0jot2hx8jQQ,10400
  dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=YferHNWKsMea8tele-ynPVr_6RAZNZIR-VzK_uFuEQU,20131
- dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=u3kW91YRLdHFycvpGfSQKrEucu5khMJ1k4sjmddO490,4910
- dask_cuda/tests/test_dask_cuda_worker.py,sha256=gViHaMCSfB6ip125OEi9D0nfKC-qBXRoHz6BRodEdb4,17729
+ dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=4xfhfbTGa36YPs_ex1_fFhzfGMYJq-QkS5q0RwgeHh8,20645
+ dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
+ dask_cuda/tests/test_dask_cuda_worker.py,sha256=o5g0_t-2M_2lfPeOPTS4NVF4rnQF0ZWAZekXw2h0xPc,19610
  dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
- dask_cuda/tests/test_dgx.py,sha256=Oh2vwL_CdUzSVQQoiIu6SPwXGRtmXwaW_Hh3ipXPUOc,7162
- dask_cuda/tests/test_explicit_comms.py,sha256=I4lSW-NQ0E08baEoG7cY4Ix3blGb1Auz88q2BNd1cPA,13136
+ dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
+ dask_cuda/tests/test_explicit_comms.py,sha256=Pa5vVx63qWtScnVJuS31WESXIt2FPyTJVFO-0OUbbmU,15276
  dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
  dask_cuda/tests/test_gds.py,sha256=6jf0HPTHAIG8Mp_FC4Ai4zpn-U1K7yk0fSXg8He8-r8,1513
  dask_cuda/tests/test_initialize.py,sha256=Rba59ZbljEm1yyN94_sWZPEE_f7hWln95aiBVc49pmY,6960
- dask_cuda/tests/test_local_cuda_cluster.py,sha256=G3kR-4o-vCqWWfSuQLFKVEK0F243FaDSgRlDTUll5aU,18376
+ dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
  dask_cuda/tests/test_proxify_host_file.py,sha256=Yiv0sDcUoWw0d2oiPeHGoHqqSSM4lfQ4rChCiaxb6EU,18994
- dask_cuda/tests/test_proxy.py,sha256=6iicSYYT2BGo1iKUQ7jM00mCjC4gtfwwxFXfGwH3QHc,23807
+ dask_cuda/tests/test_proxy.py,sha256=OnGnPkl5ksCb-3hpEKG2z1OfPK9DbnOCtBHOjcUUjhg,23809
  dask_cuda/tests/test_spill.py,sha256=xN9PbVERBYMuZxvscSO0mAM22loq9WT3ltZVBFxlmM4,10239
  dask_cuda/tests/test_utils.py,sha256=JRIwXfemc3lWSzLJX0VcvR1_0wB4yeoOTsw7kB6z6pU,9176
+ dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
  dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
  examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
  examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
- dask_cuda-24.4.0.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
- dask_cuda-24.4.0.dist-info/METADATA,sha256=mjy5YntsqIKdldcPEj-jjR70Aphe-B0LXqjcZjVFS9U,2570
- dask_cuda-24.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- dask_cuda-24.4.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
- dask_cuda-24.4.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
- dask_cuda-24.4.0.dist-info/RECORD,,
+ dask_cuda-24.8.2.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
+ dask_cuda-24.8.2.dist-info/METADATA,sha256=6iMwPI8cWrEYDYz73vm8pw-LkVeEgTQzymJgRxj32VQ,2546
+ dask_cuda-24.8.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ dask_cuda-24.8.2.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+ dask_cuda-24.8.2.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
+ dask_cuda-24.8.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.43.0)
+ Generator: setuptools (72.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any