dask-cuda 24.8.2__py3-none-any.whl → 24.12.0__py3-none-any.whl

This diff shows the content of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
dask_cuda/VERSION CHANGED
@@ -1 +1 @@
- 24.08.02
+ 24.12.00
dask_cuda/__init__.py CHANGED
@@ -9,6 +9,8 @@ import dask.dataframe.core
  import dask.dataframe.shuffle
  import dask.dataframe.multi
  import dask.bag.core
+ from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
+ from distributed.protocol.serialize import dask_deserialize, dask_serialize

  from ._version import __git_commit__, __version__
  from .cuda_worker import CUDAWorker
@@ -48,3 +50,20 @@ dask.dataframe.shuffle.shuffle_group = proxify_decorator(
      dask.dataframe.shuffle.shuffle_group
  )
  dask.dataframe.core._concat = unproxify_decorator(dask.dataframe.core._concat)
+
+
+ def _register_cudf_spill_aware():
+     import cudf
+
+     # Only enable Dask/cuDF spilling if cuDF spilling is disabled, see
+     # https://github.com/rapidsai/dask-cuda/issues/1363
+     if not cudf.get_option("spill"):
+         # This reproduces the implementation of `_register_cudf`, see
+         # https://github.com/dask/distributed/blob/40fcd65e991382a956c3b879e438be1b100dff97/distributed/protocol/__init__.py#L106-L115
+         from cudf.comm import serialize
+
+
+ for registry in [cuda_serialize, cuda_deserialize, dask_serialize, dask_deserialize]:
+     for lib in ["cudf", "dask_cudf"]:
+         if lib in registry._lazy:
+             registry._lazy[lib] = _register_cudf_spill_aware
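
Note: importing dask_cuda now swaps Distributed's lazy cuDF serializer hooks for the spill-aware variant above. A minimal sketch of how to observe this (assuming cudf and distributed are installed; not part of the package):

    from distributed.protocol.cuda import cuda_serialize

    import dask_cuda  # noqa: F401 -- applies the patch at import time

    # `_lazy` maps module names to callables run on first (de)serialization
    # of that module's objects; "cudf" now points at the spill-aware hook.
    print(cuda_serialize._lazy.get("cudf"))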
dask_cuda/benchmarks/common.py CHANGED
@@ -1,3 +1,4 @@
+ import contextlib
  from argparse import Namespace
  from functools import partial
  from typing import Any, Callable, List, Mapping, NamedTuple, Optional, Tuple
@@ -7,7 +8,7 @@ import numpy as np
  import pandas as pd

  import dask
- from distributed import Client
+ from distributed import Client, performance_report

  from dask_cuda.benchmarks.utils import (
      address_to_index,
@@ -87,12 +88,20 @@ def run_benchmark(client: Client, args: Namespace, config: Config):

      If ``args.profile`` is set, the final run is profiled.
      """
+
      results = []
-     for _ in range(max(1, args.runs) - 1):
-         res = config.bench_once(client, args, write_profile=None)
-         results.append(res)
-     results.append(config.bench_once(client, args, write_profile=args.profile))
-     return results
+     for _ in range(max(0, args.warmup_runs)):
+         config.bench_once(client, args, write_profile=None)
+
+     ctx = contextlib.nullcontext()
+     if args.profile is not None:
+         ctx = performance_report(filename=args.profile)
+     with ctx:
+         for _ in range(max(1, args.runs) - 1):
+             res = config.bench_once(client, args, write_profile=None)
+             results.append(res)
+         results.append(config.bench_once(client, args, write_profile=args.profile_last))
+     return results


  def gather_bench_results(client: Client, args: Namespace, config: Config):
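
Note: ``run_benchmark`` now performs ``args.warmup_runs`` unrecorded warmup iterations, wraps all timed iterations in a single ``performance_report`` when ``--profile`` is given, and passes ``--profile-last`` only into the final ``bench_once`` call. The null-context-or-report idiom recurs in every ``bench_once`` below; an isolated sketch (names here are illustrative, not from the package):

    import contextlib

    from distributed import performance_report

    def report_ctx(path=None):
        # Profile only when a report path is given; otherwise do nothing.
        if path is not None:
            return performance_report(filename=path)
        return contextlib.nullcontext()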
dask_cuda/benchmarks/local_cudf_groupby.py CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
  import dask
  import dask.dataframe as dd
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -98,10 +98,9 @@ def bench_once(client, args, write_profile=None):
          "False": False,
      }.get(args.shuffle, args.shuffle)

-     if write_profile is None:
-         ctx = contextlib.nullcontext()
-     else:
-         ctx = performance_report(filename=args.profile)
+     ctx = contextlib.nullcontext()
+     if write_profile is not None:
+         ctx = performance_report(filename=write_profile)

      with ctx:
          t1 = clock()
@@ -260,19 +259,6 @@ def parse_args():
              "type": str,
              "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB')",
-         },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs",
-         },
      ]

      return parse_benchmark_args(
dask_cuda/benchmarks/local_cudf_merge.py CHANGED
@@ -9,7 +9,7 @@ import pandas as pd
  import dask
  import dask.dataframe as dd
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -190,7 +190,7 @@ def bench_once(client, args, write_profile=None):
      if args.backend == "explicit-comms":
          ctx1 = dask.config.set(explicit_comms=True)
      if write_profile is not None:
-         ctx2 = performance_report(filename=args.profile)
+         ctx2 = performance_report(filename=write_profile)

      with ctx1:
          with ctx2:
@@ -335,13 +335,6 @@ def parse_args():
              "action": "store_true",
              "help": "Use shuffle join (takes precedence over '--broadcast-join').",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB')",
-         },
          {
              "name": "--frac-match",
              "default": 0.3,
@@ -353,12 +346,6 @@ def parse_args():
              "action": "store_true",
              "help": "Don't shuffle the keys of the left (base) dataframe.",
          },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs",
-         },
          {
              "name": [
                  "-s",
dask_cuda/benchmarks/local_cudf_shuffle.py CHANGED
@@ -121,10 +121,9 @@ def create_data(
  def bench_once(client, args, write_profile=None):
      data_processed, df = create_data(client, args)

-     if write_profile is None:
-         ctx = contextlib.nullcontext()
-     else:
-         ctx = performance_report(filename=args.profile)
+     ctx = contextlib.nullcontext()
+     if write_profile is not None:
+         ctx = performance_report(filename=write_profile)

      with ctx:
          if args.backend in {"dask", "dask-noop"}:
@@ -228,19 +227,6 @@ def parse_args():
              "type": str,
              "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB')",
-         },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs",
-         },
          {
              "name": "--ignore-index",
              "action": "store_true",
dask_cuda/benchmarks/local_cupy.py CHANGED
@@ -8,7 +8,7 @@ from nvtx import end_range, start_range

  from dask import array as da
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -141,12 +141,11 @@ def bench_once(client, args, write_profile=None):
      chunksize = x.chunksize
      data_processed = sum(arg.nbytes for arg in func_args)

-     # Execute the operations to benchmark
-     if args.profile is not None and write_profile is not None:
-         ctx = performance_report(filename=args.profile)
-     else:
-         ctx = contextlib.nullcontext()
+     ctx = contextlib.nullcontext()
+     if write_profile is not None:
+         ctx = performance_report(filename=write_profile)

+     # Execute the operations to benchmark
      with ctx:
          rng = start_range(message=args.operation, color="purple")
          result = func(*func_args)
@@ -297,19 +296,6 @@ def parse_args():
              "type": int,
              "help": "Chunk size (default 2500).",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB').",
-         },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs (default 3).",
-         },
          {
              "name": [
                  "-b",
dask_cuda/benchmarks/local_cupy_map_overlap.py CHANGED
@@ -10,7 +10,7 @@ from scipy.ndimage import convolve as sp_convolve

  from dask import array as da
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -42,12 +42,11 @@ def bench_once(client, args, write_profile=None):

      data_processed = x.nbytes

-     # Execute the operations to benchmark
-     if args.profile is not None and write_profile is not None:
-         ctx = performance_report(filename=args.profile)
-     else:
-         ctx = contextlib.nullcontext()
+     ctx = contextlib.nullcontext()
+     if write_profile is not None:
+         ctx = performance_report(filename=write_profile)

+     # Execute the operations to benchmark
      with ctx:
          result = x.map_overlap(mean_filter, args.kernel_size, shape=ks)
          if args.backend == "dask-noop":
@@ -168,19 +167,6 @@ def parse_args():
              "type": int,
              "help": "Kernel size, 2*k+1, in each dimension (default 1)",
          },
-         {
-             "name": "--ignore-size",
-             "default": "1 MiB",
-             "metavar": "nbytes",
-             "type": parse_bytes,
-             "help": "Ignore messages smaller than this (default '1 MB')",
-         },
-         {
-             "name": "--runs",
-             "default": 3,
-             "type": int,
-             "help": "Number of runs",
-         },
          {
              "name": [
                  "-b",
dask_cuda/benchmarks/read_parquet.py ADDED
@@ -0,0 +1,268 @@
+ import contextlib
+ from collections import ChainMap
+ from time import perf_counter as clock
+
+ import fsspec
+ import pandas as pd
+
+ import dask
+ import dask.dataframe as dd
+ from dask.base import tokenize
+ from dask.distributed import performance_report
+ from dask.utils import format_bytes, parse_bytes
+
+ from dask_cuda.benchmarks.common import Config, execute_benchmark
+ from dask_cuda.benchmarks.utils import (
+     parse_benchmark_args,
+     print_key_value,
+     print_separator,
+     print_throughput_bandwidth,
+ )
+
+ DISK_SIZE_CACHE = {}
+ OPTIONS_CACHE = {}
+
+
+ def _noop(df):
+     return df
+
+
+ def read_data(paths, columns, backend, **kwargs):
+     with dask.config.set({"dataframe.backend": backend}):
+         return dd.read_parquet(
+             paths,
+             columns=columns,
+             **kwargs,
+         )
+
+
+ def get_fs_paths_kwargs(args):
+     kwargs = {}
+
+     storage_options = {}
+     if args.key:
+         storage_options["key"] = args.key
+     if args.secret:
+         storage_options["secret"] = args.secret
+
+     if args.filesystem == "arrow":
+         import pyarrow.fs as pa_fs
+         from fsspec.implementations.arrow import ArrowFSWrapper
+
+         _mapping = {
+             "key": "access_key",
+             "secret": "secret_key",
+         }  # See: pyarrow.fs.S3FileSystem docs
+         s3_args = {}
+         for k, v in storage_options.items():
+             s3_args[_mapping[k]] = v
+
+         fs = pa_fs.FileSystem.from_uri(args.path)[0]
+         try:
+             region = {"region": fs.region}
+         except AttributeError:
+             region = {}
+         kwargs["filesystem"] = type(fs)(**region, **s3_args)
+         fsspec_fs = ArrowFSWrapper(kwargs["filesystem"])
+
+         if args.type == "gpu":
+             kwargs["blocksize"] = args.blocksize
+     else:
+         fsspec_fs = fsspec.core.get_fs_token_paths(
+             args.path, mode="rb", storage_options=storage_options
+         )[0]
+         kwargs["filesystem"] = fsspec_fs
+         kwargs["blocksize"] = args.blocksize
+         kwargs["aggregate_files"] = args.aggregate_files
+
+     # Collect list of paths
+     stripped_url_path = fsspec_fs._strip_protocol(args.path)
+     if stripped_url_path.endswith("/"):
+         stripped_url_path = stripped_url_path[:-1]
+     paths = fsspec_fs.glob(f"{stripped_url_path}/*.parquet")
+     if args.file_count:
+         paths = paths[: args.file_count]
+
+     return fsspec_fs, paths, kwargs
+
+
+ def bench_once(client, args, write_profile=None):
+     global OPTIONS_CACHE
+     global DISK_SIZE_CACHE
+
+     # Construct kwargs
+     token = tokenize(args)
+     try:
+         fsspec_fs, paths, kwargs = OPTIONS_CACHE[token]
+     except KeyError:
+         fsspec_fs, paths, kwargs = get_fs_paths_kwargs(args)
+         OPTIONS_CACHE[token] = (fsspec_fs, paths, kwargs)
+
+     if write_profile is None:
+         ctx = contextlib.nullcontext()
+     else:
+         ctx = performance_report(filename=args.profile)
+
+     with ctx:
+         t1 = clock()
+         df = read_data(
+             paths,
+             columns=args.columns,
+             backend="cudf" if args.type == "gpu" else "pandas",
+             **kwargs,
+         )
+         num_rows = len(
+             # Use opaque `map_partitions` call to "block"
+             # dask-expr from using pq metadata to get length
+             df.map_partitions(
+                 _noop,
+                 meta=df._meta,
+                 enforce_metadata=False,
+             )
+         )
+         t2 = clock()
+
+     # Extract total size of files on disk
+     token = tokenize(paths)
+     try:
+         disk_size = DISK_SIZE_CACHE[token]
+     except KeyError:
+         disk_size = sum(fsspec_fs.sizes(paths))
+         DISK_SIZE_CACHE[token] = disk_size
+
+     return (disk_size, num_rows, t2 - t1)
+
+
+ def pretty_print_results(args, address_to_index, p2p_bw, results):
+     if args.markdown:
+         print("```")
+     print("Parquet read benchmark")
+     data_processed, row_count, durations = zip(*results)
+     print_separator(separator="-")
+     backend = "cudf" if args.type == "gpu" else "pandas"
+     print_key_value(key="Path", value=args.path)
+     print_key_value(key="Columns", value=f"{args.columns}")
+     print_key_value(key="Backend", value=f"{backend}")
+     print_key_value(key="Filesystem", value=f"{args.filesystem}")
+     print_key_value(key="Blocksize", value=f"{format_bytes(args.blocksize)}")
+     print_key_value(key="Aggregate files", value=f"{args.aggregate_files}")
+     print_key_value(key="Row count", value=f"{row_count[0]}")
+     print_key_value(key="Size on disk", value=f"{format_bytes(data_processed[0])}")
+     if args.markdown:
+         print("\n```")
+     args.no_show_p2p_bandwidth = True
+     print_throughput_bandwidth(
+         args, durations, data_processed, p2p_bw, address_to_index
+     )
+     print_separator(separator="=")
+
+
+ def create_tidy_results(args, p2p_bw, results):
+     configuration = {
+         "path": args.path,
+         "columns": args.columns,
+         "backend": "cudf" if args.type == "gpu" else "pandas",
+         "filesystem": args.filesystem,
+         "blocksize": args.blocksize,
+         "aggregate_files": args.aggregate_files,
+     }
+     timing_data = pd.DataFrame(
+         [
+             pd.Series(
+                 data=ChainMap(
+                     configuration,
+                     {
+                         "wallclock": duration,
+                         "data_processed": data_processed,
+                         "num_rows": num_rows,
+                     },
+                 )
+             )
+             for data_processed, num_rows, duration in results
+         ]
+     )
+     return timing_data, p2p_bw
+
+
+ def parse_args():
+     special_args = [
+         {
+             "name": "path",
+             "type": str,
+             "help": "Parquet directory to read from (must be a flat directory).",
+         },
+         {
+             "name": "--blocksize",
+             "default": "256MB",
+             "type": parse_bytes,
+             "help": "How to set the blocksize option",
+         },
+         {
+             "name": "--aggregate-files",
+             "default": False,
+             "action": "store_true",
+             "help": "How to set the aggregate_files option",
+         },
+         {
+             "name": "--file-count",
+             "type": int,
+             "help": "Maximum number of files to read.",
+         },
+         {
+             "name": "--columns",
+             "type": str,
+             "help": "Columns to read/select from data.",
+         },
+         {
+             "name": "--key",
+             "type": str,
+             "help": "Public S3 key.",
+         },
+         {
+             "name": "--secret",
+             "type": str,
+             "help": "Secret S3 key.",
+         },
+         {
+             "name": [
+                 "-t",
+                 "--type",
+             ],
+             "choices": ["cpu", "gpu"],
+             "default": "gpu",
+             "type": str,
+             "help": "Use GPU or CPU dataframes (default 'gpu')",
+         },
+         {
+             "name": "--filesystem",
+             "choices": ["arrow", "fsspec"],
+             "default": "fsspec",
+             "type": str,
+             "help": "Filesystem backend",
+         },
+         {
+             "name": "--runs",
+             "default": 3,
+             "type": int,
+             "help": "Number of runs",
+         },
+     ]
+
+     args = parse_benchmark_args(
+         description="Parquet read benchmark",
+         args_list=special_args,
+         check_explicit_comms=False,
+     )
+     args.no_show_p2p_bandwidth = True
+     return args
+
+
+ if __name__ == "__main__":
+     execute_benchmark(
+         Config(
+             args=parse_args(),
+             bench_once=bench_once,
+             create_tidy_results=create_tidy_results,
+             pretty_print_results=pretty_print_results,
+         )
+     )
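
Note: the new benchmark is executable as a module, e.g. something like ``python -m dask_cuda.benchmarks.read_parquet s3://bucket/data --filesystem arrow`` (the bucket path is a placeholder). Its option/path caching relies on ``dask.base.tokenize`` hashing the parsed arguments deterministically; a minimal illustration:

    from dask.base import tokenize

    # Identical argument sets produce identical tokens, so repeated
    # bench_once calls reuse the cached filesystem, paths, and kwargs.
    args_a = {"path": "s3://bucket/data", "blocksize": 256_000_000}
    args_b = {"path": "s3://bucket/data", "blocksize": 256_000_000}
    assert tokenize(args_a) == tokenize(args_b)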
dask_cuda/benchmarks/utils.py CHANGED
@@ -323,7 +323,16 @@ def parse_benchmark_args(
          metavar="PATH",
          default=None,
          type=str,
-         help="Write dask profile report (E.g. dask-report.html)",
+         help="Write dask profile report (E.g. dask-report.html) on all "
+         "iterations (excluding warmup).",
+     )
+     parser.add_argument(
+         "--profile-last",
+         metavar="PATH",
+         default=None,
+         type=str,
+         help="Write dask profile report (E.g. dask-report.html) on last "
+         "iteration only.",
      )
      # See save_benchmark_data for more information
      parser.add_argument(
@@ -337,6 +346,25 @@ def parse_benchmark_args(
          "If the files already exist, new files are created with a uniquified "
          "BASENAME.",
      )
+     parser.add_argument(
+         "--ignore-size",
+         default="1 MiB",
+         metavar="nbytes",
+         type=parse_bytes,
+         help="Bandwidth statistics: ignore messages smaller than this (default '1 MB')",
+     )
+     parser.add_argument(
+         "--runs",
+         default=3,
+         type=int,
+         help="Number of runs",
+     )
+     parser.add_argument(
+         "--warmup-runs",
+         default=1,
+         type=int,
+         help="Number of warmup runs",
+     )

      for args in args_list:
          name = args.pop("name")
@@ -765,7 +793,7 @@ def print_throughput_bandwidth(
      )
      print_key_value(
          key="Wall clock",
-         value=f"{format_time(durations.mean())} +/- {format_time(durations.std()) }",
+         value=f"{format_time(durations.mean())} +/- {format_time(durations.std())}",
      )
      if not args.no_show_p2p_bandwidth:
          print_separator(separator="=")
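
Note: the ``--ignore-size`` and ``--runs`` options deleted from the individual benchmarks above are now registered once here, alongside the new ``--warmup-runs``, so every benchmark inherits all three. A stand-in sketch of the resulting parser behavior (this toy parser is illustrative, not the package's):

    import argparse

    from dask.utils import parse_bytes

    parser = argparse.ArgumentParser()
    parser.add_argument("--ignore-size", default="1 MiB", type=parse_bytes)
    parser.add_argument("--runs", default=3, type=int)
    parser.add_argument("--warmup-runs", default=1, type=int)

    # parse_bytes converts human-readable sizes to bytes: "2 MiB" -> 2097152
    print(parser.parse_args(["--ignore-size", "2 MiB"]).ignore_size)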
dask_cuda/cli.py CHANGED
@@ -13,7 +13,7 @@ from distributed.security import Security
  from distributed.utils import import_term

  from .cuda_worker import CUDAWorker
- from .utils import print_cluster_config
+ from .utils import CommaSeparatedChoice, print_cluster_config

  logger = logging.getLogger(__name__)

@@ -164,13 +164,24 @@ def cuda():
      incompatible with RMM pools and managed memory, trying to enable both will
      result in failure.""",
  )
+ @click.option(
+     "--set-rmm-allocator-for-libs",
+     "rmm_allocator_external_lib_list",
+     type=CommaSeparatedChoice(["cupy", "torch"]),
+     default=None,
+     show_default=True,
+     help="""
+     Set RMM as the allocator for external libraries. Provide a comma-separated
+     list of libraries to set, e.g., "torch,cupy".""",
+ )
  @click.option(
      "--rmm-release-threshold",
      default=None,
-     help="""When ``rmm.async`` is ``True`` and the pool size grows beyond this value, unused
-     memory held by the pool will be released at the next synchronization point. Can be
-     an integer (bytes), float (fraction of total device memory), string (like ``"5GB"``
-     or ``"5000M"``) or ``None``. By default, this feature is disabled.
+     help="""When ``rmm.async`` is ``True`` and the pool size grows beyond this
+     value, unused memory held by the pool will be released at the next
+     synchronization point. Can be an integer (bytes), float (fraction of total
+     device memory), string (like ``"5GB"`` or ``"5000M"``) or ``None``. By
+     default, this feature is disabled.

      .. note::
          This size is a per-worker configuration, and not cluster-wide.""",
@@ -350,6 +361,7 @@ def worker(
      rmm_maximum_pool_size,
      rmm_managed_memory,
      rmm_async,
+     rmm_allocator_external_lib_list,
      rmm_release_threshold,
      rmm_log_directory,
      rmm_track_allocations,
@@ -424,6 +436,7 @@ def worker(
          rmm_maximum_pool_size,
          rmm_managed_memory,
          rmm_async,
+         rmm_allocator_external_lib_list,
          rmm_release_threshold,
          rmm_log_directory,
          rmm_track_allocations,
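
Note: with this change a worker can be started with, for example, ``dask cuda worker <scheduler-address> --set-rmm-allocator-for-libs cupy,torch`` (the scheduler address is a placeholder). The comma-separated value is validated by ``CommaSeparatedChoice`` (see dask_cuda/utils.py below) and forwarded to ``CUDAWorker`` as ``rmm_allocator_external_lib_list``.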
dask_cuda/cuda_worker.py CHANGED
@@ -47,6 +47,7 @@ class CUDAWorker(Server):
          rmm_maximum_pool_size=None,
          rmm_managed_memory=False,
          rmm_async=False,
+         rmm_allocator_external_lib_list=None,
          rmm_release_threshold=None,
          rmm_log_directory=None,
          rmm_track_allocations=False,
@@ -195,6 +196,14 @@ class CUDAWorker(Server):
              },
          )

+         cudf_spill_warning = dask.config.get("cudf-spill-warning", default=True)
+         if enable_cudf_spill and cudf_spill_warning:
+             warnings.warn(
+                 "cuDF spilling is enabled, please ensure the client and scheduler "
+                 "processes set `CUDF_SPILL=on` as well. To disable this warning "
+                 "set `DASK_CUDF_SPILL_WARNING=False`."
+             )
+
          self.nannies = [
              Nanny(
                  scheduler,
@@ -223,6 +232,7 @@ class CUDAWorker(Server):
                      release_threshold=rmm_release_threshold,
                      log_directory=rmm_log_directory,
                      track_allocations=rmm_track_allocations,
+                     external_lib_list=rmm_allocator_external_lib_list,
                  ),
                  PreImport(pre_import),
                  CUDFSetup(spill=enable_cudf_spill, spill_stats=cudf_spill_stats),
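
Note: the opt-out travels through Dask's config system, which maps ``DASK_``-prefixed environment variables onto config keys (hyphens and underscores are interchangeable). A minimal sketch of the lookup the worker performs:

    import os

    import dask

    # DASK_CUDF_SPILL_WARNING=False surfaces as the "cudf-spill-warning" key;
    # refresh() re-reads the environment into the live config.
    os.environ["DASK_CUDF_SPILL_WARNING"] = "False"
    dask.config.refresh()
    print(dask.config.get("cudf-spill-warning", default=True))  # False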
dask_cuda/local_cuda_cluster.py CHANGED
@@ -143,6 +143,11 @@ class LocalCUDACluster(LocalCluster):
          The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also
          incompatible with RMM pools and managed memory. Trying to enable both will
          result in an exception.
+     rmm_allocator_external_lib_list: str, list or None, default None
+         List of external libraries for which to set RMM as the allocator.
+         Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string
+         (like ``"torch,cupy"``) or a list of strings (like ``["torch", "cupy"]``).
+         If ``None``, no external libraries will use RMM as their allocator.
      rmm_release_threshold: int, str or None, default None
          When ``rmm.async is True`` and the pool size grows beyond this value, unused
          memory held by the pool will be released at the next synchronization point.
@@ -231,6 +236,7 @@ class LocalCUDACluster(LocalCluster):
          rmm_maximum_pool_size=None,
          rmm_managed_memory=False,
          rmm_async=False,
+         rmm_allocator_external_lib_list=None,
          rmm_release_threshold=None,
          rmm_log_directory=None,
          rmm_track_allocations=False,
@@ -244,6 +250,13 @@ class LocalCUDACluster(LocalCluster):
          # initialization happens before we can set CUDA_VISIBLE_DEVICES
          os.environ["RAPIDS_NO_INITIALIZE"] = "True"

+         if enable_cudf_spill:
+             import cudf
+
+             # cuDF spilling must be enabled in the client/scheduler process too.
+             cudf.set_option("spill", enable_cudf_spill)
+             cudf.set_option("spill_stats", cudf_spill_stats)
+
          if threads_per_worker < 1:
              raise ValueError("threads_per_worker must be higher than 0.")

@@ -258,6 +271,19 @@ class LocalCUDACluster(LocalCluster):
              n_workers = len(CUDA_VISIBLE_DEVICES)
          if n_workers < 1:
              raise ValueError("Number of workers cannot be less than 1.")
+
+         if rmm_allocator_external_lib_list is not None:
+             if isinstance(rmm_allocator_external_lib_list, str):
+                 rmm_allocator_external_lib_list = [
+                     v.strip() for v in rmm_allocator_external_lib_list.split(",")
+                 ]
+             elif not isinstance(rmm_allocator_external_lib_list, list):
+                 raise ValueError(
+                     "rmm_allocator_external_lib_list must be either a comma-separated "
+                     "string or a list of strings. Examples: 'torch,cupy' "
+                     "or ['torch', 'cupy']"
+                 )
+
          # Set nthreads=1 when parsing mem_limit since it only depends on n_workers
          logger = logging.getLogger(__name__)
          self.memory_limit = parse_memory_limit(
@@ -277,6 +303,8 @@ class LocalCUDACluster(LocalCluster):
          self.rmm_managed_memory = rmm_managed_memory
          self.rmm_async = rmm_async
          self.rmm_release_threshold = rmm_release_threshold
+         self.rmm_allocator_external_lib_list = rmm_allocator_external_lib_list
+
          if rmm_pool_size is not None or rmm_managed_memory or rmm_async:
              try:
                  import rmm  # noqa F401
@@ -430,6 +458,7 @@ class LocalCUDACluster(LocalCluster):
                      release_threshold=self.rmm_release_threshold,
                      log_directory=self.rmm_log_directory,
                      track_allocations=self.rmm_track_allocations,
+                     external_lib_list=self.rmm_allocator_external_lib_list,
                  ),
                  PreImport(self.pre_import),
                  CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats),
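
Note: a hedged usage sketch of the new keyword (requires a CUDA-capable environment with RMM installed; the pool size shown is illustrative):

    from dask_cuda import LocalCUDACluster

    # Either a list or a comma-separated string ("cupy,torch") is accepted;
    # strings are split and stripped as shown in the constructor above.
    cluster = LocalCUDACluster(
        rmm_pool_size="1GB",
        rmm_allocator_external_lib_list=["cupy", "torch"],
    )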
dask_cuda/plugins.py CHANGED
@@ -1,5 +1,6 @@
  import importlib
  import os
+ from typing import Callable, Dict

  from distributed import WorkerPlugin

@@ -39,6 +40,7 @@ class RMMSetup(WorkerPlugin):
          release_threshold,
          log_directory,
          track_allocations,
+         external_lib_list,
      ):
          if initial_pool_size is None and maximum_pool_size is not None:
              raise ValueError(
@@ -61,6 +63,7 @@ class RMMSetup(WorkerPlugin):
          self.logging = log_directory is not None
          self.log_directory = log_directory
          self.rmm_track_allocations = track_allocations
+         self.external_lib_list = external_lib_list

      def setup(self, worker=None):
          if self.initial_pool_size is not None:
@@ -123,6 +126,70 @@ class RMMSetup(WorkerPlugin):
              mr = rmm.mr.get_current_device_resource()
              rmm.mr.set_current_device_resource(rmm.mr.TrackingResourceAdaptor(mr))

+         if self.external_lib_list is not None:
+             for lib in self.external_lib_list:
+                 enable_rmm_memory_for_library(lib)
+
+
+ def enable_rmm_memory_for_library(lib_name: str) -> None:
+     """Enable RMM memory pool support for a specified third-party library.
+
+     This function allows the given library to utilize RMM's memory pool if it supports
+     integration with RMM. The library name is passed as a string argument, and if the
+     library is compatible, its memory allocator will be configured to use RMM.
+
+     Parameters
+     ----------
+     lib_name : str
+         The name of the third-party library to enable RMM memory pool support for.
+         Supported libraries are "cupy" and "torch".
+
+     Raises
+     ------
+     ValueError
+         If the library name is not supported or does not have RMM integration.
+     ImportError
+         If the required library is not installed.
+     """
+
+     # Mapping of supported libraries to their respective setup functions
+     setup_functions: Dict[str, Callable[[], None]] = {
+         "torch": _setup_rmm_for_torch,
+         "cupy": _setup_rmm_for_cupy,
+     }
+
+     if lib_name not in setup_functions:
+         supported_libs = ", ".join(setup_functions.keys())
+         raise ValueError(
+             f"The library '{lib_name}' is not supported for RMM integration. "
+             f"Supported libraries are: {supported_libs}."
+         )
+
+     # Call the setup function for the specified library
+     setup_functions[lib_name]()
+
+
+ def _setup_rmm_for_torch() -> None:
+     try:
+         import torch
+     except ImportError as e:
+         raise ImportError("PyTorch is not installed.") from e
+
+     from rmm.allocators.torch import rmm_torch_allocator
+
+     torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+
+ def _setup_rmm_for_cupy() -> None:
+     try:
+         import cupy
+     except ImportError as e:
+         raise ImportError("CuPy is not installed.") from e
+
+     from rmm.allocators.cupy import rmm_cupy_allocator
+
+     cupy.cuda.set_allocator(rmm_cupy_allocator)
+

  class PreImport(WorkerPlugin):
      def __init__(self, libraries):
dask_cuda/tests/test_dask_cuda_worker.py CHANGED
@@ -567,3 +567,30 @@ def test_worker_timeout():
      assert "reason: nanny-close" in ret.stderr.lower()

      assert ret.returncode == 0
+
+
+ @pytest.mark.parametrize("enable_cudf_spill_warning", [False, True])
+ def test_worker_cudf_spill_warning(enable_cudf_spill_warning):  # noqa: F811
+     pytest.importorskip("rmm")
+
+     environ = {"CUDA_VISIBLE_DEVICES": "0"}
+     if not enable_cudf_spill_warning:
+         environ["DASK_CUDF_SPILL_WARNING"] = "False"
+
+     with patch.dict(os.environ, environ):
+         ret = subprocess.run(
+             [
+                 "dask",
+                 "cuda",
+                 "worker",
+                 "127.0.0.1:9369",
+                 "--enable-cudf-spill",
+                 "--death-timeout",
+                 "1",
+             ],
+             capture_output=True,
+         )
+         if enable_cudf_spill_warning:
+             assert b"UserWarning: cuDF spilling is enabled" in ret.stderr
+         else:
+             assert b"UserWarning: cuDF spilling is enabled" not in ret.stderr
dask_cuda/tests/test_gds.py CHANGED
@@ -38,7 +38,7 @@ def test_gds(gds_enabled, cuda_lib):
          a = data_create()
          header, frames = serialize(a, serializers=("disk",))
          b = deserialize(header, frames)
-         assert type(a) == type(b)
+         assert type(a) is type(b)
          assert data_compare(a, b)
      finally:
          ProxifyHostFile.register_disk_spilling()  # Reset disk spilling options
dask_cuda/tests/test_proxify_host_file.py CHANGED
@@ -252,7 +252,7 @@ async def test_local_cuda_cluster(jit_unspill):
              assert "ProxyObject" in str(type(x))
              assert x._pxy_get().serializer == "dask"
          else:
-             assert type(x) == cudf.DataFrame
+             assert type(x) is cudf.DataFrame
          assert len(x) == 10  # Trigger deserialization
          return x
 
dask_cuda/tests/test_proxy.py CHANGED
@@ -114,7 +114,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert expect == got

      # Check unary operators
@@ -124,7 +124,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert all(expect == got)

      # Check binary operators that takes a scalar as second argument
@@ -134,7 +134,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org, 2)
          got = op(pxy, 2)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert all(expect == got)

      # Check binary operators
@@ -192,7 +192,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert expect == got

      # Check reflected methods
@@ -297,7 +297,7 @@ async def test_spilling_local_cuda_cluster(jit_unspill):
              assert "ProxyObject" in str(type(x))
              assert x._pxy_get().serializer == "dask"
          else:
-             assert type(x) == cudf.DataFrame
+             assert type(x) is cudf.DataFrame
          assert len(x) == 10  # Trigger deserialization
          return x
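
Note: ``type(x) == T`` is replaced by ``type(x) is T`` throughout the tests; identity comparison is the exact-type check flake8 recommends (E721), since ``==`` can be overridden by the operands while identity of type objects cannot.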
 
dask_cuda/tests/test_spill.py CHANGED
@@ -11,6 +11,8 @@ from distributed.metrics import time
  from distributed.sizeof import sizeof
  from distributed.utils_test import gen_cluster, gen_test, loop  # noqa: F401

+ import dask_cudf
+
  from dask_cuda import LocalCUDACluster, utils
  from dask_cuda.utils_test import IncreasedCloseTimeoutNanny

@@ -18,6 +20,57 @@ if utils.get_device_total_memory() < 1e10:
      pytest.skip("Not enough GPU memory", allow_module_level=True)


+ def _set_cudf_device_limit():
+     """Ensure spilling for objects of all sizes"""
+     import cudf
+
+     cudf.set_option("spill_device_limit", 0)
+
+
+ def _assert_cudf_spill_stats(enable_cudf_spill, dask_worker=None):
+     """Ensure cuDF has spilled data with its internal mechanism"""
+     import cudf
+
+     global_manager = cudf.core.buffer.spill_manager.get_global_manager()
+
+     if enable_cudf_spill:
+         stats = global_manager.statistics
+         buffers = global_manager.buffers()
+         assert stats.spill_totals[("gpu", "cpu")][0] > 1000
+         assert stats.spill_totals[("cpu", "gpu")][0] > 1000
+         assert len(buffers) > 0
+     else:
+         assert global_manager is None
+
+
+ @pytest.fixture(params=[False, True])
+ def cudf_spill(request):
+     """Fixture to enable and clear cuDF spill manager in client process"""
+     cudf = pytest.importorskip("cudf")
+
+     enable_cudf_spill = request.param
+
+     if enable_cudf_spill:
+         # If the global spill manager was previously set, fail.
+         assert cudf.core.buffer.spill_manager._global_manager is None
+
+         cudf.set_option("spill", True)
+         cudf.set_option("spill_stats", True)
+
+         # This change is to prevent changing RMM resource stack in cuDF,
+         # workers do not need this because they are spawned as new
+         # processes for every new test that runs.
+         cudf.set_option("spill_on_demand", False)
+
+         _set_cudf_device_limit()
+
+     yield enable_cudf_spill
+
+     cudf.set_option("spill", False)
+     cudf.core.buffer.spill_manager._global_manager_uninitialized = True
+     cudf.core.buffer.spill_manager._global_manager = None
+
+
  def device_host_file_size_matches(
      dhf, total_bytes, device_chunk_overhead=0, serialized_chunk_overhead=1024
  ):
@@ -244,9 +297,11 @@ async def test_cupy_cluster_device_spill(params):
      ],
  )
  @gen_test(timeout=30)
- async def test_cudf_cluster_device_spill(params):
+ async def test_cudf_cluster_device_spill(params, cudf_spill):
      cudf = pytest.importorskip("cudf")

+     enable_cudf_spill = cudf_spill
+
      with dask.config.set(
          {
              "distributed.comm.compression": False,
@@ -266,6 +321,7 @@ async def test_cudf_cluster_device_spill(params):
              device_memory_limit=params["device_memory_limit"],
              memory_limit=params["memory_limit"],
              worker_class=IncreasedCloseTimeoutNanny,
+             enable_cudf_spill=enable_cudf_spill,
          ) as cluster:
              async with Client(cluster, asynchronous=True) as client:

@@ -294,21 +350,28 @@ async def test_cudf_cluster_device_spill(params):
                  del cdf
                  gc.collect()

-                 await client.run(
-                     assert_host_chunks,
-                     params["spills_to_disk"],
-                 )
-                 await client.run(
-                     assert_disk_chunks,
-                     params["spills_to_disk"],
-                 )
-
-                 await client.run(
-                     worker_assert,
-                     nbytes,
-                     32,
-                     2048,
-                 )
+                 if enable_cudf_spill:
+                     await client.run(
+                         worker_assert,
+                         0,
+                         0,
+                         0,
+                     )
+                 else:
+                     await client.run(
+                         assert_host_chunks,
+                         params["spills_to_disk"],
+                     )
+                     await client.run(
+                         assert_disk_chunks,
+                         params["spills_to_disk"],
+                     )
+                     await client.run(
+                         worker_assert,
+                         nbytes,
+                         32,
+                         2048,
+                     )

                  del cdf2

@@ -324,3 +387,40 @@ async def test_cudf_cluster_device_spill(params):
                  gc.collect()
              else:
                  break
+
+
+ @gen_test(timeout=30)
+ async def test_cudf_spill_cluster(cudf_spill):
+     cudf = pytest.importorskip("cudf")
+     enable_cudf_spill = cudf_spill
+
+     async with LocalCUDACluster(
+         n_workers=1,
+         scheduler_port=0,
+         silence_logs=False,
+         dashboard_address=None,
+         asynchronous=True,
+         device_memory_limit=None,
+         memory_limit=None,
+         worker_class=IncreasedCloseTimeoutNanny,
+         enable_cudf_spill=enable_cudf_spill,
+         cudf_spill_stats=enable_cudf_spill,
+     ) as cluster:
+         async with Client(cluster, asynchronous=True) as client:
+
+             await client.wait_for_workers(1)
+             await client.run(_set_cudf_device_limit)
+
+             cdf = cudf.DataFrame(
+                 {
+                     "a": list(range(200)),
+                     "b": list(reversed(range(200))),
+                     "c": list(range(200)),
+                 }
+             )
+
+             ddf = dask_cudf.from_cudf(cdf, npartitions=2).sum().persist()
+             await wait(ddf)
+
+             await client.run(_assert_cudf_spill_stats, enable_cudf_spill)
+             _assert_cudf_spill_stats(enable_cudf_spill)
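
Note: the spill totals asserted above come from cuDF's own spill manager. A minimal sketch of inspecting them outside the test (assumes a cudf build with built-in spilling; the internal module path mirrors the test code):

    import cudf

    cudf.set_option("spill", True)
    cudf.set_option("spill_stats", True)

    # statistics.spill_totals maps (src, dst) -> (bytes spilled, seconds)
    manager = cudf.core.buffer.spill_manager.get_global_manager()
    print(manager.statistics.spill_totals)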
dask_cuda/utils.py CHANGED
@@ -9,6 +9,7 @@ from functools import singledispatch
  from multiprocessing import cpu_count
  from typing import Optional

+ import click
  import numpy as np
  import pynvml
  import toolz
@@ -764,3 +765,13 @@ def get_rmm_device_memory_usage() -> Optional[int]:
          if isinstance(mr, rmm.mr.StatisticsResourceAdaptor):
              return mr.allocation_counts["current_bytes"]
      return None
+
+
+ class CommaSeparatedChoice(click.Choice):
+     def convert(self, value, param, ctx):
+         values = [v.strip() for v in value.split(",")]
+         for v in values:
+             if v not in self.choices:
+                 choices_str = ", ".join(f"'{c}'" for c in self.choices)
+                 self.fail(f"invalid choice(s): {v}. (choices are: {choices_str})")
+         return values
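
Note: a minimal sketch of the new click type's behavior (the toy command is illustrative, not part of the package):

    import click
    from click.testing import CliRunner

    from dask_cuda.utils import CommaSeparatedChoice

    @click.command()
    @click.option("--libs", type=CommaSeparatedChoice(["cupy", "torch"]))
    def main(libs):
        click.echo(libs)

    # Whitespace around each item is stripped; invalid items fail the parse.
    print(CliRunner().invoke(main, ["--libs", "torch, cupy"]).output)
    # -> ['torch', 'cupy']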
dask_cuda-24.12.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dask-cuda
- Version: 24.8.2
+ Version: 24.12.0
  Summary: Utilities for Dask and CUDA interactions
  Author: NVIDIA Corporation
  License: Apache 2.0
@@ -12,18 +12,18 @@ Classifier: Topic :: Database
  Classifier: Topic :: Scientific/Engineering
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Python: >=3.9
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: click>=8.1
  Requires-Dist: numba>=0.57
- Requires-Dist: numpy<2.0a0,>=1.23
+ Requires-Dist: numpy<3.0a0,>=1.23
  Requires-Dist: pandas>=1.3
- Requires-Dist: pynvml<11.5,>=11.0.0
- Requires-Dist: rapids-dask-dependency==24.8.*
+ Requires-Dist: pynvml<12.0.0a0,>=11.0.0
+ Requires-Dist: rapids-dask-dependency==24.12.*
  Requires-Dist: zict>=2.0.0
  Provides-Extra: docs
  Requires-Dist: numpydoc>=1.1.0; extra == "docs"
@@ -31,12 +31,12 @@ Requires-Dist: sphinx; extra == "docs"
  Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
  Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
  Provides-Extra: test
- Requires-Dist: cudf==24.8.*; extra == "test"
- Requires-Dist: dask-cudf==24.8.*; extra == "test"
- Requires-Dist: kvikio==24.8.*; extra == "test"
+ Requires-Dist: cudf==24.12.*; extra == "test"
+ Requires-Dist: dask-cudf==24.12.*; extra == "test"
+ Requires-Dist: kvikio==24.12.*; extra == "test"
  Requires-Dist: pytest; extra == "test"
  Requires-Dist: pytest-cov; extra == "test"
- Requires-Dist: ucx-py==0.39.*; extra == "test"
+ Requires-Dist: ucx-py==0.41.*; extra == "test"

  Dask CUDA
  =========
dask_cuda-24.12.0.dist-info/RECORD CHANGED
@@ -1,54 +1,55 @@
- dask_cuda/VERSION,sha256=5YtjwV2EoD7E5Ed4K-PvnU0eEtdkkn33JHuNFDy8oKA,8
- dask_cuda/__init__.py,sha256=JLDWev7vI_dPusLgRdOwXBz-xfhlX_hc-DzmLtrEYO0,1918
+ dask_cuda/VERSION,sha256=NltZ4By82NzVjz00LGPhCXfkG4BB0JdUSXqlG8fiVuo,8
+ dask_cuda/__init__.py,sha256=eOCH3Wj0A8X0qbNUoNA15dgxb2O-ZApha4QHq5EEVFw,2748
  dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
- dask_cuda/cli.py,sha256=Y3aObfAyMwOIo0oVz3-NC2InGLShOpeINwW5ROTF2s8,16616
- dask_cuda/cuda_worker.py,sha256=uqyoDKsSe7sKN3StMVyz_971rj0Sjpmwfv7Bj083Wss,8959
+ dask_cuda/cli.py,sha256=cScVyNiA_l9uXeDgkIcmbcR4l4cH1_1shqSqsVmuHPE,17053
+ dask_cuda/cuda_worker.py,sha256=rZ1ITG_ZCbuaMA9e8uSqCjU8Km4AMphGGrxpBPQG8xU,9477
  dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
  dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
  dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
  dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
  dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
  dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
- dask_cuda/local_cuda_cluster.py,sha256=jgXjd6OvEDfQ3iXU8hV_UfULa13GZsli0SGC2PIouZk,18882
- dask_cuda/plugins.py,sha256=DCf7PnIBu_VNjFfrFeb1zCNuEnCaX9oz4Umn76t02Mc,4630
+ dask_cuda/local_cuda_cluster.py,sha256=wqwKVRV6jT13sf9e-XsvbVBlTrnhmcbmHQBFPTFcayw,20335
+ dask_cuda/plugins.py,sha256=yGHEurbYhL4jucQrmsxLfOyE5c3bSJdfs6GVwvDAeEA,6770
  dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
  dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
  dask_cuda/proxy_object.py,sha256=bZq92kjgFB-ad_luSAFT_RItV3nssmiEk4OOSp34laU,29812
- dask_cuda/utils.py,sha256=RWlLK2cPHaCuNNhr8bW8etBeGklwREQJOafQbTydStk,25121
+ dask_cuda/utils.py,sha256=Goq-m78rYZ-bcJitg47N1h_PC4PDuzXG0CUVH7V8azU,25515
  dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
  dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
  dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dask_cuda/benchmarks/common.py,sha256=2MnDdQjvHfGaUWDgiTcTGI_EeKPmVBEwoWfsJUNpOjU,6613
- dask_cuda/benchmarks/local_cudf_groupby.py,sha256=T9lA9nb4Wzu46AH--SJEVCeCm3650J7slapdNR_08FU,8904
- dask_cuda/benchmarks/local_cudf_merge.py,sha256=AsuVnMA3H93sJwjjgi4KaIdYKnnX1OeRMPiXizrwHGk,12577
- dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=2xWJZf3gwDNimXKZN2ivtU3OE_qec1KNOhgL4_AGQZU,8655
- dask_cuda/benchmarks/local_cupy.py,sha256=aUKIYfeR7c77K4kKk697Rxo8tG8kFabQ9jQEVGr-oTs,10762
- dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=_texYmam1K_XbzIvURltui5KRsISGFNylXiGUtgRIz0,6442
- dask_cuda/benchmarks/utils.py,sha256=4k8KnJPOczKDQNBPRWlaGsU2zdEA09BDGgklUXggwMU,30008
+ dask_cuda/benchmarks/common.py,sha256=YFhxBYkoxIV-2mddSbLwTbyg67U4zXDd2_fFq9oP3_A,6922
+ dask_cuda/benchmarks/local_cudf_groupby.py,sha256=zrDiF-yBAUxVt9mWOTH5hUm-pb-XnVX-G9gvCEX7_GI,8512
+ dask_cuda/benchmarks/local_cudf_merge.py,sha256=Q7lnZ87-O7j28hkS-i_5hMApTX8VsuI4ftZf2XAnp1E,12195
+ dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=8FjPFtiC-UqZcdPfocdMuzq_8TURAQWJlmhfcMWdo4w,8276
+ dask_cuda/benchmarks/local_cupy.py,sha256=RCxQJd88bn3vyMAJDPK3orUpxzvDZY957wOSYkfriq0,10323
+ dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=YAllGFuG6MePfPL8gdZ-Ld7a44-G0eEaHZJWB4vFPdY,6017
+ dask_cuda/benchmarks/read_parquet.py,sha256=TARcG-TS1NGcQWJmuAKtfmBmy5LAaLc3xgtKgAd1DaA,7650
+ dask_cuda/benchmarks/utils.py,sha256=_NSWS5e8SzZ6vxDcEFo97Y8gs_e23Qqd-c3r83BA6PU,30748
  dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/explicit_comms/comms.py,sha256=Su6PuNo68IyS-AwoqU4S9TmqWsLvUdNa0jot2hx8jQQ,10400
  dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=4xfhfbTGa36YPs_ex1_fFhzfGMYJq-QkS5q0RwgeHh8,20645
  dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
- dask_cuda/tests/test_dask_cuda_worker.py,sha256=o5g0_t-2M_2lfPeOPTS4NVF4rnQF0ZWAZekXw2h0xPc,19610
+ dask_cuda/tests/test_dask_cuda_worker.py,sha256=6rroHvJAn5R3X9LwIcE8QrPxG1GO3PaxXVjhbdQ90Pw,20477
  dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
  dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
  dask_cuda/tests/test_explicit_comms.py,sha256=Pa5vVx63qWtScnVJuS31WESXIt2FPyTJVFO-0OUbbmU,15276
  dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
- dask_cuda/tests/test_gds.py,sha256=6jf0HPTHAIG8Mp_FC4Ai4zpn-U1K7yk0fSXg8He8-r8,1513
+ dask_cuda/tests/test_gds.py,sha256=j1Huud6UGm1fbkyRLQEz_ysrVw__5AimwSn_M-2GEvs,1513
  dask_cuda/tests/test_initialize.py,sha256=Rba59ZbljEm1yyN94_sWZPEE_f7hWln95aiBVc49pmY,6960
  dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
- dask_cuda/tests/test_proxify_host_file.py,sha256=Yiv0sDcUoWw0d2oiPeHGoHqqSSM4lfQ4rChCiaxb6EU,18994
- dask_cuda/tests/test_proxy.py,sha256=OnGnPkl5ksCb-3hpEKG2z1OfPK9DbnOCtBHOjcUUjhg,23809
- dask_cuda/tests/test_spill.py,sha256=xN9PbVERBYMuZxvscSO0mAM22loq9WT3ltZVBFxlmM4,10239
+ dask_cuda/tests/test_proxify_host_file.py,sha256=LC3jjo_gbfhdIy1Zy_ynmgyv31HXFoBINCe1-XXZ4XU,18994
+ dask_cuda/tests/test_proxy.py,sha256=51qsXGJBg_hwSMRsC_QvJBz4wVM0Bf8fbFmTUFA7HJE,23809
+ dask_cuda/tests/test_spill.py,sha256=CYMbp5HDBYlZ7T_n8RfSOZxaWFcAQKjprjRM7Wupcdw,13419
  dask_cuda/tests/test_utils.py,sha256=JRIwXfemc3lWSzLJX0VcvR1_0wB4yeoOTsw7kB6z6pU,9176
  dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
  dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
  examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
  examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
- dask_cuda-24.8.2.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
- dask_cuda-24.8.2.dist-info/METADATA,sha256=6iMwPI8cWrEYDYz73vm8pw-LkVeEgTQzymJgRxj32VQ,2546
- dask_cuda-24.8.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
- dask_cuda-24.8.2.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
- dask_cuda-24.8.2.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
- dask_cuda-24.8.2.dist-info/RECORD,,
+ dask_cuda-24.12.0.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
+ dask_cuda-24.12.0.dist-info/METADATA,sha256=qFewjmkl67EsxFm9VoMTmw_XOOK3savtnO9hK-Qwx-E,2557
+ dask_cuda-24.12.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ dask_cuda-24.12.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+ dask_cuda-24.12.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
+ dask_cuda-24.12.0.dist-info/RECORD,,
dask_cuda-24.12.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (72.1.0)
+ Generator: setuptools (75.6.0)
  Root-Is-Purelib: true
  Tag: py3-none-any