dask-cuda 24.8.2__py3-none-any.whl → 24.10.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
dask_cuda/VERSION CHANGED
@@ -1 +1 @@
- 24.08.02
+ 24.10.00
dask_cuda/__init__.py CHANGED
@@ -9,6 +9,8 @@ import dask.dataframe.core
  import dask.dataframe.shuffle
  import dask.dataframe.multi
  import dask.bag.core
+ from distributed.protocol.cuda import cuda_deserialize, cuda_serialize
+ from distributed.protocol.serialize import dask_deserialize, dask_serialize

  from ._version import __git_commit__, __version__
  from .cuda_worker import CUDAWorker
@@ -48,3 +50,20 @@ dask.dataframe.shuffle.shuffle_group = proxify_decorator(
      dask.dataframe.shuffle.shuffle_group
  )
  dask.dataframe.core._concat = unproxify_decorator(dask.dataframe.core._concat)
+
+
+ def _register_cudf_spill_aware():
+     import cudf
+
+     # Only enable Dask/cuDF spilling if cuDF spilling is disabled, see
+     # https://github.com/rapidsai/dask-cuda/issues/1363
+     if not cudf.get_option("spill"):
+         # This reproduces the implementation of `_register_cudf`, see
+         # https://github.com/dask/distributed/blob/40fcd65e991382a956c3b879e438be1b100dff97/distributed/protocol/__init__.py#L106-L115
+         from cudf.comm import serialize
+
+
+ for registry in [cuda_serialize, cuda_deserialize, dask_serialize, dask_deserialize]:
+     for lib in ["cudf", "dask_cudf"]:
+         if lib in registry._lazy:
+             registry._lazy[lib] = _register_cudf_spill_aware
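Note on the new hook: importing dask_cuda now swaps the serializer entries that distributed registers lazily for "cudf" and "dask_cudf", so the Dask-level cuDF spilling serializers are only wired up when cuDF's own spilling is off. A minimal sketch of the user-visible effect (assumes a CUDA environment with cudf installed; the option names are the ones used in the check above):

import cudf
import dask_cuda  # noqa: F401  (import swaps the lazy registry entries shown above)

# With cuDF-managed spilling enabled, the hook's `if not cudf.get_option("spill")`
# branch is skipped when cudf serialization is first needed, deferring spilling
# to cuDF itself (see rapidsai/dask-cuda#1363).
cudf.set_option("spill", True)
print(cudf.get_option("spill"))  # True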
dask_cuda/benchmarks/local_cudf_groupby.py CHANGED
@@ -7,7 +7,7 @@ import pandas as pd
  import dask
  import dask.dataframe as dd
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -260,13 +260,6 @@ def parse_args():
          "type": str,
          "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
      },
-     {
-         "name": "--ignore-size",
-         "default": "1 MiB",
-         "metavar": "nbytes",
-         "type": parse_bytes,
-         "help": "Ignore messages smaller than this (default '1 MB')",
-     },
      {
          "name": "--runs",
          "default": 3,
dask_cuda/benchmarks/local_cudf_merge.py CHANGED
@@ -9,7 +9,7 @@ import pandas as pd
  import dask
  import dask.dataframe as dd
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -335,13 +335,6 @@ def parse_args():
          "action": "store_true",
          "help": "Use shuffle join (takes precedence over '--broadcast-join').",
      },
-     {
-         "name": "--ignore-size",
-         "default": "1 MiB",
-         "metavar": "nbytes",
-         "type": parse_bytes,
-         "help": "Ignore messages smaller than this (default '1 MB')",
-     },
      {
          "name": "--frac-match",
          "default": 0.3,
dask_cuda/benchmarks/local_cudf_shuffle.py CHANGED
@@ -228,13 +228,6 @@ def parse_args():
          "type": str,
          "help": "Do shuffle with GPU or CPU dataframes (default 'gpu')",
      },
-     {
-         "name": "--ignore-size",
-         "default": "1 MiB",
-         "metavar": "nbytes",
-         "type": parse_bytes,
-         "help": "Ignore messages smaller than this (default '1 MB')",
-     },
      {
          "name": "--runs",
          "default": 3,
dask_cuda/benchmarks/local_cupy.py CHANGED
@@ -8,7 +8,7 @@ from nvtx import end_range, start_range

  from dask import array as da
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -297,13 +297,6 @@ def parse_args():
          "type": int,
          "help": "Chunk size (default 2500).",
      },
-     {
-         "name": "--ignore-size",
-         "default": "1 MiB",
-         "metavar": "nbytes",
-         "type": parse_bytes,
-         "help": "Ignore messages smaller than this (default '1 MB').",
-     },
      {
          "name": "--runs",
          "default": 3,
dask_cuda/benchmarks/local_cupy_map_overlap.py CHANGED
@@ -10,7 +10,7 @@ from scipy.ndimage import convolve as sp_convolve

  from dask import array as da
  from dask.distributed import performance_report, wait
- from dask.utils import format_bytes, parse_bytes
+ from dask.utils import format_bytes

  from dask_cuda.benchmarks.common import Config, execute_benchmark
  from dask_cuda.benchmarks.utils import (
@@ -168,13 +168,6 @@ def parse_args():
          "type": int,
          "help": "Kernel size, 2*k+1, in each dimension (default 1)",
      },
-     {
-         "name": "--ignore-size",
-         "default": "1 MiB",
-         "metavar": "nbytes",
-         "type": parse_bytes,
-         "help": "Ignore messages smaller than this (default '1 MB')",
-     },
      {
          "name": "--runs",
          "default": 3,
dask_cuda/benchmarks/read_parquet.py ADDED
@@ -0,0 +1,268 @@
+ import contextlib
+ from collections import ChainMap
+ from time import perf_counter as clock
+
+ import fsspec
+ import pandas as pd
+
+ import dask
+ import dask.dataframe as dd
+ from dask.base import tokenize
+ from dask.distributed import performance_report
+ from dask.utils import format_bytes, parse_bytes
+
+ from dask_cuda.benchmarks.common import Config, execute_benchmark
+ from dask_cuda.benchmarks.utils import (
+     parse_benchmark_args,
+     print_key_value,
+     print_separator,
+     print_throughput_bandwidth,
+ )
+
+ DISK_SIZE_CACHE = {}
+ OPTIONS_CACHE = {}
+
+
+ def _noop(df):
+     return df
+
+
+ def read_data(paths, columns, backend, **kwargs):
+     with dask.config.set({"dataframe.backend": backend}):
+         return dd.read_parquet(
+             paths,
+             columns=columns,
+             **kwargs,
+         )
+
+
+ def get_fs_paths_kwargs(args):
+     kwargs = {}
+
+     storage_options = {}
+     if args.key:
+         storage_options["key"] = args.key
+     if args.secret:
+         storage_options["secret"] = args.secret
+
+     if args.filesystem == "arrow":
+         import pyarrow.fs as pa_fs
+         from fsspec.implementations.arrow import ArrowFSWrapper
+
+         _mapping = {
+             "key": "access_key",
+             "secret": "secret_key",
+         }  # See: pyarrow.fs.S3FileSystem docs
+         s3_args = {}
+         for k, v in storage_options.items():
+             s3_args[_mapping[k]] = v
+
+         fs = pa_fs.FileSystem.from_uri(args.path)[0]
+         try:
+             region = {"region": fs.region}
+         except AttributeError:
+             region = {}
+         kwargs["filesystem"] = type(fs)(**region, **s3_args)
+         fsspec_fs = ArrowFSWrapper(kwargs["filesystem"])
+
+         if args.type == "gpu":
+             kwargs["blocksize"] = args.blocksize
+     else:
+         fsspec_fs = fsspec.core.get_fs_token_paths(
+             args.path, mode="rb", storage_options=storage_options
+         )[0]
+         kwargs["filesystem"] = fsspec_fs
+         kwargs["blocksize"] = args.blocksize
+         kwargs["aggregate_files"] = args.aggregate_files
+
+     # Collect list of paths
+     stripped_url_path = fsspec_fs._strip_protocol(args.path)
+     if stripped_url_path.endswith("/"):
+         stripped_url_path = stripped_url_path[:-1]
+     paths = fsspec_fs.glob(f"{stripped_url_path}/*.parquet")
+     if args.file_count:
+         paths = paths[: args.file_count]
+
+     return fsspec_fs, paths, kwargs
+
+
+ def bench_once(client, args, write_profile=None):
+     global OPTIONS_CACHE
+     global DISK_SIZE_CACHE
+
+     # Construct kwargs
+     token = tokenize(args)
+     try:
+         fsspec_fs, paths, kwargs = OPTIONS_CACHE[token]
+     except KeyError:
+         fsspec_fs, paths, kwargs = get_fs_paths_kwargs(args)
+         OPTIONS_CACHE[token] = (fsspec_fs, paths, kwargs)
+
+     if write_profile is None:
+         ctx = contextlib.nullcontext()
+     else:
+         ctx = performance_report(filename=args.profile)
+
+     with ctx:
+         t1 = clock()
+         df = read_data(
+             paths,
+             columns=args.columns,
+             backend="cudf" if args.type == "gpu" else "pandas",
+             **kwargs,
+         )
+         num_rows = len(
+             # Use opaque `map_partitions` call to "block"
+             # dask-expr from using pq metadata to get length
+             df.map_partitions(
+                 _noop,
+                 meta=df._meta,
+                 enforce_metadata=False,
+             )
+         )
+         t2 = clock()
+
+     # Extract total size of files on disk
+     token = tokenize(paths)
+     try:
+         disk_size = DISK_SIZE_CACHE[token]
+     except KeyError:
+         disk_size = sum(fsspec_fs.sizes(paths))
+         DISK_SIZE_CACHE[token] = disk_size
+
+     return (disk_size, num_rows, t2 - t1)
+
+
+ def pretty_print_results(args, address_to_index, p2p_bw, results):
+     if args.markdown:
+         print("```")
+     print("Parquet read benchmark")
+     data_processed, row_count, durations = zip(*results)
+     print_separator(separator="-")
+     backend = "cudf" if args.type == "gpu" else "pandas"
+     print_key_value(key="Path", value=args.path)
+     print_key_value(key="Columns", value=f"{args.columns}")
+     print_key_value(key="Backend", value=f"{backend}")
+     print_key_value(key="Filesystem", value=f"{args.filesystem}")
+     print_key_value(key="Blocksize", value=f"{format_bytes(args.blocksize)}")
+     print_key_value(key="Aggregate files", value=f"{args.aggregate_files}")
+     print_key_value(key="Row count", value=f"{row_count[0]}")
+     print_key_value(key="Size on disk", value=f"{format_bytes(data_processed[0])}")
+     if args.markdown:
+         print("\n```")
+     args.no_show_p2p_bandwidth = True
+     print_throughput_bandwidth(
+         args, durations, data_processed, p2p_bw, address_to_index
+     )
+     print_separator(separator="=")
+
+
+ def create_tidy_results(args, p2p_bw, results):
+     configuration = {
+         "path": args.path,
+         "columns": args.columns,
+         "backend": "cudf" if args.type == "gpu" else "pandas",
+         "filesystem": args.filesystem,
+         "blocksize": args.blocksize,
+         "aggregate_files": args.aggregate_files,
+     }
+     timing_data = pd.DataFrame(
+         [
+             pd.Series(
+                 data=ChainMap(
+                     configuration,
+                     {
+                         "wallclock": duration,
+                         "data_processed": data_processed,
+                         "num_rows": num_rows,
+                     },
+                 )
+             )
+             for data_processed, num_rows, duration in results
+         ]
+     )
+     return timing_data, p2p_bw
+
+
+ def parse_args():
+     special_args = [
+         {
+             "name": "path",
+             "type": str,
+             "help": "Parquet directory to read from (must be a flat directory).",
+         },
+         {
+             "name": "--blocksize",
+             "default": "256MB",
+             "type": parse_bytes,
+             "help": "How to set the blocksize option",
+         },
+         {
+             "name": "--aggregate-files",
+             "default": False,
+             "action": "store_true",
+             "help": "How to set the aggregate_files option",
+         },
+         {
+             "name": "--file-count",
+             "type": int,
+             "help": "Maximum number of files to read.",
+         },
+         {
+             "name": "--columns",
+             "type": str,
+             "help": "Columns to read/select from data.",
+         },
+         {
+             "name": "--key",
+             "type": str,
+             "help": "Public S3 key.",
+         },
+         {
+             "name": "--secret",
+             "type": str,
+             "help": "Secret S3 key.",
+         },
+         {
+             "name": [
+                 "-t",
+                 "--type",
+             ],
+             "choices": ["cpu", "gpu"],
+             "default": "gpu",
+             "type": str,
+             "help": "Use GPU or CPU dataframes (default 'gpu')",
+         },
+         {
+             "name": "--filesystem",
+             "choices": ["arrow", "fsspec"],
+             "default": "fsspec",
+             "type": str,
+             "help": "Filesystem backend",
+         },
+         {
+             "name": "--runs",
+             "default": 3,
+             "type": int,
+             "help": "Number of runs",
+         },
+     ]
+
+     args = parse_benchmark_args(
+         description="Parquet read benchmark",
+         args_list=special_args,
+         check_explicit_comms=False,
+     )
+     args.no_show_p2p_bandwidth = True
+     return args
+
+
+ if __name__ == "__main__":
+     execute_benchmark(
+         Config(
+             args=parse_args(),
+             bench_once=bench_once,
+             create_tidy_results=create_tidy_results,
+             pretty_print_results=pretty_print_results,
+         )
+     )
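The new benchmark plugs into the shared runner (`execute_benchmark` / `parse_benchmark_args`), so it is launched as a module like the existing benchmarks. A hedged invocation sketch; the directory path is a placeholder and the flags shown are the ones defined in `special_args` above:

import subprocess
import sys

# Equivalent to:
#   python -m dask_cuda.benchmarks.read_parquet /data/parquet --filesystem arrow --blocksize 256MB --runs 3
subprocess.run(
    [
        sys.executable, "-m", "dask_cuda.benchmarks.read_parquet",
        "/data/parquet",          # placeholder: flat directory of *.parquet files
        "--filesystem", "arrow",  # or "fsspec" (default)
        "--blocksize", "256MB",
        "--runs", "3",
    ],
    check=True,
)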
dask_cuda/benchmarks/utils.py CHANGED
@@ -337,6 +337,13 @@ def parse_benchmark_args(
          "If the files already exist, new files are created with a uniquified "
          "BASENAME.",
      )
+     parser.add_argument(
+         "--ignore-size",
+         default="1 MiB",
+         metavar="nbytes",
+         type=parse_bytes,
+         help="Bandwidth statistics: ignore messages smaller than this (default '1 MB')",
+     )

      for args in args_list:
          name = args.pop("name")
@@ -765,7 +772,7 @@ def print_throughput_bandwidth(
      )
      print_key_value(
          key="Wall clock",
-         value=f"{format_time(durations.mean())} +/- {format_time(durations.std()) }",
+         value=f"{format_time(durations.mean())} +/- {format_time(durations.std())}",
      )
      if not args.no_show_p2p_bandwidth:
          print_separator(separator="=")
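Because `--ignore-size` now lives in `parse_benchmark_args`, the per-benchmark copies removed above are no longer needed and every benchmark using the shared parser gets the option for its bandwidth statistics. A standalone sketch of the same argparse pattern (plain argparse, not the dask_cuda helper itself):

import argparse

from dask.utils import parse_bytes

parser = argparse.ArgumentParser()
parser.add_argument(
    "--ignore-size",
    default="1 MiB",
    metavar="nbytes",
    type=parse_bytes,  # "1 MiB" -> 1048576 bytes
    help="Bandwidth statistics: ignore messages smaller than this",
)

print(parser.parse_args(["--ignore-size", "2 MiB"]).ignore_size)  # 2097152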
dask_cuda/cli.py CHANGED
@@ -167,10 +167,11 @@ def cuda():
  @click.option(
      "--rmm-release-threshold",
      default=None,
-     help="""When ``rmm.async`` is ``True`` and the pool size grows beyond this value, unused
-     memory held by the pool will be released at the next synchronization point. Can be
-     an integer (bytes), float (fraction of total device memory), string (like ``"5GB"``
-     or ``"5000M"``) or ``None``. By default, this feature is disabled.
+     help="""When ``rmm.async`` is ``True`` and the pool size grows beyond this
+     value, unused memory held by the pool will be released at the next
+     synchronization point. Can be an integer (bytes), float (fraction of total
+     device memory), string (like ``"5GB"`` or ``"5000M"``) or ``None``. By
+     default, this feature is disabled.

      .. note::
          This size is a per-worker configuration, and not cluster-wide.""",
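The reflowed help text documents behaviour that only applies when the RMM async allocator is enabled. A hedged sketch of the equivalent Python-side configuration, assuming the matching LocalCUDACluster keyword arguments (`rmm_async`, `rmm_release_threshold`) that mirror the CLI flags; the threshold value is an arbitrary placeholder:

from dask_cuda import LocalCUDACluster

if __name__ == "__main__":
    # Unused pool memory above ~5GB is released back at the next
    # synchronization point, per the option description above.
    cluster = LocalCUDACluster(
        rmm_async=True,
        rmm_release_threshold="5GB",
    )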
dask_cuda/cuda_worker.py CHANGED
@@ -195,6 +195,14 @@ class CUDAWorker(Server):
              },
          )

+         cudf_spill_warning = dask.config.get("cudf-spill-warning", default=True)
+         if enable_cudf_spill and cudf_spill_warning:
+             warnings.warn(
+                 "cuDF spilling is enabled, please ensure the client and scheduler "
+                 "processes set `CUDF_SPILL=on` as well. To disable this warning "
+                 "set `DASK_CUDF_SPILL_WARNING=False`."
+             )
+
          self.nannies = [
              Nanny(
                  scheduler,
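The new warning is gated on the `cudf-spill-warning` Dask config key, so it can be silenced from the environment of the `dask cuda worker` process. A hedged sketch based on the warning text and the test below (the config lookup shown is an assumption about how Dask maps the environment variable):

import os

os.environ["CUDF_SPILL"] = "on"                  # enable cuDF spilling for client/scheduler processes
os.environ["DASK_CUDF_SPILL_WARNING"] = "False"  # silence the dask-cuda warning shown above

import dask

print(dask.config.get("cudf-spill-warning", default=True))  # expected: False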
dask_cuda/local_cuda_cluster.py CHANGED
@@ -244,6 +244,13 @@ class LocalCUDACluster(LocalCluster):
          # initialization happens before we can set CUDA_VISIBLE_DEVICES
          os.environ["RAPIDS_NO_INITIALIZE"] = "True"

+         if enable_cudf_spill:
+             import cudf
+
+             # cuDF spilling must be enabled in the client/scheduler process too.
+             cudf.set_option("spill", enable_cudf_spill)
+             cudf.set_option("spill_stats", cudf_spill_stats)
+
          if threads_per_worker < 1:
              raise ValueError("threads_per_worker must be higher than 0.")

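With this change, `enable_cudf_spill` also turns on cuDF spilling in the client/scheduler process that creates the cluster. A hedged usage sketch (the constructor options are the ones exercised in the tests further down; requires a CUDA environment with cudf installed):

from dask.distributed import Client

from dask_cuda import LocalCUDACluster

if __name__ == "__main__":
    # cuDF manages device->host spilling itself; spill statistics are collected.
    cluster = LocalCUDACluster(enable_cudf_spill=True, cudf_spill_stats=True)
    client = Client(cluster)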
dask_cuda/tests/test_dask_cuda_worker.py CHANGED
@@ -567,3 +567,30 @@ def test_worker_timeout():
      assert "reason: nanny-close" in ret.stderr.lower()

      assert ret.returncode == 0
+
+
+ @pytest.mark.parametrize("enable_cudf_spill_warning", [False, True])
+ def test_worker_cudf_spill_warning(enable_cudf_spill_warning):  # noqa: F811
+     pytest.importorskip("rmm")
+
+     environ = {"CUDA_VISIBLE_DEVICES": "0"}
+     if not enable_cudf_spill_warning:
+         environ["DASK_CUDF_SPILL_WARNING"] = "False"
+
+     with patch.dict(os.environ, environ):
+         ret = subprocess.run(
+             [
+                 "dask",
+                 "cuda",
+                 "worker",
+                 "127.0.0.1:9369",
+                 "--enable-cudf-spill",
+                 "--death-timeout",
+                 "1",
+             ],
+             capture_output=True,
+         )
+         if enable_cudf_spill_warning:
+             assert b"UserWarning: cuDF spilling is enabled" in ret.stderr
+         else:
+             assert b"UserWarning: cuDF spilling is enabled" not in ret.stderr
dask_cuda/tests/test_gds.py CHANGED
@@ -38,7 +38,7 @@ def test_gds(gds_enabled, cuda_lib):
          a = data_create()
          header, frames = serialize(a, serializers=("disk",))
          b = deserialize(header, frames)
-         assert type(a) == type(b)
+         assert type(a) is type(b)
          assert data_compare(a, b)
      finally:
          ProxifyHostFile.register_disk_spilling()  # Reset disk spilling options
dask_cuda/tests/test_proxify_host_file.py CHANGED
@@ -252,7 +252,7 @@ async def test_local_cuda_cluster(jit_unspill):
              assert "ProxyObject" in str(type(x))
              assert x._pxy_get().serializer == "dask"
          else:
-             assert type(x) == cudf.DataFrame
+             assert type(x) is cudf.DataFrame
          assert len(x) == 10  # Trigger deserialization
          return x

dask_cuda/tests/test_proxy.py CHANGED
@@ -114,7 +114,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert expect == got

      # Check unary operators
@@ -124,7 +124,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert all(expect == got)

      # Check binary operators that takes a scalar as second argument
@@ -134,7 +134,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org, 2)
          got = op(pxy, 2)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert all(expect == got)

      # Check binary operators
@@ -192,7 +192,7 @@ def test_proxy_object_of_array(serializers, backend):
          pxy = proxy_object.asproxy(org.copy(), serializers=serializers)
          expect = op(org)
          got = op(pxy)
-         assert type(expect) == type(got)
+         assert type(expect) is type(got)
          assert expect == got

      # Check reflected methods
@@ -297,7 +297,7 @@ async def test_spilling_local_cuda_cluster(jit_unspill):
              assert "ProxyObject" in str(type(x))
              assert x._pxy_get().serializer == "dask"
          else:
-             assert type(x) == cudf.DataFrame
+             assert type(x) is cudf.DataFrame
          assert len(x) == 10  # Trigger deserialization
          return x

dask_cuda/tests/test_spill.py CHANGED
@@ -11,6 +11,8 @@ from distributed.metrics import time
  from distributed.sizeof import sizeof
  from distributed.utils_test import gen_cluster, gen_test, loop  # noqa: F401

+ import dask_cudf
+
  from dask_cuda import LocalCUDACluster, utils
  from dask_cuda.utils_test import IncreasedCloseTimeoutNanny

@@ -18,6 +20,57 @@ if utils.get_device_total_memory() < 1e10:
      pytest.skip("Not enough GPU memory", allow_module_level=True)


+ def _set_cudf_device_limit():
+     """Ensure spilling for objects of all sizes"""
+     import cudf
+
+     cudf.set_option("spill_device_limit", 0)
+
+
+ def _assert_cudf_spill_stats(enable_cudf_spill, dask_worker=None):
+     """Ensure cuDF has spilled data with its internal mechanism"""
+     import cudf
+
+     global_manager = cudf.core.buffer.spill_manager.get_global_manager()
+
+     if enable_cudf_spill:
+         stats = global_manager.statistics
+         buffers = global_manager.buffers()
+         assert stats.spill_totals[("gpu", "cpu")][0] > 1000
+         assert stats.spill_totals[("cpu", "gpu")][0] > 1000
+         assert len(buffers) > 0
+     else:
+         assert global_manager is None
+
+
+ @pytest.fixture(params=[False, True])
+ def cudf_spill(request):
+     """Fixture to enable and clear cuDF spill manager in client process"""
+     cudf = pytest.importorskip("cudf")
+
+     enable_cudf_spill = request.param
+
+     if enable_cudf_spill:
+         # If the global spill manager was previously set, fail.
+         assert cudf.core.buffer.spill_manager._global_manager is None
+
+         cudf.set_option("spill", True)
+         cudf.set_option("spill_stats", True)
+
+         # This change is to prevent changing RMM resource stack in cuDF,
+         # workers do not need this because they are spawned as new
+         # processes for every new test that runs.
+         cudf.set_option("spill_on_demand", False)
+
+         _set_cudf_device_limit()
+
+     yield enable_cudf_spill
+
+     cudf.set_option("spill", False)
+     cudf.core.buffer.spill_manager._global_manager_uninitialized = True
+     cudf.core.buffer.spill_manager._global_manager = None
+
+
  def device_host_file_size_matches(
      dhf, total_bytes, device_chunk_overhead=0, serialized_chunk_overhead=1024
  ):
@@ -244,9 +297,11 @@ async def test_cupy_cluster_device_spill(params):
      ],
  )
  @gen_test(timeout=30)
- async def test_cudf_cluster_device_spill(params):
+ async def test_cudf_cluster_device_spill(params, cudf_spill):
      cudf = pytest.importorskip("cudf")

+     enable_cudf_spill = cudf_spill
+
      with dask.config.set(
          {
              "distributed.comm.compression": False,
@@ -266,6 +321,7 @@ async def test_cudf_cluster_device_spill(params):
              device_memory_limit=params["device_memory_limit"],
              memory_limit=params["memory_limit"],
              worker_class=IncreasedCloseTimeoutNanny,
+             enable_cudf_spill=enable_cudf_spill,
          ) as cluster:
              async with Client(cluster, asynchronous=True) as client:

@@ -294,21 +350,28 @@ async def test_cudf_cluster_device_spill(params):
                  del cdf
                  gc.collect()

-                 await client.run(
-                     assert_host_chunks,
-                     params["spills_to_disk"],
-                 )
-                 await client.run(
-                     assert_disk_chunks,
-                     params["spills_to_disk"],
-                 )
-
-                 await client.run(
-                     worker_assert,
-                     nbytes,
-                     32,
-                     2048,
-                 )
+                 if enable_cudf_spill:
+                     await client.run(
+                         worker_assert,
+                         0,
+                         0,
+                         0,
+                     )
+                 else:
+                     await client.run(
+                         assert_host_chunks,
+                         params["spills_to_disk"],
+                     )
+                     await client.run(
+                         assert_disk_chunks,
+                         params["spills_to_disk"],
+                     )
+                     await client.run(
+                         worker_assert,
+                         nbytes,
+                         32,
+                         2048,
+                     )

                  del cdf2

@@ -324,3 +387,40 @@ async def test_cudf_cluster_device_spill(params):
                      gc.collect()
                  else:
                      break
+
+
+ @gen_test(timeout=30)
+ async def test_cudf_spill_cluster(cudf_spill):
+     cudf = pytest.importorskip("cudf")
+     enable_cudf_spill = cudf_spill
+
+     async with LocalCUDACluster(
+         n_workers=1,
+         scheduler_port=0,
+         silence_logs=False,
+         dashboard_address=None,
+         asynchronous=True,
+         device_memory_limit=None,
+         memory_limit=None,
+         worker_class=IncreasedCloseTimeoutNanny,
+         enable_cudf_spill=enable_cudf_spill,
+         cudf_spill_stats=enable_cudf_spill,
+     ) as cluster:
+         async with Client(cluster, asynchronous=True) as client:
+
+             await client.wait_for_workers(1)
+             await client.run(_set_cudf_device_limit)
+
+             cdf = cudf.DataFrame(
+                 {
+                     "a": list(range(200)),
+                     "b": list(reversed(range(200))),
+                     "c": list(range(200)),
+                 }
+             )
+
+             ddf = dask_cudf.from_cudf(cdf, npartitions=2).sum().persist()
+             await wait(ddf)
+
+             await client.run(_assert_cudf_spill_stats, enable_cudf_spill)
+             _assert_cudf_spill_stats(enable_cudf_spill)
dask_cuda-24.8.2.dist-info/METADATA → dask_cuda-24.10.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dask-cuda
- Version: 24.8.2
+ Version: 24.10.0
  Summary: Utilities for Dask and CUDA interactions
  Author: NVIDIA Corporation
  License: Apache 2.0
@@ -12,18 +12,18 @@ Classifier: Topic :: Database
  Classifier: Topic :: Scientific/Engineering
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Python: >=3.9
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: click>=8.1
  Requires-Dist: numba>=0.57
- Requires-Dist: numpy<2.0a0,>=1.23
+ Requires-Dist: numpy<3.0a0,>=1.23
  Requires-Dist: pandas>=1.3
  Requires-Dist: pynvml<11.5,>=11.0.0
- Requires-Dist: rapids-dask-dependency==24.8.*
+ Requires-Dist: rapids-dask-dependency==24.10.*
  Requires-Dist: zict>=2.0.0
  Provides-Extra: docs
  Requires-Dist: numpydoc>=1.1.0; extra == "docs"
@@ -31,12 +31,12 @@ Requires-Dist: sphinx; extra == "docs"
  Requires-Dist: sphinx-click>=2.7.1; extra == "docs"
  Requires-Dist: sphinx-rtd-theme>=0.5.1; extra == "docs"
  Provides-Extra: test
- Requires-Dist: cudf==24.8.*; extra == "test"
- Requires-Dist: dask-cudf==24.8.*; extra == "test"
- Requires-Dist: kvikio==24.8.*; extra == "test"
+ Requires-Dist: cudf==24.10.*; extra == "test"
+ Requires-Dist: dask-cudf==24.10.*; extra == "test"
+ Requires-Dist: kvikio==24.10.*; extra == "test"
  Requires-Dist: pytest; extra == "test"
  Requires-Dist: pytest-cov; extra == "test"
- Requires-Dist: ucx-py==0.39.*; extra == "test"
+ Requires-Dist: ucx-py==0.40.*; extra == "test"

  Dask CUDA
  =========
dask_cuda-24.8.2.dist-info/RECORD → dask_cuda-24.10.0.dist-info/RECORD CHANGED
@@ -1,15 +1,15 @@
- dask_cuda/VERSION,sha256=5YtjwV2EoD7E5Ed4K-PvnU0eEtdkkn33JHuNFDy8oKA,8
- dask_cuda/__init__.py,sha256=JLDWev7vI_dPusLgRdOwXBz-xfhlX_hc-DzmLtrEYO0,1918
+ dask_cuda/VERSION,sha256=OZZp3AWPPk70ig1lMkkw_P1GS8PwjLZvHMBcR3ppnxM,8
+ dask_cuda/__init__.py,sha256=eOCH3Wj0A8X0qbNUoNA15dgxb2O-ZApha4QHq5EEVFw,2748
  dask_cuda/_version.py,sha256=cHDO9AzNtxkCVhwYu7hL3H7RPAkQnxpKBjElOst3rkI,964
- dask_cuda/cli.py,sha256=Y3aObfAyMwOIo0oVz3-NC2InGLShOpeINwW5ROTF2s8,16616
- dask_cuda/cuda_worker.py,sha256=uqyoDKsSe7sKN3StMVyz_971rj0Sjpmwfv7Bj083Wss,8959
+ dask_cuda/cli.py,sha256=Qvjxo3zk1g0pgWtsWAUXOUIbdEnWIFjTSTd0SdxFpx4,16620
+ dask_cuda/cuda_worker.py,sha256=H3Nq2zfviO4m6CFMx6XJXkxOMUhMai2E5y8TkYI33sw,9356
  dask_cuda/device_host_file.py,sha256=yS31LGtt9VFAG78uBBlTDr7HGIng2XymV1OxXIuEMtM,10272
  dask_cuda/disk_io.py,sha256=urSLKiPvJvYmKCzDPOUDCYuLI3r1RUiyVh3UZGRoF_Y,6626
  dask_cuda/get_device_memory_objects.py,sha256=R3U2cq4fJZPgtsUKyIguy9161p3Q99oxmcCmTcg6BtQ,4075
  dask_cuda/initialize.py,sha256=Gjcxs_c8DTafgsHe5-2mw4lJdOmbFJJAZVOnxA8lTjM,6462
  dask_cuda/is_device_object.py,sha256=CnajvbQiX0FzFzwft0MqK1OPomx3ZGDnDxT56wNjixw,1046
  dask_cuda/is_spillable_object.py,sha256=CddGmg0tuSpXh2m_TJSY6GRpnl1WRHt1CRcdWgHPzWA,1457
- dask_cuda/local_cuda_cluster.py,sha256=jgXjd6OvEDfQ3iXU8hV_UfULa13GZsli0SGC2PIouZk,18882
+ dask_cuda/local_cuda_cluster.py,sha256=CGhQcauzqYafUqXlL--mdqo-Q-wuMmHYRFEFU6zFQm4,19136
  dask_cuda/plugins.py,sha256=DCf7PnIBu_VNjFfrFeb1zCNuEnCaX9oz4Umn76t02Mc,4630
  dask_cuda/proxify_device_objects.py,sha256=99CD7LOE79YiQGJ12sYl_XImVhJXpFR4vG5utdkjTQo,8108
  dask_cuda/proxify_host_file.py,sha256=Wf5CFCC1JN5zmfvND3ls0M5FL01Y8VhHrk0xV3UQ9kk,30850
@@ -19,36 +19,37 @@ dask_cuda/utils_test.py,sha256=WNMR0gic2tuP3pgygcR9g52NfyX8iGMOan6juXhpkCE,1694
  dask_cuda/worker_spec.py,sha256=7-Uq_e5q2SkTlsmctMcYLCa9_3RiiVHZLIN7ctfaFmE,4376
  dask_cuda/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/benchmarks/common.py,sha256=2MnDdQjvHfGaUWDgiTcTGI_EeKPmVBEwoWfsJUNpOjU,6613
- dask_cuda/benchmarks/local_cudf_groupby.py,sha256=T9lA9nb4Wzu46AH--SJEVCeCm3650J7slapdNR_08FU,8904
- dask_cuda/benchmarks/local_cudf_merge.py,sha256=AsuVnMA3H93sJwjjgi4KaIdYKnnX1OeRMPiXizrwHGk,12577
- dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=2xWJZf3gwDNimXKZN2ivtU3OE_qec1KNOhgL4_AGQZU,8655
- dask_cuda/benchmarks/local_cupy.py,sha256=aUKIYfeR7c77K4kKk697Rxo8tG8kFabQ9jQEVGr-oTs,10762
- dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=_texYmam1K_XbzIvURltui5KRsISGFNylXiGUtgRIz0,6442
- dask_cuda/benchmarks/utils.py,sha256=4k8KnJPOczKDQNBPRWlaGsU2zdEA09BDGgklUXggwMU,30008
+ dask_cuda/benchmarks/local_cudf_groupby.py,sha256=GhYxQSjT_Y8FI4OsLMEh9507fMcE0bU0SUbU34Nf9ZI,8661
+ dask_cuda/benchmarks/local_cudf_merge.py,sha256=KMxaZ8lsT2TvKuZBiABFD-CAYA67ZScqgFGxSwmrRYg,12334
+ dask_cuda/benchmarks/local_cudf_shuffle.py,sha256=rWG-xJqFsRbSOQHvosnr3wBcvoKbv_e68_tVjaDpxes,8425
+ dask_cuda/benchmarks/local_cupy.py,sha256=jrYV84h9PKeSHLNGzUH_3G6ICsz56rVO7uMyqSEFfc8,10518
+ dask_cuda/benchmarks/local_cupy_map_overlap.py,sha256=7ZuNSyBTsWo0zW3Wz2ZgbbLrorK860Dff42NGN_3zng,6199
+ dask_cuda/benchmarks/read_parquet.py,sha256=TARcG-TS1NGcQWJmuAKtfmBmy5LAaLc3xgtKgAd1DaA,7650
+ dask_cuda/benchmarks/utils.py,sha256=RbiwT8S_PF1xJA87c4-FgYjrXrWWcnHFMnk-QdvnsSo,30229
  dask_cuda/explicit_comms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/explicit_comms/comms.py,sha256=Su6PuNo68IyS-AwoqU4S9TmqWsLvUdNa0jot2hx8jQQ,10400
  dask_cuda/explicit_comms/dataframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dask_cuda/explicit_comms/dataframe/shuffle.py,sha256=4xfhfbTGa36YPs_ex1_fFhzfGMYJq-QkS5q0RwgeHh8,20645
  dask_cuda/tests/test_cudf_builtin_spilling.py,sha256=qVN9J0Hdv66A9COFArLIdRriyyxEKpS3lEZGHbVHaq8,4903
- dask_cuda/tests/test_dask_cuda_worker.py,sha256=o5g0_t-2M_2lfPeOPTS4NVF4rnQF0ZWAZekXw2h0xPc,19610
+ dask_cuda/tests/test_dask_cuda_worker.py,sha256=6rroHvJAn5R3X9LwIcE8QrPxG1GO3PaxXVjhbdQ90Pw,20477
  dask_cuda/tests/test_device_host_file.py,sha256=79ssUISo1YhsW_7HdwqPfsH2LRzS2bi5BjPym1Sdgqw,5882
  dask_cuda/tests/test_dgx.py,sha256=BPCF4ZvhrVKkT43OOFHdijuo-M34vW3V18C8rRH1HXg,7489
  dask_cuda/tests/test_explicit_comms.py,sha256=Pa5vVx63qWtScnVJuS31WESXIt2FPyTJVFO-0OUbbmU,15276
  dask_cuda/tests/test_from_array.py,sha256=okT1B6UqHmLxoy0uER0Ylm3UyOmi5BAXwJpTuTAw44I,601
- dask_cuda/tests/test_gds.py,sha256=6jf0HPTHAIG8Mp_FC4Ai4zpn-U1K7yk0fSXg8He8-r8,1513
+ dask_cuda/tests/test_gds.py,sha256=j1Huud6UGm1fbkyRLQEz_ysrVw__5AimwSn_M-2GEvs,1513
  dask_cuda/tests/test_initialize.py,sha256=Rba59ZbljEm1yyN94_sWZPEE_f7hWln95aiBVc49pmY,6960
  dask_cuda/tests/test_local_cuda_cluster.py,sha256=Lc9QncyGwBwhaZPGBfreXJf3ZC9Zd8SjDc2fpeQ-BT0,19710
- dask_cuda/tests/test_proxify_host_file.py,sha256=Yiv0sDcUoWw0d2oiPeHGoHqqSSM4lfQ4rChCiaxb6EU,18994
- dask_cuda/tests/test_proxy.py,sha256=OnGnPkl5ksCb-3hpEKG2z1OfPK9DbnOCtBHOjcUUjhg,23809
- dask_cuda/tests/test_spill.py,sha256=xN9PbVERBYMuZxvscSO0mAM22loq9WT3ltZVBFxlmM4,10239
+ dask_cuda/tests/test_proxify_host_file.py,sha256=LC3jjo_gbfhdIy1Zy_ynmgyv31HXFoBINCe1-XXZ4XU,18994
+ dask_cuda/tests/test_proxy.py,sha256=51qsXGJBg_hwSMRsC_QvJBz4wVM0Bf8fbFmTUFA7HJE,23809
+ dask_cuda/tests/test_spill.py,sha256=CYMbp5HDBYlZ7T_n8RfSOZxaWFcAQKjprjRM7Wupcdw,13419
  dask_cuda/tests/test_utils.py,sha256=JRIwXfemc3lWSzLJX0VcvR1_0wB4yeoOTsw7kB6z6pU,9176
  dask_cuda/tests/test_version.py,sha256=vK2HjlRLX0nxwvRsYxBqhoZryBNZklzA-vdnyuWDxVg,365
  dask_cuda/tests/test_worker_spec.py,sha256=Bvu85vkqm6ZDAYPXKMJlI2pm9Uc5tiYKNtO4goXSw-I,2399
  examples/ucx/client_initialize.py,sha256=YN3AXHF8btcMd6NicKKhKR9SXouAsK1foJhFspbOn70,1262
  examples/ucx/local_cuda_cluster.py,sha256=7xVY3EhwhkY2L4VZin_BiMCbrjhirDNChoC86KiETNc,1983
- dask_cuda-24.8.2.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
- dask_cuda-24.8.2.dist-info/METADATA,sha256=6iMwPI8cWrEYDYz73vm8pw-LkVeEgTQzymJgRxj32VQ,2546
- dask_cuda-24.8.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
- dask_cuda-24.8.2.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
- dask_cuda-24.8.2.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
- dask_cuda-24.8.2.dist-info/RECORD,,
+ dask_cuda-24.10.0.dist-info/LICENSE,sha256=MjI3I-EgxfEvZlgjk82rgiFsZqSDXHFETd2QJ89UwDA,11348
+ dask_cuda-24.10.0.dist-info/METADATA,sha256=lnlY2Dn1DOh5RPh5xwCgkB3Br_RLeeC_dERheomPDrw,2553
+ dask_cuda-24.10.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ dask_cuda-24.10.0.dist-info/entry_points.txt,sha256=UcRaKVEpywtxc6pF1VnfMB0UK4sJg7a8_NdZF67laPM,136
+ dask_cuda-24.10.0.dist-info/top_level.txt,sha256=3kKxJxeM108fuYc_lwwlklP7YBU9IEmdmRAouzi397o,33
+ dask_cuda-24.10.0.dist-info/RECORD,,
dask_cuda-24.8.2.dist-info/WHEEL → dask_cuda-24.10.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (72.1.0)
+ Generator: setuptools (75.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any