dask-cuda 25.6.0__py3-none-any.whl → 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. dask_cuda/GIT_COMMIT +1 -1
  2. dask_cuda/VERSION +1 -1
  3. dask_cuda/benchmarks/common.py +4 -1
  4. dask_cuda/benchmarks/local_cudf_groupby.py +3 -0
  5. dask_cuda/benchmarks/local_cudf_merge.py +4 -1
  6. dask_cuda/benchmarks/local_cudf_shuffle.py +4 -1
  7. dask_cuda/benchmarks/local_cupy.py +3 -0
  8. dask_cuda/benchmarks/local_cupy_map_overlap.py +3 -0
  9. dask_cuda/benchmarks/utils.py +6 -3
  10. dask_cuda/cli.py +21 -15
  11. dask_cuda/cuda_worker.py +28 -58
  12. dask_cuda/device_host_file.py +31 -15
  13. dask_cuda/disk_io.py +7 -4
  14. dask_cuda/explicit_comms/comms.py +11 -7
  15. dask_cuda/explicit_comms/dataframe/shuffle.py +23 -23
  16. dask_cuda/get_device_memory_objects.py +4 -7
  17. dask_cuda/initialize.py +149 -94
  18. dask_cuda/local_cuda_cluster.py +52 -70
  19. dask_cuda/plugins.py +17 -16
  20. dask_cuda/proxify_device_objects.py +12 -10
  21. dask_cuda/proxify_host_file.py +30 -27
  22. dask_cuda/proxy_object.py +20 -17
  23. dask_cuda/tests/conftest.py +41 -0
  24. dask_cuda/tests/test_cudf_builtin_spilling.py +3 -1
  25. dask_cuda/tests/test_dask_cuda_worker.py +109 -25
  26. dask_cuda/tests/test_dask_setup.py +193 -0
  27. dask_cuda/tests/test_dgx.py +20 -44
  28. dask_cuda/tests/test_explicit_comms.py +31 -12
  29. dask_cuda/tests/test_from_array.py +4 -6
  30. dask_cuda/tests/test_initialize.py +233 -65
  31. dask_cuda/tests/test_local_cuda_cluster.py +129 -68
  32. dask_cuda/tests/test_proxify_host_file.py +28 -7
  33. dask_cuda/tests/test_proxy.py +15 -13
  34. dask_cuda/tests/test_spill.py +10 -3
  35. dask_cuda/tests/test_utils.py +100 -29
  36. dask_cuda/tests/test_worker_spec.py +6 -0
  37. dask_cuda/utils.py +211 -42
  38. dask_cuda/utils_test.py +10 -7
  39. dask_cuda/worker_common.py +196 -0
  40. dask_cuda/worker_spec.py +6 -1
  41. {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/METADATA +11 -4
  42. dask_cuda-25.10.0.dist-info/RECORD +63 -0
  43. dask_cuda-25.10.0.dist-info/top_level.txt +6 -0
  44. shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
  45. shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
  46. shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
  47. dask_cuda-25.6.0.dist-info/RECORD +0 -57
  48. dask_cuda-25.6.0.dist-info/top_level.txt +0 -4
  49. {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/WHEEL +0 -0
  50. {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/entry_points.txt +0 -0
  51. {dask_cuda-25.6.0.dist-info → dask_cuda-25.10.0.dist-info}/licenses/LICENSE +0 -0
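
Note on the recurring test changes below: many hunks gate tests on a new `dask_cuda.utils.has_device_memory_resource()` helper and on `skip_if_no_device_memory` / `skip_if_device_memory` pytest markers. The markers are presumably registered by the newly added `dask_cuda/tests/conftest.py` (+41 lines), whose body is not included in this diff; the following is only a rough sketch of how such markers could be wired up, not the actual conftest.py contents.

# Hypothetical sketch only -- the real dask_cuda/tests/conftest.py is not shown in this diff.
import pytest

from dask_cuda.utils import has_device_memory_resource  # present in 25.10.0 per the hunks below


def pytest_configure(config):
    # Register the custom markers so pytest does not warn about unknown marks.
    config.addinivalue_line(
        "markers",
        "skip_if_no_device_memory(reason): skip when the GPU has no dedicated memory resource",
    )
    config.addinivalue_line(
        "markers",
        "skip_if_device_memory(reason): skip when the GPU has a dedicated memory resource",
    )


def pytest_runtest_setup(item):
    # Evaluate the markers before each test and skip when the device does not match.
    marker = item.get_closest_marker("skip_if_no_device_memory")
    if marker is not None and not has_device_memory_resource():
        pytest.skip(marker.args[0] if marker.args else "requires dedicated device memory")

    marker = item.get_closest_marker("skip_if_device_memory")
    if marker is not None and has_device_memory_resource():
        pytest.skip(marker.args[0] if marker.args else "requires a device without dedicated memory")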
dask_cuda/tests/test_local_cuda_cluster.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0

 import asyncio
+import contextlib
 import os
 import pkgutil
 import sys
@@ -20,6 +21,7 @@ from dask_cuda.utils import (
     get_device_total_memory,
     get_gpu_count_mig,
     get_gpu_uuid,
+    has_device_memory_resource,
     print_cluster_config,
 )
 from dask_cuda.utils_test import MockWorker
@@ -28,7 +30,8 @@ from dask_cuda.utils_test import MockWorker
 @gen_test(timeout=20)
 async def test_local_cuda_cluster():
     async with LocalCUDACluster(
-        scheduler_port=0, asynchronous=True, device_memory_limit=1
+        scheduler_port=0,
+        asynchronous=True,
     ) as cluster:
         async with Client(cluster, asynchronous=True) as client:
             assert len(cluster.workers) == utils.get_n_gpus()
@@ -68,8 +71,8 @@ async def test_with_subset_of_cuda_visible_devices():
     async with LocalCUDACluster(
         scheduler_port=0,
         asynchronous=True,
-        device_memory_limit=1,
         worker_class=MockWorker,
+        data=dict,
     ) as cluster:
         async with Client(cluster, asynchronous=True) as client:
             assert len(cluster.workers) == 4
@@ -90,40 +93,27 @@ async def test_with_subset_of_cuda_visible_devices():
         }


-@pytest.mark.parametrize(
-    "protocol",
-    ["ucx", "ucxx"],
-)
 @gen_test(timeout=20)
-async def test_ucx_protocol(protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+async def test_ucx_protocol():
+    pytest.importorskip("distributed_ucxx")

     async with LocalCUDACluster(
-        protocol=protocol, asynchronous=True, data=dict
+        protocol="ucx", asynchronous=True, data=dict
     ) as cluster:
         assert all(
-            ws.address.startswith(f"{protocol}://")
-            for ws in cluster.scheduler.workers.values()
+            ws.address.startswith("ucx://") for ws in cluster.scheduler.workers.values()
         )


-@pytest.mark.parametrize(
-    "protocol",
-    ["ucx", "ucxx"],
-)
 @gen_test(timeout=20)
-async def test_explicit_ucx_with_protocol_none(protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+async def test_explicit_ucx_with_protocol_none():
+    pytest.importorskip("distributed_ucxx")

-    initialize(protocol=protocol, enable_tcp_over_ucx=True)
+    initialize(enable_tcp_over_ucx=True)
     async with LocalCUDACluster(
-        protocol=None, enable_tcp_over_ucx=True, asynchronous=True, data=dict
+        protocol=None,
+        enable_tcp_over_ucx=True,
+        asynchronous=True,
     ) as cluster:
         assert all(
             ws.address.startswith("ucx://") for ws in cluster.scheduler.workers.values()
@@ -131,18 +121,11 @@ async def test_explicit_ucx_with_protocol_none(protocol):


 @pytest.mark.filterwarnings("ignore:Exception ignored in")
-@pytest.mark.parametrize(
-    "protocol",
-    ["ucx", "ucxx"],
-)
 @gen_test(timeout=20)
-async def test_ucx_protocol_type_error(protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+async def test_ucx_protocol_type_error():
+    pytest.importorskip("distributed_ucxx")

-    initialize(protocol=protocol, enable_tcp_over_ucx=True)
+    initialize(enable_tcp_over_ucx=True)
     with pytest.raises(TypeError):
         async with LocalCUDACluster(
             protocol="tcp", enable_tcp_over_ucx=True, asynchronous=True, data=dict
@@ -153,7 +136,10 @@ async def test_ucx_protocol_type_error(protocol):
 @gen_test(timeout=20)
 async def test_n_workers():
     async with LocalCUDACluster(
-        CUDA_VISIBLE_DEVICES="0,1", worker_class=MockWorker, asynchronous=True
+        CUDA_VISIBLE_DEVICES="0,1",
+        worker_class=MockWorker,
+        asynchronous=True,
+        data=dict,
     ) as cluster:
         assert len(cluster.workers) == 2
         assert len(cluster.worker_spec) == 2
@@ -208,10 +194,13 @@ async def test_no_memory_limits_cudaworker():
 @gen_test(timeout=20)
 async def test_all_to_all():
     async with LocalCUDACluster(
-        CUDA_VISIBLE_DEVICES="0,1", worker_class=MockWorker, asynchronous=True
+        CUDA_VISIBLE_DEVICES="0,1",
+        worker_class=MockWorker,
+        asynchronous=True,
+        data=dict,
     ) as cluster:
         async with Client(cluster, asynchronous=True) as client:
-            workers = list(client.scheduler_info()["workers"])
+            workers = list(client.scheduler_info(n_workers=-1)["workers"])
             n_workers = len(workers)
             await utils.all_to_all(client)
             # assert all to all has resulted in all data on every worker
@@ -263,11 +252,6 @@ async def test_rmm_managed():
 async def test_rmm_async():
     rmm = pytest.importorskip("rmm")

-    driver_version = rmm._cuda.gpu.driverGetVersion()
-    runtime_version = rmm._cuda.gpu.runtimeGetVersion()
-    if driver_version < 11020 or runtime_version < 11020:
-        pytest.skip("cudaMallocAsync not supported")
-
     async with LocalCUDACluster(
         rmm_async=True,
         rmm_pool_size="2GB",
@@ -290,11 +274,6 @@ async def test_rmm_async():
 async def test_rmm_async_with_maximum_pool_size():
     rmm = pytest.importorskip("rmm")

-    driver_version = rmm._cuda.gpu.driverGetVersion()
-    runtime_version = rmm._cuda.gpu.runtimeGetVersion()
-    if driver_version < 11020 or runtime_version < 11020:
-        pytest.skip("cudaMallocAsync not supported")
-
     async with LocalCUDACluster(
         rmm_async=True,
         rmm_pool_size="2GB",
@@ -381,7 +360,6 @@ async def test_cluster_worker():
     async with LocalCUDACluster(
         scheduler_port=0,
         asynchronous=True,
-        device_memory_limit=1,
         n_workers=1,
     ) as cluster:
         assert len(cluster.workers) == 1
@@ -464,7 +442,7 @@ async def test_get_cluster_configuration():
     async with LocalCUDACluster(
         rmm_pool_size="2GB",
         rmm_maximum_pool_size="3GB",
-        device_memory_limit="30B",
+        device_memory_limit="30B" if has_device_memory_resource() else None,
         CUDA_VISIBLE_DEVICES="0",
         scheduler_port=0,
         asynchronous=True,
@@ -474,10 +452,14 @@ async def test_get_cluster_configuration():
             assert ret["[plugin] RMMSetup"]["initial_pool_size"] == 2000000000
             assert ret["[plugin] RMMSetup"]["maximum_pool_size"] == 3000000000
             assert ret["jit-unspill"] is False
-            assert ret["device-memory-limit"] == 30
+            if has_device_memory_resource():
+                assert ret["device-memory-limit"] == 30


 @gen_test(timeout=20)
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support fractional limits"
+)
 async def test_worker_fraction_limits():
     async with LocalCUDACluster(
         dashboard_address=None,
@@ -503,6 +485,40 @@ async def test_worker_fraction_limits():
             )


+# Intentionally not using @gen_test to skip cleanup checks
+@pytest.mark.parametrize(
+    "argument", ["pool_size", "maximum_pool_size", "release_threshold"]
+)
+@pytest.mark.xfail(reason="https://github.com/rapidsai/dask-cuda/issues/1265")
+@pytest.mark.skip_if_device_memory(
+    "Devices with dedicated memory resources cannot test error"
+)
+def test_worker_fraction_limits_no_dedicated_memory(argument):
+    async def _test_worker_fraction_limits_no_dedicated_memory():
+        if argument == "pool_size":
+            kwargs = {"rmm_pool_size": "0.1"}
+        elif argument == "maximum_pool_size":
+            kwargs = {"rmm_pool_size": "1 GiB", "rmm_maximum_pool_size": "0.1"}
+        else:
+            kwargs = {"rmm_async": True, "rmm_release_threshold": "0.1"}
+
+        with raises_with_cause(
+            RuntimeError,
+            "Nanny failed to start",
+            RuntimeError,
+            "Worker failed to start",
+            ValueError,
+            "Fractional of total device memory not supported in devices without a "
+            "dedicated memory resource",
+        ):
+            await LocalCUDACluster(
+                asynchronous=True,
+                **kwargs,
+            )
+
+    asyncio.run(_test_worker_fraction_limits_no_dedicated_memory())
+
+
 @gen_test(timeout=20)
 async def test_cudf_spill_disabled():
     cudf = pytest.importorskip("cudf")
@@ -527,6 +543,9 @@ async def test_cudf_spill_disabled():


 @gen_test(timeout=20)
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources cannot enable cuDF spill"
+)
 async def test_cudf_spill():
     cudf = pytest.importorskip("cudf")

@@ -551,27 +570,69 @@ async def test_cudf_spill():
                 assert v == 2


+@pytest.mark.skip_if_device_memory(
+    "Devices with dedicated memory resources cannot test error"
+)
+@gen_test(timeout=20)
+async def test_cudf_spill_no_dedicated_memory():
+    cudf = pytest.importorskip("cudf")  # noqa: F841
+
+    with pytest.raises(
+        ValueError,
+        match="cuDF spilling is not supported on devices without dedicated memory",
+    ):
+        await LocalCUDACluster(
+            enable_cudf_spill=True,
+            cudf_spill_stats=2,
+            asynchronous=True,
+        )
+
+
+@pytest.mark.parametrize(
+    "jit_unspill",
+    [False, True],
+)
 @pytest.mark.parametrize(
-    "protocol",
-    ["ucx", "ucxx"],
+    "device_memory_limit",
+    [None, "1B"],
 )
-def test_print_cluster_config(capsys, protocol):
-    if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+def test_print_cluster_config(capsys, jit_unspill, device_memory_limit):
+    pytest.importorskip("distributed_ucxx")

     pytest.importorskip("rich")
-    with LocalCUDACluster(
-        n_workers=1, device_memory_limit="1B", jit_unspill=True, protocol=protocol
-    ) as cluster:
-        with Client(cluster) as client:
-            print_cluster_config(client)
-            captured = capsys.readouterr()
-            assert "Dask Cluster Configuration" in captured.out
-            assert protocol in captured.out
-            assert "1 B" in captured.out
-            assert "[plugin]" in captured.out
+
+    ctx = contextlib.nullcontext()
+    if not has_device_memory_resource():
+        if device_memory_limit:
+            ctx = pytest.raises(
+                ValueError,
+                match="device_memory_limit is set but device has no dedicated memory.",
+            )
+        if jit_unspill:
+            # JIT-Unspill exception has precedence, thus overwrite ctx if both are
+            # enabled
+            ctx = pytest.raises(
+                ValueError,
+                match="JIT-Unspill is not supported on devices without dedicated "
+                "memory",
+            )
+
+    with ctx:
+        with LocalCUDACluster(
+            n_workers=1,
+            device_memory_limit=device_memory_limit,
+            jit_unspill=jit_unspill,
+            protocol="ucx",
+        ) as cluster:
+            with Client(cluster) as client:
+                print_cluster_config(client)
+                captured = capsys.readouterr()
+                assert "Dask Cluster Configuration" in captured.out
+                assert "ucx" in captured.out
+                if device_memory_limit == "1B":
+                    assert "1 B" in captured.out
+                assert "[plugin]" in captured.out
+                client.shutdown()


 @pytest.mark.xfail(reason="https://github.com/rapidsai/dask-cuda/issues/1265")
dask_cuda/tests/test_proxify_host_file.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0

 from typing import Iterable
 from unittest.mock import patch
@@ -219,6 +220,9 @@ def test_one_item_host_limit(capsys, root_dir):
     assert len(dhf.manager) == 0


+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support spilling"
+)
 def test_spill_on_demand(root_dir):
     """
     Test spilling on demand by disabling the device_memory_limit
@@ -241,6 +245,9 @@ def test_spill_on_demand(root_dir):


 @pytest.mark.parametrize("jit_unspill", [True, False])
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support spilling"
+)
 @gen_test(timeout=20)
 async def test_local_cuda_cluster(jit_unspill):
     """Testing spilling of a proxied cudf dataframe in a local cuda cluster"""
@@ -377,9 +384,9 @@ def test_externals(root_dir):

 @patch("dask_cuda.proxify_device_objects.incompatible_types", (cupy.ndarray,))
 def test_incompatible_types(root_dir):
-    """Check that ProxifyHostFile unproxifies `cupy.ndarray` on retrieval
+    """Check that ProxifyHostFile unproxifies ``cupy.ndarray`` on retrieval

-    Notice, in this test we add `cupy.ndarray` to the incompatible_types temporarily.
+    Notice, in this test we add ``cupy.ndarray`` to the incompatible_types temporarily.
     """
     cupy = pytest.importorskip("cupy")
     cudf = pytest.importorskip("cudf")
@@ -398,6 +405,9 @@ def test_incompatible_types(root_dir):

 @pytest.mark.parametrize("npartitions", [1, 2, 3])
 @pytest.mark.parametrize("compatibility_mode", [True, False])
+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support JIT-Unspill"
+)
 @gen_test(timeout=30)
 async def test_compatibility_mode_dataframe_shuffle(compatibility_mode, npartitions):
     cudf = pytest.importorskip("cudf")
@@ -430,19 +440,28 @@ async def test_compatibility_mode_dataframe_shuffle(compatibility_mode, npartitions):
             assert all(res)  # Only proxy objects


+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support JIT-Unspill"
+)
 @gen_test(timeout=60)
 async def test_worker_force_spill_to_disk():
     """Test Dask triggering CPU-to-Disk spilling"""
     cudf = pytest.importorskip("cudf")

+    def create_dataframe():
+        return cudf.DataFrame({"key": np.arange(10**8)})
+
     with dask.config.set({"distributed.worker.memory.terminate": False}):
         async with dask_cuda.LocalCUDACluster(
             n_workers=1, device_memory_limit="1MB", jit_unspill=True, asynchronous=True
         ) as cluster:
             async with Client(cluster, asynchronous=True) as client:
                 # Create a df that are spilled to host memory immediately
-                df = cudf.DataFrame({"key": np.arange(10**8)})
-                [ddf] = client.persist([dask.dataframe.from_pandas(df, npartitions=1)])
+                ddf = dask.dataframe.from_delayed(
+                    dask.delayed(create_dataframe)(),
+                    meta=cudf.DataFrame({"key": cupy.arange(0)}),
+                )
+                [ddf] = client.persist([ddf])
                 await ddf

                 async def f(dask_worker):
@@ -453,18 +472,20 @@ async def test_worker_force_spill_to_disk():
                     memory = w.monitor.proc.memory_info().rss
                     w.memory_manager.memory_limit = memory - 10**8
                     w.memory_manager.memory_target_fraction = 1
-                    print(w.memory_manager.data)
                     await w.memory_manager.memory_monitor(w)
                     # Check that host memory are freed
                     assert w.monitor.proc.memory_info().rss < memory - 10**7
                     w.memory_manager.memory_limit = memory * 10  # Un-limit

-                client.run(f)
+                await client.run(f)
                 log = str(await client.get_worker_logs())
                 # Check that the worker doesn't complain about unmanaged memory
                 assert "Unmanaged memory use is high" not in log


+@pytest.mark.skip_if_no_device_memory(
+    "Devices without dedicated memory resources do not support JIT-Unspill"
+)
 def test_on_demand_debug_info():
     """Test worker logging when on-demand-spilling fails"""
     rmm = pytest.importorskip("rmm")
dask_cuda/tests/test_proxy.py
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
 import operator
 import os
 import pickle
@@ -242,7 +245,7 @@ def test_serialize_of_proxied_cudf(proxy_serializers, dask_serializers):

 @pytest.mark.parametrize("backend", ["numpy", "cupy"])
 def test_fixed_attribute_length(backend):
-    """Test fixed attribute `x.__len__` access
+    """Test fixed attribute ``x.__len__`` access

     Notice, accessing fixed attributes shouldn't de-serialize the proxied object
     """
@@ -263,7 +266,7 @@ def test_fixed_attribute_length(backend):


 def test_fixed_attribute_name():
-    """Test fixed attribute `x.name` access
+    """Test fixed attribute ``x.name`` access

     Notice, accessing fixed attributes shouldn't de-serialize the proxied object
     """
@@ -284,6 +287,9 @@ def test_fixed_attribute_name():


 @pytest.mark.parametrize("jit_unspill", [True, False])
+@pytest.mark.skip_if_no_device_memory(
+    "Spilling not supported in devices without dedicated memory resource"
+)
 @gen_test(timeout=20)
 async def test_spilling_local_cuda_cluster(jit_unspill):
     """Testing spilling of a proxied cudf dataframe in a local cuda cluster"""
@@ -386,8 +392,8 @@ def test_serializing_array_to_disk(backend, serializers, size):
 class _PxyObjTest(proxy_object.ProxyObject):
     """
     A class that:
-    - defines `__dask_tokenize__` in order to avoid deserialization when
-      calling `client.scatter()`
+    - defines ``__dask_tokenize__`` in order to avoid deserialization when
+      calling ``client.scatter()``
     - Asserts that no deserialization is performaned when communicating.
     """
 
@@ -401,14 +407,12 @@ class _PxyObjTest(proxy_object.ProxyObject):


 @pytest.mark.parametrize("send_serializers", [None, ("dask", "pickle"), ("cuda",)])
-@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
 @gen_test(timeout=120)
 async def test_communicating_proxy_objects(protocol, send_serializers):
     """Testing serialization of cuDF dataframe when communicating"""
     if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+        pytest.importorskip("distributed_ucxx")
     cudf = pytest.importorskip("cudf")

     def task(x):
@@ -417,7 +421,7 @@ async def test_communicating_proxy_objects(protocol, send_serializers):
         serializers_used = x._pxy_get().serializer

         # Check that `x` is serialized with the expected serializers
-        if protocol in ["ucx", "ucxx"]:
+        if protocol == "ucx":
             if send_serializers is None:
                 assert serializers_used == "cuda"
             else:
@@ -448,15 +452,13 @@ async def test_communicating_proxy_objects(protocol, send_serializers):
             await client.submit(task, df)


-@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
 @pytest.mark.parametrize("shared_fs", [True, False])
 @gen_test(timeout=20)
 async def test_communicating_disk_objects(protocol, shared_fs):
     """Testing disk serialization of cuDF dataframe when communicating"""
     if protocol == "ucx":
-        pytest.importorskip("ucp")
-    elif protocol == "ucxx":
-        pytest.importorskip("ucxx")
+        pytest.importorskip("distributed_ucxx")
     cudf = pytest.importorskip("cudf")
     ProxifyHostFile._spill_to_disk.shared_filesystem = shared_fs
 
dask_cuda/tests/test_spill.py
@@ -15,10 +15,17 @@ from distributed.sizeof import sizeof
 from distributed.utils import Deadline
 from distributed.utils_test import gen_cluster, gen_test, loop  # noqa: F401

-import dask_cudf
+dask_cudf = pytest.importorskip("dask_cudf")

-from dask_cuda import LocalCUDACluster, utils
-from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
+from dask_cuda import LocalCUDACluster, utils  # noqa: E402
+from dask_cuda.utils_test import IncreasedCloseTimeoutNanny  # noqa: E402
+
+if not utils.has_device_memory_resource():
+    pytest.skip(
+        "No spilling tests supported for devices without memory resources. "
+        "See https://github.com/rapidsai/dask-cuda/issues/1510",
+        allow_module_level=True,
+    )

 if utils.get_device_total_memory() < 1e10:
     pytest.skip("Not enough GPU memory", allow_module_level=True)