dask-cuda 25.4.0__py3-none-any.whl → 25.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only, and reflects the changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. dask_cuda/GIT_COMMIT +1 -1
  2. dask_cuda/VERSION +1 -1
  3. dask_cuda/_compat.py +18 -0
  4. dask_cuda/benchmarks/common.py +4 -1
  5. dask_cuda/benchmarks/local_cudf_groupby.py +4 -1
  6. dask_cuda/benchmarks/local_cudf_merge.py +5 -2
  7. dask_cuda/benchmarks/local_cudf_shuffle.py +5 -2
  8. dask_cuda/benchmarks/local_cupy.py +4 -1
  9. dask_cuda/benchmarks/local_cupy_map_overlap.py +4 -1
  10. dask_cuda/benchmarks/utils.py +7 -4
  11. dask_cuda/cli.py +21 -15
  12. dask_cuda/cuda_worker.py +27 -57
  13. dask_cuda/device_host_file.py +31 -15
  14. dask_cuda/disk_io.py +7 -4
  15. dask_cuda/explicit_comms/comms.py +11 -7
  16. dask_cuda/explicit_comms/dataframe/shuffle.py +147 -55
  17. dask_cuda/get_device_memory_objects.py +18 -3
  18. dask_cuda/initialize.py +80 -44
  19. dask_cuda/is_device_object.py +4 -1
  20. dask_cuda/is_spillable_object.py +4 -1
  21. dask_cuda/local_cuda_cluster.py +63 -66
  22. dask_cuda/plugins.py +17 -16
  23. dask_cuda/proxify_device_objects.py +15 -10
  24. dask_cuda/proxify_host_file.py +30 -27
  25. dask_cuda/proxy_object.py +20 -17
  26. dask_cuda/tests/conftest.py +41 -0
  27. dask_cuda/tests/test_dask_cuda_worker.py +114 -27
  28. dask_cuda/tests/test_dgx.py +10 -18
  29. dask_cuda/tests/test_explicit_comms.py +51 -18
  30. dask_cuda/tests/test_from_array.py +7 -5
  31. dask_cuda/tests/test_initialize.py +16 -37
  32. dask_cuda/tests/test_local_cuda_cluster.py +164 -54
  33. dask_cuda/tests/test_proxify_host_file.py +33 -4
  34. dask_cuda/tests/test_proxy.py +18 -16
  35. dask_cuda/tests/test_rdd_ucx.py +160 -0
  36. dask_cuda/tests/test_spill.py +107 -27
  37. dask_cuda/tests/test_utils.py +106 -20
  38. dask_cuda/tests/test_worker_spec.py +5 -2
  39. dask_cuda/utils.py +319 -68
  40. dask_cuda/utils_test.py +23 -7
  41. dask_cuda/worker_common.py +196 -0
  42. dask_cuda/worker_spec.py +12 -5
  43. {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/METADATA +5 -4
  44. dask_cuda-25.8.0.dist-info/RECORD +63 -0
  45. {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/WHEEL +1 -1
  46. dask_cuda-25.8.0.dist-info/top_level.txt +6 -0
  47. shared-actions/check_nightly_success/check-nightly-success/check.py +148 -0
  48. shared-actions/telemetry-impls/summarize/bump_time.py +54 -0
  49. shared-actions/telemetry-impls/summarize/send_trace.py +409 -0
  50. dask_cuda-25.4.0.dist-info/RECORD +0 -56
  51. dask_cuda-25.4.0.dist-info/top_level.txt +0 -5
  52. {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/entry_points.txt +0 -0
  53. {dask_cuda-25.4.0.dist-info → dask_cuda-25.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
1
4
  import copy
2
5
  import logging
3
6
  import os
@@ -8,18 +11,15 @@ import dask
8
11
  from distributed import LocalCluster, Nanny, Worker
9
12
  from distributed.worker_memory import parse_memory_limit
10
13
 
11
- from .device_host_file import DeviceHostFile
12
14
  from .initialize import initialize
13
- from .plugins import CPUAffinity, CUDFSetup, PreImport, RMMSetup
14
- from .proxify_host_file import ProxifyHostFile
15
15
  from .utils import (
16
16
  cuda_visible_devices,
17
- get_cpu_affinity,
18
17
  get_ucx_config,
19
18
  nvml_device_index,
20
19
  parse_cuda_visible_device,
21
20
  parse_device_memory_limit,
22
21
  )
22
+ from .worker_common import worker_data_function, worker_plugins
23
23
 
24
24
 
25
25
  class LoggedWorker(Worker):
@@ -68,11 +68,16 @@ class LocalCUDACluster(LocalCluster):
68
68
  starts spilling to disk (not available if JIT-Unspill is enabled). Can be an
69
69
  integer (bytes), float (fraction of total system memory), string (like ``"5GB"``
70
70
  or ``"5000M"``), or ``"auto"``, 0, or ``None`` for no memory management.
71
- device_memory_limit : int, float, str, or None, default 0.8
71
+ device_memory_limit : int, float, str, or None, default "default"
72
72
  Size of the CUDA device LRU cache, which is used to determine when the worker
73
73
  starts spilling to host memory. Can be an integer (bytes), float (fraction of
74
- total device memory), string (like ``"5GB"`` or ``"5000M"``), or ``"auto"``, 0,
74
+ total device memory), string (like ``"5GB"`` or ``"5000M"``), ``"auto"``, ``0``
75
75
  or ``None`` to disable spilling to host (i.e. allow full device memory usage).
76
+ Another special value ``"default"`` (which happens to be the default) is also
77
+ available and uses the recommended Dask-CUDA's defaults and means 80% of the
78
+ total device memory (analogous to ``0.8``), and disabled spilling (analogous
79
+ to ``auto``/``0``) on devices without a dedicated memory resource, such as
80
+ system on a chip (SoC) devices.
76
81
  enable_cudf_spill : bool, default False
77
82
  Enable automatic cuDF spilling.
78
83
 
@@ -87,7 +92,7 @@ class LocalCUDACluster(LocalCluster):
87
92
  ``dask.temporary-directory`` in the local Dask configuration, using the current
88
93
  working directory if this is not set.
89
94
  shared_filesystem: bool or None, default None
90
- Whether the `local_directory` above is shared between all workers or not.
95
+ Whether the ``local_directory`` above is shared between all workers or not.
91
96
  If ``None``, the "jit-unspill-shared-fs" config value are used, which
92
97
  defaults to True. Notice, in all other cases this option defaults to False,
93
98
  but on a local cluster it defaults to True -- we assume all workers use the
@@ -100,13 +105,16 @@ class LocalCUDACluster(LocalCluster):
100
105
  are not supported or disabled.
101
106
  enable_infiniband : bool, default None
102
107
  Set environment variables to enable UCX over InfiniBand, requires
103
- ``protocol="ucx"`` and implies ``enable_tcp_over_ucx=True`` when ``True``.
108
+ ``protocol="ucx"``, ``protocol="ucxx"`` or ``protocol="ucx-old"``, and implies
109
+ ``enable_tcp_over_ucx=True`` when ``True``.
104
110
  enable_nvlink : bool, default None
105
- Set environment variables to enable UCX over NVLink, requires ``protocol="ucx"``
106
- and implies ``enable_tcp_over_ucx=True`` when ``True``.
111
+ Set environment variables to enable UCX over NVLink, requires
112
+ ``protocol="ucx"``, ``protocol="ucxx"`` or ``protocol="ucx-old"``, and implies
113
+ ``enable_tcp_over_ucx=True`` when ``True``.
107
114
  enable_rdmacm : bool, default None
108
115
  Set environment variables to enable UCX RDMA connection manager support,
109
- requires ``protocol="ucx"`` and ``enable_infiniband=True``.
116
+ requires ``protocol="ucx"``, ``protocol="ucxx"`` or ``protocol="ucx-old"``,
117
+ and ``enable_infiniband=True``.
110
118
  rmm_pool_size : int, str or None, default None
111
119
  RMM pool size to initialize each worker with. Can be an integer (bytes), float
112
120
  (fraction of total device memory), string (like ``"5GB"`` or ``"5000M"``), or
@@ -123,8 +131,8 @@ class LocalCUDACluster(LocalCluster):
123
131
  and to set the maximum pool size.
124
132
 
125
133
  .. note::
126
- When paired with `--enable-rmm-async` the maximum size cannot be guaranteed
127
- due to fragmentation.
134
+ When paired with ``--enable-rmm-async`` the maximum size cannot be
135
+ guaranteed due to fragmentation.
128
136
 
129
137
  .. note::
130
138
  This size is a per-worker configuration, and not cluster-wide.
@@ -140,9 +148,8 @@ class LocalCUDACluster(LocalCluster):
140
148
  See ``rmm.mr.CudaAsyncMemoryResource`` for more info.
141
149
 
142
150
  .. warning::
143
- The asynchronous allocator requires CUDA Toolkit 11.2 or newer. It is also
144
- incompatible with RMM pools and managed memory. Trying to enable both will
145
- result in an exception.
151
+ The asynchronous allocator is incompatible with RMM pools and managed
152
+ memory. Trying to enable both will result in an exception.
146
153
  rmm_allocator_external_lib_list: str, list or None, default None
147
154
  List of external libraries for which to set RMM as the allocator.
148
155
  Supported options are: ``["torch", "cupy"]``. Can be a comma-separated string
@@ -201,7 +208,8 @@ class LocalCUDACluster(LocalCluster):
201
208
  Raises
202
209
  ------
203
210
  TypeError
204
- If InfiniBand or NVLink are enabled and ``protocol!="ucx"``.
211
+ If InfiniBand or NVLink are enabled and
212
+ ``protocol not in ("ucx", "ucxx", "ucx-old")``.
205
213
  ValueError
206
214
  If RMM pool, RMM managed memory or RMM async allocator are requested but RMM
207
215
  cannot be imported.
@@ -221,10 +229,9 @@ class LocalCUDACluster(LocalCluster):
221
229
  n_workers=None,
222
230
  threads_per_worker=1,
223
231
  memory_limit="auto",
224
- device_memory_limit=0.8,
232
+ device_memory_limit="default",
225
233
  enable_cudf_spill=False,
226
234
  cudf_spill_stats=0,
227
- data=None,
228
235
  local_directory=None,
229
236
  shared_filesystem=None,
230
237
  protocol=None,
@@ -242,7 +249,6 @@ class LocalCUDACluster(LocalCluster):
242
249
  rmm_track_allocations=False,
243
250
  jit_unspill=None,
244
251
  log_spilling=False,
245
- worker_class=None,
246
252
  pre_import=None,
247
253
  **kwargs,
248
254
  ):
@@ -339,40 +345,29 @@ class LocalCUDACluster(LocalCluster):
339
345
  jit_unspill = dask.config.get("jit-unspill", default=False)
340
346
  data = kwargs.pop("data", None)
341
347
  if data is None:
342
- if device_memory_limit is None and memory_limit is None:
343
- data = {}
344
- elif jit_unspill:
345
- if enable_cudf_spill:
346
- warnings.warn(
347
- "Enabling cuDF spilling and JIT-Unspill together is not "
348
- "safe, consider disabling JIT-Unspill."
349
- )
350
-
351
- data = (
352
- ProxifyHostFile,
353
- {
354
- "device_memory_limit": self.device_memory_limit,
355
- "memory_limit": self.memory_limit,
356
- "shared_filesystem": shared_filesystem,
357
- },
358
- )
359
- else:
360
- data = (
361
- DeviceHostFile,
362
- {
363
- "device_memory_limit": self.device_memory_limit,
364
- "memory_limit": self.memory_limit,
365
- "log_spilling": log_spilling,
366
- },
367
- )
348
+ self.data = worker_data_function(
349
+ device_memory_limit=self.device_memory_limit,
350
+ memory_limit=self.memory_limit,
351
+ jit_unspill=jit_unspill,
352
+ enable_cudf_spill=enable_cudf_spill,
353
+ shared_filesystem=shared_filesystem,
354
+ )
368
355
 
369
356
  if enable_tcp_over_ucx or enable_infiniband or enable_nvlink:
370
357
  if protocol is None:
371
- protocol = "ucx"
372
- elif protocol not in ["ucx", "ucxx"]:
358
+ ucx_protocol = dask.config.get(
359
+ "distributed.comm.ucx.ucx-protocol", default=None
360
+ )
361
+ if ucx_protocol is not None:
362
+ # TODO: remove when UCX-Py is removed,
363
+ # see https://github.com/rapidsai/dask-cuda/issues/1517
364
+ protocol = ucx_protocol
365
+ else:
366
+ protocol = "ucx"
367
+ elif protocol not in ("ucx", "ucxx", "ucx-old"):
373
368
  raise TypeError(
374
- "Enabling InfiniBand or NVLink requires protocol='ucx' or "
375
- "protocol='ucxx'"
369
+ "Enabling InfiniBand or NVLink requires protocol='ucx', "
370
+ "protocol='ucxx' or protocol='ucx-old'"
376
371
  )
377
372
 
378
373
  self.host = kwargs.get("host", None)
@@ -385,6 +380,7 @@ class LocalCUDACluster(LocalCluster):
385
380
  enable_rdmacm=enable_rdmacm,
386
381
  )
387
382
 
383
+ worker_class = kwargs.pop("worker_class", None)
388
384
  if worker_class is not None:
389
385
  if log_spilling is True:
390
386
  raise ValueError(
@@ -441,28 +437,29 @@ class LocalCUDACluster(LocalCluster):
441
437
  spec = copy.deepcopy(self.new_spec)
442
438
  worker_count = self.cuda_visible_devices.index(name)
443
439
  visible_devices = cuda_visible_devices(worker_count, self.cuda_visible_devices)
440
+ device_index = nvml_device_index(0, visible_devices)
444
441
  spec["options"].update(
445
442
  {
446
443
  "env": {
447
444
  "CUDA_VISIBLE_DEVICES": visible_devices,
448
445
  },
449
- "plugins": {
450
- CPUAffinity(
451
- get_cpu_affinity(nvml_device_index(0, visible_devices))
452
- ),
453
- RMMSetup(
454
- initial_pool_size=self.rmm_pool_size,
455
- maximum_pool_size=self.rmm_maximum_pool_size,
456
- managed_memory=self.rmm_managed_memory,
457
- async_alloc=self.rmm_async,
458
- release_threshold=self.rmm_release_threshold,
459
- log_directory=self.rmm_log_directory,
460
- track_allocations=self.rmm_track_allocations,
461
- external_lib_list=self.rmm_allocator_external_lib_list,
446
+ **({"data": self.data(device_index)} if hasattr(self, "data") else {}),
447
+ "plugins": worker_plugins(
448
+ device_index=device_index,
449
+ rmm_initial_pool_size=self.rmm_pool_size,
450
+ rmm_maximum_pool_size=self.rmm_maximum_pool_size,
451
+ rmm_managed_memory=self.rmm_managed_memory,
452
+ rmm_async_alloc=self.rmm_async,
453
+ rmm_release_threshold=self.rmm_release_threshold,
454
+ rmm_log_directory=self.rmm_log_directory,
455
+ rmm_track_allocations=self.rmm_track_allocations,
456
+ rmm_allocator_external_lib_list=(
457
+ self.rmm_allocator_external_lib_list
462
458
  ),
463
- PreImport(self.pre_import),
464
- CUDFSetup(self.enable_cudf_spill, self.cudf_spill_stats),
465
- },
459
+ pre_import=self.pre_import,
460
+ enable_cudf_spill=self.enable_cudf_spill,
461
+ cudf_spill_stats=self.cudf_spill_stats,
462
+ ),
466
463
  }
467
464
  )
468
465
 
dask_cuda/plugins.py CHANGED
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
1
4
  import importlib
2
5
  import logging
3
6
  import os
@@ -5,7 +8,7 @@ from typing import Callable, Dict
5
8
 
6
9
  from distributed import WorkerPlugin
7
10
 
8
- from .utils import get_rmm_log_file_name, parse_device_memory_limit
11
+ from .utils import get_rmm_log_file_name, parse_device_bytes
9
12
 
10
13
 
11
14
  class CPUAffinity(WorkerPlugin):
@@ -75,28 +78,26 @@ class RMMSetup(WorkerPlugin):
75
78
  self.external_lib_list = external_lib_list
76
79
 
77
80
  def setup(self, worker=None):
78
- if self.initial_pool_size is not None:
79
- self.initial_pool_size = parse_device_memory_limit(
80
- self.initial_pool_size, alignment_size=256
81
- )
81
+ self.initial_pool_size = parse_device_bytes(
82
+ self.initial_pool_size, alignment_size=256
83
+ )
82
84
 
83
85
  if self.async_alloc:
84
86
  import rmm
85
87
 
86
- if self.release_threshold is not None:
87
- self.release_threshold = parse_device_memory_limit(
88
- self.release_threshold, alignment_size=256
89
- )
88
+ self.release_threshold = parse_device_bytes(
89
+ self.release_threshold, alignment_size=256
90
+ )
90
91
 
91
92
  mr = rmm.mr.CudaAsyncMemoryResource(
92
93
  initial_pool_size=self.initial_pool_size,
93
94
  release_threshold=self.release_threshold,
94
95
  )
95
96
 
97
+ self.maximum_pool_size = parse_device_bytes(
98
+ self.maximum_pool_size, alignment_size=256
99
+ )
96
100
  if self.maximum_pool_size is not None:
97
- self.maximum_pool_size = parse_device_memory_limit(
98
- self.maximum_pool_size, alignment_size=256
99
- )
100
101
  mr = rmm.mr.LimitingResourceAdaptor(
101
102
  mr, allocation_limit=self.maximum_pool_size
102
103
  )
@@ -114,10 +115,9 @@ class RMMSetup(WorkerPlugin):
114
115
  pool_allocator = False if self.initial_pool_size is None else True
115
116
 
116
117
  if self.initial_pool_size is not None:
117
- if self.maximum_pool_size is not None:
118
- self.maximum_pool_size = parse_device_memory_limit(
119
- self.maximum_pool_size, alignment_size=256
120
- )
118
+ self.maximum_pool_size = parse_device_bytes(
119
+ self.maximum_pool_size, alignment_size=256
120
+ )
121
121
 
122
122
  rmm.reinitialize(
123
123
  pool_allocator=pool_allocator,
@@ -129,6 +129,7 @@ class RMMSetup(WorkerPlugin):
129
129
  worker, self.logging, self.log_directory
130
130
  ),
131
131
  )
132
+
132
133
  if self.rmm_track_allocations:
133
134
  import rmm
134
135
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
1
4
  import functools
2
5
  import pydoc
3
6
  from collections import defaultdict
@@ -58,9 +61,9 @@ def proxify_device_objects(
58
61
  ) -> T:
59
62
  """Wrap device objects in ProxyObject
60
63
 
61
- Search through `obj` and wraps all CUDA device objects in ProxyObject.
62
- It uses `proxied_id_to_proxy` to make sure that identical CUDA device
63
- objects found in `obj` are wrapped by the same ProxyObject.
64
+ Search through ``obj`` and wraps all CUDA device objects in ProxyObject.
65
+ It uses ``proxied_id_to_proxy`` to make sure that identical CUDA device
66
+ objects found in ``obj`` are wrapped by the same ProxyObject.
64
67
 
65
68
  Parameters
66
69
  ----------
@@ -68,11 +71,11 @@ def proxify_device_objects(
68
71
  Object to search through or wrap in a ProxyObject.
69
72
  proxied_id_to_proxy: MutableMapping[int, ProxyObject]
70
73
  Dict mapping the id() of proxied objects (CUDA device objects) to
71
- their proxy and is updated with all new proxied objects found in `obj`.
74
+ their proxy and is updated with all new proxied objects found in ``obj``.
72
75
  If None, use an empty dict.
73
76
  found_proxies: List[ProxyObject]
74
- List of found proxies in `obj`. Notice, this includes all proxies found,
75
- including those already in `proxied_id_to_proxy`.
77
+ List of found proxies in ``obj``. Notice, this includes all proxies found,
78
+ including those already in ``proxied_id_to_proxy``.
76
79
  If None, use an empty list.
77
80
  excl_proxies: bool
78
81
  Don't add found objects that are already ProxyObject to found_proxies.
@@ -83,7 +86,7 @@ def proxify_device_objects(
83
86
  Returns
84
87
  -------
85
88
  ret: Any
86
- A copy of `obj` where all CUDA device objects are wrapped in ProxyObject
89
+ A copy of ``obj`` where all CUDA device objects are wrapped in ProxyObject
87
90
  """
88
91
  _register_incompatible_types()
89
92
 
@@ -102,7 +105,7 @@ def unproxify_device_objects(
102
105
  ) -> T:
103
106
  """Unproxify device objects
104
107
 
105
- Search through `obj` and un-wraps all CUDA device objects.
108
+ Search through ``obj`` and un-wraps all CUDA device objects.
106
109
 
107
110
  Parameters
108
111
  ----------
@@ -117,7 +120,7 @@ def unproxify_device_objects(
117
120
  Returns
118
121
  -------
119
122
  ret: Any
120
- A copy of `obj` where all CUDA device objects are unproxify
123
+ A copy of ``obj`` where all CUDA device objects are unproxify
121
124
  """
122
125
  if isinstance(obj, dict):
123
126
  return {
@@ -242,7 +245,9 @@ def _register_cudf():
242
245
 
243
246
  @dispatch.register(cudf.DataFrame)
244
247
  @dispatch.register(cudf.Series)
245
- @dispatch.register(cudf.BaseIndex)
248
+ @dispatch.register(cudf.Index)
249
+ @dispatch.register(cudf.MultiIndex)
250
+ @dispatch.register(cudf.RangeIndex)
246
251
  def proxify_device_object_cudf_dataframe(
247
252
  obj, proxied_id_to_proxy, found_proxies, excl_proxies
248
253
  ):
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
1
4
  import abc
2
5
  import gc
3
6
  import io
@@ -64,29 +67,29 @@ class Proxies(abc.ABC):
64
67
 
65
68
  @abc.abstractmethod
66
69
  def mem_usage_add(self, proxy: ProxyObject) -> None:
67
- """Given a new proxy, update `self._mem_usage`"""
70
+ """Given a new proxy, update ``self._mem_usage``"""
68
71
 
69
72
  @abc.abstractmethod
70
73
  def mem_usage_remove(self, proxy: ProxyObject) -> None:
71
- """Removal of proxy, update `self._mem_usage`"""
74
+ """Removal of proxy, update ``self._mem_usage``"""
72
75
 
73
76
  @abc.abstractmethod
74
77
  def buffer_info(self) -> List[Tuple[float, int, List[ProxyObject]]]:
75
78
  """Return a list of buffer information
76
79
 
77
80
  The returned format is:
78
- `[(<access-time>, <size-of-buffer>, <list-of-proxies>), ...]
81
+ ``[(<access-time>, <size-of-buffer>, <list-of-proxies>), ...]``
79
82
  """
80
83
 
81
84
  def add(self, proxy: ProxyObject) -> None:
82
- """Add a proxy for tracking, calls `self.mem_usage_add`"""
85
+ """Add a proxy for tracking, calls ``self.mem_usage_add``"""
83
86
  assert not self.contains_proxy_id(id(proxy))
84
87
  with self._lock:
85
88
  self._proxy_id_to_proxy[id(proxy)] = weakref.ref(proxy)
86
89
  self.mem_usage_add(proxy)
87
90
 
88
91
  def remove(self, proxy: ProxyObject) -> None:
89
- """Remove proxy from tracking, calls `self.mem_usage_remove`"""
92
+ """Remove proxy from tracking, calls ``self.mem_usage_remove``"""
90
93
  with self._lock:
91
94
  del self._proxy_id_to_proxy[id(proxy)]
92
95
  self.mem_usage_remove(proxy)
@@ -323,13 +326,13 @@ class ProxyManager:
323
326
  assert header["serializer"] == pxy.serializer
324
327
 
325
328
  def proxify(self, obj: T, duplicate_check=True) -> Tuple[T, bool]:
326
- """Proxify `obj` and add found proxies to the `Proxies` collections
329
+ """Proxify ``obj`` and add found proxies to the ``Proxies`` collections
327
330
 
328
- Search through `obj` and wrap all CUDA device objects in ProxyObject.
331
+ Search through ``obj`` and wrap all CUDA device objects in ProxyObject.
329
332
  If duplicate_check is True, identical CUDA device objects found in
330
- `obj` are wrapped by the same ProxyObject.
333
+ ``obj`` are wrapped by the same ProxyObject.
331
334
 
332
- Returns the proxified object and a boolean, which is `True` when one or
335
+ Returns the proxified object and a boolean, which is ``True`` when one or
333
336
  more incompatible-types were found.
334
337
 
335
338
  Parameters
@@ -337,7 +340,7 @@ class ProxyManager:
337
340
  obj
338
341
  Object to search through or wrap in a ProxyObject.
339
342
  duplicate_check
340
- Make sure that identical CUDA device objects found in `obj` are
343
+ Make sure that identical CUDA device objects found in ``obj`` are
341
344
  wrapped by the same ProxyObject. This check comes with a significant
342
345
  overhead hence it is recommended setting to False when it is known
343
346
  that no duplicate exist.
@@ -380,11 +383,11 @@ class ProxyManager:
380
383
  proxies_access: Callable[[], List[Tuple[float, int, List[ProxyObject]]]],
381
384
  serializer: Callable[[ProxyObject], None],
382
385
  ) -> int:
383
- """Evict buffers retrieved by calling `proxies_access`
386
+ """Evict buffers retrieved by calling ``proxies_access``
384
387
 
385
- Calls `proxies_access` to retrieve a list of proxies and then spills
386
- enough proxies to free up at a minimum `nbytes` bytes. In order to
387
- spill a proxy, `serializer` is called.
388
+ Calls ``proxies_access`` to retrieve a list of proxies and then spills
389
+ enough proxies to free up at a minimum ``nbytes`` bytes. In order to
390
+ spill a proxy, ``serializer`` is called.
388
391
 
389
392
  Parameters
390
393
  ----------
@@ -392,7 +395,7 @@ class ProxyManager:
392
395
  Number of bytes to evict.
393
396
  proxies_access: callable
394
397
  Function that returns a list of proxies pack in a tuple like:
395
- `[(<access-time>, <size-of-buffer>, <list-of-proxies>), ...]
398
+ ``[(<access-time>, <size-of-buffer>, <list-of-proxies>), ...]``
396
399
  serializer: callable
397
400
  Function that serialize the given proxy object.
398
401
 
@@ -423,7 +426,7 @@ class ProxyManager:
423
426
  def maybe_evict_from_device(self, extra_dev_mem=0) -> None:
424
427
  """Evict buffers until total memory usage is below device-memory-limit
425
428
 
426
- Adds `extra_dev_mem` to the current total memory usage when comparing
429
+ Adds ``extra_dev_mem`` to the current total memory usage when comparing
427
430
  against device-memory-limit.
428
431
  """
429
432
  mem_over_usage = (
@@ -439,7 +442,7 @@ class ProxyManager:
439
442
  def maybe_evict_from_host(self, extra_host_mem=0) -> None:
440
443
  """Evict buffers until total memory usage is below host-memory-limit
441
444
 
442
- Adds `extra_host_mem` to the current total memory usage when comparing
445
+ Adds ``extra_host_mem`` to the current total memory usage when comparing
443
446
  against device-memory-limit.
444
447
  """
445
448
  assert self._host_memory_limit is not None
@@ -466,7 +469,7 @@ class ProxifyHostFile(MutableMapping):
466
469
  workers in Distributed.
467
470
 
468
471
  It wraps all CUDA device objects in a ProxyObject instance and maintains
469
- `device_memory_limit` by spilling ProxyObject on-the-fly. This addresses
472
+ ``device_memory_limit`` by spilling ProxyObject on-the-fly. This addresses
470
473
  some issues with the default DeviceHostFile host, which tracks device
471
474
  memory inaccurately see <https://github.com/rapidsai/dask-cuda/pull/451>
472
475
 
@@ -488,16 +491,16 @@ class ProxifyHostFile(MutableMapping):
488
491
  memory_limit: int
489
492
  Number of bytes of host memory used before spilling to disk.
490
493
  shared_filesystem: bool or None, default None
491
- Whether the `local_directory` above is shared between all workers or not.
494
+ Whether the ``local_directory`` above is shared between all workers or not.
492
495
  If ``None``, the "jit-unspill-shared-fs" config value are used, which
493
496
  defaults to False.
494
- Notice, a shared filesystem must support the `os.link()` operation.
497
+ Notice, a shared filesystem must support the ``os.link()`` operation.
495
498
  compatibility_mode: bool or None, default None
496
499
  Enables compatibility-mode, which means that items are un-proxified before
497
500
  retrieval. This makes it possible to get some of the JIT-unspill benefits
498
501
  without having to be ProxyObject compatible. In order to still allow specific
499
- ProxyObjects, set the `mark_as_explicit_proxies=True` when proxifying with
500
- `proxify_device_objects()`. If ``None``, the "jit-unspill-compatibility-mode"
502
+ ProxyObjects, set the ``mark_as_explicit_proxies=True`` when proxifying with
503
+ ``proxify_device_objects()``. If ``None``, the "jit-unspill-compatibility-mode"
501
504
  config value are used, which defaults to False.
502
505
  spill_on_demand: bool or None, default None
503
506
  Enables spilling when the RMM memory pool goes out of memory. If ``None``,
@@ -639,7 +642,7 @@ class ProxifyHostFile(MutableMapping):
639
642
  """Manually evict 1% of host limit.
640
643
 
641
644
  Dask uses this to trigger CPU-to-Disk spilling. We don't know how much
642
- we need to spill but Dask will call `evict()` repeatedly until enough
645
+ we need to spill but Dask will call ``evict()`` repeatedly until enough
643
646
  is spilled. We ask for 1% each time.
644
647
 
645
648
  Return
@@ -658,9 +661,9 @@ class ProxifyHostFile(MutableMapping):
658
661
 
659
662
  @property
660
663
  def fast(self):
661
- """Alternative access to `.evict()` used by Dask
664
+ """Alternative access to ``.evict()`` used by Dask
662
665
 
663
- Dask expects `.fast.evict()` to be available for manually triggering
666
+ Dask expects ``.fast.evict()`` to be available for manually triggering
664
667
  of CPU-to-Disk spilling.
665
668
  """
666
669
  if len(self.manager._host) == 0:
@@ -758,9 +761,9 @@ class ProxifyHostFile(MutableMapping):
758
761
 
759
762
  @classmethod
760
763
  def serialize_proxy_to_disk_inplace(cls, proxy: ProxyObject) -> None:
761
- """Serialize `proxy` to disk.
764
+ """Serialize ``proxy`` to disk.
762
765
 
763
- Avoid de-serializing if `proxy` is serialized using "dask" or
766
+ Avoid de-serializing if ``proxy`` is serialized using "dask" or
764
767
  "pickle". In this case the already serialized data is written
765
768
  directly to disk.
766
769