PyPI - dask-cuda - Versions diffs - 23.12.0a231027__tar.gz → 24.2.0a3__tar.gz - Mend

dask-cuda 23.12.0a231027__tar.gz → 24.2.0a3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/MANIFEST.in RENAMED Viewed

@@ -1 +1,2 @@
 include dask_cuda/_version.py
+include dask_cuda/VERSION

{dask-cuda-23.12.0a231027/dask_cuda.egg-info → dask-cuda-24.2.0a3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dask-cuda
-Version: 23.12.0a231027
+Version: 24.2.0a3
 Summary: Utilities for Dask and CUDA interactions
 Author: NVIDIA Corporation
 License: Apache-2.0
@@ -17,12 +17,11 @@ Classifier: Programming Language :: Python :: 3.10
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: dask==2023.9.2
-Requires-Dist: distributed==2023.9.2
 Requires-Dist: pynvml<11.5,>=11.0.0
 Requires-Dist: numpy>=1.21
 Requires-Dist: numba>=0.57
 Requires-Dist: pandas<1.6.0dev0,>=1.3
+Requires-Dist: rapids-dask-dependency==23.12.*,>=0.0.0a0
 Requires-Dist: zict>=2.0.0
 Provides-Extra: docs
 Requires-Dist: numpydoc>=1.1.0; extra == "docs"

dask-cuda-24.2.0a3/dask_cuda/VERSION ADDED Viewed

@@ -0,0 +1 @@
+24.02.00a3

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/__init__.py RENAMED Viewed

@@ -11,6 +11,7 @@ import dask.dataframe.shuffle
 import dask.dataframe.multi
 import dask.bag.core
+from ._version import __git_commit__, __version__
 from .cuda_worker import CUDAWorker
 from .explicit_comms.dataframe.shuffle import (
     get_rearrange_by_column_wrapper,
@@ -19,9 +20,6 @@ from .explicit_comms.dataframe.shuffle import (
 from .local_cuda_cluster import LocalCUDACluster
 from .proxify_device_objects import proxify_decorator, unproxify_decorator
-__version__ = "23.12.00"
-from . import compat
 # Monkey patching Dask to make use of explicit-comms when `DASK_EXPLICIT_COMMS=True`
 dask.dataframe.shuffle.rearrange_by_column = get_rearrange_by_column_wrapper(

dask-cuda-24.2.0a3/dask_cuda/_version.py ADDED Viewed

@@ -0,0 +1,20 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import importlib.resources
+__version__ = (
+    importlib.resources.files("dask_cuda").joinpath("VERSION").read_text().strip()
+)
+__git_commit__ = ""

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/benchmarks/local_cudf_groupby.py RENAMED Viewed

@@ -139,7 +139,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
             key="Device memory limit", value=f"{format_bytes(args.device_memory_limit)}"
         )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
-    if args.protocol == "ucx":
+    if args.protocol in ["ucx", "ucxx"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/benchmarks/local_cudf_merge.py RENAMED Viewed

@@ -217,7 +217,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
         )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
     print_key_value(key="Frac-match", value=f"{args.frac_match}")
-    if args.protocol == "ucx":
+    if args.protocol in ["ucx", "ucxx"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/benchmarks/local_cudf_shuffle.py RENAMED Viewed

@@ -146,7 +146,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
             key="Device memory limit", value=f"{format_bytes(args.device_memory_limit)}"
         )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
-    if args.protocol == "ucx":
+    if args.protocol in ["ucx", "ucxx"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/benchmarks/local_cupy.py RENAMED Viewed

@@ -193,7 +193,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
         )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
     print_key_value(key="Protocol", value=f"{args.protocol}")
-    if args.protocol == "ucx":
+    if args.protocol in ["ucx", "ucxx"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/benchmarks/local_cupy_map_overlap.py RENAMED Viewed

@@ -78,7 +78,7 @@ def pretty_print_results(args, address_to_index, p2p_bw, results):
         )
     print_key_value(key="RMM Pool", value=f"{not args.disable_rmm_pool}")
     print_key_value(key="Protocol", value=f"{args.protocol}")
-    if args.protocol == "ucx":
+    if args.protocol in ["ucx", "ucxx"]:
         print_key_value(key="TCP", value=f"{args.enable_tcp_over_ucx}")
         print_key_value(key="InfiniBand", value=f"{args.enable_infiniband}")
         print_key_value(key="NVLink", value=f"{args.enable_nvlink}")

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/benchmarks/utils.py RENAMED Viewed

@@ -73,7 +73,7 @@ def parse_benchmark_args(description="Generic dask-cuda Benchmark", args_list=[]
     cluster_args.add_argument(
         "-p",
         "--protocol",
-        choices=["tcp", "ucx"],
+        choices=["tcp", "ucx", "ucxx"],
         default="tcp",
         type=str,
         help="The communication protocol to use.",

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/device_host_file.py RENAMED Viewed

@@ -17,7 +17,7 @@ from distributed.protocol import (
     serialize_bytelist,
 )
 from distributed.sizeof import safe_sizeof
-from distributed.spill import CustomFile as KeyAsStringFile
+from distributed.spill import AnyKeyFile as KeyAsStringFile
 from distributed.utils import nbytes
 from .is_device_object import is_device_object

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/initialize.py RENAMED Viewed

@@ -5,7 +5,6 @@ import click
 import numba.cuda
 import dask
-import distributed.comm.ucx
 from distributed.diagnostics.nvml import get_device_index_and_uuid, has_cuda_context
 from .utils import get_ucx_config
@@ -23,12 +22,21 @@ def _create_cuda_context_handler():
         numba.cuda.current_context()
-def _create_cuda_context():
+def _create_cuda_context(protocol="ucx"):
+    if protocol not in ["ucx", "ucxx"]:
+        return
     try:
         # Added here to ensure the parent `LocalCUDACluster` process creates the CUDA
         # context directly from the UCX module, thus avoiding a similar warning there.
         try:
-            distributed.comm.ucx.init_once()
+            if protocol == "ucx":
+                import distributed.comm.ucx
+                distributed.comm.ucx.init_once()
+            elif protocol == "ucxx":
+                import distributed_ucxx.ucxx
+                distributed_ucxx.ucxx.init_once()
         except ModuleNotFoundError:
             # UCX initialization has to be delegated to Distributed, it will take care
             # of setting correct environment variables and importing `ucp` after that.
@@ -39,20 +47,35 @@ def _create_cuda_context():
             os.environ.get("CUDA_VISIBLE_DEVICES", "0").split(",")[0]
         )
         ctx = has_cuda_context()
-        if (
-            ctx.has_context
-            and not distributed.comm.ucx.cuda_context_created.has_context
-        ):
-            distributed.comm.ucx._warn_existing_cuda_context(ctx, os.getpid())
+        if protocol == "ucx":
+            if (
+                ctx.has_context
+                and not distributed.comm.ucx.cuda_context_created.has_context
+            ):
+                distributed.comm.ucx._warn_existing_cuda_context(ctx, os.getpid())
+        elif protocol == "ucxx":
+            if (
+                ctx.has_context
+                and not distributed_ucxx.ucxx.cuda_context_created.has_context
+            ):
+                distributed_ucxx.ucxx._warn_existing_cuda_context(ctx, os.getpid())
         _create_cuda_context_handler()
-        if not distributed.comm.ucx.cuda_context_created.has_context:
-            ctx = has_cuda_context()
-            if ctx.has_context and ctx.device_info != cuda_visible_device:
-                distributed.comm.ucx._warn_cuda_context_wrong_device(
-                    cuda_visible_device, ctx.device_info, os.getpid()
-                )
+        if protocol == "ucx":
+            if not distributed.comm.ucx.cuda_context_created.has_context:
+                ctx = has_cuda_context()
+                if ctx.has_context and ctx.device_info != cuda_visible_device:
+                    distributed.comm.ucx._warn_cuda_context_wrong_device(
+                        cuda_visible_device, ctx.device_info, os.getpid()
+                    )
+        elif protocol == "ucxx":
+            if not distributed_ucxx.ucxx.cuda_context_created.has_context:
+                ctx = has_cuda_context()
+                if ctx.has_context and ctx.device_info != cuda_visible_device:
+                    distributed_ucxx.ucxx._warn_cuda_context_wrong_device(
+                        cuda_visible_device, ctx.device_info, os.getpid()
+                    )
     except Exception:
         logger.error("Unable to start CUDA Context", exc_info=True)
@@ -64,6 +87,7 @@ def initialize(
     enable_infiniband=None,
     enable_nvlink=None,
     enable_rdmacm=None,
+    protocol="ucx",
 ):
     """Create CUDA context and initialize UCX-Py, depending on user parameters.
@@ -118,7 +142,7 @@ def initialize(
     dask.config.set({"distributed.comm.ucx": ucx_config})
     if create_cuda_context:
-        _create_cuda_context()
+        _create_cuda_context(protocol=protocol)
 @click.command()
@@ -127,6 +151,12 @@ def initialize(
     default=False,
     help="Create CUDA context",
 )
+@click.option(
+    "--protocol",
+    default=None,
+    type=str,
+    help="Communication protocol, such as: 'tcp', 'tls', 'ucx' or 'ucxx'.",
+)
 @click.option(
     "--enable-tcp-over-ucx/--disable-tcp-over-ucx",
     default=False,
@@ -150,10 +180,11 @@ def initialize(
 def dask_setup(
     service,
     create_cuda_context,
+    protocol,
     enable_tcp_over_ucx,
     enable_infiniband,
     enable_nvlink,
     enable_rdmacm,
 ):
     if create_cuda_context:
-        _create_cuda_context()
+        _create_cuda_context(protocol=protocol)

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/local_cuda_cluster.py RENAMED Viewed

@@ -319,8 +319,11 @@ class LocalCUDACluster(LocalCluster):
         if enable_tcp_over_ucx or enable_infiniband or enable_nvlink:
             if protocol is None:
                 protocol = "ucx"
-            elif protocol != "ucx":
-                raise TypeError("Enabling InfiniBand or NVLink requires protocol='ucx'")
+            elif protocol not in ["ucx", "ucxx"]:
+                raise TypeError(
+                    "Enabling InfiniBand or NVLink requires protocol='ucx' or "
+                    "protocol='ucxx'"
+                )
         self.host = kwargs.get("host", None)
@@ -371,7 +374,7 @@ class LocalCUDACluster(LocalCluster):
         ) + ["dask_cuda.initialize"]
         self.new_spec["options"]["preload_argv"] = self.new_spec["options"].get(
             "preload_argv", []
-        ) + ["--create-cuda-context"]
+        ) + ["--create-cuda-context", "--protocol", protocol]
         self.cuda_visible_devices = CUDA_VISIBLE_DEVICES
         self.scale(n_workers)

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/tests/test_dgx.py RENAMED Viewed

@@ -73,10 +73,13 @@ def test_default():
     assert not p.exitcode
-def _test_tcp_over_ucx():
-    ucp = pytest.importorskip("ucp")
+def _test_tcp_over_ucx(protocol):
+    if protocol == "ucx":
+        ucp = pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        ucp = pytest.importorskip("ucxx")
-    with LocalCUDACluster(enable_tcp_over_ucx=True) as cluster:
+    with LocalCUDACluster(protocol=protocol, enable_tcp_over_ucx=True) as cluster:
         with Client(cluster) as client:
             res = da.from_array(numpy.arange(10000), chunks=(1000,))
             res = res.sum().compute()
@@ -93,10 +96,17 @@ def _test_tcp_over_ucx():
             assert all(client.run(check_ucx_options).values())
-def test_tcp_over_ucx():
-    ucp = pytest.importorskip("ucp")  # NOQA: F841
+@pytest.mark.parametrize(
+    "protocol",
+    ["ucx", "ucxx"],
+)
+def test_tcp_over_ucx(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
-    p = mp.Process(target=_test_tcp_over_ucx)
+    p = mp.Process(target=_test_tcp_over_ucx, args=(protocol,))
     p.start()
     p.join()
     assert not p.exitcode
@@ -117,9 +127,22 @@ def test_tcp_only():
     assert not p.exitcode
-def _test_ucx_infiniband_nvlink(enable_infiniband, enable_nvlink, enable_rdmacm):
+def _test_ucx_infiniband_nvlink(
+    skip_queue, protocol, enable_infiniband, enable_nvlink, enable_rdmacm
+):
     cupy = pytest.importorskip("cupy")
-    ucp = pytest.importorskip("ucp")
+    if protocol == "ucx":
+        ucp = pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        ucp = pytest.importorskip("ucxx")
+    if enable_infiniband and not any(
+        [at.startswith("rc") for at in ucp.get_active_transports()]
+    ):
+        skip_queue.put("No support available for 'rc' transport in UCX")
+        return
+    else:
+        skip_queue.put("ok")
     if enable_infiniband is None and enable_nvlink is None and enable_rdmacm is None:
         enable_tcp_over_ucx = None
@@ -135,6 +158,7 @@ def _test_ucx_infiniband_nvlink(enable_infiniband, enable_nvlink, enable_rdmacm)
             cm_tls_priority = ["tcp"]
     initialize(
+        protocol=protocol,
         enable_tcp_over_ucx=enable_tcp_over_ucx,
         enable_infiniband=enable_infiniband,
         enable_nvlink=enable_nvlink,
@@ -142,6 +166,7 @@ def _test_ucx_infiniband_nvlink(enable_infiniband, enable_nvlink, enable_rdmacm)
     )
     with LocalCUDACluster(
+        protocol=protocol,
         interface="ib0",
         enable_tcp_over_ucx=enable_tcp_over_ucx,
         enable_infiniband=enable_infiniband,
@@ -171,6 +196,7 @@ def _test_ucx_infiniband_nvlink(enable_infiniband, enable_nvlink, enable_rdmacm)
             assert all(client.run(check_ucx_options).values())
+@pytest.mark.parametrize("protocol", ["ucx", "ucxx"])
 @pytest.mark.parametrize(
     "params",
     [
@@ -185,16 +211,19 @@ def _test_ucx_infiniband_nvlink(enable_infiniband, enable_nvlink, enable_rdmacm)
     _get_dgx_version() == DGXVersion.DGX_A100,
     reason="Automatic InfiniBand device detection Unsupported for %s" % _get_dgx_name(),
 )
-def test_ucx_infiniband_nvlink(params):
-    ucp = pytest.importorskip("ucp")  # NOQA: F841
+def test_ucx_infiniband_nvlink(protocol, params):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
-    if params["enable_infiniband"]:
-        if not any([at.startswith("rc") for at in ucp.get_active_transports()]):
-            pytest.skip("No support available for 'rc' transport in UCX")
+    skip_queue = mp.Queue()
     p = mp.Process(
         target=_test_ucx_infiniband_nvlink,
         args=(
+            skip_queue,
+            protocol,
             params["enable_infiniband"],
             params["enable_nvlink"],
             params["enable_rdmacm"],
@@ -203,9 +232,8 @@ def test_ucx_infiniband_nvlink(params):
     p.start()
     p.join()
-    # Starting a new cluster on the same pytest process after an rdmacm cluster
-    # has been used may cause UCX-Py to complain about being already initialized.
-    if params["enable_rdmacm"] is True:
-        ucp.reset()
+    skip_msg = skip_queue.get()
+    if skip_msg != "ok":
+        pytest.skip(skip_msg)
     assert not p.exitcode

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/tests/test_explicit_comms.py RENAMED Viewed

@@ -44,7 +44,7 @@ def _test_local_cluster(protocol):
             assert sum(c.run(my_rank, 0)) == sum(range(4))
-@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
 def test_local_cluster(protocol):
     p = mp.Process(target=_test_local_cluster, args=(protocol,))
     p.start()
@@ -160,7 +160,7 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions):
 @pytest.mark.parametrize("nworkers", [1, 2, 3])
 @pytest.mark.parametrize("backend", ["pandas", "cudf"])
-@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
 @pytest.mark.parametrize("_partitions", [True, False])
 def test_dataframe_shuffle(backend, protocol, nworkers, _partitions):
     if backend == "cudf":
@@ -256,7 +256,7 @@ def _test_dataframe_shuffle_merge(backend, protocol, n_workers):
 @pytest.mark.parametrize("nworkers", [1, 2, 4])
 @pytest.mark.parametrize("backend", ["pandas", "cudf"])
-@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
 def test_dataframe_shuffle_merge(backend, protocol, nworkers):
     if backend == "cudf":
         pytest.importorskip("cudf")
@@ -293,7 +293,7 @@ def _test_jit_unspill(protocol):
             assert_eq(got, expected)
-@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
 def test_jit_unspill(protocol):
     pytest.importorskip("cudf")

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/tests/test_from_array.py RENAMED Viewed

@@ -5,12 +5,16 @@ from distributed import Client
 from dask_cuda import LocalCUDACluster
-pytest.importorskip("ucp")
 cupy = pytest.importorskip("cupy")
-@pytest.mark.parametrize("protocol", ["ucx", "tcp"])
+@pytest.mark.parametrize("protocol", ["ucx", "ucxx", "tcp"])
 def test_ucx_from_array(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
     N = 10_000
     with LocalCUDACluster(protocol=protocol) as cluster:
         with Client(cluster):

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/tests/test_initialize.py RENAMED Viewed

@@ -13,7 +13,6 @@ from dask_cuda.utils import get_ucx_config
 from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
 mp = mp.get_context("spawn")  # type: ignore
-ucp = pytest.importorskip("ucp")
 # Notice, all of the following tests is executed in a new process such
 # that UCX options of the different tests doesn't conflict.
@@ -21,11 +20,16 @@ ucp = pytest.importorskip("ucp")
 # of UCX before retrieving the current config.
-def _test_initialize_ucx_tcp():
+def _test_initialize_ucx_tcp(protocol):
+    if protocol == "ucx":
+        ucp = pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        ucp = pytest.importorskip("ucxx")
     kwargs = {"enable_tcp_over_ucx": True}
-    initialize(**kwargs)
+    initialize(protocol=protocol, **kwargs)
     with LocalCluster(
-        protocol="ucx",
+        protocol=protocol,
         dashboard_address=None,
         n_workers=1,
         threads_per_worker=1,
@@ -50,18 +54,29 @@ def _test_initialize_ucx_tcp():
             assert all(client.run(check_ucx_options).values())
-def test_initialize_ucx_tcp():
-    p = mp.Process(target=_test_initialize_ucx_tcp)
+@pytest.mark.parametrize("protocol", ["ucx", "ucxx"])
+def test_initialize_ucx_tcp(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
+    p = mp.Process(target=_test_initialize_ucx_tcp, args=(protocol,))
     p.start()
     p.join()
     assert not p.exitcode
-def _test_initialize_ucx_nvlink():
+def _test_initialize_ucx_nvlink(protocol):
+    if protocol == "ucx":
+        ucp = pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        ucp = pytest.importorskip("ucxx")
     kwargs = {"enable_nvlink": True}
-    initialize(**kwargs)
+    initialize(protocol=protocol, **kwargs)
     with LocalCluster(
-        protocol="ucx",
+        protocol=protocol,
         dashboard_address=None,
         n_workers=1,
         threads_per_worker=1,
@@ -87,18 +102,29 @@ def _test_initialize_ucx_nvlink():
             assert all(client.run(check_ucx_options).values())
-def test_initialize_ucx_nvlink():
-    p = mp.Process(target=_test_initialize_ucx_nvlink)
+@pytest.mark.parametrize("protocol", ["ucx", "ucxx"])
+def test_initialize_ucx_nvlink(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
+    p = mp.Process(target=_test_initialize_ucx_nvlink, args=(protocol,))
     p.start()
     p.join()
     assert not p.exitcode
-def _test_initialize_ucx_infiniband():
+def _test_initialize_ucx_infiniband(protocol):
+    if protocol == "ucx":
+        ucp = pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        ucp = pytest.importorskip("ucxx")
     kwargs = {"enable_infiniband": True}
-    initialize(**kwargs)
+    initialize(protocol=protocol, **kwargs)
     with LocalCluster(
-        protocol="ucx",
+        protocol=protocol,
         dashboard_address=None,
         n_workers=1,
         threads_per_worker=1,
@@ -127,17 +153,28 @@ def _test_initialize_ucx_infiniband():
 @pytest.mark.skipif(
     "ib0" not in psutil.net_if_addrs(), reason="Infiniband interface ib0 not found"
 )
-def test_initialize_ucx_infiniband():
-    p = mp.Process(target=_test_initialize_ucx_infiniband)
+@pytest.mark.parametrize("protocol", ["ucx", "ucxx"])
+def test_initialize_ucx_infiniband(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
+    p = mp.Process(target=_test_initialize_ucx_infiniband, args=(protocol,))
     p.start()
     p.join()
     assert not p.exitcode
-def _test_initialize_ucx_all():
-    initialize()
+def _test_initialize_ucx_all(protocol):
+    if protocol == "ucx":
+        ucp = pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        ucp = pytest.importorskip("ucxx")
+    initialize(protocol=protocol)
     with LocalCluster(
-        protocol="ucx",
+        protocol=protocol,
         dashboard_address=None,
         n_workers=1,
         threads_per_worker=1,
@@ -166,8 +203,14 @@ def _test_initialize_ucx_all():
             assert all(client.run(check_ucx_options).values())
-def test_initialize_ucx_all():
-    p = mp.Process(target=_test_initialize_ucx_all)
+@pytest.mark.parametrize("protocol", ["ucx", "ucxx"])
+def test_initialize_ucx_all(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
+    p = mp.Process(target=_test_initialize_ucx_all, args=(protocol,))
     p.start()
     p.join()
     assert not p.exitcode

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/tests/test_local_cuda_cluster.py RENAMED Viewed

@@ -87,23 +87,38 @@ async def test_with_subset_of_cuda_visible_devices():
                 }
+@pytest.mark.parametrize(
+    "protocol",
+    ["ucx", "ucxx"],
+)
 @gen_test(timeout=20)
-async def test_ucx_protocol():
-    pytest.importorskip("ucp")
+async def test_ucx_protocol(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
     async with LocalCUDACluster(
-        protocol="ucx", asynchronous=True, data=dict
+        protocol=protocol, asynchronous=True, data=dict
     ) as cluster:
         assert all(
-            ws.address.startswith("ucx://") for ws in cluster.scheduler.workers.values()
+            ws.address.startswith(f"{protocol}://")
+            for ws in cluster.scheduler.workers.values()
         )
+@pytest.mark.parametrize(
+    "protocol",
+    ["ucx", "ucxx"],
+)
 @gen_test(timeout=20)
-async def test_explicit_ucx_with_protocol_none():
-    pytest.importorskip("ucp")
+async def test_explicit_ucx_with_protocol_none(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
-    initialize(enable_tcp_over_ucx=True)
+    initialize(protocol=protocol, enable_tcp_over_ucx=True)
     async with LocalCUDACluster(
         protocol=None, enable_tcp_over_ucx=True, asynchronous=True, data=dict
     ) as cluster:
@@ -113,11 +128,18 @@ async def test_explicit_ucx_with_protocol_none():
 @pytest.mark.filterwarnings("ignore:Exception ignored in")
+@pytest.mark.parametrize(
+    "protocol",
+    ["ucx", "ucxx"],
+)
 @gen_test(timeout=20)
-async def test_ucx_protocol_type_error():
-    pytest.importorskip("ucp")
+async def test_ucx_protocol_type_error(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
-    initialize(enable_tcp_over_ucx=True)
+    initialize(protocol=protocol, enable_tcp_over_ucx=True)
     with pytest.raises(TypeError):
         async with LocalCUDACluster(
             protocol="tcp", enable_tcp_over_ucx=True, asynchronous=True, data=dict
@@ -337,6 +359,7 @@ async def test_pre_import():
 # Intentionally not using @gen_test to skip cleanup checks
+@pytest.mark.xfail(reason="https://github.com/rapidsai/dask-cuda/issues/1265")
 def test_pre_import_not_found():
     async def _test_pre_import_not_found():
         with raises_with_cause(RuntimeError, None, ImportError, None):
@@ -477,20 +500,30 @@ async def test_worker_fraction_limits():
             )
-def test_print_cluster_config(capsys):
+@pytest.mark.parametrize(
+    "protocol",
+    ["ucx", "ucxx"],
+)
+def test_print_cluster_config(capsys, protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
     pytest.importorskip("rich")
     with LocalCUDACluster(
-        n_workers=1, device_memory_limit="1B", jit_unspill=True, protocol="ucx"
+        n_workers=1, device_memory_limit="1B", jit_unspill=True, protocol=protocol
     ) as cluster:
         with Client(cluster) as client:
             print_cluster_config(client)
             captured = capsys.readouterr()
             assert "Dask Cluster Configuration" in captured.out
-            assert "ucx" in captured.out
+            assert protocol in captured.out
             assert "1 B" in captured.out
             assert "[plugin]" in captured.out
+@pytest.mark.xfail(reason="https://github.com/rapidsai/dask-cuda/issues/1265")
 def test_death_timeout_raises():
     with pytest.raises(asyncio.exceptions.TimeoutError):
         with LocalCUDACluster(

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/tests/test_proxy.py RENAMED Viewed

@@ -400,10 +400,14 @@ class _PxyObjTest(proxy_object.ProxyObject):
 @pytest.mark.parametrize("send_serializers", [None, ("dask", "pickle"), ("cuda",)])
-@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
 @gen_test(timeout=120)
 async def test_communicating_proxy_objects(protocol, send_serializers):
     """Testing serialization of cuDF dataframe when communicating"""
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
     cudf = pytest.importorskip("cudf")
     def task(x):
@@ -412,7 +416,7 @@ async def test_communicating_proxy_objects(protocol, send_serializers):
         serializers_used = x._pxy_get().serializer
         # Check that `x` is serialized with the expected serializers
-        if protocol == "ucx":
+        if protocol in ["ucx", "ucxx"]:
             if send_serializers is None:
                 assert serializers_used == "cuda"
             else:
@@ -443,11 +447,15 @@ async def test_communicating_proxy_objects(protocol, send_serializers):
             await client.submit(task, df)
-@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
+@pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
 @pytest.mark.parametrize("shared_fs", [True, False])
 @gen_test(timeout=20)
 async def test_communicating_disk_objects(protocol, shared_fs):
     """Testing disk serialization of cuDF dataframe when communicating"""
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
     cudf = pytest.importorskip("cudf")
     ProxifyHostFile._spill_to_disk.shared_filesystem = shared_fs

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/tests/test_utils.py RENAMED Viewed

@@ -79,11 +79,18 @@ def test_get_device_total_memory():
             assert total_mem > 0
-def test_get_preload_options_default():
-    pytest.importorskip("ucp")
+@pytest.mark.parametrize(
+    "protocol",
+    ["ucx", "ucxx"],
+)
+def test_get_preload_options_default(protocol):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
     opts = get_preload_options(
-        protocol="ucx",
+        protocol=protocol,
         create_cuda_context=True,
     )
@@ -93,14 +100,21 @@ def test_get_preload_options_default():
     assert opts["preload_argv"] == ["--create-cuda-context"]
+@pytest.mark.parametrize(
+    "protocol",
+    ["ucx", "ucxx"],
+)
 @pytest.mark.parametrize("enable_tcp", [True, False])
 @pytest.mark.parametrize("enable_infiniband", [True, False])
 @pytest.mark.parametrize("enable_nvlink", [True, False])
-def test_get_preload_options(enable_tcp, enable_infiniband, enable_nvlink):
-    pytest.importorskip("ucp")
+def test_get_preload_options(protocol, enable_tcp, enable_infiniband, enable_nvlink):
+    if protocol == "ucx":
+        pytest.importorskip("ucp")
+    elif protocol == "ucxx":
+        pytest.importorskip("ucxx")
     opts = get_preload_options(
-        protocol="ucx",
+        protocol=protocol,
         create_cuda_context=True,
         enable_tcp_over_ucx=enable_tcp,
         enable_infiniband=enable_infiniband,

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda/utils.py RENAMED Viewed

@@ -287,7 +287,7 @@ def get_preload_options(
     if create_cuda_context:
         preload_options["preload_argv"].append("--create-cuda-context")
-    if protocol == "ucx":
+    if protocol in ["ucx", "ucxx"]:
         initialize_ucx_argv = []
         if enable_tcp_over_ucx:
             initialize_ucx_argv.append("--enable-tcp-over-ucx")
@@ -625,6 +625,10 @@ def get_worker_config(dask_worker):
         import ucp
         ret["ucx-transports"] = ucp.get_active_transports()
+    elif scheme == "ucxx":
+        import ucxx
+        ret["ucx-transports"] = ucxx.get_active_transports()
     # comm timeouts
     ret["distributed.comm.timeouts"] = dask.config.get("distributed.comm.timeouts")

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3/dask_cuda.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dask-cuda
-Version: 23.12.0a231027
+Version: 24.2.0a3
 Summary: Utilities for Dask and CUDA interactions
 Author: NVIDIA Corporation
 License: Apache-2.0
@@ -17,12 +17,11 @@ Classifier: Programming Language :: Python :: 3.10
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: dask==2023.9.2
-Requires-Dist: distributed==2023.9.2
 Requires-Dist: pynvml<11.5,>=11.0.0
 Requires-Dist: numpy>=1.21
 Requires-Dist: numba>=0.57
 Requires-Dist: pandas<1.6.0dev0,>=1.3
+Requires-Dist: rapids-dask-dependency==23.12.*,>=0.0.0a0
 Requires-Dist: zict>=2.0.0
 Provides-Extra: docs
 Requires-Dist: numpydoc>=1.1.0; extra == "docs"

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda.egg-info/SOURCES.txt RENAMED Viewed

@@ -3,9 +3,10 @@ MANIFEST.in
 README.md
 pyproject.toml
 setup.py
+dask_cuda/VERSION
 dask_cuda/__init__.py
+dask_cuda/_version.py
 dask_cuda/cli.py
-dask_cuda/compat.py
 dask_cuda/cuda_worker.py
 dask_cuda/device_host_file.py
 dask_cuda/disk_io.py

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/dask_cuda.egg-info/requires.txt RENAMED Viewed

@@ -1,9 +1,8 @@
-dask==2023.9.2
-distributed==2023.9.2
 pynvml<11.5,>=11.0.0
 numpy>=1.21
 numba>=0.57
 pandas<1.6.0dev0,>=1.3
+rapids-dask-dependency==23.12.*,>=0.0.0a0
 zict>=2.0.0
 [docs]

{dask-cuda-23.12.0a231027 → dask-cuda-24.2.0a3}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ requires = [
 [project]
 name = "dask-cuda"
-version = "23.12.00a231027"
+dynamic = ["version"]
 description = "Utilities for Dask and CUDA interactions"
 readme = { file = "README.md", content-type = "text/markdown" }
 authors = [
@@ -16,12 +16,11 @@ authors = [
 license = { text = "Apache-2.0" }
 requires-python = ">=3.9"
 dependencies = [
-    "dask ==2023.9.2",
-    "distributed ==2023.9.2",
     "pynvml >=11.0.0,<11.5",
     "numpy >=1.21",
     "numba >=0.57",
     "pandas >=1.3,<1.6.0dev0",
+    "rapids-dask-dependency==23.12.*,>=0.0.0a0",
     "zict >=2.0.0",
 ]
 classifiers = [
@@ -123,6 +122,9 @@ filterwarnings = [
 [tool.setuptools]
 license-files = ["LICENSE"]
+[tool.setuptools.dynamic]
+version = {file = "dask_cuda/VERSION"}
 [tool.setuptools.packages.find]
 exclude = [
     "docs",

dask-cuda-23.12.0a231027/dask_cuda/compat.py DELETED Viewed

@@ -1,118 +0,0 @@
-import pickle
-import msgpack
-from packaging.version import Version
-import dask
-import distributed
-import distributed.comm.utils
-import distributed.protocol
-from distributed.comm.utils import OFFLOAD_THRESHOLD, nbytes, offload
-from distributed.protocol.core import (
-    Serialized,
-    decompress,
-    logger,
-    merge_and_deserialize,
-    msgpack_decode_default,
-    msgpack_opts,
-)
-if Version(distributed.__version__) >= Version("2023.8.1"):
-    # Monkey-patch protocol.core.loads (and its users)
-    async def from_frames(
-        frames, deserialize=True, deserializers=None, allow_offload=True
-    ):
-        """
-        Unserialize a list of Distributed protocol frames.
-        """
-        size = False
-        def _from_frames():
-            try:
-                # Patched code
-                return loads(
-                    frames, deserialize=deserialize, deserializers=deserializers
-                )
-                # end patched code
-            except EOFError:
-                if size > 1000:
-                    datastr = "[too large to display]"
-                else:
-                    datastr = frames
-                # Aid diagnosing
-                logger.error("truncated data stream (%d bytes): %s", size, datastr)
-                raise
-        if allow_offload and deserialize and OFFLOAD_THRESHOLD:
-            size = sum(map(nbytes, frames))
-        if (
-            allow_offload
-            and deserialize
-            and OFFLOAD_THRESHOLD
-            and size > OFFLOAD_THRESHOLD
-        ):
-            res = await offload(_from_frames)
-        else:
-            res = _from_frames()
-        return res
-    def loads(frames, deserialize=True, deserializers=None):
-        """Transform bytestream back into Python value"""
-        allow_pickle = dask.config.get("distributed.scheduler.pickle")
-        try:
-            def _decode_default(obj):
-                offset = obj.get("__Serialized__", 0)
-                if offset > 0:
-                    sub_header = msgpack.loads(
-                        frames[offset],
-                        object_hook=msgpack_decode_default,
-                        use_list=False,
-                        **msgpack_opts,
-                    )
-                    offset += 1
-                    sub_frames = frames[offset : offset + sub_header["num-sub-frames"]]
-                    if deserialize:
-                        if "compression" in sub_header:
-                            sub_frames = decompress(sub_header, sub_frames)
-                        return merge_and_deserialize(
-                            sub_header, sub_frames, deserializers=deserializers
-                        )
-                    else:
-                        return Serialized(sub_header, sub_frames)
-                offset = obj.get("__Pickled__", 0)
-                if offset > 0:
-                    sub_header = msgpack.loads(frames[offset])
-                    offset += 1
-                    sub_frames = frames[offset : offset + sub_header["num-sub-frames"]]
-                    # Patched code
-                    if "compression" in sub_header:
-                        sub_frames = decompress(sub_header, sub_frames)
-                    # end patched code
-                    if allow_pickle:
-                        return pickle.loads(
-                            sub_header["pickled-obj"], buffers=sub_frames
-                        )
-                    else:
-                        raise ValueError(
-                            "Unpickle on the Scheduler isn't allowed, "
-                            "set `distributed.scheduler.pickle=true`"
-                        )
-                return msgpack_decode_default(obj)
-            return msgpack.loads(
-                frames[0], object_hook=_decode_default, use_list=False, **msgpack_opts
-            )
-        except Exception:
-            logger.critical("Failed to deserialize", exc_info=True)
-            raise
-    distributed.protocol.loads = loads
-    distributed.protocol.core.loads = loads
-    distributed.comm.utils.from_frames = from_frames