dask-cuda 25.8.0__py3-none-any.whl → 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. dask_cuda/GIT_COMMIT +1 -1
  2. dask_cuda/VERSION +1 -1
  3. dask_cuda/benchmarks/local_cudf_groupby.py +1 -1
  4. dask_cuda/benchmarks/local_cudf_merge.py +1 -1
  5. dask_cuda/benchmarks/local_cudf_shuffle.py +1 -1
  6. dask_cuda/benchmarks/local_cupy.py +1 -1
  7. dask_cuda/benchmarks/local_cupy_map_overlap.py +1 -1
  8. dask_cuda/benchmarks/utils.py +1 -1
  9. dask_cuda/cuda_worker.py +1 -1
  10. dask_cuda/get_device_memory_objects.py +1 -4
  11. dask_cuda/initialize.py +140 -121
  12. dask_cuda/local_cuda_cluster.py +10 -25
  13. dask_cuda/tests/test_cudf_builtin_spilling.py +3 -1
  14. dask_cuda/tests/test_dask_setup.py +193 -0
  15. dask_cuda/tests/test_dgx.py +16 -32
  16. dask_cuda/tests/test_explicit_comms.py +11 -10
  17. dask_cuda/tests/test_from_array.py +1 -5
  18. dask_cuda/tests/test_initialize.py +230 -41
  19. dask_cuda/tests/test_local_cuda_cluster.py +16 -62
  20. dask_cuda/tests/test_proxify_host_file.py +9 -4
  21. dask_cuda/tests/test_proxy.py +8 -8
  22. dask_cuda/tests/test_spill.py +3 -3
  23. dask_cuda/tests/test_utils.py +8 -23
  24. dask_cuda/tests/test_worker_spec.py +5 -2
  25. dask_cuda/utils.py +12 -66
  26. dask_cuda/utils_test.py +0 -13
  27. dask_cuda/worker_spec.py +7 -9
  28. {dask_cuda-25.8.0.dist-info → dask_cuda-25.10.0.dist-info}/METADATA +11 -4
  29. dask_cuda-25.10.0.dist-info/RECORD +63 -0
  30. shared-actions/check_nightly_success/check-nightly-success/check.py +1 -1
  31. dask_cuda/tests/test_rdd_ucx.py +0 -160
  32. dask_cuda-25.8.0.dist-info/RECORD +0 -63
  33. {dask_cuda-25.8.0.dist-info → dask_cuda-25.10.0.dist-info}/WHEEL +0 -0
  34. {dask_cuda-25.8.0.dist-info → dask_cuda-25.10.0.dist-info}/entry_points.txt +0 -0
  35. {dask_cuda-25.8.0.dist-info → dask_cuda-25.10.0.dist-info}/licenses/LICENSE +0 -0
  36. {dask_cuda-25.8.0.dist-info → dask_cuda-25.10.0.dist-info}/top_level.txt +0 -0
dask_cuda/tests/test_dask_setup.py
@@ -0,0 +1,193 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-License-Identifier: Apache-2.0
+
+ import json
+ import os
+ import time
+ from contextlib import contextmanager
+ from unittest.mock import Mock, patch
+
+ import pytest
+
+ from distributed import Client
+ from distributed.utils import open_port
+ from distributed.utils_test import popen
+
+ from dask_cuda.initialize import dask_setup
+ from dask_cuda.utils import wait_workers
+
+
+ def test_dask_setup_function_with_mock_worker():
+     """Test the dask_setup function directly with a mock worker."""
+     # Create a mock worker object
+     mock_worker = Mock()
+
+     with patch("dask_cuda.initialize._create_cuda_context") as mock_create_context:
+         # Test with create_cuda_context=True.
+         # Call the underlying function directly (the Click decorator wraps the
+         # real function).
+         dask_setup.callback(
+             worker=mock_worker,
+             create_cuda_context=True,
+         )
+
+         mock_create_context.assert_called_once_with()
+
+         mock_create_context.reset_mock()
+
+         # Test with create_cuda_context=False
+         dask_setup.callback(
+             worker=mock_worker,
+             create_cuda_context=False,
+         )
+
+         mock_create_context.assert_not_called()
+
+
+ @contextmanager
+ def start_dask_scheduler(protocol: str, max_attempts: int = 5, timeout: int = 10):
+     """Start a Dask scheduler in a subprocess.
+
+     Attempts to start a Dask scheduler in a subprocess; if the port is not
+     available, retries on a different port, up to a maximum of `max_attempts`
+     attempts. The stdout and stderr of the process are read to determine whether
+     the scheduler failed to bind to the port or succeeded, waiting no more than
+     `timeout` seconds between reads.
+
+     This is primarily useful because UCX does not release TCP ports immediately.
+     A workaround that makes this function unnecessary is setting
+     `UCX_TCP_CM_REUSEADDR=y`, but that must be set explicitly when running tests,
+     which is not very friendly.
+
+     Parameters
+     ----------
+     protocol: str
+         Communication protocol to use.
+     max_attempts: int
+         Maximum number of attempts to start the scheduler.
+     timeout: int
+         Time to wait while reading stdout/stderr of the subprocess.
+     """
+     port = open_port()
+     for _ in range(max_attempts):
+         with popen(
+             [
+                 "dask",
+                 "scheduler",
+                 "--no-dashboard",
+                 "--protocol",
+                 protocol,
+                 "--port",
+                 str(port),
+             ],
+             capture_output=True,  # Capture stdout and stderr
+         ) as scheduler_process:
+             # Check if the scheduler process started successfully by streaming output
+             try:
+                 start_time = time.monotonic()
+                 while True:
+                     if time.monotonic() - start_time > timeout:
+                         raise TimeoutError("Timeout while waiting for scheduler output")
+
+                     line = scheduler_process.stdout.readline()
+                     if not line:
+                         break  # End of output
+                     print(
+                         line.decode(), end=""
+                     )  # Since capture_output=True, print the line here
+                     if b"Scheduler at:" in line:
+                         # Scheduler is now listening
+                         break
+                     elif b"UCXXBusyError" in line:
+                         raise Exception("UCXXBusyError detected in scheduler output")
+             except Exception:
+                 port += 1
+             else:
+                 yield scheduler_process, port
+                 return
+     else:
+         pytest.fail(f"Failed to start dask scheduler after {max_attempts} attempts.")
+
+
+ @pytest.mark.timeout(30)
+ @patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
+ @pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucxx"])
+ def test_dask_cuda_worker_cli_integration(protocol, tmp_path):
+     """Test that the dask cuda worker CLI correctly passes arguments to dask_setup.
+
+     Verifies the end-to-end integration where the CLI tool actually launches and
+     calls dask_setup with the correct arguments.
+     """
+
+     # Use pytest's tmp_path for file management
+     capture_file_path = tmp_path / "dask_setup_integration_test.json"
+     preload_file = tmp_path / "preload_capture.py"
+
+     # Write the preload script to tmp_path
+     preload_file.write_text(
+         f'''
+ import json
+ import os
+
+ def capture_dask_setup_call(worker, create_cuda_context):
+     """Capture dask_setup arguments and write them to a file."""
+     result = {{
+         'worker_protocol': getattr(worker, '_protocol', 'unknown'),
+         'create_cuda_context': create_cuda_context,
+         'test_success': True
+     }}
+
+     # Write immediately to ensure it gets captured
+     with open(r"{capture_file_path}", 'w') as f:
+         json.dump(result, f)
+
+ # Patch the dask_setup callback
+ from dask_cuda.initialize import dask_setup
+ dask_setup.callback = capture_dask_setup_call
+ '''
+     )
+
+     with start_dask_scheduler(protocol=protocol) as scheduler_process_port:
+         scheduler_process, scheduler_port = scheduler_process_port
+         sched_addr = f"{protocol}://127.0.0.1:{scheduler_port}"
+         print(f"{sched_addr=}", flush=True)
+
+         # Build the dask cuda worker arguments
+         dask_cuda_worker_args = [
+             "dask",
+             "cuda",
+             "worker",
+             sched_addr,
+             "--host",
+             "127.0.0.1",
+             "--no-dashboard",
+             "--preload",
+             str(preload_file),
+             "--death-timeout",
+             "10",
+         ]
+
+         with popen(dask_cuda_worker_args):
+             # Wait and check for the worker connection
+             with Client(sched_addr) as client:
+                 assert wait_workers(client, n_gpus=1)
+
+         # Check that dask_setup was called and its arguments captured correctly
+         if capture_file_path.exists():
+             with open(capture_file_path, "r") as cf:
+                 captured_args = json.load(cf)
+
+             # Verify the critical arguments were passed correctly
+             assert (
+                 captured_args["create_cuda_context"] is True
+             ), "create_cuda_context should be True"
+
+             # Verify the worker has the expected protocol set
+             assert (
+                 captured_args["worker_protocol"] == protocol
+             ), "Worker should have a protocol"
+         else:
+             pytest.fail(
+                 "capture file not found: dask_setup was not called or "
+                 "failed to write to file"
+             )
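
Note: the unit test above calls `dask_setup.callback` rather than `dask_setup` itself. The following is a minimal stand-alone sketch (a hypothetical example, not dask-cuda code) of the Click behavior it relies on: `@click.command()` replaces the decorated function with a `click.Command` object, and the original function remains reachable through the command's `callback` attribute.

    import click

    @click.command()
    @click.option("--create-cuda-context", default=True, type=bool)
    def dask_setup(create_cuda_context):
        print(f"create_cuda_context={create_cuda_context}")

    # Invoking the command would parse sys.argv; calling `.callback` bypasses
    # the CLI layer and runs the undecorated function, which is exactly what
    # the mock-based unit test above does.
    dask_setup.callback(create_cuda_context=False)
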
dask_cuda/tests/test_dgx.py
@@ -13,16 +13,11 @@ from distributed import Client
 
  from dask_cuda import LocalCUDACluster
  from dask_cuda.initialize import initialize
- from dask_cuda.utils_test import get_ucx_implementation
 
  mp = mp.get_context("spawn")  # type: ignore
  psutil = pytest.importorskip("psutil")
 
 
- def _is_ucx_116(ucp):
-     return ucp.get_ucx_version()[:2] == (1, 16)
-
-
  class DGXVersion(Enum):
      DGX_1 = auto()
      DGX_2 = auto()
@@ -81,17 +76,17 @@ def test_default():
      assert not p.exitcode
 
 
- def _test_tcp_over_ucx(protocol):
-     ucp = get_ucx_implementation(protocol)
+ def _test_tcp_over_ucx():
+     ucxx = pytest.importorskip("ucxx")
 
-     with LocalCUDACluster(protocol=protocol, enable_tcp_over_ucx=True) as cluster:
+     with LocalCUDACluster(protocol="ucx", enable_tcp_over_ucx=True) as cluster:
          with Client(cluster) as client:
              res = da.from_array(numpy.arange(10000), chunks=(1000,))
              res = res.sum().compute()
              assert res == 49995000
 
              def check_ucx_options():
-                 conf = ucp.get_config()
+                 conf = ucxx.get_config()
                  assert "TLS" in conf
                  assert "tcp" in conf["TLS"]
                  assert "cuda_copy" in conf["TLS"]
@@ -101,16 +96,10 @@ def _test_tcp_over_ucx(protocol):
              assert all(client.run(check_ucx_options).values())
 
 
- @pytest.mark.parametrize(
-     "protocol",
-     ["ucx", "ucx-old"],
- )
- def test_tcp_over_ucx(protocol):
-     ucp = get_ucx_implementation(protocol)
-     if _is_ucx_116(ucp):
-         pytest.skip("https://github.com/rapidsai/ucx-py/issues/1037")
+ def test_tcp_over_ucx():
+     pytest.importorskip("distributed_ucxx")
 
-     p = mp.Process(target=_test_tcp_over_ucx, args=(protocol,))
+     p = mp.Process(target=_test_tcp_over_ucx)
      p.start()
      p.join()
      assert not p.exitcode
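
Note: `_test_tcp_over_ucx` above and `_test_ucx_infiniband_nvlink` below both end with `assert all(client.run(check_ucx_options).values())`. As a minimal illustrative sketch (not taken from the diff): `Client.run` executes a function on every connected worker and returns a dict mapping worker address to the function's return value, so `all(...)` over the values verifies the UCX options on each worker.

    from distributed import Client, LocalCluster

    def worker_ok():
        return True  # stand-in for check_ucx_options()

    if __name__ == "__main__":
        with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
            results = client.run(worker_ok)  # e.g. {"tcp://127.0.0.1:...": True, ...}
            assert all(results.values())
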
@@ -132,22 +121,22 @@ def test_tcp_only():
 
 
  def _test_ucx_infiniband_nvlink(
-     skip_queue, protocol, enable_infiniband, enable_nvlink, enable_rdmacm
+     skip_queue, enable_infiniband, enable_nvlink, enable_rdmacm
  ):
+     ucxx = pytest.importorskip("ucxx")
      cupy = pytest.importorskip("cupy")
-     ucp = get_ucx_implementation(protocol)
 
      if enable_infiniband and not any(
-         [at.startswith("rc") for at in ucp.get_active_transports()]
+         [at.startswith("rc") for at in ucxx.get_active_transports()]
      ):
          skip_queue.put("No support available for 'rc' transport in UCX")
          return
      else:
          skip_queue.put("ok")
 
-     # The `ucp.get_active_transports()` call above initializes UCX; we must reset it
+     # The `ucxx.get_active_transports()` call above initializes UCX; we must reset it
      # so that Dask doesn't try to initialize it again and raise an exception.
-     ucp.reset()
+     ucxx.reset()
 
      if enable_infiniband is None and enable_nvlink is None and enable_rdmacm is None:
          enable_tcp_over_ucx = None
@@ -163,7 +152,6 @@ def _test_ucx_infiniband_nvlink(
          cm_tls_priority = ["tcp"]
 
      initialize(
-         protocol=protocol,
          enable_tcp_over_ucx=enable_tcp_over_ucx,
          enable_infiniband=enable_infiniband,
          enable_nvlink=enable_nvlink,
@@ -171,7 +159,7 @@ def _test_ucx_infiniband_nvlink(
      )
 
      with LocalCUDACluster(
-         protocol=protocol,
+         protocol="ucx",
          interface="ib0",
          enable_tcp_over_ucx=enable_tcp_over_ucx,
          enable_infiniband=enable_infiniband,
@@ -185,7 +173,7 @@ def _test_ucx_infiniband_nvlink(
              assert res == 49995000
 
              def check_ucx_options():
-                 conf = ucp.get_config()
+                 conf = ucxx.get_config()
                  assert "TLS" in conf
                  assert all(t in conf["TLS"] for t in cm_tls)
                  assert all(p in conf["SOCKADDR_TLS_PRIORITY"] for p in cm_tls_priority)
@@ -201,7 +189,6 @@ def _test_ucx_infiniband_nvlink(
              assert all(client.run(check_ucx_options).values())
 
 
- @pytest.mark.parametrize("protocol", ["ucx", "ucx-old"])
  @pytest.mark.parametrize(
      "params",
      [
@@ -216,10 +203,8 @@ def _test_ucx_infiniband_nvlink(
      _get_dgx_version() == DGXVersion.DGX_A100,
      reason="Automatic InfiniBand device detection Unsupported for %s" % _get_dgx_name(),
  )
- def test_ucx_infiniband_nvlink(protocol, params):
-     ucp = get_ucx_implementation(protocol)
-     if _is_ucx_116(ucp) and params["enable_infiniband"] is False:
-         pytest.skip("https://github.com/rapidsai/ucx-py/issues/1037")
+ def test_ucx_infiniband_nvlink(params):
+     pytest.importorskip("distributed_ucxx")
 
      skip_queue = mp.Queue()
 
@@ -227,7 +212,6 @@ def test_ucx_infiniband_nvlink(protocol, params):
          target=_test_ucx_infiniband_nvlink,
          args=(
              skip_queue,
-             protocol,
              params["enable_infiniband"],
              params["enable_nvlink"],
              params["enable_rdmacm"],
dask_cuda/tests/test_explicit_comms.py
@@ -26,10 +26,9 @@ from dask_cuda.explicit_comms.dataframe.shuffle import (
      _contains_shuffle_expr,
      shuffle as explicit_comms_shuffle,
  )
- from dask_cuda.utils_test import IncreasedCloseTimeoutNanny, get_ucx_implementation
+ from dask_cuda.utils_test import IncreasedCloseTimeoutNanny
 
  mp = mp.get_context("spawn")  # type: ignore
- ucp = pytest.importorskip("ucp")
 
 
  # Notice, all of the following tests is executed in a new process such
@@ -54,10 +53,11 @@ def _test_local_cluster(protocol):
      assert sum(c.run(my_rank, 0)) == sum(range(4))
 
 
- @pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucx-old"])
+ @pytest.mark.parametrize("protocol", ["tcp", "ucx"])
  def test_local_cluster(protocol):
      if protocol.startswith("ucx"):
-         get_ucx_implementation(protocol)
+         pytest.importorskip("distributed_ucxx")
+
      p = mp.Process(target=_test_local_cluster, args=(protocol,))
      p.start()
      p.join()
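
Note: the recurring replacement of `get_ucx_implementation(protocol)` with `pytest.importorskip("distributed_ucxx")` uses the standard pytest gating idiom: `importorskip` imports and returns the named module, or skips the test (rather than failing it) when the module is missing. A small illustrative sketch:

    import pytest

    def test_requires_ucxx():
        # Skips, instead of erroring, on machines without the optional package.
        distributed_ucxx = pytest.importorskip("distributed_ucxx")
        assert distributed_ucxx is not None
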
@@ -202,13 +202,13 @@ def _test_dataframe_shuffle(backend, protocol, n_workers, _partitions):
 
  @pytest.mark.parametrize("nworkers", [1, 2, 3])
  @pytest.mark.parametrize("backend", ["pandas", "cudf"])
- @pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucx-old"])
+ @pytest.mark.parametrize("protocol", ["tcp", "ucx"])
  @pytest.mark.parametrize("_partitions", [True, False])
  def test_dataframe_shuffle(backend, protocol, nworkers, _partitions):
      if backend == "cudf":
          pytest.importorskip("cudf")
      if protocol.startswith("ucx"):
-         get_ucx_implementation(protocol)
+         pytest.importorskip("distributed_ucxx")
 
      p = mp.Process(
          target=_test_dataframe_shuffle, args=(backend, protocol, nworkers, _partitions)
@@ -325,12 +325,13 @@ def _test_dataframe_shuffle_merge(backend, protocol, n_workers):
 
  @pytest.mark.parametrize("nworkers", [1, 2, 4])
  @pytest.mark.parametrize("backend", ["pandas", "cudf"])
- @pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucx-old"])
+ @pytest.mark.parametrize("protocol", ["tcp", "ucx"])
  def test_dataframe_shuffle_merge(backend, protocol, nworkers):
      if backend == "cudf":
          pytest.importorskip("cudf")
      if protocol.startswith("ucx"):
-         get_ucx_implementation(protocol)
+         pytest.importorskip("distributed_ucxx")
+
      p = mp.Process(
          target=_test_dataframe_shuffle_merge, args=(backend, protocol, nworkers)
      )
@@ -364,14 +365,14 @@ def _test_jit_unspill(protocol):
      assert_eq(got, expected)
 
 
- @pytest.mark.parametrize("protocol", ["tcp", "ucx", "ucx-old"])
+ @pytest.mark.parametrize("protocol", ["tcp", "ucx"])
  @pytest.mark.skip_if_no_device_memory(
      "JIT-Unspill not supported in devices without dedicated memory resource"
  )
  def test_jit_unspill(protocol):
      pytest.importorskip("cudf")
      if protocol.startswith("ucx"):
-         get_ucx_implementation(protocol)
+         pytest.importorskip("distributed_ucxx")
 
      p = mp.Process(target=_test_jit_unspill, args=(protocol,))
      p.start()
dask_cuda/tests/test_from_array.py
@@ -7,16 +7,12 @@ import dask.array as da
  from distributed import Client
 
  from dask_cuda import LocalCUDACluster
- from dask_cuda.utils_test import get_ucx_implementation
 
  cupy = pytest.importorskip("cupy")
 
 
- @pytest.mark.parametrize("protocol", ["ucx", "ucx-old", "tcp"])
+ @pytest.mark.parametrize("protocol", ["ucx", "tcp"])
  def test_ucx_from_array(protocol):
-     if protocol.startswith("ucx"):
-         get_ucx_implementation(protocol)
-
      N = 10_000
      with LocalCUDACluster(protocol=protocol) as cluster:
          with Client(cluster):
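
Note: the hunk is truncated here. As a hedged sketch of the round trip test_ucx_from_array exercises (assuming a GPU with CuPy installed; protocol="ucx" additionally requires distributed-ucxx), not the test's exact body:

    import cupy
    import dask.array as da
    from distributed import Client
    from dask_cuda import LocalCUDACluster

    if __name__ == "__main__":
        N = 10_000
        with LocalCUDACluster(protocol="tcp") as cluster, Client(cluster):
            darr = da.from_array(cupy.arange(N), chunks=(1_000,))
            # Sum of 0..N-1 is N * (N - 1) / 2 = 49_995_000 for N = 10_000
            assert darr.sum().compute() == N * (N - 1) // 2
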