dstack 0.18.40rc1__py3-none-any.whl → 0.18.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/apply.py +8 -5
- dstack/_internal/cli/services/configurators/base.py +4 -2
- dstack/_internal/cli/services/configurators/fleet.py +21 -9
- dstack/_internal/cli/services/configurators/gateway.py +15 -0
- dstack/_internal/cli/services/configurators/run.py +6 -5
- dstack/_internal/cli/services/configurators/volume.py +15 -0
- dstack/_internal/cli/services/repos.py +3 -3
- dstack/_internal/cli/utils/fleet.py +44 -33
- dstack/_internal/cli/utils/run.py +27 -7
- dstack/_internal/cli/utils/volume.py +30 -9
- dstack/_internal/core/backends/aws/compute.py +94 -53
- dstack/_internal/core/backends/aws/resources.py +22 -12
- dstack/_internal/core/backends/azure/compute.py +2 -0
- dstack/_internal/core/backends/base/compute.py +20 -2
- dstack/_internal/core/backends/gcp/compute.py +32 -24
- dstack/_internal/core/backends/gcp/resources.py +0 -15
- dstack/_internal/core/backends/oci/compute.py +10 -5
- dstack/_internal/core/backends/oci/resources.py +23 -26
- dstack/_internal/core/backends/remote/provisioning.py +65 -27
- dstack/_internal/core/backends/runpod/compute.py +1 -0
- dstack/_internal/core/models/backends/azure.py +3 -1
- dstack/_internal/core/models/configurations.py +24 -1
- dstack/_internal/core/models/fleets.py +46 -0
- dstack/_internal/core/models/instances.py +5 -1
- dstack/_internal/core/models/pools.py +4 -1
- dstack/_internal/core/models/profiles.py +10 -4
- dstack/_internal/core/models/runs.py +23 -3
- dstack/_internal/core/models/volumes.py +26 -0
- dstack/_internal/core/services/ssh/attach.py +92 -53
- dstack/_internal/core/services/ssh/tunnel.py +58 -31
- dstack/_internal/proxy/gateway/routers/registry.py +2 -0
- dstack/_internal/proxy/gateway/schemas/registry.py +2 -0
- dstack/_internal/proxy/gateway/services/registry.py +4 -0
- dstack/_internal/proxy/lib/models.py +3 -0
- dstack/_internal/proxy/lib/services/service_connection.py +8 -1
- dstack/_internal/server/background/tasks/process_instances.py +73 -35
- dstack/_internal/server/background/tasks/process_metrics.py +9 -9
- dstack/_internal/server/background/tasks/process_running_jobs.py +77 -26
- dstack/_internal/server/background/tasks/process_runs.py +2 -12
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +121 -49
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +14 -3
- dstack/_internal/server/background/tasks/process_volumes.py +11 -1
- dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
- dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
- dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
- dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
- dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
- dstack/_internal/server/models.py +27 -23
- dstack/_internal/server/routers/runs.py +1 -0
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/services/backends/configurators/azure.py +34 -8
- dstack/_internal/server/services/config.py +9 -0
- dstack/_internal/server/services/fleets.py +32 -3
- dstack/_internal/server/services/gateways/client.py +9 -1
- dstack/_internal/server/services/jobs/__init__.py +217 -45
- dstack/_internal/server/services/jobs/configurators/base.py +47 -2
- dstack/_internal/server/services/offers.py +96 -10
- dstack/_internal/server/services/pools.py +98 -14
- dstack/_internal/server/services/proxy/repo.py +17 -3
- dstack/_internal/server/services/runner/client.py +9 -6
- dstack/_internal/server/services/runner/ssh.py +33 -5
- dstack/_internal/server/services/runs.py +48 -179
- dstack/_internal/server/services/services/__init__.py +9 -1
- dstack/_internal/server/services/volumes.py +68 -9
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js → main-2ac66bfcbd2e39830b88.js} +30 -31
- dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js.map → main-2ac66bfcbd2e39830b88.js.map} +1 -1
- dstack/_internal/server/statics/{main-fc56d1f4af8e57522a1c.css → main-ad5150a441de98cd8987.css} +1 -1
- dstack/_internal/server/testing/common.py +130 -61
- dstack/_internal/utils/common.py +22 -8
- dstack/_internal/utils/env.py +14 -0
- dstack/_internal/utils/ssh.py +1 -1
- dstack/api/server/_fleets.py +25 -1
- dstack/api/server/_runs.py +23 -2
- dstack/api/server/_volumes.py +12 -1
- dstack/version.py +1 -1
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/METADATA +1 -1
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/RECORD +104 -93
- tests/_internal/cli/services/configurators/test_profile.py +3 -3
- tests/_internal/core/services/ssh/test_tunnel.py +56 -4
- tests/_internal/proxy/gateway/routers/test_registry.py +30 -7
- tests/_internal/server/background/tasks/test_process_instances.py +138 -20
- tests/_internal/server/background/tasks/test_process_metrics.py +12 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +193 -0
- tests/_internal/server/background/tasks/test_process_runs.py +27 -3
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +53 -6
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +135 -17
- tests/_internal/server/routers/test_fleets.py +15 -2
- tests/_internal/server/routers/test_pools.py +6 -0
- tests/_internal/server/routers/test_runs.py +27 -0
- tests/_internal/server/routers/test_volumes.py +9 -2
- tests/_internal/server/services/jobs/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_base.py +72 -0
- tests/_internal/server/services/runner/test_client.py +22 -3
- tests/_internal/server/services/test_offers.py +167 -0
- tests/_internal/server/services/test_pools.py +109 -1
- tests/_internal/server/services/test_runs.py +5 -41
- tests/_internal/utils/test_common.py +21 -0
- tests/_internal/utils/test_env.py +38 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/WHEEL +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/top_level.txt +0 -0
tests/_internal/core/services/ssh/test_tunnel.py

@@ -32,7 +32,9 @@ class TestSSHTunnel:
             options={"Opt1": "opt1"},
             ssh_config_path="/home/user/.ssh/config",
             port=10022,
-
+            ssh_proxies=[
+                (SSHConnectionParams(hostname="proxy", username="test", port=10022), None)
+            ],
             forwarded_sockets=[SocketPair(UnixSocket("/1"), UnixSocket("/2"))],
             reverse_forwarded_sockets=[SocketPair(UnixSocket("/1"), UnixSocket("/2"))],
         )
@@ -105,13 +107,18 @@ class TestSSHTunnel:
         )
 
     @pytest.mark.usefixtures("ssh_client_info")
-    def
+    def test_open_command_with_one_proxy(self) -> None:
         tunnel = SSHTunnel(
             destination="ubuntu@my-server",
             identity=FilePath("/home/user/.ssh/id_rsa"),
             control_sock_path="/tmp/control.sock",
             options={},
-
+            ssh_proxies=[
+                (
+                    SSHConnectionParams(hostname="proxy", username="test", port=10022),
+                    FilePath("/home/user/.ssh/proxy"),
+                )
+            ],
         )
         assert tunnel.open_command() == [
             "/usr/bin/ssh",
@@ -130,12 +137,57 @@ class TestSSHTunnel:
             "-o",
             (
                 "ProxyCommand="
-                "/usr/bin/ssh -i /home/user/.ssh/
+                "/usr/bin/ssh -i /home/user/.ssh/proxy -W %h:%p -o StrictHostKeyChecking=no"
                 " -o UserKnownHostsFile=/dev/null -p 10022 test@proxy"
             ),
             "ubuntu@my-server",
         ]
 
+    @pytest.mark.usefixtures("ssh_client_info")
+    def test_open_command_with_two_proxies(self) -> None:
+        tunnel = SSHTunnel(
+            destination="ubuntu@my-server",
+            identity=FilePath("/home/user/.ssh/id_rsa"),
+            control_sock_path="/tmp/control.sock",
+            options={},
+            ssh_proxies=[
+                (
+                    SSHConnectionParams(hostname="proxy1", username="test1", port=10022),
+                    None,
+                ),
+                (
+                    SSHConnectionParams(hostname="proxy2", username="test2", port=20022),
+                    FilePath("/home/user/.ssh/proxy2"),
+                ),
+            ],
+        )
+        assert tunnel.open_command() == [
+            "/usr/bin/ssh",
+            "-F",
+            "none",
+            "-i",
+            "/home/user/.ssh/id_rsa",
+            "-E",
+            f"{tunnel.temp_dir.name}/tunnel.log",
+            "-N",
+            "-f",
+            "-o",
+            "ControlMaster=auto",
+            "-S",
+            "/tmp/control.sock",
+            "-o",
+            (
+                "ProxyCommand="
+                "/usr/bin/ssh -i /home/user/.ssh/proxy2 -W %h:%p -o StrictHostKeyChecking=no"
+                " -o UserKnownHostsFile=/dev/null"
+                " -o 'ProxyCommand=/usr/bin/ssh -i /home/user/.ssh/id_rsa -W %%h:%%p"
+                " -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
+                " -p 10022 test1@proxy1'"
+                " -p 20022 test2@proxy2"
+            ),
+            "ubuntu@my-server",
+        ]
+
     @pytest.mark.usefixtures("ssh_client_info")
     def test_open_command_with_forwarding(self) -> None:
         tunnel = SSHTunnel(
tests/_internal/proxy/gateway/routers/test_registry.py

@@ -48,6 +48,24 @@ def register_replica_payload(job_id: str = "xxx-xxx") -> dict:
         "ssh_host": "host.test",
         "ssh_port": 22,
         "ssh_proxy": None,
+        "ssh_head_proxy": None,
+        "ssh_head_proxy_private_key": None,
+    }
+
+
+def register_replica_payload_with_head_proxy(job_id: str = "xxx-xxx") -> dict:
+    return {
+        "job_id": job_id,
+        "app_port": 8888,
+        "ssh_host": "host.test",
+        "ssh_port": 22,
+        "ssh_proxy": None,
+        "ssh_head_proxy": {
+            "hostname": "proxy.test",
+            "username": "debian",
+            "port": 222,
+        },
+        "ssh_head_proxy_private_key": "private-key",
     }
 
 
@@ -190,13 +208,18 @@ class TestRegisterReplica:
         conf = (tmp_path / "443-test-run.gtw.test.conf").read_text()
         assert "upstream test-run" not in conf
         # register 2 replicas
-
-
-
-
-
-
-
+        resp = await client.post(
+            "/api/registry/test-proj/services/test-run/replicas/register",
+            json=register_replica_payload(job_id="xxx-xxx"),
+        )
+        assert resp.status_code == 200
+        assert resp.json() == {"status": "ok"}
+        resp = await client.post(
+            "/api/registry/test-proj/services/test-run/replicas/register",
+            json=register_replica_payload_with_head_proxy(job_id="yyy-yyy"),
+        )
+        assert resp.status_code == 200
+        assert resp.json() == {"status": "ok"}
         conf = (tmp_path / "443-test-run.gtw.test.conf").read_text()
         assert "upstream test-run" in conf
         assert (m1 := re.search(r"server unix:/(.+)/replica.sock; # replica xxx-xxx", conf))
tests/_internal/server/background/tasks/test_process_instances.py

@@ -3,6 +3,7 @@ from contextlib import contextmanager
 from typing import Optional
 from unittest.mock import Mock, patch
 
+import gpuhunt
 import pytest
 from freezegun import freeze_time
 from sqlalchemy.ext.asyncio import AsyncSession
@@ -10,6 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from dstack._internal.core.errors import BackendError
 from dstack._internal.core.models.backends.base import BackendType
 from dstack._internal.core.models.instances import (
+    Gpu,
     InstanceAvailability,
     InstanceOfferWithAvailability,
     InstanceStatus,
@@ -33,6 +35,7 @@ from dstack._internal.server.testing.common import (
     create_repo,
     create_run,
     create_user,
+    get_remote_connection_info,
 )
 from dstack._internal.utils.common import get_current_datetime
 
@@ -118,11 +121,6 @@ class TestCheckShim:
             repo=repo,
             user=user,
         )
-        job = await create_job(
-            session=session,
-            run=run,
-            status=JobStatus.SUBMITTED,
-        )
 
         instance = await create_instance(
             session, project, pool, status=InstanceStatus.PROVISIONING
@@ -131,7 +129,13 @@ class TestCheckShim:
             tzinfo=dt.timezone.utc
         ) + dt.timedelta(days=1)
         instance.health_status = "ssh connect problem"
-
+
+        job = await create_job(
+            session=session,
+            run=run,
+            status=JobStatus.SUBMITTED,
+            instance=instance,
+        )
 
         await session.commit()
 
@@ -142,12 +146,13 @@ class TestCheckShim:
         await process_instances()
 
         await session.refresh(instance)
+        await session.refresh(job)
 
         assert instance is not None
         assert instance.status == InstanceStatus.BUSY
         assert instance.termination_deadline is None
         assert instance.health_status is None
-        assert instance
+        assert job.instance == instance
 
     @pytest.mark.asyncio
     @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
@@ -310,7 +315,7 @@ class TestTerminateIdleTime:
         assert instance.termination_reason == "Idle timeout"
 
 
-class
+class TestSSHInstanceTerminateProvisionTimeoutExpired:
     @pytest.mark.asyncio
     @pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
     async def test_terminate_by_idle_timeout(self, test_db, session: AsyncSession):
@@ -469,29 +474,63 @@ class TestTerminate:
         assert instance.status == InstanceStatus.TERMINATED
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+@pytest.mark.usefixtures("test_db")
 class TestCreateInstance:
-    @pytest.mark.
-
-
+    @pytest.mark.parametrize(
+        # requested_blocks = None means `auto` (as many as possible)
+        ["cpus", "gpus", "requested_blocks", "expected_blocks"],
+        [
+            # GPU instances
+            pytest.param(32, 8, 1, 1, id="gpu-instance-no-blocks"),
+            pytest.param(32, 8, 2, 2, id="gpu-instance-four-gpu-per-block"),
+            pytest.param(32, 8, 4, 4, id="gpu-instance-two-gpus-per-block"),
+            pytest.param(32, 8, None, 8, id="gpu-instance-auto-max-gpu"),
+            pytest.param(4, 8, None, 4, id="gpu-instance-auto-max-cpu"),
+            pytest.param(8, 8, None, 8, id="gpu-instance-auto-max-cpu-and-gpu"),
+            # CPU instances
+            pytest.param(32, 0, 1, 1, id="cpu-instance-no-blocks"),
+            pytest.param(32, 0, 2, 2, id="cpu-instance-four-cpu-per-block"),
+            pytest.param(32, 0, 4, 4, id="cpu-instance-two-cpus-per-block"),
+            pytest.param(32, 0, None, 32, id="gpu-instance-auto-max-cpu"),
+        ],
+    )
+    async def test_creates_instance(
+        self,
+        session: AsyncSession,
+        cpus: int,
+        gpus: int,
+        requested_blocks: Optional[int],
+        expected_blocks: int,
+    ):
         project = await create_project(session=session)
         pool = await create_pool(session, project)
-        instance = await create_instance(
-
-
-
+        instance = await create_instance(
+            session,
+            project,
+            pool,
+            status=InstanceStatus.PENDING,
+            total_blocks=requested_blocks,
+            busy_blocks=0,
+        )
+        with patch("dstack._internal.server.services.backends.get_project_backends") as m:
+            backend_mock = Mock()
+            m.return_value = [backend_mock]
+            backend_mock.TYPE = BackendType.AWS
+            gpu = Gpu(name="T4", memory_mib=16384, vendor=gpuhunt.AcceleratorVendor.NVIDIA)
             offer = InstanceOfferWithAvailability(
                 backend=BackendType.AWS,
                 instance=InstanceType(
                     name="instance",
-                    resources=Resources(
+                    resources=Resources(
+                        cpus=cpus, memory_mib=131072, spot=False, gpus=[gpu] * gpus
+                    ),
                 ),
                 region="us",
                 price=1.0,
                 availability=InstanceAvailability.AVAILABLE,
             )
-
-            backend_mock = Mock()
-            backend_mock.TYPE = BackendType.AWS
             backend_mock.compute.return_value.get_offers_cached.return_value = [offer]
             backend_mock.compute.return_value.create_instance.return_value = JobProvisioningData(
                 backend=offer.backend,
@@ -507,8 +546,87 @@ class TestCreateInstance:
                 dockerized=True,
                 backend_data=None,
             )
-
+
             await process_instances()
 
         await session.refresh(instance)
         assert instance.status == InstanceStatus.PROVISIONING
+        assert instance.total_blocks == expected_blocks
+        assert instance.busy_blocks == 0
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("test_db", ["sqlite", "postgres"], indirect=True)
+@pytest.mark.usefixtures("test_db", "deploy_instance_mock")
+class TestAddSSHInstance:
+    @pytest.fixture
+    def host_info(self) -> dict:
+        return {
+            "gpu_vendor": "nvidia",
+            "gpu_name": "T4",
+            "gpu_memory": 16384,
+            "gpu_count": 1,
+            "addresses": ["192.168.100.100/24"],
+            "disk_size": 260976517120,
+            "cpus": 32,
+            "memory": 33544130560,
+        }
+
+    @pytest.fixture
+    def deploy_instance_mock(self, monkeypatch: pytest.MonkeyPatch, host_info: dict):
+        mock = Mock(return_value=(HealthStatus(healthy=True, reason="OK"), host_info))
+        monkeypatch.setattr(
+            "dstack._internal.server.background.tasks.process_instances._deploy_instance", mock
+        )
+        return mock
+
+    @pytest.mark.parametrize(
+        # requested_blocks = None means `auto` (as many as possible)
+        ["cpus", "gpus", "requested_blocks", "expected_blocks"],
+        [
+            # GPU instances
+            pytest.param(32, 8, 1, 1, id="gpu-instance-no-blocks"),
+            pytest.param(32, 8, 2, 2, id="gpu-instance-four-gpu-per-block"),
+            pytest.param(32, 8, 4, 4, id="gpu-instance-two-gpus-per-block"),
+            pytest.param(32, 8, None, 8, id="gpu-instance-auto-max-gpu"),
+            pytest.param(4, 8, None, 4, id="gpu-instance-auto-max-cpu"),
+            pytest.param(8, 8, None, 8, id="gpu-instance-auto-max-cpu-and-gpu"),
+            # CPU instances
+            pytest.param(32, 0, 1, 1, id="cpu-instance-no-blocks"),
+            pytest.param(32, 0, 2, 2, id="cpu-instance-four-cpu-per-block"),
+            pytest.param(32, 0, 4, 4, id="cpu-instance-two-cpus-per-block"),
+            pytest.param(32, 0, None, 32, id="gpu-instance-auto-max-cpu"),
+        ],
+    )
+    @pytest.mark.usefixtures("deploy_instance_mock")
+    async def test_adds_ssh_instance(
+        self,
+        session: AsyncSession,
+        host_info: dict,
+        cpus: int,
+        gpus: int,
+        requested_blocks: Optional[int],
+        expected_blocks: int,
+    ):
+        host_info["cpus"] = cpus
+        host_info["gpu_count"] = gpus
+        project = await create_project(session=session)
+        pool = await create_pool(session, project)
+        instance = await create_instance(
+            session,
+            project,
+            pool,
+            status=InstanceStatus.PENDING,
+            created_at=get_current_datetime(),
+            remote_connection_info=get_remote_connection_info(),
+            total_blocks=requested_blocks,
+            busy_blocks=0,
+        )
+        await session.commit()
+
+        await process_instances()
+
+        await session.refresh(instance)
+        assert instance.status == InstanceStatus.IDLE
+        assert instance.total_blocks == expected_blocks
+        assert instance.busy_blocks == 0
tests/_internal/server/background/tasks/test_process_metrics.py

@@ -6,6 +6,7 @@ from freezegun import freeze_time
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 
+from dstack._internal.core.models.instances import InstanceStatus
 from dstack._internal.core.models.runs import JobStatus
 from dstack._internal.core.models.users import GlobalRole, ProjectRole
 from dstack._internal.server import settings
@@ -17,8 +18,10 @@ from dstack._internal.server.models import JobMetricsPoint
 from dstack._internal.server.schemas.runner import GPUMetrics, MetricsResponse
 from dstack._internal.server.services.projects import add_project_member
 from dstack._internal.server.testing.common import (
+    create_instance,
     create_job,
     create_job_metrics_point,
+    create_pool,
     create_project,
     create_repo,
     create_run,
@@ -42,6 +45,13 @@ class TestCollectMetrics:
             session=session,
             project_id=project.id,
         )
+        pool = await create_pool(session=session, project=project)
+        instance = await create_instance(
+            session=session,
+            project=project,
+            pool=pool,
+            status=InstanceStatus.BUSY,
+        )
         run = await create_run(
             session=session,
             project=project,
@@ -53,6 +63,8 @@ class TestCollectMetrics:
             run=run,
             status=JobStatus.RUNNING,
             job_provisioning_data=get_job_provisioning_data(),
+            instance_assigned=True,
+            instance=instance,
         )
         with (
             patch("dstack._internal.server.services.runner.ssh.SSHTunnel") as SSHTunnelMock,