dstack 0.19.25rc1__py3-none-any.whl → 0.19.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dstack might be problematic.
Files changed (161)
  1. dstack/_internal/cli/commands/__init__.py +2 -2
  2. dstack/_internal/cli/commands/apply.py +3 -61
  3. dstack/_internal/cli/commands/attach.py +1 -1
  4. dstack/_internal/cli/commands/completion.py +1 -1
  5. dstack/_internal/cli/commands/delete.py +2 -2
  6. dstack/_internal/cli/commands/fleet.py +1 -1
  7. dstack/_internal/cli/commands/gateway.py +2 -2
  8. dstack/_internal/cli/commands/init.py +56 -24
  9. dstack/_internal/cli/commands/logs.py +1 -1
  10. dstack/_internal/cli/commands/metrics.py +1 -1
  11. dstack/_internal/cli/commands/offer.py +45 -7
  12. dstack/_internal/cli/commands/project.py +2 -2
  13. dstack/_internal/cli/commands/secrets.py +2 -2
  14. dstack/_internal/cli/commands/server.py +1 -1
  15. dstack/_internal/cli/commands/stop.py +1 -1
  16. dstack/_internal/cli/commands/volume.py +1 -1
  17. dstack/_internal/cli/main.py +2 -2
  18. dstack/_internal/cli/services/completion.py +2 -2
  19. dstack/_internal/cli/services/configurators/__init__.py +6 -2
  20. dstack/_internal/cli/services/configurators/base.py +6 -7
  21. dstack/_internal/cli/services/configurators/fleet.py +1 -3
  22. dstack/_internal/cli/services/configurators/gateway.py +2 -4
  23. dstack/_internal/cli/services/configurators/run.py +293 -58
  24. dstack/_internal/cli/services/configurators/volume.py +2 -4
  25. dstack/_internal/cli/services/profile.py +1 -1
  26. dstack/_internal/cli/services/repos.py +35 -48
  27. dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
  28. dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
  29. dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
  30. dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
  31. dstack/_internal/core/backends/aws/compute.py +6 -1
  32. dstack/_internal/core/backends/aws/configurator.py +11 -7
  33. dstack/_internal/core/backends/azure/configurator.py +11 -7
  34. dstack/_internal/core/backends/base/compute.py +33 -5
  35. dstack/_internal/core/backends/base/configurator.py +25 -13
  36. dstack/_internal/core/backends/base/offers.py +2 -0
  37. dstack/_internal/core/backends/cloudrift/configurator.py +13 -7
  38. dstack/_internal/core/backends/configurators.py +15 -0
  39. dstack/_internal/core/backends/cudo/configurator.py +11 -7
  40. dstack/_internal/core/backends/datacrunch/compute.py +5 -1
  41. dstack/_internal/core/backends/datacrunch/configurator.py +13 -7
  42. dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
  43. dstack/_internal/core/backends/digitalocean/backend.py +16 -0
  44. dstack/_internal/core/backends/digitalocean/compute.py +5 -0
  45. dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
  46. dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
  47. dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
  48. dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
  49. dstack/_internal/core/backends/digitalocean_base/compute.py +173 -0
  50. dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
  51. dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
  52. dstack/_internal/core/backends/gcp/compute.py +32 -8
  53. dstack/_internal/core/backends/gcp/configurator.py +11 -7
  54. dstack/_internal/core/backends/hotaisle/api_client.py +25 -33
  55. dstack/_internal/core/backends/hotaisle/compute.py +1 -6
  56. dstack/_internal/core/backends/hotaisle/configurator.py +13 -7
  57. dstack/_internal/core/backends/kubernetes/configurator.py +13 -7
  58. dstack/_internal/core/backends/lambdalabs/configurator.py +11 -7
  59. dstack/_internal/core/backends/models.py +7 -0
  60. dstack/_internal/core/backends/nebius/compute.py +1 -8
  61. dstack/_internal/core/backends/nebius/configurator.py +11 -7
  62. dstack/_internal/core/backends/nebius/resources.py +21 -11
  63. dstack/_internal/core/backends/oci/compute.py +4 -5
  64. dstack/_internal/core/backends/oci/configurator.py +11 -7
  65. dstack/_internal/core/backends/runpod/configurator.py +11 -7
  66. dstack/_internal/core/backends/template/configurator.py.jinja +11 -7
  67. dstack/_internal/core/backends/tensordock/configurator.py +13 -7
  68. dstack/_internal/core/backends/vastai/configurator.py +11 -7
  69. dstack/_internal/core/backends/vultr/compute.py +1 -5
  70. dstack/_internal/core/backends/vultr/configurator.py +11 -4
  71. dstack/_internal/core/compatibility/fleets.py +5 -0
  72. dstack/_internal/core/compatibility/gpus.py +13 -0
  73. dstack/_internal/core/compatibility/runs.py +9 -1
  74. dstack/_internal/core/models/backends/base.py +5 -1
  75. dstack/_internal/core/models/common.py +3 -3
  76. dstack/_internal/core/models/configurations.py +191 -32
  77. dstack/_internal/core/models/files.py +1 -1
  78. dstack/_internal/core/models/fleets.py +80 -3
  79. dstack/_internal/core/models/profiles.py +41 -11
  80. dstack/_internal/core/models/resources.py +46 -42
  81. dstack/_internal/core/models/runs.py +28 -5
  82. dstack/_internal/core/services/configs/__init__.py +6 -3
  83. dstack/_internal/core/services/profiles.py +2 -2
  84. dstack/_internal/core/services/repos.py +86 -79
  85. dstack/_internal/core/services/ssh/ports.py +1 -1
  86. dstack/_internal/proxy/lib/deps.py +6 -2
  87. dstack/_internal/server/app.py +22 -17
  88. dstack/_internal/server/background/tasks/process_fleets.py +109 -13
  89. dstack/_internal/server/background/tasks/process_gateways.py +4 -1
  90. dstack/_internal/server/background/tasks/process_instances.py +22 -73
  91. dstack/_internal/server/background/tasks/process_probes.py +1 -1
  92. dstack/_internal/server/background/tasks/process_running_jobs.py +12 -4
  93. dstack/_internal/server/background/tasks/process_runs.py +3 -1
  94. dstack/_internal/server/background/tasks/process_submitted_jobs.py +67 -44
  95. dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
  96. dstack/_internal/server/background/tasks/process_volumes.py +1 -1
  97. dstack/_internal/server/db.py +8 -4
  98. dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
  99. dstack/_internal/server/models.py +6 -2
  100. dstack/_internal/server/routers/gpus.py +1 -6
  101. dstack/_internal/server/schemas/runner.py +11 -0
  102. dstack/_internal/server/services/backends/__init__.py +14 -8
  103. dstack/_internal/server/services/backends/handlers.py +6 -1
  104. dstack/_internal/server/services/docker.py +5 -5
  105. dstack/_internal/server/services/fleets.py +37 -38
  106. dstack/_internal/server/services/gateways/__init__.py +2 -0
  107. dstack/_internal/server/services/gateways/client.py +5 -2
  108. dstack/_internal/server/services/gateways/connection.py +1 -1
  109. dstack/_internal/server/services/gpus.py +50 -49
  110. dstack/_internal/server/services/instances.py +44 -4
  111. dstack/_internal/server/services/jobs/__init__.py +15 -4
  112. dstack/_internal/server/services/jobs/configurators/base.py +53 -17
  113. dstack/_internal/server/services/jobs/configurators/dev.py +9 -4
  114. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +6 -8
  115. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +7 -9
  116. dstack/_internal/server/services/jobs/configurators/service.py +1 -3
  117. dstack/_internal/server/services/jobs/configurators/task.py +3 -3
  118. dstack/_internal/server/services/locking.py +5 -5
  119. dstack/_internal/server/services/logging.py +10 -2
  120. dstack/_internal/server/services/logs/__init__.py +8 -6
  121. dstack/_internal/server/services/logs/aws.py +330 -327
  122. dstack/_internal/server/services/logs/filelog.py +7 -6
  123. dstack/_internal/server/services/logs/gcp.py +141 -139
  124. dstack/_internal/server/services/plugins.py +1 -1
  125. dstack/_internal/server/services/projects.py +2 -5
  126. dstack/_internal/server/services/proxy/repo.py +5 -1
  127. dstack/_internal/server/services/requirements/__init__.py +0 -0
  128. dstack/_internal/server/services/requirements/combine.py +259 -0
  129. dstack/_internal/server/services/runner/client.py +7 -0
  130. dstack/_internal/server/services/runs.py +17 -1
  131. dstack/_internal/server/services/services/__init__.py +8 -2
  132. dstack/_internal/server/services/services/autoscalers.py +2 -0
  133. dstack/_internal/server/services/ssh.py +2 -1
  134. dstack/_internal/server/services/storage/__init__.py +5 -6
  135. dstack/_internal/server/services/storage/gcs.py +49 -49
  136. dstack/_internal/server/services/storage/s3.py +52 -52
  137. dstack/_internal/server/statics/index.html +1 -1
  138. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js → main-4eecc75fbe64067eb1bc.js} +1146 -899
  139. dstack/_internal/server/statics/{main-d151b300fcac3933213d.js.map → main-4eecc75fbe64067eb1bc.js.map} +1 -1
  140. dstack/_internal/server/statics/{main-aec4762350e34d6fbff9.css → main-56191c63d516fd0041c4.css} +1 -1
  141. dstack/_internal/server/testing/common.py +7 -4
  142. dstack/_internal/server/utils/logging.py +3 -3
  143. dstack/_internal/server/utils/provisioning.py +3 -3
  144. dstack/_internal/utils/json_schema.py +3 -1
  145. dstack/_internal/utils/path.py +8 -1
  146. dstack/_internal/utils/ssh.py +7 -0
  147. dstack/_internal/utils/typing.py +14 -0
  148. dstack/api/_public/repos.py +62 -8
  149. dstack/api/_public/runs.py +19 -8
  150. dstack/api/server/__init__.py +17 -19
  151. dstack/api/server/_gpus.py +2 -1
  152. dstack/api/server/_group.py +4 -3
  153. dstack/api/server/_repos.py +20 -3
  154. dstack/plugins/builtin/rest_plugin/_plugin.py +1 -0
  155. dstack/version.py +1 -1
  156. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/METADATA +2 -2
  157. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/RECORD +160 -142
  158. dstack/api/huggingface/__init__.py +0 -73
  159. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/WHEEL +0 -0
  160. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/entry_points.txt +0 -0
  161. {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/licenses/LICENSE.md +0 -0
@@ -93,6 +93,8 @@ async def create_gateway_compute(
     backend_id: Optional[uuid.UUID] = None,
 ) -> GatewayComputeModel:
     assert isinstance(backend_compute, ComputeWithGatewaySupport)
+    assert configuration.name is not None
+
     private_bytes, public_bytes = generate_rsa_key_pair_bytes()
     gateway_ssh_private_key = private_bytes.decode()
     gateway_ssh_public_key = public_bytes.decode()
@@ -7,7 +7,7 @@ from pydantic import parse_obj_as

 from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
 from dstack._internal.core.errors import GatewayError
-from dstack._internal.core.models.configurations import RateLimit, ServiceConfiguration
+from dstack._internal.core.models.configurations import RateLimit
 from dstack._internal.core.models.instances import SSHConnectionParams
 from dstack._internal.core.models.runs import JobSpec, JobSubmission, Run, get_service_port
 from dstack._internal.proxy.gateway.schemas.stats import ServiceStats
@@ -85,7 +85,7 @@ class GatewayClient:
         ssh_head_proxy: Optional[SSHConnectionParams],
         ssh_head_proxy_private_key: Optional[str],
     ):
-        assert isinstance(run.run_spec.configuration, ServiceConfiguration)
+        assert run.run_spec.configuration.type == "service"
         payload = {
             "job_id": job_submission.id.hex,
             "app_port": get_service_port(job_spec, run.run_spec.configuration),
@@ -93,6 +93,9 @@ class GatewayClient:
             "ssh_head_proxy_private_key": ssh_head_proxy_private_key,
         }
         jpd = job_submission.job_provisioning_data
+        assert jpd is not None
+        assert jpd.hostname is not None
+        assert jpd.ssh_port is not None
         if not jpd.dockerized:
             payload.update(
                 {
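Note: the `isinstance(…, ServiceConfiguration)` check above is replaced with a comparison on the `type` discriminator field, which avoids importing the concrete configuration class. A minimal sketch of why this works with pydantic tagged unions (the model names below are illustrative, not dstack's actual models):

from typing import List, Literal, Union

from pydantic import BaseModel

class ServiceConf(BaseModel):
    type: Literal["service"] = "service"
    port: int

class TaskConf(BaseModel):
    type: Literal["task"] = "task"
    commands: List[str]

AnyConf = Union[ServiceConf, TaskConf]

def service_port(conf: AnyConf) -> int:
    # Comparing the Literal-typed `type` field narrows the union for type
    # checkers without an isinstance() check against the concrete class.
    assert conf.type == "service"
    return conf.port

print(service_port(ServiceConf(port=8000)))  # 8000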
@@ -67,7 +67,7 @@ class GatewayConnection:
             # reverse_forwarded_sockets are added later in .open()
         )
         self.tunnel_id = uuid.uuid4()
-        self._client = GatewayClient(uds=self.gateway_socket_path)
+        self._client = GatewayClient(uds=str(self.gateway_socket_path))

     @staticmethod
     def _init_symlink_dir(connection_dir: Path) -> Tuple[TemporaryDirectory, Path]:
@@ -1,8 +1,8 @@
 from typing import Dict, List, Literal, Optional, Tuple

-from sqlalchemy.ext.asyncio import AsyncSession
-
 from dstack._internal.core.backends.base.backend import Backend
+from dstack._internal.core.errors import ServerClientError
+from dstack._internal.core.models.backends.base import BackendType
 from dstack._internal.core.models.instances import InstanceOfferWithAvailability
 from dstack._internal.core.models.profiles import SpotPolicy
 from dstack._internal.core.models.resources import Range
@@ -15,10 +15,43 @@ from dstack._internal.server.schemas.gpus import (
     ListGpusResponse,
 )
 from dstack._internal.server.services.offers import get_offers_by_requirements
+from dstack._internal.utils.common import get_or_error
+
+
+async def list_gpus_grouped(
+    project: ProjectModel,
+    run_spec: RunSpec,
+    group_by: Optional[List[Literal["backend", "region", "count"]]] = None,
+) -> ListGpusResponse:
+    """Retrieves available GPU specifications based on a run spec, with optional grouping."""
+    offers = await _get_gpu_offers(project=project, run_spec=run_spec)
+    backend_gpus = _process_offers_into_backend_gpus(offers)
+    group_by_set = set(group_by) if group_by else set()
+    if "region" in group_by_set and "backend" not in group_by_set:
+        raise ServerClientError("Cannot group by 'region' without also grouping by 'backend'")
+
+    # Determine grouping strategy based on combination
+    has_backend = "backend" in group_by_set
+    has_region = "region" in group_by_set
+    has_count = "count" in group_by_set
+    if has_backend and has_region and has_count:
+        gpus = _get_gpus_grouped_by_backend_region_and_count(backend_gpus)
+    elif has_backend and has_count:
+        gpus = _get_gpus_grouped_by_backend_and_count(backend_gpus)
+    elif has_backend and has_region:
+        gpus = _get_gpus_grouped_by_backend_and_region(backend_gpus)
+    elif has_backend:
+        gpus = _get_gpus_grouped_by_backend(backend_gpus)
+    elif has_count:
+        gpus = _get_gpus_grouped_by_count(backend_gpus)
+    else:
+        gpus = _get_gpus_with_no_grouping(backend_gpus)
+
+    return ListGpusResponse(gpus=gpus)


 async def _get_gpu_offers(
-    session: AsyncSession, project: ProjectModel, run_spec: RunSpec
+    project: ProjectModel, run_spec: RunSpec
 ) -> List[Tuple[Backend, InstanceOfferWithAvailability]]:
     """Fetches all available instance offers that match the run spec's GPU requirements."""
     profile = run_spec.merged_profile
@@ -28,7 +61,6 @@ async def _get_gpu_offers(
         spot=get_policy_map(profile.spot_policy, default=SpotPolicy.AUTO),
         reservation=profile.reservation,
     )
-
     return await get_offers_by_requirements(
         project=project,
         profile=profile,
@@ -45,10 +77,10 @@ def _process_offers_into_backend_gpus(
     offers: List[Tuple[Backend, InstanceOfferWithAvailability]],
 ) -> List[BackendGpus]:
     """Transforms raw offers into a structured list of BackendGpus, aggregating GPU info."""
-    backend_data: Dict[str, Dict] = {}
+    backend_data: Dict[BackendType, Dict] = {}

-    for backend, offer in offers:
-        backend_type = backend.TYPE
+    for _, offer in offers:
+        backend_type = offer.backend
         if backend_type not in backend_data:
             backend_data[backend_type] = {"gpus": {}, "regions": set()}

@@ -111,7 +143,7 @@ def _process_offers_into_backend_gpus(
     return backend_gpus_list


-def _update_gpu_group(row: GpuGroup, gpu: BackendGpu, backend_type: str):
+def _update_gpu_group(row: GpuGroup, gpu: BackendGpu, backend_type: BackendType):
     """Updates an existing GpuGroup with new data from another GPU offer."""
     spot_type: Literal["spot", "on-demand"] = "spot" if gpu.spot else "on-demand"

@@ -122,6 +154,12 @@ def _update_gpu_group(row: GpuGroup, gpu: BackendGpu, backend_type: str):
     if row.backends and backend_type not in row.backends:
         row.backends.append(backend_type)

+    # FIXME: Consider using non-optional range
+    assert row.count.min is not None
+    assert row.count.max is not None
+    assert row.price.min is not None
+    assert row.price.max is not None
+
     row.count.min = min(row.count.min, gpu.count)
     row.count.max = max(row.count.max, gpu.count)
     per_gpu_price = gpu.price / gpu.count
@@ -194,7 +232,7 @@ def _get_gpus_grouped_by_backend(backend_gpus: List[BackendGpus]) -> List[GpuGro
             not any(av.is_available() for av in g.availability),
             g.price.min,
             g.price.max,
-            g.backend.value,
+            get_or_error(g.backend).value,
             g.name,
             g.memory_mib,
         ),
@@ -229,7 +267,7 @@ def _get_gpus_grouped_by_backend_and_region(backend_gpus: List[BackendGpus]) ->
             not any(av.is_available() for av in g.availability),
             g.price.min,
             g.price.max,
-            g.backend.value,
+            get_or_error(g.backend).value,
             g.region,
             g.name,
             g.memory_mib,
@@ -299,7 +337,7 @@ def _get_gpus_grouped_by_backend_and_count(backend_gpus: List[BackendGpus]) -> L
             not any(av.is_available() for av in g.availability),
             g.price.min,
             g.price.max,
-            g.backend.value,
+            get_or_error(g.backend).value,
             g.count.min,
             g.name,
             g.memory_mib,
@@ -344,47 +382,10 @@ def _get_gpus_grouped_by_backend_region_and_count(
             not any(av.is_available() for av in g.availability),
             g.price.min,
             g.price.max,
-            g.backend.value,
+            get_or_error(g.backend).value,
             g.region,
             g.count.min,
             g.name,
             g.memory_mib,
         ),
     )
-
-
-async def list_gpus_grouped(
-    session: AsyncSession,
-    project: ProjectModel,
-    run_spec: RunSpec,
-    group_by: Optional[List[Literal["backend", "region", "count"]]] = None,
-) -> ListGpusResponse:
-    """Retrieves available GPU specifications based on a run spec, with optional grouping."""
-    offers = await _get_gpu_offers(session, project, run_spec)
-    backend_gpus = _process_offers_into_backend_gpus(offers)
-
-    group_by_set = set(group_by) if group_by else set()
-
-    if "region" in group_by_set and "backend" not in group_by_set:
-        from dstack._internal.core.errors import ServerClientError
-
-        raise ServerClientError("Cannot group by 'region' without also grouping by 'backend'")
-
-    # Determine grouping strategy based on combination
-    has_backend = "backend" in group_by_set
-    has_region = "region" in group_by_set
-    has_count = "count" in group_by_set
-    if has_backend and has_region and has_count:
-        gpus = _get_gpus_grouped_by_backend_region_and_count(backend_gpus)
-    elif has_backend and has_count:
-        gpus = _get_gpus_grouped_by_backend_and_count(backend_gpus)
-    elif has_backend and has_region:
-        gpus = _get_gpus_grouped_by_backend_and_region(backend_gpus)
-    elif has_backend:
-        gpus = _get_gpus_grouped_by_backend(backend_gpus)
-    elif has_count:
-        gpus = _get_gpus_grouped_by_count(backend_gpus)
-    else:
-        gpus = _get_gpus_with_no_grouping(backend_gpus)
-
-    return ListGpusResponse(gpus=gpus)
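Note: the sort keys above now wrap `g.backend` in `get_or_error` because the field is optional on the grouped model. A minimal sketch of what a helper with this name typically does (an assumption about the shape of `dstack._internal.utils.common.get_or_error`, not a copy of it):

from typing import Optional, TypeVar

T = TypeVar("T")

def get_or_error(value: Optional[T]) -> T:
    # Narrows Optional[T] to T, failing loudly if the invariant is violated,
    # so sort keys never compare None against enum values.
    if value is None:
        raise ValueError("Expected a non-None value")
    return value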
@@ -39,6 +39,7 @@ from dstack._internal.core.models.profiles import (
 from dstack._internal.core.models.runs import JobProvisioningData, Requirements
 from dstack._internal.core.models.volumes import Volume
 from dstack._internal.core.services.profiles import get_termination
+from dstack._internal.server import settings as server_settings
 from dstack._internal.server.models import (
     FleetModel,
     InstanceHealthCheckModel,
@@ -47,9 +48,11 @@ from dstack._internal.server.models import (
     UserModel,
 )
 from dstack._internal.server.schemas.health.dcgm import DCGMHealthResponse
-from dstack._internal.server.schemas.runner import InstanceHealthResponse
+from dstack._internal.server.schemas.runner import InstanceHealthResponse, TaskStatus
+from dstack._internal.server.services.logging import fmt
 from dstack._internal.server.services.offers import generate_shared_offer
 from dstack._internal.server.services.projects import list_user_project_models
+from dstack._internal.server.services.runner.client import ShimClient
 from dstack._internal.utils import common as common_utils
 from dstack._internal.utils.logging import get_logger

@@ -510,10 +513,10 @@ async def list_active_remote_instances(
     return instance_models


-async def create_instance_model(
+def create_instance_model(
     session: AsyncSession,
     project: ProjectModel,
-    user: UserModel,
+    username: str,
     profile: Profile,
     requirements: Requirements,
     instance_name: str,
@@ -533,7 +536,7 @@
     instance_config = InstanceConfiguration(
         project_name=project.name,
         instance_name=instance_name,
-        user=user.name,
+        user=username,
         ssh_keys=[project_ssh_key],
         instance_id=str(instance_id),
         reservation=reservation,
@@ -633,3 +636,40 @@ async def create_ssh_instance_model(
         busy_blocks=0,
     )
     return im
+
+
+def remove_dangling_tasks_from_instance(shim_client: ShimClient, instance: InstanceModel) -> None:
+    if not shim_client.is_api_v2_supported():
+        return
+    assigned_to_instance_job_ids = {str(j.id) for j in instance.jobs}
+    task_list_response = shim_client.list_tasks()
+    tasks: list[tuple[str, Optional[TaskStatus]]]
+    if task_list_response.tasks is not None:
+        tasks = [(t.id, t.status) for t in task_list_response.tasks]
+    elif task_list_response.ids is not None:
+        # compatibility with pre-0.19.26 shim
+        tasks = [(t_id, None) for t_id in task_list_response.ids]
+    else:
+        raise ValueError("Unexpected task list response, neither `tasks` nor `ids` is set")
+    for task_id, task_status in tasks:
+        if task_id in assigned_to_instance_job_ids:
+            continue
+        should_terminate = task_status != TaskStatus.TERMINATED
+        should_remove = not server_settings.SERVER_KEEP_SHIM_TASKS
+        if not (should_terminate or should_remove):
+            continue
+        logger.warning(
+            "%s: dangling task found, id=%s, status=%s. Terminating and/or removing",
+            fmt(instance),
+            task_id,
+            task_status or "<unknown>",
+        )
+        if should_terminate:
+            shim_client.terminate_task(
+                task_id=task_id,
+                reason=None,
+                message=None,
+                timeout=0,
+            )
+        if should_remove:
+            shim_client.remove_task(task_id=task_id)
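Note: `remove_dangling_tasks_from_instance` has to handle two shim generations: post-0.19.26 shims report task objects with a status, while older shims only report ids. A self-contained sketch of that normalization step (the dataclasses below are stand-ins for illustration, not dstack's actual response models):

from dataclasses import dataclass
from typing import List, Optional, Tuple

@dataclass
class TaskInfo:
    id: str
    status: Optional[str]

@dataclass
class TaskListResponse:
    tasks: Optional[List[TaskInfo]] = None  # newer shims: id + status
    ids: Optional[List[str]] = None         # pre-0.19.26 shims: ids only

def normalize(resp: TaskListResponse) -> List[Tuple[str, Optional[str]]]:
    if resp.tasks is not None:
        return [(t.id, t.status) for t in resp.tasks]
    if resp.ids is not None:
        # Legacy shim: status is unknown, so the caller must assume the
        # task may still be running and terminate it defensively.
        return [(t_id, None) for t_id in resp.ids]
    raise ValueError("neither `tasks` nor `ids` is set")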
@@ -256,7 +256,16 @@ async def process_terminating_job(
     if jpd is not None:
         logger.debug("%s: stopping container", fmt(job_model))
         ssh_private_keys = get_instance_ssh_private_keys(instance_model)
-        await stop_container(job_model, jpd, ssh_private_keys)
+        if not await stop_container(job_model, jpd, ssh_private_keys):
+            # The dangling container can be removed later during instance processing
+            logger.warning(
+                (
+                    "%s: could not stop container, possibly due to a communication error."
+                    " See debug logs for details."
+                    " Ignoring, can attempt to remove the container later"
+                ),
+                fmt(job_model),
+            )
     if jrd is not None and jrd.volume_names is not None:
         volume_names = jrd.volume_names
     else:
@@ -378,21 +387,22 @@ async def stop_container(
     job_model: JobModel,
     job_provisioning_data: JobProvisioningData,
     ssh_private_keys: tuple[str, Optional[str]],
-):
+) -> bool:
     if job_provisioning_data.dockerized:
         # send a request to the shim to terminate the docker container
         # SSHError and RequestException are caught in the `runner_ssh_tunner` decorator
-        await run_async(
+        return await run_async(
             _shim_submit_stop,
             ssh_private_keys,
             job_provisioning_data,
             None,
             job_model,
         )
+    return True


 @runner_ssh_tunnel(ports=[DSTACK_SHIM_HTTP_PORT])
-def _shim_submit_stop(ports: Dict[int, int], job_model: JobModel):
+def _shim_submit_stop(ports: Dict[int, int], job_model: JobModel) -> bool:
     shim_client = client.ShimClient(port=ports[DSTACK_SHIM_HTTP_PORT])

     resp = shim_client.healthcheck()
@@ -418,6 +428,7 @@ def _shim_submit_stop(ports: Dict[int, int], job_model: JobModel):
         shim_client.remove_task(task_id=job_model.id)
     else:
         shim_client.stop(force=True)
+    return True


 def group_jobs_by_replica_latest(jobs: List[JobModel]) -> Iterable[Tuple[int, List[JobModel]]]:
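Note: `stop_container` can now report failure because, per the comment above, the `runner_ssh_tunnel` decorator catches SSHError/RequestException; the new `bool` return lets `process_terminating_job` log a warning and move on instead of silently treating a failed stop as success. A rough sketch of the pattern (not dstack's actual decorator; the exception types are stand-ins):

import functools
from typing import Any, Callable

def swallow_tunnel_errors(func: Callable[..., bool]) -> Callable[..., bool]:
    # If the tunneled call raises a communication error, return False so
    # the caller can detect the failure without an exception escaping.
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> bool:
        try:
            return func(*args, **kwargs)
        except (ConnectionError, TimeoutError):
            return False
    return wrapper

@swallow_tunnel_errors
def stop_task() -> bool:
    # talk to the shim over the tunnel here
    return True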
@@ -3,7 +3,7 @@ import sys
 import threading
 from abc import ABC, abstractmethod
 from pathlib import PurePosixPath
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional

 from cachetools import TTLCache, cached

@@ -16,7 +16,7 @@ from dstack._internal.core.models.configurations import (
     DEFAULT_PROBE_READY_AFTER,
     DEFAULT_PROBE_TIMEOUT,
     DEFAULT_PROBE_URL,
-    DEFAULT_REPO_DIR,
+    LEGACY_REPO_DIR,
     PortMapping,
     ProbeConfig,
     PythonVersion,
@@ -45,6 +45,14 @@ from dstack._internal.server.services.docker import ImageConfig, get_image_confi
 from dstack._internal.utils import crypto
 from dstack._internal.utils.common import run_async
 from dstack._internal.utils.interpolator import InterpolatorError, VariablesInterpolator
+from dstack._internal.utils.logging import get_logger
+from dstack._internal.utils.path import is_absolute_posix_path
+
+logger = get_logger(__name__)
+
+
+DSTACK_DIR = "/dstack"
+DSTACK_PROFILE_PATH = f"{DSTACK_DIR}/profile"


 def get_default_python_verison() -> str:
@@ -160,6 +168,7 @@ class JobConfigurator(ABC):
             ssh_key=self._ssh_key(jobs_per_replica),
             repo_data=self.run_spec.repo_data,
             repo_code_hash=self.run_spec.repo_code_hash,
+            repo_dir=self._repo_dir(),
             file_archives=self.run_spec.file_archives,
             service_port=self._service_port(),
             probes=self._probes(),
@@ -179,6 +188,7 @@ class JobConfigurator(ABC):

     async def _commands(self) -> List[str]:
         if self.run_spec.configuration.entrypoint is not None:  # docker-like format
+            assert self.run_spec.configuration.type != "dev-environment"
             entrypoint = shlex.split(self.run_spec.configuration.entrypoint)
             commands = self.run_spec.configuration.commands
         elif shell_commands := self._shell_commands():
@@ -208,9 +218,17 @@ class JobConfigurator(ABC):
         ):
             return []
         return [
-            f"uv venv --python {self._python()} --prompt workflow --seed {DEFAULT_REPO_DIR}/.venv > /dev/null 2>&1",
-            f"echo 'source {DEFAULT_REPO_DIR}/.venv/bin/activate' >> ~/.bashrc",
-            f"source {DEFAULT_REPO_DIR}/.venv/bin/activate",
+            # `uv` may emit:
+            # > warning: `VIRTUAL_ENV=/dstack/venv` does not match the project environment path
+            # > `.venv` and will be ignored; use `--active` to target the active environment
+            # > instead
+            # Safe to ignore, reusing dstack's venv for `uv` is discouraged (it should only be
+            # used for legacy `pip`-based configurations). `--no-active` suppresses the warning.
+            # Alternatively, the user can call `deactivate` once before using `uv`.
+            # If the user really wants to reuse dstack's venv, they must spefify `--active`.
+            f"uv venv -q --prompt dstack -p {self._python()} --seed {DSTACK_DIR}/venv",
+            f"echo '. {DSTACK_DIR}/venv/bin/activate' >> {DSTACK_PROFILE_PATH}",
+            f". {DSTACK_DIR}/venv/bin/activate",
         ]

     def _app_specs(self) -> List[AppSpec]:
@@ -258,19 +276,17 @@ class JobConfigurator(ABC):
         return self.run_spec.configuration.single_branch

     def _max_duration(self) -> Optional[int]:
-        if self.run_spec.merged_profile.max_duration in [None, True]:
+        if self.run_spec.merged_profile.max_duration is None:
             return self._default_max_duration()
-        if self.run_spec.merged_profile.max_duration in ["off", False]:
+        if self.run_spec.merged_profile.max_duration == "off":
             return None
-        # pydantic validator ensures this is int
         return self.run_spec.merged_profile.max_duration

     def _stop_duration(self) -> Optional[int]:
-        if self.run_spec.merged_profile.stop_duration in [None, True]:
+        if self.run_spec.merged_profile.stop_duration is None:
             return DEFAULT_STOP_DURATION
-        if self.run_spec.merged_profile.stop_duration in ["off", False]:
+        if self.run_spec.merged_profile.stop_duration == "off":
             return None
-        # pydantic validator ensures this is int
         return self.run_spec.merged_profile.stop_duration

     def _utilization_policy(self) -> Optional[UtilizationPolicy]:
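Note: the simplified checks reflect that `max_duration`/`stop_duration` now reach the configurator as `None`, the literal `"off"`, or an `int` number of seconds; the boolean spellings are normalized away earlier by validation. Illustrative behavior of the resolution logic:

from typing import Optional, Union

Duration = Union[None, str, int]  # None | "off" | seconds

def resolve(value: Duration, default: Optional[int]) -> Optional[int]:
    if value is None:
        return default  # fall back to the configurator's default
    if value == "off":
        return None     # explicitly unlimited
    return value        # already an int after validation

assert resolve(None, 3600) == 3600
assert resolve("off", 3600) is None
assert resolve(7200, 3600) == 7200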
@@ -291,11 +307,34 @@ class JobConfigurator(ABC):
     def _retry(self) -> Optional[Retry]:
         return get_retry(self.run_spec.merged_profile)

+    def _repo_dir(self) -> str:
+        """
+        Returns absolute or relative path
+        """
+        repo_dir = self.run_spec.repo_dir
+        if repo_dir is None:
+            return LEGACY_REPO_DIR
+        return repo_dir
+
     def _working_dir(self) -> Optional[str]:
         """
-        None means default working directory
+        Returns path or None
+
+        None means the default working directory taken from the image
+
+        Currently, for compatibility with pre-0.19.27 runners, the path may be relative.
+        Future versions should return only absolute paths
         """
-        return self.run_spec.working_dir
+        working_dir = self.run_spec.configuration.working_dir
+        if working_dir is None:
+            return working_dir
+        # Return a relative path if possible
+        if is_absolute_posix_path(working_dir):
+            try:
+                return str(PurePosixPath(working_dir).relative_to(LEGACY_REPO_DIR))
+            except ValueError:
+                pass
+        return working_dir

     def _python(self) -> str:
         if self.run_spec.configuration.python is not None:
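Note: the rewritten `_working_dir` relies on `PurePosixPath.relative_to`, which raises `ValueError` when the path is not under the legacy repo dir. Assuming `LEGACY_REPO_DIR` is `/workflow` (the historical default repo dir; an assumption here), the behavior is:

from pathlib import PurePosixPath

LEGACY_REPO_DIR = "/workflow"  # assumption: the historical default repo dir

# A working dir under the legacy repo dir is rewritten as a relative path
# for compatibility with pre-0.19.27 runners...
assert str(PurePosixPath("/workflow/src").relative_to(LEGACY_REPO_DIR)) == "src"

# ...while a path outside it raises ValueError and is returned unchanged.
try:
    PurePosixPath("/opt/app").relative_to(LEGACY_REPO_DIR)
except ValueError:
    print("not under the legacy repo dir; keep the absolute path")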
@@ -328,7 +367,7 @@ class JobConfigurator(ABC):


 def interpolate_job_volumes(
-    run_volumes: List[Union[MountPoint, str]],
+    run_volumes: List[MountPoint],
     job_num: int,
 ) -> List[MountPoint]:
     if len(run_volumes) == 0:
@@ -343,9 +382,6 @@ def interpolate_job_volumes(
     )
     job_volumes = []
     for mount_point in run_volumes:
-        if isinstance(mount_point, str):
-            # pydantic validator ensures strings are converted to MountPoint
-            continue
         if not isinstance(mount_point, VolumeMountPoint):
             job_volumes.append(mount_point.copy())
             continue
@@ -9,8 +9,8 @@ from dstack._internal.server.services.jobs.configurators.extensions.cursor impor
 from dstack._internal.server.services.jobs.configurators.extensions.vscode import VSCodeDesktop

 INSTALL_IPYKERNEL = (
-    "(echo pip install ipykernel... && pip install -q --no-cache-dir ipykernel 2> /dev/null) || "
-    'echo "no pip, ipykernel was not installed"'
+    "(echo 'pip install ipykernel...' && pip install -q --no-cache-dir ipykernel 2> /dev/null) || "
+    "echo 'no pip, ipykernel was not installed'"
 )

@@ -18,6 +18,8 @@ class DevEnvironmentJobConfigurator(JobConfigurator):
     TYPE: RunConfigurationType = RunConfigurationType.DEV_ENVIRONMENT

     def __init__(self, run_spec: RunSpec, secrets: Dict[str, str]):
+        assert run_spec.configuration.type == "dev-environment"
+
         if run_spec.configuration.ide == "vscode":
             __class = VSCodeDesktop
         elif run_spec.configuration.ide == "cursor":
32
34
  super().__init__(run_spec=run_spec, secrets=secrets)
33
35
 
34
36
  def _shell_commands(self) -> List[str]:
37
+ assert self.run_spec.configuration.type == "dev-environment"
38
+
35
39
  commands = self.ide.get_install_commands()
36
40
  commands.append(INSTALL_IPYKERNEL)
37
41
  commands += self.run_spec.configuration.setup
38
- commands.append("echo ''")
42
+ commands.append("echo")
39
43
  commands += self.run_spec.configuration.init
40
44
  commands += self.ide.get_print_readme_commands()
41
45
  commands += [
42
46
  f"echo 'To connect via SSH, use: `ssh {self.run_spec.run_name}`'",
43
- "echo ''",
47
+ "echo",
44
48
  "echo -n 'To exit, press Ctrl+C.'",
45
49
  ]
46
50
  commands += ["tail -f /dev/null"] # idle
@@ -56,4 +60,5 @@ class DevEnvironmentJobConfigurator(JobConfigurator):
56
60
  return self.run_spec.merged_profile.spot_policy or SpotPolicy.ONDEMAND
57
61
 
58
62
  def _ports(self) -> List[PortMapping]:
63
+ assert self.run_spec.configuration.type == "dev-environment"
59
64
  return self.run_spec.configuration.ports
@@ -1,13 +1,11 @@
-from typing import List
-
-from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
+from typing import List, Optional


 class CursorDesktop:
     def __init__(
         self,
-        run_name: str,
-        version: str,
+        run_name: Optional[str],
+        version: Optional[str],
         extensions: List[str],
     ):
         self.run_name = run_name
@@ -38,7 +36,7 @@ class CursorDesktop:
     def get_print_readme_commands(self) -> List[str]:
         return [
             "echo To open in Cursor, use link below:",
-            "echo ''",
-            f"echo ' cursor://vscode-remote/ssh-remote+{self.run_name}{DEFAULT_REPO_DIR}'",  # TODO use $REPO_DIR
-            "echo ''",
+            "echo",
+            f'echo " cursor://vscode-remote/ssh-remote+{self.run_name}$DSTACK_REPO_DIR"',
+            "echo",
         ]
@@ -1,13 +1,11 @@
-from typing import List
-
-from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
+from typing import List, Optional


 class VSCodeDesktop:
     def __init__(
         self,
-        run_name: str,
-        version: str,
+        run_name: Optional[str],
+        version: Optional[str],
         extensions: List[str],
     ):
         self.run_name = run_name
@@ -37,8 +35,8 @@ class VSCodeDesktop:

     def get_print_readme_commands(self) -> List[str]:
         return [
-            "echo To open in VS Code Desktop, use link below:",
-            "echo ''",
-            f"echo ' vscode://vscode-remote/ssh-remote+{self.run_name}{DEFAULT_REPO_DIR}'",  # TODO use $REPO_DIR
-            "echo ''",
+            "echo 'To open in VS Code Desktop, use link below:'",
+            "echo",
+            f'echo " vscode://vscode-remote/ssh-remote+{self.run_name}$DSTACK_REPO_DIR"',
+            "echo",
         ]
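Note the quoting change in both IDE helpers: the old commands baked the Python-side `DEFAULT_REPO_DIR` constant into a single-quoted string, while the new ones use double quotes so the shell expands `$DSTACK_REPO_DIR` when the job runs, letting the printed link follow the run's actual repo dir. For example (illustrative values):

run_name = "my-run"  # illustrative
# Old style: the path is fixed at configuration time.
old = f"echo ' vscode://vscode-remote/ssh-remote+{run_name}/workflow'"
# New style: double quotes let the shell substitute the variable at runtime.
new = f'echo " vscode://vscode-remote/ssh-remote+{run_name}$DSTACK_REPO_DIR"'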
@@ -9,6 +9,7 @@ class ServiceJobConfigurator(JobConfigurator):
     TYPE: RunConfigurationType = RunConfigurationType.SERVICE

     def _shell_commands(self) -> List[str]:
+        assert self.run_spec.configuration.type == "service"
         return self.run_spec.configuration.commands

     def _default_single_branch(self) -> bool:
@@ -22,6 +23,3 @@ class ServiceJobConfigurator(JobConfigurator):

     def _ports(self) -> List[PortMapping]:
         return []
-
-    def _working_dir(self) -> Optional[str]:
-        return None if not self._shell_commands() else super()._working_dir()
@@ -10,6 +10,7 @@ class TaskJobConfigurator(JobConfigurator):
     TYPE: RunConfigurationType = RunConfigurationType.TASK

     async def get_job_specs(self, replica_num: int) -> List[JobSpec]:
+        assert self.run_spec.configuration.type == "task"
         job_specs = []
         for job_num in range(self.run_spec.configuration.nodes):
             job_spec = await self._get_job_spec(
@@ -21,6 +22,7 @@ class TaskJobConfigurator(JobConfigurator):
         return job_specs

     def _shell_commands(self) -> List[str]:
+        assert self.run_spec.configuration.type == "task"
         return self.run_spec.configuration.commands

     def _default_single_branch(self) -> bool:
@@ -33,7 +35,5 @@ class TaskJobConfigurator(JobConfigurator):
         return self.run_spec.merged_profile.spot_policy or SpotPolicy.ONDEMAND

     def _ports(self) -> List[PortMapping]:
+        assert self.run_spec.configuration.type == "task"
         return self.run_spec.configuration.ports
-
-    def _working_dir(self) -> Optional[str]:
-        return None if not self._shell_commands() else super()._working_dir()
@@ -23,13 +23,13 @@ T = TypeVar("T")


 class Lockset(Protocol[T]):
-    def __contains__(self, item: T) -> bool: ...
+    def __contains__(self, item: T, /) -> bool: ...
     def __iter__(self) -> Iterator[T]: ...
     def __len__(self) -> int: ...
-    def add(self, item: T) -> None: ...
-    def discard(self, item: T) -> None: ...
-    def update(self, other: Iterable[T]) -> None: ...
-    def difference_update(self, other: Iterable[T]) -> None: ...
+    def add(self, item: T, /) -> None: ...
+    def discard(self, item: T, /) -> None: ...
+    def update(self, other: Iterable[T], /) -> None: ...
+    def difference_update(self, other: Iterable[T], /) -> None: ...


 class ResourceLocker:
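Note: the trailing `/` markers make the `Lockset` parameters positional-only, matching how built-in `set` methods accept their arguments; without them, a type checker would reject `set[T]` as an implementation because `set.add` cannot be called with `item=` as a keyword. A minimal demonstration:

from typing import Protocol, TypeVar

T = TypeVar("T")

class SupportsAdd(Protocol[T]):
    # `/` makes `item` positional-only, as it is on built-in set.add.
    def add(self, item: T, /) -> None: ...

def track(lockset: SupportsAdd[str]) -> None:
    lockset.add("resource-1")

track(set())  # OK: set structurally satisfies the positional-only protocol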