PyPI - dstack - Versions diffs - 0.19.8__py3-none-any.whl → 0.19.9__py3-none-any.whl - Mend

dstack 0.19.8-py3-none-any.whl → 0.19.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dstack might be problematic. Click here for more details.

Files changed (25)

dstack/_internal/cli/services/configurators/run.py CHANGED Viewed

@@ -98,6 +98,8 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
         print_run_plan(run_plan, max_offers=configurator_args.max_offers)
         confirm_message = "Submit a new run?"
+        if conf.name:
+            confirm_message = f"Submit the run [code]{conf.name}[/]?"
         stop_run_name = None
         if run_plan.current_resource is not None:
             changed_fields = []
@@ -130,11 +132,6 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
                     f"Active run [code]{conf.name}[/] already exists and cannot be updated in-place."
                 )
                 confirm_message = "Stop and override the run?"
-            else:
-                console.print(f"Finished run [code]{conf.name}[/] already exists.")
-                confirm_message = "Override the run?"
-        elif conf.name:
-            confirm_message = f"Submit the run [code]{conf.name}[/]?"
         if not command_args.yes and not confirm_ask(confirm_message):
             console.print("\nExiting...")
@@ -560,7 +557,9 @@ def print_finished_message(run: Run):
         console.print("[code]Done[/]")
         return
-    termination_reason, termination_reason_message = _get_run_termination_reason(run)
+    termination_reason, termination_reason_message, exit_status = (
+        _get_run_termination_reason_and_exit_status(run)
+    )
     message = "Run failed due to unknown reason. Check CLI, server, and run logs."
     if run.status == RunStatus.TERMINATED:
         message = "Run terminated due to unknown reason. Check CLI, server, and run logs."
@@ -572,13 +571,15 @@ def print_finished_message(run: Run):
             "Check CLI and server logs for more details."
         )
     elif termination_reason is not None:
+        exit_status_details = f"Exit status: {exit_status}.\n" if exit_status else ""
         error_details = (
             f"Error: {termination_reason_message}\n" if termination_reason_message else ""
         )
         message = (
             f"Run failed with error code {termination_reason.name}.\n"
+            f"{exit_status_details}"
             f"{error_details}"
-            "Check CLI, server, and run logs for more details."
+            f"Check [bold]dstack logs -d {run.name}[/bold] for more details."
         )
     console.print(f"[error]{message}[/]")
@@ -589,14 +590,20 @@ def get_run_exit_code(run: Run) -> int:
     return 1
-def _get_run_termination_reason(run: Run) -> Tuple[Optional[JobTerminationReason], Optional[str]]:
+def _get_run_termination_reason_and_exit_status(
+    run: Run,
+) -> Tuple[Optional[JobTerminationReason], Optional[str], Optional[int]]:
     if len(run._run.jobs) == 0:
-        return None, None
+        return None, None, None
     job = run._run.jobs[0]
     if len(job.job_submissions) == 0:
-        return None, None
+        return None, None, None
     job_submission = job.job_submissions[0]
-    return job_submission.termination_reason, job_submission.termination_reason_message
+    return (
+        job_submission.termination_reason,
+        job_submission.termination_reason_message,
+        job_submission.exit_status,
+    )
 def _run_resubmitted(run: Run, current_job_submission: Optional[JobSubmission]) -> bool:

dstack/_internal/cli/utils/run.py CHANGED Viewed

@@ -218,6 +218,11 @@ def _get_run_error(run: Run) -> str:
 def _get_job_error(job: Job) -> str:
-    if job.job_submissions[-1].termination_reason is None:
+    job_submission = job.job_submissions[-1]
+    termination_reason = job_submission.termination_reason
+    exit_status = job_submission.exit_status
+    if termination_reason is None:
         return ""
-    return job.job_submissions[-1].termination_reason.name
+    if exit_status:
+        return f"{termination_reason.name} {exit_status}"
+    return termination_reason.name

dstack/_internal/core/backends/cudo/compute.py CHANGED Viewed

@@ -147,7 +147,7 @@ class CudoCompute(
 def _get_image_id(cuda: bool) -> str:
-    image_name = "ubuntu-2204-nvidia-535-docker-v20240214" if cuda else "ubuntu-2204"
+    image_name = "ubuntu-2204-nvidia-535-docker-v20241017" if cuda else "ubuntu-2204"
     return image_name

dstack/_internal/core/backends/nebius/fabrics.py CHANGED Viewed

@@ -20,6 +20,7 @@ INFINIBAND_FABRICS = [
     InfinibandFabric("fabric-5", "gpu-h200-sxm", "eu-west1"),
     InfinibandFabric("fabric-6", "gpu-h100-sxm", "eu-north1"),
     InfinibandFabric("fabric-7", "gpu-h200-sxm", "eu-north1"),
+    InfinibandFabric("us-central1-a", "gpu-h200-sxm", "us-central1"),
 ]

dstack/_internal/core/backends/nebius/models.py CHANGED Viewed

@@ -5,7 +5,7 @@ from pydantic import Field, root_validator
 from dstack._internal.core.backends.base.models import fill_data
 from dstack._internal.core.models.common import CoreModel
-DEFAULT_PROJECT_NAME_PREFIX = "default-project"
+DEFAULT_PROJECT_NAME_PREFIX = "default"
 class NebiusServiceAccountCreds(CoreModel):

dstack/_internal/core/models/resources.py CHANGED Viewed

@@ -126,7 +126,7 @@ class ComputeCapability(Tuple[int, int]):
 DEFAULT_CPU_COUNT = Range[int](min=2)
 DEFAULT_MEMORY_SIZE = Range[Memory](min=Memory.parse("8GB"))
-DEFAULT_GPU_COUNT = Range[int](min=1, max=1)
+DEFAULT_GPU_COUNT = Range[int](min=1)
 class CPUSpec(CoreModel):

dstack/_internal/core/models/runs.py CHANGED Viewed

@@ -104,6 +104,7 @@ class JobTerminationReason(str, Enum):
     # Set by the server
     FAILED_TO_START_DUE_TO_NO_CAPACITY = "failed_to_start_due_to_no_capacity"
     INTERRUPTED_BY_NO_CAPACITY = "interrupted_by_no_capacity"
+    INSTANCE_UNREACHABLE = "instance_unreachable"
     WAITING_INSTANCE_LIMIT_EXCEEDED = "waiting_instance_limit_exceeded"
     WAITING_RUNNER_LIMIT_EXCEEDED = "waiting_runner_limit_exceeded"
     TERMINATED_BY_USER = "terminated_by_user"
@@ -126,6 +127,7 @@ class JobTerminationReason(str, Enum):
         mapping = {
             self.FAILED_TO_START_DUE_TO_NO_CAPACITY: JobStatus.FAILED,
             self.INTERRUPTED_BY_NO_CAPACITY: JobStatus.FAILED,
+            self.INSTANCE_UNREACHABLE: JobStatus.FAILED,
             self.WAITING_INSTANCE_LIMIT_EXCEEDED: JobStatus.FAILED,
             self.WAITING_RUNNER_LIMIT_EXCEEDED: JobStatus.FAILED,
             self.TERMINATED_BY_USER: JobStatus.TERMINATED,
@@ -262,9 +264,9 @@ class JobRuntimeData(CoreModel):
     # or not applicable (container-based backends)
     ports: Optional[dict[int, int]] = None
     # List of volumes used by the job
-    volume_names: Optional[list[str]] = None  # None for backward compalibility
+    volume_names: Optional[list[str]] = None  # None for backward compatibility
     # Virtual shared offer
-    offer: Optional[InstanceOfferWithAvailability] = None  # None for backward compalibility
+    offer: Optional[InstanceOfferWithAvailability] = None  # None for backward compatibility
 class ClusterInfo(CoreModel):
@@ -283,6 +285,7 @@ class JobSubmission(CoreModel):
     status: JobStatus
     termination_reason: Optional[JobTerminationReason]
     termination_reason_message: Optional[str]
+    exit_status: Optional[int]
     job_provisioning_data: Optional[JobProvisioningData]
     job_runtime_data: Optional[JobRuntimeData]
@@ -508,7 +511,9 @@ def _get_run_error(
         return ""
     if len(run_jobs) > 1:
         return run_termination_reason.name
-    run_job_termination_reason = _get_run_job_termination_reason(run_jobs)
+    run_job_termination_reason, exit_status = _get_run_job_termination_reason_and_exit_status(
+        run_jobs
+    )
     # For failed runs, also show termination reason to provide more context.
     # For other run statuses, the job termination reason will duplicate run status.
     if run_job_termination_reason is not None and run_termination_reason in [
@@ -516,13 +521,20 @@ def _get_run_error(
         RunTerminationReason.SERVER_ERROR,
         RunTerminationReason.RETRY_LIMIT_EXCEEDED,
     ]:
+        if exit_status:
+            return (
+                f"{run_termination_reason.name}\n({run_job_termination_reason.name} {exit_status})"
+            )
         return f"{run_termination_reason.name}\n({run_job_termination_reason.name})"
     return run_termination_reason.name
-def _get_run_job_termination_reason(run_jobs: List[Job]) -> Optional[JobTerminationReason]:
+def _get_run_job_termination_reason_and_exit_status(
+    run_jobs: List[Job],
+) -> tuple[Optional[JobTerminationReason], Optional[int]]:
     for job in run_jobs:
         if len(job.job_submissions) > 0:
-            if job.job_submissions[-1].termination_reason is not None:
-                return job.job_submissions[-1].termination_reason
-    return None
+            job_submission = job.job_submissions[-1]
+            if job_submission.termination_reason is not None:
+                return job_submission.termination_reason, job_submission.exit_status
+    return None, None

dstack/_internal/server/background/tasks/process_metrics.py CHANGED Viewed

@@ -42,10 +42,33 @@ async def collect_metrics():
 async def delete_metrics():
-    cutoff = _get_delete_metrics_cutoff()
+    now_timestamp_micro = int(get_current_datetime().timestamp() * 1_000_000)
+    running_timestamp_micro_cutoff = (
+        now_timestamp_micro - settings.SERVER_METRICS_RUNNING_TTL_SECONDS * 1_000_000
+    )
+    finished_timestamp_micro_cutoff = (
+        now_timestamp_micro - settings.SERVER_METRICS_FINISHED_TTL_SECONDS * 1_000_000
+    )
     async with get_session_ctx() as session:
-        await session.execute(
-            delete(JobMetricsPoint).where(JobMetricsPoint.timestamp_micro < cutoff)
+        await asyncio.gather(
+            session.execute(
+                delete(JobMetricsPoint).where(
+                    JobMetricsPoint.job_id.in_(
+                        select(JobModel.id).where(JobModel.status.in_([JobStatus.RUNNING]))
+                    ),
+                    JobMetricsPoint.timestamp_micro < running_timestamp_micro_cutoff,
+                )
+            ),
+            session.execute(
+                delete(JobMetricsPoint).where(
+                    JobMetricsPoint.job_id.in_(
+                        select(JobModel.id).where(
+                            JobModel.status.in_(JobStatus.finished_statuses())
+                        )
+                    ),
+                    JobMetricsPoint.timestamp_micro < finished_timestamp_micro_cutoff,
+                )
+            ),
         )
         await session.commit()
@@ -134,9 +157,3 @@ def _pull_runner_metrics(
 ) -> Optional[MetricsResponse]:
     runner_client = client.RunnerClient(port=ports[DSTACK_RUNNER_HTTP_PORT])
     return runner_client.get_metrics()
-def _get_delete_metrics_cutoff() -> int:
-    now = int(get_current_datetime().timestamp() * 1_000_000)
-    cutoff = now - (settings.SERVER_METRICS_TTL_SECONDS * 1_000_000)
-    return cutoff

dstack/_internal/server/background/tasks/process_running_jobs.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import asyncio
 from collections.abc import Iterable
-from datetime import timedelta
+from datetime import timedelta, timezone
 from typing import Dict, List, Optional
 from sqlalchemy import select
@@ -71,6 +71,12 @@ from dstack._internal.utils.logging import get_logger
 logger = get_logger(__name__)
+# Minimum time before terminating active job in case of connectivity issues.
+# Should be sufficient to survive most problems caused by
+# the server network flickering and providers' glitches.
+JOB_DISCONNECTED_RETRY_TIMEOUT = timedelta(minutes=2)
 async def process_running_jobs(batch_size: int = 1):
     tasks = []
     for _ in range(batch_size):
@@ -202,7 +208,7 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
                 user_ssh_key = run.run_spec.ssh_key_pub.strip()
                 public_keys = [project.ssh_public_key.strip(), user_ssh_key]
                 if job_provisioning_data.backend == BackendType.LOCAL:
-                    # No need to update ~/.ssh/authorized_keys when running shim localy
+                    # No need to update ~/.ssh/authorized_keys when running shim locally
                     user_ssh_key = ""
                 success = await common_utils.run_async(
                     _process_provisioning_with_shim,
@@ -299,19 +305,38 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel):
                 run_model,
                 job_model,
             )
-            if not success:
-                job_model.termination_reason = JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY
-        if not success:  # kill the job
-            logger.warning(
-                "%s: failed because runner is not available or return an error,  age=%s",
-                fmt(job_model),
-                job_submission.age,
-            )
-            job_model.status = JobStatus.TERMINATING
-            if not job_model.termination_reason:
-                job_model.termination_reason = JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY
-            # job will be terminated and instance will be emptied by process_terminating_jobs
+        if success:
+            job_model.disconnected_at = None
+        else:
+            if job_model.termination_reason:
+                logger.warning(
+                    "%s: failed because shim/runner returned an error, age=%s",
+                    fmt(job_model),
+                    job_submission.age,
+                )
+                job_model.status = JobStatus.TERMINATING
+                # job will be terminated and instance will be emptied by process_terminating_jobs
+            else:
+                # No job_model.termination_reason set means ssh connection failed
+                if job_model.disconnected_at is None:
+                    job_model.disconnected_at = common_utils.get_current_datetime()
+                if _should_terminate_job_due_to_disconnect(job_model):
+                    logger.warning(
+                        "%s: failed because instance is unreachable, age=%s",
+                        fmt(job_model),
+                        job_submission.age,
+                    )
+                    # TODO: Replace with JobTerminationReason.INSTANCE_UNREACHABLE in 0.20 or
+                    # when CLI <= 0.19.8 is no longer supported
+                    job_model.termination_reason = JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY
+                    job_model.status = JobStatus.TERMINATING
+                else:
+                    logger.warning(
+                        "%s: is unreachable, waiting for the instance to become reachable again, age=%s",
+                        fmt(job_model),
+                        job_submission.age,
+                    )
     if (
         initial_status != job_model.status
@@ -543,7 +568,7 @@ def _process_pulling_with_shim(
     if shim_client.is_api_v2_supported():  # raises error if shim is down, causes retry
         task = shim_client.get_task(job_model.id)
-        # If task goes to terminated before the job is submitted to runner, then an error occured
+        # If task goes to terminated before the job is submitted to runner, then an error occurred
         if task.status == TaskStatus.TERMINATED:
             logger.warning(
                 "shim failed to execute job %s: %s (%s)",
@@ -572,7 +597,7 @@ def _process_pulling_with_shim(
     else:
         shim_status = shim_client.pull()  # raises error if shim is down, causes retry
-        # If shim goes to pending before the job is submitted to runner, then an error occured
+        # If shim goes to pending before the job is submitted to runner, then an error occurred
         if (
             shim_status.state == "pending"
             and shim_status.result is not None
@@ -651,6 +676,10 @@ def _process_running(
                 )
             if latest_state_event.termination_message:
                 job_model.termination_reason_message = latest_state_event.termination_message
+        if (exit_status := latest_state_event.exit_status) is not None:
+            job_model.exit_status = exit_status
+            if exit_status != 0:
+                logger.info("%s: non-zero exit status %s", fmt(job_model), exit_status)
     else:
         _terminate_if_inactivity_duration_exceeded(run_model, job_model, resp.no_connections_secs)
     if job_model.status != previous_status:
@@ -688,6 +717,15 @@ def _terminate_if_inactivity_duration_exceeded(
         )
+def _should_terminate_job_due_to_disconnect(job_model: JobModel) -> bool:
+    if job_model.disconnected_at is None:
+        return False
+    return (
+        common_utils.get_current_datetime()
+        > job_model.disconnected_at.replace(tzinfo=timezone.utc) + JOB_DISCONNECTED_RETRY_TIMEOUT
+    )
 async def _check_gpu_utilization(session: AsyncSession, job_model: JobModel, job: Job) -> None:
     policy = job.job_spec.utilization_policy
     if policy is None:
@@ -818,8 +856,8 @@ def _submit_job_to_runner(
         return success_if_not_available
     runner_client.submit_job(
-        run_spec=run.run_spec,
-        job_spec=job.job_spec,
+        run=run,
+        job=job,
         cluster_info=cluster_info,
         secrets=secrets,
         repo_credentials=repo_credentials,

dstack/_internal/server/migrations/versions/20166748b60c_add_jobmodel_disconnected_at.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""Add JobModel.disconnected_at
+Revision ID: 20166748b60c
+Revises: 6c1a9d6530ee
+Create Date: 2025-05-13 16:24:32.496578
+"""
+import sqlalchemy as sa
+from alembic import op
+from alembic_postgresql_enum import TableReference
+import dstack._internal.server.models
+# revision identifiers, used by Alembic.
+revision = "20166748b60c"
+down_revision = "6c1a9d6530ee"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "disconnected_at", dstack._internal.server.models.NaiveDateTime(), nullable=True
+            )
+        )
+    op.sync_enum_values(
+        enum_schema="public",
+        enum_name="jobterminationreason",
+        new_values=[
+            "FAILED_TO_START_DUE_TO_NO_CAPACITY",
+            "INTERRUPTED_BY_NO_CAPACITY",
+            "INSTANCE_UNREACHABLE",
+            "WAITING_INSTANCE_LIMIT_EXCEEDED",
+            "WAITING_RUNNER_LIMIT_EXCEEDED",
+            "TERMINATED_BY_USER",
+            "VOLUME_ERROR",
+            "GATEWAY_ERROR",
+            "SCALED_DOWN",
+            "DONE_BY_RUNNER",
+            "ABORTED_BY_USER",
+            "TERMINATED_BY_SERVER",
+            "INACTIVITY_DURATION_EXCEEDED",
+            "TERMINATED_DUE_TO_UTILIZATION_POLICY",
+            "CONTAINER_EXITED_WITH_ERROR",
+            "PORTS_BINDING_FAILED",
+            "CREATING_CONTAINER_ERROR",
+            "EXECUTOR_ERROR",
+            "MAX_DURATION_EXCEEDED",
+        ],
+        affected_columns=[
+            TableReference(
+                table_schema="public", table_name="jobs", column_name="termination_reason"
+            )
+        ],
+        enum_values_to_rename=[],
+    )
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.sync_enum_values(
+        enum_schema="public",
+        enum_name="jobterminationreason",
+        new_values=[
+            "FAILED_TO_START_DUE_TO_NO_CAPACITY",
+            "INTERRUPTED_BY_NO_CAPACITY",
+            "WAITING_INSTANCE_LIMIT_EXCEEDED",
+            "WAITING_RUNNER_LIMIT_EXCEEDED",
+            "TERMINATED_BY_USER",
+            "VOLUME_ERROR",
+            "GATEWAY_ERROR",
+            "SCALED_DOWN",
+            "DONE_BY_RUNNER",
+            "ABORTED_BY_USER",
+            "TERMINATED_BY_SERVER",
+            "INACTIVITY_DURATION_EXCEEDED",
+            "TERMINATED_DUE_TO_UTILIZATION_POLICY",
+            "CONTAINER_EXITED_WITH_ERROR",
+            "PORTS_BINDING_FAILED",
+            "CREATING_CONTAINER_ERROR",
+            "EXECUTOR_ERROR",
+            "MAX_DURATION_EXCEEDED",
+        ],
+        affected_columns=[
+            TableReference(
+                table_schema="public", table_name="jobs", column_name="termination_reason"
+            )
+        ],
+        enum_values_to_rename=[],
+    )
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.drop_column("disconnected_at")
+    # ### end Alembic commands ###

dstack/_internal/server/migrations/versions/6c1a9d6530ee_add_jobmodel_exit_status.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""Add JobModel.exit_status
+Revision ID: 6c1a9d6530ee
+Revises: 7ba3b59d7ca6
+Create Date: 2025-05-09 10:25:19.715852
+"""
+import sqlalchemy as sa
+from alembic import op
+# revision identifiers, used by Alembic.
+revision = "6c1a9d6530ee"
+down_revision = "7ba3b59d7ca6"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("exit_status", sa.Integer(), nullable=True))
+def downgrade() -> None:
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.drop_column("exit_status")

dstack/_internal/server/models.py CHANGED Viewed

@@ -382,6 +382,10 @@ class JobModel(BaseModel):
         Enum(JobTerminationReason)
     )
     termination_reason_message: Mapped[Optional[str]] = mapped_column(Text)
+    # `disconnected_at` stores the first time of connectivity issues with the instance.
+    # Resets every time connectivity is restored.
+    disconnected_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
+    exit_status: Mapped[Optional[int]] = mapped_column(Integer)
     job_spec_data: Mapped[str] = mapped_column(Text)
     job_provisioning_data: Mapped[Optional[str]] = mapped_column(Text)
     runner_timestamp: Mapped[Optional[int]] = mapped_column(BigInteger)
@@ -390,7 +394,7 @@ class JobModel(BaseModel):
     remove_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
     volumes_detached_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
     # `instance_assigned` means instance assignment was done.
-    # if `instance_assigned` is True and `instance` is None, no instance was assiged.
+    # if `instance_assigned` is True and `instance` is None, no instance was assigned.
     instance_assigned: Mapped[bool] = mapped_column(Boolean, default=False)
     instance_id: Mapped[Optional[uuid.UUID]] = mapped_column(
         ForeignKey("instances.id", ondelete="CASCADE")

dstack/_internal/server/schemas/runner.py CHANGED Viewed

@@ -7,7 +7,14 @@ from typing_extensions import Annotated
 from dstack._internal.core.models.common import CoreModel, NetworkMode
 from dstack._internal.core.models.repos.remote import RemoteRepoCreds
-from dstack._internal.core.models.runs import ClusterInfo, JobSpec, JobStatus, RunSpec
+from dstack._internal.core.models.runs import (
+    ClusterInfo,
+    JobSpec,
+    JobStatus,
+    JobSubmission,
+    Run,
+    RunSpec,
+)
 from dstack._internal.core.models.volumes import InstanceMountPoint, VolumeMountPoint
@@ -16,6 +23,7 @@ class JobStateEvent(CoreModel):
     state: JobStatus
     termination_reason: Optional[str] = None
     termination_message: Optional[str] = None
+    exit_status: Optional[int] = None
 class LogEvent(CoreModel):
@@ -38,15 +46,18 @@ class PullResponse(CoreModel):
 class SubmitBody(CoreModel):
-    run_spec: Annotated[
-        RunSpec,
+    run: Annotated[
+        Run,
         Field(
             include={
-                "run_name",
-                "repo_id",
-                "repo_data",
-                "configuration",
-                "configuration_path",
+                "id": True,
+                "run_spec": {
+                    "run_name",
+                    "repo_id",
+                    "repo_data",
+                    "configuration",
+                    "configuration_path",
+                },
             }
         ),
     ]
@@ -69,9 +80,31 @@ class SubmitBody(CoreModel):
             }
         ),
     ]
+    job_submission: Annotated[
+        JobSubmission,
+        Field(
+            include={
+                "id",
+            }
+        ),
+    ]
     cluster_info: Annotated[Optional[ClusterInfo], Field(include=True)]
     secrets: Annotated[Optional[Dict[str, str]], Field(include=True)]
     repo_credentials: Annotated[Optional[RemoteRepoCreds], Field(include=True)]
+    # run_spec is deprecated in favor of run.run_spec
+    # TODO: Remove once we no longer support instances deployed with 0.19.8 or earlier.
+    run_spec: Annotated[
+        RunSpec,
+        Field(
+            include={
+                "run_name",
+                "repo_id",
+                "repo_data",
+                "configuration",
+                "configuration_path",
+            },
+        ),
+    ]
 class HealthcheckResponse(CoreModel):

dstack/_internal/server/services/jobs/__init__.py CHANGED Viewed

@@ -135,6 +135,7 @@ def job_model_to_job_submission(job_model: JobModel) -> JobSubmission:
         status=job_model.status,
         termination_reason=job_model.termination_reason,
         termination_reason_message=job_model.termination_reason_message,
+        exit_status=job_model.exit_status,
         job_provisioning_data=job_provisioning_data,
         job_runtime_data=get_job_runtime_data(job_model),
     )

dstack/_internal/server/services/runner/client.py CHANGED Viewed

@@ -12,7 +12,7 @@ from dstack._internal.core.models.common import CoreModel, NetworkMode
 from dstack._internal.core.models.envs import Env
 from dstack._internal.core.models.repos.remote import RemoteRepoCreds
 from dstack._internal.core.models.resources import Memory
-from dstack._internal.core.models.runs import ClusterInfo, JobSpec, RunSpec
+from dstack._internal.core.models.runs import ClusterInfo, Job, Run
 from dstack._internal.core.models.volumes import InstanceMountPoint, Volume, VolumeMountPoint
 from dstack._internal.server.schemas.runner import (
     GPUDevice,
@@ -72,8 +72,8 @@ class RunnerClient:
     def submit_job(
         self,
-        run_spec: RunSpec,
-        job_spec: JobSpec,
+        run: Run,
+        job: Job,
         cluster_info: ClusterInfo,
         secrets: Dict[str, str],
         repo_credentials: Optional[RemoteRepoCreds],
@@ -81,6 +81,7 @@ class RunnerClient:
     ):
         # XXX: This is a quick-and-dirty hack to deliver InstanceModel-specific environment
         # variables to the runner without runner API modification.
+        job_spec = job.job_spec
         if instance_env is not None:
             if isinstance(instance_env, Env):
                 merged_env = instance_env.as_dict()
@@ -90,11 +91,13 @@ class RunnerClient:
             job_spec = job_spec.copy(deep=True)
             job_spec.env = merged_env
         body = SubmitBody(
-            run_spec=run_spec,
+            run=run,
             job_spec=job_spec,
+            job_submission=job.job_submissions[-1],
             cluster_info=cluster_info,
             secrets=secrets,
             repo_credentials=repo_credentials,
+            run_spec=run.run_spec,
         )
         resp = requests.post(
             # use .json() to encode enums

dstack/_internal/server/services/runs.py CHANGED Viewed

@@ -870,10 +870,10 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
     if (
         run_spec.merged_profile.utilization_policy is not None
         and run_spec.merged_profile.utilization_policy.time_window
-        > settings.SERVER_METRICS_TTL_SECONDS
+        > settings.SERVER_METRICS_RUNNING_TTL_SECONDS
     ):
         raise ServerClientError(
-            f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_TTL_SECONDS}s"
+            f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_RUNNING_TTL_SECONDS}s"
         )
     set_resources_defaults(run_spec.configuration.resources)

dstack/_internal/server/settings.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
+import warnings
 from pathlib import Path
 DSTACK_DIR_PATH = Path("~/.dstack/").expanduser()
@@ -45,7 +46,25 @@ SERVER_CLOUDWATCH_LOG_REGION = os.getenv("DSTACK_SERVER_CLOUDWATCH_LOG_REGION")
 SERVER_GCP_LOGGING_PROJECT = os.getenv("DSTACK_SERVER_GCP_LOGGING_PROJECT")
-SERVER_METRICS_TTL_SECONDS = int(os.getenv("DSTACK_SERVER_METRICS_TTL_SECONDS", 3600))
+SERVER_METRICS_RUNNING_TTL_SECONDS: int
+_SERVER_METRICS_RUNNING_TTL_SECONDS = os.getenv("DSTACK_SERVER_METRICS_RUNNING_TTL_SECONDS")
+if _SERVER_METRICS_RUNNING_TTL_SECONDS is None:
+    _SERVER_METRICS_RUNNING_TTL_SECONDS = os.getenv("DSTACK_SERVER_METRICS_TTL_SECONDS")
+    if _SERVER_METRICS_RUNNING_TTL_SECONDS is not None:
+        warnings.warn(
+            (
+                "DSTACK_SERVER_METRICS_TTL_SECONDS is deprecated,"
+                " use DSTACK_SERVER_METRICS_RUNNING_TTL_SECONDS instead"
+            ),
+            DeprecationWarning,
+        )
+    else:
+        _SERVER_METRICS_RUNNING_TTL_SECONDS = 3600
+SERVER_METRICS_RUNNING_TTL_SECONDS = int(_SERVER_METRICS_RUNNING_TTL_SECONDS)
+del _SERVER_METRICS_RUNNING_TTL_SECONDS
+SERVER_METRICS_FINISHED_TTL_SECONDS = int(
+    os.getenv("DSTACK_SERVER_METRICS_FINISHED_TTL_SECONDS", 7 * 24 * 3600)
+)
 DEFAULT_PROJECT_NAME = "main"

dstack/_internal/server/testing/common.py CHANGED Viewed

@@ -302,6 +302,7 @@ async def create_job(
     job_num: int = 0,
     replica_num: int = 0,
     instance_assigned: bool = False,
+    disconnected_at: Optional[datetime] = None,
 ) -> JobModel:
     run_spec = RunSpec.parse_raw(run.run_spec)
     job_spec = (await get_job_specs_from_run_spec(run_spec, replica_num=replica_num))[0]
@@ -323,6 +324,7 @@ async def create_job(
         instance=instance,
         instance_assigned=instance_assigned,
         used_instance_id=instance.id if instance is not None else None,
+        disconnected_at=disconnected_at,
     )
     session.add(job)
     await session.commit()

dstack/api/server/_runs.py CHANGED Viewed

@@ -115,6 +115,8 @@ def _get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[Dict]:
             job_submissions_excludes["job_runtime_data"] = {
                 "offer": {"instance": {"resources": {"cpu_arch"}}}
             }
+        if all(js.exit_status is None for js in job_submissions):
+            job_submissions_excludes["exit_status"] = True
         latest_job_submission = current_resource.latest_job_submission
         if latest_job_submission is not None:
             latest_job_submission_excludes = {}
@@ -127,6 +129,8 @@ def _get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[Dict]:
                 latest_job_submission_excludes["job_runtime_data"] = {
                     "offer": {"instance": {"resources": {"cpu_arch"}}}
                 }
+            if latest_job_submission.exit_status is None:
+                latest_job_submission_excludes["exit_status"] = True
     return {"plan": apply_plan_excludes}

dstack/version.py CHANGED Viewed

@@ -1,3 +1,3 @@
-__version__ = "0.19.8"
+__version__ = "0.19.9"
 __is_release__ = True
 base_image = "0.7"

{dstack-0.19.8.dist-info → dstack-0.19.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dstack
-Version: 0.19.8
+Version: 0.19.9
 Summary: dstack is an open-source orchestration engine for running AI workloads on any cloud or on-premises.
 Project-URL: Homepage, https://dstack.ai
 Project-URL: Source, https://github.com/dstackai/dstack
@@ -54,7 +54,7 @@ Requires-Dist: azure-mgmt-network<28.0.0,>=23.0.0; extra == 'all'
 Requires-Dist: azure-mgmt-resource>=22.0.0; extra == 'all'
 Requires-Dist: azure-mgmt-subscription>=3.1.1; extra == 'all'
 Requires-Dist: backports-entry-points-selectable; extra == 'all'
-Requires-Dist: boto3; extra == 'all'
+Requires-Dist: boto3>=1.38.13; extra == 'all'
 Requires-Dist: botocore; extra == 'all'
 Requires-Dist: datacrunch; extra == 'all'
 Requires-Dist: docker>=6.0.0; extra == 'all'
@@ -90,7 +90,7 @@ Requires-Dist: alembic>=1.10.2; extra == 'aws'
 Requires-Dist: apscheduler<4; extra == 'aws'
 Requires-Dist: asyncpg; extra == 'aws'
 Requires-Dist: backports-entry-points-selectable; extra == 'aws'
-Requires-Dist: boto3; extra == 'aws'
+Requires-Dist: boto3>=1.38.13; extra == 'aws'
 Requires-Dist: botocore; extra == 'aws'
 Requires-Dist: docker>=6.0.0; extra == 'aws'
 Requires-Dist: fastapi; extra == 'aws'
@@ -231,7 +231,7 @@ Requires-Dist: alembic>=1.10.2; extra == 'lambda'
 Requires-Dist: apscheduler<4; extra == 'lambda'
 Requires-Dist: asyncpg; extra == 'lambda'
 Requires-Dist: backports-entry-points-selectable; extra == 'lambda'
-Requires-Dist: boto3; extra == 'lambda'
+Requires-Dist: boto3>=1.38.13; extra == 'lambda'
 Requires-Dist: botocore; extra == 'lambda'
 Requires-Dist: docker>=6.0.0; extra == 'lambda'
 Requires-Dist: fastapi; extra == 'lambda'
@@ -338,24 +338,27 @@ orchestration for ML teams across top clouds and on-prem clusters.
 #### Accelerators
-`dstack` supports `NVIDIA`, `AMD`, `Google TPU`, and `Intel Gaudi` accelerators out of the box.
+`dstack` supports `NVIDIA`, `AMD`, `Google TPU`, `Intel Gaudi`, and `Tenstorrent` accelerators out of the box.
-## Major news ✨
+## Latest news ✨
-- [2025/02] [dstack 0.18.41: GPU blocks, Proxy jump, inactivity duration, and more](https://github.com/dstackai/dstack/releases/tag/0.18.41)
-- [2025/01] [dstack 0.18.38: Intel Gaudi](https://github.com/dstackai/dstack/releases/tag/0.18.38)
-- [2025/01] [dstack 0.18.35: Vultr](https://github.com/dstackai/dstack/releases/tag/0.18.35)
-- [2024/12] [dstack 0.18.30: AWS Capacity Reservations and Capacity Blocks](https://github.com/dstackai/dstack/releases/tag/0.18.30)
-- [2024/10] [dstack 0.18.21: Instance volumes](https://github.com/dstackai/dstack/releases/tag/0.18.21)
-- [2024/10] [dstack 0.18.18: Hardware metrics monitoring](https://github.com/dstackai/dstack/releases/tag/0.18.18)
+- [2025/05] [dstack 0.19.8: Nebius clusters, GH200 on Lambda](https://github.com/dstackai/dstack/releases/tag/0.19.8)
+- [2025/04] [dstack 0.19.6: Tenstorrent, Plugins](https://github.com/dstackai/dstack/releases/tag/0.19.6)
+- [2025/04] [dstack 0.19.5: GCP A3 High clusters](https://github.com/dstackai/dstack/releases/tag/0.19.5)
+- [2025/04] [dstack 0.19.3: GCP A3 Mega clusters](https://github.com/dstackai/dstack/releases/tag/0.19.3)
+- [2025/03] [dstack 0.19.0: Prometheus](https://github.com/dstackai/dstack/releases/tag/0.19.0)
-## Installation
+## How does it work?
+<img src="https://dstack.ai/static-assets/static-assets/images/dstack-architecture-diagram-v8.svg" width="750" />
+### Installation
 > Before using `dstack` through CLI or API, set up a `dstack` server. If you already have a running `dstack` server, you only need to [set up the CLI](#set-up-the-cli).
-### Set up the server
+#### Set up the server
-#### (Optional) Configure backends
+##### (Optional) Configure backends
 To use `dstack` with cloud providers, configure backends
 via the `~/.dstack/server/config.yml` file.
@@ -365,21 +368,21 @@ For more details on how to configure backends, check [Backends](https://dstack.a
 > For using `dstack` with on-prem servers, create [SSH fleets](https://dstack.ai/docs/concepts/fleets#ssh)
 > once the server is up.
-#### Start the server
+##### Start the server
 You can install the server on Linux, macOS, and Windows (via WSL 2). It requires Git and
 OpenSSH.
-##### pip
+##### uv
 ```shell
-$ pip install "dstack[all]" -U
+$ uv tool install "dstack[all]" -U
 ```
-##### uv
+##### pip
 ```shell
-$ uv tool install "dstack[all]" -U
+$ pip install "dstack[all]" -U
 ```
 Once it's installed, go ahead and start the server.
@@ -392,25 +395,28 @@ The admin token is "bbae0f28-d3dd-4820-bf61-8f4bb40815da"
 The server is running at http://127.0.0.1:3000/
 ```
-For more details on server configuration options, see the
+> For more details on server configuration options, see the
 [Server deployment](https://dstack.ai/docs/guides/server-deployment) guide.
-### Set up the CLI
+<details><summary>Set up the CLI</summary>
+#### Set up the CLI
 Once the server is up, you can access it via the `dstack` CLI.
 The CLI can be installed on Linux, macOS, and Windows. It requires Git and OpenSSH.
-##### pip
+##### uv
 ```shell
-$ pip install dstack -U
+$ uv tool install dstack -U
 ```
-##### uv
+##### pip
 ```shell
-$ uv tool install dstack -U
+$ pip install dstack -U
 ```
 To point the CLI to the `dstack` server, configure it
@@ -425,9 +431,9 @@ $ dstack config \
 Configuration is updated at ~/.dstack/config.yml
 ```
-## How does it work?
+</details>
-### 1. Define configurations
+### Define configurations
 `dstack` supports the following configurations:
@@ -440,7 +446,7 @@ Configuration is updated at ~/.dstack/config.yml
 Configuration can be defined as YAML files within your repo.
-### 2. Apply configurations
+### Apply configurations
 Apply the configuration either via the `dstack apply` CLI command or through a programmatic API.
@@ -452,6 +458,7 @@ out-of-capacity errors, port-forwarding, and more &mdash; across clouds and on-p
 For additional information, see the following links:
 * [Docs](https://dstack.ai/docs)
+* [Examples](https://dstack.ai/examples)
 * [Discord](https://discord.gg/u8SmfwPpMd)
 ## Contributing

{dstack-0.19.8.dist-info → dstack-0.19.9.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 dstack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dstack/version.py,sha256=oR0QmUkjAU0DEnlfWpUCO78TpWiiucCIz4_FvmSdXUE,64
+dstack/version.py,sha256=lJgn5pYWtoxN-mAxtEnIXX2nGXSQrLvGxKsa1F9aNgM,64
 dstack/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dstack/_internal/compat.py,sha256=bF9U9fTMfL8UVhCouedoUSTYFl7UAOiU0WXrnRoByxw,40
 dstack/_internal/settings.py,sha256=otvcNT0X5UnGZdoNIWNFZBohQRzLme9Zc6oiBzc1BEk,796
@@ -31,14 +31,14 @@ dstack/_internal/cli/services/configurators/__init__.py,sha256=z94VPBFqybP8Zpwy3
 dstack/_internal/cli/services/configurators/base.py,sha256=bGfde2zoma28lLE8MUACO4-NKT1CdJJQJoXrzjpz0mQ,3360
 dstack/_internal/cli/services/configurators/fleet.py,sha256=jm4tNH6QQVplLdboCTlvRYUee3nZ0UYb_qLTrvtYVYM,14049
 dstack/_internal/cli/services/configurators/gateway.py,sha256=czB2s89s7IowOmWnpDwWErPAUlW3FvFMizImhrkQiBM,8927
-dstack/_internal/cli/services/configurators/run.py,sha256=ygfFWcZZ6nBXZUPmBtX5s0r0szOTjR8tNnErHsizDnk,25383
+dstack/_internal/cli/services/configurators/run.py,sha256=nXNjFrM5YT6RFqPXJQa4MOiEsG6IFiANyGKP-PXILdc,25518
 dstack/_internal/cli/services/configurators/volume.py,sha256=riMXLQbgvHIIFwLKdHfad-_0iE9wE3G_rUmXU5P3ZS8,8519
 dstack/_internal/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dstack/_internal/cli/utils/common.py,sha256=rfmzqrsgR3rXW3wj0vxDdvrhUUg2aIy4A6E9MZbd55g,1763
 dstack/_internal/cli/utils/fleet.py,sha256=ch-LN1X9boSm-rFLW4mAJRmz0XliLhH0LvKD2DqSt2g,3942
 dstack/_internal/cli/utils/gateway.py,sha256=qMYa1NTAT_O98x2_mSyWDRbiHj5fqt6xUXFh9NIUwAM,1502
 dstack/_internal/cli/utils/rich.py,sha256=Gx1MJU929kMKsbdo9qF7XHARNta2426Ssb-xMLVhwbQ,5710
-dstack/_internal/cli/utils/run.py,sha256=-zfOA_SqBOqHXuQIXnZrxhxt7iYOnsUjqZZ1TzVHmUE,9023
+dstack/_internal/cli/utils/run.py,sha256=nCQwAU3VDS8ec2oWNjRKi5xIGdwwKI_YNr8vgGyDPzQ,9202
 dstack/_internal/cli/utils/updates.py,sha256=sAPYYptkFzQnGaRjv7FV7HOj-Be3IXGe63xj-sVEpv4,2566
 dstack/_internal/cli/utils/volume.py,sha256=mU9I06dVMFbpjfkefxrZNoSWadKLoib3U14rHudNQN4,1975
 dstack/_internal/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -71,7 +71,7 @@ dstack/_internal/core/backends/base/offers.py,sha256=AzAAx5eSTaHv8CbWuGERTHS151x
 dstack/_internal/core/backends/cudo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dstack/_internal/core/backends/cudo/api_client.py,sha256=ygq1Gx7ZvwKaifdXtvzDSw4xR4ZH6UWd5J47BjuaGh0,3685
 dstack/_internal/core/backends/cudo/backend.py,sha256=i13YoAkUfIStc3Yyyt_3YmL30eVrKtrhwnE9_B1iBRI,546
-dstack/_internal/core/backends/cudo/compute.py,sha256=xtA09zvcM6xpp6YdHK6W20GcXA4zOfuHnU0tbODUo14,6466
+dstack/_internal/core/backends/cudo/compute.py,sha256=wGMdH4Me-IHuQ-U1_XiuOqtHT86AgHyofUi449eqijo,6466
 dstack/_internal/core/backends/cudo/configurator.py,sha256=pkAT1MtL6_yYvYoqCglvPE-DiUdL8-XEviyN1yUSYyw,2056
 dstack/_internal/core/backends/cudo/models.py,sha256=6sfEqY2hvTpIACkyT4mhD3D8K5TsW_pupys9nqtrgoI,1055
 dstack/_internal/core/backends/datacrunch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -109,8 +109,8 @@ dstack/_internal/core/backends/nebius/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
 dstack/_internal/core/backends/nebius/backend.py,sha256=2XqZIbSR8VzlfOnuVklXlDxNmwAkQj7txQN8VXF1j2E,566
 dstack/_internal/core/backends/nebius/compute.py,sha256=OUNvDk9rU13NR0CADFqn7nQL8kxgSvL7kbNEV4cLzyQ,14761
 dstack/_internal/core/backends/nebius/configurator.py,sha256=ML2KCD6Ddxc2f6X1juxqKulUcOjF6uJk20_0Teyi65A,3072
-dstack/_internal/core/backends/nebius/fabrics.py,sha256=09eXtzSWpK7Oxv4N1pOmXzBJgbi2d5yC6gvcizHhplg,1476
-dstack/_internal/core/backends/nebius/models.py,sha256=-qM-F_c2Hf4ZL5AXmtQiGA5q1PyGsCCPhmRFHEyx2dw,4253
+dstack/_internal/core/backends/nebius/fabrics.py,sha256=jC7ngUO54rXbyXI4hkl5_9GdBk7h4Ivyh88CH4S37ds,1546
+dstack/_internal/core/backends/nebius/models.py,sha256=UudYX32p-ZY-GWR83VEtY5dpZBaWhKXQIfn2nrBCq-4,4245
 dstack/_internal/core/backends/nebius/resources.py,sha256=hx_VqiaurGO0MYT2KEvMl9EYdcglBRQsWSY5kHKjR00,12163
 dstack/_internal/core/backends/oci/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dstack/_internal/core/backends/oci/auth.py,sha256=8Cr18y_LOsyRP-16yfFpT70Cofpm0clB3KawS_7aRl4,717
@@ -165,8 +165,8 @@ dstack/_internal/core/models/metrics.py,sha256=Xb8hCXUL-ncQ3PMsErIUAJTe9gwh5jyrQ
 dstack/_internal/core/models/placement.py,sha256=WJVq5ENJykyRarQzL2EeYQag_9_jV7VSAtR_xoFvPVM,720
 dstack/_internal/core/models/profiles.py,sha256=seeysTuMv1vVUmpHAZgrMUGcbMtH7hSMFIvfx0Qk__0,10406
 dstack/_internal/core/models/projects.py,sha256=H5ZZRiyUEKifpTFAhl45KBi5ly7ooE0WmI329myK360,643
-dstack/_internal/core/models/resources.py,sha256=fDAcbXNYQHb9KopFVMLNXftC9OsJaOFiKP1cIrPDnps,14121
-dstack/_internal/core/models/runs.py,sha256=OMT69BfUUiaxRNUjTCZbWahHCTVBXXUF9jaYa6xgH38,18531
+dstack/_internal/core/models/resources.py,sha256=rsf6hAhi5bfSb_Z9VcS3UoEG0G8Ohl6ekyrOStLOAqw,14114
+dstack/_internal/core/models/runs.py,sha256=Xkv1kY68JA0eJUeCVJjM9YWRkfy6P1RGXslBIMtox2E,18985
 dstack/_internal/core/models/secrets.py,sha256=IQyemsNpSzqOCB-VlVTuc4gyPFmXXO4mhko0Ur0ey3I,221
 dstack/_internal/core/models/server.py,sha256=Hkc1v2s3KOiwslsWVmhUOAzcSeREoG-HD1SzSX9WUGg,152
 dstack/_internal/core/models/services.py,sha256=2Hpi7j0Q1shaf_0wd0C0044AJAmuYi-D3qx3PH849oI,3076
@@ -250,18 +250,18 @@ dstack/_internal/server/app.py,sha256=K2NojwUKdktdenrR61I21kXIMX6ars6zB9v6Ea-evz
 dstack/_internal/server/db.py,sha256=WjuqmjG3QAZmSMCeUaJ_ynbowlHuNAvYCZO649cTPHc,3210
 dstack/_internal/server/deps.py,sha256=31e8SU_ogPJWHIDLkgl7cuC_5V91xbJoLyAj17VanfM,670
 dstack/_internal/server/main.py,sha256=kztKhCYNoHSDyJJQScWfZXE0naNleJOCQULW6dd8SGw,109
-dstack/_internal/server/models.py,sha256=S0L7G_3q6akytdCZ1svukzCSY0kdwnGplbHwTAUBpwo,29351
-dstack/_internal/server/settings.py,sha256=1iqXWgvvsr19jXX1javGdPj6UhOfOHuuXSXgmGtjO2A,3335
+dstack/_internal/server/models.py,sha256=GWl78Zl-_w1UyW9nB6DDS95Ko_osbLQtLb2DIi1JDLo,29633
+dstack/_internal/server/settings.py,sha256=XkLexvylNbU3iRM0KHnTX0fywLGczBHya5lVmeptbqY,4123
 dstack/_internal/server/background/__init__.py,sha256=8kTbhEHCeXTibsOlHY1HwqIO6gGb4q8fUa2fcDrah1c,3893
 dstack/_internal/server/background/tasks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dstack/_internal/server/background/tasks/common.py,sha256=N7xSXbf2MoBWgbJ1e3AEzYBTf1Gn-pDXYND8Zr_YCJQ,970
 dstack/_internal/server/background/tasks/process_fleets.py,sha256=LsD3I7iCbQs_nexJ1kfnn1VPz3BffFmALRgpk5DHGO4,2985
 dstack/_internal/server/background/tasks/process_gateways.py,sha256=hoUI1CSqbHt_uMwnzTRAEDl-LBw0wUk_W4xobIbdvRc,7017
 dstack/_internal/server/background/tasks/process_instances.py,sha256=E1NLac4ubiF6jUr9M7rj8cjQf4zFZCBVr428UBgFeGI,42855
-dstack/_internal/server/background/tasks/process_metrics.py,sha256=acySfsacpYbTPV9Yivs-oU37z1S2sUdWhRHdJkfBcCA,5332
+dstack/_internal/server/background/tasks/process_metrics.py,sha256=IDGyVQtGLua_NoY8sLv0RH18iV_3-LUONj6cEI181QM,6136
 dstack/_internal/server/background/tasks/process_placement_groups.py,sha256=gJ8Um3Vx-brazHVWdtVXPnov4rwvDr-0Vn1Voq1cYBQ,4108
 dstack/_internal/server/background/tasks/process_prometheus_metrics.py,sha256=9VoGFqdiXcVkCi_NV1VqQdqllDkB0bHIqOHKMIZK1Fg,5183
-dstack/_internal/server/background/tasks/process_running_jobs.py,sha256=U6JdkEnpIApbiSRLKxqjNwA9WFAZY2zZNXujofhUd_g,34719
+dstack/_internal/server/background/tasks/process_running_jobs.py,sha256=jyuYOQfXIVGu5ugTennwSGXL_c6VDNJN96-Qrz7Hgic,36441
 dstack/_internal/server/background/tasks/process_runs.py,sha256=EI1W6HUyB-og3g8BDP_GsBrJjQ-Z3JvZHTuJf7CRKRM,17974
 dstack/_internal/server/background/tasks/process_submitted_jobs.py,sha256=-XOApBgmn9ZyCoeXgnbp6cnsFT3uxE_-xqLtn1ez5dc,26603
 dstack/_internal/server/background/tasks/process_terminating_jobs.py,sha256=0Z3Q409RwSxOL_pgK8JktBthjtESEUH3ahwTLsTdYPk,3800
@@ -277,6 +277,7 @@ dstack/_internal/server/migrations/versions/14f2cb002fc2_add_jobmodel_removed_fl
 dstack/_internal/server/migrations/versions/1a48dfe44a40_rework_termination_handling.py,sha256=sqYOR7ZoUifmRrZz2DBaO-D_Pgu20nup15yihg1FBcM,1417
 dstack/_internal/server/migrations/versions/1e3fb39ef74b_add_remote_connection_details.py,sha256=x4FdfUD4XI7nxcppnw5juFKksusYMA4EXNxs0UEETFE,649
 dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py,sha256=4-H_mGGSD6tI7H0HQ-pBs5wixMKdDro6KtLdH_QId28,831
+dstack/_internal/server/migrations/versions/20166748b60c_add_jobmodel_disconnected_at.py,sha256=Vj7VPo67pAy6gHztSrbp9HmzAz0ckROeV6HxDfvRwOw,3085
 dstack/_internal/server/migrations/versions/23e01c56279a_make_blob_nullable.py,sha256=O5ZrwAXs1ubPrChyItCbuEeAPrlRF_ys1nVw4knAO5g,845
 dstack/_internal/server/migrations/versions/252d3743b641_.py,sha256=z3mMF3YCEg6ueoj746cDNBNlQSimmBOcVLNupOv2UuU,1246
 dstack/_internal/server/migrations/versions/27d3e55759fa_add_pools.py,sha256=yKEQ1OdPZSaO8YxRd986mJjNc9CjeO6SqY5SmL6aGfE,5433
@@ -296,6 +297,7 @@ dstack/_internal/server/migrations/versions/5ec538b70e71_replace_instansestatus.
 dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py,sha256=PKqFqe6x6bkO8hrv73fes4uyBpzsemqwp3c-i5XzaS8,1195
 dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py,sha256=UyNT3d8Osa2MpuODQRdmmbHlt-qT-wHNNOUTzUEcLLI,2512
 dstack/_internal/server/migrations/versions/686fb8341ea5_add_user_emails.py,sha256=0FlRf5Mis9ZIc7X6M9yqPtyFqR2SKCTUFfZeDbqAXBU,809
+dstack/_internal/server/migrations/versions/6c1a9d6530ee_add_jobmodel_exit_status.py,sha256=aYZIGWhM38hZ6SkrUsFESOmOIVhSAIbA9dbyFKsLUr8,615
 dstack/_internal/server/migrations/versions/710e5b3fac8f_add_encryption.py,sha256=nBURp4A6TpT13H1ccH4WyzkU2GGy3uDGKCNG88cAciw,1827
 dstack/_internal/server/migrations/versions/7b24b1c8eba7_add_instancemodel_last_processed_at.py,sha256=o1A8nzrmMFcivBzaIMemCtRfCZ9mq1IuBko1CJXoVOo,2124
 dstack/_internal/server/migrations/versions/7ba3b59d7ca6_add_runmodel_resubmission_attempt.py,sha256=FUvCRzOzkp8HjRYy0-kuOwvBGbwuCgyjXU7hD-AWRJs,1045
@@ -358,7 +360,7 @@ dstack/_internal/server/schemas/instances.py,sha256=MedAVbKa_1F9zhdSPrjMmL-Og5Dp
 dstack/_internal/server/schemas/logs.py,sha256=JGt39fBEFRjHhlGT1jIC6kwQhujxPO8uecjplzjTZXY,402
 dstack/_internal/server/schemas/projects.py,sha256=UmHtX0pkr3L_vPsosvRC9JneqBrYaJvVKd4OxhYySHc,566
 dstack/_internal/server/schemas/repos.py,sha256=Sit0Tqol79VOMGGp8ncZXLLsZ4INcF-pAA_jwRchYqA,666
-dstack/_internal/server/schemas/runner.py,sha256=L9cG4n8bt_wJQhk9iWeIHEOlOPuoE0aDXlMEfPNZX7s,4424
+dstack/_internal/server/schemas/runner.py,sha256=CqlP90yvPKaavAqxLOxewIMq3cHeT_NWfR0YO81JfU0,5151
 dstack/_internal/server/schemas/runs.py,sha256=XhlTnn67g4NWVmIJFQdy2yPK_EcnSIYOCCSex0XOIes,1341
 dstack/_internal/server/schemas/secrets.py,sha256=mfqLSM7PqxVQ-GIWB6RfPRUOvSvvaRv-JxXAYxZ6dyY,373
 dstack/_internal/server/schemas/users.py,sha256=FuDqwRVe3mOmv497vOZKjI0a_d4Wt2g4ZiCJcyfHEKA,495
@@ -381,7 +383,7 @@ dstack/_internal/server/services/projects.py,sha256=Je1iWZ-ArmyFxK1yMUzod5WRXyiI
 dstack/_internal/server/services/prometheus.py,sha256=xq5G-Q2BJup9lS2F6__0wUVTs-k1Gr3dYclGzo2WoWo,12474
 dstack/_internal/server/services/repos.py,sha256=f9ztN7jz_2gvD9hXF5sJwWDVyG2-NHRfjIdSukowPh8,9342
 dstack/_internal/server/services/resources.py,sha256=VRFOih_cMJdc0c2m9nSGsX8vWAJQV3M6N87aqS_JXfw,699
-dstack/_internal/server/services/runs.py,sha256=Wcvz65TYtWv2YWeSseNvKSnOD85roADh3N8UToHP3nc,38543
+dstack/_internal/server/services/runs.py,sha256=K3rqzfqkUY9dhTO-2W9_PGH8gCdYhCJK-S-gbM5drx4,38559
 dstack/_internal/server/services/storage.py,sha256=6I0xI_3_RpJNbKZwHjDnjrEwXGdHfiaeb5li15T-M1I,1884
 dstack/_internal/server/services/users.py,sha256=W-5xL7zsHNjeG7BBK54RWGvIrBOrw-FF0NcG_z9qhoE,7466
 dstack/_internal/server/services/volumes.py,sha256=vfKY6eZp64I58Mfdvrk9Wig7deveD2Rw4ET1cbc1Sog,16238
@@ -396,7 +398,7 @@ dstack/_internal/server/services/gateways/__init__.py,sha256=Up8uFsEQDBE0yOXn7n5
 dstack/_internal/server/services/gateways/client.py,sha256=XIJX3fGBbZ_AG8qZMTSE8KAB_ojq5YJFa0OXoD_dofg,7493
 dstack/_internal/server/services/gateways/connection.py,sha256=ot3lV85XdmCT45vBWeyj57nLPcLPNm316zu3jMyeWjA,5625
 dstack/_internal/server/services/gateways/pool.py,sha256=0LclTl1tyx-doS78LeaAKjr-SMp98zuwh5f9s06JSd0,1914
-dstack/_internal/server/services/jobs/__init__.py,sha256=XR23KU--yX4sLszLoCCQDPKLw9cKgrHdkI9SgIHvwHY,25535
+dstack/_internal/server/services/jobs/__init__.py,sha256=GU3vMC0SZKyvL564A7t_QRoDjf83-8GsUkguDWK5x6c,25578
 dstack/_internal/server/services/jobs/configurators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dstack/_internal/server/services/jobs/configurators/base.py,sha256=QSIU1OoZ794HKdwjo5iXxGUvFE8a2-g_SoYjAQjXhcI,11354
 dstack/_internal/server/services/jobs/configurators/dev.py,sha256=ufN6Sd8TwIsjQYNZE32fkAqJI7o2zjgoZThbrP-bd7U,2378
@@ -420,7 +422,7 @@ dstack/_internal/server/services/proxy/routers/service_proxy.py,sha256=5oB-SX8f_
 dstack/_internal/server/services/proxy/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dstack/_internal/server/services/proxy/services/service_proxy.py,sha256=4JrSxHqhBYqU1oENii89Db-bzkFWExYrOy-0mNEhWBs,4879
 dstack/_internal/server/services/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dstack/_internal/server/services/runner/client.py,sha256=bEA_1NvOJpznSuoVfVfvIJJ03zzAHj_lXRUbA82SbPY,15503
+dstack/_internal/server/services/runner/client.py,sha256=jQDxv_Yaiwr2e3m1LqPCFtToB_GrsC2yVQfgXzyn6g8,15586
 dstack/_internal/server/services/runner/ssh.py,sha256=H-X0015ZPwYq5tc31ytFF1uNaUAr9itAsABI2oPJWrk,5017
 dstack/_internal/server/services/services/__init__.py,sha256=HQz72SNN8W9gUQ5INyO_Wd8TR9j3V6qoHFGEDEI920w,10862
 dstack/_internal/server/services/services/autoscalers.py,sha256=0o_w9La-ex_P3VKG88w_XN3hkLkzryv5l1cH3pkZyAE,4315
@@ -531,7 +533,7 @@ dstack/_internal/server/statics/static/media/logo.f602feeb138844eda97c8cb6414614
 dstack/_internal/server/statics/static/media/okta.12f178e6873a1100965f2a4dbd18fcec.svg,sha256=KqFI05gQM135zC1plF1WBRF2F7CyKL7km97WKsZjAHI,319
 dstack/_internal/server/statics/static/media/theme.3994c817bb7dda191c1c9640dee0bf42.svg,sha256=ZxFFBVZWuRLqmWH4zhwGLNtKjOzHj-5MGJRunFAtu1I,561
 dstack/_internal/server/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dstack/_internal/server/testing/common.py,sha256=gsSjGb6c7Kp-f345srXxca0EuUL-TV_So7irLr_iss8,31391
+dstack/_internal/server/testing/common.py,sha256=uzmF9_xsiHkb8l8adljuYSpAMEH4hmRZAbv6-96rN58,31480
 dstack/_internal/server/testing/conf.py,sha256=-zhujfFjTHNfQDOK-hBck32By11c_kC0OeinB3esQGg,1902
 dstack/_internal/server/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dstack/_internal/server/utils/common.py,sha256=PbjXtqYy1taKXpyG5ys8cIrz9MXqc9dBAsR_9D1brrk,1414
@@ -570,7 +572,7 @@ dstack/api/server/_logs.py,sha256=ng8QvFAIaoVOVChTK6Wuu5BeM6y7gAdx30KEYRsn9xA,50
 dstack/api/server/_metrics.py,sha256=OPb8sLhI_U605sHOPrELgy0_6cNFLJVfpvr-qkEukRM,670
 dstack/api/server/_projects.py,sha256=g6kNSU6jer8u7Kaut1I0Ft4wRMLBBCQShJf3fOB63hQ,1440
 dstack/api/server/_repos.py,sha256=bqsKuZWyiNLE8UAdSZrYNtk1J3Gu5MXXnTMIoM9jxpI,1770
-dstack/api/server/_runs.py,sha256=qxKlHcW73HRglW5iogz5FYPtd85zb0vu2uKBuZx8BIc,7662
+dstack/api/server/_runs.py,sha256=uVTY57BlBvB86pkKNjUh-Nc5AYNmfH9kfBNbgzTnTyw,7914
 dstack/api/server/_secrets.py,sha256=VqLfrIcmBJtPxNDRkXTG44H5SWoY788YJapScUukvdY,1576
 dstack/api/server/_users.py,sha256=XzhgGKc5Tsr0-xkz3T6rGyWZ1tO7aYNhLux2eE7dAoY,1738
 dstack/api/server/_volumes.py,sha256=xxOt8o5G-bhMh6wSvF4BDFNoqVEhlM4BXQr2KvX0pN0,1937
@@ -580,8 +582,8 @@ dstack/plugins/__init__.py,sha256=buT1pcyORLgVbl89ATkRWJPhvejriVz7sNBjvuZRCRE,40
 dstack/plugins/_base.py,sha256=-etiB-EozaJCg2wtmONfj8ic-K03qXvXyl_TIDp-kNE,2662
 dstack/plugins/_models.py,sha256=1Gw--mDQ1_0FFr9Zur9LE8UbMoWESUpTdHHt12AyIZo,341
 dstack/plugins/_utils.py,sha256=FqeWYb7zOrgZkO9Bd8caL5I81_TUEsysIzvxsULrmzk,392
-dstack-0.19.8.dist-info/METADATA,sha256=OZnwMM_G_MuMYJbZ2StSOZnkeV2pbEPRE4_qWv8jFoQ,20150
-dstack-0.19.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-dstack-0.19.8.dist-info/entry_points.txt,sha256=GnLrMS8hx3rWAySQjA7tPNhtixV6a-brRkmal1PKoHc,58
-dstack-0.19.8.dist-info/licenses/LICENSE.md,sha256=qDABaRGjSKVOib1U8viw2P_96sIK7Puo426784oD9f8,15976
-dstack-0.19.8.dist-info/RECORD,,
+dstack-0.19.9.dist-info/METADATA,sha256=0gv_xHbluxlydceXCwjWo2m-CjyWGjNiR4gNpBKOpE0,20254
+dstack-0.19.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+dstack-0.19.9.dist-info/entry_points.txt,sha256=GnLrMS8hx3rWAySQjA7tPNhtixV6a-brRkmal1PKoHc,58
+dstack-0.19.9.dist-info/licenses/LICENSE.md,sha256=qDABaRGjSKVOib1U8viw2P_96sIK7Puo426784oD9f8,15976
+dstack-0.19.9.dist-info/RECORD,,

{dstack-0.19.8.dist-info → dstack-0.19.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{dstack-0.19.8.dist-info → dstack-0.19.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{dstack-0.19.8.dist-info → dstack-0.19.9.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

dstack 0.19.8__py3-none-any.whl → 0.19.9__py3-none-any.whl

Potentially problematic release.

dstack 0.19.8__py3-none-any.whl → 0.19.9__py3-none-any.whl