PyPI - dstack - Versions diffs - 0.19.8__py3-none-any.whl → 0.19.10__py3-none-any.whl - Mend

dstack 0.19.8__py3-none-any.whl → 0.19.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dstack might be problematic. Click here for more details.

Files changed (42) hide show

dstack/_internal/server/background/tasks/process_submitted_jobs.py CHANGED Viewed

@@ -93,11 +93,20 @@ async def _process_next_submitted_job():
         async with lock:
             res = await session.execute(
                 select(JobModel)
+                .join(JobModel.run)
                 .where(
                     JobModel.status == JobStatus.SUBMITTED,
                     JobModel.id.not_in(lockset),
                 )
-                .order_by(JobModel.last_processed_at.asc())
+                # Jobs are process in FIFO sorted by priority globally,
+                # thus runs from different project can "overtake" each other by using higher priorities.
+                # That's not a big problem as long as projects do not compete for the same compute resources.
+                # Jobs with lower priorities from other projects will be processed without major lag
+                # as long as new higher priority runs are not constantly submitted.
+                # TODO: Consider processing jobs from different projects fairly/round-robin
+                # Fully fair processing can be tricky to implement via the current DB queue as
+                # there can be many projects and we are limited by the max DB connections.
+                .order_by(RunModel.priority.desc(), JobModel.last_processed_at.asc())
                 .limit(1)
                 .with_for_update(skip_locked=True)
             )
@@ -360,16 +369,16 @@ async def _assign_job_to_pool_instance(
         (instance, common_utils.get_or_error(get_instance_offer(instance)))
         for instance in nonshared_instances
     ]
-    if not multinode:
-        shared_instances_with_offers = get_shared_pool_instances_with_offers(
-            pool_instances=pool_instances,
-            profile=profile,
-            requirements=job.job_spec.requirements,
-            idle_only=True,
-            fleet_model=fleet_model,
-            volumes=volumes,
-        )
-        instances_with_offers.extend(shared_instances_with_offers)
+    shared_instances_with_offers = get_shared_pool_instances_with_offers(
+        pool_instances=pool_instances,
+        profile=profile,
+        requirements=job.job_spec.requirements,
+        idle_only=True,
+        fleet_model=fleet_model,
+        multinode=multinode,
+        volumes=volumes,
+    )
+    instances_with_offers.extend(shared_instances_with_offers)
     if len(instances_with_offers) == 0:
         return None
@@ -572,7 +581,7 @@ def _create_instance_model_for_job(
 def _prepare_job_runtime_data(offer: InstanceOfferWithAvailability) -> JobRuntimeData:
-    if offer.total_blocks == 1:
+    if offer.blocks == offer.total_blocks:
         if env_utils.get_bool("DSTACK_FORCE_BRIDGE_NETWORK"):
             network_mode = NetworkMode.BRIDGE
         else:

dstack/_internal/server/migrations/versions/20166748b60c_add_jobmodel_disconnected_at.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""Add JobModel.disconnected_at
+Revision ID: 20166748b60c
+Revises: 6c1a9d6530ee
+Create Date: 2025-05-13 16:24:32.496578
+"""
+import sqlalchemy as sa
+from alembic import op
+from alembic_postgresql_enum import TableReference
+import dstack._internal.server.models
+# revision identifiers, used by Alembic.
+revision = "20166748b60c"
+down_revision = "6c1a9d6530ee"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "disconnected_at", dstack._internal.server.models.NaiveDateTime(), nullable=True
+            )
+        )
+    op.sync_enum_values(
+        enum_schema="public",
+        enum_name="jobterminationreason",
+        new_values=[
+            "FAILED_TO_START_DUE_TO_NO_CAPACITY",
+            "INTERRUPTED_BY_NO_CAPACITY",
+            "INSTANCE_UNREACHABLE",
+            "WAITING_INSTANCE_LIMIT_EXCEEDED",
+            "WAITING_RUNNER_LIMIT_EXCEEDED",
+            "TERMINATED_BY_USER",
+            "VOLUME_ERROR",
+            "GATEWAY_ERROR",
+            "SCALED_DOWN",
+            "DONE_BY_RUNNER",
+            "ABORTED_BY_USER",
+            "TERMINATED_BY_SERVER",
+            "INACTIVITY_DURATION_EXCEEDED",
+            "TERMINATED_DUE_TO_UTILIZATION_POLICY",
+            "CONTAINER_EXITED_WITH_ERROR",
+            "PORTS_BINDING_FAILED",
+            "CREATING_CONTAINER_ERROR",
+            "EXECUTOR_ERROR",
+            "MAX_DURATION_EXCEEDED",
+        ],
+        affected_columns=[
+            TableReference(
+                table_schema="public", table_name="jobs", column_name="termination_reason"
+            )
+        ],
+        enum_values_to_rename=[],
+    )
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.sync_enum_values(
+        enum_schema="public",
+        enum_name="jobterminationreason",
+        new_values=[
+            "FAILED_TO_START_DUE_TO_NO_CAPACITY",
+            "INTERRUPTED_BY_NO_CAPACITY",
+            "WAITING_INSTANCE_LIMIT_EXCEEDED",
+            "WAITING_RUNNER_LIMIT_EXCEEDED",
+            "TERMINATED_BY_USER",
+            "VOLUME_ERROR",
+            "GATEWAY_ERROR",
+            "SCALED_DOWN",
+            "DONE_BY_RUNNER",
+            "ABORTED_BY_USER",
+            "TERMINATED_BY_SERVER",
+            "INACTIVITY_DURATION_EXCEEDED",
+            "TERMINATED_DUE_TO_UTILIZATION_POLICY",
+            "CONTAINER_EXITED_WITH_ERROR",
+            "PORTS_BINDING_FAILED",
+            "CREATING_CONTAINER_ERROR",
+            "EXECUTOR_ERROR",
+            "MAX_DURATION_EXCEEDED",
+        ],
+        affected_columns=[
+            TableReference(
+                table_schema="public", table_name="jobs", column_name="termination_reason"
+            )
+        ],
+        enum_values_to_rename=[],
+    )
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.drop_column("disconnected_at")
+    # ### end Alembic commands ###

dstack/_internal/server/migrations/versions/6c1a9d6530ee_add_jobmodel_exit_status.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""Add JobModel.exit_status
+Revision ID: 6c1a9d6530ee
+Revises: 7ba3b59d7ca6
+Create Date: 2025-05-09 10:25:19.715852
+"""
+import sqlalchemy as sa
+from alembic import op
+# revision identifiers, used by Alembic.
+revision = "6c1a9d6530ee"
+down_revision = "7ba3b59d7ca6"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("exit_status", sa.Integer(), nullable=True))
+def downgrade() -> None:
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.drop_column("exit_status")

dstack/_internal/server/migrations/versions/bca2fdf130bf_add_runmodel_priority.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""Add RunModel.priority
+Revision ID: bca2fdf130bf
+Revises: 20166748b60c
+Create Date: 2025-05-14 15:24:21.269775
+"""
+import sqlalchemy as sa
+from alembic import op
+# revision identifiers, used by Alembic.
+revision = "bca2fdf130bf"
+down_revision = "20166748b60c"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("priority", sa.Integer(), nullable=True))
+    batch_op.execute("UPDATE runs SET priority = 0")
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.alter_column("priority", nullable=False)
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.drop_column("priority")
+    # ### end Alembic commands ###

dstack/_internal/server/models.py CHANGED Viewed

@@ -348,6 +348,7 @@ class RunModel(BaseModel):
     resubmission_attempt: Mapped[int] = mapped_column(Integer, default=0)
     run_spec: Mapped[str] = mapped_column(Text)
     service_spec: Mapped[Optional[str]] = mapped_column(Text)
+    priority: Mapped[int] = mapped_column(Integer, default=0)
     jobs: Mapped[List["JobModel"]] = relationship(
         back_populates="run", lazy="selectin", order_by="[JobModel.replica_num, JobModel.job_num]"
@@ -382,6 +383,10 @@ class JobModel(BaseModel):
         Enum(JobTerminationReason)
     )
     termination_reason_message: Mapped[Optional[str]] = mapped_column(Text)
+    # `disconnected_at` stores the first time of connectivity issues with the instance.
+    # Resets every time connectivity is restored.
+    disconnected_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
+    exit_status: Mapped[Optional[int]] = mapped_column(Integer)
     job_spec_data: Mapped[str] = mapped_column(Text)
     job_provisioning_data: Mapped[Optional[str]] = mapped_column(Text)
     runner_timestamp: Mapped[Optional[int]] = mapped_column(BigInteger)
@@ -390,7 +395,7 @@ class JobModel(BaseModel):
     remove_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
     volumes_detached_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
     # `instance_assigned` means instance assignment was done.
-    # if `instance_assigned` is True and `instance` is None, no instance was assiged.
+    # if `instance_assigned` is True and `instance` is None, no instance was assigned.
     instance_assigned: Mapped[bool] = mapped_column(Boolean, default=False)
     instance_id: Mapped[Optional[uuid.UUID]] = mapped_column(
         ForeignKey("instances.id", ondelete="CASCADE")

dstack/_internal/server/routers/repos.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from typing import List, Tuple
 from fastapi import APIRouter, Depends, Request, UploadFile
+from humanize import naturalsize
 from sqlalchemy.ext.asyncio import AsyncSession
 from dstack._internal.core.errors import ResourceNotExistsError, ServerClientError
@@ -14,9 +15,10 @@ from dstack._internal.server.schemas.repos import (
 )
 from dstack._internal.server.security.permissions import ProjectMember
 from dstack._internal.server.services import repos
+from dstack._internal.server.settings import SERVER_CODE_UPLOAD_LIMIT
 from dstack._internal.server.utils.routers import (
     get_base_api_additional_responses,
-    request_size_exceeded,
+    get_request_size,
 )
 router = APIRouter(
@@ -94,10 +96,12 @@ async def upload_code(
     session: AsyncSession = Depends(get_session),
     user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectMember()),
 ):
-    if request_size_exceeded(request, limit=2 * 2**20):
+    request_size = get_request_size(request)
+    if SERVER_CODE_UPLOAD_LIMIT > 0 and request_size > SERVER_CODE_UPLOAD_LIMIT:
         raise ServerClientError(
-            "Repo diff size exceeds the limit of 2MB. "
-            "Use .gitignore to exclude large files from the repo."
+            f"Repo diff size is {naturalsize(request_size)}, which exceeds the limit of "
+            f"{naturalsize(SERVER_CODE_UPLOAD_LIMIT)}. Use .gitignore to exclude large files from the repo. This "
+            f"limit can be modified by setting the DSTACK_SERVER_CODE_UPLOAD_LIMIT_BYTES environment variable"
         )
     _, project = user_project
     await repos.upload_code(

dstack/_internal/server/schemas/runner.py CHANGED Viewed

@@ -7,7 +7,14 @@ from typing_extensions import Annotated
 from dstack._internal.core.models.common import CoreModel, NetworkMode
 from dstack._internal.core.models.repos.remote import RemoteRepoCreds
-from dstack._internal.core.models.runs import ClusterInfo, JobSpec, JobStatus, RunSpec
+from dstack._internal.core.models.runs import (
+    ClusterInfo,
+    JobSpec,
+    JobStatus,
+    JobSubmission,
+    Run,
+    RunSpec,
+)
 from dstack._internal.core.models.volumes import InstanceMountPoint, VolumeMountPoint
@@ -16,6 +23,7 @@ class JobStateEvent(CoreModel):
     state: JobStatus
     termination_reason: Optional[str] = None
     termination_message: Optional[str] = None
+    exit_status: Optional[int] = None
 class LogEvent(CoreModel):
@@ -38,15 +46,18 @@ class PullResponse(CoreModel):
 class SubmitBody(CoreModel):
-    run_spec: Annotated[
-        RunSpec,
+    run: Annotated[
+        Run,
         Field(
             include={
-                "run_name",
-                "repo_id",
-                "repo_data",
-                "configuration",
-                "configuration_path",
+                "id": True,
+                "run_spec": {
+                    "run_name",
+                    "repo_id",
+                    "repo_data",
+                    "configuration",
+                    "configuration_path",
+                },
             }
         ),
     ]
@@ -69,9 +80,31 @@ class SubmitBody(CoreModel):
             }
         ),
     ]
+    job_submission: Annotated[
+        JobSubmission,
+        Field(
+            include={
+                "id",
+            }
+        ),
+    ]
     cluster_info: Annotated[Optional[ClusterInfo], Field(include=True)]
     secrets: Annotated[Optional[Dict[str, str]], Field(include=True)]
     repo_credentials: Annotated[Optional[RemoteRepoCreds], Field(include=True)]
+    # run_spec is deprecated in favor of run.run_spec
+    # TODO: Remove once we no longer support instances deployed with 0.19.8 or earlier.
+    run_spec: Annotated[
+        RunSpec,
+        Field(
+            include={
+                "run_name",
+                "repo_id",
+                "repo_data",
+                "configuration",
+                "configuration_path",
+            },
+        ),
+    ]
 class HealthcheckResponse(CoreModel):

dstack/_internal/server/services/instances.py CHANGED Viewed

@@ -235,6 +235,7 @@ def get_shared_pool_instances_with_offers(
     *,
     idle_only: bool = False,
     fleet_model: Optional[FleetModel] = None,
+    multinode: bool = False,
     volumes: Optional[List[List[Volume]]] = None,
 ) -> list[tuple[InstanceModel, InstanceOfferWithAvailability]]:
     instances_with_offers: list[tuple[InstanceModel, InstanceOfferWithAvailability]] = []
@@ -243,19 +244,22 @@ def get_shared_pool_instances_with_offers(
         pool_instances=pool_instances,
         profile=profile,
         fleet_model=fleet_model,
-        multinode=False,
+        multinode=multinode,
         volumes=volumes,
         shared=True,
     )
     for instance in filtered_instances:
         if idle_only and instance.status not in [InstanceStatus.IDLE, InstanceStatus.BUSY]:
             continue
+        if multinode and instance.busy_blocks > 0:
+            continue
         offer = get_instance_offer(instance)
         if offer is None:
             continue
         total_blocks = common_utils.get_or_error(instance.total_blocks)
         idle_blocks = total_blocks - instance.busy_blocks
-        for blocks in range(1, total_blocks + 1):
+        min_blocks = total_blocks if multinode else 1
+        for blocks in range(min_blocks, total_blocks + 1):
             shared_offer = generate_shared_offer(offer, blocks, total_blocks)
             catalog_item = offer_to_catalog_item(shared_offer)
             if gpuhunt.matches(catalog_item, query_filter):

dstack/_internal/server/services/jobs/__init__.py CHANGED Viewed

@@ -135,6 +135,7 @@ def job_model_to_job_submission(job_model: JobModel) -> JobSubmission:
         status=job_model.status,
         termination_reason=job_model.termination_reason,
         termination_reason_message=job_model.termination_reason_message,
+        exit_status=job_model.exit_status,
         job_provisioning_data=job_provisioning_data,
         job_runtime_data=get_job_runtime_data(job_model),
     )

dstack/_internal/server/services/jobs/configurators/base.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Union
 from cachetools import TTLCache, cached
-import dstack.version as version
+from dstack._internal import settings
 from dstack._internal.core.errors import DockerRegistryError, ServerClientError
 from dstack._internal.core.models.common import RegistryAuth
 from dstack._internal.core.models.configurations import (
@@ -53,14 +53,14 @@ def get_default_image(python_version: str, nvcc: bool = False) -> str:
     suffix = ""
     if nvcc:
         suffix = "-devel"
-    return f"dstackai/base:py{python_version}-{version.base_image}-cuda-12.1{suffix}"
+    return f"{settings.DSTACK_BASE_IMAGE}:py{python_version}-{settings.DSTACK_BASE_IMAGE_VERSION}-cuda-12.1{suffix}"
 class JobConfigurator(ABC):
     TYPE: RunConfigurationType
     _image_config: Optional[ImageConfig] = None
-    # JobSSHKey should be shared for all jobs in a replica for inter-node communitation.
+    # JobSSHKey should be shared for all jobs in a replica for inter-node communication.
     _job_ssh_key: Optional[JobSSHKey] = None
     def __init__(self, run_spec: RunSpec):

dstack/_internal/server/services/runner/client.py CHANGED Viewed

@@ -12,7 +12,7 @@ from dstack._internal.core.models.common import CoreModel, NetworkMode
 from dstack._internal.core.models.envs import Env
 from dstack._internal.core.models.repos.remote import RemoteRepoCreds
 from dstack._internal.core.models.resources import Memory
-from dstack._internal.core.models.runs import ClusterInfo, JobSpec, RunSpec
+from dstack._internal.core.models.runs import ClusterInfo, Job, Run
 from dstack._internal.core.models.volumes import InstanceMountPoint, Volume, VolumeMountPoint
 from dstack._internal.server.schemas.runner import (
     GPUDevice,
@@ -72,8 +72,8 @@ class RunnerClient:
     def submit_job(
         self,
-        run_spec: RunSpec,
-        job_spec: JobSpec,
+        run: Run,
+        job: Job,
         cluster_info: ClusterInfo,
         secrets: Dict[str, str],
         repo_credentials: Optional[RemoteRepoCreds],
@@ -81,6 +81,7 @@ class RunnerClient:
     ):
         # XXX: This is a quick-and-dirty hack to deliver InstanceModel-specific environment
         # variables to the runner without runner API modification.
+        job_spec = job.job_spec
         if instance_env is not None:
             if isinstance(instance_env, Env):
                 merged_env = instance_env.as_dict()
@@ -90,11 +91,13 @@ class RunnerClient:
             job_spec = job_spec.copy(deep=True)
             job_spec.env = merged_env
         body = SubmitBody(
-            run_spec=run_spec,
+            run=run,
             job_spec=job_spec,
+            job_submission=job.job_submissions[-1],
             cluster_info=cluster_info,
             secrets=secrets,
             repo_credentials=repo_credentials,
+            run_spec=run.run_spec,
         )
         resp = requests.post(
             # use .json() to encode enums

dstack/_internal/server/services/runs.py CHANGED Viewed

@@ -16,7 +16,7 @@ from dstack._internal.core.errors import (
     ServerClientError,
 )
 from dstack._internal.core.models.common import ApplyAction
-from dstack._internal.core.models.configurations import AnyRunConfiguration
+from dstack._internal.core.models.configurations import RUN_PRIORITY_DEFAULT, AnyRunConfiguration
 from dstack._internal.core.models.instances import (
     InstanceAvailability,
     InstanceOfferWithAvailability,
@@ -434,7 +434,12 @@ async def apply_plan(
     # FIXME: potentially long write transaction
     # Avoid getting run_model after update
     await session.execute(
-        update(RunModel).where(RunModel.id == current_resource.id).values(run_spec=run_spec.json())
+        update(RunModel)
+        .where(RunModel.id == current_resource.id)
+        .values(
+            run_spec=run_spec.json(),
+            priority=run_spec.configuration.priority,
+        )
     )
     run = await get_run_by_name(
         session=session,
@@ -495,6 +500,7 @@ async def submit_run(
             status=RunStatus.SUBMITTED,
             run_spec=run_spec.json(),
             last_processed_at=submitted_at,
+            priority=run_spec.configuration.priority,
         )
         session.add(run_model)
@@ -721,15 +727,15 @@ async def _get_pool_offers(
     pool_instances = [i for i in pool_instances if i.id not in detaching_instances_ids]
     multinode = job.job_spec.jobs_per_replica > 1
-    if not multinode:
-        shared_instances_with_offers = get_shared_pool_instances_with_offers(
-            pool_instances=pool_instances,
-            profile=run_spec.merged_profile,
-            requirements=job.job_spec.requirements,
-            volumes=volumes,
-        )
-        for _, offer in shared_instances_with_offers:
-            pool_offers.append(offer)
+    shared_instances_with_offers = get_shared_pool_instances_with_offers(
+        pool_instances=pool_instances,
+        profile=run_spec.merged_profile,
+        requirements=job.job_spec.requirements,
+        volumes=volumes,
+        multinode=multinode,
+    )
+    for _, offer in shared_instances_with_offers:
+        pool_offers.append(offer)
     nonshared_instances = filter_pool_instances(
         pool_instances=pool_instances,
@@ -852,6 +858,13 @@ def _get_job_submission_cost(job_submission: JobSubmission) -> float:
 def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
+    # This function may set defaults for null run_spec values,
+    # although most defaults are resolved when building job_spec
+    # so that we can keep both the original user-supplied value (null in run_spec)
+    # and the default in job_spec.
+    # If a property is stored in job_spec - resolve the default there.
+    # Server defaults are preferable over client defaults so that
+    # the defaults depend on the server version, not the client version.
     if run_spec.run_name is not None:
         validate_dstack_resource_name(run_spec.run_name)
     for mount_point in run_spec.configuration.volumes:
@@ -870,16 +883,19 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
     if (
         run_spec.merged_profile.utilization_policy is not None
         and run_spec.merged_profile.utilization_policy.time_window
-        > settings.SERVER_METRICS_TTL_SECONDS
+        > settings.SERVER_METRICS_RUNNING_TTL_SECONDS
     ):
         raise ServerClientError(
-            f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_TTL_SECONDS}s"
+            f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_RUNNING_TTL_SECONDS}s"
         )
+    if run_spec.configuration.priority is None:
+        run_spec.configuration.priority = RUN_PRIORITY_DEFAULT
     set_resources_defaults(run_spec.configuration.resources)
 _UPDATABLE_SPEC_FIELDS = ["repo_code_hash", "configuration"]
-_CONF_TYPE_TO_UPDATABLE_FIELDS = {
+_CONF_UPDATABLE_FIELDS = ["priority"]
+_TYPE_SPECIFIC_CONF_UPDATABLE_FIELDS = {
     "dev-environment": ["inactivity_duration"],
     # Most service fields can be updated via replica redeployment.
     # TODO: Allow updating other fields when rolling deployment is supported.
@@ -915,12 +931,9 @@ def _check_can_update_configuration(
         raise ServerClientError(
             f"Configuration type changed from {current.type} to {new.type}, cannot update"
         )
-    updatable_fields = _CONF_TYPE_TO_UPDATABLE_FIELDS.get(new.type)
-    if updatable_fields is None:
-        raise ServerClientError(
-            f"Can only update {', '.join(_CONF_TYPE_TO_UPDATABLE_FIELDS)} configurations."
-            f" Not {new.type}"
-        )
+    updatable_fields = _CONF_UPDATABLE_FIELDS + _TYPE_SPECIFIC_CONF_UPDATABLE_FIELDS.get(
+        new.type, []
+    )
     diff = diff_models(current, new)
     changed_fields = list(diff.keys())
     for key in changed_fields:

dstack/_internal/server/settings.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
+import warnings
 from pathlib import Path
 DSTACK_DIR_PATH = Path("~/.dstack/").expanduser()
@@ -45,7 +46,25 @@ SERVER_CLOUDWATCH_LOG_REGION = os.getenv("DSTACK_SERVER_CLOUDWATCH_LOG_REGION")
 SERVER_GCP_LOGGING_PROJECT = os.getenv("DSTACK_SERVER_GCP_LOGGING_PROJECT")
-SERVER_METRICS_TTL_SECONDS = int(os.getenv("DSTACK_SERVER_METRICS_TTL_SECONDS", 3600))
+SERVER_METRICS_RUNNING_TTL_SECONDS: int
+_SERVER_METRICS_RUNNING_TTL_SECONDS = os.getenv("DSTACK_SERVER_METRICS_RUNNING_TTL_SECONDS")
+if _SERVER_METRICS_RUNNING_TTL_SECONDS is None:
+    _SERVER_METRICS_RUNNING_TTL_SECONDS = os.getenv("DSTACK_SERVER_METRICS_TTL_SECONDS")
+    if _SERVER_METRICS_RUNNING_TTL_SECONDS is not None:
+        warnings.warn(
+            (
+                "DSTACK_SERVER_METRICS_TTL_SECONDS is deprecated,"
+                " use DSTACK_SERVER_METRICS_RUNNING_TTL_SECONDS instead"
+            ),
+            DeprecationWarning,
+        )
+    else:
+        _SERVER_METRICS_RUNNING_TTL_SECONDS = 3600
+SERVER_METRICS_RUNNING_TTL_SECONDS = int(_SERVER_METRICS_RUNNING_TTL_SECONDS)
+del _SERVER_METRICS_RUNNING_TTL_SECONDS
+SERVER_METRICS_FINISHED_TTL_SECONDS = int(
+    os.getenv("DSTACK_SERVER_METRICS_FINISHED_TTL_SECONDS", 7 * 24 * 3600)
+)
 DEFAULT_PROJECT_NAME = "main"
@@ -66,6 +85,7 @@ DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE = int(
 USER_PROJECT_DEFAULT_QUOTA = int(os.getenv("DSTACK_USER_PROJECT_DEFAULT_QUOTA", 10))
 FORBID_SERVICES_WITHOUT_GATEWAY = os.getenv("DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY") is not None
+SERVER_CODE_UPLOAD_LIMIT = int(os.getenv("DSTACK_SERVER_CODE_UPLOAD_LIMIT", 2 * 2**20))
 # Development settings

dstack 0.19.8__py3-none-any.whl → 0.19.10__py3-none-any.whl

Potentially problematic release.

dstack 0.19.8__py3-none-any.whl → 0.19.10__py3-none-any.whl