PyPI - dstack - Versions diffs - 0.19.20__py3-none-any.whl → 0.19.21__py3-none-any.whl - Mend

dstack 0.19.20__py3-none-any.whl → 0.19.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dstack might be problematic. Click here for more details.

Files changed (44) [hide / show]

dstack/_internal/server/services/gateways/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import asyncio
 import datetime
 import uuid
-from datetime import timedelta, timezone
+from datetime import timedelta
 from functools import partial
 from typing import List, Optional, Sequence
@@ -11,16 +11,16 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import selectinload
 import dstack._internal.utils.random_names as random_names
-from dstack._internal.core.backends import (
-    BACKENDS_WITH_GATEWAY_SUPPORT,
-    BACKENDS_WITH_PRIVATE_GATEWAY_SUPPORT,
-)
 from dstack._internal.core.backends.base.compute import (
     Compute,
     ComputeWithGatewaySupport,
     get_dstack_gateway_wheel,
     get_dstack_runner_version,
 )
+from dstack._internal.core.backends.features import (
+    BACKENDS_WITH_GATEWAY_SUPPORT,
+    BACKENDS_WITH_PRIVATE_GATEWAY_SUPPORT,
+)
 from dstack._internal.core.errors import (
     GatewayError,
     ResourceNotExistsError,
@@ -86,15 +86,6 @@ async def get_gateway_by_name(
     return gateway_model_to_gateway(gateway)
-async def get_project_default_gateway(
-    session: AsyncSession, project: ProjectModel
-) -> Optional[Gateway]:
-    gateway: Optional[GatewayModel] = project.default_gateway
-    if gateway is None:
-        return None
-    return gateway_model_to_gateway(gateway)
 async def create_gateway_compute(
     project_name: str,
     backend_compute: Compute,
@@ -181,9 +172,9 @@ async def create_gateway(
         session.add(gateway)
         await session.commit()
-        if project.default_gateway is None or configuration.default:
+        default_gateway = await get_project_default_gateway_model(session=session, project=project)
+        if default_gateway is None or configuration.default:
             await set_default_gateway(session=session, project=project, name=configuration.name)
         return gateway_model_to_gateway(gateway)
@@ -349,6 +340,15 @@ async def get_project_gateway_model_by_name(
     return res.scalar()
+async def get_project_default_gateway_model(
+    session: AsyncSession, project: ProjectModel
+) -> Optional[GatewayModel]:
+    res = await session.execute(
+        select(GatewayModel).where(GatewayModel.id == project.default_gateway_id)
+    )
+    return res.scalar_one_or_none()
 async def generate_gateway_name(session: AsyncSession, project: ProjectModel) -> str:
     gateways = await list_project_gateway_models(session=session, project=project)
     names = {g.name for g in gateways}
@@ -557,7 +557,7 @@ def gateway_model_to_gateway(gateway_model: GatewayModel) -> Gateway:
         region=gateway_model.region,
         wildcard_domain=gateway_model.wildcard_domain,
         default=gateway_model.project.default_gateway_id == gateway_model.id,
-        created_at=gateway_model.created_at.replace(tzinfo=timezone.utc),
+        created_at=gateway_model.created_at,
         status=gateway_model.status,
         status_message=gateway_model.status_message,
         configuration=configuration,

dstack/_internal/server/services/instances.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import uuid
 from collections.abc import Container, Iterable
-from datetime import datetime, timezone
+from datetime import datetime
 from typing import Dict, List, Literal, Optional, Union
 import gpuhunt
@@ -8,11 +8,11 @@ from sqlalchemy import and_, or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import joinedload
-from dstack._internal.core.backends import BACKENDS_WITH_MULTINODE_SUPPORT
 from dstack._internal.core.backends.base.offers import (
     offer_to_catalog_item,
     requirements_to_query_filter,
 )
+from dstack._internal.core.backends.features import BACKENDS_WITH_MULTINODE_SUPPORT
 from dstack._internal.core.models.backends.base import BackendType
 from dstack._internal.core.models.envs import Env
 from dstack._internal.core.models.instances import (
@@ -34,7 +34,6 @@ from dstack._internal.core.models.profiles import (
     TerminationPolicy,
 )
 from dstack._internal.core.models.runs import JobProvisioningData, Requirements
-from dstack._internal.core.models.users import GlobalRole
 from dstack._internal.core.models.volumes import Volume
 from dstack._internal.core.services.profiles import get_termination
 from dstack._internal.server.models import (
@@ -44,7 +43,7 @@ from dstack._internal.server.models import (
     UserModel,
 )
 from dstack._internal.server.services.offers import generate_shared_offer
-from dstack._internal.server.services.projects import list_project_models, list_user_project_models
+from dstack._internal.server.services.projects import list_user_project_models
 from dstack._internal.utils import common as common_utils
 from dstack._internal.utils.logging import get_logger
@@ -62,7 +61,7 @@ def instance_model_to_instance(instance_model: InstanceModel) -> Instance:
         status=instance_model.status,
         unreachable=instance_model.unreachable,
         termination_reason=instance_model.termination_reason,
-        created=instance_model.created_at.replace(tzinfo=timezone.utc),
+        created=instance_model.created_at,
         total_blocks=instance_model.total_blocks,
         busy_blocks=instance_model.busy_blocks,
     )
@@ -372,18 +371,15 @@ async def list_user_instances(
     limit: int,
     ascending: bool,
 ) -> List[Instance]:
-    if user.global_role == GlobalRole.ADMIN:
-        projects = await list_project_models(session=session)
-    else:
-        projects = await list_user_project_models(session=session, user=user)
-    if not projects:
-        return []
+    projects = await list_user_project_models(
+        session=session,
+        user=user,
+        only_names=True,
+    )
     if project_names is not None:
-        projects = [proj for proj in projects if proj.name in project_names]
+        projects = [p for p in projects if p.name in project_names]
         if len(projects) == 0:
             return []
     instance_models = await list_projects_instance_models(
         session=session,
         projects=projects,

dstack/_internal/server/services/jobs/__init__.py CHANGED Viewed

@@ -1,13 +1,13 @@
 import itertools
 import json
-from datetime import timedelta, timezone
+from datetime import timedelta
 from typing import Dict, Iterable, List, Optional, Tuple
 from uuid import UUID
 import requests
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload
+from sqlalchemy.orm import joinedload, load_only
 import dstack._internal.server.services.backends as backends_services
 from dstack._internal.core.backends.base.backend import Backend
@@ -130,7 +130,7 @@ def job_model_to_job_submission(job_model: JobModel) -> JobSubmission:
         ):
             backend_data = json.loads(job_provisioning_data.backend_data)
             job_provisioning_data.backend = backend_data["base_backend"]
-    last_processed_at = job_model.last_processed_at.replace(tzinfo=timezone.utc)
+    last_processed_at = job_model.last_processed_at
     finished_at = None
     if job_model.status.is_finished():
         finished_at = last_processed_at
@@ -140,7 +140,7 @@ def job_model_to_job_submission(job_model: JobModel) -> JobSubmission:
         id=job_model.id,
         submission_num=job_model.submission_num,
         deployment_num=job_model.deployment_num,
-        submitted_at=job_model.submitted_at.replace(tzinfo=timezone.utc),
+        submitted_at=job_model.submitted_at,
         last_processed_at=last_processed_at,
         finished_at=finished_at,
         inactivity_secs=job_model.inactivity_secs,
@@ -231,10 +231,7 @@ async def process_terminating_job(
     Graceful stop should already be done by `process_terminating_run`.
     Caller must acquire the locks on the job and the job's instance.
     """
-    if (
-        job_model.remove_at is not None
-        and job_model.remove_at.replace(tzinfo=timezone.utc) > common.get_current_datetime()
-    ):
+    if job_model.remove_at is not None and job_model.remove_at > common.get_current_datetime():
         # it's too early to terminate the instance
         return
@@ -550,24 +547,25 @@ def _should_force_detach_volume(job_model: JobModel, stop_duration: Optional[int
     return (
         job_model.volumes_detached_at is not None
         and common.get_current_datetime()
-        > job_model.volumes_detached_at.replace(tzinfo=timezone.utc) + MIN_FORCE_DETACH_WAIT_PERIOD
+        > job_model.volumes_detached_at + MIN_FORCE_DETACH_WAIT_PERIOD
         and (
             job_model.termination_reason == JobTerminationReason.ABORTED_BY_USER
             or stop_duration is not None
             and common.get_current_datetime()
-            > job_model.volumes_detached_at.replace(tzinfo=timezone.utc)
-            + timedelta(seconds=stop_duration)
+            > job_model.volumes_detached_at + timedelta(seconds=stop_duration)
         )
     )
 async def get_instances_ids_with_detaching_volumes(session: AsyncSession) -> List[UUID]:
     res = await session.execute(
-        select(JobModel).where(
+        select(JobModel)
+        .where(
             JobModel.status == JobStatus.TERMINATING,
             JobModel.used_instance_id.is_not(None),
             JobModel.volumes_detached_at.is_not(None),
         )
+        .options(load_only(JobModel.used_instance_id))
     )
     job_models = res.scalars().all()
     return [jm.used_instance_id for jm in job_models if jm.used_instance_id]

dstack/_internal/server/services/offers.py CHANGED Viewed

@@ -2,13 +2,13 @@ from typing import List, Literal, Optional, Tuple, Union
 import gpuhunt
-from dstack._internal.core.backends import (
+from dstack._internal.core.backends.base.backend import Backend
+from dstack._internal.core.backends.base.compute import ComputeWithPlacementGroupSupport
+from dstack._internal.core.backends.features import (
     BACKENDS_WITH_CREATE_INSTANCE_SUPPORT,
     BACKENDS_WITH_MULTINODE_SUPPORT,
     BACKENDS_WITH_RESERVATION_SUPPORT,
 )
-from dstack._internal.core.backends.base.backend import Backend
-from dstack._internal.core.backends.base.compute import ComputeWithPlacementGroupSupport
 from dstack._internal.core.models.backends.base import BackendType
 from dstack._internal.core.models.instances import (
     InstanceOfferWithAvailability,

dstack/_internal/server/services/projects.py CHANGED Viewed

@@ -1,11 +1,10 @@
 import uuid
-from datetime import timezone
 from typing import Awaitable, Callable, List, Optional, Tuple
 from sqlalchemy import delete, select, update
 from sqlalchemy import func as safunc
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload
+from sqlalchemy.orm import QueryableAttribute, joinedload, load_only
 from dstack._internal.core.backends.configurators import get_configurator
 from dstack._internal.core.backends.dstack.models import (
@@ -54,13 +53,12 @@ async def list_user_projects(
     user: UserModel,
 ) -> List[Project]:
     """
-    Returns projects where the user is a member.
+    Returns projects where the user is a member or all projects for global admins.
     """
-    if user.global_role == GlobalRole.ADMIN:
-        projects = await list_project_models(session=session)
-    else:
-        projects = await list_user_project_models(session=session, user=user)
+    projects = await list_user_project_models(
+        session=session,
+        user=user,
+    )
     projects = sorted(projects, key=lambda p: p.created_at)
     return [
         project_model_to_project(p, include_backends=False, include_members=False)
@@ -80,7 +78,7 @@ async def list_user_accessible_projects(
     if user.global_role == GlobalRole.ADMIN:
         projects = await list_project_models(session=session)
     else:
-        member_projects = await list_user_project_models(session=session, user=user)
+        member_projects = await list_member_project_models(session=session, user=user)
         public_projects = await list_public_non_member_project_models(session=session, user=user)
         projects = member_projects + public_projects
@@ -167,7 +165,7 @@ async def delete_projects(
     projects_names: List[str],
 ):
     if user.global_role != GlobalRole.ADMIN:
-        user_projects = await list_user_project_models(
+        user_projects = await list_member_project_models(
             session=session, user=user, include_members=True
         )
         user_project_names = [p.name for p in user_projects]
@@ -339,9 +337,25 @@ async def clear_project_members(
 async def list_user_project_models(
+    session: AsyncSession,
+    user: UserModel,
+    only_names: bool = False,
+) -> List[ProjectModel]:
+    load_only_attrs = []
+    if only_names:
+        load_only_attrs += [ProjectModel.id, ProjectModel.name]
+    if user.global_role == GlobalRole.ADMIN:
+        return await list_project_models(session=session, load_only_attrs=load_only_attrs)
+    return await list_member_project_models(
+        session=session, user=user, load_only_attrs=load_only_attrs
+    )
+async def list_member_project_models(
     session: AsyncSession,
     user: UserModel,
     include_members: bool = False,
+    load_only_attrs: Optional[List[QueryableAttribute]] = None,
 ) -> List[ProjectModel]:
     """
     List project models for a user where they are a member.
@@ -349,6 +363,8 @@ async def list_user_project_models(
     options = []
     if include_members:
         options.append(joinedload(ProjectModel.members))
+    if load_only_attrs:
+        options.append(load_only(*load_only_attrs))
     res = await session.execute(
         select(ProjectModel)
         .where(
@@ -395,13 +411,20 @@ async def list_user_owned_project_models(
 async def list_project_models(
     session: AsyncSession,
+    load_only_attrs: Optional[List[QueryableAttribute]] = None,
 ) -> List[ProjectModel]:
+    options = []
+    if load_only_attrs:
+        options.append(load_only(*load_only_attrs))
     res = await session.execute(
-        select(ProjectModel).where(ProjectModel.deleted == False),
+        select(ProjectModel).where(ProjectModel.deleted == False).options(*options)
     )
     return list(res.scalars().all())
+# TODO: Do not load ProjectModel.backends and ProjectModel.members by default when getting project
 async def get_project_model_by_name(
     session: AsyncSession, project_name: str, ignore_case: bool = True
 ) -> Optional[ProjectModel]:
@@ -415,7 +438,6 @@ async def get_project_model_by_name(
         .where(*filters)
         .options(joinedload(ProjectModel.backends))
         .options(joinedload(ProjectModel.members))
-        .options(joinedload(ProjectModel.default_gateway))
     )
     return res.unique().scalar()
@@ -432,7 +454,6 @@ async def get_project_model_by_name_or_error(
         )
         .options(joinedload(ProjectModel.backends))
         .options(joinedload(ProjectModel.members))
-        .options(joinedload(ProjectModel.default_gateway))
     )
     return res.unique().scalar_one()
@@ -449,7 +470,6 @@ async def get_project_model_by_id_or_error(
         )
         .options(joinedload(ProjectModel.backends))
         .options(joinedload(ProjectModel.members))
-        .options(joinedload(ProjectModel.default_gateway))
     )
     return res.unique().scalar_one()
@@ -537,7 +557,7 @@ def project_model_to_project(
         project_id=project_model.id,
         project_name=project_model.name,
         owner=users.user_model_to_user(project_model.owner),
-        created_at=project_model.created_at.replace(tzinfo=timezone.utc),
+        created_at=project_model.created_at,
         backends=backends,
         members=members,
         is_public=project_model.is_public,

dstack/_internal/server/services/prometheus/client_metrics.py CHANGED Viewed

@@ -5,6 +5,9 @@ class RunMetrics:
     """Wrapper class for run-related Prometheus metrics."""
     def __init__(self):
+        # submit_to_provision_duration reflects real provisioning time
+        # but does not reflect how quickly provisioning processing works
+        # since it includes scheduling time, retrying, etc.
         self._submit_to_provision_duration = Histogram(
             "dstack_submit_to_provision_duration_seconds",
             "Time from when a run has been submitted and first job provisioning",

dstack/_internal/server/services/prometheus/custom_metrics.py CHANGED Viewed

@@ -2,7 +2,6 @@ import itertools
 import json
 from collections import defaultdict
 from collections.abc import Generator, Iterable
-from datetime import timezone
 from typing import ClassVar
 from uuid import UUID
@@ -80,7 +79,7 @@ async def get_instance_metrics(session: AsyncSession) -> Iterable[Metric]:
             "dstack_backend": instance.backend.value if instance.backend is not None else "",
             "dstack_gpu": gpu,
         }
-        duration = (now - instance.created_at.replace(tzinfo=timezone.utc)).total_seconds()
+        duration = (now - instance.created_at).total_seconds()
         metrics.add_sample(_INSTANCE_DURATION, labels, duration)
         metrics.add_sample(_INSTANCE_PRICE, labels, instance.price or 0.0)
         metrics.add_sample(_INSTANCE_GPU_COUNT, labels, gpu_count)
@@ -167,7 +166,7 @@ async def get_job_metrics(session: AsyncSession) -> Iterable[Metric]:
             "dstack_backend": jpd.get_base_backend().value,
             "dstack_gpu": gpus[0].name if gpus else "",
         }
-        duration = (now - job.submitted_at.replace(tzinfo=timezone.utc)).total_seconds()
+        duration = (now - job.submitted_at).total_seconds()
         metrics.add_sample(_JOB_DURATION, labels, duration)
         metrics.add_sample(_JOB_PRICE, labels, price)
         metrics.add_sample(_JOB_GPU_COUNT, labels, len(gpus))

dstack/_internal/server/services/runs.py CHANGED Viewed

@@ -5,9 +5,10 @@ from datetime import datetime, timezone
 from typing import List, Optional
 import pydantic
+from apscheduler.triggers.cron import CronTrigger
 from sqlalchemy import and_, func, or_, select, update
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload, selectinload
+from sqlalchemy.orm import joinedload
 import dstack._internal.utils.common as common_utils
 from dstack._internal.core.errors import (
@@ -42,7 +43,6 @@ from dstack._internal.core.models.runs import (
     RunTerminationReason,
     ServiceSpec,
 )
-from dstack._internal.core.models.users import GlobalRole
 from dstack._internal.core.models.volumes import (
     InstanceMountPoint,
     Volume,
@@ -81,7 +81,7 @@ from dstack._internal.server.services.locking import get_locker, string_to_lock_
 from dstack._internal.server.services.logging import fmt
 from dstack._internal.server.services.offers import get_offers_by_requirements
 from dstack._internal.server.services.plugins import apply_plugin_policies
-from dstack._internal.server.services.projects import list_project_models, list_user_project_models
+from dstack._internal.server.services.projects import list_user_project_models
 from dstack._internal.server.services.resources import set_resources_defaults
 from dstack._internal.server.services.secrets import get_project_secrets_mapping
 from dstack._internal.server.services.users import get_user_model_by_name
@@ -115,10 +115,11 @@ async def list_user_runs(
 ) -> List[Run]:
     if project_name is None and repo_id is not None:
         return []
-    if user.global_role == GlobalRole.ADMIN:
-        projects = await list_project_models(session=session)
-    else:
-        projects = await list_user_project_models(session=session, user=user)
+    projects = await list_user_project_models(
+        session=session,
+        user=user,
+        only_names=True,
+    )
     runs_user = None
     if username is not None:
         runs_user = await get_user_model_by_name(session=session, username=username)
@@ -217,9 +218,9 @@ async def list_projects_run_models(
     res = await session.execute(
         select(RunModel)
         .where(*filters)
+        .options(joinedload(RunModel.user).load_only(UserModel.name))
         .order_by(*order_by)
         .limit(limit)
-        .options(selectinload(RunModel.user))
     )
     run_models = list(res.scalars().all())
     return run_models
@@ -511,6 +512,14 @@ async def submit_run(
         )
         submitted_at = common_utils.get_current_datetime()
+        initial_status = RunStatus.SUBMITTED
+        initial_replicas = 1
+        if run_spec.merged_profile.schedule is not None:
+            initial_status = RunStatus.PENDING
+            initial_replicas = 0
+        elif run_spec.configuration.type == "service":
+            initial_replicas = run_spec.configuration.replicas.min
         run_model = RunModel(
             id=uuid.uuid4(),
             project_id=project.id,
@@ -519,21 +528,20 @@ async def submit_run(
             user_id=user.id,
             run_name=run_spec.run_name,
             submitted_at=submitted_at,
-            status=RunStatus.SUBMITTED,
+            status=initial_status,
             run_spec=run_spec.json(),
             last_processed_at=submitted_at,
             priority=run_spec.configuration.priority,
             deployment_num=0,
             desired_replica_count=1,  # a relevant value will be set in process_runs.py
+            next_triggered_at=_get_next_triggered_at(run_spec),
         )
         session.add(run_model)
-        replicas = 1
         if run_spec.configuration.type == "service":
-            replicas = run_spec.configuration.replicas.min
             await services.register_service(session, run_model, run_spec)
-        for replica_num in range(replicas):
+        for replica_num in range(initial_replicas):
             jobs = await get_jobs_from_run_spec(
                 run_spec=run_spec,
                 secrets=secrets,
@@ -693,8 +701,8 @@ def run_model_to_run(
         id=run_model.id,
         project_name=run_model.project.name,
         user=run_model.user.name,
-        submitted_at=run_model.submitted_at.replace(tzinfo=timezone.utc),
-        last_processed_at=run_model.last_processed_at.replace(tzinfo=timezone.utc),
+        submitted_at=run_model.submitted_at,
+        last_processed_at=run_model.last_processed_at,
         status=run_model.status,
         status_message=status_message,
         termination_reason=run_model.termination_reason,
@@ -972,6 +980,12 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
         raise ServerClientError(
             f"Maximum utilization_policy.time_window is {settings.SERVER_METRICS_RUNNING_TTL_SECONDS}s"
         )
+    if (
+        run_spec.merged_profile.schedule
+        and run_spec.configuration.type == "service"
+        and run_spec.configuration.replicas.min == 0
+    ):
+        raise ServerClientError("Scheduled services with autoscaling to zero are not supported")
     if run_spec.configuration.priority is None:
         run_spec.configuration.priority = RUN_PRIORITY_DEFAULT
     set_resources_defaults(run_spec.configuration.resources)
@@ -1059,7 +1073,7 @@ def _check_can_update_configuration(
             )
-async def process_terminating_run(session: AsyncSession, run: RunModel):
+async def process_terminating_run(session: AsyncSession, run_model: RunModel):
     """
     Used by both `process_runs` and `stop_run` to process a TERMINATING run.
     Stops the jobs gracefully and marks them as TERMINATING.
@@ -1067,44 +1081,54 @@ async def process_terminating_run(session: AsyncSession, run: RunModel):
     When all jobs are terminated, assigns a finished status to the run.
     Caller must acquire the lock on run.
     """
-    assert run.termination_reason is not None
-    job_termination_reason = run.termination_reason.to_job_termination_reason()
+    assert run_model.termination_reason is not None
+    run = run_model_to_run(run_model, include_jobs=False)
+    job_termination_reason = run_model.termination_reason.to_job_termination_reason()
     unfinished_jobs_count = 0
-    for job in run.jobs:
-        if job.status.is_finished():
+    for job_model in run_model.jobs:
+        if job_model.status.is_finished():
             continue
         unfinished_jobs_count += 1
-        if job.status == JobStatus.TERMINATING:
+        if job_model.status == JobStatus.TERMINATING:
             if job_termination_reason == JobTerminationReason.ABORTED_BY_USER:
                 # Override termination reason so that
                 # abort actions such as volume force detach are triggered
-                job.termination_reason = job_termination_reason
+                job_model.termination_reason = job_termination_reason
             continue
-        if job.status == JobStatus.RUNNING and job_termination_reason not in {
+        if job_model.status == JobStatus.RUNNING and job_termination_reason not in {
             JobTerminationReason.ABORTED_BY_USER,
             JobTerminationReason.DONE_BY_RUNNER,
         }:
             # Send a signal to stop the job gracefully
-            await stop_runner(session, job)
-            delay_job_instance_termination(job)
-        job.status = JobStatus.TERMINATING
-        job.termination_reason = job_termination_reason
-        job.last_processed_at = common_utils.get_current_datetime()
+            await stop_runner(session, job_model)
+            delay_job_instance_termination(job_model)
+        job_model.status = JobStatus.TERMINATING
+        job_model.termination_reason = job_termination_reason
+        job_model.last_processed_at = common_utils.get_current_datetime()
     if unfinished_jobs_count == 0:
-        if run.service_spec is not None:
+        if run_model.service_spec is not None:
             try:
-                await services.unregister_service(session, run)
+                await services.unregister_service(session, run_model)
             except Exception as e:
-                logger.warning("%s: failed to unregister service: %s", fmt(run), repr(e))
-        run.status = run.termination_reason.to_status()
+                logger.warning("%s: failed to unregister service: %s", fmt(run_model), repr(e))
+        if (
+            run.run_spec.merged_profile.schedule is not None
+            and run_model.termination_reason
+            not in [RunTerminationReason.ABORTED_BY_USER, RunTerminationReason.STOPPED_BY_USER]
+        ):
+            run_model.next_triggered_at = _get_next_triggered_at(run.run_spec)
+            run_model.status = RunStatus.PENDING
+        else:
+            run_model.status = run_model.termination_reason.to_status()
         logger.info(
             "%s: run status has changed TERMINATING -> %s, reason: %s",
-            fmt(run),
-            run.status.name,
-            run.termination_reason.name,
+            fmt(run_model),
+            run_model.status.name,
+            run_model.termination_reason.name,
         )
@@ -1224,3 +1248,19 @@ async def retry_run_replica_jobs(
 def _remove_job_spec_sensitive_info(spec: JobSpec):
     spec.ssh_key = None
+def _get_next_triggered_at(run_spec: RunSpec) -> Optional[datetime]:
+    if run_spec.merged_profile.schedule is None:
+        return None
+    now = common_utils.get_current_datetime()
+    fire_times = []
+    for cron in run_spec.merged_profile.schedule.crons:
+        cron_trigger = CronTrigger.from_crontab(cron, timezone=timezone.utc)
+        fire_times.append(
+            cron_trigger.get_next_fire_time(
+                previous_fire_time=None,
+                now=now,
+            )
+        )
+    return min(fire_times)

dstack/_internal/server/services/services/__init__.py CHANGED Viewed

@@ -28,6 +28,7 @@ from dstack._internal.server.models import GatewayModel, JobModel, ProjectModel,
 from dstack._internal.server.services.gateways import (
     get_gateway_configuration,
     get_or_add_gateway_connection,
+    get_project_default_gateway_model,
     get_project_gateway_model_by_name,
 )
 from dstack._internal.server.services.logging import fmt
@@ -52,7 +53,9 @@ async def register_service(session: AsyncSession, run_model: RunModel, run_spec:
     elif run_spec.configuration.gateway == False:
         gateway = None
     else:
-        gateway = run_model.project.default_gateway
+        gateway = await get_project_default_gateway_model(
+            session=session, project=run_model.project
+        )
     if gateway is not None:
         service_spec = await _register_service_in_gateway(session, run_model, run_spec, gateway)

dstack 0.19.20__py3-none-any.whl → 0.19.21__py3-none-any.whl

Potentially problematic release.

dstack 0.19.20__py3-none-any.whl → 0.19.21__py3-none-any.whl