PyPI - dstack - Versions diffs - 0.19.20__py3-none-any.whl → 0.19.22__py3-none-any.whl - Mend

dstack 0.19.20__py3-none-any.whl → 0.19.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dstack might be problematic. Click here for more details.

Files changed (93) hide show

dstack/_internal/server/background/tasks/process_submitted_jobs.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import List, Optional, Tuple
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload, lazyload, selectinload
+from sqlalchemy.orm import joinedload, load_only, selectinload
 from dstack._internal.core.backends.base.backend import Backend
 from dstack._internal.core.backends.base.compute import ComputeWithVolumeSupport
@@ -43,6 +43,7 @@ from dstack._internal.server.models import (
     JobModel,
     ProjectModel,
     RunModel,
+    UserModel,
     VolumeAttachmentModel,
     VolumeModel,
 )
@@ -74,6 +75,7 @@ from dstack._internal.server.services.runs import (
 from dstack._internal.server.services.volumes import (
     volume_model_to_volume,
 )
+from dstack._internal.server.utils import sentry_utils
 from dstack._internal.utils import common as common_utils
 from dstack._internal.utils import env as env_utils
 from dstack._internal.utils.logging import get_logger
@@ -108,6 +110,7 @@ def _get_effective_batch_size(batch_size: int) -> int:
     return batch_size
+@sentry_utils.instrument_background_task
 async def _process_next_submitted_job():
     lock, lockset = get_locker(get_db().dialect_name).get_lockset(JobModel.__tablename__)
     async with get_session_ctx() as session:
@@ -119,6 +122,7 @@ async def _process_next_submitted_job():
                     JobModel.status == JobStatus.SUBMITTED,
                     JobModel.id.not_in(lockset),
                 )
+                .options(load_only(JobModel.id))
                 # Jobs are process in FIFO sorted by priority globally,
                 # thus runs from different projects can "overtake" each other by using higher priorities.
                 # That's not a big problem as long as projects do not compete for the same compute resources.
@@ -151,9 +155,7 @@ async def _process_next_submitted_job():
 async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
-    logger.debug("%s: provisioning has started", fmt(job_model))
     # Refetch to load related attributes.
-    # joinedload produces LEFT OUTER JOIN that can't be used with FOR UPDATE.
     res = await session.execute(
         select(JobModel).where(JobModel.id == job_model.id).options(joinedload(JobModel.instance))
     )
@@ -162,15 +164,16 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
         select(RunModel)
         .where(RunModel.id == job_model.run_id)
         .options(joinedload(RunModel.project).joinedload(ProjectModel.backends))
-        .options(joinedload(RunModel.user))
+        .options(joinedload(RunModel.user).load_only(UserModel.name))
         .options(joinedload(RunModel.fleet).joinedload(FleetModel.instances))
     )
     run_model = res.unique().scalar_one()
-    project = run_model.project
-    run_spec = RunSpec.__response__.parse_raw(run_model.run_spec)
-    profile = run_spec.merged_profile
+    logger.debug("%s: provisioning has started", fmt(job_model))
+    project = run_model.project
     run = run_model_to_run(run_model)
+    run_spec = run.run_spec
+    profile = run_spec.merged_profile
     job = find_job(run.jobs, job_model.replica_num, job_model.job_num)
     master_job = find_job(run.jobs, job_model.replica_num, 0)
@@ -228,7 +231,6 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
                 InstanceModel.deleted == False,
                 InstanceModel.total_blocks > InstanceModel.busy_blocks,
             )
-            .options(lazyload(InstanceModel.jobs))
             .order_by(InstanceModel.id)  # take locks in order
             .with_for_update(key_share=True)
         )
@@ -357,9 +359,9 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
     await session.execute(
         select(VolumeModel)
         .where(VolumeModel.id.in_(volumes_ids))
-        .options(selectinload(VolumeModel.user))
+        .options(joinedload(VolumeModel.user).load_only(UserModel.name))
         .order_by(VolumeModel.id)  # take locks in order
-        .with_for_update(key_share=True)
+        .with_for_update(key_share=True, of=VolumeModel)
     )
     async with get_locker(get_db().dialect_name).lock_ctx(VolumeModel.__tablename__, volumes_ids):
         if len(volume_models) > 0:

dstack/_internal/server/background/tasks/process_terminating_jobs.py CHANGED Viewed

@@ -2,7 +2,7 @@ import asyncio
 from sqlalchemy import or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload, lazyload
+from sqlalchemy.orm import joinedload
 from dstack._internal.core.models.runs import JobStatus
 from dstack._internal.server.db import get_db, get_session_ctx
@@ -18,7 +18,11 @@ from dstack._internal.server.services.jobs import (
 )
 from dstack._internal.server.services.locking import get_locker
 from dstack._internal.server.services.logging import fmt
-from dstack._internal.utils.common import get_current_datetime, get_or_error
+from dstack._internal.server.utils import sentry_utils
+from dstack._internal.utils.common import (
+    get_current_datetime,
+    get_or_error,
+)
 from dstack._internal.utils.logging import get_logger
 logger = get_logger(__name__)
@@ -31,6 +35,7 @@ async def process_terminating_jobs(batch_size: int = 1):
     await asyncio.gather(*tasks)
+@sentry_utils.instrument_background_task
 async def _process_next_terminating_job():
     job_lock, job_lockset = get_locker(get_db().dialect_name).get_lockset(JobModel.__tablename__)
     instance_lock, instance_lockset = get_locker(get_db().dialect_name).get_lockset(
@@ -43,7 +48,10 @@ async def _process_next_terminating_job():
                 .where(
                     JobModel.id.not_in(job_lockset),
                     JobModel.status == JobStatus.TERMINATING,
-                    or_(JobModel.remove_at.is_(None), JobModel.remove_at < get_current_datetime()),
+                    or_(
+                        JobModel.remove_at.is_(None),
+                        JobModel.remove_at < get_current_datetime(),
+                    ),
                 )
                 .order_by(JobModel.last_processed_at.asc())
                 .limit(1)
@@ -59,7 +67,6 @@ async def _process_next_terminating_job():
                         InstanceModel.id == job_model.used_instance_id,
                         InstanceModel.id.not_in(instance_lockset),
                     )
-                    .options(lazyload(InstanceModel.jobs))
                     .with_for_update(skip_locked=True, key_share=True)
                 )
                 instance_model = res.scalar()
@@ -88,6 +95,7 @@ async def _process_job(session: AsyncSession, job_model: JobModel):
         .options(
             joinedload(InstanceModel.project).joinedload(ProjectModel.backends),
             joinedload(InstanceModel.volume_attachments).joinedload(VolumeAttachmentModel.volume),
+            joinedload(InstanceModel.jobs).load_only(JobModel.id),
         )
     )
     instance_model = res.unique().scalar()

dstack/_internal/server/background/tasks/process_volumes.py CHANGED Viewed

@@ -7,6 +7,7 @@ from dstack._internal.core.errors import BackendError, BackendNotAvailable
 from dstack._internal.core.models.volumes import VolumeStatus
 from dstack._internal.server.db import get_db, get_session_ctx
 from dstack._internal.server.models import (
+    FleetModel,
     InstanceModel,
     ProjectModel,
     VolumeAttachmentModel,
@@ -15,12 +16,14 @@ from dstack._internal.server.models import (
 from dstack._internal.server.services import backends as backends_services
 from dstack._internal.server.services import volumes as volumes_services
 from dstack._internal.server.services.locking import get_locker
+from dstack._internal.server.utils import sentry_utils
 from dstack._internal.utils.common import get_current_datetime, run_async
 from dstack._internal.utils.logging import get_logger
 logger = get_logger(__name__)
+@sentry_utils.instrument_background_task
 async def process_submitted_volumes():
     lock, lockset = get_locker(get_db().dialect_name).get_lockset(VolumeModel.__tablename__)
     async with get_session_ctx() as session:
@@ -49,7 +52,6 @@ async def process_submitted_volumes():
 async def _process_submitted_volume(session: AsyncSession, volume_model: VolumeModel):
     logger.info("Started submitted volume %s processing", volume_model.name)
     # Refetch to load related attributes.
-    # joinedload produces LEFT OUTER JOIN that can't be used with FOR UPDATE.
     res = await session.execute(
         select(VolumeModel)
         .where(VolumeModel.id == volume_model.id)
@@ -59,6 +61,7 @@ async def _process_submitted_volume(session: AsyncSession, volume_model: VolumeM
             joinedload(VolumeModel.attachments)
             .joinedload(VolumeAttachmentModel.instance)
             .joinedload(InstanceModel.fleet)
+            .load_only(FleetModel.name)
         )
         .execution_options(populate_existing=True)
     )

dstack/_internal/server/migrations/versions/25479f540245_add_probes.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""Add probes
+Revision ID: 25479f540245
+Revises: 50dd7ea98639
+Create Date: 2025-08-03 19:51:07.722217
+"""
+import sqlalchemy as sa
+import sqlalchemy_utils
+from alembic import op
+import dstack._internal.server.models
+# revision identifiers, used by Alembic.
+revision = "25479f540245"
+down_revision = "50dd7ea98639"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "probes",
+        sa.Column("id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False),
+        sa.Column("name", sa.String(length=100), nullable=False),
+        sa.Column("job_id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False),
+        sa.Column("probe_num", sa.Integer(), nullable=False),
+        sa.Column("due", dstack._internal.server.models.NaiveDateTime(), nullable=False),
+        sa.Column("success_streak", sa.BigInteger(), nullable=False),
+        sa.Column("active", sa.Boolean(), nullable=False),
+        sa.ForeignKeyConstraint(["job_id"], ["jobs.id"], name=op.f("fk_probes_job_id_jobs")),
+        sa.PrimaryKeyConstraint("id", "job_id", name=op.f("pk_probes")),
+        sa.UniqueConstraint("job_id", "probe_num", name="uq_probes_job_id_probe_num"),
+    )
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table("probes")
+    # ### end Alembic commands ###

dstack/_internal/server/migrations/versions/50dd7ea98639_index_status_columns.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""Index status columns
+Revision ID: 50dd7ea98639
+Revises: ec02a26a256c
+Create Date: 2025-07-25 10:36:25.127923
+"""
+from alembic import op
+# revision identifiers, used by Alembic.
+revision = "50dd7ea98639"
+down_revision = "ec02a26a256c"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_runs_status"), ["status"], unique=False)
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_jobs_status"), ["status"], unique=False)
+    with op.batch_alter_table("fleets", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_fleets_status"), ["status"], unique=False)
+    with op.batch_alter_table("instances", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_instances_status"), ["status"], unique=False)
+    with op.batch_alter_table("volumes", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_volumes_status"), ["status"], unique=False)
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_runs_status"))
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_jobs_status"))
+    with op.batch_alter_table("fleets", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_fleets_status"))
+    with op.batch_alter_table("instances", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_instances_status"))
+    with op.batch_alter_table("volumes", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_volumes_status"))
+    # ### end Alembic commands ###

dstack/_internal/server/migrations/versions/728b1488b1b4_add_instance_health.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""Add instance health
+Revision ID: 728b1488b1b4
+Revises: 25479f540245
+Create Date: 2025-08-01 14:56:20.466990
+"""
+import sqlalchemy as sa
+import sqlalchemy_utils
+from alembic import op
+import dstack._internal.server.models
+# revision identifiers, used by Alembic.
+revision = "728b1488b1b4"
+down_revision = "25479f540245"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    op.create_table(
+        "instance_health_checks",
+        sa.Column("id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False),
+        sa.Column(
+            "instance_id", sqlalchemy_utils.types.uuid.UUIDType(binary=False), nullable=False
+        ),
+        sa.Column("collected_at", dstack._internal.server.models.NaiveDateTime(), nullable=False),
+        sa.Column("status", sa.VARCHAR(length=100), nullable=False),
+        sa.Column("response", sa.Text(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["instance_id"],
+            ["instances.id"],
+            name=op.f("fk_instance_health_checks_instance_id_instances"),
+        ),
+        sa.PrimaryKeyConstraint("id", name=op.f("pk_instance_health_checks")),
+    )
+    with op.batch_alter_table("instances", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("health", sa.VARCHAR(length=100), nullable=True))
+    op.execute("UPDATE instances SET health = 'HEALTHY'")
+    with op.batch_alter_table("instances", schema=None) as batch_op:
+        batch_op.alter_column("health", existing_type=sa.VARCHAR(length=100), nullable=False)
+def downgrade() -> None:
+    with op.batch_alter_table("instances", schema=None) as batch_op:
+        batch_op.drop_column("health")
+    op.drop_table("instance_health_checks")

dstack/_internal/server/migrations/versions/ec02a26a256c_add_runmodel_next_triggered_at.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""Add RunModel.next_triggered_at
+Revision ID: ec02a26a256c
+Revises: d5863798bf41
+Create Date: 2025-07-17 15:47:00.443217
+"""
+import sqlalchemy as sa
+from alembic import op
+import dstack._internal.server.models
+# revision identifiers, used by Alembic.
+revision = "ec02a26a256c"
+down_revision = "d5863798bf41"
+branch_labels = None
+depends_on = None
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "next_triggered_at", dstack._internal.server.models.NaiveDateTime(), nullable=True
+            )
+        )
+    # ### end Alembic commands ###
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.drop_column("next_triggered_at")
+    # ### end Alembic commands ###

dstack/_internal/server/models.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import enum
 import uuid
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Callable, List, Optional, Union
 from sqlalchemy import (
@@ -28,6 +28,7 @@ from dstack._internal.core.models.backends.base import BackendType
 from dstack._internal.core.models.common import CoreModel
 from dstack._internal.core.models.fleets import FleetStatus
 from dstack._internal.core.models.gateways import GatewayStatus
+from dstack._internal.core.models.health import HealthStatus
 from dstack._internal.core.models.instances import InstanceStatus
 from dstack._internal.core.models.profiles import (
     DEFAULT_FLEET_TERMINATION_IDLE_TIME,
@@ -51,9 +52,10 @@ logger = get_logger(__name__)
 class NaiveDateTime(TypeDecorator):
     """
-    A custom type decorator that ensures datetime objects are offset-naive when stored in the database.
-    This is needed because we use datetimes in UTC only and store them as offset-naive.
-    Some databases (e.g. Postgres) throw an error if the timezone is set.
+    A custom type decorator that ensures datetime objects are offset-naive when stored in the database
+    and offset-aware with UTC timezone when loaded from the database.
+    This is because we use datetimes in UTC everywhere, and
+    some databases (e.g. Postgres) throw an error if the timezone is set.
     """
     impl = DateTime
@@ -65,7 +67,9 @@ class NaiveDateTime(TypeDecorator):
         return value
     def process_result_value(self, value, dialect):
-        return value
+        if value is None:
+            return None
+        return value.replace(tzinfo=timezone.utc)
 class DecryptedString(CoreModel):
@@ -355,7 +359,8 @@ class RunModel(BaseModel):
     run_name: Mapped[str] = mapped_column(String(100))
     submitted_at: Mapped[datetime] = mapped_column(NaiveDateTime)
     last_processed_at: Mapped[datetime] = mapped_column(NaiveDateTime)
-    status: Mapped[RunStatus] = mapped_column(Enum(RunStatus))
+    next_triggered_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
+    status: Mapped[RunStatus] = mapped_column(Enum(RunStatus), index=True)
     termination_reason: Mapped[Optional[RunTerminationReason]] = mapped_column(
         Enum(RunTerminationReason)
     )
@@ -396,7 +401,7 @@ class JobModel(BaseModel):
     submission_num: Mapped[int] = mapped_column(Integer)
     submitted_at: Mapped[datetime] = mapped_column(NaiveDateTime)
     last_processed_at: Mapped[datetime] = mapped_column(NaiveDateTime)
-    status: Mapped[JobStatus] = mapped_column(Enum(JobStatus))
+    status: Mapped[JobStatus] = mapped_column(Enum(JobStatus), index=True)
     termination_reason: Mapped[Optional[JobTerminationReason]] = mapped_column(
         Enum(JobTerminationReason)
     )
@@ -423,6 +428,9 @@ class JobModel(BaseModel):
     replica_num: Mapped[int] = mapped_column(Integer)
     deployment_num: Mapped[int] = mapped_column(Integer)
     job_runtime_data: Mapped[Optional[str]] = mapped_column(Text)
+    probes: Mapped[list["ProbeModel"]] = relationship(
+        back_populates="job", order_by="ProbeModel.probe_num"
+    )
 class GatewayModel(BaseModel):
@@ -524,7 +532,7 @@ class FleetModel(BaseModel):
     deleted: Mapped[bool] = mapped_column(Boolean, default=False)
     deleted_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
-    status: Mapped[FleetStatus] = mapped_column(Enum(FleetStatus))
+    status: Mapped[FleetStatus] = mapped_column(Enum(FleetStatus), index=True)
     status_message: Mapped[Optional[str]] = mapped_column(Text)
     spec: Mapped[str] = mapped_column(Text)
@@ -543,7 +551,6 @@ class InstanceModel(BaseModel):
     instance_num: Mapped[int] = mapped_column(Integer, default=0)
-    # instance
     created_at: Mapped[datetime] = mapped_column(NaiveDateTime, default=get_current_datetime)
     last_processed_at: Mapped[datetime] = mapped_column(
         NaiveDateTime, default=get_current_datetime
@@ -564,7 +571,7 @@ class InstanceModel(BaseModel):
     fleet_id: Mapped[Optional[uuid.UUID]] = mapped_column(ForeignKey("fleets.id"))
     fleet: Mapped[Optional["FleetModel"]] = relationship(back_populates="instances")
-    status: Mapped[InstanceStatus] = mapped_column(Enum(InstanceStatus))
+    status: Mapped[InstanceStatus] = mapped_column(Enum(InstanceStatus), index=True)
     unreachable: Mapped[bool] = mapped_column(Boolean)
     # VM
@@ -580,7 +587,6 @@ class InstanceModel(BaseModel):
     requirements: Mapped[Optional[str]] = mapped_column(Text)
     instance_configuration: Mapped[Optional[str]] = mapped_column(Text)
-    # temination policy
     termination_policy: Mapped[Optional[TerminationPolicy]] = mapped_column(String(100))
     # TODO: Suggestion: do not assign DEFAULT_FLEET_TERMINATION_IDLE_TIME as the default here
     # (make Optional instead; also instead of -1)
@@ -594,15 +600,17 @@ class InstanceModel(BaseModel):
     # instance termination handling
     termination_deadline: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
     termination_reason: Mapped[Optional[str]] = mapped_column(String(4000))
+    # Deprecated since 0.19.22, not used
     health_status: Mapped[Optional[str]] = mapped_column(String(4000))
+    health: Mapped[HealthStatus] = mapped_column(
+        EnumAsString(HealthStatus, 100), default=HealthStatus.HEALTHY
+    )
     first_termination_retry_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
     last_termination_retry_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
-    # backend
     backend: Mapped[Optional[BackendType]] = mapped_column(EnumAsString(BackendType, 100))
     backend_data: Mapped[Optional[str]] = mapped_column(Text)
-    # offer
     offer: Mapped[Optional[str]] = mapped_column(Text)
     region: Mapped[Optional[str]] = mapped_column(String(2000))
     price: Mapped[Optional[float]] = mapped_column(Float)
@@ -615,18 +623,33 @@ class InstanceModel(BaseModel):
     total_blocks: Mapped[Optional[int]] = mapped_column(Integer)
     busy_blocks: Mapped[int] = mapped_column(Integer, default=0)
-    jobs: Mapped[list["JobModel"]] = relationship(back_populates="instance", lazy="joined")
+    jobs: Mapped[list["JobModel"]] = relationship(back_populates="instance")
     last_job_processed_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
     volume_attachments: Mapped[List["VolumeAttachmentModel"]] = relationship(
         back_populates="instance",
         # Add delete-orphan option so that removing entries from volume_attachments
         # automatically marks them for deletion.
-        # SQLalchemy requires delete when using delete-orphan.
+        # SQLAlchemy requires delete when using delete-orphan.
         cascade="save-update, merge, delete-orphan, delete",
     )
+class InstanceHealthCheckModel(BaseModel):
+    __tablename__ = "instance_health_checks"
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUIDType(binary=False), primary_key=True, default=uuid.uuid4
+    )
+    instance_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("instances.id"))
+    instance: Mapped["InstanceModel"] = relationship()
+    collected_at: Mapped[datetime] = mapped_column(NaiveDateTime)
+    status: Mapped[HealthStatus] = mapped_column(EnumAsString(HealthStatus, 100))
+    response: Mapped[str] = mapped_column(Text)
 class VolumeModel(BaseModel):
     __tablename__ = "volumes"
@@ -649,7 +672,7 @@ class VolumeModel(BaseModel):
     deleted: Mapped[bool] = mapped_column(Boolean, default=False)
     deleted_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
-    status: Mapped[VolumeStatus] = mapped_column(Enum(VolumeStatus))
+    status: Mapped[VolumeStatus] = mapped_column(Enum(VolumeStatus), index=True)
     status_message: Mapped[Optional[str]] = mapped_column(Text)
     configuration: Mapped[str] = mapped_column(Text)
@@ -729,6 +752,24 @@ class JobPrometheusMetrics(BaseModel):
     text: Mapped[str] = mapped_column(Text)
+class ProbeModel(BaseModel):
+    __tablename__ = "probes"
+    __table_args__ = (UniqueConstraint("job_id", "probe_num", name="uq_probes_job_id_probe_num"),)
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUIDType(binary=False), primary_key=True, default=uuid.uuid4
+    )
+    name: Mapped[str] = mapped_column(String(100))
+    job_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("jobs.id"), primary_key=True)
+    job: Mapped["JobModel"] = relationship(back_populates="probes")
+    probe_num: Mapped[int] = mapped_column(Integer)  # index in JobSpec.probes
+    due: Mapped[datetime] = mapped_column(NaiveDateTime)
+    success_streak: Mapped[int] = mapped_column(BigInteger)
+    active: Mapped[bool] = mapped_column(Boolean)
 class SecretModel(BaseModel):
     __tablename__ = "secrets"
     __table_args__ = (UniqueConstraint("project_id", "name", name="uq_secrets_project_id_name"),)

dstack/_internal/server/routers/instances.py CHANGED Viewed

@@ -3,12 +3,16 @@ from typing import List
 from fastapi import APIRouter, Depends
 from sqlalchemy.ext.asyncio import AsyncSession
-import dstack._internal.server.services.instances as instances
+import dstack._internal.server.services.instances as instances_services
 from dstack._internal.core.models.instances import Instance
 from dstack._internal.server.db import get_session
-from dstack._internal.server.models import UserModel
-from dstack._internal.server.schemas.instances import ListInstancesRequest
-from dstack._internal.server.security.permissions import Authenticated
+from dstack._internal.server.models import ProjectModel, UserModel
+from dstack._internal.server.schemas.instances import (
+    GetInstanceHealthChecksRequest,
+    GetInstanceHealthChecksResponse,
+    ListInstancesRequest,
+)
+from dstack._internal.server.security.permissions import Authenticated, ProjectMember
 from dstack._internal.server.utils.routers import (
     CustomORJSONResponse,
     get_base_api_additional_responses,
@@ -19,6 +23,11 @@ root_router = APIRouter(
     tags=["instances"],
     responses=get_base_api_additional_responses(),
 )
+project_router = APIRouter(
+    prefix="/api/project/{project_name}/instances",
+    tags=["instances"],
+    responses=get_base_api_additional_responses(),
+)
 @root_router.post("/list", response_model=List[Instance])
@@ -35,7 +44,7 @@ async def list_instances(
     the last instance from the previous page as `prev_created_at` and `prev_id`.
     """
     return CustomORJSONResponse(
-        await instances.list_user_instances(
+        await instances_services.list_user_instances(
             session=session,
             user=user,
             project_names=body.project_names,
@@ -47,3 +56,22 @@ async def list_instances(
             ascending=body.ascending,
         )
     )
+@project_router.post("/get_instance_health_checks", response_model=GetInstanceHealthChecksResponse)
+async def get_instance_health_checks(
+    body: GetInstanceHealthChecksRequest,
+    session: AsyncSession = Depends(get_session),
+    user_project: tuple[UserModel, ProjectModel] = Depends(ProjectMember()),
+):
+    _, project = user_project
+    health_checks = await instances_services.get_instance_health_checks(
+        session=session,
+        project=project,
+        fleet_name=body.fleet_name,
+        instance_num=body.instance_num,
+        after=body.after,
+        before=body.before,
+        limit=body.limit,
+    )
+    return CustomORJSONResponse(GetInstanceHealthChecksResponse(health_checks=health_checks))

dstack 0.19.20__py3-none-any.whl → 0.19.22__py3-none-any.whl

Potentially problematic release.

dstack 0.19.20__py3-none-any.whl → 0.19.22__py3-none-any.whl