dstack 0.19.19__py3-none-any.whl → 0.19.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry has flagged this version of dstack.
- dstack/_internal/core/backends/__init__.py +0 -65
- dstack/_internal/core/backends/cloudrift/api_client.py +13 -1
- dstack/_internal/core/backends/features.py +64 -0
- dstack/_internal/core/backends/oci/resources.py +5 -5
- dstack/_internal/core/compatibility/fleets.py +2 -0
- dstack/_internal/core/compatibility/runs.py +4 -0
- dstack/_internal/core/models/profiles.py +37 -0
- dstack/_internal/server/app.py +22 -10
- dstack/_internal/server/background/__init__.py +5 -6
- dstack/_internal/server/background/tasks/process_fleets.py +52 -38
- dstack/_internal/server/background/tasks/process_gateways.py +2 -2
- dstack/_internal/server/background/tasks/process_idle_volumes.py +5 -4
- dstack/_internal/server/background/tasks/process_instances.py +62 -48
- dstack/_internal/server/background/tasks/process_metrics.py +9 -2
- dstack/_internal/server/background/tasks/process_placement_groups.py +2 -0
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +14 -2
- dstack/_internal/server/background/tasks/process_running_jobs.py +129 -124
- dstack/_internal/server/background/tasks/process_runs.py +63 -20
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +12 -10
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +12 -4
- dstack/_internal/server/background/tasks/process_volumes.py +4 -1
- dstack/_internal/server/migrations/versions/50dd7ea98639_index_status_columns.py +55 -0
- dstack/_internal/server/migrations/versions/ec02a26a256c_add_runmodel_next_triggered_at.py +38 -0
- dstack/_internal/server/models.py +16 -16
- dstack/_internal/server/schemas/logs.py +1 -9
- dstack/_internal/server/services/fleets.py +19 -10
- dstack/_internal/server/services/gateways/__init__.py +17 -17
- dstack/_internal/server/services/instances.py +10 -14
- dstack/_internal/server/services/jobs/__init__.py +10 -12
- dstack/_internal/server/services/logs/aws.py +45 -3
- dstack/_internal/server/services/logs/filelog.py +121 -11
- dstack/_internal/server/services/offers.py +3 -3
- dstack/_internal/server/services/projects.py +35 -15
- dstack/_internal/server/services/prometheus/client_metrics.py +3 -0
- dstack/_internal/server/services/prometheus/custom_metrics.py +22 -3
- dstack/_internal/server/services/runs.py +74 -34
- dstack/_internal/server/services/services/__init__.py +4 -1
- dstack/_internal/server/services/users.py +2 -3
- dstack/_internal/server/services/volumes.py +11 -11
- dstack/_internal/server/settings.py +3 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-64f8273740c4b52c18f5.js → main-39a767528976f8078166.js} +7 -26
- dstack/_internal/server/statics/{main-64f8273740c4b52c18f5.js.map → main-39a767528976f8078166.js.map} +1 -1
- dstack/_internal/server/statics/{main-d58fc0460cb0eae7cb5c.css → main-8f9ee218d3eb45989682.css} +2 -2
- dstack/_internal/server/testing/common.py +7 -0
- dstack/_internal/server/utils/sentry_utils.py +12 -0
- dstack/_internal/utils/common.py +10 -21
- dstack/_internal/utils/cron.py +5 -0
- dstack/version.py +1 -1
- {dstack-0.19.19.dist-info → dstack-0.19.21.dist-info}/METADATA +2 -11
- {dstack-0.19.19.dist-info → dstack-0.19.21.dist-info}/RECORD +54 -49
- {dstack-0.19.19.dist-info → dstack-0.19.21.dist-info}/WHEEL +0 -0
- {dstack-0.19.19.dist-info → dstack-0.19.21.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.19.dist-info → dstack-0.19.21.dist-info}/licenses/LICENSE.md +0 -0
dstack/_internal/server/background/tasks/process_runs.py:

```diff
@@ -2,9 +2,9 @@ import asyncio
 import datetime
 from typing import List, Optional, Set, Tuple
 
-from sqlalchemy import select
+from sqlalchemy import and_, or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload, selectinload
+from sqlalchemy.orm import joinedload, load_only, selectinload
 
 import dstack._internal.server.services.services.autoscalers as autoscalers
 from dstack._internal.core.errors import ServerError
@@ -20,7 +20,13 @@ from dstack._internal.core.models.runs import (
     RunTerminationReason,
 )
 from dstack._internal.server.db import get_db, get_session_ctx
-from dstack._internal.server.models import JobModel, RunModel
+from dstack._internal.server.models import (
+    InstanceModel,
+    JobModel,
+    ProjectModel,
+    RunModel,
+    UserModel,
+)
 from dstack._internal.server.services.jobs import (
     find_job,
     get_job_specs_from_run_spec,
@@ -37,6 +43,7 @@ from dstack._internal.server.services.runs import (
 )
 from dstack._internal.server.services.secrets import get_project_secrets_mapping
 from dstack._internal.server.services.services import update_service_desired_replica_count
+from dstack._internal.server.utils import sentry_utils
 from dstack._internal.utils import common
 from dstack._internal.utils.logging import get_logger
 
```
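Several background-task entry points in this release gain a `@sentry_utils.instrument_background_task` decorator; the new `dstack/_internal/server/utils/sentry_utils.py` helper itself (+12 lines per the file list) is not shown in this diff. One plausible shape for such a decorator, purely as an illustration, is a wrapper that opens a Sentry transaction around each task invocation:

```python
# Illustrative sketch only; the real dstack helper may differ.
import functools

import sentry_sdk


def instrument_background_task(func):
    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        # One Sentry transaction per background-task run, named after the task.
        with sentry_sdk.start_transaction(op="background.task", name=func.__qualname__):
            return await func(*args, **kwargs)

    return wrapper
```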
```diff
@@ -53,22 +60,54 @@ async def process_runs(batch_size: int = 1):
     await asyncio.gather(*tasks)
 
 
+@sentry_utils.instrument_background_task
 async def _process_next_run():
     run_lock, run_lockset = get_locker(get_db().dialect_name).get_lockset(RunModel.__tablename__)
     job_lock, job_lockset = get_locker(get_db().dialect_name).get_lockset(JobModel.__tablename__)
+    now = common.get_current_datetime()
     async with get_session_ctx() as session:
         async with run_lock, job_lock:
             res = await session.execute(
                 select(RunModel)
                 .where(
-                    RunModel.status.not_in(RunStatus.finished_statuses()),
                     RunModel.id.not_in(run_lockset),
-                    RunModel.last_processed_at
-                    < common.get_current_datetime() - MIN_PROCESSING_INTERVAL,
+                    RunModel.last_processed_at < now - MIN_PROCESSING_INTERVAL,
+                    # Filter out runs that don't need to be processed.
+                    # This is only to reduce unnecessary commits.
+                    # Otherwise, we could fetch all active runs and filter them when processing.
+                    or_(
+                        # Active non-pending runs:
+                        RunModel.status.not_in(
+                            RunStatus.finished_statuses() + [RunStatus.PENDING]
+                        ),
+                        # Retrying runs:
+                        and_(
+                            RunModel.status == RunStatus.PENDING,
+                            RunModel.resubmission_attempt > 0,
+                        ),
+                        # Scheduled ready runs:
+                        and_(
+                            RunModel.status == RunStatus.PENDING,
+                            RunModel.resubmission_attempt == 0,
+                            RunModel.next_triggered_at.is_not(None),
+                            RunModel.next_triggered_at < now,
+                        ),
+                        # Scaled-to-zero runs:
+                        # Such runs cannot be scheduled, thus we check next_triggered_at.
+                        # If we allow scheduled services with downscaling to zero,
+                        # this check won't pass.
+                        and_(
+                            RunModel.status == RunStatus.PENDING,
+                            RunModel.resubmission_attempt == 0,
+                            RunModel.next_triggered_at.is_(None),
+                        ),
+                    ),
                 )
+                .options(joinedload(RunModel.jobs).load_only(JobModel.id))
+                .options(load_only(RunModel.id))
                 .order_by(RunModel.last_processed_at.asc())
                 .limit(1)
-                .with_for_update(skip_locked=True, key_share=True)
+                .with_for_update(skip_locked=True, key_share=True, of=RunModel)
             )
             run_model = res.scalar()
             if run_model is None:
```
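The new `or_(...)` filter narrows polling to runs that can actually make progress, instead of every unfinished run. Restated in plain Python for clarity (a hypothetical helper; the string statuses stand in for `RunStatus` members):

```python
from datetime import datetime
from typing import Optional

FINISHED_STATUSES = {"done", "failed", "terminated", "aborted"}  # illustrative values


def needs_processing(
    status: str,
    resubmission_attempt: int,
    next_triggered_at: Optional[datetime],
    now: datetime,
) -> bool:
    if status not in FINISHED_STATUSES and status != "pending":
        return True  # active non-pending run
    if status == "pending" and resubmission_attempt > 0:
        return True  # retrying run
    if status == "pending" and resubmission_attempt == 0:
        if next_triggered_at is not None and next_triggered_at < now:
            return True  # scheduled run whose trigger time has passed
        if next_triggered_at is None:
            return True  # service scaled down to zero
    return False
```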
```diff
@@ -98,20 +137,22 @@ async def _process_next_run():
 
 
 async def _process_run(session: AsyncSession, run_model: RunModel):
-    logger.debug("%s: processing run", fmt(run_model))
     # Refetch to load related attributes.
-    # joinedload produces LEFT OUTER JOIN that can't be used with FOR UPDATE.
     res = await session.execute(
         select(RunModel)
         .where(RunModel.id == run_model.id)
         .execution_options(populate_existing=True)
-        .options(joinedload(RunModel.project))
-        .options(joinedload(RunModel.user))
-        .options(selectinload(RunModel.jobs).joinedload(JobModel.instance))
+        .options(joinedload(RunModel.project).load_only(ProjectModel.id, ProjectModel.name))
+        .options(joinedload(RunModel.user).load_only(UserModel.name))
+        .options(
+            selectinload(RunModel.jobs)
+            .joinedload(JobModel.instance)
+            .load_only(InstanceModel.fleet_id)
+        )
         .execution_options(populate_existing=True)
     )
     run_model = res.unique().scalar_one()
+    logger.debug("%s: processing run", fmt(run_model))
     try:
         if run_model.status == RunStatus.PENDING:
             await _process_pending_run(session, run_model)
```
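The refetch above combines three loader techniques: `joinedload` for many-to-one links (`project`, `user`), `selectinload` for the `jobs` collection, and `load_only` to fetch just the columns processing needs. A self-contained sketch of the same pattern, with generic `Run`/`Project`/`Job` stand-in models:

```python
from sqlalchemy import ForeignKey, select
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    joinedload,
    load_only,
    mapped_column,
    relationship,
    selectinload,
)


class Base(DeclarativeBase):
    pass


class Project(Base):
    __tablename__ = "projects"
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str]


class Run(Base):
    __tablename__ = "runs"
    id: Mapped[int] = mapped_column(primary_key=True)
    project_id: Mapped[int] = mapped_column(ForeignKey("projects.id"))
    project: Mapped[Project] = relationship()
    jobs: Mapped[list["Job"]] = relationship(back_populates="run")


class Job(Base):
    __tablename__ = "jobs"
    id: Mapped[int] = mapped_column(primary_key=True)
    run_id: Mapped[int] = mapped_column(ForeignKey("runs.id"))
    run: Mapped[Run] = relationship(back_populates="jobs")


stmt = (
    select(Run)
    # many-to-one: same SELECT via LEFT OUTER JOIN, only id and name columns
    .options(joinedload(Run.project).load_only(Project.id, Project.name))
    # collection: a second "SELECT ... WHERE run_id IN (...)" query, ids only
    .options(selectinload(Run.jobs).load_only(Job.id))
)
print(stmt)
```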
```diff
@@ -135,8 +176,12 @@ async def _process_run(session: AsyncSession, run_model: RunModel):
 async def _process_pending_run(session: AsyncSession, run_model: RunModel):
     """Jobs are not created yet"""
     run = run_model_to_run(run_model)
-    …
-    …
+
+    # TODO: Do not select such runs in the first place to avoid redundant processing
+    if run_model.resubmission_attempt > 0 and not _retrying_run_ready_for_resubmission(
+        run_model, run
+    ):
+        logger.debug("%s: retrying run is not yet ready for resubmission", fmt(run_model))
         return
@@ -160,7 +205,7 @@ async def _process_pending_run(session: AsyncSession, run_model: RunModel):
     logger.info("%s: run status has changed PENDING -> SUBMITTED", fmt(run_model))
 
 
-def …
+def _retrying_run_ready_for_resubmission(run_model: RunModel, run: Run) -> bool:
     if run.latest_job_submission is None:
         # Should not be possible
         return True
@@ -197,7 +242,7 @@ async def _process_active_run(session: AsyncSession, run_model: RunModel):
     We handle fails, scaling, and status changes.
     """
     run = run_model_to_run(run_model)
-    run_spec = RunSpec.__response__.parse_raw(run_model.run_spec)
+    run_spec = run.run_spec
     retry_single_job = _can_retry_single_job(run_spec)
 
     run_statuses: Set[RunStatus] = set()
@@ -337,9 +382,7 @@ async def _process_active_run(session: AsyncSession, run_model: RunModel):
         )
         if run_model.status == RunStatus.SUBMITTED and new_status == RunStatus.PROVISIONING:
             current_time = common.get_current_datetime()
-            submit_to_provision_duration = (
-                current_time - run_model.submitted_at.replace(tzinfo=datetime.timezone.utc)
-            ).total_seconds()
+            submit_to_provision_duration = (current_time - run_model.submitted_at).total_seconds()
             logger.info(
                 "%s: run took %.2f seconds from submission to provisioning.",
                 fmt(run_model),
```
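The removed `.replace(tzinfo=datetime.timezone.utc)` fix-up is no longer needed because `NaiveDateTime` (see the `models.py` hunks below) now returns timezone-aware UTC datetimes. A quick illustration of why the fix-up existed:

```python
from datetime import datetime, timezone

naive = datetime(2025, 7, 25, 10, 0)                       # what columns used to return
aware = datetime(2025, 7, 25, 10, 0, tzinfo=timezone.utc)  # what they return now
now = datetime.now(timezone.utc)

print((now - aware).total_seconds())  # fine: aware minus aware
# (now - naive) would raise TypeError: naive and aware datetimes don't mix
```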
dstack/_internal/server/background/tasks/process_submitted_jobs.py:

```diff
@@ -5,7 +5,7 @@ from typing import List, Optional, Tuple
 
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload, lazyload
+from sqlalchemy.orm import joinedload, load_only, selectinload
 
 from dstack._internal.core.backends.base.backend import Backend
 from dstack._internal.core.backends.base.compute import ComputeWithVolumeSupport
@@ -43,6 +43,7 @@ from dstack._internal.server.models import (
     JobModel,
     ProjectModel,
     RunModel,
+    UserModel,
     VolumeAttachmentModel,
     VolumeModel,
 )
@@ -74,6 +75,7 @@ from dstack._internal.server.services.runs import (
 from dstack._internal.server.services.volumes import (
     volume_model_to_volume,
 )
+from dstack._internal.server.utils import sentry_utils
 from dstack._internal.utils import common as common_utils
 from dstack._internal.utils import env as env_utils
 from dstack._internal.utils.logging import get_logger
@@ -108,6 +110,7 @@ def _get_effective_batch_size(batch_size: int) -> int:
     return batch_size
 
 
+@sentry_utils.instrument_background_task
 async def _process_next_submitted_job():
    lock, lockset = get_locker(get_db().dialect_name).get_lockset(JobModel.__tablename__)
     async with get_session_ctx() as session:
@@ -119,6 +122,7 @@ async def _process_next_submitted_job():
                     JobModel.status == JobStatus.SUBMITTED,
                     JobModel.id.not_in(lockset),
                 )
+                .options(load_only(JobModel.id))
                 # Jobs are process in FIFO sorted by priority globally,
                 # thus runs from different projects can "overtake" each other by using higher priorities.
                 # That's not a big problem as long as projects do not compete for the same compute resources.
@@ -151,9 +155,7 @@ async def _process_next_submitted_job():
 
 
 async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
-    logger.debug("%s: provisioning has started", fmt(job_model))
     # Refetch to load related attributes.
-    # joinedload produces LEFT OUTER JOIN that can't be used with FOR UPDATE.
     res = await session.execute(
         select(JobModel).where(JobModel.id == job_model.id).options(joinedload(JobModel.instance))
     )
@@ -162,15 +164,16 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
         select(RunModel)
         .where(RunModel.id == job_model.run_id)
         .options(joinedload(RunModel.project).joinedload(ProjectModel.backends))
-        .options(joinedload(RunModel.user))
+        .options(joinedload(RunModel.user).load_only(UserModel.name))
         .options(joinedload(RunModel.fleet).joinedload(FleetModel.instances))
     )
     run_model = res.unique().scalar_one()
-
-    run_spec = RunSpec.__response__.parse_raw(run_model.run_spec)
-    profile = run_spec.merged_profile
+    logger.debug("%s: provisioning has started", fmt(job_model))
 
+    project = run_model.project
     run = run_model_to_run(run_model)
+    run_spec = run.run_spec
+    profile = run_spec.merged_profile
     job = find_job(run.jobs, job_model.replica_num, job_model.job_num)
 
     master_job = find_job(run.jobs, job_model.replica_num, 0)
@@ -228,7 +231,6 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
                     InstanceModel.deleted == False,
                     InstanceModel.total_blocks > InstanceModel.busy_blocks,
                 )
-                .options(lazyload(InstanceModel.jobs))
                 .order_by(InstanceModel.id)  # take locks in order
                 .with_for_update(key_share=True)
             )
@@ -357,9 +359,9 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
     await session.execute(
         select(VolumeModel)
         .where(VolumeModel.id.in_(volumes_ids))
-        .options(joinedload(VolumeModel.user))
+        .options(joinedload(VolumeModel.user).load_only(UserModel.name))
         .order_by(VolumeModel.id)  # take locks in order
-        .with_for_update(key_share=True)
+        .with_for_update(key_share=True, of=VolumeModel)
     )
     async with get_locker(get_db().dialect_name).lock_ctx(VolumeModel.__tablename__, volumes_ids):
         if len(volume_models) > 0:
```
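Both refetch queries above now pass `of=...` to `with_for_update`, so the row locks stay on the intended table even though the statement eager-loads related rows. A minimal sketch of the effect (generic models, assumed for illustration, compiled for PostgreSQL):

```python
from sqlalchemy import ForeignKey, select
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import DeclarativeBase, Mapped, joinedload, mapped_column, relationship


class Base(DeclarativeBase):
    pass


class User(Base):
    __tablename__ = "users"
    id: Mapped[int] = mapped_column(primary_key=True)


class Volume(Base):
    __tablename__ = "volumes"
    id: Mapped[int] = mapped_column(primary_key=True)
    user_id: Mapped[int] = mapped_column(ForeignKey("users.id"))
    user: Mapped[User] = relationship()


stmt = (
    select(Volume)
    .options(joinedload(Volume.user))
    .with_for_update(key_share=True, of=Volume)
)
# Renders "... FOR NO KEY UPDATE OF volumes": the users rows pulled in by the
# eager-load join are not locked; only the volumes rows are.
print(stmt.compile(dialect=postgresql.dialect()))
```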
dstack/_internal/server/background/tasks/process_terminating_jobs.py:

```diff
@@ -2,7 +2,7 @@ import asyncio
 
 from sqlalchemy import or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload, lazyload
+from sqlalchemy.orm import joinedload
 
 from dstack._internal.core.models.runs import JobStatus
 from dstack._internal.server.db import get_db, get_session_ctx
@@ -18,7 +18,11 @@ from dstack._internal.server.services.jobs import (
 )
 from dstack._internal.server.services.locking import get_locker
 from dstack._internal.server.services.logging import fmt
-from dstack._internal.utils.common import get_or_error
+from dstack._internal.server.utils import sentry_utils
+from dstack._internal.utils.common import (
+    get_current_datetime,
+    get_or_error,
+)
 from dstack._internal.utils.logging import get_logger
 
 logger = get_logger(__name__)
@@ -31,6 +35,7 @@ async def process_terminating_jobs(batch_size: int = 1):
     await asyncio.gather(*tasks)
 
 
+@sentry_utils.instrument_background_task
 async def _process_next_terminating_job():
     job_lock, job_lockset = get_locker(get_db().dialect_name).get_lockset(JobModel.__tablename__)
     instance_lock, instance_lockset = get_locker(get_db().dialect_name).get_lockset(
@@ -43,7 +48,10 @@ async def _process_next_terminating_job():
             .where(
                 JobModel.id.not_in(job_lockset),
                 JobModel.status == JobStatus.TERMINATING,
-                or_(…),
+                or_(
+                    JobModel.remove_at.is_(None),
+                    JobModel.remove_at < get_current_datetime(),
+                ),
             )
             .order_by(JobModel.last_processed_at.asc())
             .limit(1)
```
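The new `remove_at` guard defers processing of terminating jobs that have a scheduled removal time. In plain terms (a hypothetical helper for illustration):

```python
from datetime import datetime
from typing import Optional


def ready_to_process(remove_at: Optional[datetime], now: datetime) -> bool:
    # No removal delay set, or the scheduled removal time has passed.
    return remove_at is None or remove_at < now
```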
```diff
@@ -59,7 +67,6 @@ async def _process_next_terminating_job():
                 InstanceModel.id == job_model.used_instance_id,
                 InstanceModel.id.not_in(instance_lockset),
             )
-            .options(lazyload(InstanceModel.jobs))
             .with_for_update(skip_locked=True, key_share=True)
         )
         instance_model = res.scalar()
@@ -88,6 +95,7 @@ async def _process_job(session: AsyncSession, job_model: JobModel):
         .options(
             joinedload(InstanceModel.project).joinedload(ProjectModel.backends),
             joinedload(InstanceModel.volume_attachments).joinedload(VolumeAttachmentModel.volume),
+            joinedload(InstanceModel.jobs).load_only(JobModel.id),
         )
     )
     instance_model = res.unique().scalar()
```
dstack/_internal/server/background/tasks/process_volumes.py:

```diff
@@ -7,6 +7,7 @@ from dstack._internal.core.errors import BackendError, BackendNotAvailable
 from dstack._internal.core.models.volumes import VolumeStatus
 from dstack._internal.server.db import get_db, get_session_ctx
 from dstack._internal.server.models import (
+    FleetModel,
     InstanceModel,
     ProjectModel,
     VolumeAttachmentModel,
@@ -15,12 +16,14 @@ from dstack._internal.server.models import (
 from dstack._internal.server.services import backends as backends_services
 from dstack._internal.server.services import volumes as volumes_services
 from dstack._internal.server.services.locking import get_locker
+from dstack._internal.server.utils import sentry_utils
 from dstack._internal.utils.common import get_current_datetime, run_async
 from dstack._internal.utils.logging import get_logger
 
 logger = get_logger(__name__)
 
 
+@sentry_utils.instrument_background_task
 async def process_submitted_volumes():
     lock, lockset = get_locker(get_db().dialect_name).get_lockset(VolumeModel.__tablename__)
     async with get_session_ctx() as session:
@@ -49,7 +52,6 @@ async def process_submitted_volumes():
 async def _process_submitted_volume(session: AsyncSession, volume_model: VolumeModel):
     logger.info("Started submitted volume %s processing", volume_model.name)
     # Refetch to load related attributes.
-    # joinedload produces LEFT OUTER JOIN that can't be used with FOR UPDATE.
     res = await session.execute(
         select(VolumeModel)
         .where(VolumeModel.id == volume_model.id)
@@ -59,6 +61,7 @@ async def _process_submitted_volume(session: AsyncSession, volume_model: VolumeM
             joinedload(VolumeModel.attachments)
             .joinedload(VolumeAttachmentModel.instance)
             .joinedload(InstanceModel.fleet)
+            .load_only(FleetModel.name)
         )
         .execution_options(populate_existing=True)
     )
```
dstack/_internal/server/migrations/versions/50dd7ea98639_index_status_columns.py (new file):

```diff
@@ -0,0 +1,55 @@
+"""Index status columns
+
+Revision ID: 50dd7ea98639
+Revises: ec02a26a256c
+Create Date: 2025-07-25 10:36:25.127923
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "50dd7ea98639"
+down_revision = "ec02a26a256c"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_runs_status"), ["status"], unique=False)
+
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_jobs_status"), ["status"], unique=False)
+
+    with op.batch_alter_table("fleets", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_fleets_status"), ["status"], unique=False)
+
+    with op.batch_alter_table("instances", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_instances_status"), ["status"], unique=False)
+
+    with op.batch_alter_table("volumes", schema=None) as batch_op:
+        batch_op.create_index(batch_op.f("ix_volumes_status"), ["status"], unique=False)
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_runs_status"))
+
+    with op.batch_alter_table("jobs", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_jobs_status"))
+
+    with op.batch_alter_table("fleets", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_fleets_status"))
+
+    with op.batch_alter_table("instances", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_instances_status"))
+
+    with op.batch_alter_table("volumes", schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f("ix_volumes_status"))
+
+    # ### end Alembic commands ###
```
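The migration uses `batch_alter_table` so the index changes also work on SQLite, which cannot alter tables in place. After upgrading, the new indexes can be verified with SQLAlchemy's inspector (the database URL below is just an example):

```python
from sqlalchemy import create_engine, inspect

engine = create_engine("sqlite:///server_data.db")  # example URL; point at your server DB
inspector = inspect(engine)
for table in ("runs", "jobs", "fleets", "instances", "volumes"):
    print(table, [index["name"] for index in inspector.get_indexes(table)])
```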
dstack/_internal/server/migrations/versions/ec02a26a256c_add_runmodel_next_triggered_at.py (new file):

```diff
@@ -0,0 +1,38 @@
+"""Add RunModel.next_triggered_at
+
+Revision ID: ec02a26a256c
+Revises: d5863798bf41
+Create Date: 2025-07-17 15:47:00.443217
+
+"""
+
+import sqlalchemy as sa
+from alembic import op
+
+import dstack._internal.server.models
+
+# revision identifiers, used by Alembic.
+revision = "ec02a26a256c"
+down_revision = "d5863798bf41"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "next_triggered_at", dstack._internal.server.models.NaiveDateTime(), nullable=True
+            )
+        )
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("runs", schema=None) as batch_op:
+        batch_op.drop_column("next_triggered_at")
+
+    # ### end Alembic commands ###
```
dstack/_internal/server/models.py:

```diff
@@ -1,6 +1,6 @@
 import enum
 import uuid
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Callable, List, Optional, Union
 
 from sqlalchemy import (
@@ -51,9 +51,10 @@ logger = get_logger(__name__)
 
 class NaiveDateTime(TypeDecorator):
     """
-    A custom type decorator that ensures datetime objects are offset-naive when stored in the database
-    …
+    A custom type decorator that ensures datetime objects are offset-naive when stored in the database
+    and offset-aware with UTC timezone when loaded from the database.
+    This is because we use datetimes in UTC everywhere, and
+    some databases (e.g. Postgres) throw an error if the timezone is set.
     """
 
     impl = DateTime
@@ -65,7 +66,9 @@ class NaiveDateTime(TypeDecorator):
         return value
 
     def process_result_value(self, value, dialect):
-        return value
+        if value is None:
+            return None
+        return value.replace(tzinfo=timezone.utc)
 
 
 class DecryptedString(CoreModel):
```
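Putting the pieces together, the round-trip behavior of the updated type looks like this (a standalone sketch; `process_bind_param` is an assumed paraphrase, since the diff shows it only in context):

```python
from datetime import timezone

from sqlalchemy.types import DateTime, TypeDecorator


class NaiveUTCDateTime(TypeDecorator):
    """Store naive UTC datetimes; return tz-aware UTC datetimes."""

    impl = DateTime
    cache_ok = True

    def process_bind_param(self, value, dialect):
        # Assumed behavior: strip tzinfo before writing so that databases
        # like Postgres accept the value in a naive DateTime column.
        if value is not None and value.tzinfo is not None:
            value = value.astimezone(timezone.utc).replace(tzinfo=None)
        return value

    def process_result_value(self, value, dialect):
        # As in the diff: re-attach UTC on the way out.
        if value is None:
            return None
        return value.replace(tzinfo=timezone.utc)
```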
```diff
@@ -355,7 +358,8 @@ class RunModel(BaseModel):
     run_name: Mapped[str] = mapped_column(String(100))
     submitted_at: Mapped[datetime] = mapped_column(NaiveDateTime)
     last_processed_at: Mapped[datetime] = mapped_column(NaiveDateTime)
-    status: Mapped[RunStatus] = mapped_column(Enum(RunStatus))
+    next_triggered_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
+    status: Mapped[RunStatus] = mapped_column(Enum(RunStatus), index=True)
     termination_reason: Mapped[Optional[RunTerminationReason]] = mapped_column(
         Enum(RunTerminationReason)
     )
@@ -396,7 +400,7 @@ class JobModel(BaseModel):
     submission_num: Mapped[int] = mapped_column(Integer)
     submitted_at: Mapped[datetime] = mapped_column(NaiveDateTime)
     last_processed_at: Mapped[datetime] = mapped_column(NaiveDateTime)
-    status: Mapped[JobStatus] = mapped_column(Enum(JobStatus))
+    status: Mapped[JobStatus] = mapped_column(Enum(JobStatus), index=True)
     termination_reason: Mapped[Optional[JobTerminationReason]] = mapped_column(
         Enum(JobTerminationReason)
     )
@@ -524,7 +528,7 @@ class FleetModel(BaseModel):
     deleted: Mapped[bool] = mapped_column(Boolean, default=False)
     deleted_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
 
-    status: Mapped[FleetStatus] = mapped_column(Enum(FleetStatus))
+    status: Mapped[FleetStatus] = mapped_column(Enum(FleetStatus), index=True)
     status_message: Mapped[Optional[str]] = mapped_column(Text)
 
     spec: Mapped[str] = mapped_column(Text)
@@ -543,7 +547,6 @@ class InstanceModel(BaseModel):
 
     instance_num: Mapped[int] = mapped_column(Integer, default=0)
 
-    # instance
     created_at: Mapped[datetime] = mapped_column(NaiveDateTime, default=get_current_datetime)
     last_processed_at: Mapped[datetime] = mapped_column(
         NaiveDateTime, default=get_current_datetime
@@ -564,7 +567,7 @@ class InstanceModel(BaseModel):
     fleet_id: Mapped[Optional[uuid.UUID]] = mapped_column(ForeignKey("fleets.id"))
     fleet: Mapped[Optional["FleetModel"]] = relationship(back_populates="instances")
 
-    status: Mapped[InstanceStatus] = mapped_column(Enum(InstanceStatus))
+    status: Mapped[InstanceStatus] = mapped_column(Enum(InstanceStatus), index=True)
     unreachable: Mapped[bool] = mapped_column(Boolean)
 
     # VM
@@ -580,7 +583,6 @@ class InstanceModel(BaseModel):
     requirements: Mapped[Optional[str]] = mapped_column(Text)
     instance_configuration: Mapped[Optional[str]] = mapped_column(Text)
 
-    # temination policy
     termination_policy: Mapped[Optional[TerminationPolicy]] = mapped_column(String(100))
     # TODO: Suggestion: do not assign DEFAULT_FLEET_TERMINATION_IDLE_TIME as the default here
     # (make Optional instead; also instead of -1)
@@ -598,11 +600,9 @@ class InstanceModel(BaseModel):
     first_termination_retry_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
     last_termination_retry_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
 
-    # backend
     backend: Mapped[Optional[BackendType]] = mapped_column(EnumAsString(BackendType, 100))
     backend_data: Mapped[Optional[str]] = mapped_column(Text)
 
-    # offer
     offer: Mapped[Optional[str]] = mapped_column(Text)
     region: Mapped[Optional[str]] = mapped_column(String(2000))
     price: Mapped[Optional[float]] = mapped_column(Float)
@@ -615,14 +615,14 @@ class InstanceModel(BaseModel):
     total_blocks: Mapped[Optional[int]] = mapped_column(Integer)
     busy_blocks: Mapped[int] = mapped_column(Integer, default=0)
 
-    jobs: Mapped[list["JobModel"]] = relationship(back_populates="instance", lazy="joined")
+    jobs: Mapped[list["JobModel"]] = relationship(back_populates="instance")
     last_job_processed_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
 
     volume_attachments: Mapped[List["VolumeAttachmentModel"]] = relationship(
         back_populates="instance",
         # Add delete-orphan option so that removing entries from volume_attachments
         # automatically marks them for deletion.
-        # …
+        # SQLAlchemy requires delete when using delete-orphan.
         cascade="save-update, merge, delete-orphan, delete",
     )
 
@@ -649,7 +649,7 @@ class VolumeModel(BaseModel):
     deleted: Mapped[bool] = mapped_column(Boolean, default=False)
     deleted_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
 
-    status: Mapped[VolumeStatus] = mapped_column(Enum(VolumeStatus))
+    status: Mapped[VolumeStatus] = mapped_column(Enum(VolumeStatus), index=True)
     status_message: Mapped[Optional[str]] = mapped_column(Text)
 
     configuration: Mapped[str] = mapped_column(Text)
```
dstack/_internal/server/schemas/logs.py:

```diff
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import Optional
 
-from pydantic import UUID4, Field, validator
+from pydantic import UUID4, Field
 
 from dstack._internal.core.models.common import CoreModel
 
@@ -15,11 +15,3 @@ class PollLogsRequest(CoreModel):
     next_token: Optional[str] = None
     limit: int = Field(100, ge=0, le=1000)
     diagnose: bool = False
-
-    @validator("descending")
-    @classmethod
-    def validate_descending(cls, v):
-        # Descending is not supported until we migrate from base64-encoded logs to plain text logs.
-        if v is True:
-            raise ValueError("descending: true is not supported")
-        return v
```
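With the validator gone, `descending: true` is now accepted by the request schema. A pared-down sketch of the model (using plain pydantic `BaseModel` instead of dstack's `CoreModel`, and omitting the request's other fields):

```python
from typing import Optional

from pydantic import BaseModel, Field


class PollLogsRequest(BaseModel):
    descending: bool = False
    next_token: Optional[str] = None
    limit: int = Field(100, ge=0, le=1000)
    diagnose: bool = False


# Previously this raised "descending: true is not supported"; now it validates.
print(PollLogsRequest(descending=True, limit=50))
```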
dstack/_internal/server/services/fleets.py:

```diff
@@ -1,6 +1,6 @@
 import uuid
 from collections.abc import Callable
-from datetime import datetime, timezone
+from datetime import datetime
 from functools import wraps
 from typing import List, Literal, Optional, Tuple, TypeVar, Union, cast
 
@@ -8,8 +8,8 @@ from sqlalchemy import and_, func, or_, select
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import joinedload, selectinload
 
-from dstack._internal.core.backends import BACKENDS_WITH_CREATE_INSTANCE_SUPPORT
 from dstack._internal.core.backends.base.backend import Backend
+from dstack._internal.core.backends.features import BACKENDS_WITH_CREATE_INSTANCE_SUPPORT
 from dstack._internal.core.errors import (
     ForbiddenError,
     ResourceExistsError,
@@ -49,6 +49,7 @@ from dstack._internal.server.db import get_db
 from dstack._internal.server.models import (
     FleetModel,
     InstanceModel,
+    JobModel,
     ProjectModel,
     UserModel,
 )
@@ -66,7 +67,6 @@ from dstack._internal.server.services.plugins import apply_plugin_policies
 from dstack._internal.server.services.projects import (
     get_member,
     get_member_permissions,
-    list_project_models,
     list_user_project_models,
 )
 from dstack._internal.server.services.resources import set_resources_defaults
@@ -87,10 +87,11 @@ async def list_fleets(
     limit: int,
     ascending: bool,
 ) -> List[Fleet]:
-    …
+    projects = await list_user_project_models(
+        session=session,
+        user=user,
+        only_names=True,
+    )
     if project_name is not None:
         projects = [p for p in projects if p.name == project_name]
     fleet_models = await list_projects_fleet_models(
@@ -398,7 +399,11 @@ async def apply_plan(
             FleetModel.id == fleet_model.id,
             FleetModel.deleted == False,
         )
-        .options(selectinload(FleetModel.instances))
+        .options(
+            selectinload(FleetModel.instances)
+            .joinedload(InstanceModel.jobs)
+            .load_only(JobModel.id)
+        )
         .options(selectinload(FleetModel.runs))
         .execution_options(populate_existing=True)
         .order_by(FleetModel.id)  # take locks in order
@@ -563,7 +568,11 @@ async def delete_fleets(
             FleetModel.name.in_(names),
             FleetModel.deleted == False,
         )
-        .options(selectinload(FleetModel.instances))
+        .options(
+            selectinload(FleetModel.instances)
+            .joinedload(InstanceModel.jobs)
+            .load_only(JobModel.id)
+        )
         .options(selectinload(FleetModel.runs))
         .execution_options(populate_existing=True)
         .order_by(FleetModel.id)  # take locks in order
@@ -600,7 +609,7 @@ def fleet_model_to_fleet(
         name=fleet_model.name,
         project_name=fleet_model.project.name,
         spec=spec,
-        created_at=fleet_model.created_at.replace(tzinfo=timezone.utc),
+        created_at=fleet_model.created_at,
         status=fleet_model.status,
         status_message=fleet_model.status_message,
         instances=instances,
```