dstack 0.19.30rc1__py3-none-any.whl → 0.19.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (54)
  1. dstack/_internal/cli/commands/__init__.py +8 -0
  2. dstack/_internal/cli/commands/project.py +27 -20
  3. dstack/_internal/cli/commands/server.py +5 -0
  4. dstack/_internal/cli/services/configurators/fleet.py +20 -6
  5. dstack/_internal/cli/utils/gpu.py +2 -2
  6. dstack/_internal/core/backends/aws/compute.py +13 -5
  7. dstack/_internal/core/backends/aws/resources.py +11 -6
  8. dstack/_internal/core/backends/azure/compute.py +17 -6
  9. dstack/_internal/core/backends/base/compute.py +57 -9
  10. dstack/_internal/core/backends/base/offers.py +1 -0
  11. dstack/_internal/core/backends/cloudrift/compute.py +2 -0
  12. dstack/_internal/core/backends/cudo/compute.py +2 -0
  13. dstack/_internal/core/backends/datacrunch/compute.py +2 -0
  14. dstack/_internal/core/backends/digitalocean_base/compute.py +2 -0
  15. dstack/_internal/core/backends/features.py +5 -0
  16. dstack/_internal/core/backends/gcp/compute.py +87 -38
  17. dstack/_internal/core/backends/gcp/configurator.py +1 -1
  18. dstack/_internal/core/backends/gcp/models.py +14 -1
  19. dstack/_internal/core/backends/gcp/resources.py +35 -12
  20. dstack/_internal/core/backends/hotaisle/compute.py +22 -0
  21. dstack/_internal/core/backends/kubernetes/compute.py +531 -215
  22. dstack/_internal/core/backends/kubernetes/models.py +13 -16
  23. dstack/_internal/core/backends/kubernetes/utils.py +145 -8
  24. dstack/_internal/core/backends/lambdalabs/compute.py +2 -0
  25. dstack/_internal/core/backends/local/compute.py +2 -0
  26. dstack/_internal/core/backends/nebius/compute.py +17 -0
  27. dstack/_internal/core/backends/nebius/configurator.py +15 -0
  28. dstack/_internal/core/backends/nebius/models.py +57 -5
  29. dstack/_internal/core/backends/nebius/resources.py +45 -2
  30. dstack/_internal/core/backends/oci/compute.py +7 -1
  31. dstack/_internal/core/backends/oci/resources.py +8 -3
  32. dstack/_internal/core/backends/template/compute.py.jinja +2 -0
  33. dstack/_internal/core/backends/tensordock/compute.py +2 -0
  34. dstack/_internal/core/backends/vultr/compute.py +2 -0
  35. dstack/_internal/core/compatibility/runs.py +8 -0
  36. dstack/_internal/core/consts.py +2 -0
  37. dstack/_internal/core/models/profiles.py +11 -4
  38. dstack/_internal/core/services/repos.py +101 -11
  39. dstack/_internal/server/background/tasks/common.py +2 -0
  40. dstack/_internal/server/background/tasks/process_fleets.py +75 -17
  41. dstack/_internal/server/background/tasks/process_instances.py +3 -5
  42. dstack/_internal/server/background/tasks/process_running_jobs.py +1 -1
  43. dstack/_internal/server/background/tasks/process_runs.py +27 -23
  44. dstack/_internal/server/background/tasks/process_submitted_jobs.py +107 -54
  45. dstack/_internal/server/services/offers.py +7 -1
  46. dstack/_internal/server/testing/common.py +2 -0
  47. dstack/_internal/server/utils/provisioning.py +3 -10
  48. dstack/_internal/utils/ssh.py +22 -2
  49. dstack/version.py +2 -2
  50. {dstack-0.19.30rc1.dist-info → dstack-0.19.32.dist-info}/METADATA +20 -18
  51. {dstack-0.19.30rc1.dist-info → dstack-0.19.32.dist-info}/RECORD +54 -54
  52. {dstack-0.19.30rc1.dist-info → dstack-0.19.32.dist-info}/WHEEL +0 -0
  53. {dstack-0.19.30rc1.dist-info → dstack-0.19.32.dist-info}/entry_points.txt +0 -0
  54. {dstack-0.19.30rc1.dist-info → dstack-0.19.32.dist-info}/licenses/LICENSE.md +0 -0
dstack/_internal/core/models/profiles.py

@@ -80,14 +80,21 @@ def parse_stop_duration(
 def parse_off_duration(v: Optional[Union[int, str, bool]]) -> Optional[Union[Literal["off"], int]]:
     if v == "off" or v is False:
         return "off"
-    if v is True:
+    if v is True or v is None:
         return None
-    return parse_duration(v)
+    duration = parse_duration(v)
+    if duration < 0:
+        raise ValueError("Duration cannot be negative")
+    return duration


-def parse_idle_duration(v: Optional[Union[int, str]]) -> Optional[int]:
-    if v == "off" or v == -1:
+def parse_idle_duration(v: Optional[Union[int, str, bool]]) -> Optional[int]:
+    # Differs from `parse_off_duration` to accept negative durations as `off`
+    # for backward compatibility.
+    if v == "off" or v is False or v == -1:
         return -1
+    if v is True:
+        return None
     return parse_duration(v)
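For reference, a minimal standalone sketch of the updated `parse_off_duration` behavior. The `parse_duration` stand-in below accepts plain seconds only, while dstack's real helper also accepts duration strings; the asserts only restate the mapping visible in the diff above.

from typing import Literal, Optional, Union

def parse_duration(v: Union[int, str]) -> int:
    # Simplified stand-in for dstack's parse_duration: plain seconds only.
    return int(v)

def parse_off_duration(v: Optional[Union[int, str, bool]]) -> Optional[Union[Literal["off"], int]]:
    if v == "off" or v is False:
        return "off"
    if v is True or v is None:
        return None
    duration = parse_duration(v)
    if duration < 0:
        # Negative durations are rejected instead of being passed through.
        raise ValueError("Duration cannot be negative")
    return duration

# "off"/False map to "off"; True/None map to None; anything else must parse
# to a non-negative duration.
assert parse_off_duration("off") == "off"
assert parse_off_duration(None) is None
assert parse_off_duration(300) == 300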
 
dstack/_internal/core/services/repos.py

@@ -36,24 +36,59 @@ def get_repo_creds_and_default_branch(

     # no auth
     with suppress(InvalidRepoCredentialsError):
-        return _get_repo_creds_and_default_branch_https(url)
+        creds, default_branch = _get_repo_creds_and_default_branch_https(url)
+        logger.debug(
+            "Git repo %s is public. Using no auth. Default branch: %s", repo_url, default_branch
+        )
+        return creds, default_branch

     # ssh key provided by the user or pulled from the server
     if identity_file is not None or private_key is not None:
         if identity_file is not None:
             private_key = _read_private_key(identity_file)
-            return _get_repo_creds_and_default_branch_ssh(url, identity_file, private_key)
+            creds, default_branch = _get_repo_creds_and_default_branch_ssh(
+                url, identity_file, private_key
+            )
+            logger.debug(
+                "Git repo %s is private. Using identity file: %s. Default branch: %s",
+                repo_url,
+                identity_file,
+                default_branch,
+            )
+            return creds, default_branch
         elif private_key is not None:
             with NamedTemporaryFile("w+", 0o600) as f:
                 f.write(private_key)
                 f.flush()
-                return _get_repo_creds_and_default_branch_ssh(url, f.name, private_key)
+                creds, default_branch = _get_repo_creds_and_default_branch_ssh(
+                    url, f.name, private_key
+                )
+                masked_key = "***" + private_key[-10:] if len(private_key) > 10 else "***MASKED***"
+                logger.debug(
+                    "Git repo %s is private. Using private key: %s. Default branch: %s",
+                    repo_url,
+                    masked_key,
+                    default_branch,
+                )
+                return creds, default_branch
         else:
             assert False, "should not reach here"

     # oauth token provided by the user or pulled from the server
     if oauth_token is not None:
-        return _get_repo_creds_and_default_branch_https(url, oauth_token)
+        creds, default_branch = _get_repo_creds_and_default_branch_https(url, oauth_token)
+        masked_token = (
+            len(oauth_token[:-4]) * "*" + oauth_token[-4:]
+            if len(oauth_token) > 4
+            else "***MASKED***"
+        )
+        logger.debug(
+            "Git repo %s is private. Using provided OAuth token: %s. Default branch: %s",
+            repo_url,
+            masked_token,
+            default_branch,
+        )
+        return creds, default_branch

     # key from ssh config
     identities = get_host_config(url.original_host).get("identityfile")
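For reference, the token-masking expression used in the debug logging above, pulled out into a small hypothetical helper (illustration only, not part of dstack's API):

def mask_token(token: str) -> str:
    # Keep only the last four characters; fully mask very short tokens so that
    # nothing useful can be recovered from debug logs.
    if len(token) > 4:
        return len(token[:-4]) * "*" + token[-4:]
    return "***MASKED***"

assert mask_token("ghp_abcdef123456") == "*" * 12 + "3456"
assert mask_token("abc") == "***MASKED***"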
@@ -61,7 +96,16 @@ def get_repo_creds_and_default_branch(
         _identity_file = identities[0]
         with suppress(InvalidRepoCredentialsError):
             _private_key = _read_private_key(_identity_file)
-            return _get_repo_creds_and_default_branch_ssh(url, _identity_file, _private_key)
+            creds, default_branch = _get_repo_creds_and_default_branch_ssh(
+                url, _identity_file, _private_key
+            )
+            logger.debug(
+                "Git repo %s is private. Using SSH config identity file: %s. Default branch: %s",
+                repo_url,
+                _identity_file,
+                default_branch,
+            )
+            return creds, default_branch

     # token from gh config
     if os.path.exists(gh_config_path):
@@ -70,13 +114,35 @@ def get_repo_creds_and_default_branch(
         _oauth_token = gh_hosts.get(url.host, {}).get("oauth_token")
         if _oauth_token is not None:
             with suppress(InvalidRepoCredentialsError):
-                return _get_repo_creds_and_default_branch_https(url, _oauth_token)
+                creds, default_branch = _get_repo_creds_and_default_branch_https(url, _oauth_token)
+                masked_token = (
+                    len(_oauth_token[:-4]) * "*" + _oauth_token[-4:]
+                    if len(_oauth_token) > 4
+                    else "***MASKED***"
+                )
+                logger.debug(
+                    "Git repo %s is private. Using GitHub config token: %s from %s. Default branch: %s",
+                    repo_url,
+                    masked_token,
+                    gh_config_path,
+                    default_branch,
+                )
+                return creds, default_branch

     # default user key
     if os.path.exists(default_ssh_key):
         with suppress(InvalidRepoCredentialsError):
             _private_key = _read_private_key(default_ssh_key)
-            return _get_repo_creds_and_default_branch_ssh(url, default_ssh_key, _private_key)
+            creds, default_branch = _get_repo_creds_and_default_branch_ssh(
+                url, default_ssh_key, _private_key
+            )
+            logger.debug(
+                "Git repo %s is private. Using default identity file: %s. Default branch: %s",
+                repo_url,
+                default_ssh_key,
+                default_branch,
+            )
+            return creds, default_branch

     raise InvalidRepoCredentialsError(
         "No valid default Git credentials found. Pass valid `--token` or `--git-identity`."
@@ -87,8 +153,9 @@ def _get_repo_creds_and_default_branch_ssh(
     url: GitRepoURL, identity_file: PathLike, private_key: str
 ) -> tuple[RemoteRepoCreds, Optional[str]]:
     _url = url.as_ssh()
+    env = _make_git_env_for_creds_check(identity_file=identity_file)
     try:
-        default_branch = _get_repo_default_branch(_url, make_git_env(identity_file=identity_file))
+        default_branch = _get_repo_default_branch(_url, env)
     except GitCommandError as e:
         message = f"Cannot access `{_url}` using the `{identity_file}` private SSH key"
         raise InvalidRepoCredentialsError(message) from e
@@ -104,8 +171,9 @@ def _get_repo_creds_and_default_branch_https(
     url: GitRepoURL, oauth_token: Optional[str] = None
 ) -> tuple[RemoteRepoCreds, Optional[str]]:
     _url = url.as_https()
+    env = _make_git_env_for_creds_check()
     try:
-        default_branch = _get_repo_default_branch(url.as_https(oauth_token), make_git_env())
+        default_branch = _get_repo_default_branch(url.as_https(oauth_token), env)
     except GitCommandError as e:
         message = f"Cannot access `{_url}`"
         if oauth_token is not None:
@@ -120,10 +188,32 @@ def _get_repo_creds_and_default_branch_https(
     return creds, default_branch


+def _make_git_env_for_creds_check(identity_file: Optional[PathLike] = None) -> dict[str, str]:
+    # Our goal is to check if _provided_ creds (if any) are correct, so we need to be sure that
+    # only the provided creds are used, without falling back to any additional mechanisms.
+    # To do this, we:
+    # 1. Disable all configs to ignore any stored creds
+    # 2. Disable askpass to avoid asking for creds interactively or fetching stored creds from
+    #    a non-interactive askpass helper (for example, VS Code sets GIT_ASKPASS to its own helper,
+    #    which silently provides creds to Git).
+    return make_git_env(disable_config=True, disable_askpass=True, identity_file=identity_file)
+
+
 def _get_repo_default_branch(url: str, env: dict[str, str]) -> Optional[str]:
+    # Git shipped by Apple with XCode is patched to support an additional config scope
+    # above "system" called "xcode". There is no option in `git config list` to show this config,
+    # but you can list the merged config (`git config list` without options) and then exclude
+    # all settings listed in `git config list --{system,global,local,worktree}`.
+    # As of time of writing, there are only two settings in the "xcode" config, one of which breaks
+    # our "is repo public?" check, namely "credential.helper=osxkeychain".
+    # As there is no way to disable "xcode" config (no env variable, no CLI option, etc.),
+    # the only way to disable credential helper is to override this specific setting with an empty
+    # string via command line argument: `git -c credential.helper= COMMAND [ARGS ...]`.
+    # See: https://github.com/git/git/commit/3d4355712b9fe77a96ad4ad877d92dc7ff6e0874
+    # See: https://gist.github.com/ChrisTollefson/ab9c0a5d1dd4dd615217345c6936a307
+    _git = git.cmd.Git()(c="credential.helper=")
     # output example: "ref: refs/heads/dev\tHEAD\n545344f77c0df78367085952a97fc3a058eb4c65\tHEAD"
-    # Disable credential helpers to exclude any default credentials from being used
-    output: str = git.cmd.Git()(c="credential.helper=").ls_remote("--symref", url, "HEAD", env=env)
+    output: str = _git.ls_remote("--symref", url, "HEAD", env=env)
     for line in output.splitlines():
         # line format: `<oid> TAB <ref> LF`
         oid, _, ref = line.partition("\t")
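Taken together, the credentials check reduces to one `git ls-remote --symref <url> HEAD` call with credential helpers and askpass disabled. Below is a rough sketch of the equivalent lookup with GitPython; `env` is assumed to come from a helper like `_make_git_env_for_creds_check`, and the branch extraction from the `ref:` line is an assumption based on the loop shown above (the remainder of that loop is not included in this diff).

from typing import Optional
import git  # GitPython

def get_default_branch(url: str, env: dict[str, str]) -> Optional[str]:
    # Equivalent to: git -c credential.helper= ls-remote --symref <url> HEAD
    # The -c override also defeats the "xcode" config scope described above.
    _git = git.cmd.Git()(c="credential.helper=")
    output: str = _git.ls_remote("--symref", url, "HEAD", env=env)
    # Output example: "ref: refs/heads/dev\tHEAD\n<oid>\tHEAD"
    for line in output.splitlines():
        oid, _, ref = line.partition("\t")
        if ref == "HEAD" and oid.startswith("ref: refs/heads/"):
            return oid[len("ref: refs/heads/"):]
    return None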
dstack/_internal/server/background/tasks/common.py

@@ -19,4 +19,6 @@ def get_provisioning_timeout(backend_type: BackendType, instance_type_name: str)
         return timedelta(minutes=20)
     if backend_type == BackendType.VULTR and instance_type_name.startswith("vbm"):
         return timedelta(minutes=55)
+    if backend_type == BackendType.GCP and instance_type_name == "a4-highgpu-8g":
+        return timedelta(minutes=16)
     return timedelta(minutes=10)
dstack/_internal/server/background/tasks/process_fleets.py

@@ -1,10 +1,11 @@
+from collections import defaultdict
 from datetime import timedelta
 from typing import List
 from uuid import UUID

 from sqlalchemy import select, update
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload, load_only
+from sqlalchemy.orm import joinedload, load_only, selectinload

 from dstack._internal.core.models.fleets import FleetSpec, FleetStatus
 from dstack._internal.core.models.instances import InstanceStatus
@@ -37,30 +38,68 @@ MIN_PROCESSING_INTERVAL = timedelta(seconds=30)

 @sentry_utils.instrument_background_task
 async def process_fleets():
-    lock, lockset = get_locker(get_db().dialect_name).get_lockset(FleetModel.__tablename__)
+    fleet_lock, fleet_lockset = get_locker(get_db().dialect_name).get_lockset(
+        FleetModel.__tablename__
+    )
+    instance_lock, instance_lockset = get_locker(get_db().dialect_name).get_lockset(
+        InstanceModel.__tablename__
+    )
     async with get_session_ctx() as session:
-        async with lock:
+        async with fleet_lock, instance_lock:
             res = await session.execute(
                 select(FleetModel)
                 .where(
                     FleetModel.deleted == False,
-                    FleetModel.id.not_in(lockset),
+                    FleetModel.id.not_in(fleet_lockset),
                     FleetModel.last_processed_at
                     < get_current_datetime() - MIN_PROCESSING_INTERVAL,
                 )
-                .options(load_only(FleetModel.id))
+                .options(
+                    load_only(FleetModel.id, FleetModel.name),
+                    selectinload(FleetModel.instances).load_only(InstanceModel.id),
+                )
                 .order_by(FleetModel.last_processed_at.asc())
                 .limit(BATCH_SIZE)
                 .with_for_update(skip_locked=True, key_share=True)
             )
-            fleet_models = list(res.scalars().all())
+            fleet_models = list(res.scalars().unique().all())
             fleet_ids = [fm.id for fm in fleet_models]
+            res = await session.execute(
+                select(InstanceModel)
+                .where(
+                    InstanceModel.id.not_in(instance_lockset),
+                    InstanceModel.fleet_id.in_(fleet_ids),
+                )
+                .options(load_only(InstanceModel.id, InstanceModel.fleet_id))
+                .order_by(InstanceModel.id)
+                .with_for_update(skip_locked=True, key_share=True)
+            )
+            instance_models = list(res.scalars().all())
+            fleet_id_to_locked_instances = defaultdict(list)
+            for instance_model in instance_models:
+                fleet_id_to_locked_instances[instance_model.fleet_id].append(instance_model)
+            # Process only fleets with all instances locked.
+            # Other fleets won't be processed but will still be locked to avoid new transaction.
+            # This should not be problematic as long as process_fleets is quick.
+            fleet_models_to_process = []
+            for fleet_model in fleet_models:
+                if len(fleet_model.instances) == len(fleet_id_to_locked_instances[fleet_model.id]):
+                    fleet_models_to_process.append(fleet_model)
+                else:
+                    logger.debug(
+                        "Fleet %s processing will be skipped: some instance were not locked",
+                        fleet_model.name,
+                    )
             for fleet_id in fleet_ids:
-                lockset.add(fleet_id)
+                fleet_lockset.add(fleet_id)
+            instance_ids = [im.id for im in instance_models]
+            for instance_id in instance_ids:
+                instance_lockset.add(instance_id)
             try:
-                await _process_fleets(session=session, fleet_models=fleet_models)
+                await _process_fleets(session=session, fleet_models=fleet_models_to_process)
             finally:
-                lockset.difference_update(fleet_ids)
+                fleet_lockset.difference_update(fleet_ids)
+                instance_lockset.difference_update(instance_ids)


 async def _process_fleets(session: AsyncSession, fleet_models: List[FleetModel]):
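A minimal sketch of the "process only fleets whose instances were all locked" selection, with plain data classes standing in for the SQLAlchemy models (hypothetical names, illustration only):

from collections import defaultdict
from dataclasses import dataclass
from uuid import UUID

@dataclass
class Instance:
    id: UUID
    fleet_id: UUID

@dataclass
class Fleet:
    id: UUID
    name: str
    instances: list[Instance]

def select_fleets_to_process(fleets: list[Fleet], locked_instances: list[Instance]) -> list[Fleet]:
    # Group the instances that were successfully locked by their fleet...
    fleet_id_to_locked = defaultdict(list)
    for instance in locked_instances:
        fleet_id_to_locked[instance.fleet_id].append(instance)
    # ...and keep only the fleets whose instances were all locked, so consolidation
    # never races with another task holding a lock on one of the fleet's instances.
    return [f for f in fleets if len(f.instances) == len(fleet_id_to_locked[f.id])]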
@@ -99,8 +138,8 @@ def _consolidate_fleet_state_with_spec(session: AsyncSession, fleet_model: Fleet
         return
     if not _is_fleet_ready_for_consolidation(fleet_model):
         return
-    added_instances = _maintain_fleet_nodes_min(session, fleet_model, fleet_spec)
-    if added_instances:
+    changed_instances = _maintain_fleet_nodes_in_min_max_range(session, fleet_model, fleet_spec)
+    if changed_instances:
         fleet_model.consolidation_attempt += 1
     else:
         # The fleet is already consolidated or consolidation is in progress.
@@ -138,28 +177,47 @@ def _get_consolidation_retry_delay(consolidation_attempt: int) -> timedelta:
     return _CONSOLIDATION_RETRY_DELAYS[-1]


-def _maintain_fleet_nodes_min(
+def _maintain_fleet_nodes_in_min_max_range(
     session: AsyncSession,
     fleet_model: FleetModel,
     fleet_spec: FleetSpec,
 ) -> bool:
     """
-    Ensures the fleet has at least `nodes.min` instances.
-    Returns `True` if retried or added new instances and `False` otherwise.
+    Ensures the fleet has at least `nodes.min` and at most `nodes.max` instances.
+    Returns `True` if retried, added new instances, or terminated redundant instances and `False` otherwise.
     """
     assert fleet_spec.configuration.nodes is not None
     for instance in fleet_model.instances:
         # Delete terminated but not deleted instances since
         # they are going to be replaced with new pending instances.
         if instance.status == InstanceStatus.TERMINATED and not instance.deleted:
-            # It's safe to modify instances without instance lock since
-            # no other task modifies already terminated instances.
             instance.deleted = True
             instance.deleted_at = get_current_datetime()
     active_instances = [i for i in fleet_model.instances if not i.deleted]
     active_instances_num = len(active_instances)
     if active_instances_num >= fleet_spec.configuration.nodes.min:
-        return False
+        if (
+            fleet_spec.configuration.nodes.max is None
+            or active_instances_num <= fleet_spec.configuration.nodes.max
+        ):
+            return False
+        # Fleet has more instances than allowed by nodes.max.
+        # This is possible due to race conditions (e.g. provisioning jobs in a fleet concurrently)
+        # or if nodes.max is updated.
+        nodes_redundant = active_instances_num - fleet_spec.configuration.nodes.max
+        for instance in fleet_model.instances:
+            if nodes_redundant == 0:
+                break
+            if instance.status in [InstanceStatus.IDLE]:
+                instance.status = InstanceStatus.TERMINATING
+                instance.termination_reason = "Fleet has too many instances"
+                nodes_redundant -= 1
+                logger.info(
+                    "Terminating instance %s: %s",
+                    instance.name,
+                    instance.termination_reason,
+                )
+        return True
     nodes_missing = fleet_spec.configuration.nodes.min - active_instances_num
     for i in range(nodes_missing):
         instance_model = create_fleet_instance_model(
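In isolation, the min/max reconciliation decision reduces to a count comparison. A rough sketch under the simplifying assumption that we only compute how many instances to add or terminate (hypothetical helper, not dstack code; the real function also creates pending instances and terminates idle ones):

from typing import Optional

def nodes_delta(active: int, nodes_min: int, nodes_max: Optional[int]) -> int:
    # Positive: that many instances are missing and should be provisioned.
    # Negative: that many redundant (idle) instances should be terminated.
    # Zero: the fleet is already within the [min, max] range.
    if active < nodes_min:
        return nodes_min - active
    if nodes_max is not None and active > nodes_max:
        return -(active - nodes_max)
    return 0

assert nodes_delta(1, 2, None) == 1   # below min: add one
assert nodes_delta(5, 2, 3) == -2     # above max: terminate two idle instances
assert nodes_delta(2, 2, 4) == 0      # already consolidated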
dstack/_internal/server/background/tasks/process_instances.py

@@ -259,9 +259,7 @@ async def _add_remote(instance: InstanceModel) -> None:
     if instance.status == InstanceStatus.PENDING:
         instance.status = InstanceStatus.PROVISIONING

-    retry_duration_deadline = instance.created_at.replace(
-        tzinfo=datetime.timezone.utc
-    ) + timedelta(seconds=PROVISIONING_TIMEOUT_SECONDS)
+    retry_duration_deadline = instance.created_at + timedelta(seconds=PROVISIONING_TIMEOUT_SECONDS)
     if retry_duration_deadline < get_current_datetime():
         instance.status = InstanceStatus.TERMINATED
         instance.termination_reason = "Provisioning timeout expired"
@@ -307,7 +305,7 @@ async def _add_remote(instance: InstanceModel) -> None:
        )
        deploy_timeout = 20 * 60  # 20 minutes
        result = await asyncio.wait_for(future, timeout=deploy_timeout)
-       health, host_info, cpu_arch = result
+       health, host_info, arch = result
    except (asyncio.TimeoutError, TimeoutError) as e:
        raise ProvisioningError(f"Deploy timeout: {e}") from e
    except Exception as e:
@@ -327,7 +325,7 @@ async def _add_remote(instance: InstanceModel) -> None:
        instance.status = InstanceStatus.PENDING
        return

-   instance_type = host_info_to_instance_type(host_info, cpu_arch)
+   instance_type = host_info_to_instance_type(host_info, arch)
    instance_network = None
    internal_ip = None
    try:
dstack/_internal/server/background/tasks/process_running_jobs.py

@@ -1139,7 +1139,7 @@ def _patch_base_image_for_aws_efa(
     efa_enabled_patterns = [
         # TODO: p6-b200 isn't supported yet in gpuhunt
         r"^p6-b200\.(48xlarge)$",
-        r"^p5\.(48xlarge)$",
+        r"^p5\.(4xlarge|48xlarge)$",
         r"^p5e\.(48xlarge)$",
         r"^p5en\.(48xlarge)$",
         r"^p4d\.(24xlarge)$",
dstack/_internal/server/background/tasks/process_runs.py

@@ -256,8 +256,8 @@ async def _process_active_run(session: AsyncSession, run_model: RunModel):
     for replica_num, job_models in group_jobs_by_replica_latest(run_model.jobs):
         replica_statuses: Set[RunStatus] = set()
         replica_needs_retry = False
-
         replica_active = True
+        jobs_done_num = 0
         for job_model in job_models:
             job = find_job(run.jobs, job_model.replica_num, job_model.job_num)
             if (
@@ -272,8 +272,7 @@ async def _process_active_run(session: AsyncSession, run_model: RunModel):
             ):
                 # the job is done or going to be done
                 replica_statuses.add(RunStatus.DONE)
-                # for some reason the replica is done, it's not active
-                replica_active = False
+                jobs_done_num += 1
             elif job_model.termination_reason == JobTerminationReason.SCALED_DOWN:
                 # the job was scaled down
                 replica_active = False
@@ -313,26 +312,14 @@ async def _process_active_run(session: AsyncSession, run_model: RunModel):
         if not replica_needs_retry or retry_single_job:
             run_statuses.update(replica_statuses)

-        if replica_active:
-            # submitted_at = replica created
-            replicas_info.append(
-                autoscalers.ReplicaInfo(
-                    active=True,
-                    timestamp=min(job.submitted_at for job in job_models).replace(
-                        tzinfo=datetime.timezone.utc
-                    ),
-                )
-            )
-        else:
-            # last_processed_at = replica scaled down
-            replicas_info.append(
-                autoscalers.ReplicaInfo(
-                    active=False,
-                    timestamp=max(job.last_processed_at for job in job_models).replace(
-                        tzinfo=datetime.timezone.utc
-                    ),
-                )
-            )
+        if jobs_done_num == len(job_models):
+            # Consider replica inactive if all its jobs are done for some reason.
+            # If only some jobs are done, replica is considered active to avoid
+            # provisioning new replicas for partially done multi-node tasks.
+            replica_active = False
+
+        replica_info = _get_replica_info(job_models, replica_active)
+        replicas_info.append(replica_info)

     termination_reason: Optional[RunTerminationReason] = None
     if RunStatus.FAILED in run_statuses:
@@ -410,6 +397,23 @@ async def _process_active_run(session: AsyncSession, run_model: RunModel):
         run_model.resubmission_attempt += 1


+def _get_replica_info(
+    replica_job_models: list[JobModel],
+    replica_active: bool,
+) -> autoscalers.ReplicaInfo:
+    if replica_active:
+        # submitted_at = replica created
+        return autoscalers.ReplicaInfo(
+            active=True,
+            timestamp=min(job.submitted_at for job in replica_job_models),
+        )
+    # last_processed_at = replica scaled down
+    return autoscalers.ReplicaInfo(
+        active=False,
+        timestamp=max(job.last_processed_at for job in replica_job_models),
+    )
+
+
 async def _handle_run_replicas(
     session: AsyncSession,
     run_model: RunModel,
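A minimal sketch of the replica-activity rule introduced above, with a plain data class standing in for JobModel (hypothetical names; timestamps are assumed to be timezone-aware, which the removed `.replace(tzinfo=...)` calls suggest the models now store):

from dataclasses import dataclass
from datetime import datetime

@dataclass
class Job:
    done: bool
    submitted_at: datetime
    last_processed_at: datetime

def replica_info(jobs: list[Job]) -> tuple[bool, datetime]:
    # A replica counts as inactive only when *all* of its jobs are done;
    # a partially done multi-node task keeps its replica active so the
    # autoscaler does not provision a replacement replica for it.
    active = not all(job.done for job in jobs)
    if active:
        # submitted_at of the earliest job = when the replica was created
        return True, min(job.submitted_at for job in jobs)
    # last_processed_at of the latest job = when the replica wound down
    return False, max(job.last_processed_at for job in jobs)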