dstack 0.19.30rc1__py3-none-any.whl → 0.19.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic; see the package registry's advisory page for more details.

Files changed (47)
  1. dstack/_internal/cli/commands/__init__.py +8 -0
  2. dstack/_internal/cli/commands/project.py +27 -20
  3. dstack/_internal/cli/commands/server.py +5 -0
  4. dstack/_internal/cli/services/configurators/fleet.py +20 -6
  5. dstack/_internal/cli/utils/gpu.py +2 -2
  6. dstack/_internal/core/backends/aws/compute.py +13 -5
  7. dstack/_internal/core/backends/aws/resources.py +11 -6
  8. dstack/_internal/core/backends/azure/compute.py +17 -6
  9. dstack/_internal/core/backends/base/compute.py +57 -9
  10. dstack/_internal/core/backends/base/offers.py +1 -0
  11. dstack/_internal/core/backends/cloudrift/compute.py +2 -0
  12. dstack/_internal/core/backends/cudo/compute.py +2 -0
  13. dstack/_internal/core/backends/datacrunch/compute.py +2 -0
  14. dstack/_internal/core/backends/digitalocean_base/compute.py +2 -0
  15. dstack/_internal/core/backends/features.py +5 -0
  16. dstack/_internal/core/backends/gcp/compute.py +87 -38
  17. dstack/_internal/core/backends/gcp/configurator.py +1 -1
  18. dstack/_internal/core/backends/gcp/models.py +14 -1
  19. dstack/_internal/core/backends/gcp/resources.py +35 -12
  20. dstack/_internal/core/backends/hotaisle/compute.py +2 -0
  21. dstack/_internal/core/backends/kubernetes/compute.py +466 -213
  22. dstack/_internal/core/backends/kubernetes/models.py +13 -16
  23. dstack/_internal/core/backends/kubernetes/utils.py +145 -8
  24. dstack/_internal/core/backends/lambdalabs/compute.py +2 -0
  25. dstack/_internal/core/backends/local/compute.py +2 -0
  26. dstack/_internal/core/backends/nebius/compute.py +2 -0
  27. dstack/_internal/core/backends/oci/compute.py +7 -1
  28. dstack/_internal/core/backends/oci/resources.py +8 -3
  29. dstack/_internal/core/backends/template/compute.py.jinja +2 -0
  30. dstack/_internal/core/backends/tensordock/compute.py +2 -0
  31. dstack/_internal/core/backends/vultr/compute.py +2 -0
  32. dstack/_internal/core/consts.py +2 -0
  33. dstack/_internal/core/services/repos.py +101 -11
  34. dstack/_internal/server/background/tasks/common.py +2 -0
  35. dstack/_internal/server/background/tasks/process_instances.py +2 -2
  36. dstack/_internal/server/background/tasks/process_running_jobs.py +1 -1
  37. dstack/_internal/server/background/tasks/process_submitted_jobs.py +51 -41
  38. dstack/_internal/server/services/offers.py +7 -1
  39. dstack/_internal/server/testing/common.py +2 -0
  40. dstack/_internal/server/utils/provisioning.py +3 -10
  41. dstack/_internal/utils/ssh.py +22 -2
  42. dstack/version.py +2 -2
  43. {dstack-0.19.30rc1.dist-info → dstack-0.19.31.dist-info}/METADATA +17 -13
  44. {dstack-0.19.30rc1.dist-info → dstack-0.19.31.dist-info}/RECORD +47 -47
  45. {dstack-0.19.30rc1.dist-info → dstack-0.19.31.dist-info}/WHEEL +0 -0
  46. {dstack-0.19.30rc1.dist-info → dstack-0.19.31.dist-info}/entry_points.txt +0 -0
  47. {dstack-0.19.30rc1.dist-info → dstack-0.19.31.dist-info}/licenses/LICENSE.md +0 -0
@@ -3,7 +3,7 @@ import itertools
3
3
  import math
4
4
  import uuid
5
5
  from datetime import datetime, timedelta
6
- from typing import List, Optional, Tuple
6
+ from typing import List, Optional
7
7
 
8
8
  from sqlalchemy import and_, func, not_, or_, select
9
9
  from sqlalchemy.ext.asyncio import AsyncSession
@@ -25,6 +25,7 @@ from dstack._internal.core.models.instances import InstanceOfferWithAvailability
25
25
  from dstack._internal.core.models.profiles import (
26
26
  DEFAULT_RUN_TERMINATION_IDLE_TIME,
27
27
  CreationPolicy,
28
+ Profile,
28
29
  TerminationPolicy,
29
30
  )
30
31
  from dstack._internal.core.models.resources import Memory
@@ -34,6 +35,7 @@ from dstack._internal.core.models.runs import (
34
35
  JobRuntimeData,
35
36
  JobStatus,
36
37
  JobTerminationReason,
38
+ Requirements,
37
39
  Run,
38
40
  RunSpec,
39
41
  )
@@ -186,7 +188,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
186
188
  project = run_model.project
187
189
  run = run_model_to_run(run_model)
188
190
  run_spec = run.run_spec
189
- profile = run_spec.merged_profile
191
+ run_profile = run_spec.merged_profile
190
192
  job = find_job(run.jobs, job_model.replica_num, job_model.job_num)
191
193
  multinode = job.job_spec.jobs_per_replica > 1
192
194
 
@@ -333,7 +335,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
333
335
  job_model.status = JobStatus.PROVISIONING
334
336
  else:
335
337
  # Assigned no instance, create a new one
336
- if profile.creation_policy == CreationPolicy.REUSE:
338
+ if run_profile.creation_policy == CreationPolicy.REUSE:
337
339
  logger.debug("%s: reuse instance failed", fmt(job_model))
338
340
  job_model.status = JobStatus.TERMINATING
339
341
  job_model.termination_reason = JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY
@@ -362,7 +364,7 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
362
364
  return
363
365
 
364
366
  logger.info("%s: now is provisioning a new instance", fmt(job_model))
365
- job_provisioning_data, offer = run_job_result
367
+ job_provisioning_data, offer, effective_profile, _ = run_job_result
366
368
  job_model.job_provisioning_data = job_provisioning_data.json()
367
369
  job_model.status = JobStatus.PROVISIONING
368
370
  if fleet_model is None:
@@ -382,12 +384,11 @@ async def _process_submitted_job(session: AsyncSession, job_model: JobModel):
382
384
  instance = _create_instance_model_for_job(
383
385
  project=project,
384
386
  fleet_model=fleet_model,
385
- run_spec=run_spec,
386
387
  job_model=job_model,
387
- job=job,
388
388
  job_provisioning_data=job_provisioning_data,
389
389
  offer=offer,
390
390
  instance_num=instance_num,
391
+ profile=effective_profile,
391
392
  )
392
393
  job_model.job_runtime_data = _prepare_job_runtime_data(offer, multinode).json()
393
394
  # Both this task and process_fleets can add instances to fleets.
@@ -546,23 +547,22 @@ async def _find_optimal_fleet_with_offers(
546
547
  fleet_cheapest_pool_offer = fleet_instances_with_pool_offers[0][1].price
547
548
 
548
549
  candidate_fleet = fleet_model_to_fleet(candidate_fleet_model)
549
- profile = combine_fleet_and_run_profiles(
550
- candidate_fleet.spec.merged_profile, run_spec.merged_profile
551
- )
552
- fleet_requirements = get_fleet_requirements(candidate_fleet.spec)
553
- requirements = combine_fleet_and_run_requirements(
554
- fleet_requirements, job.job_spec.requirements
555
- )
556
- multinode = (
557
- candidate_fleet.spec.configuration.placement == InstanceGroupPlacement.CLUSTER
558
- or job.job_spec.jobs_per_replica > 1
559
- )
550
+ profile = None
551
+ requirements = None
552
+ try:
553
+ profile, requirements = _get_run_profile_and_requirements_in_fleet(
554
+ job=job,
555
+ run_spec=run_spec,
556
+ fleet=candidate_fleet,
557
+ )
558
+ except ValueError:
559
+ pass
560
560
  fleet_backend_offers = []
561
- if (
562
- _check_can_create_new_instance_in_fleet(candidate_fleet)
563
- and profile is not None
564
- and requirements is not None
565
- ):
561
+ if profile is not None and requirements is not None:
562
+ multinode = (
563
+ candidate_fleet.spec.configuration.placement == InstanceGroupPlacement.CLUSTER
564
+ or job.job_spec.jobs_per_replica > 1
565
+ )
566
566
  fleet_backend_offers = await get_offers_by_requirements(
567
567
  project=project,
568
568
  profile=profile,
@@ -704,7 +704,7 @@ async def _run_job_on_new_instance(
704
704
  master_job_provisioning_data: Optional[JobProvisioningData] = None,
705
705
  volumes: Optional[List[List[Volume]]] = None,
706
706
  fleet_model: Optional[FleetModel] = None,
707
- ) -> Optional[Tuple[JobProvisioningData, InstanceOfferWithAvailability]]:
707
+ ) -> Optional[tuple[JobProvisioningData, InstanceOfferWithAvailability, Profile, Requirements]]:
708
708
  if volumes is None:
709
709
  volumes = []
710
710
  profile = run.run_spec.merged_profile
@@ -712,21 +712,14 @@ async def _run_job_on_new_instance(
712
712
  fleet = None
713
713
  if fleet_model is not None:
714
714
  fleet = fleet_model_to_fleet(fleet_model)
715
- if not _check_can_create_new_instance_in_fleet(fleet):
716
- logger.debug(
717
- "%s: cannot fit new instance into fleet %s", fmt(job_model), fleet_model.name
718
- )
719
- return None
720
- profile = combine_fleet_and_run_profiles(fleet.spec.merged_profile, profile)
721
- if profile is None:
722
- logger.debug("%s: cannot combine fleet %s profile", fmt(job_model), fleet_model.name)
723
- return None
724
- fleet_requirements = get_fleet_requirements(fleet.spec)
725
- requirements = combine_fleet_and_run_requirements(fleet_requirements, requirements)
726
- if requirements is None:
727
- logger.debug(
728
- "%s: cannot combine fleet %s requirements", fmt(job_model), fleet_model.name
715
+ try:
716
+ profile, requirements = _get_run_profile_and_requirements_in_fleet(
717
+ job=job,
718
+ run_spec=run.run_spec,
719
+ fleet=fleet,
729
720
  )
721
+ except ValueError as e:
722
+ logger.debug("%s: %s", fmt(job_model), e.args[0])
730
723
  return None
731
724
  # TODO: Respect fleet provisioning properties such as tags
732
725
 
@@ -766,7 +759,7 @@ async def _run_job_on_new_instance(
766
759
  project_ssh_private_key,
767
760
  offer_volumes,
768
761
  )
769
- return job_provisioning_data, offer
762
+ return job_provisioning_data, offer, profile, requirements
770
763
  except BackendError as e:
771
764
  logger.warning(
772
765
  "%s: %s launch in %s/%s failed: %s",
@@ -789,6 +782,25 @@ async def _run_job_on_new_instance(
789
782
  return None
790
783
 
791
784
 
785
+ def _get_run_profile_and_requirements_in_fleet(
786
+ job: Job,
787
+ run_spec: RunSpec,
788
+ fleet: Fleet,
789
+ ) -> tuple[Profile, Requirements]:
790
+ if not _check_can_create_new_instance_in_fleet(fleet):
791
+ raise ValueError("Cannot fit new instance into fleet")
792
+ profile = combine_fleet_and_run_profiles(fleet.spec.merged_profile, run_spec.merged_profile)
793
+ if profile is None:
794
+ raise ValueError("Cannot combine fleet profile")
795
+ fleet_requirements = get_fleet_requirements(fleet.spec)
796
+ requirements = combine_fleet_and_run_requirements(
797
+ fleet_requirements, job.job_spec.requirements
798
+ )
799
+ if requirements is None:
800
+ raise ValueError("Cannot combine fleet requirements")
801
+ return profile, requirements
802
+
803
+
792
804
  def _check_can_create_new_instance_in_fleet(fleet: Fleet) -> bool:
793
805
  if fleet.spec.configuration.ssh_config is not None:
794
806
  return False
@@ -857,14 +869,12 @@ async def _get_next_instance_num(session: AsyncSession, fleet_model: FleetModel)
857
869
  def _create_instance_model_for_job(
858
870
  project: ProjectModel,
859
871
  fleet_model: FleetModel,
860
- run_spec: RunSpec,
861
872
  job_model: JobModel,
862
- job: Job,
863
873
  job_provisioning_data: JobProvisioningData,
864
874
  offer: InstanceOfferWithAvailability,
865
875
  instance_num: int,
876
+ profile: Profile,
866
877
  ) -> InstanceModel:
867
- profile = run_spec.merged_profile
868
878
  if not job_provisioning_data.dockerized:
869
879
  # terminate vastai/k8s instances immediately
870
880
  termination_policy = TerminationPolicy.DESTROY_AFTER_IDLE
@@ -7,6 +7,7 @@ from dstack._internal.core.backends.base.compute import ComputeWithPlacementGrou
7
7
  from dstack._internal.core.backends.features import (
8
8
  BACKENDS_WITH_CREATE_INSTANCE_SUPPORT,
9
9
  BACKENDS_WITH_MULTINODE_SUPPORT,
10
+ BACKENDS_WITH_PRIVILEGED_SUPPORT,
10
11
  BACKENDS_WITH_RESERVATION_SUPPORT,
11
12
  )
12
13
  from dstack._internal.core.models.backends.base import BackendType
@@ -67,7 +68,12 @@ async def get_offers_by_requirements(
67
68
  backend_types = BACKENDS_WITH_MULTINODE_SUPPORT
68
69
  backend_types = [b for b in backend_types if b in BACKENDS_WITH_MULTINODE_SUPPORT]
69
70
 
70
- if privileged or instance_mounts:
71
+ if privileged:
72
+ if backend_types is None:
73
+ backend_types = BACKENDS_WITH_PRIVILEGED_SUPPORT
74
+ backend_types = [b for b in backend_types if b in BACKENDS_WITH_PRIVILEGED_SUPPORT]
75
+
76
+ if instance_mounts:
71
77
  if backend_types is None:
72
78
  backend_types = BACKENDS_WITH_CREATE_INSTANCE_SUPPORT
73
79
  backend_types = [b for b in backend_types if b in BACKENDS_WITH_CREATE_INSTANCE_SUPPORT]
@@ -16,6 +16,7 @@ from dstack._internal.core.backends.base.compute import (
16
16
  ComputeWithMultinodeSupport,
17
17
  ComputeWithPlacementGroupSupport,
18
18
  ComputeWithPrivateGatewaySupport,
19
+ ComputeWithPrivilegedSupport,
19
20
  ComputeWithReservationSupport,
20
21
  ComputeWithVolumeSupport,
21
22
  )
@@ -1131,6 +1132,7 @@ class AsyncContextManager:
1131
1132
  class ComputeMockSpec(
1132
1133
  Compute,
1133
1134
  ComputeWithCreateInstanceSupport,
1135
+ ComputeWithPrivilegedSupport,
1134
1136
  ComputeWithMultinodeSupport,
1135
1137
  ComputeWithReservationSupport,
1136
1138
  ComputeWithPlacementGroupSupport,
@@ -6,7 +6,7 @@ from textwrap import dedent
6
6
  from typing import Any, Dict, Generator, List, Optional
7
7
 
8
8
  import paramiko
9
- from gpuhunt import AcceleratorVendor, CPUArchitecture, correct_gpu_memory_gib
9
+ from gpuhunt import AcceleratorVendor, correct_gpu_memory_gib
10
10
 
11
11
  from dstack._internal.core.backends.base.compute import GoArchType, normalize_arch
12
12
  from dstack._internal.core.consts import DSTACK_SHIM_HTTP_PORT
@@ -248,14 +248,7 @@ def _get_shim_healthcheck(client: paramiko.SSHClient) -> Optional[str]:
248
248
  return out
249
249
 
250
250
 
251
- def host_info_to_instance_type(host_info: Dict[str, Any], cpu_arch: GoArchType) -> InstanceType:
252
- _cpu_arch: CPUArchitecture
253
- if cpu_arch == "amd64":
254
- _cpu_arch = CPUArchitecture.X86
255
- elif cpu_arch == "arm64":
256
- _cpu_arch = CPUArchitecture.ARM
257
- else:
258
- raise ValueError(f"Unexpected cpu_arch: {cpu_arch}")
251
+ def host_info_to_instance_type(host_info: Dict[str, Any], arch: GoArchType) -> InstanceType:
259
252
  gpu_count = host_info.get("gpu_count", 0)
260
253
  if gpu_count > 0:
261
254
  gpu_vendor = AcceleratorVendor.cast(host_info.get("gpu_vendor", "nvidia"))
@@ -280,7 +273,7 @@ def host_info_to_instance_type(host_info: Dict[str, Any], cpu_arch: GoArchType)
280
273
  instance_type = InstanceType(
281
274
  name="instance",
282
275
  resources=Resources(
283
- cpu_arch=_cpu_arch,
276
+ cpu_arch=arch.to_cpu_architecture(),
284
277
  cpus=host_info["cpus"],
285
278
  memory_mib=host_info["memory"] / 1024 / 1024,
286
279
  spot=False,
@@ -50,8 +50,28 @@ def make_ssh_command_for_git(identity_file: PathLike) -> str:
50
50
  )
51
51
 
52
52
 
53
- def make_git_env(*, identity_file: Optional[PathLike] = None) -> dict[str, str]:
54
- env: dict[str, str] = {"GIT_TERMINAL_PROMPT": "0"}
53
+ def make_git_env(
54
+ *,
55
+ disable_prompt: bool = True,
56
+ disable_askpass: bool = False,
57
+ disable_config: bool = False,
58
+ identity_file: Optional[PathLike] = None,
59
+ ) -> dict[str, str]:
60
+ env: dict[str, str] = {}
61
+ if disable_prompt:
62
+ # Fail with error instead of prompting on the terminal (e.g., when asking for
63
+ # HTTP authentication)
64
+ env["GIT_TERMINAL_PROMPT"] = "0"
65
+ if disable_askpass:
66
+ env["GIT_ASKPASS"] = ""
67
+ env["SSH_ASKPASS"] = ""
68
+ if disable_config:
69
+ # Disable system-wide config (usually /etc/gitconfig)
70
+ env["GIT_CONFIG_SYSTEM"] = os.devnull
71
+ # Disable user (aka "global") config ($XDG_CONFIG_HOME/git/config or ~/.git/config)
72
+ env["GIT_CONFIG_GLOBAL"] = os.devnull
73
+ # Disable repo (aka "local") config (./.git/config)
74
+ env["GIT_DIR"] = os.devnull
55
75
  if identity_file is not None:
56
76
  env["GIT_SSH_COMMAND"] = make_ssh_command_for_git(identity_file)
57
77
  return env
dstack/version.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.19.30rc1"
1
+ __version__ = "0.19.31"
2
2
  __is_release__ = True
3
- base_image = "0.10"
3
+ base_image = "0.11rc2"
4
4
  base_image_ubuntu_version = "22.04"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dstack
3
- Version: 0.19.30rc1
3
+ Version: 0.19.31
4
4
  Summary: dstack is an open-source orchestration engine for running AI workloads on any cloud or on-premises.
5
5
  Project-URL: Homepage, https://dstack.ai
6
6
  Project-URL: Source, https://github.com/dstackai/dstack
@@ -331,24 +331,28 @@ Description-Content-Type: text/markdown
331
331
 
332
332
  </div>
333
333
 
334
- `dstack` is an open-source container orchestrator that simplifies workload orchestration and drives GPU utilization for ML teams. It works with any GPU cloud, on-prem cluster, or accelerated hardware.
334
+ `dstack` is a unified control plane for GPU provisioning and orchestration that works with any GPU cloud, Kubernetes, or on-prem clusters.
335
335
 
336
- #### Accelerators
336
+ It streamlines development, training, and inference, and is compatible with any hardware, open-source tools, and frameworks.
337
+
338
+ #### Hardware
337
339
 
338
340
  `dstack` supports `NVIDIA`, `AMD`, `Google TPU`, `Intel Gaudi`, and `Tenstorrent` accelerators out of the box.
339
341
 
340
342
  ## Latest news ✨
343
+ - [2025/09] [dstack 0.19.27: Offers UI, Digital Ocean and AMD Developer Cloud](https://github.com/dstackai/dstack/releases/tag/0.19.27)
344
+ - [2025/08] [dstack 0.19.26: Repos – explicit repo configuration via YAML](https://github.com/dstackai/dstack/releases/tag/0.19.26)
345
+ - [2025/08] [dstack 0.19.25: `dstack offer` CLI command](https://github.com/dstackai/dstack/releases/tag/0.19.25)
346
+ - [2025/08] [dstack 0.19.22: Service probes, GPU health-checks, Tenstorrent Galaxy, Secrets UI](https://github.com/dstackai/dstack/releases/tag/0.19.22)
347
+ - [2025/07] [dstack 0.19.21: Scheduled tasks](https://github.com/dstackai/dstack/releases/tag/0.19.21)
341
348
  - [2025/07] [dstack 0.19.17: Secrets, Files, Rolling deployment](https://github.com/dstackai/dstack/releases/tag/0.19.17)
342
349
  - [2025/06] [dstack 0.19.16: Docker in Docker, CloudRift](https://github.com/dstackai/dstack/releases/tag/0.19.16)
343
350
  - [2025/06] [dstack 0.19.13: InfiniBand support in default images](https://github.com/dstackai/dstack/releases/tag/0.19.13)
344
351
  - [2025/06] [dstack 0.19.12: Simplified use of MPI](https://github.com/dstackai/dstack/releases/tag/0.19.12)
345
- - [2025/05] [dstack 0.19.10: Priorities](https://github.com/dstackai/dstack/releases/tag/0.19.10)
346
- - [2025/05] [dstack 0.19.8: Nebius clusters, GH200 on Lambda](https://github.com/dstackai/dstack/releases/tag/0.19.8)
347
- - [2025/04] [dstack 0.19.6: Tenstorrent, Plugins](https://github.com/dstackai/dstack/releases/tag/0.19.6)
348
352
 
349
353
  ## How does it work?
350
354
 
351
- <img src="https://dstack.ai/static-assets/static-assets/images/dstack-architecture-diagram-v10.svg" width="750" />
355
+ <img src="https://dstack.ai/static-assets/static-assets/images/dstack-architecture-diagram-v11.svg" width="750" />
352
356
 
353
357
  ### Installation
354
358
 
@@ -356,15 +360,15 @@ Description-Content-Type: text/markdown
356
360
 
357
361
  #### Set up the server
358
362
 
359
- ##### (Optional) Configure backends
363
+ ##### Configure backends
364
+
365
+ To orchestrate compute across cloud providers or existing Kubernetes clusters, you need to configure backends.
360
366
 
361
- To use `dstack` with cloud providers, configure backends
362
- via the `~/.dstack/server/config.yml` file.
367
+ Backends can be set up in `~/.dstack/server/config.yml` or through the [project settings page](../concepts/projects.md#backends) in the UI.
363
368
 
364
- For more details on how to configure backends, check [Backends](https://dstack.ai/docs/concepts/backends).
369
+ For more details, see [Backends](../concepts/backends.md).
365
370
 
366
- > For using `dstack` with on-prem servers, create [SSH fleets](https://dstack.ai/docs/concepts/fleets#ssh)
367
- > once the server is up.
371
+ > When using `dstack` with on-prem servers, backend configuration isn’t required. Simply create [SSH fleets](../concepts/fleets.md#ssh) once the server is up.
368
372
 
369
373
  ##### Start the server
370
374