dstack 0.19.4rc3__py3-none-any.whl → 0.19.6rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/attach.py +22 -20
- dstack/_internal/cli/commands/offer.py +116 -0
- dstack/_internal/cli/main.py +2 -0
- dstack/_internal/cli/services/configurators/base.py +1 -2
- dstack/_internal/cli/services/configurators/fleet.py +43 -20
- dstack/_internal/cli/services/configurators/run.py +3 -3
- dstack/_internal/cli/utils/run.py +43 -38
- dstack/_internal/core/backends/aws/auth.py +1 -2
- dstack/_internal/core/backends/aws/compute.py +24 -9
- dstack/_internal/core/backends/aws/configurator.py +2 -3
- dstack/_internal/core/backends/aws/resources.py +10 -0
- dstack/_internal/core/backends/azure/auth.py +1 -2
- dstack/_internal/core/backends/azure/compute.py +15 -5
- dstack/_internal/core/backends/azure/configurator.py +4 -5
- dstack/_internal/core/backends/azure/resources.py +14 -0
- dstack/_internal/core/backends/base/compute.py +99 -31
- dstack/_internal/core/backends/gcp/auth.py +1 -2
- dstack/_internal/core/backends/gcp/compute.py +58 -14
- dstack/_internal/core/backends/gcp/configurator.py +2 -3
- dstack/_internal/core/backends/gcp/features/tcpx.py +31 -0
- dstack/_internal/core/backends/gcp/resources.py +10 -0
- dstack/_internal/core/backends/nebius/compute.py +6 -2
- dstack/_internal/core/backends/nebius/configurator.py +4 -10
- dstack/_internal/core/backends/nebius/models.py +14 -1
- dstack/_internal/core/backends/nebius/resources.py +91 -10
- dstack/_internal/core/backends/oci/auth.py +1 -2
- dstack/_internal/core/backends/oci/configurator.py +1 -2
- dstack/_internal/core/backends/runpod/compute.py +1 -1
- dstack/_internal/core/errors.py +4 -0
- dstack/_internal/core/models/common.py +2 -14
- dstack/_internal/core/models/configurations.py +24 -2
- dstack/_internal/core/models/envs.py +2 -2
- dstack/_internal/core/models/fleets.py +34 -3
- dstack/_internal/core/models/gateways.py +18 -4
- dstack/_internal/core/models/instances.py +2 -1
- dstack/_internal/core/models/profiles.py +12 -0
- dstack/_internal/core/models/runs.py +6 -0
- dstack/_internal/core/models/secrets.py +1 -1
- dstack/_internal/core/models/volumes.py +17 -1
- dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +3 -3
- dstack/_internal/proxy/gateway/services/nginx.py +0 -1
- dstack/_internal/proxy/gateway/services/registry.py +0 -1
- dstack/_internal/server/background/tasks/process_instances.py +12 -9
- dstack/_internal/server/background/tasks/process_running_jobs.py +66 -15
- dstack/_internal/server/routers/fleets.py +22 -0
- dstack/_internal/server/routers/runs.py +1 -0
- dstack/_internal/server/schemas/fleets.py +12 -2
- dstack/_internal/server/schemas/runner.py +6 -0
- dstack/_internal/server/schemas/runs.py +3 -0
- dstack/_internal/server/services/docker.py +1 -2
- dstack/_internal/server/services/fleets.py +30 -12
- dstack/_internal/server/services/gateways/__init__.py +1 -0
- dstack/_internal/server/services/instances.py +3 -1
- dstack/_internal/server/services/jobs/__init__.py +1 -2
- dstack/_internal/server/services/jobs/configurators/base.py +17 -8
- dstack/_internal/server/services/locking.py +16 -1
- dstack/_internal/server/services/projects.py +1 -2
- dstack/_internal/server/services/proxy/repo.py +1 -2
- dstack/_internal/server/services/runner/client.py +3 -0
- dstack/_internal/server/services/runs.py +19 -16
- dstack/_internal/server/services/services/__init__.py +1 -2
- dstack/_internal/server/services/volumes.py +29 -2
- dstack/_internal/server/statics/00a6e1fb461ed2929fb9.png +0 -0
- dstack/_internal/server/statics/0cae4d9f0a36034984a7.png +0 -0
- dstack/_internal/server/statics/391de232cc0e30cae513.png +0 -0
- dstack/_internal/server/statics/4e0eead8c1a73689ef9d.svg +1 -0
- dstack/_internal/server/statics/544afa2f63428c2235b0.png +0 -0
- dstack/_internal/server/statics/54a4f50f74c6b9381530.svg +7 -0
- dstack/_internal/server/statics/68dd1360a7d2611e0132.svg +4 -0
- dstack/_internal/server/statics/69544b4c81973b54a66f.png +0 -0
- dstack/_internal/server/statics/77a8b02b17af19e39266.png +0 -0
- dstack/_internal/server/statics/83a93a8871c219104367.svg +9 -0
- dstack/_internal/server/statics/8f28bb8e9999e5e6a48b.svg +4 -0
- dstack/_internal/server/statics/9124086961ab8c366bc4.svg +9 -0
- dstack/_internal/server/statics/9a9ebaeb54b025dbac0a.svg +5 -0
- dstack/_internal/server/statics/a3428392dc534f3b15c4.svg +7 -0
- dstack/_internal/server/statics/ae22625574d69361f72c.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-144x144.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-192x192.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-256x256.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-36x36.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-384x384.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-48x48.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-512x512.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-72x72.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-96x96.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-1024x1024.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-114x114.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-120x120.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-144x144.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-152x152.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-167x167.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-180x180.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-57x57.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-60x60.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-72x72.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-76x76.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-precomposed.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1170x2532.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1179x2556.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1284x2778.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1290x2796.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1488x2266.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1640x2160.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1640.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2266x1488.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2532x1170.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2556x1179.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2778x1284.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2796x1290.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
- dstack/_internal/server/statics/assets/browserconfig.xml +12 -0
- dstack/_internal/server/statics/assets/favicon-16x16.png +0 -0
- dstack/_internal/server/statics/assets/favicon-32x32.png +0 -0
- dstack/_internal/server/statics/assets/favicon-48x48.png +0 -0
- dstack/_internal/server/statics/assets/favicon.ico +0 -0
- dstack/_internal/server/statics/assets/manifest.webmanifest +67 -0
- dstack/_internal/server/statics/assets/mstile-144x144.png +0 -0
- dstack/_internal/server/statics/assets/mstile-150x150.png +0 -0
- dstack/_internal/server/statics/assets/mstile-310x150.png +0 -0
- dstack/_internal/server/statics/assets/mstile-310x310.png +0 -0
- dstack/_internal/server/statics/assets/mstile-70x70.png +0 -0
- dstack/_internal/server/statics/assets/yandex-browser-50x50.png +0 -0
- dstack/_internal/server/statics/assets/yandex-browser-manifest.json +9 -0
- dstack/_internal/server/statics/b7ae68f44193474fc578.png +0 -0
- dstack/_internal/server/statics/d2f008c75b2b5b191f3f.png +0 -0
- dstack/_internal/server/statics/d44c33e1b92e05c379fd.png +0 -0
- dstack/_internal/server/statics/dd43ff0552815179d7ab.png +0 -0
- dstack/_internal/server/statics/dd4e7166c0b9aac197d7.png +0 -0
- dstack/_internal/server/statics/e30b27916930d43d2271.png +0 -0
- dstack/_internal/server/statics/e467d7d60aae81ab198b.svg +6 -0
- dstack/_internal/server/statics/eb9b344b73818fe2b71a.png +0 -0
- dstack/_internal/server/statics/f517dd626eb964120de0.png +0 -0
- dstack/_internal/server/statics/f958aecddee5d8e3222c.png +0 -0
- dstack/_internal/server/statics/index.html +3 -0
- dstack/_internal/server/statics/main-8f9c66f404e9c7e7e020.css +3 -0
- dstack/_internal/server/statics/main-b4f65323f5df007e1664.js +136480 -0
- dstack/_internal/server/statics/main-b4f65323f5df007e1664.js.map +1 -0
- dstack/_internal/server/statics/manifest.json +16 -0
- dstack/_internal/server/statics/robots.txt +3 -0
- dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
- dstack/_internal/server/statics/static/media/github.1f7102513534c83a9d8d735d2b8c12a2.svg +3 -0
- dstack/_internal/server/statics/static/media/logo.f602feeb138844eda97c8cb641461448.svg +124 -0
- dstack/_internal/server/statics/static/media/okta.12f178e6873a1100965f2a4dbd18fcec.svg +2 -0
- dstack/_internal/server/statics/static/media/theme.3994c817bb7dda191c1c9640dee0bf42.svg +3 -0
- dstack/_internal/server/testing/common.py +10 -0
- dstack/_internal/utils/tags.py +42 -0
- dstack/api/server/__init__.py +3 -1
- dstack/api/server/_fleets.py +52 -9
- dstack/api/server/_gateways.py +17 -2
- dstack/api/server/_runs.py +34 -11
- dstack/api/server/_volumes.py +2 -3
- dstack/version.py +1 -1
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/METADATA +2 -2
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/RECORD +180 -76
- dstack-0.19.4rc3.data/data/dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf +0 -1
- dstack-0.19.4rc3.data/data/dstack/_internal/proxy/gateway/resources/nginx/entrypoint.jinja2 +0 -27
- dstack-0.19.4rc3.data/data/dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +0 -88
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/WHEEL +0 -0
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -10,7 +10,7 @@ from sqlalchemy.orm import joinedload
|
|
|
10
10
|
from dstack._internal.core.consts import DSTACK_RUNNER_HTTP_PORT, DSTACK_SHIM_HTTP_PORT
|
|
11
11
|
from dstack._internal.core.errors import GatewayError
|
|
12
12
|
from dstack._internal.core.models.backends.base import BackendType
|
|
13
|
-
from dstack._internal.core.models.common import NetworkMode, RegistryAuth
|
|
13
|
+
from dstack._internal.core.models.common import NetworkMode, RegistryAuth
|
|
14
14
|
from dstack._internal.core.models.configurations import DevEnvironmentConfiguration
|
|
15
15
|
from dstack._internal.core.models.instances import (
|
|
16
16
|
InstanceStatus,
|
|
@@ -40,7 +40,7 @@ from dstack._internal.server.models import (
|
|
|
40
40
|
RepoModel,
|
|
41
41
|
RunModel,
|
|
42
42
|
)
|
|
43
|
-
from dstack._internal.server.schemas.runner import TaskStatus
|
|
43
|
+
from dstack._internal.server.schemas.runner import GPUDevice, TaskStatus
|
|
44
44
|
from dstack._internal.server.services import logs as logs_services
|
|
45
45
|
from dstack._internal.server.services import services
|
|
46
46
|
from dstack._internal.server.services.instances import get_instance_ssh_private_keys
|
|
@@ -422,9 +422,9 @@ def _process_provisioning_with_shim(
|
|
|
422
422
|
volume_mounts: List[VolumeMountPoint] = []
|
|
423
423
|
instance_mounts: List[InstanceMountPoint] = []
|
|
424
424
|
for mount in run.run_spec.configuration.volumes:
|
|
425
|
-
if
|
|
425
|
+
if isinstance(mount, VolumeMountPoint):
|
|
426
426
|
volume_mounts.append(mount.copy())
|
|
427
|
-
elif
|
|
427
|
+
elif isinstance(mount, InstanceMountPoint):
|
|
428
428
|
instance_mounts.append(mount)
|
|
429
429
|
else:
|
|
430
430
|
assert False, f"unexpected mount point: {mount!r}"
|
|
@@ -438,6 +438,10 @@ def _process_provisioning_with_shim(
|
|
|
438
438
|
job_provisioning_data.backend, job_provisioning_data.instance_type.name
|
|
439
439
|
)
|
|
440
440
|
|
|
441
|
+
gpu_devices = _get_instance_specific_gpu_devices(
|
|
442
|
+
job_provisioning_data.backend, job_provisioning_data.instance_type.name
|
|
443
|
+
)
|
|
444
|
+
|
|
441
445
|
container_user = "root"
|
|
442
446
|
|
|
443
447
|
job_runtime_data = get_job_runtime_data(job_model)
|
|
@@ -471,6 +475,7 @@ def _process_provisioning_with_shim(
|
|
|
471
475
|
volumes=volumes,
|
|
472
476
|
volume_mounts=volume_mounts,
|
|
473
477
|
instance_mounts=instance_mounts,
|
|
478
|
+
gpu_devices=gpu_devices,
|
|
474
479
|
host_ssh_user=ssh_user,
|
|
475
480
|
host_ssh_keys=[ssh_key] if ssh_key else [],
|
|
476
481
|
container_ssh_keys=public_keys,
|
|
@@ -657,7 +662,7 @@ def _terminate_if_inactivity_duration_exceeded(
|
|
|
657
662
|
run_model: RunModel, job_model: JobModel, no_connections_secs: Optional[int]
|
|
658
663
|
) -> None:
|
|
659
664
|
conf = RunSpec.__response__.parse_raw(run_model.run_spec).configuration
|
|
660
|
-
if not
|
|
665
|
+
if not isinstance(conf, DevEnvironmentConfiguration) or not isinstance(
|
|
661
666
|
conf.inactivity_duration, int
|
|
662
667
|
):
|
|
663
668
|
# reset in case inactivity_duration was disabled via in-place update
|
|
@@ -834,14 +839,60 @@ def _submit_job_to_runner(
|
|
|
834
839
|
def _get_instance_specific_mounts(
|
|
835
840
|
backend_type: BackendType, instance_type_name: str
|
|
836
841
|
) -> List[InstanceMountPoint]:
|
|
837
|
-
if backend_type == BackendType.GCP
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
842
|
+
if backend_type == BackendType.GCP:
|
|
843
|
+
if instance_type_name == "a3-megagpu-8g":
|
|
844
|
+
return [
|
|
845
|
+
InstanceMountPoint(
|
|
846
|
+
instance_path="/dev/aperture_devices",
|
|
847
|
+
path="/dev/aperture_devices",
|
|
848
|
+
),
|
|
849
|
+
InstanceMountPoint(
|
|
850
|
+
instance_path="/var/lib/tcpxo/lib64",
|
|
851
|
+
path="/var/lib/tcpxo/lib64",
|
|
852
|
+
),
|
|
853
|
+
InstanceMountPoint(
|
|
854
|
+
instance_path="/var/lib/fastrak/lib64",
|
|
855
|
+
path="/var/lib/fastrak/lib64",
|
|
856
|
+
),
|
|
857
|
+
]
|
|
858
|
+
if instance_type_name in ["a3-edgegpu-8g", "a3-highgpu-8g"]:
|
|
859
|
+
return [
|
|
860
|
+
InstanceMountPoint(
|
|
861
|
+
instance_path="/var/lib/nvidia/lib64",
|
|
862
|
+
path="/usr/local/nvidia/lib64",
|
|
863
|
+
),
|
|
864
|
+
InstanceMountPoint(
|
|
865
|
+
instance_path="/var/lib/nvidia/bin",
|
|
866
|
+
path="/usr/local/nvidia/bin",
|
|
867
|
+
),
|
|
868
|
+
InstanceMountPoint(
|
|
869
|
+
instance_path="/var/lib/tcpx/lib64",
|
|
870
|
+
path="/usr/local/tcpx/lib64",
|
|
871
|
+
),
|
|
872
|
+
InstanceMountPoint(
|
|
873
|
+
instance_path="/run/tcpx",
|
|
874
|
+
path="/run/tcpx",
|
|
875
|
+
),
|
|
876
|
+
]
|
|
847
877
|
return []
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
def _get_instance_specific_gpu_devices(
|
|
881
|
+
backend_type: BackendType, instance_type_name: str
|
|
882
|
+
) -> List[GPUDevice]:
|
|
883
|
+
gpu_devices = []
|
|
884
|
+
if backend_type == BackendType.GCP and instance_type_name in [
|
|
885
|
+
"a3-edgegpu-8g",
|
|
886
|
+
"a3-highgpu-8g",
|
|
887
|
+
]:
|
|
888
|
+
for i in range(8):
|
|
889
|
+
gpu_devices.append(
|
|
890
|
+
GPUDevice(path_on_host=f"/dev/nvidia{i}", path_in_container=f"/dev/nvidia{i}")
|
|
891
|
+
)
|
|
892
|
+
gpu_devices.append(
|
|
893
|
+
GPUDevice(path_on_host="/dev/nvidia-uvm", path_in_container="/dev/nvidia-uvm")
|
|
894
|
+
)
|
|
895
|
+
gpu_devices.append(
|
|
896
|
+
GPUDevice(path_on_host="/dev/nvidiactl", path_in_container="/dev/nvidiactl")
|
|
897
|
+
)
|
|
898
|
+
return gpu_devices
|
|
@@ -9,6 +9,7 @@ from dstack._internal.core.models.fleets import Fleet, FleetPlan
|
|
|
9
9
|
from dstack._internal.server.db import get_session
|
|
10
10
|
from dstack._internal.server.models import ProjectModel, UserModel
|
|
11
11
|
from dstack._internal.server.schemas.fleets import (
|
|
12
|
+
ApplyFleetPlanRequest,
|
|
12
13
|
CreateFleetRequest,
|
|
13
14
|
DeleteFleetInstancesRequest,
|
|
14
15
|
DeleteFleetsRequest,
|
|
@@ -107,6 +108,27 @@ async def get_plan(
|
|
|
107
108
|
return plan
|
|
108
109
|
|
|
109
110
|
|
|
111
|
+
@project_router.post("/apply")
|
|
112
|
+
async def apply_plan(
|
|
113
|
+
body: ApplyFleetPlanRequest,
|
|
114
|
+
session: AsyncSession = Depends(get_session),
|
|
115
|
+
user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectMember()),
|
|
116
|
+
) -> Fleet:
|
|
117
|
+
"""
|
|
118
|
+
Creates a new fleet or updates an existing fleet.
|
|
119
|
+
Errors if the expected current resource from the plan does not match the current resource.
|
|
120
|
+
Use `force: true` to apply even if the current resource does not match.
|
|
121
|
+
"""
|
|
122
|
+
user, project = user_project
|
|
123
|
+
return await fleets_services.apply_plan(
|
|
124
|
+
session=session,
|
|
125
|
+
user=user,
|
|
126
|
+
project=project,
|
|
127
|
+
plan=body.plan,
|
|
128
|
+
force=body.force,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
|
|
110
132
|
@project_router.post("/create")
|
|
111
133
|
async def create_fleet(
|
|
112
134
|
body: CreateFleetRequest,
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
|
-
from typing import List, Optional
|
|
2
|
+
from typing import Annotated, List, Optional
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
5
|
from pydantic import Field
|
|
6
6
|
|
|
7
7
|
from dstack._internal.core.models.common import CoreModel
|
|
8
|
-
from dstack._internal.core.models.fleets import FleetSpec
|
|
8
|
+
from dstack._internal.core.models.fleets import ApplyFleetPlanInput, FleetSpec
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class ListFleetsRequest(CoreModel):
|
|
@@ -26,6 +26,16 @@ class GetFleetPlanRequest(CoreModel):
|
|
|
26
26
|
spec: FleetSpec
|
|
27
27
|
|
|
28
28
|
|
|
29
|
+
class ApplyFleetPlanRequest(CoreModel):
|
|
30
|
+
plan: ApplyFleetPlanInput
|
|
31
|
+
force: Annotated[
|
|
32
|
+
bool,
|
|
33
|
+
Field(
|
|
34
|
+
description="Use `force: true` to apply even if the expected resource does not match."
|
|
35
|
+
),
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
29
39
|
class CreateFleetRequest(CoreModel):
|
|
30
40
|
spec: FleetSpec
|
|
31
41
|
|
|
@@ -114,6 +114,11 @@ class TaskStatus(str, Enum):
|
|
|
114
114
|
TERMINATED = "terminated"
|
|
115
115
|
|
|
116
116
|
|
|
117
|
+
class GPUDevice(CoreModel):
|
|
118
|
+
path_on_host: str
|
|
119
|
+
path_in_container: str
|
|
120
|
+
|
|
121
|
+
|
|
117
122
|
class TaskInfoResponse(CoreModel):
|
|
118
123
|
id: str
|
|
119
124
|
status: TaskStatus
|
|
@@ -139,6 +144,7 @@ class TaskSubmitRequest(CoreModel):
|
|
|
139
144
|
volumes: list[ShimVolumeInfo]
|
|
140
145
|
volume_mounts: list[VolumeMountPoint]
|
|
141
146
|
instance_mounts: list[InstanceMountPoint]
|
|
147
|
+
gpu_devices: list[GPUDevice]
|
|
142
148
|
host_ssh_user: str
|
|
143
149
|
host_ssh_keys: list[str]
|
|
144
150
|
container_ssh_keys: list[str]
|
|
@@ -91,8 +91,7 @@ def get_image_config(image_name: str, registry_auth: Optional[RegistryAuth]) ->
|
|
|
91
91
|
config_resp = join_byte_stream_checked(config_stream, MAX_CONFIG_OBJECT_SIZE)
|
|
92
92
|
if config_resp is None:
|
|
93
93
|
raise DockerRegistryError(
|
|
94
|
-
"Image config object exceeds the size limit of "
|
|
95
|
-
f"{MAX_CONFIG_OBJECT_SIZE} bytes"
|
|
94
|
+
f"Image config object exceeds the size limit of {MAX_CONFIG_OBJECT_SIZE} bytes"
|
|
96
95
|
)
|
|
97
96
|
return ImageConfigObject.__response__.parse_raw(config_resp)
|
|
98
97
|
|
|
@@ -15,9 +15,9 @@ from dstack._internal.core.errors import (
|
|
|
15
15
|
ResourceExistsError,
|
|
16
16
|
ServerClientError,
|
|
17
17
|
)
|
|
18
|
-
from dstack._internal.core.models.common import is_core_model_instance
|
|
19
18
|
from dstack._internal.core.models.envs import Env
|
|
20
19
|
from dstack._internal.core.models.fleets import (
|
|
20
|
+
ApplyFleetPlanInput,
|
|
21
21
|
Fleet,
|
|
22
22
|
FleetPlan,
|
|
23
23
|
FleetSpec,
|
|
@@ -234,32 +234,34 @@ async def get_plan(
|
|
|
234
234
|
user: UserModel,
|
|
235
235
|
spec: FleetSpec,
|
|
236
236
|
) -> FleetPlan:
|
|
237
|
+
effective_spec = FleetSpec.parse_obj(spec.dict())
|
|
237
238
|
current_fleet: Optional[Fleet] = None
|
|
238
239
|
current_fleet_id: Optional[uuid.UUID] = None
|
|
239
|
-
if
|
|
240
|
+
if effective_spec.configuration.name is not None:
|
|
240
241
|
current_fleet_model = await get_project_fleet_model_by_name(
|
|
241
|
-
session=session, project=project, name=
|
|
242
|
+
session=session, project=project, name=effective_spec.configuration.name
|
|
242
243
|
)
|
|
243
244
|
if current_fleet_model is not None:
|
|
244
245
|
current_fleet = fleet_model_to_fleet(current_fleet_model)
|
|
245
246
|
current_fleet_id = current_fleet_model.id
|
|
246
|
-
await _check_ssh_hosts_not_yet_added(session,
|
|
247
|
+
await _check_ssh_hosts_not_yet_added(session, effective_spec, current_fleet_id)
|
|
247
248
|
|
|
248
249
|
offers = []
|
|
249
|
-
if
|
|
250
|
+
if effective_spec.configuration.ssh_config is None:
|
|
250
251
|
offers_with_backends = await get_create_instance_offers(
|
|
251
252
|
project=project,
|
|
252
|
-
profile=
|
|
253
|
-
requirements=_get_fleet_requirements(
|
|
254
|
-
fleet_spec=
|
|
255
|
-
blocks=
|
|
253
|
+
profile=effective_spec.merged_profile,
|
|
254
|
+
requirements=_get_fleet_requirements(effective_spec),
|
|
255
|
+
fleet_spec=effective_spec,
|
|
256
|
+
blocks=effective_spec.configuration.blocks,
|
|
256
257
|
)
|
|
257
258
|
offers = [offer for _, offer in offers_with_backends]
|
|
258
|
-
_remove_fleet_spec_sensitive_info(
|
|
259
|
+
_remove_fleet_spec_sensitive_info(effective_spec)
|
|
259
260
|
plan = FleetPlan(
|
|
260
261
|
project_name=project.name,
|
|
261
262
|
user=user.name,
|
|
262
263
|
spec=spec,
|
|
264
|
+
effective_spec=effective_spec,
|
|
263
265
|
current_resource=current_fleet,
|
|
264
266
|
offers=offers[:50],
|
|
265
267
|
total_offers=len(offers),
|
|
@@ -307,6 +309,21 @@ async def get_create_instance_offers(
|
|
|
307
309
|
return offers
|
|
308
310
|
|
|
309
311
|
|
|
312
|
+
async def apply_plan(
|
|
313
|
+
session: AsyncSession,
|
|
314
|
+
user: UserModel,
|
|
315
|
+
project: ProjectModel,
|
|
316
|
+
plan: ApplyFleetPlanInput,
|
|
317
|
+
force: bool,
|
|
318
|
+
) -> Fleet:
|
|
319
|
+
return await create_fleet(
|
|
320
|
+
session=session,
|
|
321
|
+
project=project,
|
|
322
|
+
user=user,
|
|
323
|
+
spec=plan.spec,
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
|
|
310
327
|
async def create_fleet(
|
|
311
328
|
session: AsyncSession,
|
|
312
329
|
project: ProjectModel,
|
|
@@ -320,7 +337,7 @@ async def create_fleet(
|
|
|
320
337
|
|
|
321
338
|
lock_namespace = f"fleet_names_{project.name}"
|
|
322
339
|
if get_db().dialect_name == "sqlite":
|
|
323
|
-
# Start new transaction to see
|
|
340
|
+
# Start new transaction to see committed changes after lock
|
|
324
341
|
await session.commit()
|
|
325
342
|
elif get_db().dialect_name == "postgresql":
|
|
326
343
|
await session.execute(
|
|
@@ -402,6 +419,7 @@ async def create_fleet_instance_model(
|
|
|
402
419
|
placement_group_name=placement_group_name,
|
|
403
420
|
reservation=reservation,
|
|
404
421
|
blocks=spec.configuration.blocks,
|
|
422
|
+
tags=spec.configuration.tags,
|
|
405
423
|
)
|
|
406
424
|
return instance_model
|
|
407
425
|
|
|
@@ -629,7 +647,7 @@ def _validate_fleet_spec(spec: FleetSpec):
|
|
|
629
647
|
if spec.configuration.ssh_config.ssh_key is not None:
|
|
630
648
|
_validate_ssh_key(spec.configuration.ssh_config.ssh_key)
|
|
631
649
|
for host in spec.configuration.ssh_config.hosts:
|
|
632
|
-
if
|
|
650
|
+
if isinstance(host, SSHHostParams) and host.ssh_key is not None:
|
|
633
651
|
_validate_ssh_key(host.ssh_key)
|
|
634
652
|
_validate_internal_ips(spec.configuration.ssh_config)
|
|
635
653
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import uuid
|
|
2
2
|
from collections.abc import Container, Iterable
|
|
3
3
|
from datetime import datetime, timezone
|
|
4
|
-
from typing import List, Literal, Optional, Union
|
|
4
|
+
from typing import Dict, List, Literal, Optional, Union
|
|
5
5
|
|
|
6
6
|
import gpuhunt
|
|
7
7
|
from sqlalchemy import and_, or_, select
|
|
@@ -411,6 +411,7 @@ async def create_instance_model(
|
|
|
411
411
|
placement_group_name: Optional[str],
|
|
412
412
|
reservation: Optional[str],
|
|
413
413
|
blocks: Union[Literal["auto"], int],
|
|
414
|
+
tags: Optional[Dict[str, str]],
|
|
414
415
|
) -> InstanceModel:
|
|
415
416
|
termination_policy, termination_idle_time = get_termination(
|
|
416
417
|
profile, DEFAULT_FLEET_TERMINATION_IDLE_TIME
|
|
@@ -428,6 +429,7 @@ async def create_instance_model(
|
|
|
428
429
|
instance_id=str(instance_id),
|
|
429
430
|
placement_group_name=placement_group_name,
|
|
430
431
|
reservation=reservation,
|
|
432
|
+
tags=tags,
|
|
431
433
|
)
|
|
432
434
|
instance = InstanceModel(
|
|
433
435
|
id=instance_id,
|
|
@@ -20,7 +20,6 @@ from dstack._internal.core.errors import (
|
|
|
20
20
|
SSHError,
|
|
21
21
|
)
|
|
22
22
|
from dstack._internal.core.models.backends.base import BackendType
|
|
23
|
-
from dstack._internal.core.models.common import is_core_model_instance
|
|
24
23
|
from dstack._internal.core.models.configurations import RunConfigurationType
|
|
25
24
|
from dstack._internal.core.models.instances import InstanceStatus
|
|
26
25
|
from dstack._internal.core.models.runs import (
|
|
@@ -585,7 +584,7 @@ async def get_job_configured_volume_models(
|
|
|
585
584
|
job_volumes = interpolate_job_volumes(run_spec.configuration.volumes, job_num)
|
|
586
585
|
volume_models = []
|
|
587
586
|
for mount_point in job_volumes:
|
|
588
|
-
if not
|
|
587
|
+
if not isinstance(mount_point, VolumeMountPoint):
|
|
589
588
|
continue
|
|
590
589
|
if isinstance(mount_point.name, str):
|
|
591
590
|
names = [mount_point.name]
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import shlex
|
|
2
2
|
import sys
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import PurePosixPath
|
|
4
5
|
from typing import Dict, List, Optional, Union
|
|
5
6
|
|
|
6
7
|
from cachetools import TTLCache, cached
|
|
7
8
|
|
|
8
9
|
import dstack.version as version
|
|
9
10
|
from dstack._internal.core.errors import DockerRegistryError, ServerClientError
|
|
10
|
-
from dstack._internal.core.models.common import RegistryAuth
|
|
11
|
+
from dstack._internal.core.models.common import RegistryAuth
|
|
11
12
|
from dstack._internal.core.models.configurations import (
|
|
12
13
|
PortMapping,
|
|
13
14
|
PythonVersion,
|
|
@@ -131,16 +132,24 @@ class JobConfigurator(ABC):
|
|
|
131
132
|
)
|
|
132
133
|
return job_spec
|
|
133
134
|
|
|
135
|
+
def _shell(self) -> str:
|
|
136
|
+
shell = self.run_spec.configuration.shell
|
|
137
|
+
if shell is not None:
|
|
138
|
+
path = PurePosixPath(shell)
|
|
139
|
+
if path.is_absolute():
|
|
140
|
+
return shell
|
|
141
|
+
return str("/bin" / path)
|
|
142
|
+
if self.run_spec.configuration.image is None: # dstackai/base
|
|
143
|
+
return "/bin/bash"
|
|
144
|
+
return "/bin/sh"
|
|
145
|
+
|
|
134
146
|
async def _commands(self) -> List[str]:
|
|
135
147
|
if self.run_spec.configuration.entrypoint is not None: # docker-like format
|
|
136
148
|
entrypoint = shlex.split(self.run_spec.configuration.entrypoint)
|
|
137
149
|
commands = self.run_spec.configuration.commands
|
|
138
|
-
elif self.
|
|
139
|
-
entrypoint = [
|
|
140
|
-
commands = [_join_shell_commands(
|
|
141
|
-
elif self._shell_commands(): # custom docker image with shell commands
|
|
142
|
-
entrypoint = ["/bin/sh", "-i", "-c"]
|
|
143
|
-
commands = [_join_shell_commands(self._shell_commands())]
|
|
150
|
+
elif shell_commands := self._shell_commands():
|
|
151
|
+
entrypoint = [self._shell(), "-i", "-c"]
|
|
152
|
+
commands = [_join_shell_commands(shell_commands)]
|
|
144
153
|
else: # custom docker image without commands
|
|
145
154
|
image_config = await self._get_image_config()
|
|
146
155
|
entrypoint = image_config.entrypoint or []
|
|
@@ -274,7 +283,7 @@ def interpolate_job_volumes(
|
|
|
274
283
|
if isinstance(mount_point, str):
|
|
275
284
|
# pydantic validator ensures strings are converted to MountPoint
|
|
276
285
|
continue
|
|
277
|
-
if not
|
|
286
|
+
if not isinstance(mount_point, VolumeMountPoint):
|
|
278
287
|
job_volumes.append(mount_point.copy())
|
|
279
288
|
continue
|
|
280
289
|
if isinstance(mount_point.name, str):
|
|
@@ -2,7 +2,7 @@ import asyncio
|
|
|
2
2
|
import hashlib
|
|
3
3
|
from asyncio import Lock
|
|
4
4
|
from contextlib import asynccontextmanager
|
|
5
|
-
from typing import Dict, List, Set, Tuple, TypeVar, Union
|
|
5
|
+
from typing import AsyncGenerator, Dict, List, Set, Tuple, TypeVar, Union
|
|
6
6
|
|
|
7
7
|
from sqlalchemy import func, select
|
|
8
8
|
from sqlalchemy.ext.asyncio import AsyncConnection, AsyncSession
|
|
@@ -52,6 +52,21 @@ async def advisory_lock_ctx(
|
|
|
52
52
|
await bind.execute(select(func.pg_advisory_unlock(string_to_lock_id(resource))))
|
|
53
53
|
|
|
54
54
|
|
|
55
|
+
@asynccontextmanager
|
|
56
|
+
async def try_advisory_lock_ctx(
|
|
57
|
+
bind: Union[AsyncConnection, AsyncSession], dialect_name: str, resource: str
|
|
58
|
+
) -> AsyncGenerator[bool, None]:
|
|
59
|
+
locked = True
|
|
60
|
+
if dialect_name == "postgresql":
|
|
61
|
+
res = await bind.execute(select(func.pg_try_advisory_lock(string_to_lock_id(resource))))
|
|
62
|
+
locked = res.scalar_one()
|
|
63
|
+
try:
|
|
64
|
+
yield locked
|
|
65
|
+
finally:
|
|
66
|
+
if dialect_name == "postgresql" and locked:
|
|
67
|
+
await bind.execute(select(func.pg_advisory_unlock(string_to_lock_id(resource))))
|
|
68
|
+
|
|
69
|
+
|
|
55
70
|
_locker = ResourceLocker()
|
|
56
71
|
|
|
57
72
|
|
|
@@ -14,7 +14,6 @@ from dstack._internal.core.backends.dstack.models import (
|
|
|
14
14
|
)
|
|
15
15
|
from dstack._internal.core.backends.models import BackendInfo
|
|
16
16
|
from dstack._internal.core.errors import ForbiddenError, ResourceExistsError, ServerClientError
|
|
17
|
-
from dstack._internal.core.models.common import is_core_model_instance
|
|
18
17
|
from dstack._internal.core.models.projects import Member, MemberPermissions, Project
|
|
19
18
|
from dstack._internal.core.models.users import GlobalRole, ProjectRole
|
|
20
19
|
from dstack._internal.server.models import MemberModel, ProjectModel, UserModel
|
|
@@ -386,7 +385,7 @@ def project_model_to_project(
|
|
|
386
385
|
backend_config = get_backend_config_from_backend_model(
|
|
387
386
|
configurator, b, include_creds=False
|
|
388
387
|
)
|
|
389
|
-
if
|
|
388
|
+
if isinstance(backend_config, DstackBackendConfig):
|
|
390
389
|
for backend_type in backend_config.base_backends:
|
|
391
390
|
backends.append(
|
|
392
391
|
BackendInfo(
|
|
@@ -7,7 +7,6 @@ from sqlalchemy.orm import joinedload
|
|
|
7
7
|
|
|
8
8
|
import dstack._internal.server.services.jobs as jobs_services
|
|
9
9
|
from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
|
|
10
|
-
from dstack._internal.core.models.common import is_core_model_instance
|
|
11
10
|
from dstack._internal.core.models.configurations import ServiceConfiguration
|
|
12
11
|
from dstack._internal.core.models.instances import RemoteConnectionInfo, SSHConnectionParams
|
|
13
12
|
from dstack._internal.core.models.runs import (
|
|
@@ -64,7 +63,7 @@ class ServerProxyRepo(BaseProxyRepo):
|
|
|
64
63
|
return None
|
|
65
64
|
run = jobs[0].run
|
|
66
65
|
run_spec = RunSpec.__response__.parse_raw(run.run_spec)
|
|
67
|
-
if not
|
|
66
|
+
if not isinstance(run_spec.configuration, ServiceConfiguration):
|
|
68
67
|
return None
|
|
69
68
|
replicas = []
|
|
70
69
|
for job in jobs:
|
|
@@ -15,6 +15,7 @@ from dstack._internal.core.models.resources import Memory
|
|
|
15
15
|
from dstack._internal.core.models.runs import ClusterInfo, JobSpec, RunSpec
|
|
16
16
|
from dstack._internal.core.models.volumes import InstanceMountPoint, Volume, VolumeMountPoint
|
|
17
17
|
from dstack._internal.server.schemas.runner import (
|
|
18
|
+
GPUDevice,
|
|
18
19
|
HealthcheckResponse,
|
|
19
20
|
LegacyPullResponse,
|
|
20
21
|
LegacyStopBody,
|
|
@@ -233,6 +234,7 @@ class ShimClient:
|
|
|
233
234
|
volumes: list[Volume],
|
|
234
235
|
volume_mounts: list[VolumeMountPoint],
|
|
235
236
|
instance_mounts: list[InstanceMountPoint],
|
|
237
|
+
gpu_devices: list[GPUDevice],
|
|
236
238
|
host_ssh_user: str,
|
|
237
239
|
host_ssh_keys: list[str],
|
|
238
240
|
container_ssh_keys: list[str],
|
|
@@ -256,6 +258,7 @@ class ShimClient:
|
|
|
256
258
|
volumes=[_volume_to_shim_volume_info(v, instance_id) for v in volumes],
|
|
257
259
|
volume_mounts=volume_mounts,
|
|
258
260
|
instance_mounts=instance_mounts,
|
|
261
|
+
gpu_devices=gpu_devices,
|
|
259
262
|
host_ssh_user=host_ssh_user,
|
|
260
263
|
host_ssh_keys=host_ssh_keys,
|
|
261
264
|
container_ssh_keys=container_ssh_keys,
|
|
@@ -15,7 +15,7 @@ from dstack._internal.core.errors import (
|
|
|
15
15
|
ResourceNotExistsError,
|
|
16
16
|
ServerClientError,
|
|
17
17
|
)
|
|
18
|
-
from dstack._internal.core.models.common import ApplyAction
|
|
18
|
+
from dstack._internal.core.models.common import ApplyAction
|
|
19
19
|
from dstack._internal.core.models.configurations import AnyRunConfiguration
|
|
20
20
|
from dstack._internal.core.models.instances import (
|
|
21
21
|
InstanceAvailability,
|
|
@@ -92,6 +92,8 @@ JOB_TERMINATION_REASONS_TO_RETRY = {
|
|
|
92
92
|
JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY,
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
+
DEFAULT_MAX_OFFERS = 50
|
|
96
|
+
|
|
95
97
|
|
|
96
98
|
async def list_user_runs(
|
|
97
99
|
session: AsyncSession,
|
|
@@ -275,46 +277,46 @@ async def get_plan(
|
|
|
275
277
|
project: ProjectModel,
|
|
276
278
|
user: UserModel,
|
|
277
279
|
run_spec: RunSpec,
|
|
280
|
+
max_offers: Optional[int],
|
|
278
281
|
) -> RunPlan:
|
|
279
|
-
|
|
282
|
+
effective_run_spec = RunSpec.parse_obj(run_spec.dict())
|
|
283
|
+
_validate_run_spec_and_set_defaults(effective_run_spec)
|
|
280
284
|
|
|
281
|
-
profile =
|
|
285
|
+
profile = effective_run_spec.merged_profile
|
|
282
286
|
creation_policy = profile.creation_policy
|
|
283
287
|
|
|
284
288
|
current_resource = None
|
|
285
289
|
action = ApplyAction.CREATE
|
|
286
|
-
if
|
|
290
|
+
if effective_run_spec.run_name is not None:
|
|
287
291
|
current_resource = await get_run_by_name(
|
|
288
292
|
session=session,
|
|
289
293
|
project=project,
|
|
290
|
-
run_name=
|
|
294
|
+
run_name=effective_run_spec.run_name,
|
|
291
295
|
)
|
|
292
296
|
if (
|
|
293
297
|
current_resource is not None
|
|
294
298
|
and not current_resource.status.is_finished()
|
|
295
|
-
and _can_update_run_spec(current_resource.run_spec,
|
|
299
|
+
and _can_update_run_spec(current_resource.run_spec, effective_run_spec)
|
|
296
300
|
):
|
|
297
301
|
action = ApplyAction.UPDATE
|
|
298
302
|
|
|
299
|
-
|
|
300
|
-
jobs = await get_jobs_from_run_spec(run_spec, replica_num=0)
|
|
303
|
+
jobs = await get_jobs_from_run_spec(effective_run_spec, replica_num=0)
|
|
301
304
|
|
|
302
305
|
volumes = await get_job_configured_volumes(
|
|
303
306
|
session=session,
|
|
304
307
|
project=project,
|
|
305
|
-
run_spec=
|
|
308
|
+
run_spec=effective_run_spec,
|
|
306
309
|
job_num=0,
|
|
307
310
|
)
|
|
308
311
|
|
|
309
312
|
pool_offers = await _get_pool_offers(
|
|
310
313
|
session=session,
|
|
311
314
|
project=project,
|
|
312
|
-
run_spec=
|
|
315
|
+
run_spec=effective_run_spec,
|
|
313
316
|
job=jobs[0],
|
|
314
317
|
volumes=volumes,
|
|
315
318
|
)
|
|
316
|
-
run_name =
|
|
317
|
-
run_spec.run_name = "dry-run" # will regenerate jobs on submission
|
|
319
|
+
effective_run_spec.run_name = "dry-run" # will regenerate jobs on submission
|
|
318
320
|
|
|
319
321
|
# Get offers once for all jobs
|
|
320
322
|
offers = []
|
|
@@ -327,7 +329,7 @@ async def get_plan(
|
|
|
327
329
|
multinode=jobs[0].job_spec.jobs_per_replica > 1,
|
|
328
330
|
volumes=volumes,
|
|
329
331
|
privileged=jobs[0].job_spec.privileged,
|
|
330
|
-
instance_mounts=check_run_spec_requires_instance_mounts(
|
|
332
|
+
instance_mounts=check_run_spec_requires_instance_mounts(effective_run_spec),
|
|
331
333
|
)
|
|
332
334
|
|
|
333
335
|
job_plans = []
|
|
@@ -342,17 +344,18 @@ async def get_plan(
|
|
|
342
344
|
|
|
343
345
|
job_plan = JobPlan(
|
|
344
346
|
job_spec=job_spec,
|
|
345
|
-
offers=job_offers[:
|
|
347
|
+
offers=job_offers[: (max_offers or DEFAULT_MAX_OFFERS)],
|
|
346
348
|
total_offers=len(job_offers),
|
|
347
349
|
max_price=max((offer.price for offer in job_offers), default=None),
|
|
348
350
|
)
|
|
349
351
|
job_plans.append(job_plan)
|
|
350
352
|
|
|
351
|
-
|
|
353
|
+
effective_run_spec.run_name = run_spec.run_name # restore run_name
|
|
352
354
|
run_plan = RunPlan(
|
|
353
355
|
project_name=project.name,
|
|
354
356
|
user=user.name,
|
|
355
357
|
run_spec=run_spec,
|
|
358
|
+
effective_run_spec=effective_run_spec,
|
|
356
359
|
job_plans=job_plans,
|
|
357
360
|
current_resource=current_resource,
|
|
358
361
|
action=action,
|
|
@@ -748,7 +751,7 @@ async def _generate_run_name(
|
|
|
748
751
|
|
|
749
752
|
def check_run_spec_requires_instance_mounts(run_spec: RunSpec) -> bool:
|
|
750
753
|
return any(
|
|
751
|
-
|
|
754
|
+
isinstance(mp, InstanceMountPoint) and not mp.optional
|
|
752
755
|
for mp in run_spec.configuration.volumes
|
|
753
756
|
)
|
|
754
757
|
|