dstack 0.18.43__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/gateway.py +15 -3
- dstack/_internal/cli/commands/logs.py +0 -22
- dstack/_internal/cli/commands/stats.py +8 -17
- dstack/_internal/cli/main.py +1 -5
- dstack/_internal/cli/services/configurators/fleet.py +4 -39
- dstack/_internal/cli/services/configurators/run.py +22 -20
- dstack/_internal/cli/services/profile.py +34 -83
- dstack/_internal/cli/utils/gateway.py +1 -1
- dstack/_internal/cli/utils/run.py +11 -0
- dstack/_internal/core/backends/__init__.py +56 -39
- dstack/_internal/core/backends/aws/__init__.py +0 -25
- dstack/_internal/core/backends/aws/auth.py +1 -10
- dstack/_internal/core/backends/aws/backend.py +26 -0
- dstack/_internal/core/backends/aws/compute.py +21 -45
- dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
- dstack/_internal/core/backends/aws/models.py +135 -0
- dstack/_internal/core/backends/aws/resources.py +1 -1
- dstack/_internal/core/backends/azure/__init__.py +0 -20
- dstack/_internal/core/backends/azure/auth.py +2 -11
- dstack/_internal/core/backends/azure/backend.py +21 -0
- dstack/_internal/core/backends/azure/compute.py +14 -28
- dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
- dstack/_internal/core/backends/azure/models.py +89 -0
- dstack/_internal/core/backends/base/__init__.py +0 -12
- dstack/_internal/core/backends/base/backend.py +18 -0
- dstack/_internal/core/backends/base/compute.py +153 -33
- dstack/_internal/core/backends/base/configurator.py +105 -0
- dstack/_internal/core/backends/base/models.py +14 -0
- dstack/_internal/core/backends/configurators.py +138 -0
- dstack/_internal/core/backends/cudo/__init__.py +0 -15
- dstack/_internal/core/backends/cudo/backend.py +16 -0
- dstack/_internal/core/backends/cudo/compute.py +8 -26
- dstack/_internal/core/backends/cudo/configurator.py +72 -0
- dstack/_internal/core/backends/cudo/models.py +37 -0
- dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
- dstack/_internal/core/backends/datacrunch/backend.py +16 -0
- dstack/_internal/core/backends/datacrunch/compute.py +8 -25
- dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
- dstack/_internal/core/backends/datacrunch/models.py +38 -0
- dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
- dstack/_internal/core/backends/gcp/__init__.py +0 -16
- dstack/_internal/core/backends/gcp/auth.py +2 -11
- dstack/_internal/core/backends/gcp/backend.py +17 -0
- dstack/_internal/core/backends/gcp/compute.py +14 -44
- dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
- dstack/_internal/core/backends/gcp/models.py +125 -0
- dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
- dstack/_internal/core/backends/kubernetes/backend.py +16 -0
- dstack/_internal/core/backends/kubernetes/compute.py +16 -5
- dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
- dstack/_internal/core/backends/kubernetes/models.py +72 -0
- dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
- dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
- dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
- dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
- dstack/_internal/core/backends/lambdalabs/models.py +37 -0
- dstack/_internal/core/backends/local/__init__.py +0 -13
- dstack/_internal/core/backends/local/backend.py +14 -0
- dstack/_internal/core/backends/local/compute.py +16 -2
- dstack/_internal/core/backends/models.py +128 -0
- dstack/_internal/core/backends/oci/__init__.py +0 -15
- dstack/_internal/core/backends/oci/auth.py +1 -5
- dstack/_internal/core/backends/oci/backend.py +16 -0
- dstack/_internal/core/backends/oci/compute.py +9 -23
- dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
- dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
- dstack/_internal/core/backends/oci/region.py +1 -1
- dstack/_internal/core/backends/runpod/__init__.py +0 -15
- dstack/_internal/core/backends/runpod/backend.py +16 -0
- dstack/_internal/core/backends/runpod/compute.py +28 -6
- dstack/_internal/core/backends/runpod/configurator.py +59 -0
- dstack/_internal/core/backends/runpod/models.py +54 -0
- dstack/_internal/core/backends/template/__init__.py +0 -0
- dstack/_internal/core/backends/tensordock/__init__.py +0 -15
- dstack/_internal/core/backends/tensordock/backend.py +16 -0
- dstack/_internal/core/backends/tensordock/compute.py +8 -27
- dstack/_internal/core/backends/tensordock/configurator.py +68 -0
- dstack/_internal/core/backends/tensordock/models.py +38 -0
- dstack/_internal/core/backends/vastai/__init__.py +0 -15
- dstack/_internal/core/backends/vastai/backend.py +16 -0
- dstack/_internal/core/backends/vastai/compute.py +2 -2
- dstack/_internal/core/backends/vastai/configurator.py +66 -0
- dstack/_internal/core/backends/vastai/models.py +37 -0
- dstack/_internal/core/backends/vultr/__init__.py +0 -15
- dstack/_internal/core/backends/vultr/backend.py +16 -0
- dstack/_internal/core/backends/vultr/compute.py +10 -24
- dstack/_internal/core/backends/vultr/configurator.py +64 -0
- dstack/_internal/core/backends/vultr/models.py +34 -0
- dstack/_internal/core/models/backends/__init__.py +0 -184
- dstack/_internal/core/models/backends/base.py +0 -19
- dstack/_internal/core/models/configurations.py +22 -16
- dstack/_internal/core/models/envs.py +4 -3
- dstack/_internal/core/models/fleets.py +17 -22
- dstack/_internal/core/models/gateways.py +3 -3
- dstack/_internal/core/models/instances.py +24 -0
- dstack/_internal/core/models/profiles.py +85 -45
- dstack/_internal/core/models/projects.py +1 -1
- dstack/_internal/core/models/repos/base.py +0 -5
- dstack/_internal/core/models/repos/local.py +3 -3
- dstack/_internal/core/models/repos/remote.py +26 -12
- dstack/_internal/core/models/repos/virtual.py +1 -1
- dstack/_internal/core/models/resources.py +45 -76
- dstack/_internal/core/models/runs.py +21 -19
- dstack/_internal/core/models/volumes.py +1 -3
- dstack/_internal/core/services/profiles.py +7 -16
- dstack/_internal/core/services/repos.py +0 -4
- dstack/_internal/server/app.py +11 -4
- dstack/_internal/server/background/__init__.py +10 -0
- dstack/_internal/server/background/tasks/process_gateways.py +4 -8
- dstack/_internal/server/background/tasks/process_instances.py +14 -9
- dstack/_internal/server/background/tasks/process_metrics.py +1 -1
- dstack/_internal/server/background/tasks/process_placement_groups.py +5 -1
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
- dstack/_internal/server/background/tasks/process_running_jobs.py +80 -24
- dstack/_internal/server/background/tasks/process_runs.py +1 -0
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +20 -38
- dstack/_internal/server/background/tasks/process_volumes.py +5 -2
- dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
- dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
- dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
- dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
- dstack/_internal/server/models.py +59 -9
- dstack/_internal/server/routers/backends.py +14 -23
- dstack/_internal/server/routers/instances.py +3 -4
- dstack/_internal/server/routers/metrics.py +31 -10
- dstack/_internal/server/routers/prometheus.py +36 -0
- dstack/_internal/server/routers/repos.py +1 -2
- dstack/_internal/server/routers/runs.py +13 -59
- dstack/_internal/server/schemas/gateways.py +14 -23
- dstack/_internal/server/schemas/projects.py +7 -2
- dstack/_internal/server/schemas/repos.py +2 -38
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/schemas/runs.py +1 -24
- dstack/_internal/server/security/permissions.py +1 -1
- dstack/_internal/server/services/backends/__init__.py +85 -158
- dstack/_internal/server/services/config.py +53 -567
- dstack/_internal/server/services/fleets.py +9 -103
- dstack/_internal/server/services/gateways/__init__.py +13 -4
- dstack/_internal/server/services/{pools.py → instances.py} +22 -329
- dstack/_internal/server/services/jobs/__init__.py +9 -6
- dstack/_internal/server/services/jobs/configurators/base.py +25 -1
- dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
- dstack/_internal/server/services/metrics.py +131 -72
- dstack/_internal/server/services/offers.py +1 -1
- dstack/_internal/server/services/projects.py +23 -14
- dstack/_internal/server/services/prometheus.py +245 -0
- dstack/_internal/server/services/runner/client.py +14 -3
- dstack/_internal/server/services/runs.py +67 -31
- dstack/_internal/server/services/volumes.py +9 -4
- dstack/_internal/server/settings.py +3 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4a0fe83e84574654e397.js} +76 -19
- dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4a0fe83e84574654e397.js.map} +1 -1
- dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
- dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
- dstack/_internal/server/testing/common.py +75 -32
- dstack/_internal/utils/json_schema.py +6 -0
- dstack/_internal/utils/ssh.py +2 -1
- dstack/api/__init__.py +4 -0
- dstack/api/_public/__init__.py +16 -20
- dstack/api/_public/backends.py +1 -1
- dstack/api/_public/repos.py +36 -36
- dstack/api/_public/runs.py +170 -83
- dstack/api/server/__init__.py +11 -13
- dstack/api/server/_backends.py +12 -16
- dstack/api/server/_fleets.py +15 -55
- dstack/api/server/_gateways.py +3 -14
- dstack/api/server/_repos.py +1 -4
- dstack/api/server/_runs.py +21 -96
- dstack/api/server/_volumes.py +10 -5
- dstack/api/utils.py +3 -0
- dstack/version.py +1 -1
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/METADATA +10 -1
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/RECORD +229 -206
- tests/_internal/cli/services/configurators/test_profile.py +6 -6
- tests/_internal/core/backends/aws/test_configurator.py +35 -0
- tests/_internal/core/backends/aws/test_resources.py +1 -1
- tests/_internal/core/backends/azure/test_configurator.py +61 -0
- tests/_internal/core/backends/cudo/__init__.py +0 -0
- tests/_internal/core/backends/cudo/test_configurator.py +37 -0
- tests/_internal/core/backends/datacrunch/__init__.py +0 -0
- tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
- tests/_internal/core/backends/gcp/test_configurator.py +42 -0
- tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
- tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
- tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
- tests/_internal/core/backends/oci/test_configurator.py +55 -0
- tests/_internal/core/backends/runpod/__init__.py +0 -0
- tests/_internal/core/backends/runpod/test_configurator.py +33 -0
- tests/_internal/core/backends/tensordock/__init__.py +0 -0
- tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
- tests/_internal/core/backends/vastai/__init__.py +0 -0
- tests/_internal/core/backends/vastai/test_configurator.py +33 -0
- tests/_internal/core/backends/vultr/__init__.py +0 -0
- tests/_internal/core/backends/vultr/test_configurator.py +33 -0
- tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
- tests/_internal/server/background/tasks/test_process_instances.py +49 -48
- tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
- tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
- tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +186 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +123 -19
- tests/_internal/server/background/tasks/test_process_runs.py +8 -22
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
- tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
- tests/_internal/server/routers/test_backends.py +6 -764
- tests/_internal/server/routers/test_fleets.py +2 -26
- tests/_internal/server/routers/test_gateways.py +27 -3
- tests/_internal/server/routers/test_instances.py +0 -10
- tests/_internal/server/routers/test_metrics.py +42 -0
- tests/_internal/server/routers/test_projects.py +56 -0
- tests/_internal/server/routers/test_prometheus.py +333 -0
- tests/_internal/server/routers/test_repos.py +0 -15
- tests/_internal/server/routers/test_runs.py +83 -275
- tests/_internal/server/routers/test_volumes.py +2 -3
- tests/_internal/server/services/backends/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
- tests/_internal/server/services/test_config.py +7 -4
- tests/_internal/server/services/test_fleets.py +1 -4
- tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
- tests/_internal/server/services/test_metrics.py +167 -0
- tests/_internal/server/services/test_repos.py +1 -14
- tests/_internal/server/services/test_runs.py +0 -4
- dstack/_internal/cli/commands/pool.py +0 -581
- dstack/_internal/cli/commands/run.py +0 -75
- dstack/_internal/core/backends/aws/config.py +0 -18
- dstack/_internal/core/backends/azure/config.py +0 -12
- dstack/_internal/core/backends/base/config.py +0 -5
- dstack/_internal/core/backends/cudo/config.py +0 -9
- dstack/_internal/core/backends/datacrunch/config.py +0 -9
- dstack/_internal/core/backends/gcp/config.py +0 -22
- dstack/_internal/core/backends/kubernetes/config.py +0 -6
- dstack/_internal/core/backends/lambdalabs/config.py +0 -9
- dstack/_internal/core/backends/nebius/__init__.py +0 -15
- dstack/_internal/core/backends/nebius/api_client.py +0 -319
- dstack/_internal/core/backends/nebius/compute.py +0 -220
- dstack/_internal/core/backends/nebius/config.py +0 -6
- dstack/_internal/core/backends/nebius/types.py +0 -37
- dstack/_internal/core/backends/oci/config.py +0 -6
- dstack/_internal/core/backends/runpod/config.py +0 -9
- dstack/_internal/core/backends/tensordock/config.py +0 -9
- dstack/_internal/core/backends/vastai/config.py +0 -6
- dstack/_internal/core/backends/vultr/config.py +0 -9
- dstack/_internal/core/models/backends/aws.py +0 -86
- dstack/_internal/core/models/backends/azure.py +0 -68
- dstack/_internal/core/models/backends/cudo.py +0 -43
- dstack/_internal/core/models/backends/datacrunch.py +0 -44
- dstack/_internal/core/models/backends/gcp.py +0 -67
- dstack/_internal/core/models/backends/kubernetes.py +0 -40
- dstack/_internal/core/models/backends/lambdalabs.py +0 -43
- dstack/_internal/core/models/backends/nebius.py +0 -54
- dstack/_internal/core/models/backends/runpod.py +0 -40
- dstack/_internal/core/models/backends/tensordock.py +0 -44
- dstack/_internal/core/models/backends/vastai.py +0 -43
- dstack/_internal/core/models/backends/vultr.py +0 -40
- dstack/_internal/core/models/pools.py +0 -43
- dstack/_internal/server/routers/pools.py +0 -142
- dstack/_internal/server/schemas/pools.py +0 -38
- dstack/_internal/server/services/backends/configurators/base.py +0 -72
- dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
- dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
- dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
- dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
- dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
- dstack/_internal/server/services/backends/configurators/runpod.py +0 -97
- dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
- dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
- dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
- dstack/api/_public/pools.py +0 -41
- dstack/api/_public/resources.py +0 -105
- dstack/api/server/_pools.py +0 -63
- tests/_internal/server/routers/test_pools.py +0 -612
- /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/WHEEL +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
|
-
from typing import List, Optional, Union
|
|
2
|
+
from typing import Any, Dict, List, Optional, Union, overload
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, root_validator, validator
|
|
5
5
|
from typing_extensions import Annotated, Literal
|
|
@@ -8,12 +8,9 @@ from dstack._internal.core.models.backends.base import BackendType
|
|
|
8
8
|
from dstack._internal.core.models.common import CoreModel, Duration
|
|
9
9
|
|
|
10
10
|
DEFAULT_RETRY_DURATION = 3600
|
|
11
|
-
DEFAULT_POOL_NAME = "default-pool"
|
|
12
11
|
|
|
13
12
|
DEFAULT_RUN_TERMINATION_IDLE_TIME = 5 * 60 # 5 minutes
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
DEFAULT_INSTANCE_RETRY_DURATION = 60 * 60 * 24 # 24h
|
|
13
|
+
DEFAULT_FLEET_TERMINATION_IDLE_TIME = 72 * 60 * 60 # 3 days
|
|
17
14
|
|
|
18
15
|
DEFAULT_STOP_DURATION = 300
|
|
19
16
|
|
|
@@ -34,6 +31,14 @@ class TerminationPolicy(str, Enum):
|
|
|
34
31
|
DESTROY_AFTER_IDLE = "destroy-after-idle"
|
|
35
32
|
|
|
36
33
|
|
|
34
|
+
@overload
|
|
35
|
+
def parse_duration(v: None) -> None: ...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@overload
|
|
39
|
+
def parse_duration(v: Union[int, str]) -> int: ...
|
|
40
|
+
|
|
41
|
+
|
|
37
42
|
def parse_duration(v: Optional[Union[int, str]]) -> Optional[int]:
|
|
38
43
|
if v is None:
|
|
39
44
|
return None
|
|
@@ -64,6 +69,8 @@ def parse_idle_duration(v: Optional[Union[int, str, bool]]) -> Optional[Union[st
|
|
|
64
69
|
return parse_duration(v)
|
|
65
70
|
|
|
66
71
|
|
|
72
|
+
# Deprecated in favor of ProfileRetry().
|
|
73
|
+
# TODO: Remove when no longer referenced.
|
|
67
74
|
class ProfileRetryPolicy(CoreModel):
|
|
68
75
|
retry: Annotated[bool, Field(description="Whether to retry the run on failure or not")] = False
|
|
69
76
|
duration: Annotated[
|
|
@@ -90,14 +97,15 @@ class RetryEvent(str, Enum):
|
|
|
90
97
|
|
|
91
98
|
class ProfileRetry(CoreModel):
|
|
92
99
|
on_events: Annotated[
|
|
93
|
-
List[RetryEvent],
|
|
100
|
+
Optional[List[RetryEvent]],
|
|
94
101
|
Field(
|
|
95
102
|
description=(
|
|
96
103
|
"The list of events that should be handled with retry."
|
|
97
|
-
" Supported events are `no-capacity`, `interruption`, and `error
|
|
104
|
+
" Supported events are `no-capacity`, `interruption`, and `error`."
|
|
105
|
+
" Omit to retry on all events"
|
|
98
106
|
)
|
|
99
107
|
),
|
|
100
|
-
]
|
|
108
|
+
] = None
|
|
101
109
|
duration: Annotated[
|
|
102
110
|
Optional[Union[int, str]],
|
|
103
111
|
Field(description="The maximum period of retrying the run, e.g., `4h` or `1d`"),
|
|
@@ -107,22 +115,56 @@ class ProfileRetry(CoreModel):
|
|
|
107
115
|
|
|
108
116
|
@root_validator
|
|
109
117
|
def _validate_fields(cls, values):
|
|
110
|
-
|
|
118
|
+
on_events = values.get("on_events", None)
|
|
119
|
+
if on_events is not None and len(values["on_events"]) == 0:
|
|
111
120
|
raise ValueError("`on_events` cannot be empty")
|
|
112
121
|
return values
|
|
113
122
|
|
|
114
123
|
|
|
124
|
+
class UtilizationPolicy(CoreModel):
|
|
125
|
+
_min_time_window = "5m"
|
|
126
|
+
|
|
127
|
+
min_gpu_utilization: Annotated[
|
|
128
|
+
int,
|
|
129
|
+
Field(
|
|
130
|
+
description=(
|
|
131
|
+
"Minimum required GPU utilization, percent."
|
|
132
|
+
" If any GPU has utilization below specified value during the whole time window,"
|
|
133
|
+
" the run is terminated"
|
|
134
|
+
),
|
|
135
|
+
ge=0,
|
|
136
|
+
le=100,
|
|
137
|
+
),
|
|
138
|
+
]
|
|
139
|
+
time_window: Annotated[
|
|
140
|
+
Union[int, str],
|
|
141
|
+
Field(
|
|
142
|
+
description=(
|
|
143
|
+
"The time window of metric samples taking into account to measure utilization"
|
|
144
|
+
f" (e.g., `30m`, `1h`). Minimum is `{_min_time_window}`"
|
|
145
|
+
)
|
|
146
|
+
),
|
|
147
|
+
]
|
|
148
|
+
|
|
149
|
+
@validator("time_window", pre=True)
|
|
150
|
+
def validate_time_window(cls, v: Union[int, str]) -> int:
|
|
151
|
+
v = parse_duration(v)
|
|
152
|
+
if v < parse_duration(cls._min_time_window):
|
|
153
|
+
raise ValueError(f"Minimum time_window is {cls._min_time_window}")
|
|
154
|
+
return v
|
|
155
|
+
|
|
156
|
+
|
|
115
157
|
class ProfileParams(CoreModel):
|
|
116
158
|
backends: Annotated[
|
|
117
159
|
Optional[List[BackendType]],
|
|
118
160
|
Field(description="The backends to consider for provisioning (e.g., `[aws, gcp]`)"),
|
|
119
|
-
]
|
|
161
|
+
] = None
|
|
120
162
|
regions: Annotated[
|
|
121
163
|
Optional[List[str]],
|
|
122
164
|
Field(
|
|
123
165
|
description="The regions to consider for provisioning (e.g., `[eu-west-1, us-west4, westeurope]`)"
|
|
124
166
|
),
|
|
125
|
-
]
|
|
167
|
+
] = None
|
|
126
168
|
availability_zones: Annotated[
|
|
127
169
|
Optional[List[str]],
|
|
128
170
|
Field(
|
|
@@ -134,7 +176,7 @@ class ProfileParams(CoreModel):
|
|
|
134
176
|
Field(
|
|
135
177
|
description="The cloud-specific instance types to consider for provisioning (e.g., `[p3.8xlarge, n1-standard-4]`)"
|
|
136
178
|
),
|
|
137
|
-
]
|
|
179
|
+
] = None
|
|
138
180
|
reservation: Annotated[
|
|
139
181
|
Optional[str],
|
|
140
182
|
Field(
|
|
@@ -143,17 +185,17 @@ class ProfileParams(CoreModel):
|
|
|
143
185
|
" Supports AWS Capacity Reservations and Capacity Blocks"
|
|
144
186
|
)
|
|
145
187
|
),
|
|
146
|
-
]
|
|
188
|
+
] = None
|
|
147
189
|
spot_policy: Annotated[
|
|
148
190
|
Optional[SpotPolicy],
|
|
149
191
|
Field(
|
|
150
192
|
description="The policy for provisioning spot or on-demand instances: `spot`, `on-demand`, or `auto`. Defaults to `on-demand`"
|
|
151
193
|
),
|
|
152
|
-
]
|
|
194
|
+
] = None
|
|
153
195
|
retry: Annotated[
|
|
154
196
|
Optional[Union[ProfileRetry, bool]],
|
|
155
197
|
Field(description="The policy for resubmitting the run. Defaults to `false`"),
|
|
156
|
-
]
|
|
198
|
+
] = None
|
|
157
199
|
max_duration: Annotated[
|
|
158
200
|
Optional[Union[Literal["off"], str, int, bool]],
|
|
159
201
|
Field(
|
|
@@ -163,7 +205,7 @@ class ProfileParams(CoreModel):
|
|
|
163
205
|
" Use `off` for unlimited duration. Defaults to `off`"
|
|
164
206
|
)
|
|
165
207
|
),
|
|
166
|
-
]
|
|
208
|
+
] = None
|
|
167
209
|
stop_duration: Annotated[
|
|
168
210
|
Optional[Union[Literal["off"], str, int, bool]],
|
|
169
211
|
Field(
|
|
@@ -174,17 +216,17 @@ class ProfileParams(CoreModel):
|
|
|
174
216
|
" Use `off` for unlimited duration. Defaults to `5m`"
|
|
175
217
|
)
|
|
176
218
|
),
|
|
177
|
-
]
|
|
219
|
+
] = None
|
|
178
220
|
max_price: Annotated[
|
|
179
221
|
Optional[float],
|
|
180
222
|
Field(description="The maximum instance price per hour, in dollars", gt=0.0),
|
|
181
|
-
]
|
|
223
|
+
] = None
|
|
182
224
|
creation_policy: Annotated[
|
|
183
225
|
Optional[CreationPolicy],
|
|
184
226
|
Field(
|
|
185
|
-
description="The policy for using instances from
|
|
227
|
+
description="The policy for using instances from fleets. Defaults to `reuse-or-create`"
|
|
186
228
|
),
|
|
187
|
-
]
|
|
229
|
+
] = None
|
|
188
230
|
idle_duration: Annotated[
|
|
189
231
|
Optional[Union[Literal["off"], str, int, bool]],
|
|
190
232
|
Field(
|
|
@@ -193,26 +235,27 @@ class ProfileParams(CoreModel):
|
|
|
193
235
|
" Defaults to `5m` for runs and `3d` for fleets. Use `off` for unlimited duration"
|
|
194
236
|
)
|
|
195
237
|
),
|
|
196
|
-
]
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
]
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
238
|
+
] = None
|
|
239
|
+
utilization_policy: Annotated[
|
|
240
|
+
Optional[UtilizationPolicy],
|
|
241
|
+
Field(description="Run termination policy based on utilization"),
|
|
242
|
+
] = None
|
|
243
|
+
|
|
244
|
+
# Deprecated and unused. Left for compatibility with 0.18 clients.
|
|
245
|
+
pool_name: Annotated[Optional[str], Field(exclude=True)] = None
|
|
246
|
+
instance_name: Annotated[Optional[str], Field(exclude=True)] = None
|
|
247
|
+
retry_policy: Annotated[Optional[ProfileRetryPolicy], Field(exclude=True)] = None
|
|
248
|
+
termination_policy: Annotated[Optional[TerminationPolicy], Field(exclude=True)] = None
|
|
249
|
+
termination_idle_time: Annotated[Optional[Union[str, int]], Field(exclude=True)] = None
|
|
250
|
+
|
|
251
|
+
class Config:
|
|
252
|
+
@staticmethod
|
|
253
|
+
def schema_extra(schema: Dict[str, Any]) -> None:
|
|
254
|
+
del schema["properties"]["pool_name"]
|
|
255
|
+
del schema["properties"]["instance_name"]
|
|
256
|
+
del schema["properties"]["retry_policy"]
|
|
257
|
+
del schema["properties"]["termination_policy"]
|
|
258
|
+
del schema["properties"]["termination_idle_time"]
|
|
216
259
|
|
|
217
260
|
_validate_max_duration = validator("max_duration", pre=True, allow_reuse=True)(
|
|
218
261
|
parse_max_duration
|
|
@@ -220,9 +263,6 @@ class ProfileParams(CoreModel):
|
|
|
220
263
|
_validate_stop_duration = validator("stop_duration", pre=True, allow_reuse=True)(
|
|
221
264
|
parse_stop_duration
|
|
222
265
|
)
|
|
223
|
-
_validate_termination_idle_time = validator(
|
|
224
|
-
"termination_idle_time", pre=True, allow_reuse=True
|
|
225
|
-
)(parse_duration)
|
|
226
266
|
_validate_idle_duration = validator("idle_duration", pre=True, allow_reuse=True)(
|
|
227
267
|
parse_idle_duration
|
|
228
268
|
)
|
|
@@ -232,11 +272,11 @@ class ProfileProps(CoreModel):
|
|
|
232
272
|
name: Annotated[
|
|
233
273
|
str,
|
|
234
274
|
Field(
|
|
235
|
-
description="The name of the profile that can be passed as `--profile` to `dstack
|
|
275
|
+
description="The name of the profile that can be passed as `--profile` to `dstack apply`"
|
|
236
276
|
),
|
|
237
277
|
]
|
|
238
278
|
default: Annotated[
|
|
239
|
-
bool, Field(description="If set to true, `dstack
|
|
279
|
+
bool, Field(description="If set to true, `dstack apply` will use this profile by default.")
|
|
240
280
|
] = False
|
|
241
281
|
|
|
242
282
|
|
|
@@ -3,7 +3,7 @@ from typing import List, Optional
|
|
|
3
3
|
|
|
4
4
|
from pydantic import UUID4
|
|
5
5
|
|
|
6
|
-
from dstack._internal.core.models
|
|
6
|
+
from dstack._internal.core.backends.models import BackendInfo
|
|
7
7
|
from dstack._internal.core.models.common import CoreModel
|
|
8
8
|
from dstack._internal.core.models.users import ProjectRole, User
|
|
9
9
|
|
|
@@ -26,7 +26,7 @@ class LocalRepo(Repo):
|
|
|
26
26
|
Example:
|
|
27
27
|
|
|
28
28
|
```python
|
|
29
|
-
run = client.runs.
|
|
29
|
+
run = client.runs.apply_configuration(
|
|
30
30
|
configuration=...,
|
|
31
31
|
repo=LocalRepo.from_dir("."), # Mount the current folder to the run
|
|
32
32
|
)
|
|
@@ -41,10 +41,10 @@ class LocalRepo(Repo):
|
|
|
41
41
|
Creates an instance of a local repo from a local path.
|
|
42
42
|
|
|
43
43
|
Args:
|
|
44
|
-
repo_dir: The path to a local folder
|
|
44
|
+
repo_dir: The path to a local folder.
|
|
45
45
|
|
|
46
46
|
Returns:
|
|
47
|
-
A local repo instance
|
|
47
|
+
A local repo instance.
|
|
48
48
|
"""
|
|
49
49
|
return LocalRepo(repo_dir=repo_dir)
|
|
50
50
|
|
|
@@ -3,7 +3,7 @@ import re
|
|
|
3
3
|
import subprocess
|
|
4
4
|
import time
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from typing import BinaryIO, Callable, Dict, Optional
|
|
6
|
+
from typing import Annotated, Any, BinaryIO, Callable, Dict, Optional
|
|
7
7
|
|
|
8
8
|
import git
|
|
9
9
|
import pydantic
|
|
@@ -12,7 +12,7 @@ from typing_extensions import Literal
|
|
|
12
12
|
|
|
13
13
|
from dstack._internal.core.errors import DstackError
|
|
14
14
|
from dstack._internal.core.models.common import CoreModel
|
|
15
|
-
from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
|
|
15
|
+
from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
|
|
16
16
|
from dstack._internal.utils.hash import get_sha256, slugify
|
|
17
17
|
from dstack._internal.utils.path import PathLike
|
|
18
18
|
from dstack._internal.utils.ssh import get_host_config
|
|
@@ -25,20 +25,34 @@ class RepoError(DstackError):
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class RemoteRepoCreds(CoreModel):
|
|
28
|
-
protocol: RepoProtocol # TODO: remove in 0.19
|
|
29
28
|
clone_url: str
|
|
30
29
|
private_key: Optional[str]
|
|
31
30
|
oauth_token: Optional[str]
|
|
32
31
|
|
|
32
|
+
# TODO: remove in 0.20. Left for compatibility with CLI <=0.18.44
|
|
33
|
+
protocol: Annotated[Optional[str], Field(exclude=True)] = None
|
|
34
|
+
|
|
35
|
+
class Config:
|
|
36
|
+
@staticmethod
|
|
37
|
+
def schema_extra(schema: Dict[str, Any]) -> None:
|
|
38
|
+
del schema["properties"]["protocol"]
|
|
39
|
+
|
|
33
40
|
|
|
34
41
|
class RemoteRepoInfo(BaseRepoInfo):
|
|
35
42
|
repo_type: Literal["remote"] = "remote"
|
|
36
43
|
repo_name: str
|
|
37
44
|
|
|
38
|
-
# TODO: remove in 0.
|
|
39
|
-
repo_host_name: str =
|
|
40
|
-
repo_port: Optional[int] = None
|
|
41
|
-
repo_user_name: str =
|
|
45
|
+
# TODO: remove in 0.20. Left for compatibility with CLI <=0.18.44
|
|
46
|
+
repo_host_name: Annotated[Optional[str], Field(exclude=True)] = None
|
|
47
|
+
repo_port: Annotated[Optional[int], Field(exclude=True)] = None
|
|
48
|
+
repo_user_name: Annotated[Optional[str], Field(exclude=True)] = None
|
|
49
|
+
|
|
50
|
+
class Config:
|
|
51
|
+
@staticmethod
|
|
52
|
+
def schema_extra(schema: Dict[str, Any]) -> None:
|
|
53
|
+
del schema["properties"]["repo_host_name"]
|
|
54
|
+
del schema["properties"]["repo_port"]
|
|
55
|
+
del schema["properties"]["repo_user_name"]
|
|
42
56
|
|
|
43
57
|
|
|
44
58
|
class RemoteRunRepoData(RemoteRepoInfo):
|
|
@@ -84,7 +98,7 @@ class RemoteRepo(Repo):
|
|
|
84
98
|
Finally, you can pass the repo object to the run:
|
|
85
99
|
|
|
86
100
|
```python
|
|
87
|
-
run = client.runs.
|
|
101
|
+
run = client.runs.apply_configuration(
|
|
88
102
|
configuration=...,
|
|
89
103
|
repo=repo,
|
|
90
104
|
)
|
|
@@ -100,10 +114,10 @@ class RemoteRepo(Repo):
|
|
|
100
114
|
Creates an instance of a remote repo from a local path.
|
|
101
115
|
|
|
102
116
|
Args:
|
|
103
|
-
repo_dir: The path to a local folder
|
|
117
|
+
repo_dir: The path to a local folder.
|
|
104
118
|
|
|
105
119
|
Returns:
|
|
106
|
-
A remote repo instance
|
|
120
|
+
A remote repo instance.
|
|
107
121
|
"""
|
|
108
122
|
return RemoteRepo(local_repo_dir=repo_dir)
|
|
109
123
|
|
|
@@ -115,12 +129,12 @@ class RemoteRepo(Repo):
|
|
|
115
129
|
Creates an instance of a remote repo from a URL.
|
|
116
130
|
|
|
117
131
|
Args:
|
|
118
|
-
repo_url: The URL of a remote Git repo
|
|
132
|
+
repo_url: The URL of a remote Git repo.
|
|
119
133
|
repo_branch: The name of the remote branch. Must be specified if `hash` is not specified.
|
|
120
134
|
repo_hash: The hash of the revision. Must be specified if `branch` is not specified.
|
|
121
135
|
|
|
122
136
|
Returns:
|
|
123
|
-
A remote repo instance
|
|
137
|
+
A remote repo instance.
|
|
124
138
|
"""
|
|
125
139
|
if repo_branch is None and repo_hash is None:
|
|
126
140
|
raise ValueError("Either `repo_branch` or `repo_hash` must be specified.")
|
|
@@ -30,7 +30,7 @@ class VirtualRepo(Repo):
|
|
|
30
30
|
virtual_repo.add_file_from_package(package=some_package, path="requirements.txt")
|
|
31
31
|
virtual_repo.add_file_from_package(package=some_package, path="train.py")
|
|
32
32
|
|
|
33
|
-
run = client.runs.
|
|
33
|
+
run = client.runs.apply_configuration(
|
|
34
34
|
configuration=...,
|
|
35
35
|
repo=virtual_repo,
|
|
36
36
|
)
|
|
@@ -8,6 +8,7 @@ from typing_extensions import Annotated
|
|
|
8
8
|
|
|
9
9
|
from dstack._internal.core.models.common import CoreModel
|
|
10
10
|
from dstack._internal.utils.common import pretty_resources
|
|
11
|
+
from dstack._internal.utils.json_schema import add_extra_schema_types
|
|
11
12
|
from dstack._internal.utils.logging import get_logger
|
|
12
13
|
|
|
13
14
|
logger = get_logger(__name__)
|
|
@@ -128,6 +129,22 @@ DEFAULT_GPU_COUNT = Range[int](min=1, max=1)
|
|
|
128
129
|
|
|
129
130
|
|
|
130
131
|
class GPUSpec(CoreModel):
|
|
132
|
+
class Config:
|
|
133
|
+
@staticmethod
|
|
134
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
135
|
+
add_extra_schema_types(
|
|
136
|
+
schema["properties"]["count"],
|
|
137
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
138
|
+
)
|
|
139
|
+
add_extra_schema_types(
|
|
140
|
+
schema["properties"]["memory"],
|
|
141
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
142
|
+
)
|
|
143
|
+
add_extra_schema_types(
|
|
144
|
+
schema["properties"]["total_memory"],
|
|
145
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
146
|
+
)
|
|
147
|
+
|
|
131
148
|
vendor: Annotated[
|
|
132
149
|
Optional[gpuhunt.AcceleratorVendor],
|
|
133
150
|
Field(
|
|
@@ -233,6 +250,14 @@ class GPUSpec(CoreModel):
|
|
|
233
250
|
|
|
234
251
|
|
|
235
252
|
class DiskSpec(CoreModel):
|
|
253
|
+
class Config:
|
|
254
|
+
@staticmethod
|
|
255
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
256
|
+
add_extra_schema_types(
|
|
257
|
+
schema["properties"]["size"],
|
|
258
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
259
|
+
)
|
|
260
|
+
|
|
236
261
|
size: Annotated[Range[Memory], Field(description="Disk size")]
|
|
237
262
|
|
|
238
263
|
@classmethod
|
|
@@ -254,11 +279,26 @@ class ResourcesSpec(CoreModel):
|
|
|
254
279
|
class Config:
|
|
255
280
|
@staticmethod
|
|
256
281
|
def schema_extra(schema: Dict[str, Any]):
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
282
|
+
add_extra_schema_types(
|
|
283
|
+
schema["properties"]["cpu"],
|
|
284
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
285
|
+
)
|
|
286
|
+
add_extra_schema_types(
|
|
287
|
+
schema["properties"]["memory"],
|
|
288
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
289
|
+
)
|
|
290
|
+
add_extra_schema_types(
|
|
291
|
+
schema["properties"]["shm_size"],
|
|
292
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
293
|
+
)
|
|
294
|
+
add_extra_schema_types(
|
|
295
|
+
schema["properties"]["gpu"],
|
|
296
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
297
|
+
)
|
|
298
|
+
add_extra_schema_types(
|
|
299
|
+
schema["properties"]["disk"],
|
|
300
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
301
|
+
)
|
|
262
302
|
|
|
263
303
|
cpu: Annotated[Range[int], Field(description="The number of CPU cores")] = DEFAULT_CPU_COUNT
|
|
264
304
|
memory: Annotated[Range[Memory], Field(description="The RAM size (e.g., `8GB`)")] = (
|
|
@@ -290,74 +330,3 @@ class ResourcesSpec(CoreModel):
|
|
|
290
330
|
resources.update(disk_size=self.disk.size)
|
|
291
331
|
res = pretty_resources(**resources)
|
|
292
332
|
return res
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
IntRangeLike = Union[Range[Union[int, str]], int, str]
|
|
296
|
-
MemoryRangeLike = Union[Range[Union[Memory, float, int, str]], float, int, str]
|
|
297
|
-
MemoryLike = Union[Memory, float, int, str]
|
|
298
|
-
GPULike = Union[GPUSpec, "GPUSpecSchema", int, str]
|
|
299
|
-
DiskLike = Union[DiskSpec, "DiskSpecSchema", float, int, str]
|
|
300
|
-
ComputeCapabilityLike = Union[ComputeCapability, float, str]
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
class GPUSpecSchema(CoreModel):
|
|
304
|
-
vendor: Annotated[
|
|
305
|
-
Optional[gpuhunt.AcceleratorVendor],
|
|
306
|
-
Field(
|
|
307
|
-
description="The vendor of the GPU/accelerator, one of: `nvidia`, `amd`, `google` (alias: `tpu`), `intel`"
|
|
308
|
-
),
|
|
309
|
-
] = None
|
|
310
|
-
name: Annotated[
|
|
311
|
-
Optional[Union[List[str], str]], Field(description="The GPU name or list of names")
|
|
312
|
-
] = None
|
|
313
|
-
count: Annotated[IntRangeLike, Field(description="The number of GPUs")] = DEFAULT_GPU_COUNT
|
|
314
|
-
memory: Annotated[
|
|
315
|
-
Optional[MemoryRangeLike],
|
|
316
|
-
Field(
|
|
317
|
-
description="The RAM size (e.g., `16GB`). Can be set to a range (e.g. `16GB..`, or `16GB..80GB`)"
|
|
318
|
-
),
|
|
319
|
-
] = None
|
|
320
|
-
total_memory: Annotated[
|
|
321
|
-
Optional[MemoryRangeLike],
|
|
322
|
-
Field(
|
|
323
|
-
description="The total RAM size (e.g., `32GB`). Can be set to a range (e.g. `16GB..`, or `16GB..80GB`)"
|
|
324
|
-
),
|
|
325
|
-
] = None
|
|
326
|
-
compute_capability: Annotated[
|
|
327
|
-
Optional[ComputeCapabilityLike],
|
|
328
|
-
Field(description="The minimum compute capability of the GPU (e.g., `7.5`)"),
|
|
329
|
-
] = None
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
class DiskSpecSchema(CoreModel):
|
|
333
|
-
size: Annotated[
|
|
334
|
-
MemoryRangeLike,
|
|
335
|
-
Field(
|
|
336
|
-
description="The disk size. Can be set to a range (e.g., `100GB..` or `100GB..200GB`)"
|
|
337
|
-
),
|
|
338
|
-
]
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
class ResourcesSpecSchema(CoreModel):
|
|
342
|
-
cpu: Annotated[Optional[IntRangeLike], Field(description="The number of CPU cores")] = (
|
|
343
|
-
DEFAULT_CPU_COUNT
|
|
344
|
-
)
|
|
345
|
-
memory: Annotated[
|
|
346
|
-
Optional[MemoryRangeLike],
|
|
347
|
-
Field(description="The RAM size (e.g., `8GB`)"),
|
|
348
|
-
] = DEFAULT_MEMORY_SIZE
|
|
349
|
-
shm_size: Annotated[
|
|
350
|
-
Optional[MemoryLike],
|
|
351
|
-
Field(
|
|
352
|
-
description="The size of shared memory (e.g., `8GB`). "
|
|
353
|
-
"If you are using parallel communicating processes (e.g., dataloaders in PyTorch), "
|
|
354
|
-
"you may need to configure this"
|
|
355
|
-
),
|
|
356
|
-
] = None
|
|
357
|
-
gpu: Annotated[
|
|
358
|
-
Optional[GPULike],
|
|
359
|
-
Field(
|
|
360
|
-
description="The GPU requirements. Can be set to a number, a string (e.g. `A100`, `80GB:2`, etc.), or an object"
|
|
361
|
-
),
|
|
362
|
-
] = None
|
|
363
|
-
disk: Annotated[Optional[DiskLike], Field(description="The disk resources")] = DEFAULT_DISK
|
|
@@ -20,9 +20,9 @@ from dstack._internal.core.models.profiles import (
|
|
|
20
20
|
CreationPolicy,
|
|
21
21
|
Profile,
|
|
22
22
|
ProfileParams,
|
|
23
|
-
ProfileRetryPolicy,
|
|
24
23
|
RetryEvent,
|
|
25
24
|
SpotPolicy,
|
|
25
|
+
UtilizationPolicy,
|
|
26
26
|
)
|
|
27
27
|
from dstack._internal.core.models.repos import AnyRunRepoData
|
|
28
28
|
from dstack._internal.core.models.resources import Memory, ResourcesSpec
|
|
@@ -114,6 +114,7 @@ class JobTerminationReason(str, Enum):
|
|
|
114
114
|
ABORTED_BY_USER = "aborted_by_user"
|
|
115
115
|
TERMINATED_BY_SERVER = "terminated_by_server"
|
|
116
116
|
INACTIVITY_DURATION_EXCEEDED = "inactivity_duration_exceeded"
|
|
117
|
+
TERMINATED_DUE_TO_UTILIZATION_POLICY = "terminated_due_to_utilization_policy"
|
|
117
118
|
# Set by the runner
|
|
118
119
|
CONTAINER_EXITED_WITH_ERROR = "container_exited_with_error"
|
|
119
120
|
PORTS_BINDING_FAILED = "ports_binding_failed"
|
|
@@ -135,6 +136,7 @@ class JobTerminationReason(str, Enum):
|
|
|
135
136
|
self.ABORTED_BY_USER: JobStatus.ABORTED,
|
|
136
137
|
self.TERMINATED_BY_SERVER: JobStatus.TERMINATED,
|
|
137
138
|
self.INACTIVITY_DURATION_EXCEEDED: JobStatus.TERMINATED,
|
|
139
|
+
self.TERMINATED_DUE_TO_UTILIZATION_POLICY: JobStatus.TERMINATED,
|
|
138
140
|
self.CONTAINER_EXITED_WITH_ERROR: JobStatus.FAILED,
|
|
139
141
|
self.PORTS_BINDING_FAILED: JobStatus.FAILED,
|
|
140
142
|
self.CREATING_CONTAINER_ERROR: JobStatus.FAILED,
|
|
@@ -175,6 +177,11 @@ class Gateway(CoreModel):
|
|
|
175
177
|
options: dict = {}
|
|
176
178
|
|
|
177
179
|
|
|
180
|
+
class JobSSHKey(CoreModel):
|
|
181
|
+
private: str
|
|
182
|
+
public: str
|
|
183
|
+
|
|
184
|
+
|
|
178
185
|
class JobSpec(CoreModel):
|
|
179
186
|
replica_num: int = 0 # default value for backward compatibility
|
|
180
187
|
job_num: int
|
|
@@ -190,13 +197,12 @@ class JobSpec(CoreModel):
|
|
|
190
197
|
single_branch: Optional[bool] = None
|
|
191
198
|
max_duration: Optional[int]
|
|
192
199
|
stop_duration: Optional[int] = None
|
|
200
|
+
utilization_policy: Optional[UtilizationPolicy] = None
|
|
193
201
|
registry_auth: Optional[RegistryAuth]
|
|
194
202
|
requirements: Requirements
|
|
195
203
|
retry: Optional[Retry]
|
|
196
204
|
volumes: Optional[List[MountPoint]] = None
|
|
197
|
-
|
|
198
|
-
# TODO: remove in 0.19
|
|
199
|
-
retry_policy: ProfileRetryPolicy = ProfileRetryPolicy(retry=False)
|
|
205
|
+
ssh_key: Optional[JobSSHKey] = None
|
|
200
206
|
working_dir: Optional[str]
|
|
201
207
|
|
|
202
208
|
|
|
@@ -302,7 +308,7 @@ class RunSpec(CoreModel):
|
|
|
302
308
|
run_name: Annotated[
|
|
303
309
|
Optional[str],
|
|
304
310
|
Field(description="The run name. If not set, the run name is generated automatically."),
|
|
305
|
-
]
|
|
311
|
+
] = None
|
|
306
312
|
repo_id: Annotated[
|
|
307
313
|
Optional[str],
|
|
308
314
|
Field(
|
|
@@ -312,15 +318,18 @@ class RunSpec(CoreModel):
|
|
|
312
318
|
" If not specified, a default virtual repo is used."
|
|
313
319
|
)
|
|
314
320
|
),
|
|
315
|
-
]
|
|
321
|
+
] = None
|
|
316
322
|
repo_data: Annotated[
|
|
317
323
|
Optional[AnyRunRepoData],
|
|
318
324
|
Field(
|
|
319
325
|
discriminator="repo_type",
|
|
320
326
|
description="The repo data such as the current branch and commit.",
|
|
321
327
|
),
|
|
322
|
-
]
|
|
323
|
-
repo_code_hash: Annotated[
|
|
328
|
+
] = None
|
|
329
|
+
repo_code_hash: Annotated[
|
|
330
|
+
Optional[str],
|
|
331
|
+
Field(description="The hash of the repo diff. Can be omitted if there is no repo diff."),
|
|
332
|
+
] = None
|
|
324
333
|
working_dir: Annotated[
|
|
325
334
|
Optional[str],
|
|
326
335
|
Field(
|
|
@@ -330,7 +339,7 @@ class RunSpec(CoreModel):
|
|
|
330
339
|
' Defaults to `"."`.'
|
|
331
340
|
)
|
|
332
341
|
),
|
|
333
|
-
]
|
|
342
|
+
] = None
|
|
334
343
|
configuration_path: Annotated[
|
|
335
344
|
Optional[str],
|
|
336
345
|
Field(
|
|
@@ -339,9 +348,9 @@ class RunSpec(CoreModel):
|
|
|
339
348
|
" It can be omitted when using the programmatic API."
|
|
340
349
|
)
|
|
341
350
|
),
|
|
342
|
-
]
|
|
351
|
+
] = None
|
|
343
352
|
configuration: Annotated[AnyRunConfiguration, Field(discriminator="type")]
|
|
344
|
-
profile: Annotated[Optional[Profile], Field(description="The profile parameters")]
|
|
353
|
+
profile: Annotated[Optional[Profile], Field(description="The profile parameters")] = None
|
|
345
354
|
ssh_key_pub: Annotated[
|
|
346
355
|
str,
|
|
347
356
|
Field(
|
|
@@ -448,9 +457,7 @@ class RunPlan(CoreModel):
|
|
|
448
457
|
run_spec: RunSpec
|
|
449
458
|
job_plans: List[JobPlan]
|
|
450
459
|
current_resource: Optional[Run] = None
|
|
451
|
-
|
|
452
|
-
# TODO: make required in 0.19
|
|
453
|
-
action: Optional[ApplyAction] = None
|
|
460
|
+
action: ApplyAction
|
|
454
461
|
|
|
455
462
|
|
|
456
463
|
class ApplyRunPlanInput(CoreModel):
|
|
@@ -466,11 +473,6 @@ class ApplyRunPlanInput(CoreModel):
|
|
|
466
473
|
] = None
|
|
467
474
|
|
|
468
475
|
|
|
469
|
-
class PoolInstanceOffers(CoreModel):
|
|
470
|
-
pool_name: str
|
|
471
|
-
instances: List[InstanceOfferWithAvailability]
|
|
472
|
-
|
|
473
|
-
|
|
474
476
|
def get_policy_map(spot_policy: Optional[SpotPolicy], default: SpotPolicy) -> Optional[bool]:
|
|
475
477
|
"""
|
|
476
478
|
Map profile.spot_policy[SpotPolicy|None] to requirements.spot[bool|None]
|