dstack 0.19.27__py3-none-any.whl → 0.19.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/__init__.py +11 -8
- dstack/_internal/cli/commands/apply.py +6 -3
- dstack/_internal/cli/commands/completion.py +3 -1
- dstack/_internal/cli/commands/config.py +1 -0
- dstack/_internal/cli/commands/init.py +2 -2
- dstack/_internal/cli/commands/offer.py +1 -1
- dstack/_internal/cli/commands/project.py +1 -0
- dstack/_internal/cli/commands/server.py +2 -2
- dstack/_internal/cli/main.py +1 -1
- dstack/_internal/cli/services/configurators/base.py +2 -4
- dstack/_internal/cli/services/configurators/fleet.py +4 -5
- dstack/_internal/cli/services/configurators/gateway.py +3 -5
- dstack/_internal/cli/services/configurators/run.py +51 -27
- dstack/_internal/cli/services/configurators/volume.py +3 -5
- dstack/_internal/core/backends/aws/compute.py +51 -36
- dstack/_internal/core/backends/azure/compute.py +10 -7
- dstack/_internal/core/backends/base/compute.py +96 -14
- dstack/_internal/core/backends/base/offers.py +34 -4
- dstack/_internal/core/backends/cloudrift/compute.py +5 -7
- dstack/_internal/core/backends/cudo/compute.py +4 -2
- dstack/_internal/core/backends/datacrunch/compute.py +13 -11
- dstack/_internal/core/backends/digitalocean_base/compute.py +4 -5
- dstack/_internal/core/backends/gcp/compute.py +12 -7
- dstack/_internal/core/backends/hotaisle/compute.py +4 -7
- dstack/_internal/core/backends/kubernetes/compute.py +6 -4
- dstack/_internal/core/backends/lambdalabs/compute.py +4 -5
- dstack/_internal/core/backends/local/compute.py +1 -3
- dstack/_internal/core/backends/nebius/compute.py +10 -7
- dstack/_internal/core/backends/oci/compute.py +10 -7
- dstack/_internal/core/backends/runpod/compute.py +15 -6
- dstack/_internal/core/backends/template/compute.py.jinja +3 -1
- dstack/_internal/core/backends/tensordock/compute.py +1 -3
- dstack/_internal/core/backends/tensordock/models.py +2 -0
- dstack/_internal/core/backends/vastai/compute.py +7 -3
- dstack/_internal/core/backends/vultr/compute.py +5 -5
- dstack/_internal/core/compatibility/runs.py +2 -0
- dstack/_internal/core/models/common.py +67 -43
- dstack/_internal/core/models/configurations.py +88 -62
- dstack/_internal/core/models/fleets.py +41 -24
- dstack/_internal/core/models/instances.py +5 -5
- dstack/_internal/core/models/profiles.py +66 -47
- dstack/_internal/core/models/projects.py +8 -0
- dstack/_internal/core/models/repos/remote.py +21 -16
- dstack/_internal/core/models/resources.py +69 -65
- dstack/_internal/core/models/runs.py +17 -9
- dstack/_internal/server/app.py +5 -0
- dstack/_internal/server/background/tasks/process_fleets.py +8 -0
- dstack/_internal/server/background/tasks/process_instances.py +3 -2
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +97 -34
- dstack/_internal/server/models.py +6 -5
- dstack/_internal/server/schemas/gateways.py +10 -9
- dstack/_internal/server/services/backends/__init__.py +1 -1
- dstack/_internal/server/services/backends/handlers.py +2 -0
- dstack/_internal/server/services/docker.py +8 -7
- dstack/_internal/server/services/projects.py +63 -4
- dstack/_internal/server/services/runs.py +2 -0
- dstack/_internal/server/settings.py +46 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/main-56191fbfe77f49b251de.css +3 -0
- dstack/_internal/server/statics/{main-4eecc75fbe64067eb1bc.js → main-c51afa7f243e24d3e446.js} +61115 -49101
- dstack/_internal/server/statics/{main-4eecc75fbe64067eb1bc.js.map → main-c51afa7f243e24d3e446.js.map} +1 -1
- dstack/_internal/utils/env.py +85 -11
- dstack/version.py +1 -1
- {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/METADATA +1 -1
- {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/RECORD +68 -73
- dstack/_internal/core/backends/tensordock/__init__.py +0 -0
- dstack/_internal/core/backends/tensordock/api_client.py +0 -104
- dstack/_internal/core/backends/tensordock/backend.py +0 -16
- dstack/_internal/core/backends/tensordock/configurator.py +0 -74
- dstack/_internal/server/statics/main-56191c63d516fd0041c4.css +0 -3
- dstack/_internal/server/statics/static/media/github.1f7102513534c83a9d8d735d2b8c12a2.svg +0 -3
- {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/WHEEL +0 -0
- {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.27.dist-info → dstack-0.19.29.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -2,13 +2,18 @@ import ipaddress
|
|
|
2
2
|
import uuid
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from enum import Enum
|
|
5
|
-
from typing import Any, Dict, List, Optional, Type, Union
|
|
5
|
+
from typing import Any, Dict, List, Optional, Union
|
|
6
6
|
|
|
7
7
|
from pydantic import Field, root_validator, validator
|
|
8
8
|
from typing_extensions import Annotated, Literal
|
|
9
9
|
|
|
10
10
|
from dstack._internal.core.models.backends.base import BackendType
|
|
11
|
-
from dstack._internal.core.models.common import
|
|
11
|
+
from dstack._internal.core.models.common import (
|
|
12
|
+
ApplyAction,
|
|
13
|
+
CoreConfig,
|
|
14
|
+
CoreModel,
|
|
15
|
+
generate_dual_core_model,
|
|
16
|
+
)
|
|
12
17
|
from dstack._internal.core.models.envs import Env
|
|
13
18
|
from dstack._internal.core.models.instances import Instance, InstanceOfferWithAvailability, SSHKey
|
|
14
19
|
from dstack._internal.core.models.profiles import (
|
|
@@ -202,6 +207,21 @@ class FleetNodesSpec(CoreModel):
|
|
|
202
207
|
return values
|
|
203
208
|
|
|
204
209
|
|
|
210
|
+
class InstanceGroupParamsConfig(CoreConfig):
|
|
211
|
+
@staticmethod
|
|
212
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
213
|
+
del schema["properties"]["termination_policy"]
|
|
214
|
+
del schema["properties"]["termination_idle_time"]
|
|
215
|
+
add_extra_schema_types(
|
|
216
|
+
schema["properties"]["nodes"],
|
|
217
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
218
|
+
)
|
|
219
|
+
add_extra_schema_types(
|
|
220
|
+
schema["properties"]["idle_duration"],
|
|
221
|
+
extra_types=[{"type": "string"}],
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
|
|
205
225
|
class InstanceGroupParams(CoreModel):
|
|
206
226
|
env: Annotated[
|
|
207
227
|
Env,
|
|
@@ -297,20 +317,6 @@ class InstanceGroupParams(CoreModel):
|
|
|
297
317
|
termination_policy: Annotated[Optional[TerminationPolicy], Field(exclude=True)] = None
|
|
298
318
|
termination_idle_time: Annotated[Optional[Union[str, int]], Field(exclude=True)] = None
|
|
299
319
|
|
|
300
|
-
class Config(CoreModel.Config):
|
|
301
|
-
@staticmethod
|
|
302
|
-
def schema_extra(schema: Dict[str, Any], model: Type):
|
|
303
|
-
del schema["properties"]["termination_policy"]
|
|
304
|
-
del schema["properties"]["termination_idle_time"]
|
|
305
|
-
add_extra_schema_types(
|
|
306
|
-
schema["properties"]["nodes"],
|
|
307
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
308
|
-
)
|
|
309
|
-
add_extra_schema_types(
|
|
310
|
-
schema["properties"]["idle_duration"],
|
|
311
|
-
extra_types=[{"type": "string"}],
|
|
312
|
-
)
|
|
313
|
-
|
|
314
320
|
@validator("nodes", pre=True)
|
|
315
321
|
def parse_nodes(cls, v: Optional[Union[dict, str]]) -> Optional[dict]:
|
|
316
322
|
if isinstance(v, str) and ".." in v:
|
|
@@ -331,7 +337,17 @@ class FleetProps(CoreModel):
|
|
|
331
337
|
name: Annotated[Optional[str], Field(description="The fleet name")] = None
|
|
332
338
|
|
|
333
339
|
|
|
334
|
-
class FleetConfiguration(InstanceGroupParams, FleetProps):
|
|
340
|
+
class FleetConfigurationConfig(InstanceGroupParamsConfig):
|
|
341
|
+
@staticmethod
|
|
342
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
343
|
+
InstanceGroupParamsConfig.schema_extra(schema)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class FleetConfiguration(
|
|
347
|
+
InstanceGroupParams,
|
|
348
|
+
FleetProps,
|
|
349
|
+
generate_dual_core_model(FleetConfigurationConfig),
|
|
350
|
+
):
|
|
335
351
|
tags: Annotated[
|
|
336
352
|
Optional[Dict[str, str]],
|
|
337
353
|
Field(
|
|
@@ -346,7 +362,14 @@ class FleetConfiguration(InstanceGroupParams, FleetProps):
|
|
|
346
362
|
_validate_tags = validator("tags", pre=True, allow_reuse=True)(tags_validator)
|
|
347
363
|
|
|
348
364
|
|
|
349
|
-
class FleetSpec(CoreModel):
|
|
365
|
+
class FleetSpecConfig(CoreConfig):
|
|
366
|
+
@staticmethod
|
|
367
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
368
|
+
prop = schema.get("properties", {})
|
|
369
|
+
prop.pop("merged_profile", None)
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
class FleetSpec(generate_dual_core_model(FleetSpecConfig)):
|
|
350
373
|
configuration: FleetConfiguration
|
|
351
374
|
configuration_path: Optional[str] = None
|
|
352
375
|
profile: Profile
|
|
@@ -356,12 +379,6 @@ class FleetSpec(CoreModel):
|
|
|
356
379
|
# TODO: make merged_profile a computed field after migrating to pydanticV2
|
|
357
380
|
merged_profile: Annotated[Profile, Field(exclude=True)] = None
|
|
358
381
|
|
|
359
|
-
class Config(CoreModel.Config):
|
|
360
|
-
@staticmethod
|
|
361
|
-
def schema_extra(schema: Dict[str, Any], model: Type) -> None:
|
|
362
|
-
prop = schema.get("properties", {})
|
|
363
|
-
prop.pop("merged_profile", None)
|
|
364
|
-
|
|
365
382
|
@root_validator
|
|
366
383
|
def _merged_profile(cls, values) -> Dict:
|
|
367
384
|
try:
|
|
@@ -7,7 +7,10 @@ import gpuhunt
|
|
|
7
7
|
from pydantic import root_validator
|
|
8
8
|
|
|
9
9
|
from dstack._internal.core.models.backends.base import BackendType
|
|
10
|
-
from dstack._internal.core.models.common import
|
|
10
|
+
from dstack._internal.core.models.common import (
|
|
11
|
+
CoreModel,
|
|
12
|
+
FrozenCoreModel,
|
|
13
|
+
)
|
|
11
14
|
from dstack._internal.core.models.envs import Env
|
|
12
15
|
from dstack._internal.core.models.health import HealthStatus
|
|
13
16
|
from dstack._internal.core.models.volumes import Volume
|
|
@@ -117,14 +120,11 @@ class InstanceType(CoreModel):
|
|
|
117
120
|
resources: Resources
|
|
118
121
|
|
|
119
122
|
|
|
120
|
-
class SSHConnectionParams(CoreModel):
|
|
123
|
+
class SSHConnectionParams(FrozenCoreModel):
|
|
121
124
|
hostname: str
|
|
122
125
|
username: str
|
|
123
126
|
port: int
|
|
124
127
|
|
|
125
|
-
class Config(CoreModel.Config):
|
|
126
|
-
frozen = True
|
|
127
|
-
|
|
128
128
|
|
|
129
129
|
class SSHKey(CoreModel):
|
|
130
130
|
public: str
|
|
@@ -6,7 +6,12 @@ from pydantic import Field, root_validator, validator
|
|
|
6
6
|
from typing_extensions import Annotated, Literal
|
|
7
7
|
|
|
8
8
|
from dstack._internal.core.models.backends.base import BackendType
|
|
9
|
-
from dstack._internal.core.models.common import
|
|
9
|
+
from dstack._internal.core.models.common import (
|
|
10
|
+
CoreConfig,
|
|
11
|
+
CoreModel,
|
|
12
|
+
Duration,
|
|
13
|
+
generate_dual_core_model,
|
|
14
|
+
)
|
|
10
15
|
from dstack._internal.utils.common import list_enum_values_for_annotation
|
|
11
16
|
from dstack._internal.utils.cron import validate_cron
|
|
12
17
|
from dstack._internal.utils.json_schema import add_extra_schema_types
|
|
@@ -112,7 +117,16 @@ class RetryEvent(str, Enum):
|
|
|
112
117
|
ERROR = "error"
|
|
113
118
|
|
|
114
119
|
|
|
115
|
-
class ProfileRetry(CoreModel):
|
|
120
|
+
class ProfileRetryConfig(CoreConfig):
|
|
121
|
+
@staticmethod
|
|
122
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
123
|
+
add_extra_schema_types(
|
|
124
|
+
schema["properties"]["duration"],
|
|
125
|
+
extra_types=[{"type": "string"}],
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class ProfileRetry(generate_dual_core_model(ProfileRetryConfig)):
|
|
116
130
|
on_events: Annotated[
|
|
117
131
|
Optional[List[RetryEvent]],
|
|
118
132
|
Field(
|
|
@@ -128,14 +142,6 @@ class ProfileRetry(CoreModel):
|
|
|
128
142
|
Field(description="The maximum period of retrying the run, e.g., `4h` or `1d`"),
|
|
129
143
|
] = None
|
|
130
144
|
|
|
131
|
-
class Config(CoreModel.Config):
|
|
132
|
-
@staticmethod
|
|
133
|
-
def schema_extra(schema: Dict[str, Any]):
|
|
134
|
-
add_extra_schema_types(
|
|
135
|
-
schema["properties"]["duration"],
|
|
136
|
-
extra_types=[{"type": "string"}],
|
|
137
|
-
)
|
|
138
|
-
|
|
139
145
|
_validate_duration = validator("duration", pre=True, allow_reuse=True)(parse_duration)
|
|
140
146
|
|
|
141
147
|
@root_validator
|
|
@@ -146,7 +152,16 @@ class ProfileRetry(CoreModel):
|
|
|
146
152
|
return values
|
|
147
153
|
|
|
148
154
|
|
|
149
|
-
class UtilizationPolicy(CoreModel):
|
|
155
|
+
class UtilizationPolicyConfig(CoreConfig):
|
|
156
|
+
@staticmethod
|
|
157
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
158
|
+
add_extra_schema_types(
|
|
159
|
+
schema["properties"]["time_window"],
|
|
160
|
+
extra_types=[{"type": "string"}],
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class UtilizationPolicy(generate_dual_core_model(UtilizationPolicyConfig)):
|
|
150
165
|
_min_time_window = "5m"
|
|
151
166
|
|
|
152
167
|
min_gpu_utilization: Annotated[
|
|
@@ -171,14 +186,6 @@ class UtilizationPolicy(CoreModel):
|
|
|
171
186
|
),
|
|
172
187
|
]
|
|
173
188
|
|
|
174
|
-
class Config(CoreModel.Config):
|
|
175
|
-
@staticmethod
|
|
176
|
-
def schema_extra(schema: Dict[str, Any]):
|
|
177
|
-
add_extra_schema_types(
|
|
178
|
-
schema["properties"]["time_window"],
|
|
179
|
-
extra_types=[{"type": "string"}],
|
|
180
|
-
)
|
|
181
|
-
|
|
182
189
|
@validator("time_window", pre=True)
|
|
183
190
|
def validate_time_window(cls, v: Union[int, str]) -> int:
|
|
184
191
|
v = parse_duration(v)
|
|
@@ -219,6 +226,28 @@ class Schedule(CoreModel):
|
|
|
219
226
|
return self.cron
|
|
220
227
|
|
|
221
228
|
|
|
229
|
+
class ProfileParamsConfig(CoreConfig):
|
|
230
|
+
@staticmethod
|
|
231
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
232
|
+
del schema["properties"]["pool_name"]
|
|
233
|
+
del schema["properties"]["instance_name"]
|
|
234
|
+
del schema["properties"]["retry_policy"]
|
|
235
|
+
del schema["properties"]["termination_policy"]
|
|
236
|
+
del schema["properties"]["termination_idle_time"]
|
|
237
|
+
add_extra_schema_types(
|
|
238
|
+
schema["properties"]["max_duration"],
|
|
239
|
+
extra_types=[{"type": "boolean"}, {"type": "string"}],
|
|
240
|
+
)
|
|
241
|
+
add_extra_schema_types(
|
|
242
|
+
schema["properties"]["stop_duration"],
|
|
243
|
+
extra_types=[{"type": "boolean"}, {"type": "string"}],
|
|
244
|
+
)
|
|
245
|
+
add_extra_schema_types(
|
|
246
|
+
schema["properties"]["idle_duration"],
|
|
247
|
+
extra_types=[{"type": "string"}],
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
|
|
222
251
|
class ProfileParams(CoreModel):
|
|
223
252
|
backends: Annotated[
|
|
224
253
|
Optional[List[BackendType]],
|
|
@@ -358,27 +387,6 @@ class ProfileParams(CoreModel):
|
|
|
358
387
|
termination_policy: Annotated[Optional[TerminationPolicy], Field(exclude=True)] = None
|
|
359
388
|
termination_idle_time: Annotated[Optional[Union[str, int]], Field(exclude=True)] = None
|
|
360
389
|
|
|
361
|
-
class Config(CoreModel.Config):
|
|
362
|
-
@staticmethod
|
|
363
|
-
def schema_extra(schema: Dict[str, Any]) -> None:
|
|
364
|
-
del schema["properties"]["pool_name"]
|
|
365
|
-
del schema["properties"]["instance_name"]
|
|
366
|
-
del schema["properties"]["retry_policy"]
|
|
367
|
-
del schema["properties"]["termination_policy"]
|
|
368
|
-
del schema["properties"]["termination_idle_time"]
|
|
369
|
-
add_extra_schema_types(
|
|
370
|
-
schema["properties"]["max_duration"],
|
|
371
|
-
extra_types=[{"type": "boolean"}, {"type": "string"}],
|
|
372
|
-
)
|
|
373
|
-
add_extra_schema_types(
|
|
374
|
-
schema["properties"]["stop_duration"],
|
|
375
|
-
extra_types=[{"type": "boolean"}, {"type": "string"}],
|
|
376
|
-
)
|
|
377
|
-
add_extra_schema_types(
|
|
378
|
-
schema["properties"]["idle_duration"],
|
|
379
|
-
extra_types=[{"type": "string"}],
|
|
380
|
-
)
|
|
381
|
-
|
|
382
390
|
_validate_max_duration = validator("max_duration", pre=True, allow_reuse=True)(
|
|
383
391
|
parse_max_duration
|
|
384
392
|
)
|
|
@@ -403,17 +411,28 @@ class ProfileProps(CoreModel):
|
|
|
403
411
|
] = False
|
|
404
412
|
|
|
405
413
|
|
|
406
|
-
class
|
|
414
|
+
class ProfileConfig(ProfileParamsConfig):
|
|
415
|
+
@staticmethod
|
|
416
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
417
|
+
ProfileParamsConfig.schema_extra(schema)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
class Profile(
|
|
421
|
+
ProfileProps,
|
|
422
|
+
ProfileParams,
|
|
423
|
+
generate_dual_core_model(ProfileConfig),
|
|
424
|
+
):
|
|
407
425
|
pass
|
|
408
426
|
|
|
409
427
|
|
|
410
|
-
class
|
|
411
|
-
|
|
428
|
+
class ProfilesConfigConfig(CoreConfig):
|
|
429
|
+
json_loads = orjson.loads
|
|
430
|
+
json_dumps = pydantic_orjson_dumps_with_indent
|
|
431
|
+
schema_extra = {"$schema": "http://json-schema.org/draft-07/schema#"}
|
|
412
432
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
schema_extra = {"$schema": "http://json-schema.org/draft-07/schema#"}
|
|
433
|
+
|
|
434
|
+
class ProfilesConfig(generate_dual_core_model(ProfilesConfigConfig)):
|
|
435
|
+
profiles: List[Profile]
|
|
417
436
|
|
|
418
437
|
def default(self) -> Optional[Profile]:
|
|
419
438
|
for p in self.profiles:
|
|
@@ -26,3 +26,11 @@ class Project(CoreModel):
|
|
|
26
26
|
backends: List[BackendInfo]
|
|
27
27
|
members: List[Member]
|
|
28
28
|
is_public: bool = False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ProjectHookConfig(CoreModel):
|
|
32
|
+
"""
|
|
33
|
+
This class can be inherited to extend the project creation configuration passed to the hooks.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
pass
|
|
@@ -11,7 +11,7 @@ from pydantic import Field
|
|
|
11
11
|
from typing_extensions import Literal
|
|
12
12
|
|
|
13
13
|
from dstack._internal.core.errors import DstackError
|
|
14
|
-
from dstack._internal.core.models.common import
|
|
14
|
+
from dstack._internal.core.models.common import CoreConfig, generate_dual_core_model
|
|
15
15
|
from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
|
|
16
16
|
from dstack._internal.utils.hash import get_sha256, slugify
|
|
17
17
|
from dstack._internal.utils.path import PathLike
|
|
@@ -24,21 +24,33 @@ class RepoError(DstackError):
|
|
|
24
24
|
pass
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
class
|
|
27
|
+
class RemoteRepoCredsConfig(CoreConfig):
|
|
28
|
+
@staticmethod
|
|
29
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
30
|
+
del schema["properties"]["protocol"]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class RemoteRepoCreds(generate_dual_core_model(RemoteRepoCredsConfig)):
|
|
28
34
|
clone_url: str
|
|
29
|
-
private_key: Optional[str]
|
|
30
|
-
oauth_token: Optional[str]
|
|
35
|
+
private_key: Optional[str] = None
|
|
36
|
+
oauth_token: Optional[str] = None
|
|
31
37
|
|
|
32
38
|
# TODO: remove in 0.20. Left for compatibility with CLI <=0.18.44
|
|
33
39
|
protocol: Annotated[Optional[str], Field(exclude=True)] = None
|
|
34
40
|
|
|
35
|
-
class Config(CoreModel.Config):
|
|
36
|
-
@staticmethod
|
|
37
|
-
def schema_extra(schema: Dict[str, Any]) -> None:
|
|
38
|
-
del schema["properties"]["protocol"]
|
|
39
41
|
|
|
42
|
+
class RemoteRepoInfoConfig(CoreConfig):
|
|
43
|
+
@staticmethod
|
|
44
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
45
|
+
del schema["properties"]["repo_host_name"]
|
|
46
|
+
del schema["properties"]["repo_port"]
|
|
47
|
+
del schema["properties"]["repo_user_name"]
|
|
40
48
|
|
|
41
|
-
|
|
49
|
+
|
|
50
|
+
class RemoteRepoInfo(
|
|
51
|
+
BaseRepoInfo,
|
|
52
|
+
generate_dual_core_model(RemoteRepoInfoConfig),
|
|
53
|
+
):
|
|
42
54
|
repo_type: Literal["remote"] = "remote"
|
|
43
55
|
repo_name: str
|
|
44
56
|
|
|
@@ -47,13 +59,6 @@ class RemoteRepoInfo(BaseRepoInfo):
|
|
|
47
59
|
repo_port: Annotated[Optional[int], Field(exclude=True)] = None
|
|
48
60
|
repo_user_name: Annotated[Optional[str], Field(exclude=True)] = None
|
|
49
61
|
|
|
50
|
-
class Config(BaseRepoInfo.Config):
|
|
51
|
-
@staticmethod
|
|
52
|
-
def schema_extra(schema: Dict[str, Any]) -> None:
|
|
53
|
-
del schema["properties"]["repo_host_name"]
|
|
54
|
-
del schema["properties"]["repo_port"]
|
|
55
|
-
del schema["properties"]["repo_user_name"]
|
|
56
|
-
|
|
57
62
|
|
|
58
63
|
class RemoteRunRepoData(RemoteRepoInfo):
|
|
59
64
|
repo_branch: Optional[str] = None
|
|
@@ -7,7 +7,7 @@ from pydantic import Field, parse_obj_as, root_validator, validator
|
|
|
7
7
|
from pydantic.generics import GenericModel
|
|
8
8
|
from typing_extensions import Annotated
|
|
9
9
|
|
|
10
|
-
from dstack._internal.core.models.common import CoreModel
|
|
10
|
+
from dstack._internal.core.models.common import CoreConfig, CoreModel, generate_dual_core_model
|
|
11
11
|
from dstack._internal.utils.common import pretty_resources
|
|
12
12
|
from dstack._internal.utils.json_schema import add_extra_schema_types
|
|
13
13
|
from dstack._internal.utils.logging import get_logger
|
|
@@ -129,21 +129,22 @@ DEFAULT_MEMORY_SIZE = Range[Memory](min=Memory.parse("8GB"))
|
|
|
129
129
|
DEFAULT_GPU_COUNT = Range[int](min=1)
|
|
130
130
|
|
|
131
131
|
|
|
132
|
-
class CPUSpec(CoreModel):
|
|
132
|
+
class CPUSpecConfig(CoreConfig):
|
|
133
|
+
@staticmethod
|
|
134
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
135
|
+
add_extra_schema_types(
|
|
136
|
+
schema["properties"]["count"],
|
|
137
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class CPUSpec(generate_dual_core_model(CPUSpecConfig)):
|
|
133
142
|
arch: Annotated[
|
|
134
143
|
Optional[gpuhunt.CPUArchitecture],
|
|
135
144
|
Field(description="The CPU architecture, one of: `x86`, `arm`"),
|
|
136
145
|
] = None
|
|
137
146
|
count: Annotated[Range[int], Field(description="The number of CPU cores")] = DEFAULT_CPU_COUNT
|
|
138
147
|
|
|
139
|
-
class Config(CoreModel.Config):
|
|
140
|
-
@staticmethod
|
|
141
|
-
def schema_extra(schema: Dict[str, Any]):
|
|
142
|
-
add_extra_schema_types(
|
|
143
|
-
schema["properties"]["count"],
|
|
144
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
145
|
-
)
|
|
146
|
-
|
|
147
148
|
@classmethod
|
|
148
149
|
def __get_validators__(cls):
|
|
149
150
|
yield cls.parse
|
|
@@ -190,7 +191,28 @@ class CPUSpec(CoreModel):
|
|
|
190
191
|
return v
|
|
191
192
|
|
|
192
193
|
|
|
193
|
-
class GPUSpec(CoreModel):
|
|
194
|
+
class GPUSpecConfig(CoreConfig):
|
|
195
|
+
@staticmethod
|
|
196
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
197
|
+
add_extra_schema_types(
|
|
198
|
+
schema["properties"]["count"],
|
|
199
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
200
|
+
)
|
|
201
|
+
add_extra_schema_types(
|
|
202
|
+
schema["properties"]["name"],
|
|
203
|
+
extra_types=[{"type": "string"}],
|
|
204
|
+
)
|
|
205
|
+
add_extra_schema_types(
|
|
206
|
+
schema["properties"]["memory"],
|
|
207
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
208
|
+
)
|
|
209
|
+
add_extra_schema_types(
|
|
210
|
+
schema["properties"]["total_memory"],
|
|
211
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class GPUSpec(generate_dual_core_model(GPUSpecConfig)):
|
|
194
216
|
vendor: Annotated[
|
|
195
217
|
Optional[gpuhunt.AcceleratorVendor],
|
|
196
218
|
Field(
|
|
@@ -218,26 +240,6 @@ class GPUSpec(CoreModel):
|
|
|
218
240
|
Field(description="The minimum compute capability of the GPU (e.g., `7.5`)"),
|
|
219
241
|
] = None
|
|
220
242
|
|
|
221
|
-
class Config(CoreModel.Config):
|
|
222
|
-
@staticmethod
|
|
223
|
-
def schema_extra(schema: Dict[str, Any]):
|
|
224
|
-
add_extra_schema_types(
|
|
225
|
-
schema["properties"]["count"],
|
|
226
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
227
|
-
)
|
|
228
|
-
add_extra_schema_types(
|
|
229
|
-
schema["properties"]["name"],
|
|
230
|
-
extra_types=[{"type": "string"}],
|
|
231
|
-
)
|
|
232
|
-
add_extra_schema_types(
|
|
233
|
-
schema["properties"]["memory"],
|
|
234
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
235
|
-
)
|
|
236
|
-
add_extra_schema_types(
|
|
237
|
-
schema["properties"]["total_memory"],
|
|
238
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
239
|
-
)
|
|
240
|
-
|
|
241
243
|
@classmethod
|
|
242
244
|
def __get_validators__(cls):
|
|
243
245
|
yield cls.parse
|
|
@@ -317,16 +319,17 @@ class GPUSpec(CoreModel):
|
|
|
317
319
|
return gpuhunt.AcceleratorVendor.cast(v)
|
|
318
320
|
|
|
319
321
|
|
|
320
|
-
class DiskSpec(CoreModel):
|
|
321
|
-
|
|
322
|
+
class DiskSpecConfig(CoreConfig):
|
|
323
|
+
@staticmethod
|
|
324
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
325
|
+
add_extra_schema_types(
|
|
326
|
+
schema["properties"]["size"],
|
|
327
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
328
|
+
)
|
|
322
329
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
add_extra_schema_types(
|
|
327
|
-
schema["properties"]["size"],
|
|
328
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
329
|
-
)
|
|
330
|
+
|
|
331
|
+
class DiskSpec(generate_dual_core_model(DiskSpecConfig)):
|
|
332
|
+
size: Annotated[Range[Memory], Field(description="Disk size")]
|
|
330
333
|
|
|
331
334
|
@classmethod
|
|
332
335
|
def __get_validators__(cls):
|
|
@@ -343,7 +346,32 @@ class DiskSpec(CoreModel):
|
|
|
343
346
|
DEFAULT_DISK = DiskSpec(size=Range[Memory](min=Memory.parse("100GB"), max=None))
|
|
344
347
|
|
|
345
348
|
|
|
346
|
-
class ResourcesSpec(CoreModel):
|
|
349
|
+
class ResourcesSpecConfig(CoreConfig):
|
|
350
|
+
@staticmethod
|
|
351
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
352
|
+
add_extra_schema_types(
|
|
353
|
+
schema["properties"]["cpu"],
|
|
354
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
355
|
+
)
|
|
356
|
+
add_extra_schema_types(
|
|
357
|
+
schema["properties"]["memory"],
|
|
358
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
359
|
+
)
|
|
360
|
+
add_extra_schema_types(
|
|
361
|
+
schema["properties"]["shm_size"],
|
|
362
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
363
|
+
)
|
|
364
|
+
add_extra_schema_types(
|
|
365
|
+
schema["properties"]["gpu"],
|
|
366
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
367
|
+
)
|
|
368
|
+
add_extra_schema_types(
|
|
369
|
+
schema["properties"]["disk"],
|
|
370
|
+
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
class ResourcesSpec(generate_dual_core_model(ResourcesSpecConfig)):
|
|
347
375
|
# TODO: Remove Range[int] in 0.20. Range[int] for backward compatibility only.
|
|
348
376
|
cpu: Annotated[Union[CPUSpec, Range[int]], Field(description="The CPU requirements")] = (
|
|
349
377
|
CPUSpec()
|
|
@@ -362,30 +390,6 @@ class ResourcesSpec(CoreModel):
|
|
|
362
390
|
gpu: Annotated[Optional[GPUSpec], Field(description="The GPU requirements")] = None
|
|
363
391
|
disk: Annotated[Optional[DiskSpec], Field(description="The disk resources")] = DEFAULT_DISK
|
|
364
392
|
|
|
365
|
-
class Config(CoreModel.Config):
|
|
366
|
-
@staticmethod
|
|
367
|
-
def schema_extra(schema: Dict[str, Any]):
|
|
368
|
-
add_extra_schema_types(
|
|
369
|
-
schema["properties"]["cpu"],
|
|
370
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
371
|
-
)
|
|
372
|
-
add_extra_schema_types(
|
|
373
|
-
schema["properties"]["memory"],
|
|
374
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
375
|
-
)
|
|
376
|
-
add_extra_schema_types(
|
|
377
|
-
schema["properties"]["shm_size"],
|
|
378
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
379
|
-
)
|
|
380
|
-
add_extra_schema_types(
|
|
381
|
-
schema["properties"]["gpu"],
|
|
382
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
383
|
-
)
|
|
384
|
-
add_extra_schema_types(
|
|
385
|
-
schema["properties"]["disk"],
|
|
386
|
-
extra_types=[{"type": "integer"}, {"type": "string"}],
|
|
387
|
-
)
|
|
388
|
-
|
|
389
393
|
def pretty_format(self) -> str:
|
|
390
394
|
# TODO: Remove in 0.20. Use self.cpu directly
|
|
391
395
|
cpu = parse_obj_as(CPUSpec, self.cpu)
|
|
@@ -1,13 +1,20 @@
|
|
|
1
1
|
from datetime import datetime, timedelta
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import Any, Dict, List, Literal, Optional
|
|
3
|
+
from typing import Any, Dict, List, Literal, Optional
|
|
4
4
|
from urllib.parse import urlparse
|
|
5
5
|
|
|
6
6
|
from pydantic import UUID4, Field, root_validator
|
|
7
7
|
from typing_extensions import Annotated
|
|
8
8
|
|
|
9
9
|
from dstack._internal.core.models.backends.base import BackendType
|
|
10
|
-
from dstack._internal.core.models.common import
|
|
10
|
+
from dstack._internal.core.models.common import (
|
|
11
|
+
ApplyAction,
|
|
12
|
+
CoreConfig,
|
|
13
|
+
CoreModel,
|
|
14
|
+
NetworkMode,
|
|
15
|
+
RegistryAuth,
|
|
16
|
+
generate_dual_core_model,
|
|
17
|
+
)
|
|
11
18
|
from dstack._internal.core.models.configurations import (
|
|
12
19
|
DEFAULT_PROBE_METHOD,
|
|
13
20
|
LEGACY_REPO_DIR,
|
|
@@ -385,7 +392,14 @@ class Job(CoreModel):
|
|
|
385
392
|
job_submissions: List[JobSubmission]
|
|
386
393
|
|
|
387
394
|
|
|
388
|
-
class RunSpec(CoreModel):
|
|
395
|
+
class RunSpecConfig(CoreConfig):
|
|
396
|
+
@staticmethod
|
|
397
|
+
def schema_extra(schema: Dict[str, Any]):
|
|
398
|
+
prop = schema.get("properties", {})
|
|
399
|
+
prop.pop("merged_profile", None)
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
class RunSpec(generate_dual_core_model(RunSpecConfig)):
|
|
389
403
|
# TODO: run_name, working_dir are redundant here since they already passed in configuration
|
|
390
404
|
run_name: Annotated[
|
|
391
405
|
Optional[str],
|
|
@@ -458,12 +472,6 @@ class RunSpec(CoreModel):
|
|
|
458
472
|
# TODO: make merged_profile a computed field after migrating to pydanticV2
|
|
459
473
|
merged_profile: Annotated[Profile, Field(exclude=True)] = None
|
|
460
474
|
|
|
461
|
-
class Config(CoreModel.Config):
|
|
462
|
-
@staticmethod
|
|
463
|
-
def schema_extra(schema: Dict[str, Any], model: Type) -> None:
|
|
464
|
-
prop = schema.get("properties", {})
|
|
465
|
-
prop.pop("merged_profile", None)
|
|
466
|
-
|
|
467
475
|
@root_validator
|
|
468
476
|
def _merged_profile(cls, values) -> Dict:
|
|
469
477
|
if values.get("profile") is None:
|
dstack/_internal/server/app.py
CHANGED
|
@@ -160,6 +160,11 @@ async def lifespan(app: FastAPI):
|
|
|
160
160
|
logger.info("Background processing is disabled")
|
|
161
161
|
PROBES_SCHEDULER.start()
|
|
162
162
|
dstack_version = DSTACK_VERSION if DSTACK_VERSION else "(no version)"
|
|
163
|
+
logger.info(
|
|
164
|
+
"Job network mode: %s (%d)",
|
|
165
|
+
settings.JOB_NETWORK_MODE.name,
|
|
166
|
+
settings.JOB_NETWORK_MODE.value,
|
|
167
|
+
)
|
|
163
168
|
logger.info(f"The admin token is {admin.token.get_plaintext_or_error()}", {"show_path": False})
|
|
164
169
|
logger.info(
|
|
165
170
|
f"The dstack server {dstack_version} is running at {SERVER_URL}",
|
|
@@ -177,6 +177,14 @@ def _maintain_fleet_nodes_min(
|
|
|
177
177
|
|
|
178
178
|
|
|
179
179
|
def _autodelete_fleet(fleet_model: FleetModel) -> bool:
|
|
180
|
+
if fleet_model.project.deleted:
|
|
181
|
+
# It used to be possible to delete project with active resources:
|
|
182
|
+
# https://github.com/dstackai/dstack/issues/3077
|
|
183
|
+
fleet_model.status = FleetStatus.TERMINATED
|
|
184
|
+
fleet_model.deleted = True
|
|
185
|
+
logger.info("Fleet %s deleted due to deleted project", fleet_model.name)
|
|
186
|
+
return True
|
|
187
|
+
|
|
180
188
|
if is_fleet_in_use(fleet_model) or not is_fleet_empty(fleet_model):
|
|
181
189
|
return False
|
|
182
190
|
|
|
@@ -578,7 +578,6 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
|
|
|
578
578
|
if placement_group_model is None: # error occurred
|
|
579
579
|
continue
|
|
580
580
|
session.add(placement_group_model)
|
|
581
|
-
await session.flush()
|
|
582
581
|
placement_group_models.append(placement_group_model)
|
|
583
582
|
logger.debug(
|
|
584
583
|
"Trying %s in %s/%s for $%0.4f per hour",
|
|
@@ -636,7 +635,9 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
|
|
|
636
635
|
},
|
|
637
636
|
)
|
|
638
637
|
if instance.fleet_id and _is_fleet_master_instance(instance):
|
|
639
|
-
# Clean up placement groups that did not end up being used
|
|
638
|
+
# Clean up placement groups that did not end up being used.
|
|
639
|
+
# Flush to update still uncommitted placement groups.
|
|
640
|
+
await session.flush()
|
|
640
641
|
await schedule_fleet_placement_groups_deletion(
|
|
641
642
|
session=session,
|
|
642
643
|
fleet_id=instance.fleet_id,
|