dstack 0.19.26__py3-none-any.whl → 0.19.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/__init__.py +11 -8
- dstack/_internal/cli/commands/apply.py +6 -3
- dstack/_internal/cli/commands/completion.py +3 -1
- dstack/_internal/cli/commands/config.py +1 -0
- dstack/_internal/cli/commands/init.py +4 -4
- dstack/_internal/cli/commands/offer.py +1 -1
- dstack/_internal/cli/commands/project.py +1 -0
- dstack/_internal/cli/commands/server.py +2 -2
- dstack/_internal/cli/main.py +1 -1
- dstack/_internal/cli/services/configurators/base.py +2 -4
- dstack/_internal/cli/services/configurators/fleet.py +4 -5
- dstack/_internal/cli/services/configurators/gateway.py +3 -5
- dstack/_internal/cli/services/configurators/run.py +165 -43
- dstack/_internal/cli/services/configurators/volume.py +3 -5
- dstack/_internal/cli/services/repos.py +1 -18
- dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
- dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
- dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
- dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
- dstack/_internal/core/backends/aws/compute.py +6 -1
- dstack/_internal/core/backends/base/compute.py +33 -5
- dstack/_internal/core/backends/base/offers.py +2 -0
- dstack/_internal/core/backends/configurators.py +15 -0
- dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
- dstack/_internal/core/backends/digitalocean/backend.py +16 -0
- dstack/_internal/core/backends/digitalocean/compute.py +5 -0
- dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
- dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
- dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
- dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
- dstack/_internal/core/backends/digitalocean_base/compute.py +173 -0
- dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
- dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
- dstack/_internal/core/backends/gcp/compute.py +32 -8
- dstack/_internal/core/backends/hotaisle/api_client.py +25 -33
- dstack/_internal/core/backends/hotaisle/compute.py +1 -6
- dstack/_internal/core/backends/models.py +7 -0
- dstack/_internal/core/backends/nebius/compute.py +0 -7
- dstack/_internal/core/backends/oci/compute.py +4 -5
- dstack/_internal/core/backends/vultr/compute.py +1 -5
- dstack/_internal/core/compatibility/fleets.py +5 -0
- dstack/_internal/core/compatibility/runs.py +10 -1
- dstack/_internal/core/models/backends/base.py +5 -1
- dstack/_internal/core/models/common.py +67 -43
- dstack/_internal/core/models/configurations.py +109 -69
- dstack/_internal/core/models/files.py +1 -1
- dstack/_internal/core/models/fleets.py +115 -25
- dstack/_internal/core/models/instances.py +5 -5
- dstack/_internal/core/models/profiles.py +66 -47
- dstack/_internal/core/models/repos/remote.py +21 -16
- dstack/_internal/core/models/resources.py +69 -65
- dstack/_internal/core/models/runs.py +41 -14
- dstack/_internal/core/services/repos.py +85 -80
- dstack/_internal/server/app.py +5 -0
- dstack/_internal/server/background/tasks/process_fleets.py +117 -13
- dstack/_internal/server/background/tasks/process_instances.py +12 -71
- dstack/_internal/server/background/tasks/process_running_jobs.py +2 -0
- dstack/_internal/server/background/tasks/process_runs.py +2 -0
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +48 -16
- dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
- dstack/_internal/server/models.py +11 -7
- dstack/_internal/server/schemas/gateways.py +10 -9
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/services/backends/handlers.py +2 -0
- dstack/_internal/server/services/docker.py +8 -7
- dstack/_internal/server/services/fleets.py +23 -25
- dstack/_internal/server/services/instances.py +3 -3
- dstack/_internal/server/services/jobs/configurators/base.py +46 -6
- dstack/_internal/server/services/jobs/configurators/dev.py +4 -4
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +3 -5
- dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +4 -6
- dstack/_internal/server/services/jobs/configurators/service.py +0 -3
- dstack/_internal/server/services/jobs/configurators/task.py +0 -3
- dstack/_internal/server/services/projects.py +52 -1
- dstack/_internal/server/services/runs.py +16 -0
- dstack/_internal/server/settings.py +46 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-aec4762350e34d6fbff9.css → main-5e0d56245c4bd241ec27.css} +1 -1
- dstack/_internal/server/statics/{main-d151b300fcac3933213d.js → main-a2a16772fbf11a14d191.js} +1215 -998
- dstack/_internal/server/statics/{main-d151b300fcac3933213d.js.map → main-a2a16772fbf11a14d191.js.map} +1 -1
- dstack/_internal/server/testing/common.py +6 -3
- dstack/_internal/utils/env.py +85 -11
- dstack/_internal/utils/path.py +8 -1
- dstack/_internal/utils/ssh.py +7 -0
- dstack/api/_public/repos.py +41 -6
- dstack/api/_public/runs.py +14 -1
- dstack/version.py +1 -1
- {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/METADATA +2 -2
- {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/RECORD +92 -78
- dstack/_internal/server/statics/static/media/github.1f7102513534c83a9d8d735d2b8c12a2.svg +0 -3
- {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/WHEEL +0 -0
- {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.26.dist-info → dstack-0.19.28.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from dstack._internal.core.backends.base.configurator import (
|
|
5
|
+
BackendRecord,
|
|
6
|
+
Configurator,
|
|
7
|
+
)
|
|
8
|
+
from dstack._internal.core.backends.digitalocean_base.backend import BaseDigitalOceanBackend
|
|
9
|
+
from dstack._internal.core.backends.digitalocean_base.models import (
|
|
10
|
+
AnyBaseDigitalOceanCreds,
|
|
11
|
+
BaseDigitalOceanBackendConfig,
|
|
12
|
+
BaseDigitalOceanBackendConfigWithCreds,
|
|
13
|
+
BaseDigitalOceanConfig,
|
|
14
|
+
BaseDigitalOceanCreds,
|
|
15
|
+
BaseDigitalOceanStoredConfig,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BaseDigitalOceanConfigurator(Configurator):
|
|
20
|
+
def validate_config(
|
|
21
|
+
self, config: BaseDigitalOceanBackendConfigWithCreds, default_creds_enabled: bool
|
|
22
|
+
):
|
|
23
|
+
self._validate_creds(config.creds, config.project_name)
|
|
24
|
+
|
|
25
|
+
def create_backend(
|
|
26
|
+
self, project_name: str, config: BaseDigitalOceanBackendConfigWithCreds
|
|
27
|
+
) -> BackendRecord:
|
|
28
|
+
return BackendRecord(
|
|
29
|
+
config=BaseDigitalOceanStoredConfig(
|
|
30
|
+
**BaseDigitalOceanBackendConfig.__response__.parse_obj(config).dict()
|
|
31
|
+
).json(),
|
|
32
|
+
auth=BaseDigitalOceanCreds.parse_obj(config.creds).json(),
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
def get_backend_config_with_creds(
|
|
36
|
+
self, record: BackendRecord
|
|
37
|
+
) -> BaseDigitalOceanBackendConfigWithCreds:
|
|
38
|
+
config = self._get_config(record)
|
|
39
|
+
return BaseDigitalOceanBackendConfigWithCreds.__response__.parse_obj(config)
|
|
40
|
+
|
|
41
|
+
def get_backend_config_without_creds(
|
|
42
|
+
self, record: BackendRecord
|
|
43
|
+
) -> BaseDigitalOceanBackendConfig:
|
|
44
|
+
config = self._get_config(record)
|
|
45
|
+
return BaseDigitalOceanBackendConfig.__response__.parse_obj(config)
|
|
46
|
+
|
|
47
|
+
def get_backend(self, record: BackendRecord) -> BaseDigitalOceanBackend:
|
|
48
|
+
raise NotImplementedError("Subclasses must implement get_backend")
|
|
49
|
+
|
|
50
|
+
def _get_config(self, record: BackendRecord) -> BaseDigitalOceanConfig:
|
|
51
|
+
return BaseDigitalOceanConfig.__response__(
|
|
52
|
+
**json.loads(record.config),
|
|
53
|
+
creds=BaseDigitalOceanCreds.parse_raw(record.auth),
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
def _validate_creds(self, creds: AnyBaseDigitalOceanCreds, project_name: Optional[str] = None):
|
|
57
|
+
pass
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from typing import Annotated, List, Literal, Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from dstack._internal.core.models.common import CoreModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseDigitalOceanAPIKeyCreds(CoreModel):
|
|
9
|
+
type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
|
|
10
|
+
api_key: Annotated[str, Field(description="The API key")]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
AnyBaseDigitalOceanCreds = BaseDigitalOceanAPIKeyCreds
|
|
14
|
+
BaseDigitalOceanCreds = AnyBaseDigitalOceanCreds
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseDigitalOceanBackendConfig(CoreModel):
|
|
18
|
+
type: Annotated[
|
|
19
|
+
Literal["amddevcloud", "digitalocean"],
|
|
20
|
+
Field(description="The type of backend"),
|
|
21
|
+
]
|
|
22
|
+
project_name: Annotated[Optional[str], Field(description="The name of the project")] = None
|
|
23
|
+
regions: Annotated[
|
|
24
|
+
Optional[List[str]],
|
|
25
|
+
Field(description="The list of regions. Omit to use all regions"),
|
|
26
|
+
] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BaseDigitalOceanBackendConfigWithCreds(BaseDigitalOceanBackendConfig):
|
|
30
|
+
creds: Annotated[AnyBaseDigitalOceanCreds, Field(description="The credentials")]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
AnyBaseDigitalOceanBackendConfig = Union[
|
|
34
|
+
BaseDigitalOceanBackendConfig, BaseDigitalOceanBackendConfigWithCreds
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class BaseDigitalOceanStoredConfig(BaseDigitalOceanBackendConfig):
|
|
39
|
+
pass
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class BaseDigitalOceanConfig(BaseDigitalOceanStoredConfig):
|
|
43
|
+
creds: AnyBaseDigitalOceanCreds
|
|
@@ -2,6 +2,7 @@ import concurrent.futures
|
|
|
2
2
|
import json
|
|
3
3
|
import threading
|
|
4
4
|
from collections import defaultdict
|
|
5
|
+
from dataclasses import dataclass
|
|
5
6
|
from typing import Callable, Dict, List, Literal, Optional, Tuple
|
|
6
7
|
|
|
7
8
|
import google.api_core.exceptions
|
|
@@ -285,16 +286,18 @@ class GCPCompute(
|
|
|
285
286
|
)
|
|
286
287
|
raise NoCapacityError()
|
|
287
288
|
|
|
289
|
+
image = _get_image(
|
|
290
|
+
instance_type_name=instance_offer.instance.name,
|
|
291
|
+
cuda=len(instance_offer.instance.resources.gpus) > 0,
|
|
292
|
+
)
|
|
293
|
+
|
|
288
294
|
for zone in zones:
|
|
289
295
|
request = compute_v1.InsertInstanceRequest()
|
|
290
296
|
request.zone = zone
|
|
291
297
|
request.project = self.config.project_id
|
|
292
298
|
request.instance_resource = gcp_resources.create_instance_struct(
|
|
293
299
|
disk_size=disk_size,
|
|
294
|
-
image_id=_get_image_id(
|
|
295
|
-
instance_type_name=instance_offer.instance.name,
|
|
296
|
-
cuda=len(instance_offer.instance.resources.gpus) > 0,
|
|
297
|
-
),
|
|
300
|
+
image_id=image.id,
|
|
298
301
|
machine_type=instance_offer.instance.name,
|
|
299
302
|
accelerators=gcp_resources.get_accelerators(
|
|
300
303
|
project_id=self.config.project_id,
|
|
@@ -305,6 +308,7 @@ class GCPCompute(
|
|
|
305
308
|
user_data=_get_user_data(
|
|
306
309
|
authorized_keys=authorized_keys,
|
|
307
310
|
instance_type_name=instance_offer.instance.name,
|
|
311
|
+
is_ufw_installed=image.is_ufw_installed,
|
|
308
312
|
),
|
|
309
313
|
authorized_keys=authorized_keys,
|
|
310
314
|
labels=labels,
|
|
@@ -889,24 +893,41 @@ def _get_vpc_subnet(
|
|
|
889
893
|
)
|
|
890
894
|
|
|
891
895
|
|
|
892
|
-
|
|
896
|
+
@dataclass
|
|
897
|
+
class GCPImage:
|
|
898
|
+
id: str
|
|
899
|
+
is_ufw_installed: bool
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
def _get_image(instance_type_name: str, cuda: bool) -> GCPImage:
|
|
893
903
|
if instance_type_name == "a3-megagpu-8g":
|
|
894
904
|
image_name = "dstack-a3mega-5"
|
|
905
|
+
is_ufw_installed = False
|
|
895
906
|
elif instance_type_name in ["a3-edgegpu-8g", "a3-highgpu-8g"]:
|
|
896
|
-
return
|
|
907
|
+
return GCPImage(
|
|
908
|
+
id="projects/cos-cloud/global/images/cos-105-17412-535-78",
|
|
909
|
+
is_ufw_installed=False,
|
|
910
|
+
)
|
|
897
911
|
elif cuda:
|
|
898
912
|
image_name = f"dstack-cuda-{version.base_image}"
|
|
913
|
+
is_ufw_installed = True
|
|
899
914
|
else:
|
|
900
915
|
image_name = f"dstack-{version.base_image}"
|
|
916
|
+
is_ufw_installed = True
|
|
901
917
|
image_name = image_name.replace(".", "-")
|
|
902
|
-
return
|
|
918
|
+
return GCPImage(
|
|
919
|
+
id=f"projects/dstack/global/images/{image_name}",
|
|
920
|
+
is_ufw_installed=is_ufw_installed,
|
|
921
|
+
)
|
|
903
922
|
|
|
904
923
|
|
|
905
924
|
def _get_gateway_image_id() -> str:
|
|
906
925
|
return "projects/ubuntu-os-cloud/global/images/ubuntu-2204-jammy-v20230714"
|
|
907
926
|
|
|
908
927
|
|
|
909
|
-
def _get_user_data(authorized_keys: List[str], instance_type_name: str) -> str:
|
|
928
|
+
def _get_user_data(
|
|
929
|
+
authorized_keys: List[str], instance_type_name: str, is_ufw_installed: bool
|
|
930
|
+
) -> str:
|
|
910
931
|
base_path = None
|
|
911
932
|
bin_path = None
|
|
912
933
|
backend_shim_env = None
|
|
@@ -929,6 +950,9 @@ def _get_user_data(authorized_keys: List[str], instance_type_name: str) -> str:
|
|
|
929
950
|
base_path=base_path,
|
|
930
951
|
bin_path=bin_path,
|
|
931
952
|
backend_shim_env=backend_shim_env,
|
|
953
|
+
# Instance-level firewall is optional on GCP. The main protection comes from GCP firewalls.
|
|
954
|
+
# So only set up instance-level firewall as an additional measure if ufw is available.
|
|
955
|
+
skip_firewall_setup=not is_ufw_installed,
|
|
932
956
|
)
|
|
933
957
|
|
|
934
958
|
|
|
@@ -16,46 +16,38 @@ class HotAisleAPIClient:
|
|
|
16
16
|
self.team_handle = team_handle
|
|
17
17
|
|
|
18
18
|
def validate_api_key(self) -> bool:
|
|
19
|
+
url = f"{API_URL}/user/"
|
|
19
20
|
try:
|
|
20
|
-
self._validate_user_and_team()
|
|
21
|
-
|
|
21
|
+
response = self._make_request("GET", url)
|
|
22
|
+
response.raise_for_status()
|
|
22
23
|
except requests.HTTPError as e:
|
|
23
|
-
if e.response
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
error_message = str(e)
|
|
35
|
-
if "No Hot Aisle teams found" in error_message:
|
|
36
|
-
raise_invalid_credentials_error(
|
|
37
|
-
fields=[["creds", "api_key"]],
|
|
38
|
-
details="Valid API key but no teams found for this user",
|
|
39
|
-
)
|
|
40
|
-
elif "not found" in error_message:
|
|
41
|
-
raise_invalid_credentials_error(
|
|
42
|
-
fields=[["team_handle"]], details=f"Team handle '{self.team_handle}' not found"
|
|
43
|
-
)
|
|
44
|
-
raise e
|
|
45
|
-
|
|
46
|
-
def _validate_user_and_team(self) -> None:
|
|
47
|
-
url = f"{API_URL}/user/"
|
|
48
|
-
response = self._make_request("GET", url)
|
|
49
|
-
response.raise_for_status()
|
|
50
|
-
user_data = response.json()
|
|
24
|
+
if e.response is not None:
|
|
25
|
+
if e.response.status_code == 401:
|
|
26
|
+
raise_invalid_credentials_error(
|
|
27
|
+
fields=[["creds", "api_key"]], details="Invalid API key"
|
|
28
|
+
)
|
|
29
|
+
if e.response.status_code == 403:
|
|
30
|
+
raise_invalid_credentials_error(
|
|
31
|
+
fields=[["creds", "api_key"]],
|
|
32
|
+
details="Authenticated user does not have required permissions",
|
|
33
|
+
)
|
|
34
|
+
raise
|
|
51
35
|
|
|
52
|
-
|
|
36
|
+
user_data = response.json()
|
|
37
|
+
teams = user_data["teams"]
|
|
53
38
|
if not teams:
|
|
54
|
-
|
|
39
|
+
raise_invalid_credentials_error(
|
|
40
|
+
fields=[["creds", "api_key"]],
|
|
41
|
+
details="Valid API key but no teams found for this user",
|
|
42
|
+
)
|
|
55
43
|
|
|
56
44
|
available_teams = [team["handle"] for team in teams]
|
|
57
45
|
if self.team_handle not in available_teams:
|
|
58
|
-
|
|
46
|
+
raise_invalid_credentials_error(
|
|
47
|
+
fields=[["team_handle"]],
|
|
48
|
+
details=f"Team handle '{self.team_handle}' not found",
|
|
49
|
+
)
|
|
50
|
+
return True
|
|
59
51
|
|
|
60
52
|
def upload_ssh_key(self, public_key: str) -> bool:
|
|
61
53
|
url = f"{API_URL}/user/ssh_keys/"
|
|
@@ -28,8 +28,6 @@ from dstack._internal.utils.logging import get_logger
|
|
|
28
28
|
|
|
29
29
|
logger = get_logger(__name__)
|
|
30
30
|
|
|
31
|
-
MAX_INSTANCE_NAME_LEN = 60
|
|
32
|
-
|
|
33
31
|
|
|
34
32
|
INSTANCE_TYPE_SPECS = {
|
|
35
33
|
"1x MI300X 8x Xeon Platinum 8462Y+": {
|
|
@@ -130,9 +128,7 @@ class HotAisleCompute(
|
|
|
130
128
|
ssh_port=22,
|
|
131
129
|
dockerized=True,
|
|
132
130
|
ssh_proxy=None,
|
|
133
|
-
backend_data=HotAisleInstanceBackendData(
|
|
134
|
-
ip_address=vm_data["ip_address"], vm_id=vm_data["name"]
|
|
135
|
-
).json(),
|
|
131
|
+
backend_data=HotAisleInstanceBackendData(ip_address=vm_data["ip_address"]).json(),
|
|
136
132
|
)
|
|
137
133
|
|
|
138
134
|
def update_provisioning_data(
|
|
@@ -217,7 +213,6 @@ def _run_ssh_command(hostname: str, ssh_private_key: str, command: str):
|
|
|
217
213
|
|
|
218
214
|
class HotAisleInstanceBackendData(CoreModel):
|
|
219
215
|
ip_address: str
|
|
220
|
-
vm_id: Optional[str] = None
|
|
221
216
|
|
|
222
217
|
@classmethod
|
|
223
218
|
def load(cls, raw: Optional[str]) -> "HotAisleInstanceBackendData":
|
|
@@ -20,6 +20,10 @@ from dstack._internal.core.backends.datacrunch.models import (
|
|
|
20
20
|
DataCrunchBackendConfig,
|
|
21
21
|
DataCrunchBackendConfigWithCreds,
|
|
22
22
|
)
|
|
23
|
+
from dstack._internal.core.backends.digitalocean_base.models import (
|
|
24
|
+
BaseDigitalOceanBackendConfig,
|
|
25
|
+
BaseDigitalOceanBackendConfigWithCreds,
|
|
26
|
+
)
|
|
23
27
|
from dstack._internal.core.backends.dstack.models import (
|
|
24
28
|
DstackBackendConfig,
|
|
25
29
|
DstackBaseBackendConfig,
|
|
@@ -77,6 +81,7 @@ AnyBackendConfigWithoutCreds = Union[
|
|
|
77
81
|
CloudRiftBackendConfig,
|
|
78
82
|
CudoBackendConfig,
|
|
79
83
|
DataCrunchBackendConfig,
|
|
84
|
+
BaseDigitalOceanBackendConfig,
|
|
80
85
|
GCPBackendConfig,
|
|
81
86
|
HotAisleBackendConfig,
|
|
82
87
|
KubernetesBackendConfig,
|
|
@@ -100,6 +105,7 @@ AnyBackendConfigWithCreds = Union[
|
|
|
100
105
|
CloudRiftBackendConfigWithCreds,
|
|
101
106
|
CudoBackendConfigWithCreds,
|
|
102
107
|
DataCrunchBackendConfigWithCreds,
|
|
108
|
+
BaseDigitalOceanBackendConfigWithCreds,
|
|
103
109
|
GCPBackendConfigWithCreds,
|
|
104
110
|
HotAisleBackendConfigWithCreds,
|
|
105
111
|
KubernetesBackendConfigWithCreds,
|
|
@@ -122,6 +128,7 @@ AnyBackendFileConfigWithCreds = Union[
|
|
|
122
128
|
CloudRiftBackendConfigWithCreds,
|
|
123
129
|
CudoBackendConfigWithCreds,
|
|
124
130
|
DataCrunchBackendConfigWithCreds,
|
|
131
|
+
BaseDigitalOceanBackendConfigWithCreds,
|
|
125
132
|
GCPBackendFileConfigWithCreds,
|
|
126
133
|
HotAisleBackendFileConfigWithCreds,
|
|
127
134
|
KubernetesBackendFileConfigWithCreds,
|
|
@@ -59,13 +59,6 @@ DOCKER_DAEMON_CONFIG = {
|
|
|
59
59
|
"exec-opts": ["native.cgroupdriver=cgroupfs"],
|
|
60
60
|
}
|
|
61
61
|
SETUP_COMMANDS = [
|
|
62
|
-
"ufw allow ssh",
|
|
63
|
-
"ufw allow from 10.0.0.0/8",
|
|
64
|
-
"ufw allow from 172.16.0.0/12",
|
|
65
|
-
"ufw allow from 192.168.0.0/16",
|
|
66
|
-
"ufw default deny incoming",
|
|
67
|
-
"ufw default allow outgoing",
|
|
68
|
-
"ufw enable",
|
|
69
62
|
'sed -i "s/.*AllowTcpForwarding.*/AllowTcpForwarding yes/g" /etc/ssh/sshd_config',
|
|
70
63
|
"service ssh restart",
|
|
71
64
|
f"echo {shlex.quote(json.dumps(DOCKER_DAEMON_CONFIG))} > /etc/docker/daemon.json",
|
|
@@ -135,11 +135,10 @@ class OCICompute(
|
|
|
135
135
|
security_group.id, region.virtual_network_client
|
|
136
136
|
)
|
|
137
137
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
cloud_init_user_data = get_user_data(instance_config.get_public_keys(), setup_commands)
|
|
138
|
+
cloud_init_user_data = get_user_data(
|
|
139
|
+
authorized_keys=instance_config.get_public_keys(),
|
|
140
|
+
firewall_allow_from_subnets=[resources.VCN_CIDR],
|
|
141
|
+
)
|
|
143
142
|
|
|
144
143
|
display_name = generate_unique_instance_name(instance_config)
|
|
145
144
|
try:
|
|
@@ -75,17 +75,13 @@ class VultrCompute(
|
|
|
75
75
|
subnet = vpc["v4_subnet"]
|
|
76
76
|
subnet_mask = vpc["v4_subnet_mask"]
|
|
77
77
|
|
|
78
|
-
setup_commands = [
|
|
79
|
-
f"sudo ufw allow from {subnet}/{subnet_mask}",
|
|
80
|
-
"sudo ufw reload",
|
|
81
|
-
]
|
|
82
78
|
instance_id = self.api_client.launch_instance(
|
|
83
79
|
region=instance_offer.region,
|
|
84
80
|
label=instance_name,
|
|
85
81
|
plan=instance_offer.instance.name,
|
|
86
82
|
user_data=get_user_data(
|
|
87
83
|
authorized_keys=instance_config.get_public_keys(),
|
|
88
|
-
|
|
84
|
+
firewall_allow_from_subnets=[f"{subnet}/{subnet_mask}"],
|
|
89
85
|
),
|
|
90
86
|
vpc_id=vpc["id"],
|
|
91
87
|
)
|
|
@@ -59,6 +59,11 @@ def get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[IncludeExcludeDic
|
|
|
59
59
|
profile_excludes.add("stop_criteria")
|
|
60
60
|
if profile.schedule is None:
|
|
61
61
|
profile_excludes.add("schedule")
|
|
62
|
+
if (
|
|
63
|
+
fleet_spec.configuration.nodes
|
|
64
|
+
and fleet_spec.configuration.nodes.min == fleet_spec.configuration.nodes.target
|
|
65
|
+
):
|
|
66
|
+
configuration_excludes["nodes"] = {"target"}
|
|
62
67
|
if configuration_excludes:
|
|
63
68
|
spec_excludes["configuration"] = configuration_excludes
|
|
64
69
|
if profile_excludes:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
3
|
from dstack._internal.core.models.common import IncludeExcludeDictType, IncludeExcludeSetType
|
|
4
|
-
from dstack._internal.core.models.configurations import ServiceConfiguration
|
|
4
|
+
from dstack._internal.core.models.configurations import LEGACY_REPO_DIR, ServiceConfiguration
|
|
5
5
|
from dstack._internal.core.models.runs import ApplyRunPlanInput, JobSpec, JobSubmission, RunSpec
|
|
6
6
|
from dstack._internal.server.schemas.runs import GetRunPlanRequest, ListRunsRequest
|
|
7
7
|
|
|
@@ -31,6 +31,8 @@ def get_apply_plan_excludes(plan: ApplyRunPlanInput) -> Optional[IncludeExcludeD
|
|
|
31
31
|
current_resource_excludes["status_message"] = True
|
|
32
32
|
if current_resource.deployment_num == 0:
|
|
33
33
|
current_resource_excludes["deployment_num"] = True
|
|
34
|
+
if current_resource.fleet is None:
|
|
35
|
+
current_resource_excludes["fleet"] = True
|
|
34
36
|
apply_plan_excludes["current_resource"] = current_resource_excludes
|
|
35
37
|
current_resource_excludes["run_spec"] = get_run_spec_excludes(current_resource.run_spec)
|
|
36
38
|
job_submissions_excludes: IncludeExcludeDictType = {}
|
|
@@ -102,6 +104,11 @@ def get_run_spec_excludes(run_spec: RunSpec) -> IncludeExcludeDictType:
|
|
|
102
104
|
configuration = run_spec.configuration
|
|
103
105
|
profile = run_spec.profile
|
|
104
106
|
|
|
107
|
+
if run_spec.repo_dir in [None, LEGACY_REPO_DIR]:
|
|
108
|
+
spec_excludes["repo_dir"] = True
|
|
109
|
+
elif run_spec.repo_dir == "." and configuration.working_dir in [None, LEGACY_REPO_DIR, "."]:
|
|
110
|
+
spec_excludes["repo_dir"] = True
|
|
111
|
+
|
|
105
112
|
if configuration.fleets is None:
|
|
106
113
|
configuration_excludes["fleets"] = True
|
|
107
114
|
if profile is not None and profile.fleets is None:
|
|
@@ -163,6 +170,8 @@ def get_job_spec_excludes(job_specs: list[JobSpec]) -> IncludeExcludeDictType:
|
|
|
163
170
|
spec_excludes["service_port"] = True
|
|
164
171
|
if all(not s.probes for s in job_specs):
|
|
165
172
|
spec_excludes["probes"] = True
|
|
173
|
+
if all(s.repo_dir in [None, LEGACY_REPO_DIR] for s in job_specs):
|
|
174
|
+
spec_excludes["repo_dir"] = True
|
|
166
175
|
|
|
167
176
|
return spec_excludes
|
|
168
177
|
|
|
@@ -4,13 +4,15 @@ import enum
|
|
|
4
4
|
class BackendType(str, enum.Enum):
|
|
5
5
|
"""
|
|
6
6
|
Attributes:
|
|
7
|
+
AMDDEVCLOUD (BackendType): AMD Developer Cloud
|
|
7
8
|
AWS (BackendType): Amazon Web Services
|
|
8
9
|
AZURE (BackendType): Microsoft Azure
|
|
9
10
|
CLOUDRIFT (BackendType): CloudRift
|
|
10
11
|
CUDO (BackendType): Cudo
|
|
12
|
+
DATACRUNCH (BackendType): DataCrunch
|
|
13
|
+
DIGITALOCEAN (BackendType): DigitalOcean
|
|
11
14
|
DSTACK (BackendType): dstack Sky
|
|
12
15
|
GCP (BackendType): Google Cloud Platform
|
|
13
|
-
DATACRUNCH (BackendType): DataCrunch
|
|
14
16
|
HOTAISLE (BackendType): Hot Aisle
|
|
15
17
|
KUBERNETES (BackendType): Kubernetes
|
|
16
18
|
LAMBDA (BackendType): Lambda Cloud
|
|
@@ -22,11 +24,13 @@ class BackendType(str, enum.Enum):
|
|
|
22
24
|
VULTR (BackendType): Vultr
|
|
23
25
|
"""
|
|
24
26
|
|
|
27
|
+
AMDDEVCLOUD = "amddevcloud"
|
|
25
28
|
AWS = "aws"
|
|
26
29
|
AZURE = "azure"
|
|
27
30
|
CLOUDRIFT = "cloudrift"
|
|
28
31
|
CUDO = "cudo"
|
|
29
32
|
DATACRUNCH = "datacrunch"
|
|
33
|
+
DIGITALOCEAN = "digitalocean"
|
|
30
34
|
DSTACK = "dstack"
|
|
31
35
|
GCP = "gcp"
|
|
32
36
|
HOTAISLE = "hotaisle"
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import Any, Callable, Optional, Union
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Callable, Mapping, Optional, Union
|
|
4
4
|
|
|
5
5
|
import orjson
|
|
6
6
|
from pydantic import Field
|
|
7
|
-
from pydantic_duality import DualBaseModel
|
|
7
|
+
from pydantic_duality import generate_dual_base_model
|
|
8
8
|
from typing_extensions import Annotated
|
|
9
9
|
|
|
10
10
|
from dstack._internal.utils.json_utils import pydantic_orjson_dumps
|
|
@@ -17,46 +17,73 @@ IncludeExcludeDictType = dict[
|
|
|
17
17
|
IncludeExcludeType = Union[IncludeExcludeSetType, IncludeExcludeDictType]
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
class CoreConfig:
|
|
21
|
+
json_loads = orjson.loads
|
|
22
|
+
json_dumps = pydantic_orjson_dumps
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# All dstack models inherit from pydantic-duality's DualBaseModel.
|
|
20
26
|
# DualBaseModel creates two classes for the model:
|
|
21
27
|
# one with extra = "forbid" (CoreModel/CoreModel.__request__),
|
|
22
28
|
# and another with extra = "ignore" (CoreModel.__response__).
|
|
23
|
-
# This allows to use the same model both for
|
|
24
|
-
# for
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
29
|
+
# This allows to use the same model both for strict parsing of the user input and
|
|
30
|
+
# for permissive parsing of the server responses.
|
|
31
|
+
#
|
|
32
|
+
# We define a func to generate CoreModel dynamically that can be used
|
|
33
|
+
# to define custom Config for both __request__ and __response__ models.
|
|
34
|
+
# Note: Defining config in the model class directly overrides
|
|
35
|
+
# pydantic-duality's base config, breaking __response__.
|
|
36
|
+
def generate_dual_core_model(
|
|
37
|
+
custom_config: Union[type, Mapping],
|
|
38
|
+
) -> "type[CoreModel]":
|
|
39
|
+
class CoreModel(generate_dual_base_model(custom_config)):
|
|
40
|
+
def json(
|
|
41
|
+
self,
|
|
42
|
+
*,
|
|
43
|
+
include: Optional[IncludeExcludeType] = None,
|
|
44
|
+
exclude: Optional[IncludeExcludeType] = None,
|
|
45
|
+
by_alias: bool = False,
|
|
46
|
+
skip_defaults: Optional[bool] = None, # ignore as it's deprecated
|
|
47
|
+
exclude_unset: bool = False,
|
|
48
|
+
exclude_defaults: bool = False,
|
|
49
|
+
exclude_none: bool = False,
|
|
50
|
+
encoder: Optional[Callable[[Any], Any]] = None,
|
|
51
|
+
models_as_dict: bool = True, # does not seems to be needed by dstack or dependencies
|
|
52
|
+
**dumps_kwargs: Any,
|
|
53
|
+
) -> str:
|
|
54
|
+
"""
|
|
55
|
+
Override `json()` method so that it calls `dict()`.
|
|
56
|
+
Allows changing how models are serialized by overriding `dict()` only.
|
|
57
|
+
By default, `json()` won't call `dict()`, so changes applied in `dict()` won't take place.
|
|
58
|
+
"""
|
|
59
|
+
data = self.dict(
|
|
60
|
+
by_alias=by_alias,
|
|
61
|
+
include=include,
|
|
62
|
+
exclude=exclude,
|
|
63
|
+
exclude_unset=exclude_unset,
|
|
64
|
+
exclude_defaults=exclude_defaults,
|
|
65
|
+
exclude_none=exclude_none,
|
|
66
|
+
)
|
|
67
|
+
if self.__custom_root_type__:
|
|
68
|
+
data = data["__root__"]
|
|
69
|
+
return self.__config__.json_dumps(data, default=encoder, **dumps_kwargs)
|
|
70
|
+
|
|
71
|
+
return CoreModel
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
if TYPE_CHECKING:
|
|
75
|
+
|
|
76
|
+
class CoreModel(generate_dual_base_model(CoreConfig)):
|
|
77
|
+
pass
|
|
78
|
+
else:
|
|
79
|
+
CoreModel = generate_dual_core_model(CoreConfig)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class FrozenConfig(CoreConfig):
|
|
83
|
+
frozen = True
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
FrozenCoreModel = generate_dual_core_model(FrozenConfig)
|
|
60
87
|
|
|
61
88
|
|
|
62
89
|
class Duration(int):
|
|
@@ -93,7 +120,7 @@ class Duration(int):
|
|
|
93
120
|
raise ValueError(f"Cannot parse the duration {v}")
|
|
94
121
|
|
|
95
122
|
|
|
96
|
-
class RegistryAuth(CoreModel):
|
|
123
|
+
class RegistryAuth(FrozenCoreModel):
|
|
97
124
|
"""
|
|
98
125
|
Credentials for pulling a private Docker image.
|
|
99
126
|
|
|
@@ -105,9 +132,6 @@ class RegistryAuth(CoreModel):
|
|
|
105
132
|
username: Annotated[str, Field(description="The username")]
|
|
106
133
|
password: Annotated[str, Field(description="The password or access token")]
|
|
107
134
|
|
|
108
|
-
class Config(CoreModel.Config):
|
|
109
|
-
frozen = True
|
|
110
|
-
|
|
111
135
|
|
|
112
136
|
class ApplyAction(str, Enum):
|
|
113
137
|
CREATE = "create" # resource is to be created or overridden
|