dstack 0.19.4rc3__py3-none-any.whl → 0.19.6rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/attach.py +22 -20
- dstack/_internal/cli/commands/offer.py +116 -0
- dstack/_internal/cli/main.py +2 -0
- dstack/_internal/cli/services/configurators/base.py +1 -2
- dstack/_internal/cli/services/configurators/fleet.py +43 -20
- dstack/_internal/cli/services/configurators/run.py +3 -3
- dstack/_internal/cli/utils/run.py +43 -38
- dstack/_internal/core/backends/aws/auth.py +1 -2
- dstack/_internal/core/backends/aws/compute.py +24 -9
- dstack/_internal/core/backends/aws/configurator.py +2 -3
- dstack/_internal/core/backends/aws/resources.py +10 -0
- dstack/_internal/core/backends/azure/auth.py +1 -2
- dstack/_internal/core/backends/azure/compute.py +15 -5
- dstack/_internal/core/backends/azure/configurator.py +4 -5
- dstack/_internal/core/backends/azure/resources.py +14 -0
- dstack/_internal/core/backends/base/compute.py +99 -31
- dstack/_internal/core/backends/gcp/auth.py +1 -2
- dstack/_internal/core/backends/gcp/compute.py +58 -14
- dstack/_internal/core/backends/gcp/configurator.py +2 -3
- dstack/_internal/core/backends/gcp/features/tcpx.py +31 -0
- dstack/_internal/core/backends/gcp/resources.py +10 -0
- dstack/_internal/core/backends/nebius/compute.py +6 -2
- dstack/_internal/core/backends/nebius/configurator.py +4 -10
- dstack/_internal/core/backends/nebius/models.py +14 -1
- dstack/_internal/core/backends/nebius/resources.py +91 -10
- dstack/_internal/core/backends/oci/auth.py +1 -2
- dstack/_internal/core/backends/oci/configurator.py +1 -2
- dstack/_internal/core/backends/runpod/compute.py +1 -1
- dstack/_internal/core/errors.py +4 -0
- dstack/_internal/core/models/common.py +2 -14
- dstack/_internal/core/models/configurations.py +24 -2
- dstack/_internal/core/models/envs.py +2 -2
- dstack/_internal/core/models/fleets.py +34 -3
- dstack/_internal/core/models/gateways.py +18 -4
- dstack/_internal/core/models/instances.py +2 -1
- dstack/_internal/core/models/profiles.py +12 -0
- dstack/_internal/core/models/runs.py +6 -0
- dstack/_internal/core/models/secrets.py +1 -1
- dstack/_internal/core/models/volumes.py +17 -1
- dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +3 -3
- dstack/_internal/proxy/gateway/services/nginx.py +0 -1
- dstack/_internal/proxy/gateway/services/registry.py +0 -1
- dstack/_internal/server/background/tasks/process_instances.py +12 -9
- dstack/_internal/server/background/tasks/process_running_jobs.py +66 -15
- dstack/_internal/server/routers/fleets.py +22 -0
- dstack/_internal/server/routers/runs.py +1 -0
- dstack/_internal/server/schemas/fleets.py +12 -2
- dstack/_internal/server/schemas/runner.py +6 -0
- dstack/_internal/server/schemas/runs.py +3 -0
- dstack/_internal/server/services/docker.py +1 -2
- dstack/_internal/server/services/fleets.py +30 -12
- dstack/_internal/server/services/gateways/__init__.py +1 -0
- dstack/_internal/server/services/instances.py +3 -1
- dstack/_internal/server/services/jobs/__init__.py +1 -2
- dstack/_internal/server/services/jobs/configurators/base.py +17 -8
- dstack/_internal/server/services/locking.py +16 -1
- dstack/_internal/server/services/projects.py +1 -2
- dstack/_internal/server/services/proxy/repo.py +1 -2
- dstack/_internal/server/services/runner/client.py +3 -0
- dstack/_internal/server/services/runs.py +19 -16
- dstack/_internal/server/services/services/__init__.py +1 -2
- dstack/_internal/server/services/volumes.py +29 -2
- dstack/_internal/server/statics/00a6e1fb461ed2929fb9.png +0 -0
- dstack/_internal/server/statics/0cae4d9f0a36034984a7.png +0 -0
- dstack/_internal/server/statics/391de232cc0e30cae513.png +0 -0
- dstack/_internal/server/statics/4e0eead8c1a73689ef9d.svg +1 -0
- dstack/_internal/server/statics/544afa2f63428c2235b0.png +0 -0
- dstack/_internal/server/statics/54a4f50f74c6b9381530.svg +7 -0
- dstack/_internal/server/statics/68dd1360a7d2611e0132.svg +4 -0
- dstack/_internal/server/statics/69544b4c81973b54a66f.png +0 -0
- dstack/_internal/server/statics/77a8b02b17af19e39266.png +0 -0
- dstack/_internal/server/statics/83a93a8871c219104367.svg +9 -0
- dstack/_internal/server/statics/8f28bb8e9999e5e6a48b.svg +4 -0
- dstack/_internal/server/statics/9124086961ab8c366bc4.svg +9 -0
- dstack/_internal/server/statics/9a9ebaeb54b025dbac0a.svg +5 -0
- dstack/_internal/server/statics/a3428392dc534f3b15c4.svg +7 -0
- dstack/_internal/server/statics/ae22625574d69361f72c.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-144x144.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-192x192.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-256x256.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-36x36.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-384x384.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-48x48.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-512x512.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-72x72.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-96x96.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-1024x1024.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-114x114.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-120x120.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-144x144.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-152x152.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-167x167.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-180x180.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-57x57.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-60x60.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-72x72.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-76x76.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-precomposed.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1170x2532.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1179x2556.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1284x2778.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1290x2796.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1488x2266.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1640x2160.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1640.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2266x1488.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2532x1170.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2556x1179.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2778x1284.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2796x1290.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
- dstack/_internal/server/statics/assets/browserconfig.xml +12 -0
- dstack/_internal/server/statics/assets/favicon-16x16.png +0 -0
- dstack/_internal/server/statics/assets/favicon-32x32.png +0 -0
- dstack/_internal/server/statics/assets/favicon-48x48.png +0 -0
- dstack/_internal/server/statics/assets/favicon.ico +0 -0
- dstack/_internal/server/statics/assets/manifest.webmanifest +67 -0
- dstack/_internal/server/statics/assets/mstile-144x144.png +0 -0
- dstack/_internal/server/statics/assets/mstile-150x150.png +0 -0
- dstack/_internal/server/statics/assets/mstile-310x150.png +0 -0
- dstack/_internal/server/statics/assets/mstile-310x310.png +0 -0
- dstack/_internal/server/statics/assets/mstile-70x70.png +0 -0
- dstack/_internal/server/statics/assets/yandex-browser-50x50.png +0 -0
- dstack/_internal/server/statics/assets/yandex-browser-manifest.json +9 -0
- dstack/_internal/server/statics/b7ae68f44193474fc578.png +0 -0
- dstack/_internal/server/statics/d2f008c75b2b5b191f3f.png +0 -0
- dstack/_internal/server/statics/d44c33e1b92e05c379fd.png +0 -0
- dstack/_internal/server/statics/dd43ff0552815179d7ab.png +0 -0
- dstack/_internal/server/statics/dd4e7166c0b9aac197d7.png +0 -0
- dstack/_internal/server/statics/e30b27916930d43d2271.png +0 -0
- dstack/_internal/server/statics/e467d7d60aae81ab198b.svg +6 -0
- dstack/_internal/server/statics/eb9b344b73818fe2b71a.png +0 -0
- dstack/_internal/server/statics/f517dd626eb964120de0.png +0 -0
- dstack/_internal/server/statics/f958aecddee5d8e3222c.png +0 -0
- dstack/_internal/server/statics/index.html +3 -0
- dstack/_internal/server/statics/main-8f9c66f404e9c7e7e020.css +3 -0
- dstack/_internal/server/statics/main-b4f65323f5df007e1664.js +136480 -0
- dstack/_internal/server/statics/main-b4f65323f5df007e1664.js.map +1 -0
- dstack/_internal/server/statics/manifest.json +16 -0
- dstack/_internal/server/statics/robots.txt +3 -0
- dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
- dstack/_internal/server/statics/static/media/github.1f7102513534c83a9d8d735d2b8c12a2.svg +3 -0
- dstack/_internal/server/statics/static/media/logo.f602feeb138844eda97c8cb641461448.svg +124 -0
- dstack/_internal/server/statics/static/media/okta.12f178e6873a1100965f2a4dbd18fcec.svg +2 -0
- dstack/_internal/server/statics/static/media/theme.3994c817bb7dda191c1c9640dee0bf42.svg +3 -0
- dstack/_internal/server/testing/common.py +10 -0
- dstack/_internal/utils/tags.py +42 -0
- dstack/api/server/__init__.py +3 -1
- dstack/api/server/_fleets.py +52 -9
- dstack/api/server/_gateways.py +17 -2
- dstack/api/server/_runs.py +34 -11
- dstack/api/server/_volumes.py +2 -3
- dstack/version.py +1 -1
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/METADATA +2 -2
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/RECORD +180 -76
- dstack-0.19.4rc3.data/data/dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf +0 -1
- dstack-0.19.4rc3.data/data/dstack/_internal/proxy/gateway/resources/nginx/entrypoint.jinja2 +0 -27
- dstack-0.19.4rc3.data/data/dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +0 -88
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/WHEEL +0 -0
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.4rc3.dist-info → dstack-0.19.6rc1.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -136,13 +136,18 @@ class AzureCompute(
|
|
|
136
136
|
location=location,
|
|
137
137
|
)
|
|
138
138
|
|
|
139
|
-
|
|
139
|
+
base_tags = {
|
|
140
140
|
"owner": "dstack",
|
|
141
141
|
"dstack_project": instance_config.project_name,
|
|
142
142
|
"dstack_name": instance_config.instance_name,
|
|
143
143
|
"dstack_user": instance_config.user,
|
|
144
144
|
}
|
|
145
|
-
tags = merge_tags(
|
|
145
|
+
tags = merge_tags(
|
|
146
|
+
base_tags=base_tags,
|
|
147
|
+
backend_tags=self.config.tags,
|
|
148
|
+
resource_tags=instance_config.tags,
|
|
149
|
+
)
|
|
150
|
+
tags = azure_resources.filter_invalid_tags(tags)
|
|
146
151
|
|
|
147
152
|
# TODO: Support custom availability_zones.
|
|
148
153
|
# Currently, VMs are regional, which means they don't have zone info.
|
|
@@ -228,14 +233,19 @@ class AzureCompute(
|
|
|
228
233
|
location=configuration.region,
|
|
229
234
|
)
|
|
230
235
|
|
|
231
|
-
|
|
236
|
+
base_tags = {
|
|
232
237
|
"owner": "dstack",
|
|
233
238
|
"dstack_project": configuration.project_name,
|
|
234
239
|
"dstack_name": configuration.instance_name,
|
|
235
240
|
}
|
|
236
241
|
if settings.DSTACK_VERSION is not None:
|
|
237
|
-
|
|
238
|
-
tags = merge_tags(
|
|
242
|
+
base_tags["dstack_version"] = settings.DSTACK_VERSION
|
|
243
|
+
tags = merge_tags(
|
|
244
|
+
base_tags=base_tags,
|
|
245
|
+
backend_tags=self.config.tags,
|
|
246
|
+
resource_tags=configuration.tags,
|
|
247
|
+
)
|
|
248
|
+
tags = azure_resources.filter_invalid_tags(tags)
|
|
239
249
|
|
|
240
250
|
vm = _launch_instance(
|
|
241
251
|
compute_client=self._compute_client,
|
|
@@ -46,7 +46,6 @@ from dstack._internal.core.errors import (
|
|
|
46
46
|
from dstack._internal.core.models.backends.base import (
|
|
47
47
|
BackendType,
|
|
48
48
|
)
|
|
49
|
-
from dstack._internal.core.models.common import is_core_model_instance
|
|
50
49
|
|
|
51
50
|
LOCATIONS = [
|
|
52
51
|
("(US) Central US", "centralus"),
|
|
@@ -76,14 +75,14 @@ class AzureConfigurator(Configurator):
|
|
|
76
75
|
BACKEND_CLASS = AzureBackend
|
|
77
76
|
|
|
78
77
|
def validate_config(self, config: AzureBackendConfigWithCreds, default_creds_enabled: bool):
|
|
79
|
-
if is_core_model_instance(config.creds, AzureDefaultCreds) and not default_creds_enabled:
|
|
78
|
+
if isinstance(config.creds, AzureDefaultCreds) and not default_creds_enabled:
|
|
80
79
|
raise_invalid_credentials_error(fields=[["creds"]])
|
|
81
|
-
if is_core_model_instance(config.creds, AzureClientCreds):
|
|
80
|
+
if isinstance(config.creds, AzureClientCreds):
|
|
82
81
|
self._set_client_creds_tenant_id(config.creds, config.tenant_id)
|
|
83
82
|
try:
|
|
84
83
|
credential, _ = auth.authenticate(config.creds)
|
|
85
84
|
except BackendAuthError:
|
|
86
|
-
if is_core_model_instance(config.creds, AzureClientCreds):
|
|
85
|
+
if isinstance(config.creds, AzureClientCreds):
|
|
87
86
|
raise_invalid_credentials_error(
|
|
88
87
|
fields=[
|
|
89
88
|
["creds", "tenant_id"],
|
|
@@ -105,7 +104,7 @@ class AzureConfigurator(Configurator):
|
|
|
105
104
|
) -> BackendRecord:
|
|
106
105
|
if config.regions is None:
|
|
107
106
|
config.regions = DEFAULT_LOCATIONS
|
|
108
|
-
if is_core_model_instance(config.creds, AzureClientCreds):
|
|
107
|
+
if isinstance(config.creds, AzureClientCreds):
|
|
109
108
|
self._set_client_creds_tenant_id(config.creds, config.tenant_id)
|
|
110
109
|
credential, _ = auth.authenticate(config.creds)
|
|
111
110
|
if config.resource_group is None:
|
|
@@ -5,6 +5,10 @@ from azure.mgmt import network as network_mgmt
|
|
|
5
5
|
from azure.mgmt.network.models import Subnet
|
|
6
6
|
|
|
7
7
|
from dstack._internal.core.errors import BackendError
|
|
8
|
+
from dstack._internal.utils.logging import get_logger
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__name__)
|
|
11
|
+
|
|
8
12
|
|
|
9
13
|
MAX_RESOURCE_NAME_LEN = 64
|
|
10
14
|
|
|
@@ -77,6 +81,16 @@ def _is_eligible_private_subnet(
|
|
|
77
81
|
return False
|
|
78
82
|
|
|
79
83
|
|
|
84
|
+
def filter_invalid_tags(tags: Dict[str, str]) -> Dict[str, str]:
|
|
85
|
+
filtered_tags = {}
|
|
86
|
+
for k, v in tags.items():
|
|
87
|
+
if not _is_valid_tag(k, v):
|
|
88
|
+
logger.warning("Skipping invalid tag '%s: %s'", k, v)
|
|
89
|
+
continue
|
|
90
|
+
filtered_tags[k] = v
|
|
91
|
+
return filtered_tags
|
|
92
|
+
|
|
93
|
+
|
|
80
94
|
def validate_tags(tags: Dict[str, str]):
|
|
81
95
|
for k, v in tags.items():
|
|
82
96
|
if not _is_valid_tag(k, v):
|
|
@@ -5,6 +5,7 @@ import string
|
|
|
5
5
|
import threading
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
7
|
from functools import lru_cache
|
|
8
|
+
from pathlib import Path
|
|
8
9
|
from typing import Dict, List, Optional
|
|
9
10
|
|
|
10
11
|
import git
|
|
@@ -36,14 +37,12 @@ from dstack._internal.core.models.volumes import (
|
|
|
36
37
|
)
|
|
37
38
|
from dstack._internal.core.services import is_valid_dstack_resource_name
|
|
38
39
|
from dstack._internal.utils.logging import get_logger
|
|
40
|
+
from dstack._internal.utils.path import PathLike
|
|
39
41
|
|
|
40
42
|
logger = get_logger(__name__)
|
|
41
43
|
|
|
42
|
-
DSTACK_WORKING_DIR = "/root/.dstack"
|
|
43
44
|
DSTACK_SHIM_BINARY_NAME = "dstack-shim"
|
|
44
|
-
DSTACK_SHIM_BINARY_PATH = f"/usr/local/bin/{DSTACK_SHIM_BINARY_NAME}"
|
|
45
45
|
DSTACK_RUNNER_BINARY_NAME = "dstack-runner"
|
|
46
|
-
DSTACK_RUNNER_BINARY_PATH = f"/usr/local/bin/{DSTACK_RUNNER_BINARY_NAME}"
|
|
47
46
|
|
|
48
47
|
|
|
49
48
|
class Compute(ABC):
|
|
@@ -173,6 +172,7 @@ class ComputeWithCreateInstanceSupport(ABC):
|
|
|
173
172
|
ssh_keys=[SSHKey(public=project_ssh_public_key.strip())],
|
|
174
173
|
volumes=volumes,
|
|
175
174
|
reservation=run.run_spec.configuration.reservation,
|
|
175
|
+
tags=run.run_spec.merged_profile.tags,
|
|
176
176
|
)
|
|
177
177
|
instance_offer = instance_offer.copy()
|
|
178
178
|
self._restrict_instance_offer_az_to_volumes_az(instance_offer, volumes)
|
|
@@ -335,6 +335,24 @@ class ComputeWithVolumeSupport(ABC):
|
|
|
335
335
|
return True
|
|
336
336
|
|
|
337
337
|
|
|
338
|
+
def get_dstack_working_dir(base_path: Optional[PathLike] = None) -> str:
|
|
339
|
+
if base_path is None:
|
|
340
|
+
base_path = "/root"
|
|
341
|
+
return str(Path(base_path, ".dstack"))
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def get_dstack_shim_binary_path(bin_path: Optional[PathLike] = None) -> str:
|
|
345
|
+
if bin_path is None:
|
|
346
|
+
bin_path = "/usr/local/bin"
|
|
347
|
+
return str(Path(bin_path, DSTACK_SHIM_BINARY_NAME))
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def get_dstack_runner_binary_path(bin_path: Optional[PathLike] = None) -> str:
|
|
351
|
+
if bin_path is None:
|
|
352
|
+
bin_path = "/usr/local/bin"
|
|
353
|
+
return str(Path(bin_path, DSTACK_RUNNER_BINARY_NAME))
|
|
354
|
+
|
|
355
|
+
|
|
338
356
|
def get_job_instance_name(run: Run, job: Job) -> str:
|
|
339
357
|
return job.job_spec.job_name
|
|
340
358
|
|
|
@@ -441,9 +459,18 @@ def get_cloud_config(**config) -> str:
|
|
|
441
459
|
|
|
442
460
|
|
|
443
461
|
def get_user_data(
|
|
444
|
-
authorized_keys: List[str],
|
|
462
|
+
authorized_keys: List[str],
|
|
463
|
+
backend_specific_commands: Optional[List[str]] = None,
|
|
464
|
+
base_path: Optional[PathLike] = None,
|
|
465
|
+
bin_path: Optional[PathLike] = None,
|
|
466
|
+
backend_shim_env: Optional[Dict[str, str]] = None,
|
|
445
467
|
) -> str:
|
|
446
|
-
shim_commands = get_shim_commands(
|
|
468
|
+
shim_commands = get_shim_commands(
|
|
469
|
+
authorized_keys=authorized_keys,
|
|
470
|
+
base_path=base_path,
|
|
471
|
+
bin_path=bin_path,
|
|
472
|
+
backend_shim_env=backend_shim_env,
|
|
473
|
+
)
|
|
447
474
|
commands = (backend_specific_commands or []) + shim_commands
|
|
448
475
|
return get_cloud_config(
|
|
449
476
|
runcmd=[["sh", "-c", " && ".join(commands)]],
|
|
@@ -451,29 +478,55 @@ def get_user_data(
|
|
|
451
478
|
)
|
|
452
479
|
|
|
453
480
|
|
|
454
|
-
def get_shim_env(
|
|
481
|
+
def get_shim_env(
|
|
482
|
+
authorized_keys: List[str],
|
|
483
|
+
base_path: Optional[PathLike] = None,
|
|
484
|
+
bin_path: Optional[PathLike] = None,
|
|
485
|
+
backend_shim_env: Optional[Dict[str, str]] = None,
|
|
486
|
+
) -> Dict[str, str]:
|
|
455
487
|
log_level = "6" # Trace
|
|
456
488
|
envs = {
|
|
457
|
-
"DSTACK_SHIM_HOME": DSTACK_WORKING_DIR,
|
|
489
|
+
"DSTACK_SHIM_HOME": get_dstack_working_dir(base_path),
|
|
458
490
|
"DSTACK_SHIM_HTTP_PORT": str(DSTACK_SHIM_HTTP_PORT),
|
|
459
491
|
"DSTACK_SHIM_LOG_LEVEL": log_level,
|
|
460
492
|
"DSTACK_RUNNER_DOWNLOAD_URL": get_dstack_runner_download_url(),
|
|
461
|
-
"DSTACK_RUNNER_BINARY_PATH": DSTACK_RUNNER_BINARY_PATH,
|
|
493
|
+
"DSTACK_RUNNER_BINARY_PATH": get_dstack_runner_binary_path(bin_path),
|
|
462
494
|
"DSTACK_RUNNER_HTTP_PORT": str(DSTACK_RUNNER_HTTP_PORT),
|
|
463
495
|
"DSTACK_RUNNER_SSH_PORT": str(DSTACK_RUNNER_SSH_PORT),
|
|
464
496
|
"DSTACK_RUNNER_LOG_LEVEL": log_level,
|
|
465
497
|
"DSTACK_PUBLIC_SSH_KEY": "\n".join(authorized_keys),
|
|
466
498
|
}
|
|
499
|
+
if backend_shim_env is not None:
|
|
500
|
+
envs |= backend_shim_env
|
|
467
501
|
return envs
|
|
468
502
|
|
|
469
503
|
|
|
470
504
|
def get_shim_commands(
|
|
471
|
-
authorized_keys: List[str],
|
|
505
|
+
authorized_keys: List[str],
|
|
506
|
+
*,
|
|
507
|
+
is_privileged: bool = False,
|
|
508
|
+
pjrt_device: Optional[str] = None,
|
|
509
|
+
base_path: Optional[PathLike] = None,
|
|
510
|
+
bin_path: Optional[PathLike] = None,
|
|
511
|
+
backend_shim_env: Optional[Dict[str, str]] = None,
|
|
472
512
|
) -> List[str]:
|
|
473
|
-
commands = get_shim_pre_start_commands(
|
|
474
|
-
|
|
513
|
+
commands = get_shim_pre_start_commands(
|
|
514
|
+
base_path=base_path,
|
|
515
|
+
bin_path=bin_path,
|
|
516
|
+
)
|
|
517
|
+
shim_env = get_shim_env(
|
|
518
|
+
authorized_keys=authorized_keys,
|
|
519
|
+
base_path=base_path,
|
|
520
|
+
bin_path=bin_path,
|
|
521
|
+
backend_shim_env=backend_shim_env,
|
|
522
|
+
)
|
|
523
|
+
for k, v in shim_env.items():
|
|
475
524
|
commands += [f'export "{k}={v}"']
|
|
476
|
-
commands += get_run_shim_script(
|
|
525
|
+
commands += get_run_shim_script(
|
|
526
|
+
is_privileged=is_privileged,
|
|
527
|
+
pjrt_device=pjrt_device,
|
|
528
|
+
bin_path=bin_path,
|
|
529
|
+
)
|
|
477
530
|
return commands
|
|
478
531
|
|
|
479
532
|
|
|
@@ -510,25 +563,33 @@ def get_dstack_shim_download_url() -> str:
|
|
|
510
563
|
return f"https://{bucket}.s3.eu-west-1.amazonaws.com/{build}/binaries/dstack-shim-linux-amd64"
|
|
511
564
|
|
|
512
565
|
|
|
513
|
-
def get_shim_pre_start_commands(
|
|
566
|
+
def get_shim_pre_start_commands(
|
|
567
|
+
base_path: Optional[PathLike] = None,
|
|
568
|
+
bin_path: Optional[PathLike] = None,
|
|
569
|
+
) -> List[str]:
|
|
514
570
|
url = get_dstack_shim_download_url()
|
|
515
|
-
|
|
571
|
+
dstack_shim_binary_path = get_dstack_shim_binary_path(bin_path)
|
|
572
|
+
dstack_working_dir = get_dstack_working_dir(base_path)
|
|
516
573
|
return [
|
|
517
574
|
f"dlpath=$(sudo mktemp -t {DSTACK_SHIM_BINARY_NAME}.XXXXXXXXXX)",
|
|
518
575
|
# -sS -- disable progress meter and warnings, but still show errors (unlike bare -s)
|
|
519
576
|
f'sudo curl -sS --compressed --connect-timeout 60 --max-time 240 --retry 1 --output "$dlpath" "{url}"',
|
|
520
|
-
f'sudo mv "$dlpath" {DSTACK_SHIM_BINARY_PATH}',
|
|
521
|
-
f"sudo chmod +x {DSTACK_SHIM_BINARY_PATH}",
|
|
522
|
-
f"sudo mkdir {DSTACK_WORKING_DIR} -p",
|
|
577
|
+
f'sudo mv "$dlpath" {dstack_shim_binary_path}',
|
|
578
|
+
f"sudo chmod +x {dstack_shim_binary_path}",
|
|
579
|
+
f"sudo mkdir {dstack_working_dir} -p",
|
|
523
580
|
]
|
|
524
581
|
|
|
525
582
|
|
|
526
|
-
def get_run_shim_script(
|
|
583
|
+
def get_run_shim_script(
|
|
584
|
+
is_privileged: bool,
|
|
585
|
+
pjrt_device: Optional[str],
|
|
586
|
+
bin_path: Optional[PathLike] = None,
|
|
587
|
+
) -> List[str]:
|
|
588
|
+
dstack_shim_binary_path = get_dstack_shim_binary_path(bin_path)
|
|
527
589
|
privileged_flag = "--privileged" if is_privileged else ""
|
|
528
590
|
pjrt_device_env = f"--pjrt-device={pjrt_device}" if pjrt_device else ""
|
|
529
|
-
|
|
530
591
|
return [
|
|
531
|
-
f"nohup {
|
|
592
|
+
f"nohup {dstack_shim_binary_path} {privileged_flag} {pjrt_device_env} &",
|
|
532
593
|
]
|
|
533
594
|
|
|
534
595
|
|
|
@@ -555,8 +616,10 @@ def get_gateway_user_data(authorized_key: str) -> str:
|
|
|
555
616
|
|
|
556
617
|
|
|
557
618
|
def get_docker_commands(
|
|
558
|
-
authorized_keys:
|
|
559
|
-
|
|
619
|
+
authorized_keys: list[str],
|
|
620
|
+
bin_path: Optional[PathLike] = None,
|
|
621
|
+
) -> list[str]:
|
|
622
|
+
dstack_runner_binary_path = get_dstack_runner_binary_path(bin_path)
|
|
560
623
|
authorized_keys_content = "\n".join(authorized_keys).strip()
|
|
561
624
|
commands = [
|
|
562
625
|
# save and unset ld.so variables
|
|
@@ -580,9 +643,6 @@ def get_docker_commands(
|
|
|
580
643
|
"chmod 700 ~/.ssh",
|
|
581
644
|
f"echo '{authorized_keys_content}' > ~/.ssh/authorized_keys",
|
|
582
645
|
"chmod 600 ~/.ssh/authorized_keys",
|
|
583
|
-
r"""if [ -f ~/.profile ]; then sed -ie '1s@^@export PATH="'"$PATH"':$PATH"\n\n@' ~/.profile; fi"""
|
|
584
|
-
if fix_path_in_dot_profile
|
|
585
|
-
else ":",
|
|
586
646
|
# regenerate host keys
|
|
587
647
|
"rm -rf /etc/ssh/ssh_host_*",
|
|
588
648
|
"ssh-keygen -A > /dev/null",
|
|
@@ -600,7 +660,6 @@ def get_docker_commands(
|
|
|
600
660
|
" -o PidFile=none"
|
|
601
661
|
" -o PasswordAuthentication=no"
|
|
602
662
|
" -o AllowTcpForwarding=yes"
|
|
603
|
-
" -o PermitUserEnvironment=yes"
|
|
604
663
|
" -o ClientAliveInterval=30"
|
|
605
664
|
" -o ClientAliveCountMax=4"
|
|
606
665
|
),
|
|
@@ -611,10 +670,10 @@ def get_docker_commands(
|
|
|
611
670
|
|
|
612
671
|
url = get_dstack_runner_download_url()
|
|
613
672
|
commands += [
|
|
614
|
-
f"curl --connect-timeout 60 --max-time 240 --retry 1 --output {DSTACK_RUNNER_BINARY_PATH} {url}",
|
|
615
|
-
f"chmod +x {DSTACK_RUNNER_BINARY_PATH}",
|
|
673
|
+
f"curl --connect-timeout 60 --max-time 240 --retry 1 --output {dstack_runner_binary_path} {url}",
|
|
674
|
+
f"chmod +x {dstack_runner_binary_path}",
|
|
616
675
|
(
|
|
617
|
-
f"{DSTACK_RUNNER_BINARY_PATH}"
|
|
676
|
+
f"{dstack_runner_binary_path}"
|
|
618
677
|
" --log-level 6"
|
|
619
678
|
" start"
|
|
620
679
|
f" --http-port {DSTACK_RUNNER_HTTP_PORT}"
|
|
@@ -692,9 +751,18 @@ def get_dstack_gateway_commands() -> List[str]:
|
|
|
692
751
|
]
|
|
693
752
|
|
|
694
753
|
|
|
695
|
-
def merge_tags(
|
|
696
|
-
|
|
754
|
+
def merge_tags(
|
|
755
|
+
base_tags: Dict[str, str],
|
|
756
|
+
backend_tags: Optional[Dict[str, str]] = None,
|
|
757
|
+
resource_tags: Optional[Dict[str, str]] = None,
|
|
758
|
+
) -> Dict[str, str]:
|
|
759
|
+
res = base_tags.copy()
|
|
760
|
+
# backend_tags have priority over resource_tags
|
|
761
|
+
# so that regular users do not override the tags set by admins
|
|
697
762
|
if backend_tags is not None:
|
|
698
763
|
for k, v in backend_tags.items():
|
|
699
764
|
res.setdefault(k, v)
|
|
765
|
+
if resource_tags is not None:
|
|
766
|
+
for k, v in resource_tags.items():
|
|
767
|
+
res.setdefault(k, v)
|
|
700
768
|
return res
|
|
@@ -13,7 +13,6 @@ from dstack._internal.core.backends.gcp.models import (
|
|
|
13
13
|
GCPServiceAccountCreds,
|
|
14
14
|
)
|
|
15
15
|
from dstack._internal.core.errors import BackendAuthError
|
|
16
|
-
from dstack._internal.core.models.common import is_core_model_instance
|
|
17
16
|
|
|
18
17
|
|
|
19
18
|
def authenticate(creds: AnyGCPCreds, project_id: Optional[str] = None) -> Tuple[Credentials, str]:
|
|
@@ -30,7 +29,7 @@ def authenticate(creds: AnyGCPCreds, project_id: Optional[str] = None) -> Tuple[
|
|
|
30
29
|
|
|
31
30
|
|
|
32
31
|
def get_credentials(creds: AnyGCPCreds) -> Tuple[Credentials, Optional[str]]:
|
|
33
|
-
if is_core_model_instance(creds, GCPServiceAccountCreds):
|
|
32
|
+
if isinstance(creds, GCPServiceAccountCreds):
|
|
34
33
|
try:
|
|
35
34
|
service_account_info = json.loads(creds.data)
|
|
36
35
|
credentials = service_account.Credentials.from_service_account_info(
|
|
@@ -211,8 +211,12 @@ class GCPCompute(
|
|
|
211
211
|
"dstack_name": instance_config.instance_name,
|
|
212
212
|
"dstack_user": instance_config.user.lower(),
|
|
213
213
|
}
|
|
214
|
-
labels =
|
|
215
|
-
|
|
214
|
+
labels = merge_tags(
|
|
215
|
+
base_tags=labels,
|
|
216
|
+
backend_tags=self.config.tags,
|
|
217
|
+
resource_tags=instance_config.tags,
|
|
218
|
+
)
|
|
219
|
+
labels = gcp_resources.filter_invalid_labels(labels)
|
|
216
220
|
is_tpu = (
|
|
217
221
|
_is_tpu(instance_offer.instance.resources.gpus[0].name)
|
|
218
222
|
if instance_offer.instance.resources.gpus
|
|
@@ -292,11 +296,9 @@ class GCPCompute(
|
|
|
292
296
|
gpus=instance_offer.instance.resources.gpus,
|
|
293
297
|
),
|
|
294
298
|
spot=instance_offer.instance.resources.spot,
|
|
295
|
-
user_data=
|
|
296
|
-
authorized_keys,
|
|
297
|
-
|
|
298
|
-
instance_offer.instance.name
|
|
299
|
-
),
|
|
299
|
+
user_data=_get_user_data(
|
|
300
|
+
authorized_keys=authorized_keys,
|
|
301
|
+
instance_type_name=instance_offer.instance.name,
|
|
300
302
|
),
|
|
301
303
|
authorized_keys=authorized_keys,
|
|
302
304
|
labels=labels,
|
|
@@ -471,8 +473,12 @@ class GCPCompute(
|
|
|
471
473
|
"dstack_project": configuration.project_name.lower(),
|
|
472
474
|
"dstack_name": configuration.instance_name,
|
|
473
475
|
}
|
|
474
|
-
labels =
|
|
475
|
-
|
|
476
|
+
labels = merge_tags(
|
|
477
|
+
base_tags=labels,
|
|
478
|
+
backend_tags=self.config.tags,
|
|
479
|
+
resource_tags=configuration.tags,
|
|
480
|
+
)
|
|
481
|
+
labels = gcp_resources.filter_invalid_labels(labels)
|
|
476
482
|
|
|
477
483
|
request = compute_v1.InsertInstanceRequest()
|
|
478
484
|
request.zone = zone
|
|
@@ -573,8 +579,12 @@ class GCPCompute(
|
|
|
573
579
|
"dstack_name": volume.name,
|
|
574
580
|
"dstack_user": volume.user,
|
|
575
581
|
}
|
|
576
|
-
labels =
|
|
577
|
-
|
|
582
|
+
labels = merge_tags(
|
|
583
|
+
base_tags=labels,
|
|
584
|
+
backend_tags=self.config.tags,
|
|
585
|
+
resource_tags=volume.configuration.tags,
|
|
586
|
+
)
|
|
587
|
+
labels = gcp_resources.filter_invalid_labels(labels)
|
|
578
588
|
|
|
579
589
|
disk = compute_v1.Disk()
|
|
580
590
|
disk.name = disk_name
|
|
@@ -829,10 +839,14 @@ def _get_extra_subnets(
|
|
|
829
839
|
) -> List[Tuple[str, str]]:
|
|
830
840
|
if config.extra_vpcs is None:
|
|
831
841
|
return []
|
|
832
|
-
if instance_type_name
|
|
842
|
+
if instance_type_name == "a3-megagpu-8g":
|
|
843
|
+
subnets_num = 8
|
|
844
|
+
elif instance_type_name in ["a3-edgegpu-8g", "a3-highgpu-8g"]:
|
|
845
|
+
subnets_num = 4
|
|
846
|
+
else:
|
|
833
847
|
return []
|
|
834
848
|
extra_subnets = []
|
|
835
|
-
for vpc_name in config.extra_vpcs:
|
|
849
|
+
for vpc_name in config.extra_vpcs[:subnets_num]:
|
|
836
850
|
subnet = gcp_resources.get_vpc_subnet_or_error(
|
|
837
851
|
subnetworks_client=subnetworks_client,
|
|
838
852
|
vpc_project_id=config.vpc_project_id or config.project_id,
|
|
@@ -844,12 +858,14 @@ def _get_extra_subnets(
|
|
|
844
858
|
vpc_name=vpc_name,
|
|
845
859
|
)
|
|
846
860
|
extra_subnets.append((vpc_resource_name, subnet))
|
|
847
|
-
return extra_subnets
|
|
861
|
+
return extra_subnets
|
|
848
862
|
|
|
849
863
|
|
|
850
864
|
def _get_image_id(instance_type_name: str, cuda: bool) -> str:
|
|
851
865
|
if instance_type_name == "a3-megagpu-8g":
|
|
852
866
|
image_name = "dstack-a3mega-5"
|
|
867
|
+
elif instance_type_name in ["a3-edgegpu-8g", "a3-highgpu-8g"]:
|
|
868
|
+
return "projects/cos-cloud/global/images/cos-105-17412-535-78"
|
|
853
869
|
elif cuda:
|
|
854
870
|
image_name = f"dstack-cuda-{version.base_image}"
|
|
855
871
|
else:
|
|
@@ -862,9 +878,37 @@ def _get_gateway_image_id() -> str:
|
|
|
862
878
|
return "projects/ubuntu-os-cloud/global/images/ubuntu-2204-jammy-v20230714"
|
|
863
879
|
|
|
864
880
|
|
|
881
|
+
def _get_user_data(authorized_keys: List[str], instance_type_name: str) -> str:
    """Return the `get_user_data` result for an instance of the given type.

    For the "a3-edgegpu-8g" and "a3-highgpu-8g" instance types the base and
    binary paths are both set to "/etc" and the shim gets an extended PATH;
    for every other instance type these three settings are passed as None.

    Args:
        authorized_keys: Forwarded unchanged to `get_user_data`.
        instance_type_name: Selects the backend-specific commands and decides
            whether the COS-specific overrides apply.

    Returns:
        Whatever `get_user_data` returns for the assembled arguments.
    """
    uses_cos_image = instance_type_name in ["a3-edgegpu-8g", "a3-highgpu-8g"]
    # In the COS image the root file system is not writable, and /home and /var
    # are writable but not executable. Only /etc is both writable and
    # executable, so the shim/runner binaries are placed there.
    # See: https://cloud.google.com/container-optimized-os/docs/concepts/disks-and-filesystem
    install_dir = "/etc" if uses_cos_image else None
    # In COS the nvidia binaries are not on PATH by default; extending PATH
    # lets the shim run nvidia-smi.
    shim_env = {"PATH": "/var/lib/nvidia/bin:$PATH"} if uses_cos_image else None
    commands = _get_backend_specific_commands(instance_type_name=instance_type_name)
    return get_user_data(
        authorized_keys=authorized_keys,
        backend_specific_commands=commands,
        base_path=install_dir,
        bin_path=install_dir,
        backend_shim_env=shim_env,
    )
|
|
905
|
+
|
|
906
|
+
|
|
865
907
|
def _get_backend_specific_commands(instance_type_name: str) -> List[str]:
|
|
866
908
|
if instance_type_name == "a3-megagpu-8g":
|
|
867
909
|
return tcpx_features.get_backend_specific_commands_tcpxo()
|
|
910
|
+
if instance_type_name in ["a3-edgegpu-8g", "a3-highgpu-8g"]:
|
|
911
|
+
return tcpx_features.get_backend_specific_commands_tcpx()
|
|
868
912
|
return []
|
|
869
913
|
|
|
870
914
|
|
|
@@ -24,7 +24,6 @@ from dstack._internal.core.errors import BackendAuthError, BackendError, ServerC
|
|
|
24
24
|
from dstack._internal.core.models.backends.base import (
|
|
25
25
|
BackendType,
|
|
26
26
|
)
|
|
27
|
-
from dstack._internal.core.models.common import is_core_model_instance
|
|
28
27
|
|
|
29
28
|
LOCATIONS = [
|
|
30
29
|
{
|
|
@@ -115,7 +114,7 @@ class GCPConfigurator(Configurator):
|
|
|
115
114
|
BACKEND_CLASS = GCPBackend
|
|
116
115
|
|
|
117
116
|
def validate_config(self, config: GCPBackendConfigWithCreds, default_creds_enabled: bool):
|
|
118
|
-
if
|
|
117
|
+
if isinstance(config.creds, GCPDefaultCreds) and not default_creds_enabled:
|
|
119
118
|
raise_invalid_credentials_error(fields=[["creds"]])
|
|
120
119
|
try:
|
|
121
120
|
credentials, _ = auth.authenticate(creds=config.creds, project_id=config.project_id)
|
|
@@ -123,7 +122,7 @@ class GCPConfigurator(Configurator):
|
|
|
123
122
|
details = None
|
|
124
123
|
if len(e.args) > 0:
|
|
125
124
|
details = e.args[0]
|
|
126
|
-
if
|
|
125
|
+
if isinstance(config.creds, GCPServiceAccountCreds):
|
|
127
126
|
raise_invalid_credentials_error(fields=[["creds", "data"]], details=details)
|
|
128
127
|
else:
|
|
129
128
|
raise_invalid_credentials_error(fields=[["creds"]], details=details)
|
|
@@ -32,3 +32,34 @@ def get_backend_specific_commands_tcpxo() -> List[str]:
|
|
|
32
32
|
"--num_hops=2 --num_nics=8 --uid= --alsologtostderr"
|
|
33
33
|
),
|
|
34
34
|
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_backend_specific_commands_tcpx() -> List[str]:
    """Return the shell commands that prepare a COS host for GPUDirect-TCPX.

    In order, the commands: install the GPU driver through `cos-extensions`,
    bind-mount and remount /var/lib/nvidia with `exec`, start the
    `receive-datapath-manager` container, insert an iptables rule accepting
    TCP input, run the NCCL TCPX plugin installer container, and bind-mount
    and remount /var/lib/tcpx with `exec`.
    """
    # The eight GPU device nodes plus the two control nodes, each mapped to
    # the same path inside the container.
    device_paths = [f"/dev/nvidia{index}" for index in range(8)]
    device_paths += ["/dev/nvidia-uvm", "/dev/nvidiactl"]
    device_flags = [f"--device {path}:{path}" for path in device_paths]

    rxdm_command = " ".join(
        [
            "docker run",
            "--detach",
            "--pull=always",
            "--name receive-datapath-manager",
            "--privileged",
            "--cap-add=NET_ADMIN --network=host",
            "--volume /var/lib/nvidia/lib64:/usr/local/nvidia/lib64",
            *device_flags,
            "--env LD_LIBRARY_PATH=/usr/local/nvidia/lib64",
            "--volume /run/tcpx:/run/tcpx",
            "--entrypoint /tcpgpudmarxd/build/app/tcpgpudmarxd",
            "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpx/tcpgpudmarxd",
            '--gpu_nic_preset a3vm --gpu_shmem_type fd --uds_path "/run/tcpx" --setup_param "--verbose 128 2 0"',
        ]
    )

    driver_setup = [
        "cos-extensions install gpu -- --version=latest",
        "sudo mount --bind /var/lib/nvidia /var/lib/nvidia",
        "sudo mount -o remount,exec /var/lib/nvidia",
    ]
    plugin_setup = [
        "sudo iptables -I INPUT -p tcp -m tcp -j ACCEPT",
        "docker run --rm -v /var/lib:/var/lib us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpx/nccl-plugin-gpudirecttcpx install --install-nccl",
        "sudo mount --bind /var/lib/tcpx /var/lib/tcpx",
        "sudo mount -o remount,exec /var/lib/tcpx",
    ]
    return driver_setup + [rxdm_command] + plugin_setup
|
|
@@ -332,6 +332,16 @@ def get_accelerators(
|
|
|
332
332
|
return [accelerator_config]
|
|
333
333
|
|
|
334
334
|
|
|
335
|
+
def filter_invalid_labels(labels: Dict[str, str]) -> Dict[str, str]:
    """Return a new dict holding only the labels accepted by `_is_valid_label`.

    Every rejected key/value pair is reported with a warning log entry and
    left out of the result. The input mapping is not modified.
    """
    valid_labels = {}
    for key, value in labels.items():
        if _is_valid_label(key, value):
            valid_labels[key] = value
        else:
            logger.warning("Skipping invalid label '%s: %s'", key, value)
    return valid_labels
|
|
343
|
+
|
|
344
|
+
|
|
335
345
|
def validate_labels(labels: Dict[str, str]):
|
|
336
346
|
for k, v in labels.items():
|
|
337
347
|
if not _is_valid_label(k, v):
|
|
@@ -86,7 +86,11 @@ class NebiusCompute(
|
|
|
86
86
|
|
|
87
87
|
@cached_property
def _region_to_project_id(self) -> dict[str, str]:
    """Return the region-to-project-ID map built from the backend config.

    The map comes from `resources.get_region_to_project_id_map`, called with
    this instance's SDK object and the configured `regions` and `projects`.
    `cached_property` stores the result on the instance after the first access.
    """
    region_to_project_id = resources.get_region_to_project_id_map(
        self._sdk,
        configured_regions=self.config.regions,
        configured_project_ids=self.config.projects,
    )
    return region_to_project_id
|
|
90
94
|
|
|
91
95
|
def _get_subnet_id(self, region: str) -> str:
|
|
92
96
|
if region not in self._subnet_id_cache:
|
|
@@ -100,7 +104,7 @@ class NebiusCompute(
|
|
|
100
104
|
) -> List[InstanceOfferWithAvailability]:
|
|
101
105
|
offers = get_catalog_offers(
|
|
102
106
|
backend=BackendType.NEBIUS,
|
|
103
|
-
locations=
|
|
107
|
+
locations=list(self._region_to_project_id),
|
|
104
108
|
requirements=requirements,
|
|
105
109
|
extra_filter=_supported_instances,
|
|
106
110
|
configurable_disk_size=CONFIGURABLE_DISK_SIZE,
|
|
@@ -29,21 +29,15 @@ class NebiusConfigurator(Configurator):
|
|
|
29
29
|
assert isinstance(config.creds, NebiusServiceAccountCreds)
|
|
30
30
|
try:
|
|
31
31
|
sdk = resources.make_sdk(config.creds)
|
|
32
|
-
|
|
32
|
+
# check that it's possible to build the projects map with configured settings
|
|
33
|
+
resources.get_region_to_project_id_map(
|
|
34
|
+
sdk, configured_regions=config.regions, configured_project_ids=config.projects
|
|
35
|
+
)
|
|
33
36
|
except (ValueError, RequestError) as e:
|
|
34
37
|
raise_invalid_credentials_error(
|
|
35
38
|
fields=[["creds"]],
|
|
36
39
|
details=str(e),
|
|
37
40
|
)
|
|
38
|
-
if invalid_regions := set(config.regions or []) - available_regions:
|
|
39
|
-
raise_invalid_credentials_error(
|
|
40
|
-
fields=[["regions"]],
|
|
41
|
-
details=(
|
|
42
|
-
f"Configured regions {invalid_regions} do not exist in this Nebius tenancy."
|
|
43
|
-
" Omit `regions` to use all regions or select some of the available regions:"
|
|
44
|
-
f" {available_regions}"
|
|
45
|
-
),
|
|
46
|
-
)
|
|
47
41
|
|
|
48
42
|
def create_backend(
|
|
49
43
|
self, project_name: str, config: NebiusBackendConfigWithCreds
|
|
@@ -5,6 +5,8 @@ from pydantic import Field, root_validator
|
|
|
5
5
|
from dstack._internal.core.backends.base.models import fill_data
|
|
6
6
|
from dstack._internal.core.models.common import CoreModel
|
|
7
7
|
|
|
8
|
+
DEFAULT_PROJECT_NAME_PREFIX = "default-project"
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
class NebiusServiceAccountCreds(CoreModel):
|
|
10
12
|
type: Annotated[Literal["service_account"], Field(description="The type of credentials")] = (
|
|
@@ -70,9 +72,20 @@ class NebiusBackendConfig(CoreModel):
|
|
|
70
72
|
Literal["nebius"],
|
|
71
73
|
Field(description="The type of backend"),
|
|
72
74
|
] = "nebius"
|
|
75
|
+
projects: Annotated[
|
|
76
|
+
Optional[list[str]],
|
|
77
|
+
Field(
|
|
78
|
+
description=(
|
|
79
|
+
"The list of allowed Nebius project IDs."
|
|
80
|
+
" Omit to use the default project in each region."
|
|
81
|
+
" The project is considered default if it is the only project in the region"
|
|
82
|
+
f" or if its name starts with `{DEFAULT_PROJECT_NAME_PREFIX}`"
|
|
83
|
+
)
|
|
84
|
+
),
|
|
85
|
+
] = None
|
|
73
86
|
regions: Annotated[
|
|
74
87
|
Optional[list[str]],
|
|
75
|
-
Field(description="The list of Nebius regions. Omit to
|
|
88
|
+
Field(description="The list of allowed Nebius regions. Omit to allow all regions"),
|
|
76
89
|
] = None
|
|
77
90
|
|
|
78
91
|
|