dstack 0.19.25rc1__py3-none-any.whl → 0.19.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/__init__.py +2 -2
- dstack/_internal/cli/commands/apply.py +3 -61
- dstack/_internal/cli/commands/attach.py +1 -1
- dstack/_internal/cli/commands/completion.py +1 -1
- dstack/_internal/cli/commands/delete.py +2 -2
- dstack/_internal/cli/commands/fleet.py +1 -1
- dstack/_internal/cli/commands/gateway.py +2 -2
- dstack/_internal/cli/commands/init.py +56 -24
- dstack/_internal/cli/commands/logs.py +1 -1
- dstack/_internal/cli/commands/metrics.py +1 -1
- dstack/_internal/cli/commands/offer.py +45 -7
- dstack/_internal/cli/commands/project.py +2 -2
- dstack/_internal/cli/commands/secrets.py +2 -2
- dstack/_internal/cli/commands/server.py +1 -1
- dstack/_internal/cli/commands/stop.py +1 -1
- dstack/_internal/cli/commands/volume.py +1 -1
- dstack/_internal/cli/main.py +2 -2
- dstack/_internal/cli/services/completion.py +2 -2
- dstack/_internal/cli/services/configurators/__init__.py +6 -2
- dstack/_internal/cli/services/configurators/base.py +6 -7
- dstack/_internal/cli/services/configurators/fleet.py +1 -3
- dstack/_internal/cli/services/configurators/gateway.py +2 -4
- dstack/_internal/cli/services/configurators/run.py +293 -58
- dstack/_internal/cli/services/configurators/volume.py +2 -4
- dstack/_internal/cli/services/profile.py +1 -1
- dstack/_internal/cli/services/repos.py +35 -48
- dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
- dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
- dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
- dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
- dstack/_internal/core/backends/aws/compute.py +6 -1
- dstack/_internal/core/backends/aws/configurator.py +11 -7
- dstack/_internal/core/backends/azure/configurator.py +11 -7
- dstack/_internal/core/backends/base/compute.py +33 -5
- dstack/_internal/core/backends/base/configurator.py +25 -13
- dstack/_internal/core/backends/base/offers.py +2 -0
- dstack/_internal/core/backends/cloudrift/configurator.py +13 -7
- dstack/_internal/core/backends/configurators.py +15 -0
- dstack/_internal/core/backends/cudo/configurator.py +11 -7
- dstack/_internal/core/backends/datacrunch/compute.py +5 -1
- dstack/_internal/core/backends/datacrunch/configurator.py +13 -7
- dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
- dstack/_internal/core/backends/digitalocean/backend.py +16 -0
- dstack/_internal/core/backends/digitalocean/compute.py +5 -0
- dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
- dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
- dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
- dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
- dstack/_internal/core/backends/digitalocean_base/compute.py +173 -0
- dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
- dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
- dstack/_internal/core/backends/gcp/compute.py +32 -8
- dstack/_internal/core/backends/gcp/configurator.py +11 -7
- dstack/_internal/core/backends/hotaisle/api_client.py +25 -33
- dstack/_internal/core/backends/hotaisle/compute.py +1 -6
- dstack/_internal/core/backends/hotaisle/configurator.py +13 -7
- dstack/_internal/core/backends/kubernetes/configurator.py +13 -7
- dstack/_internal/core/backends/lambdalabs/configurator.py +11 -7
- dstack/_internal/core/backends/models.py +7 -0
- dstack/_internal/core/backends/nebius/compute.py +1 -8
- dstack/_internal/core/backends/nebius/configurator.py +11 -7
- dstack/_internal/core/backends/nebius/resources.py +21 -11
- dstack/_internal/core/backends/oci/compute.py +4 -5
- dstack/_internal/core/backends/oci/configurator.py +11 -7
- dstack/_internal/core/backends/runpod/configurator.py +11 -7
- dstack/_internal/core/backends/template/configurator.py.jinja +11 -7
- dstack/_internal/core/backends/tensordock/configurator.py +13 -7
- dstack/_internal/core/backends/vastai/configurator.py +11 -7
- dstack/_internal/core/backends/vultr/compute.py +1 -5
- dstack/_internal/core/backends/vultr/configurator.py +11 -4
- dstack/_internal/core/compatibility/fleets.py +5 -0
- dstack/_internal/core/compatibility/gpus.py +13 -0
- dstack/_internal/core/compatibility/runs.py +9 -1
- dstack/_internal/core/models/backends/base.py +5 -1
- dstack/_internal/core/models/common.py +3 -3
- dstack/_internal/core/models/configurations.py +191 -32
- dstack/_internal/core/models/files.py +1 -1
- dstack/_internal/core/models/fleets.py +80 -3
- dstack/_internal/core/models/profiles.py +41 -11
- dstack/_internal/core/models/resources.py +46 -42
- dstack/_internal/core/models/runs.py +28 -5
- dstack/_internal/core/services/configs/__init__.py +6 -3
- dstack/_internal/core/services/profiles.py +2 -2
- dstack/_internal/core/services/repos.py +86 -79
- dstack/_internal/core/services/ssh/ports.py +1 -1
- dstack/_internal/proxy/lib/deps.py +6 -2
- dstack/_internal/server/app.py +22 -17
- dstack/_internal/server/background/tasks/process_fleets.py +109 -13
- dstack/_internal/server/background/tasks/process_gateways.py +4 -1
- dstack/_internal/server/background/tasks/process_instances.py +22 -73
- dstack/_internal/server/background/tasks/process_probes.py +1 -1
- dstack/_internal/server/background/tasks/process_running_jobs.py +12 -4
- dstack/_internal/server/background/tasks/process_runs.py +3 -1
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +67 -44
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
- dstack/_internal/server/background/tasks/process_volumes.py +1 -1
- dstack/_internal/server/db.py +8 -4
- dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
- dstack/_internal/server/models.py +6 -2
- dstack/_internal/server/routers/gpus.py +1 -6
- dstack/_internal/server/schemas/runner.py +11 -0
- dstack/_internal/server/services/backends/__init__.py +14 -8
- dstack/_internal/server/services/backends/handlers.py +6 -1
- dstack/_internal/server/services/docker.py +5 -5
- dstack/_internal/server/services/fleets.py +37 -38
- dstack/_internal/server/services/gateways/__init__.py +2 -0
- dstack/_internal/server/services/gateways/client.py +5 -2
- dstack/_internal/server/services/gateways/connection.py +1 -1
- dstack/_internal/server/services/gpus.py +50 -49
- dstack/_internal/server/services/instances.py +44 -4
- dstack/_internal/server/services/jobs/__init__.py +15 -4
- dstack/_internal/server/services/jobs/configurators/base.py +53 -17
- dstack/_internal/server/services/jobs/configurators/dev.py +9 -4
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +6 -8
- dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +7 -9
- dstack/_internal/server/services/jobs/configurators/service.py +1 -3
- dstack/_internal/server/services/jobs/configurators/task.py +3 -3
- dstack/_internal/server/services/locking.py +5 -5
- dstack/_internal/server/services/logging.py +10 -2
- dstack/_internal/server/services/logs/__init__.py +8 -6
- dstack/_internal/server/services/logs/aws.py +330 -327
- dstack/_internal/server/services/logs/filelog.py +7 -6
- dstack/_internal/server/services/logs/gcp.py +141 -139
- dstack/_internal/server/services/plugins.py +1 -1
- dstack/_internal/server/services/projects.py +2 -5
- dstack/_internal/server/services/proxy/repo.py +5 -1
- dstack/_internal/server/services/requirements/__init__.py +0 -0
- dstack/_internal/server/services/requirements/combine.py +259 -0
- dstack/_internal/server/services/runner/client.py +7 -0
- dstack/_internal/server/services/runs.py +17 -1
- dstack/_internal/server/services/services/__init__.py +8 -2
- dstack/_internal/server/services/services/autoscalers.py +2 -0
- dstack/_internal/server/services/ssh.py +2 -1
- dstack/_internal/server/services/storage/__init__.py +5 -6
- dstack/_internal/server/services/storage/gcs.py +49 -49
- dstack/_internal/server/services/storage/s3.py +52 -52
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-d151b300fcac3933213d.js → main-4eecc75fbe64067eb1bc.js} +1146 -899
- dstack/_internal/server/statics/{main-d151b300fcac3933213d.js.map → main-4eecc75fbe64067eb1bc.js.map} +1 -1
- dstack/_internal/server/statics/{main-aec4762350e34d6fbff9.css → main-56191c63d516fd0041c4.css} +1 -1
- dstack/_internal/server/testing/common.py +7 -4
- dstack/_internal/server/utils/logging.py +3 -3
- dstack/_internal/server/utils/provisioning.py +3 -3
- dstack/_internal/utils/json_schema.py +3 -1
- dstack/_internal/utils/path.py +8 -1
- dstack/_internal/utils/ssh.py +7 -0
- dstack/_internal/utils/typing.py +14 -0
- dstack/api/_public/repos.py +62 -8
- dstack/api/_public/runs.py +19 -8
- dstack/api/server/__init__.py +17 -19
- dstack/api/server/_gpus.py +2 -1
- dstack/api/server/_group.py +4 -3
- dstack/api/server/_repos.py +20 -3
- dstack/plugins/builtin/rest_plugin/_plugin.py +1 -0
- dstack/version.py +1 -1
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/METADATA +2 -2
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/RECORD +160 -142
- dstack/api/huggingface/__init__.py +0 -73
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/WHEEL +0 -0
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.25rc1.dist-info → dstack-0.19.27.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -59,13 +59,6 @@ DOCKER_DAEMON_CONFIG = {
|
|
|
59
59
|
"exec-opts": ["native.cgroupdriver=cgroupfs"],
|
|
60
60
|
}
|
|
61
61
|
SETUP_COMMANDS = [
|
|
62
|
-
"ufw allow ssh",
|
|
63
|
-
"ufw allow from 10.0.0.0/8",
|
|
64
|
-
"ufw allow from 172.16.0.0/12",
|
|
65
|
-
"ufw allow from 192.168.0.0/16",
|
|
66
|
-
"ufw default deny incoming",
|
|
67
|
-
"ufw default allow outgoing",
|
|
68
|
-
"ufw enable",
|
|
69
62
|
'sed -i "s/.*AllowTcpForwarding.*/AllowTcpForwarding yes/g" /etc/ssh/sshd_config',
|
|
70
63
|
"service ssh restart",
|
|
71
64
|
f"echo {shlex.quote(json.dumps(DOCKER_DAEMON_CONFIG))} > /etc/docker/daemon.json",
|
|
@@ -364,7 +357,7 @@ def _wait_for_instance(sdk: SDK, op: SDKOperation[Operation]) -> None:
|
|
|
364
357
|
)
|
|
365
358
|
time.sleep(WAIT_FOR_INSTANCE_UPDATE_INTERVAL)
|
|
366
359
|
resources.LOOP.await_(
|
|
367
|
-
op.update(
|
|
360
|
+
op.update(per_retry_timeout=resources.REQUEST_TIMEOUT, metadata=resources.REQUEST_MD)
|
|
368
361
|
)
|
|
369
362
|
|
|
370
363
|
|
|
@@ -11,7 +11,6 @@ from dstack._internal.core.backends.nebius import resources
|
|
|
11
11
|
from dstack._internal.core.backends.nebius.backend import NebiusBackend
|
|
12
12
|
from dstack._internal.core.backends.nebius.fabrics import get_all_infiniband_fabrics
|
|
13
13
|
from dstack._internal.core.backends.nebius.models import (
|
|
14
|
-
AnyNebiusBackendConfig,
|
|
15
14
|
NebiusBackendConfig,
|
|
16
15
|
NebiusBackendConfigWithCreds,
|
|
17
16
|
NebiusConfig,
|
|
@@ -22,7 +21,12 @@ from dstack._internal.core.backends.nebius.models import (
|
|
|
22
21
|
from dstack._internal.core.models.backends.base import BackendType
|
|
23
22
|
|
|
24
23
|
|
|
25
|
-
class NebiusConfigurator(
|
|
24
|
+
class NebiusConfigurator(
|
|
25
|
+
Configurator[
|
|
26
|
+
NebiusBackendConfig,
|
|
27
|
+
NebiusBackendConfigWithCreds,
|
|
28
|
+
]
|
|
29
|
+
):
|
|
26
30
|
TYPE = BackendType.NEBIUS
|
|
27
31
|
BACKEND_CLASS = NebiusBackend
|
|
28
32
|
|
|
@@ -60,12 +64,12 @@ class NebiusConfigurator(Configurator):
|
|
|
60
64
|
auth=NebiusCreds.parse_obj(config.creds).json(),
|
|
61
65
|
)
|
|
62
66
|
|
|
63
|
-
def
|
|
64
|
-
self
|
|
65
|
-
|
|
67
|
+
def get_backend_config_with_creds(self, record: BackendRecord) -> NebiusBackendConfigWithCreds:
|
|
68
|
+
config = self._get_config(record)
|
|
69
|
+
return NebiusBackendConfigWithCreds.__response__.parse_obj(config)
|
|
70
|
+
|
|
71
|
+
def get_backend_config_without_creds(self, record: BackendRecord) -> NebiusBackendConfig:
|
|
66
72
|
config = self._get_config(record)
|
|
67
|
-
if include_creds:
|
|
68
|
-
return NebiusBackendConfigWithCreds.__response__.parse_obj(config)
|
|
69
73
|
return NebiusBackendConfig.__response__.parse_obj(config)
|
|
70
74
|
|
|
71
75
|
def get_backend(self, record: BackendRecord) -> NebiusBackend:
|
|
@@ -119,7 +119,7 @@ def wait_for_operation(
|
|
|
119
119
|
if time.monotonic() + interval > deadline:
|
|
120
120
|
raise TimeoutError(f"Operation {op.id} wait timeout")
|
|
121
121
|
time.sleep(interval)
|
|
122
|
-
LOOP.await_(op.update(
|
|
122
|
+
LOOP.await_(op.update(per_retry_timeout=REQUEST_TIMEOUT, metadata=REQUEST_MD))
|
|
123
123
|
|
|
124
124
|
|
|
125
125
|
def get_region_to_project_id_map(
|
|
@@ -155,7 +155,7 @@ def validate_regions(configured: set[str], available: set[str]) -> None:
|
|
|
155
155
|
def list_tenant_projects(sdk: SDK) -> Sequence[Container]:
|
|
156
156
|
tenants = LOOP.await_(
|
|
157
157
|
TenantServiceClient(sdk).list(
|
|
158
|
-
ListTenantsRequest(),
|
|
158
|
+
ListTenantsRequest(), per_retry_timeout=REQUEST_TIMEOUT, metadata=REQUEST_MD
|
|
159
159
|
)
|
|
160
160
|
)
|
|
161
161
|
if len(tenants.items) != 1:
|
|
@@ -164,7 +164,7 @@ def list_tenant_projects(sdk: SDK) -> Sequence[Container]:
|
|
|
164
164
|
projects = LOOP.await_(
|
|
165
165
|
ProjectServiceClient(sdk).list(
|
|
166
166
|
ListProjectsRequest(parent_id=tenant_id, page_size=999),
|
|
167
|
-
|
|
167
|
+
per_retry_timeout=REQUEST_TIMEOUT,
|
|
168
168
|
metadata=REQUEST_MD,
|
|
169
169
|
)
|
|
170
170
|
)
|
|
@@ -238,7 +238,7 @@ def get_default_subnet(sdk: SDK, project_id: str) -> Subnet:
|
|
|
238
238
|
subnets = LOOP.await_(
|
|
239
239
|
SubnetServiceClient(sdk).list(
|
|
240
240
|
ListSubnetsRequest(parent_id=project_id, page_size=999),
|
|
241
|
-
|
|
241
|
+
per_retry_timeout=REQUEST_TIMEOUT,
|
|
242
242
|
metadata=REQUEST_MD,
|
|
243
243
|
)
|
|
244
244
|
)
|
|
@@ -264,13 +264,15 @@ def create_disk(
|
|
|
264
264
|
),
|
|
265
265
|
)
|
|
266
266
|
with wrap_capacity_errors():
|
|
267
|
-
return LOOP.await_(
|
|
267
|
+
return LOOP.await_(
|
|
268
|
+
client.create(request, per_retry_timeout=REQUEST_TIMEOUT, metadata=REQUEST_MD)
|
|
269
|
+
)
|
|
268
270
|
|
|
269
271
|
|
|
270
272
|
def delete_disk(sdk: SDK, disk_id: str) -> None:
|
|
271
273
|
LOOP.await_(
|
|
272
274
|
DiskServiceClient(sdk).delete(
|
|
273
|
-
DeleteDiskRequest(id=disk_id),
|
|
275
|
+
DeleteDiskRequest(id=disk_id), per_retry_timeout=REQUEST_TIMEOUT, metadata=REQUEST_MD
|
|
274
276
|
)
|
|
275
277
|
)
|
|
276
278
|
|
|
@@ -318,13 +320,17 @@ def create_instance(
|
|
|
318
320
|
),
|
|
319
321
|
)
|
|
320
322
|
with wrap_capacity_errors():
|
|
321
|
-
return LOOP.await_(
|
|
323
|
+
return LOOP.await_(
|
|
324
|
+
client.create(request, per_retry_timeout=REQUEST_TIMEOUT, metadata=REQUEST_MD)
|
|
325
|
+
)
|
|
322
326
|
|
|
323
327
|
|
|
324
328
|
def get_instance(sdk: SDK, instance_id: str) -> Instance:
|
|
325
329
|
return LOOP.await_(
|
|
326
330
|
InstanceServiceClient(sdk).get(
|
|
327
|
-
GetInstanceRequest(id=instance_id),
|
|
331
|
+
GetInstanceRequest(id=instance_id),
|
|
332
|
+
per_retry_timeout=REQUEST_TIMEOUT,
|
|
333
|
+
metadata=REQUEST_MD,
|
|
328
334
|
)
|
|
329
335
|
)
|
|
330
336
|
|
|
@@ -332,7 +338,9 @@ def get_instance(sdk: SDK, instance_id: str) -> Instance:
|
|
|
332
338
|
def delete_instance(sdk: SDK, instance_id: str) -> SDKOperation[Operation]:
|
|
333
339
|
return LOOP.await_(
|
|
334
340
|
InstanceServiceClient(sdk).delete(
|
|
335
|
-
DeleteInstanceRequest(id=instance_id),
|
|
341
|
+
DeleteInstanceRequest(id=instance_id),
|
|
342
|
+
per_retry_timeout=REQUEST_TIMEOUT,
|
|
343
|
+
metadata=REQUEST_MD,
|
|
336
344
|
)
|
|
337
345
|
)
|
|
338
346
|
|
|
@@ -345,7 +353,7 @@ def create_cluster(sdk: SDK, name: str, project_id: str, fabric: str) -> SDKOper
|
|
|
345
353
|
metadata=ResourceMetadata(name=name, parent_id=project_id),
|
|
346
354
|
spec=GpuClusterSpec(infiniband_fabric=fabric),
|
|
347
355
|
),
|
|
348
|
-
|
|
356
|
+
per_retry_timeout=REQUEST_TIMEOUT,
|
|
349
357
|
metadata=REQUEST_MD,
|
|
350
358
|
)
|
|
351
359
|
)
|
|
@@ -354,6 +362,8 @@ def create_cluster(sdk: SDK, name: str, project_id: str, fabric: str) -> SDKOper
|
|
|
354
362
|
def delete_cluster(sdk: SDK, cluster_id: str) -> None:
|
|
355
363
|
return LOOP.await_(
|
|
356
364
|
GpuClusterServiceClient(sdk).delete(
|
|
357
|
-
DeleteGpuClusterRequest(id=cluster_id),
|
|
365
|
+
DeleteGpuClusterRequest(id=cluster_id),
|
|
366
|
+
per_retry_timeout=REQUEST_TIMEOUT,
|
|
367
|
+
metadata=REQUEST_MD,
|
|
358
368
|
)
|
|
359
369
|
)
|
|
@@ -135,11 +135,10 @@ class OCICompute(
|
|
|
135
135
|
security_group.id, region.virtual_network_client
|
|
136
136
|
)
|
|
137
137
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
cloud_init_user_data = get_user_data(instance_config.get_public_keys(), setup_commands)
|
|
138
|
+
cloud_init_user_data = get_user_data(
|
|
139
|
+
authorized_keys=instance_config.get_public_keys(),
|
|
140
|
+
firewall_allow_from_subnets=[resources.VCN_CIDR],
|
|
141
|
+
)
|
|
143
142
|
|
|
144
143
|
display_name = generate_unique_instance_name(instance_config)
|
|
145
144
|
try:
|
|
@@ -10,7 +10,6 @@ from dstack._internal.core.backends.oci import resources
|
|
|
10
10
|
from dstack._internal.core.backends.oci.backend import OCIBackend
|
|
11
11
|
from dstack._internal.core.backends.oci.exceptions import any_oci_exception
|
|
12
12
|
from dstack._internal.core.backends.oci.models import (
|
|
13
|
-
AnyOCIBackendConfig,
|
|
14
13
|
OCIBackendConfig,
|
|
15
14
|
OCIBackendConfigWithCreds,
|
|
16
15
|
OCIConfig,
|
|
@@ -42,7 +41,12 @@ SUPPORTED_REGIONS = frozenset(
|
|
|
42
41
|
)
|
|
43
42
|
|
|
44
43
|
|
|
45
|
-
class OCIConfigurator(
|
|
44
|
+
class OCIConfigurator(
|
|
45
|
+
Configurator[
|
|
46
|
+
OCIBackendConfig,
|
|
47
|
+
OCIBackendConfigWithCreds,
|
|
48
|
+
]
|
|
49
|
+
):
|
|
46
50
|
TYPE = BackendType.OCI
|
|
47
51
|
BACKEND_CLASS = OCIBackend
|
|
48
52
|
|
|
@@ -83,12 +87,12 @@ class OCIConfigurator(Configurator):
|
|
|
83
87
|
auth=OCICreds.parse_obj(config.creds).json(),
|
|
84
88
|
)
|
|
85
89
|
|
|
86
|
-
def
|
|
87
|
-
self
|
|
88
|
-
|
|
90
|
+
def get_backend_config_with_creds(self, record: BackendRecord) -> OCIBackendConfigWithCreds:
|
|
91
|
+
config = self._get_config(record)
|
|
92
|
+
return OCIBackendConfigWithCreds.__response__.parse_obj(config)
|
|
93
|
+
|
|
94
|
+
def get_backend_config_without_creds(self, record: BackendRecord) -> OCIBackendConfig:
|
|
89
95
|
config = self._get_config(record)
|
|
90
|
-
if include_creds:
|
|
91
|
-
return OCIBackendConfigWithCreds.__response__.parse_obj(config)
|
|
92
96
|
return OCIBackendConfig.__response__.parse_obj(config)
|
|
93
97
|
|
|
94
98
|
def get_backend(self, record: BackendRecord) -> OCIBackend:
|
|
@@ -8,7 +8,6 @@ from dstack._internal.core.backends.base.configurator import (
|
|
|
8
8
|
from dstack._internal.core.backends.runpod import api_client
|
|
9
9
|
from dstack._internal.core.backends.runpod.backend import RunpodBackend
|
|
10
10
|
from dstack._internal.core.backends.runpod.models import (
|
|
11
|
-
AnyRunpodBackendConfig,
|
|
12
11
|
RunpodBackendConfig,
|
|
13
12
|
RunpodBackendConfigWithCreds,
|
|
14
13
|
RunpodConfig,
|
|
@@ -18,7 +17,12 @@ from dstack._internal.core.backends.runpod.models import (
|
|
|
18
17
|
from dstack._internal.core.models.backends.base import BackendType
|
|
19
18
|
|
|
20
19
|
|
|
21
|
-
class RunpodConfigurator(
|
|
20
|
+
class RunpodConfigurator(
|
|
21
|
+
Configurator[
|
|
22
|
+
RunpodBackendConfig,
|
|
23
|
+
RunpodBackendConfigWithCreds,
|
|
24
|
+
]
|
|
25
|
+
):
|
|
22
26
|
TYPE = BackendType.RUNPOD
|
|
23
27
|
BACKEND_CLASS = RunpodBackend
|
|
24
28
|
|
|
@@ -35,12 +39,12 @@ class RunpodConfigurator(Configurator):
|
|
|
35
39
|
auth=RunpodCreds.parse_obj(config.creds).json(),
|
|
36
40
|
)
|
|
37
41
|
|
|
38
|
-
def
|
|
39
|
-
self
|
|
40
|
-
|
|
42
|
+
def get_backend_config_with_creds(self, record: BackendRecord) -> RunpodBackendConfigWithCreds:
|
|
43
|
+
config = self._get_config(record)
|
|
44
|
+
return RunpodBackendConfigWithCreds.__response__.parse_obj(config)
|
|
45
|
+
|
|
46
|
+
def get_backend_config_without_creds(self, record: BackendRecord) -> RunpodBackendConfig:
|
|
41
47
|
config = self._get_config(record)
|
|
42
|
-
if include_creds:
|
|
43
|
-
return RunpodBackendConfigWithCreds.__response__.parse_obj(config)
|
|
44
48
|
return RunpodBackendConfig.__response__.parse_obj(config)
|
|
45
49
|
|
|
46
50
|
def get_backend(self, record: BackendRecord) -> RunpodBackend:
|
|
@@ -7,7 +7,6 @@ from dstack._internal.core.backends.base.configurator import (
|
|
|
7
7
|
)
|
|
8
8
|
from dstack._internal.core.backends.{{ backend_name|lower }}.backend import {{ backend_name }}Backend
|
|
9
9
|
from dstack._internal.core.backends.{{ backend_name|lower }}.models import (
|
|
10
|
-
Any{{ backend_name }}BackendConfig,
|
|
11
10
|
Any{{ backend_name }}Creds,
|
|
12
11
|
{{ backend_name }}BackendConfig,
|
|
13
12
|
{{ backend_name }}BackendConfigWithCreds,
|
|
@@ -20,7 +19,12 @@ from dstack._internal.core.models.backends.base import (
|
|
|
20
19
|
)
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
class {{ backend_name }}Configurator(
|
|
22
|
+
class {{ backend_name }}Configurator(
|
|
23
|
+
Configurator[
|
|
24
|
+
{{ backend_name }}BackendConfig,
|
|
25
|
+
{{ backend_name }}BackendConfigWithCreds,
|
|
26
|
+
]
|
|
27
|
+
):
|
|
24
28
|
TYPE = BackendType.{{ backend_name|upper }}
|
|
25
29
|
BACKEND_CLASS = {{ backend_name }}Backend
|
|
26
30
|
|
|
@@ -40,12 +44,12 @@ class {{ backend_name }}Configurator(Configurator):
|
|
|
40
44
|
auth={{ backend_name }}Creds.parse_obj(config.creds).json(),
|
|
41
45
|
)
|
|
42
46
|
|
|
43
|
-
def
|
|
44
|
-
self
|
|
45
|
-
|
|
47
|
+
def get_backend_config_with_creds(self, record: BackendRecord) -> {{ backend_name }}BackendConfigWithCreds:
|
|
48
|
+
config = self._get_config(record)
|
|
49
|
+
return {{ backend_name }}BackendConfigWithCreds.__response__.parse_obj(config)
|
|
50
|
+
|
|
51
|
+
def get_backend_config_without_creds(self, record: BackendRecord) -> {{ backend_name }}BackendConfig:
|
|
46
52
|
config = self._get_config(record)
|
|
47
|
-
if include_creds:
|
|
48
|
-
return {{ backend_name }}BackendConfigWithCreds.__response__.parse_obj(config)
|
|
49
53
|
return {{ backend_name }}BackendConfig.__response__.parse_obj(config)
|
|
50
54
|
|
|
51
55
|
def get_backend(self, record: BackendRecord) -> {{ backend_name }}Backend:
|
|
@@ -8,7 +8,6 @@ from dstack._internal.core.backends.base.configurator import (
|
|
|
8
8
|
from dstack._internal.core.backends.tensordock import api_client
|
|
9
9
|
from dstack._internal.core.backends.tensordock.backend import TensorDockBackend
|
|
10
10
|
from dstack._internal.core.backends.tensordock.models import (
|
|
11
|
-
AnyTensorDockBackendConfig,
|
|
12
11
|
TensorDockBackendConfig,
|
|
13
12
|
TensorDockBackendConfigWithCreds,
|
|
14
13
|
TensorDockConfig,
|
|
@@ -23,7 +22,12 @@ from dstack._internal.core.models.backends.base import (
|
|
|
23
22
|
REGIONS = []
|
|
24
23
|
|
|
25
24
|
|
|
26
|
-
class TensorDockConfigurator(
|
|
25
|
+
class TensorDockConfigurator(
|
|
26
|
+
Configurator[
|
|
27
|
+
TensorDockBackendConfig,
|
|
28
|
+
TensorDockBackendConfigWithCreds,
|
|
29
|
+
]
|
|
30
|
+
):
|
|
27
31
|
TYPE = BackendType.TENSORDOCK
|
|
28
32
|
BACKEND_CLASS = TensorDockBackend
|
|
29
33
|
|
|
@@ -44,12 +48,14 @@ class TensorDockConfigurator(Configurator):
|
|
|
44
48
|
auth=TensorDockCreds.parse_obj(config.creds).json(),
|
|
45
49
|
)
|
|
46
50
|
|
|
47
|
-
def
|
|
48
|
-
self, record: BackendRecord
|
|
49
|
-
) ->
|
|
51
|
+
def get_backend_config_with_creds(
|
|
52
|
+
self, record: BackendRecord
|
|
53
|
+
) -> TensorDockBackendConfigWithCreds:
|
|
54
|
+
config = self._get_config(record)
|
|
55
|
+
return TensorDockBackendConfigWithCreds.__response__.parse_obj(config)
|
|
56
|
+
|
|
57
|
+
def get_backend_config_without_creds(self, record: BackendRecord) -> TensorDockBackendConfig:
|
|
50
58
|
config = self._get_config(record)
|
|
51
|
-
if include_creds:
|
|
52
|
-
return TensorDockBackendConfigWithCreds.__response__.parse_obj(config)
|
|
53
59
|
return TensorDockBackendConfig.__response__.parse_obj(config)
|
|
54
60
|
|
|
55
61
|
def get_backend(self, record: BackendRecord) -> TensorDockBackend:
|
|
@@ -8,7 +8,6 @@ from dstack._internal.core.backends.base.configurator import (
|
|
|
8
8
|
from dstack._internal.core.backends.vastai import api_client
|
|
9
9
|
from dstack._internal.core.backends.vastai.backend import VastAIBackend
|
|
10
10
|
from dstack._internal.core.backends.vastai.models import (
|
|
11
|
-
AnyVastAIBackendConfig,
|
|
12
11
|
VastAIBackendConfig,
|
|
13
12
|
VastAIBackendConfigWithCreds,
|
|
14
13
|
VastAIConfig,
|
|
@@ -23,7 +22,12 @@ from dstack._internal.core.models.backends.base import (
|
|
|
23
22
|
REGIONS = []
|
|
24
23
|
|
|
25
24
|
|
|
26
|
-
class VastAIConfigurator(
|
|
25
|
+
class VastAIConfigurator(
|
|
26
|
+
Configurator[
|
|
27
|
+
VastAIBackendConfig,
|
|
28
|
+
VastAIBackendConfigWithCreds,
|
|
29
|
+
]
|
|
30
|
+
):
|
|
27
31
|
TYPE = BackendType.VASTAI
|
|
28
32
|
BACKEND_CLASS = VastAIBackend
|
|
29
33
|
|
|
@@ -42,12 +46,12 @@ class VastAIConfigurator(Configurator):
|
|
|
42
46
|
auth=VastAICreds.parse_obj(config.creds).json(),
|
|
43
47
|
)
|
|
44
48
|
|
|
45
|
-
def
|
|
46
|
-
self
|
|
47
|
-
|
|
49
|
+
def get_backend_config_with_creds(self, record: BackendRecord) -> VastAIBackendConfigWithCreds:
|
|
50
|
+
config = self._get_config(record)
|
|
51
|
+
return VastAIBackendConfigWithCreds.__response__.parse_obj(config)
|
|
52
|
+
|
|
53
|
+
def get_backend_config_without_creds(self, record: BackendRecord) -> VastAIBackendConfig:
|
|
48
54
|
config = self._get_config(record)
|
|
49
|
-
if include_creds:
|
|
50
|
-
return VastAIBackendConfigWithCreds.__response__.parse_obj(config)
|
|
51
55
|
return VastAIBackendConfig.__response__.parse_obj(config)
|
|
52
56
|
|
|
53
57
|
def get_backend(self, record: BackendRecord) -> VastAIBackend:
|
|
@@ -75,17 +75,13 @@ class VultrCompute(
|
|
|
75
75
|
subnet = vpc["v4_subnet"]
|
|
76
76
|
subnet_mask = vpc["v4_subnet_mask"]
|
|
77
77
|
|
|
78
|
-
setup_commands = [
|
|
79
|
-
f"sudo ufw allow from {subnet}/{subnet_mask}",
|
|
80
|
-
"sudo ufw reload",
|
|
81
|
-
]
|
|
82
78
|
instance_id = self.api_client.launch_instance(
|
|
83
79
|
region=instance_offer.region,
|
|
84
80
|
label=instance_name,
|
|
85
81
|
plan=instance_offer.instance.name,
|
|
86
82
|
user_data=get_user_data(
|
|
87
83
|
authorized_keys=instance_config.get_public_keys(),
|
|
88
|
-
|
|
84
|
+
firewall_allow_from_subnets=[f"{subnet}/{subnet_mask}"],
|
|
89
85
|
),
|
|
90
86
|
vpc_id=vpc["id"],
|
|
91
87
|
)
|
|
@@ -23,7 +23,12 @@ from dstack._internal.core.models.backends.base import (
|
|
|
23
23
|
REGIONS = []
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class VultrConfigurator(
|
|
26
|
+
class VultrConfigurator(
|
|
27
|
+
Configurator[
|
|
28
|
+
VultrBackendConfig,
|
|
29
|
+
VultrBackendConfigWithCreds,
|
|
30
|
+
]
|
|
31
|
+
):
|
|
27
32
|
TYPE = BackendType.VULTR
|
|
28
33
|
BACKEND_CLASS = VultrBackend
|
|
29
34
|
|
|
@@ -42,10 +47,12 @@ class VultrConfigurator(Configurator):
|
|
|
42
47
|
auth=VultrCreds.parse_obj(config.creds).json(),
|
|
43
48
|
)
|
|
44
49
|
|
|
45
|
-
def
|
|
50
|
+
def get_backend_config_with_creds(self, record: BackendRecord) -> VultrBackendConfigWithCreds:
|
|
51
|
+
config = self._get_config(record)
|
|
52
|
+
return VultrBackendConfigWithCreds.__response__.parse_obj(config)
|
|
53
|
+
|
|
54
|
+
def get_backend_config_without_creds(self, record: BackendRecord) -> VultrBackendConfig:
|
|
46
55
|
config = self._get_config(record)
|
|
47
|
-
if include_creds:
|
|
48
|
-
return VultrBackendConfigWithCreds.__response__.parse_obj(config)
|
|
49
56
|
return VultrBackendConfig.__response__.parse_obj(config)
|
|
50
57
|
|
|
51
58
|
def get_backend(self, record: BackendRecord) -> VultrBackend:
|
|
@@ -59,6 +59,11 @@ def get_fleet_spec_excludes(fleet_spec: FleetSpec) -> Optional[IncludeExcludeDic
|
|
|
59
59
|
profile_excludes.add("stop_criteria")
|
|
60
60
|
if profile.schedule is None:
|
|
61
61
|
profile_excludes.add("schedule")
|
|
62
|
+
if (
|
|
63
|
+
fleet_spec.configuration.nodes
|
|
64
|
+
and fleet_spec.configuration.nodes.min == fleet_spec.configuration.nodes.target
|
|
65
|
+
):
|
|
66
|
+
configuration_excludes["nodes"] = {"target"}
|
|
62
67
|
if configuration_excludes:
|
|
63
68
|
spec_excludes["configuration"] = configuration_excludes
|
|
64
69
|
if profile_excludes:
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from dstack._internal.core.compatibility.runs import get_run_spec_excludes
|
|
4
|
+
from dstack._internal.core.models.common import IncludeExcludeDictType
|
|
5
|
+
from dstack._internal.server.schemas.gpus import ListGpusRequest
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_list_gpus_excludes(request: ListGpusRequest) -> Optional[IncludeExcludeDictType]:
|
|
9
|
+
list_gpus_excludes: IncludeExcludeDictType = {}
|
|
10
|
+
run_spec_excludes = get_run_spec_excludes(request.run_spec)
|
|
11
|
+
if run_spec_excludes is not None:
|
|
12
|
+
list_gpus_excludes["run_spec"] = run_spec_excludes
|
|
13
|
+
return list_gpus_excludes
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
3
|
from dstack._internal.core.models.common import IncludeExcludeDictType, IncludeExcludeSetType
|
|
4
|
-
from dstack._internal.core.models.configurations import ServiceConfiguration
|
|
4
|
+
from dstack._internal.core.models.configurations import LEGACY_REPO_DIR, ServiceConfiguration
|
|
5
5
|
from dstack._internal.core.models.runs import ApplyRunPlanInput, JobSpec, JobSubmission, RunSpec
|
|
6
6
|
from dstack._internal.server.schemas.runs import GetRunPlanRequest, ListRunsRequest
|
|
7
7
|
|
|
@@ -102,6 +102,11 @@ def get_run_spec_excludes(run_spec: RunSpec) -> IncludeExcludeDictType:
|
|
|
102
102
|
configuration = run_spec.configuration
|
|
103
103
|
profile = run_spec.profile
|
|
104
104
|
|
|
105
|
+
if run_spec.repo_dir in [None, LEGACY_REPO_DIR]:
|
|
106
|
+
spec_excludes["repo_dir"] = True
|
|
107
|
+
elif run_spec.repo_dir == "." and configuration.working_dir in [None, LEGACY_REPO_DIR, "."]:
|
|
108
|
+
spec_excludes["repo_dir"] = True
|
|
109
|
+
|
|
105
110
|
if configuration.fleets is None:
|
|
106
111
|
configuration_excludes["fleets"] = True
|
|
107
112
|
if profile is not None and profile.fleets is None:
|
|
@@ -136,6 +141,7 @@ def get_run_spec_excludes(run_spec: RunSpec) -> IncludeExcludeDictType:
|
|
|
136
141
|
configuration_excludes["schedule"] = True
|
|
137
142
|
if profile is not None and profile.schedule is None:
|
|
138
143
|
profile_excludes.add("schedule")
|
|
144
|
+
configuration_excludes["repos"] = True
|
|
139
145
|
|
|
140
146
|
if configuration_excludes:
|
|
141
147
|
spec_excludes["configuration"] = configuration_excludes
|
|
@@ -162,6 +168,8 @@ def get_job_spec_excludes(job_specs: list[JobSpec]) -> IncludeExcludeDictType:
|
|
|
162
168
|
spec_excludes["service_port"] = True
|
|
163
169
|
if all(not s.probes for s in job_specs):
|
|
164
170
|
spec_excludes["probes"] = True
|
|
171
|
+
if all(s.repo_dir in [None, LEGACY_REPO_DIR] for s in job_specs):
|
|
172
|
+
spec_excludes["repo_dir"] = True
|
|
165
173
|
|
|
166
174
|
return spec_excludes
|
|
167
175
|
|
|
@@ -4,13 +4,15 @@ import enum
|
|
|
4
4
|
class BackendType(str, enum.Enum):
|
|
5
5
|
"""
|
|
6
6
|
Attributes:
|
|
7
|
+
AMDDEVCLOUD (BackendType): AMD Developer Cloud
|
|
7
8
|
AWS (BackendType): Amazon Web Services
|
|
8
9
|
AZURE (BackendType): Microsoft Azure
|
|
9
10
|
CLOUDRIFT (BackendType): CloudRift
|
|
10
11
|
CUDO (BackendType): Cudo
|
|
12
|
+
DATACRUNCH (BackendType): DataCrunch
|
|
13
|
+
DIGITALOCEAN (BackendType): DigitalOcean
|
|
11
14
|
DSTACK (BackendType): dstack Sky
|
|
12
15
|
GCP (BackendType): Google Cloud Platform
|
|
13
|
-
DATACRUNCH (BackendType): DataCrunch
|
|
14
16
|
HOTAISLE (BackendType): Hot Aisle
|
|
15
17
|
KUBERNETES (BackendType): Kubernetes
|
|
16
18
|
LAMBDA (BackendType): Lambda Cloud
|
|
@@ -22,11 +24,13 @@ class BackendType(str, enum.Enum):
|
|
|
22
24
|
VULTR (BackendType): Vultr
|
|
23
25
|
"""
|
|
24
26
|
|
|
27
|
+
AMDDEVCLOUD = "amddevcloud"
|
|
25
28
|
AWS = "aws"
|
|
26
29
|
AZURE = "azure"
|
|
27
30
|
CLOUDRIFT = "cloudrift"
|
|
28
31
|
CUDO = "cudo"
|
|
29
32
|
DATACRUNCH = "datacrunch"
|
|
33
|
+
DIGITALOCEAN = "digitalocean"
|
|
30
34
|
DSTACK = "dstack"
|
|
31
35
|
GCP = "gcp"
|
|
32
36
|
HOTAISLE = "hotaisle"
|
|
@@ -102,12 +102,12 @@ class RegistryAuth(CoreModel):
|
|
|
102
102
|
password (str): The password or access token
|
|
103
103
|
"""
|
|
104
104
|
|
|
105
|
-
class Config(CoreModel.Config):
|
|
106
|
-
frozen = True
|
|
107
|
-
|
|
108
105
|
username: Annotated[str, Field(description="The username")]
|
|
109
106
|
password: Annotated[str, Field(description="The password or access token")]
|
|
110
107
|
|
|
108
|
+
class Config(CoreModel.Config):
|
|
109
|
+
frozen = True
|
|
110
|
+
|
|
111
111
|
|
|
112
112
|
class ApplyAction(str, Enum):
|
|
113
113
|
CREATE = "create" # resource is to be created or overridden
|