dstack 0.18.44__py3-none-any.whl → 0.19.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/gateway.py +15 -3
- dstack/_internal/cli/commands/logs.py +0 -22
- dstack/_internal/cli/commands/stats.py +8 -17
- dstack/_internal/cli/main.py +1 -5
- dstack/_internal/cli/services/configurators/fleet.py +4 -39
- dstack/_internal/cli/services/configurators/run.py +22 -21
- dstack/_internal/cli/services/profile.py +34 -83
- dstack/_internal/cli/utils/gateway.py +1 -1
- dstack/_internal/core/backends/__init__.py +56 -39
- dstack/_internal/core/backends/aws/__init__.py +0 -25
- dstack/_internal/core/backends/aws/auth.py +1 -10
- dstack/_internal/core/backends/aws/backend.py +26 -0
- dstack/_internal/core/backends/aws/compute.py +20 -45
- dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
- dstack/_internal/core/backends/aws/models.py +135 -0
- dstack/_internal/core/backends/aws/resources.py +1 -1
- dstack/_internal/core/backends/azure/__init__.py +0 -20
- dstack/_internal/core/backends/azure/auth.py +2 -11
- dstack/_internal/core/backends/azure/backend.py +21 -0
- dstack/_internal/core/backends/azure/compute.py +13 -27
- dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
- dstack/_internal/core/backends/azure/models.py +89 -0
- dstack/_internal/core/backends/base/__init__.py +0 -12
- dstack/_internal/core/backends/base/backend.py +18 -0
- dstack/_internal/core/backends/base/compute.py +153 -33
- dstack/_internal/core/backends/base/configurator.py +105 -0
- dstack/_internal/core/backends/base/models.py +14 -0
- dstack/_internal/core/backends/configurators.py +138 -0
- dstack/_internal/core/backends/cudo/__init__.py +0 -15
- dstack/_internal/core/backends/cudo/backend.py +16 -0
- dstack/_internal/core/backends/cudo/compute.py +8 -26
- dstack/_internal/core/backends/cudo/configurator.py +72 -0
- dstack/_internal/core/backends/cudo/models.py +37 -0
- dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
- dstack/_internal/core/backends/datacrunch/backend.py +16 -0
- dstack/_internal/core/backends/datacrunch/compute.py +8 -25
- dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
- dstack/_internal/core/backends/datacrunch/models.py +38 -0
- dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
- dstack/_internal/core/backends/gcp/__init__.py +0 -16
- dstack/_internal/core/backends/gcp/auth.py +2 -11
- dstack/_internal/core/backends/gcp/backend.py +17 -0
- dstack/_internal/core/backends/gcp/compute.py +13 -43
- dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
- dstack/_internal/core/backends/gcp/models.py +125 -0
- dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
- dstack/_internal/core/backends/kubernetes/backend.py +16 -0
- dstack/_internal/core/backends/kubernetes/compute.py +16 -5
- dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
- dstack/_internal/core/backends/kubernetes/models.py +72 -0
- dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
- dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
- dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
- dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
- dstack/_internal/core/backends/lambdalabs/models.py +37 -0
- dstack/_internal/core/backends/local/__init__.py +0 -13
- dstack/_internal/core/backends/local/backend.py +14 -0
- dstack/_internal/core/backends/local/compute.py +16 -2
- dstack/_internal/core/backends/models.py +128 -0
- dstack/_internal/core/backends/oci/__init__.py +0 -15
- dstack/_internal/core/backends/oci/auth.py +1 -5
- dstack/_internal/core/backends/oci/backend.py +16 -0
- dstack/_internal/core/backends/oci/compute.py +9 -23
- dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
- dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
- dstack/_internal/core/backends/oci/region.py +1 -1
- dstack/_internal/core/backends/runpod/__init__.py +0 -15
- dstack/_internal/core/backends/runpod/backend.py +16 -0
- dstack/_internal/core/backends/runpod/compute.py +7 -3
- dstack/_internal/core/backends/runpod/configurator.py +59 -0
- dstack/_internal/core/backends/runpod/models.py +54 -0
- dstack/_internal/core/backends/template/__init__.py +0 -0
- dstack/_internal/core/backends/tensordock/__init__.py +0 -15
- dstack/_internal/core/backends/tensordock/backend.py +16 -0
- dstack/_internal/core/backends/tensordock/compute.py +8 -27
- dstack/_internal/core/backends/tensordock/configurator.py +68 -0
- dstack/_internal/core/backends/tensordock/models.py +38 -0
- dstack/_internal/core/backends/vastai/__init__.py +0 -15
- dstack/_internal/core/backends/vastai/backend.py +16 -0
- dstack/_internal/core/backends/vastai/compute.py +2 -2
- dstack/_internal/core/backends/vastai/configurator.py +66 -0
- dstack/_internal/core/backends/vastai/models.py +37 -0
- dstack/_internal/core/backends/vultr/__init__.py +0 -15
- dstack/_internal/core/backends/vultr/backend.py +16 -0
- dstack/_internal/core/backends/vultr/compute.py +10 -24
- dstack/_internal/core/backends/vultr/configurator.py +64 -0
- dstack/_internal/core/backends/vultr/models.py +34 -0
- dstack/_internal/core/models/backends/__init__.py +0 -184
- dstack/_internal/core/models/backends/base.py +0 -19
- dstack/_internal/core/models/configurations.py +20 -15
- dstack/_internal/core/models/envs.py +4 -3
- dstack/_internal/core/models/fleets.py +17 -22
- dstack/_internal/core/models/gateways.py +3 -3
- dstack/_internal/core/models/instances.py +24 -0
- dstack/_internal/core/models/profiles.py +41 -46
- dstack/_internal/core/models/projects.py +1 -1
- dstack/_internal/core/models/repos/base.py +0 -5
- dstack/_internal/core/models/repos/local.py +3 -3
- dstack/_internal/core/models/repos/remote.py +26 -12
- dstack/_internal/core/models/repos/virtual.py +1 -1
- dstack/_internal/core/models/resources.py +45 -76
- dstack/_internal/core/models/runs.py +17 -19
- dstack/_internal/core/models/volumes.py +1 -3
- dstack/_internal/core/services/profiles.py +7 -16
- dstack/_internal/core/services/repos.py +0 -4
- dstack/_internal/server/app.py +0 -3
- dstack/_internal/server/background/tasks/process_gateways.py +4 -8
- dstack/_internal/server/background/tasks/process_instances.py +14 -9
- dstack/_internal/server/background/tasks/process_metrics.py +1 -1
- dstack/_internal/server/background/tasks/process_placement_groups.py +4 -1
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +1 -1
- dstack/_internal/server/background/tasks/process_running_jobs.py +14 -5
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +16 -37
- dstack/_internal/server/background/tasks/process_volumes.py +5 -2
- dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
- dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
- dstack/_internal/server/models.py +48 -9
- dstack/_internal/server/routers/backends.py +14 -23
- dstack/_internal/server/routers/instances.py +3 -4
- dstack/_internal/server/routers/metrics.py +10 -8
- dstack/_internal/server/routers/prometheus.py +1 -1
- dstack/_internal/server/routers/repos.py +1 -2
- dstack/_internal/server/routers/runs.py +13 -59
- dstack/_internal/server/schemas/gateways.py +14 -23
- dstack/_internal/server/schemas/projects.py +7 -2
- dstack/_internal/server/schemas/repos.py +2 -38
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/schemas/runs.py +1 -24
- dstack/_internal/server/services/backends/__init__.py +85 -158
- dstack/_internal/server/services/config.py +52 -576
- dstack/_internal/server/services/fleets.py +8 -103
- dstack/_internal/server/services/gateways/__init__.py +12 -4
- dstack/_internal/server/services/{pools.py → instances.py} +22 -329
- dstack/_internal/server/services/jobs/__init__.py +9 -6
- dstack/_internal/server/services/jobs/configurators/base.py +16 -0
- dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
- dstack/_internal/server/services/metrics.py +39 -13
- dstack/_internal/server/services/offers.py +1 -1
- dstack/_internal/server/services/projects.py +23 -14
- dstack/_internal/server/services/prometheus.py +176 -18
- dstack/_internal/server/services/runs.py +24 -16
- dstack/_internal/server/services/volumes.py +8 -4
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-4eb116b97819badd1e2c.js → main-4fd5a4770eff59325ee3.js} +7 -7
- dstack/_internal/server/statics/{main-4eb116b97819badd1e2c.js.map → main-4fd5a4770eff59325ee3.js.map} +1 -1
- dstack/_internal/server/testing/common.py +58 -32
- dstack/_internal/utils/json_schema.py +6 -0
- dstack/_internal/utils/ssh.py +2 -1
- dstack/api/__init__.py +4 -0
- dstack/api/_public/__init__.py +16 -20
- dstack/api/_public/backends.py +1 -1
- dstack/api/_public/repos.py +36 -36
- dstack/api/_public/runs.py +167 -83
- dstack/api/server/__init__.py +11 -13
- dstack/api/server/_backends.py +12 -16
- dstack/api/server/_fleets.py +15 -57
- dstack/api/server/_gateways.py +3 -14
- dstack/api/server/_repos.py +1 -4
- dstack/api/server/_runs.py +21 -100
- dstack/api/server/_volumes.py +10 -5
- dstack/version.py +1 -1
- {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/METADATA +1 -1
- {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/RECORD +218 -204
- tests/_internal/cli/services/configurators/test_profile.py +6 -6
- tests/_internal/core/backends/aws/test_configurator.py +35 -0
- tests/_internal/core/backends/aws/test_resources.py +1 -1
- tests/_internal/core/backends/azure/test_configurator.py +61 -0
- tests/_internal/core/backends/cudo/__init__.py +0 -0
- tests/_internal/core/backends/cudo/test_configurator.py +37 -0
- tests/_internal/core/backends/datacrunch/__init__.py +0 -0
- tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
- tests/_internal/core/backends/gcp/test_configurator.py +42 -0
- tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
- tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
- tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
- tests/_internal/core/backends/oci/test_configurator.py +55 -0
- tests/_internal/core/backends/runpod/__init__.py +0 -0
- tests/_internal/core/backends/runpod/test_configurator.py +33 -0
- tests/_internal/core/backends/tensordock/__init__.py +0 -0
- tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
- tests/_internal/core/backends/vastai/__init__.py +0 -0
- tests/_internal/core/backends/vastai/test_configurator.py +33 -0
- tests/_internal/core/backends/vultr/__init__.py +0 -0
- tests/_internal/core/backends/vultr/test_configurator.py +33 -0
- tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
- tests/_internal/server/background/tasks/test_process_instances.py +49 -48
- tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
- tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
- tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +0 -3
- tests/_internal/server/background/tasks/test_process_running_jobs.py +0 -21
- tests/_internal/server/background/tasks/test_process_runs.py +8 -22
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
- tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
- tests/_internal/server/routers/test_backends.py +6 -764
- tests/_internal/server/routers/test_fleets.py +0 -26
- tests/_internal/server/routers/test_gateways.py +27 -3
- tests/_internal/server/routers/test_instances.py +0 -10
- tests/_internal/server/routers/test_metrics.py +27 -0
- tests/_internal/server/routers/test_projects.py +56 -0
- tests/_internal/server/routers/test_prometheus.py +116 -27
- tests/_internal/server/routers/test_repos.py +0 -15
- tests/_internal/server/routers/test_runs.py +4 -219
- tests/_internal/server/routers/test_volumes.py +2 -3
- tests/_internal/server/services/backends/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
- tests/_internal/server/services/test_config.py +7 -4
- tests/_internal/server/services/test_fleets.py +1 -4
- tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
- tests/_internal/server/services/test_metrics.py +9 -5
- tests/_internal/server/services/test_repos.py +1 -14
- tests/_internal/server/services/test_runs.py +0 -4
- dstack/_internal/cli/commands/pool.py +0 -581
- dstack/_internal/cli/commands/run.py +0 -75
- dstack/_internal/core/backends/aws/config.py +0 -18
- dstack/_internal/core/backends/azure/config.py +0 -12
- dstack/_internal/core/backends/base/config.py +0 -5
- dstack/_internal/core/backends/cudo/config.py +0 -9
- dstack/_internal/core/backends/datacrunch/config.py +0 -9
- dstack/_internal/core/backends/gcp/config.py +0 -22
- dstack/_internal/core/backends/kubernetes/config.py +0 -6
- dstack/_internal/core/backends/lambdalabs/config.py +0 -9
- dstack/_internal/core/backends/nebius/__init__.py +0 -15
- dstack/_internal/core/backends/nebius/api_client.py +0 -319
- dstack/_internal/core/backends/nebius/compute.py +0 -220
- dstack/_internal/core/backends/nebius/config.py +0 -6
- dstack/_internal/core/backends/nebius/types.py +0 -37
- dstack/_internal/core/backends/oci/config.py +0 -6
- dstack/_internal/core/backends/runpod/config.py +0 -17
- dstack/_internal/core/backends/tensordock/config.py +0 -9
- dstack/_internal/core/backends/vastai/config.py +0 -6
- dstack/_internal/core/backends/vultr/config.py +0 -9
- dstack/_internal/core/models/backends/aws.py +0 -86
- dstack/_internal/core/models/backends/azure.py +0 -68
- dstack/_internal/core/models/backends/cudo.py +0 -43
- dstack/_internal/core/models/backends/datacrunch.py +0 -44
- dstack/_internal/core/models/backends/gcp.py +0 -67
- dstack/_internal/core/models/backends/kubernetes.py +0 -40
- dstack/_internal/core/models/backends/lambdalabs.py +0 -43
- dstack/_internal/core/models/backends/nebius.py +0 -54
- dstack/_internal/core/models/backends/runpod.py +0 -42
- dstack/_internal/core/models/backends/tensordock.py +0 -44
- dstack/_internal/core/models/backends/vastai.py +0 -43
- dstack/_internal/core/models/backends/vultr.py +0 -40
- dstack/_internal/core/models/pools.py +0 -43
- dstack/_internal/server/routers/pools.py +0 -142
- dstack/_internal/server/schemas/pools.py +0 -38
- dstack/_internal/server/services/backends/configurators/base.py +0 -72
- dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
- dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
- dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
- dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
- dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
- dstack/_internal/server/services/backends/configurators/runpod.py +0 -67
- dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
- dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
- dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
- dstack/api/_public/pools.py +0 -41
- dstack/api/_public/resources.py +0 -105
- dstack/api/server/_pools.py +0 -63
- tests/_internal/server/routers/test_pools.py +0 -612
- /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
- {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/WHEEL +0 -0
- {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.44.dist-info → dstack-0.19.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -25,6 +25,7 @@ from dstack._internal.core.models.gateways import (
|
|
|
25
25
|
from dstack._internal.core.models.instances import (
|
|
26
26
|
InstanceConfiguration,
|
|
27
27
|
InstanceOfferWithAvailability,
|
|
28
|
+
SSHKey,
|
|
28
29
|
)
|
|
29
30
|
from dstack._internal.core.models.placement import PlacementGroup, PlacementGroupProvisioningData
|
|
30
31
|
from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
|
|
@@ -46,6 +47,11 @@ DSTACK_RUNNER_BINARY_PATH = f"/usr/local/bin/{DSTACK_RUNNER_BINARY_NAME}"
|
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
class Compute(ABC):
|
|
50
|
+
"""
|
|
51
|
+
A base class for all compute implementations with minimal features.
|
|
52
|
+
If a compute supports additional features, it must also subclass `ComputeWith*` classes.
|
|
53
|
+
"""
|
|
54
|
+
|
|
49
55
|
def __init__(self):
|
|
50
56
|
self._offers_cache_lock = threading.Lock()
|
|
51
57
|
self._offers_cache = TTLCache(maxsize=5, ttl=30)
|
|
@@ -54,6 +60,11 @@ class Compute(ABC):
|
|
|
54
60
|
def get_offers(
|
|
55
61
|
self, requirements: Optional[Requirements] = None
|
|
56
62
|
) -> List[InstanceOfferWithAvailability]:
|
|
63
|
+
"""
|
|
64
|
+
Returns offers with availability matching `requirements`.
|
|
65
|
+
If the provider is added to gpuhunt, typically gets offers using `base.offers.get_catalog_offers()`
|
|
66
|
+
and extends them with availability info.
|
|
67
|
+
"""
|
|
57
68
|
pass
|
|
58
69
|
|
|
59
70
|
@abstractmethod
|
|
@@ -86,6 +97,47 @@ class Compute(ABC):
|
|
|
86
97
|
"""
|
|
87
98
|
pass
|
|
88
99
|
|
|
100
|
+
def update_provisioning_data(
|
|
101
|
+
self,
|
|
102
|
+
provisioning_data: JobProvisioningData,
|
|
103
|
+
project_ssh_public_key: str,
|
|
104
|
+
project_ssh_private_key: str,
|
|
105
|
+
):
|
|
106
|
+
"""
|
|
107
|
+
This method is called if `JobProvisioningData` returned from `run_job()`/`create_instance()`
|
|
108
|
+
is not complete, e.g. missing `hostname` or `ssh_port`.
|
|
109
|
+
It can be used if getting complete provisioning data takes a long time.
|
|
110
|
+
It should not wait but return immediately.
|
|
111
|
+
If it raises `ProvisioningError`, there will be no further attempts to update the provisioning data,
|
|
112
|
+
and the run will be terminated.
|
|
113
|
+
"""
|
|
114
|
+
pass
|
|
115
|
+
|
|
116
|
+
def _get_offers_cached_key(self, requirements: Optional[Requirements] = None) -> int:
|
|
117
|
+
# Requirements is not hashable, so we use a hack to get arguments hash
|
|
118
|
+
if requirements is None:
|
|
119
|
+
return hash(None)
|
|
120
|
+
return hash(requirements.json())
|
|
121
|
+
|
|
122
|
+
@cachedmethod(
|
|
123
|
+
cache=lambda self: self._offers_cache,
|
|
124
|
+
key=_get_offers_cached_key,
|
|
125
|
+
lock=lambda self: self._offers_cache_lock,
|
|
126
|
+
)
|
|
127
|
+
def get_offers_cached(
|
|
128
|
+
self, requirements: Optional[Requirements] = None
|
|
129
|
+
) -> List[InstanceOfferWithAvailability]:
|
|
130
|
+
return self.get_offers(requirements)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class ComputeWithCreateInstanceSupport(ABC):
|
|
134
|
+
"""
|
|
135
|
+
Must be subclassed and implemented to support fleets (instance creation without running a job).
|
|
136
|
+
Typically, a compute that runs VMs would implement it,
|
|
137
|
+
and a compute that runs containers would not.
|
|
138
|
+
"""
|
|
139
|
+
|
|
140
|
+
@abstractmethod
|
|
89
141
|
def create_instance(
|
|
90
142
|
self,
|
|
91
143
|
instance_offer: InstanceOfferWithAvailability,
|
|
@@ -96,24 +148,77 @@ class Compute(ABC):
|
|
|
96
148
|
If required to wait to get the IP address or SSH port, return partially filled `JobProvisioningData`
|
|
97
149
|
and implement `update_provisioning_data()`.
|
|
98
150
|
"""
|
|
99
|
-
|
|
151
|
+
pass
|
|
100
152
|
|
|
101
|
-
def
|
|
153
|
+
def run_job(
|
|
102
154
|
self,
|
|
103
|
-
|
|
155
|
+
run: Run,
|
|
156
|
+
job: Job,
|
|
157
|
+
instance_offer: InstanceOfferWithAvailability,
|
|
104
158
|
project_ssh_public_key: str,
|
|
105
159
|
project_ssh_private_key: str,
|
|
106
|
-
|
|
160
|
+
volumes: List[Volume],
|
|
161
|
+
) -> JobProvisioningData:
|
|
107
162
|
"""
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
It can be used if getting complete provisioning data takes a long of time.
|
|
111
|
-
It should not wait but return immediately.
|
|
112
|
-
If it raises `ProvisioningError`, there will be no further attempts to update the provisioning data,
|
|
113
|
-
and the run will be terminated.
|
|
163
|
+
The default `run_job()` implementation for all backends that support `create_instance()`.
|
|
164
|
+
Override only if custom `run_job()` behavior is required.
|
|
114
165
|
"""
|
|
115
|
-
|
|
166
|
+
instance_config = InstanceConfiguration(
|
|
167
|
+
project_name=run.project_name,
|
|
168
|
+
instance_name=get_job_instance_name(run, job),
|
|
169
|
+
user=run.user,
|
|
170
|
+
ssh_keys=[SSHKey(public=project_ssh_public_key.strip())],
|
|
171
|
+
volumes=volumes,
|
|
172
|
+
reservation=run.run_spec.configuration.reservation,
|
|
173
|
+
)
|
|
174
|
+
instance_offer = instance_offer.copy()
|
|
175
|
+
self._restrict_instance_offer_az_to_volumes_az(instance_offer, volumes)
|
|
176
|
+
return self.create_instance(instance_offer, instance_config)
|
|
177
|
+
|
|
178
|
+
def _restrict_instance_offer_az_to_volumes_az(
|
|
179
|
+
self,
|
|
180
|
+
instance_offer: InstanceOfferWithAvailability,
|
|
181
|
+
volumes: List[Volume],
|
|
182
|
+
):
|
|
183
|
+
if len(volumes) == 0:
|
|
184
|
+
return
|
|
185
|
+
volume = volumes[0]
|
|
186
|
+
if (
|
|
187
|
+
volume.provisioning_data is not None
|
|
188
|
+
and volume.provisioning_data.availability_zone is not None
|
|
189
|
+
):
|
|
190
|
+
if instance_offer.availability_zones is None:
|
|
191
|
+
instance_offer.availability_zones = [volume.provisioning_data.availability_zone]
|
|
192
|
+
instance_offer.availability_zones = [
|
|
193
|
+
z
|
|
194
|
+
for z in instance_offer.availability_zones
|
|
195
|
+
if z == volume.provisioning_data.availability_zone
|
|
196
|
+
]
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class ComputeWithMultinodeSupport:
|
|
200
|
+
"""
|
|
201
|
+
Must be subclassed to support multinode tasks and cluster fleets.
|
|
202
|
+
Instances provisioned in the same project/region must be interconnected.
|
|
203
|
+
"""
|
|
204
|
+
|
|
205
|
+
pass
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class ComputeWithReservationSupport:
|
|
209
|
+
"""
|
|
210
|
+
Must be subclassed to support provisioning from reservations.
|
|
211
|
+
"""
|
|
212
|
+
|
|
213
|
+
pass
|
|
214
|
+
|
|
116
215
|
|
|
216
|
+
class ComputeWithPlacementGroupSupport(ABC):
|
|
217
|
+
"""
|
|
218
|
+
Must be subclassed and implemented to support placement groups.
|
|
219
|
+
"""
|
|
220
|
+
|
|
221
|
+
@abstractmethod
|
|
117
222
|
def create_placement_group(
|
|
118
223
|
self,
|
|
119
224
|
placement_group: PlacementGroup,
|
|
@@ -121,8 +226,9 @@ class Compute(ABC):
|
|
|
121
226
|
"""
|
|
122
227
|
Creates a placement group.
|
|
123
228
|
"""
|
|
124
|
-
|
|
229
|
+
pass
|
|
125
230
|
|
|
231
|
+
@abstractmethod
|
|
126
232
|
def delete_placement_group(
|
|
127
233
|
self,
|
|
128
234
|
placement_group: PlacementGroup,
|
|
@@ -131,8 +237,15 @@ class Compute(ABC):
|
|
|
131
237
|
Deletes a placement group.
|
|
132
238
|
If the group does not exist, it should not raise errors but return silently.
|
|
133
239
|
"""
|
|
134
|
-
|
|
240
|
+
pass
|
|
241
|
+
|
|
135
242
|
|
|
243
|
+
class ComputeWithGatewaySupport(ABC):
|
|
244
|
+
"""
|
|
245
|
+
Must be subclassed and implemented to support gateways.
|
|
246
|
+
"""
|
|
247
|
+
|
|
248
|
+
@abstractmethod
|
|
136
249
|
def create_gateway(
|
|
137
250
|
self,
|
|
138
251
|
configuration: GatewayComputeConfiguration,
|
|
@@ -140,8 +253,9 @@ class Compute(ABC):
|
|
|
140
253
|
"""
|
|
141
254
|
Creates a gateway instance.
|
|
142
255
|
"""
|
|
143
|
-
|
|
256
|
+
pass
|
|
144
257
|
|
|
258
|
+
@abstractmethod
|
|
145
259
|
def terminate_gateway(
|
|
146
260
|
self,
|
|
147
261
|
instance_id: str,
|
|
@@ -152,21 +266,39 @@ class Compute(ABC):
|
|
|
152
266
|
Terminates a gateway instance. Generally, it passes the call to `terminate_instance()`,
|
|
153
267
|
but may perform additional work such as deleting a load balancer when a gateway has one.
|
|
154
268
|
"""
|
|
155
|
-
|
|
269
|
+
pass
|
|
156
270
|
|
|
271
|
+
|
|
272
|
+
class ComputeWithPrivateGatewaySupport:
|
|
273
|
+
"""
|
|
274
|
+
Must be subclassed to support private gateways.
|
|
275
|
+
`create_gateway()` must be able to create private gateways.
|
|
276
|
+
"""
|
|
277
|
+
|
|
278
|
+
pass
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class ComputeWithVolumeSupport(ABC):
|
|
282
|
+
"""
|
|
283
|
+
Must be subclassed and implemented to support volumes.
|
|
284
|
+
"""
|
|
285
|
+
|
|
286
|
+
@abstractmethod
|
|
157
287
|
def register_volume(self, volume: Volume) -> VolumeProvisioningData:
|
|
158
288
|
"""
|
|
159
289
|
Returns VolumeProvisioningData for an existing volume.
|
|
160
290
|
Used to add external volumes to dstack.
|
|
161
291
|
"""
|
|
162
|
-
|
|
292
|
+
pass
|
|
163
293
|
|
|
294
|
+
@abstractmethod
|
|
164
295
|
def create_volume(self, volume: Volume) -> VolumeProvisioningData:
|
|
165
296
|
"""
|
|
166
297
|
Creates a new volume.
|
|
167
298
|
"""
|
|
168
299
|
raise NotImplementedError()
|
|
169
300
|
|
|
301
|
+
@abstractmethod
|
|
170
302
|
def delete_volume(self, volume: Volume):
|
|
171
303
|
"""
|
|
172
304
|
Deletes a volume.
|
|
@@ -176,13 +308,17 @@ class Compute(ABC):
|
|
|
176
308
|
def attach_volume(self, volume: Volume, instance_id: str) -> VolumeAttachmentData:
|
|
177
309
|
"""
|
|
178
310
|
Attaches a volume to the instance.
|
|
179
|
-
If the volume is not found, it should raise `ComputeError()
|
|
311
|
+
If the volume is not found, it should raise `ComputeError()`.
|
|
312
|
+
Implement only if compute may return `VolumeProvisioningData.attachable`.
|
|
313
|
+
Otherwise, volumes should be attached by `run_job()`.
|
|
180
314
|
"""
|
|
181
315
|
raise NotImplementedError()
|
|
182
316
|
|
|
183
317
|
def detach_volume(self, volume: Volume, instance_id: str, force: bool = False):
|
|
184
318
|
"""
|
|
185
319
|
Detaches a volume from the instance.
|
|
320
|
+
Implement only if compute may return `VolumeProvisioningData.detachable`.
|
|
321
|
+
Otherwise, volumes should be detached on instance termination.
|
|
186
322
|
"""
|
|
187
323
|
raise NotImplementedError()
|
|
188
324
|
|
|
@@ -195,22 +331,6 @@ class Compute(ABC):
|
|
|
195
331
|
"""
|
|
196
332
|
return True
|
|
197
333
|
|
|
198
|
-
def _get_offers_cached_key(self, requirements: Optional[Requirements] = None) -> int:
|
|
199
|
-
# Requirements is not hashable, so we use a hack to get arguments hash
|
|
200
|
-
if requirements is None:
|
|
201
|
-
return hash(None)
|
|
202
|
-
return hash(requirements.json())
|
|
203
|
-
|
|
204
|
-
@cachedmethod(
|
|
205
|
-
cache=lambda self: self._offers_cache,
|
|
206
|
-
key=_get_offers_cached_key,
|
|
207
|
-
lock=lambda self: self._offers_cache_lock,
|
|
208
|
-
)
|
|
209
|
-
def get_offers_cached(
|
|
210
|
-
self, requirements: Optional[Requirements] = None
|
|
211
|
-
) -> List[InstanceOfferWithAvailability]:
|
|
212
|
-
return self.get_offers(requirements)
|
|
213
|
-
|
|
214
334
|
|
|
215
335
|
def get_job_instance_name(run: Run, job: Job) -> str:
|
|
216
336
|
return job.job_spec.job_name
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any, ClassVar, List, Optional
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
from dstack._internal.core.backends.base.backend import Backend
|
|
6
|
+
from dstack._internal.core.backends.models import (
|
|
7
|
+
AnyBackendConfig,
|
|
8
|
+
AnyBackendConfigWithCreds,
|
|
9
|
+
)
|
|
10
|
+
from dstack._internal.core.errors import BackendInvalidCredentialsError
|
|
11
|
+
from dstack._internal.core.models.backends.base import BackendType
|
|
12
|
+
from dstack._internal.core.models.common import CoreModel
|
|
13
|
+
|
|
14
|
+
# Most clouds allow ~ 40-60 tags/labels per resource.
|
|
15
|
+
# We'll introduce our own base limit that can be customized per backend if required.
|
|
16
|
+
TAGS_MAX_NUM = 25
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BackendRecord(CoreModel):
|
|
20
|
+
"""
|
|
21
|
+
This model includes backend parameters to store in the DB.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
# `config` stores text-encoded non-sensitive backend config parameters (e.g. json)
|
|
25
|
+
config: str
|
|
26
|
+
# `auth` stores text-encoded sensitive backend config parameters (e.g. json).
|
|
27
|
+
# Configurator should not encrypt/decrypt it. This is done by the caller.
|
|
28
|
+
auth: str
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class StoredBackendRecord(BackendRecord):
|
|
32
|
+
"""
|
|
33
|
+
This model includes backend parameters stored in the DB.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
# IDs of DB models.
|
|
37
|
+
# Can be used by externally-registered Configurator to work with the DB directly.
|
|
38
|
+
project_id: UUID
|
|
39
|
+
backend_id: UUID
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Configurator(ABC):
|
|
43
|
+
"""
|
|
44
|
+
`Configurator` is responsible for configuring backends
|
|
45
|
+
and initializing `Backend` instances from backend configs.
|
|
46
|
+
Every backend must implement `Configurator` and register it
|
|
47
|
+
in `dstack._internal.core.backends.configurators`.
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
TYPE: ClassVar[BackendType]
|
|
51
|
+
# `BACKEND_CLASS` is used to introspect backend features without initializing it.
|
|
52
|
+
BACKEND_CLASS: ClassVar[type[Backend]]
|
|
53
|
+
|
|
54
|
+
@abstractmethod
|
|
55
|
+
def validate_config(self, config: AnyBackendConfigWithCreds, default_creds_enabled: bool):
|
|
56
|
+
"""
|
|
57
|
+
Validates backend config including backend creds and other parameters.
|
|
58
|
+
Raises `ServerClientError` or its subclass if config is invalid.
|
|
59
|
+
If the backend supports default creds and not `default_creds_enabled`, should raise an error.
|
|
60
|
+
"""
|
|
61
|
+
pass
|
|
62
|
+
|
|
63
|
+
@abstractmethod
|
|
64
|
+
def create_backend(
|
|
65
|
+
self, project_name: str, config: AnyBackendConfigWithCreds
|
|
66
|
+
) -> BackendRecord:
|
|
67
|
+
"""
|
|
68
|
+
Sets up backend given backend config and returns
|
|
69
|
+
text-encoded config and creds to be stored in the DB.
|
|
70
|
+
It may perform backend initialization, create
|
|
71
|
+
cloud resources such as networks and managed identities, and
|
|
72
|
+
save additional configuration parameters.
|
|
73
|
+
It does not need to duplicate validation done by `validate_config()`
|
|
74
|
+
since the caller guarantees to call `validate_config()` first.
|
|
75
|
+
It may perform additional validation not possible in `validate_config()`
|
|
76
|
+
and raise `ServerClientError` or its subclass if config is invalid.
|
|
77
|
+
"""
|
|
78
|
+
pass
|
|
79
|
+
|
|
80
|
+
@abstractmethod
|
|
81
|
+
def get_backend_config(
|
|
82
|
+
self, record: StoredBackendRecord, include_creds: bool
|
|
83
|
+
) -> AnyBackendConfig:
|
|
84
|
+
"""
|
|
85
|
+
Constructs `BackendConfig` to be returned in API responses.
|
|
86
|
+
Project admins may need to see backend's creds. In this case `include_creds` will be `True`.
|
|
87
|
+
Otherwise, no sensitive information should be included.
|
|
88
|
+
"""
|
|
89
|
+
pass
|
|
90
|
+
|
|
91
|
+
@abstractmethod
|
|
92
|
+
def get_backend(self, record: StoredBackendRecord) -> Backend:
|
|
93
|
+
"""
|
|
94
|
+
Returns `Backend` instance from config and creds stored in `record`.
|
|
95
|
+
"""
|
|
96
|
+
pass
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def raise_invalid_credentials_error(
    fields: Optional[List[List[str]]] = None, details: Optional[Any] = None
):
    """
    Raise `BackendInvalidCredentialsError`; this function never returns normally.

    The message starts from the error class's own `msg`. When `details` is
    truthy it is appended after ": ". `fields` is forwarded to the error as is.
    """
    base_msg = BackendInvalidCredentialsError.msg
    full_msg = base_msg + f": {details}" if details else base_msg
    raise BackendInvalidCredentialsError(fields=fields, msg=full_msg)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def fill_data(values: dict):
    """
    Make sure `values["data"]` is set, reading it from `values["filename"]` if needed.

    If `values["data"]` is already present and not `None`, `values` is returned
    untouched. Otherwise the file named by `values["filename"]` (with `~`
    expanded) is read as text and its contents are stored under `"data"`.

    Args:
        values: Mapping with an optional `"data"` entry and an optional
            `"filename"` entry. It is updated in place.

    Returns:
        The same `values` dict, with `"data"` populated.

    Raises:
        ValueError: If neither `"data"` nor `"filename"` is provided, or if the
            file cannot be opened or read. In the latter case the underlying
            `OSError` is attached as `__cause__`.
    """
    if values.get("data") is not None:
        return values
    # Treat an explicit `filename=None` like a missing key, so that it is
    # reported as a ValueError instead of a TypeError from `Path(None)`.
    filename = values.get("filename")
    if filename is None:
        raise ValueError("Either `data` or `filename` must be specified")
    try:
        with open(Path(filename).expanduser()) as f:
            values["data"] = f.read()
    except OSError as e:
        # Chain the original error so the real reason (missing file,
        # permissions, ...) is not lost.
        raise ValueError(f"No such file {filename}") from e
    return values
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from typing import List, Optional, Type, Union
|
|
2
|
+
|
|
3
|
+
from dstack._internal.core.backends.base.configurator import Configurator
|
|
4
|
+
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
+
|
|
6
|
+
_CONFIGURATOR_CLASSES: List[Type[Configurator]] = []
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from dstack._internal.core.backends.aws.configurator import AWSConfigurator
|
|
11
|
+
|
|
12
|
+
_CONFIGURATOR_CLASSES.append(AWSConfigurator)
|
|
13
|
+
except ImportError:
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from dstack._internal.core.backends.azure.configurator import AzureConfigurator
|
|
18
|
+
|
|
19
|
+
_CONFIGURATOR_CLASSES.append(AzureConfigurator)
|
|
20
|
+
except ImportError:
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
from dstack._internal.core.backends.cudo.configurator import (
|
|
25
|
+
CudoConfigurator,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
_CONFIGURATOR_CLASSES.append(CudoConfigurator)
|
|
29
|
+
except ImportError:
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
from dstack._internal.core.backends.datacrunch.configurator import (
|
|
34
|
+
DataCrunchConfigurator,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
_CONFIGURATOR_CLASSES.append(DataCrunchConfigurator)
|
|
38
|
+
except ImportError:
|
|
39
|
+
pass
|
|
40
|
+
|
|
41
|
+
try:
|
|
42
|
+
from dstack._internal.core.backends.gcp.configurator import GCPConfigurator
|
|
43
|
+
|
|
44
|
+
_CONFIGURATOR_CLASSES.append(GCPConfigurator)
|
|
45
|
+
except ImportError:
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
try:
|
|
49
|
+
from dstack._internal.core.backends.kubernetes.configurator import (
|
|
50
|
+
KubernetesConfigurator,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
_CONFIGURATOR_CLASSES.append(KubernetesConfigurator)
|
|
54
|
+
except ImportError:
|
|
55
|
+
pass
|
|
56
|
+
|
|
57
|
+
try:
|
|
58
|
+
from dstack._internal.core.backends.lambdalabs.configurator import (
|
|
59
|
+
LambdaConfigurator,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
_CONFIGURATOR_CLASSES.append(LambdaConfigurator)
|
|
63
|
+
except ImportError:
|
|
64
|
+
pass
|
|
65
|
+
|
|
66
|
+
try:
|
|
67
|
+
from dstack._internal.core.backends.oci.configurator import OCIConfigurator
|
|
68
|
+
|
|
69
|
+
_CONFIGURATOR_CLASSES.append(OCIConfigurator)
|
|
70
|
+
except ImportError:
|
|
71
|
+
pass
|
|
72
|
+
|
|
73
|
+
try:
|
|
74
|
+
from dstack._internal.core.backends.runpod.configurator import RunpodConfigurator
|
|
75
|
+
|
|
76
|
+
_CONFIGURATOR_CLASSES.append(RunpodConfigurator)
|
|
77
|
+
except ImportError:
|
|
78
|
+
pass
|
|
79
|
+
|
|
80
|
+
try:
|
|
81
|
+
from dstack._internal.core.backends.tensordock.configurator import (
|
|
82
|
+
TensorDockConfigurator,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
_CONFIGURATOR_CLASSES.append(TensorDockConfigurator)
|
|
86
|
+
except ImportError:
|
|
87
|
+
pass
|
|
88
|
+
|
|
89
|
+
try:
|
|
90
|
+
from dstack._internal.core.backends.vastai.configurator import VastAIConfigurator
|
|
91
|
+
|
|
92
|
+
_CONFIGURATOR_CLASSES.append(VastAIConfigurator)
|
|
93
|
+
except ImportError:
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
try:
|
|
97
|
+
from dstack._internal.core.backends.vultr.configurator import VultrConfigurator
|
|
98
|
+
|
|
99
|
+
_CONFIGURATOR_CLASSES.append(VultrConfigurator)
|
|
100
|
+
except ImportError:
|
|
101
|
+
pass
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
_BACKEND_TYPE_TO_CONFIGURATOR_CLASS_MAP = {c.TYPE: c for c in _CONFIGURATOR_CLASSES}
|
|
105
|
+
_BACKEND_TYPES = [c.TYPE for c in _CONFIGURATOR_CLASSES]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_configurator(backend_type: Union[BackendType, str]) -> Optional[Configurator]:
    """
    Instantiate the configurator registered for `backend_type`.

    `backend_type` is first converted with `BackendType(...)`. If no
    configurator class is registered for the resulting type, `None` is
    returned; otherwise a new instance of the registered class is returned.
    """
    registered_class = _BACKEND_TYPE_TO_CONFIGURATOR_CLASS_MAP.get(BackendType(backend_type))
    return None if registered_class is None else registered_class()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def list_available_backend_types() -> List[BackendType]:
    """
    Lists all backend types available on the server.

    The result is the `TYPE` of every configurator class that was imported
    successfully when this module was loaded.

    Returns:
        A new list on every call, so callers may modify the result without
        affecting the module-level registry.
    """
    # Return a copy: handing out the module-level list itself would let any
    # caller mutate shared state by accident.
    return list(_BACKEND_TYPES)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def list_available_configurator_classes() -> List[Type[Configurator]]:
    """
    Lists all backend configurator classes available on the server.

    The result contains every configurator class that was imported
    successfully when this module was loaded.

    Returns:
        A new list on every call, so callers may modify the result without
        affecting the module-level registry.
    """
    # Return a copy: handing out the module-level list itself would let any
    # caller mutate shared state by accident.
    return list(_CONFIGURATOR_CLASSES)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def register_configurator(configurator: Type[Configurator]):
    """
    A hook for registering new configurators without importing them here.
    Can be used to extend dstack functionality.

    The class is stored under its `TYPE`, replacing any configurator class
    previously mapped to that backend type.
    """
    backend_type = configurator.TYPE
    _BACKEND_TYPE_TO_CONFIGURATOR_CLASS_MAP[backend_type] = configurator
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from dstack._internal.core.backends.base import Backend
|
|
2
|
-
from dstack._internal.core.backends.cudo.compute import CudoCompute
|
|
3
|
-
from dstack._internal.core.backends.cudo.config import CudoConfig
|
|
4
|
-
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class CudoBackend(Backend):
    """Backend of type `BackendType.CUDO`: holds a `CudoConfig` and the `CudoCompute` built from it."""

    TYPE: BackendType = BackendType.CUDO

    def __init__(self, config: CudoConfig):
        # The compute object is created once, up front, from the given config.
        self.config = config
        self._compute = CudoCompute(config)

    def compute(self) -> CudoCompute:
        """Return the `CudoCompute` instance created in `__init__`."""
        return self._compute
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from dstack._internal.core.backends.base.backend import Backend
|
|
2
|
+
from dstack._internal.core.backends.cudo.compute import CudoCompute
|
|
3
|
+
from dstack._internal.core.backends.cudo.models import CudoConfig
|
|
4
|
+
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CudoBackend(Backend):
    """Backend of type `BackendType.CUDO`: holds a `CudoConfig` and the `CudoCompute` built from it."""

    TYPE = BackendType.CUDO
    COMPUTE_CLASS = CudoCompute

    def __init__(self, config: CudoConfig):
        # The compute object is created once, up front, from the given config.
        self.config = config
        self._compute = CudoCompute(config)

    def compute(self) -> CudoCompute:
        """Return the `CudoCompute` instance created in `__init__`."""
        return self._compute
|
|
@@ -2,25 +2,23 @@ from typing import List, Optional
|
|
|
2
2
|
|
|
3
3
|
import requests
|
|
4
4
|
|
|
5
|
-
from dstack._internal.core.backends.base import Compute
|
|
5
|
+
from dstack._internal.core.backends.base.backend import Compute
|
|
6
6
|
from dstack._internal.core.backends.base.compute import (
|
|
7
|
+
ComputeWithCreateInstanceSupport,
|
|
7
8
|
generate_unique_instance_name,
|
|
8
|
-
get_job_instance_name,
|
|
9
9
|
get_shim_commands,
|
|
10
10
|
)
|
|
11
11
|
from dstack._internal.core.backends.base.offers import get_catalog_offers
|
|
12
12
|
from dstack._internal.core.backends.cudo.api_client import CudoApiClient
|
|
13
|
-
from dstack._internal.core.backends.cudo.
|
|
13
|
+
from dstack._internal.core.backends.cudo.models import CudoConfig
|
|
14
14
|
from dstack._internal.core.errors import BackendError, NoCapacityError, ProvisioningError
|
|
15
15
|
from dstack._internal.core.models.backends.base import BackendType
|
|
16
16
|
from dstack._internal.core.models.instances import (
|
|
17
17
|
InstanceAvailability,
|
|
18
18
|
InstanceConfiguration,
|
|
19
19
|
InstanceOfferWithAvailability,
|
|
20
|
-
SSHKey,
|
|
21
20
|
)
|
|
22
|
-
from dstack._internal.core.models.runs import
|
|
23
|
-
from dstack._internal.core.models.volumes import Volume
|
|
21
|
+
from dstack._internal.core.models.runs import JobProvisioningData, Requirements
|
|
24
22
|
from dstack._internal.utils.logging import get_logger
|
|
25
23
|
|
|
26
24
|
logger = get_logger(__name__)
|
|
@@ -29,7 +27,10 @@ logger = get_logger(__name__)
|
|
|
29
27
|
MAX_RESOURCE_NAME_LEN = 30
|
|
30
28
|
|
|
31
29
|
|
|
32
|
-
class CudoCompute(
|
|
30
|
+
class CudoCompute(
|
|
31
|
+
ComputeWithCreateInstanceSupport,
|
|
32
|
+
Compute,
|
|
33
|
+
):
|
|
33
34
|
def __init__(self, config: CudoConfig):
|
|
34
35
|
super().__init__()
|
|
35
36
|
self.config = config
|
|
@@ -51,25 +52,6 @@ class CudoCompute(Compute):
|
|
|
51
52
|
]
|
|
52
53
|
return offers
|
|
53
54
|
|
|
54
|
-
def run_job(
    self,
    run: Run,
    job: Job,
    instance_offer: InstanceOfferWithAvailability,
    project_ssh_public_key: str,
    project_ssh_private_key: str,
    volumes: List[Volume],
) -> JobProvisioningData:
    """
    Provision an instance for `job` by delegating to `create_instance()`.

    The instance configuration takes its project name and user from `run`,
    its name from `get_job_instance_name(run, job)`, and a single SSH key
    made from the whitespace-stripped project public key.
    `project_ssh_private_key` and `volumes` are accepted but not used here.
    """
    job_instance_config = InstanceConfiguration(
        project_name=run.project_name,
        instance_name=get_job_instance_name(run, job),
        ssh_keys=[SSHKey(public=project_ssh_public_key.strip())],
        user=run.user,
    )
    return self.create_instance(instance_offer, job_instance_config)
|
|
72
|
-
|
|
73
55
|
def create_instance(
|
|
74
56
|
self,
|
|
75
57
|
instance_offer: InstanceOfferWithAvailability,
|