dstack 0.18.43__py3-none-any.whl → 0.19.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/gateway.py +15 -3
- dstack/_internal/cli/commands/logs.py +0 -22
- dstack/_internal/cli/commands/stats.py +8 -17
- dstack/_internal/cli/main.py +1 -5
- dstack/_internal/cli/services/configurators/fleet.py +4 -39
- dstack/_internal/cli/services/configurators/run.py +22 -20
- dstack/_internal/cli/services/profile.py +34 -83
- dstack/_internal/cli/utils/gateway.py +1 -1
- dstack/_internal/cli/utils/run.py +11 -0
- dstack/_internal/core/backends/__init__.py +56 -39
- dstack/_internal/core/backends/aws/__init__.py +0 -25
- dstack/_internal/core/backends/aws/auth.py +1 -10
- dstack/_internal/core/backends/aws/backend.py +26 -0
- dstack/_internal/core/backends/aws/compute.py +21 -45
- dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
- dstack/_internal/core/backends/aws/models.py +135 -0
- dstack/_internal/core/backends/aws/resources.py +1 -1
- dstack/_internal/core/backends/azure/__init__.py +0 -20
- dstack/_internal/core/backends/azure/auth.py +2 -11
- dstack/_internal/core/backends/azure/backend.py +21 -0
- dstack/_internal/core/backends/azure/compute.py +14 -28
- dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
- dstack/_internal/core/backends/azure/models.py +89 -0
- dstack/_internal/core/backends/base/__init__.py +0 -12
- dstack/_internal/core/backends/base/backend.py +18 -0
- dstack/_internal/core/backends/base/compute.py +153 -33
- dstack/_internal/core/backends/base/configurator.py +105 -0
- dstack/_internal/core/backends/base/models.py +14 -0
- dstack/_internal/core/backends/configurators.py +138 -0
- dstack/_internal/core/backends/cudo/__init__.py +0 -15
- dstack/_internal/core/backends/cudo/backend.py +16 -0
- dstack/_internal/core/backends/cudo/compute.py +8 -26
- dstack/_internal/core/backends/cudo/configurator.py +72 -0
- dstack/_internal/core/backends/cudo/models.py +37 -0
- dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
- dstack/_internal/core/backends/datacrunch/backend.py +16 -0
- dstack/_internal/core/backends/datacrunch/compute.py +8 -25
- dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
- dstack/_internal/core/backends/datacrunch/models.py +38 -0
- dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
- dstack/_internal/core/backends/gcp/__init__.py +0 -16
- dstack/_internal/core/backends/gcp/auth.py +2 -11
- dstack/_internal/core/backends/gcp/backend.py +17 -0
- dstack/_internal/core/backends/gcp/compute.py +14 -44
- dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
- dstack/_internal/core/backends/gcp/models.py +125 -0
- dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
- dstack/_internal/core/backends/kubernetes/backend.py +16 -0
- dstack/_internal/core/backends/kubernetes/compute.py +16 -5
- dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
- dstack/_internal/core/backends/kubernetes/models.py +72 -0
- dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
- dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
- dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
- dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
- dstack/_internal/core/backends/lambdalabs/models.py +37 -0
- dstack/_internal/core/backends/local/__init__.py +0 -13
- dstack/_internal/core/backends/local/backend.py +14 -0
- dstack/_internal/core/backends/local/compute.py +16 -2
- dstack/_internal/core/backends/models.py +128 -0
- dstack/_internal/core/backends/oci/__init__.py +0 -15
- dstack/_internal/core/backends/oci/auth.py +1 -5
- dstack/_internal/core/backends/oci/backend.py +16 -0
- dstack/_internal/core/backends/oci/compute.py +9 -23
- dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
- dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
- dstack/_internal/core/backends/oci/region.py +1 -1
- dstack/_internal/core/backends/runpod/__init__.py +0 -15
- dstack/_internal/core/backends/runpod/backend.py +16 -0
- dstack/_internal/core/backends/runpod/compute.py +28 -6
- dstack/_internal/core/backends/runpod/configurator.py +59 -0
- dstack/_internal/core/backends/runpod/models.py +54 -0
- dstack/_internal/core/backends/template/__init__.py +0 -0
- dstack/_internal/core/backends/tensordock/__init__.py +0 -15
- dstack/_internal/core/backends/tensordock/backend.py +16 -0
- dstack/_internal/core/backends/tensordock/compute.py +8 -27
- dstack/_internal/core/backends/tensordock/configurator.py +68 -0
- dstack/_internal/core/backends/tensordock/models.py +38 -0
- dstack/_internal/core/backends/vastai/__init__.py +0 -15
- dstack/_internal/core/backends/vastai/backend.py +16 -0
- dstack/_internal/core/backends/vastai/compute.py +2 -2
- dstack/_internal/core/backends/vastai/configurator.py +66 -0
- dstack/_internal/core/backends/vastai/models.py +37 -0
- dstack/_internal/core/backends/vultr/__init__.py +0 -15
- dstack/_internal/core/backends/vultr/backend.py +16 -0
- dstack/_internal/core/backends/vultr/compute.py +10 -24
- dstack/_internal/core/backends/vultr/configurator.py +64 -0
- dstack/_internal/core/backends/vultr/models.py +34 -0
- dstack/_internal/core/models/backends/__init__.py +0 -184
- dstack/_internal/core/models/backends/base.py +0 -19
- dstack/_internal/core/models/configurations.py +22 -16
- dstack/_internal/core/models/envs.py +4 -3
- dstack/_internal/core/models/fleets.py +17 -22
- dstack/_internal/core/models/gateways.py +3 -3
- dstack/_internal/core/models/instances.py +24 -0
- dstack/_internal/core/models/profiles.py +85 -45
- dstack/_internal/core/models/projects.py +1 -1
- dstack/_internal/core/models/repos/base.py +0 -5
- dstack/_internal/core/models/repos/local.py +3 -3
- dstack/_internal/core/models/repos/remote.py +26 -12
- dstack/_internal/core/models/repos/virtual.py +1 -1
- dstack/_internal/core/models/resources.py +45 -76
- dstack/_internal/core/models/runs.py +21 -19
- dstack/_internal/core/models/volumes.py +1 -3
- dstack/_internal/core/services/profiles.py +7 -16
- dstack/_internal/core/services/repos.py +0 -4
- dstack/_internal/server/app.py +11 -4
- dstack/_internal/server/background/__init__.py +10 -0
- dstack/_internal/server/background/tasks/process_gateways.py +4 -8
- dstack/_internal/server/background/tasks/process_instances.py +14 -9
- dstack/_internal/server/background/tasks/process_metrics.py +1 -1
- dstack/_internal/server/background/tasks/process_placement_groups.py +5 -1
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
- dstack/_internal/server/background/tasks/process_running_jobs.py +80 -24
- dstack/_internal/server/background/tasks/process_runs.py +1 -0
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +20 -38
- dstack/_internal/server/background/tasks/process_volumes.py +5 -2
- dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
- dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
- dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
- dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
- dstack/_internal/server/models.py +59 -9
- dstack/_internal/server/routers/backends.py +14 -23
- dstack/_internal/server/routers/instances.py +3 -4
- dstack/_internal/server/routers/metrics.py +31 -10
- dstack/_internal/server/routers/prometheus.py +36 -0
- dstack/_internal/server/routers/repos.py +1 -2
- dstack/_internal/server/routers/runs.py +13 -59
- dstack/_internal/server/schemas/gateways.py +14 -23
- dstack/_internal/server/schemas/projects.py +7 -2
- dstack/_internal/server/schemas/repos.py +2 -38
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/schemas/runs.py +1 -24
- dstack/_internal/server/security/permissions.py +1 -1
- dstack/_internal/server/services/backends/__init__.py +85 -158
- dstack/_internal/server/services/config.py +53 -567
- dstack/_internal/server/services/fleets.py +9 -103
- dstack/_internal/server/services/gateways/__init__.py +13 -4
- dstack/_internal/server/services/{pools.py → instances.py} +22 -329
- dstack/_internal/server/services/jobs/__init__.py +9 -6
- dstack/_internal/server/services/jobs/configurators/base.py +25 -1
- dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
- dstack/_internal/server/services/metrics.py +131 -72
- dstack/_internal/server/services/offers.py +1 -1
- dstack/_internal/server/services/projects.py +23 -14
- dstack/_internal/server/services/prometheus.py +245 -0
- dstack/_internal/server/services/runner/client.py +14 -3
- dstack/_internal/server/services/runs.py +67 -31
- dstack/_internal/server/services/volumes.py +9 -4
- dstack/_internal/server/settings.py +3 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4fd5a4770eff59325ee3.js} +68 -15
- dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4fd5a4770eff59325ee3.js.map} +1 -1
- dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
- dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
- dstack/_internal/server/testing/common.py +75 -32
- dstack/_internal/utils/json_schema.py +6 -0
- dstack/_internal/utils/ssh.py +2 -1
- dstack/api/__init__.py +4 -0
- dstack/api/_public/__init__.py +16 -20
- dstack/api/_public/backends.py +1 -1
- dstack/api/_public/repos.py +36 -36
- dstack/api/_public/runs.py +170 -83
- dstack/api/server/__init__.py +11 -13
- dstack/api/server/_backends.py +12 -16
- dstack/api/server/_fleets.py +15 -55
- dstack/api/server/_gateways.py +3 -14
- dstack/api/server/_repos.py +1 -4
- dstack/api/server/_runs.py +21 -96
- dstack/api/server/_volumes.py +10 -5
- dstack/api/utils.py +3 -0
- dstack/version.py +1 -1
- {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/METADATA +10 -1
- {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/RECORD +229 -206
- tests/_internal/cli/services/configurators/test_profile.py +6 -6
- tests/_internal/core/backends/aws/test_configurator.py +35 -0
- tests/_internal/core/backends/aws/test_resources.py +1 -1
- tests/_internal/core/backends/azure/test_configurator.py +61 -0
- tests/_internal/core/backends/cudo/__init__.py +0 -0
- tests/_internal/core/backends/cudo/test_configurator.py +37 -0
- tests/_internal/core/backends/datacrunch/__init__.py +0 -0
- tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
- tests/_internal/core/backends/gcp/test_configurator.py +42 -0
- tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
- tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
- tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
- tests/_internal/core/backends/oci/test_configurator.py +55 -0
- tests/_internal/core/backends/runpod/__init__.py +0 -0
- tests/_internal/core/backends/runpod/test_configurator.py +33 -0
- tests/_internal/core/backends/tensordock/__init__.py +0 -0
- tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
- tests/_internal/core/backends/vastai/__init__.py +0 -0
- tests/_internal/core/backends/vastai/test_configurator.py +33 -0
- tests/_internal/core/backends/vultr/__init__.py +0 -0
- tests/_internal/core/backends/vultr/test_configurator.py +33 -0
- tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
- tests/_internal/server/background/tasks/test_process_instances.py +49 -48
- tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
- tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
- tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +186 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +123 -19
- tests/_internal/server/background/tasks/test_process_runs.py +8 -22
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
- tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
- tests/_internal/server/routers/test_backends.py +6 -764
- tests/_internal/server/routers/test_fleets.py +2 -26
- tests/_internal/server/routers/test_gateways.py +27 -3
- tests/_internal/server/routers/test_instances.py +0 -10
- tests/_internal/server/routers/test_metrics.py +42 -0
- tests/_internal/server/routers/test_projects.py +56 -0
- tests/_internal/server/routers/test_prometheus.py +333 -0
- tests/_internal/server/routers/test_repos.py +0 -15
- tests/_internal/server/routers/test_runs.py +83 -275
- tests/_internal/server/routers/test_volumes.py +2 -3
- tests/_internal/server/services/backends/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
- tests/_internal/server/services/test_config.py +7 -4
- tests/_internal/server/services/test_fleets.py +1 -4
- tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
- tests/_internal/server/services/test_metrics.py +167 -0
- tests/_internal/server/services/test_repos.py +1 -14
- tests/_internal/server/services/test_runs.py +0 -4
- dstack/_internal/cli/commands/pool.py +0 -581
- dstack/_internal/cli/commands/run.py +0 -75
- dstack/_internal/core/backends/aws/config.py +0 -18
- dstack/_internal/core/backends/azure/config.py +0 -12
- dstack/_internal/core/backends/base/config.py +0 -5
- dstack/_internal/core/backends/cudo/config.py +0 -9
- dstack/_internal/core/backends/datacrunch/config.py +0 -9
- dstack/_internal/core/backends/gcp/config.py +0 -22
- dstack/_internal/core/backends/kubernetes/config.py +0 -6
- dstack/_internal/core/backends/lambdalabs/config.py +0 -9
- dstack/_internal/core/backends/nebius/__init__.py +0 -15
- dstack/_internal/core/backends/nebius/api_client.py +0 -319
- dstack/_internal/core/backends/nebius/compute.py +0 -220
- dstack/_internal/core/backends/nebius/config.py +0 -6
- dstack/_internal/core/backends/nebius/types.py +0 -37
- dstack/_internal/core/backends/oci/config.py +0 -6
- dstack/_internal/core/backends/runpod/config.py +0 -9
- dstack/_internal/core/backends/tensordock/config.py +0 -9
- dstack/_internal/core/backends/vastai/config.py +0 -6
- dstack/_internal/core/backends/vultr/config.py +0 -9
- dstack/_internal/core/models/backends/aws.py +0 -86
- dstack/_internal/core/models/backends/azure.py +0 -68
- dstack/_internal/core/models/backends/cudo.py +0 -43
- dstack/_internal/core/models/backends/datacrunch.py +0 -44
- dstack/_internal/core/models/backends/gcp.py +0 -67
- dstack/_internal/core/models/backends/kubernetes.py +0 -40
- dstack/_internal/core/models/backends/lambdalabs.py +0 -43
- dstack/_internal/core/models/backends/nebius.py +0 -54
- dstack/_internal/core/models/backends/runpod.py +0 -40
- dstack/_internal/core/models/backends/tensordock.py +0 -44
- dstack/_internal/core/models/backends/vastai.py +0 -43
- dstack/_internal/core/models/backends/vultr.py +0 -40
- dstack/_internal/core/models/pools.py +0 -43
- dstack/_internal/server/routers/pools.py +0 -142
- dstack/_internal/server/schemas/pools.py +0 -38
- dstack/_internal/server/services/backends/configurators/base.py +0 -72
- dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
- dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
- dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
- dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
- dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
- dstack/_internal/server/services/backends/configurators/runpod.py +0 -97
- dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
- dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
- dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
- dstack/api/_public/pools.py +0 -41
- dstack/api/_public/resources.py +0 -105
- dstack/api/server/_pools.py +0 -63
- tests/_internal/server/routers/test_pools.py +0 -612
- /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/WHEEL +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,18 +1,10 @@
|
|
|
1
|
-
from typing import Dict
|
|
1
|
+
from typing import Annotated, Dict, List, Literal, Optional, Union
|
|
2
2
|
|
|
3
3
|
from pydantic import Field, root_validator
|
|
4
|
-
from typing_extensions import Annotated, List, Literal, Optional, Union
|
|
5
4
|
|
|
6
|
-
from dstack._internal.core.models.backends.base import ConfigMultiElement
|
|
7
5
|
from dstack._internal.core.models.common import CoreModel
|
|
8
6
|
|
|
9
7
|
|
|
10
|
-
class OCIConfigInfo(CoreModel):
|
|
11
|
-
type: Literal["oci"] = "oci"
|
|
12
|
-
regions: Optional[List[str]] = None
|
|
13
|
-
compartment_id: Optional[str] = None
|
|
14
|
-
|
|
15
|
-
|
|
16
8
|
class OCIClientCreds(CoreModel):
|
|
17
9
|
type: Annotated[Literal["client"], Field(description="The type of credentials")] = "client"
|
|
18
10
|
user: Annotated[str, Field(description="User OCID")]
|
|
@@ -62,27 +54,34 @@ class OCICreds(CoreModel):
|
|
|
62
54
|
__root__: AnyOCICreds = Field(..., discriminator="type")
|
|
63
55
|
|
|
64
56
|
|
|
65
|
-
class
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
57
|
+
class OCIBackendConfig(CoreModel):
|
|
58
|
+
type: Annotated[Literal["oci"], Field(description="The type of backend")] = "oci"
|
|
59
|
+
regions: Annotated[
|
|
60
|
+
Optional[List[str]],
|
|
61
|
+
Field(description="The list of OCI regions. Omit to use all regions"),
|
|
62
|
+
] = None
|
|
63
|
+
compartment_id: Annotated[
|
|
64
|
+
Optional[str],
|
|
65
|
+
Field(
|
|
66
|
+
description=(
|
|
67
|
+
"Compartment where `dstack` will create all resources."
|
|
68
|
+
" Omit to instruct `dstack` to create a new compartment"
|
|
69
|
+
)
|
|
70
|
+
),
|
|
71
|
+
] = None
|
|
70
72
|
|
|
71
73
|
|
|
72
|
-
class
|
|
73
|
-
|
|
74
|
-
creds: Optional[AnyOCICreds]
|
|
75
|
-
regions: Optional[List[str]]
|
|
76
|
-
compartment_id: Optional[str]
|
|
74
|
+
class OCIBackendConfigWithCreds(OCIBackendConfig):
|
|
75
|
+
creds: Annotated[AnyOCICreds, Field(description="The credentials", discriminator="type")]
|
|
77
76
|
|
|
78
77
|
|
|
79
|
-
|
|
80
|
-
type: Literal["oci"] = "oci"
|
|
81
|
-
default_creds: bool = False
|
|
82
|
-
regions: Optional[ConfigMultiElement]
|
|
83
|
-
compartment_id: Optional[str] = None
|
|
78
|
+
AnyOCIBackendConfig = Union[OCIBackendConfig, OCIBackendConfigWithCreds]
|
|
84
79
|
|
|
85
80
|
|
|
86
|
-
class OCIStoredConfig(
|
|
81
|
+
class OCIStoredConfig(OCIBackendConfig):
|
|
87
82
|
compartment_id: str
|
|
88
83
|
subnet_ids_per_region: Dict[str, str]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class OCIConfig(OCIStoredConfig):
|
|
87
|
+
creds: AnyOCICreds
|
|
@@ -5,7 +5,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Set
|
|
|
5
5
|
import oci
|
|
6
6
|
|
|
7
7
|
from dstack._internal.core.backends.oci.auth import get_client_config
|
|
8
|
-
from dstack._internal.core.models.backends.oci import AnyOCICreds
|
|
8
|
+
from dstack._internal.core.backends.oci.models import AnyOCICreds
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class OCIRegionClient:
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from dstack._internal.core.backends.base import Backend
|
|
2
|
-
from dstack._internal.core.backends.runpod.compute import RunpodCompute
|
|
3
|
-
from dstack._internal.core.backends.runpod.config import RunpodConfig
|
|
4
|
-
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class RunpodBackend(Backend):
|
|
8
|
-
TYPE: BackendType = BackendType.RUNPOD
|
|
9
|
-
|
|
10
|
-
def __init__(self, config: RunpodConfig):
|
|
11
|
-
self.config = config
|
|
12
|
-
self._compute = RunpodCompute(self.config)
|
|
13
|
-
|
|
14
|
-
def compute(self) -> RunpodCompute:
|
|
15
|
-
return self._compute
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from dstack._internal.core.backends.base.backend import Backend
|
|
2
|
+
from dstack._internal.core.backends.runpod.compute import RunpodCompute
|
|
3
|
+
from dstack._internal.core.backends.runpod.models import RunpodConfig
|
|
4
|
+
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RunpodBackend(Backend):
|
|
8
|
+
TYPE = BackendType.RUNPOD
|
|
9
|
+
COMPUTE_CLASS = RunpodCompute
|
|
10
|
+
|
|
11
|
+
def __init__(self, config: RunpodConfig):
|
|
12
|
+
self.config = config
|
|
13
|
+
self._compute = RunpodCompute(self.config)
|
|
14
|
+
|
|
15
|
+
def compute(self) -> RunpodCompute:
|
|
16
|
+
return self._compute
|
|
@@ -3,8 +3,9 @@ import uuid
|
|
|
3
3
|
from datetime import timedelta
|
|
4
4
|
from typing import List, Optional
|
|
5
5
|
|
|
6
|
-
from dstack._internal.core.backends.base import Compute
|
|
6
|
+
from dstack._internal.core.backends.base.backend import Compute
|
|
7
7
|
from dstack._internal.core.backends.base.compute import (
|
|
8
|
+
ComputeWithVolumeSupport,
|
|
8
9
|
generate_unique_instance_name,
|
|
9
10
|
generate_unique_volume_name,
|
|
10
11
|
get_docker_commands,
|
|
@@ -12,7 +13,7 @@ from dstack._internal.core.backends.base.compute import (
|
|
|
12
13
|
)
|
|
13
14
|
from dstack._internal.core.backends.base.offers import get_catalog_offers
|
|
14
15
|
from dstack._internal.core.backends.runpod.api_client import RunpodApiClient
|
|
15
|
-
from dstack._internal.core.backends.runpod.config import RunpodConfig
|
|
16
|
+
from dstack._internal.core.backends.runpod.models import RunpodConfig
|
|
16
17
|
from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
|
|
17
18
|
from dstack._internal.core.errors import (
|
|
18
19
|
BackendError,
|
|
@@ -39,7 +40,10 @@ MAX_RESOURCE_NAME_LEN = 60
|
|
|
39
40
|
CONTAINER_REGISTRY_AUTH_CLEANUP_INTERVAL = 60 * 60 * 24 # 24 hour
|
|
40
41
|
|
|
41
42
|
|
|
42
|
-
class RunpodCompute(Compute):
|
|
43
|
+
class RunpodCompute(
|
|
44
|
+
ComputeWithVolumeSupport,
|
|
45
|
+
Compute,
|
|
46
|
+
):
|
|
43
47
|
_last_cleanup_time = None
|
|
44
48
|
|
|
45
49
|
def __init__(self, config: RunpodConfig):
|
|
@@ -52,8 +56,9 @@ class RunpodCompute(Compute):
|
|
|
52
56
|
) -> List[InstanceOfferWithAvailability]:
|
|
53
57
|
offers = get_catalog_offers(
|
|
54
58
|
backend=BackendType.RUNPOD,
|
|
55
|
-
locations=self.config.regions,
|
|
59
|
+
locations=self.config.regions or None,
|
|
56
60
|
requirements=requirements,
|
|
61
|
+
extra_filter=lambda o: _is_secure_cloud(o.region) or self.config.allow_community_cloud,
|
|
57
62
|
)
|
|
58
63
|
offers = [
|
|
59
64
|
InstanceOfferWithAvailability(
|
|
@@ -102,13 +107,22 @@ class RunpodCompute(Compute):
|
|
|
102
107
|
bid_per_gpu = None
|
|
103
108
|
if instance_offer.instance.resources.spot and gpu_count:
|
|
104
109
|
bid_per_gpu = instance_offer.price / gpu_count
|
|
110
|
+
if _is_secure_cloud(instance_offer.region):
|
|
111
|
+
cloud_type = "SECURE"
|
|
112
|
+
data_center_id = instance_offer.region
|
|
113
|
+
country_code = None
|
|
114
|
+
else:
|
|
115
|
+
cloud_type = "COMMUNITY"
|
|
116
|
+
data_center_id = None
|
|
117
|
+
country_code = instance_offer.region
|
|
105
118
|
|
|
106
119
|
resp = self.api_client.create_pod(
|
|
107
120
|
name=pod_name,
|
|
108
121
|
image_name=job.job_spec.image_name,
|
|
109
122
|
gpu_type_id=instance_offer.instance.name,
|
|
110
|
-
cloud_type=
|
|
111
|
-
data_center_id=
|
|
123
|
+
cloud_type=cloud_type,
|
|
124
|
+
data_center_id=data_center_id,
|
|
125
|
+
country_code=country_code,
|
|
112
126
|
gpu_count=gpu_count,
|
|
113
127
|
container_disk_in_gb=disk_size,
|
|
114
128
|
min_vcpu_count=instance_offer.instance.resources.cpus,
|
|
@@ -257,3 +271,11 @@ def _get_volume_price(size: int) -> float:
|
|
|
257
271
|
if size < 1000:
|
|
258
272
|
return 0.07 * size
|
|
259
273
|
return 0.05 * size
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _is_secure_cloud(region: str) -> str:
|
|
277
|
+
"""
|
|
278
|
+
Secure cloud regions are datacenter IDs: CA-MTL-1, EU-NL-1, etc.
|
|
279
|
+
Community cloud regions are country codes: CA, NL, etc.
|
|
280
|
+
"""
|
|
281
|
+
return "-" in region
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from dstack._internal.core.backends.base.configurator import (
|
|
4
|
+
BackendRecord,
|
|
5
|
+
Configurator,
|
|
6
|
+
raise_invalid_credentials_error,
|
|
7
|
+
)
|
|
8
|
+
from dstack._internal.core.backends.runpod import api_client
|
|
9
|
+
from dstack._internal.core.backends.runpod.backend import RunpodBackend
|
|
10
|
+
from dstack._internal.core.backends.runpod.models import (
|
|
11
|
+
AnyRunpodBackendConfig,
|
|
12
|
+
RunpodBackendConfig,
|
|
13
|
+
RunpodBackendConfigWithCreds,
|
|
14
|
+
RunpodConfig,
|
|
15
|
+
RunpodCreds,
|
|
16
|
+
RunpodStoredConfig,
|
|
17
|
+
)
|
|
18
|
+
from dstack._internal.core.models.backends.base import BackendType
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RunpodConfigurator(Configurator):
|
|
22
|
+
TYPE = BackendType.RUNPOD
|
|
23
|
+
BACKEND_CLASS = RunpodBackend
|
|
24
|
+
|
|
25
|
+
def validate_config(self, config: RunpodBackendConfigWithCreds, default_creds_enabled: bool):
|
|
26
|
+
self._validate_runpod_api_key(config.creds.api_key)
|
|
27
|
+
|
|
28
|
+
def create_backend(
|
|
29
|
+
self, project_name: str, config: RunpodBackendConfigWithCreds
|
|
30
|
+
) -> BackendRecord:
|
|
31
|
+
return BackendRecord(
|
|
32
|
+
config=RunpodStoredConfig(
|
|
33
|
+
**RunpodBackendConfig.__response__.parse_obj(config).dict()
|
|
34
|
+
).json(),
|
|
35
|
+
auth=RunpodCreds.parse_obj(config.creds).json(),
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
def get_backend_config(
|
|
39
|
+
self, record: BackendRecord, include_creds: bool
|
|
40
|
+
) -> AnyRunpodBackendConfig:
|
|
41
|
+
config = self._get_config(record)
|
|
42
|
+
if include_creds:
|
|
43
|
+
return RunpodBackendConfigWithCreds.__response__.parse_obj(config)
|
|
44
|
+
return RunpodBackendConfig.__response__.parse_obj(config)
|
|
45
|
+
|
|
46
|
+
def get_backend(self, record: BackendRecord) -> RunpodBackend:
|
|
47
|
+
config = self._get_config(record)
|
|
48
|
+
return RunpodBackend(config=config)
|
|
49
|
+
|
|
50
|
+
def _get_config(self, record: BackendRecord) -> RunpodConfig:
|
|
51
|
+
return RunpodConfig(
|
|
52
|
+
**json.loads(record.config),
|
|
53
|
+
creds=RunpodCreds.parse_raw(record.auth),
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
def _validate_runpod_api_key(self, api_key: str):
|
|
57
|
+
client = api_client.RunpodApiClient(api_key=api_key)
|
|
58
|
+
if not client.validate_api_key():
|
|
59
|
+
raise_invalid_credentials_error(fields=[["creds", "api_key"]])
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from typing import Annotated, List, Literal, Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from dstack._internal.core.models.common import CoreModel
|
|
6
|
+
|
|
7
|
+
RUNPOD_COMMUNITY_CLOUD_DEFAULT = True
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RunpodAPIKeyCreds(CoreModel):
|
|
11
|
+
type: Literal["api_key"] = "api_key"
|
|
12
|
+
api_key: Annotated[str, Field(description="The API key")]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
AnyRunpodCreds = RunpodAPIKeyCreds
|
|
16
|
+
RunpodCreds = AnyRunpodCreds
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RunpodBackendConfig(CoreModel):
|
|
20
|
+
type: Literal["runpod"] = "runpod"
|
|
21
|
+
regions: Annotated[
|
|
22
|
+
Optional[List[str]],
|
|
23
|
+
Field(description="The list of RunPod regions. Omit to use all regions"),
|
|
24
|
+
] = None
|
|
25
|
+
community_cloud: Annotated[
|
|
26
|
+
Optional[bool],
|
|
27
|
+
Field(
|
|
28
|
+
description=(
|
|
29
|
+
"Whether Community Cloud offers can be suggested in addition to Secure Cloud."
|
|
30
|
+
f" Defaults to `{str(RUNPOD_COMMUNITY_CLOUD_DEFAULT).lower()}`"
|
|
31
|
+
)
|
|
32
|
+
),
|
|
33
|
+
] = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class RunpodBackendConfigWithCreds(RunpodBackendConfig):
|
|
37
|
+
creds: Annotated[AnyRunpodCreds, Field(description="The credentials")]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
AnyRunpodBackendConfig = Union[RunpodBackendConfig, RunpodBackendConfigWithCreds]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class RunpodStoredConfig(RunpodBackendConfig):
|
|
44
|
+
pass
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class RunpodConfig(RunpodStoredConfig):
|
|
48
|
+
creds: AnyRunpodCreds
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def allow_community_cloud(self) -> bool:
|
|
52
|
+
if self.community_cloud is not None:
|
|
53
|
+
return self.community_cloud
|
|
54
|
+
return RUNPOD_COMMUNITY_CLOUD_DEFAULT
|
|
File without changes
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from dstack._internal.core.backends.base import Backend
|
|
2
|
-
from dstack._internal.core.backends.tensordock.compute import TensorDockCompute
|
|
3
|
-
from dstack._internal.core.backends.tensordock.config import TensorDockConfig
|
|
4
|
-
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class TensorDockBackend(Backend):
|
|
8
|
-
TYPE: BackendType = BackendType.TENSORDOCK
|
|
9
|
-
|
|
10
|
-
def __init__(self, config: TensorDockConfig):
|
|
11
|
-
self.config = config
|
|
12
|
-
self._compute = TensorDockCompute(self.config)
|
|
13
|
-
|
|
14
|
-
def compute(self) -> TensorDockCompute:
|
|
15
|
-
return self._compute
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from dstack._internal.core.backends.base.backend import Backend
|
|
2
|
+
from dstack._internal.core.backends.tensordock.compute import TensorDockCompute
|
|
3
|
+
from dstack._internal.core.backends.tensordock.models import TensorDockConfig
|
|
4
|
+
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TensorDockBackend(Backend):
|
|
8
|
+
TYPE = BackendType.TENSORDOCK
|
|
9
|
+
COMPUTE_CLASS = TensorDockCompute
|
|
10
|
+
|
|
11
|
+
def __init__(self, config: TensorDockConfig):
|
|
12
|
+
self.config = config
|
|
13
|
+
self._compute = TensorDockCompute(self.config)
|
|
14
|
+
|
|
15
|
+
def compute(self) -> TensorDockCompute:
|
|
16
|
+
return self._compute
|
|
@@ -3,25 +3,23 @@ from typing import List, Optional
|
|
|
3
3
|
|
|
4
4
|
import requests
|
|
5
5
|
|
|
6
|
-
from dstack._internal.core.backends.base import Compute
|
|
6
|
+
from dstack._internal.core.backends.base.backend import Compute
|
|
7
7
|
from dstack._internal.core.backends.base.compute import (
|
|
8
|
+
ComputeWithCreateInstanceSupport,
|
|
8
9
|
generate_unique_instance_name,
|
|
9
|
-
get_job_instance_name,
|
|
10
10
|
get_shim_commands,
|
|
11
11
|
)
|
|
12
12
|
from dstack._internal.core.backends.base.offers import get_catalog_offers
|
|
13
13
|
from dstack._internal.core.backends.tensordock.api_client import TensorDockAPIClient
|
|
14
|
-
from dstack._internal.core.backends.tensordock.config import TensorDockConfig
|
|
14
|
+
from dstack._internal.core.backends.tensordock.models import TensorDockConfig
|
|
15
15
|
from dstack._internal.core.errors import BackendError, NoCapacityError
|
|
16
16
|
from dstack._internal.core.models.backends.base import BackendType
|
|
17
17
|
from dstack._internal.core.models.instances import (
|
|
18
18
|
InstanceAvailability,
|
|
19
19
|
InstanceConfiguration,
|
|
20
20
|
InstanceOfferWithAvailability,
|
|
21
|
-
SSHKey,
|
|
22
21
|
)
|
|
23
|
-
from dstack._internal.core.models.runs import
|
|
24
|
-
from dstack._internal.core.models.volumes import Volume
|
|
22
|
+
from dstack._internal.core.models.runs import JobProvisioningData, Requirements
|
|
25
23
|
from dstack._internal.utils.logging import get_logger
|
|
26
24
|
|
|
27
25
|
logger = get_logger(__name__)
|
|
@@ -31,7 +29,10 @@ logger = get_logger(__name__)
|
|
|
31
29
|
MAX_INSTANCE_NAME_LEN = 60
|
|
32
30
|
|
|
33
31
|
|
|
34
|
-
class TensorDockCompute(
|
|
32
|
+
class TensorDockCompute(
|
|
33
|
+
ComputeWithCreateInstanceSupport,
|
|
34
|
+
Compute,
|
|
35
|
+
):
|
|
35
36
|
def __init__(self, config: TensorDockConfig):
|
|
36
37
|
super().__init__()
|
|
37
38
|
self.config = config
|
|
@@ -113,26 +114,6 @@ class TensorDockCompute(Compute):
|
|
|
113
114
|
backend_data=None,
|
|
114
115
|
)
|
|
115
116
|
|
|
116
|
-
def run_job(
|
|
117
|
-
self,
|
|
118
|
-
run: Run,
|
|
119
|
-
job: Job,
|
|
120
|
-
instance_offer: InstanceOfferWithAvailability,
|
|
121
|
-
project_ssh_public_key: str,
|
|
122
|
-
project_ssh_private_key: str,
|
|
123
|
-
volumes: List[Volume],
|
|
124
|
-
) -> JobProvisioningData:
|
|
125
|
-
instance_config = InstanceConfiguration(
|
|
126
|
-
project_name=run.project_name,
|
|
127
|
-
instance_name=get_job_instance_name(run, job), # TODO: generate name
|
|
128
|
-
ssh_keys=[
|
|
129
|
-
SSHKey(public=run.run_spec.ssh_key_pub.strip()),
|
|
130
|
-
SSHKey(public=project_ssh_public_key.strip()),
|
|
131
|
-
],
|
|
132
|
-
user=run.user,
|
|
133
|
-
)
|
|
134
|
-
return self.create_instance(instance_offer, instance_config)
|
|
135
|
-
|
|
136
117
|
def terminate_instance(
|
|
137
118
|
self, instance_id: str, region: str, backend_data: Optional[str] = None
|
|
138
119
|
):
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from dstack._internal.core.backends.base.configurator import (
|
|
4
|
+
BackendRecord,
|
|
5
|
+
Configurator,
|
|
6
|
+
raise_invalid_credentials_error,
|
|
7
|
+
)
|
|
8
|
+
from dstack._internal.core.backends.tensordock import api_client
|
|
9
|
+
from dstack._internal.core.backends.tensordock.backend import TensorDockBackend
|
|
10
|
+
from dstack._internal.core.backends.tensordock.models import (
|
|
11
|
+
AnyTensorDockBackendConfig,
|
|
12
|
+
TensorDockBackendConfig,
|
|
13
|
+
TensorDockBackendConfigWithCreds,
|
|
14
|
+
TensorDockConfig,
|
|
15
|
+
TensorDockCreds,
|
|
16
|
+
TensorDockStoredConfig,
|
|
17
|
+
)
|
|
18
|
+
from dstack._internal.core.models.backends.base import (
|
|
19
|
+
BackendType,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# TensorDock regions are dynamic, currently we don't offer any filtering
|
|
23
|
+
REGIONS = []
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TensorDockConfigurator(Configurator):
|
|
27
|
+
TYPE = BackendType.TENSORDOCK
|
|
28
|
+
BACKEND_CLASS = TensorDockBackend
|
|
29
|
+
|
|
30
|
+
def validate_config(
|
|
31
|
+
self, config: TensorDockBackendConfigWithCreds, default_creds_enabled: bool
|
|
32
|
+
):
|
|
33
|
+
self._validate_tensordock_creds(config.creds.api_key, config.creds.api_token)
|
|
34
|
+
|
|
35
|
+
def create_backend(
|
|
36
|
+
self, project_name: str, config: TensorDockBackendConfigWithCreds
|
|
37
|
+
) -> BackendRecord:
|
|
38
|
+
if config.regions is None:
|
|
39
|
+
config.regions = REGIONS
|
|
40
|
+
return BackendRecord(
|
|
41
|
+
config=TensorDockStoredConfig(
|
|
42
|
+
**TensorDockBackendConfig.__response__.parse_obj(config).dict()
|
|
43
|
+
).json(),
|
|
44
|
+
auth=TensorDockCreds.parse_obj(config.creds).json(),
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
def get_backend_config(
|
|
48
|
+
self, record: BackendRecord, include_creds: bool
|
|
49
|
+
) -> AnyTensorDockBackendConfig:
|
|
50
|
+
config = self._get_config(record)
|
|
51
|
+
if include_creds:
|
|
52
|
+
return TensorDockBackendConfigWithCreds.__response__.parse_obj(config)
|
|
53
|
+
return TensorDockBackendConfig.__response__.parse_obj(config)
|
|
54
|
+
|
|
55
|
+
def get_backend(self, record: BackendRecord) -> TensorDockBackend:
|
|
56
|
+
config = self._get_config(record)
|
|
57
|
+
return TensorDockBackend(config=config)
|
|
58
|
+
|
|
59
|
+
def _get_config(self, record: BackendRecord) -> TensorDockConfig:
|
|
60
|
+
return TensorDockConfig.__response__(
|
|
61
|
+
**json.loads(record.config),
|
|
62
|
+
creds=TensorDockCreds.parse_raw(record.auth),
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
def _validate_tensordock_creds(self, api_key: str, api_token: str):
|
|
66
|
+
client = api_client.TensorDockAPIClient(api_key=api_key, api_token=api_token)
|
|
67
|
+
if not client.auth_test():
|
|
68
|
+
raise_invalid_credentials_error(fields=[["creds", "api_key"], ["creds", "api_token"]])
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from typing import Annotated, List, Literal, Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from dstack._internal.core.models.common import CoreModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TensorDockAPIKeyCreds(CoreModel):
|
|
9
|
+
type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
|
|
10
|
+
api_key: Annotated[str, Field(description="The API key")]
|
|
11
|
+
api_token: Annotated[str, Field(description="The API token")]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
AnyTensorDockCreds = TensorDockAPIKeyCreds
|
|
15
|
+
TensorDockCreds = AnyTensorDockCreds
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TensorDockBackendConfig(CoreModel):
|
|
19
|
+
type: Annotated[Literal["tensordock"], Field(description="The type of backend")] = "tensordock"
|
|
20
|
+
regions: Annotated[
|
|
21
|
+
Optional[List[str]],
|
|
22
|
+
Field(description="The list of TensorDock regions. Omit to use all regions"),
|
|
23
|
+
] = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TensorDockBackendConfigWithCreds(TensorDockBackendConfig):
|
|
27
|
+
creds: Annotated[AnyTensorDockCreds, Field(description="The credentials")]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
AnyTensorDockBackendConfig = Union[TensorDockBackendConfig, TensorDockBackendConfigWithCreds]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TensorDockStoredConfig(TensorDockBackendConfig):
|
|
34
|
+
pass
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TensorDockConfig(TensorDockStoredConfig):
|
|
38
|
+
creds: AnyTensorDockCreds
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from dstack._internal.core.backends.base import Backend
|
|
2
|
-
from dstack._internal.core.backends.vastai.compute import VastAICompute
|
|
3
|
-
from dstack._internal.core.backends.vastai.config import VastAIConfig
|
|
4
|
-
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class VastAIBackend(Backend):
|
|
8
|
-
TYPE: BackendType = BackendType.VASTAI
|
|
9
|
-
|
|
10
|
-
def __init__(self, config: VastAIConfig):
|
|
11
|
-
self.config = config
|
|
12
|
-
self._compute = VastAICompute(self.config)
|
|
13
|
-
|
|
14
|
-
def compute(self) -> VastAICompute:
|
|
15
|
-
return self._compute
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from dstack._internal.core.backends.base.backend import Backend
|
|
2
|
+
from dstack._internal.core.backends.vastai.compute import VastAICompute
|
|
3
|
+
from dstack._internal.core.backends.vastai.models import VastAIConfig
|
|
4
|
+
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class VastAIBackend(Backend):
|
|
8
|
+
TYPE = BackendType.VASTAI
|
|
9
|
+
COMPUTE_CLASS = VastAICompute
|
|
10
|
+
|
|
11
|
+
def __init__(self, config: VastAIConfig):
|
|
12
|
+
self.config = config
|
|
13
|
+
self._compute = VastAICompute(self.config)
|
|
14
|
+
|
|
15
|
+
def compute(self) -> VastAICompute:
|
|
16
|
+
return self._compute
|
|
@@ -3,14 +3,14 @@ from typing import List, Optional
|
|
|
3
3
|
import gpuhunt
|
|
4
4
|
from gpuhunt.providers.vastai import VastAIProvider
|
|
5
5
|
|
|
6
|
-
from dstack._internal.core.backends.base import Compute
|
|
6
|
+
from dstack._internal.core.backends.base.backend import Compute
|
|
7
7
|
from dstack._internal.core.backends.base.compute import (
|
|
8
8
|
generate_unique_instance_name_for_job,
|
|
9
9
|
get_docker_commands,
|
|
10
10
|
)
|
|
11
11
|
from dstack._internal.core.backends.base.offers import get_catalog_offers
|
|
12
12
|
from dstack._internal.core.backends.vastai.api_client import VastAIAPIClient
|
|
13
|
-
from dstack._internal.core.backends.vastai.
|
|
13
|
+
from dstack._internal.core.backends.vastai.models import VastAIConfig
|
|
14
14
|
from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
|
|
15
15
|
from dstack._internal.core.errors import ProvisioningError
|
|
16
16
|
from dstack._internal.core.models.backends.base import BackendType
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from dstack._internal.core.backends.base.configurator import (
|
|
4
|
+
BackendRecord,
|
|
5
|
+
Configurator,
|
|
6
|
+
raise_invalid_credentials_error,
|
|
7
|
+
)
|
|
8
|
+
from dstack._internal.core.backends.vastai import api_client
|
|
9
|
+
from dstack._internal.core.backends.vastai.backend import VastAIBackend
|
|
10
|
+
from dstack._internal.core.backends.vastai.models import (
|
|
11
|
+
AnyVastAIBackendConfig,
|
|
12
|
+
VastAIBackendConfig,
|
|
13
|
+
VastAIBackendConfigWithCreds,
|
|
14
|
+
VastAIConfig,
|
|
15
|
+
VastAICreds,
|
|
16
|
+
VastAIStoredConfig,
|
|
17
|
+
)
|
|
18
|
+
from dstack._internal.core.models.backends.base import (
|
|
19
|
+
BackendType,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# VastAI regions are dynamic, currently we don't offer any filtering
|
|
23
|
+
REGIONS = []
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class VastAIConfigurator(Configurator):
|
|
27
|
+
TYPE = BackendType.VASTAI
|
|
28
|
+
BACKEND_CLASS = VastAIBackend
|
|
29
|
+
|
|
30
|
+
def validate_config(self, config: VastAIBackendConfigWithCreds, default_creds_enabled: bool):
|
|
31
|
+
self._validate_vastai_creds(config.creds.api_key)
|
|
32
|
+
|
|
33
|
+
def create_backend(
|
|
34
|
+
self, project_name: str, config: VastAIBackendConfigWithCreds
|
|
35
|
+
) -> BackendRecord:
|
|
36
|
+
if config.regions is None:
|
|
37
|
+
config.regions = REGIONS
|
|
38
|
+
return BackendRecord(
|
|
39
|
+
config=VastAIStoredConfig(
|
|
40
|
+
**VastAIBackendConfig.__response__.parse_obj(config).dict()
|
|
41
|
+
).json(),
|
|
42
|
+
auth=VastAICreds.parse_obj(config.creds).json(),
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
def get_backend_config(
|
|
46
|
+
self, record: BackendRecord, include_creds: bool
|
|
47
|
+
) -> AnyVastAIBackendConfig:
|
|
48
|
+
config = self._get_config(record)
|
|
49
|
+
if include_creds:
|
|
50
|
+
return VastAIBackendConfigWithCreds.__response__.parse_obj(config)
|
|
51
|
+
return VastAIBackendConfig.__response__.parse_obj(config)
|
|
52
|
+
|
|
53
|
+
def get_backend(self, record: BackendRecord) -> VastAIBackend:
|
|
54
|
+
config = self._get_config(record)
|
|
55
|
+
return VastAIBackend(config=config)
|
|
56
|
+
|
|
57
|
+
def _get_config(self, record: BackendRecord) -> VastAIConfig:
|
|
58
|
+
return VastAIConfig.__response__(
|
|
59
|
+
**json.loads(record.config),
|
|
60
|
+
creds=VastAICreds.parse_raw(record.auth),
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
def _validate_vastai_creds(self, api_key: str):
|
|
64
|
+
client = api_client.VastAIAPIClient(api_key=api_key)
|
|
65
|
+
if not client.auth_test():
|
|
66
|
+
raise_invalid_credentials_error(fields=[["creds", "api_key"]])
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Annotated, List, Literal, Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from dstack._internal.core.models.common import CoreModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class VastAIAPIKeyCreds(CoreModel):
|
|
9
|
+
type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
|
|
10
|
+
api_key: Annotated[str, Field(description="The API key")]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
AnyVastAICreds = VastAIAPIKeyCreds
|
|
14
|
+
VastAICreds = AnyVastAICreds
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VastAIBackendConfig(CoreModel):
|
|
18
|
+
type: Annotated[Literal["vastai"], Field(description="The type of backend")] = "vastai"
|
|
19
|
+
regions: Annotated[
|
|
20
|
+
Optional[List[str]],
|
|
21
|
+
Field(description="The list of VastAI regions. Omit to use all regions"),
|
|
22
|
+
] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class VastAIBackendConfigWithCreds(VastAIBackendConfig):
|
|
26
|
+
creds: Annotated[AnyVastAICreds, Field(description="The credentials")]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
AnyVastAIBackendConfig = Union[VastAIBackendConfig, VastAIBackendConfigWithCreds]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class VastAIStoredConfig(VastAIBackendConfig):
|
|
33
|
+
pass
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class VastAIConfig(VastAIStoredConfig):
|
|
37
|
+
creds: AnyVastAICreds
|