dstack 0.18.43__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dstack might be problematic. Click here for more details.
- dstack/_internal/cli/commands/gateway.py +15 -3
- dstack/_internal/cli/commands/logs.py +0 -22
- dstack/_internal/cli/commands/stats.py +8 -17
- dstack/_internal/cli/main.py +1 -5
- dstack/_internal/cli/services/configurators/fleet.py +4 -39
- dstack/_internal/cli/services/configurators/run.py +22 -20
- dstack/_internal/cli/services/profile.py +34 -83
- dstack/_internal/cli/utils/gateway.py +1 -1
- dstack/_internal/cli/utils/run.py +11 -0
- dstack/_internal/core/backends/__init__.py +56 -39
- dstack/_internal/core/backends/aws/__init__.py +0 -25
- dstack/_internal/core/backends/aws/auth.py +1 -10
- dstack/_internal/core/backends/aws/backend.py +26 -0
- dstack/_internal/core/backends/aws/compute.py +21 -45
- dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
- dstack/_internal/core/backends/aws/models.py +135 -0
- dstack/_internal/core/backends/aws/resources.py +1 -1
- dstack/_internal/core/backends/azure/__init__.py +0 -20
- dstack/_internal/core/backends/azure/auth.py +2 -11
- dstack/_internal/core/backends/azure/backend.py +21 -0
- dstack/_internal/core/backends/azure/compute.py +14 -28
- dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
- dstack/_internal/core/backends/azure/models.py +89 -0
- dstack/_internal/core/backends/base/__init__.py +0 -12
- dstack/_internal/core/backends/base/backend.py +18 -0
- dstack/_internal/core/backends/base/compute.py +153 -33
- dstack/_internal/core/backends/base/configurator.py +105 -0
- dstack/_internal/core/backends/base/models.py +14 -0
- dstack/_internal/core/backends/configurators.py +138 -0
- dstack/_internal/core/backends/cudo/__init__.py +0 -15
- dstack/_internal/core/backends/cudo/backend.py +16 -0
- dstack/_internal/core/backends/cudo/compute.py +8 -26
- dstack/_internal/core/backends/cudo/configurator.py +72 -0
- dstack/_internal/core/backends/cudo/models.py +37 -0
- dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
- dstack/_internal/core/backends/datacrunch/backend.py +16 -0
- dstack/_internal/core/backends/datacrunch/compute.py +8 -25
- dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
- dstack/_internal/core/backends/datacrunch/models.py +38 -0
- dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
- dstack/_internal/core/backends/gcp/__init__.py +0 -16
- dstack/_internal/core/backends/gcp/auth.py +2 -11
- dstack/_internal/core/backends/gcp/backend.py +17 -0
- dstack/_internal/core/backends/gcp/compute.py +14 -44
- dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
- dstack/_internal/core/backends/gcp/models.py +125 -0
- dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
- dstack/_internal/core/backends/kubernetes/backend.py +16 -0
- dstack/_internal/core/backends/kubernetes/compute.py +16 -5
- dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
- dstack/_internal/core/backends/kubernetes/models.py +72 -0
- dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
- dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
- dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
- dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
- dstack/_internal/core/backends/lambdalabs/models.py +37 -0
- dstack/_internal/core/backends/local/__init__.py +0 -13
- dstack/_internal/core/backends/local/backend.py +14 -0
- dstack/_internal/core/backends/local/compute.py +16 -2
- dstack/_internal/core/backends/models.py +128 -0
- dstack/_internal/core/backends/oci/__init__.py +0 -15
- dstack/_internal/core/backends/oci/auth.py +1 -5
- dstack/_internal/core/backends/oci/backend.py +16 -0
- dstack/_internal/core/backends/oci/compute.py +9 -23
- dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
- dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
- dstack/_internal/core/backends/oci/region.py +1 -1
- dstack/_internal/core/backends/runpod/__init__.py +0 -15
- dstack/_internal/core/backends/runpod/backend.py +16 -0
- dstack/_internal/core/backends/runpod/compute.py +28 -6
- dstack/_internal/core/backends/runpod/configurator.py +59 -0
- dstack/_internal/core/backends/runpod/models.py +54 -0
- dstack/_internal/core/backends/template/__init__.py +0 -0
- dstack/_internal/core/backends/tensordock/__init__.py +0 -15
- dstack/_internal/core/backends/tensordock/backend.py +16 -0
- dstack/_internal/core/backends/tensordock/compute.py +8 -27
- dstack/_internal/core/backends/tensordock/configurator.py +68 -0
- dstack/_internal/core/backends/tensordock/models.py +38 -0
- dstack/_internal/core/backends/vastai/__init__.py +0 -15
- dstack/_internal/core/backends/vastai/backend.py +16 -0
- dstack/_internal/core/backends/vastai/compute.py +2 -2
- dstack/_internal/core/backends/vastai/configurator.py +66 -0
- dstack/_internal/core/backends/vastai/models.py +37 -0
- dstack/_internal/core/backends/vultr/__init__.py +0 -15
- dstack/_internal/core/backends/vultr/backend.py +16 -0
- dstack/_internal/core/backends/vultr/compute.py +10 -24
- dstack/_internal/core/backends/vultr/configurator.py +64 -0
- dstack/_internal/core/backends/vultr/models.py +34 -0
- dstack/_internal/core/models/backends/__init__.py +0 -184
- dstack/_internal/core/models/backends/base.py +0 -19
- dstack/_internal/core/models/configurations.py +22 -16
- dstack/_internal/core/models/envs.py +4 -3
- dstack/_internal/core/models/fleets.py +17 -22
- dstack/_internal/core/models/gateways.py +3 -3
- dstack/_internal/core/models/instances.py +24 -0
- dstack/_internal/core/models/profiles.py +85 -45
- dstack/_internal/core/models/projects.py +1 -1
- dstack/_internal/core/models/repos/base.py +0 -5
- dstack/_internal/core/models/repos/local.py +3 -3
- dstack/_internal/core/models/repos/remote.py +26 -12
- dstack/_internal/core/models/repos/virtual.py +1 -1
- dstack/_internal/core/models/resources.py +45 -76
- dstack/_internal/core/models/runs.py +21 -19
- dstack/_internal/core/models/volumes.py +1 -3
- dstack/_internal/core/services/profiles.py +7 -16
- dstack/_internal/core/services/repos.py +0 -4
- dstack/_internal/server/app.py +11 -4
- dstack/_internal/server/background/__init__.py +10 -0
- dstack/_internal/server/background/tasks/process_gateways.py +4 -8
- dstack/_internal/server/background/tasks/process_instances.py +14 -9
- dstack/_internal/server/background/tasks/process_metrics.py +1 -1
- dstack/_internal/server/background/tasks/process_placement_groups.py +5 -1
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
- dstack/_internal/server/background/tasks/process_running_jobs.py +80 -24
- dstack/_internal/server/background/tasks/process_runs.py +1 -0
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +20 -38
- dstack/_internal/server/background/tasks/process_volumes.py +5 -2
- dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
- dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
- dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
- dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
- dstack/_internal/server/models.py +59 -9
- dstack/_internal/server/routers/backends.py +14 -23
- dstack/_internal/server/routers/instances.py +3 -4
- dstack/_internal/server/routers/metrics.py +31 -10
- dstack/_internal/server/routers/prometheus.py +36 -0
- dstack/_internal/server/routers/repos.py +1 -2
- dstack/_internal/server/routers/runs.py +13 -59
- dstack/_internal/server/schemas/gateways.py +14 -23
- dstack/_internal/server/schemas/projects.py +7 -2
- dstack/_internal/server/schemas/repos.py +2 -38
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/schemas/runs.py +1 -24
- dstack/_internal/server/security/permissions.py +1 -1
- dstack/_internal/server/services/backends/__init__.py +85 -158
- dstack/_internal/server/services/config.py +53 -567
- dstack/_internal/server/services/fleets.py +9 -103
- dstack/_internal/server/services/gateways/__init__.py +13 -4
- dstack/_internal/server/services/{pools.py → instances.py} +22 -329
- dstack/_internal/server/services/jobs/__init__.py +9 -6
- dstack/_internal/server/services/jobs/configurators/base.py +25 -1
- dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
- dstack/_internal/server/services/metrics.py +131 -72
- dstack/_internal/server/services/offers.py +1 -1
- dstack/_internal/server/services/projects.py +23 -14
- dstack/_internal/server/services/prometheus.py +245 -0
- dstack/_internal/server/services/runner/client.py +14 -3
- dstack/_internal/server/services/runs.py +67 -31
- dstack/_internal/server/services/volumes.py +9 -4
- dstack/_internal/server/settings.py +3 -0
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4a0fe83e84574654e397.js} +76 -19
- dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4a0fe83e84574654e397.js.map} +1 -1
- dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
- dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
- dstack/_internal/server/testing/common.py +75 -32
- dstack/_internal/utils/json_schema.py +6 -0
- dstack/_internal/utils/ssh.py +2 -1
- dstack/api/__init__.py +4 -0
- dstack/api/_public/__init__.py +16 -20
- dstack/api/_public/backends.py +1 -1
- dstack/api/_public/repos.py +36 -36
- dstack/api/_public/runs.py +170 -83
- dstack/api/server/__init__.py +11 -13
- dstack/api/server/_backends.py +12 -16
- dstack/api/server/_fleets.py +15 -55
- dstack/api/server/_gateways.py +3 -14
- dstack/api/server/_repos.py +1 -4
- dstack/api/server/_runs.py +21 -96
- dstack/api/server/_volumes.py +10 -5
- dstack/api/utils.py +3 -0
- dstack/version.py +1 -1
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/METADATA +10 -1
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/RECORD +229 -206
- tests/_internal/cli/services/configurators/test_profile.py +6 -6
- tests/_internal/core/backends/aws/test_configurator.py +35 -0
- tests/_internal/core/backends/aws/test_resources.py +1 -1
- tests/_internal/core/backends/azure/test_configurator.py +61 -0
- tests/_internal/core/backends/cudo/__init__.py +0 -0
- tests/_internal/core/backends/cudo/test_configurator.py +37 -0
- tests/_internal/core/backends/datacrunch/__init__.py +0 -0
- tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
- tests/_internal/core/backends/gcp/test_configurator.py +42 -0
- tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
- tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
- tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
- tests/_internal/core/backends/oci/test_configurator.py +55 -0
- tests/_internal/core/backends/runpod/__init__.py +0 -0
- tests/_internal/core/backends/runpod/test_configurator.py +33 -0
- tests/_internal/core/backends/tensordock/__init__.py +0 -0
- tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
- tests/_internal/core/backends/vastai/__init__.py +0 -0
- tests/_internal/core/backends/vastai/test_configurator.py +33 -0
- tests/_internal/core/backends/vultr/__init__.py +0 -0
- tests/_internal/core/backends/vultr/test_configurator.py +33 -0
- tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
- tests/_internal/server/background/tasks/test_process_instances.py +49 -48
- tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
- tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
- tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +186 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +123 -19
- tests/_internal/server/background/tasks/test_process_runs.py +8 -22
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
- tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
- tests/_internal/server/routers/test_backends.py +6 -764
- tests/_internal/server/routers/test_fleets.py +2 -26
- tests/_internal/server/routers/test_gateways.py +27 -3
- tests/_internal/server/routers/test_instances.py +0 -10
- tests/_internal/server/routers/test_metrics.py +42 -0
- tests/_internal/server/routers/test_projects.py +56 -0
- tests/_internal/server/routers/test_prometheus.py +333 -0
- tests/_internal/server/routers/test_repos.py +0 -15
- tests/_internal/server/routers/test_runs.py +83 -275
- tests/_internal/server/routers/test_volumes.py +2 -3
- tests/_internal/server/services/backends/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
- tests/_internal/server/services/test_config.py +7 -4
- tests/_internal/server/services/test_fleets.py +1 -4
- tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
- tests/_internal/server/services/test_metrics.py +167 -0
- tests/_internal/server/services/test_repos.py +1 -14
- tests/_internal/server/services/test_runs.py +0 -4
- dstack/_internal/cli/commands/pool.py +0 -581
- dstack/_internal/cli/commands/run.py +0 -75
- dstack/_internal/core/backends/aws/config.py +0 -18
- dstack/_internal/core/backends/azure/config.py +0 -12
- dstack/_internal/core/backends/base/config.py +0 -5
- dstack/_internal/core/backends/cudo/config.py +0 -9
- dstack/_internal/core/backends/datacrunch/config.py +0 -9
- dstack/_internal/core/backends/gcp/config.py +0 -22
- dstack/_internal/core/backends/kubernetes/config.py +0 -6
- dstack/_internal/core/backends/lambdalabs/config.py +0 -9
- dstack/_internal/core/backends/nebius/__init__.py +0 -15
- dstack/_internal/core/backends/nebius/api_client.py +0 -319
- dstack/_internal/core/backends/nebius/compute.py +0 -220
- dstack/_internal/core/backends/nebius/config.py +0 -6
- dstack/_internal/core/backends/nebius/types.py +0 -37
- dstack/_internal/core/backends/oci/config.py +0 -6
- dstack/_internal/core/backends/runpod/config.py +0 -9
- dstack/_internal/core/backends/tensordock/config.py +0 -9
- dstack/_internal/core/backends/vastai/config.py +0 -6
- dstack/_internal/core/backends/vultr/config.py +0 -9
- dstack/_internal/core/models/backends/aws.py +0 -86
- dstack/_internal/core/models/backends/azure.py +0 -68
- dstack/_internal/core/models/backends/cudo.py +0 -43
- dstack/_internal/core/models/backends/datacrunch.py +0 -44
- dstack/_internal/core/models/backends/gcp.py +0 -67
- dstack/_internal/core/models/backends/kubernetes.py +0 -40
- dstack/_internal/core/models/backends/lambdalabs.py +0 -43
- dstack/_internal/core/models/backends/nebius.py +0 -54
- dstack/_internal/core/models/backends/runpod.py +0 -40
- dstack/_internal/core/models/backends/tensordock.py +0 -44
- dstack/_internal/core/models/backends/vastai.py +0 -43
- dstack/_internal/core/models/backends/vultr.py +0 -40
- dstack/_internal/core/models/pools.py +0 -43
- dstack/_internal/server/routers/pools.py +0 -142
- dstack/_internal/server/schemas/pools.py +0 -38
- dstack/_internal/server/services/backends/configurators/base.py +0 -72
- dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
- dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
- dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
- dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
- dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
- dstack/_internal/server/services/backends/configurators/runpod.py +0 -97
- dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
- dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
- dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
- dstack/api/_public/pools.py +0 -41
- dstack/api/_public/resources.py +0 -105
- dstack/api/server/_pools.py +0 -63
- tests/_internal/server/routers/test_pools.py +0 -612
- /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/WHEEL +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
from dstack._internal.core.backends.base.config import BackendConfig
|
|
2
|
-
from dstack._internal.core.models.backends.datacrunch import (
|
|
3
|
-
AnyDataCrunchCreds,
|
|
4
|
-
DataCrunchStoredConfig,
|
|
5
|
-
)
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class DataCrunchConfig(DataCrunchStoredConfig, BackendConfig):
|
|
9
|
-
creds: AnyDataCrunchCreds
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
from dstack._internal.core.backends.base.config import BackendConfig
|
|
2
|
-
from dstack._internal.core.models.backends.gcp import AnyGCPCreds, GCPStoredConfig
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class GCPConfig(GCPStoredConfig, BackendConfig):
|
|
6
|
-
creds: AnyGCPCreds
|
|
7
|
-
|
|
8
|
-
@property
|
|
9
|
-
def allocate_public_ips(self) -> bool:
|
|
10
|
-
if self.public_ips is not None:
|
|
11
|
-
return self.public_ips
|
|
12
|
-
return True
|
|
13
|
-
|
|
14
|
-
@property
|
|
15
|
-
def vpc_resource_name(self) -> str:
|
|
16
|
-
vpc_name = self.vpc_name
|
|
17
|
-
if vpc_name is None:
|
|
18
|
-
vpc_name = "default"
|
|
19
|
-
project_id = self.project_id
|
|
20
|
-
if self.vpc_project_id is not None:
|
|
21
|
-
project_id = self.vpc_project_id
|
|
22
|
-
return f"projects/{project_id}/global/networks/{vpc_name}"
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from dstack._internal.core.backends.base import Backend
|
|
2
|
-
from dstack._internal.core.backends.nebius.compute import NebiusCompute
|
|
3
|
-
from dstack._internal.core.backends.nebius.config import NebiusConfig
|
|
4
|
-
from dstack._internal.core.models.backends.base import BackendType
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class NebiusBackend(Backend):
|
|
8
|
-
TYPE: BackendType = BackendType.NEBIUS
|
|
9
|
-
|
|
10
|
-
def __init__(self, config: NebiusConfig):
|
|
11
|
-
self.config = config
|
|
12
|
-
self._compute = NebiusCompute(self.config)
|
|
13
|
-
|
|
14
|
-
def compute(self) -> NebiusCompute:
|
|
15
|
-
return self._compute
|
|
@@ -1,319 +0,0 @@
|
|
|
1
|
-
import time
|
|
2
|
-
from typing import Dict, List, Optional
|
|
3
|
-
|
|
4
|
-
import jwt
|
|
5
|
-
import requests
|
|
6
|
-
|
|
7
|
-
from dstack._internal.core.backends.nebius.types import (
|
|
8
|
-
ClientError,
|
|
9
|
-
ConflictError,
|
|
10
|
-
ForbiddenError,
|
|
11
|
-
NebiusError,
|
|
12
|
-
NotFoundError,
|
|
13
|
-
ResourcesSpec,
|
|
14
|
-
ServiceAccount,
|
|
15
|
-
)
|
|
16
|
-
from dstack._internal.utils.logging import get_logger
|
|
17
|
-
|
|
18
|
-
logger = get_logger("nebius")
|
|
19
|
-
API_URL = "api.ai.nebius.cloud"
|
|
20
|
-
REQUEST_TIMEOUT = 15
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class NebiusAPIClient:
|
|
24
|
-
# Reference: https://nebius.ai/docs/api-design-guide/
|
|
25
|
-
def __init__(self, service_account: ServiceAccount):
|
|
26
|
-
self.service_account = service_account
|
|
27
|
-
self.s = requests.Session()
|
|
28
|
-
self.expires_at = 0
|
|
29
|
-
|
|
30
|
-
def get_token(self):
|
|
31
|
-
now = int(time.time())
|
|
32
|
-
if now + 60 < self.expires_at:
|
|
33
|
-
return
|
|
34
|
-
logger.debug("Refreshing IAM token")
|
|
35
|
-
expires_at = now + 3600
|
|
36
|
-
payload = {
|
|
37
|
-
"aud": self.url("iam", "/tokens"),
|
|
38
|
-
"iss": self.service_account["service_account_id"],
|
|
39
|
-
"iat": now,
|
|
40
|
-
"exp": expires_at,
|
|
41
|
-
}
|
|
42
|
-
jwt_token = jwt.encode(
|
|
43
|
-
payload,
|
|
44
|
-
self.service_account["private_key"],
|
|
45
|
-
algorithm="PS256",
|
|
46
|
-
headers={"kid": self.service_account["id"]},
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
resp = requests.post(payload["aud"], json={"jwt": jwt_token}, timeout=REQUEST_TIMEOUT)
|
|
50
|
-
resp.raise_for_status()
|
|
51
|
-
iam_token = resp.json()["iamToken"]
|
|
52
|
-
self.s.headers["Authorization"] = f"Bearer {iam_token}"
|
|
53
|
-
self.expires_at = expires_at
|
|
54
|
-
|
|
55
|
-
def compute_zones_list(self) -> List[dict]:
|
|
56
|
-
logger.debug("Fetching compute zones")
|
|
57
|
-
self.get_token()
|
|
58
|
-
resp = self.s.get(self.url("compute", "/zones"), timeout=REQUEST_TIMEOUT)
|
|
59
|
-
self.raise_for_status(resp)
|
|
60
|
-
return resp.json()["zones"]
|
|
61
|
-
|
|
62
|
-
def resource_manager_folders_create(self, cloud_id: str, name: str, **kwargs) -> dict:
|
|
63
|
-
logger.debug("Creating folder %s", name)
|
|
64
|
-
self.get_token()
|
|
65
|
-
resp = self.s.post(
|
|
66
|
-
self.url("resource-manager", "/folders"),
|
|
67
|
-
json=omit_none(
|
|
68
|
-
cloudId=cloud_id,
|
|
69
|
-
name=name,
|
|
70
|
-
**kwargs,
|
|
71
|
-
),
|
|
72
|
-
timeout=REQUEST_TIMEOUT,
|
|
73
|
-
)
|
|
74
|
-
self.raise_for_status(resp)
|
|
75
|
-
return resp.json()
|
|
76
|
-
|
|
77
|
-
def vpc_networks_create(self, folder_id: str, name: str, **kwargs) -> dict:
|
|
78
|
-
logger.debug("Creating network %s in %s", name, folder_id)
|
|
79
|
-
self.get_token()
|
|
80
|
-
resp = self.s.post(
|
|
81
|
-
self.url("vpc", "/networks"),
|
|
82
|
-
json=omit_none(
|
|
83
|
-
folderId=folder_id,
|
|
84
|
-
name=name,
|
|
85
|
-
**kwargs,
|
|
86
|
-
),
|
|
87
|
-
timeout=REQUEST_TIMEOUT,
|
|
88
|
-
)
|
|
89
|
-
self.raise_for_status(resp)
|
|
90
|
-
return resp.json()
|
|
91
|
-
|
|
92
|
-
def vpc_networks_list(self, folder_id: str, filter: Optional[str] = None) -> List[dict]:
|
|
93
|
-
logger.debug("Fetching networks in %s", folder_id)
|
|
94
|
-
return self.list(
|
|
95
|
-
"vpc",
|
|
96
|
-
"networks",
|
|
97
|
-
params=dict(
|
|
98
|
-
folderId=folder_id,
|
|
99
|
-
filter=filter,
|
|
100
|
-
),
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
def vpc_subnets_create(
|
|
104
|
-
self,
|
|
105
|
-
folder_id: str,
|
|
106
|
-
name: str,
|
|
107
|
-
network_id: str,
|
|
108
|
-
zone: str,
|
|
109
|
-
cird_blocks: List[str],
|
|
110
|
-
**kwargs,
|
|
111
|
-
) -> dict:
|
|
112
|
-
logger.debug("Creating subnet %s in %s", name, network_id)
|
|
113
|
-
self.get_token()
|
|
114
|
-
resp = self.s.post(
|
|
115
|
-
self.url("vpc", "/subnets"),
|
|
116
|
-
json=omit_none(
|
|
117
|
-
folderId=folder_id,
|
|
118
|
-
name=name,
|
|
119
|
-
networkId=network_id,
|
|
120
|
-
zoneId=zone,
|
|
121
|
-
v4CidrBlocks=cird_blocks,
|
|
122
|
-
**kwargs,
|
|
123
|
-
),
|
|
124
|
-
timeout=REQUEST_TIMEOUT,
|
|
125
|
-
)
|
|
126
|
-
self.raise_for_status(resp)
|
|
127
|
-
return resp.json()
|
|
128
|
-
|
|
129
|
-
def vpc_subnets_list(self, folder_id: str, filter: Optional[str] = None) -> List[dict]:
|
|
130
|
-
logger.debug("Fetching subnets in %s", folder_id)
|
|
131
|
-
return self.list(
|
|
132
|
-
"vpc",
|
|
133
|
-
"subnets",
|
|
134
|
-
params=dict(
|
|
135
|
-
folderId=folder_id,
|
|
136
|
-
filter=filter,
|
|
137
|
-
),
|
|
138
|
-
)
|
|
139
|
-
|
|
140
|
-
def vpc_security_groups_create(
|
|
141
|
-
self, folder_id: str, name: str, network_id: str, rule_specs: List[dict], **kwargs
|
|
142
|
-
) -> dict:
|
|
143
|
-
logger.debug("Creating security group %s in %s", name, folder_id)
|
|
144
|
-
self.get_token()
|
|
145
|
-
resp = self.s.post(
|
|
146
|
-
self.url("vpc", "/securityGroups"),
|
|
147
|
-
json=omit_none(
|
|
148
|
-
folderId=folder_id,
|
|
149
|
-
name=name,
|
|
150
|
-
networkId=network_id,
|
|
151
|
-
ruleSpecs=rule_specs,
|
|
152
|
-
**kwargs,
|
|
153
|
-
),
|
|
154
|
-
timeout=REQUEST_TIMEOUT,
|
|
155
|
-
)
|
|
156
|
-
self.raise_for_status(resp)
|
|
157
|
-
return resp.json()
|
|
158
|
-
|
|
159
|
-
def vpc_security_groups_list(self, folder_id: str, filter: Optional[str] = None) -> List[dict]:
|
|
160
|
-
logger.debug("Fetching security groups in %s", folder_id)
|
|
161
|
-
return self.list(
|
|
162
|
-
"vpc",
|
|
163
|
-
"securityGroups",
|
|
164
|
-
params=dict(
|
|
165
|
-
folderId=folder_id,
|
|
166
|
-
filter=filter,
|
|
167
|
-
),
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
def vpc_security_groups_delete(self, security_group_id: str):
|
|
171
|
-
logger.debug("Deleting security group %s", security_group_id)
|
|
172
|
-
self.get_token()
|
|
173
|
-
resp = self.s.delete(
|
|
174
|
-
self.url("vpc", f"/securityGroups/{security_group_id}"), timeout=REQUEST_TIMEOUT
|
|
175
|
-
)
|
|
176
|
-
self.raise_for_status(resp)
|
|
177
|
-
|
|
178
|
-
def compute_instances_create(
|
|
179
|
-
self,
|
|
180
|
-
folder_id: str,
|
|
181
|
-
name: str,
|
|
182
|
-
zone_id: str,
|
|
183
|
-
platform_id: str,
|
|
184
|
-
resources_spec: ResourcesSpec,
|
|
185
|
-
metadata: Optional[Dict[str, str]],
|
|
186
|
-
disk_size_gb: int,
|
|
187
|
-
image_id: str,
|
|
188
|
-
subnet_id: str,
|
|
189
|
-
security_group_ids: List[str],
|
|
190
|
-
**kwargs,
|
|
191
|
-
) -> dict:
|
|
192
|
-
# Reference: https://nebius.ai/docs/api-design-guide/compute/v1/api-ref/Instance/create
|
|
193
|
-
logger.debug("Creating instance %s (%s) in %s", name, platform_id, folder_id)
|
|
194
|
-
self.get_token()
|
|
195
|
-
resp = self.s.post(
|
|
196
|
-
self.url("compute", "/instances"),
|
|
197
|
-
json=omit_none(
|
|
198
|
-
folderId=folder_id,
|
|
199
|
-
name=name,
|
|
200
|
-
zoneId=zone_id,
|
|
201
|
-
platformId=platform_id,
|
|
202
|
-
resourcesSpec=resources_spec,
|
|
203
|
-
metadata=metadata,
|
|
204
|
-
boot_disk_spec=dict(
|
|
205
|
-
autoDelete=True,
|
|
206
|
-
diskSpec=dict(
|
|
207
|
-
typeId="network-ssd",
|
|
208
|
-
size=disk_size_gb * 1024 * 1024 * 1024,
|
|
209
|
-
imageId=image_id,
|
|
210
|
-
),
|
|
211
|
-
),
|
|
212
|
-
networkInterfaceSpecs=[
|
|
213
|
-
dict(
|
|
214
|
-
subnetId=subnet_id,
|
|
215
|
-
primaryV4AddressSpec=dict(
|
|
216
|
-
oneToOneNatSpec=dict(
|
|
217
|
-
ipVersion="IPV4",
|
|
218
|
-
),
|
|
219
|
-
),
|
|
220
|
-
securityGroupIds=security_group_ids,
|
|
221
|
-
)
|
|
222
|
-
],
|
|
223
|
-
**kwargs,
|
|
224
|
-
),
|
|
225
|
-
timeout=REQUEST_TIMEOUT,
|
|
226
|
-
)
|
|
227
|
-
self.raise_for_status(resp)
|
|
228
|
-
return resp.json()
|
|
229
|
-
|
|
230
|
-
def compute_instances_list(
|
|
231
|
-
self, folder_id: str, filter: Optional[str] = None, order_by: Optional[str] = None
|
|
232
|
-
) -> List[dict]:
|
|
233
|
-
logger.debug("Fetching instances in %s", folder_id)
|
|
234
|
-
return self.list(
|
|
235
|
-
"compute",
|
|
236
|
-
"instances",
|
|
237
|
-
params=dict(
|
|
238
|
-
folderId=folder_id,
|
|
239
|
-
filter=filter,
|
|
240
|
-
orderBy=order_by,
|
|
241
|
-
),
|
|
242
|
-
)
|
|
243
|
-
|
|
244
|
-
def compute_instances_delete(self, instance_id: str):
|
|
245
|
-
logger.debug("Deleting instance %s", instance_id)
|
|
246
|
-
self.get_token()
|
|
247
|
-
resp = self.s.delete(
|
|
248
|
-
self.url("compute", f"/instances/{instance_id}"), timeout=REQUEST_TIMEOUT
|
|
249
|
-
)
|
|
250
|
-
self.raise_for_status(resp)
|
|
251
|
-
|
|
252
|
-
def compute_instances_get(self, instance_id: str, full: bool = False) -> dict:
|
|
253
|
-
logger.debug("Fetching instance %s", instance_id)
|
|
254
|
-
self.get_token()
|
|
255
|
-
resp = self.s.get(
|
|
256
|
-
self.url("compute", f"/instances/{instance_id}"),
|
|
257
|
-
params=dict(
|
|
258
|
-
view="FULL" if full else "BASIC",
|
|
259
|
-
),
|
|
260
|
-
timeout=REQUEST_TIMEOUT,
|
|
261
|
-
)
|
|
262
|
-
self.raise_for_status(resp)
|
|
263
|
-
return resp.json()
|
|
264
|
-
|
|
265
|
-
def compute_images_list(
|
|
266
|
-
self, folder_id: str, filter: Optional[str] = None, order_by: Optional[str] = None
|
|
267
|
-
):
|
|
268
|
-
logger.debug("Fetching images in %s", folder_id)
|
|
269
|
-
return self.list(
|
|
270
|
-
"compute",
|
|
271
|
-
"images",
|
|
272
|
-
params=dict(
|
|
273
|
-
folderId=folder_id,
|
|
274
|
-
filter=filter,
|
|
275
|
-
orderBy=order_by,
|
|
276
|
-
),
|
|
277
|
-
)
|
|
278
|
-
|
|
279
|
-
def list(self, service: str, resource: str, params: dict, page_size: int = 1000) -> List[dict]:
|
|
280
|
-
page_token = None
|
|
281
|
-
output = []
|
|
282
|
-
while True:
|
|
283
|
-
self.get_token()
|
|
284
|
-
resp = self.s.get(
|
|
285
|
-
self.url(service, f"/{resource}"),
|
|
286
|
-
params=omit_none(
|
|
287
|
-
pageSize=page_size,
|
|
288
|
-
pageToken=page_token,
|
|
289
|
-
**params,
|
|
290
|
-
),
|
|
291
|
-
timeout=REQUEST_TIMEOUT,
|
|
292
|
-
)
|
|
293
|
-
self.raise_for_status(resp)
|
|
294
|
-
data = resp.json()
|
|
295
|
-
output += data.get(resource, [])
|
|
296
|
-
page_token = data.get("nextPageToken")
|
|
297
|
-
if not page_token:
|
|
298
|
-
break
|
|
299
|
-
return output
|
|
300
|
-
|
|
301
|
-
def url(self, service: str, path: str, version="v1") -> str:
|
|
302
|
-
return f"https://{service}.{API_URL.rstrip('/')}/{service}/{version}/{path.lstrip('/')}"
|
|
303
|
-
|
|
304
|
-
def raise_for_status(self, resp: requests.Response):
|
|
305
|
-
if resp.status_code == 400:
|
|
306
|
-
raise NebiusError(resp.text)
|
|
307
|
-
if resp.status_code == 401:
|
|
308
|
-
raise ClientError(resp.text)
|
|
309
|
-
if resp.status_code == 403:
|
|
310
|
-
raise ForbiddenError(resp.text)
|
|
311
|
-
if resp.status_code == 404:
|
|
312
|
-
raise NotFoundError(resp.text)
|
|
313
|
-
if resp.status_code == 409:
|
|
314
|
-
raise ConflictError(resp.text)
|
|
315
|
-
resp.raise_for_status()
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
def omit_none(**kwargs) -> dict:
|
|
319
|
-
return {k: v for k, v in kwargs.items() if v is not None}
|
|
@@ -1,220 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import re
|
|
3
|
-
import time
|
|
4
|
-
from typing import List, Optional
|
|
5
|
-
|
|
6
|
-
import dstack.version as version
|
|
7
|
-
from dstack._internal import settings
|
|
8
|
-
from dstack._internal.core.backends.base import Compute
|
|
9
|
-
from dstack._internal.core.backends.base.compute import get_job_instance_name, get_user_data
|
|
10
|
-
from dstack._internal.core.backends.base.offers import get_catalog_offers
|
|
11
|
-
from dstack._internal.core.backends.nebius.api_client import NebiusAPIClient
|
|
12
|
-
from dstack._internal.core.backends.nebius.config import NebiusConfig
|
|
13
|
-
from dstack._internal.core.backends.nebius.types import (
|
|
14
|
-
ForbiddenError,
|
|
15
|
-
NotFoundError,
|
|
16
|
-
ResourcesSpec,
|
|
17
|
-
)
|
|
18
|
-
from dstack._internal.core.errors import NoCapacityError
|
|
19
|
-
from dstack._internal.core.models.backends.base import BackendType
|
|
20
|
-
from dstack._internal.core.models.instances import (
|
|
21
|
-
InstanceAvailability,
|
|
22
|
-
InstanceConfiguration,
|
|
23
|
-
InstanceOfferWithAvailability,
|
|
24
|
-
SSHKey,
|
|
25
|
-
)
|
|
26
|
-
from dstack._internal.core.models.resources import Memory, Range
|
|
27
|
-
from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
|
|
28
|
-
from dstack._internal.core.models.volumes import Volume
|
|
29
|
-
|
|
30
|
-
MEGABYTE = 1024**2
|
|
31
|
-
INSTANCE_PULL_INTERVAL = 10
|
|
32
|
-
# TODO: find out the actual lower bound considering dstack image size, 50GB is made up
|
|
33
|
-
CONFIGURABLE_DISK_SIZE = Range[Memory](min=Memory.parse("50GB"), max=Memory.parse("4TB"))
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class NebiusCompute(Compute):
    """Compute implementation that provisions instances through ``NebiusAPIClient``."""

    # Folder whose image list is searched for the dstack base image.
    _IMAGES_FOLDER_ID = "bjel82ie37qos4pc6guk"

    def __init__(self, config: NebiusConfig):
        """Keep the backend config and build the API client from its JSON-encoded credentials."""
        super().__init__()
        self.config = config
        self.api_client = NebiusAPIClient(json.loads(self.config.creds.data))

    def get_offers(
        self, requirements: Optional[Requirements] = None
    ) -> List[InstanceOfferWithAvailability]:
        """Return the catalog offers for the configured regions.

        Every offer is reported with ``InstanceAvailability.UNKNOWN`` because
        quotas are not checked yet.
        """
        offers = get_catalog_offers(
            backend=BackendType.NEBIUS,
            locations=self.config.regions,
            requirements=requirements,
            configurable_disk_size=CONFIGURABLE_DISK_SIZE,
        )
        # TODO(egor-s) quotas
        return [
            InstanceOfferWithAvailability(
                **offer.dict(), availability=InstanceAvailability.UNKNOWN
            )
            for offer in offers
        ]

    def create_instance(
        self,
        instance_offer: InstanceOfferWithAvailability,
        instance_config: InstanceConfiguration,
    ) -> JobProvisioningData:
        """Create an instance for the offer and wait until it reports a primary IPv4 address.

        The security group and subnet are looked up and created when absent
        before the instance is requested.

        Raises:
            NoCapacityError: the API answered with ``ForbiddenError`` and the
                error text mentions the requested instance type name.
            ForbiddenError: any other forbidden response.
            NotFoundError: the dstack image could not be found.
        """
        resources = instance_offer.instance.resources
        cuda = len(resources.gpus) > 0
        security_group_id = self._get_security_group_id(project_name=instance_config.project_name)
        subnet_id = self._get_subnet_id(zone=instance_offer.region)
        image_id = self._get_image_id(cuda=cuda)
        # The offer stores the disk size in MiB; the API call takes ``disk_size_gb``.
        disk_size = round(resources.disk.size_mib / 1024)

        try:
            resp = self.api_client.compute_instances_create(
                folder_id=self.config.folder_id,
                name=instance_config.instance_name,
                zone_id=instance_offer.region,
                platform_id=instance_offer.instance.name,
                resources_spec=ResourcesSpec(
                    memory=int(resources.memory_mib * MEGABYTE),
                    cores=resources.cpus,
                    coreFraction=100,
                    gpus=len(resources.gpus),
                ),
                metadata={
                    "user-data": get_user_data(authorized_keys=instance_config.get_public_keys())
                },
                disk_size_gb=disk_size,
                image_id=image_id,
                subnet_id=subnet_id,
                security_group_ids=[security_group_id],
                labels=self._get_labels(project=instance_config.project_name),
            )
        except ForbiddenError as e:
            details = str(e.args[0]) if e.args else ""
            # A forbidden response that names the instance type is reported as
            # missing capacity; everything else propagates unchanged.
            if instance_offer.instance.name in details:
                raise NoCapacityError(self._extract_error_message(details)) from e
            raise

        instance_id = resp["metadata"]["instanceId"]
        try:
            # NOTE(review): this loop has no timeout; it only ends when the
            # address shows up or the API client raises.
            while True:
                instance = self.api_client.compute_instances_get(instance_id)
                if "primaryV4Address" in instance["networkInterfaces"][0]:
                    break
                time.sleep(INSTANCE_PULL_INTERVAL)
        except Exception:
            # Do not leave a half-provisioned instance behind.
            self.terminate_instance(instance_id, instance_offer.region)
            raise

        primary_address = instance["networkInterfaces"][0]["primaryV4Address"]
        return JobProvisioningData(
            backend=instance_offer.backend,
            instance_type=instance_offer.instance,
            instance_id=instance_id,
            hostname=primary_address["oneToOneNat"]["address"],
            internal_ip=None,
            region=instance_offer.region,
            price=instance_offer.price,
            username="ubuntu",
            ssh_port=22,
            dockerized=True,
            ssh_proxy=None,
            backend_data=None,
        )

    def run_job(
        self,
        run: Run,
        job: Job,
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        volumes: List[Volume],
    ) -> JobProvisioningData:
        """Provision an instance for the job via :meth:`create_instance`.

        Only the project public key is authorized on the instance;
        ``project_ssh_private_key`` and ``volumes`` are not used here.
        """
        instance_config = InstanceConfiguration(
            project_name=run.project_name,
            instance_name=get_job_instance_name(run, job),  # TODO: generate name
            ssh_keys=[
                SSHKey(public=project_ssh_public_key.strip()),
            ],
            user=run.user,
        )
        return self.create_instance(instance_offer, instance_config)

    def terminate_instance(
        self, instance_id: str, region: str, backend_data: Optional[str] = None
    ):
        """Delete the instance; an instance that no longer exists is not an error."""
        try:
            self.api_client.compute_instances_delete(instance_id)
        except NotFoundError:
            # Already gone, nothing left to delete.
            pass

    @staticmethod
    def _extract_error_message(payload: str):
        """Return the ``message`` field of a JSON error payload, or the payload itself.

        The raw text is returned when the payload is not valid JSON or has no
        ``message`` key, so the original error text is never lost.
        """
        try:
            return json.loads(payload)["message"]
        except (ValueError, KeyError, TypeError):
            return payload

    def _get_security_group_id(self, project_name: str) -> str:
        """Return the id of the security group named after the project, creating it if absent.

        A new group allows SSH ingress on port 22 from anywhere, any ingress
        from the group itself and any egress.
        """
        name = project_name
        security_groups = self.api_client.vpc_security_groups_list(
            folder_id=self.config.folder_id,
            filter=f'name="{name}"',
        )
        if security_groups:
            return security_groups[0]["id"]
        resp = self.api_client.vpc_security_groups_create(
            folder_id=self.config.folder_id,
            name=name,
            network_id=self.config.network_id,
            rule_specs=[
                {
                    "description": "SSH access",
                    "direction": "INGRESS",
                    "ports": {"fromPort": 22, "toPort": 22},
                    "protocolName": "ANY",
                    "cidrBlocks": {"v4CidrBlocks": ["0.0.0.0/0"]},
                },
                {
                    "description": "Project intranet",
                    "direction": "INGRESS",
                    "protocolName": "ANY",
                    "predefinedTarget": "self_security_group",
                },
                {
                    "description": "Internet access",
                    "direction": "EGRESS",
                    "protocolName": "ANY",
                    "cidrBlocks": {"v4CidrBlocks": ["0.0.0.0/0"]},
                },
            ],
            description="For job instance, by dstack",
            labels=self._get_labels(project=project_name),
        )
        return resp["response"]["id"]

    def _get_subnet_id(self, zone: str, name: Optional[str] = None) -> str:
        """Return the id of the subnet called ``name`` (default ``default-<zone>``).

        When no subnet in the folder has that name, one is created with the
        CIDR ``10.<n>.0.0/16``, where ``n`` is the number of subnets that
        already exist in the folder.
        """
        name = name or f"default-{zone}"
        subnets = self.api_client.vpc_subnets_list(folder_id=self.config.folder_id)
        for subnet in subnets:
            if subnet["name"] == name:
                return subnet["id"]
        n = len(subnets)
        resp = self.api_client.vpc_subnets_create(
            folder_id=self.config.folder_id,
            name=name,
            network_id=self.config.network_id,
            zone=zone,
            # NOTE(review): keyword spelling (sic) kept as in the original call;
            # presumably it matches the API client's parameter name - verify.
            cird_blocks=[f"10.{n}.0.0/16"],
            labels=self._get_labels(),
        )
        return resp["response"]["id"]

    def _get_image_id(self, cuda: bool) -> str:
        """Return the id of the dstack image matching the base image version.

        The image name is ``dstack-<base_image>`` with every character outside
        ``[a-z0-9-]`` replaced by ``-``, plus a ``-cuda`` suffix when ``cuda``
        is true.

        Raises:
            NotFoundError: no image with that name exists in the images folder.
        """
        image_name = re.sub(r"[^a-z0-9-]", "-", f"dstack-{version.base_image}")
        if cuda:
            image_name += "-cuda"
        images = self.api_client.compute_images_list(
            folder_id=self._IMAGES_FOLDER_ID, filter=f'name="{image_name}"'
        )
        if not images:
            # Fail with the image name instead of a bare IndexError.
            raise NotFoundError(f"Image {image_name!r} not found")
        return images[0]["id"]

    def _get_labels(self, **kwargs) -> dict:
        """Build resource labels: ``owner=dstack``, the extra ``kwargs`` and the dstack version.

        The version label is added only when ``settings.DSTACK_VERSION`` is
        set; its dots are replaced with dashes.
        """
        labels = {
            "owner": "dstack",
            **kwargs,
        }
        if settings.DSTACK_VERSION is not None:
            labels["dstack-version"] = settings.DSTACK_VERSION.replace(".", "-")
        return labels
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
from typing import TypedDict
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class ServiceAccount(TypedDict):
    """Typed mapping with the string fields of a service-account key record.

    NOTE(review): presumably the shape of the JSON credentials handed to the
    Nebius API client - confirm against the client.
    """

    # Identifiers.
    id: str
    service_account_id: str
    # Creation timestamp, kept as a string.
    created_at: str
    # Key material and the algorithm it belongs to.
    key_algorithm: str
    public_key: str
    private_key: str
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class ResourcesSpec(TypedDict):
    """Typed mapping describing the resources requested for an instance.

    All four fields are integers; ``coreFraction`` keeps its camelCase key.
    """

    memory: int  # NOTE(review): presumably bytes - confirm against the API
    cores: int
    coreFraction: int
    gpus: int
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class NebiusError(Exception):
    """Root of the Nebius error hierarchy declared in this module."""


class ClientError(NebiusError):
    """Nebius error subtype; adds nothing beyond :class:`NebiusError`."""


class ForbiddenError(NebiusError):
    """Nebius error subtype signalling a forbidden request."""


class NotFoundError(NebiusError):
    """Nebius error subtype signalling a missing resource."""


class ConflictError(NebiusError):
    """Nebius error subtype signalling a conflicting request."""
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
from dstack._internal.core.backends.base.config import BackendConfig
|
|
2
|
-
from dstack._internal.core.models.backends.tensordock import (
|
|
3
|
-
AnyTensorDockCreds,
|
|
4
|
-
TensorDockStoredConfig,
|
|
5
|
-
)
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class TensorDockConfig(TensorDockStoredConfig, BackendConfig):
    """TensorDock backend config: the stored config fields plus the credentials."""

    # Credentials, in any of the forms covered by ``AnyTensorDockCreds``.
    creds: AnyTensorDockCreds
|