dstack 0.18.44__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267)
  1. dstack/_internal/cli/commands/gateway.py +15 -3
  2. dstack/_internal/cli/commands/logs.py +0 -22
  3. dstack/_internal/cli/commands/stats.py +8 -17
  4. dstack/_internal/cli/main.py +1 -5
  5. dstack/_internal/cli/services/configurators/fleet.py +4 -39
  6. dstack/_internal/cli/services/configurators/run.py +22 -21
  7. dstack/_internal/cli/services/profile.py +34 -83
  8. dstack/_internal/cli/utils/gateway.py +1 -1
  9. dstack/_internal/core/backends/__init__.py +56 -39
  10. dstack/_internal/core/backends/aws/__init__.py +0 -25
  11. dstack/_internal/core/backends/aws/auth.py +1 -10
  12. dstack/_internal/core/backends/aws/backend.py +26 -0
  13. dstack/_internal/core/backends/aws/compute.py +20 -45
  14. dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
  15. dstack/_internal/core/backends/aws/models.py +135 -0
  16. dstack/_internal/core/backends/aws/resources.py +1 -1
  17. dstack/_internal/core/backends/azure/__init__.py +0 -20
  18. dstack/_internal/core/backends/azure/auth.py +2 -11
  19. dstack/_internal/core/backends/azure/backend.py +21 -0
  20. dstack/_internal/core/backends/azure/compute.py +13 -27
  21. dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
  22. dstack/_internal/core/backends/azure/models.py +89 -0
  23. dstack/_internal/core/backends/base/__init__.py +0 -12
  24. dstack/_internal/core/backends/base/backend.py +18 -0
  25. dstack/_internal/core/backends/base/compute.py +153 -33
  26. dstack/_internal/core/backends/base/configurator.py +105 -0
  27. dstack/_internal/core/backends/base/models.py +14 -0
  28. dstack/_internal/core/backends/configurators.py +138 -0
  29. dstack/_internal/core/backends/cudo/__init__.py +0 -15
  30. dstack/_internal/core/backends/cudo/backend.py +16 -0
  31. dstack/_internal/core/backends/cudo/compute.py +8 -26
  32. dstack/_internal/core/backends/cudo/configurator.py +72 -0
  33. dstack/_internal/core/backends/cudo/models.py +37 -0
  34. dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
  35. dstack/_internal/core/backends/datacrunch/backend.py +16 -0
  36. dstack/_internal/core/backends/datacrunch/compute.py +8 -25
  37. dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
  38. dstack/_internal/core/backends/datacrunch/models.py +38 -0
  39. dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
  40. dstack/_internal/core/backends/gcp/__init__.py +0 -16
  41. dstack/_internal/core/backends/gcp/auth.py +2 -11
  42. dstack/_internal/core/backends/gcp/backend.py +17 -0
  43. dstack/_internal/core/backends/gcp/compute.py +13 -43
  44. dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
  45. dstack/_internal/core/backends/gcp/models.py +125 -0
  46. dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
  47. dstack/_internal/core/backends/kubernetes/backend.py +16 -0
  48. dstack/_internal/core/backends/kubernetes/compute.py +16 -5
  49. dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
  50. dstack/_internal/core/backends/kubernetes/models.py +72 -0
  51. dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
  52. dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
  53. dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
  54. dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
  55. dstack/_internal/core/backends/lambdalabs/models.py +37 -0
  56. dstack/_internal/core/backends/local/__init__.py +0 -13
  57. dstack/_internal/core/backends/local/backend.py +14 -0
  58. dstack/_internal/core/backends/local/compute.py +16 -2
  59. dstack/_internal/core/backends/models.py +128 -0
  60. dstack/_internal/core/backends/oci/__init__.py +0 -15
  61. dstack/_internal/core/backends/oci/auth.py +1 -5
  62. dstack/_internal/core/backends/oci/backend.py +16 -0
  63. dstack/_internal/core/backends/oci/compute.py +9 -23
  64. dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
  65. dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
  66. dstack/_internal/core/backends/oci/region.py +1 -1
  67. dstack/_internal/core/backends/runpod/__init__.py +0 -15
  68. dstack/_internal/core/backends/runpod/backend.py +16 -0
  69. dstack/_internal/core/backends/runpod/compute.py +7 -3
  70. dstack/_internal/core/backends/runpod/configurator.py +59 -0
  71. dstack/_internal/core/backends/runpod/models.py +54 -0
  72. dstack/_internal/core/backends/template/__init__.py +0 -0
  73. dstack/_internal/core/backends/tensordock/__init__.py +0 -15
  74. dstack/_internal/core/backends/tensordock/backend.py +16 -0
  75. dstack/_internal/core/backends/tensordock/compute.py +8 -27
  76. dstack/_internal/core/backends/tensordock/configurator.py +68 -0
  77. dstack/_internal/core/backends/tensordock/models.py +38 -0
  78. dstack/_internal/core/backends/vastai/__init__.py +0 -15
  79. dstack/_internal/core/backends/vastai/backend.py +16 -0
  80. dstack/_internal/core/backends/vastai/compute.py +2 -2
  81. dstack/_internal/core/backends/vastai/configurator.py +66 -0
  82. dstack/_internal/core/backends/vastai/models.py +37 -0
  83. dstack/_internal/core/backends/vultr/__init__.py +0 -15
  84. dstack/_internal/core/backends/vultr/backend.py +16 -0
  85. dstack/_internal/core/backends/vultr/compute.py +10 -24
  86. dstack/_internal/core/backends/vultr/configurator.py +64 -0
  87. dstack/_internal/core/backends/vultr/models.py +34 -0
  88. dstack/_internal/core/models/backends/__init__.py +0 -184
  89. dstack/_internal/core/models/backends/base.py +0 -19
  90. dstack/_internal/core/models/configurations.py +20 -15
  91. dstack/_internal/core/models/envs.py +4 -3
  92. dstack/_internal/core/models/fleets.py +17 -22
  93. dstack/_internal/core/models/gateways.py +3 -3
  94. dstack/_internal/core/models/instances.py +24 -0
  95. dstack/_internal/core/models/profiles.py +41 -46
  96. dstack/_internal/core/models/projects.py +1 -1
  97. dstack/_internal/core/models/repos/base.py +0 -5
  98. dstack/_internal/core/models/repos/local.py +3 -3
  99. dstack/_internal/core/models/repos/remote.py +26 -12
  100. dstack/_internal/core/models/repos/virtual.py +1 -1
  101. dstack/_internal/core/models/resources.py +45 -76
  102. dstack/_internal/core/models/runs.py +17 -19
  103. dstack/_internal/core/models/volumes.py +1 -3
  104. dstack/_internal/core/services/profiles.py +7 -16
  105. dstack/_internal/core/services/repos.py +0 -4
  106. dstack/_internal/server/app.py +0 -3
  107. dstack/_internal/server/background/tasks/process_gateways.py +4 -8
  108. dstack/_internal/server/background/tasks/process_instances.py +14 -9
  109. dstack/_internal/server/background/tasks/process_metrics.py +1 -1
  110. dstack/_internal/server/background/tasks/process_placement_groups.py +4 -1
  111. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +1 -1
  112. dstack/_internal/server/background/tasks/process_running_jobs.py +14 -5
  113. dstack/_internal/server/background/tasks/process_submitted_jobs.py +16 -37
  114. dstack/_internal/server/background/tasks/process_volumes.py +5 -2
  115. dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
  116. dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
  117. dstack/_internal/server/models.py +48 -9
  118. dstack/_internal/server/routers/backends.py +14 -23
  119. dstack/_internal/server/routers/instances.py +3 -4
  120. dstack/_internal/server/routers/metrics.py +10 -8
  121. dstack/_internal/server/routers/prometheus.py +1 -1
  122. dstack/_internal/server/routers/repos.py +1 -2
  123. dstack/_internal/server/routers/runs.py +13 -59
  124. dstack/_internal/server/schemas/gateways.py +14 -23
  125. dstack/_internal/server/schemas/projects.py +7 -2
  126. dstack/_internal/server/schemas/repos.py +2 -38
  127. dstack/_internal/server/schemas/runner.py +1 -0
  128. dstack/_internal/server/schemas/runs.py +1 -24
  129. dstack/_internal/server/services/backends/__init__.py +85 -158
  130. dstack/_internal/server/services/config.py +52 -576
  131. dstack/_internal/server/services/fleets.py +8 -103
  132. dstack/_internal/server/services/gateways/__init__.py +12 -4
  133. dstack/_internal/server/services/{pools.py → instances.py} +22 -329
  134. dstack/_internal/server/services/jobs/__init__.py +9 -6
  135. dstack/_internal/server/services/jobs/configurators/base.py +16 -0
  136. dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
  137. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
  138. dstack/_internal/server/services/metrics.py +39 -13
  139. dstack/_internal/server/services/offers.py +1 -1
  140. dstack/_internal/server/services/projects.py +23 -14
  141. dstack/_internal/server/services/prometheus.py +176 -18
  142. dstack/_internal/server/services/runs.py +24 -16
  143. dstack/_internal/server/services/volumes.py +8 -4
  144. dstack/_internal/server/statics/index.html +1 -1
  145. dstack/_internal/server/statics/{main-4eb116b97819badd1e2c.js → main-4a0fe83e84574654e397.js} +18 -14
  146. dstack/_internal/server/statics/{main-4eb116b97819badd1e2c.js.map → main-4a0fe83e84574654e397.js.map} +1 -1
  147. dstack/_internal/server/testing/common.py +58 -32
  148. dstack/_internal/utils/json_schema.py +6 -0
  149. dstack/_internal/utils/ssh.py +2 -1
  150. dstack/api/__init__.py +4 -0
  151. dstack/api/_public/__init__.py +16 -20
  152. dstack/api/_public/backends.py +1 -1
  153. dstack/api/_public/repos.py +36 -36
  154. dstack/api/_public/runs.py +167 -83
  155. dstack/api/server/__init__.py +11 -13
  156. dstack/api/server/_backends.py +12 -16
  157. dstack/api/server/_fleets.py +15 -57
  158. dstack/api/server/_gateways.py +3 -14
  159. dstack/api/server/_repos.py +1 -4
  160. dstack/api/server/_runs.py +21 -100
  161. dstack/api/server/_volumes.py +10 -5
  162. dstack/version.py +1 -1
  163. {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/METADATA +1 -1
  164. {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/RECORD +218 -204
  165. tests/_internal/cli/services/configurators/test_profile.py +6 -6
  166. tests/_internal/core/backends/aws/test_configurator.py +35 -0
  167. tests/_internal/core/backends/aws/test_resources.py +1 -1
  168. tests/_internal/core/backends/azure/test_configurator.py +61 -0
  169. tests/_internal/core/backends/cudo/__init__.py +0 -0
  170. tests/_internal/core/backends/cudo/test_configurator.py +37 -0
  171. tests/_internal/core/backends/datacrunch/__init__.py +0 -0
  172. tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
  173. tests/_internal/core/backends/gcp/test_configurator.py +42 -0
  174. tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
  175. tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
  176. tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
  177. tests/_internal/core/backends/oci/test_configurator.py +55 -0
  178. tests/_internal/core/backends/runpod/__init__.py +0 -0
  179. tests/_internal/core/backends/runpod/test_configurator.py +33 -0
  180. tests/_internal/core/backends/tensordock/__init__.py +0 -0
  181. tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
  182. tests/_internal/core/backends/vastai/__init__.py +0 -0
  183. tests/_internal/core/backends/vastai/test_configurator.py +33 -0
  184. tests/_internal/core/backends/vultr/__init__.py +0 -0
  185. tests/_internal/core/backends/vultr/test_configurator.py +33 -0
  186. tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
  187. tests/_internal/server/background/tasks/test_process_instances.py +49 -48
  188. tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
  189. tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
  190. tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +0 -3
  191. tests/_internal/server/background/tasks/test_process_running_jobs.py +0 -21
  192. tests/_internal/server/background/tasks/test_process_runs.py +8 -22
  193. tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
  194. tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
  195. tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
  196. tests/_internal/server/routers/test_backends.py +6 -764
  197. tests/_internal/server/routers/test_fleets.py +0 -26
  198. tests/_internal/server/routers/test_gateways.py +27 -3
  199. tests/_internal/server/routers/test_instances.py +0 -10
  200. tests/_internal/server/routers/test_metrics.py +27 -0
  201. tests/_internal/server/routers/test_projects.py +56 -0
  202. tests/_internal/server/routers/test_prometheus.py +116 -27
  203. tests/_internal/server/routers/test_repos.py +0 -15
  204. tests/_internal/server/routers/test_runs.py +4 -219
  205. tests/_internal/server/routers/test_volumes.py +2 -3
  206. tests/_internal/server/services/backends/__init__.py +0 -0
  207. tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
  208. tests/_internal/server/services/test_config.py +7 -4
  209. tests/_internal/server/services/test_fleets.py +1 -4
  210. tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
  211. tests/_internal/server/services/test_metrics.py +9 -5
  212. tests/_internal/server/services/test_repos.py +1 -14
  213. tests/_internal/server/services/test_runs.py +0 -4
  214. dstack/_internal/cli/commands/pool.py +0 -581
  215. dstack/_internal/cli/commands/run.py +0 -75
  216. dstack/_internal/core/backends/aws/config.py +0 -18
  217. dstack/_internal/core/backends/azure/config.py +0 -12
  218. dstack/_internal/core/backends/base/config.py +0 -5
  219. dstack/_internal/core/backends/cudo/config.py +0 -9
  220. dstack/_internal/core/backends/datacrunch/config.py +0 -9
  221. dstack/_internal/core/backends/gcp/config.py +0 -22
  222. dstack/_internal/core/backends/kubernetes/config.py +0 -6
  223. dstack/_internal/core/backends/lambdalabs/config.py +0 -9
  224. dstack/_internal/core/backends/nebius/__init__.py +0 -15
  225. dstack/_internal/core/backends/nebius/api_client.py +0 -319
  226. dstack/_internal/core/backends/nebius/compute.py +0 -220
  227. dstack/_internal/core/backends/nebius/config.py +0 -6
  228. dstack/_internal/core/backends/nebius/types.py +0 -37
  229. dstack/_internal/core/backends/oci/config.py +0 -6
  230. dstack/_internal/core/backends/runpod/config.py +0 -17
  231. dstack/_internal/core/backends/tensordock/config.py +0 -9
  232. dstack/_internal/core/backends/vastai/config.py +0 -6
  233. dstack/_internal/core/backends/vultr/config.py +0 -9
  234. dstack/_internal/core/models/backends/aws.py +0 -86
  235. dstack/_internal/core/models/backends/azure.py +0 -68
  236. dstack/_internal/core/models/backends/cudo.py +0 -43
  237. dstack/_internal/core/models/backends/datacrunch.py +0 -44
  238. dstack/_internal/core/models/backends/gcp.py +0 -67
  239. dstack/_internal/core/models/backends/kubernetes.py +0 -40
  240. dstack/_internal/core/models/backends/lambdalabs.py +0 -43
  241. dstack/_internal/core/models/backends/nebius.py +0 -54
  242. dstack/_internal/core/models/backends/runpod.py +0 -42
  243. dstack/_internal/core/models/backends/tensordock.py +0 -44
  244. dstack/_internal/core/models/backends/vastai.py +0 -43
  245. dstack/_internal/core/models/backends/vultr.py +0 -40
  246. dstack/_internal/core/models/pools.py +0 -43
  247. dstack/_internal/server/routers/pools.py +0 -142
  248. dstack/_internal/server/schemas/pools.py +0 -38
  249. dstack/_internal/server/services/backends/configurators/base.py +0 -72
  250. dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
  251. dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
  252. dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
  253. dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
  254. dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
  255. dstack/_internal/server/services/backends/configurators/runpod.py +0 -67
  256. dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
  257. dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
  258. dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
  259. dstack/api/_public/pools.py +0 -41
  260. dstack/api/_public/resources.py +0 -105
  261. dstack/api/server/_pools.py +0 -63
  262. tests/_internal/server/routers/test_pools.py +0 -612
  263. /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
  264. {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/LICENSE.md +0 -0
  265. {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/WHEEL +0 -0
  266. {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/entry_points.txt +0 -0
  267. {dstack-0.18.44.dist-info → dstack-0.19.0.dist-info}/top_level.txt +0 -0
@@ -1,9 +0,0 @@
1
- from dstack._internal.core.backends.base.config import BackendConfig
2
- from dstack._internal.core.models.backends.cudo import (
3
- AnyCudoCreds,
4
- CudoStoredConfig,
5
- )
6
-
7
-
8
- class CudoConfig(CudoStoredConfig, BackendConfig):
9
- creds: AnyCudoCreds
@@ -1,9 +0,0 @@
1
- from dstack._internal.core.backends.base.config import BackendConfig
2
- from dstack._internal.core.models.backends.datacrunch import (
3
- AnyDataCrunchCreds,
4
- DataCrunchStoredConfig,
5
- )
6
-
7
-
8
- class DataCrunchConfig(DataCrunchStoredConfig, BackendConfig):
9
- creds: AnyDataCrunchCreds
@@ -1,22 +0,0 @@
1
- from dstack._internal.core.backends.base.config import BackendConfig
2
- from dstack._internal.core.models.backends.gcp import AnyGCPCreds, GCPStoredConfig
3
-
4
-
5
- class GCPConfig(GCPStoredConfig, BackendConfig):
6
- creds: AnyGCPCreds
7
-
8
- @property
9
- def allocate_public_ips(self) -> bool:
10
- if self.public_ips is not None:
11
- return self.public_ips
12
- return True
13
-
14
- @property
15
- def vpc_resource_name(self) -> str:
16
- vpc_name = self.vpc_name
17
- if vpc_name is None:
18
- vpc_name = "default"
19
- project_id = self.project_id
20
- if self.vpc_project_id is not None:
21
- project_id = self.vpc_project_id
22
- return f"projects/{project_id}/global/networks/{vpc_name}"
@@ -1,6 +0,0 @@
1
- from dstack._internal.core.backends.base.config import BackendConfig
2
- from dstack._internal.core.models.backends.kubernetes import KubernetesStoredConfig
3
-
4
-
5
- class KubernetesConfig(KubernetesStoredConfig, BackendConfig):
6
- pass
@@ -1,9 +0,0 @@
1
- from dstack._internal.core.backends.base.config import BackendConfig
2
- from dstack._internal.core.models.backends.lambdalabs import (
3
- AnyLambdaCreds,
4
- LambdaStoredConfig,
5
- )
6
-
7
-
8
- class LambdaConfig(LambdaStoredConfig, BackendConfig):
9
- creds: AnyLambdaCreds
@@ -1,15 +0,0 @@
1
- from dstack._internal.core.backends.base import Backend
2
- from dstack._internal.core.backends.nebius.compute import NebiusCompute
3
- from dstack._internal.core.backends.nebius.config import NebiusConfig
4
- from dstack._internal.core.models.backends.base import BackendType
5
-
6
-
7
- class NebiusBackend(Backend):
8
- TYPE: BackendType = BackendType.NEBIUS
9
-
10
- def __init__(self, config: NebiusConfig):
11
- self.config = config
12
- self._compute = NebiusCompute(self.config)
13
-
14
- def compute(self) -> NebiusCompute:
15
- return self._compute
@@ -1,319 +0,0 @@
1
- import time
2
- from typing import Dict, List, Optional
3
-
4
- import jwt
5
- import requests
6
-
7
- from dstack._internal.core.backends.nebius.types import (
8
- ClientError,
9
- ConflictError,
10
- ForbiddenError,
11
- NebiusError,
12
- NotFoundError,
13
- ResourcesSpec,
14
- ServiceAccount,
15
- )
16
- from dstack._internal.utils.logging import get_logger
17
-
18
- logger = get_logger("nebius")
19
- API_URL = "api.ai.nebius.cloud"
20
- REQUEST_TIMEOUT = 15
21
-
22
-
23
- class NebiusAPIClient:
24
- # Reference: https://nebius.ai/docs/api-design-guide/
25
- def __init__(self, service_account: ServiceAccount):
26
- self.service_account = service_account
27
- self.s = requests.Session()
28
- self.expires_at = 0
29
-
30
- def get_token(self):
31
- now = int(time.time())
32
- if now + 60 < self.expires_at:
33
- return
34
- logger.debug("Refreshing IAM token")
35
- expires_at = now + 3600
36
- payload = {
37
- "aud": self.url("iam", "/tokens"),
38
- "iss": self.service_account["service_account_id"],
39
- "iat": now,
40
- "exp": expires_at,
41
- }
42
- jwt_token = jwt.encode(
43
- payload,
44
- self.service_account["private_key"],
45
- algorithm="PS256",
46
- headers={"kid": self.service_account["id"]},
47
- )
48
-
49
- resp = requests.post(payload["aud"], json={"jwt": jwt_token}, timeout=REQUEST_TIMEOUT)
50
- resp.raise_for_status()
51
- iam_token = resp.json()["iamToken"]
52
- self.s.headers["Authorization"] = f"Bearer {iam_token}"
53
- self.expires_at = expires_at
54
-
55
- def compute_zones_list(self) -> List[dict]:
56
- logger.debug("Fetching compute zones")
57
- self.get_token()
58
- resp = self.s.get(self.url("compute", "/zones"), timeout=REQUEST_TIMEOUT)
59
- self.raise_for_status(resp)
60
- return resp.json()["zones"]
61
-
62
- def resource_manager_folders_create(self, cloud_id: str, name: str, **kwargs) -> dict:
63
- logger.debug("Creating folder %s", name)
64
- self.get_token()
65
- resp = self.s.post(
66
- self.url("resource-manager", "/folders"),
67
- json=omit_none(
68
- cloudId=cloud_id,
69
- name=name,
70
- **kwargs,
71
- ),
72
- timeout=REQUEST_TIMEOUT,
73
- )
74
- self.raise_for_status(resp)
75
- return resp.json()
76
-
77
- def vpc_networks_create(self, folder_id: str, name: str, **kwargs) -> dict:
78
- logger.debug("Creating network %s in %s", name, folder_id)
79
- self.get_token()
80
- resp = self.s.post(
81
- self.url("vpc", "/networks"),
82
- json=omit_none(
83
- folderId=folder_id,
84
- name=name,
85
- **kwargs,
86
- ),
87
- timeout=REQUEST_TIMEOUT,
88
- )
89
- self.raise_for_status(resp)
90
- return resp.json()
91
-
92
- def vpc_networks_list(self, folder_id: str, filter: Optional[str] = None) -> List[dict]:
93
- logger.debug("Fetching networks in %s", folder_id)
94
- return self.list(
95
- "vpc",
96
- "networks",
97
- params=dict(
98
- folderId=folder_id,
99
- filter=filter,
100
- ),
101
- )
102
-
103
- def vpc_subnets_create(
104
- self,
105
- folder_id: str,
106
- name: str,
107
- network_id: str,
108
- zone: str,
109
- cird_blocks: List[str],
110
- **kwargs,
111
- ) -> dict:
112
- logger.debug("Creating subnet %s in %s", name, network_id)
113
- self.get_token()
114
- resp = self.s.post(
115
- self.url("vpc", "/subnets"),
116
- json=omit_none(
117
- folderId=folder_id,
118
- name=name,
119
- networkId=network_id,
120
- zoneId=zone,
121
- v4CidrBlocks=cird_blocks,
122
- **kwargs,
123
- ),
124
- timeout=REQUEST_TIMEOUT,
125
- )
126
- self.raise_for_status(resp)
127
- return resp.json()
128
-
129
- def vpc_subnets_list(self, folder_id: str, filter: Optional[str] = None) -> List[dict]:
130
- logger.debug("Fetching subnets in %s", folder_id)
131
- return self.list(
132
- "vpc",
133
- "subnets",
134
- params=dict(
135
- folderId=folder_id,
136
- filter=filter,
137
- ),
138
- )
139
-
140
- def vpc_security_groups_create(
141
- self, folder_id: str, name: str, network_id: str, rule_specs: List[dict], **kwargs
142
- ) -> dict:
143
- logger.debug("Creating security group %s in %s", name, folder_id)
144
- self.get_token()
145
- resp = self.s.post(
146
- self.url("vpc", "/securityGroups"),
147
- json=omit_none(
148
- folderId=folder_id,
149
- name=name,
150
- networkId=network_id,
151
- ruleSpecs=rule_specs,
152
- **kwargs,
153
- ),
154
- timeout=REQUEST_TIMEOUT,
155
- )
156
- self.raise_for_status(resp)
157
- return resp.json()
158
-
159
- def vpc_security_groups_list(self, folder_id: str, filter: Optional[str] = None) -> List[dict]:
160
- logger.debug("Fetching security groups in %s", folder_id)
161
- return self.list(
162
- "vpc",
163
- "securityGroups",
164
- params=dict(
165
- folderId=folder_id,
166
- filter=filter,
167
- ),
168
- )
169
-
170
- def vpc_security_groups_delete(self, security_group_id: str):
171
- logger.debug("Deleting security group %s", security_group_id)
172
- self.get_token()
173
- resp = self.s.delete(
174
- self.url("vpc", f"/securityGroups/{security_group_id}"), timeout=REQUEST_TIMEOUT
175
- )
176
- self.raise_for_status(resp)
177
-
178
- def compute_instances_create(
179
- self,
180
- folder_id: str,
181
- name: str,
182
- zone_id: str,
183
- platform_id: str,
184
- resources_spec: ResourcesSpec,
185
- metadata: Optional[Dict[str, str]],
186
- disk_size_gb: int,
187
- image_id: str,
188
- subnet_id: str,
189
- security_group_ids: List[str],
190
- **kwargs,
191
- ) -> dict:
192
- # Reference: https://nebius.ai/docs/api-design-guide/compute/v1/api-ref/Instance/create
193
- logger.debug("Creating instance %s (%s) in %s", name, platform_id, folder_id)
194
- self.get_token()
195
- resp = self.s.post(
196
- self.url("compute", "/instances"),
197
- json=omit_none(
198
- folderId=folder_id,
199
- name=name,
200
- zoneId=zone_id,
201
- platformId=platform_id,
202
- resourcesSpec=resources_spec,
203
- metadata=metadata,
204
- boot_disk_spec=dict(
205
- autoDelete=True,
206
- diskSpec=dict(
207
- typeId="network-ssd",
208
- size=disk_size_gb * 1024 * 1024 * 1024,
209
- imageId=image_id,
210
- ),
211
- ),
212
- networkInterfaceSpecs=[
213
- dict(
214
- subnetId=subnet_id,
215
- primaryV4AddressSpec=dict(
216
- oneToOneNatSpec=dict(
217
- ipVersion="IPV4",
218
- ),
219
- ),
220
- securityGroupIds=security_group_ids,
221
- )
222
- ],
223
- **kwargs,
224
- ),
225
- timeout=REQUEST_TIMEOUT,
226
- )
227
- self.raise_for_status(resp)
228
- return resp.json()
229
-
230
- def compute_instances_list(
231
- self, folder_id: str, filter: Optional[str] = None, order_by: Optional[str] = None
232
- ) -> List[dict]:
233
- logger.debug("Fetching instances in %s", folder_id)
234
- return self.list(
235
- "compute",
236
- "instances",
237
- params=dict(
238
- folderId=folder_id,
239
- filter=filter,
240
- orderBy=order_by,
241
- ),
242
- )
243
-
244
- def compute_instances_delete(self, instance_id: str):
245
- logger.debug("Deleting instance %s", instance_id)
246
- self.get_token()
247
- resp = self.s.delete(
248
- self.url("compute", f"/instances/{instance_id}"), timeout=REQUEST_TIMEOUT
249
- )
250
- self.raise_for_status(resp)
251
-
252
- def compute_instances_get(self, instance_id: str, full: bool = False) -> dict:
253
- logger.debug("Fetching instance %s", instance_id)
254
- self.get_token()
255
- resp = self.s.get(
256
- self.url("compute", f"/instances/{instance_id}"),
257
- params=dict(
258
- view="FULL" if full else "BASIC",
259
- ),
260
- timeout=REQUEST_TIMEOUT,
261
- )
262
- self.raise_for_status(resp)
263
- return resp.json()
264
-
265
- def compute_images_list(
266
- self, folder_id: str, filter: Optional[str] = None, order_by: Optional[str] = None
267
- ):
268
- logger.debug("Fetching images in %s", folder_id)
269
- return self.list(
270
- "compute",
271
- "images",
272
- params=dict(
273
- folderId=folder_id,
274
- filter=filter,
275
- orderBy=order_by,
276
- ),
277
- )
278
-
279
- def list(self, service: str, resource: str, params: dict, page_size: int = 1000) -> List[dict]:
280
- page_token = None
281
- output = []
282
- while True:
283
- self.get_token()
284
- resp = self.s.get(
285
- self.url(service, f"/{resource}"),
286
- params=omit_none(
287
- pageSize=page_size,
288
- pageToken=page_token,
289
- **params,
290
- ),
291
- timeout=REQUEST_TIMEOUT,
292
- )
293
- self.raise_for_status(resp)
294
- data = resp.json()
295
- output += data.get(resource, [])
296
- page_token = data.get("nextPageToken")
297
- if not page_token:
298
- break
299
- return output
300
-
301
- def url(self, service: str, path: str, version="v1") -> str:
302
- return f"https://{service}.{API_URL.rstrip('/')}/{service}/{version}/{path.lstrip('/')}"
303
-
304
- def raise_for_status(self, resp: requests.Response):
305
- if resp.status_code == 400:
306
- raise NebiusError(resp.text)
307
- if resp.status_code == 401:
308
- raise ClientError(resp.text)
309
- if resp.status_code == 403:
310
- raise ForbiddenError(resp.text)
311
- if resp.status_code == 404:
312
- raise NotFoundError(resp.text)
313
- if resp.status_code == 409:
314
- raise ConflictError(resp.text)
315
- resp.raise_for_status()
316
-
317
-
318
- def omit_none(**kwargs) -> dict:
319
- return {k: v for k, v in kwargs.items() if v is not None}
@@ -1,220 +0,0 @@
1
- import json
2
- import re
3
- import time
4
- from typing import List, Optional
5
-
6
- import dstack.version as version
7
- from dstack._internal import settings
8
- from dstack._internal.core.backends.base import Compute
9
- from dstack._internal.core.backends.base.compute import get_job_instance_name, get_user_data
10
- from dstack._internal.core.backends.base.offers import get_catalog_offers
11
- from dstack._internal.core.backends.nebius.api_client import NebiusAPIClient
12
- from dstack._internal.core.backends.nebius.config import NebiusConfig
13
- from dstack._internal.core.backends.nebius.types import (
14
- ForbiddenError,
15
- NotFoundError,
16
- ResourcesSpec,
17
- )
18
- from dstack._internal.core.errors import NoCapacityError
19
- from dstack._internal.core.models.backends.base import BackendType
20
- from dstack._internal.core.models.instances import (
21
- InstanceAvailability,
22
- InstanceConfiguration,
23
- InstanceOfferWithAvailability,
24
- SSHKey,
25
- )
26
- from dstack._internal.core.models.resources import Memory, Range
27
- from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
28
- from dstack._internal.core.models.volumes import Volume
29
-
30
# Multiplier applied to an offer's `memory_mib` when filling the instance resources spec.
MEGABYTE = 1024 * 1024
# Argument of `time.sleep` between two polls of a newly created instance.
INSTANCE_PULL_INTERVAL = 10
# Disk size range passed to `get_catalog_offers` as `configurable_disk_size`.
# TODO: find out the actual lower bound considering dstack image size, 50GB is made up
CONFIGURABLE_DISK_SIZE = Range[Memory](
    min=Memory.parse("50GB"),
    max=Memory.parse("4TB"),
)
34
-
35
-
36
class NebiusCompute(Compute):
    """Compute implementation that provisions instances through ``NebiusAPIClient``."""

    # Folder searched by `_get_image_id` for the dstack VM images.
    _IMAGES_FOLDER_ID = "bjel82ie37qos4pc6guk"

    def __init__(self, config: NebiusConfig):
        """Keep the config and build the API client from the JSON-encoded credentials."""
        super().__init__()
        self.config = config
        self.api_client = NebiusAPIClient(json.loads(self.config.creds.data))

    def get_offers(
        self, requirements: Optional[Requirements] = None
    ) -> List[InstanceOfferWithAvailability]:
        """Return catalog offers for the configured regions.

        Availability is always reported as ``UNKNOWN`` because quotas are not
        checked yet.
        """
        offers = get_catalog_offers(
            backend=BackendType.NEBIUS,
            locations=self.config.regions,
            requirements=requirements,
            configurable_disk_size=CONFIGURABLE_DISK_SIZE,
        )
        # TODO(egor-s) quotas
        return [
            InstanceOfferWithAvailability(
                **offer.dict(), availability=InstanceAvailability.UNKNOWN
            )
            for offer in offers
        ]

    def create_instance(
        self,
        instance_offer: InstanceOfferWithAvailability,
        instance_config: InstanceConfiguration,
    ) -> JobProvisioningData:
        """Create an instance for the offer and wait until it has an IPv4 address.

        Args:
            instance_offer: Offer describing the platform, region, resources and price.
            instance_config: Project name, instance name and SSH keys to install.

        Returns:
            Provisioning data whose hostname is the one-to-one NAT address of
            the instance's first network interface.

        Raises:
            NoCapacityError: The create call was rejected with a
                ``ForbiddenError`` whose text mentions the requested platform.
            ForbiddenError: Any other forbidden response from the create call.
        """
        resources = instance_offer.instance.resources
        gpu_count = len(resources.gpus)
        security_group_id = self._get_security_group_id(project_name=instance_config.project_name)
        subnet_id = self._get_subnet_id(zone=instance_offer.region)
        image_id = self._get_image_id(cuda=gpu_count > 0)

        try:
            # The offer stores the disk size in MiB; the API call takes `disk_size_gb`.
            disk_size = round(resources.disk.size_mib / 1024)
            resp = self.api_client.compute_instances_create(
                folder_id=self.config.folder_id,
                name=instance_config.instance_name,
                zone_id=instance_offer.region,
                platform_id=instance_offer.instance.name,
                resources_spec=ResourcesSpec(
                    memory=int(resources.memory_mib * MEGABYTE),
                    cores=resources.cpus,
                    coreFraction=100,
                    gpus=gpu_count,
                ),
                metadata={
                    "user-data": get_user_data(authorized_keys=instance_config.get_public_keys())
                },
                disk_size_gb=disk_size,
                image_id=image_id,
                subnet_id=subnet_id,
                security_group_ids=[security_group_id],
                labels=self._get_labels(project=instance_config.project_name),
            )
        except ForbiddenError as e:
            # A forbidden response that names the platform is reported as lack of capacity.
            if e.args and instance_offer.instance.name in str(e.args[0]):
                message = self._get_error_message(str(e.args[0]))
                # Chain the original error so the API response stays in the traceback.
                raise NoCapacityError(message) from e
            raise
        instance_id = resp["metadata"]["instanceId"]
        try:
            # NOTE(review): this wait has no upper bound; it ends only when the
            # address appears or the API client raises.
            while True:
                instance = self.api_client.compute_instances_get(instance_id)
                if "primaryV4Address" in instance["networkInterfaces"][0]:
                    break
                time.sleep(INSTANCE_PULL_INTERVAL)
        except Exception:
            # Do not leave a half-provisioned instance behind.
            self.terminate_instance(instance_id, instance_offer.region)
            raise
        return JobProvisioningData(
            backend=instance_offer.backend,
            instance_type=instance_offer.instance,
            instance_id=instance_id,
            hostname=instance["networkInterfaces"][0]["primaryV4Address"]["oneToOneNat"][
                "address"
            ],
            internal_ip=None,
            region=instance_offer.region,
            price=instance_offer.price,
            username="ubuntu",
            ssh_port=22,
            dockerized=True,
            ssh_proxy=None,
            backend_data=None,
        )

    def run_job(
        self,
        run: Run,
        job: Job,
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        volumes: List[Volume],
    ) -> JobProvisioningData:
        """Provision an instance for a job by delegating to ``create_instance``.

        Only the project's public SSH key is installed on the instance;
        ``project_ssh_private_key`` and ``volumes`` are not used here.
        """
        instance_config = InstanceConfiguration(
            project_name=run.project_name,
            instance_name=get_job_instance_name(run, job),  # TODO: generate name
            ssh_keys=[
                SSHKey(public=project_ssh_public_key.strip()),
            ],
            user=run.user,
        )
        return self.create_instance(instance_offer, instance_config)

    def terminate_instance(
        self, instance_id: str, region: str, backend_data: Optional[str] = None
    ):
        """Delete the instance; an instance that no longer exists is not an error.

        ``region`` and ``backend_data`` are accepted but not used.
        """
        try:
            self.api_client.compute_instances_delete(instance_id)
        except NotFoundError:
            # Already gone: termination is treated as successful.
            pass

    def _get_security_group_id(self, project_name: str) -> str:
        """Return the id of the security group named after the project, creating it if missing.

        A new group allows ingress on port 22 from anywhere, any ingress from
        members of the same group, and any egress.
        """
        name = project_name
        security_groups = self.api_client.vpc_security_groups_list(
            folder_id=self.config.folder_id,
            filter=f'name="{name}"',
        )
        if security_groups:
            return security_groups[0]["id"]
        resp = self.api_client.vpc_security_groups_create(
            folder_id=self.config.folder_id,
            name=name,
            network_id=self.config.network_id,
            rule_specs=[
                {
                    "description": "SSH access",
                    "direction": "INGRESS",
                    "ports": {"fromPort": 22, "toPort": 22},
                    "protocolName": "ANY",
                    "cidrBlocks": {"v4CidrBlocks": ["0.0.0.0/0"]},
                },
                {
                    "description": "Project intranet",
                    "direction": "INGRESS",
                    "protocolName": "ANY",
                    "predefinedTarget": "self_security_group",
                },
                {
                    "description": "Internet access",
                    "direction": "EGRESS",
                    "protocolName": "ANY",
                    "cidrBlocks": {"v4CidrBlocks": ["0.0.0.0/0"]},
                },
            ],
            description="For job instance, by dstack",
            labels=self._get_labels(project=project_name),
        )
        return resp["response"]["id"]

    def _get_subnet_id(self, zone: str, name: Optional[str] = None) -> str:
        """Return the id of the subnet called ``name``, creating it if missing.

        ``name`` defaults to ``default-<zone>``.
        """
        name = name or f"default-{zone}"
        subnets = self.api_client.vpc_subnets_list(folder_id=self.config.folder_id)
        for subnet in subnets:
            if subnet["name"] == name:
                return subnet["id"]
        # The number of existing subnets becomes the second octet of the new block.
        # NOTE(review): this can collide with an existing block or exceed 255 — confirm.
        n = len(subnets)
        resp = self.api_client.vpc_subnets_create(
            folder_id=self.config.folder_id,
            name=name,
            network_id=self.config.network_id,
            zone=zone,
            # NOTE(review): keyword is spelled `cird_blocks`; presumably it matches
            # the API client's parameter name — confirm before renaming.
            cird_blocks=[f"10.{n}.0.0/16"],
            labels=self._get_labels(),
        )
        return resp["response"]["id"]

    def _get_image_id(self, cuda: bool) -> str:
        """Return the id of the dstack image for the current base image version.

        The image name is ``dstack-<base_image>`` with every character outside
        ``[a-z0-9-]`` replaced by ``-``, plus a ``-cuda`` suffix when ``cuda``
        is true.

        Raises:
            NotFoundError: No image with that name exists in the images folder.
        """
        image_name = re.sub(r"[^a-z0-9-]", "-", f"dstack-{version.base_image}")
        if cuda:
            image_name += "-cuda"
        images = self.api_client.compute_images_list(
            folder_id=self._IMAGES_FOLDER_ID, filter=f'name="{image_name}"'
        )
        if not images:
            # Name the missing image instead of failing with a bare IndexError.
            raise NotFoundError(
                f"Image {image_name!r} not found in folder {self._IMAGES_FOLDER_ID}"
            )
        return images[0]["id"]

    def _get_labels(self, **kwargs) -> dict:
        """Return resource labels: ``owner=dstack``, the given extras and the dstack version.

        Dots in the version are replaced with dashes; the version label is
        omitted when ``settings.DSTACK_VERSION`` is ``None``.
        """
        labels = {
            "owner": "dstack",
            **kwargs,
        }
        if settings.DSTACK_VERSION is not None:
            labels["dstack-version"] = settings.DSTACK_VERSION.replace(".", "-")
        return labels

    @staticmethod
    def _get_error_message(error_text: str) -> str:
        """Return the ``message`` field of a JSON error body, or the raw text if it has none."""
        try:
            return json.loads(error_text)["message"]
        except (ValueError, KeyError, TypeError):
            # Not JSON, or JSON without a "message" key: fall back to the raw text.
            return error_text
@@ -1,6 +0,0 @@
1
- from dstack._internal.core.backends.base.config import BackendConfig
2
- from dstack._internal.core.models.backends.nebius import AnyNebiusCreds, NebiusStoredConfig
3
-
4
-
5
class NebiusConfig(NebiusStoredConfig, BackendConfig):
    """Nebius backend config: the stored config fields plus the credentials."""

    # Credentials used together with the stored settings.
    creds: AnyNebiusCreds
@@ -1,37 +0,0 @@
1
- from typing import TypedDict
2
-
3
-
4
class ServiceAccount(TypedDict):
    """Typed dictionary with the fields of a service account key.

    NOTE(review): presumably the shape of the parsed credentials JSON — confirm
    against the API client that consumes it.
    """

    # Identifiers.
    id: str
    service_account_id: str
    # Key metadata.
    created_at: str
    key_algorithm: str
    # Key material.
    public_key: str
    private_key: str
11
-
12
-
13
class ResourcesSpec(TypedDict):
    """Typed dictionary describing the resources requested for an instance."""

    # Amount of memory. NOTE(review): presumably bytes — confirm against the API.
    memory: int
    # Number of CPU cores.
    cores: int
    # Core fraction; key is camelCase, unlike the other fields.
    coreFraction: int
    # Number of GPUs.
    gpus: int
18
-
19
-
20
class NebiusError(Exception):
    """Base class of the Nebius backend exceptions."""
22
-
23
-
24
class ClientError(NebiusError):
    """Nebius error for a rejected client request."""
26
-
27
-
28
class ForbiddenError(NebiusError):
    """Nebius error for a forbidden operation."""
30
-
31
-
32
class NotFoundError(NebiusError):
    """Nebius error for a resource that does not exist."""
34
-
35
-
36
class ConflictError(NebiusError):
    """Nebius error for a conflicting operation."""
@@ -1,6 +0,0 @@
1
- from dstack._internal.core.backends.base.config import BackendConfig
2
- from dstack._internal.core.models.backends.oci import AnyOCICreds, OCIStoredConfig
3
-
4
-
5
class OCIConfig(OCIStoredConfig, BackendConfig):
    """OCI backend config: the stored config fields plus the credentials."""

    # Credentials used together with the stored settings.
    creds: AnyOCICreds
@@ -1,17 +0,0 @@
1
- from dstack._internal.core.backends.base.config import BackendConfig
2
- from dstack._internal.core.models.backends.runpod import (
3
- AnyRunpodCreds,
4
- RunpodStoredConfig,
5
- )
6
-
7
# Value of `allow_community_cloud` when the config leaves `community_cloud` unset.
RUNPOD_COMMUNITY_CLOUD_DEFAULT = True


class RunpodConfig(RunpodStoredConfig, BackendConfig):
    """Runpod backend config: the stored config fields plus the credentials."""

    creds: AnyRunpodCreds

    @property
    def allow_community_cloud(self) -> bool:
        """Return ``community_cloud`` if it is set, otherwise the module default."""
        configured = self.community_cloud
        return RUNPOD_COMMUNITY_CLOUD_DEFAULT if configured is None else configured