dstack 0.18.43__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (278)
  1. dstack/_internal/cli/commands/gateway.py +15 -3
  2. dstack/_internal/cli/commands/logs.py +0 -22
  3. dstack/_internal/cli/commands/stats.py +8 -17
  4. dstack/_internal/cli/main.py +1 -5
  5. dstack/_internal/cli/services/configurators/fleet.py +4 -39
  6. dstack/_internal/cli/services/configurators/run.py +22 -20
  7. dstack/_internal/cli/services/profile.py +34 -83
  8. dstack/_internal/cli/utils/gateway.py +1 -1
  9. dstack/_internal/cli/utils/run.py +11 -0
  10. dstack/_internal/core/backends/__init__.py +56 -39
  11. dstack/_internal/core/backends/aws/__init__.py +0 -25
  12. dstack/_internal/core/backends/aws/auth.py +1 -10
  13. dstack/_internal/core/backends/aws/backend.py +26 -0
  14. dstack/_internal/core/backends/aws/compute.py +21 -45
  15. dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
  16. dstack/_internal/core/backends/aws/models.py +135 -0
  17. dstack/_internal/core/backends/aws/resources.py +1 -1
  18. dstack/_internal/core/backends/azure/__init__.py +0 -20
  19. dstack/_internal/core/backends/azure/auth.py +2 -11
  20. dstack/_internal/core/backends/azure/backend.py +21 -0
  21. dstack/_internal/core/backends/azure/compute.py +14 -28
  22. dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
  23. dstack/_internal/core/backends/azure/models.py +89 -0
  24. dstack/_internal/core/backends/base/__init__.py +0 -12
  25. dstack/_internal/core/backends/base/backend.py +18 -0
  26. dstack/_internal/core/backends/base/compute.py +153 -33
  27. dstack/_internal/core/backends/base/configurator.py +105 -0
  28. dstack/_internal/core/backends/base/models.py +14 -0
  29. dstack/_internal/core/backends/configurators.py +138 -0
  30. dstack/_internal/core/backends/cudo/__init__.py +0 -15
  31. dstack/_internal/core/backends/cudo/backend.py +16 -0
  32. dstack/_internal/core/backends/cudo/compute.py +8 -26
  33. dstack/_internal/core/backends/cudo/configurator.py +72 -0
  34. dstack/_internal/core/backends/cudo/models.py +37 -0
  35. dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
  36. dstack/_internal/core/backends/datacrunch/backend.py +16 -0
  37. dstack/_internal/core/backends/datacrunch/compute.py +8 -25
  38. dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
  39. dstack/_internal/core/backends/datacrunch/models.py +38 -0
  40. dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
  41. dstack/_internal/core/backends/gcp/__init__.py +0 -16
  42. dstack/_internal/core/backends/gcp/auth.py +2 -11
  43. dstack/_internal/core/backends/gcp/backend.py +17 -0
  44. dstack/_internal/core/backends/gcp/compute.py +14 -44
  45. dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
  46. dstack/_internal/core/backends/gcp/models.py +125 -0
  47. dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
  48. dstack/_internal/core/backends/kubernetes/backend.py +16 -0
  49. dstack/_internal/core/backends/kubernetes/compute.py +16 -5
  50. dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
  51. dstack/_internal/core/backends/kubernetes/models.py +72 -0
  52. dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
  53. dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
  54. dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
  55. dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
  56. dstack/_internal/core/backends/lambdalabs/models.py +37 -0
  57. dstack/_internal/core/backends/local/__init__.py +0 -13
  58. dstack/_internal/core/backends/local/backend.py +14 -0
  59. dstack/_internal/core/backends/local/compute.py +16 -2
  60. dstack/_internal/core/backends/models.py +128 -0
  61. dstack/_internal/core/backends/oci/__init__.py +0 -15
  62. dstack/_internal/core/backends/oci/auth.py +1 -5
  63. dstack/_internal/core/backends/oci/backend.py +16 -0
  64. dstack/_internal/core/backends/oci/compute.py +9 -23
  65. dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
  66. dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
  67. dstack/_internal/core/backends/oci/region.py +1 -1
  68. dstack/_internal/core/backends/runpod/__init__.py +0 -15
  69. dstack/_internal/core/backends/runpod/backend.py +16 -0
  70. dstack/_internal/core/backends/runpod/compute.py +28 -6
  71. dstack/_internal/core/backends/runpod/configurator.py +59 -0
  72. dstack/_internal/core/backends/runpod/models.py +54 -0
  73. dstack/_internal/core/backends/template/__init__.py +0 -0
  74. dstack/_internal/core/backends/tensordock/__init__.py +0 -15
  75. dstack/_internal/core/backends/tensordock/backend.py +16 -0
  76. dstack/_internal/core/backends/tensordock/compute.py +8 -27
  77. dstack/_internal/core/backends/tensordock/configurator.py +68 -0
  78. dstack/_internal/core/backends/tensordock/models.py +38 -0
  79. dstack/_internal/core/backends/vastai/__init__.py +0 -15
  80. dstack/_internal/core/backends/vastai/backend.py +16 -0
  81. dstack/_internal/core/backends/vastai/compute.py +2 -2
  82. dstack/_internal/core/backends/vastai/configurator.py +66 -0
  83. dstack/_internal/core/backends/vastai/models.py +37 -0
  84. dstack/_internal/core/backends/vultr/__init__.py +0 -15
  85. dstack/_internal/core/backends/vultr/backend.py +16 -0
  86. dstack/_internal/core/backends/vultr/compute.py +10 -24
  87. dstack/_internal/core/backends/vultr/configurator.py +64 -0
  88. dstack/_internal/core/backends/vultr/models.py +34 -0
  89. dstack/_internal/core/models/backends/__init__.py +0 -184
  90. dstack/_internal/core/models/backends/base.py +0 -19
  91. dstack/_internal/core/models/configurations.py +22 -16
  92. dstack/_internal/core/models/envs.py +4 -3
  93. dstack/_internal/core/models/fleets.py +17 -22
  94. dstack/_internal/core/models/gateways.py +3 -3
  95. dstack/_internal/core/models/instances.py +24 -0
  96. dstack/_internal/core/models/profiles.py +85 -45
  97. dstack/_internal/core/models/projects.py +1 -1
  98. dstack/_internal/core/models/repos/base.py +0 -5
  99. dstack/_internal/core/models/repos/local.py +3 -3
  100. dstack/_internal/core/models/repos/remote.py +26 -12
  101. dstack/_internal/core/models/repos/virtual.py +1 -1
  102. dstack/_internal/core/models/resources.py +45 -76
  103. dstack/_internal/core/models/runs.py +21 -19
  104. dstack/_internal/core/models/volumes.py +1 -3
  105. dstack/_internal/core/services/profiles.py +7 -16
  106. dstack/_internal/core/services/repos.py +0 -4
  107. dstack/_internal/server/app.py +11 -4
  108. dstack/_internal/server/background/__init__.py +10 -0
  109. dstack/_internal/server/background/tasks/process_gateways.py +4 -8
  110. dstack/_internal/server/background/tasks/process_instances.py +14 -9
  111. dstack/_internal/server/background/tasks/process_metrics.py +1 -1
  112. dstack/_internal/server/background/tasks/process_placement_groups.py +5 -1
  113. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
  114. dstack/_internal/server/background/tasks/process_running_jobs.py +80 -24
  115. dstack/_internal/server/background/tasks/process_runs.py +1 -0
  116. dstack/_internal/server/background/tasks/process_submitted_jobs.py +20 -38
  117. dstack/_internal/server/background/tasks/process_volumes.py +5 -2
  118. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  119. dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
  120. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  121. dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
  122. dstack/_internal/server/models.py +59 -9
  123. dstack/_internal/server/routers/backends.py +14 -23
  124. dstack/_internal/server/routers/instances.py +3 -4
  125. dstack/_internal/server/routers/metrics.py +31 -10
  126. dstack/_internal/server/routers/prometheus.py +36 -0
  127. dstack/_internal/server/routers/repos.py +1 -2
  128. dstack/_internal/server/routers/runs.py +13 -59
  129. dstack/_internal/server/schemas/gateways.py +14 -23
  130. dstack/_internal/server/schemas/projects.py +7 -2
  131. dstack/_internal/server/schemas/repos.py +2 -38
  132. dstack/_internal/server/schemas/runner.py +1 -0
  133. dstack/_internal/server/schemas/runs.py +1 -24
  134. dstack/_internal/server/security/permissions.py +1 -1
  135. dstack/_internal/server/services/backends/__init__.py +85 -158
  136. dstack/_internal/server/services/config.py +53 -567
  137. dstack/_internal/server/services/fleets.py +9 -103
  138. dstack/_internal/server/services/gateways/__init__.py +13 -4
  139. dstack/_internal/server/services/{pools.py → instances.py} +22 -329
  140. dstack/_internal/server/services/jobs/__init__.py +9 -6
  141. dstack/_internal/server/services/jobs/configurators/base.py +25 -1
  142. dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
  143. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
  144. dstack/_internal/server/services/metrics.py +131 -72
  145. dstack/_internal/server/services/offers.py +1 -1
  146. dstack/_internal/server/services/projects.py +23 -14
  147. dstack/_internal/server/services/prometheus.py +245 -0
  148. dstack/_internal/server/services/runner/client.py +14 -3
  149. dstack/_internal/server/services/runs.py +67 -31
  150. dstack/_internal/server/services/volumes.py +9 -4
  151. dstack/_internal/server/settings.py +3 -0
  152. dstack/_internal/server/statics/index.html +1 -1
  153. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4a0fe83e84574654e397.js} +76 -19
  154. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4a0fe83e84574654e397.js.map} +1 -1
  155. dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
  156. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  157. dstack/_internal/server/testing/common.py +75 -32
  158. dstack/_internal/utils/json_schema.py +6 -0
  159. dstack/_internal/utils/ssh.py +2 -1
  160. dstack/api/__init__.py +4 -0
  161. dstack/api/_public/__init__.py +16 -20
  162. dstack/api/_public/backends.py +1 -1
  163. dstack/api/_public/repos.py +36 -36
  164. dstack/api/_public/runs.py +170 -83
  165. dstack/api/server/__init__.py +11 -13
  166. dstack/api/server/_backends.py +12 -16
  167. dstack/api/server/_fleets.py +15 -55
  168. dstack/api/server/_gateways.py +3 -14
  169. dstack/api/server/_repos.py +1 -4
  170. dstack/api/server/_runs.py +21 -96
  171. dstack/api/server/_volumes.py +10 -5
  172. dstack/api/utils.py +3 -0
  173. dstack/version.py +1 -1
  174. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/METADATA +10 -1
  175. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/RECORD +229 -206
  176. tests/_internal/cli/services/configurators/test_profile.py +6 -6
  177. tests/_internal/core/backends/aws/test_configurator.py +35 -0
  178. tests/_internal/core/backends/aws/test_resources.py +1 -1
  179. tests/_internal/core/backends/azure/test_configurator.py +61 -0
  180. tests/_internal/core/backends/cudo/__init__.py +0 -0
  181. tests/_internal/core/backends/cudo/test_configurator.py +37 -0
  182. tests/_internal/core/backends/datacrunch/__init__.py +0 -0
  183. tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
  184. tests/_internal/core/backends/gcp/test_configurator.py +42 -0
  185. tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
  186. tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
  187. tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
  188. tests/_internal/core/backends/oci/test_configurator.py +55 -0
  189. tests/_internal/core/backends/runpod/__init__.py +0 -0
  190. tests/_internal/core/backends/runpod/test_configurator.py +33 -0
  191. tests/_internal/core/backends/tensordock/__init__.py +0 -0
  192. tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
  193. tests/_internal/core/backends/vastai/__init__.py +0 -0
  194. tests/_internal/core/backends/vastai/test_configurator.py +33 -0
  195. tests/_internal/core/backends/vultr/__init__.py +0 -0
  196. tests/_internal/core/backends/vultr/test_configurator.py +33 -0
  197. tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
  198. tests/_internal/server/background/tasks/test_process_instances.py +49 -48
  199. tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
  200. tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
  201. tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +186 -0
  202. tests/_internal/server/background/tasks/test_process_running_jobs.py +123 -19
  203. tests/_internal/server/background/tasks/test_process_runs.py +8 -22
  204. tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
  205. tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
  206. tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
  207. tests/_internal/server/routers/test_backends.py +6 -764
  208. tests/_internal/server/routers/test_fleets.py +2 -26
  209. tests/_internal/server/routers/test_gateways.py +27 -3
  210. tests/_internal/server/routers/test_instances.py +0 -10
  211. tests/_internal/server/routers/test_metrics.py +42 -0
  212. tests/_internal/server/routers/test_projects.py +56 -0
  213. tests/_internal/server/routers/test_prometheus.py +333 -0
  214. tests/_internal/server/routers/test_repos.py +0 -15
  215. tests/_internal/server/routers/test_runs.py +83 -275
  216. tests/_internal/server/routers/test_volumes.py +2 -3
  217. tests/_internal/server/services/backends/__init__.py +0 -0
  218. tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
  219. tests/_internal/server/services/test_config.py +7 -4
  220. tests/_internal/server/services/test_fleets.py +1 -4
  221. tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
  222. tests/_internal/server/services/test_metrics.py +167 -0
  223. tests/_internal/server/services/test_repos.py +1 -14
  224. tests/_internal/server/services/test_runs.py +0 -4
  225. dstack/_internal/cli/commands/pool.py +0 -581
  226. dstack/_internal/cli/commands/run.py +0 -75
  227. dstack/_internal/core/backends/aws/config.py +0 -18
  228. dstack/_internal/core/backends/azure/config.py +0 -12
  229. dstack/_internal/core/backends/base/config.py +0 -5
  230. dstack/_internal/core/backends/cudo/config.py +0 -9
  231. dstack/_internal/core/backends/datacrunch/config.py +0 -9
  232. dstack/_internal/core/backends/gcp/config.py +0 -22
  233. dstack/_internal/core/backends/kubernetes/config.py +0 -6
  234. dstack/_internal/core/backends/lambdalabs/config.py +0 -9
  235. dstack/_internal/core/backends/nebius/__init__.py +0 -15
  236. dstack/_internal/core/backends/nebius/api_client.py +0 -319
  237. dstack/_internal/core/backends/nebius/compute.py +0 -220
  238. dstack/_internal/core/backends/nebius/config.py +0 -6
  239. dstack/_internal/core/backends/nebius/types.py +0 -37
  240. dstack/_internal/core/backends/oci/config.py +0 -6
  241. dstack/_internal/core/backends/runpod/config.py +0 -9
  242. dstack/_internal/core/backends/tensordock/config.py +0 -9
  243. dstack/_internal/core/backends/vastai/config.py +0 -6
  244. dstack/_internal/core/backends/vultr/config.py +0 -9
  245. dstack/_internal/core/models/backends/aws.py +0 -86
  246. dstack/_internal/core/models/backends/azure.py +0 -68
  247. dstack/_internal/core/models/backends/cudo.py +0 -43
  248. dstack/_internal/core/models/backends/datacrunch.py +0 -44
  249. dstack/_internal/core/models/backends/gcp.py +0 -67
  250. dstack/_internal/core/models/backends/kubernetes.py +0 -40
  251. dstack/_internal/core/models/backends/lambdalabs.py +0 -43
  252. dstack/_internal/core/models/backends/nebius.py +0 -54
  253. dstack/_internal/core/models/backends/runpod.py +0 -40
  254. dstack/_internal/core/models/backends/tensordock.py +0 -44
  255. dstack/_internal/core/models/backends/vastai.py +0 -43
  256. dstack/_internal/core/models/backends/vultr.py +0 -40
  257. dstack/_internal/core/models/pools.py +0 -43
  258. dstack/_internal/server/routers/pools.py +0 -142
  259. dstack/_internal/server/schemas/pools.py +0 -38
  260. dstack/_internal/server/services/backends/configurators/base.py +0 -72
  261. dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
  262. dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
  263. dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
  264. dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
  265. dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
  266. dstack/_internal/server/services/backends/configurators/runpod.py +0 -97
  267. dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
  268. dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
  269. dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
  270. dstack/api/_public/pools.py +0 -41
  271. dstack/api/_public/resources.py +0 -105
  272. dstack/api/server/_pools.py +0 -63
  273. tests/_internal/server/routers/test_pools.py +0 -612
  274. /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
  275. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/LICENSE.md +0 -0
  276. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/WHEEL +0 -0
  277. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/entry_points.txt +0 -0
  278. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,10 @@
1
- from typing import Dict
1
+ from typing import Annotated, Dict, List, Literal, Optional, Union
2
2
 
3
3
  from pydantic import Field, root_validator
4
- from typing_extensions import Annotated, List, Literal, Optional, Union
5
4
 
6
- from dstack._internal.core.models.backends.base import ConfigMultiElement
7
5
  from dstack._internal.core.models.common import CoreModel
8
6
 
9
7
 
10
- class OCIConfigInfo(CoreModel):
11
- type: Literal["oci"] = "oci"
12
- regions: Optional[List[str]] = None
13
- compartment_id: Optional[str] = None
14
-
15
-
16
8
  class OCIClientCreds(CoreModel):
17
9
  type: Annotated[Literal["client"], Field(description="The type of credentials")] = "client"
18
10
  user: Annotated[str, Field(description="User OCID")]
@@ -62,27 +54,34 @@ class OCICreds(CoreModel):
62
54
  __root__: AnyOCICreds = Field(..., discriminator="type")
63
55
 
64
56
 
65
- class OCIConfigInfoWithCreds(OCIConfigInfo):
66
- creds: AnyOCICreds
67
-
68
-
69
- AnyOCIConfigInfo = Union[OCIConfigInfo, OCIConfigInfoWithCreds]
57
+ class OCIBackendConfig(CoreModel):
58
+ type: Annotated[Literal["oci"], Field(description="The type of backend")] = "oci"
59
+ regions: Annotated[
60
+ Optional[List[str]],
61
+ Field(description="The list of OCI regions. Omit to use all regions"),
62
+ ] = None
63
+ compartment_id: Annotated[
64
+ Optional[str],
65
+ Field(
66
+ description=(
67
+ "Compartment where `dstack` will create all resources."
68
+ " Omit to instruct `dstack` to create a new compartment"
69
+ )
70
+ ),
71
+ ] = None
70
72
 
71
73
 
72
- class OCIConfigInfoWithCredsPartial(CoreModel):
73
- type: Literal["oci"] = "oci"
74
- creds: Optional[AnyOCICreds]
75
- regions: Optional[List[str]]
76
- compartment_id: Optional[str]
74
+ class OCIBackendConfigWithCreds(OCIBackendConfig):
75
+ creds: Annotated[AnyOCICreds, Field(description="The credentials", discriminator="type")]
77
76
 
78
77
 
79
- class OCIConfigValues(CoreModel):
80
- type: Literal["oci"] = "oci"
81
- default_creds: bool = False
82
- regions: Optional[ConfigMultiElement]
83
- compartment_id: Optional[str] = None
78
+ AnyOCIBackendConfig = Union[OCIBackendConfig, OCIBackendConfigWithCreds]
84
79
 
85
80
 
86
- class OCIStoredConfig(OCIConfigInfo):
81
+ class OCIStoredConfig(OCIBackendConfig):
87
82
  compartment_id: str
88
83
  subnet_ids_per_region: Dict[str, str]
84
+
85
+
86
+ class OCIConfig(OCIStoredConfig):
87
+ creds: AnyOCICreds
@@ -5,7 +5,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Set
5
5
  import oci
6
6
 
7
7
  from dstack._internal.core.backends.oci.auth import get_client_config
8
- from dstack._internal.core.models.backends.oci import AnyOCICreds
8
+ from dstack._internal.core.backends.oci.models import AnyOCICreds
9
9
 
10
10
 
11
11
  class OCIRegionClient:
@@ -1,15 +0,0 @@
1
- from dstack._internal.core.backends.base import Backend
2
- from dstack._internal.core.backends.runpod.compute import RunpodCompute
3
- from dstack._internal.core.backends.runpod.config import RunpodConfig
4
- from dstack._internal.core.models.backends.base import BackendType
5
-
6
-
7
- class RunpodBackend(Backend):
8
- TYPE: BackendType = BackendType.RUNPOD
9
-
10
- def __init__(self, config: RunpodConfig):
11
- self.config = config
12
- self._compute = RunpodCompute(self.config)
13
-
14
- def compute(self) -> RunpodCompute:
15
- return self._compute
@@ -0,0 +1,16 @@
1
+ from dstack._internal.core.backends.base.backend import Backend
2
+ from dstack._internal.core.backends.runpod.compute import RunpodCompute
3
+ from dstack._internal.core.backends.runpod.models import RunpodConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
+ class RunpodBackend(Backend):
8
+ TYPE = BackendType.RUNPOD
9
+ COMPUTE_CLASS = RunpodCompute
10
+
11
+ def __init__(self, config: RunpodConfig):
12
+ self.config = config
13
+ self._compute = RunpodCompute(self.config)
14
+
15
+ def compute(self) -> RunpodCompute:
16
+ return self._compute
@@ -3,8 +3,9 @@ import uuid
3
3
  from datetime import timedelta
4
4
  from typing import List, Optional
5
5
 
6
- from dstack._internal.core.backends.base import Compute
6
+ from dstack._internal.core.backends.base.backend import Compute
7
7
  from dstack._internal.core.backends.base.compute import (
8
+ ComputeWithVolumeSupport,
8
9
  generate_unique_instance_name,
9
10
  generate_unique_volume_name,
10
11
  get_docker_commands,
@@ -12,7 +13,7 @@ from dstack._internal.core.backends.base.compute import (
12
13
  )
13
14
  from dstack._internal.core.backends.base.offers import get_catalog_offers
14
15
  from dstack._internal.core.backends.runpod.api_client import RunpodApiClient
15
- from dstack._internal.core.backends.runpod.config import RunpodConfig
16
+ from dstack._internal.core.backends.runpod.models import RunpodConfig
16
17
  from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
17
18
  from dstack._internal.core.errors import (
18
19
  BackendError,
@@ -39,7 +40,10 @@ MAX_RESOURCE_NAME_LEN = 60
39
40
  CONTAINER_REGISTRY_AUTH_CLEANUP_INTERVAL = 60 * 60 * 24 # 24 hour
40
41
 
41
42
 
42
- class RunpodCompute(Compute):
43
+ class RunpodCompute(
44
+ ComputeWithVolumeSupport,
45
+ Compute,
46
+ ):
43
47
  _last_cleanup_time = None
44
48
 
45
49
  def __init__(self, config: RunpodConfig):
@@ -52,8 +56,9 @@ class RunpodCompute(Compute):
52
56
  ) -> List[InstanceOfferWithAvailability]:
53
57
  offers = get_catalog_offers(
54
58
  backend=BackendType.RUNPOD,
55
- locations=self.config.regions,
59
+ locations=self.config.regions or None,
56
60
  requirements=requirements,
61
+ extra_filter=lambda o: _is_secure_cloud(o.region) or self.config.allow_community_cloud,
57
62
  )
58
63
  offers = [
59
64
  InstanceOfferWithAvailability(
@@ -102,13 +107,22 @@ class RunpodCompute(Compute):
102
107
  bid_per_gpu = None
103
108
  if instance_offer.instance.resources.spot and gpu_count:
104
109
  bid_per_gpu = instance_offer.price / gpu_count
110
+ if _is_secure_cloud(instance_offer.region):
111
+ cloud_type = "SECURE"
112
+ data_center_id = instance_offer.region
113
+ country_code = None
114
+ else:
115
+ cloud_type = "COMMUNITY"
116
+ data_center_id = None
117
+ country_code = instance_offer.region
105
118
 
106
119
  resp = self.api_client.create_pod(
107
120
  name=pod_name,
108
121
  image_name=job.job_spec.image_name,
109
122
  gpu_type_id=instance_offer.instance.name,
110
- cloud_type="SECURE", # ["ALL", "COMMUNITY", "SECURE"]:
111
- data_center_id=instance_offer.region,
123
+ cloud_type=cloud_type,
124
+ data_center_id=data_center_id,
125
+ country_code=country_code,
112
126
  gpu_count=gpu_count,
113
127
  container_disk_in_gb=disk_size,
114
128
  min_vcpu_count=instance_offer.instance.resources.cpus,
@@ -257,3 +271,11 @@ def _get_volume_price(size: int) -> float:
257
271
  if size < 1000:
258
272
  return 0.07 * size
259
273
  return 0.05 * size
274
+
275
+
276
+ def _is_secure_cloud(region: str) -> str:
277
+ """
278
+ Secure cloud regions are datacenter IDs: CA-MTL-1, EU-NL-1, etc.
279
+ Community cloud regions are country codes: CA, NL, etc.
280
+ """
281
+ return "-" in region
@@ -0,0 +1,59 @@
1
+ import json
2
+
3
+ from dstack._internal.core.backends.base.configurator import (
4
+ BackendRecord,
5
+ Configurator,
6
+ raise_invalid_credentials_error,
7
+ )
8
+ from dstack._internal.core.backends.runpod import api_client
9
+ from dstack._internal.core.backends.runpod.backend import RunpodBackend
10
+ from dstack._internal.core.backends.runpod.models import (
11
+ AnyRunpodBackendConfig,
12
+ RunpodBackendConfig,
13
+ RunpodBackendConfigWithCreds,
14
+ RunpodConfig,
15
+ RunpodCreds,
16
+ RunpodStoredConfig,
17
+ )
18
+ from dstack._internal.core.models.backends.base import BackendType
19
+
20
+
21
+ class RunpodConfigurator(Configurator):
22
+ TYPE = BackendType.RUNPOD
23
+ BACKEND_CLASS = RunpodBackend
24
+
25
+ def validate_config(self, config: RunpodBackendConfigWithCreds, default_creds_enabled: bool):
26
+ self._validate_runpod_api_key(config.creds.api_key)
27
+
28
+ def create_backend(
29
+ self, project_name: str, config: RunpodBackendConfigWithCreds
30
+ ) -> BackendRecord:
31
+ return BackendRecord(
32
+ config=RunpodStoredConfig(
33
+ **RunpodBackendConfig.__response__.parse_obj(config).dict()
34
+ ).json(),
35
+ auth=RunpodCreds.parse_obj(config.creds).json(),
36
+ )
37
+
38
+ def get_backend_config(
39
+ self, record: BackendRecord, include_creds: bool
40
+ ) -> AnyRunpodBackendConfig:
41
+ config = self._get_config(record)
42
+ if include_creds:
43
+ return RunpodBackendConfigWithCreds.__response__.parse_obj(config)
44
+ return RunpodBackendConfig.__response__.parse_obj(config)
45
+
46
+ def get_backend(self, record: BackendRecord) -> RunpodBackend:
47
+ config = self._get_config(record)
48
+ return RunpodBackend(config=config)
49
+
50
+ def _get_config(self, record: BackendRecord) -> RunpodConfig:
51
+ return RunpodConfig(
52
+ **json.loads(record.config),
53
+ creds=RunpodCreds.parse_raw(record.auth),
54
+ )
55
+
56
+ def _validate_runpod_api_key(self, api_key: str):
57
+ client = api_client.RunpodApiClient(api_key=api_key)
58
+ if not client.validate_api_key():
59
+ raise_invalid_credentials_error(fields=[["creds", "api_key"]])
@@ -0,0 +1,54 @@
1
+ from typing import Annotated, List, Literal, Optional, Union
2
+
3
+ from pydantic import Field
4
+
5
+ from dstack._internal.core.models.common import CoreModel
6
+
7
+ RUNPOD_COMMUNITY_CLOUD_DEFAULT = True
8
+
9
+
10
+ class RunpodAPIKeyCreds(CoreModel):
11
+ type: Literal["api_key"] = "api_key"
12
+ api_key: Annotated[str, Field(description="The API key")]
13
+
14
+
15
+ AnyRunpodCreds = RunpodAPIKeyCreds
16
+ RunpodCreds = AnyRunpodCreds
17
+
18
+
19
+ class RunpodBackendConfig(CoreModel):
20
+ type: Literal["runpod"] = "runpod"
21
+ regions: Annotated[
22
+ Optional[List[str]],
23
+ Field(description="The list of RunPod regions. Omit to use all regions"),
24
+ ] = None
25
+ community_cloud: Annotated[
26
+ Optional[bool],
27
+ Field(
28
+ description=(
29
+ "Whether Community Cloud offers can be suggested in addition to Secure Cloud."
30
+ f" Defaults to `{str(RUNPOD_COMMUNITY_CLOUD_DEFAULT).lower()}`"
31
+ )
32
+ ),
33
+ ] = None
34
+
35
+
36
+ class RunpodBackendConfigWithCreds(RunpodBackendConfig):
37
+ creds: Annotated[AnyRunpodCreds, Field(description="The credentials")]
38
+
39
+
40
+ AnyRunpodBackendConfig = Union[RunpodBackendConfig, RunpodBackendConfigWithCreds]
41
+
42
+
43
+ class RunpodStoredConfig(RunpodBackendConfig):
44
+ pass
45
+
46
+
47
+ class RunpodConfig(RunpodStoredConfig):
48
+ creds: AnyRunpodCreds
49
+
50
+ @property
51
+ def allow_community_cloud(self) -> bool:
52
+ if self.community_cloud is not None:
53
+ return self.community_cloud
54
+ return RUNPOD_COMMUNITY_CLOUD_DEFAULT
File without changes
@@ -1,15 +0,0 @@
1
- from dstack._internal.core.backends.base import Backend
2
- from dstack._internal.core.backends.tensordock.compute import TensorDockCompute
3
- from dstack._internal.core.backends.tensordock.config import TensorDockConfig
4
- from dstack._internal.core.models.backends.base import BackendType
5
-
6
-
7
- class TensorDockBackend(Backend):
8
- TYPE: BackendType = BackendType.TENSORDOCK
9
-
10
- def __init__(self, config: TensorDockConfig):
11
- self.config = config
12
- self._compute = TensorDockCompute(self.config)
13
-
14
- def compute(self) -> TensorDockCompute:
15
- return self._compute
@@ -0,0 +1,16 @@
1
+ from dstack._internal.core.backends.base.backend import Backend
2
+ from dstack._internal.core.backends.tensordock.compute import TensorDockCompute
3
+ from dstack._internal.core.backends.tensordock.models import TensorDockConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
+ class TensorDockBackend(Backend):
8
+ TYPE = BackendType.TENSORDOCK
9
+ COMPUTE_CLASS = TensorDockCompute
10
+
11
+ def __init__(self, config: TensorDockConfig):
12
+ self.config = config
13
+ self._compute = TensorDockCompute(self.config)
14
+
15
+ def compute(self) -> TensorDockCompute:
16
+ return self._compute
@@ -3,25 +3,23 @@ from typing import List, Optional
3
3
 
4
4
  import requests
5
5
 
6
- from dstack._internal.core.backends.base import Compute
6
+ from dstack._internal.core.backends.base.backend import Compute
7
7
  from dstack._internal.core.backends.base.compute import (
8
+ ComputeWithCreateInstanceSupport,
8
9
  generate_unique_instance_name,
9
- get_job_instance_name,
10
10
  get_shim_commands,
11
11
  )
12
12
  from dstack._internal.core.backends.base.offers import get_catalog_offers
13
13
  from dstack._internal.core.backends.tensordock.api_client import TensorDockAPIClient
14
- from dstack._internal.core.backends.tensordock.config import TensorDockConfig
14
+ from dstack._internal.core.backends.tensordock.models import TensorDockConfig
15
15
  from dstack._internal.core.errors import BackendError, NoCapacityError
16
16
  from dstack._internal.core.models.backends.base import BackendType
17
17
  from dstack._internal.core.models.instances import (
18
18
  InstanceAvailability,
19
19
  InstanceConfiguration,
20
20
  InstanceOfferWithAvailability,
21
- SSHKey,
22
21
  )
23
- from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
24
- from dstack._internal.core.models.volumes import Volume
22
+ from dstack._internal.core.models.runs import JobProvisioningData, Requirements
25
23
  from dstack._internal.utils.logging import get_logger
26
24
 
27
25
  logger = get_logger(__name__)
@@ -31,7 +29,10 @@ logger = get_logger(__name__)
31
29
  MAX_INSTANCE_NAME_LEN = 60
32
30
 
33
31
 
34
- class TensorDockCompute(Compute):
32
+ class TensorDockCompute(
33
+ ComputeWithCreateInstanceSupport,
34
+ Compute,
35
+ ):
35
36
  def __init__(self, config: TensorDockConfig):
36
37
  super().__init__()
37
38
  self.config = config
@@ -113,26 +114,6 @@ class TensorDockCompute(Compute):
113
114
  backend_data=None,
114
115
  )
115
116
 
116
- def run_job(
117
- self,
118
- run: Run,
119
- job: Job,
120
- instance_offer: InstanceOfferWithAvailability,
121
- project_ssh_public_key: str,
122
- project_ssh_private_key: str,
123
- volumes: List[Volume],
124
- ) -> JobProvisioningData:
125
- instance_config = InstanceConfiguration(
126
- project_name=run.project_name,
127
- instance_name=get_job_instance_name(run, job), # TODO: generate name
128
- ssh_keys=[
129
- SSHKey(public=run.run_spec.ssh_key_pub.strip()),
130
- SSHKey(public=project_ssh_public_key.strip()),
131
- ],
132
- user=run.user,
133
- )
134
- return self.create_instance(instance_offer, instance_config)
135
-
136
117
  def terminate_instance(
137
118
  self, instance_id: str, region: str, backend_data: Optional[str] = None
138
119
  ):
@@ -0,0 +1,68 @@
1
+ import json
2
+
3
+ from dstack._internal.core.backends.base.configurator import (
4
+ BackendRecord,
5
+ Configurator,
6
+ raise_invalid_credentials_error,
7
+ )
8
+ from dstack._internal.core.backends.tensordock import api_client
9
+ from dstack._internal.core.backends.tensordock.backend import TensorDockBackend
10
+ from dstack._internal.core.backends.tensordock.models import (
11
+ AnyTensorDockBackendConfig,
12
+ TensorDockBackendConfig,
13
+ TensorDockBackendConfigWithCreds,
14
+ TensorDockConfig,
15
+ TensorDockCreds,
16
+ TensorDockStoredConfig,
17
+ )
18
+ from dstack._internal.core.models.backends.base import (
19
+ BackendType,
20
+ )
21
+
22
# TensorDock regions are dynamic, so no region filtering is offered for now;
# this empty list is what create_backend() stores when no regions are given.
REGIONS = []
24
+
25
+
26
class TensorDockConfigurator(Configurator):
    """Configurator for the TensorDock backend.

    Checks credentials through the TensorDock API client and converts between
    backend config models and ``BackendRecord`` objects.
    """

    TYPE = BackendType.TENSORDOCK
    BACKEND_CLASS = TensorDockBackend

    def validate_config(
        self, config: TensorDockBackendConfigWithCreds, default_creds_enabled: bool
    ):
        """Check the credentials in *config*; ``default_creds_enabled`` is unused here."""
        creds = config.creds
        self._validate_tensordock_creds(creds.api_key, creds.api_token)

    def create_backend(
        self, project_name: str, config: TensorDockBackendConfigWithCreds
    ) -> BackendRecord:
        """Build the record to persist: config JSON and credentials JSON, kept separate.

        ``project_name`` is not used by this implementation.
        """
        if config.regions is None:
            # NOTE: this writes to the caller's config object in place.
            config.regions = REGIONS
        # Re-parse through the creds-free model so credentials never end up in
        # the stored config JSON.
        public_fields = TensorDockBackendConfig.__response__.parse_obj(config).dict()
        stored_json = TensorDockStoredConfig(**public_fields).json()
        auth_json = TensorDockCreds.parse_obj(config.creds).json()
        return BackendRecord(config=stored_json, auth=auth_json)

    def get_backend_config(
        self, record: BackendRecord, include_creds: bool
    ) -> AnyTensorDockBackendConfig:
        """Return the config for *record*, with credentials only if ``include_creds``."""
        full_config = self._get_config(record)
        if not include_creds:
            return TensorDockBackendConfig.__response__.parse_obj(full_config)
        return TensorDockBackendConfigWithCreds.__response__.parse_obj(full_config)

    def get_backend(self, record: BackendRecord) -> TensorDockBackend:
        """Instantiate a ``TensorDockBackend`` from the stored *record*."""
        return TensorDockBackend(config=self._get_config(record))

    def _get_config(self, record: BackendRecord) -> TensorDockConfig:
        """Combine JSON-decoded ``record.config`` with creds parsed from ``record.auth``."""
        stored_fields = json.loads(record.config)
        creds = TensorDockCreds.parse_raw(record.auth)
        return TensorDockConfig.__response__(**stored_fields, creds=creds)

    def _validate_tensordock_creds(self, api_key: str, api_token: str):
        """Run the API client's ``auth_test()``; report both fields if it fails."""
        client = api_client.TensorDockAPIClient(api_key=api_key, api_token=api_token)
        if client.auth_test():
            return
        raise_invalid_credentials_error(fields=[["creds", "api_key"], ["creds", "api_token"]])
@@ -0,0 +1,38 @@
1
+ from typing import Annotated, List, Literal, Optional, Union
2
+
3
+ from pydantic import Field
4
+
5
+ from dstack._internal.core.models.common import CoreModel
6
+
7
+
8
class TensorDockAPIKeyCreds(CoreModel):
    # TensorDock credentials: an API key plus an API token.
    # ``type`` only accepts, and defaults to, the literal "api_key".
    type: Annotated[
        Literal["api_key"],
        Field(description="The type of credentials"),
    ] = "api_key"
    api_key: Annotated[
        str,
        Field(description="The API key"),
    ]
    api_token: Annotated[
        str,
        Field(description="The API token"),
    ]
12
+
13
+
14
# Only one credentials model is defined, so both names refer to it.
AnyTensorDockCreds = TensorDockAPIKeyCreds
TensorDockCreds = AnyTensorDockCreds
16
+
17
+
18
class TensorDockBackendConfig(CoreModel):
    # Backend config without credentials.
    # ``type`` only accepts, and defaults to, the literal "tensordock".
    type: Annotated[
        Literal["tensordock"],
        Field(description="The type of backend"),
    ] = "tensordock"
    # Optional region list; defaults to None when omitted.
    regions: Annotated[
        Optional[List[str]],
        Field(description="The list of TensorDock regions. Omit to use all regions"),
    ] = None
24
+
25
+
26
class TensorDockBackendConfigWithCreds(TensorDockBackendConfig):
    # TensorDockBackendConfig plus a required ``creds`` field.
    creds: Annotated[
        AnyTensorDockCreds,
        Field(description="The credentials"),
    ]
28
+
29
+
30
# Either form of the backend config: without or with credentials.
AnyTensorDockBackendConfig = Union[
    TensorDockBackendConfig,
    TensorDockBackendConfigWithCreds,
]
31
+
32
+
33
class TensorDockStoredConfig(TensorDockBackendConfig):
    # Adds no fields of its own; same shape as TensorDockBackendConfig.
    pass
35
+
36
+
37
class TensorDockConfig(TensorDockStoredConfig):
    # Stored config fields plus the required credentials.
    creds: AnyTensorDockCreds
@@ -1,15 +0,0 @@
1
- from dstack._internal.core.backends.base import Backend
2
- from dstack._internal.core.backends.vastai.compute import VastAICompute
3
- from dstack._internal.core.backends.vastai.config import VastAIConfig
4
- from dstack._internal.core.models.backends.base import BackendType
5
-
6
-
7
- class VastAIBackend(Backend):
8
- TYPE: BackendType = BackendType.VASTAI
9
-
10
- def __init__(self, config: VastAIConfig):
11
- self.config = config
12
- self._compute = VastAICompute(self.config)
13
-
14
- def compute(self) -> VastAICompute:
15
- return self._compute
@@ -0,0 +1,16 @@
1
+ from dstack._internal.core.backends.base.backend import Backend
2
+ from dstack._internal.core.backends.vastai.compute import VastAICompute
3
+ from dstack._internal.core.backends.vastai.models import VastAIConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
class VastAIBackend(Backend):
    """Backend implementation for VastAI.

    Keeps the backend config and a single ``VastAICompute`` built from it.
    """

    TYPE = BackendType.VASTAI
    COMPUTE_CLASS = VastAICompute

    def __init__(self, config: VastAIConfig):
        """Store *config* and create the compute object once, up front."""
        self.config = config
        self._compute = VastAICompute(self.config)

    def compute(self) -> VastAICompute:
        """Return the compute object created in ``__init__``."""
        return self._compute
@@ -3,14 +3,14 @@ from typing import List, Optional
3
3
  import gpuhunt
4
4
  from gpuhunt.providers.vastai import VastAIProvider
5
5
 
6
- from dstack._internal.core.backends.base import Compute
6
+ from dstack._internal.core.backends.base.backend import Compute
7
7
  from dstack._internal.core.backends.base.compute import (
8
8
  generate_unique_instance_name_for_job,
9
9
  get_docker_commands,
10
10
  )
11
11
  from dstack._internal.core.backends.base.offers import get_catalog_offers
12
12
  from dstack._internal.core.backends.vastai.api_client import VastAIAPIClient
13
- from dstack._internal.core.backends.vastai.config import VastAIConfig
13
+ from dstack._internal.core.backends.vastai.models import VastAIConfig
14
14
  from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
15
15
  from dstack._internal.core.errors import ProvisioningError
16
16
  from dstack._internal.core.models.backends.base import BackendType
@@ -0,0 +1,66 @@
1
+ import json
2
+
3
+ from dstack._internal.core.backends.base.configurator import (
4
+ BackendRecord,
5
+ Configurator,
6
+ raise_invalid_credentials_error,
7
+ )
8
+ from dstack._internal.core.backends.vastai import api_client
9
+ from dstack._internal.core.backends.vastai.backend import VastAIBackend
10
+ from dstack._internal.core.backends.vastai.models import (
11
+ AnyVastAIBackendConfig,
12
+ VastAIBackendConfig,
13
+ VastAIBackendConfigWithCreds,
14
+ VastAIConfig,
15
+ VastAICreds,
16
+ VastAIStoredConfig,
17
+ )
18
+ from dstack._internal.core.models.backends.base import (
19
+ BackendType,
20
+ )
21
+
22
# VastAI regions are dynamic, so no region filtering is offered for now;
# this empty list is what create_backend() stores when no regions are given.
REGIONS = []
24
+
25
+
26
class VastAIConfigurator(Configurator):
    """Configurator for the VastAI backend.

    Checks credentials through the VastAI API client and converts between
    backend config models and ``BackendRecord`` objects.
    """

    TYPE = BackendType.VASTAI
    BACKEND_CLASS = VastAIBackend

    def validate_config(self, config: VastAIBackendConfigWithCreds, default_creds_enabled: bool):
        """Check the API key in *config*; ``default_creds_enabled`` is unused here."""
        api_key = config.creds.api_key
        self._validate_vastai_creds(api_key)

    def create_backend(
        self, project_name: str, config: VastAIBackendConfigWithCreds
    ) -> BackendRecord:
        """Build the record to persist: config JSON and credentials JSON, kept separate.

        ``project_name`` is not used by this implementation.
        """
        if config.regions is None:
            # NOTE: this writes to the caller's config object in place.
            config.regions = REGIONS
        # Re-parse through the creds-free model so credentials never end up in
        # the stored config JSON.
        public_fields = VastAIBackendConfig.__response__.parse_obj(config).dict()
        stored_json = VastAIStoredConfig(**public_fields).json()
        auth_json = VastAICreds.parse_obj(config.creds).json()
        return BackendRecord(config=stored_json, auth=auth_json)

    def get_backend_config(
        self, record: BackendRecord, include_creds: bool
    ) -> AnyVastAIBackendConfig:
        """Return the config for *record*, with credentials only if ``include_creds``."""
        full_config = self._get_config(record)
        if not include_creds:
            return VastAIBackendConfig.__response__.parse_obj(full_config)
        return VastAIBackendConfigWithCreds.__response__.parse_obj(full_config)

    def get_backend(self, record: BackendRecord) -> VastAIBackend:
        """Instantiate a ``VastAIBackend`` from the stored *record*."""
        return VastAIBackend(config=self._get_config(record))

    def _get_config(self, record: BackendRecord) -> VastAIConfig:
        """Combine JSON-decoded ``record.config`` with creds parsed from ``record.auth``."""
        stored_fields = json.loads(record.config)
        creds = VastAICreds.parse_raw(record.auth)
        return VastAIConfig.__response__(**stored_fields, creds=creds)

    def _validate_vastai_creds(self, api_key: str):
        """Run the API client's ``auth_test()``; report the API key field if it fails."""
        client = api_client.VastAIAPIClient(api_key=api_key)
        if client.auth_test():
            return
        raise_invalid_credentials_error(fields=[["creds", "api_key"]])
@@ -0,0 +1,37 @@
1
+ from typing import Annotated, List, Literal, Optional, Union
2
+
3
+ from pydantic import Field
4
+
5
+ from dstack._internal.core.models.common import CoreModel
6
+
7
+
8
class VastAIAPIKeyCreds(CoreModel):
    # VastAI credentials: a single API key.
    # ``type`` only accepts, and defaults to, the literal "api_key".
    type: Annotated[
        Literal["api_key"],
        Field(description="The type of credentials"),
    ] = "api_key"
    api_key: Annotated[
        str,
        Field(description="The API key"),
    ]
11
+
12
+
13
# Only one credentials model is defined, so both names refer to it.
AnyVastAICreds = VastAIAPIKeyCreds
VastAICreds = AnyVastAICreds
15
+
16
+
17
class VastAIBackendConfig(CoreModel):
    # Backend config without credentials.
    # ``type`` only accepts, and defaults to, the literal "vastai".
    type: Annotated[
        Literal["vastai"],
        Field(description="The type of backend"),
    ] = "vastai"
    # Optional region list; defaults to None when omitted.
    regions: Annotated[
        Optional[List[str]],
        Field(description="The list of VastAI regions. Omit to use all regions"),
    ] = None
23
+
24
+
25
class VastAIBackendConfigWithCreds(VastAIBackendConfig):
    # VastAIBackendConfig plus a required ``creds`` field.
    creds: Annotated[
        AnyVastAICreds,
        Field(description="The credentials"),
    ]
27
+
28
+
29
# Either form of the backend config: without or with credentials.
AnyVastAIBackendConfig = Union[
    VastAIBackendConfig,
    VastAIBackendConfigWithCreds,
]
30
+
31
+
32
class VastAIStoredConfig(VastAIBackendConfig):
    # Adds no fields of its own; same shape as VastAIBackendConfig.
    pass
34
+
35
+
36
class VastAIConfig(VastAIStoredConfig):
    # Stored config fields plus the required credentials.
    creds: AnyVastAICreds