dstack 0.18.43__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dstack might be problematic. Click here for more details.

Files changed (278)
  1. dstack/_internal/cli/commands/gateway.py +15 -3
  2. dstack/_internal/cli/commands/logs.py +0 -22
  3. dstack/_internal/cli/commands/stats.py +8 -17
  4. dstack/_internal/cli/main.py +1 -5
  5. dstack/_internal/cli/services/configurators/fleet.py +4 -39
  6. dstack/_internal/cli/services/configurators/run.py +22 -20
  7. dstack/_internal/cli/services/profile.py +34 -83
  8. dstack/_internal/cli/utils/gateway.py +1 -1
  9. dstack/_internal/cli/utils/run.py +11 -0
  10. dstack/_internal/core/backends/__init__.py +56 -39
  11. dstack/_internal/core/backends/aws/__init__.py +0 -25
  12. dstack/_internal/core/backends/aws/auth.py +1 -10
  13. dstack/_internal/core/backends/aws/backend.py +26 -0
  14. dstack/_internal/core/backends/aws/compute.py +21 -45
  15. dstack/_internal/{server/services/backends/configurators/aws.py → core/backends/aws/configurator.py} +46 -85
  16. dstack/_internal/core/backends/aws/models.py +135 -0
  17. dstack/_internal/core/backends/aws/resources.py +1 -1
  18. dstack/_internal/core/backends/azure/__init__.py +0 -20
  19. dstack/_internal/core/backends/azure/auth.py +2 -11
  20. dstack/_internal/core/backends/azure/backend.py +21 -0
  21. dstack/_internal/core/backends/azure/compute.py +14 -28
  22. dstack/_internal/{server/services/backends/configurators/azure.py → core/backends/azure/configurator.py} +141 -210
  23. dstack/_internal/core/backends/azure/models.py +89 -0
  24. dstack/_internal/core/backends/base/__init__.py +0 -12
  25. dstack/_internal/core/backends/base/backend.py +18 -0
  26. dstack/_internal/core/backends/base/compute.py +153 -33
  27. dstack/_internal/core/backends/base/configurator.py +105 -0
  28. dstack/_internal/core/backends/base/models.py +14 -0
  29. dstack/_internal/core/backends/configurators.py +138 -0
  30. dstack/_internal/core/backends/cudo/__init__.py +0 -15
  31. dstack/_internal/core/backends/cudo/backend.py +16 -0
  32. dstack/_internal/core/backends/cudo/compute.py +8 -26
  33. dstack/_internal/core/backends/cudo/configurator.py +72 -0
  34. dstack/_internal/core/backends/cudo/models.py +37 -0
  35. dstack/_internal/core/backends/datacrunch/__init__.py +0 -15
  36. dstack/_internal/core/backends/datacrunch/backend.py +16 -0
  37. dstack/_internal/core/backends/datacrunch/compute.py +8 -25
  38. dstack/_internal/core/backends/datacrunch/configurator.py +66 -0
  39. dstack/_internal/core/backends/datacrunch/models.py +38 -0
  40. dstack/_internal/core/{models/backends/dstack.py → backends/dstack/models.py} +7 -7
  41. dstack/_internal/core/backends/gcp/__init__.py +0 -16
  42. dstack/_internal/core/backends/gcp/auth.py +2 -11
  43. dstack/_internal/core/backends/gcp/backend.py +17 -0
  44. dstack/_internal/core/backends/gcp/compute.py +14 -44
  45. dstack/_internal/{server/services/backends/configurators/gcp.py → core/backends/gcp/configurator.py} +46 -103
  46. dstack/_internal/core/backends/gcp/models.py +125 -0
  47. dstack/_internal/core/backends/kubernetes/__init__.py +0 -15
  48. dstack/_internal/core/backends/kubernetes/backend.py +16 -0
  49. dstack/_internal/core/backends/kubernetes/compute.py +16 -5
  50. dstack/_internal/core/backends/kubernetes/configurator.py +55 -0
  51. dstack/_internal/core/backends/kubernetes/models.py +72 -0
  52. dstack/_internal/core/backends/lambdalabs/__init__.py +0 -16
  53. dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
  54. dstack/_internal/core/backends/lambdalabs/compute.py +7 -28
  55. dstack/_internal/core/backends/lambdalabs/configurator.py +82 -0
  56. dstack/_internal/core/backends/lambdalabs/models.py +37 -0
  57. dstack/_internal/core/backends/local/__init__.py +0 -13
  58. dstack/_internal/core/backends/local/backend.py +14 -0
  59. dstack/_internal/core/backends/local/compute.py +16 -2
  60. dstack/_internal/core/backends/models.py +128 -0
  61. dstack/_internal/core/backends/oci/__init__.py +0 -15
  62. dstack/_internal/core/backends/oci/auth.py +1 -5
  63. dstack/_internal/core/backends/oci/backend.py +16 -0
  64. dstack/_internal/core/backends/oci/compute.py +9 -23
  65. dstack/_internal/{server/services/backends/configurators/oci.py → core/backends/oci/configurator.py} +40 -85
  66. dstack/_internal/core/{models/backends/oci.py → backends/oci/models.py} +24 -25
  67. dstack/_internal/core/backends/oci/region.py +1 -1
  68. dstack/_internal/core/backends/runpod/__init__.py +0 -15
  69. dstack/_internal/core/backends/runpod/backend.py +16 -0
  70. dstack/_internal/core/backends/runpod/compute.py +28 -6
  71. dstack/_internal/core/backends/runpod/configurator.py +59 -0
  72. dstack/_internal/core/backends/runpod/models.py +54 -0
  73. dstack/_internal/core/backends/template/__init__.py +0 -0
  74. dstack/_internal/core/backends/tensordock/__init__.py +0 -15
  75. dstack/_internal/core/backends/tensordock/backend.py +16 -0
  76. dstack/_internal/core/backends/tensordock/compute.py +8 -27
  77. dstack/_internal/core/backends/tensordock/configurator.py +68 -0
  78. dstack/_internal/core/backends/tensordock/models.py +38 -0
  79. dstack/_internal/core/backends/vastai/__init__.py +0 -15
  80. dstack/_internal/core/backends/vastai/backend.py +16 -0
  81. dstack/_internal/core/backends/vastai/compute.py +2 -2
  82. dstack/_internal/core/backends/vastai/configurator.py +66 -0
  83. dstack/_internal/core/backends/vastai/models.py +37 -0
  84. dstack/_internal/core/backends/vultr/__init__.py +0 -15
  85. dstack/_internal/core/backends/vultr/backend.py +16 -0
  86. dstack/_internal/core/backends/vultr/compute.py +10 -24
  87. dstack/_internal/core/backends/vultr/configurator.py +64 -0
  88. dstack/_internal/core/backends/vultr/models.py +34 -0
  89. dstack/_internal/core/models/backends/__init__.py +0 -184
  90. dstack/_internal/core/models/backends/base.py +0 -19
  91. dstack/_internal/core/models/configurations.py +22 -16
  92. dstack/_internal/core/models/envs.py +4 -3
  93. dstack/_internal/core/models/fleets.py +17 -22
  94. dstack/_internal/core/models/gateways.py +3 -3
  95. dstack/_internal/core/models/instances.py +24 -0
  96. dstack/_internal/core/models/profiles.py +85 -45
  97. dstack/_internal/core/models/projects.py +1 -1
  98. dstack/_internal/core/models/repos/base.py +0 -5
  99. dstack/_internal/core/models/repos/local.py +3 -3
  100. dstack/_internal/core/models/repos/remote.py +26 -12
  101. dstack/_internal/core/models/repos/virtual.py +1 -1
  102. dstack/_internal/core/models/resources.py +45 -76
  103. dstack/_internal/core/models/runs.py +21 -19
  104. dstack/_internal/core/models/volumes.py +1 -3
  105. dstack/_internal/core/services/profiles.py +7 -16
  106. dstack/_internal/core/services/repos.py +0 -4
  107. dstack/_internal/server/app.py +11 -4
  108. dstack/_internal/server/background/__init__.py +10 -0
  109. dstack/_internal/server/background/tasks/process_gateways.py +4 -8
  110. dstack/_internal/server/background/tasks/process_instances.py +14 -9
  111. dstack/_internal/server/background/tasks/process_metrics.py +1 -1
  112. dstack/_internal/server/background/tasks/process_placement_groups.py +5 -1
  113. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +135 -0
  114. dstack/_internal/server/background/tasks/process_running_jobs.py +80 -24
  115. dstack/_internal/server/background/tasks/process_runs.py +1 -0
  116. dstack/_internal/server/background/tasks/process_submitted_jobs.py +20 -38
  117. dstack/_internal/server/background/tasks/process_volumes.py +5 -2
  118. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  119. dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
  120. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  121. dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
  122. dstack/_internal/server/models.py +59 -9
  123. dstack/_internal/server/routers/backends.py +14 -23
  124. dstack/_internal/server/routers/instances.py +3 -4
  125. dstack/_internal/server/routers/metrics.py +31 -10
  126. dstack/_internal/server/routers/prometheus.py +36 -0
  127. dstack/_internal/server/routers/repos.py +1 -2
  128. dstack/_internal/server/routers/runs.py +13 -59
  129. dstack/_internal/server/schemas/gateways.py +14 -23
  130. dstack/_internal/server/schemas/projects.py +7 -2
  131. dstack/_internal/server/schemas/repos.py +2 -38
  132. dstack/_internal/server/schemas/runner.py +1 -0
  133. dstack/_internal/server/schemas/runs.py +1 -24
  134. dstack/_internal/server/security/permissions.py +1 -1
  135. dstack/_internal/server/services/backends/__init__.py +85 -158
  136. dstack/_internal/server/services/config.py +53 -567
  137. dstack/_internal/server/services/fleets.py +9 -103
  138. dstack/_internal/server/services/gateways/__init__.py +13 -4
  139. dstack/_internal/server/services/{pools.py → instances.py} +22 -329
  140. dstack/_internal/server/services/jobs/__init__.py +9 -6
  141. dstack/_internal/server/services/jobs/configurators/base.py +25 -1
  142. dstack/_internal/server/services/jobs/configurators/dev.py +9 -1
  143. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
  144. dstack/_internal/server/services/metrics.py +131 -72
  145. dstack/_internal/server/services/offers.py +1 -1
  146. dstack/_internal/server/services/projects.py +23 -14
  147. dstack/_internal/server/services/prometheus.py +245 -0
  148. dstack/_internal/server/services/runner/client.py +14 -3
  149. dstack/_internal/server/services/runs.py +67 -31
  150. dstack/_internal/server/services/volumes.py +9 -4
  151. dstack/_internal/server/settings.py +3 -0
  152. dstack/_internal/server/statics/index.html +1 -1
  153. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js → main-4a0fe83e84574654e397.js} +76 -19
  154. dstack/_internal/server/statics/{main-fe8fd9db55df8d10e648.js.map → main-4a0fe83e84574654e397.js.map} +1 -1
  155. dstack/_internal/server/statics/{main-7510e71dfa9749a4e70e.css → main-da9f8c06a69c20dac23e.css} +1 -1
  156. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  157. dstack/_internal/server/testing/common.py +75 -32
  158. dstack/_internal/utils/json_schema.py +6 -0
  159. dstack/_internal/utils/ssh.py +2 -1
  160. dstack/api/__init__.py +4 -0
  161. dstack/api/_public/__init__.py +16 -20
  162. dstack/api/_public/backends.py +1 -1
  163. dstack/api/_public/repos.py +36 -36
  164. dstack/api/_public/runs.py +170 -83
  165. dstack/api/server/__init__.py +11 -13
  166. dstack/api/server/_backends.py +12 -16
  167. dstack/api/server/_fleets.py +15 -55
  168. dstack/api/server/_gateways.py +3 -14
  169. dstack/api/server/_repos.py +1 -4
  170. dstack/api/server/_runs.py +21 -96
  171. dstack/api/server/_volumes.py +10 -5
  172. dstack/api/utils.py +3 -0
  173. dstack/version.py +1 -1
  174. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/METADATA +10 -1
  175. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/RECORD +229 -206
  176. tests/_internal/cli/services/configurators/test_profile.py +6 -6
  177. tests/_internal/core/backends/aws/test_configurator.py +35 -0
  178. tests/_internal/core/backends/aws/test_resources.py +1 -1
  179. tests/_internal/core/backends/azure/test_configurator.py +61 -0
  180. tests/_internal/core/backends/cudo/__init__.py +0 -0
  181. tests/_internal/core/backends/cudo/test_configurator.py +37 -0
  182. tests/_internal/core/backends/datacrunch/__init__.py +0 -0
  183. tests/_internal/core/backends/datacrunch/test_configurator.py +17 -0
  184. tests/_internal/core/backends/gcp/test_configurator.py +42 -0
  185. tests/_internal/core/backends/kubernetes/test_configurator.py +43 -0
  186. tests/_internal/core/backends/lambdalabs/__init__.py +0 -0
  187. tests/_internal/core/backends/lambdalabs/test_configurator.py +38 -0
  188. tests/_internal/core/backends/oci/test_configurator.py +55 -0
  189. tests/_internal/core/backends/runpod/__init__.py +0 -0
  190. tests/_internal/core/backends/runpod/test_configurator.py +33 -0
  191. tests/_internal/core/backends/tensordock/__init__.py +0 -0
  192. tests/_internal/core/backends/tensordock/test_configurator.py +38 -0
  193. tests/_internal/core/backends/vastai/__init__.py +0 -0
  194. tests/_internal/core/backends/vastai/test_configurator.py +33 -0
  195. tests/_internal/core/backends/vultr/__init__.py +0 -0
  196. tests/_internal/core/backends/vultr/test_configurator.py +33 -0
  197. tests/_internal/server/background/tasks/test_process_gateways.py +4 -0
  198. tests/_internal/server/background/tasks/test_process_instances.py +49 -48
  199. tests/_internal/server/background/tasks/test_process_metrics.py +0 -3
  200. tests/_internal/server/background/tasks/test_process_placement_groups.py +2 -0
  201. tests/_internal/server/background/tasks/test_process_prometheus_metrics.py +186 -0
  202. tests/_internal/server/background/tasks/test_process_running_jobs.py +123 -19
  203. tests/_internal/server/background/tasks/test_process_runs.py +8 -22
  204. tests/_internal/server/background/tasks/test_process_submitted_jobs.py +3 -40
  205. tests/_internal/server/background/tasks/test_process_submitted_volumes.py +2 -0
  206. tests/_internal/server/background/tasks/test_process_terminating_jobs.py +10 -15
  207. tests/_internal/server/routers/test_backends.py +6 -764
  208. tests/_internal/server/routers/test_fleets.py +2 -26
  209. tests/_internal/server/routers/test_gateways.py +27 -3
  210. tests/_internal/server/routers/test_instances.py +0 -10
  211. tests/_internal/server/routers/test_metrics.py +42 -0
  212. tests/_internal/server/routers/test_projects.py +56 -0
  213. tests/_internal/server/routers/test_prometheus.py +333 -0
  214. tests/_internal/server/routers/test_repos.py +0 -15
  215. tests/_internal/server/routers/test_runs.py +83 -275
  216. tests/_internal/server/routers/test_volumes.py +2 -3
  217. tests/_internal/server/services/backends/__init__.py +0 -0
  218. tests/_internal/server/services/jobs/configurators/test_task.py +35 -0
  219. tests/_internal/server/services/test_config.py +7 -4
  220. tests/_internal/server/services/test_fleets.py +1 -4
  221. tests/_internal/server/services/{test_pools.py → test_instances.py} +11 -49
  222. tests/_internal/server/services/test_metrics.py +167 -0
  223. tests/_internal/server/services/test_repos.py +1 -14
  224. tests/_internal/server/services/test_runs.py +0 -4
  225. dstack/_internal/cli/commands/pool.py +0 -581
  226. dstack/_internal/cli/commands/run.py +0 -75
  227. dstack/_internal/core/backends/aws/config.py +0 -18
  228. dstack/_internal/core/backends/azure/config.py +0 -12
  229. dstack/_internal/core/backends/base/config.py +0 -5
  230. dstack/_internal/core/backends/cudo/config.py +0 -9
  231. dstack/_internal/core/backends/datacrunch/config.py +0 -9
  232. dstack/_internal/core/backends/gcp/config.py +0 -22
  233. dstack/_internal/core/backends/kubernetes/config.py +0 -6
  234. dstack/_internal/core/backends/lambdalabs/config.py +0 -9
  235. dstack/_internal/core/backends/nebius/__init__.py +0 -15
  236. dstack/_internal/core/backends/nebius/api_client.py +0 -319
  237. dstack/_internal/core/backends/nebius/compute.py +0 -220
  238. dstack/_internal/core/backends/nebius/config.py +0 -6
  239. dstack/_internal/core/backends/nebius/types.py +0 -37
  240. dstack/_internal/core/backends/oci/config.py +0 -6
  241. dstack/_internal/core/backends/runpod/config.py +0 -9
  242. dstack/_internal/core/backends/tensordock/config.py +0 -9
  243. dstack/_internal/core/backends/vastai/config.py +0 -6
  244. dstack/_internal/core/backends/vultr/config.py +0 -9
  245. dstack/_internal/core/models/backends/aws.py +0 -86
  246. dstack/_internal/core/models/backends/azure.py +0 -68
  247. dstack/_internal/core/models/backends/cudo.py +0 -43
  248. dstack/_internal/core/models/backends/datacrunch.py +0 -44
  249. dstack/_internal/core/models/backends/gcp.py +0 -67
  250. dstack/_internal/core/models/backends/kubernetes.py +0 -40
  251. dstack/_internal/core/models/backends/lambdalabs.py +0 -43
  252. dstack/_internal/core/models/backends/nebius.py +0 -54
  253. dstack/_internal/core/models/backends/runpod.py +0 -40
  254. dstack/_internal/core/models/backends/tensordock.py +0 -44
  255. dstack/_internal/core/models/backends/vastai.py +0 -43
  256. dstack/_internal/core/models/backends/vultr.py +0 -40
  257. dstack/_internal/core/models/pools.py +0 -43
  258. dstack/_internal/server/routers/pools.py +0 -142
  259. dstack/_internal/server/schemas/pools.py +0 -38
  260. dstack/_internal/server/services/backends/configurators/base.py +0 -72
  261. dstack/_internal/server/services/backends/configurators/cudo.py +0 -87
  262. dstack/_internal/server/services/backends/configurators/datacrunch.py +0 -79
  263. dstack/_internal/server/services/backends/configurators/kubernetes.py +0 -63
  264. dstack/_internal/server/services/backends/configurators/lambdalabs.py +0 -98
  265. dstack/_internal/server/services/backends/configurators/nebius.py +0 -85
  266. dstack/_internal/server/services/backends/configurators/runpod.py +0 -97
  267. dstack/_internal/server/services/backends/configurators/tensordock.py +0 -82
  268. dstack/_internal/server/services/backends/configurators/vastai.py +0 -80
  269. dstack/_internal/server/services/backends/configurators/vultr.py +0 -80
  270. dstack/api/_public/pools.py +0 -41
  271. dstack/api/_public/resources.py +0 -105
  272. dstack/api/server/_pools.py +0 -63
  273. tests/_internal/server/routers/test_pools.py +0 -612
  274. /dstack/_internal/{server/services/backends/configurators → core/backends/dstack}/__init__.py +0 -0
  275. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/LICENSE.md +0 -0
  276. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/WHEEL +0 -0
  277. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/entry_points.txt +0 -0
  278. {dstack-0.18.43.dist-info → dstack-0.19.0.dist-info}/top_level.txt +0 -0
@@ -1,37 +1,30 @@
1
1
  import json
2
- from typing import List
3
2
 
4
3
  import google.cloud.compute_v1 as compute_v1
5
- from google.auth.credentials import Credentials
6
4
 
7
- from dstack._internal.core.backends.gcp import GCPBackend, auth, resources
8
- from dstack._internal.core.backends.gcp.config import GCPConfig
9
- from dstack._internal.core.errors import BackendAuthError, BackendError, ServerClientError
10
- from dstack._internal.core.models.backends.base import (
11
- BackendType,
12
- ConfigElement,
13
- ConfigElementValue,
14
- ConfigMultiElement,
5
+ from dstack._internal.core.backends.base.configurator import (
6
+ TAGS_MAX_NUM,
7
+ BackendRecord,
8
+ Configurator,
9
+ raise_invalid_credentials_error,
15
10
  )
16
- from dstack._internal.core.models.backends.gcp import (
17
- AnyGCPConfigInfo,
18
- GCPConfigInfo,
19
- GCPConfigInfoWithCreds,
20
- GCPConfigInfoWithCredsPartial,
21
- GCPConfigValues,
11
+ from dstack._internal.core.backends.gcp import auth, resources
12
+ from dstack._internal.core.backends.gcp.backend import GCPBackend
13
+ from dstack._internal.core.backends.gcp.models import (
14
+ AnyGCPBackendConfig,
15
+ GCPBackendConfig,
16
+ GCPBackendConfigWithCreds,
17
+ GCPConfig,
22
18
  GCPCreds,
23
19
  GCPDefaultCreds,
24
20
  GCPServiceAccountCreds,
25
21
  GCPStoredConfig,
26
22
  )
27
- from dstack._internal.core.models.common import is_core_model_instance
28
- from dstack._internal.server import settings
29
- from dstack._internal.server.models import BackendModel, DecryptedString, ProjectModel
30
- from dstack._internal.server.services.backends.configurators.base import (
31
- TAGS_MAX_NUM,
32
- Configurator,
33
- raise_invalid_credentials_error,
23
+ from dstack._internal.core.errors import BackendAuthError, BackendError, ServerClientError
24
+ from dstack._internal.core.models.backends.base import (
25
+ BackendType,
34
26
  )
27
+ from dstack._internal.core.models.common import is_core_model_instance
35
28
 
36
29
  LOCATIONS = [
37
30
  {
@@ -118,34 +111,11 @@ MAIN_REGION = "us-east1"
118
111
 
119
112
 
120
113
  class GCPConfigurator(Configurator):
121
- TYPE: BackendType = BackendType.GCP
122
-
123
- def get_default_configs(self) -> List[GCPConfigInfoWithCreds]:
124
- if not auth.default_creds_available():
125
- return []
126
- try:
127
- _, project_id = auth.authenticate(GCPDefaultCreds())
128
- except BackendAuthError:
129
- return []
130
- return [
131
- GCPConfigInfoWithCreds(
132
- project_id=project_id,
133
- regions=DEFAULT_REGIONS,
134
- creds=GCPDefaultCreds(),
135
- )
136
- ]
114
+ TYPE = BackendType.GCP
115
+ BACKEND_CLASS = GCPBackend
137
116
 
138
- def get_config_values(self, config: GCPConfigInfoWithCredsPartial) -> GCPConfigValues:
139
- config_values = GCPConfigValues(project_id=None, regions=None)
140
- config_values.default_creds = (
141
- settings.DEFAULT_CREDS_ENABLED and auth.default_creds_available()
142
- )
143
- if config.creds is None:
144
- return config_values
145
- if (
146
- is_core_model_instance(config.creds, GCPDefaultCreds)
147
- and not settings.DEFAULT_CREDS_ENABLED
148
- ):
117
+ def validate_config(self, config: GCPBackendConfigWithCreds, default_creds_enabled: bool):
118
+ if is_core_model_instance(config.creds, GCPDefaultCreds) and not default_creds_enabled:
149
119
  raise_invalid_credentials_error(fields=[["creds"]])
150
120
  try:
151
121
  credentials, _ = auth.authenticate(creds=config.creds, project_id=config.project_id)
@@ -157,73 +127,46 @@ class GCPConfigurator(Configurator):
157
127
  raise_invalid_credentials_error(fields=[["creds", "data"]], details=details)
158
128
  else:
159
129
  raise_invalid_credentials_error(fields=[["creds"]], details=details)
160
- config_values.regions = self._get_regions_element(
161
- selected=config.regions or DEFAULT_REGIONS
130
+ subnetworks_client = compute_v1.SubnetworksClient(credentials=credentials)
131
+ routers_client = compute_v1.RoutersClient(credentials=credentials)
132
+ self._check_config_tags(config)
133
+ self._check_config_vpc(
134
+ subnetworks_client=subnetworks_client,
135
+ routers_client=routers_client,
136
+ config=config,
162
137
  )
163
- if config.project_id is None:
164
- return config_values
165
- config_values.project_id = self._get_project_id_element(selected=config.project_id)
166
- self._check_config(config=config, credentials=credentials)
167
- return config_values
168
138
 
169
139
  def create_backend(
170
- self, project: ProjectModel, config: GCPConfigInfoWithCreds
171
- ) -> BackendModel:
140
+ self, project_name: str, config: GCPBackendConfigWithCreds
141
+ ) -> BackendRecord:
172
142
  if config.regions is None:
173
143
  config.regions = DEFAULT_REGIONS
174
- return BackendModel(
175
- project_id=project.id,
176
- type=self.TYPE.value,
144
+ return BackendRecord(
177
145
  config=GCPStoredConfig(
178
- **GCPConfigInfo.__response__.parse_obj(config).dict(),
146
+ **GCPBackendConfig.__response__.parse_obj(config).dict(),
179
147
  ).json(),
180
- auth=DecryptedString(plaintext=GCPCreds.parse_obj(config.creds).json()),
148
+ auth=GCPCreds.parse_obj(config.creds).json(),
181
149
  )
182
150
 
183
- def get_config_info(self, model: BackendModel, include_creds: bool) -> AnyGCPConfigInfo:
184
- config = self._get_backend_config(model)
151
+ def get_backend_config(
152
+ self, record: BackendRecord, include_creds: bool
153
+ ) -> AnyGCPBackendConfig:
154
+ config = self._get_config(record)
185
155
  if include_creds:
186
- return GCPConfigInfoWithCreds.__response__.parse_obj(config)
187
- return GCPConfigInfo.__response__.parse_obj(config)
156
+ return GCPBackendConfigWithCreds.__response__.parse_obj(config)
157
+ return GCPBackendConfig.__response__.parse_obj(config)
188
158
 
189
- def get_backend(self, model: BackendModel) -> GCPBackend:
190
- config = self._get_backend_config(model)
159
+ def get_backend(self, record: BackendRecord) -> GCPBackend:
160
+ config = self._get_config(record)
191
161
  return GCPBackend(config=config)
192
162
 
193
- def _get_backend_config(self, model: BackendModel) -> GCPConfig:
163
+ def _get_config(self, record: BackendRecord) -> GCPConfig:
194
164
  return GCPConfig.__response__(
195
- **json.loads(model.config),
196
- creds=GCPCreds.parse_raw(model.auth.get_plaintext_or_error()).__root__,
197
- )
198
-
199
- def _get_project_id_element(
200
- self,
201
- selected: str,
202
- ) -> ConfigElement:
203
- element = ConfigElement(selected=selected)
204
- element.values.append(ConfigElementValue(value=selected, label=selected))
205
- return element
206
-
207
- def _get_regions_element(
208
- self,
209
- selected: List[str],
210
- ) -> ConfigMultiElement:
211
- element = ConfigMultiElement(selected=selected)
212
- for region_name in REGIONS:
213
- element.values.append(ConfigElementValue(value=region_name, label=region_name))
214
- return element
215
-
216
- def _check_config(self, config: GCPConfigInfoWithCredsPartial, credentials: Credentials):
217
- subnetworks_client = compute_v1.SubnetworksClient(credentials=credentials)
218
- routers_client = compute_v1.RoutersClient(credentials=credentials)
219
- self._check_tags_config(config)
220
- self._check_vpc_config(
221
- subnetworks_client=subnetworks_client,
222
- routers_client=routers_client,
223
- config=config,
165
+ **json.loads(record.config),
166
+ creds=GCPCreds.parse_raw(record.auth).__root__,
224
167
  )
225
168
 
226
- def _check_tags_config(self, config: GCPConfigInfoWithCredsPartial):
169
+ def _check_config_tags(self, config: GCPBackendConfigWithCreds):
227
170
  if not config.tags:
228
171
  return
229
172
  if len(config.tags) > TAGS_MAX_NUM:
@@ -235,9 +178,9 @@ class GCPConfigurator(Configurator):
235
178
  except BackendError as e:
236
179
  raise ServerClientError(e.args[0])
237
180
 
238
- def _check_vpc_config(
181
+ def _check_config_vpc(
239
182
  self,
240
- config: GCPConfigInfoWithCredsPartial,
183
+ config: GCPBackendConfigWithCreds,
241
184
  subnetworks_client: compute_v1.SubnetworksClient,
242
185
  routers_client: compute_v1.RoutersClient,
243
186
  ):
@@ -0,0 +1,125 @@
1
+ from typing import Annotated, Dict, List, Literal, Optional, Union
2
+
3
+ from pydantic import Field, root_validator
4
+
5
+ from dstack._internal.core.backends.base.models import fill_data
6
+ from dstack._internal.core.models.common import CoreModel
7
+
8
+
9
+ class GCPServiceAccountCreds(CoreModel):
10
+ type: Annotated[Literal["service_account"], Field(description="The type of credentials")] = (
11
+ "service_account"
12
+ )
13
+ filename: Annotated[
14
+ Optional[str], Field(description="The path to the service account file")
15
+ ] = ""
16
+ data: Annotated[str, Field(description="The contents of the service account file")]
17
+
18
+
19
+ class GCPDefaultCreds(CoreModel):
20
+ type: Literal["default"] = "default"
21
+
22
+
23
+ AnyGCPCreds = Union[GCPServiceAccountCreds, GCPDefaultCreds]
24
+
25
+
26
+ class GCPCreds(CoreModel):
27
+ __root__: AnyGCPCreds = Field(..., discriminator="type")
28
+
29
+
30
+ class GCPBackendConfig(CoreModel):
31
+ type: Annotated[Literal["gcp"], Field(description="The type of backend")] = "gcp"
32
+ project_id: Annotated[str, Field(description="The project ID")]
33
+ regions: Annotated[
34
+ Optional[List[str]], Field(description="The list of GCP regions. Omit to use all regions")
35
+ ] = None
36
+ vpc_name: Annotated[Optional[str], Field(description="The name of a custom VPC")] = None
37
+ vpc_project_id: Annotated[
38
+ Optional[str],
39
+ Field(description="The shared VPC hosted project ID. Required for shared VPC only"),
40
+ ] = None
41
+ public_ips: Annotated[
42
+ Optional[bool],
43
+ Field(
44
+ description="A flag to enable/disable public IP assigning on instances. Defaults to `true`"
45
+ ),
46
+ ] = None
47
+ nat_check: Annotated[
48
+ Optional[bool],
49
+ Field(
50
+ description=(
51
+ "A flag to enable/disable a check that Cloud NAT is configured for the VPC."
52
+ " This should be set to `false` when `public_ips: false` and outbound internet connectivity"
53
+ " is provided by a mechanism other than Cloud NAT such as a third-party NAT appliance."
54
+ " Defaults to `true`"
55
+ )
56
+ ),
57
+ ] = None
58
+ vm_service_account: Annotated[
59
+ Optional[str], Field(description="The service account to associate with provisioned VMs")
60
+ ] = None
61
+ tags: Annotated[
62
+ Optional[Dict[str, str]],
63
+ Field(
64
+ description="The tags (labels) that will be assigned to resources created by `dstack`"
65
+ ),
66
+ ] = None
67
+
68
+
69
+ class GCPBackendConfigWithCreds(GCPBackendConfig):
70
+ creds: AnyGCPCreds = Field(..., description="The credentials", discriminator="type")
71
+
72
+
73
+ class GCPServiceAccountFileCreds(CoreModel):
74
+ type: Annotated[Literal["service_account"], Field(description="The type of credentials")] = (
75
+ "service_account"
76
+ )
77
+ filename: Annotated[str, Field(description="The path to the service account file")]
78
+ data: Annotated[
79
+ Optional[str],
80
+ Field(
81
+ description=(
82
+ "The contents of the service account file."
83
+ " When configuring via `server/config.yml`, it's automatically filled from `filename`."
84
+ " When configuring via UI, it has to be specified explicitly"
85
+ )
86
+ ),
87
+ ] = None
88
+
89
+ @root_validator
90
+ def fill_data(cls, values):
91
+ return fill_data(values)
92
+
93
+
94
+ AnyGCPFileCreds = Union[GCPServiceAccountFileCreds, GCPDefaultCreds]
95
+
96
+
97
+ class GCPBackendFileConfigWithCreds(GCPBackendConfig):
98
+ creds: AnyGCPFileCreds = Field(..., description="The credentials", discriminator="type")
99
+
100
+
101
+ AnyGCPBackendConfig = Union[GCPBackendConfig, GCPBackendConfigWithCreds]
102
+
103
+
104
+ class GCPStoredConfig(GCPBackendConfig):
105
+ pass
106
+
107
+
108
+ class GCPConfig(GCPStoredConfig):
109
+ creds: AnyGCPCreds
110
+
111
+ @property
112
+ def allocate_public_ips(self) -> bool:
113
+ if self.public_ips is not None:
114
+ return self.public_ips
115
+ return True
116
+
117
+ @property
118
+ def vpc_resource_name(self) -> str:
119
+ vpc_name = self.vpc_name
120
+ if vpc_name is None:
121
+ vpc_name = "default"
122
+ project_id = self.project_id
123
+ if self.vpc_project_id is not None:
124
+ project_id = self.vpc_project_id
125
+ return f"projects/{project_id}/global/networks/{vpc_name}"
@@ -1,15 +0,0 @@
1
- from dstack._internal.core.backends.base import Backend
2
- from dstack._internal.core.backends.kubernetes.compute import KubernetesCompute
3
- from dstack._internal.core.backends.kubernetes.config import KubernetesConfig
4
- from dstack._internal.core.models.backends.base import BackendType
5
-
6
-
7
- class KubernetesBackend(Backend):
8
- TYPE: BackendType = BackendType.KUBERNETES
9
-
10
- def __init__(self, config: KubernetesConfig):
11
- self.config = config
12
- self._compute = KubernetesCompute(self.config)
13
-
14
- def compute(self) -> KubernetesCompute:
15
- return self._compute
@@ -0,0 +1,16 @@
1
+ from dstack._internal.core.backends.base.backend import Backend
2
+ from dstack._internal.core.backends.kubernetes.compute import KubernetesCompute
3
+ from dstack._internal.core.backends.kubernetes.models import KubernetesConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
+ class KubernetesBackend(Backend):
8
+ TYPE = BackendType.KUBERNETES
9
+ COMPUTE_CLASS = KubernetesCompute
10
+
11
+ def __init__(self, config: KubernetesConfig):
12
+ self.config = config
13
+ self._compute = KubernetesCompute(self.config)
14
+
15
+ def compute(self) -> KubernetesCompute:
16
+ return self._compute
@@ -9,13 +9,17 @@ from kubernetes import client
9
9
 
10
10
  from dstack._internal.core.backends.base.compute import (
11
11
  Compute,
12
+ ComputeWithGatewaySupport,
12
13
  generate_unique_gateway_instance_name,
13
14
  generate_unique_instance_name_for_job,
14
15
  get_docker_commands,
15
16
  get_dstack_gateway_commands,
16
17
  )
17
18
  from dstack._internal.core.backends.base.offers import match_requirements
18
- from dstack._internal.core.backends.kubernetes.config import KubernetesConfig
19
+ from dstack._internal.core.backends.kubernetes.models import (
20
+ KubernetesConfig,
21
+ KubernetesNetworkingConfig,
22
+ )
19
23
  from dstack._internal.core.backends.kubernetes.utils import (
20
24
  get_api_from_config_data,
21
25
  get_cluster_public_ip,
@@ -53,10 +57,17 @@ NVIDIA_GPU_NAME_TO_GPU_INFO = {gpu.name: gpu for gpu in KNOWN_NVIDIA_GPUS}
53
57
  NVIDIA_GPU_NAMES = NVIDIA_GPU_NAME_TO_GPU_INFO.keys()
54
58
 
55
59
 
56
- class KubernetesCompute(Compute):
60
+ class KubernetesCompute(
61
+ ComputeWithGatewaySupport,
62
+ Compute,
63
+ ):
57
64
  def __init__(self, config: KubernetesConfig):
58
65
  super().__init__()
59
- self.config = config
66
+ self.config = config.copy()
67
+ networking_config = self.config.networking
68
+ if networking_config is None:
69
+ networking_config = KubernetesNetworkingConfig()
70
+ self.networking_config = networking_config
60
71
  self.api = get_api_from_config_data(config.kubeconfig.data)
61
72
 
62
73
  def get_offers(
@@ -109,7 +120,7 @@ class KubernetesCompute(Compute):
109
120
  # as an ssh proxy jump to connect to all other services in Kubernetes.
110
121
  # Setup jump pod in a separate thread to avoid long-running run_job.
111
122
  # In case the thread fails, the job will be failed and resubmitted.
112
- jump_pod_hostname = self.config.networking.ssh_host
123
+ jump_pod_hostname = self.networking_config.ssh_host
113
124
  if jump_pod_hostname is None:
114
125
  jump_pod_hostname = get_cluster_public_ip(self.api)
115
126
  if jump_pod_hostname is None:
@@ -121,7 +132,7 @@ class KubernetesCompute(Compute):
121
132
  api=self.api,
122
133
  project_name=run.project_name,
123
134
  ssh_public_keys=[project_ssh_public_key.strip(), run.run_spec.ssh_key_pub.strip()],
124
- jump_pod_port=self.config.networking.ssh_port,
135
+ jump_pod_port=self.networking_config.ssh_port,
125
136
  )
126
137
  if not created:
127
138
  threading.Thread(
@@ -0,0 +1,55 @@
1
+ from dstack._internal.core.backends.base.configurator import (
2
+ BackendRecord,
3
+ Configurator,
4
+ raise_invalid_credentials_error,
5
+ )
6
+ from dstack._internal.core.backends.kubernetes import utils as kubernetes_utils
7
+ from dstack._internal.core.backends.kubernetes.backend import KubernetesBackend
8
+ from dstack._internal.core.backends.kubernetes.models import (
9
+ AnyKubernetesBackendConfig,
10
+ KubernetesBackendConfig,
11
+ KubernetesBackendConfigWithCreds,
12
+ KubernetesConfig,
13
+ KubernetesStoredConfig,
14
+ )
15
+ from dstack._internal.core.models.backends.base import BackendType
16
+ from dstack._internal.utils.logging import get_logger
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
class KubernetesConfigurator(Configurator):
    """Configurator for backends of type `BackendType.KUBERNETES`."""

    TYPE = BackendType.KUBERNETES
    BACKEND_CLASS = KubernetesBackend

    def validate_config(
        self, config: KubernetesBackendConfigWithCreds, default_creds_enabled: bool
    ):
        """Check the kubeconfig by building an API client and listing nodes.

        Any exception raised by either step is logged at debug level and
        reported via `raise_invalid_credentials_error` against the
        `kubeconfig` field. `default_creds_enabled` is not used here.
        """
        try:
            client = kubernetes_utils.get_api_from_config_data(config.kubeconfig.data)
            client.list_node()
        except Exception as exc:
            logger.debug("Invalid kubeconfig: %s", str(exc))
            raise_invalid_credentials_error(fields=[["kubeconfig"]])

    def create_backend(
        self, project_name: str, config: KubernetesBackendConfigWithCreds
    ) -> BackendRecord:
        """Serialize `config` into a `BackendRecord`.

        The config is re-parsed as `KubernetesStoredConfig` and dumped to
        JSON; `auth` is left as an empty string. `project_name` is not used.
        """
        stored = KubernetesStoredConfig.__response__.parse_obj(config)
        return BackendRecord(config=stored.json(), auth="")

    def get_backend_config(
        self, record: BackendRecord, include_creds: bool
    ) -> AnyKubernetesBackendConfig:
        """Load the record's config and return it with or without credentials."""
        loaded = self._get_config(record)
        # Only the "with creds" model declares the `kubeconfig` field.
        model = KubernetesBackendConfigWithCreds if include_creds else KubernetesBackendConfig
        return model.__response__.parse_obj(loaded)

    def get_backend(self, record: BackendRecord) -> KubernetesBackend:
        """Build a `KubernetesBackend` from the config stored in `record`."""
        loaded = self._get_config(record)
        return KubernetesBackend(loaded)

    def _get_config(self, record: BackendRecord) -> KubernetesConfig:
        """Parse the JSON in `record.config` into a `KubernetesConfig`."""
        return KubernetesConfig.__response__.parse_raw(record.config)
@@ -0,0 +1,72 @@
1
+ from typing import Annotated, Literal, Optional, Union
2
+
3
+ from pydantic import Field, root_validator
4
+
5
+ from dstack._internal.core.backends.base.models import fill_data
6
+ from dstack._internal.core.models.common import CoreModel
7
+
8
+
9
class KubernetesNetworkingConfig(CoreModel):
    # Both settings are optional and default to None.
    ssh_host: Annotated[
        Optional[str],
        Field(description="The external IP address of any node"),
    ] = None
    ssh_port: Annotated[
        Optional[int],
        Field(description="Any port accessible outside of the cluster"),
    ] = None
16
+
17
+
18
class KubeconfigConfig(CoreModel):
    # `data` is required; `filename` defaults to an empty string.
    filename: Annotated[
        str,
        Field(description="The path to the kubeconfig file"),
    ] = ""
    data: Annotated[
        str,
        Field(description="The contents of the kubeconfig file"),
    ]
21
+
22
+
23
class KubernetesBackendConfig(CoreModel):
    # Backend config without credentials: there is no `kubeconfig` field here.
    type: Annotated[
        Literal["kubernetes"],
        Field(description="The type of backend"),
    ] = "kubernetes"
    networking: Annotated[
        Optional[KubernetesNetworkingConfig],
        Field(description="The networking configuration"),
    ] = None
28
+
29
+
30
class KubernetesBackendConfigWithCreds(CoreModel):
    # Same `type` and `networking` fields as KubernetesBackendConfig, plus a
    # required `kubeconfig`.
    type: Annotated[
        Literal["kubernetes"],
        Field(description="The type of backend"),
    ] = "kubernetes"
    networking: Annotated[
        Optional[KubernetesNetworkingConfig],
        Field(description="The networking configuration"),
    ] = None
    kubeconfig: Annotated[
        KubeconfigConfig,
        Field(description="The kubeconfig configuration"),
    ]
36
+
37
+
38
class KubeconfigFileConfig(CoreModel):
    # File-based variant of the kubeconfig settings: `filename` is required
    # and `data` may be omitted (defaults to None).
    filename: Annotated[
        str,
        Field(description="The path to the kubeconfig file"),
    ]
    data: Annotated[
        Optional[str],
        Field(
            description=(
                "The contents of the kubeconfig file."
                " When configuring via `server/config.yml`, it's automatically filled from `filename`."
                " When configuring via UI, it has to be specified explicitly"
            )
        ),
    ] = None

    @root_validator
    def fill_data(cls, values):
        # Inside this body `fill_data` is the module-level helper imported
        # from `base.models`, not this validator: class-level names are not
        # visible from within method bodies, so this is not recursive.
        return fill_data(values)
54
+
55
+
56
class KubernetesBackendFileConfigWithCreds(CoreModel):
    # Same shape as KubernetesBackendConfigWithCreds, except that `kubeconfig`
    # is a KubeconfigFileConfig (required `filename`, optional `data`).
    type: Annotated[
        Literal["kubernetes"],
        Field(description="The type of backend"),
    ] = "kubernetes"
    networking: Annotated[
        Optional[KubernetesNetworkingConfig],
        Field(description="The networking configuration"),
    ] = None
    kubeconfig: Annotated[
        KubeconfigFileConfig,
        Field(description="The kubeconfig configuration"),
    ]
62
+
63
+
64
+ AnyKubernetesBackendConfig = Union[KubernetesBackendConfig, KubernetesBackendConfigWithCreds]
65
+
66
+
67
class KubernetesStoredConfig(KubernetesBackendConfigWithCreds):
    # Declares no fields of its own: everything, `kubeconfig` included, is
    # inherited from KubernetesBackendConfigWithCreds.
    pass
69
+
70
+
71
class KubernetesConfig(KubernetesStoredConfig):
    # Declares no fields of its own: identical in shape to
    # KubernetesStoredConfig.
    pass
@@ -1,16 +0,0 @@
1
- from dstack._internal.core.backends.base import Backend
2
- from dstack._internal.core.backends.lambdalabs.compute import LambdaCompute
3
- from dstack._internal.core.backends.lambdalabs.config import LambdaConfig
4
- from dstack._internal.core.models.backends.base import BackendType
5
-
6
-
7
- class LambdaBackend(Backend):
8
- TYPE: BackendType = BackendType.LAMBDA
9
-
10
- def __init__(self, config: LambdaConfig):
11
- self.config = config
12
- self._compute = LambdaCompute(self.config)
13
- # self._check_credentials()
14
-
15
- def compute(self) -> LambdaCompute:
16
- return self._compute
@@ -0,0 +1,17 @@
1
+ from dstack._internal.core.backends.base.backend import Backend
2
+ from dstack._internal.core.backends.lambdalabs.compute import LambdaCompute
3
+ from dstack._internal.core.backends.lambdalabs.models import LambdaConfig
4
+ from dstack._internal.core.models.backends.base import BackendType
5
+
6
+
7
class LambdaBackend(Backend):
    """Backend for `BackendType.LAMBDA`, backed by `LambdaCompute`."""

    TYPE = BackendType.LAMBDA
    COMPUTE_CLASS = LambdaCompute

    def __init__(self, config: LambdaConfig):
        """Keep `config` and build the compute object from it."""
        self.config = config
        self._compute = LambdaCompute(config)
        # self._check_credentials()

    def compute(self) -> LambdaCompute:
        """Return the compute object created in `__init__`."""
        return self._compute
@@ -6,28 +6,29 @@ from typing import Dict, List, Optional
6
6
 
7
7
  from dstack._internal.core.backends.base.compute import (
8
8
  Compute,
9
+ ComputeWithCreateInstanceSupport,
9
10
  generate_unique_instance_name,
10
- get_job_instance_name,
11
11
  get_shim_commands,
12
12
  )
13
13
  from dstack._internal.core.backends.base.offers import get_catalog_offers
14
14
  from dstack._internal.core.backends.lambdalabs.api_client import LambdaAPIClient
15
- from dstack._internal.core.backends.lambdalabs.config import LambdaConfig
15
+ from dstack._internal.core.backends.lambdalabs.models import LambdaConfig
16
16
  from dstack._internal.core.models.backends.base import BackendType
17
17
  from dstack._internal.core.models.instances import (
18
18
  InstanceAvailability,
19
19
  InstanceConfiguration,
20
20
  InstanceOffer,
21
21
  InstanceOfferWithAvailability,
22
- SSHKey,
23
22
  )
24
- from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
25
- from dstack._internal.core.models.volumes import Volume
23
+ from dstack._internal.core.models.runs import JobProvisioningData, Requirements
26
24
 
27
25
  MAX_INSTANCE_NAME_LEN = 60
28
26
 
29
27
 
30
- class LambdaCompute(Compute):
28
+ class LambdaCompute(
29
+ ComputeWithCreateInstanceSupport,
30
+ Compute,
31
+ ):
31
32
  def __init__(self, config: LambdaConfig):
32
33
  super().__init__()
33
34
  self.config = config
@@ -102,28 +103,6 @@ class LambdaCompute(Compute):
102
103
  )
103
104
  thread.start()
104
105
 
105
- def run_job(
106
- self,
107
- run: Run,
108
- job: Job,
109
- instance_offer: InstanceOfferWithAvailability,
110
- project_ssh_public_key: str,
111
- project_ssh_private_key: str,
112
- volumes: List[Volume],
113
- ) -> JobProvisioningData:
114
- instance_config = InstanceConfiguration(
115
- project_name=run.project_name,
116
- instance_name=get_job_instance_name(run, job), # TODO: generate name
117
- ssh_keys=[
118
- SSHKey(
119
- public=project_ssh_public_key.strip(), private=project_ssh_private_key.strip()
120
- ),
121
- SSHKey(public=run.run_spec.ssh_key_pub.strip()),
122
- ],
123
- user=run.user,
124
- )
125
- return self.create_instance(instance_offer, instance_config)
126
-
127
106
  def terminate_instance(
128
107
  self, instance_id: str, region: str, backend_data: Optional[str] = None
129
108
  ):