dstack 0.0.9__py3-none-any.whl → 0.20.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (879):
  1. dstack/_internal/cli/commands/__init__.py +80 -0
  2. dstack/_internal/cli/commands/apply.py +100 -0
  3. dstack/_internal/cli/commands/attach.py +161 -0
  4. dstack/_internal/cli/commands/completion.py +22 -0
  5. dstack/_internal/cli/commands/delete.py +44 -0
  6. dstack/_internal/cli/commands/event.py +168 -0
  7. dstack/_internal/cli/commands/fleet.py +161 -0
  8. dstack/_internal/cli/commands/gateway.py +159 -0
  9. dstack/_internal/cli/commands/init.py +64 -0
  10. dstack/_internal/cli/commands/login.py +352 -0
  11. dstack/_internal/cli/commands/logs.py +62 -0
  12. dstack/_internal/cli/commands/metrics.py +153 -0
  13. dstack/_internal/cli/commands/offer.py +146 -0
  14. dstack/_internal/cli/commands/project.py +259 -0
  15. dstack/_internal/cli/commands/ps.py +81 -0
  16. dstack/_internal/cli/commands/run.py +69 -0
  17. dstack/_internal/cli/commands/secrets.py +92 -0
  18. dstack/_internal/cli/commands/server.py +96 -0
  19. dstack/_internal/cli/commands/stop.py +26 -0
  20. dstack/_internal/cli/commands/volume.py +117 -0
  21. dstack/_internal/cli/main.py +101 -0
  22. dstack/_internal/cli/models/gateways.py +16 -0
  23. dstack/_internal/cli/models/offers.py +47 -0
  24. dstack/_internal/cli/models/runs.py +16 -0
  25. dstack/_internal/cli/services/args.py +31 -0
  26. dstack/_internal/cli/services/completion.py +91 -0
  27. dstack/_internal/cli/services/configurators/__init__.py +86 -0
  28. dstack/_internal/cli/services/configurators/base.py +103 -0
  29. dstack/_internal/cli/services/configurators/fleet.py +475 -0
  30. dstack/_internal/cli/services/configurators/gateway.py +231 -0
  31. dstack/_internal/cli/services/configurators/run.py +882 -0
  32. dstack/_internal/cli/services/configurators/volume.py +222 -0
  33. dstack/_internal/cli/services/events.py +68 -0
  34. dstack/_internal/cli/services/profile.py +182 -0
  35. dstack/_internal/cli/services/repos.py +71 -0
  36. dstack/_internal/cli/services/resources.py +54 -0
  37. dstack/_internal/cli/utils/common.py +159 -0
  38. dstack/_internal/cli/utils/fleet.py +106 -0
  39. dstack/_internal/cli/utils/gateway.py +56 -0
  40. dstack/_internal/cli/utils/gpu.py +178 -0
  41. dstack/_internal/cli/utils/rich.py +156 -0
  42. dstack/_internal/cli/utils/run.py +517 -0
  43. dstack/_internal/cli/utils/secrets.py +25 -0
  44. dstack/_internal/cli/utils/updates.py +98 -0
  45. dstack/_internal/cli/utils/volume.py +58 -0
  46. dstack/_internal/compat.py +3 -0
  47. dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
  48. dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
  49. dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
  50. dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
  51. dstack/_internal/core/backends/aws/auth.py +30 -0
  52. dstack/_internal/core/backends/aws/backend.py +31 -0
  53. dstack/_internal/core/backends/aws/compute.py +1153 -0
  54. dstack/_internal/core/backends/aws/configurator.py +191 -0
  55. dstack/_internal/core/backends/aws/models.py +135 -0
  56. dstack/_internal/core/backends/aws/resources.py +700 -0
  57. dstack/_internal/core/backends/azure/auth.py +39 -0
  58. dstack/_internal/core/backends/azure/backend.py +21 -0
  59. dstack/_internal/core/backends/azure/compute.py +676 -0
  60. dstack/_internal/core/backends/azure/configurator.py +472 -0
  61. dstack/_internal/core/backends/azure/models.py +98 -0
  62. dstack/_internal/core/backends/azure/resources.py +116 -0
  63. dstack/_internal/core/backends/azure/utils.py +42 -0
  64. dstack/_internal/core/backends/base/backend.py +18 -0
  65. dstack/_internal/core/backends/base/compute.py +1101 -0
  66. dstack/_internal/core/backends/base/configurator.py +117 -0
  67. dstack/_internal/core/backends/base/models.py +24 -0
  68. dstack/_internal/core/backends/base/offers.py +232 -0
  69. dstack/_internal/core/backends/cloudrift/api_client.py +220 -0
  70. dstack/_internal/core/backends/cloudrift/backend.py +16 -0
  71. dstack/_internal/core/backends/cloudrift/compute.py +138 -0
  72. dstack/_internal/core/backends/cloudrift/configurator.py +72 -0
  73. dstack/_internal/core/backends/cloudrift/models.py +40 -0
  74. dstack/_internal/core/backends/configurators.py +181 -0
  75. dstack/_internal/core/backends/cudo/__init__.py +0 -0
  76. dstack/_internal/core/backends/cudo/api_client.py +111 -0
  77. dstack/_internal/core/backends/cudo/backend.py +16 -0
  78. dstack/_internal/core/backends/cudo/compute.py +174 -0
  79. dstack/_internal/core/backends/cudo/configurator.py +63 -0
  80. dstack/_internal/core/backends/cudo/models.py +37 -0
  81. dstack/_internal/core/backends/datacrunch/__init__.py +1 -0
  82. dstack/_internal/core/backends/datacrunch/backend.py +18 -0
  83. dstack/_internal/core/backends/datacrunch/compute.py +8 -0
  84. dstack/_internal/core/backends/datacrunch/configurator.py +17 -0
  85. dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
  86. dstack/_internal/core/backends/digitalocean/backend.py +16 -0
  87. dstack/_internal/core/backends/digitalocean/compute.py +5 -0
  88. dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
  89. dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
  90. dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
  91. dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
  92. dstack/_internal/core/backends/digitalocean_base/compute.py +174 -0
  93. dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
  94. dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
  95. dstack/_internal/core/backends/dstack/__init__.py +0 -0
  96. dstack/_internal/core/backends/dstack/models.py +26 -0
  97. dstack/_internal/core/backends/features.py +74 -0
  98. dstack/_internal/core/backends/gcp/__init__.py +0 -0
  99. dstack/_internal/core/backends/gcp/auth.py +57 -0
  100. dstack/_internal/core/backends/gcp/backend.py +17 -0
  101. dstack/_internal/core/backends/gcp/compute.py +1257 -0
  102. dstack/_internal/core/backends/gcp/configurator.py +206 -0
  103. dstack/_internal/core/backends/gcp/features/__init__.py +0 -0
  104. dstack/_internal/core/backends/gcp/features/tcpx.py +65 -0
  105. dstack/_internal/core/backends/gcp/models.py +160 -0
  106. dstack/_internal/core/backends/gcp/resources.py +585 -0
  107. dstack/_internal/core/backends/hotaisle/__init__.py +1 -0
  108. dstack/_internal/core/backends/hotaisle/api_client.py +101 -0
  109. dstack/_internal/core/backends/hotaisle/backend.py +16 -0
  110. dstack/_internal/core/backends/hotaisle/compute.py +188 -0
  111. dstack/_internal/core/backends/hotaisle/configurator.py +66 -0
  112. dstack/_internal/core/backends/hotaisle/models.py +45 -0
  113. dstack/_internal/core/backends/kubernetes/__init__.py +0 -0
  114. dstack/_internal/core/backends/kubernetes/backend.py +16 -0
  115. dstack/_internal/core/backends/kubernetes/compute.py +1077 -0
  116. dstack/_internal/core/backends/kubernetes/configurator.py +61 -0
  117. dstack/_internal/core/backends/kubernetes/models.py +71 -0
  118. dstack/_internal/core/backends/kubernetes/utils.py +81 -0
  119. dstack/_internal/core/backends/lambdalabs/__init__.py +0 -0
  120. dstack/_internal/core/backends/lambdalabs/api_client.py +87 -0
  121. dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
  122. dstack/_internal/core/backends/lambdalabs/compute.py +233 -0
  123. dstack/_internal/core/backends/lambdalabs/configurator.py +65 -0
  124. dstack/_internal/core/backends/lambdalabs/models.py +37 -0
  125. dstack/_internal/core/backends/local/__init__.py +0 -0
  126. dstack/_internal/core/backends/local/backend.py +14 -0
  127. dstack/_internal/core/backends/local/compute.py +130 -0
  128. dstack/_internal/core/backends/models.py +158 -0
  129. dstack/_internal/core/backends/nebius/__init__.py +0 -0
  130. dstack/_internal/core/backends/nebius/backend.py +16 -0
  131. dstack/_internal/core/backends/nebius/compute.py +401 -0
  132. dstack/_internal/core/backends/nebius/configurator.py +98 -0
  133. dstack/_internal/core/backends/nebius/models.py +185 -0
  134. dstack/_internal/core/backends/nebius/resources.py +433 -0
  135. dstack/_internal/core/backends/oci/__init__.py +0 -0
  136. dstack/_internal/core/backends/oci/auth.py +21 -0
  137. dstack/_internal/core/backends/oci/backend.py +16 -0
  138. dstack/_internal/core/backends/oci/compute.py +209 -0
  139. dstack/_internal/core/backends/oci/configurator.py +156 -0
  140. dstack/_internal/core/backends/oci/exceptions.py +15 -0
  141. dstack/_internal/core/backends/oci/models.py +87 -0
  142. dstack/_internal/core/backends/oci/region.py +86 -0
  143. dstack/_internal/core/backends/oci/resources.py +836 -0
  144. dstack/_internal/core/backends/runpod/__init__.py +0 -0
  145. dstack/_internal/core/backends/runpod/api_client.py +627 -0
  146. dstack/_internal/core/backends/runpod/backend.py +16 -0
  147. dstack/_internal/core/backends/runpod/compute.py +444 -0
  148. dstack/_internal/core/backends/runpod/configurator.py +63 -0
  149. dstack/_internal/core/backends/runpod/models.py +54 -0
  150. dstack/_internal/core/backends/template/__init__.py +0 -0
  151. dstack/_internal/core/backends/template/backend.py.jinja +16 -0
  152. dstack/_internal/core/backends/template/compute.py.jinja +95 -0
  153. dstack/_internal/core/backends/template/configurator.py.jinja +69 -0
  154. dstack/_internal/core/backends/template/models.py.jinja +62 -0
  155. dstack/_internal/core/backends/tensordock/models.py +40 -0
  156. dstack/_internal/core/backends/vastai/__init__.py +0 -0
  157. dstack/_internal/core/backends/vastai/api_client.py +143 -0
  158. dstack/_internal/core/backends/vastai/backend.py +16 -0
  159. dstack/_internal/core/backends/vastai/compute.py +141 -0
  160. dstack/_internal/core/backends/vastai/configurator.py +69 -0
  161. dstack/_internal/core/backends/vastai/models.py +37 -0
  162. dstack/_internal/core/backends/verda/__init__.py +0 -0
  163. dstack/_internal/core/backends/verda/backend.py +16 -0
  164. dstack/_internal/core/backends/verda/compute.py +266 -0
  165. dstack/_internal/core/backends/verda/configurator.py +73 -0
  166. dstack/_internal/core/backends/verda/models.py +38 -0
  167. dstack/_internal/core/backends/vultr/__init__.py +0 -0
  168. dstack/_internal/core/backends/vultr/api_client.py +116 -0
  169. dstack/_internal/core/backends/vultr/backend.py +16 -0
  170. dstack/_internal/core/backends/vultr/compute.py +167 -0
  171. dstack/_internal/core/backends/vultr/configurator.py +71 -0
  172. dstack/_internal/core/backends/vultr/models.py +34 -0
  173. dstack/_internal/core/compatibility/__init__.py +0 -0
  174. dstack/_internal/core/compatibility/events.py +13 -0
  175. dstack/_internal/core/compatibility/fleets.py +58 -0
  176. dstack/_internal/core/compatibility/gateways.py +39 -0
  177. dstack/_internal/core/compatibility/gpus.py +13 -0
  178. dstack/_internal/core/compatibility/logs.py +14 -0
  179. dstack/_internal/core/compatibility/runs.py +86 -0
  180. dstack/_internal/core/compatibility/volumes.py +37 -0
  181. dstack/_internal/core/consts.py +8 -0
  182. dstack/_internal/core/errors.py +160 -0
  183. dstack/_internal/core/models/__init__.py +0 -0
  184. dstack/_internal/core/models/auth.py +28 -0
  185. dstack/_internal/core/models/backends/__init__.py +0 -0
  186. dstack/_internal/core/models/backends/base.py +48 -0
  187. dstack/_internal/core/models/common.py +143 -0
  188. dstack/_internal/core/models/compute_groups.py +39 -0
  189. dstack/_internal/core/models/config.py +28 -0
  190. dstack/_internal/core/models/configurations.py +1123 -0
  191. dstack/_internal/core/models/envs.py +149 -0
  192. dstack/_internal/core/models/events.py +98 -0
  193. dstack/_internal/core/models/files.py +67 -0
  194. dstack/_internal/core/models/fleets.py +437 -0
  195. dstack/_internal/core/models/gateways.py +146 -0
  196. dstack/_internal/core/models/gpus.py +45 -0
  197. dstack/_internal/core/models/health.py +28 -0
  198. dstack/_internal/core/models/instances.py +346 -0
  199. dstack/_internal/core/models/logs.py +27 -0
  200. dstack/_internal/core/models/metrics.py +14 -0
  201. dstack/_internal/core/models/placement.py +27 -0
  202. dstack/_internal/core/models/profiles.py +431 -0
  203. dstack/_internal/core/models/projects.py +46 -0
  204. dstack/_internal/core/models/repos/__init__.py +34 -0
  205. dstack/_internal/core/models/repos/base.py +36 -0
  206. dstack/_internal/core/models/repos/local.py +96 -0
  207. dstack/_internal/core/models/repos/remote.py +341 -0
  208. dstack/_internal/core/models/repos/virtual.py +85 -0
  209. dstack/_internal/core/models/resources.py +424 -0
  210. dstack/_internal/core/models/routers.py +24 -0
  211. dstack/_internal/core/models/runs.py +618 -0
  212. dstack/_internal/core/models/secrets.py +16 -0
  213. dstack/_internal/core/models/server.py +7 -0
  214. dstack/_internal/core/models/services.py +76 -0
  215. dstack/_internal/core/models/unix.py +53 -0
  216. dstack/_internal/core/models/users.py +60 -0
  217. dstack/_internal/core/models/volumes.py +221 -0
  218. dstack/_internal/core/services/__init__.py +16 -0
  219. dstack/_internal/core/services/api_client.py +15 -0
  220. dstack/_internal/core/services/configs/__init__.py +116 -0
  221. dstack/_internal/core/services/diff.py +71 -0
  222. dstack/_internal/core/services/logs.py +58 -0
  223. dstack/_internal/core/services/profiles.py +46 -0
  224. dstack/_internal/core/services/repos.py +236 -0
  225. dstack/_internal/core/services/ssh/__init__.py +27 -0
  226. dstack/_internal/core/services/ssh/attach.py +241 -0
  227. dstack/_internal/core/services/ssh/client.py +113 -0
  228. dstack/_internal/core/services/ssh/key_manager.py +53 -0
  229. dstack/_internal/core/services/ssh/ports.py +89 -0
  230. dstack/_internal/core/services/ssh/tunnel.py +337 -0
  231. dstack/_internal/proxy/__init__.py +8 -0
  232. dstack/_internal/proxy/gateway/__init__.py +0 -0
  233. dstack/_internal/proxy/gateway/app.py +89 -0
  234. dstack/_internal/proxy/gateway/auth.py +26 -0
  235. dstack/_internal/proxy/gateway/const.py +7 -0
  236. dstack/_internal/proxy/gateway/deps.py +73 -0
  237. dstack/_internal/proxy/gateway/main.py +17 -0
  238. dstack/_internal/proxy/gateway/models.py +23 -0
  239. dstack/_internal/proxy/gateway/repo/__init__.py +0 -0
  240. dstack/_internal/proxy/gateway/repo/repo.py +121 -0
  241. dstack/_internal/proxy/gateway/repo/state_v1.py +164 -0
  242. dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf +11 -0
  243. dstack/_internal/proxy/gateway/resources/nginx/entrypoint.jinja2 +27 -0
  244. dstack/_internal/proxy/gateway/resources/nginx/router_workers.jinja2 +23 -0
  245. dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +105 -0
  246. dstack/_internal/proxy/gateway/routers/__init__.py +0 -0
  247. dstack/_internal/proxy/gateway/routers/auth.py +10 -0
  248. dstack/_internal/proxy/gateway/routers/config.py +28 -0
  249. dstack/_internal/proxy/gateway/routers/registry.py +124 -0
  250. dstack/_internal/proxy/gateway/routers/stats.py +18 -0
  251. dstack/_internal/proxy/gateway/schemas/__init__.py +0 -0
  252. dstack/_internal/proxy/gateway/schemas/common.py +5 -0
  253. dstack/_internal/proxy/gateway/schemas/config.py +9 -0
  254. dstack/_internal/proxy/gateway/schemas/registry.py +63 -0
  255. dstack/_internal/proxy/gateway/schemas/stats.py +15 -0
  256. dstack/_internal/proxy/gateway/services/__init__.py +0 -0
  257. dstack/_internal/proxy/gateway/services/model_routers/__init__.py +18 -0
  258. dstack/_internal/proxy/gateway/services/model_routers/base.py +91 -0
  259. dstack/_internal/proxy/gateway/services/model_routers/sglang.py +269 -0
  260. dstack/_internal/proxy/gateway/services/nginx.py +455 -0
  261. dstack/_internal/proxy/gateway/services/registry.py +426 -0
  262. dstack/_internal/proxy/gateway/services/server_client.py +95 -0
  263. dstack/_internal/proxy/gateway/services/stats.py +170 -0
  264. dstack/_internal/proxy/gateway/testing/__init__.py +0 -0
  265. dstack/_internal/proxy/gateway/testing/common.py +13 -0
  266. dstack/_internal/proxy/lib/__init__.py +0 -0
  267. dstack/_internal/proxy/lib/auth.py +7 -0
  268. dstack/_internal/proxy/lib/deps.py +106 -0
  269. dstack/_internal/proxy/lib/errors.py +14 -0
  270. dstack/_internal/proxy/lib/models.py +112 -0
  271. dstack/_internal/proxy/lib/repo.py +27 -0
  272. dstack/_internal/proxy/lib/routers/__init__.py +0 -0
  273. dstack/_internal/proxy/lib/routers/model_proxy.py +102 -0
  274. dstack/_internal/proxy/lib/schemas/__init__.py +0 -0
  275. dstack/_internal/proxy/lib/schemas/model_proxy.py +77 -0
  276. dstack/_internal/proxy/lib/services/__init__.py +0 -0
  277. dstack/_internal/proxy/lib/services/model_proxy/__init__.py +0 -0
  278. dstack/_internal/proxy/lib/services/model_proxy/clients/__init__.py +0 -0
  279. dstack/_internal/proxy/lib/services/model_proxy/clients/base.py +18 -0
  280. dstack/_internal/proxy/lib/services/model_proxy/clients/openai.py +67 -0
  281. dstack/_internal/proxy/lib/services/model_proxy/clients/tgi.py +208 -0
  282. dstack/_internal/proxy/lib/services/model_proxy/model_proxy.py +23 -0
  283. dstack/_internal/proxy/lib/services/service_connection.py +160 -0
  284. dstack/_internal/proxy/lib/testing/__init__.py +0 -0
  285. dstack/_internal/proxy/lib/testing/auth.py +11 -0
  286. dstack/_internal/proxy/lib/testing/common.py +51 -0
  287. dstack/_internal/server/__init__.py +0 -0
  288. dstack/_internal/server/alembic.ini +100 -0
  289. dstack/_internal/server/app.py +432 -0
  290. dstack/_internal/server/background/__init__.py +142 -0
  291. dstack/_internal/server/background/tasks/__init__.py +0 -0
  292. dstack/_internal/server/background/tasks/common.py +24 -0
  293. dstack/_internal/server/background/tasks/process_compute_groups.py +167 -0
  294. dstack/_internal/server/background/tasks/process_events.py +17 -0
  295. dstack/_internal/server/background/tasks/process_fleets.py +289 -0
  296. dstack/_internal/server/background/tasks/process_gateways.py +188 -0
  297. dstack/_internal/server/background/tasks/process_idle_volumes.py +145 -0
  298. dstack/_internal/server/background/tasks/process_instances.py +1186 -0
  299. dstack/_internal/server/background/tasks/process_metrics.py +172 -0
  300. dstack/_internal/server/background/tasks/process_placement_groups.py +104 -0
  301. dstack/_internal/server/background/tasks/process_probes.py +164 -0
  302. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +150 -0
  303. dstack/_internal/server/background/tasks/process_running_jobs.py +1238 -0
  304. dstack/_internal/server/background/tasks/process_runs.py +842 -0
  305. dstack/_internal/server/background/tasks/process_submitted_jobs.py +1106 -0
  306. dstack/_internal/server/background/tasks/process_terminating_jobs.py +108 -0
  307. dstack/_internal/server/background/tasks/process_volumes.py +129 -0
  308. dstack/_internal/server/compatibility/__init__.py +0 -0
  309. dstack/_internal/server/compatibility/common.py +20 -0
  310. dstack/_internal/server/compatibility/gpus.py +22 -0
  311. dstack/_internal/server/db.py +127 -0
  312. dstack/_internal/server/deps.py +19 -0
  313. dstack/_internal/server/main.py +4 -0
  314. dstack/_internal/server/migrations/__init__.py +0 -0
  315. dstack/_internal/server/migrations/env.py +112 -0
  316. dstack/_internal/server/migrations/script.py.mako +28 -0
  317. dstack/_internal/server/migrations/versions/006512f572b4_add_projects_original_name.py +38 -0
  318. dstack/_internal/server/migrations/versions/065588ec72b8_add_vultr_to_backendtype_enum.py +81 -0
  319. dstack/_internal/server/migrations/versions/06e977bc61c7_add_usermodel_deleted_and_original_name.py +45 -0
  320. dstack/_internal/server/migrations/versions/0e33559e16ed_update_instancestatus.py +64 -0
  321. dstack/_internal/server/migrations/versions/112753bc17dd_remove_nullable_fields.py +50 -0
  322. dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
  323. dstack/_internal/server/migrations/versions/14f2cb002fc2_add_jobmodel_removed_flag.py +44 -0
  324. dstack/_internal/server/migrations/versions/1a48dfe44a40_rework_termination_handling.py +42 -0
  325. dstack/_internal/server/migrations/versions/1aa9638ad963_added_email_index.py +31 -0
  326. dstack/_internal/server/migrations/versions/1e3fb39ef74b_add_remote_connection_details.py +26 -0
  327. dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
  328. dstack/_internal/server/migrations/versions/20166748b60c_add_jobmodel_disconnected_at.py +100 -0
  329. dstack/_internal/server/migrations/versions/22d74df9897e_add_events_and_event_targets.py +99 -0
  330. dstack/_internal/server/migrations/versions/23e01c56279a_make_blob_nullable.py +32 -0
  331. dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
  332. dstack/_internal/server/migrations/versions/252d3743b641_.py +40 -0
  333. dstack/_internal/server/migrations/versions/25479f540245_add_probes.py +43 -0
  334. dstack/_internal/server/migrations/versions/27d3e55759fa_add_pools.py +152 -0
  335. dstack/_internal/server/migrations/versions/29826f417010_remove_instancemodel_retry_policy.py +34 -0
  336. dstack/_internal/server/migrations/versions/29c08c6a8cb3_.py +36 -0
  337. dstack/_internal/server/migrations/versions/35e90e1b0d3e_add_rolling_deployment_fields.py +42 -0
  338. dstack/_internal/server/migrations/versions/35f732ee4cf5_add_projectmodel_is_public.py +39 -0
  339. dstack/_internal/server/migrations/versions/3cf77fb8bcf1_store_repo_clone_url.py +85 -0
  340. dstack/_internal/server/migrations/versions/3d7f6c2ec000_add_jobmodel_registered.py +28 -0
  341. dstack/_internal/server/migrations/versions/3dbdce90d0e0_fix_code_uq_constraint.py +33 -0
  342. dstack/_internal/server/migrations/versions/48ad3ecbaea2_do_not_delete_projects_and_runs.py +46 -0
  343. dstack/_internal/server/migrations/versions/4ae1a5b0e7f1_add_run_list_index.py +34 -0
  344. dstack/_internal/server/migrations/versions/4b4319398164_introduce_runs_processing.py +144 -0
  345. dstack/_internal/server/migrations/versions/50dd7ea98639_index_status_columns.py +55 -0
  346. dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
  347. dstack/_internal/server/migrations/versions/54a77e19c64c_add_manager_project_role.py +67 -0
  348. dstack/_internal/server/migrations/versions/555138b1f77f_change_instancemodel_for_asynchronous_.py +61 -0
  349. dstack/_internal/server/migrations/versions/58aa5162dcc3_add_gatewaymodel_configuration.py +32 -0
  350. dstack/_internal/server/migrations/versions/5ad8debc8fe6_fixes_for_psql.py +329 -0
  351. dstack/_internal/server/migrations/versions/5ec538b70e71_replace_instansestatus.py +31 -0
  352. dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
  353. dstack/_internal/server/migrations/versions/5fd659afca82_add_ix_instances_fleet_id.py +31 -0
  354. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  355. dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
  356. dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
  357. dstack/_internal/server/migrations/versions/686fb8341ea5_add_user_emails.py +32 -0
  358. dstack/_internal/server/migrations/versions/6c1a9d6530ee_add_jobmodel_exit_status.py +26 -0
  359. dstack/_internal/server/migrations/versions/706e0acc3a7d_add_runmodel_desired_replica_counts.py +26 -0
  360. dstack/_internal/server/migrations/versions/710e5b3fac8f_add_encryption.py +54 -0
  361. dstack/_internal/server/migrations/versions/728b1488b1b4_add_instance_health.py +50 -0
  362. dstack/_internal/server/migrations/versions/74a1f55209bd_store_enums_as_strings.py +484 -0
  363. dstack/_internal/server/migrations/versions/7b24b1c8eba7_add_instancemodel_last_processed_at.py +68 -0
  364. dstack/_internal/server/migrations/versions/7ba3b59d7ca6_add_runmodel_resubmission_attempt.py +35 -0
  365. dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
  366. dstack/_internal/server/migrations/versions/7d1ec2b920ac_add_computegroupmodel.py +91 -0
  367. dstack/_internal/server/migrations/versions/803c7e9ed85d_add_jobmodel_job_runtime_data.py +32 -0
  368. dstack/_internal/server/migrations/versions/82b32a135ea2_.py +58 -0
  369. dstack/_internal/server/migrations/versions/866ec1d67184_replace_retrypolicy_limit_with_.py +93 -0
  370. dstack/_internal/server/migrations/versions/903c91e24634_add_instances_termination_reason_message.py +34 -0
  371. dstack/_internal/server/migrations/versions/91a12fff6c76_add_repocredsmodel.py +43 -0
  372. dstack/_internal/server/migrations/versions/91ac5e543037_extend_repos_creds_column.py +36 -0
  373. dstack/_internal/server/migrations/versions/98cd9c8b5927_add_volumemodel.py +73 -0
  374. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  375. dstack/_internal/server/migrations/versions/99b4c8c954ea_add_termination_reason_message.py +71 -0
  376. dstack/_internal/server/migrations/versions/9eea6af28e10_added_fail_reason_for_instancemodel.py +36 -0
  377. dstack/_internal/server/migrations/versions/__init__.py +0 -0
  378. dstack/_internal/server/migrations/versions/a060e2440936_.py +206 -0
  379. dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
  380. dstack/_internal/server/migrations/versions/a7b46c073fa1_add_placementgroupmodel.py +58 -0
  381. dstack/_internal/server/migrations/versions/afbc600ff2b2_add_created_at_to_usermodel_and_.py +102 -0
  382. dstack/_internal/server/migrations/versions/b4d6ad60db08_add_instancemodel_unreachable.py +37 -0
  383. dstack/_internal/server/migrations/versions/b88d55c2a07d_replace_instancestatus_ready.py +21 -0
  384. dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
  385. dstack/_internal/server/migrations/versions/bca2fdf130bf_add_runmodel_priority.py +34 -0
  386. dstack/_internal/server/migrations/versions/bfba43f6def2_.py +32 -0
  387. dstack/_internal/server/migrations/versions/c00090eaef21_support_fleets.py +108 -0
  388. dstack/_internal/server/migrations/versions/c154eece89da_add_fields_for_async_gateway_creation.py +74 -0
  389. dstack/_internal/server/migrations/versions/c20626d03cfb_add_jobmetricspoint.py +43 -0
  390. dstack/_internal/server/migrations/versions/c48df7985d57_add_instance_termination_retries.py +38 -0
  391. dstack/_internal/server/migrations/versions/c83d45f9a971_replace_string_with_text.py +150 -0
  392. dstack/_internal/server/migrations/versions/d0bb68e48b9f_add_project_owners_and_quotas.py +106 -0
  393. dstack/_internal/server/migrations/versions/d3e8af4786fa_gateway_compute_flag_deleted.py +34 -0
  394. dstack/_internal/server/migrations/versions/d4d9dc26cf58_add_ix_jobs_run_id.py +31 -0
  395. dstack/_internal/server/migrations/versions/d5863798bf41_add_volumemodel_last_job_processed_at.py +40 -0
  396. dstack/_internal/server/migrations/versions/d6b11105f659_add_usermodel_active.py +36 -0
  397. dstack/_internal/server/migrations/versions/da574e93fee0_add_jobmodel_volumes_detached_at.py +40 -0
  398. dstack/_internal/server/migrations/versions/dfffd6a1165c_add_fields_for_gateways_behind_alb.py +36 -0
  399. dstack/_internal/server/migrations/versions/e2d08cd1b8d9_add_jobmodel_fleet.py +41 -0
  400. dstack/_internal/server/migrations/versions/e3b7db07727f_add_gatewaycomputemodel_app_updated_at.py +61 -0
  401. dstack/_internal/server/migrations/versions/e6391ca6a264_separate_gateways_from_compute.py +72 -0
  402. dstack/_internal/server/migrations/versions/ea60480f82bb_add_membermodel_member_num.py +32 -0
  403. dstack/_internal/server/migrations/versions/ec02a26a256c_add_runmodel_next_triggered_at.py +38 -0
  404. dstack/_internal/server/migrations/versions/ed0ca30e13bb_migrate_instancestatus_provisioning.py +29 -0
  405. dstack/_internal/server/migrations/versions/fe72c4de8376_add_gateways.py +81 -0
  406. dstack/_internal/server/migrations/versions/ff1d94f65b08_user_ssh_key.py +34 -0
  407. dstack/_internal/server/migrations/versions/ffa99edd1988_add_jobterminationreason_max_duration_.py +81 -0
  408. dstack/_internal/server/models.py +930 -0
  409. dstack/_internal/server/routers/__init__.py +0 -0
  410. dstack/_internal/server/routers/auth.py +34 -0
  411. dstack/_internal/server/routers/backends.py +142 -0
  412. dstack/_internal/server/routers/events.py +60 -0
  413. dstack/_internal/server/routers/files.py +68 -0
  414. dstack/_internal/server/routers/fleets.py +202 -0
  415. dstack/_internal/server/routers/gateways.py +109 -0
  416. dstack/_internal/server/routers/gpus.py +32 -0
  417. dstack/_internal/server/routers/instances.py +77 -0
  418. dstack/_internal/server/routers/logs.py +34 -0
  419. dstack/_internal/server/routers/metrics.py +82 -0
  420. dstack/_internal/server/routers/projects.py +205 -0
  421. dstack/_internal/server/routers/prometheus.py +35 -0
  422. dstack/_internal/server/routers/repos.py +118 -0
  423. dstack/_internal/server/routers/runs.py +216 -0
  424. dstack/_internal/server/routers/secrets.py +86 -0
  425. dstack/_internal/server/routers/server.py +19 -0
  426. dstack/_internal/server/routers/users.py +158 -0
  427. dstack/_internal/server/routers/volumes.py +122 -0
  428. dstack/_internal/server/schemas/__init__.py +0 -0
  429. dstack/_internal/server/schemas/auth.py +83 -0
  430. dstack/_internal/server/schemas/backends.py +16 -0
  431. dstack/_internal/server/schemas/common.py +9 -0
  432. dstack/_internal/server/schemas/events.py +211 -0
  433. dstack/_internal/server/schemas/files.py +5 -0
  434. dstack/_internal/server/schemas/fleets.py +49 -0
  435. dstack/_internal/server/schemas/gateways.py +31 -0
  436. dstack/_internal/server/schemas/gpus.py +26 -0
  437. dstack/_internal/server/schemas/health/__init__.py +0 -0
  438. dstack/_internal/server/schemas/health/dcgm.py +56 -0
  439. dstack/_internal/server/schemas/instances.py +47 -0
  440. dstack/_internal/server/schemas/logs.py +17 -0
  441. dstack/_internal/server/schemas/projects.py +81 -0
  442. dstack/_internal/server/schemas/repos.py +24 -0
  443. dstack/_internal/server/schemas/runner.py +269 -0
  444. dstack/_internal/server/schemas/runs.py +66 -0
  445. dstack/_internal/server/schemas/secrets.py +16 -0
  446. dstack/_internal/server/schemas/users.py +72 -0
  447. dstack/_internal/server/schemas/volumes.py +29 -0
  448. dstack/_internal/server/security/__init__.py +0 -0
  449. dstack/_internal/server/security/permissions.py +251 -0
  450. dstack/_internal/server/services/__init__.py +0 -0
  451. dstack/_internal/server/services/auth.py +77 -0
  452. dstack/_internal/server/services/backends/__init__.py +404 -0
  453. dstack/_internal/server/services/backends/handlers.py +105 -0
  454. dstack/_internal/server/services/compute_groups.py +22 -0
  455. dstack/_internal/server/services/config.py +279 -0
  456. dstack/_internal/server/services/docker.py +162 -0
  457. dstack/_internal/server/services/encryption/__init__.py +102 -0
  458. dstack/_internal/server/services/encryption/keys/__init__.py +0 -0
  459. dstack/_internal/server/services/encryption/keys/aes.py +68 -0
  460. dstack/_internal/server/services/encryption/keys/base.py +19 -0
  461. dstack/_internal/server/services/encryption/keys/identity.py +28 -0
  462. dstack/_internal/server/services/events.py +477 -0
  463. dstack/_internal/server/services/files.py +91 -0
  464. dstack/_internal/server/services/fleets.py +1224 -0
  465. dstack/_internal/server/services/gateways/__init__.py +686 -0
  466. dstack/_internal/server/services/gateways/client.py +209 -0
  467. dstack/_internal/server/services/gateways/connection.py +139 -0
  468. dstack/_internal/server/services/gateways/pool.py +58 -0
  469. dstack/_internal/server/services/gpus.py +387 -0
  470. dstack/_internal/server/services/instances.py +731 -0
  471. dstack/_internal/server/services/jobs/__init__.py +840 -0
  472. dstack/_internal/server/services/jobs/configurators/__init__.py +0 -0
  473. dstack/_internal/server/services/jobs/configurators/base.py +469 -0
  474. dstack/_internal/server/services/jobs/configurators/dev.py +69 -0
  475. dstack/_internal/server/services/jobs/configurators/extensions/__init__.py +0 -0
  476. dstack/_internal/server/services/jobs/configurators/extensions/base.py +15 -0
  477. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
  478. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +42 -0
  479. dstack/_internal/server/services/jobs/configurators/extensions/windsurf.py +43 -0
  480. dstack/_internal/server/services/jobs/configurators/service.py +28 -0
  481. dstack/_internal/server/services/jobs/configurators/task.py +39 -0
  482. dstack/_internal/server/services/locking.py +187 -0
  483. dstack/_internal/server/services/logging.py +29 -0
  484. dstack/_internal/server/services/logs/__init__.py +122 -0
  485. dstack/_internal/server/services/logs/aws.py +373 -0
  486. dstack/_internal/server/services/logs/base.py +47 -0
  487. dstack/_internal/server/services/logs/filelog.py +261 -0
  488. dstack/_internal/server/services/logs/fluentbit.py +329 -0
  489. dstack/_internal/server/services/logs/gcp.py +181 -0
  490. dstack/_internal/server/services/metrics.py +172 -0
  491. dstack/_internal/server/services/offers.py +249 -0
  492. dstack/_internal/server/services/permissions.py +37 -0
  493. dstack/_internal/server/services/placement.py +234 -0
  494. dstack/_internal/server/services/plugins.py +109 -0
  495. dstack/_internal/server/services/probes.py +10 -0
  496. dstack/_internal/server/services/projects.py +835 -0
  497. dstack/_internal/server/services/prometheus/__init__.py +0 -0
  498. dstack/_internal/server/services/prometheus/client_metrics.py +55 -0
  499. dstack/_internal/server/services/prometheus/custom_metrics.py +327 -0
  500. dstack/_internal/server/services/proxy/__init__.py +3 -0
  501. dstack/_internal/server/services/proxy/auth.py +12 -0
  502. dstack/_internal/server/services/proxy/deps.py +18 -0
  503. dstack/_internal/server/services/proxy/repo.py +189 -0
  504. dstack/_internal/server/services/proxy/routers/__init__.py +0 -0
  505. dstack/_internal/server/services/proxy/routers/service_proxy.py +49 -0
  506. dstack/_internal/server/services/proxy/services/__init__.py +0 -0
  507. dstack/_internal/server/services/proxy/services/service_proxy.py +135 -0
  508. dstack/_internal/server/services/repos.py +362 -0
  509. dstack/_internal/server/services/requirements/__init__.py +0 -0
  510. dstack/_internal/server/services/requirements/combine.py +260 -0
  511. dstack/_internal/server/services/resources.py +21 -0
  512. dstack/_internal/server/services/runner/__init__.py +0 -0
  513. dstack/_internal/server/services/runner/client.py +646 -0
  514. dstack/_internal/server/services/runner/ssh.py +128 -0
  515. dstack/_internal/server/services/runs/__init__.py +1026 -0
  516. dstack/_internal/server/services/runs/plan.py +703 -0
  517. dstack/_internal/server/services/runs/replicas.py +317 -0
  518. dstack/_internal/server/services/runs/spec.py +191 -0
  519. dstack/_internal/server/services/secrets.py +245 -0
  520. dstack/_internal/server/services/services/__init__.py +345 -0
  521. dstack/_internal/server/services/services/autoscalers.py +140 -0
  522. dstack/_internal/server/services/services/options.py +53 -0
  523. dstack/_internal/server/services/ssh.py +67 -0
  524. dstack/_internal/server/services/storage/__init__.py +37 -0
  525. dstack/_internal/server/services/storage/base.py +48 -0
  526. dstack/_internal/server/services/storage/gcs.py +66 -0
  527. dstack/_internal/server/services/storage/s3.py +69 -0
  528. dstack/_internal/server/services/users.py +461 -0
  529. dstack/_internal/server/services/volumes.py +496 -0
  530. dstack/_internal/server/settings.py +161 -0
  531. dstack/_internal/server/statics/00a6e1fb461ed2929fb9.png +0 -0
  532. dstack/_internal/server/statics/0cae4d9f0a36034984a7.png +0 -0
  533. dstack/_internal/server/statics/391de232cc0e30cae513.png +0 -0
  534. dstack/_internal/server/statics/4e0eead8c1a73689ef9d.svg +1 -0
  535. dstack/_internal/server/statics/544afa2f63428c2235b0.png +0 -0
  536. dstack/_internal/server/statics/54a4f50f74c6b9381530.svg +7 -0
  537. dstack/_internal/server/statics/68dd1360a7d2611e0132.svg +4 -0
  538. dstack/_internal/server/statics/69544b4c81973b54a66f.png +0 -0
  539. dstack/_internal/server/statics/77a8b02b17af19e39266.png +0 -0
  540. dstack/_internal/server/statics/83a93a8871c219104367.svg +9 -0
  541. dstack/_internal/server/statics/8f28bb8e9999e5e6a48b.svg +4 -0
  542. dstack/_internal/server/statics/9124086961ab8c366bc4.svg +9 -0
  543. dstack/_internal/server/statics/9a9ebaeb54b025dbac0a.svg +5 -0
  544. dstack/_internal/server/statics/a3428392dc534f3b15c4.svg +7 -0
  545. dstack/_internal/server/statics/ae22625574d69361f72c.png +0 -0
  546. dstack/_internal/server/statics/assets/android-chrome-144x144.png +0 -0
  547. dstack/_internal/server/statics/assets/android-chrome-192x192.png +0 -0
  548. dstack/_internal/server/statics/assets/android-chrome-256x256.png +0 -0
  549. dstack/_internal/server/statics/assets/android-chrome-36x36.png +0 -0
  550. dstack/_internal/server/statics/assets/android-chrome-384x384.png +0 -0
  551. dstack/_internal/server/statics/assets/android-chrome-48x48.png +0 -0
  552. dstack/_internal/server/statics/assets/android-chrome-512x512.png +0 -0
  553. dstack/_internal/server/statics/assets/android-chrome-72x72.png +0 -0
  554. dstack/_internal/server/statics/assets/android-chrome-96x96.png +0 -0
  555. dstack/_internal/server/statics/assets/apple-touch-icon-1024x1024.png +0 -0
  556. dstack/_internal/server/statics/assets/apple-touch-icon-114x114.png +0 -0
  557. dstack/_internal/server/statics/assets/apple-touch-icon-120x120.png +0 -0
  558. dstack/_internal/server/statics/assets/apple-touch-icon-144x144.png +0 -0
  559. dstack/_internal/server/statics/assets/apple-touch-icon-152x152.png +0 -0
  560. dstack/_internal/server/statics/assets/apple-touch-icon-167x167.png +0 -0
  561. dstack/_internal/server/statics/assets/apple-touch-icon-180x180.png +0 -0
  562. dstack/_internal/server/statics/assets/apple-touch-icon-57x57.png +0 -0
  563. dstack/_internal/server/statics/assets/apple-touch-icon-60x60.png +0 -0
  564. dstack/_internal/server/statics/assets/apple-touch-icon-72x72.png +0 -0
  565. dstack/_internal/server/statics/assets/apple-touch-icon-76x76.png +0 -0
  566. dstack/_internal/server/statics/assets/apple-touch-icon-precomposed.png +0 -0
  567. dstack/_internal/server/statics/assets/apple-touch-icon.png +0 -0
  568. dstack/_internal/server/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
  569. dstack/_internal/server/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
  570. dstack/_internal/server/statics/assets/apple-touch-startup-image-1170x2532.png +0 -0
  571. dstack/_internal/server/statics/assets/apple-touch-startup-image-1179x2556.png +0 -0
  572. dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
  573. dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
  574. dstack/_internal/server/statics/assets/apple-touch-startup-image-1284x2778.png +0 -0
  575. dstack/_internal/server/statics/assets/apple-touch-startup-image-1290x2796.png +0 -0
  576. dstack/_internal/server/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
  577. dstack/_internal/server/statics/assets/apple-touch-startup-image-1488x2266.png +0 -0
  578. dstack/_internal/server/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
  579. dstack/_internal/server/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
  580. dstack/_internal/server/statics/assets/apple-touch-startup-image-1640x2160.png +0 -0
  581. dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
  582. dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
  583. dstack/_internal/server/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
  584. dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
  585. dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
  586. dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
  587. dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1640.png +0 -0
  588. dstack/_internal/server/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
  589. dstack/_internal/server/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
  590. dstack/_internal/server/statics/assets/apple-touch-startup-image-2266x1488.png +0 -0
  591. dstack/_internal/server/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
  592. dstack/_internal/server/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
  593. dstack/_internal/server/statics/assets/apple-touch-startup-image-2532x1170.png +0 -0
  594. dstack/_internal/server/statics/assets/apple-touch-startup-image-2556x1179.png +0 -0
  595. dstack/_internal/server/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
  596. dstack/_internal/server/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
  597. dstack/_internal/server/statics/assets/apple-touch-startup-image-2778x1284.png +0 -0
  598. dstack/_internal/server/statics/assets/apple-touch-startup-image-2796x1290.png +0 -0
  599. dstack/_internal/server/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
  600. dstack/_internal/server/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
  601. dstack/_internal/server/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
  602. dstack/_internal/server/statics/assets/browserconfig.xml +12 -0
  603. dstack/_internal/server/statics/assets/favicon-16x16.png +0 -0
  604. dstack/_internal/server/statics/assets/favicon-32x32.png +0 -0
  605. dstack/_internal/server/statics/assets/favicon-48x48.png +0 -0
  606. dstack/_internal/server/statics/assets/favicon.ico +0 -0
  607. dstack/{dashboard/statics/assets/manifest.json → _internal/server/statics/assets/manifest.webmanifest} +18 -9
  608. dstack/_internal/server/statics/assets/mstile-144x144.png +0 -0
  609. dstack/_internal/server/statics/assets/mstile-150x150.png +0 -0
  610. dstack/_internal/server/statics/assets/mstile-310x150.png +0 -0
  611. dstack/_internal/server/statics/assets/mstile-310x310.png +0 -0
  612. dstack/_internal/server/statics/assets/mstile-70x70.png +0 -0
  613. dstack/_internal/server/statics/assets/yandex-browser-50x50.png +0 -0
  614. dstack/_internal/server/statics/b7ae68f44193474fc578.png +0 -0
  615. dstack/_internal/server/statics/d2f008c75b2b5b191f3f.png +0 -0
  616. dstack/_internal/server/statics/d44c33e1b92e05c379fd.png +0 -0
  617. dstack/_internal/server/statics/dd43ff0552815179d7ab.png +0 -0
  618. dstack/_internal/server/statics/dd4e7166c0b9aac197d7.png +0 -0
  619. dstack/_internal/server/statics/e30b27916930d43d2271.png +0 -0
  620. dstack/_internal/server/statics/e467d7d60aae81ab198b.svg +6 -0
  621. dstack/_internal/server/statics/eb9b344b73818fe2b71a.png +0 -0
  622. dstack/_internal/server/statics/f517dd626eb964120de0.png +0 -0
  623. dstack/_internal/server/statics/f958aecddee5d8e3222c.png +0 -0
  624. dstack/_internal/server/statics/index.html +3 -0
  625. dstack/_internal/server/statics/logo-notext.svg +116 -0
  626. dstack/_internal/server/statics/main-2e6967bad9f29395eea6.css +3 -0
  627. dstack/_internal/server/statics/main-7dc0f6d20b8b41659acc.js +155547 -0
  628. dstack/_internal/server/statics/main-7dc0f6d20b8b41659acc.js.map +1 -0
  629. dstack/{dashboard → _internal/server}/statics/manifest.json +2 -2
  630. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  631. dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
  632. dstack/{dashboard/statics/static/media/logo.f9d7170678f68f796e270698633770ec.svg → _internal/server/statics/static/media/logo.f602feeb138844eda97c8cb641461448.svg} +8 -6
  633. dstack/_internal/server/statics/static/media/okta.12f178e6873a1100965f2a4dbd18fcec.svg +2 -0
  634. dstack/_internal/server/statics/static/media/theme.3994c817bb7dda191c1c9640dee0bf42.svg +3 -0
  635. dstack/_internal/server/testing/__init__.py +0 -0
  636. dstack/_internal/server/testing/common.py +1220 -0
  637. dstack/_internal/server/testing/conf.py +53 -0
  638. dstack/_internal/server/testing/matchers.py +31 -0
  639. dstack/_internal/server/utils/__init__.py +0 -0
  640. dstack/_internal/server/utils/common.py +55 -0
  641. dstack/_internal/server/utils/logging.py +51 -0
  642. dstack/_internal/server/utils/provisioning.py +368 -0
  643. dstack/_internal/server/utils/routers.py +166 -0
  644. dstack/_internal/server/utils/sentry_utils.py +24 -0
  645. dstack/_internal/settings.py +49 -0
  646. dstack/_internal/utils/__init__.py +0 -0
  647. dstack/_internal/utils/common.py +318 -0
  648. dstack/_internal/utils/cron.py +5 -0
  649. dstack/_internal/utils/crypto.py +40 -0
  650. dstack/_internal/utils/env.py +88 -0
  651. dstack/_internal/utils/event_loop.py +30 -0
  652. dstack/_internal/utils/files.py +69 -0
  653. dstack/_internal/utils/gpu.py +59 -0
  654. dstack/_internal/utils/hash.py +31 -0
  655. dstack/_internal/utils/interpolator.py +91 -0
  656. dstack/_internal/utils/json_schema.py +11 -0
  657. dstack/_internal/utils/json_utils.py +54 -0
  658. dstack/_internal/utils/logging.py +5 -0
  659. dstack/_internal/utils/nested_list.py +47 -0
  660. dstack/_internal/utils/network.py +50 -0
  661. dstack/_internal/utils/path.py +57 -0
  662. dstack/_internal/utils/random_names.py +258 -0
  663. dstack/_internal/utils/ssh.py +346 -0
  664. dstack/_internal/utils/tags.py +42 -0
  665. dstack/_internal/utils/typing.py +14 -0
  666. dstack/_internal/utils/version.py +22 -0
  667. dstack/api/__init__.py +46 -0
  668. dstack/api/_public/__init__.py +96 -0
  669. dstack/api/_public/backends.py +42 -0
  670. dstack/api/_public/common.py +5 -0
  671. dstack/api/_public/repos.py +202 -0
  672. dstack/api/_public/runs.py +714 -0
  673. dstack/api/server/__init__.py +206 -0
  674. dstack/api/server/_auth.py +30 -0
  675. dstack/api/server/_backends.py +38 -0
  676. dstack/api/server/_events.py +64 -0
  677. dstack/api/server/_files.py +18 -0
  678. dstack/api/server/_fleets.py +82 -0
  679. dstack/api/server/_gateways.py +54 -0
  680. dstack/api/server/_gpus.py +27 -0
  681. dstack/api/server/_group.py +22 -0
  682. dstack/api/server/_logs.py +15 -0
  683. dstack/api/server/_metrics.py +23 -0
  684. dstack/api/server/_projects.py +124 -0
  685. dstack/api/server/_repos.py +64 -0
  686. dstack/api/server/_runs.py +102 -0
  687. dstack/api/server/_secrets.py +36 -0
  688. dstack/api/server/_users.py +82 -0
  689. dstack/api/server/_volumes.py +39 -0
  690. dstack/api/server/utils.py +34 -0
  691. dstack/api/utils.py +105 -0
  692. dstack/core/__init__.py +0 -0
  693. dstack/plugins/__init__.py +8 -0
  694. dstack/plugins/_base.py +72 -0
  695. dstack/plugins/_models.py +8 -0
  696. dstack/plugins/_utils.py +19 -0
  697. dstack/plugins/builtin/__init__.py +0 -0
  698. dstack/plugins/builtin/rest_plugin/__init__.py +18 -0
  699. dstack/plugins/builtin/rest_plugin/_models.py +48 -0
  700. dstack/plugins/builtin/rest_plugin/_plugin.py +147 -0
  701. dstack/version.py +3 -1
  702. dstack-0.20.7.dist-info/METADATA +519 -0
  703. dstack-0.20.7.dist-info/RECORD +720 -0
  704. {dstack-0.0.9.dist-info → dstack-0.20.7.dist-info}/WHEEL +1 -2
  705. dstack-0.20.7.dist-info/entry_points.txt +2 -0
  706. dstack-0.20.7.dist-info/licenses/LICENSE.md +353 -0
  707. dstack/aws/__init__.py +0 -180
  708. dstack/aws/artifacts.py +0 -111
  709. dstack/aws/config.py +0 -40
  710. dstack/aws/jobs.py +0 -245
  711. dstack/aws/logs.py +0 -186
  712. dstack/aws/repos.py +0 -137
  713. dstack/aws/run_names.py +0 -17
  714. dstack/aws/runners.py +0 -693
  715. dstack/aws/runs.py +0 -79
  716. dstack/aws/secrets.py +0 -99
  717. dstack/aws/tags.py +0 -138
  718. dstack/backend.py +0 -299
  719. dstack/cli/app.py +0 -41
  720. dstack/cli/artifacts.py +0 -87
  721. dstack/cli/common.py +0 -57
  722. dstack/cli/config.py +0 -194
  723. dstack/cli/dashboard.py +0 -26
  724. dstack/cli/delete.py +0 -49
  725. dstack/cli/init.py +0 -33
  726. dstack/cli/logs.py +0 -87
  727. dstack/cli/main.py +0 -81
  728. dstack/cli/restart.py +0 -43
  729. dstack/cli/run.py +0 -223
  730. dstack/cli/schema.py +0 -46
  731. dstack/cli/secrets.py +0 -97
  732. dstack/cli/status.py +0 -140
  733. dstack/cli/stop.py +0 -53
  734. dstack/cli/tags.py +0 -100
  735. dstack/config.py +0 -80
  736. dstack/dashboard/artifacts.py +0 -26
  737. dstack/dashboard/logs.py +0 -73
  738. dstack/dashboard/main.py +0 -45
  739. dstack/dashboard/repos.py +0 -41
  740. dstack/dashboard/runs.py +0 -140
  741. dstack/dashboard/secrets.py +0 -53
  742. dstack/dashboard/statics/4d6a4e032505c1efd23c.png +0 -0
  743. dstack/dashboard/statics/7e018c3e5566d7c349a8.png +0 -0
  744. dstack/dashboard/statics/assets/android-chrome-144x144.png +0 -0
  745. dstack/dashboard/statics/assets/android-chrome-192x192.png +0 -0
  746. dstack/dashboard/statics/assets/android-chrome-256x256.png +0 -0
  747. dstack/dashboard/statics/assets/android-chrome-36x36.png +0 -0
  748. dstack/dashboard/statics/assets/android-chrome-384x384.png +0 -0
  749. dstack/dashboard/statics/assets/android-chrome-48x48.png +0 -0
  750. dstack/dashboard/statics/assets/android-chrome-512x512.png +0 -0
  751. dstack/dashboard/statics/assets/android-chrome-72x72.png +0 -0
  752. dstack/dashboard/statics/assets/android-chrome-96x96.png +0 -0
  753. dstack/dashboard/statics/assets/apple-touch-icon-1024x1024.png +0 -0
  754. dstack/dashboard/statics/assets/apple-touch-icon-114x114.png +0 -0
  755. dstack/dashboard/statics/assets/apple-touch-icon-120x120.png +0 -0
  756. dstack/dashboard/statics/assets/apple-touch-icon-144x144.png +0 -0
  757. dstack/dashboard/statics/assets/apple-touch-icon-152x152.png +0 -0
  758. dstack/dashboard/statics/assets/apple-touch-icon-167x167.png +0 -0
  759. dstack/dashboard/statics/assets/apple-touch-icon-180x180.png +0 -0
  760. dstack/dashboard/statics/assets/apple-touch-icon-57x57.png +0 -0
  761. dstack/dashboard/statics/assets/apple-touch-icon-60x60.png +0 -0
  762. dstack/dashboard/statics/assets/apple-touch-icon-72x72.png +0 -0
  763. dstack/dashboard/statics/assets/apple-touch-icon-76x76.png +0 -0
  764. dstack/dashboard/statics/assets/apple-touch-icon-precomposed.png +0 -0
  765. dstack/dashboard/statics/assets/apple-touch-icon.png +0 -0
  766. dstack/dashboard/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
  767. dstack/dashboard/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
  768. dstack/dashboard/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
  769. dstack/dashboard/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
  770. dstack/dashboard/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
  771. dstack/dashboard/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
  772. dstack/dashboard/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
  773. dstack/dashboard/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
  774. dstack/dashboard/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
  775. dstack/dashboard/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
  776. dstack/dashboard/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
  777. dstack/dashboard/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
  778. dstack/dashboard/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
  779. dstack/dashboard/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
  780. dstack/dashboard/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
  781. dstack/dashboard/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
  782. dstack/dashboard/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
  783. dstack/dashboard/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
  784. dstack/dashboard/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
  785. dstack/dashboard/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
  786. dstack/dashboard/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
  787. dstack/dashboard/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
  788. dstack/dashboard/statics/assets/browserconfig.xml +0 -15
  789. dstack/dashboard/statics/assets/coast-228x228.png +0 -0
  790. dstack/dashboard/statics/assets/favicon-16x16.png +0 -0
  791. dstack/dashboard/statics/assets/favicon-32x32.png +0 -0
  792. dstack/dashboard/statics/assets/favicon-48x48.png +0 -0
  793. dstack/dashboard/statics/assets/favicon.ico +0 -0
  794. dstack/dashboard/statics/assets/firefox_app_128x128.png +0 -0
  795. dstack/dashboard/statics/assets/firefox_app_512x512.png +0 -0
  796. dstack/dashboard/statics/assets/firefox_app_60x60.png +0 -0
  797. dstack/dashboard/statics/assets/manifest.webapp +0 -14
  798. dstack/dashboard/statics/assets/mstile-144x144.png +0 -0
  799. dstack/dashboard/statics/assets/mstile-150x150.png +0 -0
  800. dstack/dashboard/statics/assets/mstile-310x150.png +0 -0
  801. dstack/dashboard/statics/assets/mstile-310x310.png +0 -0
  802. dstack/dashboard/statics/assets/mstile-70x70.png +0 -0
  803. dstack/dashboard/statics/assets/yandex-browser-50x50.png +0 -0
  804. dstack/dashboard/statics/d0f71e48806e25d72553.png +0 -0
  805. dstack/dashboard/statics/index.html +0 -7
  806. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js +0 -3
  807. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js.LICENSE.txt +0 -102
  808. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js.map +0 -1
  809. dstack/dashboard/statics/main.css +0 -5058
  810. dstack/dashboard/statics/splash_thumbnail.png +0 -0
  811. dstack/dashboard/statics/static/media/check.3f68ffc787a15c0476793a6d18ecb71a.svg +0 -3
  812. dstack/dashboard/statics/static/media/chevron-down.bfd8f22c4a5db4d443e76bca3b02f334.svg +0 -3
  813. dstack/dashboard/statics/static/media/chevron-up.bade0c5d82d741cead615813264140c9.svg +0 -3
  814. dstack/dashboard/statics/static/media/clock.583b744f29b9d143718a55e7c35fe38e.svg +0 -3
  815. dstack/dashboard/statics/static/media/close.a8bb9e47361b03a3b5084dad676ba1da.svg +0 -3
  816. dstack/dashboard/statics/static/media/content-copy.73f5f2a175094757758e315243a4111e.svg +0 -3
  817. dstack/dashboard/statics/static/media/delete-outline.6a8abf4e4f9cb777781967efd56efe9b.svg +0 -3
  818. dstack/dashboard/statics/static/media/dots-vertical.82fc618192e0c7dc4d615ff93269246a.svg +0 -3
  819. dstack/dashboard/statics/static/media/earth.1ad57c7f59f4be5c8bb2fa00439c3149.svg +0 -3
  820. dstack/dashboard/statics/static/media/email.320bc3af24a5f1bb41ebd85f66a5dd70.svg +0 -3
  821. dstack/dashboard/statics/static/media/external-link.99b88e699c15afb820a1779d9a2261ed.svg +0 -3
  822. dstack/dashboard/statics/static/media/eye-off-outline.5b4afb7ad624a44dd307518ff93d1faa.svg +0 -3
  823. dstack/dashboard/statics/static/media/eye-outline.ca41708feaaed1edb15c5fff021fbafe.svg +0 -3
  824. dstack/dashboard/statics/static/media/file-download-outline.3634b41923ba79b297ff294ef898661c.svg +0 -3
  825. dstack/dashboard/statics/static/media/folder-outline.33378387af61821dd1207e4b2d061a07.svg +0 -3
  826. dstack/dashboard/statics/static/media/github-circle.1bb85d171c31a3c2eebad07319377171.svg +0 -3
  827. dstack/dashboard/statics/static/media/infinity.915f92939afc0a37f94adba211ceb172.svg +0 -3
  828. dstack/dashboard/statics/static/media/layers.b4b02cea267a617d7aa44c2719250c89.svg +0 -3
  829. dstack/dashboard/statics/static/media/linkedin.1c52fae553eee54397f0e63a79455a5e.svg +0 -3
  830. dstack/dashboard/statics/static/media/loading.e466be7b2c1f0ac9e7e51ca929d0e37d.svg +0 -3
  831. dstack/dashboard/statics/static/media/lock.4a4c7768d0fa60c716609ddc483470ef.svg +0 -3
  832. dstack/dashboard/statics/static/media/magnify.0c803314d039d21f3cb1504ccd1437a4.svg +0 -3
  833. dstack/dashboard/statics/static/media/mark.3f68ffc787a15c0476793a6d18ecb71a.svg +0 -3
  834. dstack/dashboard/statics/static/media/menu-close.3ee84714181017c6ff837830297c8437.svg +0 -3
  835. dstack/dashboard/statics/static/media/menu.922f81e0972fbcbb5adcd8def20c86a3.svg +0 -3
  836. dstack/dashboard/statics/static/media/pencil.f706a3b9dcbff4959a91bf72e1e6324f.svg +0 -3
  837. dstack/dashboard/statics/static/media/refresh.a80edb948e98b322cd73b67814a57a48.svg +0 -3
  838. dstack/dashboard/statics/static/media/shape-plus.63b093c7f4b44c3def774f30fcfbceca.svg +0 -3
  839. dstack/dashboard/statics/static/media/slack.ec2fca99c6b944950ac65404ddd26880.svg +0 -4
  840. dstack/dashboard/statics/static/media/small-logo.b9cc8d09f646a553e65fa336dafd8b10.svg +0 -116
  841. dstack/dashboard/statics/static/media/source-branch.b8d22cfc42a7bed81f0fc08130818e85.svg +0 -3
  842. dstack/dashboard/statics/static/media/source-commit.be2bb53c081b9b6836adffccc0b8d3e6.svg +0 -3
  843. dstack/dashboard/statics/static/media/stop.11488ff1437ad929476be8924a3b7075.svg +0 -3
  844. dstack/dashboard/statics/static/media/tag-minus.15680a815b0b8d027e973c84832c05e6.svg +0 -3
  845. dstack/dashboard/statics/static/media/tag-outline.19b0bf86a8afd7d6d9c716e9a91d94ca.svg +0 -3
  846. dstack/dashboard/statics/static/media/twitter.4af18861c84a2f3044c7546b55d5739c.svg +0 -3
  847. dstack/dashboard/tags.py +0 -119
  848. dstack/jobs.py +0 -255
  849. dstack/providers/__init__.py +0 -316
  850. dstack/providers/_python/main.py +0 -88
  851. dstack/providers/_tensorboard/main.py +0 -93
  852. dstack/providers/_torchrun/main.py +0 -121
  853. dstack/providers/bash/main.py +0 -90
  854. dstack/providers/code/main.py +0 -95
  855. dstack/providers/docker/main.py +0 -79
  856. dstack/providers/lab/main.py +0 -95
  857. dstack/providers/notebook/main.py +0 -90
  858. dstack/random_name.py +0 -29
  859. dstack/repo.py +0 -135
  860. dstack/runners.py +0 -35
  861. dstack/util.py +0 -15
  862. dstack-0.0.9.dist-info/METADATA +0 -176
  863. dstack-0.0.9.dist-info/RECORD +0 -179
  864. dstack-0.0.9.dist-info/entry_points.txt +0 -3
  865. dstack-0.0.9.dist-info/top_level.txt +0 -2
  866. tests/test_config.py +0 -70
  867. /dstack/{cli → _internal}/__init__.py +0 -0
  868. /dstack/{dashboard → _internal/cli}/__init__.py +0 -0
  869. /dstack/{providers/_python → _internal/cli/models}/__init__.py +0 -0
  870. /dstack/{providers/_tensorboard → _internal/cli/services}/__init__.py +0 -0
  871. /dstack/{providers/_torchrun → _internal/cli/utils}/__init__.py +0 -0
  872. /dstack/{providers/bash → _internal/core}/__init__.py +0 -0
  873. /dstack/{providers/code → _internal/core/backends}/__init__.py +0 -0
  874. /dstack/{providers/docker → _internal/core/backends/aws}/__init__.py +0 -0
  875. /dstack/{providers/lab → _internal/core/backends/azure}/__init__.py +0 -0
  876. /dstack/{providers/notebook → _internal/core/backends/base}/__init__.py +0 -0
  877. {tests → dstack/_internal/core/backends/cloudrift}/__init__.py +0 -0
  878. /dstack/{dashboard → _internal/server}/statics/assets/yandex-browser-manifest.json +0 -0
  879. /dstack/{dashboard → _internal/server}/statics/robots.txt +0 -0
@@ -0,0 +1,1153 @@
1
+ import threading
2
+ from collections.abc import Iterable
3
+ from concurrent.futures import ThreadPoolExecutor, as_completed
4
+ from dataclasses import dataclass, field
5
+ from typing import Any, Callable, Dict, List, Optional, Tuple
6
+
7
+ import boto3
8
+ import botocore.client
9
+ import botocore.exceptions
10
+ from cachetools import Cache, TTLCache, cachedmethod
11
+ from cachetools.keys import hashkey
12
+ from pydantic import ValidationError
13
+
14
+ import dstack._internal.core.backends.aws.resources as aws_resources
15
+ from dstack._internal import settings
16
+ from dstack._internal.core.backends.aws.models import (
17
+ AWSAccessKeyCreds,
18
+ AWSConfig,
19
+ AWSOSImageConfig,
20
+ )
21
+ from dstack._internal.core.backends.base.compute import (
22
+ Compute,
23
+ ComputeCache,
24
+ ComputeTTLCache,
25
+ ComputeWithAllOffersCached,
26
+ ComputeWithCreateInstanceSupport,
27
+ ComputeWithGatewaySupport,
28
+ ComputeWithMultinodeSupport,
29
+ ComputeWithPlacementGroupSupport,
30
+ ComputeWithPrivateGatewaySupport,
31
+ ComputeWithPrivilegedSupport,
32
+ ComputeWithReservationSupport,
33
+ ComputeWithVolumeSupport,
34
+ generate_unique_gateway_instance_name,
35
+ generate_unique_instance_name,
36
+ generate_unique_volume_name,
37
+ get_gateway_user_data,
38
+ get_user_data,
39
+ merge_tags,
40
+ )
41
+ from dstack._internal.core.backends.base.offers import (
42
+ OfferModifier,
43
+ get_catalog_offers,
44
+ get_offers_disk_modifier,
45
+ )
46
+ from dstack._internal.core.errors import (
47
+ ComputeError,
48
+ NoCapacityError,
49
+ PlacementGroupInUseError,
50
+ PlacementGroupNotSupportedError,
51
+ )
52
+ from dstack._internal.core.models.backends.base import BackendType
53
+ from dstack._internal.core.models.common import CoreModel
54
+ from dstack._internal.core.models.gateways import (
55
+ GatewayComputeConfiguration,
56
+ GatewayProvisioningData,
57
+ )
58
+ from dstack._internal.core.models.instances import (
59
+ InstanceAvailability,
60
+ InstanceConfiguration,
61
+ InstanceOffer,
62
+ InstanceOfferWithAvailability,
63
+ )
64
+ from dstack._internal.core.models.placement import (
65
+ PlacementGroup,
66
+ PlacementGroupProvisioningData,
67
+ PlacementStrategy,
68
+ )
69
+ from dstack._internal.core.models.resources import Memory, Range
70
+ from dstack._internal.core.models.runs import JobProvisioningData, Requirements
71
+ from dstack._internal.core.models.volumes import (
72
+ Volume,
73
+ VolumeAttachmentData,
74
+ VolumeProvisioningData,
75
+ )
76
+ from dstack._internal.utils.common import get_or_error
77
+ from dstack._internal.utils.logging import get_logger
78
+
79
logger = get_logger(__name__)

# gp2 volumes can be anywhere from 1GB to 16TB, but dstack AMIs are 100GB,
# so 100GB is used as the lower bound of the configurable disk size range.
CONFIGURABLE_DISK_SIZE = Range[Memory](
    min=Memory.parse("100GB"),
    max=Memory.parse("16TB"),
)
# Instance type name kept as a module-level default for gateways
# (where it is applied is outside this part of the file).
DEFAULT_GATEWAY_INSTANCE_TYPE = "t3.micro"
83
+
84
+
85
# Backend-specific data stored for an AWS gateway.
# The fields are plain strings; judging by the names they hold the ARNs of the
# load balancer, target group, and listener -- TODO confirm against the code
# that populates this model.
class AWSGatewayBackendData(CoreModel):
    lb_arn: str  # presumably the load balancer ARN
    tg_arn: str  # presumably the target group ARN
    listener_arn: str  # presumably the listener ARN
89
+
90
+
91
# Backend-specific data stored for an AWS volume.
class AWSVolumeBackendData(CoreModel):
    volume_type: str  # presumably the EBS volume type name -- verify against caller
    iops: int  # presumably the IOPS value of the volume -- verify against caller
94
+
95
+
96
def _ec2client_cache_methodkey(self, ec2_client, *args, **kwargs):
    """Build a cache key from every argument except ``self`` and ``ec2_client``.

    The first two positional parameters are accepted but deliberately left out
    of the key, so calls that differ only in the instance or in the EC2 client
    object map to the same cache entry.

    Returns:
        The ``cachetools.keys.hashkey`` of the remaining positional and
        keyword arguments.
    """
    cache_key = hashkey(*args, **kwargs)
    return cache_key
98
+
99
+
100
@dataclass
class AWSQuotasCache(ComputeTTLCache):
    """A ``ComputeTTLCache`` that additionally carries an ``execution_lock``.

    Each instance gets its own freshly created ``threading.Lock``. The lock is
    separate from whatever locking the base cache provides, so callers can
    serialize the work that fills the cache.
    """

    execution_lock: threading.Lock = field(
        default_factory=threading.Lock,
    )
103
+
104
+
105
+ class AWSCompute(
106
+ ComputeWithAllOffersCached,
107
+ ComputeWithCreateInstanceSupport,
108
+ ComputeWithPrivilegedSupport,
109
+ ComputeWithMultinodeSupport,
110
+ ComputeWithReservationSupport,
111
+ ComputeWithPlacementGroupSupport,
112
+ ComputeWithGatewaySupport,
113
+ ComputeWithPrivateGatewaySupport,
114
+ ComputeWithVolumeSupport,
115
+ Compute,
116
+ ):
117
def __init__(
    self,
    config: AWSConfig,
    quotas_cache: Optional[AWSQuotasCache] = None,
    zones_cache: Optional[ComputeCache] = None,
):
    """Set up the boto3 session and the per-instance caches.

    Args:
        config: AWS backend configuration. When ``config.creds`` is an
            ``AWSAccessKeyCreds`` its access/secret key pair is passed to
            ``boto3.Session``; otherwise a default session is created.
        quotas_cache: Optional pre-built quotas cache. A new
            ``AWSQuotasCache`` is created when omitted.
        zones_cache: Optional pre-built zones cache. A new ``ComputeCache``
            is created when omitted.
    """
    super().__init__()
    self.config = config

    creds = config.creds
    if isinstance(creds, AWSAccessKeyCreds):
        # Explicit access key credentials.
        session = boto3.Session(
            aws_access_key_id=creds.access_key,
            aws_secret_access_key=creds.secret_key,
        )
    else:
        # Default credentials.
        session = boto3.Session()
    self.session = session

    # Caches to avoid redundant API calls when provisioning many instances.
    # get_offers is already cached, but its sub-functions are cached too,
    # with more aggressive/longer caches.
    self._offers_post_filter_cache = ComputeTTLCache(cache=TTLCache(maxsize=10, ttl=180))
    self._regions_to_quotas_cache = (
        quotas_cache
        if quotas_cache is not None
        else AWSQuotasCache(cache=TTLCache(maxsize=10, ttl=600))
    )
    self._regions_to_zones_cache = (
        zones_cache if zones_cache is not None else ComputeCache(cache=Cache(maxsize=10))
    )
    self._vpc_id_subnet_id_cache = ComputeTTLCache(cache=TTLCache(maxsize=100, ttl=600))
    self._maximum_efa_interfaces_cache = ComputeCache(cache=Cache(maxsize=100))
    self._subnets_availability_zones_cache = ComputeCache(cache=Cache(maxsize=100))
    self._security_group_cache = ComputeTTLCache(cache=TTLCache(maxsize=100, ttl=600))
    self._image_id_and_username_cache = ComputeTTLCache(
        cache=TTLCache(maxsize=100, ttl=600)
    )
147
+
148
def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
    """Return all catalog offers for the configured regions with availability info.

    Each offer is marked `NO_QUOTA` when the quota check explicitly reports that
    there is no quota for its instance type, and `UNKNOWN` otherwise. The
    region's availability zones are attached to every offer.

    Returns:
        One `InstanceOfferWithAvailability` per catalog offer.
    """
    offers = get_catalog_offers(
        backend=BackendType.AWS,
        locations=self.config.regions,
        extra_filter=_supported_instances,
    )
    # Sort the de-duplicated regions: the quotas/zones caches are keyed on
    # `tuple(regions)`, so an arbitrary set iteration order could produce
    # different keys for the same regions and trigger redundant API requests.
    regions = sorted({offer.region for offer in offers})
    with self._regions_to_quotas_cache.execution_lock:
        # Cache lock does not prevent concurrent execution.
        # We use a separate lock to avoid requesting quotas in parallel and hitting rate limits.
        regions_to_quotas = self._get_regions_to_quotas(self.session, regions)
    regions_to_zones = self._get_regions_to_zones(self.session, regions)

    availability_offers = []
    for offer in offers:
        availability = InstanceAvailability.UNKNOWN
        quota = _has_quota(regions_to_quotas[offer.region], offer.instance.name)
        # `None` means the quota is unknown; only an explicit falsy answer is NO_QUOTA.
        if quota is not None and not quota:
            availability = InstanceAvailability.NO_QUOTA
        availability_offers.append(
            InstanceOfferWithAvailability(
                **offer.dict(),
                availability=availability,
                availability_zones=regions_to_zones[offer.region],
            )
        )
    return availability_offers
175
+
176
def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
    """Return the offer modifiers for `requirements`: a single disk-size modifier
    built from `CONFIGURABLE_DISK_SIZE`."""
    return [get_offers_disk_modifier(CONFIGURABLE_DISK_SIZE, requirements)]
178
+
179
def _get_offers_cached_key(self, requirements: Requirements) -> int:
    """Cache key for `get_offers_post_filter`: the hash of the requirements' JSON."""
    # Requirements is not hashable, so we use a hack to get arguments hash
    return hash(requirements.json())
182
+
183
@cachedmethod(
    cache=lambda self: self._offers_post_filter_cache.cache,
    key=_get_offers_cached_key,
    lock=lambda self: self._offers_post_filter_cache.lock,
)
def get_offers_post_filter(
    self, requirements: Requirements
) -> Optional[Callable[[InstanceOfferWithAvailability], bool]]:
    """Return a predicate restricting offers to the requested capacity reservation.

    When `requirements.reservation` is not set, no filtering is needed and
    `None` is returned. Otherwise the reservation is looked up in every
    configured region, and the returned predicate accepts only non-spot offers
    whose region has the reservation and whose instance type matches it.
    """
    if not requirements.reservation:
        return None

    reservations_by_region = {}
    for configured_region in get_or_error(self.config.regions):
        found = aws_resources.get_reservation(
            ec2_client=self.session.client("ec2", region_name=configured_region),
            reservation_id=requirements.reservation,
            instance_count=1,
        )
        if found is not None:
            reservations_by_region[configured_region] = found

    def reservation_filter(offer: InstanceOfferWithAvailability) -> bool:
        # Filter: Spot instances can't be used with reservations
        if offer.instance.resources.spot:
            return False
        matched = reservations_by_region.get(offer.region)
        if not matched:
            return False
        # Filter: only instance types matching the capacity reservation
        return bool(offer.instance.name == matched["InstanceType"])

    return reservation_filter
216
+
217
def terminate_instance(
    self, instance_id: str, region: str, backend_data: Optional[str] = None
) -> None:
    """Terminate an EC2 instance, treating an already-missing instance as success.

    Args:
        instance_id: ID of the instance to terminate.
        region: Region the instance lives in.
        backend_data: Unused here.

    Raises:
        botocore.exceptions.ClientError: For any error other than
            `InvalidInstanceID.NotFound`.
    """
    client = self.session.client("ec2", region_name=region)
    try:
        client.terminate_instances(InstanceIds=[instance_id])
    except botocore.exceptions.ClientError as err:
        error_code = err.response["Error"]["Code"]
        if error_code != "InvalidInstanceID.NotFound":
            raise
        logger.debug("Skipping instance %s termination. Instance not found.", instance_id)
228
+
229
def create_instance(
    self,
    instance_offer: InstanceOfferWithAvailability,
    instance_config: InstanceConfiguration,
    placement_group: Optional[PlacementGroup],
) -> JobProvisioningData:
    """Provision an EC2 instance for `instance_offer`, trying one subnet per availability zone.

    Args:
        instance_offer: Offer to provision; its region, instance type, disk size,
            spot flag and availability zones drive the request.
        instance_config: Project/user/instance names, tags, public keys and an
            optional capacity reservation ID.
        placement_group: Placement group to launch into, if any.

    Returns:
        Provisioning data for the first instance that was launched successfully.

    Raises:
        NoCapacityError: If the offer has an empty zone list, if resolving the
            VPC/subnets/reservation fails with a `ClientError`, or if no zone
            could provision the instance.
        ComputeError: If AWS rejects the launch request with `InvalidParameterValue`.
    """
    project_name = instance_config.project_name
    ec2_resource = self.session.resource("ec2", region_name=instance_offer.region)
    ec2_client = self.session.client("ec2", region_name=instance_offer.region)
    allocate_public_ip = self.config.allocate_public_ips
    zones = instance_offer.availability_zones
    # `None` means "no zone restriction"; an empty list means nothing is eligible.
    if zones is not None and len(zones) == 0:
        raise NoCapacityError("No eligible availability zones")

    instance_name = generate_unique_instance_name(instance_config)
    base_tags = {
        "Name": instance_name,
        "owner": "dstack",
        "dstack_project": project_name,
        "dstack_name": instance_config.instance_name,
        "dstack_user": instance_config.user,
    }
    tags = merge_tags(
        base_tags=base_tags,
        backend_tags=self.config.tags,
        resource_tags=instance_config.tags,
    )
    tags = aws_resources.filter_invalid_tags(tags)

    # The offer stores the disk size in MiB; convert to whole GiB for the launch request.
    disk_size = round(instance_offer.instance.resources.disk.size_mib / 1024)
    max_efa_interfaces = self._get_maximum_efa_interfaces(
        ec2_client=ec2_client,
        region=instance_offer.region,
        instance_type=instance_offer.instance.name,
    )
    enable_efa = max_efa_interfaces > 0
    is_capacity_block = False
    try:
        vpc_id, subnet_ids = self._get_vpc_id_subnet_id_or_error(
            ec2_client=ec2_client,
            config=self.config,
            region=instance_offer.region,
            allocate_public_ip=allocate_public_ip,
            availability_zones=zones,
        )
        subnet_id_to_az_map = self._get_subnets_availability_zones(
            ec2_client=ec2_client,
            region=instance_offer.region,
            subnet_ids=subnet_ids,
        )
        if instance_config.reservation:
            reservation = aws_resources.get_reservation(
                ec2_client=ec2_client,
                reservation_id=instance_config.reservation,
                instance_count=1,
            )
            if reservation is not None:
                # Filter out az different from capacity reservation
                subnet_id_to_az_map = {
                    k: v
                    for k, v in subnet_id_to_az_map.items()
                    if v == reservation["AvailabilityZone"]
                }
                if reservation.get("ReservationType") == "capacity-block":
                    is_capacity_block = True

    except botocore.exceptions.ClientError as e:
        logger.warning("Got botocore.exceptions.ClientError: %s", e)
        raise NoCapacityError()
    # Several subnets may share a zone; attempt each zone at most once.
    tried_zones = set()
    for subnet_id, az in subnet_id_to_az_map.items():
        if az in tried_zones:
            continue
        tried_zones.add(az)
        try:
            logger.debug("Trying provisioning %s in %s", instance_offer.instance.name, az)
            image_id, username = self._get_image_id_and_username(
                ec2_client=ec2_client,
                region=instance_offer.region,
                gpu_name=(
                    instance_offer.instance.resources.gpus[0].name
                    if len(instance_offer.instance.resources.gpus) > 0
                    else None
                ),
                instance_type=instance_offer.instance.name,
                image_config=self.config.os_images,
            )
            security_group_id = self._create_security_group(
                ec2_client=ec2_client,
                region=instance_offer.region,
                project_id=project_name,
                vpc_id=vpc_id,
            )
            response = ec2_resource.create_instances(
                **aws_resources.create_instances_struct(
                    disk_size=disk_size,
                    image_id=image_id,
                    instance_type=instance_offer.instance.name,
                    iam_instance_profile=self.config.iam_instance_profile,
                    user_data=get_user_data(
                        authorized_keys=instance_config.get_public_keys(),
                        # Custom OS images may lack ufw, so don't attempt to set up the firewall.
                        # Rely on security groups and the image's built-in firewall rules instead.
                        skip_firewall_setup=self.config.os_images is not None,
                    ),
                    tags=aws_resources.make_tags(tags),
                    security_group_id=security_group_id,
                    spot=instance_offer.instance.resources.spot,
                    subnet_id=subnet_id,
                    allocate_public_ip=allocate_public_ip,
                    placement_group_name=placement_group.name if placement_group else None,
                    enable_efa=enable_efa,
                    max_efa_interfaces=max_efa_interfaces,
                    reservation_id=instance_config.reservation,
                    is_capacity_block=is_capacity_block,
                )
            )
            instance = response[0]
            instance.wait_until_running()
            instance.reload()  # populate instance.public_ip_address
            if instance_offer.instance.resources.spot:  # it will not terminate the instance
                ec2_client.cancel_spot_instance_requests(
                    SpotInstanceRequestIds=[instance.spot_instance_request_id]
                )
            hostname = _get_instance_ip(instance, allocate_public_ip)
            return JobProvisioningData(
                backend=instance_offer.backend,
                instance_type=instance_offer.instance,
                instance_id=instance.instance_id,
                public_ip_enabled=allocate_public_ip,
                hostname=hostname,
                internal_ip=instance.private_ip_address,
                region=instance_offer.region,
                availability_zone=az,
                reservation=instance.capacity_reservation_id,
                price=instance_offer.price,
                username=username,
                ssh_port=22,
                dockerized=True,  # because `dstack-shim` is used
                ssh_proxy=None,
                backend_data=None,
            )
        except botocore.exceptions.ClientError as e:
            logger.warning("Got botocore.exceptions.ClientError: %s", e)
            # An invalid request will not succeed in another zone, so fail right away.
            if e.response["Error"]["Code"] == "InvalidParameterValue":
                msg = e.response["Error"].get("Message", "")
                raise ComputeError(f"Invalid AWS request: {msg}")
            # Any other client error: move on to the next zone.
            continue
    raise NoCapacityError()
378
+
379
def create_placement_group(
    self,
    placement_group: PlacementGroup,
    master_instance_offer: InstanceOffer,
) -> PlacementGroupProvisioningData:
    """Create an EC2 placement group in the region of its configuration.

    Raises:
        PlacementGroupNotSupportedError: If the master offer does not support
            this placement group.
    """
    supported = _offer_supports_placement_group(master_instance_offer, placement_group)
    if not supported:
        raise PlacementGroupNotSupportedError()

    group_config = placement_group.configuration
    group_name = placement_group.name
    ec2_client = self.session.client("ec2", region_name=group_config.region)
    logger.debug("Creating placement group %s...", group_name)
    ec2_client.create_placement_group(
        GroupName=group_name,
        Strategy=group_config.placement_strategy.value,
    )
    logger.debug("Created placement group %s", placement_group.name)
    return PlacementGroupProvisioningData(backend=BackendType.AWS, backend_data=None)
397
+
398
def delete_placement_group(
    self,
    placement_group: PlacementGroup,
):
    """Delete an EC2 placement group; a group that no longer exists is not an error.

    Raises:
        PlacementGroupInUseError: If AWS reports the group is still in use.
        botocore.exceptions.ClientError: For any other AWS error.
    """
    group_name = placement_group.name
    ec2_client = self.session.client("ec2", region_name=placement_group.configuration.region)
    logger.debug("Deleting placement group %s...", group_name)
    try:
        ec2_client.delete_placement_group(GroupName=group_name)
    except botocore.exceptions.ClientError as err:
        error_code = err.response["Error"]["Code"]
        if error_code == "InvalidPlacementGroup.Unknown":
            logger.debug("Placement group %s not found", placement_group.name)
            return
        if error_code == "InvalidPlacementGroup.InUse":
            logger.debug("Placement group %s is in use", placement_group.name)
            raise PlacementGroupInUseError()
        raise
    logger.debug("Deleted placement group %s", placement_group.name)
416
+
417
def is_suitable_placement_group(
    self,
    placement_group: PlacementGroup,
    instance_offer: InstanceOffer,
) -> bool:
    """Tell whether `instance_offer` can be launched into `placement_group`.

    The offer must support the placement group and be in the same region.
    """
    if _offer_supports_placement_group(instance_offer, placement_group):
        return placement_group.configuration.region == instance_offer.region
    return False
425
+
426
def create_gateway(
    self,
    configuration: GatewayComputeConfiguration,
) -> GatewayProvisioningData:
    """Provision a gateway instance and, for ACM certificates, an ALB in front of it.

    Without an ACM certificate only the EC2 instance is created. With an ACM
    certificate, an application load balancer, a target group and an HTTPS
    listener are created as well, and their ARNs are returned in `backend_data`
    so that `terminate_gateway` can delete them later.

    Args:
        configuration: Gateway settings (region, names, tags, instance type,
            SSH key, public IP flag, optional certificate).

    Returns:
        Provisioning data for the gateway. For ACM gateways `hostname` is the
        load balancer DNS name.

    Raises:
        ComputeError: If an ACM gateway is requested with fewer than two subnets
            available, or if AWS rejects the instance launch.
    """
    ec2_resource = self.session.resource("ec2", region_name=configuration.region)
    ec2_client = self.session.client("ec2", region_name=configuration.region)

    instance_name = generate_unique_gateway_instance_name(configuration)
    base_tags = {
        "Name": instance_name,
        "owner": "dstack",
        "dstack_project": configuration.project_name,
        "dstack_name": configuration.instance_name,
    }
    if settings.DSTACK_VERSION is not None:
        base_tags["dstack_version"] = settings.DSTACK_VERSION
    tags = merge_tags(
        base_tags=base_tags,
        backend_tags=self.config.tags,
        resource_tags=configuration.tags,
    )
    tags = aws_resources.filter_invalid_tags(tags)
    tags = aws_resources.make_tags(tags)

    vpc_id, subnets_ids = self._get_vpc_id_subnet_id_or_error(
        ec2_client=ec2_client,
        config=self.config,
        region=configuration.region,
        allocate_public_ip=configuration.public_ip,
    )
    use_acm = configuration.certificate is not None and configuration.certificate.type == "acm"
    # Validate the ALB prerequisite before launching anything, so that a
    # misconfigured network does not leave a running gateway instance behind.
    if use_acm and len(subnets_ids) < 2:
        raise ComputeError(
            "Deploying gateway with ACM certificate requires at least two subnets in different AZs"
        )

    subnet_id = subnets_ids[0]
    availability_zone = aws_resources.get_availability_zone_by_subnet_id(
        ec2_client=ec2_client,
        subnet_id=subnet_id,
    )
    security_group_id = aws_resources.create_gateway_security_group(
        ec2_client=ec2_client,
        project_id=configuration.project_name,
        vpc_id=vpc_id,
    )
    instance_struct = aws_resources.create_instances_struct(
        disk_size=10,
        image_id=aws_resources.get_gateway_image_id(ec2_client),
        instance_type=configuration.instance_type or DEFAULT_GATEWAY_INSTANCE_TYPE,
        iam_instance_profile=None,
        user_data=get_gateway_user_data(
            configuration.ssh_key_pub, router=configuration.router
        ),
        tags=tags,
        security_group_id=security_group_id,
        spot=False,
        subnet_id=subnet_id,
        allocate_public_ip=configuration.public_ip,
    )
    try:
        response = ec2_resource.create_instances(**instance_struct)
    except botocore.exceptions.ClientError as e:
        msg = f"AWS Error: {e.response['Error']['Code']}"
        if e.response["Error"].get("Message"):
            msg += f": {e.response['Error']['Message']}"
        raise ComputeError(msg) from e
    instance = response[0]
    instance.wait_until_running()
    instance.reload()  # populate instance.public_ip_address
    ip_address = _get_instance_ip(instance, configuration.public_ip)
    if not use_acm:
        return GatewayProvisioningData(
            instance_id=instance.instance_id,
            region=configuration.region,
            availability_zone=availability_zone,
            ip_address=ip_address,
        )

    elb_client = self.session.client("elbv2", region_name=configuration.region)

    logger.debug("Creating ALB for gateway %s...", configuration.instance_name)
    response = elb_client.create_load_balancer(
        Name=f"{instance_name}-lb",
        Subnets=subnets_ids,
        SecurityGroups=[security_group_id],
        Scheme="internet-facing" if configuration.public_ip else "internal",
        Tags=tags,
        Type="application",
        IpAddressType="ipv4",
    )
    lb = response["LoadBalancers"][0]
    lb_arn = lb["LoadBalancerArn"]
    lb_dns_name = lb["DNSName"]
    logger.debug("Created ALB for gateway %s.", configuration.instance_name)

    logger.debug("Creating Target Group for gateway %s...", configuration.instance_name)
    response = elb_client.create_target_group(
        Name=f"{instance_name}-tg",
        Protocol="HTTP",
        Port=80,
        VpcId=vpc_id,
        TargetType="instance",
    )
    tg_arn = response["TargetGroups"][0]["TargetGroupArn"]
    logger.debug("Created Target Group for gateway %s", configuration.instance_name)

    logger.debug("Registering ALB target for gateway %s...", configuration.instance_name)
    elb_client.register_targets(
        TargetGroupArn=tg_arn,
        Targets=[
            {"Id": instance.instance_id, "Port": 80},
        ],
    )
    logger.debug("Registered ALB target for gateway %s", configuration.instance_name)

    logger.debug("Creating ALB Listener for gateway %s...", configuration.instance_name)
    # TLS is terminated at the ALB with the ACM certificate; traffic is forwarded
    # to the gateway instance over HTTP on port 80.
    response = elb_client.create_listener(
        LoadBalancerArn=lb_arn,
        Protocol="HTTPS",
        Port=443,
        SslPolicy="ELBSecurityPolicy-2016-08",
        Certificates=[
            {"CertificateArn": configuration.certificate.arn},
        ],
        DefaultActions=[
            {
                "Type": "forward",
                "TargetGroupArn": tg_arn,
            }
        ],
    )
    listener_arn = response["Listeners"][0]["ListenerArn"]
    logger.debug("Created ALB Listener for gateway %s", configuration.instance_name)

    return GatewayProvisioningData(
        instance_id=instance.instance_id,
        region=configuration.region,
        # Report the instance's zone here too, as the non-ACM path does.
        availability_zone=availability_zone,
        ip_address=ip_address,
        hostname=lb_dns_name,
        backend_data=AWSGatewayBackendData(
            lb_arn=lb_arn,
            tg_arn=tg_arn,
            listener_arn=listener_arn,
        ).json(),
    )
572
+
573
def terminate_gateway(
    self,
    instance_id: str,
    configuration: GatewayComputeConfiguration,
    backend_data: Optional[str] = None,
):
    """Terminate the gateway instance and, for ACM gateways, its ALB resources.

    The instance is always terminated first. The listener, target group and
    load balancer are deleted only when the gateway uses an ACM certificate
    and `backend_data` holds a valid `AWSGatewayBackendData` JSON document;
    otherwise the problem is logged and the method returns.
    """
    self.terminate_instance(
        instance_id=instance_id,
        region=configuration.region,
        backend_data=None,
    )

    certificate = configuration.certificate
    has_acm_certificate = certificate is not None and certificate.type == "acm"
    if not has_acm_certificate:
        return

    gateway_name = configuration.instance_name
    if backend_data is None:
        logger.error(
            "Failed to terminate all gateway %s resources. backend_data is None.",
            gateway_name,
        )
        return

    try:
        alb = AWSGatewayBackendData.parse_raw(backend_data)
    except ValidationError:
        logger.exception(
            "Failed to terminate all gateway %s resources. backend_data parsing error.",
            gateway_name,
        )
        return

    elb_client = self.session.client("elbv2", region_name=configuration.region)

    logger.debug("Deleting ALB resources for gateway %s...", gateway_name)
    # Delete in dependency order: listener, then target group, then load balancer.
    elb_client.delete_listener(ListenerArn=alb.listener_arn)
    elb_client.delete_target_group(TargetGroupArn=alb.tg_arn)
    elb_client.delete_load_balancer(LoadBalancerArn=alb.lb_arn)
    logger.debug("Deleted ALB resources for gateway %s", gateway_name)
610
+
611
def register_volume(self, volume: Volume) -> VolumeProvisioningData:
    """Look up an existing EBS volume by ID and return its provisioning data.

    Args:
        volume: Volume whose configuration carries the region and the ID of
            the existing EBS volume.

    Returns:
        Provisioning data built from the EC2 description of the volume.

    Raises:
        ComputeError: If the volume ID is rejected as invalid or the volume
            does not exist.
        botocore.exceptions.ClientError: For any other AWS error.
    """
    ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

    logger.debug("Requesting EBS volume %s", volume.configuration.volume_id)
    try:
        response = ec2_client.describe_volumes(VolumeIds=[volume.configuration.volume_id])
    except botocore.exceptions.ClientError as e:
        error_code = e.response["Error"]["Code"]
        if error_code == "InvalidParameterValue":
            raise ComputeError(f"Bad volume id: {volume.configuration.volume_id}") from e
        # When explicit IDs are requested, a missing volume is reported as an
        # error rather than as an empty result; surface it the same way.
        if error_code == "InvalidVolume.NotFound":
            raise ComputeError(f"Volume {volume.configuration.name} not found") from e
        raise
    response_volumes = response["Volumes"]
    if len(response_volumes) == 0:
        raise ComputeError(f"Volume {volume.configuration.name} not found")
    logger.debug("Found EBS volume %s", volume.configuration.volume_id)

    response_volume = response_volumes[0]
    return VolumeProvisioningData(
        volume_id=response_volume["VolumeId"],
        size_gb=response_volume["Size"],
        availability_zone=response_volume["AvailabilityZone"],
        backend_data=AWSVolumeBackendData(
            volume_type=response_volume["VolumeType"],
            iops=response_volume["Iops"],
        ).json(),
    )
637
+
638
def create_volume(self, volume: Volume) -> VolumeProvisioningData:
    """Create a new gp3 EBS volume for `volume` and return its provisioning data.

    The volume is created in the configured availability zone when one is set,
    otherwise in the first zone returned for the region.

    Raises:
        ComputeError: If no matching availability zone exists in the region.
    """
    volume_config = volume.configuration
    ec2_client = self.session.client("ec2", region_name=volume_config.region)

    tags = merge_tags(
        base_tags={
            "Name": generate_unique_volume_name(volume),
            "owner": "dstack",
            "dstack_project": volume.project_name,
            "dstack_name": volume.name,
            "dstack_user": volume.user,
        },
        backend_tags=self.config.tags,
        resource_tags=volume_config.tags,
    )
    tags = aws_resources.filter_invalid_tags(tags)

    zones = aws_resources.get_availability_zones(
        ec2_client=ec2_client, region=volume_config.region
    )
    requested_zone = volume_config.availability_zone
    if requested_zone is not None:
        zones = [candidate for candidate in zones if candidate == requested_zone]
    if len(zones) == 0:
        raise ComputeError(
            f"Failed to find availability zone in region {volume.configuration.region}"
        )
    zone = zones[0]

    logger.debug("Creating EBS volume %s", volume_config.name)
    tag_specification = {
        "ResourceType": "volume",
        "Tags": aws_resources.make_tags(tags),
    }
    response = ec2_client.create_volume(
        Size=volume_config.size_gb,
        AvailabilityZone=zone,
        VolumeType="gp3",
        TagSpecifications=[tag_specification],
    )
    logger.debug("Created EBS volume %s", volume_config.name)

    size = response["Size"]
    iops = response["Iops"]
    backend_data = AWSVolumeBackendData(volume_type=response["VolumeType"], iops=iops)
    return VolumeProvisioningData(
        backend=BackendType.AWS,
        volume_id=response["VolumeId"],
        size_gb=size,
        availability_zone=zone,
        price=_get_volume_price(size=size, iops=iops),
        backend_data=backend_data.json(),
    )
695
+
696
def delete_volume(self, volume: Volume):
    """Delete the EBS volume; a volume that is already gone is not an error.

    Raises:
        botocore.exceptions.ClientError: For any error other than
            `InvalidVolume.NotFound`.
    """
    volume_name = volume.configuration.name
    ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

    logger.debug("Deleting EBS volume %s", volume_name)
    try:
        ec2_client.delete_volume(VolumeId=volume.volume_id)
    except botocore.exceptions.ClientError as err:
        already_gone = err.response["Error"]["Code"] == "InvalidVolume.NotFound"
        if not already_gone:
            raise
    logger.debug("Deleted EBS volume %s", volume_name)
708
+
709
def attach_volume(
    self, volume: Volume, provisioning_data: JobProvisioningData
) -> VolumeAttachmentData:
    """Attach the EBS volume to the instance under the first usable device name.

    Candidate device names are tried in order. A name that AWS rejects as an
    invalid `unixDevice` value is skipped and the next one is tried.

    Raises:
        ComputeError: If the volume is in use, is in a different zone, does not
            exist, or no candidate device name could be used.
        botocore.exceptions.ClientError: For any other AWS error.
    """
    ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

    instance_id = provisioning_data.instance_id
    candidate_names = aws_resources.list_available_device_names(
        ec2_client=ec2_client, instance_id=instance_id
    )

    logger.debug("Attaching EBS volume %s to instance %s", volume.volume_id, instance_id)
    for device_name in candidate_names:
        try:
            ec2_client.attach_volume(
                VolumeId=volume.volume_id, InstanceId=instance_id, Device=device_name
            )
        except botocore.exceptions.ClientError as err:
            error = err.response["Error"]
            error_code = error["Code"]
            if error_code == "VolumeInUse":
                raise ComputeError(f"Failed to attach volume in use: {volume.volume_id}")
            if error_code == "InvalidVolume.ZoneMismatch":
                raise ComputeError("Volume zone is different from instance zone")
            if error_code == "InvalidVolume.NotFound":
                raise ComputeError("Volume not found")
            device_name_taken = (
                error_code == "InvalidParameterValue"
                and f"Invalid value '{device_name}' for unixDevice" in error["Message"]
            )
            if not device_name_taken:
                raise
            # device name is taken but list API hasn't returned it yet
        else:
            break
    else:
        raise ComputeError(f"Failed to find available device name for volume {volume.name}")

    logger.debug("Attached EBS volume %s to instance %s", volume.volume_id, instance_id)
    return VolumeAttachmentData(device_name=device_name)
746
+
747
def detach_volume(
    self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
):
    """Detach the EBS volume from the instance.

    Args:
        volume: Volume to detach; must have attachment data for the instance.
        provisioning_data: Provisioning data of the instance it is attached to.
        force: Passed to EC2 as `Force`.

    Raises:
        botocore.exceptions.ClientError: For any error other than
            `IncorrectState`, which is treated as "already detached".
    """
    volume_id = volume.volume_id
    ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

    instance_id = provisioning_data.instance_id
    logger.debug("Detaching EBS volume %s from instance %s", volume_id, instance_id)
    attachment = get_or_error(volume.get_attachment_data_for_instance(instance_id))
    try:
        ec2_client.detach_volume(
            VolumeId=volume_id,
            InstanceId=instance_id,
            Device=attachment.device_name,
            Force=force,
        )
    except botocore.exceptions.ClientError as err:
        if err.response["Error"]["Code"] != "IncorrectState":
            raise
        logger.info(
            "Skipping EBS volume %s detach since it's already detached", volume.volume_id
        )
        return
    logger.debug("Detached EBS volume %s from instance %s", volume.volume_id, instance_id)
770
+
771
def is_volume_detached(self, volume: Volume, provisioning_data: JobProvisioningData) -> bool:
    """Tell whether the EBS volume is no longer attached to the instance.

    Returns:
        `True` when the volume does not exist, has no attachment for the
        instance, or the attachment state is `detached`; `False` while the
        attachment is in any other state.

    Raises:
        botocore.exceptions.ClientError: For any AWS error other than
            `InvalidVolume.NotFound`.
    """
    ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

    instance_id = provisioning_data.instance_id
    logger.debug("Getting EBS volume %s status", volume.volume_id)
    try:
        response = ec2_client.describe_volumes(VolumeIds=[volume.volume_id])
    except botocore.exceptions.ClientError as e:
        # When explicit IDs are requested, a missing volume is reported as an
        # error rather than as an empty list; treat it as "not found" below.
        if e.response["Error"]["Code"] != "InvalidVolume.NotFound":
            raise
        response = {}
    volumes_infos = response.get("Volumes") or []
    if len(volumes_infos) == 0:
        logger.debug(
            "Failed to check EBS volume %s status. Volume not found.", volume.volume_id
        )
        return True
    volume_info = volumes_infos[0]
    for attachment in volume_info["Attachments"]:
        if attachment["InstanceId"] != instance_id:
            continue
        # The first attachment for this instance decides the result.
        return attachment["State"] == "detached"
    return True
791
+
792
def _get_regions_to_quotas_key(
    self,
    session: boto3.Session,
    regions: List[str],
) -> tuple:
    """Cache key for `_get_regions_to_quotas`.

    Only `regions` is used (in the given order); the session is ignored.
    """
    return hashkey(tuple(regions))
798
+
799
@cachedmethod(
    cache=lambda self: self._regions_to_quotas_cache.cache,
    key=_get_regions_to_quotas_key,
    lock=lambda self: self._regions_to_quotas_cache.lock,
)
def _get_regions_to_quotas(
    self,
    session: boto3.Session,
    regions: List[str],
) -> Dict[str, Dict[str, int]]:
    """Cached wrapper around the module-level `_get_regions_to_quotas`, keyed by `regions`."""
    return _get_regions_to_quotas(session=session, regions=regions)
810
+
811
def _get_regions_to_zones_key(
    self,
    session: boto3.Session,
    regions: List[str],
) -> tuple:
    """Cache key for `_get_regions_to_zones`.

    Only `regions` is used (in the given order); the session is ignored.
    """
    return hashkey(tuple(regions))
817
+
818
@cachedmethod(
    cache=lambda self: self._regions_to_zones_cache.cache,
    key=_get_regions_to_zones_key,
    lock=lambda self: self._regions_to_zones_cache.lock,
)
def _get_regions_to_zones(
    self,
    session: boto3.Session,
    regions: List[str],
) -> Dict[str, List[str]]:
    """Cached wrapper around the module-level `_get_regions_to_zones`, keyed by `regions`."""
    return _get_regions_to_zones(session=session, regions=regions)
829
+
830
def _get_vpc_id_subnet_id_or_error_cache_key(
    self,
    ec2_client: botocore.client.BaseClient,
    config: AWSConfig,
    region: str,
    allocate_public_ip: bool,
    availability_zones: Optional[List[str]] = None,
) -> tuple:
    """Cache key for `_get_vpc_id_subnet_id_or_error`.

    Built from the region, the public IP flag and the zones. The client and the
    config are not part of the key, and an empty zone list is keyed like `None`.
    """
    return hashkey(
        region, allocate_public_ip, tuple(availability_zones) if availability_zones else None
    )
841
+
842
@cachedmethod(
    cache=lambda self: self._vpc_id_subnet_id_cache.cache,
    key=_get_vpc_id_subnet_id_or_error_cache_key,
    lock=lambda self: self._vpc_id_subnet_id_cache.lock,
)
def _get_vpc_id_subnet_id_or_error(
    self,
    ec2_client: botocore.client.BaseClient,
    config: AWSConfig,
    region: str,
    allocate_public_ip: bool,
    availability_zones: Optional[List[str]] = None,
) -> Tuple[str, List[str]]:
    """Cached wrapper around the module-level `get_vpc_id_subnet_id_or_error`.

    Returns the VPC ID and the list of subnet IDs to use in `region`.
    """
    return get_vpc_id_subnet_id_or_error(
        ec2_client=ec2_client,
        config=config,
        region=region,
        allocate_public_ip=allocate_public_ip,
        availability_zones=availability_zones,
    )
862
+
863
@cachedmethod(
    cache=lambda self: self._maximum_efa_interfaces_cache.cache,
    key=_ec2client_cache_methodkey,
    lock=lambda self: self._maximum_efa_interfaces_cache.lock,
)
def _get_maximum_efa_interfaces(
    self,
    ec2_client: botocore.client.BaseClient,
    region: str,
    instance_type: str,
) -> int:
    """Cached wrapper around the module-level `_get_maximum_efa_interfaces`.

    `region` is not forwarded to the helper; it only takes part in the cache
    key, which covers every argument except the client.
    """
    return _get_maximum_efa_interfaces(
        ec2_client=ec2_client,
        instance_type=instance_type,
    )
878
+
879
def _get_subnets_availability_zones_key(
    self,
    ec2_client: botocore.client.BaseClient,
    region: str,
    subnet_ids: List[str],
) -> tuple:
    """Cache key for `_get_subnets_availability_zones`: the region plus the subnet IDs in order."""
    return hashkey(region, tuple(subnet_ids))
886
+
887
@cachedmethod(
    cache=lambda self: self._subnets_availability_zones_cache.cache,
    key=_get_subnets_availability_zones_key,
    lock=lambda self: self._subnets_availability_zones_cache.lock,
)
def _get_subnets_availability_zones(
    self,
    ec2_client: botocore.client.BaseClient,
    region: str,
    subnet_ids: List[str],
) -> Dict[str, str]:
    """Cached wrapper around `aws_resources.get_subnets_availability_zones`.

    `region` is used only for the cache key and is not forwarded.
    """
    return aws_resources.get_subnets_availability_zones(
        ec2_client=ec2_client,
        subnet_ids=subnet_ids,
    )
902
+
903
@cachedmethod(
    cache=lambda self: self._security_group_cache.cache,
    key=_ec2client_cache_methodkey,
    lock=lambda self: self._security_group_cache.lock,
)
def _create_security_group(
    self,
    ec2_client: botocore.client.BaseClient,
    region: str,
    project_id: str,
    vpc_id: Optional[str],
) -> str:
    """Cached wrapper around `aws_resources.create_security_group`.

    `region` is not forwarded to the helper; it only takes part in the cache
    key, which covers every argument except the client.
    """
    return aws_resources.create_security_group(
        ec2_client=ec2_client,
        project_id=project_id,
        vpc_id=vpc_id,
    )
920
+
921
def _get_image_id_and_username_cache_key(
    self,
    ec2_client: botocore.client.BaseClient,
    region: str,
    gpu_name: Optional[str],
    instance_type: str,
    image_config: Optional[AWSOSImageConfig] = None,
) -> tuple:
    """Cache key for `_get_image_id_and_username`.

    The image config is keyed by its JSON form (or `None` when it is not set);
    the client is not part of the key.
    """
    return hashkey(
        region, gpu_name, instance_type, image_config.json() if image_config else None
    )
932
+
933
@cachedmethod(
    cache=lambda self: self._image_id_and_username_cache.cache,
    key=_get_image_id_and_username_cache_key,
    lock=lambda self: self._image_id_and_username_cache.lock,
)
def _get_image_id_and_username(
    self,
    ec2_client: botocore.client.BaseClient,
    region: str,
    gpu_name: Optional[str],
    instance_type: str,
    image_config: Optional[AWSOSImageConfig] = None,
) -> tuple[str, str]:
    """Cached wrapper around `aws_resources.get_image_id_and_username`.

    `region` is used only for the cache key and is not forwarded.
    """
    return aws_resources.get_image_id_and_username(
        ec2_client=ec2_client,
        gpu_name=gpu_name,
        instance_type=instance_type,
        image_config=image_config,
    )
952
+
953
+
954
def get_vpc_id_subnet_id_or_error(
    ec2_client: botocore.client.BaseClient,
    config: AWSConfig,
    region: str,
    allocate_public_ip: bool,
    availability_zones: Optional[List[str]] = None,
) -> Tuple[str, List[str]]:
    """Resolve the VPC and the subnets to use in `region`.

    A VPC ID configured for the region in `config.vpc_ids` takes precedence.
    When `config.vpc_ids` is set but has no entry for the region, the lookup
    falls back to the VPC name / default VPC only if `config.use_default_vpcs`
    allows it. When `config.vpc_ids` is not set, the fallback is always used.

    Returns:
        The VPC ID and a non-empty list of subnet IDs.

    Raises:
        ComputeError: If the configured VPC does not exist, has no suitable
            subnets, or no VPC is configured for the region and default VPCs
            are disabled.
    """
    vpc_id = config.vpc_ids.get(region) if config.vpc_ids is not None else None

    if vpc_id is None:
        if config.vpc_ids is not None and not config.use_default_vpcs:
            raise ComputeError(f"No VPC ID configured for region {region}")
        return _get_vpc_id_subnet_id_by_vpc_name_or_error(
            ec2_client=ec2_client,
            vpc_name=config.vpc_name,
            region=region,
            allocate_public_ip=allocate_public_ip,
            availability_zones=availability_zones,
        )

    vpc = aws_resources.get_vpc_by_vpc_id(ec2_client=ec2_client, vpc_id=vpc_id)
    if vpc is None:
        raise ComputeError(f"Failed to find VPC {vpc_id} in region {region}")

    subnets_ids = aws_resources.get_subnets_ids_for_vpc(
        ec2_client=ec2_client,
        vpc_id=vpc_id,
        allocate_public_ip=allocate_public_ip,
        availability_zones=availability_zones,
    )
    if len(subnets_ids) == 0:
        if allocate_public_ip:
            raise ComputeError(f"Failed to find public subnets for VPC {vpc_id}")
        raise ComputeError(
            f"Failed to find private subnets for VPC {vpc_id} with outbound internet access. "
            "Ensure you've setup NAT Gateway, Transit Gateway, or other mechanism "
            "to provide outbound internet access from private subnets."
        )
    return vpc_id, subnets_ids
992
+
993
+
994
def _get_vpc_id_subnet_id_by_vpc_name_or_error(
    ec2_client: botocore.client.BaseClient,
    vpc_name: Optional[str],
    region: str,
    allocate_public_ip: bool,
    availability_zones: Optional[List[str]] = None,
) -> Tuple[str, List[str]]:
    """Find a VPC by name (or the default VPC) together with its subnets.

    When ``vpc_name`` is given, the VPC with that name is looked up;
    otherwise the default VPC is used. The subnets are then fetched with
    ``aws_resources.get_subnets_ids_for_vpc``.

    Returns:
        A ``(vpc_id, subnet_ids)`` pair with a non-empty subnet list.

    Raises:
        ComputeError: If the VPC does not exist or has no suitable subnets.
    """
    use_named_vpc = vpc_name is not None

    if use_named_vpc:
        vpc_id = aws_resources.get_vpc_id_by_name(
            ec2_client=ec2_client,
            vpc_name=vpc_name,
        )
        if vpc_id is None:
            raise ComputeError(f"No VPC named {vpc_name} in region {region}")
    else:
        vpc_id = aws_resources.get_default_vpc_id(ec2_client=ec2_client)
        if vpc_id is None:
            raise ComputeError(f"No default VPC in region {region}")

    subnets_ids = aws_resources.get_subnets_ids_for_vpc(
        ec2_client=ec2_client,
        vpc_id=vpc_id,
        allocate_public_ip=allocate_public_ip,
        availability_zones=availability_zones,
    )
    if len(subnets_ids) > 0:
        return vpc_id, subnets_ids

    # No usable subnets: the message depends on which VPC was used and on
    # whether public or private subnets were requested.
    if use_named_vpc and allocate_public_ip:
        raise ComputeError(
            f"Failed to find public subnets for VPC {vpc_name} in region {region}"
        )
    if use_named_vpc:
        raise ComputeError(
            f"Failed to find private subnets with NAT for VPC {vpc_name} in region {region}"
        )
    if allocate_public_ip:
        raise ComputeError(f"Failed to find public subnets for default VPC in region {region}")
    raise ComputeError(
        f"Failed to find private subnets with NAT for default VPC in region {region}"
    )
1033
+
1034
+
1035
+ def _get_regions_to_quotas(
1036
+ session: boto3.Session, regions: List[str]
1037
+ ) -> Dict[str, Dict[str, int]]:
1038
+ def get_region_quotas(client: botocore.client.BaseClient) -> Dict[str, int]:
1039
+ region_quotas = {}
1040
+ try:
1041
+ for page in client.get_paginator("list_service_quotas").paginate(ServiceCode="ec2"):
1042
+ for q in page["Quotas"]:
1043
+ if "On-Demand" in q["QuotaName"]:
1044
+ region_quotas[q["UsageMetric"]["MetricDimensions"]["Class"]] = q["Value"]
1045
+ except botocore.exceptions.ClientError as e:
1046
+ if len(e.args) > 0 and "TooManyRequestsException" in e.args[0]:
1047
+ logger.warning(
1048
+ "Failed to get quotas due to rate limits. Quotas won't be accounted for."
1049
+ )
1050
+ else:
1051
+ logger.exception(e)
1052
+ return region_quotas
1053
+
1054
+ regions_to_quotas = {}
1055
+ with ThreadPoolExecutor(max_workers=12) as executor:
1056
+ future_to_region = {}
1057
+ for region in regions:
1058
+ future = executor.submit(
1059
+ get_region_quotas, session.client("service-quotas", region_name=region)
1060
+ )
1061
+ future_to_region[future] = region
1062
+ for future in as_completed(future_to_region):
1063
+ regions_to_quotas[future_to_region[future]] = future.result()
1064
+ return regions_to_quotas
1065
+
1066
+
1067
+ def _has_quota(quotas: Dict[str, int], instance_name: str) -> Optional[bool]:
1068
+ quota = quotas.get("Standard/OnDemand")
1069
+ if instance_name.startswith("p"):
1070
+ quota = quotas.get("P/OnDemand")
1071
+ if instance_name.startswith("g"):
1072
+ quota = quotas.get("G/OnDemand")
1073
+ if quota is None:
1074
+ return None
1075
+ return quota > 0
1076
+
1077
+
1078
+ def _get_regions_to_zones(session: boto3.Session, regions: List[str]) -> Dict[str, List[str]]:
1079
+ regions_to_zones = {}
1080
+ with ThreadPoolExecutor(max_workers=12) as executor:
1081
+ future_to_region = {}
1082
+ for region in regions:
1083
+ future = executor.submit(
1084
+ aws_resources.get_availability_zones,
1085
+ session.client("ec2", region_name=region),
1086
+ region,
1087
+ )
1088
+ future_to_region[future] = region
1089
+ for future in as_completed(future_to_region):
1090
+ regions_to_zones[future_to_region[future]] = future.result()
1091
+ return regions_to_zones
1092
+
1093
+
1094
+ def _supported_instances(offer: InstanceOffer) -> bool:
1095
+ for family in [
1096
+ "m7i.",
1097
+ "c7i.",
1098
+ "r7i.",
1099
+ "t3.",
1100
+ "t2.small",
1101
+ "c5.",
1102
+ "m5.",
1103
+ "p5.",
1104
+ "p5e.",
1105
+ "p4d.",
1106
+ "p4de.",
1107
+ "p3.",
1108
+ "g6.",
1109
+ "g6e.",
1110
+ "gr6.",
1111
+ "g5.",
1112
+ "g4dn.",
1113
+ ]:
1114
+ if offer.instance.name.startswith(family):
1115
+ return True
1116
+ return False
1117
+
1118
+
1119
def _offer_supports_placement_group(offer: InstanceOffer, placement_group: PlacementGroup) -> bool:
    """Return whether the offer can be launched in ``placement_group``.

    Placement groups whose strategy is not ``PlacementStrategy.CLUSTER``
    accept every offer. For cluster placement groups, offers whose instance
    name starts with ``t3.`` or ``t2.`` are rejected.
    """
    if placement_group.configuration.placement_strategy != PlacementStrategy.CLUSTER:
        return True
    unsupported_families = ("t3.", "t2.")
    return not offer.instance.name.startswith(unsupported_families)
1126
+
1127
+
1128
+ def _get_maximum_efa_interfaces(ec2_client: botocore.client.BaseClient, instance_type: str) -> int:
1129
+ try:
1130
+ response = ec2_client.describe_instance_types(
1131
+ InstanceTypes=[instance_type],
1132
+ Filters=[{"Name": "network-info.efa-supported", "Values": ["true"]}],
1133
+ )
1134
+ except botocore.exceptions.ClientError as e:
1135
+ if e.response.get("Error", {}).get("Code") == "InvalidInstanceType":
1136
+ # "The following supplied instance types do not exist: [<instance_type>]"
1137
+ return 0
1138
+ raise
1139
+ instance_types = response["InstanceTypes"]
1140
+ if not instance_types:
1141
+ return 0
1142
+ return instance_types[0]["NetworkInfo"]["EfaInfo"]["MaximumEfaInterfaces"]
1143
+
1144
+
1145
+ def _get_instance_ip(instance: Any, public_ip: bool) -> str:
1146
+ if public_ip:
1147
+ return instance.public_ip_address
1148
+ return instance.private_ip_address
1149
+
1150
+
1151
+ def _get_volume_price(size: int, iops: int) -> float:
1152
+ # https://aws.amazon.com/ebs/pricing/
1153
+ return size * 0.08 + (iops - 3000) * 0.005