dstack 0.0.9__py3-none-any.whl → 0.20.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (879)
  1. dstack/_internal/cli/commands/__init__.py +80 -0
  2. dstack/_internal/cli/commands/apply.py +100 -0
  3. dstack/_internal/cli/commands/attach.py +161 -0
  4. dstack/_internal/cli/commands/completion.py +22 -0
  5. dstack/_internal/cli/commands/delete.py +44 -0
  6. dstack/_internal/cli/commands/event.py +168 -0
  7. dstack/_internal/cli/commands/fleet.py +161 -0
  8. dstack/_internal/cli/commands/gateway.py +159 -0
  9. dstack/_internal/cli/commands/init.py +64 -0
  10. dstack/_internal/cli/commands/login.py +352 -0
  11. dstack/_internal/cli/commands/logs.py +62 -0
  12. dstack/_internal/cli/commands/metrics.py +153 -0
  13. dstack/_internal/cli/commands/offer.py +146 -0
  14. dstack/_internal/cli/commands/project.py +259 -0
  15. dstack/_internal/cli/commands/ps.py +81 -0
  16. dstack/_internal/cli/commands/run.py +69 -0
  17. dstack/_internal/cli/commands/secrets.py +92 -0
  18. dstack/_internal/cli/commands/server.py +96 -0
  19. dstack/_internal/cli/commands/stop.py +26 -0
  20. dstack/_internal/cli/commands/volume.py +117 -0
  21. dstack/_internal/cli/main.py +101 -0
  22. dstack/_internal/cli/models/gateways.py +16 -0
  23. dstack/_internal/cli/models/offers.py +47 -0
  24. dstack/_internal/cli/models/runs.py +16 -0
  25. dstack/_internal/cli/services/args.py +31 -0
  26. dstack/_internal/cli/services/completion.py +91 -0
  27. dstack/_internal/cli/services/configurators/__init__.py +86 -0
  28. dstack/_internal/cli/services/configurators/base.py +103 -0
  29. dstack/_internal/cli/services/configurators/fleet.py +475 -0
  30. dstack/_internal/cli/services/configurators/gateway.py +231 -0
  31. dstack/_internal/cli/services/configurators/run.py +882 -0
  32. dstack/_internal/cli/services/configurators/volume.py +222 -0
  33. dstack/_internal/cli/services/events.py +68 -0
  34. dstack/_internal/cli/services/profile.py +182 -0
  35. dstack/_internal/cli/services/repos.py +71 -0
  36. dstack/_internal/cli/services/resources.py +54 -0
  37. dstack/_internal/cli/utils/common.py +159 -0
  38. dstack/_internal/cli/utils/fleet.py +106 -0
  39. dstack/_internal/cli/utils/gateway.py +56 -0
  40. dstack/_internal/cli/utils/gpu.py +178 -0
  41. dstack/_internal/cli/utils/rich.py +156 -0
  42. dstack/_internal/cli/utils/run.py +517 -0
  43. dstack/_internal/cli/utils/secrets.py +25 -0
  44. dstack/_internal/cli/utils/updates.py +98 -0
  45. dstack/_internal/cli/utils/volume.py +58 -0
  46. dstack/_internal/compat.py +3 -0
  47. dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
  48. dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
  49. dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
  50. dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
  51. dstack/_internal/core/backends/aws/auth.py +30 -0
  52. dstack/_internal/core/backends/aws/backend.py +31 -0
  53. dstack/_internal/core/backends/aws/compute.py +1153 -0
  54. dstack/_internal/core/backends/aws/configurator.py +191 -0
  55. dstack/_internal/core/backends/aws/models.py +135 -0
  56. dstack/_internal/core/backends/aws/resources.py +700 -0
  57. dstack/_internal/core/backends/azure/auth.py +39 -0
  58. dstack/_internal/core/backends/azure/backend.py +21 -0
  59. dstack/_internal/core/backends/azure/compute.py +676 -0
  60. dstack/_internal/core/backends/azure/configurator.py +472 -0
  61. dstack/_internal/core/backends/azure/models.py +98 -0
  62. dstack/_internal/core/backends/azure/resources.py +116 -0
  63. dstack/_internal/core/backends/azure/utils.py +42 -0
  64. dstack/_internal/core/backends/base/backend.py +18 -0
  65. dstack/_internal/core/backends/base/compute.py +1101 -0
  66. dstack/_internal/core/backends/base/configurator.py +117 -0
  67. dstack/_internal/core/backends/base/models.py +24 -0
  68. dstack/_internal/core/backends/base/offers.py +232 -0
  69. dstack/_internal/core/backends/cloudrift/api_client.py +220 -0
  70. dstack/_internal/core/backends/cloudrift/backend.py +16 -0
  71. dstack/_internal/core/backends/cloudrift/compute.py +138 -0
  72. dstack/_internal/core/backends/cloudrift/configurator.py +72 -0
  73. dstack/_internal/core/backends/cloudrift/models.py +40 -0
  74. dstack/_internal/core/backends/configurators.py +181 -0
  75. dstack/_internal/core/backends/cudo/__init__.py +0 -0
  76. dstack/_internal/core/backends/cudo/api_client.py +111 -0
  77. dstack/_internal/core/backends/cudo/backend.py +16 -0
  78. dstack/_internal/core/backends/cudo/compute.py +174 -0
  79. dstack/_internal/core/backends/cudo/configurator.py +63 -0
  80. dstack/_internal/core/backends/cudo/models.py +37 -0
  81. dstack/_internal/core/backends/datacrunch/__init__.py +1 -0
  82. dstack/_internal/core/backends/datacrunch/backend.py +18 -0
  83. dstack/_internal/core/backends/datacrunch/compute.py +8 -0
  84. dstack/_internal/core/backends/datacrunch/configurator.py +17 -0
  85. dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
  86. dstack/_internal/core/backends/digitalocean/backend.py +16 -0
  87. dstack/_internal/core/backends/digitalocean/compute.py +5 -0
  88. dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
  89. dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
  90. dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
  91. dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
  92. dstack/_internal/core/backends/digitalocean_base/compute.py +174 -0
  93. dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
  94. dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
  95. dstack/_internal/core/backends/dstack/__init__.py +0 -0
  96. dstack/_internal/core/backends/dstack/models.py +26 -0
  97. dstack/_internal/core/backends/features.py +74 -0
  98. dstack/_internal/core/backends/gcp/__init__.py +0 -0
  99. dstack/_internal/core/backends/gcp/auth.py +57 -0
  100. dstack/_internal/core/backends/gcp/backend.py +17 -0
  101. dstack/_internal/core/backends/gcp/compute.py +1257 -0
  102. dstack/_internal/core/backends/gcp/configurator.py +206 -0
  103. dstack/_internal/core/backends/gcp/features/__init__.py +0 -0
  104. dstack/_internal/core/backends/gcp/features/tcpx.py +65 -0
  105. dstack/_internal/core/backends/gcp/models.py +160 -0
  106. dstack/_internal/core/backends/gcp/resources.py +585 -0
  107. dstack/_internal/core/backends/hotaisle/__init__.py +1 -0
  108. dstack/_internal/core/backends/hotaisle/api_client.py +101 -0
  109. dstack/_internal/core/backends/hotaisle/backend.py +16 -0
  110. dstack/_internal/core/backends/hotaisle/compute.py +188 -0
  111. dstack/_internal/core/backends/hotaisle/configurator.py +66 -0
  112. dstack/_internal/core/backends/hotaisle/models.py +45 -0
  113. dstack/_internal/core/backends/kubernetes/__init__.py +0 -0
  114. dstack/_internal/core/backends/kubernetes/backend.py +16 -0
  115. dstack/_internal/core/backends/kubernetes/compute.py +1077 -0
  116. dstack/_internal/core/backends/kubernetes/configurator.py +61 -0
  117. dstack/_internal/core/backends/kubernetes/models.py +71 -0
  118. dstack/_internal/core/backends/kubernetes/utils.py +81 -0
  119. dstack/_internal/core/backends/lambdalabs/__init__.py +0 -0
  120. dstack/_internal/core/backends/lambdalabs/api_client.py +87 -0
  121. dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
  122. dstack/_internal/core/backends/lambdalabs/compute.py +233 -0
  123. dstack/_internal/core/backends/lambdalabs/configurator.py +65 -0
  124. dstack/_internal/core/backends/lambdalabs/models.py +37 -0
  125. dstack/_internal/core/backends/local/__init__.py +0 -0
  126. dstack/_internal/core/backends/local/backend.py +14 -0
  127. dstack/_internal/core/backends/local/compute.py +130 -0
  128. dstack/_internal/core/backends/models.py +158 -0
  129. dstack/_internal/core/backends/nebius/__init__.py +0 -0
  130. dstack/_internal/core/backends/nebius/backend.py +16 -0
  131. dstack/_internal/core/backends/nebius/compute.py +401 -0
  132. dstack/_internal/core/backends/nebius/configurator.py +98 -0
  133. dstack/_internal/core/backends/nebius/models.py +185 -0
  134. dstack/_internal/core/backends/nebius/resources.py +433 -0
  135. dstack/_internal/core/backends/oci/__init__.py +0 -0
  136. dstack/_internal/core/backends/oci/auth.py +21 -0
  137. dstack/_internal/core/backends/oci/backend.py +16 -0
  138. dstack/_internal/core/backends/oci/compute.py +209 -0
  139. dstack/_internal/core/backends/oci/configurator.py +156 -0
  140. dstack/_internal/core/backends/oci/exceptions.py +15 -0
  141. dstack/_internal/core/backends/oci/models.py +87 -0
  142. dstack/_internal/core/backends/oci/region.py +86 -0
  143. dstack/_internal/core/backends/oci/resources.py +836 -0
  144. dstack/_internal/core/backends/runpod/__init__.py +0 -0
  145. dstack/_internal/core/backends/runpod/api_client.py +627 -0
  146. dstack/_internal/core/backends/runpod/backend.py +16 -0
  147. dstack/_internal/core/backends/runpod/compute.py +444 -0
  148. dstack/_internal/core/backends/runpod/configurator.py +63 -0
  149. dstack/_internal/core/backends/runpod/models.py +54 -0
  150. dstack/_internal/core/backends/template/__init__.py +0 -0
  151. dstack/_internal/core/backends/template/backend.py.jinja +16 -0
  152. dstack/_internal/core/backends/template/compute.py.jinja +95 -0
  153. dstack/_internal/core/backends/template/configurator.py.jinja +69 -0
  154. dstack/_internal/core/backends/template/models.py.jinja +62 -0
  155. dstack/_internal/core/backends/tensordock/models.py +40 -0
  156. dstack/_internal/core/backends/vastai/__init__.py +0 -0
  157. dstack/_internal/core/backends/vastai/api_client.py +143 -0
  158. dstack/_internal/core/backends/vastai/backend.py +16 -0
  159. dstack/_internal/core/backends/vastai/compute.py +141 -0
  160. dstack/_internal/core/backends/vastai/configurator.py +69 -0
  161. dstack/_internal/core/backends/vastai/models.py +37 -0
  162. dstack/_internal/core/backends/verda/__init__.py +0 -0
  163. dstack/_internal/core/backends/verda/backend.py +16 -0
  164. dstack/_internal/core/backends/verda/compute.py +266 -0
  165. dstack/_internal/core/backends/verda/configurator.py +73 -0
  166. dstack/_internal/core/backends/verda/models.py +38 -0
  167. dstack/_internal/core/backends/vultr/__init__.py +0 -0
  168. dstack/_internal/core/backends/vultr/api_client.py +116 -0
  169. dstack/_internal/core/backends/vultr/backend.py +16 -0
  170. dstack/_internal/core/backends/vultr/compute.py +167 -0
  171. dstack/_internal/core/backends/vultr/configurator.py +71 -0
  172. dstack/_internal/core/backends/vultr/models.py +34 -0
  173. dstack/_internal/core/compatibility/__init__.py +0 -0
  174. dstack/_internal/core/compatibility/events.py +13 -0
  175. dstack/_internal/core/compatibility/fleets.py +58 -0
  176. dstack/_internal/core/compatibility/gateways.py +39 -0
  177. dstack/_internal/core/compatibility/gpus.py +13 -0
  178. dstack/_internal/core/compatibility/logs.py +14 -0
  179. dstack/_internal/core/compatibility/runs.py +86 -0
  180. dstack/_internal/core/compatibility/volumes.py +37 -0
  181. dstack/_internal/core/consts.py +8 -0
  182. dstack/_internal/core/errors.py +160 -0
  183. dstack/_internal/core/models/__init__.py +0 -0
  184. dstack/_internal/core/models/auth.py +28 -0
  185. dstack/_internal/core/models/backends/__init__.py +0 -0
  186. dstack/_internal/core/models/backends/base.py +48 -0
  187. dstack/_internal/core/models/common.py +143 -0
  188. dstack/_internal/core/models/compute_groups.py +39 -0
  189. dstack/_internal/core/models/config.py +28 -0
  190. dstack/_internal/core/models/configurations.py +1123 -0
  191. dstack/_internal/core/models/envs.py +149 -0
  192. dstack/_internal/core/models/events.py +98 -0
  193. dstack/_internal/core/models/files.py +67 -0
  194. dstack/_internal/core/models/fleets.py +437 -0
  195. dstack/_internal/core/models/gateways.py +146 -0
  196. dstack/_internal/core/models/gpus.py +45 -0
  197. dstack/_internal/core/models/health.py +28 -0
  198. dstack/_internal/core/models/instances.py +346 -0
  199. dstack/_internal/core/models/logs.py +27 -0
  200. dstack/_internal/core/models/metrics.py +14 -0
  201. dstack/_internal/core/models/placement.py +27 -0
  202. dstack/_internal/core/models/profiles.py +431 -0
  203. dstack/_internal/core/models/projects.py +46 -0
  204. dstack/_internal/core/models/repos/__init__.py +34 -0
  205. dstack/_internal/core/models/repos/base.py +36 -0
  206. dstack/_internal/core/models/repos/local.py +96 -0
  207. dstack/_internal/core/models/repos/remote.py +341 -0
  208. dstack/_internal/core/models/repos/virtual.py +85 -0
  209. dstack/_internal/core/models/resources.py +424 -0
  210. dstack/_internal/core/models/routers.py +24 -0
  211. dstack/_internal/core/models/runs.py +618 -0
  212. dstack/_internal/core/models/secrets.py +16 -0
  213. dstack/_internal/core/models/server.py +7 -0
  214. dstack/_internal/core/models/services.py +76 -0
  215. dstack/_internal/core/models/unix.py +53 -0
  216. dstack/_internal/core/models/users.py +60 -0
  217. dstack/_internal/core/models/volumes.py +221 -0
  218. dstack/_internal/core/services/__init__.py +16 -0
  219. dstack/_internal/core/services/api_client.py +15 -0
  220. dstack/_internal/core/services/configs/__init__.py +116 -0
  221. dstack/_internal/core/services/diff.py +71 -0
  222. dstack/_internal/core/services/logs.py +58 -0
  223. dstack/_internal/core/services/profiles.py +46 -0
  224. dstack/_internal/core/services/repos.py +236 -0
  225. dstack/_internal/core/services/ssh/__init__.py +27 -0
  226. dstack/_internal/core/services/ssh/attach.py +241 -0
  227. dstack/_internal/core/services/ssh/client.py +113 -0
  228. dstack/_internal/core/services/ssh/key_manager.py +53 -0
  229. dstack/_internal/core/services/ssh/ports.py +89 -0
  230. dstack/_internal/core/services/ssh/tunnel.py +337 -0
  231. dstack/_internal/proxy/__init__.py +8 -0
  232. dstack/_internal/proxy/gateway/__init__.py +0 -0
  233. dstack/_internal/proxy/gateway/app.py +89 -0
  234. dstack/_internal/proxy/gateway/auth.py +26 -0
  235. dstack/_internal/proxy/gateway/const.py +7 -0
  236. dstack/_internal/proxy/gateway/deps.py +73 -0
  237. dstack/_internal/proxy/gateway/main.py +17 -0
  238. dstack/_internal/proxy/gateway/models.py +23 -0
  239. dstack/_internal/proxy/gateway/repo/__init__.py +0 -0
  240. dstack/_internal/proxy/gateway/repo/repo.py +121 -0
  241. dstack/_internal/proxy/gateway/repo/state_v1.py +164 -0
  242. dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf +11 -0
  243. dstack/_internal/proxy/gateway/resources/nginx/entrypoint.jinja2 +27 -0
  244. dstack/_internal/proxy/gateway/resources/nginx/router_workers.jinja2 +23 -0
  245. dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +105 -0
  246. dstack/_internal/proxy/gateway/routers/__init__.py +0 -0
  247. dstack/_internal/proxy/gateway/routers/auth.py +10 -0
  248. dstack/_internal/proxy/gateway/routers/config.py +28 -0
  249. dstack/_internal/proxy/gateway/routers/registry.py +124 -0
  250. dstack/_internal/proxy/gateway/routers/stats.py +18 -0
  251. dstack/_internal/proxy/gateway/schemas/__init__.py +0 -0
  252. dstack/_internal/proxy/gateway/schemas/common.py +5 -0
  253. dstack/_internal/proxy/gateway/schemas/config.py +9 -0
  254. dstack/_internal/proxy/gateway/schemas/registry.py +63 -0
  255. dstack/_internal/proxy/gateway/schemas/stats.py +15 -0
  256. dstack/_internal/proxy/gateway/services/__init__.py +0 -0
  257. dstack/_internal/proxy/gateway/services/model_routers/__init__.py +18 -0
  258. dstack/_internal/proxy/gateway/services/model_routers/base.py +91 -0
  259. dstack/_internal/proxy/gateway/services/model_routers/sglang.py +269 -0
  260. dstack/_internal/proxy/gateway/services/nginx.py +455 -0
  261. dstack/_internal/proxy/gateway/services/registry.py +426 -0
  262. dstack/_internal/proxy/gateway/services/server_client.py +95 -0
  263. dstack/_internal/proxy/gateway/services/stats.py +170 -0
  264. dstack/_internal/proxy/gateway/testing/__init__.py +0 -0
  265. dstack/_internal/proxy/gateway/testing/common.py +13 -0
  266. dstack/_internal/proxy/lib/__init__.py +0 -0
  267. dstack/_internal/proxy/lib/auth.py +7 -0
  268. dstack/_internal/proxy/lib/deps.py +106 -0
  269. dstack/_internal/proxy/lib/errors.py +14 -0
  270. dstack/_internal/proxy/lib/models.py +112 -0
  271. dstack/_internal/proxy/lib/repo.py +27 -0
  272. dstack/_internal/proxy/lib/routers/__init__.py +0 -0
  273. dstack/_internal/proxy/lib/routers/model_proxy.py +102 -0
  274. dstack/_internal/proxy/lib/schemas/__init__.py +0 -0
  275. dstack/_internal/proxy/lib/schemas/model_proxy.py +77 -0
  276. dstack/_internal/proxy/lib/services/__init__.py +0 -0
  277. dstack/_internal/proxy/lib/services/model_proxy/__init__.py +0 -0
  278. dstack/_internal/proxy/lib/services/model_proxy/clients/__init__.py +0 -0
  279. dstack/_internal/proxy/lib/services/model_proxy/clients/base.py +18 -0
  280. dstack/_internal/proxy/lib/services/model_proxy/clients/openai.py +67 -0
  281. dstack/_internal/proxy/lib/services/model_proxy/clients/tgi.py +208 -0
  282. dstack/_internal/proxy/lib/services/model_proxy/model_proxy.py +23 -0
  283. dstack/_internal/proxy/lib/services/service_connection.py +160 -0
  284. dstack/_internal/proxy/lib/testing/__init__.py +0 -0
  285. dstack/_internal/proxy/lib/testing/auth.py +11 -0
  286. dstack/_internal/proxy/lib/testing/common.py +51 -0
  287. dstack/_internal/server/__init__.py +0 -0
  288. dstack/_internal/server/alembic.ini +100 -0
  289. dstack/_internal/server/app.py +432 -0
  290. dstack/_internal/server/background/__init__.py +142 -0
  291. dstack/_internal/server/background/tasks/__init__.py +0 -0
  292. dstack/_internal/server/background/tasks/common.py +24 -0
  293. dstack/_internal/server/background/tasks/process_compute_groups.py +167 -0
  294. dstack/_internal/server/background/tasks/process_events.py +17 -0
  295. dstack/_internal/server/background/tasks/process_fleets.py +289 -0
  296. dstack/_internal/server/background/tasks/process_gateways.py +188 -0
  297. dstack/_internal/server/background/tasks/process_idle_volumes.py +145 -0
  298. dstack/_internal/server/background/tasks/process_instances.py +1186 -0
  299. dstack/_internal/server/background/tasks/process_metrics.py +172 -0
  300. dstack/_internal/server/background/tasks/process_placement_groups.py +104 -0
  301. dstack/_internal/server/background/tasks/process_probes.py +164 -0
  302. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +150 -0
  303. dstack/_internal/server/background/tasks/process_running_jobs.py +1238 -0
  304. dstack/_internal/server/background/tasks/process_runs.py +842 -0
  305. dstack/_internal/server/background/tasks/process_submitted_jobs.py +1106 -0
  306. dstack/_internal/server/background/tasks/process_terminating_jobs.py +108 -0
  307. dstack/_internal/server/background/tasks/process_volumes.py +129 -0
  308. dstack/_internal/server/compatibility/__init__.py +0 -0
  309. dstack/_internal/server/compatibility/common.py +20 -0
  310. dstack/_internal/server/compatibility/gpus.py +22 -0
  311. dstack/_internal/server/db.py +127 -0
  312. dstack/_internal/server/deps.py +19 -0
  313. dstack/_internal/server/main.py +4 -0
  314. dstack/_internal/server/migrations/__init__.py +0 -0
  315. dstack/_internal/server/migrations/env.py +112 -0
  316. dstack/_internal/server/migrations/script.py.mako +28 -0
  317. dstack/_internal/server/migrations/versions/006512f572b4_add_projects_original_name.py +38 -0
  318. dstack/_internal/server/migrations/versions/065588ec72b8_add_vultr_to_backendtype_enum.py +81 -0
  319. dstack/_internal/server/migrations/versions/06e977bc61c7_add_usermodel_deleted_and_original_name.py +45 -0
  320. dstack/_internal/server/migrations/versions/0e33559e16ed_update_instancestatus.py +64 -0
  321. dstack/_internal/server/migrations/versions/112753bc17dd_remove_nullable_fields.py +50 -0
  322. dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
  323. dstack/_internal/server/migrations/versions/14f2cb002fc2_add_jobmodel_removed_flag.py +44 -0
  324. dstack/_internal/server/migrations/versions/1a48dfe44a40_rework_termination_handling.py +42 -0
  325. dstack/_internal/server/migrations/versions/1aa9638ad963_added_email_index.py +31 -0
  326. dstack/_internal/server/migrations/versions/1e3fb39ef74b_add_remote_connection_details.py +26 -0
  327. dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
  328. dstack/_internal/server/migrations/versions/20166748b60c_add_jobmodel_disconnected_at.py +100 -0
  329. dstack/_internal/server/migrations/versions/22d74df9897e_add_events_and_event_targets.py +99 -0
  330. dstack/_internal/server/migrations/versions/23e01c56279a_make_blob_nullable.py +32 -0
  331. dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
  332. dstack/_internal/server/migrations/versions/252d3743b641_.py +40 -0
  333. dstack/_internal/server/migrations/versions/25479f540245_add_probes.py +43 -0
  334. dstack/_internal/server/migrations/versions/27d3e55759fa_add_pools.py +152 -0
  335. dstack/_internal/server/migrations/versions/29826f417010_remove_instancemodel_retry_policy.py +34 -0
  336. dstack/_internal/server/migrations/versions/29c08c6a8cb3_.py +36 -0
  337. dstack/_internal/server/migrations/versions/35e90e1b0d3e_add_rolling_deployment_fields.py +42 -0
  338. dstack/_internal/server/migrations/versions/35f732ee4cf5_add_projectmodel_is_public.py +39 -0
  339. dstack/_internal/server/migrations/versions/3cf77fb8bcf1_store_repo_clone_url.py +85 -0
  340. dstack/_internal/server/migrations/versions/3d7f6c2ec000_add_jobmodel_registered.py +28 -0
  341. dstack/_internal/server/migrations/versions/3dbdce90d0e0_fix_code_uq_constraint.py +33 -0
  342. dstack/_internal/server/migrations/versions/48ad3ecbaea2_do_not_delete_projects_and_runs.py +46 -0
  343. dstack/_internal/server/migrations/versions/4ae1a5b0e7f1_add_run_list_index.py +34 -0
  344. dstack/_internal/server/migrations/versions/4b4319398164_introduce_runs_processing.py +144 -0
  345. dstack/_internal/server/migrations/versions/50dd7ea98639_index_status_columns.py +55 -0
  346. dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
  347. dstack/_internal/server/migrations/versions/54a77e19c64c_add_manager_project_role.py +67 -0
  348. dstack/_internal/server/migrations/versions/555138b1f77f_change_instancemodel_for_asynchronous_.py +61 -0
  349. dstack/_internal/server/migrations/versions/58aa5162dcc3_add_gatewaymodel_configuration.py +32 -0
  350. dstack/_internal/server/migrations/versions/5ad8debc8fe6_fixes_for_psql.py +329 -0
  351. dstack/_internal/server/migrations/versions/5ec538b70e71_replace_instansestatus.py +31 -0
  352. dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
  353. dstack/_internal/server/migrations/versions/5fd659afca82_add_ix_instances_fleet_id.py +31 -0
  354. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  355. dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
  356. dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
  357. dstack/_internal/server/migrations/versions/686fb8341ea5_add_user_emails.py +32 -0
  358. dstack/_internal/server/migrations/versions/6c1a9d6530ee_add_jobmodel_exit_status.py +26 -0
  359. dstack/_internal/server/migrations/versions/706e0acc3a7d_add_runmodel_desired_replica_counts.py +26 -0
  360. dstack/_internal/server/migrations/versions/710e5b3fac8f_add_encryption.py +54 -0
  361. dstack/_internal/server/migrations/versions/728b1488b1b4_add_instance_health.py +50 -0
  362. dstack/_internal/server/migrations/versions/74a1f55209bd_store_enums_as_strings.py +484 -0
  363. dstack/_internal/server/migrations/versions/7b24b1c8eba7_add_instancemodel_last_processed_at.py +68 -0
  364. dstack/_internal/server/migrations/versions/7ba3b59d7ca6_add_runmodel_resubmission_attempt.py +35 -0
  365. dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
  366. dstack/_internal/server/migrations/versions/7d1ec2b920ac_add_computegroupmodel.py +91 -0
  367. dstack/_internal/server/migrations/versions/803c7e9ed85d_add_jobmodel_job_runtime_data.py +32 -0
  368. dstack/_internal/server/migrations/versions/82b32a135ea2_.py +58 -0
  369. dstack/_internal/server/migrations/versions/866ec1d67184_replace_retrypolicy_limit_with_.py +93 -0
  370. dstack/_internal/server/migrations/versions/903c91e24634_add_instances_termination_reason_message.py +34 -0
  371. dstack/_internal/server/migrations/versions/91a12fff6c76_add_repocredsmodel.py +43 -0
  372. dstack/_internal/server/migrations/versions/91ac5e543037_extend_repos_creds_column.py +36 -0
  373. dstack/_internal/server/migrations/versions/98cd9c8b5927_add_volumemodel.py +73 -0
  374. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  375. dstack/_internal/server/migrations/versions/99b4c8c954ea_add_termination_reason_message.py +71 -0
  376. dstack/_internal/server/migrations/versions/9eea6af28e10_added_fail_reason_for_instancemodel.py +36 -0
  377. dstack/_internal/server/migrations/versions/__init__.py +0 -0
  378. dstack/_internal/server/migrations/versions/a060e2440936_.py +206 -0
  379. dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
  380. dstack/_internal/server/migrations/versions/a7b46c073fa1_add_placementgroupmodel.py +58 -0
  381. dstack/_internal/server/migrations/versions/afbc600ff2b2_add_created_at_to_usermodel_and_.py +102 -0
  382. dstack/_internal/server/migrations/versions/b4d6ad60db08_add_instancemodel_unreachable.py +37 -0
  383. dstack/_internal/server/migrations/versions/b88d55c2a07d_replace_instancestatus_ready.py +21 -0
  384. dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
  385. dstack/_internal/server/migrations/versions/bca2fdf130bf_add_runmodel_priority.py +34 -0
  386. dstack/_internal/server/migrations/versions/bfba43f6def2_.py +32 -0
  387. dstack/_internal/server/migrations/versions/c00090eaef21_support_fleets.py +108 -0
  388. dstack/_internal/server/migrations/versions/c154eece89da_add_fields_for_async_gateway_creation.py +74 -0
  389. dstack/_internal/server/migrations/versions/c20626d03cfb_add_jobmetricspoint.py +43 -0
  390. dstack/_internal/server/migrations/versions/c48df7985d57_add_instance_termination_retries.py +38 -0
  391. dstack/_internal/server/migrations/versions/c83d45f9a971_replace_string_with_text.py +150 -0
  392. dstack/_internal/server/migrations/versions/d0bb68e48b9f_add_project_owners_and_quotas.py +106 -0
  393. dstack/_internal/server/migrations/versions/d3e8af4786fa_gateway_compute_flag_deleted.py +34 -0
  394. dstack/_internal/server/migrations/versions/d4d9dc26cf58_add_ix_jobs_run_id.py +31 -0
  395. dstack/_internal/server/migrations/versions/d5863798bf41_add_volumemodel_last_job_processed_at.py +40 -0
  396. dstack/_internal/server/migrations/versions/d6b11105f659_add_usermodel_active.py +36 -0
  397. dstack/_internal/server/migrations/versions/da574e93fee0_add_jobmodel_volumes_detached_at.py +40 -0
  398. dstack/_internal/server/migrations/versions/dfffd6a1165c_add_fields_for_gateways_behind_alb.py +36 -0
  399. dstack/_internal/server/migrations/versions/e2d08cd1b8d9_add_jobmodel_fleet.py +41 -0
  400. dstack/_internal/server/migrations/versions/e3b7db07727f_add_gatewaycomputemodel_app_updated_at.py +61 -0
  401. dstack/_internal/server/migrations/versions/e6391ca6a264_separate_gateways_from_compute.py +72 -0
  402. dstack/_internal/server/migrations/versions/ea60480f82bb_add_membermodel_member_num.py +32 -0
  403. dstack/_internal/server/migrations/versions/ec02a26a256c_add_runmodel_next_triggered_at.py +38 -0
  404. dstack/_internal/server/migrations/versions/ed0ca30e13bb_migrate_instancestatus_provisioning.py +29 -0
  405. dstack/_internal/server/migrations/versions/fe72c4de8376_add_gateways.py +81 -0
  406. dstack/_internal/server/migrations/versions/ff1d94f65b08_user_ssh_key.py +34 -0
  407. dstack/_internal/server/migrations/versions/ffa99edd1988_add_jobterminationreason_max_duration_.py +81 -0
  408. dstack/_internal/server/models.py +930 -0
  409. dstack/_internal/server/routers/__init__.py +0 -0
  410. dstack/_internal/server/routers/auth.py +34 -0
  411. dstack/_internal/server/routers/backends.py +142 -0
  412. dstack/_internal/server/routers/events.py +60 -0
  413. dstack/_internal/server/routers/files.py +68 -0
  414. dstack/_internal/server/routers/fleets.py +202 -0
  415. dstack/_internal/server/routers/gateways.py +109 -0
  416. dstack/_internal/server/routers/gpus.py +32 -0
  417. dstack/_internal/server/routers/instances.py +77 -0
  418. dstack/_internal/server/routers/logs.py +34 -0
  419. dstack/_internal/server/routers/metrics.py +82 -0
  420. dstack/_internal/server/routers/projects.py +205 -0
  421. dstack/_internal/server/routers/prometheus.py +35 -0
  422. dstack/_internal/server/routers/repos.py +118 -0
  423. dstack/_internal/server/routers/runs.py +216 -0
  424. dstack/_internal/server/routers/secrets.py +86 -0
  425. dstack/_internal/server/routers/server.py +19 -0
  426. dstack/_internal/server/routers/users.py +158 -0
  427. dstack/_internal/server/routers/volumes.py +122 -0
  428. dstack/_internal/server/schemas/__init__.py +0 -0
  429. dstack/_internal/server/schemas/auth.py +83 -0
  430. dstack/_internal/server/schemas/backends.py +16 -0
  431. dstack/_internal/server/schemas/common.py +9 -0
  432. dstack/_internal/server/schemas/events.py +211 -0
  433. dstack/_internal/server/schemas/files.py +5 -0
  434. dstack/_internal/server/schemas/fleets.py +49 -0
  435. dstack/_internal/server/schemas/gateways.py +31 -0
  436. dstack/_internal/server/schemas/gpus.py +26 -0
  437. dstack/_internal/server/schemas/health/__init__.py +0 -0
  438. dstack/_internal/server/schemas/health/dcgm.py +56 -0
  439. dstack/_internal/server/schemas/instances.py +47 -0
  440. dstack/_internal/server/schemas/logs.py +17 -0
  441. dstack/_internal/server/schemas/projects.py +81 -0
  442. dstack/_internal/server/schemas/repos.py +24 -0
  443. dstack/_internal/server/schemas/runner.py +269 -0
  444. dstack/_internal/server/schemas/runs.py +66 -0
  445. dstack/_internal/server/schemas/secrets.py +16 -0
  446. dstack/_internal/server/schemas/users.py +72 -0
  447. dstack/_internal/server/schemas/volumes.py +29 -0
  448. dstack/_internal/server/security/__init__.py +0 -0
  449. dstack/_internal/server/security/permissions.py +251 -0
  450. dstack/_internal/server/services/__init__.py +0 -0
  451. dstack/_internal/server/services/auth.py +77 -0
  452. dstack/_internal/server/services/backends/__init__.py +404 -0
  453. dstack/_internal/server/services/backends/handlers.py +105 -0
  454. dstack/_internal/server/services/compute_groups.py +22 -0
  455. dstack/_internal/server/services/config.py +279 -0
  456. dstack/_internal/server/services/docker.py +162 -0
  457. dstack/_internal/server/services/encryption/__init__.py +102 -0
  458. dstack/_internal/server/services/encryption/keys/__init__.py +0 -0
  459. dstack/_internal/server/services/encryption/keys/aes.py +68 -0
  460. dstack/_internal/server/services/encryption/keys/base.py +19 -0
  461. dstack/_internal/server/services/encryption/keys/identity.py +28 -0
  462. dstack/_internal/server/services/events.py +477 -0
  463. dstack/_internal/server/services/files.py +91 -0
  464. dstack/_internal/server/services/fleets.py +1224 -0
  465. dstack/_internal/server/services/gateways/__init__.py +686 -0
  466. dstack/_internal/server/services/gateways/client.py +209 -0
  467. dstack/_internal/server/services/gateways/connection.py +139 -0
  468. dstack/_internal/server/services/gateways/pool.py +58 -0
  469. dstack/_internal/server/services/gpus.py +387 -0
  470. dstack/_internal/server/services/instances.py +731 -0
  471. dstack/_internal/server/services/jobs/__init__.py +840 -0
  472. dstack/_internal/server/services/jobs/configurators/__init__.py +0 -0
  473. dstack/_internal/server/services/jobs/configurators/base.py +469 -0
  474. dstack/_internal/server/services/jobs/configurators/dev.py +69 -0
  475. dstack/_internal/server/services/jobs/configurators/extensions/__init__.py +0 -0
  476. dstack/_internal/server/services/jobs/configurators/extensions/base.py +15 -0
  477. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
  478. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +42 -0
  479. dstack/_internal/server/services/jobs/configurators/extensions/windsurf.py +43 -0
  480. dstack/_internal/server/services/jobs/configurators/service.py +28 -0
  481. dstack/_internal/server/services/jobs/configurators/task.py +39 -0
  482. dstack/_internal/server/services/locking.py +187 -0
  483. dstack/_internal/server/services/logging.py +29 -0
  484. dstack/_internal/server/services/logs/__init__.py +122 -0
  485. dstack/_internal/server/services/logs/aws.py +373 -0
  486. dstack/_internal/server/services/logs/base.py +47 -0
  487. dstack/_internal/server/services/logs/filelog.py +261 -0
  488. dstack/_internal/server/services/logs/fluentbit.py +329 -0
  489. dstack/_internal/server/services/logs/gcp.py +181 -0
  490. dstack/_internal/server/services/metrics.py +172 -0
  491. dstack/_internal/server/services/offers.py +249 -0
  492. dstack/_internal/server/services/permissions.py +37 -0
  493. dstack/_internal/server/services/placement.py +234 -0
  494. dstack/_internal/server/services/plugins.py +109 -0
  495. dstack/_internal/server/services/probes.py +10 -0
  496. dstack/_internal/server/services/projects.py +835 -0
  497. dstack/_internal/server/services/prometheus/__init__.py +0 -0
  498. dstack/_internal/server/services/prometheus/client_metrics.py +55 -0
  499. dstack/_internal/server/services/prometheus/custom_metrics.py +327 -0
  500. dstack/_internal/server/services/proxy/__init__.py +3 -0
  501. dstack/_internal/server/services/proxy/auth.py +12 -0
  502. dstack/_internal/server/services/proxy/deps.py +18 -0
  503. dstack/_internal/server/services/proxy/repo.py +189 -0
  504. dstack/_internal/server/services/proxy/routers/__init__.py +0 -0
  505. dstack/_internal/server/services/proxy/routers/service_proxy.py +49 -0
  506. dstack/_internal/server/services/proxy/services/__init__.py +0 -0
  507. dstack/_internal/server/services/proxy/services/service_proxy.py +135 -0
  508. dstack/_internal/server/services/repos.py +362 -0
  509. dstack/_internal/server/services/requirements/__init__.py +0 -0
  510. dstack/_internal/server/services/requirements/combine.py +260 -0
  511. dstack/_internal/server/services/resources.py +21 -0
  512. dstack/_internal/server/services/runner/__init__.py +0 -0
  513. dstack/_internal/server/services/runner/client.py +646 -0
  514. dstack/_internal/server/services/runner/ssh.py +128 -0
  515. dstack/_internal/server/services/runs/__init__.py +1026 -0
  516. dstack/_internal/server/services/runs/plan.py +703 -0
  517. dstack/_internal/server/services/runs/replicas.py +317 -0
  518. dstack/_internal/server/services/runs/spec.py +191 -0
  519. dstack/_internal/server/services/secrets.py +245 -0
  520. dstack/_internal/server/services/services/__init__.py +345 -0
  521. dstack/_internal/server/services/services/autoscalers.py +140 -0
  522. dstack/_internal/server/services/services/options.py +53 -0
  523. dstack/_internal/server/services/ssh.py +67 -0
  524. dstack/_internal/server/services/storage/__init__.py +37 -0
  525. dstack/_internal/server/services/storage/base.py +48 -0
  526. dstack/_internal/server/services/storage/gcs.py +66 -0
  527. dstack/_internal/server/services/storage/s3.py +69 -0
  528. dstack/_internal/server/services/users.py +461 -0
  529. dstack/_internal/server/services/volumes.py +496 -0
  530. dstack/_internal/server/settings.py +161 -0
  531. dstack/_internal/server/statics/00a6e1fb461ed2929fb9.png +0 -0
  532. dstack/_internal/server/statics/0cae4d9f0a36034984a7.png +0 -0
  533. dstack/_internal/server/statics/391de232cc0e30cae513.png +0 -0
  534. dstack/_internal/server/statics/4e0eead8c1a73689ef9d.svg +1 -0
  535. dstack/_internal/server/statics/544afa2f63428c2235b0.png +0 -0
  536. dstack/_internal/server/statics/54a4f50f74c6b9381530.svg +7 -0
  537. dstack/_internal/server/statics/68dd1360a7d2611e0132.svg +4 -0
  538. dstack/_internal/server/statics/69544b4c81973b54a66f.png +0 -0
  539. dstack/_internal/server/statics/77a8b02b17af19e39266.png +0 -0
  540. dstack/_internal/server/statics/83a93a8871c219104367.svg +9 -0
  541. dstack/_internal/server/statics/8f28bb8e9999e5e6a48b.svg +4 -0
  542. dstack/_internal/server/statics/9124086961ab8c366bc4.svg +9 -0
  543. dstack/_internal/server/statics/9a9ebaeb54b025dbac0a.svg +5 -0
  544. dstack/_internal/server/statics/a3428392dc534f3b15c4.svg +7 -0
  545. dstack/_internal/server/statics/ae22625574d69361f72c.png +0 -0
  546. dstack/_internal/server/statics/assets/android-chrome-144x144.png +0 -0
  547. dstack/_internal/server/statics/assets/android-chrome-192x192.png +0 -0
  548. dstack/_internal/server/statics/assets/android-chrome-256x256.png +0 -0
  549. dstack/_internal/server/statics/assets/android-chrome-36x36.png +0 -0
  550. dstack/_internal/server/statics/assets/android-chrome-384x384.png +0 -0
  551. dstack/_internal/server/statics/assets/android-chrome-48x48.png +0 -0
  552. dstack/_internal/server/statics/assets/android-chrome-512x512.png +0 -0
  553. dstack/_internal/server/statics/assets/android-chrome-72x72.png +0 -0
  554. dstack/_internal/server/statics/assets/android-chrome-96x96.png +0 -0
  555. dstack/_internal/server/statics/assets/apple-touch-icon-1024x1024.png +0 -0
  556. dstack/_internal/server/statics/assets/apple-touch-icon-114x114.png +0 -0
  557. dstack/_internal/server/statics/assets/apple-touch-icon-120x120.png +0 -0
  558. dstack/_internal/server/statics/assets/apple-touch-icon-144x144.png +0 -0
  559. dstack/_internal/server/statics/assets/apple-touch-icon-152x152.png +0 -0
  560. dstack/_internal/server/statics/assets/apple-touch-icon-167x167.png +0 -0
  561. dstack/_internal/server/statics/assets/apple-touch-icon-180x180.png +0 -0
  562. dstack/_internal/server/statics/assets/apple-touch-icon-57x57.png +0 -0
  563. dstack/_internal/server/statics/assets/apple-touch-icon-60x60.png +0 -0
  564. dstack/_internal/server/statics/assets/apple-touch-icon-72x72.png +0 -0
  565. dstack/_internal/server/statics/assets/apple-touch-icon-76x76.png +0 -0
  566. dstack/_internal/server/statics/assets/apple-touch-icon-precomposed.png +0 -0
  567. dstack/_internal/server/statics/assets/apple-touch-icon.png +0 -0
  568. dstack/_internal/server/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
  569. dstack/_internal/server/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
  570. dstack/_internal/server/statics/assets/apple-touch-startup-image-1170x2532.png +0 -0
  571. dstack/_internal/server/statics/assets/apple-touch-startup-image-1179x2556.png +0 -0
  572. dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
  573. dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
  574. dstack/_internal/server/statics/assets/apple-touch-startup-image-1284x2778.png +0 -0
  575. dstack/_internal/server/statics/assets/apple-touch-startup-image-1290x2796.png +0 -0
  576. dstack/_internal/server/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
  577. dstack/_internal/server/statics/assets/apple-touch-startup-image-1488x2266.png +0 -0
  578. dstack/_internal/server/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
  579. dstack/_internal/server/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
  580. dstack/_internal/server/statics/assets/apple-touch-startup-image-1640x2160.png +0 -0
  581. dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
  582. dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
  583. dstack/_internal/server/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
  584. dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
  585. dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
  586. dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
  587. dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1640.png +0 -0
  588. dstack/_internal/server/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
  589. dstack/_internal/server/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
  590. dstack/_internal/server/statics/assets/apple-touch-startup-image-2266x1488.png +0 -0
  591. dstack/_internal/server/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
  592. dstack/_internal/server/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
  593. dstack/_internal/server/statics/assets/apple-touch-startup-image-2532x1170.png +0 -0
  594. dstack/_internal/server/statics/assets/apple-touch-startup-image-2556x1179.png +0 -0
  595. dstack/_internal/server/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
  596. dstack/_internal/server/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
  597. dstack/_internal/server/statics/assets/apple-touch-startup-image-2778x1284.png +0 -0
  598. dstack/_internal/server/statics/assets/apple-touch-startup-image-2796x1290.png +0 -0
  599. dstack/_internal/server/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
  600. dstack/_internal/server/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
  601. dstack/_internal/server/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
  602. dstack/_internal/server/statics/assets/browserconfig.xml +12 -0
  603. dstack/_internal/server/statics/assets/favicon-16x16.png +0 -0
  604. dstack/_internal/server/statics/assets/favicon-32x32.png +0 -0
  605. dstack/_internal/server/statics/assets/favicon-48x48.png +0 -0
  606. dstack/_internal/server/statics/assets/favicon.ico +0 -0
  607. dstack/{dashboard/statics/assets/manifest.json → _internal/server/statics/assets/manifest.webmanifest} +18 -9
  608. dstack/_internal/server/statics/assets/mstile-144x144.png +0 -0
  609. dstack/_internal/server/statics/assets/mstile-150x150.png +0 -0
  610. dstack/_internal/server/statics/assets/mstile-310x150.png +0 -0
  611. dstack/_internal/server/statics/assets/mstile-310x310.png +0 -0
  612. dstack/_internal/server/statics/assets/mstile-70x70.png +0 -0
  613. dstack/_internal/server/statics/assets/yandex-browser-50x50.png +0 -0
  614. dstack/_internal/server/statics/b7ae68f44193474fc578.png +0 -0
  615. dstack/_internal/server/statics/d2f008c75b2b5b191f3f.png +0 -0
  616. dstack/_internal/server/statics/d44c33e1b92e05c379fd.png +0 -0
  617. dstack/_internal/server/statics/dd43ff0552815179d7ab.png +0 -0
  618. dstack/_internal/server/statics/dd4e7166c0b9aac197d7.png +0 -0
  619. dstack/_internal/server/statics/e30b27916930d43d2271.png +0 -0
  620. dstack/_internal/server/statics/e467d7d60aae81ab198b.svg +6 -0
  621. dstack/_internal/server/statics/eb9b344b73818fe2b71a.png +0 -0
  622. dstack/_internal/server/statics/f517dd626eb964120de0.png +0 -0
  623. dstack/_internal/server/statics/f958aecddee5d8e3222c.png +0 -0
  624. dstack/_internal/server/statics/index.html +3 -0
  625. dstack/_internal/server/statics/logo-notext.svg +116 -0
  626. dstack/_internal/server/statics/main-2e6967bad9f29395eea6.css +3 -0
  627. dstack/_internal/server/statics/main-7dc0f6d20b8b41659acc.js +155547 -0
  628. dstack/_internal/server/statics/main-7dc0f6d20b8b41659acc.js.map +1 -0
  629. dstack/{dashboard → _internal/server}/statics/manifest.json +2 -2
  630. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  631. dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
  632. dstack/{dashboard/statics/static/media/logo.f9d7170678f68f796e270698633770ec.svg → _internal/server/statics/static/media/logo.f602feeb138844eda97c8cb641461448.svg} +8 -6
  633. dstack/_internal/server/statics/static/media/okta.12f178e6873a1100965f2a4dbd18fcec.svg +2 -0
  634. dstack/_internal/server/statics/static/media/theme.3994c817bb7dda191c1c9640dee0bf42.svg +3 -0
  635. dstack/_internal/server/testing/__init__.py +0 -0
  636. dstack/_internal/server/testing/common.py +1220 -0
  637. dstack/_internal/server/testing/conf.py +53 -0
  638. dstack/_internal/server/testing/matchers.py +31 -0
  639. dstack/_internal/server/utils/__init__.py +0 -0
  640. dstack/_internal/server/utils/common.py +55 -0
  641. dstack/_internal/server/utils/logging.py +51 -0
  642. dstack/_internal/server/utils/provisioning.py +368 -0
  643. dstack/_internal/server/utils/routers.py +166 -0
  644. dstack/_internal/server/utils/sentry_utils.py +24 -0
  645. dstack/_internal/settings.py +49 -0
  646. dstack/_internal/utils/__init__.py +0 -0
  647. dstack/_internal/utils/common.py +318 -0
  648. dstack/_internal/utils/cron.py +5 -0
  649. dstack/_internal/utils/crypto.py +40 -0
  650. dstack/_internal/utils/env.py +88 -0
  651. dstack/_internal/utils/event_loop.py +30 -0
  652. dstack/_internal/utils/files.py +69 -0
  653. dstack/_internal/utils/gpu.py +59 -0
  654. dstack/_internal/utils/hash.py +31 -0
  655. dstack/_internal/utils/interpolator.py +91 -0
  656. dstack/_internal/utils/json_schema.py +11 -0
  657. dstack/_internal/utils/json_utils.py +54 -0
  658. dstack/_internal/utils/logging.py +5 -0
  659. dstack/_internal/utils/nested_list.py +47 -0
  660. dstack/_internal/utils/network.py +50 -0
  661. dstack/_internal/utils/path.py +57 -0
  662. dstack/_internal/utils/random_names.py +258 -0
  663. dstack/_internal/utils/ssh.py +346 -0
  664. dstack/_internal/utils/tags.py +42 -0
  665. dstack/_internal/utils/typing.py +14 -0
  666. dstack/_internal/utils/version.py +22 -0
  667. dstack/api/__init__.py +46 -0
  668. dstack/api/_public/__init__.py +96 -0
  669. dstack/api/_public/backends.py +42 -0
  670. dstack/api/_public/common.py +5 -0
  671. dstack/api/_public/repos.py +202 -0
  672. dstack/api/_public/runs.py +714 -0
  673. dstack/api/server/__init__.py +206 -0
  674. dstack/api/server/_auth.py +30 -0
  675. dstack/api/server/_backends.py +38 -0
  676. dstack/api/server/_events.py +64 -0
  677. dstack/api/server/_files.py +18 -0
  678. dstack/api/server/_fleets.py +82 -0
  679. dstack/api/server/_gateways.py +54 -0
  680. dstack/api/server/_gpus.py +27 -0
  681. dstack/api/server/_group.py +22 -0
  682. dstack/api/server/_logs.py +15 -0
  683. dstack/api/server/_metrics.py +23 -0
  684. dstack/api/server/_projects.py +124 -0
  685. dstack/api/server/_repos.py +64 -0
  686. dstack/api/server/_runs.py +102 -0
  687. dstack/api/server/_secrets.py +36 -0
  688. dstack/api/server/_users.py +82 -0
  689. dstack/api/server/_volumes.py +39 -0
  690. dstack/api/server/utils.py +34 -0
  691. dstack/api/utils.py +105 -0
  692. dstack/core/__init__.py +0 -0
  693. dstack/plugins/__init__.py +8 -0
  694. dstack/plugins/_base.py +72 -0
  695. dstack/plugins/_models.py +8 -0
  696. dstack/plugins/_utils.py +19 -0
  697. dstack/plugins/builtin/__init__.py +0 -0
  698. dstack/plugins/builtin/rest_plugin/__init__.py +18 -0
  699. dstack/plugins/builtin/rest_plugin/_models.py +48 -0
  700. dstack/plugins/builtin/rest_plugin/_plugin.py +147 -0
  701. dstack/version.py +3 -1
  702. dstack-0.20.7.dist-info/METADATA +519 -0
  703. dstack-0.20.7.dist-info/RECORD +720 -0
  704. {dstack-0.0.9.dist-info → dstack-0.20.7.dist-info}/WHEEL +1 -2
  705. dstack-0.20.7.dist-info/entry_points.txt +2 -0
  706. dstack-0.20.7.dist-info/licenses/LICENSE.md +353 -0
  707. dstack/aws/__init__.py +0 -180
  708. dstack/aws/artifacts.py +0 -111
  709. dstack/aws/config.py +0 -40
  710. dstack/aws/jobs.py +0 -245
  711. dstack/aws/logs.py +0 -186
  712. dstack/aws/repos.py +0 -137
  713. dstack/aws/run_names.py +0 -17
  714. dstack/aws/runners.py +0 -693
  715. dstack/aws/runs.py +0 -79
  716. dstack/aws/secrets.py +0 -99
  717. dstack/aws/tags.py +0 -138
  718. dstack/backend.py +0 -299
  719. dstack/cli/app.py +0 -41
  720. dstack/cli/artifacts.py +0 -87
  721. dstack/cli/common.py +0 -57
  722. dstack/cli/config.py +0 -194
  723. dstack/cli/dashboard.py +0 -26
  724. dstack/cli/delete.py +0 -49
  725. dstack/cli/init.py +0 -33
  726. dstack/cli/logs.py +0 -87
  727. dstack/cli/main.py +0 -81
  728. dstack/cli/restart.py +0 -43
  729. dstack/cli/run.py +0 -223
  730. dstack/cli/schema.py +0 -46
  731. dstack/cli/secrets.py +0 -97
  732. dstack/cli/status.py +0 -140
  733. dstack/cli/stop.py +0 -53
  734. dstack/cli/tags.py +0 -100
  735. dstack/config.py +0 -80
  736. dstack/dashboard/artifacts.py +0 -26
  737. dstack/dashboard/logs.py +0 -73
  738. dstack/dashboard/main.py +0 -45
  739. dstack/dashboard/repos.py +0 -41
  740. dstack/dashboard/runs.py +0 -140
  741. dstack/dashboard/secrets.py +0 -53
  742. dstack/dashboard/statics/4d6a4e032505c1efd23c.png +0 -0
  743. dstack/dashboard/statics/7e018c3e5566d7c349a8.png +0 -0
  744. dstack/dashboard/statics/assets/android-chrome-144x144.png +0 -0
  745. dstack/dashboard/statics/assets/android-chrome-192x192.png +0 -0
  746. dstack/dashboard/statics/assets/android-chrome-256x256.png +0 -0
  747. dstack/dashboard/statics/assets/android-chrome-36x36.png +0 -0
  748. dstack/dashboard/statics/assets/android-chrome-384x384.png +0 -0
  749. dstack/dashboard/statics/assets/android-chrome-48x48.png +0 -0
  750. dstack/dashboard/statics/assets/android-chrome-512x512.png +0 -0
  751. dstack/dashboard/statics/assets/android-chrome-72x72.png +0 -0
  752. dstack/dashboard/statics/assets/android-chrome-96x96.png +0 -0
  753. dstack/dashboard/statics/assets/apple-touch-icon-1024x1024.png +0 -0
  754. dstack/dashboard/statics/assets/apple-touch-icon-114x114.png +0 -0
  755. dstack/dashboard/statics/assets/apple-touch-icon-120x120.png +0 -0
  756. dstack/dashboard/statics/assets/apple-touch-icon-144x144.png +0 -0
  757. dstack/dashboard/statics/assets/apple-touch-icon-152x152.png +0 -0
  758. dstack/dashboard/statics/assets/apple-touch-icon-167x167.png +0 -0
  759. dstack/dashboard/statics/assets/apple-touch-icon-180x180.png +0 -0
  760. dstack/dashboard/statics/assets/apple-touch-icon-57x57.png +0 -0
  761. dstack/dashboard/statics/assets/apple-touch-icon-60x60.png +0 -0
  762. dstack/dashboard/statics/assets/apple-touch-icon-72x72.png +0 -0
  763. dstack/dashboard/statics/assets/apple-touch-icon-76x76.png +0 -0
  764. dstack/dashboard/statics/assets/apple-touch-icon-precomposed.png +0 -0
  765. dstack/dashboard/statics/assets/apple-touch-icon.png +0 -0
  766. dstack/dashboard/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
  767. dstack/dashboard/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
  768. dstack/dashboard/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
  769. dstack/dashboard/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
  770. dstack/dashboard/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
  771. dstack/dashboard/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
  772. dstack/dashboard/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
  773. dstack/dashboard/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
  774. dstack/dashboard/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
  775. dstack/dashboard/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
  776. dstack/dashboard/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
  777. dstack/dashboard/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
  778. dstack/dashboard/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
  779. dstack/dashboard/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
  780. dstack/dashboard/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
  781. dstack/dashboard/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
  782. dstack/dashboard/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
  783. dstack/dashboard/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
  784. dstack/dashboard/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
  785. dstack/dashboard/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
  786. dstack/dashboard/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
  787. dstack/dashboard/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
  788. dstack/dashboard/statics/assets/browserconfig.xml +0 -15
  789. dstack/dashboard/statics/assets/coast-228x228.png +0 -0
  790. dstack/dashboard/statics/assets/favicon-16x16.png +0 -0
  791. dstack/dashboard/statics/assets/favicon-32x32.png +0 -0
  792. dstack/dashboard/statics/assets/favicon-48x48.png +0 -0
  793. dstack/dashboard/statics/assets/favicon.ico +0 -0
  794. dstack/dashboard/statics/assets/firefox_app_128x128.png +0 -0
  795. dstack/dashboard/statics/assets/firefox_app_512x512.png +0 -0
  796. dstack/dashboard/statics/assets/firefox_app_60x60.png +0 -0
  797. dstack/dashboard/statics/assets/manifest.webapp +0 -14
  798. dstack/dashboard/statics/assets/mstile-144x144.png +0 -0
  799. dstack/dashboard/statics/assets/mstile-150x150.png +0 -0
  800. dstack/dashboard/statics/assets/mstile-310x150.png +0 -0
  801. dstack/dashboard/statics/assets/mstile-310x310.png +0 -0
  802. dstack/dashboard/statics/assets/mstile-70x70.png +0 -0
  803. dstack/dashboard/statics/assets/yandex-browser-50x50.png +0 -0
  804. dstack/dashboard/statics/d0f71e48806e25d72553.png +0 -0
  805. dstack/dashboard/statics/index.html +0 -7
  806. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js +0 -3
  807. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js.LICENSE.txt +0 -102
  808. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js.map +0 -1
  809. dstack/dashboard/statics/main.css +0 -5058
  810. dstack/dashboard/statics/splash_thumbnail.png +0 -0
  811. dstack/dashboard/statics/static/media/check.3f68ffc787a15c0476793a6d18ecb71a.svg +0 -3
  812. dstack/dashboard/statics/static/media/chevron-down.bfd8f22c4a5db4d443e76bca3b02f334.svg +0 -3
  813. dstack/dashboard/statics/static/media/chevron-up.bade0c5d82d741cead615813264140c9.svg +0 -3
  814. dstack/dashboard/statics/static/media/clock.583b744f29b9d143718a55e7c35fe38e.svg +0 -3
  815. dstack/dashboard/statics/static/media/close.a8bb9e47361b03a3b5084dad676ba1da.svg +0 -3
  816. dstack/dashboard/statics/static/media/content-copy.73f5f2a175094757758e315243a4111e.svg +0 -3
  817. dstack/dashboard/statics/static/media/delete-outline.6a8abf4e4f9cb777781967efd56efe9b.svg +0 -3
  818. dstack/dashboard/statics/static/media/dots-vertical.82fc618192e0c7dc4d615ff93269246a.svg +0 -3
  819. dstack/dashboard/statics/static/media/earth.1ad57c7f59f4be5c8bb2fa00439c3149.svg +0 -3
  820. dstack/dashboard/statics/static/media/email.320bc3af24a5f1bb41ebd85f66a5dd70.svg +0 -3
  821. dstack/dashboard/statics/static/media/external-link.99b88e699c15afb820a1779d9a2261ed.svg +0 -3
  822. dstack/dashboard/statics/static/media/eye-off-outline.5b4afb7ad624a44dd307518ff93d1faa.svg +0 -3
  823. dstack/dashboard/statics/static/media/eye-outline.ca41708feaaed1edb15c5fff021fbafe.svg +0 -3
  824. dstack/dashboard/statics/static/media/file-download-outline.3634b41923ba79b297ff294ef898661c.svg +0 -3
  825. dstack/dashboard/statics/static/media/folder-outline.33378387af61821dd1207e4b2d061a07.svg +0 -3
  826. dstack/dashboard/statics/static/media/github-circle.1bb85d171c31a3c2eebad07319377171.svg +0 -3
  827. dstack/dashboard/statics/static/media/infinity.915f92939afc0a37f94adba211ceb172.svg +0 -3
  828. dstack/dashboard/statics/static/media/layers.b4b02cea267a617d7aa44c2719250c89.svg +0 -3
  829. dstack/dashboard/statics/static/media/linkedin.1c52fae553eee54397f0e63a79455a5e.svg +0 -3
  830. dstack/dashboard/statics/static/media/loading.e466be7b2c1f0ac9e7e51ca929d0e37d.svg +0 -3
  831. dstack/dashboard/statics/static/media/lock.4a4c7768d0fa60c716609ddc483470ef.svg +0 -3
  832. dstack/dashboard/statics/static/media/magnify.0c803314d039d21f3cb1504ccd1437a4.svg +0 -3
  833. dstack/dashboard/statics/static/media/mark.3f68ffc787a15c0476793a6d18ecb71a.svg +0 -3
  834. dstack/dashboard/statics/static/media/menu-close.3ee84714181017c6ff837830297c8437.svg +0 -3
  835. dstack/dashboard/statics/static/media/menu.922f81e0972fbcbb5adcd8def20c86a3.svg +0 -3
  836. dstack/dashboard/statics/static/media/pencil.f706a3b9dcbff4959a91bf72e1e6324f.svg +0 -3
  837. dstack/dashboard/statics/static/media/refresh.a80edb948e98b322cd73b67814a57a48.svg +0 -3
  838. dstack/dashboard/statics/static/media/shape-plus.63b093c7f4b44c3def774f30fcfbceca.svg +0 -3
  839. dstack/dashboard/statics/static/media/slack.ec2fca99c6b944950ac65404ddd26880.svg +0 -4
  840. dstack/dashboard/statics/static/media/small-logo.b9cc8d09f646a553e65fa336dafd8b10.svg +0 -116
  841. dstack/dashboard/statics/static/media/source-branch.b8d22cfc42a7bed81f0fc08130818e85.svg +0 -3
  842. dstack/dashboard/statics/static/media/source-commit.be2bb53c081b9b6836adffccc0b8d3e6.svg +0 -3
  843. dstack/dashboard/statics/static/media/stop.11488ff1437ad929476be8924a3b7075.svg +0 -3
  844. dstack/dashboard/statics/static/media/tag-minus.15680a815b0b8d027e973c84832c05e6.svg +0 -3
  845. dstack/dashboard/statics/static/media/tag-outline.19b0bf86a8afd7d6d9c716e9a91d94ca.svg +0 -3
  846. dstack/dashboard/statics/static/media/twitter.4af18861c84a2f3044c7546b55d5739c.svg +0 -3
  847. dstack/dashboard/tags.py +0 -119
  848. dstack/jobs.py +0 -255
  849. dstack/providers/__init__.py +0 -316
  850. dstack/providers/_python/main.py +0 -88
  851. dstack/providers/_tensorboard/main.py +0 -93
  852. dstack/providers/_torchrun/main.py +0 -121
  853. dstack/providers/bash/main.py +0 -90
  854. dstack/providers/code/main.py +0 -95
  855. dstack/providers/docker/main.py +0 -79
  856. dstack/providers/lab/main.py +0 -95
  857. dstack/providers/notebook/main.py +0 -90
  858. dstack/random_name.py +0 -29
  859. dstack/repo.py +0 -135
  860. dstack/runners.py +0 -35
  861. dstack/util.py +0 -15
  862. dstack-0.0.9.dist-info/METADATA +0 -176
  863. dstack-0.0.9.dist-info/RECORD +0 -179
  864. dstack-0.0.9.dist-info/entry_points.txt +0 -3
  865. dstack-0.0.9.dist-info/top_level.txt +0 -2
  866. tests/test_config.py +0 -70
  867. /dstack/{cli → _internal}/__init__.py +0 -0
  868. /dstack/{dashboard → _internal/cli}/__init__.py +0 -0
  869. /dstack/{providers/_python → _internal/cli/models}/__init__.py +0 -0
  870. /dstack/{providers/_tensorboard → _internal/cli/services}/__init__.py +0 -0
  871. /dstack/{providers/_torchrun → _internal/cli/utils}/__init__.py +0 -0
  872. /dstack/{providers/bash → _internal/core}/__init__.py +0 -0
  873. /dstack/{providers/code → _internal/core/backends}/__init__.py +0 -0
  874. /dstack/{providers/docker → _internal/core/backends/aws}/__init__.py +0 -0
  875. /dstack/{providers/lab → _internal/core/backends/azure}/__init__.py +0 -0
  876. /dstack/{providers/notebook → _internal/core/backends/base}/__init__.py +0 -0
  877. {tests → dstack/_internal/core/backends/cloudrift}/__init__.py +0 -0
  878. /dstack/{dashboard → _internal/server}/statics/assets/yandex-browser-manifest.json +0 -0
  879. /dstack/{dashboard → _internal/server}/statics/robots.txt +0 -0
@@ -0,0 +1,1101 @@
1
+ import os
2
+ import random
3
+ import re
4
+ import shlex
5
+ import string
6
+ import threading
7
+ from abc import ABC, abstractmethod
8
+ from collections.abc import Iterable, Iterator
9
+ from dataclasses import dataclass, field
10
+ from enum import Enum
11
+ from functools import lru_cache
12
+ from pathlib import Path
13
+ from typing import Callable, Dict, List, Optional
14
+
15
+ import git
16
+ import requests
17
+ import yaml
18
+ from cachetools import Cache, TTLCache, cachedmethod
19
+ from gpuhunt import CPUArchitecture
20
+
21
+ from dstack._internal import settings
22
+ from dstack._internal.core.backends.base.models import JobConfiguration
23
+ from dstack._internal.core.backends.base.offers import OfferModifier, filter_offers_by_requirements
24
+ from dstack._internal.core.consts import (
25
+ DSTACK_RUNNER_HTTP_PORT,
26
+ DSTACK_RUNNER_SSH_PORT,
27
+ DSTACK_SHIM_HTTP_PORT,
28
+ )
29
+ from dstack._internal.core.models.backends.base import BackendType
30
+ from dstack._internal.core.models.compute_groups import ComputeGroup, ComputeGroupProvisioningData
31
+ from dstack._internal.core.models.gateways import (
32
+ GatewayComputeConfiguration,
33
+ GatewayProvisioningData,
34
+ )
35
+ from dstack._internal.core.models.instances import (
36
+ InstanceConfiguration,
37
+ InstanceOffer,
38
+ InstanceOfferWithAvailability,
39
+ SSHKey,
40
+ )
41
+ from dstack._internal.core.models.placement import PlacementGroup, PlacementGroupProvisioningData
42
+ from dstack._internal.core.models.routers import AnyRouterConfig
43
+ from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
44
+ from dstack._internal.core.models.volumes import (
45
+ Volume,
46
+ VolumeAttachmentData,
47
+ VolumeProvisioningData,
48
+ )
49
+ from dstack._internal.core.services import is_valid_dstack_resource_name
50
+ from dstack._internal.utils.logging import get_logger
51
+ from dstack._internal.utils.path import PathLike
52
+
53
+ logger = get_logger(__name__)
54
+
55
# Binary names and the shim restart interval (in seconds, per the constant's name).
DSTACK_SHIM_BINARY_NAME = "dstack-shim"
DSTACK_SHIM_RESTART_INTERVAL_SECONDS = 3
DSTACK_RUNNER_BINARY_NAME = "dstack-runner"

# The three private IPv4 ranges (10/8, 172.16/12, 192.168/16).
DEFAULT_PRIVATE_SUBNETS = ("10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")

# All NVIDIA architectures prior to Turing do not support Open Kernel Modules and require
# proprietary modules. This list is incomplete, update when necessary.
NVIDIA_GPUS_REQUIRING_PROPRIETARY_KERNEL_MODULES = frozenset(
    {
        "v100",
        "p100",
        "p40",
        "p4",
        "m60",
        "m40",
        "m4",
        "k80",
        "k40",
        "k20",
    }
)
75
+
76
+
77
class GoArchType(str, Enum):
    """
    A subset of GOARCH values
    """

    AMD64 = "amd64"
    ARM64 = "arm64"

    def to_cpu_architecture(self) -> CPUArchitecture:
        """
        Maps this GOARCH value to the corresponding `CPUArchitecture`.

        Raises `AssertionError` if the member has no mapping, which can only happen
        when a new member is added without extending this method.
        """
        if self is GoArchType.AMD64:
            return CPUArchitecture.X86
        if self is GoArchType.ARM64:
            return CPUArchitecture.ARM
        # Raised explicitly instead of `assert False, self`: assert statements are removed
        # under `python -O`, which would make this method silently return `None`.
        raise AssertionError(self)
91
+
92
+
93
@dataclass
class ComputeCache:
    """
    Pairs a `Cache` with a `threading.Lock`.
    """

    # The cache object itself; it has no default and must be supplied by the caller.
    cache: Cache
    # A new lock is created for every instance unless one is passed in explicitly.
    lock: threading.Lock = field(default_factory=threading.Lock)
97
+
98
+
99
@dataclass
class ComputeTTLCache:
    """
    Pairs a `TTLCache` with a `threading.Lock`.
    """

    # The TTL cache object itself; it has no default and must be supplied by the caller.
    cache: TTLCache
    # A new lock is created for every instance unless one is passed in explicitly.
    lock: threading.Lock = field(default_factory=threading.Lock)
103
+
104
+
105
class Compute(ABC):
    """
    The base class that every compute implementation derives from. It covers minimal features only.
    A compute that supports additional features must also subclass the `ComputeWith*` classes.
    """

    @abstractmethod
    def get_offers(self, requirements: Requirements) -> Iterator[InstanceOfferWithAvailability]:
        """
        Returns the offers, with availability, that match `requirements`.
        For a provider that is added to gpuhunt, the typical implementation gets offers via
        `base.offers.get_catalog_offers()` and extends them with availability info.
        The method is called from async code in an executor: blocking on the call itself
        is fine, blocking between yields is not.
        """

    @abstractmethod
    def run_job(
        self,
        run: Run,
        job: Job,
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        volumes: List[Volume],
        placement_group: Optional[PlacementGroup],
    ) -> JobProvisioningData:
        """
        Launches a new instance for the job and should return `JobProvisioningData` ASAP.
        If the IP address or SSH port can only be obtained by waiting, return a partially
        filled `JobProvisioningData` and implement `update_provisioning_data()`.
        """

    @abstractmethod
    def terminate_instance(
        self,
        instance_id: str,
        region: str,
        backend_data: Optional[str] = None,
    ) -> None:
        """
        Terminates the instance identified by `instance_id`.
        A non-existent instance is not an error: the method should return silently.

        Should return ASAP. If some operation has to be waited for, raise `NotYetTerminated`;
        the method will then be called again after a few seconds.
        """

    def update_provisioning_data(
        self,
        provisioning_data: JobProvisioningData,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
    ):
        """
        Called when the `JobProvisioningData` returned from `run_job()`/`create_instance()`
        is not complete, e.g. `hostname` or `ssh_port` is missing.
        Useful when getting complete provisioning data takes a lot of time.
        It should not wait but return immediately.
        Raising `ProvisioningError` stops any further attempts to update the provisioning data,
        and the run will be terminated.
        """
170
+
171
+
172
class ComputeWithAllOffersCached(ABC):
    """
    Provides common `get_offers()` implementation for backends
    whose offers do not depend on requirements.
    It caches all offers with availability and post-filters by requirements.
    """

    def __init__(self) -> None:
        super().__init__()
        # Lock and single-entry TTL cache used by `_get_all_offers_with_availability_cached()`.
        self._offers_cache_lock = threading.Lock()
        self._offers_cache = TTLCache(maxsize=1, ttl=180)

    @abstractmethod
    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
        """
        Returns all backend offers with availability.
        """
        pass

    def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
        """
        Returns functions that modify offers before they are filtered by requirements.
        A modifier function can return `None` to exclude the offer.
        E.g. can be used to set appropriate disk size based on requirements.
        """
        return []

    def get_offers_post_filter(
        self, requirements: Requirements
    ) -> Optional[Callable[[InstanceOfferWithAvailability], bool]]:
        """
        Returns a filter function to apply to offers based on requirements.
        This allows backends to implement custom post-filtering logic for specific requirements.
        """
        return None

    def get_offers(self, requirements: Requirements) -> Iterator[InstanceOfferWithAvailability]:
        """
        Returns the cached offers after applying, in order: the modifiers from
        `get_offers_modifiers()`, `filter_offers_by_requirements()`, and the optional
        predicate from `get_offers_post_filter()`.
        """
        cached_offers = self._get_all_offers_with_availability_cached()
        offers = self.__apply_modifiers(cached_offers, self.get_offers_modifiers(requirements))
        offers = filter_offers_by_requirements(offers, requirements)
        post_filter = self.get_offers_post_filter(requirements)
        if post_filter is not None:
            offers = (o for o in offers if post_filter(o))
        return offers

    @cachedmethod(
        cache=lambda self: self._offers_cache,
        lock=lambda self: self._offers_cache_lock,
    )
    def _get_all_offers_with_availability_cached(self) -> List[InstanceOfferWithAvailability]:
        """Cached wrapper around `get_all_offers_with_availability()`."""
        return self.get_all_offers_with_availability()

    @staticmethod
    def __apply_modifiers(
        offers: Iterable[InstanceOfferWithAvailability], modifiers: Iterable[OfferModifier]
    ) -> Iterator[InstanceOfferWithAvailability]:
        """
        Lazily passes every offer through all `modifiers` in order.
        An offer is dropped as soon as one modifier returns `None`.
        """
        # `modifiers` is only promised to be an Iterable. Materialize it once so that a
        # one-shot iterator (e.g. a generator) is not exhausted after the first offer,
        # which would let all later offers through unmodified.
        modifiers = tuple(modifiers)
        for offer in offers:
            for modifier in modifiers:
                offer = modifier(offer)
                if offer is None:
                    break
            else:
                # Reached only when no modifier rejected the offer.
                yield offer
235
+
236
+
237
class ComputeWithFilteredOffersCached(ABC):
    """
    Shared `get_offers()` implementation for backends whose offers
    depend on requirements.
    Offers are cached with the requirements acting as the cache key.
    """

    def __init__(self) -> None:
        super().__init__()
        self._offers_cache_lock = threading.Lock()
        self._offers_cache = TTLCache(maxsize=10, ttl=180)

    @abstractmethod
    def get_offers_by_requirements(
        self, requirements: Requirements
    ) -> List[InstanceOfferWithAvailability]:
        """
        Returns backend offers with availability that match the requirements.
        """
        pass

    def get_offers(self, requirements: Requirements) -> Iterator[InstanceOfferWithAvailability]:
        """Returns an iterator over the (possibly cached) offers for `requirements`."""
        matching_offers = self._get_offers_cached(requirements)
        return iter(matching_offers)

    def _get_offers_cached_key(self, requirements: Requirements) -> int:
        """Builds the cache key for `_get_offers_cached()`."""
        # Requirements cannot be hashed directly, so hash its JSON dump instead
        serialized = requirements.json()
        return hash(serialized)

    @cachedmethod(
        cache=lambda self: self._offers_cache,
        key=_get_offers_cached_key,
        lock=lambda self: self._offers_cache_lock,
    )
    def _get_offers_cached(
        self, requirements: Requirements
    ) -> List[InstanceOfferWithAvailability]:
        """Cached wrapper around `get_offers_by_requirements()`."""
        return self.get_offers_by_requirements(requirements)
274
+
275
+
276
class ComputeWithCreateInstanceSupport(ABC):
    """
    Subclass and implement this to support fleets (creating instances without running a job).
    A compute that runs VMs would typically implement it,
    while a compute that runs containers would not.
    """

    @abstractmethod
    def create_instance(
        self,
        instance_offer: InstanceOfferWithAvailability,
        instance_config: InstanceConfiguration,
        placement_group: Optional[PlacementGroup],
    ) -> JobProvisioningData:
        """
        Launches a new instance and should return `JobProvisioningData` ASAP.
        If the IP address or SSH port is not known yet, return partially filled
        `JobProvisioningData` and implement `update_provisioning_data()`.
        """
        pass

    def run_job(
        self,
        run: Run,
        job: Job,
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        volumes: List[Volume],
        placement_group: Optional[PlacementGroup],
    ) -> JobProvisioningData:
        """
        Default `run_job()` for every backend that supports `create_instance()`.
        Override it only when custom `run_job()` behavior is needed.
        """
        project_key = SSHKey(public=project_ssh_public_key.strip())
        instance_config = InstanceConfiguration(
            project_name=run.project_name,
            instance_name=get_job_instance_name(run, job),
            user=run.user,
            ssh_keys=[project_key],
            volumes=volumes,
            reservation=run.run_spec.configuration.reservation,
            tags=run.run_spec.merged_profile.tags,
        )
        # Work on a copy so the availability zones are reassigned on the copy,
        # not on the offer object passed in by the caller.
        offer_copy = instance_offer.copy()
        self._restrict_instance_offer_az_to_volumes_az(offer_copy, volumes)
        return self.create_instance(offer_copy, instance_config, placement_group=placement_group)

    def _restrict_instance_offer_az_to_volumes_az(
        self,
        instance_offer: InstanceOfferWithAvailability,
        volumes: List[Volume],
    ):
        """
        Narrows `instance_offer.availability_zones` to the availability zone of the
        first volume, if that volume has one. Only `volumes[0]` is consulted.
        """
        if not volumes:
            return
        first_volume = volumes[0]
        if first_volume.provisioning_data is None:
            return
        if first_volume.provisioning_data.availability_zone is None:
            return
        if instance_offer.availability_zones is None:
            instance_offer.availability_zones = [first_volume.provisioning_data.availability_zone]
        matching_zones = []
        for zone in instance_offer.availability_zones:
            if zone == first_volume.provisioning_data.availability_zone:
                matching_zones.append(zone)
        instance_offer.availability_zones = matching_zones
345
+
346
+
347
class ComputeWithGroupProvisioningSupport(ABC):
    """
    Interface for computes that provision a group of jobs with one call (`run_jobs()`)
    and terminate the resulting compute group with one call (`terminate_compute_group()`).
    """

    @abstractmethod
    def run_jobs(
        self,
        run: Run,
        job_configurations: List[JobConfiguration],
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        placement_group: Optional[PlacementGroup],
    ) -> ComputeGroupProvisioningData:
        """
        Provisions compute for all `job_configurations` of `run` and returns
        `ComputeGroupProvisioningData`.
        NOTE(review): presumably the group counterpart of `Compute.run_job()` — confirm
        the exact contract with implementers.
        """
        pass

    @abstractmethod
    def terminate_compute_group(self, compute_group: ComputeGroup):
        """
        Terminates the given compute group.
        """
        pass
363
+
364
+
365
class ComputeWithPrivilegedSupport:
    """
    Marker mixin: subclass it to support runs with `privileged: true`.
    Every VM-based Compute (i.e. every Compute that uses the shim) should subclass it.
    """
372
+
373
+
374
class ComputeWithMultinodeSupport:
    """
    Marker mixin: subclass it to support multinode tasks and cluster fleets.
    Instances provisioned in the same project/region must be interconnected.
    """
381
+
382
+
383
class ComputeWithReservationSupport:
    """
    Marker mixin: subclass it to support provisioning from reservations.

    A backend that supports reservations is expected to behave as follows:

    - `get_offers` honors `Requirements.reservation` when it is set and returns only
      offers that can be provisioned in the configured reservation. It may adjust
      offer properties such as `availability` and `availability_zones` if necessary.
    - `create_instance` honors `InstanceConfiguration.reservation` when it is set and
      provisions the instance in the configured reservation.
    """
398
+
399
+
400
class ComputeWithPlacementGroupSupport(ABC):
    """
    Must be subclassed and implemented to support placement groups.
    """

    @abstractmethod
    def create_placement_group(
        self,
        placement_group: PlacementGroup,
        master_instance_offer: InstanceOffer,
    ) -> PlacementGroupProvisioningData:
        """
        Creates a placement group.

        Args:
            placement_group: details about the placement group to be created
            master_instance_offer: the first instance dstack will attempt to add
                to the placement group
        """
        pass

    @abstractmethod
    def delete_placement_group(
        self,
        placement_group: PlacementGroup,
    ):
        """
        Deletes a placement group.
        If the group does not exist, it should not raise errors but return silently.
        """
        pass

    @abstractmethod
    def is_suitable_placement_group(
        self,
        placement_group: PlacementGroup,
        instance_offer: InstanceOffer,
    ) -> bool:
        """
        Checks if the instance offer can be provisioned in the placement group.

        Should return immediately, without performing API calls.
        """
        pass

    # Not abstract: the default answer is `True`; override where it does not hold.
    def are_placement_groups_compatible_with_reservations(self, backend_type: BackendType) -> bool:
        """
        Whether placement groups can be used for instances provisioned in reservations.

        Args:
            backend_type: matches the backend type of this compute, unless this compute is a proxy
                for other backends (dstack Sky)
        """
        return True
454
+
455
+
456
class ComputeWithGatewaySupport(ABC):
    """
    Must be subclassed and implemented to support gateways.
    """

    @abstractmethod
    def create_gateway(
        self,
        configuration: GatewayComputeConfiguration,
    ) -> GatewayProvisioningData:
        """
        Creates a gateway instance.

        Args:
            configuration: the compute configuration of the gateway to create
        """
        pass

    @abstractmethod
    def terminate_gateway(
        self,
        instance_id: str,
        configuration: GatewayComputeConfiguration,
        backend_data: Optional[str] = None,
    ):
        """
        Terminates a gateway instance. Generally, it passes the call to `terminate_instance()`,
        but may perform additional work such as deleting a load balancer when a gateway has one.
        """
        pass
483
+
484
+
485
class ComputeWithPrivateGatewaySupport:
    """
    Marker mixin: subclass it to support private gateways.
    `create_gateway()` must then be able to create private gateways.
    """
492
+
493
+
494
class ComputeWithVolumeSupport(ABC):
    """
    Must be subclassed and implemented to support volumes.

    `register_volume()`, `create_volume()` and `delete_volume()` are abstract.
    `attach_volume()` and `detach_volume()` raise `NotImplementedError` unless overridden,
    and `is_volume_detached()` returns `True` unless overridden.
    """

    @abstractmethod
    def register_volume(self, volume: Volume) -> VolumeProvisioningData:
        """
        Returns VolumeProvisioningData for an existing volume.
        Used to add external volumes to dstack.
        """
        pass

    @abstractmethod
    def create_volume(self, volume: Volume) -> VolumeProvisioningData:
        """
        Creates a new volume.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete_volume(self, volume: Volume):
        """
        Deletes a volume.
        """
        raise NotImplementedError()

    def attach_volume(
        self, volume: Volume, provisioning_data: JobProvisioningData
    ) -> VolumeAttachmentData:
        """
        Attaches a volume to the instance.
        If the volume is not found, it should raise `ComputeError()`.
        Implement only if compute may return `VolumeProvisioningData.attachable`.
        Otherwise, volumes should be attached by `run_job()`.
        """
        raise NotImplementedError()

    def detach_volume(
        self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
    ):
        """
        Detaches a volume from the instance.
        Implement only if compute may return `VolumeProvisioningData.detachable`.
        Otherwise, volumes should be detached on instance termination.
        """
        raise NotImplementedError()

    def is_volume_detached(self, volume: Volume, provisioning_data: JobProvisioningData) -> bool:
        """
        Checks if a volume was detached from the instance.
        If `detach_volume()` may fail to detach volume,
        this method should be overridden to check the volume status.
        The caller will trigger force detach if the volume gets stuck detaching.
        """
        # Default: report the volume as detached.
        return True
550
+
551
+
552
def get_dstack_working_dir(base_path: Optional[PathLike] = None) -> str:
    """
    Returns the path of the `.dstack` directory under `base_path`
    (under `/root` when `base_path` is not given).
    """
    parent_dir = "/root" if base_path is None else base_path
    return str(Path(parent_dir) / ".dstack")
556
+
557
+
558
def get_dstack_shim_binary_path(bin_path: Optional[PathLike] = None) -> str:
    """
    Returns the path of the shim binary inside `bin_path`
    (inside `/usr/local/bin` when `bin_path` is not given).
    """
    bin_dir = "/usr/local/bin" if bin_path is None else bin_path
    return str(Path(bin_dir) / DSTACK_SHIM_BINARY_NAME)
562
+
563
+
564
def get_dstack_runner_binary_path(bin_path: Optional[PathLike] = None) -> str:
    """
    Returns the path of the runner binary inside `bin_path`
    (inside `/usr/local/bin` when `bin_path` is not given).
    """
    bin_dir = "/usr/local/bin" if bin_path is None else bin_path
    return str(Path(bin_dir) / DSTACK_RUNNER_BINARY_NAME)
568
+
569
+
570
def get_job_instance_name(run: Run, job: Job) -> str:
    """Returns the instance name for a job: the job's `job_spec.job_name` (`run` is not used)."""
    job_spec = job.job_spec
    return job_spec.job_name
572
+
573
+
574
# Default `max_length` of the `generate_unique_*` helpers in this module.
_DEFAULT_MAX_RESOURCE_NAME_LEN = 60
# Number of random characters appended by `_generate_unique_backend_name_with_prefix()`.
_CLOUD_RESOURCE_SUFFIX_LEN = 8
576
+
577
+
578
def generate_unique_instance_name(
    instance_configuration: InstanceConfiguration,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique instance name that is valid across all backends
    from the configuration's instance and project names.
    """
    instance_name = instance_configuration.instance_name
    project_name = instance_configuration.project_name
    return generate_unique_backend_name(
        resource_name=instance_name,
        project_name=project_name,
        max_length=max_length,
    )
590
+
591
+
592
def generate_unique_instance_name_for_job(
    run: Run,
    job: Job,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique, backend-agnostic instance name for a job
    from the job instance name and the run's project name.
    """
    instance_name = get_job_instance_name(run, job)
    project_name = run.project_name
    return generate_unique_backend_name(
        resource_name=instance_name,
        project_name=project_name,
        max_length=max_length,
    )
605
+
606
+
607
def generate_unique_gateway_instance_name(
    gateway_compute_configuration: GatewayComputeConfiguration,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique gateway instance name that is valid across all backends
    from the configuration's instance and project names.
    """
    instance_name = gateway_compute_configuration.instance_name
    project_name = gateway_compute_configuration.project_name
    return generate_unique_backend_name(
        resource_name=instance_name,
        project_name=project_name,
        max_length=max_length,
    )
619
+
620
+
621
def generate_unique_volume_name(
    volume: Volume,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique volume name that is valid across all backends
    from the volume's own name and its project name.
    """
    volume_name = volume.name
    project_name = volume.project_name
    return generate_unique_backend_name(
        resource_name=volume_name,
        project_name=project_name,
        max_length=max_length,
    )
633
+
634
+
635
def generate_unique_placement_group_name(
    project_name: str,
    fleet_name: str,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique placement group name that is valid across all backends.
    The fleet name serves as the resource name.
    """
    name_parts = {"resource_name": fleet_name, "project_name": project_name}
    return generate_unique_backend_name(max_length=max_length, **name_parts)
648
+
649
+
650
def generate_unique_backend_name(
    resource_name: str,
    project_name: Optional[str],
    max_length: int,
) -> str:
    """
    Builds a unique resource name that is valid across all backends.
    A fresh name is needed on every provisioning so that re-submitting or
    re-creating a resource cannot clash on backends that demand unique
    names (e.g. Azure, GCP).
    """
    # resource_name is guaranteed to be valid in all backends, whereas
    # project_name is not, so it is included only when it passes validation
    include_project = project_name is not None and is_valid_dstack_resource_name(project_name)
    if include_project:
        name_prefix = f"dstack-{project_name}-{resource_name}"
    else:
        name_prefix = f"dstack-{resource_name}"
    return _generate_unique_backend_name_with_prefix(prefix=name_prefix, max_length=max_length)
671
+
672
+
673
def _generate_unique_backend_name_with_prefix(
    prefix: str,
    max_length: int,
) -> str:
    """
    Returns `prefix`, cut so that the result fits in `max_length`, followed by `-`
    and `_CLOUD_RESOURCE_SUFFIX_LEN` random lowercase letters/digits.
    """
    # NOTE(review): if max_length <= _CLOUD_RESOURCE_SUFFIX_LEN, the slice bound below is
    # negative and cuts from the end instead — confirm callers never pass such values.
    alphabet = string.ascii_lowercase + string.digits
    kept_prefix_len = max_length - _CLOUD_RESOURCE_SUFFIX_LEN - 1
    random_chars = [random.choice(alphabet) for _ in range(_CLOUD_RESOURCE_SUFFIX_LEN)]
    return prefix[:kept_prefix_len] + "-" + "".join(random_chars)
684
+
685
+
686
def get_cloud_config(**config) -> str:
    """Dumps the keyword arguments as block-style YAML below a `#cloud-config` header line."""
    yaml_body = yaml.dump(config, default_flow_style=False)
    return f"#cloud-config\n{yaml_body}"
688
+
689
+
690
def get_user_data(
    authorized_keys: List[str],
    backend_specific_commands: Optional[List[str]] = None,
    base_path: Optional[PathLike] = None,
    bin_path: Optional[PathLike] = None,
    backend_shim_env: Optional[Dict[str, str]] = None,
    skip_firewall_setup: bool = False,
    firewall_allow_from_subnets: Iterable[str] = DEFAULT_PRIVATE_SUBNETS,
) -> str:
    """
    Builds the cloud-config user data for an instance: one `sh -c` run command that
    chains the backend-specific commands and the shim commands with `&&`, plus the
    SSH authorized keys.
    """
    shim_commands = get_shim_commands(
        base_path=base_path,
        bin_path=bin_path,
        backend_shim_env=backend_shim_env,
        skip_firewall_setup=skip_firewall_setup,
        firewall_allow_from_subnets=firewall_allow_from_subnets,
    )
    # Backend-specific commands, if any, run before the shim commands
    leading_commands = backend_specific_commands or []
    script = " && ".join(leading_commands + shim_commands)
    return get_cloud_config(
        runcmd=[["sh", "-c", script]],
        ssh_authorized_keys=authorized_keys,
    )
711
+
712
+
713
def get_shim_env(
    base_path: Optional[PathLike] = None,
    bin_path: Optional[PathLike] = None,
    backend_shim_env: Optional[Dict[str, str]] = None,
    arch: Optional[str] = None,
) -> Dict[str, str]:
    """
    Returns the `DSTACK_SHIM_*` / `DSTACK_RUNNER_*` environment variables for the shim.
    Entries of `backend_shim_env`, if given, are applied last and override the defaults.
    """
    debug_level = "5"  # Debug
    shim_home = get_dstack_working_dir(base_path)
    runner_url = get_dstack_runner_download_url(arch)
    runner_path = get_dstack_runner_binary_path(bin_path)
    envs = {
        "DSTACK_SHIM_HOME": shim_home,
        "DSTACK_SHIM_HTTP_PORT": str(DSTACK_SHIM_HTTP_PORT),
        "DSTACK_SHIM_LOG_LEVEL": debug_level,
        "DSTACK_RUNNER_DOWNLOAD_URL": runner_url,
        "DSTACK_RUNNER_BINARY_PATH": runner_path,
        "DSTACK_RUNNER_HTTP_PORT": str(DSTACK_RUNNER_HTTP_PORT),
        "DSTACK_RUNNER_SSH_PORT": str(DSTACK_RUNNER_SSH_PORT),
        "DSTACK_RUNNER_LOG_LEVEL": debug_level,
    }
    if backend_shim_env is None:
        return envs
    envs.update(backend_shim_env)
    return envs
733
+
734
+
735
def get_shim_commands(
    *,
    is_privileged: bool = False,
    pjrt_device: Optional[str] = None,
    base_path: Optional[PathLike] = None,
    bin_path: Optional[PathLike] = None,
    backend_shim_env: Optional[Dict[str, str]] = None,
    arch: Optional[str] = None,
    skip_firewall_setup: bool = False,
    firewall_allow_from_subnets: Iterable[str] = DEFAULT_PRIVATE_SUBNETS,
) -> List[str]:
    """
    Returns the shell commands that prepare an instance and start the shim, in order:
    instance setup, shim download/installation, `export` of the shim environment,
    and the shim launch script.
    """
    commands = get_setup_cloud_instance_commands(
        skip_firewall_setup=skip_firewall_setup,
        firewall_allow_from_subnets=firewall_allow_from_subnets,
    )
    commands.extend(get_shim_pre_start_commands(base_path=base_path, bin_path=bin_path, arch=arch))
    shim_env = get_shim_env(
        base_path=base_path,
        bin_path=bin_path,
        backend_shim_env=backend_shim_env,
        arch=arch,
    )
    # NOTE(review): values are placed inside double quotes without escaping — confirm
    # that env values never contain characters the shell treats specially there.
    commands.extend(f'export "{name}={value}"' for name, value in shim_env.items())
    commands.extend(
        get_run_shim_script(
            is_privileged=is_privileged,
            pjrt_device=pjrt_device,
            bin_path=bin_path,
        )
    )
    return commands
769
+
770
+
771
def get_dstack_runner_version() -> Optional[str]:
    """
    Resolves the runner version. Sources are tried in this order, first non-empty wins:
    `DSTACK_VERSION`, `DSTACK_RUNNER_VERSION`, the version fetched from
    `DSTACK_RUNNER_VERSION_URL`, the latest build (if `DSTACK_USE_LATEST_FROM_BRANCH`).
    Returns `None` when none applies.
    """
    pinned_version = settings.DSTACK_VERSION or settings.DSTACK_RUNNER_VERSION
    if pinned_version:
        return pinned_version
    version_url = settings.DSTACK_RUNNER_VERSION_URL
    if version_url:
        return _fetch_version(version_url)
    if not settings.DSTACK_USE_LATEST_FROM_BRANCH:
        return None
    return get_latest_runner_build()
781
+
782
+
783
def get_dstack_shim_version() -> Optional[str]:
    """
    Resolves the shim version. Sources are tried in this order, first non-empty wins:
    `DSTACK_VERSION`, `DSTACK_SHIM_VERSION`, `DSTACK_RUNNER_VERSION` (with a warning),
    the version fetched from `DSTACK_SHIM_VERSION_URL`, the latest build
    (if `DSTACK_USE_LATEST_FROM_BRANCH`). Returns `None` when none applies.
    """
    if version := settings.DSTACK_VERSION:
        return version
    if version := settings.DSTACK_SHIM_VERSION:
        return version
    if version := settings.DSTACK_RUNNER_VERSION:
        # Fallback to the runner version; the warning text announces that it is going away.
        logger.warning(
            "DSTACK_SHIM_VERSION is not set, using DSTACK_RUNNER_VERSION."
            " Future versions will not fall back to DSTACK_RUNNER_VERSION."
            " Set DSTACK_SHIM_VERSION to suppress this warning."
        )
        return version
    if version_url := settings.DSTACK_SHIM_VERSION_URL:
        return _fetch_version(version_url)
    if settings.DSTACK_USE_LATEST_FROM_BRANCH:
        return get_latest_runner_build()
    return None
800
+
801
+
802
def normalize_arch(arch: Optional[str] = None) -> GoArchType:
    """
    Translates a free-form architecture string into the Go GOARCH format.
    Only 64-bit x86 and ARM are supported; when no word size is given
    (e.g., `x86`, `arm`), 64-bit is assumed.
    A missing or empty arch falls back to `amd64`.

    Raises:
        ValueError: for 32-bit or otherwise unsupported architectures.
    """
    if not arch:
        return GoArchType.AMD64
    normalized = arch.lower()
    looks_32_bit = "32" in normalized or normalized in ("i386", "i686")
    if looks_32_bit:
        raise ValueError(f"32-bit architectures are not supported: {arch}")
    if normalized.startswith(("x86", "amd")):
        return GoArchType.AMD64
    if normalized.startswith(("arm", "aarch")):
        return GoArchType.ARM64
    raise ValueError(f"Unsupported architecture: {arch}")
819
+
820
+
821
def get_dstack_runner_download_url(
    arch: Optional[str] = None, version: Optional[str] = None
) -> str:
    """
    Returns the runner download URL for `arch` and `version`.
    The template comes from `settings.DSTACK_RUNNER_DOWNLOAD_URL` or, when that is empty,
    from a default S3 bucket chosen by whether `settings.DSTACK_VERSION` is set.
    A missing `version` is resolved via `get_dstack_runner_version()`, then `"latest"`.
    """
    template = settings.DSTACK_RUNNER_DOWNLOAD_URL
    if not template:
        bucket = (
            "dstack-runner-downloads"
            if settings.DSTACK_VERSION is not None
            else "dstack-runner-downloads-stgn"
        )
        # Only the first literal is an f-string; {version} and {arch} stay as placeholders
        template = (
            f"https://{bucket}.s3.eu-west-1.amazonaws.com"
            "/{version}/binaries/dstack-runner-linux-{arch}"
        )
    effective_version = version
    if effective_version is None:
        effective_version = get_dstack_runner_version() or "latest"
    return _format_download_url(template, effective_version, arch)
837
+
838
+
839
def get_dstack_shim_download_url(arch: Optional[str] = None, version: Optional[str] = None) -> str:
    """
    Returns the shim download URL for `arch` and `version`.
    The template comes from `settings.DSTACK_SHIM_DOWNLOAD_URL` or, when that is empty,
    from a default S3 bucket chosen by whether `settings.DSTACK_VERSION` is set.
    A missing `version` is resolved via `get_dstack_shim_version()`, then `"latest"`.
    """
    template = settings.DSTACK_SHIM_DOWNLOAD_URL
    if not template:
        bucket = (
            "dstack-runner-downloads"
            if settings.DSTACK_VERSION is not None
            else "dstack-runner-downloads-stgn"
        )
        # Only the first literal is an f-string; {version} and {arch} stay as placeholders
        template = (
            f"https://{bucket}.s3.eu-west-1.amazonaws.com"
            "/{version}/binaries/dstack-shim-linux-{arch}"
        )
    effective_version = version
    if effective_version is None:
        effective_version = get_dstack_shim_version() or "latest"
    return _format_download_url(template, effective_version, arch)
853
+
854
+
855
def get_setup_cloud_instance_commands(
    skip_firewall_setup: bool,
    firewall_allow_from_subnets: Iterable[str],
) -> list[str]:
    """
    Returns the shell commands that prepare a cloud instance: a best-effort patch of
    the Docker daemon config and, unless `skip_firewall_setup` is set, a `ufw` setup
    that denies incoming traffic except SSH and the given subnets.
    """
    # Workaround for https://github.com/NVIDIA/nvidia-container-toolkit/issues/48
    # Tries to patch /etc/docker/daemon.json without losing custom settings it may contain.
    # The /bin/sh wrapper keeps it from interfering with other cloud init commands.
    docker_daemon_patch = (
        "/bin/sh -c '"
        " grep -q nvidia /etc/docker/daemon.json"
        " && ! grep -q native.cgroupdriver /etc/docker/daemon.json"
        " && jq '\\''.\"exec-opts\" = ((.\"exec-opts\" // []) + [\"native.cgroupdriver=cgroupfs\"])'\\'' /etc/docker/daemon.json > /tmp/daemon.json"
        " && sudo mv /tmp/daemon.json /etc/docker/daemon.json"
        " && sudo service docker restart"
        " || true"
        "'"
    )
    commands = [docker_daemon_patch]
    if skip_firewall_setup:
        return commands
    commands.extend(
        [
            # Some OS images ship default rules like `allow 80`; the reset removes them
            "ufw --force reset",
            "ufw default deny incoming",
            "ufw default allow outgoing",
            "ufw allow ssh",
        ]
    )
    commands.extend(f"ufw allow from {subnet}" for subnet in firewall_allow_from_subnets)
    commands.append("ufw --force enable")
    return commands
886
+
887
+
888
def get_shim_pre_start_commands(
    base_path: Optional[PathLike] = None,
    bin_path: Optional[PathLike] = None,
    arch: Optional[str] = None,
) -> List[str]:
    """
    Returns the shell commands that download the shim binary into a temporary file,
    move it to its final path, make it executable, and create the dstack working directory.
    """
    url = get_dstack_shim_download_url(arch)
    shim_path = get_dstack_shim_binary_path(bin_path)
    working_dir = get_dstack_working_dir(base_path)

    make_temp_file = f"dlpath=$(sudo mktemp -t {DSTACK_SHIM_BINARY_NAME}.XXXXXXXXXX)"
    # -sS -- no progress meter and no warnings, while errors are still shown (unlike bare -s)
    download = f'sudo curl -sS --compressed --connect-timeout 60 --max-time 240 --retry 1 --output "$dlpath" "{url}"'
    install = f'sudo mv "$dlpath" {shim_path}'
    make_executable = f"sudo chmod +x {shim_path}"
    make_working_dir = f"sudo mkdir {working_dir} -p"
    return [make_temp_file, download, install, make_executable, make_working_dir]
904
+
905
+
906
def get_run_shim_script(
    is_privileged: bool,
    pjrt_device: Optional[str],
    bin_path: Optional[PathLike] = None,
) -> List[str]:
    """
    Returns a one-element list with a shell snippet that starts the shim in the
    background under `nohup`, inside an endless loop that launches the shim again
    `DSTACK_SHIM_RESTART_INTERVAL_SECONDS` seconds after it exits.

    Args:
        is_privileged: adds the `--privileged` flag to the shim command line.
        pjrt_device: if truthy, adds `--pjrt-device=<value>` to the shim command line.
        bin_path: directory of the shim binary, see `get_dstack_shim_binary_path()`.
    """
    dstack_shim_binary_path = get_dstack_shim_binary_path(bin_path)
    privileged_flag = "--privileged" if is_privileged else ""
    # Despite the `_env` suffix, this is a command-line flag, not an environment variable.
    pjrt_device_env = f"--pjrt-device={pjrt_device}" if pjrt_device else ""
    # TODO: Use a proper process supervisor?
    return [
        f"""
        nohup sh -c '
        while true; do
            {dstack_shim_binary_path} {privileged_flag} {pjrt_device_env}
            sleep {DSTACK_SHIM_RESTART_INTERVAL_SECONDS}
        done
        ' &
        """,
    ]
925
+
926
+
927
def get_gateway_user_data(authorized_key: str, router: Optional[AnyRouterConfig] = None) -> str:
    """
    Builds the cloud-config user data for a gateway instance: installs nginx,
    python3.10-venv and certbot (via snap), adjusts the nginx config, and runs the
    dstack gateway installation commands as the `ubuntu` user.
    """
    gateway_script = " && ".join(get_dstack_gateway_commands(router))
    link_certbot = ["ln", "-s", "/snap/bin/certbot", "/usr/bin/certbot"]
    widen_nginx_hash_bucket = [
        "sed",
        "-i",
        "s/# server_names_hash_bucket_size 64;/server_names_hash_bucket_size 128;/",
        "/etc/nginx/nginx.conf",
    ]
    install_gateway = ["su", "ubuntu", "-c", gateway_script]
    return get_cloud_config(
        package_update=True,
        packages=[
            "nginx",
            "python3.10-venv",
        ],
        snap={"commands": [["install", "--classic", "certbot"]]},
        runcmd=[link_certbot, widen_nginx_hash_bucket, install_gateway],
        ssh_authorized_keys=[authorized_key],
    )
947
+
948
+
949
def get_docker_commands(
    authorized_keys: list[str],
    bin_path: Optional[PathLike] = None,
) -> list[str]:
    """
    Returns the shell commands that bootstrap the runner: make sure `sshd` and `curl`
    are installed, download the runner binary, make it executable, and start it.

    Args:
        authorized_keys: each key is passed to the runner as `--ssh-authorized-key`.
        bin_path: directory of the runner binary, see `get_dstack_runner_binary_path()`.
    """
    dstack_runner_binary_path = get_dstack_runner_binary_path(bin_path)
    # The entries from "( :" to ": )" form a subshell group once the commands are joined.
    commands = [
        "( :",
        # See https://github.com/dstackai/dstack/issues/1769
        "unset LD_LIBRARY_PATH && unset LD_PRELOAD",
        # common functions
        'exists() { command -v "$1" > /dev/null 2>&1; }',
        # package manager detection/abstraction
        # (the default install_pkg reports the distribution as unsupported and exits with 11)
        "install_pkg() { NAME=Distribution; test -f /etc/os-release && . /etc/os-release; echo $NAME not supported; exit 11; }",
        'if exists apt-get; then install_pkg() { apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y "$1"; }; fi',
        'if exists yum; then install_pkg() { yum install -y "$1"; }; fi',
        'if exists apk; then install_pkg() { apk add -U "$1"; }; fi',
        # check if sshd is here, install if not
        "if ! exists sshd; then install_pkg openssh-server; fi",
        # install curl if necessary
        "if ! exists curl; then install_pkg curl; fi",
        ": )",
    ]

    # Runner command line as an argument list; quoted as a whole by shlex.join() below.
    runner_command = [
        dstack_runner_binary_path,
        "--log-level",
        "6",
        "start",
        "--temp-dir",
        "/tmp/runner",
        "--http-port",
        str(DSTACK_RUNNER_HTTP_PORT),
        "--ssh-port",
        str(DSTACK_RUNNER_SSH_PORT),
    ]
    for authorized_key in authorized_keys:
        runner_command += ["--ssh-authorized-key", authorized_key]

    # No arch is passed here, so the URL is built for the default architecture.
    url = get_dstack_runner_download_url()
    commands += [
        f"curl --connect-timeout 60 --max-time 240 --retry 1 --output {dstack_runner_binary_path} {url}",
        f"chmod +x {dstack_runner_binary_path}",
        shlex.join(runner_command),
    ]

    return commands
995
+
996
+
997
@lru_cache()  # Restart the server to find the latest build
def get_latest_runner_build() -> Optional[str]:
    """
    Looks up the most recent successful `build.yml` workflow run on GitHub whose head
    commit is an ancestor of the local HEAD and returns its build version as a string
    (`run_number + version_offset`).

    Returns `None` if this file is not inside a git repository, if no remote points
    to the `dstackai/dstack` GitHub repository, or if no matching workflow run is found.
    The result, including `None`, is memoized by `lru_cache` for the process lifetime.

    Raises:
        requests.HTTPError: if the GitHub API responds with an error status.
        git.GitCommandError: for git failures other than an unknown commit.
    """
    owner_repo = "dstackai/dstack"
    workflow_id = "build.yml"
    # Added to the workflow run number to obtain the build version.
    version_offset = 150

    try:
        repo = git.Repo(os.path.abspath(os.path.dirname(__file__)), search_parent_directories=True)
    except git.InvalidGitRepositoryError:
        return None
    # for/else: give up unless at least one remote URL matches the upstream repository.
    # NOTE(review): the pattern requires a `.` right after the repo name (e.g. `.git`),
    # so remote URLs without that suffix do not match — confirm this is intended.
    for remote in repo.remotes:
        if re.search(rf"[@/]github\.com[:/]{owner_repo}\.", remote.url):
            break
    else:
        return None

    resp = requests.get(
        f"https://api.github.com/repos/{owner_repo}/actions/workflows/{workflow_id}/runs",
        headers={
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        },
        params={
            "status": "success",
        },
        timeout=10,
    )
    resp.raise_for_status()

    head = repo.head.commit
    # The first run (in API response order) whose head commit is an ancestor of HEAD wins.
    for run in resp.json()["workflow_runs"]:
        try:
            if repo.is_ancestor(run["head_sha"], head):
                ver = str(run["run_number"] + version_offset)
                logger.debug("Found the latest runner build: %s", ver)
                return ver
        except git.GitCommandError as e:
            # Commits unknown to the local repository are skipped; other git errors propagate.
            if "Not a valid commit name" not in e.stderr:
                raise
    return None
1037
+
1038
+
1039
def get_dstack_gateway_wheel(build: str, router: Optional[AnyRouterConfig] = None) -> str:
    """
    Returns the package spec (`name @ wheel-url`) of the dstack-gateway wheel for `build`.
    A `"latest"` build is resolved through the channel's `latest-version` file.
    If `router` is given, its type is added as an extra.
    """
    if settings.DSTACK_RELEASE:
        channel = "release"
    else:
        channel = "stgn"
    base_url = f"https://dstack-gateway-downloads.s3.amazonaws.com/{channel}"
    if build == "latest":
        build = _fetch_version(f"{base_url}/latest-version") or "latest"
        logger.debug("Found the latest gateway build: %s", build)
    wheel_url = f"{base_url}/dstack_gateway-{build}-py3-none-any.whl"
    # Add the extras to the package name when a router is specified
    package_name = f"dstack-gateway[{router.type}]" if router else "dstack-gateway"
    return f"{package_name} @ {wheel_url}"
1050
+
1051
+
1052
def get_dstack_gateway_commands(router: Optional[AnyRouterConfig] = None) -> List[str]:
    """
    Return the shell commands that install and start the dstack gateway.

    Two virtual environments, ``blue`` and ``green``, are created under
    ``/home/ubuntu/dstack``. The gateway package is installed into ``blue``,
    which is then used to run the gateway's systemd installer.

    Args:
        router: Optional router config forwarded to `get_dstack_gateway_wheel`.

    Returns:
        The commands in the order they should be executed.
    """
    runner_version = get_dstack_runner_version()
    package_spec = get_dstack_gateway_wheel(runner_version or "latest", router)

    prepare_envs: List[str] = [
        "mkdir -p /home/ubuntu/dstack",
        "python3 -m venv /home/ubuntu/dstack/blue",
        "python3 -m venv /home/ubuntu/dstack/green",
    ]
    install_gateway = f"/home/ubuntu/dstack/blue/bin/pip install '{package_spec}'"
    start_gateway = (
        "sudo /home/ubuntu/dstack/blue/bin/python -m dstack.gateway.systemd install --run"
    )
    return [*prepare_envs, install_gateway, start_gateway]
1062
+
1063
+
1064
def merge_tags(
    base_tags: Dict[str, str],
    backend_tags: Optional[Dict[str, str]] = None,
    resource_tags: Optional[Dict[str, str]] = None,
) -> Dict[str, str]:
    """
    Combine tag dictionaries without overriding already-set keys.

    Precedence, highest first: ``base_tags``, ``backend_tags``,
    ``resource_tags``. The inputs are not modified.

    Args:
        base_tags: Tags that always win.
        backend_tags: Optional tags set at the backend level.
        resource_tags: Optional tags set on the individual resource.

    Returns:
        A new dictionary with the merged tags.
    """
    merged = base_tags.copy()
    # backend_tags are applied before resource_tags so that regular users
    # do not override the tags set by admins
    for extra_tags in (backend_tags, resource_tags):
        if extra_tags is None:
            continue
        for key, value in extra_tags.items():
            if key not in merged:
                merged[key] = value
    return merged
1079
+
1080
+
1081
def requires_nvidia_proprietary_kernel_modules(gpu_name: str) -> bool:
    """
    Check a GPU name against the list of GPUs needing proprietary modules.

    The name is lowercased before the lookup.

    Returns:
        Whether this NVIDIA GPU requires NVIDIA proprietary kernel modules
        instead of open kernel modules.
    """
    normalized_name = gpu_name.lower()
    return normalized_name in NVIDIA_GPUS_REQUIRING_PROPRIETARY_KERNEL_MODULES
1088
+
1089
+
1090
def _fetch_version(url: str) -> Optional[str]:
    """
    Download a version string from `url`.

    Returns:
        The response body with surrounding whitespace stripped, or ``None``
        (after logging a warning) if the stripped body is empty.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    response = requests.get(url, timeout=5)
    response.raise_for_status()
    version = response.text.strip()
    if version:
        return version
    logger.warning("Empty version response from URL: %s", url)
    return None
1098
+
1099
+
1100
def _format_download_url(template: str, version: str, arch: Optional[str]) -> str:
    """
    Fill the ``version`` and ``arch`` placeholders of a download URL template.

    `arch` is passed through `normalize_arch` first and the ``value`` of the
    result is what gets substituted.
    """
    arch_value = normalize_arch(arch).value
    return template.format(version=version, arch=arch_value)