dstack 0.0.9__py3-none-any.whl → 0.20.7__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (879)
  1. dstack/_internal/cli/commands/__init__.py +80 -0
  2. dstack/_internal/cli/commands/apply.py +100 -0
  3. dstack/_internal/cli/commands/attach.py +161 -0
  4. dstack/_internal/cli/commands/completion.py +22 -0
  5. dstack/_internal/cli/commands/delete.py +44 -0
  6. dstack/_internal/cli/commands/event.py +168 -0
  7. dstack/_internal/cli/commands/fleet.py +161 -0
  8. dstack/_internal/cli/commands/gateway.py +159 -0
  9. dstack/_internal/cli/commands/init.py +64 -0
  10. dstack/_internal/cli/commands/login.py +352 -0
  11. dstack/_internal/cli/commands/logs.py +62 -0
  12. dstack/_internal/cli/commands/metrics.py +153 -0
  13. dstack/_internal/cli/commands/offer.py +146 -0
  14. dstack/_internal/cli/commands/project.py +259 -0
  15. dstack/_internal/cli/commands/ps.py +81 -0
  16. dstack/_internal/cli/commands/run.py +69 -0
  17. dstack/_internal/cli/commands/secrets.py +92 -0
  18. dstack/_internal/cli/commands/server.py +96 -0
  19. dstack/_internal/cli/commands/stop.py +26 -0
  20. dstack/_internal/cli/commands/volume.py +117 -0
  21. dstack/_internal/cli/main.py +101 -0
  22. dstack/_internal/cli/models/gateways.py +16 -0
  23. dstack/_internal/cli/models/offers.py +47 -0
  24. dstack/_internal/cli/models/runs.py +16 -0
  25. dstack/_internal/cli/services/args.py +31 -0
  26. dstack/_internal/cli/services/completion.py +91 -0
  27. dstack/_internal/cli/services/configurators/__init__.py +86 -0
  28. dstack/_internal/cli/services/configurators/base.py +103 -0
  29. dstack/_internal/cli/services/configurators/fleet.py +475 -0
  30. dstack/_internal/cli/services/configurators/gateway.py +231 -0
  31. dstack/_internal/cli/services/configurators/run.py +882 -0
  32. dstack/_internal/cli/services/configurators/volume.py +222 -0
  33. dstack/_internal/cli/services/events.py +68 -0
  34. dstack/_internal/cli/services/profile.py +182 -0
  35. dstack/_internal/cli/services/repos.py +71 -0
  36. dstack/_internal/cli/services/resources.py +54 -0
  37. dstack/_internal/cli/utils/common.py +159 -0
  38. dstack/_internal/cli/utils/fleet.py +106 -0
  39. dstack/_internal/cli/utils/gateway.py +56 -0
  40. dstack/_internal/cli/utils/gpu.py +178 -0
  41. dstack/_internal/cli/utils/rich.py +156 -0
  42. dstack/_internal/cli/utils/run.py +517 -0
  43. dstack/_internal/cli/utils/secrets.py +25 -0
  44. dstack/_internal/cli/utils/updates.py +98 -0
  45. dstack/_internal/cli/utils/volume.py +58 -0
  46. dstack/_internal/compat.py +3 -0
  47. dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
  48. dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
  49. dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
  50. dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
  51. dstack/_internal/core/backends/aws/auth.py +30 -0
  52. dstack/_internal/core/backends/aws/backend.py +31 -0
  53. dstack/_internal/core/backends/aws/compute.py +1153 -0
  54. dstack/_internal/core/backends/aws/configurator.py +191 -0
  55. dstack/_internal/core/backends/aws/models.py +135 -0
  56. dstack/_internal/core/backends/aws/resources.py +700 -0
  57. dstack/_internal/core/backends/azure/auth.py +39 -0
  58. dstack/_internal/core/backends/azure/backend.py +21 -0
  59. dstack/_internal/core/backends/azure/compute.py +676 -0
  60. dstack/_internal/core/backends/azure/configurator.py +472 -0
  61. dstack/_internal/core/backends/azure/models.py +98 -0
  62. dstack/_internal/core/backends/azure/resources.py +116 -0
  63. dstack/_internal/core/backends/azure/utils.py +42 -0
  64. dstack/_internal/core/backends/base/backend.py +18 -0
  65. dstack/_internal/core/backends/base/compute.py +1101 -0
  66. dstack/_internal/core/backends/base/configurator.py +117 -0
  67. dstack/_internal/core/backends/base/models.py +24 -0
  68. dstack/_internal/core/backends/base/offers.py +232 -0
  69. dstack/_internal/core/backends/cloudrift/api_client.py +220 -0
  70. dstack/_internal/core/backends/cloudrift/backend.py +16 -0
  71. dstack/_internal/core/backends/cloudrift/compute.py +138 -0
  72. dstack/_internal/core/backends/cloudrift/configurator.py +72 -0
  73. dstack/_internal/core/backends/cloudrift/models.py +40 -0
  74. dstack/_internal/core/backends/configurators.py +181 -0
  75. dstack/_internal/core/backends/cudo/__init__.py +0 -0
  76. dstack/_internal/core/backends/cudo/api_client.py +111 -0
  77. dstack/_internal/core/backends/cudo/backend.py +16 -0
  78. dstack/_internal/core/backends/cudo/compute.py +174 -0
  79. dstack/_internal/core/backends/cudo/configurator.py +63 -0
  80. dstack/_internal/core/backends/cudo/models.py +37 -0
  81. dstack/_internal/core/backends/datacrunch/__init__.py +1 -0
  82. dstack/_internal/core/backends/datacrunch/backend.py +18 -0
  83. dstack/_internal/core/backends/datacrunch/compute.py +8 -0
  84. dstack/_internal/core/backends/datacrunch/configurator.py +17 -0
  85. dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
  86. dstack/_internal/core/backends/digitalocean/backend.py +16 -0
  87. dstack/_internal/core/backends/digitalocean/compute.py +5 -0
  88. dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
  89. dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
  90. dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
  91. dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
  92. dstack/_internal/core/backends/digitalocean_base/compute.py +174 -0
  93. dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
  94. dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
  95. dstack/_internal/core/backends/dstack/__init__.py +0 -0
  96. dstack/_internal/core/backends/dstack/models.py +26 -0
  97. dstack/_internal/core/backends/features.py +74 -0
  98. dstack/_internal/core/backends/gcp/__init__.py +0 -0
  99. dstack/_internal/core/backends/gcp/auth.py +57 -0
  100. dstack/_internal/core/backends/gcp/backend.py +17 -0
  101. dstack/_internal/core/backends/gcp/compute.py +1257 -0
  102. dstack/_internal/core/backends/gcp/configurator.py +206 -0
  103. dstack/_internal/core/backends/gcp/features/__init__.py +0 -0
  104. dstack/_internal/core/backends/gcp/features/tcpx.py +65 -0
  105. dstack/_internal/core/backends/gcp/models.py +160 -0
  106. dstack/_internal/core/backends/gcp/resources.py +585 -0
  107. dstack/_internal/core/backends/hotaisle/__init__.py +1 -0
  108. dstack/_internal/core/backends/hotaisle/api_client.py +101 -0
  109. dstack/_internal/core/backends/hotaisle/backend.py +16 -0
  110. dstack/_internal/core/backends/hotaisle/compute.py +188 -0
  111. dstack/_internal/core/backends/hotaisle/configurator.py +66 -0
  112. dstack/_internal/core/backends/hotaisle/models.py +45 -0
  113. dstack/_internal/core/backends/kubernetes/__init__.py +0 -0
  114. dstack/_internal/core/backends/kubernetes/backend.py +16 -0
  115. dstack/_internal/core/backends/kubernetes/compute.py +1077 -0
  116. dstack/_internal/core/backends/kubernetes/configurator.py +61 -0
  117. dstack/_internal/core/backends/kubernetes/models.py +71 -0
  118. dstack/_internal/core/backends/kubernetes/utils.py +81 -0
  119. dstack/_internal/core/backends/lambdalabs/__init__.py +0 -0
  120. dstack/_internal/core/backends/lambdalabs/api_client.py +87 -0
  121. dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
  122. dstack/_internal/core/backends/lambdalabs/compute.py +233 -0
  123. dstack/_internal/core/backends/lambdalabs/configurator.py +65 -0
  124. dstack/_internal/core/backends/lambdalabs/models.py +37 -0
  125. dstack/_internal/core/backends/local/__init__.py +0 -0
  126. dstack/_internal/core/backends/local/backend.py +14 -0
  127. dstack/_internal/core/backends/local/compute.py +130 -0
  128. dstack/_internal/core/backends/models.py +158 -0
  129. dstack/_internal/core/backends/nebius/__init__.py +0 -0
  130. dstack/_internal/core/backends/nebius/backend.py +16 -0
  131. dstack/_internal/core/backends/nebius/compute.py +401 -0
  132. dstack/_internal/core/backends/nebius/configurator.py +98 -0
  133. dstack/_internal/core/backends/nebius/models.py +185 -0
  134. dstack/_internal/core/backends/nebius/resources.py +433 -0
  135. dstack/_internal/core/backends/oci/__init__.py +0 -0
  136. dstack/_internal/core/backends/oci/auth.py +21 -0
  137. dstack/_internal/core/backends/oci/backend.py +16 -0
  138. dstack/_internal/core/backends/oci/compute.py +209 -0
  139. dstack/_internal/core/backends/oci/configurator.py +156 -0
  140. dstack/_internal/core/backends/oci/exceptions.py +15 -0
  141. dstack/_internal/core/backends/oci/models.py +87 -0
  142. dstack/_internal/core/backends/oci/region.py +86 -0
  143. dstack/_internal/core/backends/oci/resources.py +836 -0
  144. dstack/_internal/core/backends/runpod/__init__.py +0 -0
  145. dstack/_internal/core/backends/runpod/api_client.py +627 -0
  146. dstack/_internal/core/backends/runpod/backend.py +16 -0
  147. dstack/_internal/core/backends/runpod/compute.py +444 -0
  148. dstack/_internal/core/backends/runpod/configurator.py +63 -0
  149. dstack/_internal/core/backends/runpod/models.py +54 -0
  150. dstack/_internal/core/backends/template/__init__.py +0 -0
  151. dstack/_internal/core/backends/template/backend.py.jinja +16 -0
  152. dstack/_internal/core/backends/template/compute.py.jinja +95 -0
  153. dstack/_internal/core/backends/template/configurator.py.jinja +69 -0
  154. dstack/_internal/core/backends/template/models.py.jinja +62 -0
  155. dstack/_internal/core/backends/tensordock/models.py +40 -0
  156. dstack/_internal/core/backends/vastai/__init__.py +0 -0
  157. dstack/_internal/core/backends/vastai/api_client.py +143 -0
  158. dstack/_internal/core/backends/vastai/backend.py +16 -0
  159. dstack/_internal/core/backends/vastai/compute.py +141 -0
  160. dstack/_internal/core/backends/vastai/configurator.py +69 -0
  161. dstack/_internal/core/backends/vastai/models.py +37 -0
  162. dstack/_internal/core/backends/verda/__init__.py +0 -0
  163. dstack/_internal/core/backends/verda/backend.py +16 -0
  164. dstack/_internal/core/backends/verda/compute.py +266 -0
  165. dstack/_internal/core/backends/verda/configurator.py +73 -0
  166. dstack/_internal/core/backends/verda/models.py +38 -0
  167. dstack/_internal/core/backends/vultr/__init__.py +0 -0
  168. dstack/_internal/core/backends/vultr/api_client.py +116 -0
  169. dstack/_internal/core/backends/vultr/backend.py +16 -0
  170. dstack/_internal/core/backends/vultr/compute.py +167 -0
  171. dstack/_internal/core/backends/vultr/configurator.py +71 -0
  172. dstack/_internal/core/backends/vultr/models.py +34 -0
  173. dstack/_internal/core/compatibility/__init__.py +0 -0
  174. dstack/_internal/core/compatibility/events.py +13 -0
  175. dstack/_internal/core/compatibility/fleets.py +58 -0
  176. dstack/_internal/core/compatibility/gateways.py +39 -0
  177. dstack/_internal/core/compatibility/gpus.py +13 -0
  178. dstack/_internal/core/compatibility/logs.py +14 -0
  179. dstack/_internal/core/compatibility/runs.py +86 -0
  180. dstack/_internal/core/compatibility/volumes.py +37 -0
  181. dstack/_internal/core/consts.py +8 -0
  182. dstack/_internal/core/errors.py +160 -0
  183. dstack/_internal/core/models/__init__.py +0 -0
  184. dstack/_internal/core/models/auth.py +28 -0
  185. dstack/_internal/core/models/backends/__init__.py +0 -0
  186. dstack/_internal/core/models/backends/base.py +48 -0
  187. dstack/_internal/core/models/common.py +143 -0
  188. dstack/_internal/core/models/compute_groups.py +39 -0
  189. dstack/_internal/core/models/config.py +28 -0
  190. dstack/_internal/core/models/configurations.py +1123 -0
  191. dstack/_internal/core/models/envs.py +149 -0
  192. dstack/_internal/core/models/events.py +98 -0
  193. dstack/_internal/core/models/files.py +67 -0
  194. dstack/_internal/core/models/fleets.py +437 -0
  195. dstack/_internal/core/models/gateways.py +146 -0
  196. dstack/_internal/core/models/gpus.py +45 -0
  197. dstack/_internal/core/models/health.py +28 -0
  198. dstack/_internal/core/models/instances.py +346 -0
  199. dstack/_internal/core/models/logs.py +27 -0
  200. dstack/_internal/core/models/metrics.py +14 -0
  201. dstack/_internal/core/models/placement.py +27 -0
  202. dstack/_internal/core/models/profiles.py +431 -0
  203. dstack/_internal/core/models/projects.py +46 -0
  204. dstack/_internal/core/models/repos/__init__.py +34 -0
  205. dstack/_internal/core/models/repos/base.py +36 -0
  206. dstack/_internal/core/models/repos/local.py +96 -0
  207. dstack/_internal/core/models/repos/remote.py +341 -0
  208. dstack/_internal/core/models/repos/virtual.py +85 -0
  209. dstack/_internal/core/models/resources.py +424 -0
  210. dstack/_internal/core/models/routers.py +24 -0
  211. dstack/_internal/core/models/runs.py +618 -0
  212. dstack/_internal/core/models/secrets.py +16 -0
  213. dstack/_internal/core/models/server.py +7 -0
  214. dstack/_internal/core/models/services.py +76 -0
  215. dstack/_internal/core/models/unix.py +53 -0
  216. dstack/_internal/core/models/users.py +60 -0
  217. dstack/_internal/core/models/volumes.py +221 -0
  218. dstack/_internal/core/services/__init__.py +16 -0
  219. dstack/_internal/core/services/api_client.py +15 -0
  220. dstack/_internal/core/services/configs/__init__.py +116 -0
  221. dstack/_internal/core/services/diff.py +71 -0
  222. dstack/_internal/core/services/logs.py +58 -0
  223. dstack/_internal/core/services/profiles.py +46 -0
  224. dstack/_internal/core/services/repos.py +236 -0
  225. dstack/_internal/core/services/ssh/__init__.py +27 -0
  226. dstack/_internal/core/services/ssh/attach.py +241 -0
  227. dstack/_internal/core/services/ssh/client.py +113 -0
  228. dstack/_internal/core/services/ssh/key_manager.py +53 -0
  229. dstack/_internal/core/services/ssh/ports.py +89 -0
  230. dstack/_internal/core/services/ssh/tunnel.py +337 -0
  231. dstack/_internal/proxy/__init__.py +8 -0
  232. dstack/_internal/proxy/gateway/__init__.py +0 -0
  233. dstack/_internal/proxy/gateway/app.py +89 -0
  234. dstack/_internal/proxy/gateway/auth.py +26 -0
  235. dstack/_internal/proxy/gateway/const.py +7 -0
  236. dstack/_internal/proxy/gateway/deps.py +73 -0
  237. dstack/_internal/proxy/gateway/main.py +17 -0
  238. dstack/_internal/proxy/gateway/models.py +23 -0
  239. dstack/_internal/proxy/gateway/repo/__init__.py +0 -0
  240. dstack/_internal/proxy/gateway/repo/repo.py +121 -0
  241. dstack/_internal/proxy/gateway/repo/state_v1.py +164 -0
  242. dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf +11 -0
  243. dstack/_internal/proxy/gateway/resources/nginx/entrypoint.jinja2 +27 -0
  244. dstack/_internal/proxy/gateway/resources/nginx/router_workers.jinja2 +23 -0
  245. dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +105 -0
  246. dstack/_internal/proxy/gateway/routers/__init__.py +0 -0
  247. dstack/_internal/proxy/gateway/routers/auth.py +10 -0
  248. dstack/_internal/proxy/gateway/routers/config.py +28 -0
  249. dstack/_internal/proxy/gateway/routers/registry.py +124 -0
  250. dstack/_internal/proxy/gateway/routers/stats.py +18 -0
  251. dstack/_internal/proxy/gateway/schemas/__init__.py +0 -0
  252. dstack/_internal/proxy/gateway/schemas/common.py +5 -0
  253. dstack/_internal/proxy/gateway/schemas/config.py +9 -0
  254. dstack/_internal/proxy/gateway/schemas/registry.py +63 -0
  255. dstack/_internal/proxy/gateway/schemas/stats.py +15 -0
  256. dstack/_internal/proxy/gateway/services/__init__.py +0 -0
  257. dstack/_internal/proxy/gateway/services/model_routers/__init__.py +18 -0
  258. dstack/_internal/proxy/gateway/services/model_routers/base.py +91 -0
  259. dstack/_internal/proxy/gateway/services/model_routers/sglang.py +269 -0
  260. dstack/_internal/proxy/gateway/services/nginx.py +455 -0
  261. dstack/_internal/proxy/gateway/services/registry.py +426 -0
  262. dstack/_internal/proxy/gateway/services/server_client.py +95 -0
  263. dstack/_internal/proxy/gateway/services/stats.py +170 -0
  264. dstack/_internal/proxy/gateway/testing/__init__.py +0 -0
  265. dstack/_internal/proxy/gateway/testing/common.py +13 -0
  266. dstack/_internal/proxy/lib/__init__.py +0 -0
  267. dstack/_internal/proxy/lib/auth.py +7 -0
  268. dstack/_internal/proxy/lib/deps.py +106 -0
  269. dstack/_internal/proxy/lib/errors.py +14 -0
  270. dstack/_internal/proxy/lib/models.py +112 -0
  271. dstack/_internal/proxy/lib/repo.py +27 -0
  272. dstack/_internal/proxy/lib/routers/__init__.py +0 -0
  273. dstack/_internal/proxy/lib/routers/model_proxy.py +102 -0
  274. dstack/_internal/proxy/lib/schemas/__init__.py +0 -0
  275. dstack/_internal/proxy/lib/schemas/model_proxy.py +77 -0
  276. dstack/_internal/proxy/lib/services/__init__.py +0 -0
  277. dstack/_internal/proxy/lib/services/model_proxy/__init__.py +0 -0
  278. dstack/_internal/proxy/lib/services/model_proxy/clients/__init__.py +0 -0
  279. dstack/_internal/proxy/lib/services/model_proxy/clients/base.py +18 -0
  280. dstack/_internal/proxy/lib/services/model_proxy/clients/openai.py +67 -0
  281. dstack/_internal/proxy/lib/services/model_proxy/clients/tgi.py +208 -0
  282. dstack/_internal/proxy/lib/services/model_proxy/model_proxy.py +23 -0
  283. dstack/_internal/proxy/lib/services/service_connection.py +160 -0
  284. dstack/_internal/proxy/lib/testing/__init__.py +0 -0
  285. dstack/_internal/proxy/lib/testing/auth.py +11 -0
  286. dstack/_internal/proxy/lib/testing/common.py +51 -0
  287. dstack/_internal/server/__init__.py +0 -0
  288. dstack/_internal/server/alembic.ini +100 -0
  289. dstack/_internal/server/app.py +432 -0
  290. dstack/_internal/server/background/__init__.py +142 -0
  291. dstack/_internal/server/background/tasks/__init__.py +0 -0
  292. dstack/_internal/server/background/tasks/common.py +24 -0
  293. dstack/_internal/server/background/tasks/process_compute_groups.py +167 -0
  294. dstack/_internal/server/background/tasks/process_events.py +17 -0
  295. dstack/_internal/server/background/tasks/process_fleets.py +289 -0
  296. dstack/_internal/server/background/tasks/process_gateways.py +188 -0
  297. dstack/_internal/server/background/tasks/process_idle_volumes.py +145 -0
  298. dstack/_internal/server/background/tasks/process_instances.py +1186 -0
  299. dstack/_internal/server/background/tasks/process_metrics.py +172 -0
  300. dstack/_internal/server/background/tasks/process_placement_groups.py +104 -0
  301. dstack/_internal/server/background/tasks/process_probes.py +164 -0
  302. dstack/_internal/server/background/tasks/process_prometheus_metrics.py +150 -0
  303. dstack/_internal/server/background/tasks/process_running_jobs.py +1238 -0
  304. dstack/_internal/server/background/tasks/process_runs.py +842 -0
  305. dstack/_internal/server/background/tasks/process_submitted_jobs.py +1106 -0
  306. dstack/_internal/server/background/tasks/process_terminating_jobs.py +108 -0
  307. dstack/_internal/server/background/tasks/process_volumes.py +129 -0
  308. dstack/_internal/server/compatibility/__init__.py +0 -0
  309. dstack/_internal/server/compatibility/common.py +20 -0
  310. dstack/_internal/server/compatibility/gpus.py +22 -0
  311. dstack/_internal/server/db.py +127 -0
  312. dstack/_internal/server/deps.py +19 -0
  313. dstack/_internal/server/main.py +4 -0
  314. dstack/_internal/server/migrations/__init__.py +0 -0
  315. dstack/_internal/server/migrations/env.py +112 -0
  316. dstack/_internal/server/migrations/script.py.mako +28 -0
  317. dstack/_internal/server/migrations/versions/006512f572b4_add_projects_original_name.py +38 -0
  318. dstack/_internal/server/migrations/versions/065588ec72b8_add_vultr_to_backendtype_enum.py +81 -0
  319. dstack/_internal/server/migrations/versions/06e977bc61c7_add_usermodel_deleted_and_original_name.py +45 -0
  320. dstack/_internal/server/migrations/versions/0e33559e16ed_update_instancestatus.py +64 -0
  321. dstack/_internal/server/migrations/versions/112753bc17dd_remove_nullable_fields.py +50 -0
  322. dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
  323. dstack/_internal/server/migrations/versions/14f2cb002fc2_add_jobmodel_removed_flag.py +44 -0
  324. dstack/_internal/server/migrations/versions/1a48dfe44a40_rework_termination_handling.py +42 -0
  325. dstack/_internal/server/migrations/versions/1aa9638ad963_added_email_index.py +31 -0
  326. dstack/_internal/server/migrations/versions/1e3fb39ef74b_add_remote_connection_details.py +26 -0
  327. dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
  328. dstack/_internal/server/migrations/versions/20166748b60c_add_jobmodel_disconnected_at.py +100 -0
  329. dstack/_internal/server/migrations/versions/22d74df9897e_add_events_and_event_targets.py +99 -0
  330. dstack/_internal/server/migrations/versions/23e01c56279a_make_blob_nullable.py +32 -0
  331. dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
  332. dstack/_internal/server/migrations/versions/252d3743b641_.py +40 -0
  333. dstack/_internal/server/migrations/versions/25479f540245_add_probes.py +43 -0
  334. dstack/_internal/server/migrations/versions/27d3e55759fa_add_pools.py +152 -0
  335. dstack/_internal/server/migrations/versions/29826f417010_remove_instancemodel_retry_policy.py +34 -0
  336. dstack/_internal/server/migrations/versions/29c08c6a8cb3_.py +36 -0
  337. dstack/_internal/server/migrations/versions/35e90e1b0d3e_add_rolling_deployment_fields.py +42 -0
  338. dstack/_internal/server/migrations/versions/35f732ee4cf5_add_projectmodel_is_public.py +39 -0
  339. dstack/_internal/server/migrations/versions/3cf77fb8bcf1_store_repo_clone_url.py +85 -0
  340. dstack/_internal/server/migrations/versions/3d7f6c2ec000_add_jobmodel_registered.py +28 -0
  341. dstack/_internal/server/migrations/versions/3dbdce90d0e0_fix_code_uq_constraint.py +33 -0
  342. dstack/_internal/server/migrations/versions/48ad3ecbaea2_do_not_delete_projects_and_runs.py +46 -0
  343. dstack/_internal/server/migrations/versions/4ae1a5b0e7f1_add_run_list_index.py +34 -0
  344. dstack/_internal/server/migrations/versions/4b4319398164_introduce_runs_processing.py +144 -0
  345. dstack/_internal/server/migrations/versions/50dd7ea98639_index_status_columns.py +55 -0
  346. dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
  347. dstack/_internal/server/migrations/versions/54a77e19c64c_add_manager_project_role.py +67 -0
  348. dstack/_internal/server/migrations/versions/555138b1f77f_change_instancemodel_for_asynchronous_.py +61 -0
  349. dstack/_internal/server/migrations/versions/58aa5162dcc3_add_gatewaymodel_configuration.py +32 -0
  350. dstack/_internal/server/migrations/versions/5ad8debc8fe6_fixes_for_psql.py +329 -0
  351. dstack/_internal/server/migrations/versions/5ec538b70e71_replace_instansestatus.py +31 -0
  352. dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
  353. dstack/_internal/server/migrations/versions/5fd659afca82_add_ix_instances_fleet_id.py +31 -0
  354. dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
  355. dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
  356. dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
  357. dstack/_internal/server/migrations/versions/686fb8341ea5_add_user_emails.py +32 -0
  358. dstack/_internal/server/migrations/versions/6c1a9d6530ee_add_jobmodel_exit_status.py +26 -0
  359. dstack/_internal/server/migrations/versions/706e0acc3a7d_add_runmodel_desired_replica_counts.py +26 -0
  360. dstack/_internal/server/migrations/versions/710e5b3fac8f_add_encryption.py +54 -0
  361. dstack/_internal/server/migrations/versions/728b1488b1b4_add_instance_health.py +50 -0
  362. dstack/_internal/server/migrations/versions/74a1f55209bd_store_enums_as_strings.py +484 -0
  363. dstack/_internal/server/migrations/versions/7b24b1c8eba7_add_instancemodel_last_processed_at.py +68 -0
  364. dstack/_internal/server/migrations/versions/7ba3b59d7ca6_add_runmodel_resubmission_attempt.py +35 -0
  365. dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
  366. dstack/_internal/server/migrations/versions/7d1ec2b920ac_add_computegroupmodel.py +91 -0
  367. dstack/_internal/server/migrations/versions/803c7e9ed85d_add_jobmodel_job_runtime_data.py +32 -0
  368. dstack/_internal/server/migrations/versions/82b32a135ea2_.py +58 -0
  369. dstack/_internal/server/migrations/versions/866ec1d67184_replace_retrypolicy_limit_with_.py +93 -0
  370. dstack/_internal/server/migrations/versions/903c91e24634_add_instances_termination_reason_message.py +34 -0
  371. dstack/_internal/server/migrations/versions/91a12fff6c76_add_repocredsmodel.py +43 -0
  372. dstack/_internal/server/migrations/versions/91ac5e543037_extend_repos_creds_column.py +36 -0
  373. dstack/_internal/server/migrations/versions/98cd9c8b5927_add_volumemodel.py +73 -0
  374. dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
  375. dstack/_internal/server/migrations/versions/99b4c8c954ea_add_termination_reason_message.py +71 -0
  376. dstack/_internal/server/migrations/versions/9eea6af28e10_added_fail_reason_for_instancemodel.py +36 -0
  377. dstack/_internal/server/migrations/versions/__init__.py +0 -0
  378. dstack/_internal/server/migrations/versions/a060e2440936_.py +206 -0
  379. dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
  380. dstack/_internal/server/migrations/versions/a7b46c073fa1_add_placementgroupmodel.py +58 -0
  381. dstack/_internal/server/migrations/versions/afbc600ff2b2_add_created_at_to_usermodel_and_.py +102 -0
  382. dstack/_internal/server/migrations/versions/b4d6ad60db08_add_instancemodel_unreachable.py +37 -0
  383. dstack/_internal/server/migrations/versions/b88d55c2a07d_replace_instancestatus_ready.py +21 -0
  384. dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
  385. dstack/_internal/server/migrations/versions/bca2fdf130bf_add_runmodel_priority.py +34 -0
  386. dstack/_internal/server/migrations/versions/bfba43f6def2_.py +32 -0
  387. dstack/_internal/server/migrations/versions/c00090eaef21_support_fleets.py +108 -0
  388. dstack/_internal/server/migrations/versions/c154eece89da_add_fields_for_async_gateway_creation.py +74 -0
  389. dstack/_internal/server/migrations/versions/c20626d03cfb_add_jobmetricspoint.py +43 -0
  390. dstack/_internal/server/migrations/versions/c48df7985d57_add_instance_termination_retries.py +38 -0
  391. dstack/_internal/server/migrations/versions/c83d45f9a971_replace_string_with_text.py +150 -0
  392. dstack/_internal/server/migrations/versions/d0bb68e48b9f_add_project_owners_and_quotas.py +106 -0
  393. dstack/_internal/server/migrations/versions/d3e8af4786fa_gateway_compute_flag_deleted.py +34 -0
  394. dstack/_internal/server/migrations/versions/d4d9dc26cf58_add_ix_jobs_run_id.py +31 -0
  395. dstack/_internal/server/migrations/versions/d5863798bf41_add_volumemodel_last_job_processed_at.py +40 -0
  396. dstack/_internal/server/migrations/versions/d6b11105f659_add_usermodel_active.py +36 -0
  397. dstack/_internal/server/migrations/versions/da574e93fee0_add_jobmodel_volumes_detached_at.py +40 -0
  398. dstack/_internal/server/migrations/versions/dfffd6a1165c_add_fields_for_gateways_behind_alb.py +36 -0
  399. dstack/_internal/server/migrations/versions/e2d08cd1b8d9_add_jobmodel_fleet.py +41 -0
  400. dstack/_internal/server/migrations/versions/e3b7db07727f_add_gatewaycomputemodel_app_updated_at.py +61 -0
  401. dstack/_internal/server/migrations/versions/e6391ca6a264_separate_gateways_from_compute.py +72 -0
  402. dstack/_internal/server/migrations/versions/ea60480f82bb_add_membermodel_member_num.py +32 -0
  403. dstack/_internal/server/migrations/versions/ec02a26a256c_add_runmodel_next_triggered_at.py +38 -0
  404. dstack/_internal/server/migrations/versions/ed0ca30e13bb_migrate_instancestatus_provisioning.py +29 -0
  405. dstack/_internal/server/migrations/versions/fe72c4de8376_add_gateways.py +81 -0
  406. dstack/_internal/server/migrations/versions/ff1d94f65b08_user_ssh_key.py +34 -0
  407. dstack/_internal/server/migrations/versions/ffa99edd1988_add_jobterminationreason_max_duration_.py +81 -0
  408. dstack/_internal/server/models.py +930 -0
  409. dstack/_internal/server/routers/__init__.py +0 -0
  410. dstack/_internal/server/routers/auth.py +34 -0
  411. dstack/_internal/server/routers/backends.py +142 -0
  412. dstack/_internal/server/routers/events.py +60 -0
  413. dstack/_internal/server/routers/files.py +68 -0
  414. dstack/_internal/server/routers/fleets.py +202 -0
  415. dstack/_internal/server/routers/gateways.py +109 -0
  416. dstack/_internal/server/routers/gpus.py +32 -0
  417. dstack/_internal/server/routers/instances.py +77 -0
  418. dstack/_internal/server/routers/logs.py +34 -0
  419. dstack/_internal/server/routers/metrics.py +82 -0
  420. dstack/_internal/server/routers/projects.py +205 -0
  421. dstack/_internal/server/routers/prometheus.py +35 -0
  422. dstack/_internal/server/routers/repos.py +118 -0
  423. dstack/_internal/server/routers/runs.py +216 -0
  424. dstack/_internal/server/routers/secrets.py +86 -0
  425. dstack/_internal/server/routers/server.py +19 -0
  426. dstack/_internal/server/routers/users.py +158 -0
  427. dstack/_internal/server/routers/volumes.py +122 -0
  428. dstack/_internal/server/schemas/__init__.py +0 -0
  429. dstack/_internal/server/schemas/auth.py +83 -0
  430. dstack/_internal/server/schemas/backends.py +16 -0
  431. dstack/_internal/server/schemas/common.py +9 -0
  432. dstack/_internal/server/schemas/events.py +211 -0
  433. dstack/_internal/server/schemas/files.py +5 -0
  434. dstack/_internal/server/schemas/fleets.py +49 -0
  435. dstack/_internal/server/schemas/gateways.py +31 -0
  436. dstack/_internal/server/schemas/gpus.py +26 -0
  437. dstack/_internal/server/schemas/health/__init__.py +0 -0
  438. dstack/_internal/server/schemas/health/dcgm.py +56 -0
  439. dstack/_internal/server/schemas/instances.py +47 -0
  440. dstack/_internal/server/schemas/logs.py +17 -0
  441. dstack/_internal/server/schemas/projects.py +81 -0
  442. dstack/_internal/server/schemas/repos.py +24 -0
  443. dstack/_internal/server/schemas/runner.py +269 -0
  444. dstack/_internal/server/schemas/runs.py +66 -0
  445. dstack/_internal/server/schemas/secrets.py +16 -0
  446. dstack/_internal/server/schemas/users.py +72 -0
  447. dstack/_internal/server/schemas/volumes.py +29 -0
  448. dstack/_internal/server/security/__init__.py +0 -0
  449. dstack/_internal/server/security/permissions.py +251 -0
  450. dstack/_internal/server/services/__init__.py +0 -0
  451. dstack/_internal/server/services/auth.py +77 -0
  452. dstack/_internal/server/services/backends/__init__.py +404 -0
  453. dstack/_internal/server/services/backends/handlers.py +105 -0
  454. dstack/_internal/server/services/compute_groups.py +22 -0
  455. dstack/_internal/server/services/config.py +279 -0
  456. dstack/_internal/server/services/docker.py +162 -0
  457. dstack/_internal/server/services/encryption/__init__.py +102 -0
  458. dstack/_internal/server/services/encryption/keys/__init__.py +0 -0
  459. dstack/_internal/server/services/encryption/keys/aes.py +68 -0
  460. dstack/_internal/server/services/encryption/keys/base.py +19 -0
  461. dstack/_internal/server/services/encryption/keys/identity.py +28 -0
  462. dstack/_internal/server/services/events.py +477 -0
  463. dstack/_internal/server/services/files.py +91 -0
  464. dstack/_internal/server/services/fleets.py +1224 -0
  465. dstack/_internal/server/services/gateways/__init__.py +686 -0
  466. dstack/_internal/server/services/gateways/client.py +209 -0
  467. dstack/_internal/server/services/gateways/connection.py +139 -0
  468. dstack/_internal/server/services/gateways/pool.py +58 -0
  469. dstack/_internal/server/services/gpus.py +387 -0
  470. dstack/_internal/server/services/instances.py +731 -0
  471. dstack/_internal/server/services/jobs/__init__.py +840 -0
  472. dstack/_internal/server/services/jobs/configurators/__init__.py +0 -0
  473. dstack/_internal/server/services/jobs/configurators/base.py +469 -0
  474. dstack/_internal/server/services/jobs/configurators/dev.py +69 -0
  475. dstack/_internal/server/services/jobs/configurators/extensions/__init__.py +0 -0
  476. dstack/_internal/server/services/jobs/configurators/extensions/base.py +15 -0
  477. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
  478. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +42 -0
  479. dstack/_internal/server/services/jobs/configurators/extensions/windsurf.py +43 -0
  480. dstack/_internal/server/services/jobs/configurators/service.py +28 -0
  481. dstack/_internal/server/services/jobs/configurators/task.py +39 -0
  482. dstack/_internal/server/services/locking.py +187 -0
  483. dstack/_internal/server/services/logging.py +29 -0
  484. dstack/_internal/server/services/logs/__init__.py +122 -0
  485. dstack/_internal/server/services/logs/aws.py +373 -0
  486. dstack/_internal/server/services/logs/base.py +47 -0
  487. dstack/_internal/server/services/logs/filelog.py +261 -0
  488. dstack/_internal/server/services/logs/fluentbit.py +329 -0
  489. dstack/_internal/server/services/logs/gcp.py +181 -0
  490. dstack/_internal/server/services/metrics.py +172 -0
  491. dstack/_internal/server/services/offers.py +249 -0
  492. dstack/_internal/server/services/permissions.py +37 -0
  493. dstack/_internal/server/services/placement.py +234 -0
  494. dstack/_internal/server/services/plugins.py +109 -0
  495. dstack/_internal/server/services/probes.py +10 -0
  496. dstack/_internal/server/services/projects.py +835 -0
  497. dstack/_internal/server/services/prometheus/__init__.py +0 -0
  498. dstack/_internal/server/services/prometheus/client_metrics.py +55 -0
  499. dstack/_internal/server/services/prometheus/custom_metrics.py +327 -0
  500. dstack/_internal/server/services/proxy/__init__.py +3 -0
  501. dstack/_internal/server/services/proxy/auth.py +12 -0
  502. dstack/_internal/server/services/proxy/deps.py +18 -0
  503. dstack/_internal/server/services/proxy/repo.py +189 -0
  504. dstack/_internal/server/services/proxy/routers/__init__.py +0 -0
  505. dstack/_internal/server/services/proxy/routers/service_proxy.py +49 -0
  506. dstack/_internal/server/services/proxy/services/__init__.py +0 -0
  507. dstack/_internal/server/services/proxy/services/service_proxy.py +135 -0
  508. dstack/_internal/server/services/repos.py +362 -0
  509. dstack/_internal/server/services/requirements/__init__.py +0 -0
  510. dstack/_internal/server/services/requirements/combine.py +260 -0
  511. dstack/_internal/server/services/resources.py +21 -0
  512. dstack/_internal/server/services/runner/__init__.py +0 -0
  513. dstack/_internal/server/services/runner/client.py +646 -0
  514. dstack/_internal/server/services/runner/ssh.py +128 -0
  515. dstack/_internal/server/services/runs/__init__.py +1026 -0
  516. dstack/_internal/server/services/runs/plan.py +703 -0
  517. dstack/_internal/server/services/runs/replicas.py +317 -0
  518. dstack/_internal/server/services/runs/spec.py +191 -0
  519. dstack/_internal/server/services/secrets.py +245 -0
  520. dstack/_internal/server/services/services/__init__.py +345 -0
  521. dstack/_internal/server/services/services/autoscalers.py +140 -0
  522. dstack/_internal/server/services/services/options.py +53 -0
  523. dstack/_internal/server/services/ssh.py +67 -0
  524. dstack/_internal/server/services/storage/__init__.py +37 -0
  525. dstack/_internal/server/services/storage/base.py +48 -0
  526. dstack/_internal/server/services/storage/gcs.py +66 -0
  527. dstack/_internal/server/services/storage/s3.py +69 -0
  528. dstack/_internal/server/services/users.py +461 -0
  529. dstack/_internal/server/services/volumes.py +496 -0
  530. dstack/_internal/server/settings.py +161 -0
  531. dstack/_internal/server/statics/00a6e1fb461ed2929fb9.png +0 -0
  532. dstack/_internal/server/statics/0cae4d9f0a36034984a7.png +0 -0
  533. dstack/_internal/server/statics/391de232cc0e30cae513.png +0 -0
  534. dstack/_internal/server/statics/4e0eead8c1a73689ef9d.svg +1 -0
  535. dstack/_internal/server/statics/544afa2f63428c2235b0.png +0 -0
  536. dstack/_internal/server/statics/54a4f50f74c6b9381530.svg +7 -0
  537. dstack/_internal/server/statics/68dd1360a7d2611e0132.svg +4 -0
  538. dstack/_internal/server/statics/69544b4c81973b54a66f.png +0 -0
  539. dstack/_internal/server/statics/77a8b02b17af19e39266.png +0 -0
  540. dstack/_internal/server/statics/83a93a8871c219104367.svg +9 -0
  541. dstack/_internal/server/statics/8f28bb8e9999e5e6a48b.svg +4 -0
  542. dstack/_internal/server/statics/9124086961ab8c366bc4.svg +9 -0
  543. dstack/_internal/server/statics/9a9ebaeb54b025dbac0a.svg +5 -0
  544. dstack/_internal/server/statics/a3428392dc534f3b15c4.svg +7 -0
  545. dstack/_internal/server/statics/ae22625574d69361f72c.png +0 -0
  546. dstack/_internal/server/statics/assets/android-chrome-144x144.png +0 -0
  547. dstack/_internal/server/statics/assets/android-chrome-192x192.png +0 -0
  548. dstack/_internal/server/statics/assets/android-chrome-256x256.png +0 -0
  549. dstack/_internal/server/statics/assets/android-chrome-36x36.png +0 -0
  550. dstack/_internal/server/statics/assets/android-chrome-384x384.png +0 -0
  551. dstack/_internal/server/statics/assets/android-chrome-48x48.png +0 -0
  552. dstack/_internal/server/statics/assets/android-chrome-512x512.png +0 -0
  553. dstack/_internal/server/statics/assets/android-chrome-72x72.png +0 -0
  554. dstack/_internal/server/statics/assets/android-chrome-96x96.png +0 -0
  555. dstack/_internal/server/statics/assets/apple-touch-icon-1024x1024.png +0 -0
  556. dstack/_internal/server/statics/assets/apple-touch-icon-114x114.png +0 -0
  557. dstack/_internal/server/statics/assets/apple-touch-icon-120x120.png +0 -0
  558. dstack/_internal/server/statics/assets/apple-touch-icon-144x144.png +0 -0
  559. dstack/_internal/server/statics/assets/apple-touch-icon-152x152.png +0 -0
  560. dstack/_internal/server/statics/assets/apple-touch-icon-167x167.png +0 -0
  561. dstack/_internal/server/statics/assets/apple-touch-icon-180x180.png +0 -0
  562. dstack/_internal/server/statics/assets/apple-touch-icon-57x57.png +0 -0
  563. dstack/_internal/server/statics/assets/apple-touch-icon-60x60.png +0 -0
  564. dstack/_internal/server/statics/assets/apple-touch-icon-72x72.png +0 -0
  565. dstack/_internal/server/statics/assets/apple-touch-icon-76x76.png +0 -0
  566. dstack/_internal/server/statics/assets/apple-touch-icon-precomposed.png +0 -0
  567. dstack/_internal/server/statics/assets/apple-touch-icon.png +0 -0
  568. dstack/_internal/server/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
  569. dstack/_internal/server/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
  570. dstack/_internal/server/statics/assets/apple-touch-startup-image-1170x2532.png +0 -0
  571. dstack/_internal/server/statics/assets/apple-touch-startup-image-1179x2556.png +0 -0
  572. dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
  573. dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
  574. dstack/_internal/server/statics/assets/apple-touch-startup-image-1284x2778.png +0 -0
  575. dstack/_internal/server/statics/assets/apple-touch-startup-image-1290x2796.png +0 -0
  576. dstack/_internal/server/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
  577. dstack/_internal/server/statics/assets/apple-touch-startup-image-1488x2266.png +0 -0
  578. dstack/_internal/server/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
  579. dstack/_internal/server/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
  580. dstack/_internal/server/statics/assets/apple-touch-startup-image-1640x2160.png +0 -0
  581. dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
  582. dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
  583. dstack/_internal/server/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
  584. dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
  585. dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
  586. dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
  587. dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1640.png +0 -0
  588. dstack/_internal/server/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
  589. dstack/_internal/server/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
  590. dstack/_internal/server/statics/assets/apple-touch-startup-image-2266x1488.png +0 -0
  591. dstack/_internal/server/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
  592. dstack/_internal/server/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
  593. dstack/_internal/server/statics/assets/apple-touch-startup-image-2532x1170.png +0 -0
  594. dstack/_internal/server/statics/assets/apple-touch-startup-image-2556x1179.png +0 -0
  595. dstack/_internal/server/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
  596. dstack/_internal/server/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
  597. dstack/_internal/server/statics/assets/apple-touch-startup-image-2778x1284.png +0 -0
  598. dstack/_internal/server/statics/assets/apple-touch-startup-image-2796x1290.png +0 -0
  599. dstack/_internal/server/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
  600. dstack/_internal/server/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
  601. dstack/_internal/server/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
  602. dstack/_internal/server/statics/assets/browserconfig.xml +12 -0
  603. dstack/_internal/server/statics/assets/favicon-16x16.png +0 -0
  604. dstack/_internal/server/statics/assets/favicon-32x32.png +0 -0
  605. dstack/_internal/server/statics/assets/favicon-48x48.png +0 -0
  606. dstack/_internal/server/statics/assets/favicon.ico +0 -0
  607. dstack/{dashboard/statics/assets/manifest.json → _internal/server/statics/assets/manifest.webmanifest} +18 -9
  608. dstack/_internal/server/statics/assets/mstile-144x144.png +0 -0
  609. dstack/_internal/server/statics/assets/mstile-150x150.png +0 -0
  610. dstack/_internal/server/statics/assets/mstile-310x150.png +0 -0
  611. dstack/_internal/server/statics/assets/mstile-310x310.png +0 -0
  612. dstack/_internal/server/statics/assets/mstile-70x70.png +0 -0
  613. dstack/_internal/server/statics/assets/yandex-browser-50x50.png +0 -0
  614. dstack/_internal/server/statics/b7ae68f44193474fc578.png +0 -0
  615. dstack/_internal/server/statics/d2f008c75b2b5b191f3f.png +0 -0
  616. dstack/_internal/server/statics/d44c33e1b92e05c379fd.png +0 -0
  617. dstack/_internal/server/statics/dd43ff0552815179d7ab.png +0 -0
  618. dstack/_internal/server/statics/dd4e7166c0b9aac197d7.png +0 -0
  619. dstack/_internal/server/statics/e30b27916930d43d2271.png +0 -0
  620. dstack/_internal/server/statics/e467d7d60aae81ab198b.svg +6 -0
  621. dstack/_internal/server/statics/eb9b344b73818fe2b71a.png +0 -0
  622. dstack/_internal/server/statics/f517dd626eb964120de0.png +0 -0
  623. dstack/_internal/server/statics/f958aecddee5d8e3222c.png +0 -0
  624. dstack/_internal/server/statics/index.html +3 -0
  625. dstack/_internal/server/statics/logo-notext.svg +116 -0
  626. dstack/_internal/server/statics/main-2e6967bad9f29395eea6.css +3 -0
  627. dstack/_internal/server/statics/main-7dc0f6d20b8b41659acc.js +155547 -0
  628. dstack/_internal/server/statics/main-7dc0f6d20b8b41659acc.js.map +1 -0
  629. dstack/{dashboard → _internal/server}/statics/manifest.json +2 -2
  630. dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
  631. dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
  632. dstack/{dashboard/statics/static/media/logo.f9d7170678f68f796e270698633770ec.svg → _internal/server/statics/static/media/logo.f602feeb138844eda97c8cb641461448.svg} +8 -6
  633. dstack/_internal/server/statics/static/media/okta.12f178e6873a1100965f2a4dbd18fcec.svg +2 -0
  634. dstack/_internal/server/statics/static/media/theme.3994c817bb7dda191c1c9640dee0bf42.svg +3 -0
  635. dstack/_internal/server/testing/__init__.py +0 -0
  636. dstack/_internal/server/testing/common.py +1220 -0
  637. dstack/_internal/server/testing/conf.py +53 -0
  638. dstack/_internal/server/testing/matchers.py +31 -0
  639. dstack/_internal/server/utils/__init__.py +0 -0
  640. dstack/_internal/server/utils/common.py +55 -0
  641. dstack/_internal/server/utils/logging.py +51 -0
  642. dstack/_internal/server/utils/provisioning.py +368 -0
  643. dstack/_internal/server/utils/routers.py +166 -0
  644. dstack/_internal/server/utils/sentry_utils.py +24 -0
  645. dstack/_internal/settings.py +49 -0
  646. dstack/_internal/utils/__init__.py +0 -0
  647. dstack/_internal/utils/common.py +318 -0
  648. dstack/_internal/utils/cron.py +5 -0
  649. dstack/_internal/utils/crypto.py +40 -0
  650. dstack/_internal/utils/env.py +88 -0
  651. dstack/_internal/utils/event_loop.py +30 -0
  652. dstack/_internal/utils/files.py +69 -0
  653. dstack/_internal/utils/gpu.py +59 -0
  654. dstack/_internal/utils/hash.py +31 -0
  655. dstack/_internal/utils/interpolator.py +91 -0
  656. dstack/_internal/utils/json_schema.py +11 -0
  657. dstack/_internal/utils/json_utils.py +54 -0
  658. dstack/_internal/utils/logging.py +5 -0
  659. dstack/_internal/utils/nested_list.py +47 -0
  660. dstack/_internal/utils/network.py +50 -0
  661. dstack/_internal/utils/path.py +57 -0
  662. dstack/_internal/utils/random_names.py +258 -0
  663. dstack/_internal/utils/ssh.py +346 -0
  664. dstack/_internal/utils/tags.py +42 -0
  665. dstack/_internal/utils/typing.py +14 -0
  666. dstack/_internal/utils/version.py +22 -0
  667. dstack/api/__init__.py +46 -0
  668. dstack/api/_public/__init__.py +96 -0
  669. dstack/api/_public/backends.py +42 -0
  670. dstack/api/_public/common.py +5 -0
  671. dstack/api/_public/repos.py +202 -0
  672. dstack/api/_public/runs.py +714 -0
  673. dstack/api/server/__init__.py +206 -0
  674. dstack/api/server/_auth.py +30 -0
  675. dstack/api/server/_backends.py +38 -0
  676. dstack/api/server/_events.py +64 -0
  677. dstack/api/server/_files.py +18 -0
  678. dstack/api/server/_fleets.py +82 -0
  679. dstack/api/server/_gateways.py +54 -0
  680. dstack/api/server/_gpus.py +27 -0
  681. dstack/api/server/_group.py +22 -0
  682. dstack/api/server/_logs.py +15 -0
  683. dstack/api/server/_metrics.py +23 -0
  684. dstack/api/server/_projects.py +124 -0
  685. dstack/api/server/_repos.py +64 -0
  686. dstack/api/server/_runs.py +102 -0
  687. dstack/api/server/_secrets.py +36 -0
  688. dstack/api/server/_users.py +82 -0
  689. dstack/api/server/_volumes.py +39 -0
  690. dstack/api/server/utils.py +34 -0
  691. dstack/api/utils.py +105 -0
  692. dstack/core/__init__.py +0 -0
  693. dstack/plugins/__init__.py +8 -0
  694. dstack/plugins/_base.py +72 -0
  695. dstack/plugins/_models.py +8 -0
  696. dstack/plugins/_utils.py +19 -0
  697. dstack/plugins/builtin/__init__.py +0 -0
  698. dstack/plugins/builtin/rest_plugin/__init__.py +18 -0
  699. dstack/plugins/builtin/rest_plugin/_models.py +48 -0
  700. dstack/plugins/builtin/rest_plugin/_plugin.py +147 -0
  701. dstack/version.py +3 -1
  702. dstack-0.20.7.dist-info/METADATA +519 -0
  703. dstack-0.20.7.dist-info/RECORD +720 -0
  704. {dstack-0.0.9.dist-info → dstack-0.20.7.dist-info}/WHEEL +1 -2
  705. dstack-0.20.7.dist-info/entry_points.txt +2 -0
  706. dstack-0.20.7.dist-info/licenses/LICENSE.md +353 -0
  707. dstack/aws/__init__.py +0 -180
  708. dstack/aws/artifacts.py +0 -111
  709. dstack/aws/config.py +0 -40
  710. dstack/aws/jobs.py +0 -245
  711. dstack/aws/logs.py +0 -186
  712. dstack/aws/repos.py +0 -137
  713. dstack/aws/run_names.py +0 -17
  714. dstack/aws/runners.py +0 -693
  715. dstack/aws/runs.py +0 -79
  716. dstack/aws/secrets.py +0 -99
  717. dstack/aws/tags.py +0 -138
  718. dstack/backend.py +0 -299
  719. dstack/cli/app.py +0 -41
  720. dstack/cli/artifacts.py +0 -87
  721. dstack/cli/common.py +0 -57
  722. dstack/cli/config.py +0 -194
  723. dstack/cli/dashboard.py +0 -26
  724. dstack/cli/delete.py +0 -49
  725. dstack/cli/init.py +0 -33
  726. dstack/cli/logs.py +0 -87
  727. dstack/cli/main.py +0 -81
  728. dstack/cli/restart.py +0 -43
  729. dstack/cli/run.py +0 -223
  730. dstack/cli/schema.py +0 -46
  731. dstack/cli/secrets.py +0 -97
  732. dstack/cli/status.py +0 -140
  733. dstack/cli/stop.py +0 -53
  734. dstack/cli/tags.py +0 -100
  735. dstack/config.py +0 -80
  736. dstack/dashboard/artifacts.py +0 -26
  737. dstack/dashboard/logs.py +0 -73
  738. dstack/dashboard/main.py +0 -45
  739. dstack/dashboard/repos.py +0 -41
  740. dstack/dashboard/runs.py +0 -140
  741. dstack/dashboard/secrets.py +0 -53
  742. dstack/dashboard/statics/4d6a4e032505c1efd23c.png +0 -0
  743. dstack/dashboard/statics/7e018c3e5566d7c349a8.png +0 -0
  744. dstack/dashboard/statics/assets/android-chrome-144x144.png +0 -0
  745. dstack/dashboard/statics/assets/android-chrome-192x192.png +0 -0
  746. dstack/dashboard/statics/assets/android-chrome-256x256.png +0 -0
  747. dstack/dashboard/statics/assets/android-chrome-36x36.png +0 -0
  748. dstack/dashboard/statics/assets/android-chrome-384x384.png +0 -0
  749. dstack/dashboard/statics/assets/android-chrome-48x48.png +0 -0
  750. dstack/dashboard/statics/assets/android-chrome-512x512.png +0 -0
  751. dstack/dashboard/statics/assets/android-chrome-72x72.png +0 -0
  752. dstack/dashboard/statics/assets/android-chrome-96x96.png +0 -0
  753. dstack/dashboard/statics/assets/apple-touch-icon-1024x1024.png +0 -0
  754. dstack/dashboard/statics/assets/apple-touch-icon-114x114.png +0 -0
  755. dstack/dashboard/statics/assets/apple-touch-icon-120x120.png +0 -0
  756. dstack/dashboard/statics/assets/apple-touch-icon-144x144.png +0 -0
  757. dstack/dashboard/statics/assets/apple-touch-icon-152x152.png +0 -0
  758. dstack/dashboard/statics/assets/apple-touch-icon-167x167.png +0 -0
  759. dstack/dashboard/statics/assets/apple-touch-icon-180x180.png +0 -0
  760. dstack/dashboard/statics/assets/apple-touch-icon-57x57.png +0 -0
  761. dstack/dashboard/statics/assets/apple-touch-icon-60x60.png +0 -0
  762. dstack/dashboard/statics/assets/apple-touch-icon-72x72.png +0 -0
  763. dstack/dashboard/statics/assets/apple-touch-icon-76x76.png +0 -0
  764. dstack/dashboard/statics/assets/apple-touch-icon-precomposed.png +0 -0
  765. dstack/dashboard/statics/assets/apple-touch-icon.png +0 -0
  766. dstack/dashboard/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
  767. dstack/dashboard/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
  768. dstack/dashboard/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
  769. dstack/dashboard/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
  770. dstack/dashboard/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
  771. dstack/dashboard/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
  772. dstack/dashboard/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
  773. dstack/dashboard/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
  774. dstack/dashboard/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
  775. dstack/dashboard/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
  776. dstack/dashboard/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
  777. dstack/dashboard/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
  778. dstack/dashboard/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
  779. dstack/dashboard/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
  780. dstack/dashboard/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
  781. dstack/dashboard/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
  782. dstack/dashboard/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
  783. dstack/dashboard/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
  784. dstack/dashboard/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
  785. dstack/dashboard/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
  786. dstack/dashboard/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
  787. dstack/dashboard/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
  788. dstack/dashboard/statics/assets/browserconfig.xml +0 -15
  789. dstack/dashboard/statics/assets/coast-228x228.png +0 -0
  790. dstack/dashboard/statics/assets/favicon-16x16.png +0 -0
  791. dstack/dashboard/statics/assets/favicon-32x32.png +0 -0
  792. dstack/dashboard/statics/assets/favicon-48x48.png +0 -0
  793. dstack/dashboard/statics/assets/favicon.ico +0 -0
  794. dstack/dashboard/statics/assets/firefox_app_128x128.png +0 -0
  795. dstack/dashboard/statics/assets/firefox_app_512x512.png +0 -0
  796. dstack/dashboard/statics/assets/firefox_app_60x60.png +0 -0
  797. dstack/dashboard/statics/assets/manifest.webapp +0 -14
  798. dstack/dashboard/statics/assets/mstile-144x144.png +0 -0
  799. dstack/dashboard/statics/assets/mstile-150x150.png +0 -0
  800. dstack/dashboard/statics/assets/mstile-310x150.png +0 -0
  801. dstack/dashboard/statics/assets/mstile-310x310.png +0 -0
  802. dstack/dashboard/statics/assets/mstile-70x70.png +0 -0
  803. dstack/dashboard/statics/assets/yandex-browser-50x50.png +0 -0
  804. dstack/dashboard/statics/d0f71e48806e25d72553.png +0 -0
  805. dstack/dashboard/statics/index.html +0 -7
  806. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js +0 -3
  807. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js.LICENSE.txt +0 -102
  808. dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js.map +0 -1
  809. dstack/dashboard/statics/main.css +0 -5058
  810. dstack/dashboard/statics/splash_thumbnail.png +0 -0
  811. dstack/dashboard/statics/static/media/check.3f68ffc787a15c0476793a6d18ecb71a.svg +0 -3
  812. dstack/dashboard/statics/static/media/chevron-down.bfd8f22c4a5db4d443e76bca3b02f334.svg +0 -3
  813. dstack/dashboard/statics/static/media/chevron-up.bade0c5d82d741cead615813264140c9.svg +0 -3
  814. dstack/dashboard/statics/static/media/clock.583b744f29b9d143718a55e7c35fe38e.svg +0 -3
  815. dstack/dashboard/statics/static/media/close.a8bb9e47361b03a3b5084dad676ba1da.svg +0 -3
  816. dstack/dashboard/statics/static/media/content-copy.73f5f2a175094757758e315243a4111e.svg +0 -3
  817. dstack/dashboard/statics/static/media/delete-outline.6a8abf4e4f9cb777781967efd56efe9b.svg +0 -3
  818. dstack/dashboard/statics/static/media/dots-vertical.82fc618192e0c7dc4d615ff93269246a.svg +0 -3
  819. dstack/dashboard/statics/static/media/earth.1ad57c7f59f4be5c8bb2fa00439c3149.svg +0 -3
  820. dstack/dashboard/statics/static/media/email.320bc3af24a5f1bb41ebd85f66a5dd70.svg +0 -3
  821. dstack/dashboard/statics/static/media/external-link.99b88e699c15afb820a1779d9a2261ed.svg +0 -3
  822. dstack/dashboard/statics/static/media/eye-off-outline.5b4afb7ad624a44dd307518ff93d1faa.svg +0 -3
  823. dstack/dashboard/statics/static/media/eye-outline.ca41708feaaed1edb15c5fff021fbafe.svg +0 -3
  824. dstack/dashboard/statics/static/media/file-download-outline.3634b41923ba79b297ff294ef898661c.svg +0 -3
  825. dstack/dashboard/statics/static/media/folder-outline.33378387af61821dd1207e4b2d061a07.svg +0 -3
  826. dstack/dashboard/statics/static/media/github-circle.1bb85d171c31a3c2eebad07319377171.svg +0 -3
  827. dstack/dashboard/statics/static/media/infinity.915f92939afc0a37f94adba211ceb172.svg +0 -3
  828. dstack/dashboard/statics/static/media/layers.b4b02cea267a617d7aa44c2719250c89.svg +0 -3
  829. dstack/dashboard/statics/static/media/linkedin.1c52fae553eee54397f0e63a79455a5e.svg +0 -3
  830. dstack/dashboard/statics/static/media/loading.e466be7b2c1f0ac9e7e51ca929d0e37d.svg +0 -3
  831. dstack/dashboard/statics/static/media/lock.4a4c7768d0fa60c716609ddc483470ef.svg +0 -3
  832. dstack/dashboard/statics/static/media/magnify.0c803314d039d21f3cb1504ccd1437a4.svg +0 -3
  833. dstack/dashboard/statics/static/media/mark.3f68ffc787a15c0476793a6d18ecb71a.svg +0 -3
  834. dstack/dashboard/statics/static/media/menu-close.3ee84714181017c6ff837830297c8437.svg +0 -3
  835. dstack/dashboard/statics/static/media/menu.922f81e0972fbcbb5adcd8def20c86a3.svg +0 -3
  836. dstack/dashboard/statics/static/media/pencil.f706a3b9dcbff4959a91bf72e1e6324f.svg +0 -3
  837. dstack/dashboard/statics/static/media/refresh.a80edb948e98b322cd73b67814a57a48.svg +0 -3
  838. dstack/dashboard/statics/static/media/shape-plus.63b093c7f4b44c3def774f30fcfbceca.svg +0 -3
  839. dstack/dashboard/statics/static/media/slack.ec2fca99c6b944950ac65404ddd26880.svg +0 -4
  840. dstack/dashboard/statics/static/media/small-logo.b9cc8d09f646a553e65fa336dafd8b10.svg +0 -116
  841. dstack/dashboard/statics/static/media/source-branch.b8d22cfc42a7bed81f0fc08130818e85.svg +0 -3
  842. dstack/dashboard/statics/static/media/source-commit.be2bb53c081b9b6836adffccc0b8d3e6.svg +0 -3
  843. dstack/dashboard/statics/static/media/stop.11488ff1437ad929476be8924a3b7075.svg +0 -3
  844. dstack/dashboard/statics/static/media/tag-minus.15680a815b0b8d027e973c84832c05e6.svg +0 -3
  845. dstack/dashboard/statics/static/media/tag-outline.19b0bf86a8afd7d6d9c716e9a91d94ca.svg +0 -3
  846. dstack/dashboard/statics/static/media/twitter.4af18861c84a2f3044c7546b55d5739c.svg +0 -3
  847. dstack/dashboard/tags.py +0 -119
  848. dstack/jobs.py +0 -255
  849. dstack/providers/__init__.py +0 -316
  850. dstack/providers/_python/main.py +0 -88
  851. dstack/providers/_tensorboard/main.py +0 -93
  852. dstack/providers/_torchrun/main.py +0 -121
  853. dstack/providers/bash/main.py +0 -90
  854. dstack/providers/code/main.py +0 -95
  855. dstack/providers/docker/main.py +0 -79
  856. dstack/providers/lab/main.py +0 -95
  857. dstack/providers/notebook/main.py +0 -90
  858. dstack/random_name.py +0 -29
  859. dstack/repo.py +0 -135
  860. dstack/runners.py +0 -35
  861. dstack/util.py +0 -15
  862. dstack-0.0.9.dist-info/METADATA +0 -176
  863. dstack-0.0.9.dist-info/RECORD +0 -179
  864. dstack-0.0.9.dist-info/entry_points.txt +0 -3
  865. dstack-0.0.9.dist-info/top_level.txt +0 -2
  866. tests/test_config.py +0 -70
  867. /dstack/{cli → _internal}/__init__.py +0 -0
  868. /dstack/{dashboard → _internal/cli}/__init__.py +0 -0
  869. /dstack/{providers/_python → _internal/cli/models}/__init__.py +0 -0
  870. /dstack/{providers/_tensorboard → _internal/cli/services}/__init__.py +0 -0
  871. /dstack/{providers/_torchrun → _internal/cli/utils}/__init__.py +0 -0
  872. /dstack/{providers/bash → _internal/core}/__init__.py +0 -0
  873. /dstack/{providers/code → _internal/core/backends}/__init__.py +0 -0
  874. /dstack/{providers/docker → _internal/core/backends/aws}/__init__.py +0 -0
  875. /dstack/{providers/lab → _internal/core/backends/azure}/__init__.py +0 -0
  876. /dstack/{providers/notebook → _internal/core/backends/base}/__init__.py +0 -0
  877. {tests → dstack/_internal/core/backends/cloudrift}/__init__.py +0 -0
  878. /dstack/{dashboard → _internal/server}/statics/assets/yandex-browser-manifest.json +0 -0
  879. /dstack/{dashboard → _internal/server}/statics/robots.txt +0 -0
@@ -0,0 +1,1026 @@
1
+ import itertools
2
+ import math
3
+ import uuid
4
+ from collections.abc import Iterable
5
+ from datetime import datetime, timezone
6
+ from typing import List, Optional
7
+
8
+ import pydantic
9
+ from apscheduler.triggers.cron import CronTrigger
10
+ from sqlalchemy import and_, func, or_, select, update
11
+ from sqlalchemy.ext.asyncio import AsyncSession
12
+ from sqlalchemy.orm import joinedload, selectinload
13
+
14
+ import dstack._internal.utils.common as common_utils
15
+ from dstack._internal.core.errors import (
16
+ RepoDoesNotExistError,
17
+ ResourceNotExistsError,
18
+ ServerClientError,
19
+ )
20
+ from dstack._internal.core.models.common import ApplyAction
21
+ from dstack._internal.core.models.profiles import (
22
+ RetryEvent,
23
+ )
24
+ from dstack._internal.core.models.runs import (
25
+ ApplyRunPlanInput,
26
+ Job,
27
+ JobSpec,
28
+ JobStatus,
29
+ JobSubmission,
30
+ JobTerminationReason,
31
+ ProbeSpec,
32
+ Run,
33
+ RunFleet,
34
+ RunPlan,
35
+ RunSpec,
36
+ RunStatus,
37
+ RunTerminationReason,
38
+ ServiceSpec,
39
+ )
40
+ from dstack._internal.server.db import get_db, is_db_postgres, is_db_sqlite
41
+ from dstack._internal.server.models import (
42
+ FleetModel,
43
+ JobModel,
44
+ ProbeModel,
45
+ ProjectModel,
46
+ RepoModel,
47
+ RunModel,
48
+ UserModel,
49
+ )
50
+ from dstack._internal.server.services import events, services
51
+ from dstack._internal.server.services import repos as repos_services
52
+ from dstack._internal.server.services.jobs import (
53
+ check_can_attach_job_volumes,
54
+ delay_job_instance_termination,
55
+ get_job_configured_volumes,
56
+ get_jobs_from_run_spec,
57
+ job_model_to_job_submission,
58
+ remove_job_spec_sensitive_info,
59
+ stop_runner,
60
+ switch_job_status,
61
+ )
62
+ from dstack._internal.server.services.locking import get_locker, string_to_lock_id
63
+ from dstack._internal.server.services.logging import fmt
64
+ from dstack._internal.server.services.plugins import apply_plugin_policies
65
+ from dstack._internal.server.services.probes import is_probe_ready
66
+ from dstack._internal.server.services.projects import list_user_project_models
67
+ from dstack._internal.server.services.resources import set_resources_defaults
68
+ from dstack._internal.server.services.runs.plan import get_job_plans
69
+ from dstack._internal.server.services.runs.spec import (
70
+ can_update_run_spec,
71
+ check_can_update_run_spec,
72
+ validate_run_spec_and_set_defaults,
73
+ )
74
+ from dstack._internal.server.services.secrets import get_project_secrets_mapping
75
+ from dstack._internal.server.services.users import get_user_model_by_name
76
+ from dstack._internal.utils.logging import get_logger
77
+ from dstack._internal.utils.random_names import generate_name
78
+
79
+ logger = get_logger(__name__)
80
+
81
+
82
# Set of the two "no capacity" job termination reasons.
# NOTE(review): judging by the name, jobs terminated for these reasons are the
# ones considered for retry; the code consuming this set is outside this view.
+ JOB_TERMINATION_REASONS_TO_RETRY = {
83
+ JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY,
84
+ JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY,
85
+ }
86
+
87
+
88
def switch_run_status(
    session: AsyncSession,
    run_model: RunModel,
    new_status: RunStatus,
    actor: events.AnyActor = events.SystemActor(),
):
    """
    Set `run_model.status` to `new_status` and emit a status-change event.

    Nothing happens (no assignment, no event) when the run already has
    `new_status`.

    Args:
        session: Session passed through to `events.emit`.
        run_model: The run whose status is changed in place.
        new_status: The status to switch to.
        actor: The actor recorded on the emitted event.

    Raises:
        ValueError: If `new_status` is `RunStatus.TERMINATING` and
            `run_model.termination_reason` is `None`. The status has already
            been assigned on `run_model` when this is raised.
    """
    previous_status = run_model.status
    if previous_status == new_status:
        return

    # The status is assigned before the termination reason is validated.
    run_model.status = new_status

    event_message = f"Run status changed {previous_status.upper()} -> {new_status.upper()}"
    if new_status == RunStatus.TERMINATING:
        reason = run_model.termination_reason
        if reason is None:
            raise ValueError("termination_reason must be set when switching to TERMINATING status")
        event_message = f"{event_message}. Termination reason: {reason.upper()}"

    events.emit(
        session,
        event_message,
        actor=actor,
        targets=[events.Target.from_model(run_model)],
    )
109
+
110
+
111
async def list_user_runs(
    session: AsyncSession,
    user: UserModel,
    project_name: Optional[str],
    repo_id: Optional[str],
    username: Optional[str],
    only_active: bool,
    include_jobs: bool,
    job_submissions_limit: Optional[int],
    prev_submitted_at: Optional[datetime],
    prev_run_id: Optional[uuid.UUID],
    limit: int,
    ascending: bool,
) -> List[Run]:
    """
    List runs from the projects returned by `list_user_project_models` for `user`.

    The result can be narrowed to one project, one repo of that project, and the runs
    of one user. `prev_submitted_at`/`prev_run_id`, `limit` and `ascending` are passed
    through to `list_projects_run_models`. Runs that fail pydantic validation on
    conversion are skipped and only their count is logged.

    Raises:
        ResourceNotExistsError: if `username` is given but no such user exists.
        RepoDoesNotExistError: if `repo_id` is given but not found in the project.
    """
    # A repo can only be resolved within a specific project.
    if repo_id is not None and project_name is None:
        return []

    visible_projects = await list_user_project_models(
        session=session,
        user=user,
        only_names=True,
    )

    runs_user = None
    if username is not None:
        runs_user = await get_user_model_by_name(session=session, username=username)
        if runs_user is None:
            raise ResourceNotExistsError("User not found")

    repo = None
    if project_name is not None:
        visible_projects = [p for p in visible_projects if p.name == project_name]
        if not visible_projects:
            return []
        if repo_id is not None:
            repo = await repos_services.get_repo_model(
                session=session,
                project=visible_projects[0],
                repo_id=repo_id,
            )
            if repo is None:
                raise RepoDoesNotExistError.with_id(repo_id)

    run_models = await list_projects_run_models(
        session=session,
        projects=visible_projects,
        repo=repo,
        runs_user=runs_user,
        only_active=only_active,
        prev_submitted_at=prev_submitted_at,
        prev_run_id=prev_run_id,
        limit=limit,
        ascending=ascending,
    )

    runs = []
    for run_model in run_models:
        try:
            run = run_model_to_run(
                run_model,
                return_in_api=True,
                include_jobs=include_jobs,
                job_submissions_limit=job_submissions_limit,
            )
        except pydantic.ValidationError:
            # Best effort: skip runs that cannot be loaded
            continue
        runs.append(run)

    skipped_count = len(run_models) - len(runs)
    if skipped_count > 0:
        logger.debug("Can't load %s runs", skipped_count)
    return runs
177
+
178
+
179
async def list_projects_run_models(
    session: AsyncSession,
    projects: List[ProjectModel],
    repo: Optional[RepoModel],
    runs_user: Optional[UserModel],
    only_active: bool,
    prev_submitted_at: Optional[datetime],
    prev_run_id: Optional[uuid.UUID],
    limit: int,
    ascending: bool,
) -> List[RunModel]:
    """
    Query run models of the given projects with optional filters and keyset pagination.

    Rows are ordered by `submitted_at`, with ties broken by `id` in the opposite
    direction. `prev_submitted_at` and `prev_run_id` identify the last row of the
    previous page; only rows that sort strictly after it are returned.
    """
    conditions = [RunModel.project_id.in_(p.id for p in projects)]
    if repo is not None:
        conditions.append(RunModel.repo_id == repo.id)
    if runs_user is not None:
        conditions.append(RunModel.user_id == runs_user.id)
    if only_active:
        conditions.append(RunModel.status.not_in(RunStatus.finished_statuses()))

    if prev_submitted_at is not None:
        # Rows strictly past the cursor timestamp in the requested direction
        if ascending:
            past_cursor = RunModel.submitted_at > prev_submitted_at
        else:
            past_cursor = RunModel.submitted_at < prev_submitted_at

        if prev_run_id is None:
            conditions.append(past_cursor)
        else:
            # Rows sharing the cursor timestamp are disambiguated by id;
            # the id direction mirrors the secondary sort key below.
            if ascending:
                tie_breaker = RunModel.id < prev_run_id
            else:
                tie_breaker = RunModel.id > prev_run_id
            conditions.append(
                or_(
                    past_cursor,
                    and_(RunModel.submitted_at == prev_submitted_at, tie_breaker),
                )
            )

    if ascending:
        ordering = (RunModel.submitted_at.asc(), RunModel.id.desc())
    else:
        ordering = (RunModel.submitted_at.desc(), RunModel.id)

    stmt = select(RunModel).where(*conditions)
    stmt = stmt.options(joinedload(RunModel.user).load_only(UserModel.name))
    stmt = stmt.options(joinedload(RunModel.fleet).load_only(FleetModel.id, FleetModel.name))
    stmt = stmt.options(selectinload(RunModel.jobs).joinedload(JobModel.probes))
    stmt = stmt.order_by(*ordering).limit(limit)

    result = await session.execute(stmt)
    return list(result.scalars().all())
238
+
239
+
240
async def get_run(
    session: AsyncSession,
    project: ProjectModel,
    run_name: Optional[str] = None,
    run_id: Optional[uuid.UUID] = None,
) -> Optional[Run]:
    """
    Fetch a run of the project by id or by name; `run_id` wins when both are given.

    Raises:
        ServerClientError: if neither `run_name` nor `run_id` is specified.
    """
    if run_id is not None:
        return await get_run_by_id(session=session, project=project, run_id=run_id)
    if run_name is None:
        raise ServerClientError("run_name or id must be specified")
    return await get_run_by_name(session=session, project=project, run_name=run_name)
259
+
260
+
261
async def get_run_by_name(
    session: AsyncSession,
    project: ProjectModel,
    run_name: str,
) -> Optional[Run]:
    """
    Return the non-deleted run called `run_name` in the project, or None if there is none.

    The user, fleet (id and name only), jobs and job probes are loaded together with the run.
    """
    stmt = select(RunModel).where(
        RunModel.project_id == project.id,
        RunModel.run_name == run_name,
        RunModel.deleted == False,
    )
    stmt = stmt.options(joinedload(RunModel.user))
    stmt = stmt.options(joinedload(RunModel.fleet).load_only(FleetModel.id, FleetModel.name))
    stmt = stmt.options(selectinload(RunModel.jobs).joinedload(JobModel.probes))

    found = (await session.execute(stmt)).scalar()
    return None if found is None else run_model_to_run(found, return_in_api=True)
281
+
282
+
283
async def get_run_by_id(
    session: AsyncSession,
    project: ProjectModel,
    run_id: uuid.UUID,
) -> Optional[Run]:
    """
    Return the run with the given id in the project, or None if there is none.

    No filter on the `deleted` flag is applied here.
    The user, fleet (id and name only), jobs and job probes are loaded together with the run.
    """
    stmt = select(RunModel).where(
        RunModel.project_id == project.id,
        RunModel.id == run_id,
    )
    stmt = stmt.options(joinedload(RunModel.user))
    stmt = stmt.options(joinedload(RunModel.fleet).load_only(FleetModel.id, FleetModel.name))
    stmt = stmt.options(selectinload(RunModel.jobs).joinedload(JobModel.probes))

    found = (await session.execute(stmt)).scalar()
    return None if found is None else run_model_to_run(found, return_in_api=True)
302
+
303
+
304
async def get_plan(
    session: AsyncSession,
    project: ProjectModel,
    user: UserModel,
    run_spec: RunSpec,
    max_offers: Optional[int],
    legacy_repo_dir: bool = False,
) -> RunPlan:
    """
    Build a run plan for `run_spec` without submitting anything.

    The effective spec is a copy of `run_spec` with plugin policies applied and
    defaults set. It is used for the profile, the create/update decision and the
    job plans. The original `run_spec` is returned untouched in `RunPlan.run_spec`.

    The action is UPDATE only if a non-deleted run with the same name exists, is not
    finished, and `can_update_run_spec` allows the change. Otherwise it is CREATE.
    """
    # Spec must be copied by parsing to calculate merged_profile
    effective_run_spec = RunSpec.parse_obj(run_spec.dict())
    effective_run_spec = await apply_plugin_policies(
        user=user.name,
        project=project.name,
        spec=effective_run_spec,
    )
    effective_run_spec = RunSpec.parse_obj(effective_run_spec.dict())
    validate_run_spec_and_set_defaults(
        user=user,
        run_spec=effective_run_spec,
        legacy_repo_dir=legacy_repo_dir,
    )
    profile = effective_run_spec.merged_profile

    current_resource = None
    action = ApplyAction.CREATE
    if effective_run_spec.run_name is not None:
        current_resource = await get_run_by_name(
            session=session,
            project=project,
            run_name=effective_run_spec.run_name,
        )
        if current_resource is not None:
            # For backward compatibility (current_resource may have been submitted before
            # some fields, e.g., CPUSpec.arch, were added)
            set_resources_defaults(current_resource.run_spec.configuration.resources)
            if not current_resource.status.is_finished() and can_update_run_spec(
                current_resource.run_spec, effective_run_spec
            ):
                action = ApplyAction.UPDATE

    # Plan jobs from the effective spec so that the plan matches `profile` and reflects
    # the plugin policies and defaults that will be in force on apply.
    job_plans = await get_job_plans(
        session=session,
        project=project,
        profile=profile,
        run_spec=effective_run_spec,
        max_offers=max_offers,
    )
    return RunPlan(
        project_name=project.name,
        user=user.name,
        run_spec=run_spec,
        effective_run_spec=effective_run_spec,
        job_plans=job_plans,
        current_resource=current_resource,
        action=action,
    )
361
+
362
+
363
async def apply_plan(
    session: AsyncSession,
    user: UserModel,
    project: ProjectModel,
    plan: ApplyRunPlanInput,
    force: bool,
    legacy_repo_dir: bool = False,
) -> Run:
    """
    Apply a run plan: submit a new run or update the spec of an active run in place.

    A new run is submitted if the spec has no run name, or no non-deleted run with
    that name exists, or the existing run is finished. Otherwise the existing run
    is updated: its `run_spec` and `priority` are overwritten and `deployment_num`
    is incremented.

    Unless `force` is set, the update is rejected if `plan.current_resource` is
    missing or differs (by id or run spec) from the run currently stored.

    Raises:
        ServerClientError: if the active run cannot be updated with the new spec,
            or if the resource changed since the plan was made and `force` is False.
    """
    run_spec = plan.run_spec
    run_spec = await apply_plugin_policies(
        user=user.name,
        project=project.name,
        spec=run_spec,
    )
    # Spec must be copied by parsing to calculate merged_profile
    run_spec = RunSpec.parse_obj(run_spec.dict())
    validate_run_spec_and_set_defaults(
        user=user, run_spec=run_spec, legacy_repo_dir=legacy_repo_dir
    )
    if run_spec.run_name is None:
        # No name to look up, so this can only be a new run
        return await submit_run(
            session=session,
            user=user,
            project=project,
            run_spec=run_spec,
        )
    current_resource = await get_run_by_name(
        session=session,
        project=project,
        run_name=run_spec.run_name,
    )
    if current_resource is None or current_resource.status.is_finished():
        return await submit_run(
            session=session,
            user=user,
            project=project,
            run_spec=run_spec,
        )

    # For backward compatibility (current_resource may have been submitted before
    # some fields, e.g., CPUSpec.arch, were added)
    set_resources_defaults(current_resource.run_spec.configuration.resources)
    try:
        check_can_update_run_spec(current_resource.run_spec, run_spec)
    except ServerClientError:
        # The except is only needed to raise an appropriate error if run is active
        if not current_resource.status.is_finished():
            raise ServerClientError("Cannot override active run. Stop the run first.")
        raise
    if not force:
        # Detect concurrent changes: the resource the client planned against
        # must be the same as the one stored now.
        if plan.current_resource is not None:
            set_resources_defaults(plan.current_resource.run_spec.configuration.resources)
        if (
            plan.current_resource is None
            or plan.current_resource.id != current_resource.id
            or plan.current_resource.run_spec != current_resource.run_spec
        ):
            raise ServerClientError(
                "Failed to apply plan. Resource has been changed. Try again or use force apply."
            )
    # FIXME: potentially long write transaction
    # Avoid getting run_model after update
    await session.execute(
        update(RunModel)
        .where(RunModel.id == current_resource.id)
        .values(
            run_spec=run_spec.json(),
            priority=run_spec.configuration.priority,
            deployment_num=current_resource.deployment_num + 1,
        )
    )
    # Re-read the run so the returned object reflects the updated values
    run = await get_run_by_name(
        session=session,
        project=project,
        run_name=run_spec.run_name,
    )
    return common_utils.get_or_error(run)
440
+
441
+
442
async def submit_run(
    session: AsyncSession,
    user: UserModel,
    project: ProjectModel,
    run_spec: RunSpec,
) -> Run:
    """
    Create a new run from `run_spec` together with its initial jobs and commit them.

    While holding a per-project run-names lock, the run name is either generated
    (if not set) or any existing runs with that name are deleted via `delete_runs`.
    The run starts as SUBMITTED, or as PENDING with no initial jobs if the merged
    profile has a schedule. For services, the service is registered and jobs are
    created for `count.min` replicas of every replica group. For other run types,
    jobs are created for a single replica.

    Returns the run as re-read from the database after the commit.
    """
    validate_run_spec_and_set_defaults(user, run_spec)
    repo = await _get_run_repo_or_error(
        session=session,
        project=project,
        run_spec=run_spec,
    )
    secrets = await get_project_secrets_mapping(
        session=session,
        project=project,
    )

    # Serialize run name allocation within the project
    lock_namespace = f"run_names_{project.name}"
    if is_db_sqlite():
        # Start new transaction to see committed changes after lock
        await session.commit()
    elif is_db_postgres():
        await session.execute(
            select(func.pg_advisory_xact_lock(string_to_lock_id(lock_namespace)))
        )
    lock, _ = get_locker(get_db().dialect_name).get_lockset(lock_namespace)
    async with lock:
        # FIXME: delete_runs commits, so Postgres lock is released too early.
        if run_spec.run_name is None:
            run_spec.run_name = await _generate_run_name(
                session=session,
                project=project,
            )
        else:
            # Raises if a run with this name is still active
            await delete_runs(
                session=session, user=user, project=project, runs_names=[run_spec.run_name]
            )

        await _validate_run(
            session=session,
            user=user,
            project=project,
            run_spec=run_spec,
        )

        submitted_at = common_utils.get_current_datetime()
        initial_status = RunStatus.SUBMITTED
        initial_replicas = 1
        if run_spec.merged_profile.schedule is not None:
            # Scheduled runs wait in PENDING and get no jobs on submission
            initial_status = RunStatus.PENDING
            initial_replicas = 0

        run_model = RunModel(
            id=uuid.uuid4(),
            project_id=project.id,
            project=project,
            repo_id=repo.id,
            user_id=user.id,
            run_name=run_spec.run_name,
            submitted_at=submitted_at,
            status=initial_status,
            run_spec=run_spec.json(),
            last_processed_at=submitted_at,
            priority=run_spec.configuration.priority,
            deployment_num=0,
            desired_replica_count=1,  # a relevant value will be set in process_runs.py
            next_triggered_at=_get_next_triggered_at(run_spec),
        )
        session.add(run_model)
        events.emit(
            session,
            f"Run submitted. Status: {run_model.status.upper()}",
            actor=events.UserActor.from_user(user),
            targets=[events.Target.from_model(run_model)],
        )

        if run_spec.configuration.type == "service":
            await services.register_service(session, run_model, run_spec)
            service_config = run_spec.configuration

            global_replica_num = 0  # Global counter across all groups for unique replica_num

            for replica_group in service_config.replica_groups:
                if run_spec.merged_profile.schedule is not None:
                    group_initial_replicas = 0
                else:
                    group_initial_replicas = replica_group.count.min or 0

                # Each replica in this group gets the same group-specific configuration
                for group_replica_num in range(group_initial_replicas):
                    jobs = await get_jobs_from_run_spec(
                        run_spec=run_spec,
                        secrets=secrets,
                        replica_num=global_replica_num,
                        replica_group_name=replica_group.name,
                    )

                    for job in jobs:
                        job_model = create_job_model_for_new_submission(
                            run_model=run_model,
                            job=job,
                            status=JobStatus.SUBMITTED,
                        )
                        session.add(job_model)
                        events.emit(
                            session,
                            f"Job created on run submission. Status: {job_model.status.upper()}",
                            actor=events.SystemActor(),
                            targets=[
                                events.Target.from_model(job_model),
                            ],
                        )
                    global_replica_num += 1
        else:
            for replica_num in range(initial_replicas):
                jobs = await get_jobs_from_run_spec(
                    run_spec=run_spec,
                    secrets=secrets,
                    replica_num=replica_num,
                )
                for job in jobs:
                    job_model = create_job_model_for_new_submission(
                        run_model=run_model,
                        job=job,
                        status=JobStatus.SUBMITTED,
                    )
                    session.add(job_model)
                    events.emit(
                        session,
                        f"Job created on run submission. Status: {job_model.status.upper()}",
                        # Set `SystemActor` for consistency with all other places where jobs can be
                        # created (retry, scaling, rolling deployments, etc). Think of the run as being
                        # created by the user, while the job is created by the system to satisfy the
                        # run spec.
                        actor=events.SystemActor(),
                        targets=[
                            events.Target.from_model(job_model),
                        ],
                    )
        await session.commit()
        await session.refresh(run_model)

        # Re-read the run with all relationships needed for the API model
        run = await get_run_by_id(session, project, run_model.id)
        return common_utils.get_or_error(run)
586
+
587
+
588
def create_job_model_for_new_submission(
    run_model: RunModel,
    job: Job,
    status: JobStatus,
) -> JobModel:
    """
    Build a `JobModel` for a new submission of `job` within `run_model`.

    The submission number equals the number of submissions the job already has.
    Jobs with a non-zero `job_num` are created with `waiting_master_job` set.
    The model is only constructed here, not added to a session.

    **NOTE**: don't forget to emit an event when writing the new job to the database.
    """
    spec = job.job_spec
    created_at = common_utils.get_current_datetime()
    is_master_job = spec.job_num == 0
    return JobModel(
        id=uuid.uuid4(),
        project_id=run_model.project_id,
        run_id=run_model.id,
        run_name=run_model.run_name,
        job_num=spec.job_num,
        job_name=f"{spec.job_name}",
        replica_num=spec.replica_num,
        deployment_num=run_model.deployment_num,
        submission_num=len(job.job_submissions),
        submitted_at=created_at,
        last_processed_at=created_at,
        status=status,
        termination_reason=None,
        job_spec_data=spec.json(),
        job_provisioning_data=None,
        probes=[],
        waiting_master_job=not is_master_job,
    )
618
+
619
+
620
async def stop_runs(
    session: AsyncSession,
    user: UserModel,
    project: ProjectModel,
    runs_names: List[str],
    abort: bool,
):
    """
    Switch the unfinished runs with the given names to TERMINATING on behalf of `user`.

    The termination reason is ABORTED_BY_USER if `abort` is True and STOPPED_BY_USER otherwise.
    The jobs are not stopped here: the run will be terminated by process_runs.
    """
    # First pass: find candidate run ids without holding any locks
    res = await session.execute(
        select(RunModel).where(
            RunModel.project_id == project.id,
            RunModel.run_name.in_(runs_names),
            RunModel.status.not_in(RunStatus.finished_statuses()),
        )
    )
    run_models = res.scalars().all()
    run_ids = sorted([r.id for r in run_models])
    await session.commit()
    async with get_locker(get_db().dialect_name).lock_ctx(RunModel.__tablename__, run_ids):
        # Second pass: re-read the runs under the lock, refreshing already loaded objects
        res = await session.execute(
            select(RunModel)
            .where(RunModel.id.in_(run_ids))
            .order_by(RunModel.id)  # take locks in order
            .with_for_update(key_share=True)
            .execution_options(populate_existing=True)
        )
        run_models = res.scalars().all()
        now = common_utils.get_current_datetime()
        for run_model in run_models:
            # The run may have finished between the two queries
            if run_model.status.is_finished():
                continue
            # The reason must be set before switching to TERMINATING
            if abort:
                run_model.termination_reason = RunTerminationReason.ABORTED_BY_USER
            else:
                run_model.termination_reason = RunTerminationReason.STOPPED_BY_USER
            switch_run_status(
                session, run_model, RunStatus.TERMINATING, actor=events.UserActor.from_user(user)
            )
            run_model.last_processed_at = now
            # The run will be terminated by process_runs.
            # Terminating synchronously is problematic since it may take a long time.
        await session.commit()
665
+
666
+
667
async def delete_runs(
    session: AsyncSession,
    user: UserModel,
    project: ProjectModel,
    runs_names: List[str],
):
    """
    Mark the project's runs with the given names as deleted on behalf of `user`.

    Runs are soft-deleted by setting the `deleted` flag; a "Run deleted" event is emitted
    for each run that was not deleted before. The session is committed both before taking
    the lock and at the end.

    Raises:
        ServerClientError: if any of the matched runs is not finished.
    """
    # First pass: find candidate run ids without holding any locks
    res = await session.execute(
        select(RunModel).where(
            RunModel.project_id == project.id,
            RunModel.run_name.in_(runs_names),
        )
    )
    run_models = res.scalars().all()
    run_ids = sorted([r.id for r in run_models])
    await session.commit()
    async with get_locker(get_db().dialect_name).lock_ctx(RunModel.__tablename__, run_ids):
        # Second pass: re-read the runs under the lock
        res = await session.execute(
            select(RunModel)
            .where(RunModel.id.in_(run_ids))
            .order_by(RunModel.id)  # take locks in order
            .with_for_update(key_share=True)
        )
        run_models = res.scalars().all()
        # All-or-nothing: a single active run rejects the whole request
        active_runs = [r for r in run_models if not r.status.is_finished()]
        if len(active_runs) > 0:
            raise ServerClientError(
                msg=f"Cannot delete active runs: {[r.run_name for r in active_runs]}"
            )
        for run_model in run_models:
            # Skip already deleted runs so that no duplicate event is emitted
            if not run_model.deleted:
                run_model.deleted = True
                events.emit(
                    session,
                    "Run deleted",
                    actor=events.UserActor.from_user(user),
                    targets=[events.Target.from_model(run_model)],
                )
        await session.commit()
705
+
706
+
707
def run_model_to_run(
    run_model: RunModel,
    include_jobs: bool = True,
    job_submissions_limit: Optional[int] = None,
    return_in_api: bool = False,
    include_sensitive: bool = False,
) -> Run:
    """
    Convert a `RunModel` into a `Run`.

    Jobs are included only if `include_jobs` is set. `latest_job_submission` is the
    last submission of the first job, if any. `next_triggered_at` is computed only for
    runs that are not finished. The run cost is derived from the included job submissions.
    """
    jobs: List[Job] = (
        _get_run_jobs_with_submissions(
            run_model=run_model,
            job_submissions_limit=job_submissions_limit,
            return_in_api=return_in_api,
            include_sensitive=include_sensitive,
        )
        if include_jobs
        else []
    )

    run_spec = RunSpec.__response__.parse_raw(run_model.run_spec)

    # TODO(egor-s): does it make sense with replicas and multi-node?
    latest_job_submission = None
    if jobs and jobs[0].job_submissions:
        latest_job_submission = jobs[0].job_submissions[-1]

    service_spec = (
        ServiceSpec.__response__.parse_raw(run_model.service_spec)
        if run_model.service_spec is not None
        else None
    )

    status_message = _get_run_status_message(run_model)
    error = _get_run_error(run_model)
    fleet = _get_run_fleet(run_model)
    next_triggered_at = (
        None if run_model.status.is_finished() else _get_next_triggered_at(run_spec)
    )
    termination_reason = (
        run_model.termination_reason.value if run_model.termination_reason else None
    )

    run = Run(
        id=run_model.id,
        project_name=run_model.project.name,
        user=run_model.user.name,
        fleet=fleet,
        submitted_at=run_model.submitted_at,
        last_processed_at=run_model.last_processed_at,
        status=run_model.status,
        status_message=status_message,
        termination_reason=termination_reason,
        run_spec=run_spec,
        jobs=jobs,
        latest_job_submission=latest_job_submission,
        service=service_spec,
        deployment_num=run_model.deployment_num,
        error=error,
        deleted=run_model.deleted,
        next_triggered_at=next_triggered_at,
    )
    run.cost = _get_run_cost(run)
    return run
763
+
764
+
765
def _get_run_jobs_with_submissions(
    run_model: RunModel,
    job_submissions_limit: Optional[int],
    return_in_api: bool = False,
    include_sensitive: bool = False,
) -> List[Job]:
    """
    Group the run's job models into `Job` objects, one per (replica_num, job_num).

    Each job carries its submissions ordered by `submission_num`, limited to the last
    `job_submissions_limit` ones if the limit is set. With a limit of 0 the job is
    returned without submissions. The job spec is always taken from the latest kept
    submission; sensitive info is removed from it unless `include_sensitive` is set.
    """
    ordered_models = sorted(
        run_model.jobs, key=lambda jm: (jm.replica_num, jm.job_num, jm.submission_num)
    )
    result: List[Job] = []
    for _, group in itertools.groupby(
        ordered_models, key=lambda jm: (jm.replica_num, jm.job_num)
    ):
        kept_models = list(group)
        if job_submissions_limit == 0:
            # Take latest job submission to return its job_spec
            kept_models = kept_models[-1:]
        elif job_submissions_limit is not None:
            kept_models = kept_models[-job_submissions_limit:]
        if not kept_models:
            continue

        submissions = []
        if job_submissions_limit != 0:
            for kept_model in kept_models:
                submission = job_model_to_job_submission(
                    kept_model, include_probes=return_in_api
                )
                if return_in_api:
                    # Set default non-None values for 0.18 backward-compatibility
                    # Remove in 0.19
                    provisioning_data = submission.job_provisioning_data
                    if provisioning_data is not None:
                        if provisioning_data.hostname is None:
                            provisioning_data.hostname = ""
                        if provisioning_data.ssh_port is None:
                            provisioning_data.ssh_port = 22
                submissions.append(submission)

        # Use the spec from the latest submission. Submissions can have different specs
        latest_spec = JobSpec.__response__.parse_raw(kept_models[-1].job_spec_data)
        if not include_sensitive:
            remove_job_spec_sensitive_info(latest_spec)
        result.append(Job(job_spec=latest_spec, job_submissions=submissions))
    return result
806
+
807
+
808
def _get_run_status_message(run_model: RunModel) -> str:
    """
    Return a status string for the run.

    It is the run status value, except that "pulling" is returned when the latest
    submission of every job is PULLING, and "retrying" is returned for a SUBMITTED or
    PENDING run where some job last failed to start due to no capacity and its spec
    retries on no capacity.
    """
    if not run_model.jobs:
        return run_model.status.value

    ordered_models = sorted(
        run_model.jobs, key=lambda jm: (jm.replica_num, jm.job_num, jm.submission_num)
    )
    submissions_per_job = [
        list(group)
        for _, group in itertools.groupby(
            ordered_models, key=lambda jm: (jm.replica_num, jm.job_num)
        )
    ]

    # Show `pulling` if last job submission of all jobs is pulling
    if all(subs[-1].status == JobStatus.PULLING for subs in submissions_per_job):
        return "pulling"

    if run_model.status in [RunStatus.SUBMITTED, RunStatus.PENDING]:
        # Show `retrying` if any job caused the run to retry
        for subs in submissions_per_job:
            latest_spec = JobSpec.__response__.parse_raw(subs[-1].job_spec_data)
            retry_events = latest_spec.retry.on_events if latest_spec.retry else []
            last_reason = _get_last_job_termination_reason(subs)
            failed_on_capacity = (
                last_reason == JobTerminationReason.FAILED_TO_START_DUE_TO_NO_CAPACITY
            )
            # TODO: Show `retrying` for other retry events
            if failed_on_capacity and RetryEvent.NO_CAPACITY in retry_events:
                return "retrying"

    return run_model.status.value
839
+
840
+
841
def _get_last_job_termination_reason(job_models: List[JobModel]) -> Optional[JobTerminationReason]:
    """
    Return the termination reason of the last job model that has one, or None if none has.
    """
    reasons_latest_first = (
        jm.termination_reason
        for jm in reversed(job_models)
        if jm.termination_reason is not None
    )
    return next(reasons_latest_first, None)
846
+
847
+
848
def _get_run_error(run_model: RunModel) -> Optional[str]:
    """
    Return the error derived from the run's termination reason, or None if no reason is set.
    """
    reason = run_model.termination_reason
    return None if reason is None else reason.to_error()
852
+
853
+
854
def _get_run_fleet(run_model: RunModel) -> Optional[RunFleet]:
    """
    Return the id and name of the fleet assigned to the run, or None if no fleet is assigned.
    """
    fleet_model = run_model.fleet
    if fleet_model is not None:
        return RunFleet(id=fleet_model.id, name=fleet_model.name)
    return None
861
+
862
+
863
async def _generate_run_name(
    session: AsyncSession,
    project: ProjectModel,
) -> str:
    """
    Generate a run name that is not used by any non-deleted run in the project.

    A base name is generated once; numeric suffixes starting from 1 are tried in order
    until a free `<base>-<n>` name is found.
    """
    base_name = generate_name()
    for suffix in itertools.count(1):
        candidate = f"{base_name}-{suffix}"
        res = await session.execute(
            select(RunModel).where(
                RunModel.project_id == project.id,
                RunModel.run_name == candidate,
                RunModel.deleted == False,
            )
        )
        if res.scalar() is None:
            return candidate
881
+
882
+
883
async def _validate_run(
    session: AsyncSession,
    user: UserModel,
    project: ProjectModel,
    run_spec: RunSpec,
):
    """
    Validate a run before submission. Only the run volumes are validated; `user` is not used.
    """
    await _validate_run_volumes(session=session, project=project, run_spec=run_spec)
894
+
895
+
896
async def _validate_run_volumes(
    session: AsyncSession,
    project: ProjectModel,
    run_spec: RunSpec,
):
    """
    Check that the configured volumes can be attached for every job of the run.

    Tasks are checked for each of their nodes; other run types for a single job.
    """
    # The volumes validation should be done here and not in job configurator
    # since potentially we may need to validate volumes for jobs/replicas
    # that won't be created immediately (e.g. range of replicas or nodes).
    configuration = run_spec.configuration
    nodes = configuration.nodes if configuration.type == "task" else 1
    for job_num in range(nodes):
        job_volumes = await get_job_configured_volumes(
            session=session,
            project=project,
            run_spec=run_spec,
            job_num=job_num,
        )
        check_can_attach_job_volumes(volumes=job_volumes)
912
+
913
+
914
async def _get_run_repo_or_error(
    session: AsyncSession,
    project: ProjectModel,
    run_spec: RunSpec,
) -> RepoModel:
    """
    Return the repo model referenced by the run spec.

    Virtual repos are created or updated first, then the repo is looked up by id.

    Raises:
        RepoDoesNotExistError: if the repo is not found in the project.
    """
    # Must be set by _validate_run_spec_and_set_defaults()
    repo_id = common_utils.get_or_error(run_spec.repo_id)
    repo_data = common_utils.get_or_error(run_spec.repo_data)
    if repo_data.repo_type == "virtual":
        await repos_services.create_or_update_repo(
            session=session,
            project=project,
            repo_id=repo_id,
            repo_info=repo_data,
        )
    repo_model = await repos_services.get_repo_model(
        session=session,
        project=project,
        repo_id=repo_id,
    )
    if repo_model is not None:
        return repo_model
    raise RepoDoesNotExistError.with_id(repo_id)
937
+
938
+
939
def _get_run_cost(run: Run) -> float:
    """
    Return the total cost of all job submissions of the run, rounded to 4 decimal places.
    """
    submission_costs = [
        _get_job_submission_cost(submission)
        for job in run.jobs
        for submission in job.job_submissions
    ]
    return round(math.fsum(submission_costs), 4)
946
+
947
+
948
def _get_job_submission_cost(job_submission: JobSubmission) -> float:
    """
    Return the submission cost: the provisioning data price multiplied by the duration in hours.

    Submissions without provisioning data cost 0.
    """
    provisioning_data = job_submission.job_provisioning_data
    if provisioning_data is not None:
        hours = job_submission.duration.total_seconds() / 3600
        return provisioning_data.price * hours
    return 0
953
+
954
+
955
async def process_terminating_run(session: AsyncSession, run_model: RunModel):
    """
    Used by both `process_runs` and `stop_run` to process a TERMINATING run.
    Stops the jobs gracefully and marks them as TERMINATING.
    Jobs should be terminated by `process_terminating_jobs`.
    When all jobs are terminated, assigns a finished status to the run.
    Runs with a schedule that were not stopped or aborted by the user go back to PENDING instead.
    Caller must acquire the lock on run.
    """
    assert run_model.termination_reason is not None
    # Jobs are not needed here, only the parsed run spec
    run = run_model_to_run(run_model, include_jobs=False)
    job_termination_reason = run_model.termination_reason.to_job_termination_reason()

    unfinished_jobs_count = 0
    for job_model in run_model.jobs:
        if job_model.status.is_finished():
            continue
        unfinished_jobs_count += 1
        if job_model.status == JobStatus.TERMINATING:
            if job_termination_reason == JobTerminationReason.ABORTED_BY_USER:
                # Override termination reason so that
                # abort actions such as volume force detach are triggered
                job_model.termination_reason = job_termination_reason
            # Already terminating, nothing else to do for this job
            continue

        if job_model.status == JobStatus.RUNNING and job_termination_reason not in {
            JobTerminationReason.ABORTED_BY_USER,
            JobTerminationReason.DONE_BY_RUNNER,
        }:
            # Send a signal to stop the job gracefully
            await stop_runner(session, job_model)
        delay_job_instance_termination(job_model)
        job_model.termination_reason = job_termination_reason
        switch_job_status(session, job_model, JobStatus.TERMINATING)
        job_model.last_processed_at = common_utils.get_current_datetime()

    # The run leaves TERMINATING only once no unfinished jobs remain
    if unfinished_jobs_count == 0:
        if run_model.service_spec is not None:
            try:
                await services.unregister_service(session, run_model)
            except Exception as e:
                # Best effort: a failure to unregister must not block run termination
                logger.warning("%s: failed to unregister service: %s", fmt(run_model), repr(e))
        if (
            run.run_spec.merged_profile.schedule is not None
            and run_model.termination_reason
            not in [RunTerminationReason.ABORTED_BY_USER, RunTerminationReason.STOPPED_BY_USER]
        ):
            # Scheduled run: wait for the next trigger instead of finishing
            run_model.next_triggered_at = _get_next_triggered_at(run.run_spec)
            switch_run_status(session, run_model, RunStatus.PENDING)
            # Unassign run from fleet so that the new fleet can be chosen on the next submission
            run_model.fleet = None
        else:
            switch_run_status(session, run_model, run_model.termination_reason.to_status())
1007
+
1008
+
1009
def is_job_ready(probes: Iterable[ProbeModel], probe_specs: Iterable[ProbeSpec]) -> bool:
    """
    Return True if every probe is ready according to its paired spec.

    Probes and specs are paired positionally; pairing stops at the shorter iterable,
    so a job with no pairs is considered ready.
    """
    for probe, probe_spec in zip(probes, probe_specs):
        if not is_probe_ready(probe, probe_spec):
            return False
    return True
1011
+
1012
+
1013
def _get_next_triggered_at(run_spec: RunSpec) -> Optional[datetime]:
    """
    Return the earliest next fire time among the schedule's crons, evaluated in UTC
    relative to the current time, or None if the merged profile has no schedule.
    """
    schedule = run_spec.merged_profile.schedule
    if schedule is None:
        return None
    now = common_utils.get_current_datetime()
    next_fire_times = [
        CronTrigger.from_crontab(cron, timezone=timezone.utc).get_next_fire_time(
            previous_fire_time=None,
            now=now,
        )
        for cron in schedule.crons
    ]
    return min(next_fire_times)