skypilot-nightly 1.0.0.dev20251203__py3-none-any.whl → 1.0.0.dev20260112__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. sky/__init__.py +6 -2
  2. sky/adaptors/aws.py +1 -61
  3. sky/adaptors/slurm.py +565 -0
  4. sky/backends/backend_utils.py +95 -12
  5. sky/backends/cloud_vm_ray_backend.py +224 -65
  6. sky/backends/task_codegen.py +380 -4
  7. sky/catalog/__init__.py +0 -3
  8. sky/catalog/data_fetchers/fetch_gcp.py +9 -1
  9. sky/catalog/data_fetchers/fetch_nebius.py +1 -1
  10. sky/catalog/data_fetchers/fetch_vast.py +4 -2
  11. sky/catalog/kubernetes_catalog.py +12 -4
  12. sky/catalog/seeweb_catalog.py +30 -15
  13. sky/catalog/shadeform_catalog.py +5 -2
  14. sky/catalog/slurm_catalog.py +236 -0
  15. sky/catalog/vast_catalog.py +30 -6
  16. sky/check.py +25 -11
  17. sky/client/cli/command.py +391 -32
  18. sky/client/interactive_utils.py +190 -0
  19. sky/client/sdk.py +64 -2
  20. sky/client/sdk_async.py +9 -0
  21. sky/clouds/__init__.py +2 -0
  22. sky/clouds/aws.py +60 -2
  23. sky/clouds/azure.py +2 -0
  24. sky/clouds/cloud.py +7 -0
  25. sky/clouds/kubernetes.py +2 -0
  26. sky/clouds/runpod.py +38 -7
  27. sky/clouds/slurm.py +610 -0
  28. sky/clouds/ssh.py +3 -2
  29. sky/clouds/vast.py +39 -16
  30. sky/core.py +197 -37
  31. sky/dashboard/out/404.html +1 -1
  32. sky/dashboard/out/_next/static/3nu-b8raeKRNABZ2d4GAG/_buildManifest.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/1871-0565f8975a7dcd10.js +6 -0
  34. sky/dashboard/out/_next/static/chunks/2109-55a1546d793574a7.js +11 -0
  35. sky/dashboard/out/_next/static/chunks/2521-099b07cd9e4745bf.js +26 -0
  36. sky/dashboard/out/_next/static/chunks/2755.a636e04a928a700e.js +31 -0
  37. sky/dashboard/out/_next/static/chunks/3495.05eab4862217c1a5.js +6 -0
  38. sky/dashboard/out/_next/static/chunks/3785.cfc5dcc9434fd98c.js +1 -0
  39. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/3981.645d01bf9c8cad0c.js +21 -0
  41. sky/dashboard/out/_next/static/chunks/4083-0115d67c1fb57d6c.js +21 -0
  42. sky/dashboard/out/_next/static/chunks/{8640.5b9475a2d18c5416.js → 429.a58e9ba9742309ed.js} +2 -2
  43. sky/dashboard/out/_next/static/chunks/4555.8e221537181b5dc1.js +6 -0
  44. sky/dashboard/out/_next/static/chunks/4725.937865b81fdaaebb.js +6 -0
  45. sky/dashboard/out/_next/static/chunks/6082-edabd8f6092300ce.js +25 -0
  46. sky/dashboard/out/_next/static/chunks/6989-49cb7dca83a7a62d.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/6990-630bd2a2257275f8.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/7248-a99800d4db8edabd.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/754-cfc5d4ad1b843d29.js +18 -0
  50. sky/dashboard/out/_next/static/chunks/8050-dd8aa107b17dce00.js +16 -0
  51. sky/dashboard/out/_next/static/chunks/8056-d4ae1e0cb81e7368.js +1 -0
  52. sky/dashboard/out/_next/static/chunks/8555.011023e296c127b3.js +6 -0
  53. sky/dashboard/out/_next/static/chunks/8821-93c25df904a8362b.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/8969-0662594b69432ade.js +1 -0
  55. sky/dashboard/out/_next/static/chunks/9025.f15c91c97d124a5f.js +6 -0
  56. sky/dashboard/out/_next/static/chunks/9353-7ad6bd01858556f1.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/pages/_app-5a86569acad99764.js +34 -0
  58. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8297476714acb4ac.js +6 -0
  59. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-337c3ba1085f1210.js +1 -0
  60. sky/dashboard/out/_next/static/chunks/pages/{clusters-ee39056f9851a3ff.js → clusters-57632ff3684a8b5c.js} +1 -1
  61. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  62. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-5fd3a453c079c2ea.js +1 -0
  63. sky/dashboard/out/_next/static/chunks/pages/infra-9f85c02c9c6cae9e.js +1 -0
  64. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90f16972cbecf354.js +1 -0
  65. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-2dd42fc37aad427a.js +16 -0
  66. sky/dashboard/out/_next/static/chunks/pages/jobs-ed806aeace26b972.js +1 -0
  67. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-449a9f5a3bb20fb3.js +1 -0
  68. sky/dashboard/out/_next/static/chunks/pages/users-bec34706b36f3524.js +1 -0
  69. sky/dashboard/out/_next/static/chunks/pages/{volumes-b84b948ff357c43e.js → volumes-a83ba9b38dff7ea9.js} +1 -1
  70. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-84a40f8c7c627fe4.js → [name]-c781e9c3e52ef9fc.js} +1 -1
  71. sky/dashboard/out/_next/static/chunks/pages/workspaces-91e0942f47310aae.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/webpack-cfe59cf684ee13b9.js +1 -0
  73. sky/dashboard/out/_next/static/css/b0dbca28f027cc19.css +3 -0
  74. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  75. sky/dashboard/out/clusters/[cluster].html +1 -1
  76. sky/dashboard/out/clusters.html +1 -1
  77. sky/dashboard/out/config.html +1 -1
  78. sky/dashboard/out/index.html +1 -1
  79. sky/dashboard/out/infra/[context].html +1 -1
  80. sky/dashboard/out/infra.html +1 -1
  81. sky/dashboard/out/jobs/[job].html +1 -1
  82. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  83. sky/dashboard/out/jobs.html +1 -1
  84. sky/dashboard/out/plugins/[...slug].html +1 -0
  85. sky/dashboard/out/users.html +1 -1
  86. sky/dashboard/out/volumes.html +1 -1
  87. sky/dashboard/out/workspace/new.html +1 -1
  88. sky/dashboard/out/workspaces/[name].html +1 -1
  89. sky/dashboard/out/workspaces.html +1 -1
  90. sky/data/data_utils.py +26 -12
  91. sky/data/mounting_utils.py +44 -5
  92. sky/global_user_state.py +111 -19
  93. sky/jobs/client/sdk.py +8 -3
  94. sky/jobs/controller.py +191 -31
  95. sky/jobs/recovery_strategy.py +109 -11
  96. sky/jobs/server/core.py +81 -4
  97. sky/jobs/server/server.py +14 -0
  98. sky/jobs/state.py +417 -19
  99. sky/jobs/utils.py +73 -80
  100. sky/models.py +11 -0
  101. sky/optimizer.py +8 -6
  102. sky/provision/__init__.py +12 -9
  103. sky/provision/common.py +20 -0
  104. sky/provision/docker_utils.py +15 -2
  105. sky/provision/kubernetes/utils.py +163 -20
  106. sky/provision/kubernetes/volume.py +52 -17
  107. sky/provision/provisioner.py +17 -7
  108. sky/provision/runpod/instance.py +3 -1
  109. sky/provision/runpod/utils.py +13 -1
  110. sky/provision/runpod/volume.py +25 -9
  111. sky/provision/slurm/__init__.py +12 -0
  112. sky/provision/slurm/config.py +13 -0
  113. sky/provision/slurm/instance.py +618 -0
  114. sky/provision/slurm/utils.py +689 -0
  115. sky/provision/vast/instance.py +4 -1
  116. sky/provision/vast/utils.py +11 -6
  117. sky/resources.py +135 -13
  118. sky/schemas/api/responses.py +4 -0
  119. sky/schemas/db/global_user_state/010_save_ssh_key.py +1 -1
  120. sky/schemas/db/spot_jobs/008_add_full_resources.py +34 -0
  121. sky/schemas/db/spot_jobs/009_job_events.py +32 -0
  122. sky/schemas/db/spot_jobs/010_job_events_timestamp_with_timezone.py +43 -0
  123. sky/schemas/db/spot_jobs/011_add_links.py +34 -0
  124. sky/schemas/generated/jobsv1_pb2.py +9 -5
  125. sky/schemas/generated/jobsv1_pb2.pyi +12 -0
  126. sky/schemas/generated/jobsv1_pb2_grpc.py +44 -0
  127. sky/schemas/generated/managed_jobsv1_pb2.py +32 -28
  128. sky/schemas/generated/managed_jobsv1_pb2.pyi +11 -2
  129. sky/serve/serve_utils.py +232 -40
  130. sky/serve/server/impl.py +1 -1
  131. sky/server/common.py +17 -0
  132. sky/server/constants.py +1 -1
  133. sky/server/metrics.py +6 -3
  134. sky/server/plugins.py +238 -0
  135. sky/server/requests/executor.py +5 -2
  136. sky/server/requests/payloads.py +30 -1
  137. sky/server/requests/request_names.py +4 -0
  138. sky/server/requests/requests.py +33 -11
  139. sky/server/requests/serializers/encoders.py +22 -0
  140. sky/server/requests/serializers/return_value_serializers.py +70 -0
  141. sky/server/server.py +506 -109
  142. sky/server/server_utils.py +30 -0
  143. sky/server/uvicorn.py +5 -0
  144. sky/setup_files/MANIFEST.in +1 -0
  145. sky/setup_files/dependencies.py +22 -9
  146. sky/sky_logging.py +2 -1
  147. sky/skylet/attempt_skylet.py +13 -3
  148. sky/skylet/constants.py +55 -13
  149. sky/skylet/events.py +10 -4
  150. sky/skylet/executor/__init__.py +1 -0
  151. sky/skylet/executor/slurm.py +187 -0
  152. sky/skylet/job_lib.py +91 -5
  153. sky/skylet/log_lib.py +22 -6
  154. sky/skylet/log_lib.pyi +8 -6
  155. sky/skylet/services.py +18 -3
  156. sky/skylet/skylet.py +5 -1
  157. sky/skylet/subprocess_daemon.py +2 -1
  158. sky/ssh_node_pools/constants.py +12 -0
  159. sky/ssh_node_pools/core.py +40 -3
  160. sky/ssh_node_pools/deploy/__init__.py +4 -0
  161. sky/{utils/kubernetes/deploy_ssh_node_pools.py → ssh_node_pools/deploy/deploy.py} +279 -504
  162. sky/ssh_node_pools/deploy/tunnel/ssh-tunnel.sh +379 -0
  163. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  164. sky/ssh_node_pools/deploy/utils.py +173 -0
  165. sky/ssh_node_pools/server.py +11 -13
  166. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  167. sky/templates/kubernetes-ray.yml.j2 +12 -6
  168. sky/templates/slurm-ray.yml.j2 +115 -0
  169. sky/templates/vast-ray.yml.j2 +1 -0
  170. sky/templates/websocket_proxy.py +18 -41
  171. sky/users/model.conf +1 -1
  172. sky/users/permission.py +85 -52
  173. sky/users/rbac.py +31 -3
  174. sky/utils/annotations.py +108 -8
  175. sky/utils/auth_utils.py +42 -0
  176. sky/utils/cli_utils/status_utils.py +19 -5
  177. sky/utils/cluster_utils.py +10 -3
  178. sky/utils/command_runner.py +389 -35
  179. sky/utils/command_runner.pyi +43 -4
  180. sky/utils/common_utils.py +47 -31
  181. sky/utils/context.py +32 -0
  182. sky/utils/db/db_utils.py +36 -6
  183. sky/utils/db/migration_utils.py +41 -21
  184. sky/utils/infra_utils.py +5 -1
  185. sky/utils/instance_links.py +139 -0
  186. sky/utils/interactive_utils.py +49 -0
  187. sky/utils/kubernetes/generate_kubeconfig.sh +42 -33
  188. sky/utils/kubernetes/kubernetes_deploy_utils.py +2 -94
  189. sky/utils/kubernetes/rsync_helper.sh +5 -1
  190. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  191. sky/utils/plugin_extensions/__init__.py +14 -0
  192. sky/utils/plugin_extensions/external_failure_source.py +176 -0
  193. sky/utils/resources_utils.py +10 -8
  194. sky/utils/rich_utils.py +9 -11
  195. sky/utils/schemas.py +93 -19
  196. sky/utils/status_lib.py +7 -0
  197. sky/utils/subprocess_utils.py +17 -0
  198. sky/volumes/client/sdk.py +6 -3
  199. sky/volumes/server/core.py +65 -27
  200. sky_templates/ray/start_cluster +8 -4
  201. {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/METADATA +67 -59
  202. {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/RECORD +208 -180
  203. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +0 -1
  204. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +0 -11
  205. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +0 -6
  206. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +0 -1
  207. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
  208. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +0 -15
  209. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +0 -26
  210. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +0 -1
  211. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +0 -1
  212. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +0 -1
  213. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  214. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +0 -1
  215. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +0 -15
  216. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +0 -13
  217. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +0 -1
  218. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +0 -1
  219. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +0 -1
  220. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +0 -30
  221. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +0 -41
  222. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +0 -1
  223. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +0 -1
  224. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +0 -6
  225. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +0 -1
  226. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +0 -31
  227. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +0 -30
  228. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +0 -34
  229. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +0 -16
  230. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +0 -1
  231. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +0 -1
  232. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +0 -1
  233. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +0 -16
  234. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +0 -21
  235. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +0 -1
  236. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +0 -1
  237. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +0 -1
  238. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +0 -1
  239. sky/dashboard/out/_next/static/css/0748ce22df867032.css +0 -3
  240. /sky/dashboard/out/_next/static/{96_E2yl3QAiIJGOYCkSpB → 3nu-b8raeKRNABZ2d4GAG}/_ssgManifest.js +0 -0
  241. /sky/{utils/kubernetes → ssh_node_pools/deploy/tunnel}/cleanup-tunnel.sh +0 -0
  242. {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/WHEEL +0 -0
  243. {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/entry_points.txt +0 -0
  244. {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/licenses/LICENSE +0 -0
  245. {skypilot_nightly-1.0.0.dev20251203.dist-info → skypilot_nightly-1.0.0.dev20260112.dist-info}/top_level.txt +0 -0
sky/schemas/generated/managed_jobsv1_pb2.py CHANGED
@@ -14,7 +14,7 @@ _sym_db = _symbol_database.Default()
- DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n*sky/schemas/generated/managed_jobsv1.proto\x12\x0fmanaged_jobs.v1\"\x15\n\x06JobIds\x12\x0b\n\x03ids\x18\x01 \x03(\x03\"\x1c\n\nUserHashes\x12\x0e\n\x06hashes\x18\x01 \x03(\t\"\x1c\n\x08Statuses\x12\x10\n\x08statuses\x18\x01 \x03(\t\"\x18\n\x06\x46ields\x12\x0e\n\x06\x66ields\x18\x01 \x03(\t\" \n\nWorkspaces\x12\x12\n\nworkspaces\x18\x01 \x03(\t\"\x13\n\x11GetVersionRequest\"0\n\x12GetVersionResponse\x12\x1a\n\x12\x63ontroller_version\x18\x01 \x01(\t\"\xe1\x04\n\x12GetJobTableRequest\x12\x15\n\rskip_finished\x18\x01 \x01(\x08\x12?\n\x15\x61\x63\x63\x65ssible_workspaces\x18\x02 \x01(\x0b\x32\x1b.managed_jobs.v1.WorkspacesH\x00\x88\x01\x01\x12-\n\x07job_ids\x18\x03 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x01\x88\x01\x01\x12\x1c\n\x0fworkspace_match\x18\x04 \x01(\tH\x02\x88\x01\x01\x12\x17\n\nname_match\x18\x05 \x01(\tH\x03\x88\x01\x01\x12\x17\n\npool_match\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x11\n\x04page\x18\x07 \x01(\x05H\x05\x88\x01\x01\x12\x12\n\x05limit\x18\x08 \x01(\x05H\x06\x88\x01\x01\x12\x35\n\x0buser_hashes\x18\t \x01(\x0b\x32\x1b.managed_jobs.v1.UserHashesH\x07\x88\x01\x01\x12\x30\n\x08statuses\x18\n \x01(\x0b\x32\x19.managed_jobs.v1.StatusesH\x08\x88\x01\x01\x12#\n\x1bshow_jobs_without_user_hash\x18\x0b \x01(\x08\x12,\n\x06\x66ields\x18\x0c \x01(\x0b\x32\x17.managed_jobs.v1.FieldsH\t\x88\x01\x01\x42\x18\n\x16_accessible_workspacesB\n\n\x08_job_idsB\x12\n\x10_workspace_matchB\r\n\x0b_name_matchB\r\n\x0b_pool_matchB\x07\n\x05_pageB\x08\n\x06_limitB\x0e\n\x0c_user_hashesB\x0b\n\t_statusesB\t\n\x07_fields\"\xcb\x08\n\x0eManagedJobInfo\x12\x0e\n\x06job_id\x18\x01 \x01(\x03\x12\x0f\n\x07task_id\x18\x02 \x01(\x03\x12\x10\n\x08job_name\x18\x03 \x01(\t\x12\x11\n\ttask_name\x18\x04 \x01(\t\x12\x14\n\x0cjob_duration\x18\x05 \x01(\x01\x12\x16\n\tworkspace\x18\x06 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x06status\x18\x07 \x01(\x0e\x32!.managed_jobs.v1.ManagedJobStatus\x12@\n\x0eschedule_state\x18\x08 \x01(\x0e\x32(.managed_jobs.v1.ManagedJobScheduleState\x12\x11\n\tresources\x18\t \x01(\t\x12\x19\n\x11\x63luster_resources\x18\n \x01(\t\x12\x1e\n\x16\x63luster_resources_full\x18\x0b \x01(\t\x12\r\n\x05\x63loud\x18\x0c \x01(\t\x12\x0e\n\x06region\x18\r \x01(\t\x12\r\n\x05infra\x18\x0e \x01(\t\x12G\n\x0c\x61\x63\x63\x65lerators\x18\x0f \x03(\x0b\x32\x31.managed_jobs.v1.ManagedJobInfo.AcceleratorsEntry\x12\x16\n\x0erecovery_count\x18\x10 \x01(\x05\x12\x14\n\x07\x64\x65tails\x18\x11 \x01(\tH\x01\x88\x01\x01\x12\x1b\n\x0e\x66\x61ilure_reason\x18\x12 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tuser_name\x18\x13 \x01(\tH\x03\x88\x01\x01\x12\x16\n\tuser_hash\x18\x14 \x01(\tH\x04\x88\x01\x01\x12\x19\n\x0csubmitted_at\x18\x15 \x01(\x01H\x05\x88\x01\x01\x12\x15\n\x08start_at\x18\x16 \x01(\x01H\x06\x88\x01\x01\x12\x13\n\x06\x65nd_at\x18\x17 \x01(\x01H\x07\x88\x01\x01\x12\x16\n\tuser_yaml\x18\x18 \x01(\tH\x08\x88\x01\x01\x12\x17\n\nentrypoint\x18\x19 \x01(\tH\t\x88\x01\x01\x12?\n\x08metadata\x18\x1a \x03(\x0b\x32-.managed_jobs.v1.ManagedJobInfo.MetadataEntry\x12\x11\n\x04pool\x18\x1b \x01(\tH\n\x88\x01\x01\x12\x16\n\tpool_hash\x18\x1c \x01(\tH\x0b\x88\x01\x01\x12\x14\n\x07_job_id\x18\x1d \x01(\x03H\x0c\x88\x01\x01\x1a\x33\n\x11\x41\x63\x63\x65leratorsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01\x42\x0c\n\n_workspaceB\n\n\x08_detailsB\x11\n\x0f_failure_reasonB\x0c\n\n_user_nameB\x0c\n\n_user_hashB\x0f\n\r_submitted_atB\x0b\n\t_start_atB\t\n\x07_end_atB\x0c\n\n_user_yamlB\r\n\x0b_entrypointB\x07\n\x05_poolB\x0c\n\n_pool_hashB\n\n\x08X_job_id\"\xf0\x01\n\x13GetJobTableResponse\x12-\n\x04jobs\x18\x01 \x03(\x0b\x32\x1f.managed_jobs.v1.ManagedJobInfo\x12\r\n\x05total\x18\x02 \x01(\x05\x12\x17\n\x0ftotal_no_filter\x18\x03 \x01(\x05\x12M\n\rstatus_counts\x18\x04 \x03(\x0b\x32\x36.managed_jobs.v1.GetJobTableResponse.StatusCountsEntry\x1a\x33\n\x11StatusCountsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x05:\x02\x38\x01\"?\n\x19GetAllJobIdsByNameRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0b\n\t_job_name\"-\n\x1aGetAllJobIdsByNameResponse\x12\x0f\n\x07job_ids\x18\x01 \x03(\x03\"\xd7\x01\n\x11\x43\x61ncelJobsRequest\x12\x19\n\x11\x63urrent_workspace\x18\x01 \x01(\t\x12\x16\n\tuser_hash\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\tall_users\x18\x03 \x01(\x08H\x00\x12*\n\x07job_ids\x18\x04 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x00\x12\x12\n\x08job_name\x18\x05 \x01(\tH\x00\x12\x13\n\tpool_name\x18\x06 \x01(\tH\x00\x42\x17\n\x15\x63\x61ncellation_criteriaB\x0c\n\n_user_hash\"%\n\x12\x43\x61ncelJobsResponse\x12\x0f\n\x07message\x18\x01 \x01(\t\"\x97\x01\n\x11StreamLogsRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x06job_id\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12\x0e\n\x06\x66ollow\x18\x03 \x01(\x08\x12\x12\n\ncontroller\x18\x04 \x01(\x08\x12\x11\n\x04tail\x18\x05 \x01(\x05H\x02\x88\x01\x01\x42\x0b\n\t_job_nameB\t\n\x07_job_idB\x07\n\x05_tail\"L\n\x12StreamLogsResponse\x12\x10\n\x08log_line\x18\x01 \x01(\t\x12\x16\n\texit_code\x18\x02 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_exit_code*\x85\x04\n\x10ManagedJobStatus\x12\"\n\x1eMANAGED_JOB_STATUS_UNSPECIFIED\x10\x00\x12\x1e\n\x1aMANAGED_JOB_STATUS_PENDING\x10\x01\x12 \n\x1cMANAGED_JOB_STATUS_SUBMITTED\x10\x02\x12\x1f\n\x1bMANAGED_JOB_STATUS_STARTING\x10\x03\x12\x1e\n\x1aMANAGED_JOB_STATUS_RUNNING\x10\x04\x12!\n\x1dMANAGED_JOB_STATUS_RECOVERING\x10\x05\x12!\n\x1dMANAGED_JOB_STATUS_CANCELLING\x10\x06\x12 \n\x1cMANAGED_JOB_STATUS_SUCCEEDED\x10\x07\x12 \n\x1cMANAGED_JOB_STATUS_CANCELLED\x10\x08\x12\x1d\n\x19MANAGED_JOB_STATUS_FAILED\x10\t\x12#\n\x1fMANAGED_JOB_STATUS_FAILED_SETUP\x10\n\x12\'\n#MANAGED_JOB_STATUS_FAILED_PRECHECKS\x10\x0b\x12)\n%MANAGED_JOB_STATUS_FAILED_NO_RESOURCE\x10\x0c\x12(\n$MANAGED_JOB_STATUS_FAILED_CONTROLLER\x10\r*\x9e\x03\n\x17ManagedJobScheduleState\x12*\n&MANAGED_JOB_SCHEDULE_STATE_UNSPECIFIED\x10\x00\x12\x35\n-DEPRECATED_MANAGED_JOB_SCHEDULE_STATE_INVALID\x10\x01\x1a\x02\x08\x01\x12\'\n#MANAGED_JOB_SCHEDULE_STATE_INACTIVE\x10\x02\x12&\n\"MANAGED_JOB_SCHEDULE_STATE_WAITING\x10\x03\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING\x10\x04\x12(\n$MANAGED_JOB_SCHEDULE_STATE_LAUNCHING\x10\x05\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF\x10\x06\x12$\n 
MANAGED_JOB_SCHEDULE_STATE_ALIVE\x10\x07\x12#\n\x1fMANAGED_JOB_SCHEDULE_STATE_DONE\x10\x08\x32\xe4\x03\n\x12ManagedJobsService\x12U\n\nGetVersion\x12\".managed_jobs.v1.GetVersionRequest\x1a#.managed_jobs.v1.GetVersionResponse\x12X\n\x0bGetJobTable\x12#.managed_jobs.v1.GetJobTableRequest\x1a$.managed_jobs.v1.GetJobTableResponse\x12m\n\x12GetAllJobIdsByName\x12*.managed_jobs.v1.GetAllJobIdsByNameRequest\x1a+.managed_jobs.v1.GetAllJobIdsByNameResponse\x12U\n\nCancelJobs\x12\".managed_jobs.v1.CancelJobsRequest\x1a#.managed_jobs.v1.CancelJobsResponse\x12W\n\nStreamLogs\x12\".managed_jobs.v1.StreamLogsRequest\x1a#.managed_jobs.v1.StreamLogsResponse0\x01\x62\x06proto3')
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n*sky/schemas/generated/managed_jobsv1.proto\x12\x0fmanaged_jobs.v1\"\x15\n\x06JobIds\x12\x0b\n\x03ids\x18\x01 \x03(\x03\"\x1c\n\nUserHashes\x12\x0e\n\x06hashes\x18\x01 \x03(\t\"\x1c\n\x08Statuses\x12\x10\n\x08statuses\x18\x01 \x03(\t\"\x18\n\x06\x46ields\x12\x0e\n\x06\x66ields\x18\x01 \x03(\t\" \n\nWorkspaces\x12\x12\n\nworkspaces\x18\x01 \x03(\t\"\x13\n\x11GetVersionRequest\"0\n\x12GetVersionResponse\x12\x1a\n\x12\x63ontroller_version\x18\x01 \x01(\t\"\xe1\x04\n\x12GetJobTableRequest\x12\x15\n\rskip_finished\x18\x01 \x01(\x08\x12?\n\x15\x61\x63\x63\x65ssible_workspaces\x18\x02 \x01(\x0b\x32\x1b.managed_jobs.v1.WorkspacesH\x00\x88\x01\x01\x12-\n\x07job_ids\x18\x03 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x01\x88\x01\x01\x12\x1c\n\x0fworkspace_match\x18\x04 \x01(\tH\x02\x88\x01\x01\x12\x17\n\nname_match\x18\x05 \x01(\tH\x03\x88\x01\x01\x12\x17\n\npool_match\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x11\n\x04page\x18\x07 \x01(\x05H\x05\x88\x01\x01\x12\x12\n\x05limit\x18\x08 \x01(\x05H\x06\x88\x01\x01\x12\x35\n\x0buser_hashes\x18\t \x01(\x0b\x32\x1b.managed_jobs.v1.UserHashesH\x07\x88\x01\x01\x12\x30\n\x08statuses\x18\n \x01(\x0b\x32\x19.managed_jobs.v1.StatusesH\x08\x88\x01\x01\x12#\n\x1bshow_jobs_without_user_hash\x18\x0b \x01(\x08\x12,\n\x06\x66ields\x18\x0c \x01(\x0b\x32\x17.managed_jobs.v1.FieldsH\t\x88\x01\x01\x42\x18\n\x16_accessible_workspacesB\n\n\x08_job_idsB\x12\n\x10_workspace_matchB\r\n\x0b_name_matchB\r\n\x0b_pool_matchB\x07\n\x05_pageB\x08\n\x06_limitB\x0e\n\x0c_user_hashesB\x0b\n\t_statusesB\t\n\x07_fields\"\xb4\t\n\x0eManagedJobInfo\x12\x0e\n\x06job_id\x18\x01 \x01(\x03\x12\x0f\n\x07task_id\x18\x02 \x01(\x03\x12\x10\n\x08job_name\x18\x03 \x01(\t\x12\x11\n\ttask_name\x18\x04 \x01(\t\x12\x14\n\x0cjob_duration\x18\x05 \x01(\x01\x12\x16\n\tworkspace\x18\x06 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x06status\x18\x07 \x01(\x0e\x32!.managed_jobs.v1.ManagedJobStatus\x12@\n\x0eschedule_state\x18\x08 \x01(\x0e\x32(.managed_jobs.v1.ManagedJobScheduleState\x12\x11\n\tresources\x18\t \x01(\t\x12\x19\n\x11\x63luster_resources\x18\n \x01(\t\x12\x1e\n\x16\x63luster_resources_full\x18\x0b \x01(\t\x12\r\n\x05\x63loud\x18\x0c \x01(\t\x12\x0e\n\x06region\x18\r \x01(\t\x12\r\n\x05infra\x18\x0e \x01(\t\x12G\n\x0c\x61\x63\x63\x65lerators\x18\x0f \x03(\x0b\x32\x31.managed_jobs.v1.ManagedJobInfo.AcceleratorsEntry\x12\x16\n\x0erecovery_count\x18\x10 \x01(\x05\x12\x14\n\x07\x64\x65tails\x18\x11 \x01(\tH\x01\x88\x01\x01\x12\x1b\n\x0e\x66\x61ilure_reason\x18\x12 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tuser_name\x18\x13 \x01(\tH\x03\x88\x01\x01\x12\x16\n\tuser_hash\x18\x14 \x01(\tH\x04\x88\x01\x01\x12\x19\n\x0csubmitted_at\x18\x15 \x01(\x01H\x05\x88\x01\x01\x12\x15\n\x08start_at\x18\x16 \x01(\x01H\x06\x88\x01\x01\x12\x13\n\x06\x65nd_at\x18\x17 \x01(\x01H\x07\x88\x01\x01\x12\x16\n\tuser_yaml\x18\x18 \x01(\tH\x08\x88\x01\x01\x12\x17\n\nentrypoint\x18\x19 \x01(\tH\t\x88\x01\x01\x12?\n\x08metadata\x18\x1a \x03(\x0b\x32-.managed_jobs.v1.ManagedJobInfo.MetadataEntry\x12\x11\n\x04pool\x18\x1b \x01(\tH\n\x88\x01\x01\x12\x16\n\tpool_hash\x18\x1c \x01(\tH\x0b\x88\x01\x01\x12\x14\n\x07_job_id\x18\x1d \x01(\x03H\x0c\x88\x01\x01\x12\x39\n\x05links\x18\x1e \x03(\x0b\x32*.managed_jobs.v1.ManagedJobInfo.LinksEntry\x1a\x33\n\x11\x41\x63\x63\x65leratorsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01\x1a,\n\nLinksEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_workspaceB\n\n\x08_detailsB\x11\n\x0f_failure_reasonB\x0c\n\n_user_nameB\x0c\n\n_user_hashB\x0f\n\r_submitted_atB\x0b\n\t_start_atB\t\n\x07_end_atB\x0c\n\n_user_yamlB\r\n\x0b_entrypointB\x07\n\x05_poolB\x0c\n\n_pool_hashB\n\n\x08X_job_id\"\xf0\x01\n\x13GetJobTableResponse\x12-\n\x04jobs\x18\x01 \x03(\x0b\x32\x1f.managed_jobs.v1.ManagedJobInfo\x12\r\n\x05total\x18\x02 \x01(\x05\x12\x17\n\x0ftotal_no_filter\x18\x03 \x01(\x05\x12M\n\rstatus_counts\x18\x04 \x03(\x0b\x32\x36.managed_jobs.v1.GetJobTableResponse.StatusCountsEntry\x1a\x33\n\x11StatusCountsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x05:\x02\x38\x01\"?\n\x19GetAllJobIdsByNameRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0b\n\t_job_name\"-\n\x1aGetAllJobIdsByNameResponse\x12\x0f\n\x07job_ids\x18\x01 \x03(\x03\"\xd7\x01\n\x11\x43\x61ncelJobsRequest\x12\x19\n\x11\x63urrent_workspace\x18\x01 \x01(\t\x12\x16\n\tuser_hash\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\tall_users\x18\x03 \x01(\x08H\x00\x12*\n\x07job_ids\x18\x04 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x00\x12\x12\n\x08job_name\x18\x05 \x01(\tH\x00\x12\x13\n\tpool_name\x18\x06 \x01(\tH\x00\x42\x17\n\x15\x63\x61ncellation_criteriaB\x0c\n\n_user_hash\"%\n\x12\x43\x61ncelJobsResponse\x12\x0f\n\x07message\x18\x01 \x01(\t\"\x97\x01\n\x11StreamLogsRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x06job_id\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12\x0e\n\x06\x66ollow\x18\x03 \x01(\x08\x12\x12\n\ncontroller\x18\x04 \x01(\x08\x12\x11\n\x04tail\x18\x05 \x01(\x05H\x02\x88\x01\x01\x42\x0b\n\t_job_nameB\t\n\x07_job_idB\x07\n\x05_tail\"L\n\x12StreamLogsResponse\x12\x10\n\x08log_line\x18\x01 \x01(\t\x12\x16\n\texit_code\x18\x02 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_exit_code*\x85\x04\n\x10ManagedJobStatus\x12\"\n\x1eMANAGED_JOB_STATUS_UNSPECIFIED\x10\x00\x12\x1e\n\x1aMANAGED_JOB_STATUS_PENDING\x10\x01\x12 \n\x1cMANAGED_JOB_STATUS_SUBMITTED\x10\x02\x12\x1f\n\x1bMANAGED_JOB_STATUS_STARTING\x10\x03\x12\x1e\n\x1aMANAGED_JOB_STATUS_RUNNING\x10\x04\x12!\n\x1dMANAGED_JOB_STATUS_RECOVERING\x10\x05\x12!\n\x1dMANAGED_JOB_STATUS_CANCELLING\x10\x06\x12 \n\x1cMANAGED_JOB_STATUS_SUCCEEDED\x10\x07\x12 \n\x1cMANAGED_JOB_STATUS_CANCELLED\x10\x08\x12\x1d\n\x19MANAGED_JOB_STATUS_FAILED\x10\t\x12#\n\x1fMANAGED_JOB_STATUS_FAILED_SETUP\x10\n\x12\'\n#MANAGED_JOB_STATUS_FAILED_PRECHECKS\x10\x0b\x12)\n%MANAGED_JOB_STATUS_FAILED_NO_RESOURCE\x10\x0c\x12(\n$MANAGED_JOB_STATUS_FAILED_CONTROLLER\x10\r*\x9e\x03\n\x17ManagedJobScheduleState\x12*\n&MANAGED_JOB_SCHEDULE_STATE_UNSPECIFIED\x10\x00\x12\x35\n-DEPRECATED_MANAGED_JOB_SCHEDULE_STATE_INVALID\x10\x01\x1a\x02\x08\x01\x12\'\n#MANAGED_JOB_SCHEDULE_STATE_INACTIVE\x10\x02\x12&\n\"MANAGED_JOB_SCHEDULE_STATE_WAITING\x10\x03\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING\x10\x04\x12(\n$MANAGED_JOB_SCHEDULE_STATE_LAUNCHING\x10\x05\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF\x10\x06\x12$\n 
MANAGED_JOB_SCHEDULE_STATE_ALIVE\x10\x07\x12#\n\x1fMANAGED_JOB_SCHEDULE_STATE_DONE\x10\x08\x32\xe4\x03\n\x12ManagedJobsService\x12U\n\nGetVersion\x12\".managed_jobs.v1.GetVersionRequest\x1a#.managed_jobs.v1.GetVersionResponse\x12X\n\x0bGetJobTable\x12#.managed_jobs.v1.GetJobTableRequest\x1a$.managed_jobs.v1.GetJobTableResponse\x12m\n\x12GetAllJobIdsByName\x12*.managed_jobs.v1.GetAllJobIdsByNameRequest\x1a+.managed_jobs.v1.GetAllJobIdsByNameResponse\x12U\n\nCancelJobs\x12\".managed_jobs.v1.CancelJobsRequest\x1a#.managed_jobs.v1.CancelJobsResponse\x12W\n\nStreamLogs\x12\".managed_jobs.v1.StreamLogsRequest\x1a#.managed_jobs.v1.StreamLogsResponse0\x01\x62\x06proto3')
  _globals = globals()
  _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -27,12 +27,14 @@ if not _descriptor._USE_C_DESCRIPTORS:
  _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_options = b'8\001'
  _globals['_MANAGEDJOBINFO_METADATAENTRY']._loaded_options = None
  _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_options = b'8\001'
+ _globals['_MANAGEDJOBINFO_LINKSENTRY']._loaded_options = None
+ _globals['_MANAGEDJOBINFO_LINKSENTRY']._serialized_options = b'8\001'
  _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._loaded_options = None
  _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_options = b'8\001'
- _globals['_MANAGEDJOBSTATUS']._serialized_start=2836
- _globals['_MANAGEDJOBSTATUS']._serialized_end=3353
- _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_start=3356
- _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_end=3770
+ _globals['_MANAGEDJOBSTATUS']._serialized_start=2941
+ _globals['_MANAGEDJOBSTATUS']._serialized_end=3458
+ _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_start=3461
+ _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_end=3875
  _globals['_JOBIDS']._serialized_start=63
  _globals['_JOBIDS']._serialized_end=84
  _globals['_USERHASHES']._serialized_start=86
@@ -50,27 +52,29 @@ if not _descriptor._USE_C_DESCRIPTORS:
  _globals['_GETJOBTABLEREQUEST']._serialized_start=278
  _globals['_GETJOBTABLEREQUEST']._serialized_end=887
  _globals['_MANAGEDJOBINFO']._serialized_start=890
- _globals['_MANAGEDJOBINFO']._serialized_end=1989
- _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_start=1711
- _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_end=1762
- _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_start=1764
- _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_end=1811
- _globals['_GETJOBTABLERESPONSE']._serialized_start=1992
- _globals['_GETJOBTABLERESPONSE']._serialized_end=2232
- _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_start=2181
- _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_end=2232
- _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_start=2234
- _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_end=2297
- _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_start=2299
- _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_end=2344
- _globals['_CANCELJOBSREQUEST']._serialized_start=2347
- _globals['_CANCELJOBSREQUEST']._serialized_end=2562
- _globals['_CANCELJOBSRESPONSE']._serialized_start=2564
- _globals['_CANCELJOBSRESPONSE']._serialized_end=2601
- _globals['_STREAMLOGSREQUEST']._serialized_start=2604
- _globals['_STREAMLOGSREQUEST']._serialized_end=2755
- _globals['_STREAMLOGSRESPONSE']._serialized_start=2757
- _globals['_STREAMLOGSRESPONSE']._serialized_end=2833
- _globals['_MANAGEDJOBSSERVICE']._serialized_start=3773
- _globals['_MANAGEDJOBSSERVICE']._serialized_end=4257
+ _globals['_MANAGEDJOBINFO']._serialized_end=2094
+ _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_start=1770
+ _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_end=1821
+ _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_start=1823
+ _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_end=1870
+ _globals['_MANAGEDJOBINFO_LINKSENTRY']._serialized_start=1872
+ _globals['_MANAGEDJOBINFO_LINKSENTRY']._serialized_end=1916
+ _globals['_GETJOBTABLERESPONSE']._serialized_start=2097
+ _globals['_GETJOBTABLERESPONSE']._serialized_end=2337
+ _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_start=2286
+ _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_end=2337
+ _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_start=2339
+ _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_end=2402
+ _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_start=2404
+ _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_end=2449
+ _globals['_CANCELJOBSREQUEST']._serialized_start=2452
+ _globals['_CANCELJOBSREQUEST']._serialized_end=2667
+ _globals['_CANCELJOBSRESPONSE']._serialized_start=2669
+ _globals['_CANCELJOBSRESPONSE']._serialized_end=2706
+ _globals['_STREAMLOGSREQUEST']._serialized_start=2709
+ _globals['_STREAMLOGSREQUEST']._serialized_end=2860
+ _globals['_STREAMLOGSRESPONSE']._serialized_start=2862
+ _globals['_STREAMLOGSRESPONSE']._serialized_end=2938
+ _globals['_MANAGEDJOBSSERVICE']._serialized_start=3878
+ _globals['_MANAGEDJOBSSERVICE']._serialized_end=4362
  # @@protoc_insertion_point(module_scope)
sky/schemas/generated/managed_jobsv1_pb2.pyi CHANGED
@@ -127,7 +127,7 @@ class GetJobTableRequest(_message.Message):
  def __init__(self, skip_finished: bool = ..., accessible_workspaces: _Optional[_Union[Workspaces, _Mapping]] = ..., job_ids: _Optional[_Union[JobIds, _Mapping]] = ..., workspace_match: _Optional[str] = ..., name_match: _Optional[str] = ..., pool_match: _Optional[str] = ..., page: _Optional[int] = ..., limit: _Optional[int] = ..., user_hashes: _Optional[_Union[UserHashes, _Mapping]] = ..., statuses: _Optional[_Union[Statuses, _Mapping]] = ..., show_jobs_without_user_hash: bool = ..., fields: _Optional[_Union[Fields, _Mapping]] = ...) -> None: ...
  class ManagedJobInfo(_message.Message):
- __slots__ = ("job_id", "task_id", "job_name", "task_name", "job_duration", "workspace", "status", "schedule_state", "resources", "cluster_resources", "cluster_resources_full", "cloud", "region", "infra", "accelerators", "recovery_count", "details", "failure_reason", "user_name", "user_hash", "submitted_at", "start_at", "end_at", "user_yaml", "entrypoint", "metadata", "pool", "pool_hash", "_job_id")
+ __slots__ = ("job_id", "task_id", "job_name", "task_name", "job_duration", "workspace", "status", "schedule_state", "resources", "cluster_resources", "cluster_resources_full", "cloud", "region", "infra", "accelerators", "recovery_count", "details", "failure_reason", "user_name", "user_hash", "submitted_at", "start_at", "end_at", "user_yaml", "entrypoint", "metadata", "pool", "pool_hash", "_job_id", "links")
  class AcceleratorsEntry(_message.Message):
  __slots__ = ("key", "value")
  KEY_FIELD_NUMBER: _ClassVar[int]
@@ -142,6 +142,13 @@ class ManagedJobInfo(_message.Message):
  key: str
  value: str
  def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
+ class LinksEntry(_message.Message):
+ __slots__ = ("key", "value")
+ KEY_FIELD_NUMBER: _ClassVar[int]
+ VALUE_FIELD_NUMBER: _ClassVar[int]
+ key: str
+ value: str
+ def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
  JOB_ID_FIELD_NUMBER: _ClassVar[int]
  TASK_ID_FIELD_NUMBER: _ClassVar[int]
  JOB_NAME_FIELD_NUMBER: _ClassVar[int]
@@ -171,6 +178,7 @@ class ManagedJobInfo(_message.Message):
  POOL_FIELD_NUMBER: _ClassVar[int]
  POOL_HASH_FIELD_NUMBER: _ClassVar[int]
  _JOB_ID_FIELD_NUMBER: _ClassVar[int]
+ LINKS_FIELD_NUMBER: _ClassVar[int]
  job_id: int
  task_id: int
  job_name: str
@@ -200,7 +208,8 @@ class ManagedJobInfo(_message.Message):
  pool: str
  pool_hash: str
  _job_id: int
- def __init__(self, job_id: _Optional[int] = ..., task_id: _Optional[int] = ..., job_name: _Optional[str] = ..., task_name: _Optional[str] = ..., job_duration: _Optional[float] = ..., workspace: _Optional[str] = ..., status: _Optional[_Union[ManagedJobStatus, str]] = ..., schedule_state: _Optional[_Union[ManagedJobScheduleState, str]] = ..., resources: _Optional[str] = ..., cluster_resources: _Optional[str] = ..., cluster_resources_full: _Optional[str] = ..., cloud: _Optional[str] = ..., region: _Optional[str] = ..., infra: _Optional[str] = ..., accelerators: _Optional[_Mapping[str, float]] = ..., recovery_count: _Optional[int] = ..., details: _Optional[str] = ..., failure_reason: _Optional[str] = ..., user_name: _Optional[str] = ..., user_hash: _Optional[str] = ..., submitted_at: _Optional[float] = ..., start_at: _Optional[float] = ..., end_at: _Optional[float] = ..., user_yaml: _Optional[str] = ..., entrypoint: _Optional[str] = ..., metadata: _Optional[_Mapping[str, str]] = ..., pool: _Optional[str] = ..., pool_hash: _Optional[str] = ..., _job_id: _Optional[int] = ...) -> None: ...
+ links: _containers.ScalarMap[str, str]
+ def __init__(self, job_id: _Optional[int] = ..., task_id: _Optional[int] = ..., job_name: _Optional[str] = ..., task_name: _Optional[str] = ..., job_duration: _Optional[float] = ..., workspace: _Optional[str] = ..., status: _Optional[_Union[ManagedJobStatus, str]] = ..., schedule_state: _Optional[_Union[ManagedJobScheduleState, str]] = ..., resources: _Optional[str] = ..., cluster_resources: _Optional[str] = ..., cluster_resources_full: _Optional[str] = ..., cloud: _Optional[str] = ..., region: _Optional[str] = ..., infra: _Optional[str] = ..., accelerators: _Optional[_Mapping[str, float]] = ..., recovery_count: _Optional[int] = ..., details: _Optional[str] = ..., failure_reason: _Optional[str] = ..., user_name: _Optional[str] = ..., user_hash: _Optional[str] = ..., submitted_at: _Optional[float] = ..., start_at: _Optional[float] = ..., end_at: _Optional[float] = ..., user_yaml: _Optional[str] = ..., entrypoint: _Optional[str] = ..., metadata: _Optional[_Mapping[str, str]] = ..., pool: _Optional[str] = ..., pool_hash: _Optional[str] = ..., _job_id: _Optional[int] = ..., links: _Optional[_Mapping[str, str]] = ...) -> None: ...
  class GetJobTableResponse(_message.Message):
  __slots__ = ("jobs", "total", "total_no_filter", "status_counts")
sky/serve/serve_utils.py CHANGED
@@ -23,6 +23,7 @@ import filelock
  from sky import backends
  from sky import exceptions
  from sky import global_user_state
+ from sky import resources as resources_lib
  from sky import sky_logging
  from sky import skypilot_config
  from sky.adaptors import common as adaptors_common
@@ -350,6 +351,13 @@ def validate_service_task(task: 'sky.Task', pool: bool) -> None:
  f'file does not match the pool argument. '
  f'To fix, add a valid `{field_name}` field.')

+ # Validate that pools do not use ordered resources
+ if pool and isinstance(task.resources, list):
+ with ux_utils.print_exception_no_traceback():
+ raise ValueError(
+ 'Ordered resources are not supported for pools. '
+ 'Use `any_of` instead, or specify a single resource.')
+
  policy_description = ('on-demand'
  if task.service.dynamic_ondemand_fallback else 'spot')
  for resource in list(task.resources):
@@ -360,22 +368,6 @@ def validate_service_task(task: 'sky.Task', pool: bool) -> None:
  f'{sys_name} will replenish preempted spot '
  f'with {policy_description} instances.')

- if pool:
- accelerators = set()
- for resource in task.resources:
- if resource.accelerators is not None:
- if isinstance(resource.accelerators, str):
- accelerators.add(resource.accelerators)
- elif isinstance(resource.accelerators, dict):
- accelerators.update(resource.accelerators.keys())
- elif isinstance(resource.accelerators, list):
- accelerators.update(resource.accelerators)
- if len(accelerators) > 1:
- with ux_utils.print_exception_no_traceback():
- raise ValueError('Heterogeneous clusters are not supported for '
- 'pools please specify one accelerator '
- 'for all workers.')
-
  # Try to create a spot placer from the task yaml. Check if the task yaml
  # is valid for spot placer.
  spot_placer.SpotPlacer.from_task(task.service, task)
@@ -730,7 +722,7 @@ def _get_service_status(
  for replica_info in record['replica_info']:
  job_ids = managed_job_state.get_nonterminal_job_ids_by_pool(
  service_name, replica_info['name'])
- replica_info['used_by'] = job_ids[0] if job_ids else None
+ replica_info['used_by'] = job_ids
  return record
@@ -810,16 +802,112 @@ def get_ready_replicas(
  ]


- def get_next_cluster_name(service_name: str, job_id: int) -> Optional[str]:
- """Get the next available cluster name from idle replicas.
+ def _task_fits(task_resources: 'resources_lib.Resources',
+ free_resources: 'resources_lib.Resources') -> bool:
+ """Check if the task resources fit in the free resources."""
+ if not task_resources.less_demanding_than(free_resources,
+ check_cloud=False):
+ return False
+ if task_resources.cpus is not None:
+ if (free_resources.cpus is None or
+ task_resources.cpus > free_resources.cpus):
+ return False
+ if task_resources.memory is not None:
+ if (free_resources.memory is None or
+ task_resources.memory > free_resources.memory):
+ return False
+ return True
+
+
+ def _is_empty_resource(resource: 'resources_lib.Resources') -> bool:
+ # Returns True if this resource object does not specify any resources.
+ return (resource.cpus is None and resource.memory is None and
+ resource.accelerators is None)
+
+
+ def get_free_worker_resources(
+ pool: str) -> Optional[Dict[str, Optional[resources_lib.Resources]]]:
+ """Get free resources for each worker in a pool.
+
+ Args:
+ pool: Pool name (service name)
+
+ Returns:
+ Dictionary mapping cluster_name (worker) to free Resources object (or
+ None if worker is not available or has no free resources).
+ """
+
+ free_resources: Dict[str, Optional[resources_lib.Resources]] = {}
+ replicas = serve_state.get_replica_infos(pool)
+
+ for replica_info in replicas:
+ cluster_name = replica_info.cluster_name
+
+ # Get cluster handle
+ handle = replica_info.handle()
+ if handle is None or handle.launched_resources is None:
+ free_resources[cluster_name] = None
+ continue
+
+ total_resources = handle.launched_resources
+
+ # Get job IDs running on this worker
+ job_ids = managed_job_state.get_nonterminal_job_ids_by_pool(
+ pool, cluster_name)
+
+ if len(job_ids) == 0:
+ free_resources[cluster_name] = total_resources
+ continue
+
+ # Get used resources
+ # TODO(lloyd): We should batch the database calls here so that we
+ # make a single call to get all the used resources for all the jobs.
+ used_resources = managed_job_state.get_pool_worker_used_resources(
+ set(job_ids))
+ if used_resources is None:
+ # We failed to get the used resources. We should return None since
+ # we can't make any guarantees about what resources are being used.
+ logger.warning(
+ f'Failed to get used resources for cluster {cluster_name!r}')
+ return None
+
+ if _is_empty_resource(used_resources):
+ # We encountered a job that has no resources specified. We
+ # will not consider it for resource-aware scheduling so it must
+ # be scheduled on its own. To do this we will set the free
+ # worker resources to nothing by returning an empty resource
+ # object.
+ logger.debug(f'Job {job_ids} has no resources specified. '
+ 'Skipping resource-aware scheduling for cluster '
+ f'{cluster_name!r}')
+ free_resources[cluster_name] = resources_lib.Resources()
+ else:
+ # Calculate free resources using - operator
+ free = total_resources - used_resources
+ free_resources[cluster_name] = free
+
+ return free_resources
+
+
+ def get_next_cluster_name(
+ service_name: str,
+ job_id: int,
+ task_resources: Optional[typing.Union[
+ 'resources_lib.Resources', typing.Set['resources_lib.Resources'],
+ typing.List['resources_lib.Resources']]] = None
+ ) -> Optional[str]:
+ """Get the next available cluster name from replicas with sufficient
+ resources.

  Args:
  service_name: The name of the service.
- job_id: Optional job ID to associate with the acquired cluster.
- If None, a placeholder will be used.
+ job_id: Job ID to associate with the acquired cluster.
+ task_resources: Optional task resource requirements. If provided, will
+ check if resources fit in free worker resources. Can be
+ a single Resources object or a set/list of Resources objects.

  Returns:
- The cluster name if an idle replica is found, None otherwise.
+ The cluster name if a suitable replica is found, None otherwise.
  """
  # Check if service exists
  service_status = _get_service_status(service_name,
@@ -831,36 +919,126 @@ def get_next_cluster_name(service_name: str, job_id: int) -> Optional[str]:
  if not service_status['pool']:
  logger.error(f'Service {service_name!r} is not a pool.')
  return None
+
  with filelock.FileLock(get_service_filelock_path(service_name)):
+ free_resources = get_free_worker_resources(service_name)
+ logger.debug(f'Free resources: {free_resources!r}')
  logger.debug(f'Get next cluster name for pool {service_name!r}')
  ready_replicas = get_ready_replicas(service_name)
+
+ logger.debug(f'Ready replicas: {ready_replicas!r}')
+
  idle_replicas: List['replica_managers.ReplicaInfo'] = []
- for replica_info in ready_replicas:
- jobs_on_replica = managed_job_state.get_nonterminal_job_ids_by_pool(
- service_name, replica_info.cluster_name)
- # TODO(tian): Make it resources aware. Currently we allow and only
- # allow one job per replica. In the following PR, we should:
- # i) When the replica is launched with `any_of` resources (
- # replicas can have different resources), we should check if
- # the resources that jobs require are available on the replica.
- # e.g., if a job requires A100:1 on a {L4:1, A100:1} pool, it
- # should only goes to replica with A100.
- # ii) When a job only requires a subset of the resources on the
- # replica, each replica should be able to handle multiple jobs
- # at the same time. e.g., if a job requires A100:1 on a A100:8
- # pool, it should be able to run 4 jobs at the same time.
- if not jobs_on_replica:
- idle_replicas.append(replica_info)
+
+ # If task_resources is provided, use resource-aware scheduling
+ # Normalize task_resources to a list
+ if isinstance(task_resources, resources_lib.Resources):
+ task_resources_list = [task_resources]
+ elif isinstance(task_resources, (set, list)):
+ task_resources_list = list(task_resources)
+ else:
+ task_resources_list = []
+
+ # We should do resource aware scheduling if:
+ # 1. There are task resources.
+ # 2. The first task resource has some resources listed.
+ # 3. There are free resources.
+ # 4. Any free resource has some resources listed.
+ resource_aware = len(task_resources_list) > 0
+ resource_aware = (resource_aware and
+ not _is_empty_resource(task_resources_list[0]))
+ resource_aware = resource_aware and free_resources is not None
+ if free_resources is not None:
+ for free_resource in free_resources.values():
+ if free_resource is not None and not _is_empty_resource(
+ free_resource):
+ resource_aware = True
+ break
+ else:
+ resource_aware = False
+ else:
+ resource_aware = False
+
+ if resource_aware:
+ logger.debug('Doing resource aware scheduling')
+ for replica_info in ready_replicas:
+ cluster_name = replica_info.cluster_name
+ assert free_resources is not None
+ free_resources_on_worker = free_resources.get(cluster_name)
+ logger.debug(f'Free resources for cluster {cluster_name!r}: '
+ f'{free_resources_on_worker!r}')
+
+ # Skip if worker has no free resources available
+ if free_resources_on_worker is None:
+ logger.debug(f'Worker {cluster_name!r} has no free '
+ 'resources')
+ continue
+
+ # Check if any of the task resource options fit
+ fits = False
+ for task_res in task_resources_list:
+ logger.debug(f'Task resources: {task_res!r}')
+ if _task_fits(task_res, free_resources_on_worker):
+ logger.debug(f'Task resources {task_res!r} fits'
+ ' in free resources '
+ f'{free_resources_on_worker!r}')
+ fits = True
+ break
+ else:
+ logger.debug(f'Task resources {task_res!r} does not fit'
+ ' in free resources '
+ f'{free_resources_on_worker!r}')
+ if fits:
+ idle_replicas.append(replica_info)
+ # Also fall back to resource unaware scheduling if no idle replicas are
+ # found. This might be because our launched resources were improperly
+ # set. If that's the case then jobs will fail to schedule in a resource
+ # aware way because one of the resources will be `None` so we can just
+ # fallback to 1 job per replica. If we are truly resource bottlenecked
+ # then we will see that there are jobs running on the replica and will
+ # not schedule another.
+ if len(idle_replicas) == 0:
+ logger.debug('Falling back to resource unaware scheduling')
+ # Fall back to resource unaware scheduling if no task resources
+ # are provided.
+ for replica_info in ready_replicas:
+ jobs_on_replica = (
+ managed_job_state.get_nonterminal_job_ids_by_pool(
+ service_name, replica_info.cluster_name))
+ if not jobs_on_replica:
+ idle_replicas.append(replica_info)
+
  if not idle_replicas:
  logger.info(f'No idle replicas found for pool {service_name!r}')
  return None

  # Select the first idle replica.
- # TODO(tian): "Load balancing" policy.
  replica_info = idle_replicas[0]
  logger.info(f'Selected replica {replica_info.replica_id} with cluster '
  f'{replica_info.cluster_name!r} for job {job_id!r} in pool '
  f'{service_name!r}')
+
+ # If job has heterogeneous resources (any_of/ordered), update
+ # full_resources to the specific resource that was selected for this
+ # worker. This must happen before releasing the filelock to ensure
+ # atomicity with the scheduling decision.
+ if resource_aware and len(task_resources_list) > 1:
+ assert free_resources is not None
+ free_resources_on_worker = free_resources.get(
+ replica_info.cluster_name)
+ if free_resources_on_worker is not None:
+ # Find which task resource fits on this worker
+ for task_res in task_resources_list:
+ if _task_fits(task_res, free_resources_on_worker):
+ # Update full_resources in database to this specific
+ # resource
+ logger.debug(
+ f'Updating full_resources for job {job_id!r} '
+ f'to selected resource: {task_res!r}')
+ managed_job_state.update_job_full_resources(
+ job_id, task_res.to_yaml_config())
+ break
+
  managed_job_state.set_current_cluster_name(job_id,
  replica_info.cluster_name)
  return replica_info.cluster_name
@@ -1541,7 +1719,21 @@ def _format_replica_table(replica_records: List[Dict[str, Any]], show_all: bool,
  replica_status = record['status']
  status_str = replica_status.colored_str()
  used_by = record.get('used_by', None)
- used_by_str = str(used_by) if used_by is not None else '-'
+ if used_by is None:
+ used_by_str = '-'
+ elif isinstance(used_by, str):
+ used_by_str = used_by
+ else:
+ if len(used_by) > 2:
+ used_by_str = (
+ f'{used_by[0]}, {used_by[1]}, +{len(used_by) - 2}'
+ ' more')
+ elif len(used_by) == 2:
+ used_by_str = f'{used_by[0]}, {used_by[1]}'
+ elif len(used_by) == 1:
+ used_by_str = str(used_by[0])
+ else:
+ used_by_str = '-'

  replica_handle: Optional['backends.CloudVmRayResourceHandle'] = record[
  'handle']
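Note: the new pool-scheduling helpers above (_task_fits, get_free_worker_resources, and the resource-aware branch of get_next_cluster_name) come down to simple capacity arithmetic: a worker's free capacity is its launched resources minus what non-terminal jobs already claim, and a job is placeable only when every requirement it actually specifies fits into that remainder. The sketch below illustrates that arithmetic with a simplified stand-in; `Res` and `fits` are illustrative only and are not the sky.Resources API (the real code uses Resources.less_demanding_than plus the cpu/memory checks and the Resources subtraction shown in the diff).

# Simplified illustration of the fit test applied per pool worker.
from dataclasses import dataclass, field
from typing import Dict, Optional


@dataclass
class Res:
    # Stand-in for a resource spec; not sky.Resources.
    cpus: Optional[float] = None
    memory: Optional[float] = None
    accelerators: Dict[str, float] = field(default_factory=dict)


def fits(task: Res, free: Res) -> bool:
    # A requirement the task does not state never blocks placement;
    # a stated one must be covered by the worker's free capacity.
    if task.cpus is not None and (free.cpus is None or task.cpus > free.cpus):
        return False
    if task.memory is not None and (free.memory is None or
                                    task.memory > free.memory):
        return False
    return all(free.accelerators.get(acc, 0) >= count
               for acc, count in task.accelerators.items())


# Worker launched with 8 CPUs, 32 GB, 4x A100; existing jobs hold most of it.
total = Res(cpus=8, memory=32, accelerators={'A100': 4})
used = Res(cpus=4, memory=16, accelerators={'A100': 3})
free = Res(cpus=total.cpus - used.cpus,
           memory=total.memory - used.memory,
           accelerators={'A100': total.accelerators['A100'] -
                         used.accelerators['A100']})

print(fits(Res(cpus=2, accelerators={'A100': 1}), free))  # True
print(fits(Res(accelerators={'A100': 2}), free))          # False: only 1 free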
sky/serve/server/impl.py CHANGED
@@ -517,7 +517,7 @@ def update(
  f'{workers} is not supported. Ignoring the update.')

  # Load the existing task configuration from the service's YAML file
- yaml_content = service_record['yaml_content']
+ yaml_content = service_record['pool_yaml']

  # Load the existing task configuration
  task = task_lib.Task.from_yaml_str(yaml_content)
sky/server/common.py CHANGED
@@ -17,6 +17,7 @@ import time
  import typing
  from typing import (Any, Callable, cast, Dict, Generic, Literal, Optional,
  Tuple, TypeVar, Union)
+ from urllib.request import Request
  import uuid

  import cachetools
@@ -147,6 +148,22 @@ def get_api_cookie_jar() -> requests.cookies.RequestsCookieJar:
  return cookie_jar


+ def get_cookie_header_for_url(url: str) -> Dict[str, str]:
+ """Extract Cookie header value from a cookie jar for a specific URL"""
+ cookies = get_api_cookie_jar()
+ if not cookies:
+ return {}
+
+ # Use urllib Request to do URL-aware cookie filtering
+ request = Request(url)
+ cookies.add_cookie_header(request)
+ cookie_header = request.get_header('Cookie')
+
+ if cookie_header is None:
+ return {}
+ return {'Cookie': cookie_header}
+
+
  def set_api_cookie_jar(cookie_jar: CookieJar,
  create_if_not_exists: bool = True) -> None:
  """Updates the file cookie jar with the given cookie jar."""
sky/server/constants.py CHANGED
@@ -10,7 +10,7 @@ from sky.skylet import constants
  # based on version info is needed.
  # For more details and code guidelines, refer to:
  # https://docs.skypilot.co/en/latest/developers/CONTRIBUTING.html#backward-compatibility-guidelines
- API_VERSION = 24
+ API_VERSION = 26

  # The minimum peer API version that the code should still work with.
  # Notes (dev):
sky/server/metrics.py CHANGED
@@ -48,10 +48,12 @@ async def gpu_metrics() -> fastapi.Response:
  all_metrics: List[str] = []
  successful_contexts = 0

+ remote_contexts = [
+ context for context in contexts if context != 'in-cluster'
+ ]
  tasks = [
  asyncio.create_task(metrics_utils.get_metrics_for_context(context))
- for context in contexts
- if context != 'in-cluster'
+ for context in remote_contexts
  ]

  results = await asyncio.gather(*tasks, return_exceptions=True)
@@ -59,7 +61,8 @@ async def gpu_metrics() -> fastapi.Response:
  for i, result in enumerate(results):
  if isinstance(result, Exception):
  logger.error(
- f'Failed to get metrics for context {contexts[i]}: {result}')
+ f'Failed to get metrics for context {remote_contexts[i]}: '
+ f'{result}')
  elif isinstance(result, BaseException):
  # Avoid changing behavior for non-Exception BaseExceptions
  # like KeyboardInterrupt/SystemExit: re-raise them.
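Note: the metrics.py change above keeps the gathered results aligned with the contexts that produced them: filtering inline inside the task-list comprehension meant `contexts[i]` could point at the wrong entry whenever 'in-cluster' was skipped. A small self-contained illustration of the corrected pattern follows; fetch() and the context names are placeholders, not SkyPilot APIs.

# Sketch: build the filtered list once, then gather, so results pair cleanly.
import asyncio


async def fetch(context: str) -> str:
    if context == 'bad':
        raise RuntimeError('scrape failed')
    return f'metrics for {context}'


async def main() -> None:
    contexts = ['in-cluster', 'gke-a', 'bad']
    remote_contexts = [c for c in contexts if c != 'in-cluster']
    tasks = [asyncio.create_task(fetch(c)) for c in remote_contexts]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    # Pairing results with the *filtered* list keeps each error attributed to
    # the context that raised it; indexing into `contexts` can mislabel them.
    for context, result in zip(remote_contexts, results):
        if isinstance(result, Exception):
            print(f'{context}: error: {result}')
        else:
            print(f'{context}: {result}')


asyncio.run(main())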