skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,28 @@
1
1
  """Payloads for the Sky API requests.
2
2
 
3
- TODO(zhwu): We can consider a better way to handle the default values of the
4
- kwargs for the payloads, otherwise, we have to keep the default values the sync
5
- with the backend functions. The benefit of having the default values in the
6
- payloads is that a user can find the default values in the Restful API docs.
3
+ All the payloads that will be used between the client and server communication
4
+ must be defined here to make sure they get covered by our API compatibility tests.
5
+
6
+ Compatibility note:
7
+ - Adding a new body for new API is compatible as long as the SDK method using
8
+ the new API is properly decorated with `versions.minimal_api_version`.
9
+ - Adding a new field with default value to an existing body is compatible at
10
+ API level, but the business logic must handle the case where the field is
11
+ not proccessed by an old version of remote client/server. This can usually
12
+ be done by checking `versions.get_remote_api_version()`.
13
+ - Other changes are not compatible at API level, so must be handled specially.
14
+ A common pattern is to keep both the old and new version of the body and
15
+ check `versions.get_remote_api_version()` to decide which body to use. For
16
+ example, say we refactor the `LaunchBody`, the original `LaunchBody` must be
17
+ kept in the codebase and the new body should be added via `LaunchBodyV2`.
18
+ Then if the remote runs in an old version, the local code should still send
19
+ `LaunchBody` to keep the backward compatibility. `LaunchBody` can be removed
20
+ later when constants.MIN_COMPATIBLE_API_VERSION is updated to a version that
21
+ supports `LaunchBodyV2`.
22
+
23
+ Also refer to sky.server.constants.MIN_COMPATIBLE_API_VERSION and the
24
+ sky.server.versions module for more details.
7
25
  """
8
- import getpass
9
26
  import os
10
27
  import typing
11
28
  from typing import Any, Dict, List, Optional, Tuple, Union
@@ -16,6 +33,7 @@ from sky import sky_logging
16
33
  from sky import skypilot_config
17
34
  from sky.adaptors import common as adaptors_common
18
35
  from sky.server import common
36
+ from sky.skylet import autostop_lib
19
37
  from sky.skylet import constants
20
38
  from sky.usage import constants as usage_constants
21
39
  from sky.usage import usage_lib
@@ -53,13 +71,14 @@ EXTERNAL_LOCAL_ENV_VARS = [
53
71
  def request_body_env_vars() -> dict:
54
72
  env_vars = {}
55
73
  for env_var in os.environ:
56
- if env_var.startswith(constants.SKYPILOT_ENV_VAR_PREFIX):
74
+ if (env_var.startswith(constants.SKYPILOT_ENV_VAR_PREFIX) and
75
+ not env_var.startswith(
76
+ constants.SKYPILOT_SERVER_ENV_VAR_PREFIX)):
57
77
  env_vars[env_var] = os.environ[env_var]
58
78
  if common.is_api_server_local() and env_var in EXTERNAL_LOCAL_ENV_VARS:
59
79
  env_vars[env_var] = os.environ[env_var]
60
80
  env_vars[constants.USER_ID_ENV_VAR] = common_utils.get_user_hash()
61
- env_vars[constants.USER_ENV_VAR] = os.getenv(constants.USER_ENV_VAR,
62
- getpass.getuser())
81
+ env_vars[constants.USER_ENV_VAR] = common_utils.get_current_user_name()
63
82
  env_vars[
64
83
  usage_constants.USAGE_RUN_ID_ENV_VAR] = usage_lib.messages.usage.run_id
65
84
  # Remove the path to config file, as the config content is included in the
@@ -67,26 +86,59 @@ def request_body_env_vars() -> dict:
67
86
  env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
68
87
  env_vars.pop(skypilot_config.ENV_VAR_GLOBAL_CONFIG, None)
69
88
  env_vars.pop(skypilot_config.ENV_VAR_PROJECT_CONFIG, None)
89
+ # Remove the config related env vars, as the client config override
90
+ # should be passed in the request body.
91
+ # Any new environment variables that are server-specific should
92
+ # use SKYPILOT_SERVER_ENV_VAR_PREFIX.
93
+ env_vars.pop(constants.ENV_VAR_DB_CONNECTION_URI, None)
70
94
  return env_vars
71
95
 
72
96
 
73
97
  def get_override_skypilot_config_from_client() -> Dict[str, Any]:
74
98
  """Returns the override configs from the client."""
99
+ if annotations.is_on_api_server:
100
+ return {}
75
101
  config = skypilot_config.to_dict()
76
102
  # Remove the API server config, as we should not specify the SkyPilot
77
103
  # server endpoint on the server side. This avoids the warning at
78
104
  # server-side.
79
105
  config.pop_nested(('api_server',), default_value=None)
106
+ # Remove the admin policy, as the policy has been applied on the client
107
+ # side.
108
+ config.pop_nested(('admin_policy',), default_value=None)
80
109
  return config
81
110
 
82
111
 
83
- class RequestBody(pydantic.BaseModel):
112
+ def get_override_skypilot_config_path_from_client() -> Optional[str]:
113
+ """Returns the override config path from the client."""
114
+ if annotations.is_on_api_server:
115
+ return None
116
+ # Currently, we don't need to check if the client-side config
117
+ # has been overridden because we only deal with cases where
118
+ # the client has a project-level config/changed config and the
119
+ # API server has a different config.
120
+ return skypilot_config.loaded_config_path_serialized()
121
+
122
+
123
+ class BasePayload(pydantic.BaseModel):
124
+ """The base payload for the SkyPilot API."""
125
+ # Ignore extra fields in the request body, which is useful for backward
126
+ # compatibility. The difference with `allow` is that `ignore` will not
127
+ # include the unknown fields when dumping the model, i.e., we can add new
128
+ # fields to the request body without breaking the existing old API server
129
+ # where the handler function does not accept the new field in its function
130
+ # signature.
131
+ model_config = pydantic.ConfigDict(extra='ignore')
132
+
133
+
134
+ class RequestBody(BasePayload):
84
135
  """The request body for the SkyPilot API."""
85
136
  env_vars: Dict[str, str] = {}
86
137
  entrypoint: str = ''
87
138
  entrypoint_command: str = ''
88
139
  using_remote_api_server: bool = False
89
140
  override_skypilot_config: Optional[Dict[str, Any]] = {}
141
+ override_skypilot_config_path: Optional[str] = None
90
142
 
91
143
  def __init__(self, **data):
92
144
  data['env_vars'] = data.get('env_vars', request_body_env_vars())
@@ -101,6 +153,9 @@ class RequestBody(pydantic.BaseModel):
101
153
  data['override_skypilot_config'] = data.get(
102
154
  'override_skypilot_config',
103
155
  get_override_skypilot_config_from_client())
156
+ data['override_skypilot_config_path'] = data.get(
157
+ 'override_skypilot_config_path',
158
+ get_override_skypilot_config_path_from_client())
104
159
  super().__init__(**data)
105
160
 
106
161
  def to_kwargs(self) -> Dict[str, Any]:
@@ -115,6 +170,7 @@ class RequestBody(pydantic.BaseModel):
115
170
  kwargs.pop('entrypoint_command')
116
171
  kwargs.pop('using_remote_api_server')
117
172
  kwargs.pop('override_skypilot_config')
173
+ kwargs.pop('override_skypilot_config_path')
118
174
  return kwargs
119
175
 
120
176
  @property
@@ -126,6 +182,13 @@ class CheckBody(RequestBody):
126
182
  """The request body for the check endpoint."""
127
183
  clouds: Optional[Tuple[str, ...]] = None
128
184
  verbose: bool = False
185
+ workspace: Optional[str] = None
186
+
187
+
188
+ class EnabledCloudsBody(RequestBody):
189
+ """The request body for the enabled clouds endpoint."""
190
+ workspace: Optional[str] = None
191
+ expand: bool = False
129
192
 
130
193
 
131
194
  class DagRequestBody(RequestBody):
@@ -148,17 +211,33 @@ class DagRequestBody(RequestBody):
148
211
  return kwargs
149
212
 
150
213
 
151
- class ValidateBody(DagRequestBody):
214
+ class DagRequestBodyWithRequestOptions(DagRequestBody):
215
+ """Request body base class for endpoints with a dag and request options."""
216
+ request_options: Optional[admin_policy.RequestOptions]
217
+
218
+ def get_request_options(self) -> Optional[admin_policy.RequestOptions]:
219
+ """Get the request options."""
220
+ if self.request_options is None:
221
+ return None
222
+ if isinstance(self.request_options, dict):
223
+ return admin_policy.RequestOptions(**self.request_options)
224
+ return self.request_options
225
+
226
+ def to_kwargs(self) -> Dict[str, Any]:
227
+ kwargs = super().to_kwargs()
228
+ kwargs['request_options'] = self.get_request_options()
229
+ return kwargs
230
+
231
+
232
+ class ValidateBody(DagRequestBodyWithRequestOptions):
152
233
  """The request body for the validate endpoint."""
153
234
  dag: str
154
- request_options: Optional[admin_policy.RequestOptions]
155
235
 
156
236
 
157
- class OptimizeBody(DagRequestBody):
237
+ class OptimizeBody(DagRequestBodyWithRequestOptions):
158
238
  """The request body for the optimize endpoint."""
159
239
  dag: str
160
240
  minimize: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
161
- request_options: Optional[admin_policy.RequestOptions]
162
241
 
163
242
 
164
243
  class LaunchBody(RequestBody):
@@ -166,8 +245,10 @@ class LaunchBody(RequestBody):
166
245
  task: str
167
246
  cluster_name: str
168
247
  retry_until_up: bool = False
248
+ # TODO(aylei): remove this field in v0.12.0
169
249
  idle_minutes_to_autostop: Optional[int] = None
170
250
  dryrun: bool = False
251
+ # TODO(aylei): remove this field in v0.12.0
171
252
  down: bool = False
172
253
  backend: Optional[str] = None
173
254
  optimize_target: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
@@ -233,12 +314,20 @@ class StatusBody(RequestBody):
233
314
  cluster_names: Optional[List[str]] = None
234
315
  refresh: common_lib.StatusRefreshMode = common_lib.StatusRefreshMode.NONE
235
316
  all_users: bool = True
317
+ # TODO(kyuds): default to False post 0.10.5
318
+ include_credentials: bool = True
319
+ # Only return fields that are needed for the
320
+ # dashboard / CLI summary response
321
+ summary_response: bool = False
322
+ # Include the cluster handle in the response
323
+ include_handle: bool = True
236
324
 
237
325
 
238
326
  class StartBody(RequestBody):
239
327
  """The request body for the start endpoint."""
240
328
  cluster_name: str
241
329
  idle_minutes_to_autostop: Optional[int] = None
330
+ wait_for: Optional[autostop_lib.AutostopWaitFor] = None
242
331
  retry_until_up: bool = False
243
332
  down: bool = False
244
333
  force: bool = False
@@ -248,6 +337,7 @@ class AutostopBody(RequestBody):
248
337
  """The request body for the autostop endpoint."""
249
338
  cluster_name: str
250
339
  idle_minutes: int
340
+ wait_for: Optional[autostop_lib.AutostopWaitFor] = None
251
341
  down: bool = False
252
342
 
253
343
 
@@ -275,9 +365,10 @@ class CancelBody(RequestBody):
275
365
  return kwargs
276
366
 
277
367
 
278
- class ClusterNameBody(RequestBody):
368
+ class ProvisionLogsBody(RequestBody):
279
369
  """Cluster node."""
280
370
  cluster_name: str
371
+ worker: Optional[int] = None
281
372
 
282
373
 
283
374
  class ClusterJobBody(RequestBody):
@@ -301,6 +392,63 @@ class ClusterJobsDownloadLogsBody(RequestBody):
301
392
  local_dir: str = constants.SKY_LOGS_DIRECTORY
302
393
 
303
394
 
395
+ class UserCreateBody(RequestBody):
396
+ """The request body for the user create endpoint."""
397
+ username: str
398
+ password: str
399
+ role: Optional[str] = None
400
+
401
+
402
+ class UserDeleteBody(RequestBody):
403
+ """The request body for the user delete endpoint."""
404
+ user_id: str
405
+
406
+
407
+ class UserUpdateBody(RequestBody):
408
+ """The request body for the user update endpoint."""
409
+ user_id: str
410
+ role: Optional[str] = None
411
+ password: Optional[str] = None
412
+
413
+
414
+ class UserImportBody(RequestBody):
415
+ """The request body for the user import endpoint."""
416
+ csv_content: str
417
+
418
+
419
+ class ServiceAccountTokenCreateBody(RequestBody):
420
+ """The request body for creating a service account token."""
421
+ token_name: str
422
+ expires_in_days: Optional[int] = None
423
+
424
+
425
+ class ServiceAccountTokenDeleteBody(RequestBody):
426
+ """The request body for deleting a service account token."""
427
+ token_id: str
428
+
429
+
430
+ class UpdateRoleBody(RequestBody):
431
+ """The request body for updating a user role."""
432
+ role: str
433
+
434
+
435
+ class ServiceAccountTokenRoleBody(RequestBody):
436
+ """The request body for getting a service account token role."""
437
+ token_id: str
438
+
439
+
440
+ class ServiceAccountTokenUpdateRoleBody(RequestBody):
441
+ """The request body for updating a service account token role."""
442
+ token_id: str
443
+ role: str
444
+
445
+
446
+ class ServiceAccountTokenRotateBody(RequestBody):
447
+ """The request body for rotating a service account token."""
448
+ token_id: str
449
+ expires_in_days: Optional[int] = None
450
+
451
+
304
452
  class DownloadBody(RequestBody):
305
453
  """The request body for the download endpoint."""
306
454
  folder_paths: List[str]
@@ -311,6 +459,39 @@ class StorageBody(RequestBody):
311
459
  name: str
312
460
 
313
461
 
462
+ class VolumeApplyBody(RequestBody):
463
+ """The request body for the volume apply endpoint."""
464
+ name: str
465
+ volume_type: str
466
+ cloud: str
467
+ region: Optional[str] = None
468
+ zone: Optional[str] = None
469
+ size: Optional[str] = None
470
+ config: Optional[Dict[str, Any]] = None
471
+ labels: Optional[Dict[str, str]] = None
472
+
473
+
474
+ class VolumeDeleteBody(RequestBody):
475
+ """The request body for the volume delete endpoint."""
476
+ names: List[str]
477
+
478
+
479
+ class VolumeListBody(RequestBody):
480
+ """The request body for the volume list endpoint."""
481
+ pass
482
+
483
+
484
+ class VolumeValidateBody(RequestBody):
485
+ """The request body for the volume validate endpoint."""
486
+ name: Optional[str] = None
487
+ volume_type: Optional[str] = None
488
+ infra: Optional[str] = None
489
+ size: Optional[str] = None
490
+ labels: Optional[Dict[str, str]] = None
491
+ resource_name: Optional[str] = None
492
+ config: Optional[Dict[str, Any]] = None
493
+
494
+
314
495
  class EndpointsBody(RequestBody):
315
496
  """The request body for the endpoint."""
316
497
  cluster: str
@@ -332,6 +513,8 @@ class JobsLaunchBody(RequestBody):
332
513
  """The request body for the jobs launch endpoint."""
333
514
  task: str
334
515
  name: Optional[str]
516
+ pool: Optional[str] = None
517
+ num_jobs: Optional[int] = None
335
518
 
336
519
  def to_kwargs(self) -> Dict[str, Any]:
337
520
  kwargs = super().to_kwargs()
@@ -345,6 +528,25 @@ class JobsQueueBody(RequestBody):
345
528
  refresh: bool = False
346
529
  skip_finished: bool = False
347
530
  all_users: bool = False
531
+ job_ids: Optional[List[int]] = None
532
+
533
+
534
+ class JobsQueueV2Body(RequestBody):
535
+ """The request body for the jobs queue endpoint."""
536
+ refresh: bool = False
537
+ skip_finished: bool = False
538
+ all_users: bool = False
539
+ job_ids: Optional[List[int]] = None
540
+ user_match: Optional[str] = None
541
+ workspace_match: Optional[str] = None
542
+ name_match: Optional[str] = None
543
+ pool_match: Optional[str] = None
544
+ page: Optional[int] = None
545
+ limit: Optional[int] = None
546
+ statuses: Optional[List[str]] = None
547
+ # The fields to return in the response.
548
+ # Refer to the fields in the `class ManagedJobRecord` in `response.py`
549
+ fields: Optional[List[str]] = None
348
550
 
349
551
 
350
552
  class JobsCancelBody(RequestBody):
@@ -353,6 +555,7 @@ class JobsCancelBody(RequestBody):
353
555
  job_ids: Optional[List[int]] = None
354
556
  all: bool = False
355
557
  all_users: bool = False
558
+ pool: Optional[str] = None
356
559
 
357
560
 
358
561
  class JobsLogsBody(RequestBody):
@@ -362,6 +565,7 @@ class JobsLogsBody(RequestBody):
362
565
  follow: bool = True
363
566
  controller: bool = False
364
567
  refresh: bool = False
568
+ tail: Optional[int] = None
365
569
 
366
570
 
367
571
  class RequestCancelBody(RequestBody):
@@ -375,6 +579,8 @@ class RequestStatusBody(pydantic.BaseModel):
375
579
  """The request body for the API request status endpoint."""
376
580
  request_ids: Optional[List[str]] = None
377
581
  all_status: bool = False
582
+ limit: Optional[int] = None
583
+ fields: Optional[List[str]] = None
378
584
 
379
585
 
380
586
  class ServeUpBody(RequestBody):
@@ -425,6 +631,7 @@ class ServeLogsBody(RequestBody):
425
631
  target: Union[str, serve.ServiceComponent]
426
632
  replica_id: Optional[int] = None
427
633
  follow: bool = True
634
+ tail: Optional[int] = None
428
635
 
429
636
 
430
637
  class ServeDownloadLogsBody(RequestBody):
@@ -434,6 +641,7 @@ class ServeDownloadLogsBody(RequestBody):
434
641
  targets: Optional[Union[str, serve.ServiceComponent,
435
642
  List[Union[str, serve.ServiceComponent]]]]
436
643
  replica_ids: Optional[List[int]] = None
644
+ tail: Optional[int] = None
437
645
 
438
646
 
439
647
  class ServeStatusBody(RequestBody):
@@ -443,9 +651,10 @@ class ServeStatusBody(RequestBody):
443
651
 
444
652
  class RealtimeGpuAvailabilityRequestBody(RequestBody):
445
653
  """The request body for the realtime GPU availability endpoint."""
446
- context: Optional[str]
447
- name_filter: Optional[str]
448
- quantity_filter: Optional[int]
654
+ context: Optional[str] = None
655
+ name_filter: Optional[str] = None
656
+ quantity_filter: Optional[int] = None
657
+ is_ssh: Optional[bool] = None
449
658
 
450
659
 
451
660
  class KubernetesNodeInfoRequestBody(RequestBody):
@@ -483,6 +692,19 @@ class LocalUpBody(RequestBody):
483
692
  cleanup: bool = False
484
693
  context_name: Optional[str] = None
485
694
  password: Optional[str] = None
695
+ name: Optional[str] = None
696
+ port_start: Optional[int] = None
697
+
698
+
699
+ class LocalDownBody(RequestBody):
700
+ """The request body for the local down endpoint."""
701
+ name: Optional[str] = None
702
+
703
+
704
+ class SSHUpBody(RequestBody):
705
+ """The request body for the SSH up/down endpoints."""
706
+ infra: Optional[str] = None
707
+ cleanup: bool = False
486
708
 
487
709
 
488
710
  class ServeTerminateReplicaBody(RequestBody):
@@ -514,7 +736,119 @@ class JobsDownloadLogsBody(RequestBody):
514
736
  local_dir: str = constants.SKY_LOGS_DIRECTORY
515
737
 
516
738
 
739
+ class JobsPoolApplyBody(RequestBody):
740
+ """The request body for the jobs pool apply endpoint."""
741
+ task: Optional[str] = None
742
+ workers: Optional[int] = None
743
+ pool_name: str
744
+ mode: serve.UpdateMode
745
+
746
+ def to_kwargs(self) -> Dict[str, Any]:
747
+ kwargs = super().to_kwargs()
748
+ if self.task is not None:
749
+ dag = common.process_mounts_in_task_on_api_server(
750
+ self.task, self.env_vars, workdir_only=False)
751
+ assert len(
752
+ dag.tasks) == 1, ('Must only specify one task in the DAG for '
753
+ 'a pool.', dag)
754
+ kwargs['task'] = dag.tasks[0]
755
+ else:
756
+ kwargs['task'] = None
757
+ return kwargs
758
+
759
+
760
+ class JobsPoolDownBody(RequestBody):
761
+ """The request body for the jobs pool down endpoint."""
762
+ pool_names: Optional[Union[str, List[str]]]
763
+ all: bool = False
764
+ purge: bool = False
765
+
766
+
767
+ class JobsPoolStatusBody(RequestBody):
768
+ """The request body for the jobs pool status endpoint."""
769
+ pool_names: Optional[Union[str, List[str]]]
770
+
771
+
772
+ class JobsPoolLogsBody(RequestBody):
773
+ """The request body for the jobs pool logs endpoint."""
774
+ pool_name: str
775
+ target: Union[str, serve.ServiceComponent]
776
+ worker_id: Optional[int] = None
777
+ follow: bool = True
778
+ tail: Optional[int] = None
779
+
780
+
781
+ class JobsPoolDownloadLogsBody(RequestBody):
782
+ """The request body for the jobs pool download logs endpoint."""
783
+ pool_name: str
784
+ local_dir: str
785
+ targets: Optional[Union[str, serve.ServiceComponent,
786
+ List[Union[str, serve.ServiceComponent]]]]
787
+ worker_ids: Optional[List[int]] = None
788
+ tail: Optional[int] = None
789
+
790
+
517
791
  class UploadZipFileResponse(pydantic.BaseModel):
518
792
  """The response body for the upload zip file endpoint."""
519
793
  status: str
520
794
  missing_chunks: Optional[List[str]] = None
795
+
796
+
797
+ class UpdateWorkspaceBody(RequestBody):
798
+ """The request body for updating a specific workspace configuration."""
799
+ workspace_name: str = '' # Will be set from path parameter
800
+ config: Dict[str, Any]
801
+
802
+
803
+ class CreateWorkspaceBody(RequestBody):
804
+ """The request body for creating a new workspace."""
805
+ workspace_name: str = '' # Will be set from path parameter
806
+ config: Dict[str, Any]
807
+
808
+
809
+ class DeleteWorkspaceBody(RequestBody):
810
+ """The request body for deleting a workspace."""
811
+ workspace_name: str
812
+
813
+
814
+ class UpdateConfigBody(RequestBody):
815
+ """The request body for updating the entire SkyPilot configuration."""
816
+ config: Dict[str, Any]
817
+
818
+
819
+ class GetConfigBody(RequestBody):
820
+ """The request body for getting the entire SkyPilot configuration."""
821
+ pass
822
+
823
+
824
+ class CostReportBody(RequestBody):
825
+ """The request body for the cost report endpoint."""
826
+ days: Optional[int] = 30
827
+ # we use hashes instead of names to avoid the case where
828
+ # the name is not unique
829
+ cluster_hashes: Optional[List[str]] = None
830
+ # Only return fields that are needed for the dashboard
831
+ # summary page
832
+ dashboard_summary_response: bool = False
833
+
834
+
835
+ class RequestPayload(BasePayload):
836
+ """The payload for the requests."""
837
+
838
+ request_id: str
839
+ name: str
840
+ entrypoint: str
841
+ request_body: str
842
+ status: str
843
+ created_at: float
844
+ user_id: str
845
+ return_value: str
846
+ error: str
847
+ pid: Optional[int]
848
+ schedule_type: str
849
+ user_name: Optional[str] = None
850
+ # Resources the request operates on.
851
+ cluster_name: Optional[str] = None
852
+ status_msg: Optional[str] = None
853
+ should_retry: bool = False
854
+ finished_at: Optional[float] = None
@@ -90,7 +90,7 @@ class Precondition(abc.ABC):
90
90
  while True:
91
91
  if self.timeout > 0 and time.time() - start_time > self.timeout:
92
92
  # Cancel the request on timeout.
93
- api_requests.set_request_failed(
93
+ await api_requests.set_request_failed_async(
94
94
  self.request_id,
95
95
  exceptions.RequestCancelled(
96
96
  f'Request {self.request_id} precondition wait timed '
@@ -98,13 +98,15 @@ class Precondition(abc.ABC):
98
98
  return False
99
99
 
100
100
  # Check if the request has been cancelled
101
- request = api_requests.get_request(self.request_id)
101
+ request = await api_requests.get_request_async(self.request_id,
102
+ fields=['status'])
102
103
  if request is None:
103
104
  logger.error(f'Request {self.request_id} not found')
104
105
  return False
105
106
  if request.status == api_requests.RequestStatus.CANCELLED:
106
107
  logger.debug(f'Request {self.request_id} cancelled')
107
108
  return False
109
+ del request
108
110
 
109
111
  try:
110
112
  met, status_msg = await self.check()
@@ -112,12 +114,11 @@ class Precondition(abc.ABC):
112
114
  return True
113
115
  if status_msg is not None and status_msg != last_status_msg:
114
116
  # Update the status message if it has changed.
115
- with api_requests.update_request(self.request_id) as req:
116
- assert req is not None, self.request_id
117
- req.status_msg = status_msg
117
+ await api_requests.update_status_msg_async(
118
+ self.request_id, status_msg)
118
119
  last_status_msg = status_msg
119
120
  except (Exception, SystemExit, KeyboardInterrupt) as e: # pylint: disable=broad-except
120
- api_requests.set_request_failed(self.request_id, e)
121
+ await api_requests.set_request_failed_async(self.request_id, e)
121
122
  logger.info(f'Request {self.request_id} failed due to '
122
123
  f'{common_utils.format_exception(e)}')
123
124
  return False
@@ -145,10 +146,9 @@ class ClusterStartCompletePrecondition(Precondition):
145
146
  self.cluster_name = cluster_name
146
147
 
147
148
  async def check(self) -> Tuple[bool, Optional[str]]:
148
- cluster_record = global_user_state.get_cluster_from_name(
149
+ cluster_status = global_user_state.get_status_from_cluster_name(
149
150
  self.cluster_name)
150
- if (cluster_record and
151
- cluster_record['status'] is status_lib.ClusterStatus.UP):
151
+ if cluster_status is status_lib.ClusterStatus.UP:
152
152
  # Shortcut for started clusters, ignore cluster not found
153
153
  # since the cluster record might not yet be created by the
154
154
  # launch task.
@@ -161,14 +161,18 @@ class ClusterStartCompletePrecondition(Precondition):
161
161
  # We unify these situations into a single state: the process of starting
162
162
  # the cluster is done (either normally or abnormally) but cluster is not
163
163
  # in UP status.
164
- requests = api_requests.get_request_tasks(
165
- status=[
166
- api_requests.RequestStatus.RUNNING,
167
- api_requests.RequestStatus.PENDING
168
- ],
169
- include_request_names=['sky.launch', 'sky.start'],
170
- cluster_names=[self.cluster_name])
164
+ requests = await api_requests.get_request_tasks_async(
165
+ req_filter=api_requests.RequestTaskFilter(
166
+ status=[
167
+ api_requests.RequestStatus.PENDING,
168
+ api_requests.RequestStatus.RUNNING
169
+ ],
170
+ include_request_names=['sky.launch', 'sky.start'],
171
+ cluster_names=[self.cluster_name],
172
+ # Only get the request ID to avoid fetching the whole request.
173
+ # We're only interested in the count, not the whole request.
174
+ fields=['request_id']))
171
175
  if len(requests) == 0:
172
- # No runnning or pending tasks, the start process is done.
176
+ # No running or pending tasks, the start process is done.
173
177
  return True, None
174
178
  return False, f'Waiting for cluster {self.cluster_name} to be UP.'