skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -1,11 +1,28 @@
1
1
  """Payloads for the Sky API requests.
2
2
 
3
- TODO(zhwu): We can consider a better way to handle the default values of the
4
- kwargs for the payloads, otherwise, we have to keep the default values the sync
5
- with the backend functions. The benefit of having the default values in the
6
- payloads is that a user can find the default values in the Restful API docs.
3
+ All the payloads that will be used between the client and server communication
4
+ must be defined here to make sure they get covered by our API compatibility tests.
5
+
6
+ Compatibility note:
7
+ - Adding a new body for new API is compatible as long as the SDK method using
8
+ the new API is properly decorated with `versions.minimal_api_version`.
9
+ - Adding a new field with default value to an existing body is compatible at
10
+ API level, but the business logic must handle the case where the field is
11
+ not processed by an old version of remote client/server. This can usually
12
+ be done by checking `versions.get_remote_api_version()`.
13
+ - Other changes are not compatible at API level, so must be handled specially.
14
+ A common pattern is to keep both the old and new version of the body and
15
+ check `versions.get_remote_api_version()` to decide which body to use. For
16
+ example, say we refactor the `LaunchBody`, the original `LaunchBody` must be
17
+ kept in the codebase and the new body should be added via `LaunchBodyV2`.
18
+ Then if the remote runs in an old version, the local code should still send
19
+ `LaunchBody` to keep the backward compatibility. `LaunchBody` can be removed
20
+ later when constants.MIN_COMPATIBLE_API_VERSION is updated to a version that
21
+ supports `LaunchBodyV2`.
22
+
23
+ Also refer to sky.server.constants.MIN_COMPATIBLE_API_VERSION and the
24
+ sky.server.versions module for more details.
7
25
  """
8
- import getpass
9
26
  import os
10
27
  import typing
11
28
  from typing import Any, Dict, List, Optional, Tuple, Union
@@ -16,6 +33,7 @@ from sky import sky_logging
16
33
  from sky import skypilot_config
17
34
  from sky.adaptors import common as adaptors_common
18
35
  from sky.server import common
36
+ from sky.skylet import autostop_lib
19
37
  from sky.skylet import constants
20
38
  from sky.usage import constants as usage_constants
21
39
  from sky.usage import usage_lib
@@ -38,51 +56,96 @@ logger = sky_logging.init_logger(__name__)
38
56
  EXTERNAL_LOCAL_ENV_VARS = [
39
57
  # Allow overriding the AWS authentication.
40
58
  'AWS_PROFILE',
59
+ 'AWS_DEFAULT_PROFILE',
41
60
  'AWS_ACCESS_KEY_ID',
42
61
  'AWS_SECRET_ACCESS_KEY',
62
+ 'AWS_SESSION_TOKEN',
63
+ # Allow overriding the Azure authentication.
64
+ 'AZURE_CLIENT_ID',
65
+ 'AZURE_CLIENT_SECRET',
66
+ 'AZURE_TENANT_ID',
67
+ 'AZURE_SUBSCRIPTION_ID',
43
68
  # Allow overriding the GCP authentication.
44
69
  'GOOGLE_APPLICATION_CREDENTIALS',
70
+ # Allow overriding the kubeconfig.
71
+ 'KUBECONFIG',
45
72
  ]
46
73
 
47
74
 
48
- @annotations.lru_cache(scope='global')
49
75
  def request_body_env_vars() -> dict:
50
76
  env_vars = {}
51
77
  for env_var in os.environ:
52
- if env_var.startswith(constants.SKYPILOT_ENV_VAR_PREFIX):
78
+ if (env_var.startswith(constants.SKYPILOT_ENV_VAR_PREFIX) and
79
+ not env_var.startswith(
80
+ constants.SKYPILOT_SERVER_ENV_VAR_PREFIX)):
53
81
  env_vars[env_var] = os.environ[env_var]
54
82
  if common.is_api_server_local() and env_var in EXTERNAL_LOCAL_ENV_VARS:
55
83
  env_vars[env_var] = os.environ[env_var]
56
84
  env_vars[constants.USER_ID_ENV_VAR] = common_utils.get_user_hash()
57
- env_vars[constants.USER_ENV_VAR] = os.getenv(constants.USER_ENV_VAR,
58
- getpass.getuser())
85
+ env_vars[constants.USER_ENV_VAR] = common_utils.get_current_user_name()
59
86
  env_vars[
60
87
  usage_constants.USAGE_RUN_ID_ENV_VAR] = usage_lib.messages.usage.run_id
88
+ if not common.is_api_server_local():
89
+ # Used in job controller, for local API server, keep the
90
+ # SKYPILOT_CONFIG env var to use the config for the managed job.
91
+ env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
61
92
  # Remove the path to config file, as the config content is included in the
62
93
  # request body and will be merged with the config on the server side.
63
- env_vars.pop(skypilot_config.ENV_VAR_SKYPILOT_CONFIG, None)
64
94
  env_vars.pop(skypilot_config.ENV_VAR_GLOBAL_CONFIG, None)
65
95
  env_vars.pop(skypilot_config.ENV_VAR_PROJECT_CONFIG, None)
96
+ # Remove the config related env vars, as the client config override
97
+ # should be passed in the request body.
98
+ # Any new environment variables that are server-specific should
99
+ # use SKYPILOT_SERVER_ENV_VAR_PREFIX.
100
+ env_vars.pop(constants.ENV_VAR_DB_CONNECTION_URI, None)
66
101
  return env_vars
67
102
 
68
103
 
69
104
  def get_override_skypilot_config_from_client() -> Dict[str, Any]:
70
105
  """Returns the override configs from the client."""
106
+ if annotations.is_on_api_server:
107
+ return {}
71
108
  config = skypilot_config.to_dict()
72
109
  # Remove the API server config, as we should not specify the SkyPilot
73
110
  # server endpoint on the server side. This avoids the warning at
74
111
  # server-side.
75
112
  config.pop_nested(('api_server',), default_value=None)
113
+ # Remove the admin policy, as the policy has been applied on the client
114
+ # side.
115
+ config.pop_nested(('admin_policy',), default_value=None)
76
116
  return config
77
117
 
78
118
 
79
- class RequestBody(pydantic.BaseModel):
119
+ def get_override_skypilot_config_path_from_client() -> Optional[str]:
120
+ """Returns the override config path from the client."""
121
+ if annotations.is_on_api_server:
122
+ return None
123
+ # Currently, we don't need to check if the client-side config
124
+ # has been overridden because we only deal with cases where
125
+ # client has a project-level config/changed config and the
126
+ # api server has a different config.
127
+ return skypilot_config.loaded_config_path_serialized()
128
+
129
+
130
+ class BasePayload(pydantic.BaseModel):
131
+ """The base payload for the SkyPilot API."""
132
+ # Ignore extra fields in the request body, which is useful for backward
133
+ # compatibility. The difference with `allow` is that `ignore` will not
134
+ # include the unknown fields when dumping the model, i.e., we can add new
135
+ # fields to the request body without breaking the existing old API server
136
+ # where the handler function does not accept the new field in function
137
+ # signature.
138
+ model_config = pydantic.ConfigDict(extra='ignore')
139
+
140
+
141
+ class RequestBody(BasePayload):
80
142
  """The request body for the SkyPilot API."""
81
143
  env_vars: Dict[str, str] = {}
82
144
  entrypoint: str = ''
83
145
  entrypoint_command: str = ''
84
146
  using_remote_api_server: bool = False
85
147
  override_skypilot_config: Optional[Dict[str, Any]] = {}
148
+ override_skypilot_config_path: Optional[str] = None
86
149
 
87
150
  def __init__(self, **data):
88
151
  data['env_vars'] = data.get('env_vars', request_body_env_vars())
@@ -97,6 +160,9 @@ class RequestBody(pydantic.BaseModel):
97
160
  data['override_skypilot_config'] = data.get(
98
161
  'override_skypilot_config',
99
162
  get_override_skypilot_config_from_client())
163
+ data['override_skypilot_config_path'] = data.get(
164
+ 'override_skypilot_config_path',
165
+ get_override_skypilot_config_path_from_client())
100
166
  super().__init__(**data)
101
167
 
102
168
  def to_kwargs(self) -> Dict[str, Any]:
@@ -111,6 +177,7 @@ class RequestBody(pydantic.BaseModel):
111
177
  kwargs.pop('entrypoint_command')
112
178
  kwargs.pop('using_remote_api_server')
113
179
  kwargs.pop('override_skypilot_config')
180
+ kwargs.pop('override_skypilot_config_path')
114
181
  return kwargs
115
182
 
116
183
  @property
@@ -122,6 +189,13 @@ class CheckBody(RequestBody):
122
189
  """The request body for the check endpoint."""
123
190
  clouds: Optional[Tuple[str, ...]] = None
124
191
  verbose: bool = False
192
+ workspace: Optional[str] = None
193
+
194
+
195
+ class EnabledCloudsBody(RequestBody):
196
+ """The request body for the enabled clouds endpoint."""
197
+ workspace: Optional[str] = None
198
+ expand: bool = False
125
199
 
126
200
 
127
201
  class DagRequestBody(RequestBody):
@@ -144,17 +218,33 @@ class DagRequestBody(RequestBody):
144
218
  return kwargs
145
219
 
146
220
 
147
- class ValidateBody(DagRequestBody):
221
+ class DagRequestBodyWithRequestOptions(DagRequestBody):
222
+ """Request body base class for endpoints with a dag and request options."""
223
+ request_options: Optional[admin_policy.RequestOptions]
224
+
225
+ def get_request_options(self) -> Optional[admin_policy.RequestOptions]:
226
+ """Get the request options."""
227
+ if self.request_options is None:
228
+ return None
229
+ if isinstance(self.request_options, dict):
230
+ return admin_policy.RequestOptions(**self.request_options)
231
+ return self.request_options
232
+
233
+ def to_kwargs(self) -> Dict[str, Any]:
234
+ kwargs = super().to_kwargs()
235
+ kwargs['request_options'] = self.get_request_options()
236
+ return kwargs
237
+
238
+
239
+ class ValidateBody(DagRequestBodyWithRequestOptions):
148
240
  """The request body for the validate endpoint."""
149
241
  dag: str
150
- request_options: Optional[admin_policy.RequestOptions]
151
242
 
152
243
 
153
- class OptimizeBody(DagRequestBody):
244
+ class OptimizeBody(DagRequestBodyWithRequestOptions):
154
245
  """The request body for the optimize endpoint."""
155
246
  dag: str
156
247
  minimize: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
157
- request_options: Optional[admin_policy.RequestOptions]
158
248
 
159
249
 
160
250
  class LaunchBody(RequestBody):
@@ -162,8 +252,10 @@ class LaunchBody(RequestBody):
162
252
  task: str
163
253
  cluster_name: str
164
254
  retry_until_up: bool = False
255
+ # TODO(aylei): remove this field in v0.12.0
165
256
  idle_minutes_to_autostop: Optional[int] = None
166
257
  dryrun: bool = False
258
+ # TODO(aylei): remove this field in v0.12.0
167
259
  down: bool = False
168
260
  backend: Optional[str] = None
169
261
  optimize_target: common_lib.OptimizeTarget = common_lib.OptimizeTarget.COST
@@ -229,12 +321,20 @@ class StatusBody(RequestBody):
229
321
  cluster_names: Optional[List[str]] = None
230
322
  refresh: common_lib.StatusRefreshMode = common_lib.StatusRefreshMode.NONE
231
323
  all_users: bool = True
324
+ # TODO (kyuds): default to False post 0.12.0
325
+ include_credentials: bool = True
326
+ # Only return fields that are needed for the
327
+ # dashboard / CLI summary response
328
+ summary_response: bool = False
329
+ # Include the cluster handle in the response
330
+ include_handle: bool = True
232
331
 
233
332
 
234
333
  class StartBody(RequestBody):
235
334
  """The request body for the start endpoint."""
236
335
  cluster_name: str
237
336
  idle_minutes_to_autostop: Optional[int] = None
337
+ wait_for: Optional[autostop_lib.AutostopWaitFor] = None
238
338
  retry_until_up: bool = False
239
339
  down: bool = False
240
340
  force: bool = False
@@ -244,6 +344,7 @@ class AutostopBody(RequestBody):
244
344
  """The request body for the autostop endpoint."""
245
345
  cluster_name: str
246
346
  idle_minutes: int
347
+ wait_for: Optional[autostop_lib.AutostopWaitFor] = None
247
348
  down: bool = False
248
349
 
249
350
 
@@ -271,9 +372,10 @@ class CancelBody(RequestBody):
271
372
  return kwargs
272
373
 
273
374
 
274
- class ClusterNameBody(RequestBody):
375
+ class ProvisionLogsBody(RequestBody):
275
376
  """Cluster node."""
276
377
  cluster_name: str
378
+ worker: Optional[int] = None
277
379
 
278
380
 
279
381
  class ClusterJobBody(RequestBody):
@@ -297,6 +399,63 @@ class ClusterJobsDownloadLogsBody(RequestBody):
297
399
  local_dir: str = constants.SKY_LOGS_DIRECTORY
298
400
 
299
401
 
402
+ class UserCreateBody(RequestBody):
403
+ """The request body for the user create endpoint."""
404
+ username: str
405
+ password: str
406
+ role: Optional[str] = None
407
+
408
+
409
+ class UserDeleteBody(RequestBody):
410
+ """The request body for the user delete endpoint."""
411
+ user_id: str
412
+
413
+
414
+ class UserUpdateBody(RequestBody):
415
+ """The request body for the user update endpoint."""
416
+ user_id: str
417
+ role: Optional[str] = None
418
+ password: Optional[str] = None
419
+
420
+
421
+ class UserImportBody(RequestBody):
422
+ """The request body for the user import endpoint."""
423
+ csv_content: str
424
+
425
+
426
+ class ServiceAccountTokenCreateBody(RequestBody):
427
+ """The request body for creating a service account token."""
428
+ token_name: str
429
+ expires_in_days: Optional[int] = None
430
+
431
+
432
+ class ServiceAccountTokenDeleteBody(RequestBody):
433
+ """The request body for deleting a service account token."""
434
+ token_id: str
435
+
436
+
437
+ class UpdateRoleBody(RequestBody):
438
+ """The request body for updating a user role."""
439
+ role: str
440
+
441
+
442
+ class ServiceAccountTokenRoleBody(RequestBody):
443
+ """The request body for getting a service account token role."""
444
+ token_id: str
445
+
446
+
447
+ class ServiceAccountTokenUpdateRoleBody(RequestBody):
448
+ """The request body for updating a service account token role."""
449
+ token_id: str
450
+ role: str
451
+
452
+
453
+ class ServiceAccountTokenRotateBody(RequestBody):
454
+ """The request body for rotating a service account token."""
455
+ token_id: str
456
+ expires_in_days: Optional[int] = None
457
+
458
+
300
459
  class DownloadBody(RequestBody):
301
460
  """The request body for the download endpoint."""
302
461
  folder_paths: List[str]
@@ -307,6 +466,40 @@ class StorageBody(RequestBody):
307
466
  name: str
308
467
 
309
468
 
469
+ class VolumeApplyBody(RequestBody):
470
+ """The request body for the volume apply endpoint."""
471
+ name: str
472
+ volume_type: str
473
+ cloud: str
474
+ region: Optional[str] = None
475
+ zone: Optional[str] = None
476
+ size: Optional[str] = None
477
+ config: Optional[Dict[str, Any]] = None
478
+ labels: Optional[Dict[str, str]] = None
479
+ use_existing: Optional[bool] = None
480
+
481
+
482
+ class VolumeDeleteBody(RequestBody):
483
+ """The request body for the volume delete endpoint."""
484
+ names: List[str]
485
+
486
+
487
+ class VolumeListBody(RequestBody):
488
+ """The request body for the volume list endpoint."""
489
+ pass
490
+
491
+
492
+ class VolumeValidateBody(RequestBody):
493
+ """The request body for the volume validate endpoint."""
494
+ name: Optional[str] = None
495
+ volume_type: Optional[str] = None
496
+ infra: Optional[str] = None
497
+ size: Optional[str] = None
498
+ labels: Optional[Dict[str, str]] = None
499
+ config: Optional[Dict[str, Any]] = None
500
+ use_existing: Optional[bool] = None
501
+
502
+
310
503
  class EndpointsBody(RequestBody):
311
504
  """The request body for the endpoint."""
312
505
  cluster: str
@@ -328,6 +521,8 @@ class JobsLaunchBody(RequestBody):
328
521
  """The request body for the jobs launch endpoint."""
329
522
  task: str
330
523
  name: Optional[str]
524
+ pool: Optional[str] = None
525
+ num_jobs: Optional[int] = None
331
526
 
332
527
  def to_kwargs(self) -> Dict[str, Any]:
333
528
  kwargs = super().to_kwargs()
@@ -341,6 +536,25 @@ class JobsQueueBody(RequestBody):
341
536
  refresh: bool = False
342
537
  skip_finished: bool = False
343
538
  all_users: bool = False
539
+ job_ids: Optional[List[int]] = None
540
+
541
+
542
+ class JobsQueueV2Body(RequestBody):
543
+ """The request body for the jobs queue endpoint."""
544
+ refresh: bool = False
545
+ skip_finished: bool = False
546
+ all_users: bool = False
547
+ job_ids: Optional[List[int]] = None
548
+ user_match: Optional[str] = None
549
+ workspace_match: Optional[str] = None
550
+ name_match: Optional[str] = None
551
+ pool_match: Optional[str] = None
552
+ page: Optional[int] = None
553
+ limit: Optional[int] = None
554
+ statuses: Optional[List[str]] = None
555
+ # The fields to return in the response.
556
+ # Refer to the fields in the `class ManagedJobRecord` in `response.py`
557
+ fields: Optional[List[str]] = None
344
558
 
345
559
 
346
560
  class JobsCancelBody(RequestBody):
@@ -349,6 +563,7 @@ class JobsCancelBody(RequestBody):
349
563
  job_ids: Optional[List[int]] = None
350
564
  all: bool = False
351
565
  all_users: bool = False
566
+ pool: Optional[str] = None
352
567
 
353
568
 
354
569
  class JobsLogsBody(RequestBody):
@@ -358,6 +573,7 @@ class JobsLogsBody(RequestBody):
358
573
  follow: bool = True
359
574
  controller: bool = False
360
575
  refresh: bool = False
576
+ tail: Optional[int] = None
361
577
 
362
578
 
363
579
  class RequestCancelBody(RequestBody):
@@ -371,6 +587,8 @@ class RequestStatusBody(pydantic.BaseModel):
371
587
  """The request body for the API request status endpoint."""
372
588
  request_ids: Optional[List[str]] = None
373
589
  all_status: bool = False
590
+ limit: Optional[int] = None
591
+ fields: Optional[List[str]] = None
374
592
 
375
593
 
376
594
  class ServeUpBody(RequestBody):
@@ -421,6 +639,7 @@ class ServeLogsBody(RequestBody):
421
639
  target: Union[str, serve.ServiceComponent]
422
640
  replica_id: Optional[int] = None
423
641
  follow: bool = True
642
+ tail: Optional[int] = None
424
643
 
425
644
 
426
645
  class ServeDownloadLogsBody(RequestBody):
@@ -430,6 +649,7 @@ class ServeDownloadLogsBody(RequestBody):
430
649
  targets: Optional[Union[str, serve.ServiceComponent,
431
650
  List[Union[str, serve.ServiceComponent]]]]
432
651
  replica_ids: Optional[List[int]] = None
652
+ tail: Optional[int] = None
433
653
 
434
654
 
435
655
  class ServeStatusBody(RequestBody):
@@ -439,9 +659,10 @@ class ServeStatusBody(RequestBody):
439
659
 
440
660
  class RealtimeGpuAvailabilityRequestBody(RequestBody):
441
661
  """The request body for the realtime GPU availability endpoint."""
442
- context: Optional[str]
443
- name_filter: Optional[str]
444
- quantity_filter: Optional[int]
662
+ context: Optional[str] = None
663
+ name_filter: Optional[str] = None
664
+ quantity_filter: Optional[int] = None
665
+ is_ssh: Optional[bool] = None
445
666
 
446
667
 
447
668
  class KubernetesNodeInfoRequestBody(RequestBody):
@@ -473,12 +694,19 @@ class ListAcceleratorCountsBody(RequestBody):
473
694
  class LocalUpBody(RequestBody):
474
695
  """The request body for the local up endpoint."""
475
696
  gpus: bool = True
476
- ips: Optional[List[str]] = None
477
- ssh_user: Optional[str] = None
478
- ssh_key: Optional[str] = None
697
+ name: Optional[str] = None
698
+ port_start: Optional[int] = None
699
+
700
+
701
+ class LocalDownBody(RequestBody):
702
+ """The request body for the local down endpoint."""
703
+ name: Optional[str] = None
704
+
705
+
706
+ class SSHUpBody(RequestBody):
707
+ """The request body for the SSH up/down endpoints."""
708
+ infra: Optional[str] = None
479
709
  cleanup: bool = False
480
- context_name: Optional[str] = None
481
- password: Optional[str] = None
482
710
 
483
711
 
484
712
  class ServeTerminateReplicaBody(RequestBody):
@@ -510,7 +738,119 @@ class JobsDownloadLogsBody(RequestBody):
510
738
  local_dir: str = constants.SKY_LOGS_DIRECTORY
511
739
 
512
740
 
741
+ class JobsPoolApplyBody(RequestBody):
742
+ """The request body for the jobs pool apply endpoint."""
743
+ task: Optional[str] = None
744
+ workers: Optional[int] = None
745
+ pool_name: str
746
+ mode: serve.UpdateMode
747
+
748
+ def to_kwargs(self) -> Dict[str, Any]:
749
+ kwargs = super().to_kwargs()
750
+ if self.task is not None:
751
+ dag = common.process_mounts_in_task_on_api_server(
752
+ self.task, self.env_vars, workdir_only=False)
753
+ assert len(
754
+ dag.tasks) == 1, ('Must only specify one task in the DAG for '
755
+ 'a pool.', dag)
756
+ kwargs['task'] = dag.tasks[0]
757
+ else:
758
+ kwargs['task'] = None
759
+ return kwargs
760
+
761
+
762
+ class JobsPoolDownBody(RequestBody):
763
+ """The request body for the jobs pool down endpoint."""
764
+ pool_names: Optional[Union[str, List[str]]]
765
+ all: bool = False
766
+ purge: bool = False
767
+
768
+
769
+ class JobsPoolStatusBody(RequestBody):
770
+ """The request body for the jobs pool status endpoint."""
771
+ pool_names: Optional[Union[str, List[str]]]
772
+
773
+
774
+ class JobsPoolLogsBody(RequestBody):
775
+ """The request body for the jobs pool logs endpoint."""
776
+ pool_name: str
777
+ target: Union[str, serve.ServiceComponent]
778
+ worker_id: Optional[int] = None
779
+ follow: bool = True
780
+ tail: Optional[int] = None
781
+
782
+
783
+ class JobsPoolDownloadLogsBody(RequestBody):
784
+ """The request body for the jobs pool download logs endpoint."""
785
+ pool_name: str
786
+ local_dir: str
787
+ targets: Optional[Union[str, serve.ServiceComponent,
788
+ List[Union[str, serve.ServiceComponent]]]]
789
+ worker_ids: Optional[List[int]] = None
790
+ tail: Optional[int] = None
791
+
792
+
513
793
  class UploadZipFileResponse(pydantic.BaseModel):
514
794
  """The response body for the upload zip file endpoint."""
515
795
  status: str
516
796
  missing_chunks: Optional[List[str]] = None
797
+
798
+
799
+ class UpdateWorkspaceBody(RequestBody):
800
+ """The request body for updating a specific workspace configuration."""
801
+ workspace_name: str = '' # Will be set from path parameter
802
+ config: Dict[str, Any]
803
+
804
+
805
+ class CreateWorkspaceBody(RequestBody):
806
+ """The request body for creating a new workspace."""
807
+ workspace_name: str = '' # Will be set from path parameter
808
+ config: Dict[str, Any]
809
+
810
+
811
+ class DeleteWorkspaceBody(RequestBody):
812
+ """The request body for deleting a workspace."""
813
+ workspace_name: str
814
+
815
+
816
+ class UpdateConfigBody(RequestBody):
817
+ """The request body for updating the entire SkyPilot configuration."""
818
+ config: Dict[str, Any]
819
+
820
+
821
+ class GetConfigBody(RequestBody):
822
+ """The request body for getting the entire SkyPilot configuration."""
823
+ pass
824
+
825
+
826
+ class CostReportBody(RequestBody):
827
+ """The request body for the cost report endpoint."""
828
+ days: Optional[int] = 30
829
+ # we use hashes instead of names to avoid the case where
830
+ # the name is not unique
831
+ cluster_hashes: Optional[List[str]] = None
832
+ # Only return fields that are needed for the dashboard
833
+ # summary page
834
+ dashboard_summary_response: bool = False
835
+
836
+
837
+ class RequestPayload(BasePayload):
838
+ """The payload for the requests."""
839
+
840
+ request_id: str
841
+ name: str
842
+ entrypoint: str
843
+ request_body: str
844
+ status: str
845
+ created_at: float
846
+ user_id: str
847
+ return_value: str
848
+ error: str
849
+ pid: Optional[int]
850
+ schedule_type: str
851
+ user_name: Optional[str] = None
852
+ # Resources the request operates on.
853
+ cluster_name: Optional[str] = None
854
+ status_msg: Optional[str] = None
855
+ should_retry: bool = False
856
+ finished_at: Optional[float] = None
@@ -90,7 +90,7 @@ class Precondition(abc.ABC):
90
90
  while True:
91
91
  if self.timeout > 0 and time.time() - start_time > self.timeout:
92
92
  # Cancel the request on timeout.
93
- api_requests.set_request_failed(
93
+ await api_requests.set_request_failed_async(
94
94
  self.request_id,
95
95
  exceptions.RequestCancelled(
96
96
  f'Request {self.request_id} precondition wait timed '
@@ -98,13 +98,15 @@ class Precondition(abc.ABC):
98
98
  return False
99
99
 
100
100
  # Check if the request has been cancelled
101
- request = api_requests.get_request(self.request_id)
101
+ request = await api_requests.get_request_async(self.request_id,
102
+ fields=['status'])
102
103
  if request is None:
103
104
  logger.error(f'Request {self.request_id} not found')
104
105
  return False
105
106
  if request.status == api_requests.RequestStatus.CANCELLED:
106
107
  logger.debug(f'Request {self.request_id} cancelled')
107
108
  return False
109
+ del request
108
110
 
109
111
  try:
110
112
  met, status_msg = await self.check()
@@ -112,12 +114,11 @@ class Precondition(abc.ABC):
112
114
  return True
113
115
  if status_msg is not None and status_msg != last_status_msg:
114
116
  # Update the status message if it has changed.
115
- with api_requests.update_request(self.request_id) as req:
116
- assert req is not None, self.request_id
117
- req.status_msg = status_msg
117
+ await api_requests.update_status_msg_async(
118
+ self.request_id, status_msg)
118
119
  last_status_msg = status_msg
119
120
  except (Exception, SystemExit, KeyboardInterrupt) as e: # pylint: disable=broad-except
120
- api_requests.set_request_failed(self.request_id, e)
121
+ await api_requests.set_request_failed_async(self.request_id, e)
121
122
  logger.info(f'Request {self.request_id} failed due to '
122
123
  f'{common_utils.format_exception(e)}')
123
124
  return False
@@ -145,10 +146,9 @@ class ClusterStartCompletePrecondition(Precondition):
145
146
  self.cluster_name = cluster_name
146
147
 
147
148
  async def check(self) -> Tuple[bool, Optional[str]]:
148
- cluster_record = global_user_state.get_cluster_from_name(
149
+ cluster_status = global_user_state.get_status_from_cluster_name(
149
150
  self.cluster_name)
150
- if (cluster_record and
151
- cluster_record['status'] is status_lib.ClusterStatus.UP):
151
+ if cluster_status is status_lib.ClusterStatus.UP:
152
152
  # Shortcut for started clusters, ignore cluster not found
153
153
  # since the cluster record might not yet be created by the
154
154
  # launch task.
@@ -161,14 +161,18 @@ class ClusterStartCompletePrecondition(Precondition):
161
161
  # We unify these situations into a single state: the process of starting
162
162
  # the cluster is done (either normally or abnormally) but cluster is not
163
163
  # in UP status.
164
- requests = api_requests.get_request_tasks(
165
- status=[
166
- api_requests.RequestStatus.RUNNING,
167
- api_requests.RequestStatus.PENDING
168
- ],
169
- include_request_names=['sky.launch', 'sky.start'],
170
- cluster_names=[self.cluster_name])
164
+ requests = await api_requests.get_request_tasks_async(
165
+ req_filter=api_requests.RequestTaskFilter(
166
+ status=[
167
+ api_requests.RequestStatus.PENDING,
168
+ api_requests.RequestStatus.RUNNING
169
+ ],
170
+ include_request_names=['sky.launch', 'sky.start'],
171
+ cluster_names=[self.cluster_name],
172
+ # Only get the request ID to avoid fetching the whole request.
173
+ # We're only interested in the count, not the whole request.
174
+ fields=['request_id']))
171
175
  if len(requests) == 0:
172
- # No runnning or pending tasks, the start process is done.
176
+ # No running or pending tasks, the start process is done.
173
177
  return True, None
174
178
  return False, f'Waiting for cluster {self.cluster_name} to be UP.'