skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (397) hide show
  1. sky/__init__.py +10 -2
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +20 -0
  14. sky/authentication.py +157 -263
  15. sky/backends/__init__.py +3 -2
  16. sky/backends/backend.py +11 -3
  17. sky/backends/backend_utils.py +588 -184
  18. sky/backends/cloud_vm_ray_backend.py +1088 -904
  19. sky/backends/local_docker_backend.py +9 -5
  20. sky/backends/task_codegen.py +633 -0
  21. sky/backends/wheel_utils.py +18 -0
  22. sky/catalog/__init__.py +8 -0
  23. sky/catalog/aws_catalog.py +4 -0
  24. sky/catalog/common.py +19 -1
  25. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  26. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  27. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  28. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  29. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  30. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  31. sky/catalog/kubernetes_catalog.py +24 -28
  32. sky/catalog/primeintellect_catalog.py +95 -0
  33. sky/catalog/runpod_catalog.py +5 -1
  34. sky/catalog/seeweb_catalog.py +184 -0
  35. sky/catalog/shadeform_catalog.py +165 -0
  36. sky/check.py +73 -43
  37. sky/client/cli/command.py +675 -412
  38. sky/client/cli/flags.py +4 -2
  39. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  40. sky/client/cli/utils.py +79 -0
  41. sky/client/common.py +12 -2
  42. sky/client/sdk.py +132 -63
  43. sky/client/sdk_async.py +34 -33
  44. sky/cloud_stores.py +82 -3
  45. sky/clouds/__init__.py +6 -0
  46. sky/clouds/aws.py +337 -129
  47. sky/clouds/azure.py +24 -18
  48. sky/clouds/cloud.py +40 -13
  49. sky/clouds/cudo.py +16 -13
  50. sky/clouds/do.py +9 -7
  51. sky/clouds/fluidstack.py +12 -5
  52. sky/clouds/gcp.py +14 -7
  53. sky/clouds/hyperbolic.py +12 -5
  54. sky/clouds/ibm.py +12 -5
  55. sky/clouds/kubernetes.py +80 -45
  56. sky/clouds/lambda_cloud.py +12 -5
  57. sky/clouds/nebius.py +23 -9
  58. sky/clouds/oci.py +19 -12
  59. sky/clouds/paperspace.py +4 -1
  60. sky/clouds/primeintellect.py +317 -0
  61. sky/clouds/runpod.py +85 -24
  62. sky/clouds/scp.py +12 -8
  63. sky/clouds/seeweb.py +477 -0
  64. sky/clouds/shadeform.py +400 -0
  65. sky/clouds/ssh.py +4 -2
  66. sky/clouds/utils/scp_utils.py +61 -50
  67. sky/clouds/vast.py +33 -27
  68. sky/clouds/vsphere.py +14 -16
  69. sky/core.py +174 -165
  70. sky/dashboard/out/404.html +1 -1
  71. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  73. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  74. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  76. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  77. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  79. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
  80. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  82. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  83. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  86. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  87. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  88. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  90. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  92. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  93. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  94. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  95. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  96. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  97. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
  98. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  100. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  101. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  102. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
  105. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
  106. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  107. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  108. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  109. sky/dashboard/out/clusters/[cluster].html +1 -1
  110. sky/dashboard/out/clusters.html +1 -1
  111. sky/dashboard/out/config.html +1 -1
  112. sky/dashboard/out/index.html +1 -1
  113. sky/dashboard/out/infra/[context].html +1 -1
  114. sky/dashboard/out/infra.html +1 -1
  115. sky/dashboard/out/jobs/[job].html +1 -1
  116. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  117. sky/dashboard/out/jobs.html +1 -1
  118. sky/dashboard/out/users.html +1 -1
  119. sky/dashboard/out/volumes.html +1 -1
  120. sky/dashboard/out/workspace/new.html +1 -1
  121. sky/dashboard/out/workspaces/[name].html +1 -1
  122. sky/dashboard/out/workspaces.html +1 -1
  123. sky/data/data_utils.py +92 -1
  124. sky/data/mounting_utils.py +162 -29
  125. sky/data/storage.py +200 -19
  126. sky/data/storage_utils.py +10 -45
  127. sky/exceptions.py +18 -7
  128. sky/execution.py +74 -31
  129. sky/global_user_state.py +605 -191
  130. sky/jobs/__init__.py +2 -0
  131. sky/jobs/client/sdk.py +101 -4
  132. sky/jobs/client/sdk_async.py +31 -5
  133. sky/jobs/constants.py +15 -8
  134. sky/jobs/controller.py +726 -284
  135. sky/jobs/file_content_utils.py +128 -0
  136. sky/jobs/log_gc.py +193 -0
  137. sky/jobs/recovery_strategy.py +250 -100
  138. sky/jobs/scheduler.py +271 -173
  139. sky/jobs/server/core.py +367 -114
  140. sky/jobs/server/server.py +81 -35
  141. sky/jobs/server/utils.py +89 -35
  142. sky/jobs/state.py +1498 -620
  143. sky/jobs/utils.py +771 -306
  144. sky/logs/agent.py +40 -5
  145. sky/logs/aws.py +9 -19
  146. sky/metrics/utils.py +282 -39
  147. sky/optimizer.py +1 -1
  148. sky/provision/__init__.py +37 -1
  149. sky/provision/aws/config.py +34 -13
  150. sky/provision/aws/instance.py +5 -2
  151. sky/provision/azure/instance.py +5 -3
  152. sky/provision/common.py +2 -0
  153. sky/provision/cudo/instance.py +4 -3
  154. sky/provision/do/instance.py +4 -3
  155. sky/provision/docker_utils.py +97 -26
  156. sky/provision/fluidstack/instance.py +6 -5
  157. sky/provision/gcp/config.py +6 -1
  158. sky/provision/gcp/instance.py +4 -2
  159. sky/provision/hyperbolic/instance.py +4 -2
  160. sky/provision/instance_setup.py +66 -20
  161. sky/provision/kubernetes/__init__.py +2 -0
  162. sky/provision/kubernetes/config.py +7 -44
  163. sky/provision/kubernetes/constants.py +0 -1
  164. sky/provision/kubernetes/instance.py +609 -213
  165. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  166. sky/provision/kubernetes/network.py +12 -8
  167. sky/provision/kubernetes/network_utils.py +8 -25
  168. sky/provision/kubernetes/utils.py +382 -418
  169. sky/provision/kubernetes/volume.py +150 -18
  170. sky/provision/lambda_cloud/instance.py +16 -13
  171. sky/provision/nebius/instance.py +6 -2
  172. sky/provision/nebius/utils.py +103 -86
  173. sky/provision/oci/instance.py +4 -2
  174. sky/provision/paperspace/instance.py +4 -3
  175. sky/provision/primeintellect/__init__.py +10 -0
  176. sky/provision/primeintellect/config.py +11 -0
  177. sky/provision/primeintellect/instance.py +454 -0
  178. sky/provision/primeintellect/utils.py +398 -0
  179. sky/provision/provisioner.py +30 -9
  180. sky/provision/runpod/__init__.py +2 -0
  181. sky/provision/runpod/instance.py +4 -3
  182. sky/provision/runpod/volume.py +69 -13
  183. sky/provision/scp/instance.py +307 -130
  184. sky/provision/seeweb/__init__.py +11 -0
  185. sky/provision/seeweb/config.py +13 -0
  186. sky/provision/seeweb/instance.py +812 -0
  187. sky/provision/shadeform/__init__.py +11 -0
  188. sky/provision/shadeform/config.py +12 -0
  189. sky/provision/shadeform/instance.py +351 -0
  190. sky/provision/shadeform/shadeform_utils.py +83 -0
  191. sky/provision/vast/instance.py +5 -3
  192. sky/provision/volume.py +164 -0
  193. sky/provision/vsphere/common/ssl_helper.py +1 -1
  194. sky/provision/vsphere/common/vapiconnect.py +2 -1
  195. sky/provision/vsphere/common/vim_utils.py +3 -2
  196. sky/provision/vsphere/instance.py +8 -6
  197. sky/provision/vsphere/vsphere_utils.py +8 -1
  198. sky/resources.py +11 -3
  199. sky/schemas/api/responses.py +107 -6
  200. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  201. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  202. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  203. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  204. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  205. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  206. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  207. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  208. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  209. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  210. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  211. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  212. sky/schemas/generated/jobsv1_pb2.py +86 -0
  213. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  214. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  215. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  216. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  217. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  218. sky/schemas/generated/servev1_pb2.py +58 -0
  219. sky/schemas/generated/servev1_pb2.pyi +115 -0
  220. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  221. sky/serve/autoscalers.py +2 -0
  222. sky/serve/client/impl.py +55 -21
  223. sky/serve/constants.py +4 -3
  224. sky/serve/controller.py +17 -11
  225. sky/serve/load_balancing_policies.py +1 -1
  226. sky/serve/replica_managers.py +219 -142
  227. sky/serve/serve_rpc_utils.py +179 -0
  228. sky/serve/serve_state.py +63 -54
  229. sky/serve/serve_utils.py +145 -109
  230. sky/serve/server/core.py +46 -25
  231. sky/serve/server/impl.py +311 -162
  232. sky/serve/server/server.py +21 -19
  233. sky/serve/service.py +84 -68
  234. sky/serve/service_spec.py +45 -7
  235. sky/server/auth/loopback.py +38 -0
  236. sky/server/auth/oauth2_proxy.py +12 -7
  237. sky/server/common.py +47 -24
  238. sky/server/config.py +62 -28
  239. sky/server/constants.py +9 -1
  240. sky/server/daemons.py +109 -38
  241. sky/server/metrics.py +76 -96
  242. sky/server/middleware_utils.py +166 -0
  243. sky/server/requests/executor.py +381 -145
  244. sky/server/requests/payloads.py +71 -18
  245. sky/server/requests/preconditions.py +15 -13
  246. sky/server/requests/request_names.py +121 -0
  247. sky/server/requests/requests.py +507 -157
  248. sky/server/requests/serializers/decoders.py +48 -17
  249. sky/server/requests/serializers/encoders.py +85 -20
  250. sky/server/requests/threads.py +117 -0
  251. sky/server/rest.py +116 -24
  252. sky/server/server.py +420 -172
  253. sky/server/stream_utils.py +219 -45
  254. sky/server/uvicorn.py +30 -19
  255. sky/setup_files/MANIFEST.in +6 -1
  256. sky/setup_files/alembic.ini +8 -0
  257. sky/setup_files/dependencies.py +62 -19
  258. sky/setup_files/setup.py +44 -44
  259. sky/sky_logging.py +13 -5
  260. sky/skylet/attempt_skylet.py +106 -24
  261. sky/skylet/configs.py +3 -1
  262. sky/skylet/constants.py +111 -26
  263. sky/skylet/events.py +64 -10
  264. sky/skylet/job_lib.py +141 -104
  265. sky/skylet/log_lib.py +233 -5
  266. sky/skylet/log_lib.pyi +40 -2
  267. sky/skylet/providers/ibm/node_provider.py +12 -8
  268. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  269. sky/skylet/runtime_utils.py +21 -0
  270. sky/skylet/services.py +524 -0
  271. sky/skylet/skylet.py +22 -1
  272. sky/skylet/subprocess_daemon.py +104 -29
  273. sky/skypilot_config.py +99 -79
  274. sky/ssh_node_pools/server.py +9 -8
  275. sky/task.py +221 -104
  276. sky/templates/aws-ray.yml.j2 +1 -0
  277. sky/templates/azure-ray.yml.j2 +1 -0
  278. sky/templates/cudo-ray.yml.j2 +1 -0
  279. sky/templates/do-ray.yml.j2 +1 -0
  280. sky/templates/fluidstack-ray.yml.j2 +1 -0
  281. sky/templates/gcp-ray.yml.j2 +1 -0
  282. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  283. sky/templates/ibm-ray.yml.j2 +2 -1
  284. sky/templates/jobs-controller.yaml.j2 +3 -0
  285. sky/templates/kubernetes-ray.yml.j2 +196 -55
  286. sky/templates/lambda-ray.yml.j2 +1 -0
  287. sky/templates/nebius-ray.yml.j2 +3 -0
  288. sky/templates/oci-ray.yml.j2 +1 -0
  289. sky/templates/paperspace-ray.yml.j2 +1 -0
  290. sky/templates/primeintellect-ray.yml.j2 +72 -0
  291. sky/templates/runpod-ray.yml.j2 +1 -0
  292. sky/templates/scp-ray.yml.j2 +1 -0
  293. sky/templates/seeweb-ray.yml.j2 +171 -0
  294. sky/templates/shadeform-ray.yml.j2 +73 -0
  295. sky/templates/vast-ray.yml.j2 +1 -0
  296. sky/templates/vsphere-ray.yml.j2 +1 -0
  297. sky/templates/websocket_proxy.py +188 -43
  298. sky/usage/usage_lib.py +16 -4
  299. sky/users/permission.py +60 -43
  300. sky/utils/accelerator_registry.py +6 -3
  301. sky/utils/admin_policy_utils.py +18 -5
  302. sky/utils/annotations.py +22 -0
  303. sky/utils/asyncio_utils.py +78 -0
  304. sky/utils/atomic.py +1 -1
  305. sky/utils/auth_utils.py +153 -0
  306. sky/utils/cli_utils/status_utils.py +12 -7
  307. sky/utils/cluster_utils.py +28 -6
  308. sky/utils/command_runner.py +88 -27
  309. sky/utils/command_runner.pyi +36 -3
  310. sky/utils/common.py +3 -1
  311. sky/utils/common_utils.py +37 -4
  312. sky/utils/config_utils.py +1 -14
  313. sky/utils/context.py +127 -40
  314. sky/utils/context_utils.py +73 -18
  315. sky/utils/controller_utils.py +229 -70
  316. sky/utils/db/db_utils.py +95 -18
  317. sky/utils/db/kv_cache.py +149 -0
  318. sky/utils/db/migration_utils.py +24 -7
  319. sky/utils/env_options.py +4 -0
  320. sky/utils/git.py +559 -1
  321. sky/utils/kubernetes/create_cluster.sh +15 -30
  322. sky/utils/kubernetes/delete_cluster.sh +10 -7
  323. sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
  324. sky/utils/kubernetes/generate_kind_config.py +6 -66
  325. sky/utils/kubernetes/gpu_labeler.py +13 -3
  326. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  327. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  328. sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
  329. sky/utils/kubernetes/rsync_helper.sh +11 -3
  330. sky/utils/kubernetes_enums.py +7 -15
  331. sky/utils/lock_events.py +4 -4
  332. sky/utils/locks.py +128 -31
  333. sky/utils/log_utils.py +0 -319
  334. sky/utils/resource_checker.py +13 -10
  335. sky/utils/resources_utils.py +53 -29
  336. sky/utils/rich_utils.py +8 -4
  337. sky/utils/schemas.py +107 -52
  338. sky/utils/subprocess_utils.py +17 -4
  339. sky/utils/thread_utils.py +91 -0
  340. sky/utils/timeline.py +2 -1
  341. sky/utils/ux_utils.py +35 -1
  342. sky/utils/volume.py +88 -4
  343. sky/utils/yaml_utils.py +9 -0
  344. sky/volumes/client/sdk.py +48 -10
  345. sky/volumes/server/core.py +59 -22
  346. sky/volumes/server/server.py +46 -17
  347. sky/volumes/volume.py +54 -42
  348. sky/workspaces/core.py +57 -21
  349. sky/workspaces/server.py +13 -12
  350. sky_templates/README.md +3 -0
  351. sky_templates/__init__.py +3 -0
  352. sky_templates/ray/__init__.py +0 -0
  353. sky_templates/ray/start_cluster +183 -0
  354. sky_templates/ray/stop_cluster +75 -0
  355. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
  356. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  357. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  358. sky/client/cli/git.py +0 -549
  359. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  360. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  361. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  362. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  363. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  364. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  365. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  366. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  367. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  368. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  369. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  370. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  371. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  372. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  373. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  374. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  375. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  376. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  377. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  378. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  379. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  380. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  381. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  382. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  383. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  384. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  385. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  386. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  387. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  388. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  389. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  390. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  391. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  392. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  393. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  394. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  395. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
  396. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  397. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/client/sdk.py CHANGED
@@ -31,11 +31,14 @@ from sky import skypilot_config
31
31
  from sky.adaptors import common as adaptors_common
32
32
  from sky.client import common as client_common
33
33
  from sky.client import oauth as oauth_lib
34
+ from sky.jobs import scheduler
35
+ from sky.jobs import utils as managed_job_utils
34
36
  from sky.schemas.api import responses
35
37
  from sky.server import common as server_common
36
38
  from sky.server import rest
37
39
  from sky.server import versions
38
40
  from sky.server.requests import payloads
41
+ from sky.server.requests import request_names
39
42
  from sky.server.requests import requests as requests_lib
40
43
  from sky.skylet import autostop_lib
41
44
  from sky.skylet import constants
@@ -97,6 +100,9 @@ def reload_config() -> None:
97
100
  skypilot_config.safe_reload_config()
98
101
 
99
102
 
103
+ # The overloads are not comprehensive - e.g. get_result Literal[False] could be
104
+ # specified to return None. We can add more overloads if needed. To do that see
105
+ # https://github.com/python/mypy/issues/8634#issuecomment-609411104
100
106
  @typing.overload
101
107
  def stream_response(request_id: None,
102
108
  response: 'requests.Response',
@@ -111,7 +117,16 @@ def stream_response(request_id: server_common.RequestId[T],
111
117
  response: 'requests.Response',
112
118
  output_stream: Optional['io.TextIOBase'] = None,
113
119
  resumable: bool = False,
114
- get_result: bool = True) -> T:
120
+ get_result: Literal[True] = True) -> T:
121
+ ...
122
+
123
+
124
+ @typing.overload
125
+ def stream_response(request_id: server_common.RequestId[T],
126
+ response: 'requests.Response',
127
+ output_stream: Optional['io.TextIOBase'] = None,
128
+ resumable: bool = False,
129
+ get_result: bool = True) -> Optional[T]:
115
130
  ...
116
131
 
117
132
 
@@ -367,6 +382,16 @@ def workspaces() -> server_common.RequestId[Dict[str, Any]]:
367
382
  return server_common.get_request_id(response)
368
383
 
369
384
 
385
+ def _raise_exception_object_on_client(e: BaseException) -> None:
386
+ """Raise the exception object on the client."""
387
+ if env_options.Options.SHOW_DEBUG_INFO.get():
388
+ stacktrace = getattr(e, 'stacktrace', str(e))
389
+ logger.error('=== Traceback on SkyPilot API Server ===\n'
390
+ f'{stacktrace}')
391
+ with ux_utils.print_exception_no_traceback():
392
+ raise e
393
+
394
+
370
395
  @usage_lib.entrypoint
371
396
  @server_common.check_server_healthy_or_start
372
397
  @annotations.client_api
@@ -407,9 +432,8 @@ def validate(
407
432
  response = server_common.make_authenticated_request(
408
433
  'POST', '/validate', json=json.loads(body.model_dump_json()))
409
434
  if response.status_code == 400:
410
- with ux_utils.print_exception_no_traceback():
411
- raise exceptions.deserialize_exception(
412
- response.json().get('detail'))
435
+ _raise_exception_object_on_client(
436
+ exceptions.deserialize_exception(response.json().get('detail')))
413
437
 
414
438
 
415
439
  @usage_lib.entrypoint
@@ -590,7 +614,10 @@ def launch(
590
614
  down=down,
591
615
  dryrun=dryrun)
592
616
  with admin_policy_utils.apply_and_use_config_in_current_request(
593
- dag, request_options=request_options, at_client_side=True) as dag:
617
+ dag,
618
+ request_name=request_names.AdminPolicyRequestName.CLUSTER_LAUNCH,
619
+ request_options=request_options,
620
+ at_client_side=True) as dag:
594
621
  return _launch(
595
622
  dag,
596
623
  cluster_name,
@@ -912,6 +939,7 @@ def tail_logs(
912
939
  @annotations.client_api
913
940
  @rest.retry_transient_errors()
914
941
  def tail_provision_logs(cluster_name: str,
942
+ worker: Optional[int] = None,
915
943
  follow: bool = True,
916
944
  tail: int = 0,
917
945
  output_stream: Optional['io.TextIOBase'] = None) -> int:
@@ -919,17 +947,31 @@ def tail_provision_logs(cluster_name: str,
919
947
 
920
948
  Args:
921
949
  cluster_name: name of the cluster.
950
+ worker: worker id in multi-node cluster.
951
+ If None, stream the logs of the head node.
922
952
  follow: follow the logs.
923
953
  tail: lines from end to tail.
924
954
  output_stream: optional stream to write logs.
925
955
  Returns:
926
956
  Exit code 0 on streaming success; raises on HTTP error.
927
957
  """
928
- body = payloads.ClusterNameBody(cluster_name=cluster_name)
958
+ body = payloads.ProvisionLogsBody(cluster_name=cluster_name)
959
+
960
+ if worker is not None:
961
+ remote_api_version = versions.get_remote_api_version()
962
+ if remote_api_version is not None and remote_api_version >= 21:
963
+ if worker < 1:
964
+ raise ValueError('Worker must be a positive integer.')
965
+ body.worker = worker
966
+ else:
967
+ raise exceptions.APINotSupportedError(
968
+ 'Worker node provision logs are not supported in your API '
969
+ 'server. Please upgrade to a newer API server to use it.')
929
970
  params = {
930
971
  'follow': str(follow).lower(),
931
972
  'tail': tail,
932
973
  }
974
+
933
975
  response = server_common.make_authenticated_request(
934
976
  'POST',
935
977
  '/provision_logs',
@@ -938,13 +980,21 @@ def tail_provision_logs(cluster_name: str,
938
980
  stream=True,
939
981
  timeout=(client_common.API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS,
940
982
  None))
983
+ # Check for HTTP errors before streaming the response
984
+ if response.status_code != 200:
985
+ with ux_utils.print_exception_no_traceback():
986
+ raise exceptions.CommandError(response.status_code,
987
+ 'tail_provision_logs',
988
+ 'Failed to stream provision logs',
989
+ response.text)
990
+
941
991
  # Log request is idempotent when tail is 0, thus can resume previous
942
992
  # streaming point on retry.
943
993
  # request_id=None here because /provision_logs does not create an async
944
994
  # request. Instead, it streams a plain file from the server. This does NOT
945
995
  # violate the stream_response doc warning about None in multi-user
946
- # environments: we are not asking stream_response to select the latest
947
- # request”. We already have the HTTP response to stream; request_id=None
996
+ # environments: we are not asking stream_response to select "the latest
997
+ # request". We already have the HTTP response to stream; request_id=None
948
998
  # merely disables the follow-up GET. It is also necessary for --no-follow
949
999
  # to return cleanly after printing the tailed lines. If we provided a
950
1000
  # non-None request_id here, the get(request_id) in stream_response(
@@ -1266,9 +1316,11 @@ def autostop(
1266
1316
  @usage_lib.entrypoint
1267
1317
  @server_common.check_server_healthy_or_start
1268
1318
  @annotations.client_api
1269
- def queue(cluster_name: str,
1270
- skip_finished: bool = False,
1271
- all_users: bool = False) -> server_common.RequestId[List[dict]]:
1319
+ def queue(
1320
+ cluster_name: str,
1321
+ skip_finished: bool = False,
1322
+ all_users: bool = False
1323
+ ) -> server_common.RequestId[List[responses.ClusterJobRecord]]:
1272
1324
  """Gets the job queue of a cluster.
1273
1325
 
1274
1326
  Args:
@@ -1281,8 +1333,8 @@ def queue(cluster_name: str,
1281
1333
  The request ID of the queue request.
1282
1334
 
1283
1335
  Request Returns:
1284
- job_records (List[Dict[str, Any]]): A list of dicts for each job in the
1285
- queue.
1336
+ job_records (List[responses.ClusterJobRecord]): A list of job records
1337
+ for each job in the queue.
1286
1338
 
1287
1339
  .. code-block:: python
1288
1340
 
@@ -1428,6 +1480,7 @@ def status(
1428
1480
  all_users: bool = False,
1429
1481
  *,
1430
1482
  _include_credentials: bool = False,
1483
+ _summary_response: bool = False,
1431
1484
  ) -> server_common.RequestId[List[responses.StatusResponse]]:
1432
1485
  """Gets cluster statuses.
1433
1486
 
@@ -1513,6 +1566,7 @@ def status(
1513
1566
  refresh=refresh,
1514
1567
  all_users=all_users,
1515
1568
  include_credentials=_include_credentials,
1569
+ summary_response=_summary_response,
1516
1570
  )
1517
1571
  response = server_common.make_authenticated_request(
1518
1572
  'POST', '/status', json=json.loads(body.model_dump_json()))
@@ -1613,26 +1667,15 @@ def cost_report(
1613
1667
  @usage_lib.entrypoint
1614
1668
  @server_common.check_server_healthy_or_start
1615
1669
  @annotations.client_api
1616
- def storage_ls() -> server_common.RequestId[List[Dict[str, Any]]]:
1670
+ def storage_ls() -> server_common.RequestId[List[responses.StorageRecord]]:
1617
1671
  """Gets the storages.
1618
1672
 
1619
1673
  Returns:
1620
1674
  The request ID of the storage list request.
1621
1675
 
1622
1676
  Request Returns:
1623
- storage_records (List[Dict[str, Any]]): A list of dicts, with each dict
1624
- containing the information of a storage.
1625
-
1626
- .. code-block:: python
1627
-
1628
- {
1629
- 'name': (str) storage name,
1630
- 'launched_at': (int) timestamp of creation,
1631
- 'store': (List[sky.StoreType]) storage type,
1632
- 'last_use': (int) timestamp of last use,
1633
- 'status': (sky.StorageStatus) storage status,
1634
- }
1635
- ]
1677
+ storage_records (List[responses.StorageRecord]):
1678
+ A list of storage records.
1636
1679
  """
1637
1680
  response = server_common.make_authenticated_request('GET', '/storage/ls')
1638
1681
  return server_common.get_request_id(response)
@@ -1669,12 +1712,8 @@ def storage_delete(name: str) -> server_common.RequestId[None]:
1669
1712
  @server_common.check_server_healthy_or_start
1670
1713
  @annotations.client_api
1671
1714
  def local_up(gpus: bool,
1672
- ips: Optional[List[str]],
1673
- ssh_user: Optional[str],
1674
- ssh_key: Optional[str],
1675
- cleanup: bool,
1676
- context_name: Optional[str] = None,
1677
- password: Optional[str] = None) -> server_common.RequestId[None]:
1715
+ name: Optional[str] = None,
1716
+ port_start: Optional[int] = None) -> server_common.RequestId[None]:
1678
1717
  """Launches a Kubernetes cluster on local machines.
1679
1718
 
1680
1719
  Returns:
@@ -1685,16 +1724,10 @@ def local_up(gpus: bool,
1685
1724
  # TODO: move this check to server.
1686
1725
  if not server_common.is_api_server_local():
1687
1726
  with ux_utils.print_exception_no_traceback():
1688
- raise ValueError(
1689
- 'sky local up is only supported when running SkyPilot locally.')
1690
-
1691
- body = payloads.LocalUpBody(gpus=gpus,
1692
- ips=ips,
1693
- ssh_user=ssh_user,
1694
- ssh_key=ssh_key,
1695
- cleanup=cleanup,
1696
- context_name=context_name,
1697
- password=password)
1727
+ raise ValueError('`sky local up` is only supported when '
1728
+ 'running SkyPilot locally.')
1729
+
1730
+ body = payloads.LocalUpBody(gpus=gpus, name=name, port_start=port_start)
1698
1731
  response = server_common.make_authenticated_request(
1699
1732
  'POST', '/local_up', json=json.loads(body.model_dump_json()))
1700
1733
  return server_common.get_request_id(response)
@@ -1703,16 +1736,19 @@ def local_up(gpus: bool,
1703
1736
  @usage_lib.entrypoint
1704
1737
  @server_common.check_server_healthy_or_start
1705
1738
  @annotations.client_api
1706
- def local_down() -> server_common.RequestId[None]:
1739
+ def local_down(name: Optional[str]) -> server_common.RequestId[None]:
1707
1740
  """Tears down the Kubernetes cluster started by local_up."""
1708
1741
  # We do not allow local down when the API server is running remotely since it
1709
1742
  # will modify the kubeconfig.
1710
1743
  # TODO: move this check to remote server.
1711
1744
  if not server_common.is_api_server_local():
1712
1745
  with ux_utils.print_exception_no_traceback():
1713
- raise ValueError('sky local down is only supported when running '
1746
+ raise ValueError('`sky local down` is only supported when running '
1714
1747
  'SkyPilot locally.')
1715
- response = server_common.make_authenticated_request('POST', '/local_down')
1748
+
1749
+ body = payloads.LocalDownBody(name=name)
1750
+ response = server_common.make_authenticated_request(
1751
+ 'POST', '/local_down', json=json.loads(body.model_dump_json()))
1716
1752
  return server_common.get_request_id(response)
1717
1753
 
1718
1754
 
@@ -1900,11 +1936,12 @@ def kubernetes_node_info(
1900
1936
  @usage_lib.entrypoint
1901
1937
  @server_common.check_server_healthy_or_start
1902
1938
  @annotations.client_api
1903
- def status_kubernetes() -> server_common.RequestId[Tuple[
1904
- List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
1905
- List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'], List[Dict[
1906
- str, Any]], Optional[str]]]:
1907
- """Gets all SkyPilot clusters and jobs in the Kubernetes cluster.
1939
+ def status_kubernetes() -> server_common.RequestId[
1940
+ Tuple[List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
1941
+ List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
1942
+ List[responses.ManagedJobRecord], Optional[str]]]:
1943
+ """[Experimental] Gets all SkyPilot clusters and jobs
1944
+ in the Kubernetes cluster.
1908
1945
 
1909
1946
  Managed jobs and services are also included in the clusters returned.
1910
1947
  The caller must parse the controllers to identify which clusters are run
@@ -1976,12 +2013,7 @@ def get(request_id: server_common.RequestId[T]) -> T:
1976
2013
  error = request_task.get_error()
1977
2014
  if error is not None:
1978
2015
  error_obj = error['object']
1979
- if env_options.Options.SHOW_DEBUG_INFO.get():
1980
- stacktrace = getattr(error_obj, 'stacktrace', str(error_obj))
1981
- logger.error('=== Traceback on SkyPilot API Server ===\n'
1982
- f'{stacktrace}')
1983
- with ux_utils.print_exception_no_traceback():
1984
- raise error_obj
2016
+ _raise_exception_object_on_client(error_obj)
1985
2017
  if request_task.status == requests_lib.RequestStatus.CANCELLED:
1986
2018
  with ux_utils.print_exception_no_traceback():
1987
2019
  raise exceptions.RequestCancelled(
@@ -2067,6 +2099,12 @@ def stream_and_get(
2067
2099
  detail = response.json().get('detail')
2068
2100
  with ux_utils.print_exception_no_traceback():
2069
2101
  raise exceptions.ClientError(f'Failed to stream logs: {detail}')
2102
+ stream_request_id: Optional[server_common.RequestId[
2103
+ T]] = server_common.get_stream_request_id(response)
2104
+ if request_id is not None and stream_request_id is not None:
2105
+ assert request_id == stream_request_id
2106
+ if request_id is None:
2107
+ request_id = stream_request_id
2070
2108
  elif response.status_code != 200:
2071
2109
  # TODO(syang): handle the case where the requestID is not provided
2072
2110
  # see https://github.com/skypilot-org/skypilot/issues/6549
@@ -2076,6 +2114,7 @@ def stream_and_get(
2076
2114
  return stream_response(request_id,
2077
2115
  response,
2078
2116
  output_stream,
2117
+ resumable=True,
2079
2118
  get_result=follow)
2080
2119
 
2081
2120
 
@@ -2150,7 +2189,9 @@ def _local_api_server_running(kill: bool = False) -> bool:
2150
2189
  def api_status(
2151
2190
  request_ids: Optional[List[Union[server_common.RequestId[T], str]]] = None,
2152
2191
  # pylint: disable=redefined-builtin
2153
- all_status: bool = False
2192
+ all_status: bool = False,
2193
+ limit: Optional[int] = None,
2194
+ fields: Optional[List[str]] = None,
2154
2195
  ) -> List[payloads.RequestPayload]:
2155
2196
  """Lists all requests.
2156
2197
 
@@ -2159,6 +2200,8 @@ def api_status(
2159
2200
  If None, all requests are queried.
2160
2201
  all_status: Whether to list all finished requests as well. This argument
2161
2202
  is ignored if request_ids is not None.
2203
+ limit: The number of requests to show. If None, show all requests.
2204
+ fields: The fields to get. If None, get all fields.
2162
2205
 
2163
2206
  Returns:
2164
2207
  A list of request payloads.
@@ -2167,8 +2210,12 @@ def api_status(
2167
2210
  logger.info('SkyPilot API server is not running.')
2168
2211
  return []
2169
2212
 
2170
- body = payloads.RequestStatusBody(request_ids=request_ids,
2171
- all_status=all_status)
2213
+ body = payloads.RequestStatusBody(
2214
+ request_ids=request_ids,
2215
+ all_status=all_status,
2216
+ limit=limit,
2217
+ fields=fields,
2218
+ )
2172
2219
  response = server_common.make_authenticated_request(
2173
2220
  'GET',
2174
2221
  '/api/status',
@@ -2287,10 +2334,32 @@ def api_stop() -> None:
2287
2334
  f'Cannot kill the API server at {server_url} because it is not '
2288
2335
  f'the default SkyPilot API server started locally.')
2289
2336
 
2290
- found = _local_api_server_running(kill=True)
2337
+ # Acquire the api server creation lock to prevent multiple processes from
2338
+ # stopping and starting the API server at the same time.
2339
+ with filelock.FileLock(
2340
+ os.path.expanduser(constants.API_SERVER_CREATION_LOCK_PATH)):
2341
+ try:
2342
+ records = scheduler.get_controller_process_records()
2343
+ if records is not None:
2344
+ for record in records:
2345
+ try:
2346
+ if managed_job_utils.controller_process_alive(
2347
+ record, quiet=False):
2348
+ subprocess_utils.kill_children_processes(
2349
+ parent_pids=[record.pid], force=True)
2350
+ except (psutil.NoSuchProcess, psutil.ZombieProcess):
2351
+ continue
2352
+ os.remove(os.path.expanduser(scheduler.JOB_CONTROLLER_PID_PATH))
2353
+ except FileNotFoundError:
2354
+ # it's fine, we will create it
2355
+ pass
2356
+ except Exception as e: # pylint: disable=broad-except
2357
+ # in case we get permission issues or something is messed up, just ignore
2358
+ # it and assume the process is dead
2359
+ logger.error(f'Error looking at job controller pid file: {e}')
2360
+ pass
2291
2361
 
2292
- # Remove the database for requests.
2293
- server_common.clear_local_api_server_database()
2362
+ found = _local_api_server_running(kill=True)
2294
2363
 
2295
2364
  if found:
2296
2365
  logger.info(f'{colorama.Fore.GREEN}SkyPilot API server stopped.'
sky/client/sdk_async.py CHANGED
@@ -19,20 +19,16 @@ import aiohttp
19
19
  import colorama
20
20
 
21
21
  from sky import admin_policy
22
- from sky import backends
23
22
  from sky import catalog
24
23
  from sky import exceptions
25
- from sky import models
26
24
  from sky import sky_logging
27
25
  from sky.client import common as client_common
28
26
  from sky.client import sdk
29
- from sky.provision.kubernetes import utils as kubernetes_utils
30
27
  from sky.schemas.api import responses
31
28
  from sky.server import common as server_common
32
29
  from sky.server import rest
33
30
  from sky.server.requests import payloads
34
31
  from sky.server.requests import requests as requests_lib
35
- from sky.skylet import job_lib
36
32
  from sky.usage import usage_lib
37
33
  from sky.utils import annotations
38
34
  from sky.utils import common
@@ -45,6 +41,11 @@ if typing.TYPE_CHECKING:
45
41
  import io
46
42
 
47
43
  import sky
44
+ from sky import backends
45
+ from sky import models
46
+ from sky.provision.kubernetes import utils as kubernetes_utils
47
+ from sky.skylet import autostop_lib
48
+ from sky.skylet import job_lib
48
49
 
49
50
  logger = sky_logging.init_logger(__name__)
50
51
  logging.getLogger('httpx').setLevel(logging.CRITICAL)
@@ -381,9 +382,10 @@ async def launch(
381
382
  cluster_name: Optional[str] = None,
382
383
  retry_until_up: bool = False,
383
384
  idle_minutes_to_autostop: Optional[int] = None,
385
+ wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
384
386
  dryrun: bool = False,
385
387
  down: bool = False, # pylint: disable=redefined-outer-name
386
- backend: Optional[backends.Backend] = None,
388
+ backend: Optional['backends.Backend'] = None,
387
389
  optimize_target: common.OptimizeTarget = common.OptimizeTarget.COST,
388
390
  no_setup: bool = False,
389
391
  clone_disk_from: Optional[str] = None,
@@ -395,12 +397,12 @@ async def launch(
395
397
  _is_launched_by_sky_serve_controller: bool = False,
396
398
  _disable_controller_check: bool = False,
397
399
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
398
- ) -> Tuple[Optional[int], Optional[backends.ResourceHandle]]:
400
+ ) -> Tuple[Optional[int], Optional['backends.ResourceHandle']]:
399
401
  """Async version of launch() that launches a cluster or task."""
400
402
  request_id = await context_utils.to_thread(
401
403
  sdk.launch, task, cluster_name, retry_until_up,
402
- idle_minutes_to_autostop, dryrun, down, backend, optimize_target,
403
- no_setup, clone_disk_from, fast, _need_confirmation,
404
+ idle_minutes_to_autostop, wait_for, dryrun, down, backend,
405
+ optimize_target, no_setup, clone_disk_from, fast, _need_confirmation,
404
406
  _is_launched_by_jobs_controller, _is_launched_by_sky_serve_controller,
405
407
  _disable_controller_check)
406
408
  if stream_logs is not None:
@@ -416,9 +418,9 @@ async def exec( # pylint: disable=redefined-builtin
416
418
  cluster_name: Optional[str] = None,
417
419
  dryrun: bool = False,
418
420
  down: bool = False, # pylint: disable=redefined-outer-name
419
- backend: Optional[backends.Backend] = None,
421
+ backend: Optional['backends.Backend'] = None,
420
422
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
421
- ) -> Tuple[Optional[int], Optional[backends.ResourceHandle]]:
423
+ ) -> Tuple[Optional[int], Optional['backends.ResourceHandle']]:
422
424
  """Async version of exec() that executes a task on an existing cluster."""
423
425
  request_id = await context_utils.to_thread(sdk.exec, task, cluster_name,
424
426
  dryrun, down, backend)
@@ -454,15 +456,17 @@ async def download_logs(cluster_name: str,
454
456
  async def start(
455
457
  cluster_name: str,
456
458
  idle_minutes_to_autostop: Optional[int] = None,
459
+ wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
457
460
  retry_until_up: bool = False,
458
461
  down: bool = False, # pylint: disable=redefined-outer-name
459
462
  force: bool = False,
460
463
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
461
- ) -> backends.CloudVmRayResourceHandle:
464
+ ) -> 'backends.CloudVmRayResourceHandle':
462
465
  """Async version of start() that restarts a cluster."""
463
466
  request_id = await context_utils.to_thread(sdk.start, cluster_name,
464
467
  idle_minutes_to_autostop,
465
- retry_until_up, down, force)
468
+ wait_for, retry_until_up, down,
469
+ force)
466
470
  if stream_logs is not None:
467
471
  return await _stream_and_get(request_id, stream_logs)
468
472
  else:
@@ -502,13 +506,14 @@ async def stop(
502
506
  async def autostop(
503
507
  cluster_name: str,
504
508
  idle_minutes: int,
509
+ wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
505
510
  down: bool = False, # pylint: disable=redefined-outer-name
506
511
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
507
512
  ) -> None:
508
513
  """Async version of autostop() that schedules an autostop/autodown for a
509
514
  cluster."""
510
515
  request_id = await context_utils.to_thread(sdk.autostop, cluster_name,
511
- idle_minutes, down)
516
+ idle_minutes, wait_for, down)
512
517
  if stream_logs is not None:
513
518
  return await _stream_and_get(request_id, stream_logs)
514
519
  else:
@@ -518,11 +523,11 @@ async def autostop(
518
523
  @usage_lib.entrypoint
519
524
  @annotations.client_api
520
525
  async def queue(
521
- cluster_name: str,
522
- skip_finished: bool = False,
523
- all_users: bool = False,
524
- stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
525
- ) -> List[dict]:
526
+ cluster_name: str,
527
+ skip_finished: bool = False,
528
+ all_users: bool = False,
529
+ stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
530
+ ) -> List[responses.ClusterJobRecord]:
526
531
  """Async version of queue() that gets the job queue of a cluster."""
527
532
  request_id = await context_utils.to_thread(sdk.queue, cluster_name,
528
533
  skip_finished, all_users)
@@ -538,7 +543,7 @@ async def job_status(
538
543
  cluster_name: str,
539
544
  job_ids: Optional[List[int]] = None,
540
545
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
541
- ) -> Dict[Optional[int], Optional[job_lib.JobStatus]]:
546
+ ) -> Dict[Optional[int], Optional['job_lib.JobStatus']]:
542
547
  """Async version of job_status() that gets the status of jobs on a
543
548
  cluster."""
544
549
  request_id = await context_utils.to_thread(sdk.job_status, cluster_name,
@@ -651,18 +656,13 @@ async def storage_delete(
651
656
  @annotations.client_api
652
657
  async def local_up(
653
658
  gpus: bool,
654
- ips: Optional[List[str]],
655
- ssh_user: Optional[str],
656
- ssh_key: Optional[str],
657
- cleanup: bool,
658
- context_name: Optional[str] = None,
659
- password: Optional[str] = None,
659
+ name: Optional[str] = None,
660
+ port_start: Optional[int] = None,
660
661
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
661
662
  """Async version of local_up() that launches a Kubernetes cluster on
662
663
  local machines."""
663
- request_id = await context_utils.to_thread(sdk.local_up, gpus, ips,
664
- ssh_user, ssh_key, cleanup,
665
- context_name, password)
664
+ request_id = await context_utils.to_thread(sdk.local_up, gpus, name,
665
+ port_start)
666
666
  if stream_logs is not None:
667
667
  return await _stream_and_get(request_id, stream_logs)
668
668
  else:
@@ -672,10 +672,11 @@ async def local_up(
672
672
  @usage_lib.entrypoint
673
673
  @annotations.client_api
674
674
  async def local_down(
675
+ name: Optional[str] = None,
675
676
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
676
677
  """Async version of local_down() that tears down the Kubernetes cluster
677
678
  started by local_up."""
678
- request_id = await context_utils.to_thread(sdk.local_down)
679
+ request_id = await context_utils.to_thread(sdk.local_down, name)
679
680
  if stream_logs is not None:
680
681
  return await _stream_and_get(request_id, stream_logs)
681
682
  else:
@@ -718,7 +719,7 @@ async def realtime_kubernetes_gpu_availability(
718
719
  quantity_filter: Optional[int] = None,
719
720
  is_ssh: Optional[bool] = None,
720
721
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
721
- ) -> List[Tuple[str, List[models.RealtimeGpuAvailability]]]:
722
+ ) -> List[Tuple[str, List['models.RealtimeGpuAvailability']]]:
722
723
  """Async version of realtime_kubernetes_gpu_availability() that gets the
723
724
  real-time Kubernetes GPU availability."""
724
725
  request_id = await context_utils.to_thread(
@@ -735,7 +736,7 @@ async def realtime_kubernetes_gpu_availability(
735
736
  async def kubernetes_node_info(
736
737
  context: Optional[str] = None,
737
738
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
738
- ) -> models.KubernetesNodesInfo:
739
+ ) -> 'models.KubernetesNodesInfo':
739
740
  """Async version of kubernetes_node_info() that gets the resource
740
741
  information for all the nodes in the cluster."""
741
742
  request_id = await context_utils.to_thread(sdk.kubernetes_node_info,
@@ -750,8 +751,8 @@ async def kubernetes_node_info(
750
751
  @annotations.client_api
751
752
  async def status_kubernetes(
752
753
  stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
753
- ) -> Tuple[List[kubernetes_utils.KubernetesSkyPilotClusterInfoPayload],
754
- List[kubernetes_utils.KubernetesSkyPilotClusterInfoPayload],
754
+ ) -> Tuple[List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
755
+ List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
755
756
  List[Dict[str, Any]], Optional[str]]:
756
757
  """Async version of status_kubernetes() that gets all SkyPilot clusters
757
758
  and jobs in the Kubernetes cluster."""