skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397)
  1. sky/__init__.py +10 -2
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +20 -0
  14. sky/authentication.py +157 -263
  15. sky/backends/__init__.py +3 -2
  16. sky/backends/backend.py +11 -3
  17. sky/backends/backend_utils.py +588 -184
  18. sky/backends/cloud_vm_ray_backend.py +1088 -904
  19. sky/backends/local_docker_backend.py +9 -5
  20. sky/backends/task_codegen.py +633 -0
  21. sky/backends/wheel_utils.py +18 -0
  22. sky/catalog/__init__.py +8 -0
  23. sky/catalog/aws_catalog.py +4 -0
  24. sky/catalog/common.py +19 -1
  25. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  26. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  27. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  28. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  29. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  30. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  31. sky/catalog/kubernetes_catalog.py +24 -28
  32. sky/catalog/primeintellect_catalog.py +95 -0
  33. sky/catalog/runpod_catalog.py +5 -1
  34. sky/catalog/seeweb_catalog.py +184 -0
  35. sky/catalog/shadeform_catalog.py +165 -0
  36. sky/check.py +73 -43
  37. sky/client/cli/command.py +675 -412
  38. sky/client/cli/flags.py +4 -2
  39. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  40. sky/client/cli/utils.py +79 -0
  41. sky/client/common.py +12 -2
  42. sky/client/sdk.py +132 -63
  43. sky/client/sdk_async.py +34 -33
  44. sky/cloud_stores.py +82 -3
  45. sky/clouds/__init__.py +6 -0
  46. sky/clouds/aws.py +337 -129
  47. sky/clouds/azure.py +24 -18
  48. sky/clouds/cloud.py +40 -13
  49. sky/clouds/cudo.py +16 -13
  50. sky/clouds/do.py +9 -7
  51. sky/clouds/fluidstack.py +12 -5
  52. sky/clouds/gcp.py +14 -7
  53. sky/clouds/hyperbolic.py +12 -5
  54. sky/clouds/ibm.py +12 -5
  55. sky/clouds/kubernetes.py +80 -45
  56. sky/clouds/lambda_cloud.py +12 -5
  57. sky/clouds/nebius.py +23 -9
  58. sky/clouds/oci.py +19 -12
  59. sky/clouds/paperspace.py +4 -1
  60. sky/clouds/primeintellect.py +317 -0
  61. sky/clouds/runpod.py +85 -24
  62. sky/clouds/scp.py +12 -8
  63. sky/clouds/seeweb.py +477 -0
  64. sky/clouds/shadeform.py +400 -0
  65. sky/clouds/ssh.py +4 -2
  66. sky/clouds/utils/scp_utils.py +61 -50
  67. sky/clouds/vast.py +33 -27
  68. sky/clouds/vsphere.py +14 -16
  69. sky/core.py +174 -165
  70. sky/dashboard/out/404.html +1 -1
  71. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  73. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  74. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  76. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  77. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  79. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
  80. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  82. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  83. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  86. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  87. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  88. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  90. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  92. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  93. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  94. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  95. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  96. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  97. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
  98. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  100. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  101. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  102. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
  105. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
  106. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  107. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  108. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  109. sky/dashboard/out/clusters/[cluster].html +1 -1
  110. sky/dashboard/out/clusters.html +1 -1
  111. sky/dashboard/out/config.html +1 -1
  112. sky/dashboard/out/index.html +1 -1
  113. sky/dashboard/out/infra/[context].html +1 -1
  114. sky/dashboard/out/infra.html +1 -1
  115. sky/dashboard/out/jobs/[job].html +1 -1
  116. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  117. sky/dashboard/out/jobs.html +1 -1
  118. sky/dashboard/out/users.html +1 -1
  119. sky/dashboard/out/volumes.html +1 -1
  120. sky/dashboard/out/workspace/new.html +1 -1
  121. sky/dashboard/out/workspaces/[name].html +1 -1
  122. sky/dashboard/out/workspaces.html +1 -1
  123. sky/data/data_utils.py +92 -1
  124. sky/data/mounting_utils.py +162 -29
  125. sky/data/storage.py +200 -19
  126. sky/data/storage_utils.py +10 -45
  127. sky/exceptions.py +18 -7
  128. sky/execution.py +74 -31
  129. sky/global_user_state.py +605 -191
  130. sky/jobs/__init__.py +2 -0
  131. sky/jobs/client/sdk.py +101 -4
  132. sky/jobs/client/sdk_async.py +31 -5
  133. sky/jobs/constants.py +15 -8
  134. sky/jobs/controller.py +726 -284
  135. sky/jobs/file_content_utils.py +128 -0
  136. sky/jobs/log_gc.py +193 -0
  137. sky/jobs/recovery_strategy.py +250 -100
  138. sky/jobs/scheduler.py +271 -173
  139. sky/jobs/server/core.py +367 -114
  140. sky/jobs/server/server.py +81 -35
  141. sky/jobs/server/utils.py +89 -35
  142. sky/jobs/state.py +1498 -620
  143. sky/jobs/utils.py +771 -306
  144. sky/logs/agent.py +40 -5
  145. sky/logs/aws.py +9 -19
  146. sky/metrics/utils.py +282 -39
  147. sky/optimizer.py +1 -1
  148. sky/provision/__init__.py +37 -1
  149. sky/provision/aws/config.py +34 -13
  150. sky/provision/aws/instance.py +5 -2
  151. sky/provision/azure/instance.py +5 -3
  152. sky/provision/common.py +2 -0
  153. sky/provision/cudo/instance.py +4 -3
  154. sky/provision/do/instance.py +4 -3
  155. sky/provision/docker_utils.py +97 -26
  156. sky/provision/fluidstack/instance.py +6 -5
  157. sky/provision/gcp/config.py +6 -1
  158. sky/provision/gcp/instance.py +4 -2
  159. sky/provision/hyperbolic/instance.py +4 -2
  160. sky/provision/instance_setup.py +66 -20
  161. sky/provision/kubernetes/__init__.py +2 -0
  162. sky/provision/kubernetes/config.py +7 -44
  163. sky/provision/kubernetes/constants.py +0 -1
  164. sky/provision/kubernetes/instance.py +609 -213
  165. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  166. sky/provision/kubernetes/network.py +12 -8
  167. sky/provision/kubernetes/network_utils.py +8 -25
  168. sky/provision/kubernetes/utils.py +382 -418
  169. sky/provision/kubernetes/volume.py +150 -18
  170. sky/provision/lambda_cloud/instance.py +16 -13
  171. sky/provision/nebius/instance.py +6 -2
  172. sky/provision/nebius/utils.py +103 -86
  173. sky/provision/oci/instance.py +4 -2
  174. sky/provision/paperspace/instance.py +4 -3
  175. sky/provision/primeintellect/__init__.py +10 -0
  176. sky/provision/primeintellect/config.py +11 -0
  177. sky/provision/primeintellect/instance.py +454 -0
  178. sky/provision/primeintellect/utils.py +398 -0
  179. sky/provision/provisioner.py +30 -9
  180. sky/provision/runpod/__init__.py +2 -0
  181. sky/provision/runpod/instance.py +4 -3
  182. sky/provision/runpod/volume.py +69 -13
  183. sky/provision/scp/instance.py +307 -130
  184. sky/provision/seeweb/__init__.py +11 -0
  185. sky/provision/seeweb/config.py +13 -0
  186. sky/provision/seeweb/instance.py +812 -0
  187. sky/provision/shadeform/__init__.py +11 -0
  188. sky/provision/shadeform/config.py +12 -0
  189. sky/provision/shadeform/instance.py +351 -0
  190. sky/provision/shadeform/shadeform_utils.py +83 -0
  191. sky/provision/vast/instance.py +5 -3
  192. sky/provision/volume.py +164 -0
  193. sky/provision/vsphere/common/ssl_helper.py +1 -1
  194. sky/provision/vsphere/common/vapiconnect.py +2 -1
  195. sky/provision/vsphere/common/vim_utils.py +3 -2
  196. sky/provision/vsphere/instance.py +8 -6
  197. sky/provision/vsphere/vsphere_utils.py +8 -1
  198. sky/resources.py +11 -3
  199. sky/schemas/api/responses.py +107 -6
  200. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  201. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  202. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  203. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  204. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  205. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  206. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  207. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  208. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  209. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  210. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  211. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  212. sky/schemas/generated/jobsv1_pb2.py +86 -0
  213. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  214. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  215. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  216. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  217. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  218. sky/schemas/generated/servev1_pb2.py +58 -0
  219. sky/schemas/generated/servev1_pb2.pyi +115 -0
  220. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  221. sky/serve/autoscalers.py +2 -0
  222. sky/serve/client/impl.py +55 -21
  223. sky/serve/constants.py +4 -3
  224. sky/serve/controller.py +17 -11
  225. sky/serve/load_balancing_policies.py +1 -1
  226. sky/serve/replica_managers.py +219 -142
  227. sky/serve/serve_rpc_utils.py +179 -0
  228. sky/serve/serve_state.py +63 -54
  229. sky/serve/serve_utils.py +145 -109
  230. sky/serve/server/core.py +46 -25
  231. sky/serve/server/impl.py +311 -162
  232. sky/serve/server/server.py +21 -19
  233. sky/serve/service.py +84 -68
  234. sky/serve/service_spec.py +45 -7
  235. sky/server/auth/loopback.py +38 -0
  236. sky/server/auth/oauth2_proxy.py +12 -7
  237. sky/server/common.py +47 -24
  238. sky/server/config.py +62 -28
  239. sky/server/constants.py +9 -1
  240. sky/server/daemons.py +109 -38
  241. sky/server/metrics.py +76 -96
  242. sky/server/middleware_utils.py +166 -0
  243. sky/server/requests/executor.py +381 -145
  244. sky/server/requests/payloads.py +71 -18
  245. sky/server/requests/preconditions.py +15 -13
  246. sky/server/requests/request_names.py +121 -0
  247. sky/server/requests/requests.py +507 -157
  248. sky/server/requests/serializers/decoders.py +48 -17
  249. sky/server/requests/serializers/encoders.py +85 -20
  250. sky/server/requests/threads.py +117 -0
  251. sky/server/rest.py +116 -24
  252. sky/server/server.py +420 -172
  253. sky/server/stream_utils.py +219 -45
  254. sky/server/uvicorn.py +30 -19
  255. sky/setup_files/MANIFEST.in +6 -1
  256. sky/setup_files/alembic.ini +8 -0
  257. sky/setup_files/dependencies.py +62 -19
  258. sky/setup_files/setup.py +44 -44
  259. sky/sky_logging.py +13 -5
  260. sky/skylet/attempt_skylet.py +106 -24
  261. sky/skylet/configs.py +3 -1
  262. sky/skylet/constants.py +111 -26
  263. sky/skylet/events.py +64 -10
  264. sky/skylet/job_lib.py +141 -104
  265. sky/skylet/log_lib.py +233 -5
  266. sky/skylet/log_lib.pyi +40 -2
  267. sky/skylet/providers/ibm/node_provider.py +12 -8
  268. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  269. sky/skylet/runtime_utils.py +21 -0
  270. sky/skylet/services.py +524 -0
  271. sky/skylet/skylet.py +22 -1
  272. sky/skylet/subprocess_daemon.py +104 -29
  273. sky/skypilot_config.py +99 -79
  274. sky/ssh_node_pools/server.py +9 -8
  275. sky/task.py +221 -104
  276. sky/templates/aws-ray.yml.j2 +1 -0
  277. sky/templates/azure-ray.yml.j2 +1 -0
  278. sky/templates/cudo-ray.yml.j2 +1 -0
  279. sky/templates/do-ray.yml.j2 +1 -0
  280. sky/templates/fluidstack-ray.yml.j2 +1 -0
  281. sky/templates/gcp-ray.yml.j2 +1 -0
  282. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  283. sky/templates/ibm-ray.yml.j2 +2 -1
  284. sky/templates/jobs-controller.yaml.j2 +3 -0
  285. sky/templates/kubernetes-ray.yml.j2 +196 -55
  286. sky/templates/lambda-ray.yml.j2 +1 -0
  287. sky/templates/nebius-ray.yml.j2 +3 -0
  288. sky/templates/oci-ray.yml.j2 +1 -0
  289. sky/templates/paperspace-ray.yml.j2 +1 -0
  290. sky/templates/primeintellect-ray.yml.j2 +72 -0
  291. sky/templates/runpod-ray.yml.j2 +1 -0
  292. sky/templates/scp-ray.yml.j2 +1 -0
  293. sky/templates/seeweb-ray.yml.j2 +171 -0
  294. sky/templates/shadeform-ray.yml.j2 +73 -0
  295. sky/templates/vast-ray.yml.j2 +1 -0
  296. sky/templates/vsphere-ray.yml.j2 +1 -0
  297. sky/templates/websocket_proxy.py +188 -43
  298. sky/usage/usage_lib.py +16 -4
  299. sky/users/permission.py +60 -43
  300. sky/utils/accelerator_registry.py +6 -3
  301. sky/utils/admin_policy_utils.py +18 -5
  302. sky/utils/annotations.py +22 -0
  303. sky/utils/asyncio_utils.py +78 -0
  304. sky/utils/atomic.py +1 -1
  305. sky/utils/auth_utils.py +153 -0
  306. sky/utils/cli_utils/status_utils.py +12 -7
  307. sky/utils/cluster_utils.py +28 -6
  308. sky/utils/command_runner.py +88 -27
  309. sky/utils/command_runner.pyi +36 -3
  310. sky/utils/common.py +3 -1
  311. sky/utils/common_utils.py +37 -4
  312. sky/utils/config_utils.py +1 -14
  313. sky/utils/context.py +127 -40
  314. sky/utils/context_utils.py +73 -18
  315. sky/utils/controller_utils.py +229 -70
  316. sky/utils/db/db_utils.py +95 -18
  317. sky/utils/db/kv_cache.py +149 -0
  318. sky/utils/db/migration_utils.py +24 -7
  319. sky/utils/env_options.py +4 -0
  320. sky/utils/git.py +559 -1
  321. sky/utils/kubernetes/create_cluster.sh +15 -30
  322. sky/utils/kubernetes/delete_cluster.sh +10 -7
  323. sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
  324. sky/utils/kubernetes/generate_kind_config.py +6 -66
  325. sky/utils/kubernetes/gpu_labeler.py +13 -3
  326. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  327. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  328. sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
  329. sky/utils/kubernetes/rsync_helper.sh +11 -3
  330. sky/utils/kubernetes_enums.py +7 -15
  331. sky/utils/lock_events.py +4 -4
  332. sky/utils/locks.py +128 -31
  333. sky/utils/log_utils.py +0 -319
  334. sky/utils/resource_checker.py +13 -10
  335. sky/utils/resources_utils.py +53 -29
  336. sky/utils/rich_utils.py +8 -4
  337. sky/utils/schemas.py +107 -52
  338. sky/utils/subprocess_utils.py +17 -4
  339. sky/utils/thread_utils.py +91 -0
  340. sky/utils/timeline.py +2 -1
  341. sky/utils/ux_utils.py +35 -1
  342. sky/utils/volume.py +88 -4
  343. sky/utils/yaml_utils.py +9 -0
  344. sky/volumes/client/sdk.py +48 -10
  345. sky/volumes/server/core.py +59 -22
  346. sky/volumes/server/server.py +46 -17
  347. sky/volumes/volume.py +54 -42
  348. sky/workspaces/core.py +57 -21
  349. sky/workspaces/server.py +13 -12
  350. sky_templates/README.md +3 -0
  351. sky_templates/__init__.py +3 -0
  352. sky_templates/ray/__init__.py +0 -0
  353. sky_templates/ray/start_cluster +183 -0
  354. sky_templates/ray/stop_cluster +75 -0
  355. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
  356. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  357. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  358. sky/client/cli/git.py +0 -549
  359. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  360. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  361. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  362. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  363. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  364. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  365. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  366. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  367. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  368. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  369. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  370. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  371. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  372. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  373. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  374. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  375. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  376. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  377. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  378. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  379. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  380. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  381. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  382. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  383. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  384. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  385. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  386. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  387. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  388. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  389. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  390. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  391. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  392. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  393. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  394. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  395. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
  396. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  397. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/clouds/vast.py CHANGED
@@ -1,10 +1,12 @@
1
1
  """ Vast Cloud. """
2
2
 
3
+ import os
3
4
  import typing
4
5
  from typing import Dict, Iterator, List, Optional, Tuple, Union
5
6
 
6
7
  from sky import catalog
7
8
  from sky import clouds
9
+ from sky.adaptors import common
8
10
  from sky.utils import registry
9
11
  from sky.utils import resources_utils
10
12
 
@@ -12,6 +14,8 @@ if typing.TYPE_CHECKING:
12
14
  from sky import resources as resources_lib
13
15
  from sky.utils import volume as volume_lib
14
16
 
17
+ _CREDENTIAL_PATH = '~/.config/vastai/vast_api_key'
18
+
15
19
 
16
20
  @registry.CLOUD_REGISTRY.register
17
21
  class Vast(clouds.Cloud):
@@ -51,7 +55,9 @@ class Vast(clouds.Cloud):
51
55
 
52
56
  @classmethod
53
57
  def _unsupported_features_for_resources(
54
- cls, resources: 'resources_lib.Resources'
58
+ cls,
59
+ resources: 'resources_lib.Resources',
60
+ region: Optional[str] = None,
55
61
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
56
62
  """The features not supported based on the resources provided.
57
63
 
@@ -70,10 +76,15 @@ class Vast(clouds.Cloud):
70
76
  return cls._MAX_CLUSTER_NAME_LEN_LIMIT
71
77
 
72
78
  @classmethod
73
- def regions_with_offering(cls, instance_type: str,
74
- accelerators: Optional[Dict[str, int]],
75
- use_spot: bool, region: Optional[str],
76
- zone: Optional[str]) -> List[clouds.Region]:
79
+ def regions_with_offering(
80
+ cls,
81
+ instance_type: str,
82
+ accelerators: Optional[Dict[str, int]],
83
+ use_spot: bool,
84
+ region: Optional[str],
85
+ zone: Optional[str],
86
+ resources: Optional['resources_lib.Resources'] = None,
87
+ ) -> List[clouds.Region]:
77
88
  assert zone is None, 'Vast does not support zones.'
78
89
  del accelerators, zone # unused
79
90
  regions = catalog.get_region_zones_for_instance_type(
@@ -253,32 +264,27 @@ class Vast(clouds.Cloud):
253
264
  def _check_compute_credentials(
254
265
  cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
255
266
  """Checks if the user has valid credentials for
256
- Vast's compute service. """
257
- try:
258
- import vastai_sdk as _vast # pylint: disable=import-outside-toplevel
259
- vast = _vast.VastAI()
260
-
261
- # We only support file pased credential passing
262
- if vast.creds_source != 'FILE':
263
- return False, (
264
- 'error \n' # First line is indented by 4 spaces
265
- ' Credentials can be set up by running: \n'
266
- ' $ pip install vastai\n'
267
- ' $ mkdir -p ~/.config/vastai\n'
268
- ' $ echo [key] > ~/.config/vastai/vast_api_key\n'
269
- ' For more information, see https://skypilot.readthedocs.io/en/latest/getting-started/installation.html#vast' # pylint: disable=line-too-long
270
- )
267
+ Vast's compute service."""
268
+
269
+ dependency_error_msg = ('Failed to import vast. '
270
+ 'To install, run: pip install skypilot[vast]')
271
+ if not common.can_import_modules(['vastai_sdk']):
272
+ return False, dependency_error_msg
271
273
 
272
- return True, None
274
+ if not os.path.exists(os.path.expanduser(_CREDENTIAL_PATH)):
275
+ return False, (
276
+ 'error \n' # First line is indented by 4 spaces
277
+ ' Credentials can be set up by running: \n'
278
+ ' $ pip install vastai\n'
279
+ ' $ mkdir -p ~/.config/vastai\n'
280
+ f' $ echo [key] > {_CREDENTIAL_PATH}\n'
281
+ ' For more information, see https://skypilot.readthedocs.io/en/latest/getting-started/installation.html#vast' # pylint: disable=line-too-long
282
+ )
273
283
 
274
- except ImportError:
275
- return False, ('Failed to import vast. '
276
- 'To install, run: pip install skypilot[vast]')
284
+ return True, None
277
285
 
278
286
  def get_credential_file_mounts(self) -> Dict[str, str]:
279
- return {
280
- '~/.config/vastai/vast_api_key': '~/.config/vastai/vast_api_key'
281
- }
287
+ return {f'{_CREDENTIAL_PATH}': f'{_CREDENTIAL_PATH}'}
282
288
 
283
289
  @classmethod
284
290
  def get_user_identities(cls) -> Optional[List[List[str]]]:
sky/clouds/vsphere.py CHANGED
@@ -1,5 +1,4 @@
1
1
  """Vsphere cloud implementation."""
2
- import subprocess
3
2
  import typing
4
3
  from typing import Dict, Iterator, List, Optional, Tuple, Union
5
4
 
@@ -9,7 +8,6 @@ from sky.adaptors import common as adaptors_common
9
8
  from sky.provision.vsphere import vsphere_utils
10
9
  from sky.provision.vsphere.vsphere_utils import get_vsphere_credentials
11
10
  from sky.provision.vsphere.vsphere_utils import initialize_vsphere_data
12
- from sky.utils import common_utils
13
11
  from sky.utils import registry
14
12
  from sky.utils import resources_utils
15
13
 
@@ -75,7 +73,9 @@ class Vsphere(clouds.Cloud):
75
73
 
76
74
  @classmethod
77
75
  def _unsupported_features_for_resources(
78
- cls, resources: 'resources_lib.Resources'
76
+ cls,
77
+ resources: 'resources_lib.Resources',
78
+ region: Optional[str] = None,
79
79
  ) -> Dict[clouds.CloudImplementationFeatures, str]:
80
80
  features = cls._CLOUD_UNSUPPORTED_FEATURES
81
81
  return features
@@ -92,6 +92,7 @@ class Vsphere(clouds.Cloud):
92
92
  use_spot: bool,
93
93
  region: Optional[str],
94
94
  zone: Optional[str],
95
+ resources: Optional['resources_lib.Resources'] = None,
95
96
  ) -> List[clouds.Region]:
96
97
  del accelerators, zone # unused
97
98
  regions = catalog.get_region_zones_for_instance_type(
@@ -278,19 +279,16 @@ class Vsphere(clouds.Cloud):
278
279
  cls) -> Tuple[bool, Optional[Union[str, Dict[str, str]]]]:
279
280
  """Checks if the user has access credentials to
280
281
  vSphere's compute service."""
281
-
282
- try:
283
- # pylint: disable=import-outside-toplevel,unused-import
284
- # Check pyVmomi installation.
285
- import pyVmomi
286
- except (ImportError, subprocess.CalledProcessError) as e:
287
- return False, (
288
- 'vSphere dependencies are not installed. '
289
- 'Run the following commands:'
290
- f'\n{cls._INDENT_PREFIX} $ pip install skypilot[vSphere]'
291
- f'\n{cls._INDENT_PREFIX}Credentials may also need to be set. '
292
- 'For more details. See https://docs.skypilot.co/en/latest/getting-started/installation.html#vmware-vsphere' # pylint: disable=line-too-long
293
- f'{common_utils.format_exception(e, use_bracket=True)}')
282
+ dependency_error_msg = (
283
+ 'vSphere dependencies are not installed. '
284
+ 'Run the following commands:'
285
+ f'\n{cls._INDENT_PREFIX} $ pip install skypilot[vSphere]'
286
+ f'\n{cls._INDENT_PREFIX}Credentials may also need to be set. '
287
+ 'For more details. See https://docs.skypilot.co/en/latest/getting-started/installation.html#vmware-vsphere' # pylint: disable=line-too-long
288
+ )
289
+ # Check pyVmomi installation.
290
+ if not adaptors_common.can_import_modules(['pyVmomi']):
291
+ return False, dependency_error_msg
294
292
 
295
293
  required_keys = ['name', 'username', 'password', 'clusters']
296
294
  skip_key = 'skip_verification'
sky/core.py CHANGED
@@ -1,6 +1,4 @@
1
1
  """SDK functions for cluster/job management."""
2
- import os
3
- import shlex
4
2
  import typing
5
3
  from typing import Any, Dict, List, Optional, Tuple, Union
6
4
 
@@ -9,7 +7,6 @@ import colorama
9
7
  from sky import admin_policy
10
8
  from sky import backends
11
9
  from sky import catalog
12
- from sky import check as sky_check
13
10
  from sky import clouds
14
11
  from sky import dag as dag_lib
15
12
  from sky import data
@@ -20,16 +17,18 @@ from sky import optimizer
20
17
  from sky import sky_logging
21
18
  from sky import skypilot_config
22
19
  from sky import task as task_lib
20
+ from sky.adaptors import common as adaptors_common
23
21
  from sky.backends import backend_utils
22
+ from sky.backends import cloud_vm_ray_backend
24
23
  from sky.clouds import cloud as sky_cloud
25
24
  from sky.jobs.server import core as managed_jobs_core
26
25
  from sky.provision.kubernetes import constants as kubernetes_constants
27
26
  from sky.provision.kubernetes import utils as kubernetes_utils
28
27
  from sky.schemas.api import responses
28
+ from sky.server.requests import request_names
29
29
  from sky.skylet import autostop_lib
30
30
  from sky.skylet import constants
31
31
  from sky.skylet import job_lib
32
- from sky.skylet import log_lib
33
32
  from sky.usage import usage_lib
34
33
  from sky.utils import admin_policy_utils
35
34
  from sky.utils import common
@@ -44,6 +43,9 @@ from sky.utils.kubernetes import kubernetes_deploy_utils
44
43
 
45
44
  if typing.TYPE_CHECKING:
46
45
  from sky import resources as resources_lib
46
+ from sky.schemas.generated import jobsv1_pb2
47
+ else:
48
+ jobsv1_pb2 = adaptors_common.LazyImport('sky.schemas.generated.jobsv1_pb2')
47
49
 
48
50
  logger = sky_logging.init_logger(__name__)
49
51
 
@@ -83,7 +85,9 @@ def optimize(
83
85
  # but we do not apply the admin policy there. We should apply the admin
84
86
  # policy in the optimizer, but that will require some refactoring.
85
87
  with admin_policy_utils.apply_and_use_config_in_current_request(
86
- dag, request_options=request_options) as dag:
88
+ dag,
89
+ request_name=request_names.AdminPolicyRequestName.OPTIMIZE,
90
+ request_options=request_options) as dag:
87
91
  dag.resolve_and_validate_volumes()
88
92
  return optimizer.Optimizer.optimize(dag=dag,
89
93
  minimize=minimize,
@@ -97,6 +101,8 @@ def status(
97
101
  refresh: common.StatusRefreshMode = common.StatusRefreshMode.NONE,
98
102
  all_users: bool = False,
99
103
  include_credentials: bool = False,
104
+ summary_response: bool = False,
105
+ include_handle: bool = True,
100
106
  ) -> List[responses.StatusResponse]:
101
107
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
102
108
  """Gets cluster statuses.
@@ -176,16 +182,25 @@ def status(
176
182
  refresh=refresh,
177
183
  cluster_names=cluster_names,
178
184
  all_users=all_users,
179
- include_credentials=include_credentials)
180
- return [
181
- responses.StatusResponse.model_validate(cluster) for cluster in clusters
182
- ]
185
+ include_credentials=include_credentials,
186
+ summary_response=summary_response,
187
+ include_handle=include_handle)
188
+
189
+ status_responses = []
190
+ for cluster in clusters:
191
+ try:
192
+ status_responses.append(
193
+ responses.StatusResponse.model_validate(cluster))
194
+ except Exception as e: # pylint: disable=broad-except
195
+ logger.warning('Failed to validate status responses for cluster '
196
+ f'{cluster.get("name")}: {e}')
197
+ return status_responses
183
198
 
184
199
 
185
200
  def status_kubernetes(
186
201
  ) -> Tuple[List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
187
202
  List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
188
- List[Dict[str, Any]], Optional[str]]:
203
+ List[responses.ManagedJobRecord], Optional[str]]:
189
204
  """Gets all SkyPilot clusters and jobs in the Kubernetes cluster.
190
205
 
191
206
  Managed jobs and services are also included in the clusters returned.
@@ -260,6 +275,7 @@ all_clusters, unmanaged_clusters, all_jobs, context
260
275
  kubernetes_utils.KubernetesSkyPilotClusterInfoPayload.from_cluster(c)
261
276
  for c in unmanaged_clusters
262
277
  ]
278
+ all_jobs = [responses.ManagedJobRecord(**job) for job in all_jobs]
263
279
  return all_clusters, unmanaged_clusters, all_jobs, context
264
280
 
265
281
 
@@ -288,7 +304,10 @@ def endpoints(cluster: str,
288
304
 
289
305
 
290
306
  @usage_lib.entrypoint
291
- def cost_report(days: Optional[int] = None) -> List[Dict[str, Any]]:
307
+ def cost_report(
308
+ days: Optional[int] = None,
309
+ dashboard_summary_response: bool = False,
310
+ cluster_hashes: Optional[List[str]] = None) -> List[Dict[str, Any]]:
292
311
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
293
312
  """Get all cluster cost reports, including those that have been downed.
294
313
 
@@ -334,7 +353,12 @@ def cost_report(days: Optional[int] = None) -> List[Dict[str, Any]]:
334
353
  if days is None:
335
354
  days = constants.COST_REPORT_DEFAULT_DAYS
336
355
 
337
- cluster_reports = global_user_state.get_clusters_from_history(days=days)
356
+ abbreviate_response = dashboard_summary_response and cluster_hashes is None
357
+
358
+ cluster_reports = global_user_state.get_clusters_from_history(
359
+ days=days,
360
+ abbreviate_response=abbreviate_response,
361
+ cluster_hashes=cluster_hashes)
338
362
  logger.debug(
339
363
  f'{len(cluster_reports)} clusters found from history with {days} days.')
340
364
 
@@ -352,43 +376,6 @@ def cost_report(days: Optional[int] = None) -> List[Dict[str, Any]]:
352
376
  cost = (launched_resources.get_cost(duration) * launched_nodes)
353
377
  return cost
354
378
 
355
- def _update_record_with_resources(record: Dict[str, Any]) -> None:
356
- """Add resource fields for dashboard compatibility."""
357
- if record is None:
358
- return
359
- resources = record.get('resources')
360
- if resources is None:
361
- return
362
- fields = ['cloud', 'region', 'cpus', 'memory', 'accelerators']
363
- for field in fields:
364
- try:
365
- record[field] = str(getattr(resources, field))
366
- except Exception as e: # pylint: disable=broad-except
367
- # Ok to skip the fields as this is just for display
368
- # purposes.
369
- logger.debug(f'Failed to get resources.{field} for cluster '
370
- f'{record["name"]}: {str(e)}')
371
- record[field] = None
372
-
373
- # Add resources_str and resources_str_full for dashboard
374
- # compatibility
375
- num_nodes = record.get('num_nodes', 1)
376
- try:
377
- resource_str_simple = resources_utils.format_resource(
378
- resources, simplify=True)
379
- resource_str_full = resources_utils.format_resource(
380
- resources, simplify=False)
381
- record['resources_str'] = f'{num_nodes}x{resource_str_simple}'
382
- record[
383
- 'resources_str_full'] = f'{num_nodes}x{resource_str_full}'
384
- except Exception as e: # pylint: disable=broad-except
385
- logger.debug(f'Failed to get resources_str for cluster '
386
- f'{record["name"]}: {str(e)}')
387
- for field in fields:
388
- record[field] = None
389
- record['resources_str'] = '-'
390
- record['resources_str_full'] = '-'
391
-
392
379
  try:
393
380
  report['total_cost'] = get_total_cost(report)
394
381
  except Exception as e: # pylint: disable=broad-except
@@ -397,17 +384,62 @@ def cost_report(days: Optional[int] = None) -> List[Dict[str, Any]]:
397
384
  f'{report["name"]}: {str(e)}')
398
385
  report['total_cost'] = 0.0
399
386
 
400
- _update_record_with_resources(report)
401
387
  return report
402
388
 
403
389
  # Process clusters in parallel
404
390
  if not cluster_reports:
405
391
  return []
406
392
 
407
- processed_reports = subprocess_utils.run_in_parallel(
408
- _process_cluster_report, cluster_reports)
393
+ if not abbreviate_response:
394
+ cluster_reports = subprocess_utils.run_in_parallel(
395
+ _process_cluster_report, cluster_reports)
396
+
397
+ def _update_record_with_resources(record: Dict[str, Any]) -> None:
398
+ """Add resource fields for dashboard compatibility."""
399
+ if record is None:
400
+ return
401
+ resources = record.get('resources')
402
+ if resources is None:
403
+ return
404
+ if not dashboard_summary_response:
405
+ fields = ['cloud', 'region', 'cpus', 'memory', 'accelerators']
406
+ else:
407
+ fields = ['cloud']
408
+ for field in fields:
409
+ try:
410
+ record[field] = str(getattr(resources, field))
411
+ except Exception as e: # pylint: disable=broad-except
412
+ # Ok to skip the fields as this is just for display
413
+ # purposes.
414
+ logger.debug(f'Failed to get resources.{field} for cluster '
415
+ f'{record["name"]}: {str(e)}')
416
+ record[field] = None
409
417
 
410
- return processed_reports
418
+ # Add resources_str and resources_str_full for dashboard
419
+ # compatibility
420
+ num_nodes = record.get('num_nodes', 1)
421
+ try:
422
+ resource_str_simple, resource_str_full = (
423
+ resources_utils.format_resource(resources,
424
+ simplified_only=False))
425
+ record['resources_str'] = f'{num_nodes}x{resource_str_simple}'
426
+ record['resources_str_full'] = f'{num_nodes}x{resource_str_full}'
427
+ except Exception as e: # pylint: disable=broad-except
428
+ logger.debug(f'Failed to get resources_str for cluster '
429
+ f'{record["name"]}: {str(e)}')
430
+ for field in fields:
431
+ record[field] = None
432
+ record['resources_str'] = '-'
433
+ record['resources_str_full'] = '-'
434
+
435
+ for report in cluster_reports:
436
+ _update_record_with_resources(report)
437
+ if dashboard_summary_response:
438
+ report.pop('usage_intervals')
439
+ report.pop('user_hash')
440
+ report.pop('resources')
441
+
442
+ return cluster_reports
411
443
 
412
444
 
413
445
  def _start(
@@ -466,6 +498,32 @@ def _start(
466
498
  controller_autostop_config.enabled):
467
499
  idle_minutes_to_autostop = controller_autostop_config.idle_minutes
468
500
  down = controller_autostop_config.down
501
+ else:
502
+ # For non-controller clusters, restore autostop configuration from
503
+ # database if not explicitly provided.
504
+ if idle_minutes_to_autostop is None:
505
+ cluster_record = global_user_state.get_cluster_from_name(
506
+ cluster_name, include_user_info=False, summary_response=True)
507
+ if cluster_record is not None:
508
+ stored_autostop = cluster_record.get('autostop', -1)
509
+ stored_to_down = cluster_record.get('to_down', False)
510
+ # Restore autostop if it was previously set (autostop > 0)
511
+ if stored_autostop > 0:
512
+ logger.warning(f'Restoring cluster {cluster_name!r} with '
513
+ f'autostop set to {stored_autostop} minutes'
514
+ f'. To turn off autostop, run: '
515
+ f'`sky autostop {cluster_name} --cancel`')
516
+ idle_minutes_to_autostop = stored_autostop
517
+ # Only restore 'down' if it was explicitly set and we're
518
+ # restoring autostop
519
+ if stored_to_down:
520
+ down = stored_to_down
521
+ elif stored_autostop == 0:
522
+ logger.warning(
523
+ f'Autostop was previously set to 0 minutes '
524
+ f'for cluster {cluster_name!r} so it will '
525
+ 'not be restored. To turn on autostop, run: '
526
+ f'`sky autostop {cluster_name} -i <minutes>`')
469
527
 
470
528
  usage_lib.record_cluster_name_for_current_operation(cluster_name)
471
529
 
@@ -773,7 +831,7 @@ def autostop(
773
831
  @usage_lib.entrypoint
774
832
  def queue(cluster_name: str,
775
833
  skip_finished: bool = False,
776
- all_users: bool = False) -> List[dict]:
834
+ all_users: bool = False) -> List[responses.ClusterJobRecord]:
777
835
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
778
836
  """Gets the job queue of a cluster.
779
837
 
@@ -811,7 +869,6 @@ def queue(cluster_name: str,
811
869
  user_hash = None
812
870
  else:
813
871
  user_hash = common_utils.get_current_user().id
814
- code = job_lib.JobLibCodeGen.get_job_queue(user_hash, all_jobs)
815
872
 
816
873
  handle = backend_utils.check_cluster_available(
817
874
  cluster_name,
@@ -819,18 +876,49 @@ def queue(cluster_name: str,
819
876
  )
820
877
  backend = backend_utils.get_backend_from_handle(handle)
821
878
 
822
- returncode, jobs_payload, stderr = backend.run_on_head(handle,
823
- code,
824
- require_outputs=True,
825
- separate_stderr=True)
826
- subprocess_utils.handle_returncode(
827
- returncode,
828
- command=code,
829
- error_msg=f'Failed to get job queue on cluster {cluster_name}.',
830
- stderr=f'{jobs_payload + stderr}',
831
- stream_logs=True)
832
- jobs = job_lib.load_job_queue(jobs_payload)
833
- return jobs
879
+ use_legacy = not handle.is_grpc_enabled_with_flag
880
+
881
+ if not use_legacy:
882
+ try:
883
+ request = jobsv1_pb2.GetJobQueueRequest(user_hash=user_hash,
884
+ all_jobs=all_jobs)
885
+ response = backend_utils.invoke_skylet_with_retries(
886
+ lambda: cloud_vm_ray_backend.SkyletClient(
887
+ handle.get_grpc_channel()).get_job_queue(request))
888
+ jobs = []
889
+ for job_info in response.jobs:
890
+ job_dict = {
891
+ 'job_id': job_info.job_id,
892
+ 'job_name': job_info.job_name,
893
+ 'submitted_at': job_info.submitted_at,
894
+ 'status': job_lib.JobStatus.from_protobuf(job_info.status),
895
+ 'run_timestamp': job_info.run_timestamp,
896
+ 'start_at': job_info.start_at
897
+ if job_info.HasField('start_at') else None,
898
+ 'end_at': job_info.end_at
899
+ if job_info.HasField('end_at') else None,
900
+ 'resources': job_info.resources,
901
+ 'log_path': job_info.log_path,
902
+ 'user_hash': job_info.username,
903
+ }
904
+ # Copied from job_lib.load_job_queue.
905
+ user = global_user_state.get_user(job_dict['user_hash'])
906
+ job_dict['username'] = user.name if user is not None else None
907
+ jobs.append(job_dict)
908
+ except exceptions.SkyletMethodNotImplementedError:
909
+ use_legacy = True
910
+ if use_legacy:
911
+ code = job_lib.JobLibCodeGen.get_job_queue(user_hash, all_jobs)
912
+ returncode, jobs_payload, stderr = backend.run_on_head(
913
+ handle, code, require_outputs=True, separate_stderr=True)
914
+ subprocess_utils.handle_returncode(
915
+ returncode,
916
+ command=code,
917
+ error_msg=f'Failed to get job queue on cluster {cluster_name}.',
918
+ stderr=f'{jobs_payload + stderr}',
919
+ stream_logs=True)
920
+ jobs = job_lib.load_job_queue(jobs_payload)
921
+ return [responses.ClusterJobRecord.model_validate(job) for job in jobs]
834
922
 
835
923
 
836
924
  @usage_lib.entrypoint
@@ -1070,25 +1158,25 @@ def job_status(cluster_name: str,
1070
1158
  # = Storage Management =
1071
1159
  # ======================
1072
1160
  @usage_lib.entrypoint
1073
- def storage_ls() -> List[Dict[str, Any]]:
1161
+ def storage_ls() -> List[responses.StorageRecord]:
1074
1162
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
1075
1163
  """Gets the storages.
1076
1164
 
1077
1165
  Returns:
1078
- [
1079
- {
1080
- 'name': str,
1081
- 'launched_at': int timestamp of creation,
1082
- 'store': List[sky.StoreType],
1083
- 'last_use': int timestamp of last use,
1084
- 'status': sky.StorageStatus,
1085
- }
1086
- ]
1166
+ List[responses.StorageRecord]: A list of storage records.
1087
1167
  """
1088
1168
  storages = global_user_state.get_storage()
1169
+ storage_records = []
1089
1170
  for storage in storages:
1090
- storage['store'] = list(storage.pop('handle').sky_stores.keys())
1091
- return storages
1171
+ storage_records.append(
1172
+ responses.StorageRecord(
1173
+ name=storage['name'],
1174
+ launched_at=storage['launched_at'],
1175
+ store=list(storage.pop('handle').sky_stores.keys()),
1176
+ last_use=storage['last_use'],
1177
+ status=storage['status'],
1178
+ ))
1179
+ return storage_records
1092
1180
 
1093
1181
 
1094
1182
  @usage_lib.entrypoint
@@ -1104,9 +1192,7 @@ def storage_delete(name: str) -> None:
1104
1192
  if handle is None:
1105
1193
  raise ValueError(f'Storage name {name!r} not found.')
1106
1194
  else:
1107
- storage_object = data.Storage(name=handle.storage_name,
1108
- source=handle.source,
1109
- sync_on_reconstruction=False)
1195
+ storage_object = data.Storage.from_handle(handle)
1110
1196
  storage_object.delete()
1111
1197
 
1112
1198
 
@@ -1233,92 +1319,15 @@ def realtime_kubernetes_gpu_availability(
1233
1319
  # =================
1234
1320
  @usage_lib.entrypoint
1235
1321
  def local_up(gpus: bool,
1236
- ips: Optional[List[str]],
1237
- ssh_user: Optional[str],
1238
- ssh_key: Optional[str],
1239
- cleanup: bool,
1240
- context_name: Optional[str] = None,
1241
- password: Optional[str] = None) -> None:
1242
- """Creates a local or remote cluster."""
1243
-
1244
- def _validate_args(ips, ssh_user, ssh_key, cleanup):
1245
- # If any of --ips, --ssh-user, or --ssh-key-path is specified,
1246
- # all must be specified
1247
- if bool(ips) or bool(ssh_user) or bool(ssh_key):
1248
- if not (ips and ssh_user and ssh_key):
1249
- with ux_utils.print_exception_no_traceback():
1250
- raise ValueError(
1251
- 'All ips, ssh_user, and ssh_key must be specified '
1252
- 'together.')
1253
-
1254
- # --cleanup can only be used if --ips, --ssh-user and --ssh-key-path
1255
- # are all provided
1256
- if cleanup and not (ips and ssh_user and ssh_key):
1257
- with ux_utils.print_exception_no_traceback():
1258
- raise ValueError(
1259
- 'cleanup can only be used with ips, ssh_user and ssh_key.')
1260
-
1261
- _validate_args(ips, ssh_user, ssh_key, cleanup)
1262
-
1263
- # If remote deployment arguments are specified, run remote up script
1264
- if ips:
1265
- assert ssh_user is not None and ssh_key is not None
1266
- kubernetes_deploy_utils.deploy_remote_cluster(ips, ssh_user, ssh_key,
1267
- cleanup, context_name,
1268
- password)
1269
- else:
1270
- # Run local deployment (kind) if no remote args are specified
1271
- kubernetes_deploy_utils.deploy_local_cluster(gpus)
1322
+ name: Optional[str] = None,
1323
+ port_start: Optional[int] = None) -> None:
1324
+ """Creates a local cluster."""
1325
+ kubernetes_deploy_utils.deploy_local_cluster(name, port_start, gpus)
1272
1326
 
1273
1327
 
1274
- def local_down() -> None:
1328
+ def local_down(name: Optional[str] = None) -> None:
1275
1329
  """Tears down the Kubernetes cluster started by local_up."""
1276
- cluster_removed = False
1277
-
1278
- path_to_package = os.path.dirname(__file__)
1279
- down_script_path = os.path.join(path_to_package, 'utils/kubernetes',
1280
- 'delete_cluster.sh')
1281
-
1282
- cwd = os.path.dirname(os.path.abspath(down_script_path))
1283
- run_command = shlex.split(down_script_path)
1284
-
1285
- # Setup logging paths
1286
- run_timestamp = sky_logging.get_run_timestamp()
1287
- log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
1288
- 'local_down.log')
1289
-
1290
- with rich_utils.safe_status(
1291
- ux_utils.spinner_message('Removing local cluster',
1292
- log_path=log_path,
1293
- is_local=True)):
1294
-
1295
- returncode, stdout, stderr = log_lib.run_with_log(cmd=run_command,
1296
- log_path=log_path,
1297
- require_outputs=True,
1298
- stream_logs=False,
1299
- cwd=cwd)
1300
- stderr = stderr.replace('No kind clusters found.\n', '')
1301
-
1302
- if returncode == 0:
1303
- cluster_removed = True
1304
- elif returncode == 100:
1305
- logger.info(ux_utils.error_message('Local cluster does not exist.'))
1306
- else:
1307
- with ux_utils.print_exception_no_traceback():
1308
- raise RuntimeError('Failed to create local cluster. '
1309
- f'Stdout: {stdout}'
1310
- f'\nError: {stderr}')
1311
- if cluster_removed:
1312
- # Run sky check
1313
- with rich_utils.safe_status(
1314
- ux_utils.spinner_message('Running sky check...')):
1315
- sky_check.check_capability(sky_cloud.CloudCapability.COMPUTE,
1316
- clouds=['kubernetes'],
1317
- quiet=True)
1318
- logger.info(
1319
- ux_utils.finishing_message('Local cluster removed.',
1320
- log_path=log_path,
1321
- is_local=True))
1330
+ kubernetes_deploy_utils.teardown_local_cluster(name)
1322
1331
 
1323
1332
 
1324
1333
  @usage_lib.entrypoint
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-4fe903277b57b523.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/mS-4qZPSkRuA1u-g2wQhg/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"mS-4qZPSkRuA1u-g2wQhg","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/0748ce22df867032.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/0748ce22df867032.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-64e05f17bf2cf8ce.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-bde01e4a2beec258.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/96_E2yl3QAiIJGOYCkSpB/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"96_E2yl3QAiIJGOYCkSpB","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>