skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic; see the package registry's advisory page for details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/core.py CHANGED
@@ -1,6 +1,4 @@
1
1
  """SDK functions for cluster/job management."""
2
- import os
3
- import shlex
4
2
  import typing
5
3
  from typing import Any, Dict, List, Optional, Tuple, Union
6
4
 
@@ -8,7 +6,7 @@ import colorama
8
6
 
9
7
  from sky import admin_policy
10
8
  from sky import backends
11
- from sky import check as sky_check
9
+ from sky import catalog
12
10
  from sky import clouds
13
11
  from sky import dag as dag_lib
14
12
  from sky import data
@@ -17,21 +15,26 @@ from sky import global_user_state
17
15
  from sky import models
18
16
  from sky import optimizer
19
17
  from sky import sky_logging
18
+ from sky import skypilot_config
20
19
  from sky import task as task_lib
20
+ from sky.adaptors import common as adaptors_common
21
21
  from sky.backends import backend_utils
22
+ from sky.backends import cloud_vm_ray_backend
22
23
  from sky.clouds import cloud as sky_cloud
23
- from sky.clouds import service_catalog
24
24
  from sky.jobs.server import core as managed_jobs_core
25
25
  from sky.provision.kubernetes import constants as kubernetes_constants
26
26
  from sky.provision.kubernetes import utils as kubernetes_utils
27
+ from sky.schemas.api import responses
28
+ from sky.server.requests import request_names
29
+ from sky.skylet import autostop_lib
27
30
  from sky.skylet import constants
28
31
  from sky.skylet import job_lib
29
- from sky.skylet import log_lib
30
32
  from sky.usage import usage_lib
31
33
  from sky.utils import admin_policy_utils
32
34
  from sky.utils import common
33
35
  from sky.utils import common_utils
34
36
  from sky.utils import controller_utils
37
+ from sky.utils import resources_utils
35
38
  from sky.utils import rich_utils
36
39
  from sky.utils import status_lib
37
40
  from sky.utils import subprocess_utils
@@ -40,6 +43,9 @@ from sky.utils.kubernetes import kubernetes_deploy_utils
40
43
 
41
44
  if typing.TYPE_CHECKING:
42
45
  from sky import resources as resources_lib
46
+ from sky.schemas.generated import jobsv1_pb2
47
+ else:
48
+ jobsv1_pb2 = adaptors_common.LazyImport('sky.schemas.generated.jobsv1_pb2')
43
49
 
44
50
  logger = sky_logging.init_logger(__name__)
45
51
 
@@ -78,14 +84,15 @@ def optimize(
78
84
  # is shown on `sky launch`. The optimizer is also invoked during failover,
79
85
  # but we do not apply the admin policy there. We should apply the admin
80
86
  # policy in the optimizer, but that will require some refactoring.
81
- dag, _ = admin_policy_utils.apply(
82
- dag,
83
- use_mutated_config_in_current_request=True,
84
- request_options=request_options)
85
- return optimizer.Optimizer.optimize(dag=dag,
86
- minimize=minimize,
87
- blocked_resources=blocked_resources,
88
- quiet=quiet)
87
+ with admin_policy_utils.apply_and_use_config_in_current_request(
88
+ dag,
89
+ request_name=request_names.AdminPolicyRequestName.OPTIMIZE,
90
+ request_options=request_options) as dag:
91
+ dag.resolve_and_validate_volumes()
92
+ return optimizer.Optimizer.optimize(dag=dag,
93
+ minimize=minimize,
94
+ blocked_resources=blocked_resources,
95
+ quiet=quiet)
89
96
 
90
97
 
91
98
  @usage_lib.entrypoint
@@ -93,7 +100,10 @@ def status(
93
100
  cluster_names: Optional[Union[str, List[str]]] = None,
94
101
  refresh: common.StatusRefreshMode = common.StatusRefreshMode.NONE,
95
102
  all_users: bool = False,
96
- ) -> List[Dict[str, Any]]:
103
+ include_credentials: bool = False,
104
+ summary_response: bool = False,
105
+ include_handle: bool = True,
106
+ ) -> List[responses.StatusResponse]:
97
107
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
98
108
  """Gets cluster statuses.
99
109
 
@@ -160,22 +170,37 @@ def status(
160
170
  provided, all clusters will be queried.
161
171
  refresh: whether to query the latest cluster statuses from the cloud
162
172
  provider(s).
173
+ include_credentials: whether to fetch ssh credentials for cluster
174
+ (credentials field in responses.StatusResponse)
163
175
 
164
176
  Returns:
165
177
  A list of dicts, with each dict containing the information of a
166
178
  cluster. If a cluster is found to be terminated or not found, it will
167
179
  be omitted from the returned list.
168
180
  """
169
- clusters = backend_utils.get_clusters(refresh=refresh,
170
- cluster_names=cluster_names,
171
- all_users=all_users)
172
- return clusters
181
+ clusters = backend_utils.get_clusters(
182
+ refresh=refresh,
183
+ cluster_names=cluster_names,
184
+ all_users=all_users,
185
+ include_credentials=include_credentials,
186
+ summary_response=summary_response,
187
+ include_handle=include_handle)
188
+
189
+ status_responses = []
190
+ for cluster in clusters:
191
+ try:
192
+ status_responses.append(
193
+ responses.StatusResponse.model_validate(cluster))
194
+ except Exception as e: # pylint: disable=broad-except
195
+ logger.warning('Failed to validate status responses for cluster '
196
+ f'{cluster.get("name")}: {e}')
197
+ return status_responses
173
198
 
174
199
 
175
200
  def status_kubernetes(
176
201
  ) -> Tuple[List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
177
202
  List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
178
- List[Dict[str, Any]], Optional[str]]:
203
+ List[responses.ManagedJobRecord], Optional[str]]:
179
204
  """Gets all SkyPilot clusters and jobs in the Kubernetes cluster.
180
205
 
181
206
  Managed jobs and services are also included in the clusters returned.
@@ -250,6 +275,7 @@ all_clusters, unmanaged_clusters, all_jobs, context
250
275
  kubernetes_utils.KubernetesSkyPilotClusterInfoPayload.from_cluster(c)
251
276
  for c in unmanaged_clusters
252
277
  ]
278
+ all_jobs = [responses.ManagedJobRecord(**job) for job in all_jobs]
253
279
  return all_clusters, unmanaged_clusters, all_jobs, context
254
280
 
255
281
 
@@ -262,22 +288,26 @@ def endpoints(cluster: str,
262
288
  port: The port number to get the endpoint for. If None, endpoints
263
289
  for all ports are returned..
264
290
 
265
- Returns: A dictionary of port numbers to endpoints. If endpoint is None,
291
+ Returns: A dictionary of port numbers to endpoints. If port is None,
266
292
  the dictionary will contain all ports:endpoints exposed on the cluster.
267
293
 
268
294
  Raises:
269
- ValueError: if the cluster is not UP or the endpoint is not exposed.
295
+ ValueError: if the cluster is not UP or the endpoint is not exposed.
270
296
  RuntimeError: if the cluster has no ports to be exposed or no endpoints
271
297
  are exposed yet.
272
298
  """
273
299
  with rich_utils.safe_status(
274
300
  ux_utils.spinner_message(
275
301
  f'Fetching endpoints for cluster {cluster}')):
276
- return backend_utils.get_endpoints(cluster=cluster, port=port)
302
+ result = backend_utils.get_endpoints(cluster=cluster, port=port)
303
+ return result
277
304
 
278
305
 
279
306
  @usage_lib.entrypoint
280
- def cost_report() -> List[Dict[str, Any]]:
307
+ def cost_report(
308
+ days: Optional[int] = None,
309
+ dashboard_summary_response: bool = False,
310
+ cluster_hashes: Optional[List[str]] = None) -> List[Dict[str, Any]]:
281
311
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
282
312
  """Get all cluster cost reports, including those that have been downed.
283
313
 
@@ -295,6 +325,13 @@ def cost_report() -> List[Dict[str, Any]]:
295
325
  'cluster_hash': (str) unique hash identifying cluster,
296
326
  'usage_intervals': (List[Tuple[int, int]]) cluster usage times,
297
327
  'total_cost': (float) cost given resources and usage intervals,
328
+ 'cloud': (str) cloud of the cluster,
329
+ 'region': (str) region of the cluster,
330
+ 'cpus': (str) number of vCPUs of the cluster,
331
+ 'memory': (str) memory of the cluster,
332
+ 'accelerators': (str) accelerators of the cluster,
333
+ 'resources_str': (str) resources string of the cluster,
334
+ 'resources_str_full': (str) full resources string of the cluster,
298
335
  }
299
336
 
300
337
  The estimated cost column indicates price for the cluster based on the type
@@ -304,25 +341,103 @@ def cost_report() -> List[Dict[str, Any]]:
304
341
  cache of the cluster status, and may not be accurate for the cluster with
305
342
  autostop/use_spot set or terminated/stopped on the cloud console.
306
343
 
344
+ Args:
345
+ days: Number of days to look back from now. Active clusters are always
346
+ included. Historical clusters are only included if they were last
347
+ used within the past 'days' days. Defaults to 30 days.
348
+
307
349
  Returns:
308
350
  A list of dicts, with each dict containing the cost information of a
309
351
  cluster.
310
352
  """
311
- cluster_reports = global_user_state.get_clusters_from_history()
353
+ if days is None:
354
+ days = constants.COST_REPORT_DEFAULT_DAYS
312
355
 
313
- def get_total_cost(cluster_report: dict) -> float:
314
- duration = cluster_report['duration']
315
- launched_nodes = cluster_report['num_nodes']
316
- launched_resources = cluster_report['resources']
356
+ abbreviate_response = dashboard_summary_response and cluster_hashes is None
317
357
 
318
- cost = (launched_resources.get_cost(duration) * launched_nodes)
319
- return cost
358
+ cluster_reports = global_user_state.get_clusters_from_history(
359
+ days=days,
360
+ abbreviate_response=abbreviate_response,
361
+ cluster_hashes=cluster_hashes)
362
+ logger.debug(
363
+ f'{len(cluster_reports)} clusters found from history with {days} days.')
320
364
 
321
- for cluster_report in cluster_reports:
322
- cluster_report['total_cost'] = get_total_cost(cluster_report)
323
- cluster_report['cloud'] = str(cluster_report['resources'].cloud)
324
- cluster_report['accelerators'] = cluster_report[
325
- 'resources'].accelerators
365
+ def _process_cluster_report(
366
+ cluster_report: Dict[str, Any]) -> Dict[str, Any]:
367
+ """Process cluster report by calculating cost and adding fields."""
368
+ # Make a copy to avoid modifying the original
369
+ report = cluster_report.copy()
370
+
371
+ def get_total_cost(cluster_report: dict) -> float:
372
+ duration = cluster_report['duration']
373
+ launched_nodes = cluster_report['num_nodes']
374
+ launched_resources = cluster_report['resources']
375
+
376
+ cost = (launched_resources.get_cost(duration) * launched_nodes)
377
+ return cost
378
+
379
+ try:
380
+ report['total_cost'] = get_total_cost(report)
381
+ except Exception as e: # pylint: disable=broad-except
382
+ # Ok to skip the total cost as this is just for display purposes.
383
+ logger.warning(f'Failed to get total cost for cluster '
384
+ f'{report["name"]}: {str(e)}')
385
+ report['total_cost'] = 0.0
386
+
387
+ return report
388
+
389
+ # Process clusters in parallel
390
+ if not cluster_reports:
391
+ return []
392
+
393
+ if not abbreviate_response:
394
+ cluster_reports = subprocess_utils.run_in_parallel(
395
+ _process_cluster_report, cluster_reports)
396
+
397
+ def _update_record_with_resources(record: Dict[str, Any]) -> None:
398
+ """Add resource fields for dashboard compatibility."""
399
+ if record is None:
400
+ return
401
+ resources = record.get('resources')
402
+ if resources is None:
403
+ return
404
+ if not dashboard_summary_response:
405
+ fields = ['cloud', 'region', 'cpus', 'memory', 'accelerators']
406
+ else:
407
+ fields = ['cloud']
408
+ for field in fields:
409
+ try:
410
+ record[field] = str(getattr(resources, field))
411
+ except Exception as e: # pylint: disable=broad-except
412
+ # Ok to skip the fields as this is just for display
413
+ # purposes.
414
+ logger.debug(f'Failed to get resources.{field} for cluster '
415
+ f'{record["name"]}: {str(e)}')
416
+ record[field] = None
417
+
418
+ # Add resources_str and resources_str_full for dashboard
419
+ # compatibility
420
+ num_nodes = record.get('num_nodes', 1)
421
+ try:
422
+ resource_str_simple, resource_str_full = (
423
+ resources_utils.format_resource(resources,
424
+ simplified_only=False))
425
+ record['resources_str'] = f'{num_nodes}x{resource_str_simple}'
426
+ record['resources_str_full'] = f'{num_nodes}x{resource_str_full}'
427
+ except Exception as e: # pylint: disable=broad-except
428
+ logger.debug(f'Failed to get resources_str for cluster '
429
+ f'{record["name"]}: {str(e)}')
430
+ for field in fields:
431
+ record[field] = None
432
+ record['resources_str'] = '-'
433
+ record['resources_str_full'] = '-'
434
+
435
+ for report in cluster_reports:
436
+ _update_record_with_resources(report)
437
+ if dashboard_summary_response:
438
+ report.pop('usage_intervals')
439
+ report.pop('user_hash')
440
+ report.pop('resources')
326
441
 
327
442
  return cluster_reports
328
443
 
@@ -330,6 +445,8 @@ def cost_report() -> List[Dict[str, Any]]:
330
445
  def _start(
331
446
  cluster_name: str,
332
447
  idle_minutes_to_autostop: Optional[int] = None,
448
+ wait_for: Optional[autostop_lib.AutostopWaitFor] = (
449
+ autostop_lib.DEFAULT_AUTOSTOP_WAIT_FOR),
333
450
  retry_until_up: bool = False,
334
451
  down: bool = False, # pylint: disable=redefined-outer-name
335
452
  force: bool = False,
@@ -369,9 +486,18 @@ def _start(
369
486
  'supported when starting SkyPilot controllers. To '
370
487
  f'fix: omit the {arguments_str} to use the '
371
488
  f'default autostop settings from config.')
372
- idle_minutes_to_autostop, down = (
373
- controller_utils.get_controller_autostop_config(
374
- controller=controller))
489
+
490
+ # Get the autostop resources, from which we extract the correct autostop
491
+ # config.
492
+ controller_resources = controller_utils.get_controller_resources(
493
+ controller, [])
494
+ # All resources should have the same autostop config.
495
+ controller_autostop_config = list(
496
+ controller_resources)[0].autostop_config
497
+ if (controller_autostop_config is not None and
498
+ controller_autostop_config.enabled):
499
+ idle_minutes_to_autostop = controller_autostop_config.idle_minutes
500
+ down = controller_autostop_config.down
375
501
 
376
502
  usage_lib.record_cluster_name_for_current_operation(cluster_name)
377
503
 
@@ -391,7 +517,7 @@ def _start(
391
517
  all_file_mounts=None,
392
518
  storage_mounts=storage_mounts)
393
519
  if idle_minutes_to_autostop is not None:
394
- backend.set_autostop(handle, idle_minutes_to_autostop, down=down)
520
+ backend.set_autostop(handle, idle_minutes_to_autostop, wait_for, down)
395
521
  return handle
396
522
 
397
523
 
@@ -399,6 +525,8 @@ def _start(
399
525
  def start(
400
526
  cluster_name: str,
401
527
  idle_minutes_to_autostop: Optional[int] = None,
528
+ wait_for: Optional[autostop_lib.AutostopWaitFor] = (
529
+ autostop_lib.DEFAULT_AUTOSTOP_WAIT_FOR),
402
530
  retry_until_up: bool = False,
403
531
  down: bool = False, # pylint: disable=redefined-outer-name
404
532
  force: bool = False,
@@ -453,6 +581,7 @@ def start(
453
581
  '`idle_minutes_to_autostop` must be set if `down` is True.')
454
582
  return _start(cluster_name,
455
583
  idle_minutes_to_autostop,
584
+ wait_for,
456
585
  retry_until_up,
457
586
  down,
458
587
  force=force)
@@ -463,7 +592,10 @@ def _stop_not_supported_message(resources: 'resources_lib.Resources') -> str:
463
592
  message = ('Stopping spot instances is currently not supported on '
464
593
  f'{resources.cloud}')
465
594
  else:
466
- message = f'Stopping is currently not supported for {resources}'
595
+ cloud_name = resources.cloud.display_name(
596
+ ) if resources.cloud else resources.cloud
597
+ message = ('Stopping is currently not supported for '
598
+ f'{cloud_name}')
467
599
  return message
468
600
 
469
601
 
@@ -539,6 +671,11 @@ def stop(cluster_name: str, purge: bool = False) -> None:
539
671
  raise exceptions.ClusterDoesNotExist(
540
672
  f'Cluster {cluster_name!r} does not exist.')
541
673
 
674
+ global_user_state.add_cluster_event(
675
+ cluster_name, status_lib.ClusterStatus.STOPPED,
676
+ 'Cluster was stopped by user.',
677
+ global_user_state.ClusterEventType.STATUS_CHANGE)
678
+
542
679
  backend = backend_utils.get_backend_from_handle(handle)
543
680
 
544
681
  if isinstance(backend, backends.CloudVmRayBackend):
@@ -566,6 +703,8 @@ def stop(cluster_name: str, purge: bool = False) -> None:
566
703
  def autostop(
567
704
  cluster_name: str,
568
705
  idle_minutes: int,
706
+ wait_for: Optional[autostop_lib.AutostopWaitFor] = autostop_lib.
707
+ DEFAULT_AUTOSTOP_WAIT_FOR,
569
708
  down: bool = False, # pylint: disable=redefined-outer-name
570
709
  ) -> None:
571
710
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
@@ -627,29 +766,26 @@ def autostop(
627
766
  )
628
767
  backend = backend_utils.get_backend_from_handle(handle)
629
768
 
769
+ resources = handle.launched_resources.assert_launchable()
630
770
  # Check cloud supports stopping spot instances
631
- cloud = handle.launched_resources.cloud
632
- assert cloud is not None, handle
771
+ cloud = resources.cloud
633
772
 
634
773
  if not isinstance(backend, backends.CloudVmRayBackend):
635
774
  raise exceptions.NotSupportedError(
636
775
  f'{operation} cluster {cluster_name!r} with backend '
637
776
  f'{backend.__class__.__name__!r} is not supported.')
638
- cloud = handle.launched_resources.cloud
777
+
639
778
  # Check if autostop/autodown is required and supported
640
779
  if not is_cancel:
641
780
  try:
642
781
  if down:
643
782
  cloud.check_features_are_supported(
644
- handle.launched_resources,
645
- {clouds.CloudImplementationFeatures.AUTODOWN})
783
+ resources, {clouds.CloudImplementationFeatures.AUTODOWN})
646
784
  else:
647
785
  cloud.check_features_are_supported(
648
- handle.launched_resources,
649
- {clouds.CloudImplementationFeatures.STOP})
786
+ resources, {clouds.CloudImplementationFeatures.STOP})
650
787
  cloud.check_features_are_supported(
651
- handle.launched_resources,
652
- {clouds.CloudImplementationFeatures.AUTOSTOP})
788
+ resources, {clouds.CloudImplementationFeatures.AUTOSTOP})
653
789
  except exceptions.NotSupportedError as e:
654
790
  raise exceptions.NotSupportedError(
655
791
  f'{colorama.Fore.YELLOW}{operation} on cluster '
@@ -658,7 +794,7 @@ def autostop(
658
794
  f'see reason above.') from e
659
795
 
660
796
  usage_lib.record_cluster_name_for_current_operation(cluster_name)
661
- backend.set_autostop(handle, idle_minutes, down)
797
+ backend.set_autostop(handle, idle_minutes, wait_for, down)
662
798
 
663
799
 
664
800
  # ==================
@@ -669,7 +805,7 @@ def autostop(
669
805
  @usage_lib.entrypoint
670
806
  def queue(cluster_name: str,
671
807
  skip_finished: bool = False,
672
- all_users: bool = False) -> List[dict]:
808
+ all_users: bool = False) -> List[responses.ClusterJobRecord]:
673
809
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
674
810
  """Gets the job queue of a cluster.
675
811
 
@@ -703,10 +839,10 @@ def queue(cluster_name: str,
703
839
  exceptions.CommandError: if failed to get the job queue with ssh.
704
840
  """
705
841
  all_jobs = not skip_finished
706
- user_hash: Optional[str] = common_utils.get_user_hash()
707
842
  if all_users:
708
843
  user_hash = None
709
- code = job_lib.JobLibCodeGen.get_job_queue(user_hash, all_jobs)
844
+ else:
845
+ user_hash = common_utils.get_current_user().id
710
846
 
711
847
  handle = backend_utils.check_cluster_available(
712
848
  cluster_name,
@@ -714,18 +850,49 @@ def queue(cluster_name: str,
714
850
  )
715
851
  backend = backend_utils.get_backend_from_handle(handle)
716
852
 
717
- returncode, jobs_payload, stderr = backend.run_on_head(handle,
718
- code,
719
- require_outputs=True,
720
- separate_stderr=True)
721
- subprocess_utils.handle_returncode(
722
- returncode,
723
- command=code,
724
- error_msg=f'Failed to get job queue on cluster {cluster_name}.',
725
- stderr=f'{jobs_payload + stderr}',
726
- stream_logs=True)
727
- jobs = job_lib.load_job_queue(jobs_payload)
728
- return jobs
853
+ use_legacy = not handle.is_grpc_enabled_with_flag
854
+
855
+ if not use_legacy:
856
+ try:
857
+ request = jobsv1_pb2.GetJobQueueRequest(user_hash=user_hash,
858
+ all_jobs=all_jobs)
859
+ response = backend_utils.invoke_skylet_with_retries(
860
+ lambda: cloud_vm_ray_backend.SkyletClient(
861
+ handle.get_grpc_channel()).get_job_queue(request))
862
+ jobs = []
863
+ for job_info in response.jobs:
864
+ job_dict = {
865
+ 'job_id': job_info.job_id,
866
+ 'job_name': job_info.job_name,
867
+ 'submitted_at': job_info.submitted_at,
868
+ 'status': job_lib.JobStatus.from_protobuf(job_info.status),
869
+ 'run_timestamp': job_info.run_timestamp,
870
+ 'start_at': job_info.start_at
871
+ if job_info.HasField('start_at') else None,
872
+ 'end_at': job_info.end_at
873
+ if job_info.HasField('end_at') else None,
874
+ 'resources': job_info.resources,
875
+ 'log_path': job_info.log_path,
876
+ 'user_hash': job_info.username,
877
+ }
878
+ # Copied from job_lib.load_job_queue.
879
+ user = global_user_state.get_user(job_dict['user_hash'])
880
+ job_dict['username'] = user.name if user is not None else None
881
+ jobs.append(job_dict)
882
+ except exceptions.SkyletMethodNotImplementedError:
883
+ use_legacy = True
884
+ if use_legacy:
885
+ code = job_lib.JobLibCodeGen.get_job_queue(user_hash, all_jobs)
886
+ returncode, jobs_payload, stderr = backend.run_on_head(
887
+ handle, code, require_outputs=True, separate_stderr=True)
888
+ subprocess_utils.handle_returncode(
889
+ returncode,
890
+ command=code,
891
+ error_msg=f'Failed to get job queue on cluster {cluster_name}.',
892
+ stderr=f'{jobs_payload + stderr}',
893
+ stream_logs=True)
894
+ jobs = job_lib.load_job_queue(jobs_payload)
895
+ return [responses.ClusterJobRecord.model_validate(job) for job in jobs]
729
896
 
730
897
 
731
898
  @usage_lib.entrypoint
@@ -795,8 +962,10 @@ def cancel(
795
962
  f'handle for cluster {cluster_name!r} should not be None')
796
963
 
797
964
  backend = backend_utils.get_backend_from_handle(handle)
965
+ user_hash: Optional[str] = common_utils.get_current_user().id
798
966
 
799
967
  if all_users:
968
+ user_hash = None
800
969
  sky_logging.print(
801
970
  f'{colorama.Fore.YELLOW}'
802
971
  f'Cancelling all users\' jobs on cluster {cluster_name!r}...'
@@ -821,7 +990,7 @@ def cancel(
821
990
  backend.cancel_jobs(handle,
822
991
  job_ids,
823
992
  cancel_all=all or all_users,
824
- user_hash=common_utils.get_user_hash())
993
+ user_hash=user_hash)
825
994
 
826
995
 
827
996
  @usage_lib.entrypoint
@@ -859,7 +1028,12 @@ def tail_logs(cluster_name: str,
859
1028
  backend = backend_utils.get_backend_from_handle(handle)
860
1029
 
861
1030
  usage_lib.record_cluster_name_for_current_operation(cluster_name)
862
- return backend.tail_logs(handle, job_id, follow=follow, tail=tail)
1031
+ # Although tail_logs returns an int when require_outputs=False (default),
1032
+ # we need to check returnval as an int to avoid type checking errors.
1033
+ returnval = backend.tail_logs(handle, job_id, follow=follow, tail=tail)
1034
+ assert isinstance(returnval,
1035
+ int), (f'returnval must be an int, but got {returnval}')
1036
+ return returnval
863
1037
 
864
1038
 
865
1039
  @usage_lib.entrypoint
@@ -958,25 +1132,25 @@ def job_status(cluster_name: str,
958
1132
  # = Storage Management =
959
1133
  # ======================
960
1134
  @usage_lib.entrypoint
961
- def storage_ls() -> List[Dict[str, Any]]:
1135
+ def storage_ls() -> List[responses.StorageRecord]:
962
1136
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
963
1137
  """Gets the storages.
964
1138
 
965
1139
  Returns:
966
- [
967
- {
968
- 'name': str,
969
- 'launched_at': int timestamp of creation,
970
- 'store': List[sky.StoreType],
971
- 'last_use': int timestamp of last use,
972
- 'status': sky.StorageStatus,
973
- }
974
- ]
1140
+ List[responses.StorageRecord]: A list of storage records.
975
1141
  """
976
1142
  storages = global_user_state.get_storage()
1143
+ storage_records = []
977
1144
  for storage in storages:
978
- storage['store'] = list(storage.pop('handle').sky_stores.keys())
979
- return storages
1145
+ storage_records.append(
1146
+ responses.StorageRecord(
1147
+ name=storage['name'],
1148
+ launched_at=storage['launched_at'],
1149
+ store=list(storage.pop('handle').sky_stores.keys()),
1150
+ last_use=storage['last_use'],
1151
+ status=storage['status'],
1152
+ ))
1153
+ return storage_records
980
1154
 
981
1155
 
982
1156
  @usage_lib.entrypoint
@@ -992,9 +1166,7 @@ def storage_delete(name: str) -> None:
992
1166
  if handle is None:
993
1167
  raise ValueError(f'Storage name {name!r} not found.')
994
1168
  else:
995
- storage_object = data.Storage(name=handle.storage_name,
996
- source=handle.source,
997
- sync_on_reconstruction=False)
1169
+ storage_object = data.Storage.from_handle(handle)
998
1170
  storage_object.delete()
999
1171
 
1000
1172
 
@@ -1002,20 +1174,49 @@ def storage_delete(name: str) -> None:
1002
1174
  # = Catalog Observe =
1003
1175
  # ===================
1004
1176
  @usage_lib.entrypoint
1005
- def enabled_clouds() -> List[clouds.Cloud]:
1006
- return global_user_state.get_cached_enabled_clouds(
1007
- sky_cloud.CloudCapability.COMPUTE)
1177
+ def enabled_clouds(workspace: Optional[str] = None,
1178
+ expand: bool = False) -> List[str]:
1179
+ if workspace is None:
1180
+ workspace = skypilot_config.get_active_workspace()
1181
+ cached_clouds = global_user_state.get_cached_enabled_clouds(
1182
+ sky_cloud.CloudCapability.COMPUTE, workspace=workspace)
1183
+ with skypilot_config.local_active_workspace_ctx(workspace):
1184
+ if not expand:
1185
+ return [cloud.canonical_name() for cloud in cached_clouds]
1186
+ enabled_ssh_infras = []
1187
+ enabled_k8s_infras = []
1188
+ enabled_cloud_infras = []
1189
+ for cloud in cached_clouds:
1190
+ cloud_infra = cloud.expand_infras()
1191
+ if isinstance(cloud, clouds.SSH):
1192
+ enabled_ssh_infras.extend(cloud_infra)
1193
+ elif isinstance(cloud, clouds.Kubernetes):
1194
+ enabled_k8s_infras.extend(cloud_infra)
1195
+ else:
1196
+ enabled_cloud_infras.extend(cloud_infra)
1197
+ all_infras = sorted(enabled_ssh_infras) + sorted(
1198
+ enabled_k8s_infras) + sorted(enabled_cloud_infras)
1199
+ return all_infras
1008
1200
 
1009
1201
 
1010
1202
  @usage_lib.entrypoint
1011
1203
  def realtime_kubernetes_gpu_availability(
1012
1204
  context: Optional[str] = None,
1013
1205
  name_filter: Optional[str] = None,
1014
- quantity_filter: Optional[int] = None
1206
+ quantity_filter: Optional[int] = None,
1207
+ is_ssh: Optional[bool] = None
1015
1208
  ) -> List[Tuple[str, List[models.RealtimeGpuAvailability]]]:
1016
1209
 
1017
1210
  if context is None:
1018
- context_list = clouds.Kubernetes.existing_allowed_contexts()
1211
+ # Include contexts from both Kubernetes and SSH clouds
1212
+ kubernetes_contexts = clouds.Kubernetes.existing_allowed_contexts()
1213
+ ssh_contexts = clouds.SSH.existing_allowed_contexts()
1214
+ if is_ssh is None:
1215
+ context_list = kubernetes_contexts + ssh_contexts
1216
+ elif is_ssh:
1217
+ context_list = ssh_contexts
1218
+ else:
1219
+ context_list = kubernetes_contexts
1019
1220
  else:
1020
1221
  context_list = [context]
1021
1222
 
@@ -1024,9 +1225,9 @@ def realtime_kubernetes_gpu_availability(
1024
1225
  name_filter: Optional[str] = None,
1025
1226
  quantity_filter: Optional[int] = None
1026
1227
  ) -> List[models.RealtimeGpuAvailability]:
1027
- counts, capacity, available = service_catalog.list_accelerator_realtime(
1228
+ counts, capacity, available = catalog.list_accelerator_realtime(
1028
1229
  gpus_only=True,
1029
- clouds='kubernetes',
1230
+ clouds='ssh' if is_ssh else 'kubernetes',
1030
1231
  name_filter=name_filter,
1031
1232
  region_filter=context,
1032
1233
  quantity_filter=quantity_filter,
@@ -1058,16 +1259,19 @@ def realtime_kubernetes_gpu_availability(
1058
1259
  name_filter=name_filter,
1059
1260
  quantity_filter=quantity_filter), context_list)
1060
1261
 
1262
+ cloud_identity = 'ssh' if is_ssh else 'kubernetes'
1263
+ cloud_identity_capital = 'SSH' if is_ssh else 'Kubernetes'
1264
+
1061
1265
  for ctx, queried in zip(context_list, parallel_queried):
1062
1266
  cumulative_count += len(queried)
1063
1267
  if len(queried) == 0:
1064
1268
  # don't add gpu results for clusters that don't have any
1065
- logger.debug(f'No gpus found in k8s cluster {ctx}')
1269
+ logger.debug(f'No gpus found in {cloud_identity} cluster {ctx}')
1066
1270
  continue
1067
1271
  availability_lists.append((ctx, queried))
1068
1272
 
1069
1273
  if cumulative_count == 0:
1070
- err_msg = 'No GPUs found in any Kubernetes clusters. '
1274
+ err_msg = f'No GPUs found in any {cloud_identity_capital} clusters. '
1071
1275
  debug_msg = 'To further debug, run: sky check '
1072
1276
  if name_filter is not None:
1073
1277
  gpu_info_msg = f' {name_filter!r}'
@@ -1075,9 +1279,9 @@ def realtime_kubernetes_gpu_availability(
1075
1279
  gpu_info_msg += (' with requested quantity'
1076
1280
  f' {quantity_filter}')
1077
1281
  err_msg = (f'Resources{gpu_info_msg} not found '
1078
- 'in Kubernetes clusters. ')
1079
- debug_msg = ('To show available accelerators on kubernetes,'
1080
- ' run: sky show-gpus --cloud kubernetes ')
1282
+ f'in {cloud_identity_capital} clusters. ')
1283
+ debug_msg = (f'To show available accelerators on {cloud_identity}, '
1284
+ f' run: sky show-gpus --cloud {cloud_identity} ')
1081
1285
  full_err_msg = (err_msg + kubernetes_constants.NO_GPU_HELP_MESSAGE +
1082
1286
  debug_msg)
1083
1287
  raise ValueError(full_err_msg)
@@ -1094,7 +1298,9 @@ def local_up(gpus: bool,
1094
1298
  ssh_key: Optional[str],
1095
1299
  cleanup: bool,
1096
1300
  context_name: Optional[str] = None,
1097
- password: Optional[str] = None) -> None:
1301
+ password: Optional[str] = None,
1302
+ name: Optional[str] = None,
1303
+ port_start: Optional[int] = None) -> None:
1098
1304
  """Creates a local or remote cluster."""
1099
1305
 
1100
1306
  def _validate_args(ips, ssh_user, ssh_key, cleanup):
@@ -1124,54 +1330,58 @@ def local_up(gpus: bool,
1124
1330
  password)
1125
1331
  else:
1126
1332
  # Run local deployment (kind) if no remote args are specified
1127
- kubernetes_deploy_utils.deploy_local_cluster(gpus)
1333
+ kubernetes_deploy_utils.deploy_local_cluster(name, port_start, gpus)
1128
1334
 
1129
1335
 
1130
- def local_down() -> None:
1336
+ def local_down(name: Optional[str] = None) -> None:
1131
1337
  """Tears down the Kubernetes cluster started by local_up."""
1132
- cluster_removed = False
1338
+ kubernetes_deploy_utils.teardown_local_cluster(name)
1133
1339
 
1134
- path_to_package = os.path.dirname(__file__)
1135
- down_script_path = os.path.join(path_to_package, 'utils/kubernetes',
1136
- 'delete_cluster.sh')
1137
1340
 
1138
- cwd = os.path.dirname(os.path.abspath(down_script_path))
1139
- run_command = shlex.split(down_script_path)
1341
+ @usage_lib.entrypoint
1342
+ def ssh_up(infra: Optional[str] = None, cleanup: bool = False) -> None:
1343
+ """Deploys or tears down a Kubernetes cluster on SSH targets.
1140
1344
 
1141
- # Setup logging paths
1142
- run_timestamp = sky_logging.get_run_timestamp()
1143
- log_path = os.path.join(constants.SKY_LOGS_DIRECTORY, run_timestamp,
1144
- 'local_down.log')
1345
+ Args:
1346
+ infra: Name of the cluster configuration in ssh_node_pools.yaml.
1347
+ If None, the first cluster in the file is used.
1348
+ cleanup: If True, clean up the cluster instead of deploying.
1349
+ """
1350
+ kubernetes_deploy_utils.deploy_ssh_cluster(
1351
+ cleanup=cleanup,
1352
+ infra=infra,
1353
+ )
1145
1354
 
1146
- with rich_utils.safe_status(
1147
- ux_utils.spinner_message('Removing local cluster',
1148
- log_path=log_path,
1149
- is_local=True)):
1150
-
1151
- returncode, stdout, stderr = log_lib.run_with_log(cmd=run_command,
1152
- log_path=log_path,
1153
- require_outputs=True,
1154
- stream_logs=False,
1155
- cwd=cwd)
1156
- stderr = stderr.replace('No kind clusters found.\n', '')
1157
-
1158
- if returncode == 0:
1159
- cluster_removed = True
1160
- elif returncode == 100:
1161
- logger.info(ux_utils.error_message('Local cluster does not exist.'))
1162
- else:
1163
- with ux_utils.print_exception_no_traceback():
1164
- raise RuntimeError('Failed to create local cluster. '
1165
- f'Stdout: {stdout}'
1166
- f'\nError: {stderr}')
1167
- if cluster_removed:
1168
- # Run sky check
1169
- with rich_utils.safe_status(
1170
- ux_utils.spinner_message('Running sky check...')):
1171
- sky_check.check_capability(sky_cloud.CloudCapability.COMPUTE,
1172
- clouds=['kubernetes'],
1173
- quiet=True)
1174
- logger.info(
1175
- ux_utils.finishing_message('Local cluster removed.',
1176
- log_path=log_path,
1177
- is_local=True))
1355
+
1356
+ @usage_lib.entrypoint
1357
+ def ssh_status(context_name: str) -> Tuple[bool, str]:
1358
+ """Check the status of an SSH Node Pool context.
1359
+
1360
+ Args:
1361
+ context_name: The SSH context name (e.g., 'ssh-my-cluster')
1362
+
1363
+ Returns:
1364
+ Tuple[bool, str]: (is_ready, reason)
1365
+ - is_ready: True if the SSH Node Pool is ready, False otherwise
1366
+ - reason: Explanation of the status
1367
+ """
1368
+ try:
1369
+ is_ready, reason = clouds.SSH.check_single_context(context_name)
1370
+ return is_ready, reason
1371
+ except Exception as e: # pylint: disable=broad-except
1372
+ return False, ('Failed to check SSH context: '
1373
+ f'{common_utils.format_exception(e)}')
1374
+
1375
+
1376
+ def get_all_contexts() -> List[str]:
1377
+ """Get all available contexts from Kubernetes and SSH clouds.
1378
+
1379
+ Returns:
1380
+ List[str]: A list of all available context names.
1381
+ """
1382
+ kube_contexts = clouds.Kubernetes.existing_allowed_contexts()
1383
+ ssh_contexts = clouds.SSH.get_ssh_node_pool_contexts()
1384
+ # Ensure ssh_contexts are prefixed appropriately if not already
1385
+ # For now, assuming get_ssh_node_pool_contexts already returns them
1386
+ # in the desired format (e.g., 'ssh-my-cluster')
1387
+ return sorted(list(set(kube_contexts + ssh_contexts)))