skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,153 @@
1
+ """Utils for managing SkyPilot SSH key pairs."""
2
+
3
+ import functools
4
+ import os
5
+ from typing import Tuple
6
+
7
+ import filelock
8
+
9
+ from sky import global_user_state
10
+ from sky import sky_logging
11
+ from sky.utils import common_utils
12
+
13
+ logger = sky_logging.init_logger(__name__)
14
+
15
+ MAX_TRIALS = 64
16
+ # TODO(zhwu): Support user specified key pair.
17
+ # We intentionally do not store the ssh key pair in
18
+ # ~/.sky/api_server/clients, i.e. sky.server.common.API_SERVER_CLIENT_DIR,
19
+ # because the ssh key pair needs to persist across API server restarts, while
20
+ # that directory is ephemeral.
21
+ _SSH_KEY_PATH_PREFIX = '~/.sky/clients/{user_hash}/ssh'
22
+
23
+
24
def get_ssh_key_and_lock_path(user_hash: str) -> Tuple[str, str, str]:
    """Return the SSH key file paths and lock file path for a user.

    The per-user key directory is created (requested mode 0o700) as a side
    effect if it does not exist yet.

    Args:
        user_hash: Hash identifying the user; substituted into the key
            directory template.

    Returns:
        A ``(private_key_path, public_key_path, lock_path)`` tuple. The paths
        are joined onto the *unexpanded* directory template, so any leading
        ``~`` is left for the caller to expand.
    """
    ssh_dir = _SSH_KEY_PATH_PREFIX.format(user_hash=user_hash)

    # Only the directory creation uses the expanded form; the returned paths
    # deliberately keep the template's original spelling.
    os.makedirs(os.path.expanduser(ssh_dir), mode=0o700, exist_ok=True)

    file_names = ('sky-key', 'sky-key.pub', '.__internal-sky-key.lock')
    private_key_path, public_key_path, lock_path = (
        os.path.join(ssh_dir, file_name) for file_name in file_names)
    return private_key_path, public_key_path, lock_path
34
+
35
+
36
def _generate_rsa_key_pair() -> Tuple[str, str]:
    """Generate a new 2048-bit RSA key pair.

    Returns:
        A ``(public_key, private_key)`` tuple -- note that the public key
        comes first. The public key is serialized in OpenSSH format and the
        private key as unencrypted PEM (TraditionalOpenSSL); both are UTF-8
        strings with surrounding whitespace stripped.
    """
    # The cryptography imports stay local so that the expensive third-party
    # import is only paid when a key actually has to be generated.
    # pylint: disable=import-outside-toplevel
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import serialization
    from cryptography.hazmat.primitives.asymmetric import rsa

    rsa_key = rsa.generate_private_key(public_exponent=65537,
                                       key_size=2048,
                                       backend=default_backend())

    private_pem = rsa_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.TraditionalOpenSSL,
        encryption_algorithm=serialization.NoEncryption())
    private_key = private_pem.decode('utf-8').strip()

    public_openssh = rsa_key.public_key().public_bytes(
        encoding=serialization.Encoding.OpenSSH,
        format=serialization.PublicFormat.OpenSSH)
    public_key = public_openssh.decode('utf-8').strip()

    return public_key, private_key
59
+
60
+
61
def _save_key_pair(private_key_path: str, public_key_path: str,
                   private_key: str, public_key: str) -> None:
    """Write an SSH key pair to disk.

    The directory containing the private key is created (requested mode
    0o700) if needed. The private key file always ends up with mode 0o600,
    even when a file with looser permissions already existed at that path.

    Args:
        private_key_path: Destination file for the private key.
        public_key_path: Destination file for the public key.
        private_key: Private key contents.
        public_key: Public key contents.
    """
    key_dir = os.path.dirname(private_key_path)
    # A bare file name has no directory component; os.makedirs('') would
    # raise, so only create the directory when there is one.
    if key_dir:
        os.makedirs(key_dir, exist_ok=True, mode=0o700)

    with open(
            private_key_path,
            'w',
            encoding='utf-8',
            opener=functools.partial(os.open, mode=0o600),
    ) as f:
        # The mode passed to os.open only applies when the file is newly
        # created. Tighten permissions explicitly (the file is already
        # truncated here) before any secret material is written, so a
        # pre-existing, more permissive file never exposes the key.
        os.chmod(private_key_path, 0o600)
        f.write(private_key)

    with open(public_key_path,
              'w',
              encoding='utf-8',
              opener=functools.partial(os.open, mode=0o644)) as f:
        f.write(public_key)
79
+
80
+
81
def get_or_generate_keys() -> Tuple[str, str]:
    """Return the user-expanded private and public key paths.

    If the private key file is missing, the key pair is taken from
    ``global_user_state`` when one is stored there; otherwise a new RSA pair
    is generated and recorded in ``global_user_state``. Either way the pair
    is then written to disk. The check-and-write runs under a file lock.

    Returns:
        A ``(private_key_path, public_key_path)`` tuple.
    """
    user_hash = common_utils.get_user_hash()
    private_key_path, public_key_path, lock_path = (
        os.path.expanduser(path)
        for path in get_ssh_key_and_lock_path(user_hash))

    # The lock file lives next to the keys; make sure that directory exists
    # (requested mode 0o700) before trying to take the lock.
    os.makedirs(os.path.dirname(lock_path), exist_ok=True, mode=0o700)

    with filelock.FileLock(lock_path, timeout=10):
        private_key_on_disk = os.path.exists(private_key_path)
        if not private_key_on_disk:
            ssh_public_key, ssh_private_key, exists = (
                global_user_state.get_ssh_keys(user_hash))
            if not exists:
                # Nothing stored yet: create a pair and persist it first.
                ssh_public_key, ssh_private_key = _generate_rsa_key_pair()
                global_user_state.set_ssh_keys(user_hash, ssh_public_key,
                                               ssh_private_key)
            _save_key_pair(private_key_path, public_key_path,
                           ssh_private_key, ssh_public_key)

    assert os.path.exists(public_key_path), (
        'Private key found, but associated public key '
        f'{public_key_path} does not exist.')
    return private_key_path, public_key_path
109
+
110
+
111
def create_ssh_key_files_from_db(private_key_path: str) -> bool:
    """Materialize the SSH key files on disk from the stored key pair.

    Args:
        private_key_path: Already-expanded private key path, expected to end
            in ``<user_hash>/ssh/sky-key``.

    Returns:
        True if both key files exist when the function finishes, False if no
        key pair is stored for the user.
    """
    # The user hash is recovered from the path itself:
    # .../<user_hash>/ssh/sky-key
    path_parts = os.path.normpath(private_key_path).split(os.path.sep)
    assert path_parts[-1] == 'sky-key'
    assert path_parts[-2] == 'ssh'
    user_hash = path_parts[-3]

    expected_private_key_path, public_key_path, lock_path = (
        os.path.expanduser(path)
        for path in get_ssh_key_and_lock_path(user_hash))
    assert private_key_path == expected_private_key_path, (
        f'Private key path {private_key_path} does not '
        'match the generated path '
        f'{expected_private_key_path}')
    private_key_path = os.path.expanduser(private_key_path)

    # Fast path: both files are already present, no lock needed.
    if os.path.exists(private_key_path) and os.path.exists(public_key_path):
        return True

    # The lock file lives next to the keys; make sure that directory exists
    # (requested mode 0o700) before trying to take the lock.
    os.makedirs(os.path.dirname(lock_path), exist_ok=True, mode=0o700)

    with filelock.FileLock(lock_path, timeout=10):
        private_key_on_disk = os.path.exists(private_key_path)
        if not private_key_on_disk:
            ssh_public_key, ssh_private_key, exists = (
                global_user_state.get_ssh_keys(user_hash))
            if not exists:
                logger.debug(f'SSH keys not found for user {user_hash}')
                return False
            _save_key_pair(private_key_path, public_key_path,
                           ssh_private_key, ssh_public_key)

    assert os.path.exists(public_key_path), (
        'Private key found, but associated public key '
        f'{public_key_path} does not exist.')
    return True
@@ -0,0 +1,60 @@
1
+ """Utility functions for benchmarking."""
2
+
3
+ import functools
4
+ import logging
5
+ import time
6
+ from typing import Callable, Optional
7
+
8
+ from sky import sky_logging
9
+
10
+ logger = sky_logging.init_logger(__name__)
11
+
12
+
13
def log_execution_time(func: Optional[Callable] = None,
                       *,
                       name: Optional[str] = None,
                       level: int = logging.DEBUG,
                       precision: int = 4) -> Callable:
    """Mark a function and log its execution time.

    Can be used bare (``@log_execution_time``) or with keyword arguments
    (``@log_execution_time(name=...)``). The elapsed time is logged even when
    the wrapped function raises.

    Args:
        func: Function to decorate. None when the decorator is called with
            keyword arguments only.
        name: Name to show in the log line. Defaults to the decorated
            function's ``__name__``.
        level: Logging level.
        precision: Number of decimal places (default: 4).

    Returns:
        The wrapped function when ``func`` is given, otherwise a decorator
        that wraps the function it is applied to.

    Usage:
        from sky.utils import benchmark_utils

        @benchmark_utils.log_execution_time
        def my_function():
            pass

        @benchmark_utils.log_execution_time(name='my_module.my_function2')
        def my_function2():
            pass
    """

    def decorator(f: Callable) -> Callable:
        # Resolve the label once per decorated function. Writing it back to
        # the enclosing `name` would leak the first function's name to every
        # other function decorated through the same factory result.
        display_name = name or getattr(f, '__name__', repr(f))

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            start_time = time.perf_counter()
            try:
                return f(*args, **kwargs)
            finally:
                # In `finally` so that failing calls are timed as well.
                execution_time = time.perf_counter() - start_time
                log = (f'Method {display_name} executed in '
                       f'{execution_time:.{precision}f}')
                logger.log(level, log)

        return wrapper

    if func is None:
        return decorator
    return decorator(func)
@@ -6,11 +6,12 @@ import click
6
6
  import colorama
7
7
 
8
8
  from sky import backends
9
+ from sky.schemas.api import responses
9
10
  from sky.utils import common_utils
10
- from sky.utils import controller_utils
11
11
  from sky.utils import log_utils
12
12
  from sky.utils import resources_utils
13
13
  from sky.utils import status_lib
14
+ from sky.utils import ux_utils
14
15
 
15
16
  if typing.TYPE_CHECKING:
16
17
  from sky.provision.kubernetes import utils as kubernetes_utils
@@ -33,24 +34,23 @@ class StatusColumn:
33
34
  def __init__(self,
34
35
  name: str,
35
36
  calc_func: Callable,
36
- trunc_length: int = 0,
37
+ truncate: bool = True,
37
38
  show_by_default: bool = True):
38
39
  self.name = name
39
40
  self.calc_func = calc_func
40
- self.trunc_length = trunc_length
41
+ self.truncate: bool = truncate
41
42
  self.show_by_default = show_by_default
42
43
 
43
44
  def calc(self, record):
44
- val = self.calc_func(record)
45
- if self.trunc_length != 0:
46
- val = common_utils.truncate_long_string(str(val), self.trunc_length)
45
+ val = self.calc_func(record, self.truncate)
47
46
  return val
48
47
 
49
48
 
50
- def show_status_table(cluster_records: List[_ClusterRecord],
49
+ def show_status_table(cluster_records: List[responses.StatusResponse],
51
50
  show_all: bool,
52
51
  show_user: bool,
53
- query_clusters: Optional[List[str]] = None) -> int:
52
+ query_clusters: Optional[List[str]] = None,
53
+ show_workspaces: bool = False) -> int:
54
54
  """Compute cluster table values and display.
55
55
 
56
56
  Returns:
@@ -58,7 +58,6 @@ def show_status_table(cluster_records: List[_ClusterRecord],
58
58
  STOPPED.
59
59
  """
60
60
  # TODO(zhwu): Update the information for autostop clusters.
61
-
62
61
  status_columns = [
63
62
  StatusColumn('NAME', _get_name),
64
63
  ]
@@ -68,19 +67,24 @@ def show_status_table(cluster_records: List[_ClusterRecord],
68
67
  StatusColumn('USER_ID', _get_user_hash, show_by_default=False))
69
68
 
70
69
  status_columns += [
71
- StatusColumn('LAUNCHED', _get_launched),
72
- StatusColumn('RESOURCES',
73
- _get_resources,
74
- trunc_length=70 if not show_all else 0),
75
- StatusColumn('REGION', _get_region, show_by_default=False),
76
- StatusColumn('ZONE', _get_zone, show_by_default=False),
70
+ StatusColumn('WORKSPACE',
71
+ _get_workspace,
72
+ show_by_default=show_workspaces),
73
+ StatusColumn('INFRA', _get_infra, truncate=not show_all),
74
+ StatusColumn('RESOURCES', _get_resources, truncate=not show_all),
77
75
  StatusColumn('STATUS', _get_status_colored),
78
76
  StatusColumn('AUTOSTOP', _get_autostop),
79
- StatusColumn('HEAD_IP', _get_head_ip, show_by_default=False),
80
- StatusColumn('COMMAND',
81
- _get_command,
82
- trunc_length=COMMAND_TRUNC_LENGTH if not show_all else 0),
77
+ StatusColumn('LAUNCHED', _get_launched),
83
78
  ]
79
+ if show_all:
80
+ status_columns += [
81
+ StatusColumn('HEAD_IP', _get_head_ip, show_by_default=False),
82
+ StatusColumn('COMMAND',
83
+ _get_command,
84
+ truncate=not show_all,
85
+ show_by_default=False),
86
+ StatusColumn('LAST_EVENT', _get_last_event, show_by_default=False),
87
+ ]
84
88
 
85
89
  columns = []
86
90
  for status_column in status_columns:
@@ -102,17 +106,16 @@ def show_status_table(cluster_records: List[_ClusterRecord],
102
106
 
103
107
  if query_clusters:
104
108
  cluster_names = {record['name'] for record in cluster_records}
105
- not_found_clusters = [
106
- repr(cluster)
107
- for cluster in query_clusters
108
- if cluster not in cluster_names
109
- ]
110
- cluster_str = 'Cluster'
111
- if len(not_found_clusters) > 1:
112
- cluster_str += 's'
113
- cluster_str += ' '
114
- cluster_str += ', '.join(not_found_clusters)
115
- click.echo(f'{cluster_str} not found.')
109
+ not_found_clusters = ux_utils.get_non_matched_query(
110
+ query_clusters, cluster_names)
111
+ not_found_clusters = [repr(cluster) for cluster in not_found_clusters]
112
+ if not_found_clusters:
113
+ cluster_str = 'Cluster'
114
+ if len(not_found_clusters) > 1:
115
+ cluster_str += 's'
116
+ cluster_str += ' '
117
+ cluster_str += ', '.join(not_found_clusters)
118
+ click.echo(f'{cluster_str} not found.')
116
119
  elif not cluster_records:
117
120
  click.echo('No existing clusters.')
118
121
  return num_pending_autostop
@@ -134,7 +137,8 @@ def get_total_cost_of_displayed_records(
134
137
 
135
138
  def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
136
139
  show_all: bool,
137
- controller_name: Optional[str] = None):
140
+ controller_name: Optional[str] = None,
141
+ days: Optional[int] = None):
138
142
  """Compute cluster table values and display for cost report.
139
143
 
140
144
  For each cluster, this shows: cluster name, resources, launched time,
@@ -160,10 +164,10 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
160
164
  status_columns = [
161
165
  StatusColumn('NAME', _get_name),
162
166
  StatusColumn('LAUNCHED', _get_launched),
163
- StatusColumn('DURATION', _get_duration, trunc_length=20),
167
+ StatusColumn('DURATION', _get_duration, truncate=False),
164
168
  StatusColumn('RESOURCES',
165
169
  _get_resources_for_cost_report,
166
- trunc_length=70 if not show_all else 0),
170
+ truncate=False),
167
171
  StatusColumn('STATUS',
168
172
  _get_status_for_cost_report,
169
173
  show_by_default=True),
@@ -197,22 +201,21 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
197
201
  cluster_table.add_row(row)
198
202
 
199
203
  if cluster_records:
204
+ controller_record = cluster_records[0]
200
205
  if controller_name is not None:
201
- controller = controller_utils.Controllers.from_name(controller_name)
202
- if controller is None:
203
- raise ValueError(f'Controller {controller_name} not found.')
204
- autostop_minutes, _ = (
205
- controller_utils.get_controller_autostop_config(
206
- controller=controller))
207
- if autostop_minutes is not None:
206
+ autostop = controller_record.get('autostop', None)
207
+ autostop_str = ''
208
+ if autostop is not None:
208
209
  autostop_str = (f'{colorama.Style.DIM} (will be autostopped if '
209
- f'idle for {autostop_minutes}min)'
210
+ f'idle for {autostop}min)'
210
211
  f'{colorama.Style.RESET_ALL}')
211
212
  click.echo(f'\n{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
212
213
  f'{controller_name}{colorama.Style.RESET_ALL}'
213
214
  f'{autostop_str}')
214
215
  else:
215
- click.echo(f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}Clusters'
216
+ days_str = '' if days is None else f' (last {days} days)'
217
+ click.echo(f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
218
+ f'Clusters{days_str}'
216
219
  f'{colorama.Style.RESET_ALL}')
217
220
  click.echo(cluster_table)
218
221
 
@@ -220,47 +223,80 @@ def show_cost_report_table(cluster_records: List[_ClusterCostReportRecord],
220
223
  # Some of these lambdas are invoked on both _ClusterRecord and
221
224
  # _ClusterCostReportRecord, which is okay as we guarantee the queried fields
222
225
  # exist in those cases.
223
- _get_name = (lambda cluster_record: cluster_record['name'])
224
- _get_user_hash = (lambda cluster_record: cluster_record['user_hash'])
225
- _get_user_name = (lambda cluster_record: cluster_record.get('user_name', '-'))
226
- _get_launched = (lambda cluster_record: log_utils.readable_time_duration(
226
+ _get_name = (lambda cluster_record, _: cluster_record['name'])
227
+ _get_user_hash = (lambda cluster_record, _: cluster_record['user_hash'])
228
+ _get_user_name = (
229
+ lambda cluster_record, _: cluster_record.get('user_name', '-'))
230
+ _get_launched = (lambda cluster_record, _: log_utils.readable_time_duration(
227
231
  cluster_record['launched_at']))
228
- _get_region = (
229
- lambda clusters_status: clusters_status['handle'].launched_resources.region)
230
- _get_command = (lambda cluster_record: cluster_record['last_use'])
231
- _get_duration = (lambda cluster_record: log_utils.readable_time_duration(
232
+ _get_duration = (lambda cluster_record, _: log_utils.readable_time_duration(
232
233
  0, cluster_record['duration'], absolute=True))
233
234
 
234
235
 
235
- def _get_status(cluster_record: _ClusterRecord) -> status_lib.ClusterStatus:
236
- return cluster_record['status']
236
+ def _get_command(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
237
+ command = cluster_record.get('last_use', '-')
238
+ if truncate:
239
+ return common_utils.truncate_long_string(command, COMMAND_TRUNC_LENGTH)
240
+ return command
237
241
 
238
242
 
239
- def _get_status_colored(cluster_record: _ClusterRecord) -> str:
240
- return _get_status(cluster_record).colored_str()
243
+ def _get_status(cluster_record: _ClusterRecord,
244
+ truncate: bool = True) -> status_lib.ClusterStatus:
245
+ del truncate
246
+ return cluster_record['status']
241
247
 
242
248
 
243
- def _get_resources(cluster_record: _ClusterRecord) -> str:
244
- if 'resources_str' in cluster_record:
245
- return cluster_record['resources_str']
246
- handle = cluster_record['handle']
247
- if isinstance(handle, backends.LocalDockerResourceHandle):
248
- resources_str = 'docker'
249
- elif isinstance(handle, backends.CloudVmRayResourceHandle):
250
- resources_str = resources_utils.get_readable_resources_repr(handle)
251
- else:
252
- raise ValueError(f'Unknown handle type {type(handle)} encountered.')
253
- return resources_str
249
+ def _get_workspace(cluster_record: _ClusterRecord,
250
+ truncate: bool = True) -> str:
251
+ del truncate
252
+ return cluster_record['workspace']
254
253
 
255
254
 
256
- def _get_zone(cluster_record: _ClusterRecord) -> str:
257
- zone_str = cluster_record['handle'].launched_resources.zone
258
- if zone_str is None:
259
- zone_str = '-'
260
- return zone_str
255
def _get_status_colored(cluster_record: _ClusterRecord,
                        truncate: bool = True) -> str:
    """Return the cluster status rendered through its colored_str() method.

    `truncate` is ignored.
    """
    del truncate  # Unused.
    status = _get_status(cluster_record)
    return status.colored_str()
261
259
 
262
260
 
263
- def _get_autostop(cluster_record: _ClusterRecord) -> str:
261
def _get_resources(cluster_record: _ClusterRecord,
                   truncate: bool = True) -> str:
    """Get the resources information for a cluster.

    Precomputed strings on the record ('resources_str', and
    'resources_str_full' when not truncating) are preferred; the
    representation is derived from the handle only when none is available.

    Returns:
        A string in one of the following formats:
        - For cloud VMs: "Nx instance_type" (e.g., "1x m6i.2xlarge")
        - For K8S/SSH: "Nx (...)"
        - "-" if no resource information is available
    """
    handle = cluster_record['handle']
    if not isinstance(handle, backends.CloudVmRayResourceHandle):
        return '-'
    if handle.launched_resources is None:
        return '-'

    # For cloud VMs, show instance type directly
    # For K8S/SSH, show (...) as the resource type
    shown = cluster_record.get('resources_str', None)
    if not truncate:
        # Prefer the full form when it was stored; otherwise keep whatever
        # 'resources_str' provided.
        stored_full = cluster_record.get('resources_str_full', None)
        if stored_full is not None:
            shown = stored_full
    if shown is not None:
        return shown

    # Nothing precomputed on the record: build the string from the handle.
    simple_repr, full_repr = resources_utils.get_readable_resources_repr(
        handle, simplified_only=truncate)
    if truncate:
        return simple_repr
    assert full_repr is not None
    return full_repr
296
+
297
+
298
+ def _get_autostop(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
299
+ del truncate
264
300
  autostop_str = ''
265
301
  separation = ''
266
302
  if cluster_record['autostop'] >= 0:
@@ -275,7 +311,8 @@ def _get_autostop(cluster_record: _ClusterRecord) -> str:
275
311
  return autostop_str
276
312
 
277
313
 
278
- def _get_head_ip(cluster_record: _ClusterRecord) -> str:
314
+ def _get_head_ip(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
315
+ del truncate # Unused
279
316
  handle = cluster_record['handle']
280
317
  if not isinstance(handle, backends.CloudVmRayResourceHandle):
281
318
  return '-'
@@ -284,17 +321,46 @@ def _get_head_ip(cluster_record: _ClusterRecord) -> str:
284
321
  return handle.head_ip
285
322
 
286
323
 
324
+ def _get_last_event(cluster_record: _ClusterRecord,
325
+ truncate: bool = True) -> str:
326
+ del truncate
327
+ if cluster_record.get('last_event', None) is None:
328
+ return 'No recorded events.'
329
+ return cluster_record['last_event']
330
+
331
+
287
332
def _is_pending_autostop(cluster_record: _ClusterRecord) -> bool:
    """Return whether an autostop is scheduled on a cluster not yet STOPPED."""
    # autostop < 0 means nothing scheduled.
    has_schedule = cluster_record['autostop'] >= 0
    if not has_schedule:
        return has_schedule
    current_status = _get_status(cluster_record)
    return current_status != status_lib.ClusterStatus.STOPPED
291
336
 
292
337
 
338
def _get_infra(cluster_record: _ClusterRecord, truncate: bool = True) -> str:
    """Get the infrastructure information for a cluster.

    Args:
        cluster_record: Cluster record; its 'handle' entry is inspected.
        truncate: Forwarded to the infra object's formatted_str().

    Returns:
        A string in one of the following formats:
        - AWS/region (e.g., "AWS/us-east-1")
        - K8S/context (e.g., "K8S/my-ctx")
        - SSH/hostname (e.g., "SSH/my-tobi-box")
        - "-" if no infrastructure information is available
    """
    handle = cluster_record['handle']
    if not isinstance(handle, backends.CloudVmRayResourceHandle):
        return '-'
    launched = handle.launched_resources
    if launched is None:
        # If launched_resources is None, try to get infra from the record
        return cluster_record.get('infra', '-')
    return launched.infra.formatted_str(truncate)
355
+
356
+
293
357
  # ---- 'sky cost-report' helper functions below ----
294
358
 
295
359
 
296
360
  def _get_status_value_for_cost_report(
297
- cluster_cost_report_record: _ClusterCostReportRecord) -> int:
361
+ cluster_cost_report_record: _ClusterCostReportRecord,
362
+ truncate: bool = True) -> int:
363
+ del truncate
298
364
  status = cluster_cost_report_record['status']
299
365
  if status is None:
300
366
  return -1
@@ -302,7 +368,9 @@ def _get_status_value_for_cost_report(
302
368
 
303
369
 
304
370
  def _get_status_for_cost_report(
305
- cluster_cost_report_record: _ClusterCostReportRecord) -> str:
371
+ cluster_cost_report_record: _ClusterCostReportRecord,
372
+ truncate: bool = True) -> str:
373
+ del truncate
306
374
  status = cluster_cost_report_record['status']
307
375
  if status is None:
308
376
  return f'{colorama.Style.DIM}TERMINATED{colorama.Style.RESET_ALL}'
@@ -310,7 +378,9 @@ def _get_status_for_cost_report(
310
378
 
311
379
 
312
380
  def _get_resources_for_cost_report(
313
- cluster_cost_report_record: _ClusterCostReportRecord) -> str:
381
+ cluster_cost_report_record: _ClusterCostReportRecord,
382
+ truncate: bool = True) -> str:
383
+ del truncate
314
384
  launched_nodes = cluster_cost_report_record['num_nodes']
315
385
  launched_resources = cluster_cost_report_record['resources']
316
386
 
@@ -322,7 +392,9 @@ def _get_resources_for_cost_report(
322
392
 
323
393
 
324
394
  def _get_price_for_cost_report(
325
- cluster_cost_report_record: _ClusterCostReportRecord) -> str:
395
+ cluster_cost_report_record: _ClusterCostReportRecord,
396
+ truncate: bool = True) -> str:
397
+ del truncate
326
398
  launched_nodes = cluster_cost_report_record['num_nodes']
327
399
  launched_resources = cluster_cost_report_record['resources']
328
400
 
@@ -332,7 +404,9 @@ def _get_price_for_cost_report(
332
404
 
333
405
 
334
406
  def _get_estimated_cost_for_cost_report(
335
- cluster_cost_report_record: _ClusterCostReportRecord) -> str:
407
+ cluster_cost_report_record: _ClusterCostReportRecord,
408
+ truncate: bool = True) -> str:
409
+ del truncate
336
410
  cost = cluster_cost_report_record['total_cost']
337
411
 
338
412
  if not cost:
@@ -342,18 +416,17 @@ def _get_estimated_cost_for_cost_report(
342
416
 
343
417
 
344
418
  def show_kubernetes_cluster_status_table(
345
- clusters: List['kubernetes_utils.KubernetesSkyPilotClusterInfo'],
419
+ clusters: List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
346
420
  show_all: bool) -> None:
347
421
  """Compute cluster table values and display for Kubernetes clusters."""
348
422
  status_columns = [
349
- StatusColumn('USER', lambda c: c.user),
350
- StatusColumn('NAME', lambda c: c.cluster_name),
351
- StatusColumn('LAUNCHED',
352
- lambda c: log_utils.readable_time_duration(c.launched_at)),
353
- StatusColumn('RESOURCES',
354
- lambda c: c.resources_str,
355
- trunc_length=70 if not show_all else 0),
356
- StatusColumn('STATUS', lambda c: c.status.colored_str()),
423
+ StatusColumn('USER', lambda c, _: c.user),
424
+ StatusColumn('NAME', lambda c, _: c.cluster_name),
425
+ StatusColumn('RESOURCES', lambda c, _: c.resources_str, truncate=False),
426
+ StatusColumn('STATUS', lambda c, _: c.status.colored_str()),
427
+ StatusColumn(
428
+ 'LAUNCHED',
429
+ lambda c, _: log_utils.readable_time_duration(c.launched_at)),
357
430
  # TODO(romilb): We should consider adding POD_NAME field here when --all
358
431
  # is passed to help users fetch pod name programmatically.
359
432
  ]