skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
@@ -168,7 +168,7 @@ def build_dockerimage(task: task_mod.Task,
168
168
  build_dir=temp_dir)
169
169
 
170
170
  dst = os.path.join(temp_dir, SKY_DOCKER_WORKDIR)
171
- if task.workdir is not None:
171
+ if task.workdir is not None and isinstance(task.workdir, str):
172
172
  # Copy workdir contents to tempdir
173
173
  shutil.copytree(os.path.expanduser(task.workdir), dst)
174
174
  else:
@@ -178,7 +178,8 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
178
178
  return handle, False
179
179
 
180
180
  def _sync_workdir(self, handle: LocalDockerResourceHandle,
181
- workdir: Path) -> None:
181
+ workdir: Union[Path, Dict[str, Any]],
182
+ envs_and_secrets: Dict[str, str]) -> None:
182
183
  """Workdir is sync'd by adding to the docker image.
183
184
 
184
185
  This happens in the execute step.
@@ -188,6 +189,15 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
188
189
  ' a NoOp. If you are running sky exec, your workdir has not'
189
190
  ' been updated.')
190
191
 
192
+ def _download_file(self, handle: LocalDockerResourceHandle,
193
+ local_file_path: str, remote_file_path: str) -> None:
194
+ """Syncs file from remote to local."""
195
+ # Copy from docker container to local
196
+ container = self.containers[handle]
197
+ copy_cmd = (
198
+ f'docker cp {container.name}:{remote_file_path} {local_file_path}')
199
+ subprocess.run(copy_cmd, shell=True, check=True)
200
+
191
201
  def _sync_file_mounts(
192
202
  self,
193
203
  handle: LocalDockerResourceHandle,
@@ -273,13 +283,8 @@ class LocalDockerBackend(backends.Backend['LocalDockerResourceHandle']):
273
283
  def _execute(self,
274
284
  handle: LocalDockerResourceHandle,
275
285
  task: 'task_lib.Task',
276
- detach_run: bool,
277
286
  dryrun: bool = False) -> None:
278
287
  """ Launches the container."""
279
- if detach_run:
280
- raise NotImplementedError('detach_run=True is not supported in '
281
- 'LocalDockerBackend.')
282
-
283
288
  if task.num_nodes > 1:
284
289
  raise NotImplementedError(
285
290
  'Tasks with num_nodes > 1 is currently not supported in '
@@ -16,6 +16,7 @@ import pathlib
16
16
  import re
17
17
  import shutil
18
18
  import subprocess
19
+ import sys
19
20
  import tempfile
20
21
  from typing import Optional, Tuple
21
22
 
@@ -27,13 +28,14 @@ import sky
27
28
  from sky import sky_logging
28
29
  from sky.backends import backend_utils
29
30
  from sky.server import common
31
+ from sky.utils import directory_utils
30
32
 
31
33
  logger = sky_logging.init_logger(__name__)
32
34
 
33
35
  # Local wheel path is same as the remote path.
34
36
  WHEEL_DIR = pathlib.Path(os.path.expanduser(backend_utils.SKY_REMOTE_PATH))
35
37
  _WHEEL_LOCK_PATH = WHEEL_DIR.parent / '.wheels_lock'
36
- SKY_PACKAGE_PATH = pathlib.Path(sky.__file__).parent.parent / 'sky'
38
+ SKY_PACKAGE_PATH = pathlib.Path(directory_utils.get_sky_dir())
37
39
 
38
40
  # NOTE: keep the same as setup.py's setuptools.setup(name=..., ...).
39
41
  _PACKAGE_WHEEL_NAME = 'skypilot'
@@ -132,19 +134,45 @@ def _build_sky_wheel() -> pathlib.Path:
132
134
  # It is important to normalize the path, otherwise 'pip wheel' would
133
135
  # treat the directory as a file and generate an empty wheel.
134
136
  norm_path = str(tmp_dir) + os.sep
137
+ # TODO(#5046): Consider adding native UV support for building wheels.
138
+ # Use `python -m pip` instead of `pip3` for better compatibility across
139
+ # different environments (conda, venv, UV, system Python, etc.)
135
140
  try:
136
- # TODO(suquark): For python>=3.7, 'subprocess.run' supports capture
137
- # of the output.
138
141
  subprocess.run([
139
- 'pip3', 'wheel', '--no-deps', norm_path, '--wheel-dir',
142
+ sys.executable, '-m', 'pip', 'wheel', '--no-deps', norm_path,
143
+ '--wheel-dir',
140
144
  str(tmp_dir)
141
145
  ],
142
- stdout=subprocess.DEVNULL,
143
- stderr=subprocess.PIPE,
144
- check=True)
146
+ capture_output=True,
147
+ check=True,
148
+ text=True)
145
149
  except subprocess.CalledProcessError as e:
146
- raise RuntimeError('Failed to build pip wheel for SkyPilot. '
147
- f'Error message: {e.stderr.decode()}') from e
150
+ error_msg = e.stderr
151
+ if 'No module named pip' in error_msg:
152
+ # pip module not found - provide helpful suggestions based on
153
+ # the available package managers
154
+ if shutil.which('uv'):
155
+ msg = ('pip module not found. Since you have UV installed, '
156
+ 'you can install pip by running:\n'
157
+ ' uv pip install pip')
158
+ elif shutil.which('conda'):
159
+ msg = (
160
+ 'pip module not found. Since you have conda installed, '
161
+ 'you can install pip by running:\n'
162
+ ' conda install pip')
163
+ else:
164
+ msg = ('pip module not found. Please install pip for your '
165
+ f'Python environment ({sys.executable}).')
166
+ else:
167
+ # Other pip errors
168
+ msg = f'pip wheel command failed. Error: {error_msg}'
169
+ raise RuntimeError('Failed to build pip wheel for SkyPilot.\n' +
170
+ msg) from e
171
+ except FileNotFoundError as e:
172
+ # Python executable not found (extremely rare)
173
+ raise RuntimeError(
174
+ f'Failed to build pip wheel for SkyPilot. '
175
+ f'Python executable not found: {sys.executable}') from e
148
176
 
149
177
  try:
150
178
  wheel_path = next(tmp_dir.glob(_WHEEL_PATTERN))
@@ -4,24 +4,21 @@ import importlib
4
4
  import typing
5
5
  from typing import Dict, List, Optional, Set, Tuple, Union
6
6
 
7
- from sky.clouds.service_catalog.config import fallback_to_default_catalog
8
- from sky.clouds.service_catalog.constants import ALL_CLOUDS
9
- from sky.clouds.service_catalog.constants import CATALOG_DIR
10
- from sky.clouds.service_catalog.constants import CATALOG_SCHEMA_VERSION
11
- from sky.clouds.service_catalog.constants import HOSTED_CATALOG_DIR_URL
7
+ from sky.catalog.config import fallback_to_default_catalog
8
+ from sky.skylet import constants
12
9
  from sky.utils import resources_utils
13
10
  from sky.utils import subprocess_utils
14
11
 
15
12
  if typing.TYPE_CHECKING:
13
+ from sky.catalog import common
16
14
  from sky.clouds import cloud
17
- from sky.clouds.service_catalog import common
18
15
 
19
16
  CloudFilter = Optional[Union[List[str], str]]
20
17
 
21
18
 
22
19
  def _map_clouds_catalog(clouds: CloudFilter, method_name: str, *args, **kwargs):
23
20
  if clouds is None:
24
- clouds = list(ALL_CLOUDS)
21
+ clouds = list(constants.ALL_CLOUDS)
25
22
 
26
23
  # TODO(hemil): Remove this once the common service catalog
27
24
  # functions are refactored from clouds/kubernetes.py to
@@ -35,10 +32,10 @@ def _map_clouds_catalog(clouds: CloudFilter, method_name: str, *args, **kwargs):
35
32
  def _execute_catalog_method(cloud: str):
36
33
  try:
37
34
  cloud_module = importlib.import_module(
38
- f'sky.clouds.service_catalog.{cloud.lower()}_catalog')
35
+ f'sky.catalog.{cloud.lower()}_catalog')
39
36
  except ModuleNotFoundError:
40
37
  raise ValueError(
41
- 'Cannot find module "sky.clouds.service_catalog'
38
+ 'Cannot find module "sky.catalog'
42
39
  f'.{cloud}_catalog" for cloud "{cloud}".') from None
43
40
  try:
44
41
  method = getattr(cloud_module, method_name)
@@ -94,7 +91,7 @@ def list_accelerator_counts(
94
91
  region_filter: Optional[str] = None,
95
92
  quantity_filter: Optional[int] = None,
96
93
  clouds: CloudFilter = None,
97
- ) -> Dict[str, List[int]]:
94
+ ) -> Dict[str, List[float]]:
98
95
  """Lists all accelerators offered by Sky and available counts.
99
96
 
100
97
  Returns: A dictionary of canonical accelerator names mapped to a list
@@ -110,12 +107,12 @@ def list_accelerator_counts(
110
107
  require_price=False)
111
108
  if not isinstance(results, list):
112
109
  results = [results]
113
- accelerator_counts: Dict[str, Set[int]] = collections.defaultdict(set)
110
+ accelerator_counts: Dict[str, Set[float]] = collections.defaultdict(set)
114
111
  for result in results:
115
112
  for gpu, items in result.items():
116
113
  for item in items:
117
114
  accelerator_counts[gpu].add(item.accelerator_count)
118
- ret: Dict[str, List[int]] = {}
115
+ ret: Dict[str, List[float]] = {}
119
116
  for gpu, counts in accelerator_counts.items():
120
117
  ret[gpu] = sorted(counts)
121
118
  return ret
@@ -224,6 +221,8 @@ def get_default_instance_type(cpus: Optional[str] = None,
224
221
  memory: Optional[str] = None,
225
222
  disk_tier: Optional[
226
223
  resources_utils.DiskTier] = None,
224
+ region: Optional[str] = None,
225
+ zone: Optional[str] = None,
227
226
  clouds: CloudFilter = None) -> Optional[str]:
228
227
  """Returns the cloud's default instance type for given #vCPUs and memory.
229
228
 
@@ -237,7 +236,7 @@ def get_default_instance_type(cpus: Optional[str] = None,
237
236
  the given CPU and memory requirement.
238
237
  """
239
238
  return _map_clouds_catalog(clouds, 'get_default_instance_type', cpus,
240
- memory, disk_tier)
239
+ memory, disk_tier, region, zone)
241
240
 
242
241
 
243
242
  def get_accelerators_from_instance_type(
@@ -248,9 +247,16 @@ def get_accelerators_from_instance_type(
248
247
  instance_type)
249
248
 
250
249
 
250
+ def get_arch_from_instance_type(instance_type: str,
251
+ clouds: CloudFilter = None) -> Optional[str]:
252
+ """Returns the arch from an instance type."""
253
+ return _map_clouds_catalog(clouds, 'get_arch_from_instance_type',
254
+ instance_type)
255
+
256
+
251
257
  def get_instance_type_for_accelerator(
252
258
  acc_name: str,
253
- acc_count: int,
259
+ acc_count: Union[int, float],
254
260
  cpus: Optional[str] = None,
255
261
  memory: Optional[str] = None,
256
262
  use_spot: bool = False,
@@ -327,6 +333,7 @@ def get_common_gpus() -> List[str]:
327
333
  'A10G',
328
334
  'A100',
329
335
  'A100-80GB',
336
+ 'B200',
330
337
  'H100',
331
338
  'H200',
332
339
  'L4',
@@ -380,9 +387,4 @@ __all__ = [
380
387
  'is_image_tag_valid',
381
388
  # Configuration
382
389
  'fallback_to_default_catalog',
383
- # Constants
384
- 'ALL_CLOUDS',
385
- 'HOSTED_CATALOG_DIR_URL',
386
- 'CATALOG_SCHEMA_VERSION',
387
- 'CATALOG_DIR',
388
390
  ]
@@ -13,10 +13,10 @@ from typing import Dict, List, Optional, Tuple, Union
13
13
  from sky import exceptions
14
14
  from sky import sky_logging
15
15
  from sky.adaptors import common as adaptors_common
16
+ from sky.catalog import common
17
+ from sky.catalog import config
18
+ from sky.catalog.data_fetchers import fetch_aws
16
19
  from sky.clouds import aws
17
- from sky.clouds.service_catalog import common
18
- from sky.clouds.service_catalog import config
19
- from sky.clouds.service_catalog.data_fetchers import fetch_aws
20
20
  from sky.utils import common_utils
21
21
  from sky.utils import resources_utils
22
22
  from sky.utils import rich_utils
@@ -38,14 +38,26 @@ _DEFAULT_INSTANCE_FAMILY = [
38
38
  # CPU: Intel Ice Lake 8375C.
39
39
  # Memory: 4 GiB RAM per 1 vCPU;
40
40
  'm6i',
41
+ # This is the latest general-purpose instance family as of Jul 2025.
42
+ # CPU: Intel Sapphire Rapids.
43
+ # Memory: 4 GiB RAM per 1 vCPU;
44
+ 'm7i',
41
45
  # This is the latest memory-optimized instance family as of Mar 2023.
42
46
  # CPU: Intel Ice Lake 8375C
43
47
  # Memory: 8 GiB RAM per 1 vCPU;
44
48
  'r6i',
49
+ # This is the latest memory-optimized instance family as of Jul 2025.
50
+ # CPU: Intel Sapphire Rapids.
51
+ # Memory: 8 GiB RAM per 1 vCPU;
52
+ 'r7i',
45
53
  # This is the latest compute-optimized instance family as of Mar 2023.
46
54
  # CPU: Intel Ice Lake 8375C
47
55
  # Memory: 2 GiB RAM per 1 vCPU;
48
56
  'c6i',
57
+ # This is the latest compute-optimized instance family as of Jul 2025.
58
+ # CPU: Intel Sapphire Rapids.
59
+ # Memory: 2 GiB RAM per 1 vCPU;
60
+ 'c7i',
49
61
  ]
50
62
  _DEFAULT_NUM_VCPUS = 8
51
63
  _DEFAULT_MEMORY_CPU_RATIO = 4
@@ -230,10 +242,12 @@ def get_vcpus_mem_from_instance_type(
230
242
  instance_type)
231
243
 
232
244
 
233
- def get_default_instance_type(
234
- cpus: Optional[str] = None,
235
- memory: Optional[str] = None,
236
- disk_tier: Optional[resources_utils.DiskTier] = None) -> Optional[str]:
245
+ def get_default_instance_type(cpus: Optional[str] = None,
246
+ memory: Optional[str] = None,
247
+ disk_tier: Optional[
248
+ resources_utils.DiskTier] = None,
249
+ region: Optional[str] = None,
250
+ zone: Optional[str] = None) -> Optional[str]:
237
251
  del disk_tier # unused
238
252
  if cpus is None and memory is None:
239
253
  cpus = f'{_DEFAULT_NUM_VCPUS}+'
@@ -247,7 +261,8 @@ def get_default_instance_type(
247
261
  df = _get_df()
248
262
  df = df[df['InstanceType'].str.startswith(instance_type_prefix)]
249
263
  return common.get_instance_type_for_cpus_mem_impl(df, cpus,
250
- memory_gb_or_ratio)
264
+ memory_gb_or_ratio,
265
+ region, zone)
251
266
 
252
267
 
253
268
  def get_accelerators_from_instance_type(
@@ -256,6 +271,10 @@ def get_accelerators_from_instance_type(
256
271
  _get_df(), instance_type)
257
272
 
258
273
 
274
+ def get_arch_from_instance_type(instance_type: str) -> Optional[str]:
275
+ return common.get_arch_from_instance_type_impl(_get_df(), instance_type)
276
+
277
+
259
278
  def get_instance_type_for_accelerator(
260
279
  acc_name: str,
261
280
  acc_count: int,
@@ -8,8 +8,8 @@ from typing import Dict, List, Optional, Tuple, Union
8
8
 
9
9
  from sky import clouds as cloud_lib
10
10
  from sky import sky_logging
11
+ from sky.catalog import common
11
12
  from sky.clouds import Azure
12
- from sky.clouds.service_catalog import common
13
13
  from sky.utils import resources_utils
14
14
  from sky.utils import ux_utils
15
15
 
@@ -17,7 +17,7 @@ logger = sky_logging.init_logger(__name__)
17
17
 
18
18
  # This list should match the list of regions in
19
19
  # skypilot image generation Packer script's replication_regions
20
- # sky/clouds/service_catalog/images/skypilot-azure-cpu-ubuntu.pkr.hcl
20
+ # sky/catalog/images/skypilot-azure-cpu-ubuntu.pkr.hcl
21
21
  COMMUNITY_IMAGE_AVAILABLE_REGIONS = {
22
22
  'centralus',
23
23
  'eastus',
@@ -114,10 +114,12 @@ def _get_instance_family(instance_type: str) -> str:
114
114
  return instance_family
115
115
 
116
116
 
117
- def get_default_instance_type(
118
- cpus: Optional[str] = None,
119
- memory: Optional[str] = None,
120
- disk_tier: Optional[resources_utils.DiskTier] = None) -> Optional[str]:
117
+ def get_default_instance_type(cpus: Optional[str] = None,
118
+ memory: Optional[str] = None,
119
+ disk_tier: Optional[
120
+ resources_utils.DiskTier] = None,
121
+ region: Optional[str] = None,
122
+ zone: Optional[str] = None) -> Optional[str]:
121
123
  if cpus is None and memory is None:
122
124
  cpus = f'{_DEFAULT_NUM_VCPUS}+'
123
125
  if memory is None:
@@ -133,7 +135,8 @@ def get_default_instance_type(
133
135
 
134
136
  df = df.loc[df['InstanceType'].apply(_filter_disk_type)]
135
137
  return common.get_instance_type_for_cpus_mem_impl(df, cpus,
136
- memory_gb_or_ratio)
138
+ memory_gb_or_ratio,
139
+ region, zone)
137
140
 
138
141
 
139
142
  def get_accelerators_from_instance_type(
@@ -12,7 +12,8 @@ import filelock
12
12
  from sky import sky_logging
13
13
  from sky.adaptors import common as adaptors_common
14
14
  from sky.clouds import cloud as cloud_lib
15
- from sky.clouds.service_catalog import constants
15
+ from sky.skylet import constants
16
+ from sky.utils import annotations
16
17
  from sky.utils import common_utils
17
18
  from sky.utils import registry
18
19
  from sky.utils import rich_utils
@@ -50,7 +51,7 @@ class InstanceTypeInfo(NamedTuple):
50
51
  cloud: str
51
52
  instance_type: Optional[str]
52
53
  accelerator_name: str
53
- accelerator_count: int
54
+ accelerator_count: float
54
55
  cpu_count: Optional[float]
55
56
  device_memory: Optional[float]
56
57
  memory: Optional[float]
@@ -125,17 +126,21 @@ class LazyDataFrame:
125
126
 
126
127
  We don't need to load the catalog for every SkyPilot call, and this class
127
128
  allows us to load the catalog only when needed.
129
+
130
+ Use update_if_stale_func to pass in a function that decides whether to
131
+ update the catalog on disk, updates it if needed, and returns
132
+ a bool indicating whether the update was done.
128
133
  """
129
134
 
130
- def __init__(self, filename: str, update_func: Callable[[], None]):
135
+ def __init__(self, filename: str, update_if_stale_func: Callable[[], bool]):
131
136
  self._filename = filename
132
137
  self._df: Optional['pd.DataFrame'] = None
133
- self._update_func = update_func
138
+ self._update_if_stale_func = update_if_stale_func
134
139
 
140
+ @annotations.lru_cache(scope='request')
135
141
  def _load_df(self) -> 'pd.DataFrame':
136
- if self._df is None:
142
+ if self._update_if_stale_func() or self._df is None:
137
143
  try:
138
- self._update_func()
139
144
  self._df = pd.read_csv(self._filename)
140
145
  except Exception as e: # pylint: disable=broad-except
141
146
  # As users can manually modify the catalog, read_csv can fail.
@@ -193,48 +198,60 @@ def read_catalog(filename: str,
193
198
  return last_update + pull_frequency_hours * 3600 < time.time()
194
199
 
195
200
  def _update_catalog():
201
+ # Fast path: Exit early to avoid lock contention.
202
+ if not _need_update():
203
+ return False
204
+
196
205
  # Atomic check, to avoid conflicts with other processes.
197
206
  with filelock.FileLock(meta_path + '.lock'):
198
- if _need_update():
199
- url = f'{constants.HOSTED_CATALOG_DIR_URL}/{constants.CATALOG_SCHEMA_VERSION}/{filename}' # pylint: disable=line-too-long
200
- update_frequency_str = ''
201
- if pull_frequency_hours is not None:
202
- update_frequency_str = (
203
- f' (every {pull_frequency_hours} hours)')
204
- with rich_utils.safe_status(
205
- ux_utils.spinner_message(
206
- f'Updating {cloud} catalog: {filename}') +
207
- f'{update_frequency_str}'):
208
- try:
209
- r = requests.get(url=url,
210
- headers={'User-Agent': 'SkyPilot/0.7'})
211
- r.raise_for_status()
212
- except requests.exceptions.RequestException as e:
213
- error_str = (f'Failed to fetch {cloud} catalog '
214
- f'{filename}. ')
215
- if os.path.exists(catalog_path):
216
- logger.warning(
217
- f'{error_str}Using cached catalog files.')
218
- # Update catalog file modification time.
219
- os.utime(catalog_path, None) # Sets to current time
220
- else:
221
- logger.error(
222
- f'{error_str}Please check your internet '
223
- 'connection.')
224
- with ux_utils.print_exception_no_traceback():
225
- raise e
207
+ # Double check after acquiring the lock.
208
+ if not _need_update():
209
+ return False
210
+
211
+ url = f'{constants.HOSTED_CATALOG_DIR_URL}/{constants.CATALOG_SCHEMA_VERSION}/{filename}' # pylint: disable=line-too-long
212
+ url_fallback = f'{constants.HOSTED_CATALOG_DIR_URL_S3_MIRROR}/{constants.CATALOG_SCHEMA_VERSION}/{filename}' # pylint: disable=line-too-long
213
+ headers = {'User-Agent': 'SkyPilot/0.7'}
214
+ update_frequency_str = ''
215
+ if pull_frequency_hours is not None:
216
+ update_frequency_str = (
217
+ f' (every {pull_frequency_hours} hours)')
218
+ with rich_utils.safe_status(
219
+ ux_utils.spinner_message(
220
+ f'Updating {cloud} catalog: {filename}') +
221
+ f'{update_frequency_str}'):
222
+ try:
223
+ r = requests.get(url=url, headers=headers)
224
+ if r.status_code == 429:
225
+ # Fall back to the S3 mirror: GitHub introduced rate
226
+ # limiting after 2025-05. See
227
+ # https://github.com/skypilot-org/skypilot/issues/5438
228
+ # for more details.
229
+ r = requests.get(url=url_fallback, headers=headers)
230
+ r.raise_for_status()
231
+ except requests.exceptions.RequestException as e:
232
+ error_str = (f'Failed to fetch {cloud} catalog '
233
+ f'{filename}. ')
234
+ if os.path.exists(catalog_path):
235
+ logger.warning(
236
+ f'{error_str}Using cached catalog files.')
237
+ # Update catalog file modification time.
238
+ os.utime(catalog_path, None) # Sets to current time
226
239
  else:
227
- # Download successful, save the catalog to a local file.
228
- os.makedirs(os.path.dirname(catalog_path),
229
- exist_ok=True)
230
- with open(catalog_path, 'w', encoding='utf-8') as f:
231
- f.write(r.text)
232
- with open(meta_path + '.md5', 'w',
233
- encoding='utf-8') as f:
234
- f.write(hashlib.md5(r.text.encode()).hexdigest())
235
- logger.debug(f'Updated {cloud} catalog {filename}.')
240
+ logger.error(f'{error_str}Please check your internet '
241
+ 'connection.')
242
+ with ux_utils.print_exception_no_traceback():
243
+ raise e
244
+ else:
245
+ # Download successful, save the catalog to a local file.
246
+ os.makedirs(os.path.dirname(catalog_path), exist_ok=True)
247
+ with open(catalog_path, 'w', encoding='utf-8') as f:
248
+ f.write(r.text)
249
+ with open(meta_path + '.md5', 'w', encoding='utf-8') as f:
250
+ f.write(hashlib.md5(r.text.encode()).hexdigest())
251
+ logger.debug(f'Updated {cloud} catalog {filename}.')
252
+ return True
236
253
 
237
- return LazyDataFrame(catalog_path, update_func=_update_catalog)
254
+ return LazyDataFrame(catalog_path, update_if_stale_func=_update_catalog)
238
255
 
239
256
 
240
257
  def _get_instance_type(
@@ -337,7 +354,7 @@ def get_hourly_cost_impl(
337
354
  ) -> float:
338
355
  """Returns the hourly price of a VM instance in the given region and zone.
339
356
 
340
- Refer to get_hourly_cost in service_catalog/__init__.py for the docstring.
357
+ Refer to get_hourly_cost in catalog/__init__.py for the docstring.
341
358
  """
342
359
  df = _get_instance_type(df, instance_type, region, zone)
343
360
  if df.empty:
@@ -459,8 +476,11 @@ def _filter_region_zone(df: 'pd.DataFrame', region: Optional[str],
459
476
 
460
477
 
461
478
  def get_instance_type_for_cpus_mem_impl(
462
- df: 'pd.DataFrame', cpus: Optional[str],
463
- memory_gb_or_ratio: Optional[str]) -> Optional[str]:
479
+ df: 'pd.DataFrame',
480
+ cpus: Optional[str],
481
+ memory_gb_or_ratio: Optional[str],
482
+ region: Optional[str] = None,
483
+ zone: Optional[str] = None) -> Optional[str]:
464
484
  """Returns the cheapest instance type that satisfies the requirements.
465
485
 
466
486
  Args:
@@ -473,7 +493,10 @@ def get_instance_type_for_cpus_mem_impl(
473
493
  returned instance type should have at least the given memory size.
474
494
  If the string ends with "x", then the returned instance type should
475
495
  have at least the given number of vCPUs times the given ratio.
496
+ region: The region to filter by.
497
+ zone: The zone to filter by.
476
498
  """
499
+ df = _filter_region_zone(df, region, zone)
477
500
  df = _filter_with_cpus(df, cpus)
478
501
  df = _filter_with_mem(df, memory_gb_or_ratio)
479
502
  if df.empty:
@@ -504,6 +527,24 @@ def get_accelerators_from_instance_type_impl(
504
527
  return {acc_name: _convert(acc_count)}
505
528
 
506
529
 
530
+ def get_arch_from_instance_type_impl(
531
+ df: 'pd.DataFrame',
532
+ instance_type: str,
533
+ ) -> Optional[str]:
534
+ df = _get_instance_type(df, instance_type, None)
535
+ if df.empty:
536
+ with ux_utils.print_exception_no_traceback():
537
+ raise ValueError(f'No instance type {instance_type} found.')
538
+ row = df.iloc[0]
539
+ if 'Arch' not in row:
540
+ return None
541
+ arch = row['Arch']
542
+ if pd.isnull(arch):
543
+ return None
544
+
545
+ return arch
546
+
547
+
507
548
  def get_instance_type_for_accelerator_impl(
508
549
  df: 'pd.DataFrame',
509
550
  acc_name: str,
@@ -608,7 +649,7 @@ def list_accelerators_impl(
608
649
  df = df[df['Region'].str.contains(region_filter,
609
650
  case=case_sensitive,
610
651
  regex=True)]
611
- df['AcceleratorCount'] = df['AcceleratorCount'].astype(int)
652
+ df['AcceleratorCount'] = df['AcceleratorCount'].astype(float)
612
653
  if quantity_filter is not None:
613
654
  df = df[df['AcceleratorCount'] == quantity_filter]
614
655
  grouped = df.groupby('AcceleratorName')
@@ -3,14 +3,14 @@
3
3
  import typing
4
4
  from typing import Dict, List, Optional, Tuple, Union
5
5
 
6
- from sky.clouds.service_catalog import common
7
- import sky.provision.cudo.cudo_machine_type as cudo_mt
6
+ from sky.catalog import common
7
+ from sky.provision.cudo import cudo_machine_type as cudo_mt
8
8
  from sky.utils import ux_utils
9
9
 
10
10
  if typing.TYPE_CHECKING:
11
11
  from sky.clouds import cloud
12
12
 
13
- _PULL_FREQUENCY_HOURS = 1
13
+ _PULL_FREQUENCY_HOURS = 7
14
14
  _df = common.read_catalog(cudo_mt.VMS_CSV,
15
15
  pull_frequency_hours=_PULL_FREQUENCY_HOURS)
16
16
 
@@ -51,7 +51,9 @@ def get_vcpus_mem_from_instance_type(
51
51
 
52
52
  def get_default_instance_type(cpus: Optional[str] = None,
53
53
  memory: Optional[str] = None,
54
- disk_tier: Optional[str] = None) -> Optional[str]:
54
+ disk_tier: Optional[str] = None,
55
+ region: Optional[str] = None,
56
+ zone: Optional[str] = None) -> Optional[str]:
55
57
  del disk_tier
56
58
  # NOTE: After expanding catalog to multiple entries, you may
57
59
  # want to specify a default instance type or family.
@@ -62,7 +64,8 @@ def get_default_instance_type(cpus: Optional[str] = None,
62
64
  if memory is None:
63
65
  memory_gb_or_ratio = f'{_DEFAULT_MEMORY_CPU_RATIO}x'
64
66
  return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
65
- memory_gb_or_ratio)
67
+ memory_gb_or_ratio,
68
+ region, zone)
66
69
 
67
70
 
68
71
  def get_accelerators_from_instance_type(
@@ -3,7 +3,7 @@ import typing
3
3
  from typing import List
4
4
 
5
5
  from sky.adaptors import common as adaptors_common
6
- from sky.clouds.service_catalog import common
6
+ from sky.catalog import common
7
7
 
8
8
  if typing.TYPE_CHECKING:
9
9
  import pandas as pd