skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/serve/autoscalers.py CHANGED
@@ -6,7 +6,7 @@ import enum
6
6
  import math
7
7
  import time
8
8
  import typing
9
- from typing import Any, Dict, Iterable, List, Optional, Union
9
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
10
10
 
11
11
  from sky import sky_logging
12
12
  from sky.serve import constants
@@ -175,6 +175,14 @@ class Autoscaler:
175
175
  """Collect request information from aggregator for autoscaling."""
176
176
  raise NotImplementedError
177
177
 
178
+ def info(self) -> Dict[str, Any]:
179
+ """Get information about the autoscaler."""
180
+ return {
181
+ 'target_num_replicas': self.target_num_replicas,
182
+ 'min_replicas': self.min_replicas,
183
+ 'max_replicas': self.max_replicas,
184
+ }
185
+
178
186
  def _generate_scaling_decisions(
179
187
  self,
180
188
  replica_infos: List['replica_managers.ReplicaInfo'],
@@ -205,6 +213,10 @@ class Autoscaler:
205
213
  # TODO(MaoZiming): use NAME to get the class.
206
214
  if spec.use_ondemand_fallback:
207
215
  return FallbackRequestRateAutoscaler(service_name, spec)
216
+ elif isinstance(spec.target_qps_per_replica, dict):
217
+ # Use instance-aware autoscaler
218
+ # when target_qps_per_replica is a dict
219
+ return InstanceAwareRequestRateAutoscaler(service_name, spec)
208
220
  else:
209
221
  return RequestRateAutoscaler(service_name, spec)
210
222
 
@@ -399,6 +411,8 @@ class _AutoscalerWithHysteresis(Autoscaler):
399
411
  # `_set_target_num_replicas_with_hysteresis` to have the replicas
400
412
  # quickly scale after each update.
401
413
  self.target_num_replicas = self._calculate_target_num_replicas()
414
+ logger.debug(f'Target number of replicas: {self.target_num_replicas}'
415
+ 'after update_version.')
402
416
  # Cleanup hysteresis counters.
403
417
  self.upscale_counter = 0
404
418
  self.downscale_counter = 0
@@ -456,20 +470,28 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
456
470
  request_timestamps: All request timestamps within the window.
457
471
  """
458
472
  super().__init__(service_name, spec)
459
- self.target_qps_per_replica: Optional[
460
- float] = spec.target_qps_per_replica
473
+ self.target_qps_per_replica: Optional[Union[float, Dict[
474
+ str, float]]] = spec.target_qps_per_replica
461
475
  self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
462
476
  self.request_timestamps: List[float] = []
463
477
 
464
478
  def _calculate_target_num_replicas(self) -> int:
465
479
  if self.target_qps_per_replica is None:
466
480
  return self.min_replicas
481
+
482
+ # RequestRateAutoscaler should only handle float values
483
+ if isinstance(self.target_qps_per_replica, dict):
484
+ raise ValueError('RequestRateAutoscaler does not support dict '
485
+ 'target_qps_per_replica. Should use '
486
+ 'InstanceAwareRequestRateAutoscaler instead.')
487
+
467
488
  num_requests_per_second = len(
468
489
  self.request_timestamps) / self.qps_window_size
469
- target_num_replicas = math.ceil(num_requests_per_second /
470
- self.target_qps_per_replica)
490
+ target_num_replicas = \
491
+ math.ceil(num_requests_per_second / self.target_qps_per_replica)
471
492
  logger.info(f'Requests per second: {num_requests_per_second}. '
472
493
  f'Target number of replicas: {target_num_replicas}.')
494
+
473
495
  return self._clip_target_num_replicas(target_num_replicas)
474
496
 
475
497
  def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
@@ -502,6 +524,7 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
502
524
  ) -> List[AutoscalerDecision]:
503
525
  """Generate Autoscaling decisions based on request rate."""
504
526
 
527
+ # Use standard hysteresis-based logic (non-instance-aware)
505
528
  self._set_target_num_replicas_with_hysteresis()
506
529
 
507
530
  latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
@@ -530,6 +553,7 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
530
553
  if len(latest_nonterminal_replicas) > target_num_replicas:
531
554
  num_replicas_to_scale_down = (len(latest_nonterminal_replicas) -
532
555
  target_num_replicas)
556
+ # Use standard downscaling logic
533
557
  replicas_to_scale_down = (
534
558
  _select_nonterminal_replicas_to_scale_down(
535
559
  num_replicas_to_scale_down, latest_nonterminal_replicas))
@@ -554,6 +578,334 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
554
578
  logger.info(f'Remaining dynamic states: {dynamic_states}')
555
579
 
556
580
 
581
class InstanceAwareRequestRateAutoscaler(RequestRateAutoscaler):
    """Instance-aware RequestRateAutoscaler:
    Autoscale based on each replica's GPU-specific QPS.

    This autoscaler considers different QPS targets for different GPU types
    when target_qps_per_replica is provided as a dictionary mapping GPU types
    to their respective QPS targets.
    """

    def __init__(self, service_name: str,
                 spec: 'service_spec.SkyServiceSpec') -> None:
        """Initialize the autoscaler from a spec with per-GPU QPS targets.

        Args:
            service_name: Forwarded to the parent constructor.
            spec: Service spec; its ``target_qps_per_replica`` must be a
                dict.
        """
        super().__init__(service_name, spec)
        # Ensure target_qps_per_replica is a dict for instance-aware logic.
        # The assert also narrows the type for static checkers before the
        # plain re-assignment below.
        assert isinstance(spec.target_qps_per_replica, dict), \
            'InstanceAware Autoscaler requires dict type target_qps_per_replica'
        self.target_qps_per_replica = spec.target_qps_per_replica

    @staticmethod
    def _counts_toward_capacity(
            replica_info: 'replica_managers.ReplicaInfo') -> bool:
        """Return True if the replica's status is counted as serving capacity.

        Only READY, STARTING and PROVISIONING replicas contribute to the
        total QPS computed by this autoscaler.
        """
        return replica_info.status in (
            serve_state.ReplicaStatus.READY,
            serve_state.ReplicaStatus.STARTING,
            serve_state.ReplicaStatus.PROVISIONING,
        )

    def _generate_scaling_decisions(
        self,
        replica_infos: List['replica_managers.ReplicaInfo'],
    ) -> List[AutoscalerDecision]:
        """Generate autoscaling decisions with instance-aware logic.

        Args:
            replica_infos: Replica records to base the decision on.

        Returns:
            SCALE_UP decisions (with ``target=None``) or SCALE_DOWN
            decisions (with ``target`` set to a replica id) that move the
            number of latest-version, non-terminal replicas towards
            ``self.get_final_target_num_replicas()``.
        """
        # Always use instance-aware logic
        # since target_qps_per_replica is guaranteed to be dict
        self._set_target_num_replicas_with_instance_aware_logic(replica_infos)

        latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = [
            info for info in replica_infos
            if not info.is_terminal and info.version == self.latest_version
        ]

        target_num_replicas = self.get_final_target_num_replicas()
        current_num_replicas = len(latest_nonterminal_replicas)

        scaling_decisions: List[AutoscalerDecision] = []
        num_scale_up = 0
        num_scale_down = 0

        # Decide if to scale up or down.
        if target_num_replicas > current_num_replicas:
            num_scale_up = target_num_replicas - current_num_replicas
            # No resources_override to use when scaling up
            scaling_decisions.extend(
                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
                                   target=None)
                for _ in range(num_scale_up))
        elif target_num_replicas < current_num_replicas:
            # Use instance-aware scale down logic
            replicas_to_scale_down = self._select_replicas_to_scale_down_by_qps(
                current_num_replicas - target_num_replicas,
                latest_nonterminal_replicas)
            num_scale_down = len(replicas_to_scale_down)
            scaling_decisions.extend(
                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
                                   target=replica_id)
                for replica_id in replicas_to_scale_down)

        # Outdated replicas are handled by base class generate_scaling_decisions
        # No need to handle them here

        logger.info(f'Scaling decisions: '
                    f'{num_scale_up} scale up, '
                    f'{num_scale_down} scale down '
                    f'(latest nonterminal: {current_num_replicas}, '
                    f'target: {target_num_replicas})')

        return scaling_decisions

    def _set_target_num_replicas_with_instance_aware_logic(
            self, replica_infos: List['replica_managers.ReplicaInfo']) -> None:
        """Set target_num_replicas using instance-aware logic.

        A candidate target is computed from the request rate and the
        per-GPU QPS of the current replicas, then applied to
        ``self.target_num_replicas`` through the upscale/downscale counters.

        Args:
            replica_infos: Replica records used to estimate capacity.
        """
        assert isinstance(self.target_qps_per_replica,
                          dict), 'Expected dict for instance-aware logic'
        target_qps_dict = self.target_qps_per_replica

        num_requests_per_second = len(
            self.request_timestamps) / self.qps_window_size

        total_qps = self._calculate_total_qps_from_replicas(replica_infos)
        if total_qps > 0:
            if num_requests_per_second >= total_qps:
                # for upscaling, max_target_qps is the standard qps
                max_target_qps = max(target_qps_dict.values())
                over_request_num = num_requests_per_second - total_qps
                # Count only the replicas whose QPS is part of total_qps.
                # Counting every entry of replica_infos would add replicas
                # that provide no capacity on top of the extra replicas
                # computed below, inflating the target.
                current_num_replicas = sum(
                    1 for info in replica_infos
                    if self._counts_toward_capacity(info))
                raw_target_num = current_num_replicas + math.ceil(
                    over_request_num / max_target_qps)
                target_num_replicas = self._clip_target_num_replicas(
                    raw_target_num)
                logger.info(
                    f'Instance-aware autoscaling: total QPS {total_qps}, '
                    f'num_requests_per_second: {num_requests_per_second}, '
                    f'upscaling, using maximum QPS {max_target_qps} '
                    f'from {target_qps_dict}, '
                    f'target replicas: {target_num_replicas}')
            else:
                # for downscaling, use qps for every ready_target_qps_list
                # to calculate target_num_replicas
                ready_target_qps_list = sorted(
                    self._extract_target_qps_list_from_ready_replicas(
                        replica_infos),
                    reverse=True)
                if not ready_target_qps_list:
                    # Fallback to maximum QPS from config if no ready replicas
                    ready_target_qps_list = [max(target_qps_dict.values())]

                # Take replicas from the highest QPS down until their
                # combined QPS exceeds the request rate.
                raw_target_num = 0
                qps_sum = 0.0
                for qps in ready_target_qps_list:
                    raw_target_num += 1
                    qps_sum += qps
                    if qps_sum > num_requests_per_second:
                        break

                target_num_replicas = self._clip_target_num_replicas(
                    raw_target_num)
                logger.info(
                    f'Instance-aware autoscaling: total QPS {total_qps}, '
                    f'num_requests_per_second: {num_requests_per_second}, '
                    f'downscaling, using ready QPS list '
                    f'{ready_target_qps_list}, '
                    f'target replicas: {target_num_replicas}')
        else:
            # no replica is ready; use the normal min_replicas
            target_num_replicas = self._clip_target_num_replicas(
                self.min_replicas)
            logger.info(f'Instance-aware autoscaling: no replica QPS available,'
                        f' target replicas: {target_num_replicas}')

        # Apply hysteresis logic
        old_target_num_replicas = self.target_num_replicas

        # Faster scale up when there is no replica.
        if self.target_num_replicas == 0:
            self.target_num_replicas = target_num_replicas
        elif target_num_replicas > self.target_num_replicas:
            self.upscale_counter += 1
            self.downscale_counter = 0
            if self.upscale_counter >= self.scale_up_threshold:
                self.upscale_counter = 0
                self.target_num_replicas = target_num_replicas
        elif target_num_replicas < self.target_num_replicas:
            self.downscale_counter += 1
            self.upscale_counter = 0
            if self.downscale_counter >= self.scale_down_threshold:
                self.downscale_counter = 0
                self.target_num_replicas = target_num_replicas
        else:
            self.upscale_counter = self.downscale_counter = 0

        logger.info(
            f'Instance-aware: Old target number of replicas: '
            f'{old_target_num_replicas}. '
            f'Current target number of replicas: {target_num_replicas}. '
            f'Final target number of replicas: {self.target_num_replicas}. '
            f'Num overprovision: {self.num_overprovision}. '
            f'Upscale counter: {self.upscale_counter}/'
            f'{self.scale_up_threshold}. '
            f'Downscale counter: {self.downscale_counter}/'
            f'{self.scale_down_threshold}. ')

    def _calculate_total_qps_from_replicas(
            self, replica_infos: List['replica_managers.ReplicaInfo']) -> float:
        """Calculate total QPS based on current replica GPU types.

        Replicas whose status is not READY, STARTING or PROVISIONING are
        skipped.

        Returns:
            The sum of the per-GPU target QPS of the counted replicas.
        """
        total_qps = 0.0
        logger.info(f'Calculating total QPS from {len(replica_infos)} replicas')

        for replica_info in replica_infos:
            # Skip non-valid replicas
            if not self._counts_toward_capacity(replica_info):
                logger.info(f'Skipping replica {replica_info.replica_id} '
                            f'with status: {replica_info.status}')
                continue

            gpu_type = self._get_gpu_type_from_replica_info(replica_info)
            logger.info(f'Processing replica {replica_info.replica_id} '
                        f'with GPU type: {gpu_type}')

            # Use flexible matching logic
            qps_for_this_gpu = self._get_target_qps_for_gpu_type(gpu_type)
            total_qps += qps_for_this_gpu
            logger.info(f'GPU type {gpu_type} -> {qps_for_this_gpu} QPS')

        logger.info(f'Calculated total QPS: {total_qps}')
        return total_qps

    def _get_target_qps_for_gpu_type(self, gpu_type: str) -> float:
        """Get target QPS for a specific GPU type with flexible matching.

        Lookup order: exact key match, then a configured key whose part
        before ':' equals ``gpu_type``, then the minimum configured QPS.
        """
        assert isinstance(self.target_qps_per_replica,
                          dict), 'Expected dict for instance-aware logic'
        target_qps_dict = self.target_qps_per_replica

        # Direct match first
        if gpu_type in target_qps_dict:
            return target_qps_dict[gpu_type]

        # Try matching by base name (e.g., 'A100' matches 'A100:1')
        for config_key, config_qps in target_qps_dict.items():
            # Remove count suffix (e.g., 'A100:1' -> 'A100')
            if gpu_type == config_key.split(':')[0]:
                return config_qps

        # Fallback to minimum QPS
        logger.warning(f'No matching QPS found for GPU type: {gpu_type}. '
                       f'Available types: {list(target_qps_dict.keys())}. '
                       f'Using minimum QPS as fallback.')
        return min(target_qps_dict.values())

    def _get_gpu_type_from_replica_info(
            self, replica_info: 'replica_managers.ReplicaInfo') -> str:
        """Extract GPU type from ReplicaInfo object.

        Returns:
            The first accelerator name of the replica's launched resources,
            or 'unknown' when the replica has no handle or no accelerators.
        """
        handle = replica_info.handle()
        if handle is None:
            return 'unknown'
        accelerators = handle.launched_resources.accelerators
        if not accelerators:
            return 'unknown'
        # Get the first accelerator type
        return next(iter(accelerators))

    def _extract_target_qps_list_from_ready_replicas(
            self,
            replica_infos: List['replica_managers.ReplicaInfo']) -> List[float]:
        """Extract target QPS list from current READY replicas.

        Returns:
            One target QPS per READY replica, in input order; empty when no
            replica is READY.
        """
        ready_replica_qps: List[float] = []

        for replica_info in replica_infos:
            # Check if replica is READY
            if replica_info.status != serve_state.ReplicaStatus.READY:
                logger.info(
                    f'Replica {replica_info.replica_id} '
                    f'not ready (status: {replica_info.status}), skipping')
                continue

            gpu_type = self._get_gpu_type_from_replica_info(replica_info)

            # Use flexible matching logic
            qps_for_this_gpu = self._get_target_qps_for_gpu_type(gpu_type)
            ready_replica_qps.append(qps_for_this_gpu)
            logger.info(f'Ready replica {replica_info.replica_id} '
                        f'with GPU {gpu_type}: {qps_for_this_gpu} QPS')

        if ready_replica_qps:
            logger.info(
                f'Target QPS list from ready replicas: {ready_replica_qps}')
        return ready_replica_qps

    def _select_replicas_to_scale_down_by_qps(
            self, num_replicas_to_scale_down: int,
            replica_infos: List['replica_managers.ReplicaInfo']) -> List[int]:
        """Select replicas to scale down (lowest QPS first).

        Args:
            num_replicas_to_scale_down: Maximum number of replica ids to
                return; values <= 0 select nothing.
            replica_infos: Candidate replicas; terminal ones are ignored.

        Returns:
            Replica ids ordered by the scale-down sort key.
        """
        candidates: List['replica_managers.ReplicaInfo'] = []
        qps_by_replica_id: Dict[int, float] = {}

        for info in replica_infos:
            # Include old-version replicas as well so they also get a target_qps
            # assigned. Skip terminal replicas only.
            if info.is_terminal:
                continue

            # Get GPU type directly from replica info
            gpu_type = self._get_gpu_type_from_replica_info(info)

            # Use flexible matching logic
            target_qps = self._get_target_qps_for_gpu_type(gpu_type)

            candidates.append(info)
            qps_by_replica_id[info.replica_id] = float(target_qps)
            logger.info(f'Replica {info.replica_id} '
                        f'with GPU {gpu_type}: {target_qps} QPS')

        # Sort replicas by: 1. status order, 2. target_qps (asc),
        # 3. version (asc), 4. replica_id (desc)
        sorted_replicas = sorted(
            candidates,
            key=lambda info: (
                info.status.scale_down_decision_order(),
                qps_by_replica_id[info.replica_id],
                info.version,
                -info.replica_id,
            ))

        # Slicing (rather than append-then-check) guarantees that a request
        # for zero replicas selects none.
        num_to_select = max(num_replicas_to_scale_down, 0)
        selected_replica_ids = [
            info.replica_id for info in sorted_replicas[:num_to_select]
        ]

        logger.info(
            f'Selected {len(selected_replica_ids)} replicas to scale down: '
            f'{selected_replica_ids}')
        return selected_replica_ids

    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                       update_mode: serve_utils.UpdateMode) -> None:
        """Update the autoscaler to a new service version.

        Args:
            version: New service version.
            spec: New service spec; its ``target_qps_per_replica`` must be a
                dict.
            update_mode: Update mode forwarded to the ancestor
                implementation.
        """
        # The two-argument super() starts the lookup *after*
        # RequestRateAutoscaler in the MRO, so that class's own
        # update_version is bypassed.
        # NOTE(review): presumably because it assumes a float target --
        # confirm against RequestRateAutoscaler.update_version.
        super(RequestRateAutoscaler,
              self).update_version(version, spec, update_mode)
        # Ensure it's a dict; the assert also narrows the type for static
        # checkers before the plain re-assignment below.
        assert isinstance(spec.target_qps_per_replica, dict), \
            'InstanceAware Autoscaler requires dict type target_qps_per_replica'
        self.target_qps_per_replica = spec.target_qps_per_replica
907
+
908
+
557
909
  class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
558
910
  """FallbackRequestRateAutoscaler
559
911