skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/serve/autoscalers.py CHANGED
@@ -6,7 +6,7 @@ import enum
6
6
  import math
7
7
  import time
8
8
  import typing
9
- from typing import Any, Dict, Iterable, List, Optional, Union
9
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
10
10
 
11
11
  from sky import sky_logging
12
12
  from sky.serve import constants
@@ -175,6 +175,14 @@ class Autoscaler:
175
175
  """Collect request information from aggregator for autoscaling."""
176
176
  raise NotImplementedError
177
177
 
178
+ def info(self) -> Dict[str, Any]:
179
+ """Get information about the autoscaler."""
180
+ return {
181
+ 'target_num_replicas': self.target_num_replicas,
182
+ 'min_replicas': self.min_replicas,
183
+ 'max_replicas': self.max_replicas,
184
+ }
185
+
178
186
  def _generate_scaling_decisions(
179
187
  self,
180
188
  replica_infos: List['replica_managers.ReplicaInfo'],
@@ -205,6 +213,10 @@ class Autoscaler:
205
213
  # TODO(MaoZiming): use NAME to get the class.
206
214
  if spec.use_ondemand_fallback:
207
215
  return FallbackRequestRateAutoscaler(service_name, spec)
216
+ elif isinstance(spec.target_qps_per_replica, dict):
217
+ # Use instance-aware autoscaler
218
+ # when target_qps_per_replica is a dict
219
+ return InstanceAwareRequestRateAutoscaler(service_name, spec)
208
220
  else:
209
221
  return RequestRateAutoscaler(service_name, spec)
210
222
 
@@ -399,6 +411,8 @@ class _AutoscalerWithHysteresis(Autoscaler):
399
411
  # `_set_target_num_replicas_with_hysteresis` to have the replicas
400
412
  # quickly scale after each update.
401
413
  self.target_num_replicas = self._calculate_target_num_replicas()
414
+ logger.debug(f'Target number of replicas: {self.target_num_replicas}'
415
+ 'after update_version.')
402
416
  # Cleanup hysteresis counters.
403
417
  self.upscale_counter = 0
404
418
  self.downscale_counter = 0
@@ -456,20 +470,28 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
456
470
  request_timestamps: All request timestamps within the window.
457
471
  """
458
472
  super().__init__(service_name, spec)
459
- self.target_qps_per_replica: Optional[
460
- float] = spec.target_qps_per_replica
473
+ self.target_qps_per_replica: Optional[Union[float, Dict[
474
+ str, float]]] = spec.target_qps_per_replica
461
475
  self.qps_window_size: int = constants.AUTOSCALER_QPS_WINDOW_SIZE_SECONDS
462
476
  self.request_timestamps: List[float] = []
463
477
 
464
478
  def _calculate_target_num_replicas(self) -> int:
465
479
  if self.target_qps_per_replica is None:
466
480
  return self.min_replicas
481
+
482
+ # RequestRateAutoscaler should only handle float values
483
+ if isinstance(self.target_qps_per_replica, dict):
484
+ raise ValueError('RequestRateAutoscaler does not support dict '
485
+ 'target_qps_per_replica. Should use '
486
+ 'InstanceAwareRequestRateAutoscaler instead.')
487
+
467
488
  num_requests_per_second = len(
468
489
  self.request_timestamps) / self.qps_window_size
469
- target_num_replicas = math.ceil(num_requests_per_second /
470
- self.target_qps_per_replica)
490
+ target_num_replicas = \
491
+ math.ceil(num_requests_per_second / self.target_qps_per_replica)
471
492
  logger.info(f'Requests per second: {num_requests_per_second}. '
472
493
  f'Target number of replicas: {target_num_replicas}.')
494
+
473
495
  return self._clip_target_num_replicas(target_num_replicas)
474
496
 
475
497
  def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
@@ -502,6 +524,7 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
502
524
  ) -> List[AutoscalerDecision]:
503
525
  """Generate Autoscaling decisions based on request rate."""
504
526
 
527
+ # Use standard hysteresis-based logic (non-instance-aware)
505
528
  self._set_target_num_replicas_with_hysteresis()
506
529
 
507
530
  latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = []
@@ -530,6 +553,7 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
530
553
  if len(latest_nonterminal_replicas) > target_num_replicas:
531
554
  num_replicas_to_scale_down = (len(latest_nonterminal_replicas) -
532
555
  target_num_replicas)
556
+ # Use standard downscaling logic
533
557
  replicas_to_scale_down = (
534
558
  _select_nonterminal_replicas_to_scale_down(
535
559
  num_replicas_to_scale_down, latest_nonterminal_replicas))
@@ -554,6 +578,334 @@ class RequestRateAutoscaler(_AutoscalerWithHysteresis):
554
578
  logger.info(f'Remaining dynamic states: {dynamic_states}')
555
579
 
556
580
 
581
class InstanceAwareRequestRateAutoscaler(RequestRateAutoscaler):
    """Instance-aware RequestRateAutoscaler:
    Autoscale based on each replica's GPU-specific QPS.

    This autoscaler considers different QPS targets for different GPU types
    when target_qps_per_replica is provided as a dictionary mapping GPU types
    to their respective QPS targets.
    """

    def __init__(self, service_name: str,
                 spec: 'service_spec.SkyServiceSpec') -> None:
        """Initialize the autoscaler from a service spec.

        Args:
            service_name: Forwarded to the parent constructor.
            spec: Service spec; its ``target_qps_per_replica`` must be a
                dict.

        Raises:
            AssertionError: If ``spec.target_qps_per_replica`` is not a
                dict.
        """
        super().__init__(service_name, spec)
        # Ensure target_qps_per_replica is a dict for instance-aware logic
        assert isinstance(spec.target_qps_per_replica, dict), \
            'InstanceAware Autoscaler requires dict type target_qps_per_replica'
        # Re-assign after the isinstance check above.
        self.target_qps_per_replica = spec.target_qps_per_replica

    def _generate_scaling_decisions(
        self,
        replica_infos: List['replica_managers.ReplicaInfo'],
    ) -> List[AutoscalerDecision]:
        """Generate autoscaling decisions with instance-aware logic.

        Updates the target replica count first, then compares the final
        target against the number of non-terminal replicas of the latest
        version and emits one decision per replica to add or remove.

        Args:
            replica_infos: All replicas known to the caller.

        Returns:
            SCALE_UP decisions (``target=None``) or SCALE_DOWN decisions
            (``target=<replica_id>``); empty when the counts already match.
        """
        # Always use instance-aware logic
        # since target_qps_per_replica is guaranteed to be dict
        self._set_target_num_replicas_with_instance_aware_logic(replica_infos)

        latest_nonterminal_replicas: List['replica_managers.ReplicaInfo'] = [
            info for info in replica_infos
            if not info.is_terminal and info.version == self.latest_version
        ]

        target_num_replicas = self.get_final_target_num_replicas()
        current_num_replicas = len(latest_nonterminal_replicas)

        scaling_decisions: List[AutoscalerDecision] = []
        num_scale_up = 0
        num_scale_down = 0

        # Decide if to scale up or down.
        if target_num_replicas > current_num_replicas:
            num_scale_up = target_num_replicas - current_num_replicas
            # No resources_override to use when scaling up
            scaling_decisions.extend(
                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_UP,
                                   target=None)
                for _ in range(num_scale_up))
        elif target_num_replicas < current_num_replicas:
            num_replicas_to_scale_down = (current_num_replicas -
                                          target_num_replicas)
            # Use instance-aware scale down logic
            replicas_to_scale_down = self._select_replicas_to_scale_down_by_qps(
                num_replicas_to_scale_down, latest_nonterminal_replicas)
            num_scale_down = len(replicas_to_scale_down)
            scaling_decisions.extend(
                AutoscalerDecision(AutoscalerDecisionOperator.SCALE_DOWN,
                                   target=replica_id)
                for replica_id in replicas_to_scale_down)

        # Per the original author's note, outdated replicas are handled by
        # the base class generate_scaling_decisions, so they are not handled
        # here.

        logger.info(f'Scaling decisions: '
                    f'{num_scale_up} scale up, '
                    f'{num_scale_down} scale down '
                    f'(latest nonterminal: {current_num_replicas}, '
                    f'target: {target_num_replicas})')

        return scaling_decisions

    def _set_target_num_replicas_with_instance_aware_logic(
            self, replica_infos: List['replica_managers.ReplicaInfo']) -> None:
        """Set target_num_replicas using instance-aware logic.

        Computes a candidate target from the current request rate and the
        per-GPU QPS capacity of the replicas, then applies the
        upscale/downscale counters before committing it to
        ``self.target_num_replicas``.

        Args:
            replica_infos: All replicas known to the caller.

        Raises:
            AssertionError: If ``self.target_qps_per_replica`` is not a
                dict.
        """
        assert isinstance(self.target_qps_per_replica,
                          dict), 'Expected dict for instance-aware logic'
        target_qps_dict = self.target_qps_per_replica

        num_requests_per_second = len(
            self.request_timestamps) / self.qps_window_size

        total_qps = self._calculate_total_qps_from_replicas(replica_infos)
        if total_qps > 0 and num_requests_per_second >= total_qps:
            # for upscaling, max_target_qps is the standard qps
            max_target_qps = max(target_qps_dict.values())
            over_request_num = num_requests_per_second - total_qps
            # NOTE(review): this counts every entry in replica_infos,
            # whereas total_qps only counts READY/STARTING/PROVISIONING
            # replicas. Confirm that counting the others is intended.
            current_num_replicas = len(replica_infos)
            raw_target_num = current_num_replicas + math.ceil(
                over_request_num / max_target_qps)
            target_num_replicas = self._clip_target_num_replicas(
                raw_target_num)
            logger.info(
                f'Instance-aware autoscaling: total QPS {total_qps}, '
                f'num_requests_per_second: {num_requests_per_second}, '
                f'upscaling, using maximum QPS {max_target_qps} '
                f'from {target_qps_dict}, '
                f'target replicas: {target_num_replicas}')
        elif total_qps > 0:
            # for downscaling, use qps for every ready_target_qps_list
            # to calculate target_num_replicas
            ready_target_qps_list = sorted(
                self._extract_target_qps_list_from_ready_replicas(
                    replica_infos),
                reverse=True)
            if not ready_target_qps_list:
                # Fallback to maximum QPS from config if no ready replicas
                ready_target_qps_list = [max(target_qps_dict.values())]

            # Take replicas in descending QPS order until their summed
            # target QPS exceeds the request rate (or the list runs out).
            raw_target_num = 0
            qps_sum = 0.0
            for qps in ready_target_qps_list:
                raw_target_num += 1
                qps_sum += qps
                if qps_sum > num_requests_per_second:
                    break

            target_num_replicas = self._clip_target_num_replicas(
                raw_target_num)
            logger.info(
                f'Instance-aware autoscaling: total QPS {total_qps}, '
                f'num_requests_per_second: {num_requests_per_second}, '
                f'downscaling, using ready QPS list '
                f'{ready_target_qps_list}, '
                f'target replicas: {target_num_replicas}')
        else:
            # no replica is ready; use the normal min_replicas
            target_num_replicas = self._clip_target_num_replicas(
                self.min_replicas)
            logger.info(f'Instance-aware autoscaling: no replica QPS available,'
                        f' target replicas: {target_num_replicas}')

        # Apply hysteresis logic
        old_target_num_replicas = self.target_num_replicas

        # Faster scale up when there is no replica.
        if self.target_num_replicas == 0:
            self.target_num_replicas = target_num_replicas
        elif target_num_replicas > self.target_num_replicas:
            self.upscale_counter += 1
            self.downscale_counter = 0
            if self.upscale_counter >= self.scale_up_threshold:
                self.upscale_counter = 0
                self.target_num_replicas = target_num_replicas
        elif target_num_replicas < self.target_num_replicas:
            self.downscale_counter += 1
            self.upscale_counter = 0
            if self.downscale_counter >= self.scale_down_threshold:
                self.downscale_counter = 0
                self.target_num_replicas = target_num_replicas
        else:
            self.upscale_counter = self.downscale_counter = 0

        logger.info(
            f'Instance-aware: Old target number of replicas: '
            f'{old_target_num_replicas}. '
            f'Current target number of replicas: {target_num_replicas}. '
            f'Final target number of replicas: {self.target_num_replicas}. '
            f'Num overprovision: {self.num_overprovision}. '
            f'Upscale counter: {self.upscale_counter}/'
            f'{self.scale_up_threshold}. '
            f'Downscale counter: {self.downscale_counter}/'
            f'{self.scale_down_threshold}. ')

    def _calculate_total_qps_from_replicas(
            self, replica_infos: List['replica_managers.ReplicaInfo']) -> float:
        """Calculate total QPS based on current replica GPU types.

        Only replicas whose status is READY, STARTING or PROVISIONING
        contribute; all others are logged and skipped.

        Args:
            replica_infos: Replicas to inspect.

        Returns:
            Sum of the per-GPU target QPS of the counted replicas
            (``0.0`` when none qualify).
        """
        # Built once rather than on every loop iteration.
        valid_statuses = (
            serve_state.ReplicaStatus.READY,
            serve_state.ReplicaStatus.STARTING,
            serve_state.ReplicaStatus.PROVISIONING,
        )
        total_qps = 0.0
        logger.info(f'Calculating total QPS from {len(replica_infos)} replicas')

        for replica_info in replica_infos:
            # Skip non-valid replicas
            if replica_info.status not in valid_statuses:
                logger.info(f'Skipping replica {replica_info.replica_id} '
                            f'with status: {replica_info.status}')
                continue

            gpu_type = self._get_gpu_type_from_replica_info(replica_info)
            logger.info(f'Processing replica {replica_info.replica_id} '
                        f'with GPU type: {gpu_type}')

            # Use flexible matching logic
            qps_for_this_gpu = self._get_target_qps_for_gpu_type(gpu_type)
            total_qps += qps_for_this_gpu
            logger.info(f'GPU type {gpu_type} -> {qps_for_this_gpu} QPS')

        logger.info(f'Calculated total QPS: {total_qps}')
        return total_qps

    def _get_target_qps_for_gpu_type(self, gpu_type: str) -> float:
        """Get target QPS for a specific GPU type with flexible matching.

        Lookup order: exact key match; then the first configured key whose
        part before ``':'`` equals ``gpu_type``; finally the smallest
        configured QPS (with a warning).

        Args:
            gpu_type: Accelerator name to look up.

        Returns:
            The matching target QPS, or the minimum configured QPS when
            nothing matches.

        Raises:
            AssertionError: If ``self.target_qps_per_replica`` is not a
                dict.
        """
        assert isinstance(self.target_qps_per_replica,
                          dict), 'Expected dict for instance-aware logic'
        target_qps_dict = self.target_qps_per_replica

        # Direct match first
        if gpu_type in target_qps_dict:
            return target_qps_dict[gpu_type]

        # Try matching by base name (e.g., 'A100' matches 'A100:1')
        for config_key, config_qps in target_qps_dict.items():
            # Remove count suffix (e.g., 'A100:1' -> 'A100')
            if gpu_type == config_key.split(':')[0]:
                return config_qps

        # Fallback to minimum QPS
        logger.warning(f'No matching QPS found for GPU type: {gpu_type}. '
                       f'Available types: {list(target_qps_dict.keys())}. '
                       f'Using minimum QPS as fallback.')
        return min(target_qps_dict.values())

    def _get_gpu_type_from_replica_info(
            self, replica_info: 'replica_managers.ReplicaInfo') -> str:
        """Extract GPU type from ReplicaInfo object.

        Args:
            replica_info: Replica whose handle is inspected.

        Returns:
            The first key of the handle's launched accelerators, or
            ``'unknown'`` when the handle is ``None`` or has no
            accelerators.
        """
        handle = replica_info.handle()
        if handle is None:
            return 'unknown'
        accelerators = handle.launched_resources.accelerators
        if not accelerators:
            return 'unknown'
        # Get the first accelerator type
        return next(iter(accelerators))

    def _extract_target_qps_list_from_ready_replicas(
            self,
            replica_infos: List['replica_managers.ReplicaInfo']) -> List[float]:
        """Extract target QPS list from current READY replicas.

        Args:
            replica_infos: Replicas to inspect.

        Returns:
            One target QPS per READY replica, in input order; empty when no
            replica is READY.
        """
        ready_replica_qps: List[float] = []

        for replica_info in replica_infos:
            # Check if replica is READY
            if replica_info.status != serve_state.ReplicaStatus.READY:
                logger.info(
                    f'Replica {replica_info.replica_id} '
                    f'not ready (status: {replica_info.status}), skipping')
                continue

            gpu_type = self._get_gpu_type_from_replica_info(replica_info)

            # Use flexible matching logic
            qps_for_this_gpu = self._get_target_qps_for_gpu_type(gpu_type)
            ready_replica_qps.append(qps_for_this_gpu)
            logger.info(f'Ready replica {replica_info.replica_id} '
                        f'with GPU {gpu_type}: {qps_for_this_gpu} QPS')

        if ready_replica_qps:
            logger.info(
                f'Target QPS list from ready replicas: {ready_replica_qps}')
        return ready_replica_qps

    def _select_replicas_to_scale_down_by_qps(
            self, num_replicas_to_scale_down: int,
            replica_infos: List['replica_managers.ReplicaInfo']) -> List[int]:
        """Select replicas to scale down (lowest QPS first).

        Args:
            num_replicas_to_scale_down: How many replicas to pick.
            replica_infos: Candidate replicas; terminal ones are never
                selected.

        Returns:
            Replica ids ordered by status scale-down order, then ascending
            target QPS, then ascending version, then descending replica id.
        """
        # Map replica_id -> target_qps for every non-terminal replica.
        # Old-version replicas are included as well so they also get a
        # target_qps assigned.
        replica_qps_map = {}
        for info in replica_infos:
            if info.is_terminal:
                continue

            # Get GPU type directly from replica info
            gpu_type = self._get_gpu_type_from_replica_info(info)

            # Use flexible matching logic
            target_qps = self._get_target_qps_for_gpu_type(gpu_type)

            replica_qps_map[info.replica_id] = float(target_qps)
            logger.info(f'Replica {info.replica_id} '
                        f'with GPU {gpu_type}: {target_qps} QPS')

        # Sort replicas by: 1. status order, 2. target_qps (asc),
        # 3. version (asc), 4. replica_id (desc)
        sorted_replicas = sorted(
            replica_infos,
            key=lambda info: (
                info.status.scale_down_decision_order(),
                replica_qps_map.get(info.replica_id, float('inf')),
                info.version,
                -info.replica_id,
            ))

        selected_replica_ids: List[int] = []
        for info in sorted_replicas:
            if info.is_terminal:
                continue
            selected_replica_ids.append(info.replica_id)
            if len(selected_replica_ids) >= num_replicas_to_scale_down:
                break

        logger.info(
            f'Selected {len(selected_replica_ids)} replicas to scale down: '
            f'{selected_replica_ids}')
        return selected_replica_ids

    def update_version(self, version: int, spec: 'service_spec.SkyServiceSpec',
                       update_mode: serve_utils.UpdateMode) -> None:
        """Switch to a new service version and reload the QPS targets.

        Args:
            version: Forwarded to the ancestor ``update_version``.
            spec: New service spec; its ``target_qps_per_replica`` must be a
                dict.
            update_mode: Forwarded to the ancestor ``update_version``.

        Raises:
            AssertionError: If ``spec.target_qps_per_replica`` is not a
                dict.
        """
        # Deliberately skip RequestRateAutoscaler.update_version and call the
        # next implementation in the MRO.
        super(RequestRateAutoscaler,
              self).update_version(version, spec, update_mode)
        # Ensure it's a dict before re-assigning.
        assert isinstance(spec.target_qps_per_replica, dict), \
            'InstanceAware Autoscaler requires dict type target_qps_per_replica'
        self.target_qps_per_replica = spec.target_qps_per_replica
907
+
908
+
557
909
  class FallbackRequestRateAutoscaler(RequestRateAutoscaler):
558
910
  """FallbackRequestRateAutoscaler
559
911