skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (397) hide show
  1. sky/__init__.py +10 -2
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +20 -0
  14. sky/authentication.py +157 -263
  15. sky/backends/__init__.py +3 -2
  16. sky/backends/backend.py +11 -3
  17. sky/backends/backend_utils.py +588 -184
  18. sky/backends/cloud_vm_ray_backend.py +1088 -904
  19. sky/backends/local_docker_backend.py +9 -5
  20. sky/backends/task_codegen.py +633 -0
  21. sky/backends/wheel_utils.py +18 -0
  22. sky/catalog/__init__.py +8 -0
  23. sky/catalog/aws_catalog.py +4 -0
  24. sky/catalog/common.py +19 -1
  25. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  26. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  27. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  28. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  29. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  30. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  31. sky/catalog/kubernetes_catalog.py +24 -28
  32. sky/catalog/primeintellect_catalog.py +95 -0
  33. sky/catalog/runpod_catalog.py +5 -1
  34. sky/catalog/seeweb_catalog.py +184 -0
  35. sky/catalog/shadeform_catalog.py +165 -0
  36. sky/check.py +73 -43
  37. sky/client/cli/command.py +675 -412
  38. sky/client/cli/flags.py +4 -2
  39. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  40. sky/client/cli/utils.py +79 -0
  41. sky/client/common.py +12 -2
  42. sky/client/sdk.py +132 -63
  43. sky/client/sdk_async.py +34 -33
  44. sky/cloud_stores.py +82 -3
  45. sky/clouds/__init__.py +6 -0
  46. sky/clouds/aws.py +337 -129
  47. sky/clouds/azure.py +24 -18
  48. sky/clouds/cloud.py +40 -13
  49. sky/clouds/cudo.py +16 -13
  50. sky/clouds/do.py +9 -7
  51. sky/clouds/fluidstack.py +12 -5
  52. sky/clouds/gcp.py +14 -7
  53. sky/clouds/hyperbolic.py +12 -5
  54. sky/clouds/ibm.py +12 -5
  55. sky/clouds/kubernetes.py +80 -45
  56. sky/clouds/lambda_cloud.py +12 -5
  57. sky/clouds/nebius.py +23 -9
  58. sky/clouds/oci.py +19 -12
  59. sky/clouds/paperspace.py +4 -1
  60. sky/clouds/primeintellect.py +317 -0
  61. sky/clouds/runpod.py +85 -24
  62. sky/clouds/scp.py +12 -8
  63. sky/clouds/seeweb.py +477 -0
  64. sky/clouds/shadeform.py +400 -0
  65. sky/clouds/ssh.py +4 -2
  66. sky/clouds/utils/scp_utils.py +61 -50
  67. sky/clouds/vast.py +33 -27
  68. sky/clouds/vsphere.py +14 -16
  69. sky/core.py +174 -165
  70. sky/dashboard/out/404.html +1 -1
  71. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  72. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  73. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  74. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  76. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  77. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  79. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
  80. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  82. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  83. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  86. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  87. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  88. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  90. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  92. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  93. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  94. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  95. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  96. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  97. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
  98. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
  99. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  100. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  101. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  102. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
  105. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
  106. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  107. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  108. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  109. sky/dashboard/out/clusters/[cluster].html +1 -1
  110. sky/dashboard/out/clusters.html +1 -1
  111. sky/dashboard/out/config.html +1 -1
  112. sky/dashboard/out/index.html +1 -1
  113. sky/dashboard/out/infra/[context].html +1 -1
  114. sky/dashboard/out/infra.html +1 -1
  115. sky/dashboard/out/jobs/[job].html +1 -1
  116. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  117. sky/dashboard/out/jobs.html +1 -1
  118. sky/dashboard/out/users.html +1 -1
  119. sky/dashboard/out/volumes.html +1 -1
  120. sky/dashboard/out/workspace/new.html +1 -1
  121. sky/dashboard/out/workspaces/[name].html +1 -1
  122. sky/dashboard/out/workspaces.html +1 -1
  123. sky/data/data_utils.py +92 -1
  124. sky/data/mounting_utils.py +162 -29
  125. sky/data/storage.py +200 -19
  126. sky/data/storage_utils.py +10 -45
  127. sky/exceptions.py +18 -7
  128. sky/execution.py +74 -31
  129. sky/global_user_state.py +605 -191
  130. sky/jobs/__init__.py +2 -0
  131. sky/jobs/client/sdk.py +101 -4
  132. sky/jobs/client/sdk_async.py +31 -5
  133. sky/jobs/constants.py +15 -8
  134. sky/jobs/controller.py +726 -284
  135. sky/jobs/file_content_utils.py +128 -0
  136. sky/jobs/log_gc.py +193 -0
  137. sky/jobs/recovery_strategy.py +250 -100
  138. sky/jobs/scheduler.py +271 -173
  139. sky/jobs/server/core.py +367 -114
  140. sky/jobs/server/server.py +81 -35
  141. sky/jobs/server/utils.py +89 -35
  142. sky/jobs/state.py +1498 -620
  143. sky/jobs/utils.py +771 -306
  144. sky/logs/agent.py +40 -5
  145. sky/logs/aws.py +9 -19
  146. sky/metrics/utils.py +282 -39
  147. sky/optimizer.py +1 -1
  148. sky/provision/__init__.py +37 -1
  149. sky/provision/aws/config.py +34 -13
  150. sky/provision/aws/instance.py +5 -2
  151. sky/provision/azure/instance.py +5 -3
  152. sky/provision/common.py +2 -0
  153. sky/provision/cudo/instance.py +4 -3
  154. sky/provision/do/instance.py +4 -3
  155. sky/provision/docker_utils.py +97 -26
  156. sky/provision/fluidstack/instance.py +6 -5
  157. sky/provision/gcp/config.py +6 -1
  158. sky/provision/gcp/instance.py +4 -2
  159. sky/provision/hyperbolic/instance.py +4 -2
  160. sky/provision/instance_setup.py +66 -20
  161. sky/provision/kubernetes/__init__.py +2 -0
  162. sky/provision/kubernetes/config.py +7 -44
  163. sky/provision/kubernetes/constants.py +0 -1
  164. sky/provision/kubernetes/instance.py +609 -213
  165. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  166. sky/provision/kubernetes/network.py +12 -8
  167. sky/provision/kubernetes/network_utils.py +8 -25
  168. sky/provision/kubernetes/utils.py +382 -418
  169. sky/provision/kubernetes/volume.py +150 -18
  170. sky/provision/lambda_cloud/instance.py +16 -13
  171. sky/provision/nebius/instance.py +6 -2
  172. sky/provision/nebius/utils.py +103 -86
  173. sky/provision/oci/instance.py +4 -2
  174. sky/provision/paperspace/instance.py +4 -3
  175. sky/provision/primeintellect/__init__.py +10 -0
  176. sky/provision/primeintellect/config.py +11 -0
  177. sky/provision/primeintellect/instance.py +454 -0
  178. sky/provision/primeintellect/utils.py +398 -0
  179. sky/provision/provisioner.py +30 -9
  180. sky/provision/runpod/__init__.py +2 -0
  181. sky/provision/runpod/instance.py +4 -3
  182. sky/provision/runpod/volume.py +69 -13
  183. sky/provision/scp/instance.py +307 -130
  184. sky/provision/seeweb/__init__.py +11 -0
  185. sky/provision/seeweb/config.py +13 -0
  186. sky/provision/seeweb/instance.py +812 -0
  187. sky/provision/shadeform/__init__.py +11 -0
  188. sky/provision/shadeform/config.py +12 -0
  189. sky/provision/shadeform/instance.py +351 -0
  190. sky/provision/shadeform/shadeform_utils.py +83 -0
  191. sky/provision/vast/instance.py +5 -3
  192. sky/provision/volume.py +164 -0
  193. sky/provision/vsphere/common/ssl_helper.py +1 -1
  194. sky/provision/vsphere/common/vapiconnect.py +2 -1
  195. sky/provision/vsphere/common/vim_utils.py +3 -2
  196. sky/provision/vsphere/instance.py +8 -6
  197. sky/provision/vsphere/vsphere_utils.py +8 -1
  198. sky/resources.py +11 -3
  199. sky/schemas/api/responses.py +107 -6
  200. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  201. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  202. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  203. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  204. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  205. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  206. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  207. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  208. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  209. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  210. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  211. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  212. sky/schemas/generated/jobsv1_pb2.py +86 -0
  213. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  214. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  215. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  216. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  217. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  218. sky/schemas/generated/servev1_pb2.py +58 -0
  219. sky/schemas/generated/servev1_pb2.pyi +115 -0
  220. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  221. sky/serve/autoscalers.py +2 -0
  222. sky/serve/client/impl.py +55 -21
  223. sky/serve/constants.py +4 -3
  224. sky/serve/controller.py +17 -11
  225. sky/serve/load_balancing_policies.py +1 -1
  226. sky/serve/replica_managers.py +219 -142
  227. sky/serve/serve_rpc_utils.py +179 -0
  228. sky/serve/serve_state.py +63 -54
  229. sky/serve/serve_utils.py +145 -109
  230. sky/serve/server/core.py +46 -25
  231. sky/serve/server/impl.py +311 -162
  232. sky/serve/server/server.py +21 -19
  233. sky/serve/service.py +84 -68
  234. sky/serve/service_spec.py +45 -7
  235. sky/server/auth/loopback.py +38 -0
  236. sky/server/auth/oauth2_proxy.py +12 -7
  237. sky/server/common.py +47 -24
  238. sky/server/config.py +62 -28
  239. sky/server/constants.py +9 -1
  240. sky/server/daemons.py +109 -38
  241. sky/server/metrics.py +76 -96
  242. sky/server/middleware_utils.py +166 -0
  243. sky/server/requests/executor.py +381 -145
  244. sky/server/requests/payloads.py +71 -18
  245. sky/server/requests/preconditions.py +15 -13
  246. sky/server/requests/request_names.py +121 -0
  247. sky/server/requests/requests.py +507 -157
  248. sky/server/requests/serializers/decoders.py +48 -17
  249. sky/server/requests/serializers/encoders.py +85 -20
  250. sky/server/requests/threads.py +117 -0
  251. sky/server/rest.py +116 -24
  252. sky/server/server.py +420 -172
  253. sky/server/stream_utils.py +219 -45
  254. sky/server/uvicorn.py +30 -19
  255. sky/setup_files/MANIFEST.in +6 -1
  256. sky/setup_files/alembic.ini +8 -0
  257. sky/setup_files/dependencies.py +62 -19
  258. sky/setup_files/setup.py +44 -44
  259. sky/sky_logging.py +13 -5
  260. sky/skylet/attempt_skylet.py +106 -24
  261. sky/skylet/configs.py +3 -1
  262. sky/skylet/constants.py +111 -26
  263. sky/skylet/events.py +64 -10
  264. sky/skylet/job_lib.py +141 -104
  265. sky/skylet/log_lib.py +233 -5
  266. sky/skylet/log_lib.pyi +40 -2
  267. sky/skylet/providers/ibm/node_provider.py +12 -8
  268. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  269. sky/skylet/runtime_utils.py +21 -0
  270. sky/skylet/services.py +524 -0
  271. sky/skylet/skylet.py +22 -1
  272. sky/skylet/subprocess_daemon.py +104 -29
  273. sky/skypilot_config.py +99 -79
  274. sky/ssh_node_pools/server.py +9 -8
  275. sky/task.py +221 -104
  276. sky/templates/aws-ray.yml.j2 +1 -0
  277. sky/templates/azure-ray.yml.j2 +1 -0
  278. sky/templates/cudo-ray.yml.j2 +1 -0
  279. sky/templates/do-ray.yml.j2 +1 -0
  280. sky/templates/fluidstack-ray.yml.j2 +1 -0
  281. sky/templates/gcp-ray.yml.j2 +1 -0
  282. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  283. sky/templates/ibm-ray.yml.j2 +2 -1
  284. sky/templates/jobs-controller.yaml.j2 +3 -0
  285. sky/templates/kubernetes-ray.yml.j2 +196 -55
  286. sky/templates/lambda-ray.yml.j2 +1 -0
  287. sky/templates/nebius-ray.yml.j2 +3 -0
  288. sky/templates/oci-ray.yml.j2 +1 -0
  289. sky/templates/paperspace-ray.yml.j2 +1 -0
  290. sky/templates/primeintellect-ray.yml.j2 +72 -0
  291. sky/templates/runpod-ray.yml.j2 +1 -0
  292. sky/templates/scp-ray.yml.j2 +1 -0
  293. sky/templates/seeweb-ray.yml.j2 +171 -0
  294. sky/templates/shadeform-ray.yml.j2 +73 -0
  295. sky/templates/vast-ray.yml.j2 +1 -0
  296. sky/templates/vsphere-ray.yml.j2 +1 -0
  297. sky/templates/websocket_proxy.py +188 -43
  298. sky/usage/usage_lib.py +16 -4
  299. sky/users/permission.py +60 -43
  300. sky/utils/accelerator_registry.py +6 -3
  301. sky/utils/admin_policy_utils.py +18 -5
  302. sky/utils/annotations.py +22 -0
  303. sky/utils/asyncio_utils.py +78 -0
  304. sky/utils/atomic.py +1 -1
  305. sky/utils/auth_utils.py +153 -0
  306. sky/utils/cli_utils/status_utils.py +12 -7
  307. sky/utils/cluster_utils.py +28 -6
  308. sky/utils/command_runner.py +88 -27
  309. sky/utils/command_runner.pyi +36 -3
  310. sky/utils/common.py +3 -1
  311. sky/utils/common_utils.py +37 -4
  312. sky/utils/config_utils.py +1 -14
  313. sky/utils/context.py +127 -40
  314. sky/utils/context_utils.py +73 -18
  315. sky/utils/controller_utils.py +229 -70
  316. sky/utils/db/db_utils.py +95 -18
  317. sky/utils/db/kv_cache.py +149 -0
  318. sky/utils/db/migration_utils.py +24 -7
  319. sky/utils/env_options.py +4 -0
  320. sky/utils/git.py +559 -1
  321. sky/utils/kubernetes/create_cluster.sh +15 -30
  322. sky/utils/kubernetes/delete_cluster.sh +10 -7
  323. sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
  324. sky/utils/kubernetes/generate_kind_config.py +6 -66
  325. sky/utils/kubernetes/gpu_labeler.py +13 -3
  326. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  327. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  328. sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
  329. sky/utils/kubernetes/rsync_helper.sh +11 -3
  330. sky/utils/kubernetes_enums.py +7 -15
  331. sky/utils/lock_events.py +4 -4
  332. sky/utils/locks.py +128 -31
  333. sky/utils/log_utils.py +0 -319
  334. sky/utils/resource_checker.py +13 -10
  335. sky/utils/resources_utils.py +53 -29
  336. sky/utils/rich_utils.py +8 -4
  337. sky/utils/schemas.py +107 -52
  338. sky/utils/subprocess_utils.py +17 -4
  339. sky/utils/thread_utils.py +91 -0
  340. sky/utils/timeline.py +2 -1
  341. sky/utils/ux_utils.py +35 -1
  342. sky/utils/volume.py +88 -4
  343. sky/utils/yaml_utils.py +9 -0
  344. sky/volumes/client/sdk.py +48 -10
  345. sky/volumes/server/core.py +59 -22
  346. sky/volumes/server/server.py +46 -17
  347. sky/volumes/volume.py +54 -42
  348. sky/workspaces/core.py +57 -21
  349. sky/workspaces/server.py +13 -12
  350. sky_templates/README.md +3 -0
  351. sky_templates/__init__.py +3 -0
  352. sky_templates/ray/__init__.py +0 -0
  353. sky_templates/ray/start_cluster +183 -0
  354. sky_templates/ray/stop_cluster +75 -0
  355. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
  356. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  357. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  358. sky/client/cli/git.py +0 -549
  359. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  360. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  361. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  362. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  363. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  364. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  365. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  366. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  367. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  368. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  369. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  370. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  371. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  372. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  373. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  374. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  375. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  376. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  377. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  378. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  379. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  380. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  381. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  382. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  383. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  384. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  385. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  386. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  387. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  388. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  389. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  390. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  391. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  392. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  393. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  394. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  395. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
  396. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  397. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,115 @@
1
+ from google.protobuf.internal import containers as _containers
2
+ from google.protobuf import descriptor as _descriptor
3
+ from google.protobuf import message as _message
4
+ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union
5
+
6
+ DESCRIPTOR: _descriptor.FileDescriptor
7
+
8
+ class ServiceNames(_message.Message):
9
+ __slots__ = ("names",)
10
+ NAMES_FIELD_NUMBER: _ClassVar[int]
11
+ names: _containers.RepeatedScalarFieldContainer[str]
12
+ def __init__(self, names: _Optional[_Iterable[str]] = ...) -> None: ...
13
+
14
+ class ServiceStatus(_message.Message):
15
+ __slots__ = ("status",)
16
+ class StatusEntry(_message.Message):
17
+ __slots__ = ("key", "value")
18
+ KEY_FIELD_NUMBER: _ClassVar[int]
19
+ VALUE_FIELD_NUMBER: _ClassVar[int]
20
+ key: str
21
+ value: str
22
+ def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
23
+ STATUS_FIELD_NUMBER: _ClassVar[int]
24
+ status: _containers.ScalarMap[str, str]
25
+ def __init__(self, status: _Optional[_Mapping[str, str]] = ...) -> None: ...
26
+
27
+ class GetServiceStatusRequest(_message.Message):
28
+ __slots__ = ("service_names", "pool")
29
+ SERVICE_NAMES_FIELD_NUMBER: _ClassVar[int]
30
+ POOL_FIELD_NUMBER: _ClassVar[int]
31
+ service_names: ServiceNames
32
+ pool: bool
33
+ def __init__(self, service_names: _Optional[_Union[ServiceNames, _Mapping]] = ..., pool: bool = ...) -> None: ...
34
+
35
+ class GetServiceStatusResponse(_message.Message):
36
+ __slots__ = ("statuses",)
37
+ STATUSES_FIELD_NUMBER: _ClassVar[int]
38
+ statuses: _containers.RepeatedCompositeFieldContainer[ServiceStatus]
39
+ def __init__(self, statuses: _Optional[_Iterable[_Union[ServiceStatus, _Mapping]]] = ...) -> None: ...
40
+
41
+ class AddVersionRequest(_message.Message):
42
+ __slots__ = ("service_name",)
43
+ SERVICE_NAME_FIELD_NUMBER: _ClassVar[int]
44
+ service_name: str
45
+ def __init__(self, service_name: _Optional[str] = ...) -> None: ...
46
+
47
+ class AddVersionResponse(_message.Message):
48
+ __slots__ = ("version",)
49
+ VERSION_FIELD_NUMBER: _ClassVar[int]
50
+ version: int
51
+ def __init__(self, version: _Optional[int] = ...) -> None: ...
52
+
53
+ class TerminateServicesRequest(_message.Message):
54
+ __slots__ = ("service_names", "purge", "pool")
55
+ SERVICE_NAMES_FIELD_NUMBER: _ClassVar[int]
56
+ PURGE_FIELD_NUMBER: _ClassVar[int]
57
+ POOL_FIELD_NUMBER: _ClassVar[int]
58
+ service_names: ServiceNames
59
+ purge: bool
60
+ pool: bool
61
+ def __init__(self, service_names: _Optional[_Union[ServiceNames, _Mapping]] = ..., purge: bool = ..., pool: bool = ...) -> None: ...
62
+
63
+ class TerminateServicesResponse(_message.Message):
64
+ __slots__ = ("message",)
65
+ MESSAGE_FIELD_NUMBER: _ClassVar[int]
66
+ message: str
67
+ def __init__(self, message: _Optional[str] = ...) -> None: ...
68
+
69
+ class TerminateReplicaRequest(_message.Message):
70
+ __slots__ = ("service_name", "replica_id", "purge")
71
+ SERVICE_NAME_FIELD_NUMBER: _ClassVar[int]
72
+ REPLICA_ID_FIELD_NUMBER: _ClassVar[int]
73
+ PURGE_FIELD_NUMBER: _ClassVar[int]
74
+ service_name: str
75
+ replica_id: int
76
+ purge: bool
77
+ def __init__(self, service_name: _Optional[str] = ..., replica_id: _Optional[int] = ..., purge: bool = ...) -> None: ...
78
+
79
+ class TerminateReplicaResponse(_message.Message):
80
+ __slots__ = ("message",)
81
+ MESSAGE_FIELD_NUMBER: _ClassVar[int]
82
+ message: str
83
+ def __init__(self, message: _Optional[str] = ...) -> None: ...
84
+
85
+ class WaitServiceRegistrationRequest(_message.Message):
86
+ __slots__ = ("service_name", "job_id", "pool")
87
+ SERVICE_NAME_FIELD_NUMBER: _ClassVar[int]
88
+ JOB_ID_FIELD_NUMBER: _ClassVar[int]
89
+ POOL_FIELD_NUMBER: _ClassVar[int]
90
+ service_name: str
91
+ job_id: int
92
+ pool: bool
93
+ def __init__(self, service_name: _Optional[str] = ..., job_id: _Optional[int] = ..., pool: bool = ...) -> None: ...
94
+
95
+ class WaitServiceRegistrationResponse(_message.Message):
96
+ __slots__ = ("lb_port",)
97
+ LB_PORT_FIELD_NUMBER: _ClassVar[int]
98
+ lb_port: int
99
+ def __init__(self, lb_port: _Optional[int] = ...) -> None: ...
100
+
101
+ class UpdateServiceRequest(_message.Message):
102
+ __slots__ = ("service_name", "version", "mode", "pool")
103
+ SERVICE_NAME_FIELD_NUMBER: _ClassVar[int]
104
+ VERSION_FIELD_NUMBER: _ClassVar[int]
105
+ MODE_FIELD_NUMBER: _ClassVar[int]
106
+ POOL_FIELD_NUMBER: _ClassVar[int]
107
+ service_name: str
108
+ version: int
109
+ mode: str
110
+ pool: bool
111
+ def __init__(self, service_name: _Optional[str] = ..., version: _Optional[int] = ..., mode: _Optional[str] = ..., pool: bool = ...) -> None: ...
112
+
113
+ class UpdateServiceResponse(_message.Message):
114
+ __slots__ = ()
115
+ def __init__(self) -> None: ...
@@ -0,0 +1,322 @@
1
+ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
2
+ """Client and server classes corresponding to protobuf-defined services."""
3
+ import grpc
4
+ import warnings
5
+
6
+ from sky.schemas.generated import servev1_pb2 as sky_dot_schemas_dot_generated_dot_servev1__pb2
7
+
8
+ GRPC_GENERATED_VERSION = '1.63.0'
9
+ GRPC_VERSION = grpc.__version__
10
+ EXPECTED_ERROR_RELEASE = '1.65.0'
11
+ SCHEDULED_RELEASE_DATE = 'June 25, 2024'
12
+ _version_not_supported = False
13
+
14
+ try:
15
+ from grpc._utilities import first_version_is_lower
16
+ _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
17
+ except ImportError:
18
+ _version_not_supported = True
19
+
20
+ if _version_not_supported:
21
+ warnings.warn(
22
+ f'The grpc package installed is at version {GRPC_VERSION},'
23
+ + f' but the generated code in sky/schemas/generated/servev1_pb2_grpc.py depends on'
24
+ + f' grpcio>={GRPC_GENERATED_VERSION}.'
25
+ + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
26
+ + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
27
+ + f' This warning will become an error in {EXPECTED_ERROR_RELEASE},'
28
+ + f' scheduled for release on {SCHEDULED_RELEASE_DATE}.',
29
+ RuntimeWarning
30
+ )
31
+
32
+
33
+ class ServeServiceStub(object):
34
+ """Missing associated documentation comment in .proto file."""
35
+
36
+ def __init__(self, channel):
37
+ """Constructor.
38
+
39
+ Args:
40
+ channel: A grpc.Channel.
41
+ """
42
+ self.GetServiceStatus = channel.unary_unary(
43
+ '/serve.v1.ServeService/GetServiceStatus',
44
+ request_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.GetServiceStatusRequest.SerializeToString,
45
+ response_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.GetServiceStatusResponse.FromString,
46
+ _registered_method=True)
47
+ self.AddVersion = channel.unary_unary(
48
+ '/serve.v1.ServeService/AddVersion',
49
+ request_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.AddVersionRequest.SerializeToString,
50
+ response_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.AddVersionResponse.FromString,
51
+ _registered_method=True)
52
+ self.TerminateServices = channel.unary_unary(
53
+ '/serve.v1.ServeService/TerminateServices',
54
+ request_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateServicesRequest.SerializeToString,
55
+ response_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateServicesResponse.FromString,
56
+ _registered_method=True)
57
+ self.TerminateReplica = channel.unary_unary(
58
+ '/serve.v1.ServeService/TerminateReplica',
59
+ request_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateReplicaRequest.SerializeToString,
60
+ response_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateReplicaResponse.FromString,
61
+ _registered_method=True)
62
+ self.WaitServiceRegistration = channel.unary_unary(
63
+ '/serve.v1.ServeService/WaitServiceRegistration',
64
+ request_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.WaitServiceRegistrationRequest.SerializeToString,
65
+ response_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.WaitServiceRegistrationResponse.FromString,
66
+ _registered_method=True)
67
+ self.UpdateService = channel.unary_unary(
68
+ '/serve.v1.ServeService/UpdateService',
69
+ request_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.UpdateServiceRequest.SerializeToString,
70
+ response_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.UpdateServiceResponse.FromString,
71
+ _registered_method=True)
72
+
73
+
74
+ class ServeServiceServicer(object):
75
+ """Missing associated documentation comment in .proto file."""
76
+
77
+ def GetServiceStatus(self, request, context):
78
+ """Get status of service.
79
+ """
80
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
81
+ context.set_details('Method not implemented!')
82
+ raise NotImplementedError('Method not implemented!')
83
+
84
+ def AddVersion(self, request, context):
85
+ """Add version to service.
86
+ """
87
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
88
+ context.set_details('Method not implemented!')
89
+ raise NotImplementedError('Method not implemented!')
90
+
91
+ def TerminateServices(self, request, context):
92
+ """Terminate services.
93
+ """
94
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
95
+ context.set_details('Method not implemented!')
96
+ raise NotImplementedError('Method not implemented!')
97
+
98
+ def TerminateReplica(self, request, context):
99
+ """Terminate replica.
100
+ """
101
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
102
+ context.set_details('Method not implemented!')
103
+ raise NotImplementedError('Method not implemented!')
104
+
105
+ def WaitServiceRegistration(self, request, context):
106
+ """Wait for service registration.
107
+ """
108
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
109
+ context.set_details('Method not implemented!')
110
+ raise NotImplementedError('Method not implemented!')
111
+
112
+ def UpdateService(self, request, context):
113
+ """Update service.
114
+ """
115
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
116
+ context.set_details('Method not implemented!')
117
+ raise NotImplementedError('Method not implemented!')
118
+
119
+
120
+ def add_ServeServiceServicer_to_server(servicer, server):
121
+ rpc_method_handlers = {
122
+ 'GetServiceStatus': grpc.unary_unary_rpc_method_handler(
123
+ servicer.GetServiceStatus,
124
+ request_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.GetServiceStatusRequest.FromString,
125
+ response_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.GetServiceStatusResponse.SerializeToString,
126
+ ),
127
+ 'AddVersion': grpc.unary_unary_rpc_method_handler(
128
+ servicer.AddVersion,
129
+ request_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.AddVersionRequest.FromString,
130
+ response_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.AddVersionResponse.SerializeToString,
131
+ ),
132
+ 'TerminateServices': grpc.unary_unary_rpc_method_handler(
133
+ servicer.TerminateServices,
134
+ request_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateServicesRequest.FromString,
135
+ response_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateServicesResponse.SerializeToString,
136
+ ),
137
+ 'TerminateReplica': grpc.unary_unary_rpc_method_handler(
138
+ servicer.TerminateReplica,
139
+ request_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateReplicaRequest.FromString,
140
+ response_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateReplicaResponse.SerializeToString,
141
+ ),
142
+ 'WaitServiceRegistration': grpc.unary_unary_rpc_method_handler(
143
+ servicer.WaitServiceRegistration,
144
+ request_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.WaitServiceRegistrationRequest.FromString,
145
+ response_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.WaitServiceRegistrationResponse.SerializeToString,
146
+ ),
147
+ 'UpdateService': grpc.unary_unary_rpc_method_handler(
148
+ servicer.UpdateService,
149
+ request_deserializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.UpdateServiceRequest.FromString,
150
+ response_serializer=sky_dot_schemas_dot_generated_dot_servev1__pb2.UpdateServiceResponse.SerializeToString,
151
+ ),
152
+ }
153
+ generic_handler = grpc.method_handlers_generic_handler(
154
+ 'serve.v1.ServeService', rpc_method_handlers)
155
+ server.add_generic_rpc_handlers((generic_handler,))
156
+
157
+
158
+ # This class is part of an EXPERIMENTAL API.
159
+ class ServeService(object):
160
+ """Missing associated documentation comment in .proto file."""
161
+
162
+ @staticmethod
163
+ def GetServiceStatus(request,
164
+ target,
165
+ options=(),
166
+ channel_credentials=None,
167
+ call_credentials=None,
168
+ insecure=False,
169
+ compression=None,
170
+ wait_for_ready=None,
171
+ timeout=None,
172
+ metadata=None):
173
+ return grpc.experimental.unary_unary(
174
+ request,
175
+ target,
176
+ '/serve.v1.ServeService/GetServiceStatus',
177
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.GetServiceStatusRequest.SerializeToString,
178
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.GetServiceStatusResponse.FromString,
179
+ options,
180
+ channel_credentials,
181
+ insecure,
182
+ call_credentials,
183
+ compression,
184
+ wait_for_ready,
185
+ timeout,
186
+ metadata,
187
+ _registered_method=True)
188
+
189
+ @staticmethod
190
+ def AddVersion(request,
191
+ target,
192
+ options=(),
193
+ channel_credentials=None,
194
+ call_credentials=None,
195
+ insecure=False,
196
+ compression=None,
197
+ wait_for_ready=None,
198
+ timeout=None,
199
+ metadata=None):
200
+ return grpc.experimental.unary_unary(
201
+ request,
202
+ target,
203
+ '/serve.v1.ServeService/AddVersion',
204
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.AddVersionRequest.SerializeToString,
205
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.AddVersionResponse.FromString,
206
+ options,
207
+ channel_credentials,
208
+ insecure,
209
+ call_credentials,
210
+ compression,
211
+ wait_for_ready,
212
+ timeout,
213
+ metadata,
214
+ _registered_method=True)
215
+
216
+ @staticmethod
217
+ def TerminateServices(request,
218
+ target,
219
+ options=(),
220
+ channel_credentials=None,
221
+ call_credentials=None,
222
+ insecure=False,
223
+ compression=None,
224
+ wait_for_ready=None,
225
+ timeout=None,
226
+ metadata=None):
227
+ return grpc.experimental.unary_unary(
228
+ request,
229
+ target,
230
+ '/serve.v1.ServeService/TerminateServices',
231
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateServicesRequest.SerializeToString,
232
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateServicesResponse.FromString,
233
+ options,
234
+ channel_credentials,
235
+ insecure,
236
+ call_credentials,
237
+ compression,
238
+ wait_for_ready,
239
+ timeout,
240
+ metadata,
241
+ _registered_method=True)
242
+
243
+ @staticmethod
244
+ def TerminateReplica(request,
245
+ target,
246
+ options=(),
247
+ channel_credentials=None,
248
+ call_credentials=None,
249
+ insecure=False,
250
+ compression=None,
251
+ wait_for_ready=None,
252
+ timeout=None,
253
+ metadata=None):
254
+ return grpc.experimental.unary_unary(
255
+ request,
256
+ target,
257
+ '/serve.v1.ServeService/TerminateReplica',
258
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateReplicaRequest.SerializeToString,
259
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.TerminateReplicaResponse.FromString,
260
+ options,
261
+ channel_credentials,
262
+ insecure,
263
+ call_credentials,
264
+ compression,
265
+ wait_for_ready,
266
+ timeout,
267
+ metadata,
268
+ _registered_method=True)
269
+
270
+ @staticmethod
271
+ def WaitServiceRegistration(request,
272
+ target,
273
+ options=(),
274
+ channel_credentials=None,
275
+ call_credentials=None,
276
+ insecure=False,
277
+ compression=None,
278
+ wait_for_ready=None,
279
+ timeout=None,
280
+ metadata=None):
281
+ return grpc.experimental.unary_unary(
282
+ request,
283
+ target,
284
+ '/serve.v1.ServeService/WaitServiceRegistration',
285
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.WaitServiceRegistrationRequest.SerializeToString,
286
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.WaitServiceRegistrationResponse.FromString,
287
+ options,
288
+ channel_credentials,
289
+ insecure,
290
+ call_credentials,
291
+ compression,
292
+ wait_for_ready,
293
+ timeout,
294
+ metadata,
295
+ _registered_method=True)
296
+
297
+ @staticmethod
298
+ def UpdateService(request,
299
+ target,
300
+ options=(),
301
+ channel_credentials=None,
302
+ call_credentials=None,
303
+ insecure=False,
304
+ compression=None,
305
+ wait_for_ready=None,
306
+ timeout=None,
307
+ metadata=None):
308
+ return grpc.experimental.unary_unary(
309
+ request,
310
+ target,
311
+ '/serve.v1.ServeService/UpdateService',
312
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.UpdateServiceRequest.SerializeToString,
313
+ sky_dot_schemas_dot_generated_dot_servev1__pb2.UpdateServiceResponse.FromString,
314
+ options,
315
+ channel_credentials,
316
+ insecure,
317
+ call_credentials,
318
+ compression,
319
+ wait_for_ready,
320
+ timeout,
321
+ metadata,
322
+ _registered_method=True)
sky/serve/autoscalers.py CHANGED
@@ -411,6 +411,8 @@ class _AutoscalerWithHysteresis(Autoscaler):
411
411
  # `_set_target_num_replicas_with_hysteresis` to have the replicas
412
412
  # quickly scale after each update.
413
413
  self.target_num_replicas = self._calculate_target_num_replicas()
414
+ logger.debug(f'Target number of replicas: {self.target_num_replicas} '
415
+ 'after update_version.')
414
416
  # Cleanup hysteresis counters.
415
417
  self.upscale_counter = 0
416
418
  self.downscale_counter = 0
sky/serve/client/impl.py CHANGED
@@ -8,6 +8,7 @@ import click
8
8
  from sky.client import common as client_common
9
9
  from sky.server import common as server_common
10
10
  from sky.server.requests import payloads
11
+ from sky.server.requests import request_names
11
12
  from sky.utils import admin_policy_utils
12
13
  from sky.utils import dag_utils
13
14
 
@@ -32,7 +33,9 @@ def up(
32
33
 
33
34
  dag = dag_utils.convert_entrypoint_to_dag(task)
34
35
  with admin_policy_utils.apply_and_use_config_in_current_request(
35
- dag, at_client_side=True) as dag:
36
+ dag,
37
+ request_name=request_names.AdminPolicyRequestName.SERVE_UP,
38
+ at_client_side=True) as dag:
36
39
  sdk.validate(dag)
37
40
  request_id = sdk.optimize(dag)
38
41
  sdk.stream_and_get(request_id)
@@ -77,7 +80,9 @@ def update(
77
80
 
78
81
  dag = dag_utils.convert_entrypoint_to_dag(task)
79
82
  with admin_policy_utils.apply_and_use_config_in_current_request(
80
- dag, at_client_side=True) as dag:
83
+ dag,
84
+ request_name=request_names.AdminPolicyRequestName.SERVE_UPDATE,
85
+ at_client_side=True) as dag:
81
86
  sdk.validate(dag)
82
87
  request_id = sdk.optimize(dag)
83
88
  sdk.stream_and_get(request_id)
@@ -105,7 +110,8 @@ def update(
105
110
 
106
111
 
107
112
  def apply(
108
- task: Union['sky.Task', 'sky.Dag'],
113
+ task: Optional[Union['sky.Task', 'sky.Dag']],
114
+ workers: Optional[int],
109
115
  service_name: str,
110
116
  mode: 'serve_utils.UpdateMode',
111
117
  pool: bool = False,
@@ -117,35 +123,63 @@ def apply(
117
123
  # Avoid circular import.
118
124
  from sky.client import sdk # pylint: disable=import-outside-toplevel
119
125
 
120
- dag = dag_utils.convert_entrypoint_to_dag(task)
121
- with admin_policy_utils.apply_and_use_config_in_current_request(
122
- dag, at_client_side=True) as dag:
123
- sdk.validate(dag)
124
- request_id = sdk.optimize(dag)
125
- sdk.stream_and_get(request_id)
126
- if _need_confirmation:
127
- noun = 'pool' if pool else 'service'
128
- prompt = f'Applying config to {noun} {service_name!r}. Proceed?'
129
- if prompt is not None:
130
- click.confirm(prompt,
131
- default=True,
132
- abort=True,
133
- show_default=True)
134
-
135
- dag = client_common.upload_mounts_to_api_server(dag)
136
- dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
126
+ noun = 'pool' if pool else 'service'
127
+ # There are two cases here. If task is None, we should be trying to
128
+ # update the number of workers in the pool. If task is not None, we should
129
+ # be trying to apply a new config to the pool. The two code paths
130
+ # are slightly different with us needing to craft the dag and validate
131
+ # it if we have a task. In the future we could move this logic to the
132
+ # server side and simplify this code; for the time being we keep it here.
133
+ if task is None:
134
+ if workers is None:
135
+ raise ValueError(f'Cannot create a new {noun} without specifying '
136
+ f'task or workers. Please provide either a task '
137
+ f'or specify the number of workers.')
137
138
 
138
139
  body = payloads.JobsPoolApplyBody(
139
- task=dag_str,
140
+ workers=workers,
140
141
  pool_name=service_name,
141
142
  mode=mode,
142
143
  )
144
+
143
145
  response = server_common.make_authenticated_request(
144
146
  'POST',
145
147
  '/jobs/pool_apply',
146
148
  json=json.loads(body.model_dump_json()),
147
149
  timeout=(5, None))
148
150
  return server_common.get_request_id(response)
151
+ else:
152
+ dag = dag_utils.convert_entrypoint_to_dag(task)
153
+ with admin_policy_utils.apply_and_use_config_in_current_request(
154
+ dag,
155
+ request_name=request_names.AdminPolicyRequestName.
156
+ JOBS_POOL_APPLY,
157
+ at_client_side=True) as dag:
158
+ sdk.validate(dag)
159
+ request_id = sdk.optimize(dag)
160
+ sdk.stream_and_get(request_id)
161
+ if _need_confirmation:
162
+ prompt = f'Applying config to {noun} {service_name!r}. Proceed?'
163
+ if prompt is not None:
164
+ click.confirm(prompt,
165
+ default=True,
166
+ abort=True,
167
+ show_default=True)
168
+
169
+ dag = client_common.upload_mounts_to_api_server(dag)
170
+ dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
171
+
172
+ body = payloads.JobsPoolApplyBody(
173
+ task=dag_str,
174
+ pool_name=service_name,
175
+ mode=mode,
176
+ )
177
+ response = server_common.make_authenticated_request(
178
+ 'POST',
179
+ '/jobs/pool_apply',
180
+ json=json.loads(body.model_dump_json()),
181
+ timeout=(5, None))
182
+ return server_common.get_request_id(response)
149
183
 
150
184
 
151
185
  def down(
sky/serve/constants.py CHANGED
@@ -65,7 +65,8 @@ AUTOSCALER_DEFAULT_DOWNSCALE_DELAY_SECONDS = 1200
65
65
  # TODO(tian): We might need to be careful that service logs can take a lot of
66
66
  # disk space. Maybe we could use a larger disk size, migrate to cloud storage or
67
67
  # do some log rotation.
68
- CONTROLLER_RESOURCES = {'cpus': '4+', 'disk_size': 200}
68
+ # Set the default minimum memory to 8GB so that at least one service can run.
69
+ CONTROLLER_RESOURCES = {'cpus': '4+', 'memory': '8+', 'disk_size': 200}
69
70
  # Autostop config for the serve controller. These are the default values for
70
71
  # serve.controller.autostop in ~/.sky/config.yaml.
71
72
  CONTROLLER_AUTOSTOP = {
@@ -97,7 +98,7 @@ REPLICA_ID_ENV_VAR = 'SKYPILOT_SERVE_REPLICA_ID'
97
98
  # Changelog:
98
99
  # v1.0 - Introduce rolling update.
99
100
  # v2.0 - Added template-replica feature.
100
- # v3.0 - Added cluster pool.
101
+ # v3.0 - Added pool.
101
102
  # v4.0 - Added pool argument to wait_service_registration.
102
103
  # v5.0 - Added pool argument to stream_serve_process_logs & stream_replica_logs.
103
104
  SERVE_VERSION = 5
@@ -106,7 +107,7 @@ TERMINATE_REPLICA_VERSION_MISMATCH_ERROR = (
106
107
  'The version of service is outdated and does not support manually '
107
108
  'terminating replicas. Please terminate the service and spin up again.')
108
109
 
109
- # Dummy run command for cluster pool.
110
+ # Dummy run command for pool.
110
111
  POOL_DUMMY_RUN_COMMAND = 'echo "setup done"'
111
112
 
112
113
  # Error message for max number of services reached.
sky/serve/controller.py CHANGED
@@ -21,7 +21,9 @@ from sky.serve import autoscalers
21
21
  from sky.serve import replica_managers
22
22
  from sky.serve import serve_state
23
23
  from sky.serve import serve_utils
24
+ from sky.skylet import constants
24
25
  from sky.utils import common_utils
26
+ from sky.utils import context_utils
25
27
  from sky.utils import ux_utils
26
28
 
27
29
  logger = sky_logging.init_logger(__name__)
@@ -44,13 +46,12 @@ class SkyServeController:
44
46
  """
45
47
 
46
48
  def __init__(self, service_name: str, service_spec: serve.SkyServiceSpec,
47
- service_task_yaml: str, host: str, port: int) -> None:
49
+ version: int, host: str, port: int) -> None:
48
50
  self._service_name = service_name
49
51
  self._replica_manager: replica_managers.ReplicaManager = (
50
- replica_managers.SkyPilotReplicaManager(
51
- service_name=service_name,
52
- spec=service_spec,
53
- service_task_yaml_path=service_task_yaml))
52
+ replica_managers.SkyPilotReplicaManager(service_name=service_name,
53
+ spec=service_spec,
54
+ version=version))
54
55
  self._autoscaler: autoscalers.Autoscaler = (
55
56
  autoscalers.Autoscaler.from_spec(service_name, service_spec))
56
57
  self._host = host
@@ -172,7 +173,11 @@ class SkyServeController:
172
173
  # See sky/serve/core.py::update
173
174
  latest_task_yaml = serve_utils.generate_task_yaml_file_name(
174
175
  self._service_name, version)
175
- service = serve.SkyServiceSpec.from_yaml(latest_task_yaml)
176
+ with open(latest_task_yaml, 'r', encoding='utf-8') as f:
177
+ yaml_content = f.read()
178
+ service = serve.SkyServiceSpec.from_yaml_str(yaml_content)
179
+ serve_state.add_or_update_version(self._service_name, version,
180
+ service, yaml_content)
176
181
  logger.info(
177
182
  f'Update to new version {version}: {service}')
178
183
 
@@ -283,9 +288,10 @@ class SkyServeController:
283
288
  # TODO(tian): Probably we should support service that will stop the VM in
284
289
  # specific time period.
285
290
  def run_controller(service_name: str, service_spec: serve.SkyServiceSpec,
286
- service_task_yaml: str, controller_host: str,
287
- controller_port: int):
288
- controller = SkyServeController(service_name, service_spec,
289
- service_task_yaml, controller_host,
290
- controller_port)
291
+ version: int, controller_host: str, controller_port: int):
292
+ os.environ[constants.OVERRIDE_CONSOLIDATION_MODE] = 'true'
293
+ # Hijack sys.stdout/stderr to be context aware.
294
+ context_utils.hijack_sys_attrs()
295
+ controller = SkyServeController(service_name, service_spec, version,
296
+ controller_host, controller_port)
291
297
  controller.run()
@@ -121,7 +121,7 @@ class LeastLoadPolicy(LoadBalancingPolicy, name='least_load', default=True):
121
121
  return
122
122
  with self.lock:
123
123
  self.ready_replicas = ready_replicas
124
- for r in self.ready_replicas:
124
+ for r in list(self.load_map.keys()):
125
125
  if r not in ready_replicas:
126
126
  del self.load_map[r]
127
127
  for replica in ready_replicas: