skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/serve/server/core.py CHANGED
@@ -1,107 +1,27 @@
1
1
  """SkyServe core APIs."""
2
- import pathlib
3
- import re
4
- import signal
5
- import tempfile
6
- import threading
7
2
  import typing
8
- from typing import Any, Dict, List, Optional, Set, Tuple, Union
3
+ from typing import Any, Dict, List, Optional, Tuple, Union
9
4
 
10
- import colorama
11
-
12
- import sky
13
5
  from sky import backends
14
6
  from sky import exceptions
15
- from sky import execution
16
7
  from sky import sky_logging
17
- from sky import task as task_lib
8
+ from sky.adaptors import common as adaptors_common
18
9
  from sky.backends import backend_utils
19
- from sky.clouds.service_catalog import common as service_catalog_common
20
- from sky.serve import constants as serve_constants
21
- from sky.serve import serve_state
10
+ from sky.serve import serve_rpc_utils
22
11
  from sky.serve import serve_utils
23
- from sky.skylet import constants
12
+ from sky.serve.server import impl
24
13
  from sky.usage import usage_lib
25
- from sky.utils import admin_policy_utils
26
- from sky.utils import command_runner
27
- from sky.utils import common
28
- from sky.utils import common_utils
29
14
  from sky.utils import controller_utils
30
- from sky.utils import rich_utils
31
15
  from sky.utils import subprocess_utils
32
- from sky.utils import ux_utils
33
16
 
34
17
  if typing.TYPE_CHECKING:
35
- from sky.backends import cloud_vm_ray_backend
36
-
37
- logger = sky_logging.init_logger(__name__)
18
+ import grpc
38
19
 
20
+ import sky
21
+ else:
22
+ grpc = adaptors_common.LazyImport('grpc')
39
23
 
40
- def _rewrite_tls_credential_paths_and_get_tls_env_vars(
41
- service_name: str, task: 'sky.Task') -> Dict[str, Any]:
42
- """Rewrite the paths of TLS credentials in the task.
43
-
44
- Args:
45
- service_name: Name of the service.
46
- task: sky.Task to rewrite.
47
-
48
- Returns:
49
- The generated template variables for TLS.
50
- """
51
- service_spec = task.service
52
- # Already checked by validate_service_task
53
- assert service_spec is not None
54
- if service_spec.tls_credential is None:
55
- return {'use_tls': False}
56
- remote_tls_keyfile = (
57
- serve_utils.generate_remote_tls_keyfile_name(service_name))
58
- remote_tls_certfile = (
59
- serve_utils.generate_remote_tls_certfile_name(service_name))
60
- tls_template_vars = {
61
- 'use_tls': True,
62
- 'remote_tls_keyfile': remote_tls_keyfile,
63
- 'remote_tls_certfile': remote_tls_certfile,
64
- 'local_tls_keyfile': service_spec.tls_credential.keyfile,
65
- 'local_tls_certfile': service_spec.tls_credential.certfile,
66
- }
67
- service_spec.tls_credential = serve_utils.TLSCredential(
68
- remote_tls_keyfile, remote_tls_certfile)
69
- return tls_template_vars
70
-
71
-
72
- def _get_all_replica_targets(
73
- service_name: str, backend: backends.CloudVmRayBackend,
74
- handle: backends.CloudVmRayResourceHandle
75
- ) -> Set[serve_utils.ServiceComponentTarget]:
76
- """Helper function to get targets for all live replicas."""
77
- code = serve_utils.ServeCodeGen.get_service_status([service_name])
78
- returncode, serve_status_payload, stderr = backend.run_on_head(
79
- handle,
80
- code,
81
- require_outputs=True,
82
- stream_logs=False,
83
- separate_stderr=True)
84
-
85
- try:
86
- subprocess_utils.handle_returncode(returncode,
87
- code,
88
- 'Failed to fetch services',
89
- stderr,
90
- stream_logs=True)
91
- except exceptions.CommandError as e:
92
- raise RuntimeError(e.error_msg) from e
93
-
94
- service_records = serve_utils.load_service_status(serve_status_payload)
95
- if not service_records:
96
- raise ValueError(f'Service {service_name!r} not found.')
97
- assert len(service_records) == 1
98
- service_record = service_records[0]
99
-
100
- return {
101
- serve_utils.ServiceComponentTarget(serve_utils.ServiceComponent.REPLICA,
102
- replica_info['replica_id'])
103
- for replica_info in service_record['replica_info']
104
- }
24
+ logger = sky_logging.init_logger(__name__)
105
25
 
106
26
 
107
27
  @usage_lib.entrypoint
@@ -122,381 +42,27 @@ def up(
122
42
  argument.
123
43
  endpoint: str; The service endpoint.
124
44
  """
125
- task.validate()
126
- if service_name is None:
127
- service_name = serve_utils.generate_service_name()
128
-
129
- # The service name will be used as:
130
- # 1. controller cluster name: 'sky-serve-controller-<service_name>'
131
- # 2. replica cluster name: '<service_name>-<replica_id>'
132
- # In both cases, service name shares the same regex with cluster name.
133
- if re.fullmatch(constants.CLUSTER_NAME_VALID_REGEX, service_name) is None:
134
- with ux_utils.print_exception_no_traceback():
135
- raise ValueError(f'Service name {service_name!r} is invalid: '
136
- f'ensure it is fully matched by regex (e.g., '
137
- 'only contains lower letters, numbers and dash): '
138
- f'{constants.CLUSTER_NAME_VALID_REGEX}')
139
-
140
- serve_utils.validate_service_task(task)
141
- # Always apply the policy again here, even though it might have been applied
142
- # in the CLI. This is to ensure that we apply the policy to the final DAG
143
- # and get the mutated config.
144
- dag, mutated_user_config = admin_policy_utils.apply(
145
- task, use_mutated_config_in_current_request=False)
146
- task = dag.tasks[0]
147
-
148
- with rich_utils.safe_status(
149
- ux_utils.spinner_message('Initializing service')):
150
- controller_utils.maybe_translate_local_file_mounts_and_sync_up(
151
- task, task_type='serve')
152
-
153
- tls_template_vars = _rewrite_tls_credential_paths_and_get_tls_env_vars(
154
- service_name, task)
155
-
156
- with tempfile.NamedTemporaryFile(
157
- prefix=f'service-task-{service_name}-',
158
- mode='w',
159
- ) as service_file, tempfile.NamedTemporaryFile(
160
- prefix=f'controller-task-{service_name}-',
161
- mode='w',
162
- ) as controller_file:
163
- controller_name = common.SKY_SERVE_CONTROLLER_NAME
164
- task_config = task.to_yaml_config()
165
- common_utils.dump_yaml(service_file.name, task_config)
166
- remote_tmp_task_yaml_path = (
167
- serve_utils.generate_remote_tmp_task_yaml_file_name(service_name))
168
- remote_config_yaml_path = (
169
- serve_utils.generate_remote_config_yaml_file_name(service_name))
170
- controller_log_file = (
171
- serve_utils.generate_remote_controller_log_file_name(service_name))
172
- controller_resources = controller_utils.get_controller_resources(
173
- controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
174
- task_resources=task.resources)
175
-
176
- vars_to_fill = {
177
- 'remote_task_yaml_path': remote_tmp_task_yaml_path,
178
- 'local_task_yaml_path': service_file.name,
179
- 'service_name': service_name,
180
- 'controller_log_file': controller_log_file,
181
- 'remote_user_config_path': remote_config_yaml_path,
182
- 'modified_catalogs':
183
- service_catalog_common.get_modified_catalog_file_mounts(),
184
- **tls_template_vars,
185
- **controller_utils.shared_controller_vars_to_fill(
186
- controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
187
- remote_user_config_path=remote_config_yaml_path,
188
- local_user_config=mutated_user_config,
189
- ),
190
- }
191
- common_utils.fill_template(serve_constants.CONTROLLER_TEMPLATE,
192
- vars_to_fill,
193
- output_path=controller_file.name)
194
- controller_task = task_lib.Task.from_yaml(controller_file.name)
195
- # TODO(tian): Probably run another sky.launch after we get the load
196
- # balancer port from the controller? So we don't need to open so many
197
- # ports here. Or, we should have a nginx traffic control to refuse
198
- # any connection to the unregistered ports.
199
- controller_resources = {
200
- r.copy(ports=[serve_constants.LOAD_BALANCER_PORT_RANGE])
201
- for r in controller_resources
202
- }
203
- controller_task.set_resources(controller_resources)
204
-
205
- # # Set service_name so the backend will know to modify default ray
206
- # task CPU usage to custom value instead of default 0.5 vCPU. We need
207
- # to set it to a smaller value to support a larger number of services.
208
- controller_task.service_name = service_name
209
-
210
- print(f'{colorama.Fore.YELLOW}Launching controller for '
211
- f'{service_name!r}...{colorama.Style.RESET_ALL}')
212
- # We directly submit the request to the controller and let the
213
- # controller to check name conflict. Suppose we have multiple
214
- # sky.serve.up() with same service name, the first one will
215
- # successfully write its job id to controller service database;
216
- # and for all following sky.serve.up(), the controller will throw
217
- # an exception (name conflict detected) and exit. Therefore the
218
- # controller job id in database could be use as an indicator of
219
- # whether the service is already running. If the id is the same
220
- # with the current job id, we know the service is up and running
221
- # for the first time; otherwise it is a name conflict.
222
- controller_idle_minutes_to_autostop, controller_down = (
223
- controller_utils.get_controller_autostop_config(
224
- controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER))
225
- # Since the controller may be shared among multiple users, launch the
226
- # controller with the API server's user hash.
227
- with common.with_server_user_hash():
228
- controller_job_id, controller_handle = execution.launch(
229
- task=controller_task,
230
- cluster_name=controller_name,
231
- idle_minutes_to_autostop=controller_idle_minutes_to_autostop,
232
- down=controller_down,
233
- retry_until_up=True,
234
- _disable_controller_check=True,
235
- )
236
-
237
- style = colorama.Style
238
- fore = colorama.Fore
239
-
240
- assert controller_job_id is not None and controller_handle is not None
241
- # TODO(tian): Cache endpoint locally to speedup. Endpoint won't
242
- # change after the first time, so there is no consistency issue.
243
- with rich_utils.safe_status(
244
- ux_utils.spinner_message(
245
- 'Waiting for the service to register')):
246
- # This function will check the controller job id in the database
247
- # and return the endpoint if the job id matches. Otherwise it will
248
- # return None.
249
- code = serve_utils.ServeCodeGen.wait_service_registration(
250
- service_name, controller_job_id)
251
- backend = backend_utils.get_backend_from_handle(controller_handle)
252
- assert isinstance(backend, backends.CloudVmRayBackend)
253
- assert isinstance(controller_handle,
254
- backends.CloudVmRayResourceHandle)
255
- returncode, lb_port_payload, _ = backend.run_on_head(
256
- controller_handle,
257
- code,
258
- require_outputs=True,
259
- stream_logs=False)
260
- try:
261
- subprocess_utils.handle_returncode(
262
- returncode, code, 'Failed to wait for service initialization',
263
- lb_port_payload)
264
- except exceptions.CommandError:
265
- statuses = backend.get_job_status(controller_handle,
266
- [controller_job_id],
267
- stream_logs=False)
268
- controller_job_status = list(statuses.values())[0]
269
- if controller_job_status == sky.JobStatus.PENDING:
270
- # Max number of services reached due to vCPU constraint.
271
- # The controller job is pending due to ray job scheduling.
272
- # We manually cancel the job here.
273
- backend.cancel_jobs(controller_handle, [controller_job_id])
274
- with ux_utils.print_exception_no_traceback():
275
- raise RuntimeError(
276
- 'Max number of services reached. '
277
- 'To spin up more services, please '
278
- 'tear down some existing services.') from None
279
- else:
280
- # Possible cases:
281
- # (1) name conflict;
282
- # (2) max number of services reached due to memory
283
- # constraint. The job will successfully run on the
284
- # controller, but there will be an error thrown due
285
- # to memory constraint check in the controller.
286
- # See sky/serve/service.py for more details.
287
- with ux_utils.print_exception_no_traceback():
288
- raise RuntimeError(
289
- 'Failed to spin up the service. Please '
290
- 'check the logs above for more details.') from None
291
- else:
292
- lb_port = serve_utils.load_service_initialization_result(
293
- lb_port_payload)
294
- socket_endpoint = backend_utils.get_endpoints(
295
- controller_handle.cluster_name, lb_port,
296
- skip_status_check=True).get(lb_port)
297
- assert socket_endpoint is not None, (
298
- 'Did not get endpoint for controller.')
299
- # Already checked by validate_service_task
300
- assert task.service is not None
301
- protocol = ('http'
302
- if task.service.tls_credential is None else 'https')
303
- endpoint = f'{protocol}://{socket_endpoint}'
304
-
305
- logger.info(
306
- f'{fore.CYAN}Service name: '
307
- f'{style.BRIGHT}{service_name}{style.RESET_ALL}'
308
- f'\n{fore.CYAN}Endpoint URL: '
309
- f'{style.BRIGHT}{endpoint}{style.RESET_ALL}'
310
- f'\n📋 Useful Commands'
311
- f'\n{ux_utils.INDENT_SYMBOL}To check service status:\t'
312
- f'{ux_utils.BOLD}sky serve status {service_name} '
313
- f'[--endpoint]{ux_utils.RESET_BOLD}'
314
- f'\n{ux_utils.INDENT_SYMBOL}To teardown the service:\t'
315
- f'{ux_utils.BOLD}sky serve down {service_name}'
316
- f'{ux_utils.RESET_BOLD}'
317
- f'\n{ux_utils.INDENT_SYMBOL}To see replica logs:\t'
318
- f'{ux_utils.BOLD}sky serve logs {service_name} [REPLICA_ID]'
319
- f'{ux_utils.RESET_BOLD}'
320
- f'\n{ux_utils.INDENT_SYMBOL}To see load balancer logs:\t'
321
- f'{ux_utils.BOLD}sky serve logs --load-balancer {service_name}'
322
- f'{ux_utils.RESET_BOLD}'
323
- f'\n{ux_utils.INDENT_SYMBOL}To see controller logs:\t'
324
- f'{ux_utils.BOLD}sky serve logs --controller {service_name}'
325
- f'{ux_utils.RESET_BOLD}'
326
- f'\n{ux_utils.INDENT_SYMBOL}To monitor the status:\t'
327
- f'{ux_utils.BOLD}watch -n10 sky serve status {service_name}'
328
- f'{ux_utils.RESET_BOLD}'
329
- f'\n{ux_utils.INDENT_LAST_SYMBOL}To send a test request:\t'
330
- f'{ux_utils.BOLD}curl {endpoint}'
331
- f'{ux_utils.RESET_BOLD}'
332
- '\n\n' +
333
- ux_utils.finishing_message('Service is spinning up and replicas '
334
- 'will be ready shortly.'))
335
- return service_name, endpoint
45
+ return impl.up(task, service_name, pool=False)
336
46
 
337
47
 
338
48
  @usage_lib.entrypoint
339
- def update(
340
- task: 'sky.Task',
341
- service_name: str,
342
- mode: serve_utils.UpdateMode = serve_utils.DEFAULT_UPDATE_MODE) -> None:
49
+ def update(task: Optional['sky.Task'],
50
+ service_name: str,
51
+ mode: serve_utils.UpdateMode = serve_utils.DEFAULT_UPDATE_MODE,
52
+ workers: Optional[int] = None) -> None:
343
53
  """Updates an existing service.
344
54
 
345
55
  Please refer to the sky.cli.serve_update for the document.
346
56
 
347
57
  Args:
348
- task: sky.Task to update.
58
+ task: sky.Task to update, or None if updating
59
+ the number of workers/replicas.
349
60
  service_name: Name of the service.
350
61
  mode: Update mode.
62
+ workers: Number of workers/replicas to set for the service when
63
+ task is None.
351
64
  """
352
- task.validate()
353
- serve_utils.validate_service_task(task)
354
-
355
- # Always apply the policy again here, even though it might have been applied
356
- # in the CLI. This is to ensure that we apply the policy to the final DAG
357
- # and get the mutated config.
358
- # TODO(cblmemo,zhwu): If a user sets a new skypilot_config, the update
359
- # will not apply the config.
360
- dag, _ = admin_policy_utils.apply(
361
- task, use_mutated_config_in_current_request=False)
362
- task = dag.tasks[0]
363
-
364
- assert task.service is not None
365
- if task.service.tls_credential is not None:
366
- logger.warning('Updating TLS keyfile and certfile is not supported. '
367
- 'Any updates to the keyfile and certfile will not take '
368
- 'effect. To update TLS keyfile and certfile, please '
369
- 'tear down the service and spin up a new one.')
370
-
371
- handle = backend_utils.is_controller_accessible(
372
- controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
373
- stopped_message=
374
- 'Service controller is stopped. There is no service to update. '
375
- f'To spin up a new service, use {ux_utils.BOLD}'
376
- f'sky serve up{ux_utils.RESET_BOLD}',
377
- non_existent_message='Service does not exist. '
378
- 'To spin up a new service, '
379
- f'use {ux_utils.BOLD}sky serve up{ux_utils.RESET_BOLD}',
380
- )
381
-
382
- backend = backend_utils.get_backend_from_handle(handle)
383
- assert isinstance(backend, backends.CloudVmRayBackend)
384
-
385
- code = serve_utils.ServeCodeGen.get_service_status([service_name])
386
- returncode, serve_status_payload, stderr = backend.run_on_head(
387
- handle,
388
- code,
389
- require_outputs=True,
390
- stream_logs=False,
391
- separate_stderr=True)
392
- try:
393
- subprocess_utils.handle_returncode(returncode,
394
- code, 'Failed to get service status '
395
- 'when update service',
396
- stderr,
397
- stream_logs=True)
398
- except exceptions.CommandError as e:
399
- raise RuntimeError(e.error_msg) from e
400
-
401
- service_statuses = serve_utils.load_service_status(serve_status_payload)
402
- if not service_statuses:
403
- with ux_utils.print_exception_no_traceback():
404
- raise RuntimeError(f'Cannot find service {service_name!r}.'
405
- f'To spin up a service, use {ux_utils.BOLD}'
406
- f'sky serve up{ux_utils.RESET_BOLD}')
407
-
408
- if len(service_statuses) > 1:
409
- with ux_utils.print_exception_no_traceback():
410
- raise RuntimeError(
411
- f'Multiple services found for {service_name!r}. ')
412
- service_record = service_statuses[0]
413
- prompt = None
414
- if (service_record['status'] == serve_state.ServiceStatus.CONTROLLER_FAILED
415
- ):
416
- prompt = (f'Service {service_name!r} has a failed controller. '
417
- 'Please clean up the service and try again.')
418
- elif (service_record['status'] == serve_state.ServiceStatus.CONTROLLER_INIT
419
- ):
420
- prompt = (f'Service {service_name!r} is still initializing '
421
- 'its controller. Please try again later.')
422
- if prompt is not None:
423
- with ux_utils.print_exception_no_traceback():
424
- raise RuntimeError(prompt)
425
-
426
- original_lb_policy = service_record['load_balancing_policy']
427
- assert task.service is not None, 'Service section not found.'
428
- if original_lb_policy != task.service.load_balancing_policy:
429
- logger.warning(
430
- f'{colorama.Fore.YELLOW}Current load balancing policy '
431
- f'{original_lb_policy!r} is different from the new policy '
432
- f'{task.service.load_balancing_policy!r}. Updating the load '
433
- 'balancing policy is not supported yet and it will be ignored. '
434
- 'The service will continue to use the current load balancing '
435
- f'policy.{colorama.Style.RESET_ALL}')
436
-
437
- with rich_utils.safe_status(
438
- ux_utils.spinner_message('Initializing service')):
439
- controller_utils.maybe_translate_local_file_mounts_and_sync_up(
440
- task, task_type='serve')
441
-
442
- code = serve_utils.ServeCodeGen.add_version(service_name)
443
- returncode, version_string_payload, stderr = backend.run_on_head(
444
- handle,
445
- code,
446
- require_outputs=True,
447
- stream_logs=False,
448
- separate_stderr=True)
449
- try:
450
- subprocess_utils.handle_returncode(returncode,
451
- code,
452
- 'Failed to add version',
453
- stderr,
454
- stream_logs=True)
455
- except exceptions.CommandError as e:
456
- raise RuntimeError(e.error_msg) from e
457
-
458
- version_string = serve_utils.load_version_string(version_string_payload)
459
- try:
460
- current_version = int(version_string)
461
- except ValueError as e:
462
- with ux_utils.print_exception_no_traceback():
463
- raise ValueError(f'Failed to parse version: {version_string}; '
464
- f'Returncode: {returncode}') from e
465
-
466
- print(f'New version: {current_version}')
467
- with tempfile.NamedTemporaryFile(
468
- prefix=f'{service_name}-v{current_version}',
469
- mode='w') as service_file:
470
- task_config = task.to_yaml_config()
471
- common_utils.dump_yaml(service_file.name, task_config)
472
- remote_task_yaml_path = serve_utils.generate_task_yaml_file_name(
473
- service_name, current_version, expand_user=False)
474
-
475
- backend.sync_file_mounts(handle,
476
- {remote_task_yaml_path: service_file.name},
477
- storage_mounts=None)
478
-
479
- code = serve_utils.ServeCodeGen.update_service(service_name,
480
- current_version,
481
- mode=mode.value)
482
- returncode, _, stderr = backend.run_on_head(handle,
483
- code,
484
- require_outputs=True,
485
- stream_logs=False,
486
- separate_stderr=True)
487
- try:
488
- subprocess_utils.handle_returncode(returncode,
489
- code,
490
- 'Failed to update services',
491
- stderr,
492
- stream_logs=True)
493
- except exceptions.CommandError as e:
494
- raise RuntimeError(e.error_msg) from e
495
-
496
- print(f'{colorama.Fore.GREEN}Service {service_name!r} update scheduled.'
497
- f'{colorama.Style.RESET_ALL}\n'
498
- f'Please use {ux_utils.BOLD}sky serve status {service_name} '
499
- f'{ux_utils.RESET_BOLD}to check the latest status.')
65
+ return impl.update(task, service_name, mode, pool=False, workers=workers)
500
66
 
501
67
 
502
68
  @usage_lib.entrypoint
@@ -521,46 +87,7 @@ def down(
521
87
  ValueError: if the arguments are invalid.
522
88
  RuntimeError: if failed to terminate the service.
523
89
  """
524
- if service_names is None:
525
- service_names = []
526
- if isinstance(service_names, str):
527
- service_names = [service_names]
528
- handle = backend_utils.is_controller_accessible(
529
- controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
530
- stopped_message='All services should have terminated.')
531
-
532
- service_names_str = ','.join(service_names)
533
- if sum([bool(service_names), all]) != 1:
534
- argument_str = (f'service_names={service_names_str}'
535
- if service_names else '')
536
- argument_str += ' all' if all else ''
537
- raise ValueError('Can only specify one of service_names or all. '
538
- f'Provided {argument_str!r}.')
539
-
540
- backend = backend_utils.get_backend_from_handle(handle)
541
- assert isinstance(backend, backends.CloudVmRayBackend)
542
- service_names = None if all else service_names
543
- code = serve_utils.ServeCodeGen.terminate_services(service_names, purge)
544
-
545
- try:
546
- returncode, stdout, _ = backend.run_on_head(handle,
547
- code,
548
- require_outputs=True,
549
- stream_logs=False)
550
- except exceptions.FetchClusterInfoError as e:
551
- raise RuntimeError(
552
- 'Failed to fetch controller IP. Please refresh controller status '
553
- f'by `sky status -r {common.SKY_SERVE_CONTROLLER_NAME}` '
554
- 'and try again.') from e
555
-
556
- try:
557
- subprocess_utils.handle_returncode(returncode, code,
558
- 'Failed to terminate service',
559
- stdout)
560
- except exceptions.CommandError as e:
561
- raise RuntimeError(e.error_msg) from e
562
-
563
- logger.info(stdout)
90
+ return impl.down(service_names, all, purge, pool=False)
564
91
 
565
92
 
566
93
  @usage_lib.entrypoint
@@ -587,25 +114,37 @@ def terminate_replica(service_name: str, replica_id: int, purge: bool) -> None:
587
114
  'Please spin up a service first.',
588
115
  )
589
116
 
590
- backend = backend_utils.get_backend_from_handle(handle)
591
- assert isinstance(backend, backends.CloudVmRayBackend)
592
-
593
- code = serve_utils.ServeCodeGen.terminate_replica(service_name, replica_id,
594
- purge)
595
- returncode, stdout, stderr = backend.run_on_head(handle,
596
- code,
597
- require_outputs=True,
598
- stream_logs=False,
599
- separate_stderr=True)
600
-
601
- try:
602
- subprocess_utils.handle_returncode(returncode,
603
- code,
604
- 'Failed to terminate the replica',
605
- stderr,
606
- stream_logs=True)
607
- except exceptions.CommandError as e:
608
- raise RuntimeError(e.error_msg) from e
117
+ assert isinstance(handle, backends.CloudVmRayResourceHandle)
118
+ use_legacy = not handle.is_grpc_enabled_with_flag
119
+
120
+ if not use_legacy:
121
+ try:
122
+ stdout = serve_rpc_utils.RpcRunner.terminate_replica(
123
+ handle, service_name, replica_id, purge)
124
+ except exceptions.SkyletMethodNotImplementedError:
125
+ use_legacy = True
126
+
127
+ if use_legacy:
128
+ backend = backend_utils.get_backend_from_handle(handle)
129
+ assert isinstance(backend, backends.CloudVmRayBackend)
130
+
131
+ code = serve_utils.ServeCodeGen.terminate_replica(
132
+ service_name, replica_id, purge)
133
+ returncode, stdout, stderr = backend.run_on_head(handle,
134
+ code,
135
+ require_outputs=True,
136
+ stream_logs=False,
137
+ separate_stderr=True)
138
+
139
+ try:
140
+ subprocess_utils.handle_returncode(
141
+ returncode,
142
+ code,
143
+ 'Failed to terminate the replica',
144
+ stderr,
145
+ stream_logs=True)
146
+ except exceptions.CommandError as e:
147
+ raise RuntimeError(e.error_msg) from e
609
148
 
610
149
  sky_logging.print(stdout)
611
150
 
@@ -669,60 +208,7 @@ def status(
669
208
  RuntimeError: if failed to get the service status.
670
209
  exceptions.ClusterNotUpError: if the sky serve controller is not up.
671
210
  """
672
- if service_names is not None:
673
- if isinstance(service_names, str):
674
- service_names = [service_names]
675
-
676
- try:
677
- backend_utils.check_network_connection()
678
- except exceptions.NetworkError as e:
679
- with ux_utils.print_exception_no_traceback():
680
- raise RuntimeError(
681
- 'Failed to refresh service status due to network error.') from e
682
-
683
- controller_type = controller_utils.Controllers.SKY_SERVE_CONTROLLER
684
- handle = backend_utils.is_controller_accessible(
685
- controller=controller_type,
686
- stopped_message=controller_type.value.default_hint_if_non_existent)
687
-
688
- backend = backend_utils.get_backend_from_handle(handle)
689
- assert isinstance(backend, backends.CloudVmRayBackend)
690
-
691
- code = serve_utils.ServeCodeGen.get_service_status(service_names)
692
- returncode, serve_status_payload, stderr = backend.run_on_head(
693
- handle,
694
- code,
695
- require_outputs=True,
696
- stream_logs=False,
697
- separate_stderr=True)
698
-
699
- try:
700
- subprocess_utils.handle_returncode(returncode,
701
- code,
702
- 'Failed to fetch services',
703
- stderr,
704
- stream_logs=True)
705
- except exceptions.CommandError as e:
706
- raise RuntimeError(e.error_msg) from e
707
-
708
- service_records = serve_utils.load_service_status(serve_status_payload)
709
- # Get the endpoint for each service
710
- for service_record in service_records:
711
- service_record['endpoint'] = None
712
- if service_record['load_balancer_port'] is not None:
713
- try:
714
- endpoint = backend_utils.get_endpoints(
715
- cluster=common.SKY_SERVE_CONTROLLER_NAME,
716
- port=service_record['load_balancer_port']).get(
717
- service_record['load_balancer_port'], None)
718
- except exceptions.ClusterNotUpError:
719
- pass
720
- else:
721
- protocol = ('https'
722
- if service_record['tls_encrypted'] else 'http')
723
- service_record['endpoint'] = f'{protocol}://{endpoint}'
724
-
725
- return service_records
211
+ return impl.status(service_names, pool=False)
726
212
 
727
213
 
728
214
  ServiceComponentOrStr = Union[str, serve_utils.ServiceComponent]
@@ -735,6 +221,7 @@ def tail_logs(
735
221
  target: ServiceComponentOrStr,
736
222
  replica_id: Optional[int] = None,
737
223
  follow: bool = True,
224
+ tail: Optional[int] = None,
738
225
  ) -> None:
739
226
  """Tails logs for a service.
740
227
 
@@ -769,56 +256,12 @@ def tail_logs(
769
256
  sky.exceptions.ClusterNotUpError: the sky serve controller is not up.
770
257
  ValueError: arguments not valid, or failed to tail the logs.
771
258
  """
772
- if isinstance(target, str):
773
- target = serve_utils.ServiceComponent(target)
774
- if not isinstance(target, serve_utils.ServiceComponent):
775
- with ux_utils.print_exception_no_traceback():
776
- raise ValueError(f'`target` must be a string or '
777
- f'sky.serve.ServiceComponent, got {type(target)}.')
778
-
779
- if target == serve_utils.ServiceComponent.REPLICA:
780
- if replica_id is None:
781
- with ux_utils.print_exception_no_traceback():
782
- raise ValueError(
783
- '`replica_id` must be specified when using target=REPLICA.')
784
- else:
785
- if replica_id is not None:
786
- with ux_utils.print_exception_no_traceback():
787
- raise ValueError('`replica_id` must be None when using '
788
- 'target=CONTROLLER/LOAD_BALANCER.')
789
-
790
- controller_type = controller_utils.Controllers.SKY_SERVE_CONTROLLER
791
- handle = backend_utils.is_controller_accessible(
792
- controller=controller_type,
793
- stopped_message=controller_type.value.default_hint_if_non_existent)
794
-
795
- backend = backend_utils.get_backend_from_handle(handle)
796
- assert isinstance(backend, backends.CloudVmRayBackend), backend
797
-
798
- if target != serve_utils.ServiceComponent.REPLICA:
799
- code = serve_utils.ServeCodeGen.stream_serve_process_logs(
800
- service_name,
801
- stream_controller=(
802
- target == serve_utils.ServiceComponent.CONTROLLER),
803
- follow=follow)
804
- else:
805
- assert replica_id is not None, service_name
806
- code = serve_utils.ServeCodeGen.stream_replica_logs(
807
- service_name, replica_id, follow)
808
-
809
- # With the stdin=subprocess.DEVNULL, the ctrl-c will not directly
810
- # kill the process, so we need to handle it manually here.
811
- if threading.current_thread() is threading.main_thread():
812
- signal.signal(signal.SIGINT, backend_utils.interrupt_handler)
813
- signal.signal(signal.SIGTSTP, backend_utils.stop_handler)
814
-
815
- # Refer to the notes in
816
- # sky/backends/cloud_vm_ray_backend.py::CloudVmRayBackend::tail_logs.
817
- backend.run_on_head(handle,
818
- code,
819
- stream_logs=True,
820
- process_stream=False,
821
- ssh_mode=command_runner.SshMode.INTERACTIVE)
259
+ return impl.tail_logs(service_name,
260
+ target=target,
261
+ replica_id=replica_id,
262
+ follow=follow,
263
+ tail=tail,
264
+ pool=False)
822
265
 
823
266
 
824
267
  @usage_lib.entrypoint
@@ -829,6 +272,7 @@ def sync_down_logs(
829
272
  targets: Union[ServiceComponentOrStr, List[ServiceComponentOrStr],
830
273
  None] = None,
831
274
  replica_ids: Optional[List[int]] = None,
275
+ tail: Optional[int] = None,
832
276
  ) -> str:
833
277
  """Sync down logs from the controller for the given service.
834
278
 
@@ -862,98 +306,9 @@ def sync_down_logs(
862
306
  sky.exceptions.ClusterNotUpError: If the controller is not up.
863
307
  ValueError: Arguments not valid.
864
308
  """
865
- # Step 0) get the controller handle
866
- with rich_utils.safe_status(
867
- ux_utils.spinner_message('Checking service status...')):
868
- controller_type = controller_utils.Controllers.SKY_SERVE_CONTROLLER
869
- handle = backend_utils.is_controller_accessible(
870
- controller=controller_type,
871
- stopped_message=controller_type.value.default_hint_if_non_existent)
872
- backend: backends.CloudVmRayBackend = (
873
- backend_utils.get_backend_from_handle(handle))
874
-
875
- requested_components: Set[serve_utils.ServiceComponent] = set()
876
- if not targets:
877
- # No targets specified -> request all components
878
- requested_components = {
879
- serve_utils.ServiceComponent.CONTROLLER,
880
- serve_utils.ServiceComponent.LOAD_BALANCER,
881
- serve_utils.ServiceComponent.REPLICA
882
- }
883
- else:
884
- # Parse provided targets
885
- if isinstance(targets, (str, serve_utils.ServiceComponent)):
886
- requested_components = {serve_utils.ServiceComponent(targets)}
887
- else: # list
888
- requested_components = {
889
- serve_utils.ServiceComponent(t) for t in targets
890
- }
891
-
892
- normalized_targets: Set[serve_utils.ServiceComponentTarget] = set()
893
- if serve_utils.ServiceComponent.CONTROLLER in requested_components:
894
- normalized_targets.add(
895
- serve_utils.ServiceComponentTarget(
896
- serve_utils.ServiceComponent.CONTROLLER))
897
- if serve_utils.ServiceComponent.LOAD_BALANCER in requested_components:
898
- normalized_targets.add(
899
- serve_utils.ServiceComponentTarget(
900
- serve_utils.ServiceComponent.LOAD_BALANCER))
901
- if serve_utils.ServiceComponent.REPLICA in requested_components:
902
- with rich_utils.safe_status(
903
- ux_utils.spinner_message('Getting live replica infos...')):
904
- replica_targets = _get_all_replica_targets(service_name, backend,
905
- handle)
906
- if not replica_ids:
907
- # Replica target requested but no specific IDs
908
- # -> Get all replica logs
909
- normalized_targets.update(replica_targets)
910
- else:
911
- # Replica target requested with specific IDs
912
- requested_replica_targets = [
913
- serve_utils.ServiceComponentTarget(
914
- serve_utils.ServiceComponent.REPLICA, rid)
915
- for rid in replica_ids
916
- ]
917
- for target in requested_replica_targets:
918
- if target not in replica_targets:
919
- logger.warning(f'Replica ID {target.replica_id} not found '
920
- f'for {service_name}. Skipping...')
921
- else:
922
- normalized_targets.add(target)
923
-
924
- def sync_down_logs_by_target(target: serve_utils.ServiceComponentTarget):
925
- component = target.component
926
- # We need to set one side of the pipe to a logs stream, and the other
927
- # side to a file.
928
- log_path = str(pathlib.Path(local_dir) / f'{target}.log')
929
- stream_logs_code: str
930
-
931
- if component == serve_utils.ServiceComponent.CONTROLLER:
932
- stream_logs_code = (
933
- serve_utils.ServeCodeGen.stream_serve_process_logs(
934
- service_name, stream_controller=True, follow=False))
935
- elif component == serve_utils.ServiceComponent.LOAD_BALANCER:
936
- stream_logs_code = (
937
- serve_utils.ServeCodeGen.stream_serve_process_logs(
938
- service_name, stream_controller=False, follow=False))
939
- elif component == serve_utils.ServiceComponent.REPLICA:
940
- replica_id = target.replica_id
941
- assert replica_id is not None, service_name
942
- stream_logs_code = serve_utils.ServeCodeGen.stream_replica_logs(
943
- service_name, replica_id, follow=False)
944
- else:
945
- assert False, component
946
-
947
- # Refer to the notes in
948
- # sky/backends/cloud_vm_ray_backend.py::CloudVmRayBackend::tail_logs.
949
- backend.run_on_head(handle,
950
- stream_logs_code,
951
- stream_logs=False,
952
- process_stream=False,
953
- ssh_mode=command_runner.SshMode.INTERACTIVE,
954
- log_path=log_path)
955
-
956
- subprocess_utils.run_in_parallel(sync_down_logs_by_target,
957
- list(normalized_targets))
958
-
959
- return local_dir
309
+ return impl.sync_down_logs(service_name,
310
+ local_dir=local_dir,
311
+ targets=targets,
312
+ replica_ids=replica_ids,
313
+ tail=tail,
314
+ pool=False)