skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,310 @@
1
+ """Implementation of SDK for SkyServe."""
2
+ import json
3
+ import typing
4
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
5
+
6
+ import click
7
+
8
+ from sky.client import common as client_common
9
+ from sky.server import common as server_common
10
+ from sky.server.requests import payloads
11
+ from sky.server.requests import request_names
12
+ from sky.utils import admin_policy_utils
13
+ from sky.utils import dag_utils
14
+
15
+ if typing.TYPE_CHECKING:
16
+ import io
17
+
18
+ import sky
19
+ from sky.serve import serve_utils
20
+
21
+
22
def up(
    task: Union['sky.Task', 'sky.Dag'],
    service_name: str,
    pool: bool = False,
    # Internal only:
    # pylint: disable=invalid-name
    _need_confirmation: bool = False
) -> server_common.RequestId[Tuple[str, str]]:
    """Checks a service task on the client and posts it to `/serve/up`.

    The entrypoint is converted to a DAG, passed through the client-side
    admin policy, validated and optimized. The resulting DAG is then
    serialized to YAML and submitted to the API server.

    Args:
        task: Task or DAG describing the service.
        service_name: Name of the service to launch.
        pool: Must be False; `up` is not supported for pools.
        _need_confirmation: Internal only. When True, prompts for
            confirmation (with abort enabled) before submitting.

    Returns:
        The request ID extracted from the API server response.
    """
    assert not pool, 'Command `up` is not supported for pool.'
    # Imported here rather than at module level to avoid a circular import.
    from sky.client import sdk  # pylint: disable=import-outside-toplevel

    entrypoint_dag = dag_utils.convert_entrypoint_to_dag(task)
    policy_ctx = admin_policy_utils.apply_and_use_config_in_current_request(
        entrypoint_dag,
        request_name=request_names.AdminPolicyRequestName.SERVE_UP,
        at_client_side=True)
    # NOTE(review): the diff view this was taken from flattens indentation;
    # the upload/submit steps are assumed to run inside the policy context.
    # Confirm against the packaged file.
    with policy_ctx as dag:
        sdk.validate(dag)
        sdk.stream_and_get(sdk.optimize(dag))
        if _need_confirmation:
            noun = 'pool' if pool else 'service'
            click.confirm(f'Launching a new {noun} {service_name!r}. Proceed?',
                          default=True,
                          abort=True,
                          show_default=True)

        uploaded_dag = client_common.upload_mounts_to_api_server(dag)
        payload = payloads.ServeUpBody(
            task=dag_utils.dump_chain_dag_to_yaml_str(uploaded_dag),
            service_name=service_name,
        )
        return server_common.get_request_id(
            server_common.make_authenticated_request(
                'POST',
                '/serve/up',
                json=json.loads(payload.model_dump_json()),
                timeout=(5, None)))
65
+
66
+
67
def update(
    task: Union['sky.Task', 'sky.Dag'],
    service_name: str,
    mode: 'serve_utils.UpdateMode',
    pool: bool = False,
    # Internal only:
    # pylint: disable=invalid-name
    _need_confirmation: bool = False
) -> server_common.RequestId[None]:
    """Checks an updated service task and posts it to `/serve/update`.

    Mirrors `up`: the entrypoint is converted to a DAG, passed through the
    client-side admin policy, validated and optimized, then serialized to
    YAML and submitted together with the update mode.

    Args:
        task: Task or DAG describing the new service version.
        service_name: Name of the service to update.
        mode: Update mode forwarded unchanged in the request body.
        pool: Must be False; `update` is not supported for pools.
        _need_confirmation: Internal only. When True, prompts for
            confirmation (with abort enabled) before submitting.

    Returns:
        The request ID extracted from the API server response.
    """
    assert not pool, 'Command `update` is not supported for pool.'
    # Imported here rather than at module level to avoid a circular import.
    from sky.client import sdk  # pylint: disable=import-outside-toplevel
    noun = 'pool' if pool else 'service'

    entrypoint_dag = dag_utils.convert_entrypoint_to_dag(task)
    policy_ctx = admin_policy_utils.apply_and_use_config_in_current_request(
        entrypoint_dag,
        request_name=request_names.AdminPolicyRequestName.SERVE_UPDATE,
        at_client_side=True)
    # NOTE(review): the diff view this was taken from flattens indentation;
    # the upload/submit steps are assumed to run inside the policy context.
    # Confirm against the packaged file.
    with policy_ctx as dag:
        sdk.validate(dag)
        sdk.stream_and_get(sdk.optimize(dag))
        if _need_confirmation:
            question = f'Updating {noun} {service_name!r}. Proceed?'
            click.confirm(question,
                          default=True,
                          abort=True,
                          show_default=True)

        uploaded_dag = client_common.upload_mounts_to_api_server(dag)
        payload = payloads.ServeUpdateBody(
            task=dag_utils.dump_chain_dag_to_yaml_str(uploaded_dag),
            service_name=service_name,
            mode=mode,
        )
        return server_common.get_request_id(
            server_common.make_authenticated_request(
                'POST',
                '/serve/update',
                json=json.loads(payload.model_dump_json()),
                timeout=(5, None)))
110
+
111
+
112
def apply(
    task: Optional[Union['sky.Task', 'sky.Dag']],
    workers: Optional[int],
    service_name: str,
    mode: 'serve_utils.UpdateMode',
    pool: bool = False,
    # Internal only:
    # pylint: disable=invalid-name
    _need_confirmation: bool = False
) -> server_common.RequestId[None]:
    """Applies a worker count or a new task config via `/jobs/pool_apply`.

    Two paths exist:
      * `task` is None: only `workers` is sent, without any DAG handling.
      * `task` is given: the DAG is built, passed through the client-side
        admin policy, validated and optimized, then sent as YAML. `workers`
        is not included in the request body on this path.

    Args:
        task: Task or DAG with the new pool config, or None.
        workers: Worker count to send when `task` is None.
        service_name: Pool name (sent as `pool_name`).
        mode: Update mode forwarded unchanged in the request body.
        pool: Must be True; `apply` is only supported for pools.
        _need_confirmation: Internal only. When True and a task is given,
            prompts for confirmation (with abort enabled) before submitting.

    Returns:
        The request ID extracted from the API server response.

    Raises:
        ValueError: If both `task` and `workers` are None.
    """
    assert pool, 'Command `apply` is only supported for pool.'
    # Imported here rather than at module level to avoid a circular import.
    from sky.client import sdk  # pylint: disable=import-outside-toplevel

    noun = 'pool' if pool else 'service'

    def _post(payload):
        # Both paths hit the same endpoint; only the payload differs.
        resp = server_common.make_authenticated_request(
            'POST',
            '/jobs/pool_apply',
            json=json.loads(payload.model_dump_json()),
            timeout=(5, None))
        return server_common.get_request_id(resp)

    # Workers-only path: nothing to build or validate on the client. This
    # split could move to the server side later; for now it is kept here.
    if task is None:
        if workers is None:
            raise ValueError(f'Cannot create a new {noun} without specifying '
                             f'task or workers. Please provide either a task '
                             f'or specify the number of workers.')
        return _post(
            payloads.JobsPoolApplyBody(
                workers=workers,
                pool_name=service_name,
                mode=mode,
            ))

    # Task path: craft the DAG and validate it before submitting.
    entrypoint_dag = dag_utils.convert_entrypoint_to_dag(task)
    policy_ctx = admin_policy_utils.apply_and_use_config_in_current_request(
        entrypoint_dag,
        request_name=request_names.AdminPolicyRequestName.JOBS_POOL_APPLY,
        at_client_side=True)
    # NOTE(review): the diff view this was taken from flattens indentation;
    # the upload/submit steps are assumed to run inside the policy context.
    # Confirm against the packaged file.
    with policy_ctx as dag:
        sdk.validate(dag)
        sdk.stream_and_get(sdk.optimize(dag))
        if _need_confirmation:
            question = f'Applying config to {noun} {service_name!r}. Proceed?'
            click.confirm(question,
                          default=True,
                          abort=True,
                          show_default=True)

        uploaded_dag = client_common.upload_mounts_to_api_server(dag)
        return _post(
            payloads.JobsPoolApplyBody(
                task=dag_utils.dump_chain_dag_to_yaml_str(uploaded_dag),
                pool_name=service_name,
                mode=mode,
            ))
183
+
184
+
185
def down(
    service_names: Optional[Union[str, List[str]]],
    all: bool = False,  # pylint: disable=redefined-builtin
    purge: bool = False,
    pool: bool = False,
) -> server_common.RequestId[None]:
    """Posts a teardown request for services or pools.

    Args:
        service_names: Name(s) to tear down; sent as `pool_names` when
            `pool` is True and as `service_names` otherwise.
        all: Forwarded unchanged in the request body.
        purge: Forwarded unchanged in the request body.
        pool: Selects `/jobs/pool_down` instead of `/serve/down`.

    Returns:
        The request ID extracted from the API server response.
    """
    # Pick the endpoint together with the body so the two cannot drift.
    if pool:
        endpoint = '/jobs/pool_down'
        payload = payloads.JobsPoolDownBody(
            pool_names=service_names,
            all=all,
            purge=purge,
        )
    else:
        endpoint = '/serve/down'
        payload = payloads.ServeDownBody(
            service_names=service_names,
            all=all,
            purge=purge,
        )
    return server_common.get_request_id(
        server_common.make_authenticated_request(
            'POST',
            endpoint,
            json=json.loads(payload.model_dump_json()),
            timeout=(5, None)))
209
+
210
+
211
def status(
    service_names: Optional[Union[str, List[str]]],
    pool: bool = False,
) -> server_common.RequestId[List[Dict[str, Any]]]:
    """Posts a status query for services or pools.

    Args:
        service_names: Name(s) to query; sent as `pool_names` when `pool`
            is True and as `service_names` otherwise.
        pool: Selects `/jobs/pool_status` instead of `/serve/status`.

    Returns:
        The request ID extracted from the API server response.
    """
    if pool:
        endpoint = '/jobs/pool_status'
        payload = payloads.JobsPoolStatusBody(pool_names=service_names)
    else:
        endpoint = '/serve/status'
        payload = payloads.ServeStatusBody(service_names=service_names)
    return server_common.get_request_id(
        server_common.make_authenticated_request(
            'POST',
            endpoint,
            json=json.loads(payload.model_dump_json()),
            timeout=(5, None)))
225
+
226
+
227
def tail_logs(service_name: str,
              target: Union[str, 'serve_utils.ServiceComponent'],
              replica_id: Optional[int] = None,
              follow: bool = True,
              output_stream: Optional['io.TextIOBase'] = None,
              tail: Optional[int] = None,
              pool: bool = False) -> None:
    """Requests logs for a service or pool and streams the response.

    Args:
        service_name: Service name, or pool name when `pool` is True.
        target: Component whose logs are requested.
        replica_id: Replica to read from; sent as `worker_id` for pools.
        follow: Forwarded in the request body and also passed as
            `get_result` when streaming the response.
        output_stream: Stream handed to `sdk.stream_response`.
        tail: Forwarded unchanged in the request body.
        pool: Selects `/jobs/pool_logs` instead of `/serve/logs`.
    """
    # Imported here rather than at module level to avoid a circular import.
    from sky.client import sdk  # pylint: disable=import-outside-toplevel

    if pool:
        endpoint = '/jobs/pool_logs'
        payload = payloads.JobsPoolLogsBody(
            pool_name=service_name,
            target=target,
            worker_id=replica_id,
            follow=follow,
            tail=tail,
        )
    else:
        endpoint = '/serve/logs'
        payload = payloads.ServeLogsBody(
            service_name=service_name,
            target=target,
            replica_id=replica_id,
            follow=follow,
            tail=tail,
        )
    # The response is opened in streaming mode and handed, together with
    # its request ID, to the SDK's streaming helper.
    log_response = server_common.make_authenticated_request(
        'POST',
        endpoint,
        json=json.loads(payload.model_dump_json()),
        timeout=(5, None),
        stream=True)
    log_request_id: server_common.RequestId[None] = (
        server_common.get_request_id(log_response))
    sdk.stream_response(request_id=log_request_id,
                        response=log_response,
                        output_stream=output_stream,
                        resumable=True,
                        get_result=follow)
266
+
267
+
268
def sync_down_logs(service_name: str,
                   local_dir: str,
                   *,
                   targets: Optional[Union[
                       str, 'serve_utils.ServiceComponent',
                       Sequence[Union[str,
                                      'serve_utils.ServiceComponent']]]] = None,
                   replica_ids: Optional[List[int]] = None,
                   tail: Optional[int] = None,
                   pool: bool = False) -> None:
    """Has the API server collect logs, then downloads them locally.

    Args:
        service_name: Service name, or pool name when `pool` is True.
        local_dir: Client directory the logs are downloaded into.
        targets: Component(s) whose logs are requested.
        replica_ids: Replicas to include; sent as `worker_ids` for pools.
        tail: Forwarded unchanged in the request body.
        pool: Selects `/jobs/pool_sync-down-logs` instead of
            `/serve/sync-down-logs`.
    """
    # Imported here rather than at module level to avoid a circular import.
    from sky.client import sdk  # pylint: disable=import-outside-toplevel

    if pool:
        endpoint = '/jobs/pool_sync-down-logs'
        payload = payloads.JobsPoolDownloadLogsBody(
            pool_name=service_name,
            local_dir=local_dir,
            targets=targets,
            worker_ids=replica_ids,
            tail=tail,
        )
    else:
        endpoint = '/serve/sync-down-logs'
        payload = payloads.ServeDownloadLogsBody(
            service_name=service_name,
            # No need to set here, since the server will override it
            # to a directory on the API server.
            local_dir=local_dir,
            targets=targets,
            replica_ids=replica_ids,
            tail=tail,
        )
    sync_response = server_common.make_authenticated_request(
        'POST',
        endpoint,
        json=json.loads(payload.model_dump_json()),
        timeout=(5, None))
    sync_request_id: server_common.RequestId[str] = (
        server_common.get_request_id(sync_response))
    # The request result is used as the log directory on the API server.
    server_side_dir = sdk.stream_and_get(sync_request_id)

    # Download from API server paths to the client's local_dir
    client_common.download_logs_from_api_server([server_side_dir],
                                                server_side_dir, local_dir)
sky/serve/client/sdk.py CHANGED
@@ -1,28 +1,23 @@
1
1
  """SDK for SkyServe."""
2
2
  import json
3
3
  import typing
4
- from typing import List, Optional, Union
4
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
5
5
 
6
- import click
7
-
8
- from sky.adaptors import common as adaptors_common
9
- from sky.client import common as client_common
6
+ from sky.serve.client import impl
10
7
  from sky.server import common as server_common
8
+ from sky.server import rest
11
9
  from sky.server.requests import payloads
12
10
  from sky.usage import usage_lib
13
- from sky.utils import dag_utils
11
+ from sky.utils import context
14
12
 
15
13
  if typing.TYPE_CHECKING:
16
14
  import io
17
15
 
18
- import requests
19
-
20
16
  import sky
21
17
  from sky.serve import serve_utils
22
- else:
23
- requests = adaptors_common.LazyImport('requests')
24
18
 
25
19
 
20
+ @context.contextual
26
21
  @usage_lib.entrypoint
27
22
  @server_common.check_server_healthy_or_start
28
23
  def up(
@@ -31,7 +26,7 @@ def up(
31
26
  # Internal only:
32
27
  # pylint: disable=invalid-name
33
28
  _need_confirmation: bool = False
34
- ) -> server_common.RequestId:
29
+ ) -> server_common.RequestId[Tuple[str, str]]:
35
30
  """Spins up a service.
36
31
 
37
32
  Please refer to the sky.cli.serve_up for the document.
@@ -50,35 +45,13 @@ def up(
50
45
  argument.
51
46
  endpoint (str): The service endpoint.
52
47
  """
53
-
54
- # Avoid circular import.
55
- from sky.client import sdk # pylint: disable=import-outside-toplevel
56
-
57
- dag = dag_utils.convert_entrypoint_to_dag(task)
58
- sdk.validate(dag)
59
- request_id = sdk.optimize(dag)
60
- sdk.stream_and_get(request_id)
61
- if _need_confirmation:
62
- prompt = f'Launching a new service {service_name!r}. Proceed?'
63
- if prompt is not None:
64
- click.confirm(prompt, default=True, abort=True, show_default=True)
65
-
66
- dag = client_common.upload_mounts_to_api_server(dag)
67
- dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
68
-
69
- body = payloads.ServeUpBody(
70
- task=dag_str,
71
- service_name=service_name,
72
- )
73
- response = requests.post(
74
- f'{server_common.get_server_url()}/serve/up',
75
- json=json.loads(body.model_dump_json()),
76
- timeout=(5, None),
77
- cookies=server_common.get_api_cookie_jar(),
78
- )
79
- return server_common.get_request_id(response)
48
+ return impl.up(task,
49
+ service_name,
50
+ pool=False,
51
+ _need_confirmation=_need_confirmation)
80
52
 
81
53
 
54
+ @context.contextual
82
55
  @usage_lib.entrypoint
83
56
  @server_common.check_server_healthy_or_start
84
57
  def update(
@@ -88,7 +61,7 @@ def update(
88
61
  # Internal only:
89
62
  # pylint: disable=invalid-name
90
63
  _need_confirmation: bool = False
91
- ) -> server_common.RequestId:
64
+ ) -> server_common.RequestId[None]:
92
65
  """Updates an existing service.
93
66
 
94
67
  Please refer to the sky.cli.serve_update for the document.
@@ -108,34 +81,11 @@ def update(
108
81
  Request Returns:
109
82
  None
110
83
  """
111
- # Avoid circular import.
112
- from sky.client import sdk # pylint: disable=import-outside-toplevel
113
-
114
- dag = dag_utils.convert_entrypoint_to_dag(task)
115
- sdk.validate(dag)
116
- request_id = sdk.optimize(dag)
117
- sdk.stream_and_get(request_id)
118
- if _need_confirmation:
119
- click.confirm(f'Updating service {service_name!r}. Proceed?',
120
- default=True,
121
- abort=True,
122
- show_default=True)
123
-
124
- dag = client_common.upload_mounts_to_api_server(dag)
125
- dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
126
- body = payloads.ServeUpdateBody(
127
- task=dag_str,
128
- service_name=service_name,
129
- mode=mode,
130
- )
131
-
132
- response = requests.post(
133
- f'{server_common.get_server_url()}/serve/update',
134
- json=json.loads(body.model_dump_json()),
135
- timeout=(5, None),
136
- cookies=server_common.get_api_cookie_jar(),
137
- )
138
- return server_common.get_request_id(response)
84
+ return impl.update(task,
85
+ service_name,
86
+ mode,
87
+ pool=False,
88
+ _need_confirmation=_need_confirmation)
139
89
 
140
90
 
141
91
  @usage_lib.entrypoint
@@ -144,7 +94,7 @@ def down(
144
94
  service_names: Optional[Union[str, List[str]]],
145
95
  all: bool = False, # pylint: disable=redefined-builtin
146
96
  purge: bool = False
147
- ) -> server_common.RequestId:
97
+ ) -> server_common.RequestId[None]:
148
98
  """Tears down a service.
149
99
 
150
100
  Please refer to the sky.cli.serve_down for the docs.
@@ -166,24 +116,13 @@ def down(
166
116
  ValueError: if the arguments are invalid.
167
117
  RuntimeError: if failed to terminate the service.
168
118
  """
169
- body = payloads.ServeDownBody(
170
- service_names=service_names,
171
- all=all,
172
- purge=purge,
173
- )
174
- response = requests.post(
175
- f'{server_common.get_server_url()}/serve/down',
176
- json=json.loads(body.model_dump_json()),
177
- timeout=(5, None),
178
- cookies=server_common.get_api_cookie_jar(),
179
- )
180
- return server_common.get_request_id(response)
119
+ return impl.down(service_names, all, purge, pool=False)
181
120
 
182
121
 
183
122
  @usage_lib.entrypoint
184
123
  @server_common.check_server_healthy_or_start
185
124
  def terminate_replica(service_name: str, replica_id: int,
186
- purge: bool) -> server_common.RequestId:
125
+ purge: bool) -> server_common.RequestId[None]:
187
126
  """Tears down a specific replica for the given service.
188
127
 
189
128
  Args:
@@ -206,20 +145,19 @@ def terminate_replica(service_name: str, replica_id: int,
206
145
  replica_id=replica_id,
207
146
  purge=purge,
208
147
  )
209
- response = requests.post(
210
- f'{server_common.get_server_url()}/serve/terminate-replica',
148
+ response = server_common.make_authenticated_request(
149
+ 'POST',
150
+ '/serve/terminate-replica',
211
151
  json=json.loads(body.model_dump_json()),
212
- timeout=(5, None),
213
- cookies=server_common.get_api_cookie_jar(),
214
- )
152
+ timeout=(5, None))
215
153
  return server_common.get_request_id(response)
216
154
 
217
155
 
218
156
  @usage_lib.entrypoint
219
157
  @server_common.check_server_healthy_or_start
220
158
  def status(
221
- service_names: Optional[Union[str,
222
- List[str]]]) -> server_common.RequestId:
159
+ service_names: Optional[Union[str, List[str]]]
160
+ ) -> server_common.RequestId[List[Dict[str, Any]]]:
223
161
  """Gets service statuses.
224
162
 
225
163
  If service_names is given, return those services. Otherwise, return all
@@ -278,23 +216,18 @@ def status(
278
216
  RuntimeError: if failed to get the service status.
279
217
  exceptions.ClusterNotUpError: if the sky serve controller is not up.
280
218
  """
281
- body = payloads.ServeStatusBody(service_names=service_names,)
282
- response = requests.post(
283
- f'{server_common.get_server_url()}/serve/status',
284
- json=json.loads(body.model_dump_json()),
285
- timeout=(5, None),
286
- cookies=server_common.get_api_cookie_jar(),
287
- )
288
- return server_common.get_request_id(response)
219
+ return impl.status(service_names, pool=False)
289
220
 
290
221
 
291
222
  @usage_lib.entrypoint
292
223
  @server_common.check_server_healthy_or_start
224
+ @rest.retry_transient_errors()
293
225
  def tail_logs(service_name: str,
294
226
  target: Union[str, 'serve_utils.ServiceComponent'],
295
227
  replica_id: Optional[int] = None,
296
228
  follow: bool = True,
297
- output_stream: Optional['io.TextIOBase'] = None) -> None:
229
+ output_stream: Optional['io.TextIOBase'] = None,
230
+ tail: Optional[int] = None) -> None:
298
231
  """Tails logs for a service.
299
232
 
300
233
  Usage:
@@ -356,24 +289,13 @@ def tail_logs(service_name: str,
356
289
  sky.exceptions.ClusterNotUpError: the sky serve controller is not up.
357
290
  ValueError: arguments not valid, or failed to tail the logs.
358
291
  """
359
- # Avoid circular import.
360
- from sky.client import sdk # pylint: disable=import-outside-toplevel
361
-
362
- body = payloads.ServeLogsBody(
363
- service_name=service_name,
364
- target=target,
365
- replica_id=replica_id,
366
- follow=follow,
367
- )
368
- response = requests.post(
369
- f'{server_common.get_server_url()}/serve/logs',
370
- json=json.loads(body.model_dump_json()),
371
- timeout=(5, None),
372
- stream=True,
373
- cookies=server_common.get_api_cookie_jar(),
374
- )
375
- request_id = server_common.get_request_id(response)
376
- sdk.stream_response(request_id, response, output_stream)
292
+ return impl.tail_logs(service_name,
293
+ target,
294
+ replica_id,
295
+ follow,
296
+ output_stream,
297
+ tail,
298
+ pool=False)
377
299
 
378
300
 
379
301
  @usage_lib.entrypoint
@@ -383,9 +305,10 @@ def sync_down_logs(service_name: str,
383
305
  *,
384
306
  targets: Optional[Union[
385
307
  str, 'serve_utils.ServiceComponent',
386
- List[Union[str,
387
- 'serve_utils.ServiceComponent']]]] = None,
388
- replica_ids: Optional[List[int]] = None) -> None:
308
+ Sequence[Union[str,
309
+ 'serve_utils.ServiceComponent']]]] = None,
310
+ replica_ids: Optional[List[int]] = None,
311
+ tail: Optional[int] = None) -> None:
389
312
  """Sync down logs from the service components to a local directory.
390
313
 
391
314
  This function syncs logs from the specified service components (controller,
@@ -414,24 +337,9 @@ def sync_down_logs(service_name: str,
414
337
  sky.exceptions.ClusterNotUpError: If the controller is not up.
415
338
  ValueError: Arguments not valid.
416
339
  """
417
- # Avoid circular import.
418
- from sky.client import sdk # pylint: disable=import-outside-toplevel
419
-
420
- body = payloads.ServeDownloadLogsBody(
421
- service_name=service_name,
422
- # No need to set here, since the server will override it
423
- # to a directory on the API server.
424
- local_dir=local_dir,
425
- targets=targets,
426
- replica_ids=replica_ids,
427
- )
428
- response = requests.post(
429
- f'{server_common.get_server_url()}/serve/sync-down-logs',
430
- json=json.loads(body.model_dump_json()),
431
- timeout=(5, None),
432
- )
433
- remote_dir = sdk.stream_and_get(server_common.get_request_id(response))
434
-
435
- # Download from API server paths to the client's local_dir
436
- client_common.download_logs_from_api_server([remote_dir], remote_dir,
437
- local_dir)
340
+ return impl.sync_down_logs(service_name,
341
+ local_dir,
342
+ targets=targets,
343
+ replica_ids=replica_ids,
344
+ tail=tail,
345
+ pool=False)