skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,827 @@
1
+ """Async client-side Python SDK for SkyPilot.
2
+
3
+ All functions will return a future that can be awaited on.
4
+
5
+ Usage example:
6
+
7
+ .. code-block:: python
8
+
9
+ request_id = await sky.status()
10
+ statuses = await sky.get(request_id)
11
+
12
+ """
13
+ import dataclasses
14
+ import logging
15
+ import typing
16
+ from typing import Any, Dict, List, Optional, Tuple, Union
17
+
18
+ import aiohttp
19
+ import colorama
20
+
21
+ from sky import admin_policy
22
+ from sky import catalog
23
+ from sky import exceptions
24
+ from sky import sky_logging
25
+ from sky.client import common as client_common
26
+ from sky.client import sdk
27
+ from sky.schemas.api import responses
28
+ from sky.server import common as server_common
29
+ from sky.server import rest
30
+ from sky.server.requests import payloads
31
+ from sky.server.requests import requests as requests_lib
32
+ from sky.usage import usage_lib
33
+ from sky.utils import annotations
34
+ from sky.utils import common
35
+ from sky.utils import context_utils
36
+ from sky.utils import env_options
37
+ from sky.utils import rich_utils
38
+ from sky.utils import ux_utils
39
+
40
+ if typing.TYPE_CHECKING:
41
+ import io
42
+
43
+ import sky
44
+ from sky import backends
45
+ from sky import models
46
+ from sky.provision.kubernetes import utils as kubernetes_utils
47
+ from sky.skylet import autostop_lib
48
+ from sky.skylet import job_lib
49
+
# Module-level logger obtained from SkyPilot's logging helper.
logger = sky_logging.init_logger(__name__)
# Set the 'httpx' logger's level to CRITICAL.
logging.getLogger('httpx').setLevel(logging.CRITICAL)
52
+
53
+
@dataclasses.dataclass
class StreamConfig:
    """Options controlling how a request's logs are streamed.

    An instance is handed to ``_stream_and_get()``, which unpacks the four
    fields and passes them on to ``stream_and_get()``.
    """
    # Path of the log file to stream.
    log_path: Optional[str] = None
    # Number of trailing log lines to show; None shows all logs.
    tail: Optional[int] = None
    # Whether to keep following the logs.
    follow: bool = True
    # Stream to write to; None prints to the console.
    output_stream: Optional['io.TextIOBase'] = None


# Shared default used as the ``stream_logs`` default of the SDK functions.
DEFAULT_STREAM_CONFIG = StreamConfig()
73
+
74
+
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
async def get(request_id: str) -> Any:
    """Wait for a request on the API server and return its result.

    Issues ``GET /api/get`` for ``request_id`` and decodes the reply into a
    request record. A 200 reply is always decoded; a 500 reply is decoded on
    a best-effort basis because it may still carry the failed request.

    Args:
        request_id: The request ID of the request to get.

    Returns:
        The return value stored on the decoded request.

    Raises:
        RuntimeError: If no request could be decoded from the reply.
        exceptions.RequestCancelled: If the request's status is CANCELLED.
        Exception: The error object recorded on the request, if any. These
            are the same exceptions the specific request would raise.
    """
    async with aiohttp.ClientSession() as session:
        # Only the connect phase is bounded; the total time is unlimited.
        request_timeout = aiohttp.ClientTimeout(
            total=None,
            connect=client_common.
            API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS)
        response = await server_common.make_authenticated_request_async(
            session,
            'GET',
            f'/api/get?request_id={request_id}',
            retry=False,
            timeout=request_timeout)

        try:
            status_code = response.status
            task = None
            if status_code == 200:
                task = requests_lib.Request.decode(
                    payloads.RequestPayload(**await response.json()))
            elif status_code == 500:
                # The body may or may not be a serialized request; any
                # failure to decode falls through to the generic error.
                try:
                    task = requests_lib.Request.decode(
                        payloads.RequestPayload(**await response.json()))
                    logger.debug(f'Got request with error: {task.name}')
                except Exception:  # pylint: disable=broad-except
                    task = None

            if task is None:
                with ux_utils.print_exception_no_traceback():
                    raise RuntimeError(
                        f'Failed to get request {request_id}: '
                        f'{status_code} {await response.text()}')

            failure = task.get_error()
            if failure is not None:
                remote_error = failure['object']
                if env_options.Options.SHOW_DEBUG_INFO.get():
                    # Fall back to the error's string form when it carries
                    # no 'stacktrace' attribute.
                    server_trace = getattr(remote_error, 'stacktrace',
                                           str(remote_error))
                    logger.error('=== Traceback on SkyPilot API Server ===\n'
                                 f'{server_trace}')
                with ux_utils.print_exception_no_traceback():
                    raise remote_error

            if task.status == requests_lib.RequestStatus.CANCELLED:
                with ux_utils.print_exception_no_traceback():
                    raise exceptions.RequestCancelled(
                        f'{colorama.Fore.YELLOW}Current {task.name!r} '
                        f'request ({task.request_id}) is cancelled by '
                        f'another process. {colorama.Style.RESET_ALL}')

            return task.get_return_value()
        finally:
            response.close()
140
+
141
+
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
async def stream_response_async(request_id: Optional[str],
                                response: 'aiohttp.ClientResponse',
                                output_stream: Optional['io.TextIOBase'] = None,
                                resumable: bool = False,
                                get_result: bool = True) -> Any:
    """Async version of stream_response that streams the response to the
    console.

    Args:
        request_id: The request ID, or None if the stream is not tied to a
            request.
        response: The aiohttp response.
        output_stream: The output stream to write to. If None, print to the
            console.
        resumable: Whether the response is resumable on retry. If True, the
            streaming will start from the previous failure point on retry.
        get_result: Whether to fetch the request's result with ``get()``
            once the stream is exhausted.

    Returns:
        Result of request_id if it is given and get_result is True;
        otherwise None.

    Raises:
        Exception: Any error raised while streaming or while fetching the
            result is re-raised unchanged.
    """

    retry_context: Optional[rest.RetryContext] = None
    if resumable:
        retry_context = rest.get_retry_context()
    try:
        line_count = 0
        async for line in rich_utils.decode_rich_status_async(response):
            if line is not None:
                line_count += 1
                if retry_context is None:
                    print(line, flush=True, end='', file=output_stream)
                elif line_count > retry_context.line_processed:
                    # On a retry, skip the lines that were already printed
                    # and record how far the stream has progressed.
                    print(line, flush=True, end='', file=output_stream)
                    retry_context.line_processed = line_count
        if request_id is not None and get_result:
            return await get(request_id)
        return None
    except Exception:  # pylint: disable=broad-except
        # Only suggest the command when there is an ID to put in it; the
        # hint used to read 'sky api logs None' otherwise.
        if request_id is not None:
            logger.debug(f'To stream request logs: sky api logs {request_id}')
        raise
183
+
184
+
async def _stream_and_get(
    request_id: Optional[str] = None,
    config: StreamConfig = DEFAULT_STREAM_CONFIG,
) -> Any:
    """Unpack a ``StreamConfig`` and delegate to ``stream_and_get()``.

    Args:
        request_id: The request ID passed through to ``stream_and_get()``.
        config: Streaming options; its ``log_path``, ``tail``, ``follow``
            and ``output_stream`` fields are passed positionally.

    Returns:
        Whatever ``stream_and_get()`` returns.
    """
    result = await stream_and_get(request_id, config.log_path, config.tail,
                                  config.follow, config.output_stream)
    return result
198
+
199
+
async def stream_and_get(
    request_id: Optional[str] = None,
    log_path: Optional[str] = None,
    tail: Optional[int] = None,
    follow: bool = True,
    output_stream: Optional['io.TextIOBase'] = None,
) -> Any:
    """Streams the logs of a request or a log file and gets the final result.

    This will block until the request is finished. The request id can be a
    prefix of the full request id.

    Args:
        request_id: The prefix of the request ID of the request to stream.
        log_path: The path to the log file to stream.
        tail: The number of lines to show from the end of the logs. If None,
            the parameter is not sent to the server.
        follow: Whether to follow the logs.
        output_stream: The output stream to write to. If None, print to the
            console.

    Returns:
        The ``Request Returns`` of the specified request. If the server
        answers with a status other than 200, 400 or 404, streaming is
        skipped: the result of ``get(request_id)`` is returned, or None when
        no request_id was given.

    Raises:
        RuntimeError: If the server answers 400 or 404.
        Exception: It raises the same exceptions as the specific requests,
            see ``Request Raises`` in the documentation of the specific
            requests.
    """
    # Build the query string, leaving out the optional values that are None.
    query: Dict[str, Any] = {}
    if request_id is not None:
        query['request_id'] = request_id
    if log_path is not None:
        query['log_path'] = log_path
    if tail is not None:
        query['tail'] = str(tail)
    # Convert boolean to string for aiohttp
    query['follow'] = str(follow).lower()
    query['format'] = 'console'

    async with aiohttp.ClientSession() as session:
        stream_timeout = aiohttp.ClientTimeout(
            total=None,
            connect=client_common.
            API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS)
        response = await server_common.make_authenticated_request_async(
            session,
            'GET',
            '/api/stream',
            params=query,
            retry=False,
            timeout=stream_timeout)

        try:
            if response.status in [404, 400]:
                detail = (await response.json()).get('detail')
                with ux_utils.print_exception_no_traceback():
                    raise RuntimeError(f'Failed to stream logs: {detail}')

            if response.status != 200:
                # TODO(syang): handle the case where the requestID is not
                # provided. https://github.com/skypilot-org/skypilot/issues/6549
                if request_id is None:
                    return None
                return await get(request_id)

            return await stream_response_async(request_id, response,
                                               output_stream)
        finally:
            response.close()
263
+
264
+
@usage_lib.entrypoint
@annotations.client_api
async def check(
    infra_list: Optional[Tuple[str, ...]],
    verbose: bool,
    workspace: Optional[str] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[str, List[str]]:
    """Async counterpart of check(): checks the credentials to enable clouds.

    ``infra_list``, ``verbose`` and ``workspace`` are forwarded positionally
    to ``sdk.check``, which is invoked through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.check, infra_list, verbose,
                                           workspace)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
280
+
281
+
@usage_lib.entrypoint
@annotations.client_api
async def enabled_clouds(
    workspace: Optional[str] = None,
    expand: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[str]:
    """Async counterpart of enabled_clouds(): gets the enabled clouds.

    ``workspace`` and ``expand`` are forwarded positionally to
    ``sdk.enabled_clouds``, invoked through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.enabled_clouds, workspace,
                                           expand)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
296
+
297
+
@usage_lib.entrypoint
@annotations.client_api
async def list_accelerators(
    gpus_only: bool = True,
    name_filter: Optional[str] = None,
    region_filter: Optional[str] = None,
    quantity_filter: Optional[int] = None,
    clouds: Optional[Union[List[str], str]] = None,
    all_regions: bool = False,
    require_price: bool = True,
    case_sensitive: bool = True,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[str, List[catalog.common.InstanceTypeInfo]]:
    """Async counterpart of list_accelerators(): lists the names of all
    accelerators offered by Sky.

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.list_accelerators`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    sdk_args = (gpus_only, name_filter, region_filter, quantity_filter,
                clouds, all_regions, require_price, case_sensitive)
    req_id = await context_utils.to_thread(sdk.list_accelerators, *sdk_args)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
322
+
323
+
@usage_lib.entrypoint
@annotations.client_api
async def list_accelerator_counts(
    gpus_only: bool = True,
    name_filter: Optional[str] = None,
    region_filter: Optional[str] = None,
    quantity_filter: Optional[int] = None,
    clouds: Optional[Union[List[str], str]] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[str, List[int]]:
    """Async counterpart of list_accelerator_counts(): lists all accelerators
    offered by Sky and available counts.

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.list_accelerator_counts`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    sdk_args = (gpus_only, name_filter, region_filter, quantity_filter,
                clouds)
    req_id = await context_utils.to_thread(sdk.list_accelerator_counts,
                                           *sdk_args)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
344
+
345
+
@usage_lib.entrypoint
@annotations.client_api
async def optimize(
    dag: 'sky.Dag',
    minimize: common.OptimizeTarget = common.OptimizeTarget.COST,
    admin_policy_request_options: Optional[
        admin_policy.RequestOptions] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> 'sky.Dag':
    """Async counterpart of optimize(): finds the best execution plan for the
    given DAG.

    ``dag``, ``minimize`` and ``admin_policy_request_options`` are forwarded
    positionally to ``sdk.optimize`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.optimize, dag, minimize,
                                           admin_policy_request_options)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
363
+
364
+
@usage_lib.entrypoint
@annotations.client_api
async def workspaces(
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[str, Any]:
    """Async counterpart of workspaces(): gets the workspaces.

    The request is submitted by calling ``sdk.workspaces`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.workspaces)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
376
+
377
+
@usage_lib.entrypoint
@annotations.client_api
async def launch(
    task: Union['sky.Task', 'sky.Dag'],
    cluster_name: Optional[str] = None,
    retry_until_up: bool = False,
    idle_minutes_to_autostop: Optional[int] = None,
    wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
    dryrun: bool = False,
    down: bool = False,  # pylint: disable=redefined-outer-name
    backend: Optional['backends.Backend'] = None,
    optimize_target: common.OptimizeTarget = common.OptimizeTarget.COST,
    no_setup: bool = False,
    clone_disk_from: Optional[str] = None,
    fast: bool = False,
    # Internal only:
    # pylint: disable=invalid-name
    _need_confirmation: bool = False,
    _is_launched_by_jobs_controller: bool = False,
    _is_launched_by_sky_serve_controller: bool = False,
    _disable_controller_check: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
) -> Tuple[Optional[int], Optional['backends.ResourceHandle']]:
    """Async counterpart of launch(): launches a cluster or task.

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.launch`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    sdk_args = (task, cluster_name, retry_until_up, idle_minutes_to_autostop,
                wait_for, dryrun, down, backend, optimize_target, no_setup,
                clone_disk_from, fast, _need_confirmation,
                _is_launched_by_jobs_controller,
                _is_launched_by_sky_serve_controller,
                _disable_controller_check)
    req_id = await context_utils.to_thread(sdk.launch, *sdk_args)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
412
+
413
+
@usage_lib.entrypoint
@annotations.client_api
async def exec(  # pylint: disable=redefined-builtin
    task: Union['sky.Task', 'sky.Dag'],
    cluster_name: Optional[str] = None,
    dryrun: bool = False,
    down: bool = False,  # pylint: disable=redefined-outer-name
    backend: Optional['backends.Backend'] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
) -> Tuple[Optional[int], Optional['backends.ResourceHandle']]:
    """Async counterpart of exec(): executes a task on an existing cluster.

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.exec`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    sdk_args = (task, cluster_name, dryrun, down, backend)
    req_id = await context_utils.to_thread(sdk.exec, *sdk_args)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
431
+
432
+
@usage_lib.entrypoint
@annotations.client_api
async def tail_logs(cluster_name: str,
                    job_id: Optional[int],
                    follow: bool,
                    tail: int = 0,
                    output_stream: Optional['io.TextIOBase'] = None) -> int:
    """Async counterpart of tail_logs(): tails the logs of a job.

    All arguments are forwarded positionally to ``sdk.tail_logs``, which is
    invoked through ``context_utils.to_thread``.

    Returns:
        The value returned by ``sdk.tail_logs``.
    """
    sdk_args = (cluster_name, job_id, follow, tail, output_stream)
    outcome = await context_utils.to_thread(sdk.tail_logs, *sdk_args)
    return outcome
443
+
444
+
@usage_lib.entrypoint
@annotations.client_api
async def download_logs(cluster_name: str,
                        job_ids: Optional[List[str]]) -> Dict[str, str]:
    """Async counterpart of download_logs(): downloads the logs of jobs.

    Both arguments are forwarded positionally to ``sdk.download_logs``,
    which is invoked through ``context_utils.to_thread``.

    Returns:
        The value returned by ``sdk.download_logs``.
    """
    outcome = await context_utils.to_thread(sdk.download_logs, cluster_name,
                                            job_ids)
    return outcome
452
+
453
+
@usage_lib.entrypoint
@annotations.client_api
async def start(
    cluster_name: str,
    idle_minutes_to_autostop: Optional[int] = None,
    wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
    retry_until_up: bool = False,
    down: bool = False,  # pylint: disable=redefined-outer-name
    force: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
) -> 'backends.CloudVmRayResourceHandle':
    """Async counterpart of start(): restarts a cluster.

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.start`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    sdk_args = (cluster_name, idle_minutes_to_autostop, wait_for,
                retry_until_up, down, force)
    req_id = await context_utils.to_thread(sdk.start, *sdk_args)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
474
+
475
+
@usage_lib.entrypoint
@annotations.client_api
async def down(
        cluster_name: str,
        purge: bool = False,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async counterpart of down(): tears down a cluster.

    ``cluster_name`` and ``purge`` are forwarded positionally to
    ``sdk.down`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.down, cluster_name, purge)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
488
+
489
+
@usage_lib.entrypoint
@annotations.client_api
async def stop(
        cluster_name: str,
        purge: bool = False,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async counterpart of stop(): stops a cluster.

    ``cluster_name`` and ``purge`` are forwarded positionally to
    ``sdk.stop`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.stop, cluster_name, purge)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
502
+
503
+
@usage_lib.entrypoint
@annotations.client_api
async def autostop(
    cluster_name: str,
    idle_minutes: int,
    wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
    down: bool = False,  # pylint: disable=redefined-outer-name
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> None:
    """Async counterpart of autostop(): schedules an autostop/autodown for a
    cluster.

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.autostop`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    sdk_args = (cluster_name, idle_minutes, wait_for, down)
    req_id = await context_utils.to_thread(sdk.autostop, *sdk_args)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
521
+
522
+
@usage_lib.entrypoint
@annotations.client_api
async def queue(
    cluster_name: str,
    skip_finished: bool = False,
    all_users: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[responses.ClusterJobRecord]:
    """Async counterpart of queue(): gets the job queue of a cluster.

    ``cluster_name``, ``skip_finished`` and ``all_users`` are forwarded
    positionally to ``sdk.queue`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.queue, cluster_name,
                                           skip_finished, all_users)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
538
+
539
+
@usage_lib.entrypoint
@annotations.client_api
async def job_status(
    cluster_name: str,
    job_ids: Optional[List[int]] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[Optional[int], Optional['job_lib.JobStatus']]:
    """Async counterpart of job_status(): gets the status of jobs on a
    cluster.

    ``cluster_name`` and ``job_ids`` are forwarded positionally to
    ``sdk.job_status`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.job_status, cluster_name,
                                           job_ids)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
555
+
556
+
@usage_lib.entrypoint
@annotations.client_api
async def cancel(
        cluster_name: str,
        all: bool = False,  # pylint: disable=redefined-builtin
        all_users: bool = False,
        job_ids: Optional[List[int]] = None,
        # pylint: disable=invalid-name
        _try_cancel_if_cluster_is_init: bool = False,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async counterpart of cancel(): cancels jobs on a cluster.

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.cancel`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    sdk_args = (cluster_name, all, all_users, job_ids,
                _try_cancel_if_cluster_is_init)
    req_id = await context_utils.to_thread(sdk.cancel, *sdk_args)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
575
+
576
+
@usage_lib.entrypoint
@annotations.client_api
async def status(
    cluster_names: Optional[List[str]] = None,
    refresh: common.StatusRefreshMode = common.StatusRefreshMode.NONE,
    all_users: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
    *,
    _include_credentials: bool = False,
) -> List[Dict[str, Any]]:
    """Async counterpart of status(): gets cluster statuses.

    ``cluster_names``, ``refresh`` and ``all_users`` are forwarded
    positionally and ``_include_credentials`` by keyword to ``sdk.status``,
    which is invoked through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(
        sdk.status,
        cluster_names,
        refresh,
        all_users,
        _include_credentials=_include_credentials)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
598
+
599
+
@usage_lib.entrypoint
@annotations.client_api
async def endpoints(
    cluster: str,
    port: Optional[Union[int, str]] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[int, str]:
    """Async counterpart of endpoints(): gets the endpoint for a given
    cluster and port number.

    ``cluster`` and ``port`` are forwarded positionally to ``sdk.endpoints``
    through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.endpoints, cluster, port)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
614
+
615
+
@usage_lib.entrypoint
@annotations.client_api
async def cost_report(
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[Dict[str, Any]]:
    """Async counterpart of cost_report(): gets all cluster cost reports.

    The request is submitted by calling ``sdk.cost_report`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.cost_report)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
627
+
628
+
@usage_lib.entrypoint
@annotations.client_api
async def storage_ls(
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[Dict[str, Any]]:
    """Async counterpart of storage_ls(): gets the storages.

    The request is submitted by calling ``sdk.storage_ls`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.storage_ls)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
640
+
641
+
@usage_lib.entrypoint
@annotations.client_api
async def storage_delete(
        name: str,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async counterpart of storage_delete(): deletes a storage.

    ``name`` is forwarded to ``sdk.storage_delete`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.storage_delete, name)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
653
+
654
+
@usage_lib.entrypoint
@annotations.client_api
async def local_up(
        gpus: bool,
        name: Optional[str] = None,
        port_start: Optional[int] = None,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async counterpart of local_up(): launches a Kubernetes cluster on
    local machines.

    ``gpus``, ``name`` and ``port_start`` are forwarded positionally to
    ``sdk.local_up`` through ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.local_up, gpus, name,
                                           port_start)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
670
+
671
+
@usage_lib.entrypoint
@annotations.client_api
async def local_down(
        name: Optional[str] = None,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async counterpart of local_down(): tears down the Kubernetes cluster
    started by local_up.

    ``name`` is forwarded to ``sdk.local_down`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.local_down, name)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
684
+
685
+
@usage_lib.entrypoint
@annotations.client_api
async def ssh_up(
        infra: Optional[str] = None,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async counterpart of ssh_up(): deploys the SSH Node Pools defined in
    ~/.sky/ssh_targets.yaml.

    ``infra`` is forwarded to ``sdk.ssh_up`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.ssh_up, infra)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
698
+
699
+
@usage_lib.entrypoint
@annotations.client_api
async def ssh_down(
        infra: Optional[str] = None,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async counterpart of ssh_down(): tears down a Kubernetes cluster on
    SSH targets.

    ``infra`` is forwarded to ``sdk.ssh_down`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.ssh_down, infra)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
712
+
713
+
@usage_lib.entrypoint
@annotations.client_api
async def realtime_kubernetes_gpu_availability(
    context: Optional[str] = None,
    name_filter: Optional[str] = None,
    quantity_filter: Optional[int] = None,
    is_ssh: Optional[bool] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[Tuple[str, List['models.RealtimeGpuAvailability']]]:
    """Async counterpart of realtime_kubernetes_gpu_availability(): gets the
    real-time Kubernetes GPU availability.

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.realtime_kubernetes_gpu_availability`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    sdk_args = (context, name_filter, quantity_filter, is_ssh)
    req_id = await context_utils.to_thread(
        sdk.realtime_kubernetes_gpu_availability, *sdk_args)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
732
+
733
+
@usage_lib.entrypoint
@annotations.client_api
async def kubernetes_node_info(
    context: Optional[str] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> 'models.KubernetesNodesInfo':
    """Async counterpart of kubernetes_node_info(): gets the resource
    information for all the nodes in the cluster.

    ``context`` is forwarded to ``sdk.kubernetes_node_info`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.kubernetes_node_info, context)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
748
+
749
+
@usage_lib.entrypoint
@annotations.client_api
async def status_kubernetes(
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Tuple[List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
           List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
           List[Dict[str, Any]], Optional[str]]:
    """Async counterpart of status_kubernetes(): gets all SkyPilot clusters
    and jobs in the Kubernetes cluster.

    The request is submitted by calling ``sdk.status_kubernetes`` through
    ``context_utils.to_thread``.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the submitted request.
    """
    req_id = await context_utils.to_thread(sdk.status_kubernetes)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
764
+
765
+
@usage_lib.entrypoint
@annotations.client_api
async def api_cancel(
    request_ids: Optional[Union[str, List[str]]] = None,
    all_users: bool = False,
    silent: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[str]:
    """Async counterpart of api_cancel(): aborts a request or all requests.

    ``request_ids``, ``all_users`` and ``silent`` are forwarded positionally
    to ``sdk.api_cancel`` through ``context_utils.to_thread``. The value it
    returns is treated as the ID of the cancellation request itself.

    Args:
        stream_logs: Log streaming options. If None, the result is fetched
            with ``get()``; otherwise ``_stream_and_get()`` is used.

    Returns:
        The result of the cancellation request.
    """
    req_id = await context_utils.to_thread(sdk.api_cancel, request_ids,
                                           all_users, silent)
    if stream_logs is None:
        return await get(req_id)
    return await _stream_and_get(req_id, stream_logs)
781
+
782
+
@usage_lib.entrypoint
@annotations.client_api
async def api_status(request_ids: Optional[List[str]] = None,
                     all_status: bool = False) -> List[payloads.RequestPayload]:
    """Async counterpart of api_status(): lists all requests.

    Both arguments are forwarded positionally to ``sdk.api_status``, which
    is invoked through ``context_utils.to_thread``.

    Returns:
        The value returned by ``sdk.api_status``.
    """
    outcome = await context_utils.to_thread(sdk.api_status, request_ids,
                                            all_status)
    return outcome
790
+
791
+
@usage_lib.entrypoint
@annotations.client_api
async def dashboard(starting_page: Optional[str] = None) -> None:
    """Async counterpart of dashboard(): starts the dashboard for SkyPilot.

    ``starting_page`` is forwarded to ``sdk.dashboard``, which is invoked
    through ``context_utils.to_thread``.

    Returns:
        The value returned by ``sdk.dashboard``.
    """
    outcome = await context_utils.to_thread(sdk.dashboard, starting_page)
    return outcome
797
+
798
+
@usage_lib.entrypoint
@annotations.client_api
async def api_info() -> responses.APIHealthResponse:
    """Async counterpart of api_info(): gets the server's status, commit and
    version.

    Invokes ``sdk.api_info`` through ``context_utils.to_thread``.

    Returns:
        The value returned by ``sdk.api_info``.
    """
    outcome = await context_utils.to_thread(sdk.api_info)
    return outcome
805
+
806
+
@usage_lib.entrypoint
@annotations.client_api
async def api_stop() -> None:
    """Async counterpart of api_stop(): stops the API server.

    Invokes ``sdk.api_stop`` through ``context_utils.to_thread``.

    Returns:
        The value returned by ``sdk.api_stop``.
    """
    outcome = await context_utils.to_thread(sdk.api_stop)
    return outcome
812
+
813
+
@usage_lib.entrypoint
@annotations.client_api
async def api_server_logs(follow: bool = True,
                          tail: Optional[int] = None) -> None:
    """Async counterpart of api_server_logs(): streams the API server logs.

    ``follow`` and ``tail`` are forwarded positionally to
    ``sdk.api_server_logs``, invoked through ``context_utils.to_thread``.

    Returns:
        The value returned by ``sdk.api_server_logs``.
    """
    outcome = await context_utils.to_thread(sdk.api_server_logs, follow, tail)
    return outcome
820
+
821
+
@usage_lib.entrypoint
@annotations.client_api
async def api_login(endpoint: Optional[str] = None,
                    get_token: bool = False) -> None:
    """Async counterpart of api_login(): logs into a SkyPilot API server.

    ``endpoint`` and ``get_token`` are forwarded positionally to
    ``sdk.api_login``, invoked through ``context_utils.to_thread``.

    Returns:
        The value returned by ``sdk.api_login``.
    """
    outcome = await context_utils.to_thread(sdk.api_login, endpoint, get_token)
    return outcome