skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,833 @@
1
+ """Async client-side Python SDK for SkyPilot.
2
+
3
+ All functions will return a future that can be awaited on.
4
+
5
+ Usage example:
6
+
7
+ .. code-block:: python
8
+
9
+ request_id = await sky.status()
10
+ statuses = await sky.get(request_id)
11
+
12
+ """
13
+ import dataclasses
14
+ import logging
15
+ import typing
16
+ from typing import Any, Dict, List, Optional, Tuple, Union
17
+
18
+ import aiohttp
19
+ import colorama
20
+
21
+ from sky import admin_policy
22
+ from sky import catalog
23
+ from sky import exceptions
24
+ from sky import sky_logging
25
+ from sky.client import common as client_common
26
+ from sky.client import sdk
27
+ from sky.schemas.api import responses
28
+ from sky.server import common as server_common
29
+ from sky.server import rest
30
+ from sky.server.requests import payloads
31
+ from sky.server.requests import requests as requests_lib
32
+ from sky.usage import usage_lib
33
+ from sky.utils import annotations
34
+ from sky.utils import common
35
+ from sky.utils import context_utils
36
+ from sky.utils import env_options
37
+ from sky.utils import rich_utils
38
+ from sky.utils import ux_utils
39
+
40
+ if typing.TYPE_CHECKING:
41
+ import io
42
+
43
+ import sky
44
+ from sky import backends
45
+ from sky import models
46
+ from sky.provision.kubernetes import utils as kubernetes_utils
47
+ from sky.skylet import autostop_lib
48
+ from sky.skylet import job_lib
49
+
50
+ logger = sky_logging.init_logger(__name__)
51
+ logging.getLogger('httpx').setLevel(logging.CRITICAL)
52
+
53
+
54
@dataclasses.dataclass
class StreamConfig:
    """Options that control how stream_and_get streams request logs.

    Every field is optional; a bare ``StreamConfig()`` streams with
    ``follow=True`` and leaves the other options unset.

    Attributes:
        log_path: Path of the log file to stream.
        tail: How many trailing log lines to show; None shows all logs.
        follow: Whether to keep following the logs.
        output_stream: Stream that receives the output; None prints to the
            console.
    """
    # Path to the log file to stream.
    log_path: Optional[str] = None
    # Number of lines to show from the end of the logs (None = all logs).
    tail: Optional[int] = None
    # Whether to follow the logs.
    follow: bool = True
    # Destination for the streamed text (None = console).
    output_stream: Optional['io.TextIOBase'] = None


# Shared instance used as the default `config` argument of _stream_and_get.
DEFAULT_STREAM_CONFIG = StreamConfig()
73
+
74
+
75
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
async def get(request_id: str) -> Any:
    """Async counterpart of get(): wait for a request and return its result.

    Issues ``GET /api/get?request_id=...`` against the API server, decodes
    the response into a request object and returns that request's return
    value.

    Args:
        request_id: The request ID of the request to get.

    Returns:
        The ``Request Returns`` of the specified request, i.e. the value
        from ``get_return_value()`` on the decoded request. See the
        documentation of the specific requests for details.

    Raises:
        RuntimeError: If the response status is neither 200 nor 500, or a
            500 response body cannot be decoded into a request.
        exceptions.RequestCancelled: If the decoded request has status
            CANCELLED.
        Exception: The error object recorded on the request is re-raised
            as is; see ``Request Raises`` of the specific requests.
    """
    async with aiohttp.ClientSession() as session:
        # Only the connect phase is bounded; total=None sets no overall
        # deadline on the request.
        request_timeout = aiohttp.ClientTimeout(
            total=None,
            connect=client_common.
            API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS)
        response = await server_common.make_authenticated_request_async(
            session,
            'GET',
            f'/api/get?request_id={request_id}',
            retry=False,
            timeout=request_timeout)

        # The body is read inside the session scope; the response is always
        # closed on the way out.
        try:
            status_code = response.status
            request_task = None
            if status_code == 200:
                payload = payloads.RequestPayload(**await response.json())
                request_task = requests_lib.Request.decode(payload)
            elif status_code == 500:
                # Best effort: a 500 may still carry an encoded request
                # holding the server-side error. If it does not decode, fall
                # through to the generic failure below.
                try:
                    payload = payloads.RequestPayload(
                        **await response.json())
                    request_task = requests_lib.Request.decode(payload)
                    logger.debug(
                        f'Got request with error: {request_task.name}')
                except Exception:  # pylint: disable=broad-except
                    request_task = None

            if request_task is None:
                with ux_utils.print_exception_no_traceback():
                    raise RuntimeError(
                        f'Failed to get request {request_id}: '
                        f'{response.status} {await response.text()}')

            failure = request_task.get_error()
            if failure is not None:
                raised = failure['object']
                if env_options.Options.SHOW_DEBUG_INFO.get():
                    # Prefer the server-side stacktrace when the error
                    # object carries one.
                    stacktrace = getattr(raised, 'stacktrace', str(raised))
                    logger.error('=== Traceback on SkyPilot API Server ===\n'
                                 f'{stacktrace}')
                with ux_utils.print_exception_no_traceback():
                    raise raised

            was_cancelled = (
                request_task.status == requests_lib.RequestStatus.CANCELLED)
            if was_cancelled:
                with ux_utils.print_exception_no_traceback():
                    raise exceptions.RequestCancelled(
                        f'{colorama.Fore.YELLOW}Current {request_task.name!r} '
                        f'request ({request_task.request_id}) is cancelled by '
                        f'another process. {colorama.Style.RESET_ALL}')

            return request_task.get_return_value()
        finally:
            response.close()
140
+
141
+
142
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
async def stream_response_async(request_id: Optional[str],
                                response: 'aiohttp.ClientResponse',
                                output_stream: Optional['io.TextIOBase'] = None,
                                resumable: bool = False,
                                get_result: bool = True) -> Any:
    """Async counterpart of stream_response: print a streamed response.

    Args:
        request_id: The request ID, or None.
        response: The aiohttp response whose decoded lines are printed.
        output_stream: Passed as ``file=`` to ``print``. If None, print to
            the console.
        resumable: If True, a retry context is taken from
            ``rest.get_retry_context()`` and only lines whose ordinal exceeds
            its ``line_processed`` counter are printed; the counter is then
            advanced to that ordinal.
        get_result: Whether to fetch the result of ``request_id`` once the
            stream is exhausted.

    Returns:
        ``await get(request_id)`` when ``request_id`` is not None and
        ``get_result`` is True; otherwise None.
    """
    retry_context: Optional[rest.RetryContext] = (rest.get_retry_context()
                                                  if resumable else None)
    try:
        seen = 0
        async for chunk in rich_utils.decode_rich_status_async(response):
            if chunk is None:
                continue
            seen += 1
            if retry_context is not None:
                # Lines up to `line_processed` were already emitted by an
                # earlier attempt; do not print them again.
                if seen <= retry_context.line_processed:
                    continue
                print(chunk, flush=True, end='', file=output_stream)
                retry_context.line_processed = seen
            else:
                print(chunk, flush=True, end='', file=output_stream)
        if get_result and request_id is not None:
            return await get(request_id)
    except Exception:  # pylint: disable=broad-except
        logger.debug(f'To stream request logs: sky api logs {request_id}')
        raise
183
+
184
+
185
async def _stream_and_get(
    request_id: Optional[str] = None,
    config: StreamConfig = DEFAULT_STREAM_CONFIG,
) -> Any:
    """Unpack a StreamConfig and delegate to stream_and_get().

    The ``log_path``, ``tail``, ``follow`` and ``output_stream`` fields of
    ``config`` are forwarded together with ``request_id``.
    """
    result = await stream_and_get(
        request_id=request_id,
        log_path=config.log_path,
        tail=config.tail,
        follow=config.follow,
        output_stream=config.output_stream,
    )
    return result
198
+
199
+
200
async def stream_and_get(
    request_id: Optional[str] = None,
    log_path: Optional[str] = None,
    tail: Optional[int] = None,
    follow: bool = True,
    output_stream: Optional['io.TextIOBase'] = None,
) -> Any:
    """Streams the logs of a request or a log file and gets the final result.

    This will block until the request is finished. The request id can be a
    prefix of the full request id.

    Args:
        request_id: The prefix of the request ID of the request to stream.
        log_path: Sent as the ``log_path`` query parameter of ``/api/stream``;
            omitted from the query when None.
        tail: Sent (stringified) as the ``tail`` query parameter; omitted
            from the query when None.
        follow: Sent as the ``follow`` query parameter (``'true'`` or
            ``'false'``).
        output_stream: The output stream to write to. If None, print to the
            console.

    Returns:
        The ``Request Returns`` of the specified request. See the documentation
        of the specific requests above for more details. If the server
        answers with a status other than 200, 400 or 404 and ``request_id``
        is None, None is returned.

    Raises:
        RuntimeError: If the server answers the stream request with 400 or
            404.
        Exception: It raises the same exceptions as the specific requests,
            see ``Request Raises`` in the documentation of the specific requests
            above.
    """
    params = {
        'request_id': request_id,
        'log_path': log_path,
        'tail': str(tail) if tail is not None else None,
        'follow': str(follow).lower(),  # Convert boolean to string for aiohttp
        'format': 'console',
    }
    # Filter out None values
    params = {k: v for k, v in params.items() if v is not None}

    async with aiohttp.ClientSession() as session:
        response = await server_common.make_authenticated_request_async(
            session,
            'GET',
            '/api/stream',
            params=params,
            retry=False,
            timeout=aiohttp.ClientTimeout(
                total=None,
                connect=client_common.
                API_SERVER_REQUEST_CONNECTION_TIMEOUT_SECONDS))

        try:
            if response.status in [404, 400]:
                detail = (await response.json()).get('detail')
                with ux_utils.print_exception_no_traceback():
                    raise RuntimeError(f'Failed to stream logs: {detail}')
            elif response.status != 200:
                # TODO(syang): handle the case where the requestID is not
                # provided. https://github.com/skypilot-org/skypilot/issues/6549
                if request_id is None:
                    return None
                # Streaming is unavailable; fall back to fetching the result.
                return await get(request_id)

            return await stream_response_async(request_id, response,
                                               output_stream)
        finally:
            # Always release the connection, including on the raise paths.
            response.close()
263
+
264
+
265
@usage_lib.entrypoint
@annotations.client_api
async def check(
    infra_list: Optional[Tuple[str, ...]],
    verbose: bool,
    workspace: Optional[str] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[str, List[str]]:
    """Async wrapper around sdk.check() (credential check for clouds).

    ``sdk.check`` is invoked through ``context_utils.to_thread``; its return
    value is used as the request ID. If ``stream_logs`` is None the result is
    fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.check, infra_list,
                                                 verbose, workspace)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
280
+
281
+
282
@usage_lib.entrypoint
@annotations.client_api
async def enabled_clouds(
    workspace: Optional[str] = None,
    expand: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[str]:
    """Async wrapper around sdk.enabled_clouds().

    ``sdk.enabled_clouds`` is invoked through ``context_utils.to_thread``;
    its return value is used as the request ID. If ``stream_logs`` is None
    the result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.enabled_clouds,
                                                 workspace, expand)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
296
+
297
+
298
@usage_lib.entrypoint
@annotations.client_api
async def list_accelerators(
    gpus_only: bool = True,
    name_filter: Optional[str] = None,
    region_filter: Optional[str] = None,
    quantity_filter: Optional[int] = None,
    clouds: Optional[Union[List[str], str]] = None,
    all_regions: bool = False,
    require_price: bool = True,
    case_sensitive: bool = True,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[str, List[catalog.common.InstanceTypeInfo]]:
    """Async wrapper around sdk.list_accelerators().

    All filter arguments are forwarded positionally, in signature order, to
    ``sdk.list_accelerators`` through ``context_utils.to_thread``. If
    ``stream_logs`` is None the result is fetched with ``get()``, otherwise
    via ``_stream_and_get()``.
    """
    forwarded = (gpus_only, name_filter, region_filter, quantity_filter,
                 clouds, all_regions, require_price, case_sensitive)
    submitted_id = await context_utils.to_thread(sdk.list_accelerators,
                                                 *forwarded)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
322
+
323
+
324
@usage_lib.entrypoint
@annotations.client_api
async def list_accelerator_counts(
    gpus_only: bool = True,
    name_filter: Optional[str] = None,
    region_filter: Optional[str] = None,
    quantity_filter: Optional[int] = None,
    clouds: Optional[Union[List[str], str]] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[str, List[int]]:
    """Async wrapper around sdk.list_accelerator_counts().

    The filter arguments are forwarded positionally, in signature order, to
    ``sdk.list_accelerator_counts`` through ``context_utils.to_thread``. If
    ``stream_logs`` is None the result is fetched with ``get()``, otherwise
    via ``_stream_and_get()``.
    """
    forwarded = (gpus_only, name_filter, region_filter, quantity_filter,
                 clouds)
    submitted_id = await context_utils.to_thread(sdk.list_accelerator_counts,
                                                 *forwarded)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
344
+
345
+
346
@usage_lib.entrypoint
@annotations.client_api
async def optimize(
    dag: 'sky.Dag',
    minimize: common.OptimizeTarget = common.OptimizeTarget.COST,
    admin_policy_request_options: Optional[
        admin_policy.RequestOptions] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> 'sky.Dag':
    """Async wrapper around sdk.optimize() for the given DAG.

    ``sdk.optimize`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(
        sdk.optimize, dag, minimize, admin_policy_request_options)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
363
+
364
+
365
@usage_lib.entrypoint
@annotations.client_api
async def workspaces(
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[str, Any]:
    """Async wrapper around sdk.workspaces().

    ``sdk.workspaces`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.workspaces)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
376
+
377
+
378
@usage_lib.entrypoint
@annotations.client_api
async def launch(
    task: Union['sky.Task', 'sky.Dag'],
    cluster_name: Optional[str] = None,
    retry_until_up: bool = False,
    idle_minutes_to_autostop: Optional[int] = None,
    wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
    dryrun: bool = False,
    down: bool = False,  # pylint: disable=redefined-outer-name
    backend: Optional['backends.Backend'] = None,
    optimize_target: common.OptimizeTarget = common.OptimizeTarget.COST,
    no_setup: bool = False,
    clone_disk_from: Optional[str] = None,
    fast: bool = False,
    # Internal only:
    # pylint: disable=invalid-name
    _need_confirmation: bool = False,
    _is_launched_by_jobs_controller: bool = False,
    _is_launched_by_sky_serve_controller: bool = False,
    _disable_controller_check: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
) -> Tuple[Optional[int], Optional['backends.ResourceHandle']]:
    """Async wrapper around sdk.launch() (launch a cluster or task).

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.launch`` through ``context_utils.to_thread``.
    If ``stream_logs`` is None the result is fetched with ``get()``,
    otherwise via ``_stream_and_get()``.
    """
    launch_args = (
        task,
        cluster_name,
        retry_until_up,
        idle_minutes_to_autostop,
        wait_for,
        dryrun,
        down,
        backend,
        optimize_target,
        no_setup,
        clone_disk_from,
        fast,
        _need_confirmation,
        _is_launched_by_jobs_controller,
        _is_launched_by_sky_serve_controller,
        _disable_controller_check,
    )
    submitted_id = await context_utils.to_thread(sdk.launch, *launch_args)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
412
+
413
+
414
@usage_lib.entrypoint
@annotations.client_api
async def exec(  # pylint: disable=redefined-builtin
    task: Union['sky.Task', 'sky.Dag'],
    cluster_name: Optional[str] = None,
    dryrun: bool = False,
    down: bool = False,  # pylint: disable=redefined-outer-name
    backend: Optional['backends.Backend'] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
) -> Tuple[Optional[int], Optional['backends.ResourceHandle']]:
    """Async wrapper around sdk.exec() (run a task on an existing cluster).

    ``sdk.exec`` is invoked through ``context_utils.to_thread``; its return
    value is used as the request ID. If ``stream_logs`` is None the result is
    fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.exec, task, cluster_name,
                                                 dryrun, down, backend)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
431
+
432
+
433
@usage_lib.entrypoint
@annotations.client_api
async def tail_logs(cluster_name: str,
                    job_id: Optional[int],
                    follow: bool,
                    tail: int = 0,
                    output_stream: Optional['io.TextIOBase'] = None) -> int:
    """Async wrapper around sdk.tail_logs() (tail the logs of a job).

    All arguments are forwarded positionally to ``sdk.tail_logs`` through
    ``context_utils.to_thread`` and its result is returned unchanged.
    """
    outcome = await context_utils.to_thread(sdk.tail_logs, cluster_name,
                                            job_id, follow, tail,
                                            output_stream)
    return outcome
443
+
444
+
445
@usage_lib.entrypoint
@annotations.client_api
async def download_logs(cluster_name: str,
                        job_ids: Optional[List[str]]) -> Dict[str, str]:
    """Async wrapper around sdk.download_logs() (download job logs).

    Both arguments are forwarded to ``sdk.download_logs`` through
    ``context_utils.to_thread`` and its result is returned unchanged.
    """
    outcome = await context_utils.to_thread(sdk.download_logs, cluster_name,
                                            job_ids)
    return outcome
452
+
453
+
454
@usage_lib.entrypoint
@annotations.client_api
async def start(
    cluster_name: str,
    idle_minutes_to_autostop: Optional[int] = None,
    wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
    retry_until_up: bool = False,
    down: bool = False,  # pylint: disable=redefined-outer-name
    force: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
) -> 'backends.CloudVmRayResourceHandle':
    """Async wrapper around sdk.start() (restart a cluster).

    ``sdk.start`` is invoked through ``context_utils.to_thread``; its return
    value is used as the request ID. If ``stream_logs`` is None the result is
    fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    start_args = (cluster_name, idle_minutes_to_autostop, wait_for,
                  retry_until_up, down, force)
    submitted_id = await context_utils.to_thread(sdk.start, *start_args)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
474
+
475
+
476
@usage_lib.entrypoint
@annotations.client_api
async def down(
        cluster_name: str,
        purge: bool = False,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async wrapper around sdk.down() (tear down a cluster).

    ``sdk.down`` is invoked through ``context_utils.to_thread``; its return
    value is used as the request ID. If ``stream_logs`` is None the result is
    fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.down, cluster_name,
                                                 purge)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
488
+
489
+
490
@usage_lib.entrypoint
@annotations.client_api
async def stop(
        cluster_name: str,
        purge: bool = False,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async wrapper around sdk.stop() (stop a cluster).

    ``sdk.stop`` is invoked through ``context_utils.to_thread``; its return
    value is used as the request ID. If ``stream_logs`` is None the result is
    fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.stop, cluster_name,
                                                 purge)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
502
+
503
+
504
@usage_lib.entrypoint
@annotations.client_api
async def autostop(
    cluster_name: str,
    idle_minutes: int,
    wait_for: Optional['autostop_lib.AutostopWaitFor'] = None,
    down: bool = False,  # pylint: disable=redefined-outer-name
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> None:
    """Async wrapper around sdk.autostop() (schedule autostop/autodown).

    ``sdk.autostop`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.autostop, cluster_name,
                                                 idle_minutes, wait_for, down)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
521
+
522
+
523
@usage_lib.entrypoint
@annotations.client_api
async def queue(
    cluster_name: str,
    skip_finished: bool = False,
    all_users: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[responses.ClusterJobRecord]:
    """Async wrapper around sdk.queue() (job queue of a cluster).

    ``sdk.queue`` is invoked through ``context_utils.to_thread``; its return
    value is used as the request ID. If ``stream_logs`` is None the result is
    fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.queue, cluster_name,
                                                 skip_finished, all_users)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
538
+
539
+
540
@usage_lib.entrypoint
@annotations.client_api
async def job_status(
    cluster_name: str,
    job_ids: Optional[List[int]] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[Optional[int], Optional['job_lib.JobStatus']]:
    """Async wrapper around sdk.job_status() (status of jobs on a cluster).

    ``sdk.job_status`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.job_status,
                                                 cluster_name, job_ids)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
555
+
556
+
557
@usage_lib.entrypoint
@annotations.client_api
async def cancel(
        cluster_name: str,
        all: bool = False,  # pylint: disable=redefined-builtin
        all_users: bool = False,
        job_ids: Optional[List[int]] = None,
        # pylint: disable=invalid-name
        _try_cancel_if_cluster_is_init: bool = False,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async wrapper around sdk.cancel() (cancel jobs on a cluster).

    ``sdk.cancel`` is invoked through ``context_utils.to_thread``; its return
    value is used as the request ID. If ``stream_logs`` is None the result is
    fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    cancel_args = (cluster_name, all, all_users, job_ids,
                   _try_cancel_if_cluster_is_init)
    submitted_id = await context_utils.to_thread(sdk.cancel, *cancel_args)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
575
+
576
+
577
@usage_lib.entrypoint
@annotations.client_api
async def status(
    cluster_names: Optional[List[str]] = None,
    refresh: common.StatusRefreshMode = common.StatusRefreshMode.NONE,
    all_users: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG,
    *,
    _include_credentials: bool = False,
) -> List[Dict[str, Any]]:
    """Async wrapper around sdk.status() (cluster statuses).

    ``sdk.status`` is invoked through ``context_utils.to_thread`` with
    ``_include_credentials`` passed by keyword; its return value is used as
    the request ID. If ``stream_logs`` is None the result is fetched with
    ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(
        sdk.status,
        cluster_names,
        refresh,
        all_users,
        _include_credentials=_include_credentials,
    )
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
598
+
599
+
600
@usage_lib.entrypoint
@annotations.client_api
async def endpoints(
    cluster: str,
    port: Optional[Union[int, str]] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Dict[int, str]:
    """Async wrapper around sdk.endpoints() for a cluster and port.

    ``sdk.endpoints`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.endpoints, cluster, port)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
614
+
615
+
616
@usage_lib.entrypoint
@annotations.client_api
async def cost_report(
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[Dict[str, Any]]:
    """Async wrapper around sdk.cost_report() (cluster cost reports).

    ``sdk.cost_report`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.cost_report)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
627
+
628
+
629
@usage_lib.entrypoint
@annotations.client_api
async def storage_ls(
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[Dict[str, Any]]:
    """Async wrapper around sdk.storage_ls() (list storages).

    ``sdk.storage_ls`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.storage_ls)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
640
+
641
+
642
@usage_lib.entrypoint
@annotations.client_api
async def storage_delete(
        name: str,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async wrapper around sdk.storage_delete() (delete a storage).

    ``sdk.storage_delete`` is invoked through ``context_utils.to_thread``;
    its return value is used as the request ID. If ``stream_logs`` is None
    the result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.storage_delete, name)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
653
+
654
+
655
@usage_lib.entrypoint
@annotations.client_api
async def local_up(
        gpus: bool,
        ips: Optional[List[str]],
        ssh_user: Optional[str],
        ssh_key: Optional[str],
        cleanup: bool,
        context_name: Optional[str] = None,
        name: Optional[str] = None,
        password: Optional[str] = None,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async wrapper around sdk.local_up() (Kubernetes on local machines).

    Every argument except ``stream_logs`` is forwarded positionally, in
    signature order, to ``sdk.local_up`` through ``context_utils.to_thread``.
    If ``stream_logs`` is None the result is fetched with ``get()``,
    otherwise via ``_stream_and_get()``.
    """
    local_up_args = (gpus, ips, ssh_user, ssh_key, cleanup, context_name,
                     name, password)
    submitted_id = await context_utils.to_thread(sdk.local_up, *local_up_args)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
676
+
677
+
678
@usage_lib.entrypoint
@annotations.client_api
async def local_down(
        name: Optional[str] = None,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async wrapper around sdk.local_down() (undo local_up).

    ``sdk.local_down`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.local_down, name)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
690
+
691
+
692
@usage_lib.entrypoint
@annotations.client_api
async def ssh_up(
        infra: Optional[str] = None,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async wrapper around sdk.ssh_up() (deploy SSH Node Pools).

    The pools are those defined in ~/.sky/ssh_targets.yaml. ``sdk.ssh_up`` is
    invoked through ``context_utils.to_thread``; its return value is used as
    the request ID. If ``stream_logs`` is None the result is fetched with
    ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.ssh_up, infra)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
704
+
705
+
706
@usage_lib.entrypoint
@annotations.client_api
async def ssh_down(
        infra: Optional[str] = None,
        stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG) -> None:
    """Async wrapper around sdk.ssh_down() (tear down K8s on SSH targets).

    ``sdk.ssh_down`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.ssh_down, infra)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
718
+
719
+
720
@usage_lib.entrypoint
@annotations.client_api
async def realtime_kubernetes_gpu_availability(
    context: Optional[str] = None,
    name_filter: Optional[str] = None,
    quantity_filter: Optional[int] = None,
    is_ssh: Optional[bool] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[Tuple[str, List['models.RealtimeGpuAvailability']]]:
    """Async wrapper around sdk.realtime_kubernetes_gpu_availability().

    The four filter arguments are forwarded positionally through
    ``context_utils.to_thread``; the return value is used as the request ID.
    If ``stream_logs`` is None the result is fetched with ``get()``,
    otherwise via ``_stream_and_get()``.
    """
    gpu_query = (context, name_filter, quantity_filter, is_ssh)
    submitted_id = await context_utils.to_thread(
        sdk.realtime_kubernetes_gpu_availability, *gpu_query)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
738
+
739
+
740
@usage_lib.entrypoint
@annotations.client_api
async def kubernetes_node_info(
    context: Optional[str] = None,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> 'models.KubernetesNodesInfo':
    """Async wrapper around sdk.kubernetes_node_info() (node resources).

    ``sdk.kubernetes_node_info`` is invoked through
    ``context_utils.to_thread``; its return value is used as the request ID.
    If ``stream_logs`` is None the result is fetched with ``get()``,
    otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.kubernetes_node_info,
                                                 context)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
754
+
755
+
756
@usage_lib.entrypoint
@annotations.client_api
async def status_kubernetes(
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> Tuple[List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
           List['kubernetes_utils.KubernetesSkyPilotClusterInfoPayload'],
           List[Dict[str, Any]], Optional[str]]:
    """Async wrapper around sdk.status_kubernetes().

    Covers SkyPilot clusters and jobs in the Kubernetes cluster.
    ``sdk.status_kubernetes`` is invoked through ``context_utils.to_thread``;
    its return value is used as the request ID. If ``stream_logs`` is None
    the result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.status_kubernetes)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
770
+
771
+
772
@usage_lib.entrypoint
@annotations.client_api
async def api_cancel(
    request_ids: Optional[Union[str, List[str]]] = None,
    all_users: bool = False,
    silent: bool = False,
    stream_logs: Optional[StreamConfig] = DEFAULT_STREAM_CONFIG
) -> List[str]:
    """Async wrapper around sdk.api_cancel() (abort one or all requests).

    ``sdk.api_cancel`` is invoked through ``context_utils.to_thread``; its
    return value is used as the request ID. If ``stream_logs`` is None the
    result is fetched with ``get()``, otherwise via ``_stream_and_get()``.
    """
    submitted_id = await context_utils.to_thread(sdk.api_cancel, request_ids,
                                                 all_users, silent)
    if stream_logs is None:
        return await get(submitted_id)
    return await _stream_and_get(submitted_id, stream_logs)
787
+
788
+
789
@usage_lib.entrypoint
@annotations.client_api
async def api_status(request_ids: Optional[List[str]] = None,
                     all_status: bool = False) -> List[payloads.RequestPayload]:
    """Async wrapper around sdk.api_status() (list requests).

    Both arguments are forwarded to ``sdk.api_status`` through
    ``context_utils.to_thread`` and its result is returned unchanged.
    """
    outcome = await context_utils.to_thread(sdk.api_status, request_ids,
                                            all_status)
    return outcome
796
+
797
+
798
@usage_lib.entrypoint
@annotations.client_api
async def dashboard(starting_page: Optional[str] = None) -> None:
    """Async wrapper around sdk.dashboard() (start the SkyPilot dashboard).

    ``starting_page`` is forwarded to ``sdk.dashboard`` through
    ``context_utils.to_thread`` and its result is returned unchanged.
    """
    outcome = await context_utils.to_thread(sdk.dashboard, starting_page)
    return outcome
803
+
804
+
805
@usage_lib.entrypoint
@annotations.client_api
async def api_info() -> responses.APIHealthResponse:
    """Async wrapper around sdk.api_info().

    Gets the server's status, commit and version by calling ``sdk.api_info``
    through ``context_utils.to_thread``; the result is returned unchanged.
    """
    health = await context_utils.to_thread(sdk.api_info)
    return health
811
+
812
+
813
@usage_lib.entrypoint
@annotations.client_api
async def api_stop() -> None:
    """Async wrapper around sdk.api_stop() (stop the API server).

    ``sdk.api_stop`` is called through ``context_utils.to_thread`` and its
    result is returned unchanged.
    """
    outcome = await context_utils.to_thread(sdk.api_stop)
    return outcome
818
+
819
+
820
@usage_lib.entrypoint
@annotations.client_api
async def api_server_logs(follow: bool = True,
                          tail: Optional[int] = None) -> None:
    """Async wrapper around sdk.api_server_logs() (stream API server logs).

    Both arguments are forwarded to ``sdk.api_server_logs`` through
    ``context_utils.to_thread`` and its result is returned unchanged.
    """
    outcome = await context_utils.to_thread(sdk.api_server_logs, follow, tail)
    return outcome
826
+
827
+
828
@usage_lib.entrypoint
@annotations.client_api
async def api_login(endpoint: Optional[str] = None,
                    get_token: bool = False) -> None:
    """Async wrapper around sdk.api_login() (log into a SkyPilot API server).

    Both arguments are forwarded to ``sdk.api_login`` through
    ``context_utils.to_thread`` and its result is returned unchanged.
    """
    outcome = await context_utils.to_thread(sdk.api_login, endpoint,
                                            get_token)
    return outcome