skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,676 @@
1
+ Metadata-Version: 2.4
2
+ Name: skypilot-nightly
3
+ Version: 1.0.0.dev20251203
4
+ Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
+ Author: SkyPilot Team
6
+ License: Apache 2.0
7
+ Project-URL: Homepage, https://github.com/skypilot-org/skypilot
8
+ Project-URL: Issues, https://github.com/skypilot-org/skypilot/issues
9
+ Project-URL: Discussion, https://github.com/skypilot-org/skypilot/discussions
10
+ Project-URL: Documentation, https://docs.skypilot.co/
11
+ Classifier: Programming Language :: Python :: 3.7
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: License :: OSI Approved :: Apache Software License
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: System :: Distributed Computing
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: wheel<0.46.0
25
+ Requires-Dist: setuptools
26
+ Requires-Dist: pip
27
+ Requires-Dist: cachetools
28
+ Requires-Dist: click<8.2.0,>=7.0
29
+ Requires-Dist: colorama
30
+ Requires-Dist: cryptography
31
+ Requires-Dist: jinja2>=3.0
32
+ Requires-Dist: jsonschema
33
+ Requires-Dist: networkx
34
+ Requires-Dist: pandas>=1.3.0
35
+ Requires-Dist: pendulum
36
+ Requires-Dist: PrettyTable>=2.0.0
37
+ Requires-Dist: python-dotenv
38
+ Requires-Dist: rich
39
+ Requires-Dist: tabulate
40
+ Requires-Dist: typing_extensions
41
+ Requires-Dist: filelock>=3.15.0
42
+ Requires-Dist: packaging
43
+ Requires-Dist: psutil
44
+ Requires-Dist: pulp
45
+ Requires-Dist: pyyaml!=5.4.*,>3.13
46
+ Requires-Dist: ijson
47
+ Requires-Dist: orjson
48
+ Requires-Dist: requests
49
+ Requires-Dist: uvicorn[standard]<0.36.0,>=0.33.0
50
+ Requires-Dist: fastapi
51
+ Requires-Dist: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3,>2
52
+ Requires-Dist: python-multipart
53
+ Requires-Dist: aiofiles
54
+ Requires-Dist: httpx
55
+ Requires-Dist: setproctitle
56
+ Requires-Dist: sqlalchemy
57
+ Requires-Dist: psycopg2-binary
58
+ Requires-Dist: aiosqlite
59
+ Requires-Dist: asyncpg
60
+ Requires-Dist: casbin
61
+ Requires-Dist: sqlalchemy_adapter
62
+ Requires-Dist: prometheus_client>=0.8.0
63
+ Requires-Dist: passlib
64
+ Requires-Dist: bcrypt==4.0.1
65
+ Requires-Dist: pyjwt
66
+ Requires-Dist: gitpython
67
+ Requires-Dist: types-paramiko
68
+ Requires-Dist: alembic
69
+ Requires-Dist: aiohttp
70
+ Requires-Dist: anyio
71
+ Provides-Extra: aws
72
+ Requires-Dist: awscli>=1.27.10; extra == "aws"
73
+ Requires-Dist: botocore>=1.29.10; extra == "aws"
74
+ Requires-Dist: boto3>=1.26.1; extra == "aws"
75
+ Requires-Dist: colorama<0.4.5; extra == "aws"
76
+ Requires-Dist: casbin; extra == "aws"
77
+ Requires-Dist: sqlalchemy_adapter; extra == "aws"
78
+ Requires-Dist: passlib; extra == "aws"
79
+ Requires-Dist: pyjwt; extra == "aws"
80
+ Requires-Dist: aiohttp; extra == "aws"
81
+ Requires-Dist: anyio; extra == "aws"
82
+ Requires-Dist: grpcio>=1.63.0; extra == "aws"
83
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "aws"
84
+ Requires-Dist: aiosqlite; extra == "aws"
85
+ Requires-Dist: greenlet; extra == "aws"
86
+ Provides-Extra: azure
87
+ Requires-Dist: azure-cli>=2.65.0; extra == "azure"
88
+ Requires-Dist: azure-core>=1.31.0; extra == "azure"
89
+ Requires-Dist: azure-identity>=1.19.0; extra == "azure"
90
+ Requires-Dist: azure-mgmt-network>=27.0.0; extra == "azure"
91
+ Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "azure"
92
+ Requires-Dist: azure-storage-blob>=12.23.1; extra == "azure"
93
+ Requires-Dist: msgraph-sdk; extra == "azure"
94
+ Requires-Dist: msrestazure; extra == "azure"
95
+ Requires-Dist: casbin; extra == "azure"
96
+ Requires-Dist: sqlalchemy_adapter; extra == "azure"
97
+ Requires-Dist: passlib; extra == "azure"
98
+ Requires-Dist: pyjwt; extra == "azure"
99
+ Requires-Dist: aiohttp; extra == "azure"
100
+ Requires-Dist: anyio; extra == "azure"
101
+ Requires-Dist: grpcio>=1.63.0; extra == "azure"
102
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "azure"
103
+ Requires-Dist: aiosqlite; extra == "azure"
104
+ Requires-Dist: greenlet; extra == "azure"
105
+ Provides-Extra: gcp
106
+ Requires-Dist: google-api-python-client>=2.69.0; extra == "gcp"
107
+ Requires-Dist: google-cloud-storage; extra == "gcp"
108
+ Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "gcp"
109
+ Requires-Dist: casbin; extra == "gcp"
110
+ Requires-Dist: sqlalchemy_adapter; extra == "gcp"
111
+ Requires-Dist: passlib; extra == "gcp"
112
+ Requires-Dist: pyjwt; extra == "gcp"
113
+ Requires-Dist: aiohttp; extra == "gcp"
114
+ Requires-Dist: anyio; extra == "gcp"
115
+ Requires-Dist: grpcio>=1.63.0; extra == "gcp"
116
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "gcp"
117
+ Requires-Dist: aiosqlite; extra == "gcp"
118
+ Requires-Dist: greenlet; extra == "gcp"
119
+ Provides-Extra: ibm
120
+ Requires-Dist: ibm-cloud-sdk-core; extra == "ibm"
121
+ Requires-Dist: ibm-vpc; extra == "ibm"
122
+ Requires-Dist: ibm-platform-services>=0.48.0; extra == "ibm"
123
+ Requires-Dist: ibm-cos-sdk; extra == "ibm"
124
+ Requires-Dist: ray[default]>=2.6.1; extra == "ibm"
125
+ Requires-Dist: casbin; extra == "ibm"
126
+ Requires-Dist: sqlalchemy_adapter; extra == "ibm"
127
+ Requires-Dist: passlib; extra == "ibm"
128
+ Requires-Dist: pyjwt; extra == "ibm"
129
+ Requires-Dist: aiohttp; extra == "ibm"
130
+ Requires-Dist: anyio; extra == "ibm"
131
+ Requires-Dist: grpcio>=1.63.0; extra == "ibm"
132
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "ibm"
133
+ Requires-Dist: aiosqlite; extra == "ibm"
134
+ Requires-Dist: greenlet; extra == "ibm"
135
+ Provides-Extra: docker
136
+ Requires-Dist: docker; extra == "docker"
137
+ Requires-Dist: ray[default]>=2.6.1; extra == "docker"
138
+ Requires-Dist: casbin; extra == "docker"
139
+ Requires-Dist: sqlalchemy_adapter; extra == "docker"
140
+ Requires-Dist: passlib; extra == "docker"
141
+ Requires-Dist: pyjwt; extra == "docker"
142
+ Requires-Dist: aiohttp; extra == "docker"
143
+ Requires-Dist: anyio; extra == "docker"
144
+ Requires-Dist: grpcio>=1.63.0; extra == "docker"
145
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "docker"
146
+ Requires-Dist: aiosqlite; extra == "docker"
147
+ Requires-Dist: greenlet; extra == "docker"
148
+ Provides-Extra: lambda
149
+ Requires-Dist: casbin; extra == "lambda"
150
+ Requires-Dist: sqlalchemy_adapter; extra == "lambda"
151
+ Requires-Dist: passlib; extra == "lambda"
152
+ Requires-Dist: pyjwt; extra == "lambda"
153
+ Requires-Dist: aiohttp; extra == "lambda"
154
+ Requires-Dist: anyio; extra == "lambda"
155
+ Requires-Dist: grpcio>=1.63.0; extra == "lambda"
156
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "lambda"
157
+ Requires-Dist: aiosqlite; extra == "lambda"
158
+ Requires-Dist: greenlet; extra == "lambda"
159
+ Provides-Extra: cloudflare
160
+ Requires-Dist: awscli>=1.27.10; extra == "cloudflare"
161
+ Requires-Dist: botocore>=1.29.10; extra == "cloudflare"
162
+ Requires-Dist: boto3>=1.26.1; extra == "cloudflare"
163
+ Requires-Dist: colorama<0.4.5; extra == "cloudflare"
164
+ Requires-Dist: casbin; extra == "cloudflare"
165
+ Requires-Dist: sqlalchemy_adapter; extra == "cloudflare"
166
+ Requires-Dist: passlib; extra == "cloudflare"
167
+ Requires-Dist: pyjwt; extra == "cloudflare"
168
+ Requires-Dist: aiohttp; extra == "cloudflare"
169
+ Requires-Dist: anyio; extra == "cloudflare"
170
+ Requires-Dist: grpcio>=1.63.0; extra == "cloudflare"
171
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "cloudflare"
172
+ Requires-Dist: aiosqlite; extra == "cloudflare"
173
+ Requires-Dist: greenlet; extra == "cloudflare"
174
+ Provides-Extra: coreweave
175
+ Requires-Dist: awscli>=1.27.10; extra == "coreweave"
176
+ Requires-Dist: botocore>=1.29.10; extra == "coreweave"
177
+ Requires-Dist: boto3>=1.26.1; extra == "coreweave"
178
+ Requires-Dist: colorama<0.4.5; extra == "coreweave"
179
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "coreweave"
180
+ Requires-Dist: websockets; extra == "coreweave"
181
+ Requires-Dist: python-dateutil; extra == "coreweave"
182
+ Requires-Dist: casbin; extra == "coreweave"
183
+ Requires-Dist: sqlalchemy_adapter; extra == "coreweave"
184
+ Requires-Dist: passlib; extra == "coreweave"
185
+ Requires-Dist: pyjwt; extra == "coreweave"
186
+ Requires-Dist: aiohttp; extra == "coreweave"
187
+ Requires-Dist: anyio; extra == "coreweave"
188
+ Requires-Dist: grpcio>=1.63.0; extra == "coreweave"
189
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "coreweave"
190
+ Requires-Dist: aiosqlite; extra == "coreweave"
191
+ Requires-Dist: greenlet; extra == "coreweave"
192
+ Provides-Extra: scp
193
+ Requires-Dist: ray[default]>=2.6.1; extra == "scp"
194
+ Requires-Dist: casbin; extra == "scp"
195
+ Requires-Dist: sqlalchemy_adapter; extra == "scp"
196
+ Requires-Dist: passlib; extra == "scp"
197
+ Requires-Dist: pyjwt; extra == "scp"
198
+ Requires-Dist: aiohttp; extra == "scp"
199
+ Requires-Dist: anyio; extra == "scp"
200
+ Requires-Dist: grpcio>=1.63.0; extra == "scp"
201
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "scp"
202
+ Requires-Dist: aiosqlite; extra == "scp"
203
+ Requires-Dist: greenlet; extra == "scp"
204
+ Provides-Extra: oci
205
+ Requires-Dist: oci; extra == "oci"
206
+ Requires-Dist: casbin; extra == "oci"
207
+ Requires-Dist: sqlalchemy_adapter; extra == "oci"
208
+ Requires-Dist: passlib; extra == "oci"
209
+ Requires-Dist: pyjwt; extra == "oci"
210
+ Requires-Dist: aiohttp; extra == "oci"
211
+ Requires-Dist: anyio; extra == "oci"
212
+ Requires-Dist: grpcio>=1.63.0; extra == "oci"
213
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "oci"
214
+ Requires-Dist: aiosqlite; extra == "oci"
215
+ Requires-Dist: greenlet; extra == "oci"
216
+ Provides-Extra: kubernetes
217
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "kubernetes"
218
+ Requires-Dist: websockets; extra == "kubernetes"
219
+ Requires-Dist: python-dateutil; extra == "kubernetes"
220
+ Requires-Dist: casbin; extra == "kubernetes"
221
+ Requires-Dist: sqlalchemy_adapter; extra == "kubernetes"
222
+ Requires-Dist: passlib; extra == "kubernetes"
223
+ Requires-Dist: pyjwt; extra == "kubernetes"
224
+ Requires-Dist: aiohttp; extra == "kubernetes"
225
+ Requires-Dist: anyio; extra == "kubernetes"
226
+ Requires-Dist: grpcio>=1.63.0; extra == "kubernetes"
227
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "kubernetes"
228
+ Requires-Dist: aiosqlite; extra == "kubernetes"
229
+ Requires-Dist: greenlet; extra == "kubernetes"
230
+ Provides-Extra: ssh
231
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "ssh"
232
+ Requires-Dist: websockets; extra == "ssh"
233
+ Requires-Dist: python-dateutil; extra == "ssh"
234
+ Requires-Dist: casbin; extra == "ssh"
235
+ Requires-Dist: sqlalchemy_adapter; extra == "ssh"
236
+ Requires-Dist: passlib; extra == "ssh"
237
+ Requires-Dist: pyjwt; extra == "ssh"
238
+ Requires-Dist: aiohttp; extra == "ssh"
239
+ Requires-Dist: anyio; extra == "ssh"
240
+ Requires-Dist: grpcio>=1.63.0; extra == "ssh"
241
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "ssh"
242
+ Requires-Dist: aiosqlite; extra == "ssh"
243
+ Requires-Dist: greenlet; extra == "ssh"
244
+ Provides-Extra: runpod
245
+ Requires-Dist: runpod>=1.6.1; extra == "runpod"
246
+ Requires-Dist: tomli; extra == "runpod"
247
+ Requires-Dist: casbin; extra == "runpod"
248
+ Requires-Dist: sqlalchemy_adapter; extra == "runpod"
249
+ Requires-Dist: passlib; extra == "runpod"
250
+ Requires-Dist: pyjwt; extra == "runpod"
251
+ Requires-Dist: aiohttp; extra == "runpod"
252
+ Requires-Dist: anyio; extra == "runpod"
253
+ Requires-Dist: grpcio>=1.63.0; extra == "runpod"
254
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "runpod"
255
+ Requires-Dist: aiosqlite; extra == "runpod"
256
+ Requires-Dist: greenlet; extra == "runpod"
257
+ Provides-Extra: fluidstack
258
+ Requires-Dist: casbin; extra == "fluidstack"
259
+ Requires-Dist: sqlalchemy_adapter; extra == "fluidstack"
260
+ Requires-Dist: passlib; extra == "fluidstack"
261
+ Requires-Dist: pyjwt; extra == "fluidstack"
262
+ Requires-Dist: aiohttp; extra == "fluidstack"
263
+ Requires-Dist: anyio; extra == "fluidstack"
264
+ Requires-Dist: grpcio>=1.63.0; extra == "fluidstack"
265
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "fluidstack"
266
+ Requires-Dist: aiosqlite; extra == "fluidstack"
267
+ Requires-Dist: greenlet; extra == "fluidstack"
268
+ Provides-Extra: cudo
269
+ Requires-Dist: cudo-compute>=0.1.10; extra == "cudo"
270
+ Requires-Dist: casbin; extra == "cudo"
271
+ Requires-Dist: sqlalchemy_adapter; extra == "cudo"
272
+ Requires-Dist: passlib; extra == "cudo"
273
+ Requires-Dist: pyjwt; extra == "cudo"
274
+ Requires-Dist: aiohttp; extra == "cudo"
275
+ Requires-Dist: anyio; extra == "cudo"
276
+ Requires-Dist: grpcio>=1.63.0; extra == "cudo"
277
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "cudo"
278
+ Requires-Dist: aiosqlite; extra == "cudo"
279
+ Requires-Dist: greenlet; extra == "cudo"
280
+ Provides-Extra: paperspace
281
+ Requires-Dist: casbin; extra == "paperspace"
282
+ Requires-Dist: sqlalchemy_adapter; extra == "paperspace"
283
+ Requires-Dist: passlib; extra == "paperspace"
284
+ Requires-Dist: pyjwt; extra == "paperspace"
285
+ Requires-Dist: aiohttp; extra == "paperspace"
286
+ Requires-Dist: anyio; extra == "paperspace"
287
+ Requires-Dist: grpcio>=1.63.0; extra == "paperspace"
288
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "paperspace"
289
+ Requires-Dist: aiosqlite; extra == "paperspace"
290
+ Requires-Dist: greenlet; extra == "paperspace"
291
+ Provides-Extra: primeintellect
292
+ Requires-Dist: casbin; extra == "primeintellect"
293
+ Requires-Dist: sqlalchemy_adapter; extra == "primeintellect"
294
+ Requires-Dist: passlib; extra == "primeintellect"
295
+ Requires-Dist: pyjwt; extra == "primeintellect"
296
+ Requires-Dist: aiohttp; extra == "primeintellect"
297
+ Requires-Dist: anyio; extra == "primeintellect"
298
+ Requires-Dist: grpcio>=1.63.0; extra == "primeintellect"
299
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "primeintellect"
300
+ Requires-Dist: aiosqlite; extra == "primeintellect"
301
+ Requires-Dist: greenlet; extra == "primeintellect"
302
+ Provides-Extra: do
303
+ Requires-Dist: pydo>=0.3.0; extra == "do"
304
+ Requires-Dist: azure-core>=1.24.0; extra == "do"
305
+ Requires-Dist: azure-common; extra == "do"
306
+ Requires-Dist: casbin; extra == "do"
307
+ Requires-Dist: sqlalchemy_adapter; extra == "do"
308
+ Requires-Dist: passlib; extra == "do"
309
+ Requires-Dist: pyjwt; extra == "do"
310
+ Requires-Dist: aiohttp; extra == "do"
311
+ Requires-Dist: anyio; extra == "do"
312
+ Requires-Dist: grpcio>=1.63.0; extra == "do"
313
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "do"
314
+ Requires-Dist: aiosqlite; extra == "do"
315
+ Requires-Dist: greenlet; extra == "do"
316
+ Provides-Extra: vast
317
+ Requires-Dist: vastai-sdk>=0.1.12; extra == "vast"
318
+ Requires-Dist: casbin; extra == "vast"
319
+ Requires-Dist: sqlalchemy_adapter; extra == "vast"
320
+ Requires-Dist: passlib; extra == "vast"
321
+ Requires-Dist: pyjwt; extra == "vast"
322
+ Requires-Dist: aiohttp; extra == "vast"
323
+ Requires-Dist: anyio; extra == "vast"
324
+ Requires-Dist: grpcio>=1.63.0; extra == "vast"
325
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "vast"
326
+ Requires-Dist: aiosqlite; extra == "vast"
327
+ Requires-Dist: greenlet; extra == "vast"
328
+ Provides-Extra: vsphere
329
+ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "vsphere"
330
+ Requires-Dist: casbin; extra == "vsphere"
331
+ Requires-Dist: sqlalchemy_adapter; extra == "vsphere"
332
+ Requires-Dist: passlib; extra == "vsphere"
333
+ Requires-Dist: pyjwt; extra == "vsphere"
334
+ Requires-Dist: aiohttp; extra == "vsphere"
335
+ Requires-Dist: anyio; extra == "vsphere"
336
+ Requires-Dist: grpcio>=1.63.0; extra == "vsphere"
337
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "vsphere"
338
+ Requires-Dist: aiosqlite; extra == "vsphere"
339
+ Requires-Dist: greenlet; extra == "vsphere"
340
+ Provides-Extra: nebius
341
+ Requires-Dist: nebius>=0.3.12; extra == "nebius"
342
+ Requires-Dist: grpcio>=1.63.0; extra == "nebius"
343
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "nebius"
344
+ Requires-Dist: awscli>=1.27.10; extra == "nebius"
345
+ Requires-Dist: botocore>=1.29.10; extra == "nebius"
346
+ Requires-Dist: boto3>=1.26.1; extra == "nebius"
347
+ Requires-Dist: colorama<0.4.5; extra == "nebius"
348
+ Requires-Dist: casbin; extra == "nebius"
349
+ Requires-Dist: sqlalchemy_adapter; extra == "nebius"
350
+ Requires-Dist: passlib; extra == "nebius"
351
+ Requires-Dist: pyjwt; extra == "nebius"
352
+ Requires-Dist: aiohttp; extra == "nebius"
353
+ Requires-Dist: anyio; extra == "nebius"
354
+ Requires-Dist: grpcio>=1.63.0; extra == "nebius"
355
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "nebius"
356
+ Requires-Dist: aiosqlite; extra == "nebius"
357
+ Requires-Dist: greenlet; extra == "nebius"
358
+ Provides-Extra: hyperbolic
359
+ Requires-Dist: casbin; extra == "hyperbolic"
360
+ Requires-Dist: sqlalchemy_adapter; extra == "hyperbolic"
361
+ Requires-Dist: passlib; extra == "hyperbolic"
362
+ Requires-Dist: pyjwt; extra == "hyperbolic"
363
+ Requires-Dist: aiohttp; extra == "hyperbolic"
364
+ Requires-Dist: anyio; extra == "hyperbolic"
365
+ Requires-Dist: grpcio>=1.63.0; extra == "hyperbolic"
366
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "hyperbolic"
367
+ Requires-Dist: aiosqlite; extra == "hyperbolic"
368
+ Requires-Dist: greenlet; extra == "hyperbolic"
369
+ Provides-Extra: seeweb
370
+ Requires-Dist: ecsapi==0.4.0; extra == "seeweb"
371
+ Requires-Dist: casbin; extra == "seeweb"
372
+ Requires-Dist: sqlalchemy_adapter; extra == "seeweb"
373
+ Requires-Dist: passlib; extra == "seeweb"
374
+ Requires-Dist: pyjwt; extra == "seeweb"
375
+ Requires-Dist: aiohttp; extra == "seeweb"
376
+ Requires-Dist: anyio; extra == "seeweb"
377
+ Requires-Dist: grpcio>=1.63.0; extra == "seeweb"
378
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "seeweb"
379
+ Requires-Dist: aiosqlite; extra == "seeweb"
380
+ Requires-Dist: greenlet; extra == "seeweb"
381
+ Provides-Extra: shadeform
382
+ Requires-Dist: casbin; extra == "shadeform"
383
+ Requires-Dist: sqlalchemy_adapter; extra == "shadeform"
384
+ Requires-Dist: passlib; extra == "shadeform"
385
+ Requires-Dist: pyjwt; extra == "shadeform"
386
+ Requires-Dist: aiohttp; extra == "shadeform"
387
+ Requires-Dist: anyio; extra == "shadeform"
388
+ Requires-Dist: grpcio>=1.63.0; extra == "shadeform"
389
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "shadeform"
390
+ Requires-Dist: aiosqlite; extra == "shadeform"
391
+ Requires-Dist: greenlet; extra == "shadeform"
392
+ Provides-Extra: all
393
+ Requires-Dist: greenlet; extra == "all"
394
+ Requires-Dist: azure-identity>=1.19.0; extra == "all"
395
+ Requires-Dist: msrestazure; extra == "all"
396
+ Requires-Dist: azure-mgmt-network>=27.0.0; extra == "all"
397
+ Requires-Dist: aiosqlite; extra == "all"
398
+ Requires-Dist: azure-mgmt-compute>=33.0.0; extra == "all"
399
+ Requires-Dist: anyio; extra == "all"
400
+ Requires-Dist: ibm-platform-services>=0.48.0; extra == "all"
401
+ Requires-Dist: vastai-sdk>=0.1.12; extra == "all"
402
+ Requires-Dist: ibm-cloud-sdk-core; extra == "all"
403
+ Requires-Dist: sqlalchemy_adapter; extra == "all"
404
+ Requires-Dist: botocore>=1.29.10; extra == "all"
405
+ Requires-Dist: msgraph-sdk; extra == "all"
406
+ Requires-Dist: aiohttp; extra == "all"
407
+ Requires-Dist: nebius>=0.3.12; extra == "all"
408
+ Requires-Dist: passlib; extra == "all"
409
+ Requires-Dist: grpcio>=1.63.0; extra == "all"
410
+ Requires-Dist: websockets; extra == "all"
411
+ Requires-Dist: google-api-python-client>=2.69.0; extra == "all"
412
+ Requires-Dist: google-cloud-storage; extra == "all"
413
+ Requires-Dist: azure-cli>=2.65.0; extra == "all"
414
+ Requires-Dist: oci; extra == "all"
415
+ Requires-Dist: ecsapi==0.4.0; extra == "all"
416
+ Requires-Dist: cudo-compute>=0.1.10; extra == "all"
417
+ Requires-Dist: azure-core>=1.31.0; extra == "all"
418
+ Requires-Dist: colorama<0.4.5; extra == "all"
419
+ Requires-Dist: ibm-cos-sdk; extra == "all"
420
+ Requires-Dist: python-dateutil; extra == "all"
421
+ Requires-Dist: docker; extra == "all"
422
+ Requires-Dist: awscli>=1.27.10; extra == "all"
423
+ Requires-Dist: azure-storage-blob>=12.23.1; extra == "all"
424
+ Requires-Dist: tomli; extra == "all"
425
+ Requires-Dist: azure-core>=1.24.0; extra == "all"
426
+ Requires-Dist: casbin; extra == "all"
427
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
428
+ Requires-Dist: pyvmomi==8.0.1.0.2; extra == "all"
429
+ Requires-Dist: pyjwt; extra == "all"
430
+ Requires-Dist: runpod>=1.6.1; extra == "all"
431
+ Requires-Dist: boto3>=1.26.1; extra == "all"
432
+ Requires-Dist: ray[default]>=2.6.1; extra == "all"
433
+ Requires-Dist: pydo>=0.3.0; extra == "all"
434
+ Requires-Dist: azure-common; extra == "all"
435
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "all"
436
+ Requires-Dist: pyopenssl<24.3.0,>=23.2.0; extra == "all"
437
+ Requires-Dist: ibm-vpc; extra == "all"
438
+ Provides-Extra: remote
439
+ Requires-Dist: grpcio>=1.63.0; extra == "remote"
440
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "remote"
441
+ Provides-Extra: server
442
+ Requires-Dist: casbin; extra == "server"
443
+ Requires-Dist: sqlalchemy_adapter; extra == "server"
444
+ Requires-Dist: passlib; extra == "server"
445
+ Requires-Dist: pyjwt; extra == "server"
446
+ Requires-Dist: aiohttp; extra == "server"
447
+ Requires-Dist: anyio; extra == "server"
448
+ Requires-Dist: grpcio>=1.63.0; extra == "server"
449
+ Requires-Dist: protobuf<7.0.0,>=5.26.1; extra == "server"
450
+ Requires-Dist: aiosqlite; extra == "server"
451
+ Requires-Dist: greenlet; extra == "server"
452
+ Dynamic: author
453
+ Dynamic: classifier
454
+ Dynamic: description
455
+ Dynamic: description-content-type
456
+ Dynamic: license
457
+ Dynamic: license-file
458
+ Dynamic: project-url
459
+ Dynamic: provides-extra
460
+ Dynamic: requires-dist
461
+ Dynamic: summary
462
+
463
+ <p align="center">
464
+ <img alt="SkyPilot" src="https://raw.githubusercontent.com/skypilot-org/skypilot/master/docs/source/images/skypilot-wide-light-1k.png" width=55%>
465
+ </p>
466
+
467
+ <p align="center">
468
+ <a href="https://docs.skypilot.co/">
469
+ <img alt="Documentation" src="https://img.shields.io/badge/docs-gray?logo=readthedocs&logoColor=f5f5f5">
470
+ </a>
471
+
472
+ <a href="https://github.com/skypilot-org/skypilot/releases">
473
+ <img alt="GitHub Release" src="https://img.shields.io/github/release/skypilot-org/skypilot.svg">
474
+ </a>
475
+
476
+ <a href="http://slack.skypilot.co">
477
+ <img alt="Join Slack" src="https://img.shields.io/badge/SkyPilot-Join%20Slack-blue?logo=slack">
478
+ </a>
479
+
480
+ <a href="https://github.com/skypilot-org/skypilot/releases">
481
+ <img alt="Downloads" src="https://img.shields.io/pypi/dm/skypilot">
482
+ </a>
483
+
484
+ </p>
485
+
486
+ <h3 align="center">
487
+ Simplify & scale any AI infrastructure
488
+ </h3>
489
+
490
+ <div align="center">
491
+
492
+ #### [🌟 **SkyPilot Demo** 🌟: Click to see a 1-minute tour](https://demo.skypilot.co/dashboard/)
493
+
494
+ </div>
495
+
496
+
497
+
498
+ ----
499
+
500
+ :fire: *News* :fire:
501
+ - [Nov 2025] Serve **Kimi K2 Thinking** with reasoning capabilities on your Kubernetes or clouds: [**example**](./llm/kimi-k2-thinking/)
502
+ - [Oct 2025] Run **RL training for LLMs** with SkyRL on your Kubernetes or clouds: [**example**](./llm/skyrl/)
503
+ - [Oct 2025] Train and serve [Andrej Karpathy's](https://x.com/karpathy/status/1977755427569111362) **nanochat** - the best ChatGPT that $100 can buy: [**example**](./llm/nanochat)
504
+ - [Oct 2025] Run large-scale **LLM training with TorchTitan** on any AI infra: [**example**](./examples/training/torchtitan)
505
+ - [Sep 2025] Scaling AI infrastructure at Abridge - **10x faster development** with SkyPilot: [**blog**](https://blog.skypilot.co/abridge/)
506
+ - [Sep 2025] Network and Storage Benchmarks for LLM training on the cloud: [**blog**](https://maknee.github.io/blog/2025/Network-And-Storage-Training-Skypilot/)
507
+ - [Aug 2025] Serve and finetune **OpenAI GPT-OSS models** (gpt-oss-120b, gpt-oss-20b) with one command on any infra: [**serve**](./llm/gpt-oss/) + [**LoRA and full finetuning**](./llm/gpt-oss-finetuning/)
508
+ - [Jul 2025] Run distributed **RL training for LLMs** with Verl (PPO, GRPO) on any cloud: [**example**](./llm/verl/)
509
+ - [Jul 2025] Finetune **Llama4** on any distributed cluster/cloud: [**example**](./llm/llama-4-finetuning/)
510
+ - [Jul 2025] Two-part blog series, `The Evolution of AI Job Orchestration`: (1) [Running AI jobs on GPU Neoclouds](https://blog.skypilot.co/ai-job-orchestration-pt1-gpu-neoclouds/), (2) [The AI-Native Control Plane & Orchestration that Finally Works for ML](https://blog.skypilot.co/ai-job-orchestration-pt2-ai-control-plane/)
511
+ - [Apr 2025] Spin up **Qwen3** on your cluster/cloud: [**example**](./llm/qwen/)
512
+
513
+
514
+
515
+ **LLM Finetuning Cookbooks**: Finetuning Llama 2 / Llama 3.1 in your own cloud environment, privately: Llama 2 [**example**](./llm/vicuna-llama-2/) and [**blog**](https://blog.skypilot.co/finetuning-llama2-operational-guide/); Llama 3.1 [**example**](./llm/llama-3_1-finetuning/) and [**blog**](https://blog.skypilot.co/finetune-llama-3_1-on-your-infra/)
516
+
517
+ ----
518
+
519
+ SkyPilot is a system to run, manage, and scale AI workloads on any AI infrastructure.
520
+
521
+ SkyPilot gives **AI teams** a simple interface to run jobs on any infra.
522
+ **Infra teams** get a unified control plane to manage any AI compute — with advanced scheduling, scaling, and orchestration.
523
+
524
+ <img src="./docs/source/images/skypilot-abstractions-long-2.png" alt="SkyPilot Abstractions">
525
+
526
+ ## Overview
527
+
528
+ SkyPilot **is easy to use for AI teams**:
529
+ - Quickly spin up compute on your own infra
530
+ - Environment and job as code — simple and portable
531
+ - Easy job management: queue, run, and auto-recover many jobs
532
+
533
+ SkyPilot **makes Kubernetes easy for AI & Infra teams**:
534
+
535
+ - Slurm-like ease of use, cloud-native robustness
536
+ - Local dev experience on K8s: SSH into pods, sync code, or connect IDE
537
+ - Turbocharge your clusters: gang scheduling, multi-cluster, and scaling
538
+
539
+ SkyPilot **unifies multiple clusters, clouds, and hardware**:
540
+ - One interface to use reserved GPUs, Kubernetes clusters, Slurm clusters, or 20+ clouds
541
+ - [Flexible provisioning](https://docs.skypilot.co/en/latest/examples/auto-failover.html) of GPUs, TPUs, CPUs, with auto-retry
542
+ - [Team deployment](https://docs.skypilot.co/en/latest/reference/api-server/api-server.html) and resource sharing
543
+
544
+ SkyPilot **cuts your cloud costs & maximizes GPU availability**:
545
+ * Autostop: automatic cleanup of idle resources
546
+ * [Spot instance support](https://docs.skypilot.co/en/latest/examples/managed-jobs.html#running-on-spot-instances): 3-6x cost savings, with preemption auto-recovery
547
+ * Intelligent scheduling: automatically run on the cheapest & most available infra
548
+
549
+ SkyPilot supports your existing GPU, TPU, and CPU workloads, with no code changes.
550
+
551
+ Install with pip:
552
+ ```bash
553
+ # Choose your clouds:
554
+ pip install -U "skypilot[kubernetes,aws,gcp,azure,oci,nebius,lambda,runpod,fluidstack,paperspace,cudo,ibm,scp,seeweb,shadeform]"
555
+ ```
556
+ To get the latest features and fixes, use the nightly build or [install from source](https://docs.skypilot.co/en/latest/getting-started/installation.html):
557
+ ```bash
558
+ # Choose your clouds:
559
+ pip install "skypilot-nightly[kubernetes,aws,gcp,azure,oci,nebius,lambda,runpod,fluidstack,paperspace,cudo,ibm,scp,seeweb,shadeform]"
560
+ ```
561
+
562
+ <p align="center">
563
+ <img src="docs/source/_static/intro.gif" alt="SkyPilot">
564
+ </p>
565
+
566
+ Currently supported infra: Kubernetes, Slurm, AWS, GCP, Azure, OCI, CoreWeave, Nebius, Lambda Cloud, RunPod, Fluidstack,
567
+ Cudo, Digital Ocean, Paperspace, Cloudflare, Samsung, IBM, Vast.ai, VMware vSphere, Seeweb, Prime Intellect, Shadeform.
568
+ <p align="center">
569
+ <img alt="SkyPilot" src="https://raw.githubusercontent.com/skypilot-org/skypilot/master/docs/source/images/cloud-logos-light.png" width=85%>
570
+ </p>
571
+ <!-- source xcf file: https://drive.google.com/drive/folders/1S_acjRsAD3T14qMeEnf6FFrIwHu_Gs_f?usp=drive_link -->
572
+
573
+
574
+ ## Getting started
575
+ You can find our documentation [here](https://docs.skypilot.co/).
576
+ - [Installation](https://docs.skypilot.co/en/latest/getting-started/installation.html)
577
+ - [Quickstart](https://docs.skypilot.co/en/latest/getting-started/quickstart.html)
578
+ - [CLI reference](https://docs.skypilot.co/en/latest/reference/cli.html)
579
+
580
+ ## SkyPilot in 1 minute
581
+
582
+ A SkyPilot task specifies: resource requirements, data to be synced, setup commands, and the task commands.
583
+
584
+ Once written in this [**unified interface**](https://docs.skypilot.co/en/latest/reference/yaml-spec.html) (YAML or Python API), the task can be launched on any available infra (Kubernetes, Slurm, cloud, etc.). This avoids vendor lock-in and makes it easy to move jobs to a different provider.
585
+
586
+ Paste the following into a file `my_task.yaml`:
587
+
588
+ ```yaml
589
+ resources:
590
+   accelerators: A100:8 # 8x NVIDIA A100 GPU
591
+
592
+ num_nodes: 1 # Number of VMs to launch
593
+
594
+ # Working directory (optional) containing the project codebase.
595
+ # Its contents are synced to ~/sky_workdir/ on the cluster.
596
+ workdir: ~/torch_examples
597
+
598
+ # Commands to be run before executing the job.
599
+ # Typical use: pip install -r requirements.txt, git clone, etc.
600
+ setup: |
601
+   cd mnist
602
+   pip install -r requirements.txt
603
+
604
+ # Commands to run as a job.
605
+ # Typical use: launch the main program.
606
+ run: |
607
+   cd mnist
608
+   python main.py --epochs 1
609
+ ```
610
+
611
+ Prepare the workdir by cloning:
612
+ ```bash
613
+ git clone https://github.com/pytorch/examples.git ~/torch_examples
614
+ ```
615
+
616
+ Launch with `sky launch` (note: [access to GPU instances](https://docs.skypilot.co/en/latest/cloud-setup/quota.html) is needed for this example):
617
+ ```bash
618
+ sky launch my_task.yaml
619
+ ```
620
+
621
+ SkyPilot then performs the heavy-lifting for you, including:
622
+ 1. Find the cheapest available infra across your clusters or clouds
623
+ 2. Provision the GPUs (pods or VMs), with auto-failover if the infra returns capacity errors
624
+ 3. Sync your local `workdir` to the provisioned cluster
625
+ 4. Auto-install dependencies by running the task's `setup` commands
626
+ 5. Run the task's `run` commands, and stream logs
627
+
628
+ See [Quickstart](https://docs.skypilot.co/en/latest/getting-started/quickstart.html) to get started with SkyPilot.
629
+
630
+ ## Runnable examples
631
+
632
+ See [**SkyPilot examples**](https://docs.skypilot.co/en/docs-examples/examples/index.html) that cover: development, training, serving, LLM models, AI apps, and common frameworks.
633
+
634
+ Latest featured examples:
635
+
636
+ | Task | Examples |
637
+ |----------|----------|
638
+ | Training | [Verl](https://docs.skypilot.co/en/latest/examples/training/verl.html), [Finetune Llama 4](https://docs.skypilot.co/en/latest/examples/training/llama-4-finetuning.html), [TorchTitan](https://docs.skypilot.co/en/latest/examples/training/torchtitan.html), [PyTorch](https://docs.skypilot.co/en/latest/getting-started/tutorial.html), [DeepSpeed](https://docs.skypilot.co/en/latest/examples/training/deepspeed.html), [NeMo](https://docs.skypilot.co/en/latest/examples/training/nemo.html), [Ray](https://docs.skypilot.co/en/latest/examples/training/ray.html), [Unsloth](https://docs.skypilot.co/en/latest/examples/training/unsloth.html), [Jax/TPU](https://docs.skypilot.co/en/latest/examples/training/tpu.html) |
639
+ | Serving | [vLLM](https://docs.skypilot.co/en/latest/examples/serving/vllm.html), [SGLang](https://docs.skypilot.co/en/latest/examples/serving/sglang.html), [Ollama](https://docs.skypilot.co/en/latest/examples/serving/ollama.html) |
640
+ | Models | [DeepSeek-R1](https://docs.skypilot.co/en/latest/examples/models/deepseek-r1.html), [Llama 4](https://docs.skypilot.co/en/latest/examples/models/llama-4.html), [Llama 3](https://docs.skypilot.co/en/latest/examples/models/llama-3.html), [CodeLlama](https://docs.skypilot.co/en/latest/examples/models/codellama.html), [Qwen](https://docs.skypilot.co/en/latest/examples/models/qwen.html), [Kimi-K2](https://docs.skypilot.co/en/latest/examples/models/kimi-k2.html), [Kimi-K2-Thinking](https://docs.skypilot.co/en/latest/examples/models/kimi-k2-thinking.html), [Mixtral](https://docs.skypilot.co/en/latest/examples/models/mixtral.html) |
641
+ | AI apps | [RAG](https://docs.skypilot.co/en/latest/examples/applications/rag.html), [vector databases](https://docs.skypilot.co/en/latest/examples/applications/vector_database.html) (ChromaDB, CLIP) |
642
+ | Common frameworks | [Airflow](https://docs.skypilot.co/en/latest/examples/frameworks/airflow.html), [Jupyter](https://docs.skypilot.co/en/latest/examples/frameworks/jupyter.html), [marimo](https://docs.skypilot.co/en/latest/examples/frameworks/marimo.html) |
643
+
644
+ Source files can be found in [`llm/`](https://github.com/skypilot-org/skypilot/tree/master/llm) and [`examples/`](https://github.com/skypilot-org/skypilot/tree/master/examples).
645
+
646
+ ## More information
647
+ To learn more, see [SkyPilot Overview](https://docs.skypilot.co/en/latest/overview.html), [SkyPilot docs](https://docs.skypilot.co/en/latest/), and [SkyPilot blog](https://blog.skypilot.co/).
648
+
649
+ SkyPilot adopters: [Testimonials and Case Studies](https://blog.skypilot.co/case-studies/)
650
+
651
+ Partners and integrations: [Community Spotlights](https://blog.skypilot.co/community/)
652
+
653
+ Follow updates:
654
+ - [Slack](http://slack.skypilot.co)
655
+ - [X / Twitter](https://twitter.com/skypilot_org)
656
+ - [LinkedIn](https://www.linkedin.com/company/skypilot-oss/)
657
+ - [SkyPilot Blog](https://blog.skypilot.co/) ([Introductory blog post](https://blog.skypilot.co/introducing-skypilot/))
658
+
659
+ Read the research:
660
+ - [SkyPilot paper](https://www.usenix.org/system/files/nsdi23-yang-zongheng.pdf) and [talk](https://www.usenix.org/conference/nsdi23/presentation/yang-zongheng) (NSDI 2023)
661
+ - [Sky Computing whitepaper](https://arxiv.org/abs/2205.07147)
662
+ - [Sky Computing vision paper](https://sigops.org/s/conferences/hotos/2021/papers/hotos21-s02-stoica.pdf) (HotOS 2021)
663
+ - [SkyServe: AI serving across regions and clouds](https://arxiv.org/pdf/2411.01438) (EuroSys 2025)
664
+ - [Managed jobs spot instance policy](https://www.usenix.org/conference/nsdi24/presentation/wu-zhanghao) (NSDI 2024)
665
+
666
+ SkyPilot was started at the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley and has since gained many industry contributors. To read about the project's origin and vision, see [Concept: Sky Computing](https://docs.skypilot.co/en/latest/sky-computing.html).
667
+
668
+ ## Questions and feedback
669
+ We are excited to hear your feedback:
670
+ * For issues and feature requests, please [open a GitHub issue](https://github.com/skypilot-org/skypilot/issues/new).
671
+ * For questions, please use [GitHub Discussions](https://github.com/skypilot-org/skypilot/discussions).
672
+
673
+ For general discussions, join us on the [SkyPilot Slack](http://slack.skypilot.co).
674
+
675
+ ## Contributing
676
+ We welcome all contributions to the project! See [CONTRIBUTING](CONTRIBUTING.md) for how to get involved.