skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (546)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/server/uvicorn.py CHANGED
@@ -3,24 +3,238 @@
3
3
  This module is a wrapper around uvicorn to customize the behavior of the
4
4
  server.
5
5
  """
6
+ import asyncio
7
+ import logging
6
8
  import os
9
+ import signal
10
+ import sys
7
11
  import threading
8
- from typing import Optional
12
+ import time
13
+ from types import FrameType
14
+ from typing import Optional, Union
9
15
 
16
+ import filelock
10
17
  import uvicorn
11
18
  from uvicorn.supervisors import multiprocess
12
19
 
20
+ from sky import sky_logging
21
+ from sky.server import daemons
22
+ from sky.server import metrics as metrics_lib
23
+ from sky.server import state
24
+ from sky.server.requests import requests as requests_lib
25
+ from sky.skylet import constants
26
+ from sky.utils import context_utils
27
+ from sky.utils import env_options
28
+ from sky.utils import perf_utils
13
29
  from sky.utils import subprocess_utils
30
+ from sky.utils.db import db_utils
14
31
 
32
+ logger = sky_logging.init_logger(__name__)
15
33
 
16
- def run(config: uvicorn.Config):
34
+ # File lock path for coordinating graceful shutdown across processes
35
+ _GRACEFUL_SHUTDOWN_LOCK_PATH = '/tmp/skypilot_graceful_shutdown.lock'
36
+
37
+ # Interval to check for on-going requests.
38
+ _WAIT_REQUESTS_INTERVAL_SECONDS = 5
39
+
40
+ # Timeout for waiting for on-going requests to finish.
41
+ try:
42
+ _WAIT_REQUESTS_TIMEOUT_SECONDS = int(
43
+ os.environ.get(constants.GRACE_PERIOD_SECONDS_ENV_VAR, '60'))
44
+ except ValueError:
45
+ _WAIT_REQUESTS_TIMEOUT_SECONDS = 60
46
+
47
+ # TODO(aylei): use decorator to register requests that need to be proactively
48
+ # cancelled instead of hardcoding here.
49
+ _RETRIABLE_REQUEST_NAMES = {
50
+ 'sky.logs',
51
+ 'sky.jobs.logs',
52
+ 'sky.serve.logs',
53
+ }
54
+
55
+
56
+ def add_timestamp_prefix_for_server_logs() -> None:
57
+ """Configure logging for API server.
58
+
59
+ Note: we only do this in the main API server process and uvicorn processes,
60
+ to avoid affecting executor logs (including in modules like
61
+ sky.server.requests) that may get sent to the client.
62
+ """
63
+ server_logger = sky_logging.init_logger('sky.server')
64
+ # Clear existing handlers first to prevent duplicates
65
+ server_logger.handlers.clear()
66
+ # Disable propagation to avoid the root logger of SkyPilot being affected
67
+ server_logger.propagate = False
68
+ # Add date prefix to the log message printed by loggers under
69
+ # server.
70
+ stream_handler = logging.StreamHandler(sys.stdout)
71
+ if env_options.Options.SHOW_DEBUG_INFO.get():
72
+ stream_handler.setLevel(logging.DEBUG)
73
+ else:
74
+ stream_handler.setLevel(logging.INFO)
75
+ stream_handler.flush = sys.stdout.flush # type: ignore
76
+ stream_handler.setFormatter(sky_logging.FORMATTER)
77
+ server_logger.addHandler(stream_handler)
78
+ # Add date prefix to the log message printed by uvicorn.
79
+ for name in ['uvicorn', 'uvicorn.access']:
80
+ uvicorn_logger = logging.getLogger(name)
81
+ uvicorn_logger.handlers.clear()
82
+ uvicorn_logger.addHandler(stream_handler)
83
+
84
+
85
+ class Server(uvicorn.Server):
86
+ """Server wrapper for uvicorn.
87
+
88
+ Extended functionalities:
89
+ - Handle exit signal and perform custom graceful shutdown.
90
+ - Run the server process in a context-aware manner.
91
+ """
92
+
93
+ def __init__(self,
94
+ config: uvicorn.Config,
95
+ max_db_connections: Optional[int] = None):
96
+ super().__init__(config=config)
97
+ self.exiting: bool = False
98
+ self.max_db_connections = max_db_connections
99
+
100
+ def handle_exit(self, sig: int, frame: Union[FrameType, None]) -> None:
101
+ """Handle exit signal.
102
+
103
+ When a server process receives a SIGTERM or SIGINT signal, a graceful
104
+ shutdown will be initiated. If a SIGINT signal is received again, the
105
+ server will be forcefully shutdown.
106
+ """
107
+ if self.exiting and sig == signal.SIGINT:
108
+ # The server has been signaled to exit and received a SIGINT again,
109
+ # do force shutdown.
110
+ logger.info('Force shutdown.')
111
+ self.should_exit = True
112
+ super().handle_exit(sig, frame)
113
+ return
114
+ if not self.exiting:
115
+ self.exiting = True
116
+ # Perform graceful shutdown in a separate thread to avoid blocking
117
+ # the main thread.
118
+ threading.Thread(target=self._graceful_shutdown,
119
+ args=(sig, frame),
120
+ daemon=True).start()
121
+
122
+ def _graceful_shutdown(self, sig: int, frame: Union[FrameType,
123
+ None]) -> None:
124
+ """Perform graceful shutdown."""
125
+ # Block new requests so that we can wait until all on-going requests
126
+ # are finished. Note that /api/$verb operations are still allowed in
127
+ # this stage to ensure the client can still operate the on-going
128
+ # requests, e.g. /api/logs, /api/cancel, etc.
129
+ logger.info('Block new requests being submitted in worker '
130
+ f'{os.getpid()}.')
131
+ state.set_block_requests(True)
132
+ # Ensure the shutting_down are set on all workers before next step.
133
+ # TODO(aylei): hacky, need a reliable solution.
134
+ time.sleep(1)
135
+
136
+ lock = filelock.FileLock(_GRACEFUL_SHUTDOWN_LOCK_PATH)
137
+ # Elect a coordinator process to handle on-going requests check
138
+ with lock.acquire():
139
+ logger.info(f'Worker {os.getpid()} elected as shutdown coordinator')
140
+ self._wait_requests()
141
+
142
+ logger.info('Shutting down server...')
143
+ self.should_exit = True
144
+ super().handle_exit(sig, frame)
145
+
146
+ def _wait_requests(self) -> None:
147
+ """Wait until all on-going requests are finished or cancelled."""
148
+ start_time = time.time()
149
+ while True:
150
+ statuses = [
151
+ requests_lib.RequestStatus.PENDING,
152
+ requests_lib.RequestStatus.RUNNING,
153
+ ]
154
+ requests = [(request_task.request_id, request_task.name)
155
+ for request_task in requests_lib.get_request_tasks(
156
+ req_filter=requests_lib.RequestTaskFilter(
157
+ status=statuses, fields=['request_id', 'name']))
158
+ ]
159
+ if not requests:
160
+ break
161
+ logger.info(f'{len(requests)} on-going requests '
162
+ 'found, waiting for them to finish...')
163
+ # Proactively cancel internal requests and logs requests since
164
+ # they can run indefinitely.
165
+ internal_request_ids = {
166
+ d.id for d in daemons.INTERNAL_REQUEST_DAEMONS
167
+ }
168
+ if time.time() - start_time > _WAIT_REQUESTS_TIMEOUT_SECONDS:
169
+ logger.warning('Timeout waiting for on-going requests to '
170
+ 'finish, cancelling all on-going requests.')
171
+ for request_id, _ in requests:
172
+ self.interrupt_request_for_retry(request_id)
173
+ break
174
+ interrupted = 0
175
+ for request_id, name in requests:
176
+ if (name in _RETRIABLE_REQUEST_NAMES or
177
+ request_id in internal_request_ids):
178
+ self.interrupt_request_for_retry(request_id)
179
+ interrupted += 1
180
+ # TODO(aylei): interrupt pending requests to accelerate the
181
+ # shutdown.
182
+ # If some requests are not interrupted, wait for them to finish,
183
+ # otherwise we just check again immediately to accelerate the
184
+ # shutdown process.
185
+ if interrupted < len(requests):
186
+ time.sleep(_WAIT_REQUESTS_INTERVAL_SECONDS)
187
+
188
+ def interrupt_request_for_retry(self, request_id: str) -> None:
189
+ """Interrupt a request for retry."""
190
+ with requests_lib.update_request(request_id) as req:
191
+ if req is None:
192
+ return
193
+ if req.pid is not None:
194
+ try:
195
+ os.kill(req.pid, signal.SIGTERM)
196
+ except ProcessLookupError:
197
+ logger.debug(f'Process {req.pid} already finished.')
198
+ req.status = requests_lib.RequestStatus.CANCELLED
199
+ req.should_retry = True
200
+ logger.info(
201
+ f'Request {request_id} interrupted and will be retried by client.')
202
+
203
+ def run(self, *args, **kwargs):
204
+ """Run the server process."""
205
+ if self.max_db_connections is not None:
206
+ db_utils.set_max_connections(self.max_db_connections)
207
+ add_timestamp_prefix_for_server_logs()
208
+ context_utils.hijack_sys_attrs()
209
+ # Use default loop policy of uvicorn (use uvloop if available).
210
+ self.config.setup_event_loop()
211
+ lag_threshold = perf_utils.get_loop_lag_threshold()
212
+ if lag_threshold is not None:
213
+ event_loop = asyncio.get_event_loop()
214
+ # Same as set PYTHONASYNCIODEBUG=1, but with custom threshold.
215
+ event_loop.set_debug(True)
216
+ event_loop.slow_callback_duration = lag_threshold
217
+ stop_monitor = threading.Event()
218
+ monitor = threading.Thread(target=metrics_lib.process_monitor,
219
+ args=('server', stop_monitor),
220
+ daemon=True)
221
+ monitor.start()
222
+ try:
223
+ with self.capture_signals():
224
+ asyncio.run(self.serve(*args, **kwargs))
225
+ finally:
226
+ stop_monitor.set()
227
+ monitor.join()
228
+
229
+
230
+ def run(config: uvicorn.Config, max_db_connections: Optional[int] = None):
17
231
  """Run uvicorn server."""
18
232
  if config.reload:
19
233
  # Reload and multi-workers are mutually exclusive
20
234
  # in uvicorn. Since we do not use reload now, simply
21
235
  # guard by an exception.
22
236
  raise ValueError('Reload is not supported yet.')
23
- server = uvicorn.Server(config=config)
237
+ server = Server(config=config, max_db_connections=max_db_connections)
24
238
  try:
25
239
  if config.workers is not None and config.workers > 1:
26
240
  sock = config.bind_socket()
sky/server/versions.py ADDED
@@ -0,0 +1,270 @@
1
+ """API versioning module."""
2
+
3
+ import contextvars
4
+ import functools
5
+ import re
6
+ from typing import Callable, Literal, Mapping, NamedTuple, Optional, Tuple
7
+
8
+ import colorama
9
+ from packaging import version as version_lib
10
+
11
+ import sky
12
+ from sky import exceptions
13
+ from sky import sky_logging
14
+ from sky.server import constants
15
+ from sky.utils import ux_utils
16
+
17
+ logger = sky_logging.init_logger(__name__)
18
+
19
+ CLIENT_TOO_OLD_ERROR = (
20
+ f'{colorama.Fore.YELLOW}Your SkyPilot client version is too old:'
21
+ '{remote_version}\n'
22
+ f'{colorama.Style.RESET_ALL}'
23
+ 'The server is running on {local_version} and the minimum compatible '
24
+ 'version is {min_version}.\n'
25
+ f'Upgrade your client with:\n{colorama.Fore.YELLOW}'
26
+ '{command}'
27
+ f'{colorama.Style.RESET_ALL}')
28
+ SERVER_TOO_OLD_ERROR = (
29
+ f'{colorama.Fore.YELLOW}Your SkyPilot API server version is too old: '
30
+ '{remote_version}\n'
31
+ f'{colorama.Style.RESET_ALL}'
32
+ 'The client is running on {local_version} and the minimum compatible '
33
+ 'version is {min_version}.\n'
34
+ 'Contact your administrator to upgrade the remote API server or downgrade '
35
+ f'your client with:\n{colorama.Fore.YELLOW}'
36
+ '{command}'
37
+ f'{colorama.Style.RESET_ALL}')
38
+
39
+ # SkyPilot dev version.
40
+ DEV_VERSION = '1.0.0-dev0'
41
+
42
+ _REMOTE_TO_ERROR = {
43
+ 'client': CLIENT_TOO_OLD_ERROR,
44
+ 'server': SERVER_TOO_OLD_ERROR,
45
+ }
46
+
47
+ # Context-local (thread or coroutine) remote API version, captured during
48
+ # communication with the remote peer.
49
+ _remote_api_version: contextvars.ContextVar[Optional[int]] = \
50
+ contextvars.ContextVar('remote_api_version', default=None)
51
+ _remote_version: contextvars.ContextVar[str] = \
52
+ contextvars.ContextVar('remote_version', default='unknown')
53
+ _reminded_for_minor_version_upgrade = False
54
+
55
+
56
+ def get_remote_api_version() -> Optional[int]:
57
+ return _remote_api_version.get()
58
+
59
+
60
+ def set_remote_api_version(api_version: int) -> None:
61
+ _remote_api_version.set(api_version)
62
+
63
+
64
+ def get_remote_version() -> str:
65
+ return _remote_version.get()
66
+
67
+
68
+ def set_remote_version(version: str) -> None:
69
+ _remote_version.set(version)
70
+
71
+
72
+ class VersionInfo(NamedTuple):
73
+ api_version: int
74
+ version: str
75
+ error: Optional[str] = None
76
+
77
+
78
+ def check_compatibility_at_server(
79
+ client_headers: Mapping[str, str]) -> Optional[VersionInfo]:
80
+ """Check API compatibility between client and server."""
81
+ return _check_version_compatibility(client_headers, 'client')
82
+
83
+
84
+ def check_compatibility_at_client(
85
+ server_headers: Mapping[str, str]) -> Optional[VersionInfo]:
86
+ """Check API compatibility between client and server."""
87
+ return _check_version_compatibility(server_headers, 'server')
88
+
89
+
90
+ def _check_version_compatibility(
91
+ remote_headers: Mapping[str, str],
92
+ remote_type: Literal['client', 'server']) -> Optional[VersionInfo]:
93
+ """Check API compatibility between client and server.
94
+
95
+ This function can be called at both client and server side, where the
96
+ headers should contain the version info of the remote.
97
+
98
+ Args:
99
+ remote_headers: The headers of the request/response sent from the
100
+ remote.
101
+ remote_type: The type of the remote, used to determine the error
102
+ message. Valid options are 'client' and 'server'.
103
+
104
+ Returns:
105
+ The version info of the remote, None if the version info is not found
106
+ in the headers for backward compatibility.
107
+ """
108
+ api_version_str = remote_headers.get(constants.API_VERSION_HEADER)
109
+ version = remote_headers.get(constants.VERSION_HEADER)
110
+ if version is None or api_version_str is None:
111
+ return None
112
+ try:
113
+ api_version = int(api_version_str)
114
+ except ValueError:
115
+ # The future change is expected to not break the compatibility of this
116
+ # header, so we are encountering a bug or a malicious request here,
117
+ # just raise an error.
118
+ raise ValueError(
119
+ f'Header {constants.API_VERSION_HEADER}: '
120
+ f'{api_version_str} is not a valid API version.') from None
121
+
122
+ if api_version < constants.MIN_COMPATIBLE_API_VERSION:
123
+ if remote_type == 'server':
124
+ # Hint the user to downgrade the client to the remote server's version.
125
+ server_version, server_commit = parse_readable_version(version)
126
+ command = install_version_command(server_version, server_commit)
127
+ else:
128
+ # Hint the user to upgrade the client to this server's version.
129
+ command = install_version_command(sky.__version__, sky.__commit__)
130
+ return VersionInfo(api_version=api_version,
131
+ version=version,
132
+ error=_REMOTE_TO_ERROR[remote_type].format(
133
+ remote_version=version,
134
+ local_version=get_local_readable_version(),
135
+ min_version=constants.MIN_COMPATIBLE_VERSION,
136
+ command=command,
137
+ ))
138
+
139
+ if remote_type == 'server':
140
+ # Only print the reminder at client-side.
141
+ _remind_minor_version_upgrade(version)
142
+
143
+ return VersionInfo(api_version=api_version, version=version)
144
+
145
+
146
+ def get_local_readable_version() -> str:
147
+ """Get the readable version of the SkyPilot code loaded in current process.
148
+
149
+ For dev version, the version is formatted as: 1.0.0-dev0 (commit: 1234567)
150
+ to make it meaningful for users.
151
+ """
152
+ if sky.__version__ == DEV_VERSION:
153
+ return f'{sky.__version__} (commit: {sky.__commit__})'
154
+ else:
155
+ return sky.__version__
156
+
157
+
158
+ def parse_readable_version(version: str) -> Tuple[str, Optional[str]]:
159
+ """Parse a readable version produced by get_local_readable_version.
160
+
161
+ Args:
162
+ version: The version string to parse.
163
+
164
+ Returns:
165
+ A tuple of (version, optional_commit) where:
166
+ - version: The base version string (e.g., "1.0.0-dev0")
167
+ - optional_commit: The commit hash if present, None otherwise
168
+ """
169
+ # Check if this is a dev version with commit info
170
+ # Format: "1.0.0-dev0 (commit: 1234567)"
171
+ commit_pattern = r'^(.+) \(commit: ([a-f0-9]+)\)$'
172
+ match = re.match(commit_pattern, version)
173
+
174
+ if match:
175
+ base_version = match.group(1)
176
+ commit = match.group(2)
177
+ return base_version, commit
178
+ else:
179
+ # Regular version without commit info
180
+ return version, None
181
+
182
+
183
+ def install_version_command(version: str, commit: Optional[str] = None) -> str:
184
+ if version == DEV_VERSION:
185
+ if commit is not None:
186
+ return ('pip install git+https://github.com/skypilot-org/skypilot@'
187
+ f'{commit}')
188
+ elif 'dev' in version:
189
+ return f'pip install -U "skypilot-nightly=={version}"'
190
+ return f'pip install -U "skypilot=={version}"'
191
+
192
+
193
+ def _remind_minor_version_upgrade(remote_version: str) -> None:
194
+ """Remind the user to upgrade the CLI/SDK."""
195
+ # Only print the reminder once per process.
196
+ global _reminded_for_minor_version_upgrade
197
+ if _reminded_for_minor_version_upgrade:
198
+ return
199
+ # Skip for dev versions.
200
+ if 'dev' in sky.__version__ or 'dev' in remote_version:
201
+ return
202
+
203
+ # Remove the commit info if any.
204
+ remote_base_version, _ = parse_readable_version(remote_version)
205
+
206
+ # Parse semver for both local and remote versions
207
+ try:
208
+ local = version_lib.parse(sky.__version__)
209
+ remote = version_lib.parse(remote_base_version)
210
+
211
+ # Check if local version is behind remote version, ignore patch version.
212
+ if (local.major, local.minor) < (remote.major, remote.minor):
213
+ logger.warning(
214
+ f'{colorama.Fore.YELLOW}The SkyPilot API server is running in '
215
+ f'version {remote_version}, which is newer than your client '
216
+ f'version {sky.__version__}. The compatibility for your '
217
+ f'current version might be dropped in the next server upgrade.'
218
+ f'\nConsider upgrading your client with:\n'
219
+ f'{install_version_command(remote_version)}'
220
+ f'{colorama.Style.RESET_ALL}')
221
+ _reminded_for_minor_version_upgrade = True
222
+ except version_lib.InvalidVersion:
223
+ # Skip for invalid semver (probably a dev version)
224
+ pass
225
+
226
+
227
+ # TODO(aylei): maybe we can use a similar approach to mark that a new argument can
228
+ # only be used in the new server version.
229
+ def minimal_api_version(min_version: int) -> Callable:
230
+ """Decorator to enforce a minimum remote API version for an SDK function.
231
+
232
+ Every new SDK method must be decorated with this decorator to make sure it raises
233
+ a readable error when the remote server is not upgraded.
234
+
235
+ Args:
236
+ min_version: The minimum remote API version required to call the
237
+ function.
238
+
239
+ Returns:
240
+ A decorator function that checks API version before execution.
241
+
242
+ Raises:
243
+ APINotSupportedError: If the remote API version is below the minimum
244
+ required.
245
+ """
246
+
247
+ def decorator(func: Callable) -> Callable:
248
+
249
+ @functools.wraps(func)
250
+ def wrapper(*args, **kwargs):
251
+ remote_api_version = get_remote_api_version()
252
+ if remote_api_version is None:
253
+ return func(*args, **kwargs)
254
+ if remote_api_version < min_version:
255
+ with ux_utils.print_exception_no_traceback():
256
+ hint = 'Please upgrade the remote server.'
257
+ # The client runs a released version, so give a more specific hint.
258
+ if 'dev' not in sky.__version__:
259
+ hint = (
260
+ f'Upgrade the remote server to {sky.__version__} '
261
+ 'and re-run the command.')
262
+ raise exceptions.APINotSupportedError(
263
+ f'Function {func.__name__} is introduced after the '
264
+ f'remote server version {get_remote_version()!r} is '
265
+ f'released. {hint}')
266
+ return func(*args, **kwargs)
267
+
268
+ return wrapper
269
+
270
+ return decorator
@@ -1,5 +1,5 @@
1
1
  include sky/backends/monkey_patches/*.py
2
- exclude sky/clouds/service_catalog/data_fetchers/analyze.py
2
+ exclude sky/catalog/data_fetchers/analyze.py
3
3
  include sky/provision/kubernetes/manifests/*
4
4
  include sky/provision/azure/*
5
5
  include sky/setup_files/*
@@ -9,6 +9,7 @@ include sky/skylet/providers/ibm/*
9
9
  include sky/skylet/providers/scp/*
10
10
  include sky/skylet/providers/*.py
11
11
  include sky/skylet/ray_patches/*.patch
12
+ include sky/skylet/ray_patches/*.diff
12
13
  include sky/jobs/dashboard/*
13
14
  include sky/jobs/dashboard/templates/*
14
15
  include sky/jobs/dashboard/static/*
@@ -16,3 +17,12 @@ include sky/templates/*
16
17
  include sky/utils/kubernetes/*
17
18
  include sky/server/html/*
18
19
  recursive-include sky/dashboard/out *
20
+ include sky/users/*.conf
21
+ include sky/utils/*.sh
22
+ include sky/setup_files/alembic.ini
23
+ recursive-include sky/schemas/db *
24
+
25
+ # SkyPilot templates package
26
+ recursive-include sky_templates/ray *
27
+ recursive-include sky_templates *.py
28
+ include sky_templates/README.md
@@ -0,0 +1,160 @@
1
+ # alembic configuration for global user state, jobs state, and sky config db migrations.
2
+
3
+ [DEFAULT]
4
+ # path to migration scripts.
5
+ # this is typically a path given in POSIX (e.g. forward slashes)
6
+ # format, relative to the token %(here)s which refers to the location of this
7
+ # ini file
8
+ script_location = %(here)s/../schemas/db
9
+
10
+ # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
11
+ # Uncomment the line below if you want the files to be prepended with date and time
12
+ # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
13
+ # for all available tokens
14
+ # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
15
+
16
+ # sys.path path, will be prepended to sys.path if present.
17
+ # defaults to the current working directory. for multiple paths, the path separator
18
+ # is defined by "path_separator" below.
19
+ prepend_sys_path = .
20
+
21
+
22
+ # timezone to use when rendering the date within the migration file
23
+ # as well as the filename.
24
+ # If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
25
+ # Any required deps can be installed by adding `alembic[tz]` to the pip requirements
26
+ # string value is passed to ZoneInfo()
27
+ # leave blank for localtime
28
+ # timezone =
29
+
30
+ # max length of characters to apply to the "slug" field
31
+ # truncate_slug_length = 40
32
+
33
+ # set to 'true' to run the environment during
34
+ # the 'revision' command, regardless of autogenerate
35
+ # revision_environment = false
36
+
37
+ # set to 'true' to allow .pyc and .pyo files without
38
+ # a source .py file to be detected as revisions in the
39
+ # versions/ directory
40
+ # sourceless = false
41
+
42
+ # version location specification; This defaults
43
+ # to <script_location>/versions. When using multiple version
44
+ # directories, initial revisions must be specified with --version-path.
45
+ # The path separator used here should be the separator specified by "path_separator"
46
+ # below.
47
+ # version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
48
+
49
+ # path_separator; This indicates what character is used to split lists of file
50
+ # paths, including version_locations and prepend_sys_path within configparser
51
+ # files such as alembic.ini.
52
+ # The default rendered in new alembic.ini files is "os", which uses os.pathsep
53
+ # to provide os-dependent path splitting.
54
+ #
55
+ # Note that in order to support legacy alembic.ini files, this default does NOT
56
+ # take place if path_separator is not present in alembic.ini. If this
57
+ # option is omitted entirely, fallback logic is as follows:
58
+ #
59
+ # 1. Parsing of the version_locations option falls back to using the legacy
60
+ # "version_path_separator" key, which if absent then falls back to the legacy
61
+ # behavior of splitting on spaces and/or commas.
62
+ # 2. Parsing of the prepend_sys_path option falls back to the legacy
63
+ # behavior of splitting on spaces, commas, or colons.
64
+ #
65
+ # Valid values for path_separator are:
66
+ #
67
+ # path_separator = :
68
+ # path_separator = ;
69
+ # path_separator = space
70
+ # path_separator = newline
71
+ #
72
+ # Use os.pathsep. Default configuration used for new projects.
73
+ path_separator = os
74
+
75
+ # set to 'true' to search source files recursively
76
+ # in each "version_locations" directory
77
+ # new in Alembic version 1.10
78
+ # recursive_version_locations = false
79
+
80
+ # the output encoding used when revision files
81
+ # are written from script.py.mako
82
+ # output_encoding = utf-8
83
+
84
+ # database URL. This is consumed by the user-maintained env.py script only.
85
+ # other means of configuring database URLs may be customized within the env.py
86
+ # file.
87
+ # sqlalchemy.url = driver://user:pass@localhost/dbname
88
+
89
+ [state_db]
90
+ version_locations = %(here)s/../schemas/db/global_user_state
91
+ version_table = alembic_version_state_db
92
+
93
+ [spot_jobs_db]
94
+ version_locations = %(here)s/../schemas/db/spot_jobs
95
+ version_table = alembic_version_spot_jobs_db
96
+
97
+ [serve_db]
98
+ version_locations = %(here)s/../schemas/db/serve_state
99
+ version_table = alembic_version_serve_state_db
100
+
101
+ [sky_config_db]
102
+ version_locations = %(here)s/../schemas/db/skypilot_config
103
+ version_table = alembic_version_sky_config_db
104
+
105
+ [kv_cache_db]
106
+ version_locations = %(here)s/../schemas/db/kv_cache
107
+ version_table = alembic_version_kv_cache_db
108
+
109
+ [post_write_hooks]
110
+ # post_write_hooks defines scripts or Python functions that are run
111
+ # on newly generated revision scripts. See the documentation for further
112
+ # detail and examples
113
+
114
+ # format using "black" - use the console_scripts runner, against the "black" entrypoint
115
+ # hooks = black
116
+ # black.type = console_scripts
117
+ # black.entrypoint = black
118
+ # black.options = -l 79 REVISION_SCRIPT_FILENAME
119
+
120
+ # lint with attempts to fix using "ruff" - use the exec runner, execute a binary
121
+ # hooks = ruff
122
+ # ruff.type = exec
123
+ # ruff.executable = %(here)s/.venv/bin/ruff
124
+ # ruff.options = check --fix REVISION_SCRIPT_FILENAME
125
+
126
+ # Logging configuration. This is also consumed by the user-maintained
127
+ # env.py script only.
128
+ [loggers]
129
+ keys = root,sqlalchemy,alembic
130
+
131
+ [handlers]
132
+ keys = console
133
+
134
+ [formatters]
135
+ keys = generic
136
+
137
+ [logger_root]
138
+ level = WARNING
139
+ handlers = console
140
+ qualname =
141
+
142
+ [logger_sqlalchemy]
143
+ level = WARNING
144
+ handlers =
145
+ qualname = sqlalchemy.engine
146
+
147
+ [logger_alembic]
148
+ level = WARNING
149
+ handlers =
150
+ qualname = alembic
151
+
152
+ [handler_console]
153
+ class = StreamHandler
154
+ args = (sys.stderr,)
155
+ level = NOTSET
156
+ formatter = generic
157
+
158
+ [formatter_generic]
159
+ format = %(levelname)-5.5s [%(name)s] %(message)s
160
+ datefmt = %H:%M:%S