skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/server/uvicorn.py CHANGED
@@ -3,24 +3,238 @@
3
3
  This module is a wrapper around uvicorn to customize the behavior of the
4
4
  server.
5
5
  """
6
+ import asyncio
7
+ import logging
6
8
  import os
9
+ import signal
10
+ import sys
7
11
  import threading
8
- from typing import Optional
12
+ import time
13
+ from types import FrameType
14
+ from typing import Optional, Union
9
15
 
16
+ import filelock
10
17
  import uvicorn
11
18
  from uvicorn.supervisors import multiprocess
12
19
 
20
+ from sky import sky_logging
21
+ from sky.server import daemons
22
+ from sky.server import metrics as metrics_lib
23
+ from sky.server import state
24
+ from sky.server.requests import requests as requests_lib
25
+ from sky.skylet import constants
26
+ from sky.utils import context_utils
27
+ from sky.utils import env_options
28
+ from sky.utils import perf_utils
13
29
  from sky.utils import subprocess_utils
30
+ from sky.utils.db import db_utils
14
31
 
32
+ logger = sky_logging.init_logger(__name__)
15
33
 
16
- def run(config: uvicorn.Config):
34
+ # File lock path for coordinating graceful shutdown across processes
35
+ _GRACEFUL_SHUTDOWN_LOCK_PATH = '/tmp/skypilot_graceful_shutdown.lock'
36
+
37
+ # Interval to check for on-going requests.
38
+ _WAIT_REQUESTS_INTERVAL_SECONDS = 5
39
+
40
+ # Timeout for waiting for on-going requests to finish.
41
+ try:
42
+ _WAIT_REQUESTS_TIMEOUT_SECONDS = int(
43
+ os.environ.get(constants.GRACE_PERIOD_SECONDS_ENV_VAR, '60'))
44
+ except ValueError:
45
+ _WAIT_REQUESTS_TIMEOUT_SECONDS = 60
46
+
47
+ # TODO(aylei): use decorator to register requests that need to be proactively
48
+ # cancelled instead of hardcoding here.
49
+ _RETRIABLE_REQUEST_NAMES = {
50
+ 'sky.logs',
51
+ 'sky.jobs.logs',
52
+ 'sky.serve.logs',
53
+ }
54
+
55
+
56
+ def add_timestamp_prefix_for_server_logs() -> None:
57
+ """Configure logging for API server.
58
+
59
+ Note: we only do this in the main API server process and uvicorn processes,
60
+ to avoid affecting executor logs (including in modules like
61
+ sky.server.requests) that may get sent to the client.
62
+ """
63
+ server_logger = sky_logging.init_logger('sky.server')
64
+ # Clear existing handlers first to prevent duplicates
65
+ server_logger.handlers.clear()
66
+ # Disable propagation to avoid the root logger of SkyPilot being affected
67
+ server_logger.propagate = False
68
+ # Add date prefix to the log message printed by loggers under
69
+ # server.
70
+ stream_handler = logging.StreamHandler(sys.stdout)
71
+ if env_options.Options.SHOW_DEBUG_INFO.get():
72
+ stream_handler.setLevel(logging.DEBUG)
73
+ else:
74
+ stream_handler.setLevel(logging.INFO)
75
+ stream_handler.flush = sys.stdout.flush # type: ignore
76
+ stream_handler.setFormatter(sky_logging.FORMATTER)
77
+ server_logger.addHandler(stream_handler)
78
+ # Add date prefix to the log message printed by uvicorn.
79
+ for name in ['uvicorn', 'uvicorn.access']:
80
+ uvicorn_logger = logging.getLogger(name)
81
+ uvicorn_logger.handlers.clear()
82
+ uvicorn_logger.addHandler(stream_handler)
83
+
84
+
85
+ class Server(uvicorn.Server):
86
+ """Server wrapper for uvicorn.
87
+
88
+ Extended functionalities:
89
+ - Handle exit signal and perform custom graceful shutdown.
90
+ - Run the server process in a context-aware manner.
91
+ """
92
+
93
+ def __init__(self,
94
+ config: uvicorn.Config,
95
+ max_db_connections: Optional[int] = None):
96
+ super().__init__(config=config)
97
+ self.exiting: bool = False
98
+ self.max_db_connections = max_db_connections
99
+
100
+ def handle_exit(self, sig: int, frame: Union[FrameType, None]) -> None:
101
+ """Handle exit signal.
102
+
103
+ When a server process receives a SIGTERM or SIGINT signal, a graceful
104
+ shutdown will be initiated. If a SIGINT signal is received again, the
105
+ server will be forcefully shut down.
106
+ """
107
+ if self.exiting and sig == signal.SIGINT:
108
+ # The server has been signaled to exit and received a SIGINT again,
109
+ # do force shutdown.
110
+ logger.info('Force shutdown.')
111
+ self.should_exit = True
112
+ super().handle_exit(sig, frame)
113
+ return
114
+ if not self.exiting:
115
+ self.exiting = True
116
+ # Perform graceful shutdown in a separate thread to avoid blocking
117
+ # the main thread.
118
+ threading.Thread(target=self._graceful_shutdown,
119
+ args=(sig, frame),
120
+ daemon=True).start()
121
+
122
+ def _graceful_shutdown(self, sig: int, frame: Union[FrameType,
123
+ None]) -> None:
124
+ """Perform graceful shutdown."""
125
+ # Block new requests so that we can wait until all on-going requests
126
+ # are finished. Note that /api/$verb operations are still allowed in
127
+ # this stage to ensure the client can still operate the on-going
128
+ # requests, e.g. /api/logs, /api/cancel, etc.
129
+ logger.info('Block new requests being submitted in worker '
130
+ f'{os.getpid()}.')
131
+ state.set_block_requests(True)
132
+ # Ensure the shutting_down are set on all workers before next step.
133
+ # TODO(aylei): hacky, need a reliable solution.
134
+ time.sleep(1)
135
+
136
+ lock = filelock.FileLock(_GRACEFUL_SHUTDOWN_LOCK_PATH)
137
+ # Elect a coordinator process to handle on-going requests check
138
+ with lock.acquire():
139
+ logger.info(f'Worker {os.getpid()} elected as shutdown coordinator')
140
+ self._wait_requests()
141
+
142
+ logger.info('Shutting down server...')
143
+ self.should_exit = True
144
+ super().handle_exit(sig, frame)
145
+
146
+ def _wait_requests(self) -> None:
147
+ """Wait until all on-going requests are finished or cancelled."""
148
+ start_time = time.time()
149
+ while True:
150
+ statuses = [
151
+ requests_lib.RequestStatus.PENDING,
152
+ requests_lib.RequestStatus.RUNNING,
153
+ ]
154
+ requests = [(request_task.request_id, request_task.name)
155
+ for request_task in requests_lib.get_request_tasks(
156
+ req_filter=requests_lib.RequestTaskFilter(
157
+ status=statuses, fields=['request_id', 'name']))
158
+ ]
159
+ if not requests:
160
+ break
161
+ logger.info(f'{len(requests)} on-going requests '
162
+ 'found, waiting for them to finish...')
163
+ # Proactively cancel internal requests and logs requests since
164
+ # they can run for infinite time.
165
+ internal_request_ids = {
166
+ d.id for d in daemons.INTERNAL_REQUEST_DAEMONS
167
+ }
168
+ if time.time() - start_time > _WAIT_REQUESTS_TIMEOUT_SECONDS:
169
+ logger.warning('Timeout waiting for on-going requests to '
170
+ 'finish, cancelling all on-going requests.')
171
+ for request_id, _ in requests:
172
+ self.interrupt_request_for_retry(request_id)
173
+ break
174
+ interrupted = 0
175
+ for request_id, name in requests:
176
+ if (name in _RETRIABLE_REQUEST_NAMES or
177
+ request_id in internal_request_ids):
178
+ self.interrupt_request_for_retry(request_id)
179
+ interrupted += 1
180
+ # TODO(aylei): interrupt pending requests to accelerate the
181
+ # shutdown.
182
+ # If some requests are not interrupted, wait for them to finish,
183
+ # otherwise we just check again immediately to accelerate the
184
+ # shutdown process.
185
+ if interrupted < len(requests):
186
+ time.sleep(_WAIT_REQUESTS_INTERVAL_SECONDS)
187
+
188
+ def interrupt_request_for_retry(self, request_id: str) -> None:
189
+ """Interrupt a request for retry."""
190
+ with requests_lib.update_request(request_id) as req:
191
+ if req is None:
192
+ return
193
+ if req.pid is not None:
194
+ try:
195
+ os.kill(req.pid, signal.SIGTERM)
196
+ except ProcessLookupError:
197
+ logger.debug(f'Process {req.pid} already finished.')
198
+ req.status = requests_lib.RequestStatus.CANCELLED
199
+ req.should_retry = True
200
+ logger.info(
201
+ f'Request {request_id} interrupted and will be retried by client.')
202
+
203
+ def run(self, *args, **kwargs):
204
+ """Run the server process."""
205
+ if self.max_db_connections is not None:
206
+ db_utils.set_max_connections(self.max_db_connections)
207
+ add_timestamp_prefix_for_server_logs()
208
+ context_utils.hijack_sys_attrs()
209
+ # Use default loop policy of uvicorn (use uvloop if available).
210
+ self.config.setup_event_loop()
211
+ lag_threshold = perf_utils.get_loop_lag_threshold()
212
+ if lag_threshold is not None:
213
+ event_loop = asyncio.get_event_loop()
214
+ # Same as set PYTHONASYNCIODEBUG=1, but with custom threshold.
215
+ event_loop.set_debug(True)
216
+ event_loop.slow_callback_duration = lag_threshold
217
+ stop_monitor = threading.Event()
218
+ monitor = threading.Thread(target=metrics_lib.process_monitor,
219
+ args=('server', stop_monitor),
220
+ daemon=True)
221
+ monitor.start()
222
+ try:
223
+ with self.capture_signals():
224
+ asyncio.run(self.serve(*args, **kwargs))
225
+ finally:
226
+ stop_monitor.set()
227
+ monitor.join()
228
+
229
+
230
+ def run(config: uvicorn.Config, max_db_connections: Optional[int] = None):
17
231
  """Run unvicorn server."""
18
232
  if config.reload:
19
233
  # Reload and multi-workers are mutually exclusive
20
234
  # in uvicorn. Since we do not use reload now, simply
21
235
  # guard by an exception.
22
236
  raise ValueError('Reload is not supported yet.')
23
- server = uvicorn.Server(config=config)
237
+ server = Server(config=config, max_db_connections=max_db_connections)
24
238
  try:
25
239
  if config.workers is not None and config.workers > 1:
26
240
  sock = config.bind_socket()
sky/server/versions.py ADDED
@@ -0,0 +1,270 @@
1
+ """API versioning module."""
2
+
3
+ import contextvars
4
+ import functools
5
+ import re
6
+ from typing import Callable, Literal, Mapping, NamedTuple, Optional, Tuple
7
+
8
+ import colorama
9
+ from packaging import version as version_lib
10
+
11
+ import sky
12
+ from sky import exceptions
13
+ from sky import sky_logging
14
+ from sky.server import constants
15
+ from sky.utils import ux_utils
16
+
17
+ logger = sky_logging.init_logger(__name__)
18
+
19
# Error message templates for an incompatible remote peer. The colorama
# codes are interpolated here via f-strings; the remaining placeholders
# ({remote_version}, {local_version}, {min_version}, {command}) are filled
# in later with str.format().
CLIENT_TOO_OLD_ERROR = (
    # Trailing space keeps the version from being glued to the colon, in
    # line with SERVER_TOO_OLD_ERROR below.
    f'{colorama.Fore.YELLOW}Your SkyPilot client version is too old: '
    '{remote_version}\n'
    f'{colorama.Style.RESET_ALL}'
    'The server is running on {local_version} and the minimum compatible '
    'version is {min_version}.\n'
    f'Upgrade your client with:\n{colorama.Fore.YELLOW}'
    '{command}'
    f'{colorama.Style.RESET_ALL}')
SERVER_TOO_OLD_ERROR = (
    f'{colorama.Fore.YELLOW}Your SkyPilot API server version is too old: '
    '{remote_version}\n'
    f'{colorama.Style.RESET_ALL}'
    'The client is running on {local_version} and the minimum compatible '
    'version is {min_version}.\n'
    'Contact your administrator to upgrade the remote API server or downgrade '
    f'your client with:\n{colorama.Fore.YELLOW}'
    '{command}'
    f'{colorama.Style.RESET_ALL}')
38
+
39
# Version string used by SkyPilot dev builds.
DEV_VERSION = '1.0.0-dev0'

# Error template to use, keyed by the type of the remote peer.
_REMOTE_TO_ERROR = dict(
    client=CLIENT_TOO_OLD_ERROR,
    server=SERVER_TOO_OLD_ERROR,
)

# Context-local (thread or coroutine) remote API version, captured during
# communication with the remote peer.
_remote_api_version: contextvars.ContextVar[Optional[int]] = (
    contextvars.ContextVar('remote_api_version', default=None))
_remote_version: contextvars.ContextVar[str] = (contextvars.ContextVar(
    'remote_version', default='unknown'))
# Process-wide flag read and set by _remind_minor_version_upgrade.
_reminded_for_minor_version_upgrade = False
54
+
55
+
56
def get_remote_api_version() -> Optional[int]:
    """Return the remote API version stored in the current context.

    Returns None when no version has been set in this context.
    """
    api_version = _remote_api_version.get()
    return api_version
58
+
59
+
60
def set_remote_api_version(api_version: int) -> None:
    """Store the remote API version in the current context."""
    # The token returned by ContextVar.set() is intentionally discarded.
    _ = _remote_api_version.set(api_version)
62
+
63
+
64
def get_remote_version() -> str:
    """Return the remote version string stored in the current context.

    Returns 'unknown' when no version has been set in this context.
    """
    remote_version = _remote_version.get()
    return remote_version
66
+
67
+
68
def set_remote_version(version: str) -> None:
    """Store the remote version string in the current context."""
    # The token returned by ContextVar.set() is intentionally discarded.
    _ = _remote_version.set(version)
70
+
71
+
72
class VersionInfo(NamedTuple):
    """Version information reported by a remote peer."""
    # API version of the remote, parsed from its headers.
    api_version: int
    # Version string of the remote, as sent in its headers.
    version: str
    # Error message for an incompatible remote; None otherwise.
    error: Optional[str] = None
76
+
77
+
78
def check_compatibility_at_server(
        client_headers: Mapping[str, str]) -> Optional[VersionInfo]:
    """Check API compatibility on the server, using the client's headers.

    Args:
        client_headers: The headers sent by the client.

    Returns:
        The client's version info, or None if the headers carry none.
    """
    return _check_version_compatibility(remote_headers=client_headers,
                                        remote_type='client')
82
+
83
+
84
def check_compatibility_at_client(
        server_headers: Mapping[str, str]) -> Optional[VersionInfo]:
    """Check API compatibility on the client, using the server's headers.

    Args:
        server_headers: The headers sent by the server.

    Returns:
        The server's version info, or None if the headers carry none.
    """
    return _check_version_compatibility(remote_headers=server_headers,
                                        remote_type='server')
88
+
89
+
90
def _check_version_compatibility(
        remote_headers: Mapping[str, str],
        remote_type: Literal['client', 'server']) -> Optional[VersionInfo]:
    """Check API compatibility with the remote peer.

    Usable on both the client and the server side; the headers must be the
    ones sent by the remote.

    Args:
        remote_headers: The headers of the request/response sent from the
            remote.
        remote_type: The type of the remote, used to determine the error
            message. Valid options are 'client' and 'server'.

    Returns:
        The version info of the remote, with ``error`` set when the remote
        API version is below the minimum compatible one. None if the version
        info is not found in the headers, for backward compatibility.

    Raises:
        ValueError: If the API version header is not an integer.
    """
    api_version_str = remote_headers.get(constants.API_VERSION_HEADER)
    version = remote_headers.get(constants.VERSION_HEADER)
    if api_version_str is None or version is None:
        return None
    try:
        api_version = int(api_version_str)
    except ValueError:
        # Future changes are expected to keep this header compatible, so a
        # non-integer value means a bug or a malicious request: just raise.
        raise ValueError(
            f'Header {constants.API_VERSION_HEADER}: '
            f'{api_version_str} is not a valid API version.') from None

    if api_version >= constants.MIN_COMPATIBLE_API_VERSION:
        if remote_type == 'server':
            # Only print the reminder at client-side.
            _remind_minor_version_upgrade(version)
        return VersionInfo(api_version=api_version, version=version)

    # The remote is too old: build the install command for the hint.
    if remote_type == 'server':
        # Hint the user to downgrade the client to the remote server's
        # version.
        server_version, server_commit = parse_readable_version(version)
        command = install_version_command(server_version, server_commit)
    else:
        # Hint the client to upgrade to the version of this server.
        command = install_version_command(sky.__version__, sky.__commit__)
    error = _REMOTE_TO_ERROR[remote_type].format(
        remote_version=version,
        local_version=get_local_readable_version(),
        min_version=constants.MIN_COMPATIBLE_VERSION,
        command=command,
    )
    return VersionInfo(api_version=api_version, version=version, error=error)
144
+
145
+
146
def get_local_readable_version() -> str:
    """Return a readable version of the SkyPilot code loaded in this process.

    A dev version is rendered as ``1.0.0-dev0 (commit: 1234567)`` so that it
    is meaningful to users; any other version is returned unchanged.
    """
    local_version = sky.__version__
    if local_version != DEV_VERSION:
        return local_version
    return f'{local_version} (commit: {sky.__commit__})'
156
+
157
+
158
def parse_readable_version(version: str) -> Tuple[str, Optional[str]]:
    """Parse a readable version produced by get_local_readable_version.

    Args:
        version: The version string to parse.

    Returns:
        A tuple of (version, optional_commit) where:
        - version: The base version string (e.g., "1.0.0-dev0")
        - optional_commit: The commit hash if present, None otherwise
    """
    # A version with commit info looks like "1.0.0-dev0 (commit: 1234567)".
    parsed = re.match(r'^(.+) \(commit: ([a-f0-9]+)\)$', version)
    if parsed is None:
        # Regular version without commit info.
        return version, None
    base_version, commit = parsed.groups()
    return base_version, commit
181
+
182
+
183
def install_version_command(version: str, commit: Optional[str] = None) -> str:
    """Return the pip command that installs the given SkyPilot version.

    Args:
        version: The version to install.
        commit: The commit hash; only used when ``version`` is the dev
            version.

    Returns:
        A git-based install command for the dev version with a commit, a
        ``skypilot-nightly`` command for other versions containing 'dev',
        and a ``skypilot`` command otherwise.
    """
    is_dev_build = version == DEV_VERSION
    if is_dev_build and commit is not None:
        return ('pip install git+https://github.com/skypilot-org/skypilot@'
                f'{commit}')
    if not is_dev_build and 'dev' in version:
        return f'pip install -U "skypilot-nightly=={version}"'
    # NOTE(review): the dev version without a commit also ends up here and
    # yields a "skypilot==<dev version>" command -- confirm this is intended.
    return f'pip install -U "skypilot=={version}"'
191
+
192
+
193
def _remind_minor_version_upgrade(remote_version: str) -> None:
    """Warn once when the client is a minor version behind the server.

    Args:
        remote_version: The readable version reported by the remote server.
    """
    global _reminded_for_minor_version_upgrade
    # Only print the reminder once per process.
    if _reminded_for_minor_version_upgrade:
        return
    # Dev versions on either side are skipped.
    if 'dev' in sky.__version__ or 'dev' in remote_version:
        return

    # Drop the commit info, if any, before parsing.
    remote_base_version, _ = parse_readable_version(remote_version)

    try:
        local = version_lib.parse(sky.__version__)
        remote = version_lib.parse(remote_base_version)
    except version_lib.InvalidVersion:
        # Not a valid version string (probably a dev version): skip.
        return

    # Compare (major, minor) only; the patch version is ignored.
    if (local.major, local.minor) >= (remote.major, remote.minor):
        return
    logger.warning(
        f'{colorama.Fore.YELLOW}The SkyPilot API server is running in '
        f'version {remote_version}, which is newer than your client '
        f'version {sky.__version__}. The compatibility for your '
        f'current version might be dropped in the next server upgrade.'
        f'\nConsider upgrading your client with:\n'
        f'{install_version_command(remote_version)}'
        f'{colorama.Style.RESET_ALL}')
    _reminded_for_minor_version_upgrade = True
225
+
226
+
227
+ # TODO(aylei): maybe we can use similiar approach to mark a new argument can
228
+ # only be used in the new server version.
229
def minimal_api_version(min_version: int) -> Callable:
    """Build a decorator that requires a minimum remote API version.

    New SDK methods must be decorated with this so that they raise a
    readable error when the remote server has not been upgraded.

    Args:
        min_version: The minimum remote API version required to call the
            function.

    Returns:
        A decorator function that checks API version before execution.

    Raises:
        APINotSupportedError: Raised by the decorated function when the
            remote API version is known and below the minimum required.
    """

    def decorator(func: Callable) -> Callable:

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            remote_api_version = get_remote_api_version()
            # An unknown (None) remote API version is let through unchecked.
            if (remote_api_version is not None and
                    remote_api_version < min_version):
                with ux_utils.print_exception_no_traceback():
                    if 'dev' in sky.__version__:
                        hint = 'Please upgrade the remote server.'
                    else:
                        # The client runs in a released version, so the hint
                        # can name the exact version to upgrade to.
                        hint = (
                            f'Upgrade the remote server to {sky.__version__} '
                            'and re-run the command.')
                    raise exceptions.APINotSupportedError(
                        f'Function {func.__name__} is introduced after the '
                        f'remote server version {get_remote_version()!r} is '
                        f'released. {hint}')
            return func(*args, **kwargs)

        return wrapper

    return decorator
@@ -9,6 +9,7 @@ include sky/skylet/providers/ibm/*
9
9
  include sky/skylet/providers/scp/*
10
10
  include sky/skylet/providers/*.py
11
11
  include sky/skylet/ray_patches/*.patch
12
+ include sky/skylet/ray_patches/*.diff
12
13
  include sky/jobs/dashboard/*
13
14
  include sky/jobs/dashboard/templates/*
14
15
  include sky/jobs/dashboard/static/*
@@ -16,3 +17,7 @@ include sky/templates/*
16
17
  include sky/utils/kubernetes/*
17
18
  include sky/server/html/*
18
19
  recursive-include sky/dashboard/out *
20
+ include sky/users/*.conf
21
+ include sky/utils/*.sh
22
+ include sky/setup_files/alembic.ini
23
+ recursive-include sky/schemas/db *
@@ -0,0 +1,156 @@
1
+ # alembic configuration for global user state, jobs state, and sky config db migrations.
2
+
3
+ [DEFAULT]
4
+ # path to migration scripts.
5
+ # this is typically a path given in POSIX (e.g. forward slashes)
6
+ # format, relative to the token %(here)s which refers to the location of this
7
+ # ini file
8
+ script_location = %(here)s/../schemas/db
9
+
10
+ # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
11
+ # Uncomment the line below if you want the files to be prepended with date and time
12
+ # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
13
+ # for all available tokens
14
+ # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
15
+
16
+ # sys.path path, will be prepended to sys.path if present.
17
+ # defaults to the current working directory. for multiple paths, the path separator
18
+ # is defined by "path_separator" below.
19
+ prepend_sys_path = .
20
+
21
+
22
+ # timezone to use when rendering the date within the migration file
23
+ # as well as the filename.
24
+ # If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
25
+ # Any required deps can be installed by adding `alembic[tz]` to the pip requirements
26
+ # string value is passed to ZoneInfo()
27
+ # leave blank for localtime
28
+ # timezone =
29
+
30
+ # max length of characters to apply to the "slug" field
31
+ # truncate_slug_length = 40
32
+
33
+ # set to 'true' to run the environment during
34
+ # the 'revision' command, regardless of autogenerate
35
+ # revision_environment = false
36
+
37
+ # set to 'true' to allow .pyc and .pyo files without
38
+ # a source .py file to be detected as revisions in the
39
+ # versions/ directory
40
+ # sourceless = false
41
+
42
+ # version location specification; This defaults
43
+ # to <script_location>/versions. When using multiple version
44
+ # directories, initial revisions must be specified with --version-path.
45
+ # The path separator used here should be the separator specified by "path_separator"
46
+ # below.
47
+ # version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
48
+
49
+ # path_separator; This indicates what character is used to split lists of file
50
+ # paths, including version_locations and prepend_sys_path within configparser
51
+ # files such as alembic.ini.
52
+ # The default rendered in new alembic.ini files is "os", which uses os.pathsep
53
+ # to provide os-dependent path splitting.
54
+ #
55
+ # Note that in order to support legacy alembic.ini files, this default does NOT
56
+ # take place if path_separator is not present in alembic.ini. If this
57
+ # option is omitted entirely, fallback logic is as follows:
58
+ #
59
+ # 1. Parsing of the version_locations option falls back to using the legacy
60
+ # "version_path_separator" key, which if absent then falls back to the legacy
61
+ # behavior of splitting on spaces and/or commas.
62
+ # 2. Parsing of the prepend_sys_path option falls back to the legacy
63
+ # behavior of splitting on spaces, commas, or colons.
64
+ #
65
+ # Valid values for path_separator are:
66
+ #
67
+ # path_separator = :
68
+ # path_separator = ;
69
+ # path_separator = space
70
+ # path_separator = newline
71
+ #
72
+ # Use os.pathsep. Default configuration used for new projects.
73
+ path_separator = os
74
+
75
+ # set to 'true' to search source files recursively
76
+ # in each "version_locations" directory
77
+ # new in Alembic version 1.10
78
+ # recursive_version_locations = false
79
+
80
+ # the output encoding used when revision files
81
+ # are written from script.py.mako
82
+ # output_encoding = utf-8
83
+
84
+ # database URL. This is consumed by the user-maintained env.py script only.
85
+ # other means of configuring database URLs may be customized within the env.py
86
+ # file.
87
+ # sqlalchemy.url = driver://user:pass@localhost/dbname
88
+
89
+ [state_db]
90
+ version_locations = %(here)s/../schemas/db/global_user_state
91
+ version_table = alembic_version_state_db
92
+
93
+ [spot_jobs_db]
94
+ version_locations = %(here)s/../schemas/db/spot_jobs
95
+ version_table = alembic_version_spot_jobs_db
96
+
97
+ [serve_db]
98
+ version_locations = %(here)s/../schemas/db/serve_state
99
+ version_table = alembic_version_serve_state_db
100
+
101
+ [sky_config_db]
102
+ version_locations = %(here)s/../schemas/db/skypilot_config
103
+ version_table = alembic_version_sky_config_db
104
+
105
+ [post_write_hooks]
106
+ # post_write_hooks defines scripts or Python functions that are run
107
+ # on newly generated revision scripts. See the documentation for further
108
+ # detail and examples
109
+
110
+ # format using "black" - use the console_scripts runner, against the "black" entrypoint
111
+ # hooks = black
112
+ # black.type = console_scripts
113
+ # black.entrypoint = black
114
+ # black.options = -l 79 REVISION_SCRIPT_FILENAME
115
+
116
+ # lint with attempts to fix using "ruff" - use the exec runner, execute a binary
117
+ # hooks = ruff
118
+ # ruff.type = exec
119
+ # ruff.executable = %(here)s/.venv/bin/ruff
120
+ # ruff.options = check --fix REVISION_SCRIPT_FILENAME
121
+
122
+ # Logging configuration. This is also consumed by the user-maintained
123
+ # env.py script only.
124
+ [loggers]
125
+ keys = root,sqlalchemy,alembic
126
+
127
+ [handlers]
128
+ keys = console
129
+
130
+ [formatters]
131
+ keys = generic
132
+
133
+ [logger_root]
134
+ level = WARNING
135
+ handlers = console
136
+ qualname =
137
+
138
+ [logger_sqlalchemy]
139
+ level = WARNING
140
+ handlers =
141
+ qualname = sqlalchemy.engine
142
+
143
+ [logger_alembic]
144
+ level = WARNING
145
+ handlers =
146
+ qualname = alembic
147
+
148
+ [handler_console]
149
+ class = StreamHandler
150
+ args = (sys.stderr,)
151
+ level = NOTSET
152
+ formatter = generic
153
+
154
+ [formatter_generic]
155
+ format = %(levelname)-5.5s [%(name)s] %(message)s
156
+ datefmt = %H:%M:%S