skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. More details are linked from the original diff page.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/skypilot_config.py CHANGED
@@ -52,9 +52,18 @@ import contextlib
52
52
  import copy
53
53
  import json
54
54
  import os
55
+ import pathlib
56
+ import tempfile
55
57
  import threading
56
58
  import typing
57
- from typing import Any, Dict, Iterator, List, Optional, Tuple
59
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
60
+
61
+ import filelock
62
+ import sqlalchemy
63
+ from sqlalchemy import orm
64
+ from sqlalchemy.dialects import postgresql
65
+ from sqlalchemy.dialects import sqlite
66
+ from sqlalchemy.ext import declarative
58
67
 
59
68
  from sky import exceptions
60
69
  from sky import sky_logging
@@ -62,8 +71,13 @@ from sky.adaptors import common as adaptors_common
62
71
  from sky.skylet import constants
63
72
  from sky.utils import common_utils
64
73
  from sky.utils import config_utils
74
+ from sky.utils import context
65
75
  from sky.utils import schemas
66
76
  from sky.utils import ux_utils
77
+ from sky.utils import yaml_utils
78
+ from sky.utils.db import db_utils
79
+ from sky.utils.db import migration_utils
80
+ from sky.utils.kubernetes import config_map_utils
67
81
 
68
82
  if typing.TYPE_CHECKING:
69
83
  import yaml
@@ -105,11 +119,102 @@ ENV_VAR_PROJECT_CONFIG = f'{constants.SKYPILOT_ENV_VAR_PREFIX}PROJECT_CONFIG'
105
119
  _GLOBAL_CONFIG_PATH = '~/.sky/config.yaml'
106
120
  _PROJECT_CONFIG_PATH = '.sky.yaml'
107
121
 
108
- # The loaded config.
109
- _dict = config_utils.Config()
110
- _loaded_config_path: Optional[str] = None
111
- _config_overridden: bool = False
112
- _reload_config_lock = threading.Lock()
122
+ API_SERVER_CONFIG_KEY = 'api_server_config'
123
+
124
+ _SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
125
+ _SQLALCHEMY_ENGINE_LOCK = threading.Lock()
126
+
127
+ Base = declarative.declarative_base()
128
+
129
+ config_yaml_table = sqlalchemy.Table(
130
+ 'config_yaml',
131
+ Base.metadata,
132
+ sqlalchemy.Column('key', sqlalchemy.Text, primary_key=True),
133
+ sqlalchemy.Column('value', sqlalchemy.Text),
134
+ )
135
+
136
+
137
+ class ConfigContext:
138
+
139
+ def __init__(self,
140
+ config: config_utils.Config = config_utils.Config(),
141
+ config_path: Optional[str] = None,
142
+ config_overridden: bool = False):
143
+ self.config = config
144
+ self.config_path = config_path
145
+ self.config_overridden = config_overridden
146
+
147
+
148
+ # The global loaded config.
149
+ _active_workspace_context = threading.local()
150
+ _global_config_context = ConfigContext()
151
+
152
+ SKYPILOT_CONFIG_LOCK_PATH = '~/.sky/locks/.skypilot_config.lock'
153
+
154
+
155
+ def get_skypilot_config_lock_path() -> str:
156
+ """Get the path for the SkyPilot config lock file."""
157
+ lock_path = os.path.expanduser(SKYPILOT_CONFIG_LOCK_PATH)
158
+ os.makedirs(os.path.dirname(lock_path), exist_ok=True)
159
+ return lock_path
160
+
161
+
162
+ def _get_config_context() -> ConfigContext:
163
+ """Get config context for current context.
164
+
165
+ If no context is available, the global config context is returned.
166
+ """
167
+ ctx = context.get()
168
+ if not ctx:
169
+ return _global_config_context
170
+ if ctx.config_context is None:
171
+ # Config context for current context is not initialized, inherit from
172
+ # the global one.
173
+ ctx.config_context = ConfigContext(
174
+ config=copy.deepcopy(_global_config_context.config),
175
+ config_path=_global_config_context.config_path,
176
+ config_overridden=_global_config_context.config_overridden,
177
+ )
178
+ return ctx.config_context
179
+
180
+
181
+ def _get_loaded_config() -> config_utils.Config:
182
+ return _get_config_context().config
183
+
184
+
185
+ def _set_loaded_config(config: config_utils.Config) -> None:
186
+ _get_config_context().config = config
187
+
188
+
189
+ def _get_loaded_config_path() -> List[Optional[str]]:
190
+ serialized = _get_config_context().config_path
191
+ if not serialized:
192
+ return []
193
+ config_paths = json.loads(serialized)
194
+ if config_paths is None:
195
+ return []
196
+ return config_paths
197
+
198
+
199
+ def _set_loaded_config_path(
200
+ path: Optional[Union[str, List[Optional[str]]]]) -> None:
201
+ if not path:
202
+ _get_config_context().config_path = None
203
+ if isinstance(path, str):
204
+ path = [path]
205
+ _get_config_context().config_path = json.dumps(path)
206
+
207
+
208
+ def _set_loaded_config_path_serialized(path: Optional[str]) -> None:
209
+ _get_config_context().config_path = path
210
+
211
+
212
+ def _is_config_overridden() -> bool:
213
+ return _get_config_context().config_overridden
214
+
215
+
216
+ def _set_config_overridden(config_overridden: bool) -> None:
217
+ _get_config_context().config_overridden = config_overridden
113
218
 
114
219
 
115
220
  def get_user_config_path() -> str:
@@ -117,9 +222,14 @@ def get_user_config_path() -> str:
117
222
  return _GLOBAL_CONFIG_PATH
118
223
 
119
224
 
120
- def get_user_config() -> config_utils.Config:
121
- """Returns the user config."""
122
- # find the user config file
225
def _get_config_from_path(path: Optional[str]) -> config_utils.Config:
    """Load the config stored at `path`.

    Args:
        path: Config file path, or None when no file was resolved.

    Returns:
        The parsed and validated config, or an empty Config if `path` is
        None.
    """
    if path is not None:
        return parse_and_validate_config_file(path)
    return config_utils.Config()
229
+
230
+
231
+ def resolve_user_config_path() -> Optional[str]:
232
+ # find the user config file path, None if not resolved.
123
233
  user_config_path = _get_config_file_path(ENV_VAR_GLOBAL_CONFIG)
124
234
  if user_config_path:
125
235
  logger.debug('using user config file specified by '
@@ -136,17 +246,17 @@ def get_user_config() -> config_utils.Config:
136
246
  user_config_path = get_user_config_path()
137
247
  logger.debug(f'using default user config file: {user_config_path}')
138
248
  user_config_path = os.path.expanduser(user_config_path)
139
-
140
- # load the user config file
141
249
  if os.path.exists(user_config_path):
142
- user_config = parse_config_file(user_config_path)
143
- _validate_config(user_config, user_config_path)
144
- else:
145
- user_config = config_utils.Config()
146
- return user_config
250
+ return user_config_path
251
+ return None
252
+
253
+
254
def get_user_config() -> config_utils.Config:
    """Returns the user config.

    The config is loaded from the path given by resolve_user_config_path();
    an empty config is returned when no path is resolved.
    """
    resolved_path = resolve_user_config_path()
    return _get_config_from_path(resolved_path)
147
257
 
148
258
 
149
- def _get_project_config() -> config_utils.Config:
259
+ def _resolve_project_config_path() -> Optional[str]:
150
260
  # find the project config file
151
261
  project_config_path = _get_config_file_path(ENV_VAR_PROJECT_CONFIG)
152
262
  if project_config_path:
@@ -165,18 +275,12 @@ def _get_project_config() -> config_utils.Config:
165
275
  f'using default project config file: {_PROJECT_CONFIG_PATH}')
166
276
  project_config_path = _PROJECT_CONFIG_PATH
167
277
  project_config_path = os.path.expanduser(project_config_path)
168
-
169
- # load the project config file
170
278
  if os.path.exists(project_config_path):
171
- project_config = parse_config_file(project_config_path)
172
- _validate_config(project_config, project_config_path)
173
- else:
174
- project_config = config_utils.Config()
175
- return project_config
279
+ return project_config_path
280
+ return None
176
281
 
177
282
 
178
- def get_server_config() -> config_utils.Config:
179
- """Returns the server config."""
283
+ def _resolve_server_config_path() -> Optional[str]:
180
284
  # find the server config file
181
285
  server_config_path = _get_config_file_path(ENV_VAR_GLOBAL_CONFIG)
182
286
  if server_config_path:
@@ -194,14 +298,14 @@ def get_server_config() -> config_utils.Config:
194
298
  server_config_path = _GLOBAL_CONFIG_PATH
195
299
  logger.debug(f'using default server config file: {server_config_path}')
196
300
  server_config_path = os.path.expanduser(server_config_path)
197
-
198
- # load the server config file
199
301
  if os.path.exists(server_config_path):
200
- server_config = parse_config_file(server_config_path)
201
- _validate_config(server_config, server_config_path)
202
- else:
203
- server_config = config_utils.Config()
204
- return server_config
302
+ return server_config_path
303
+ return None
304
+
305
+
306
def get_server_config() -> config_utils.Config:
    """Returns the server config.

    The config is loaded from the path given by
    _resolve_server_config_path(); an empty config is returned when no path
    is resolved.
    """
    resolved_path = _resolve_server_config_path()
    return _get_config_from_path(resolved_path)
205
309
 
206
310
 
207
311
  def get_nested(keys: Tuple[str, ...],
@@ -224,7 +328,7 @@ def get_nested(keys: Tuple[str, ...],
224
328
  Returns:
225
329
  The value of the nested key, or 'default_value' if not found.
226
330
  """
227
- return _dict.get_nested(
331
+ return _get_loaded_config().get_nested(
228
332
  keys,
229
333
  default_value,
230
334
  override_configs,
@@ -232,19 +336,112 @@ def get_nested(keys: Tuple[str, ...],
232
336
  disallowed_override_keys=None)
233
337
 
234
338
 
339
def get_effective_region_config(
        cloud: str,
        keys: Tuple[str, ...],
        region: Optional[str] = None,
        default_value: Optional[Any] = None,
        override_configs: Optional[Dict[str, Any]] = None) -> Any:
    """Returns a cloud config value, preferring a region-specific setting.

    The lookup is delegated to
    config_utils.get_cloud_config_value_from_dict() on the currently loaded
    config. The intended resolution order is:
      1. If region is specified, try <cloud>/<region_key>/<region>/keys.
      2. If there is no region or no region-level override, try the cloud
         level <cloud>/keys.
      3. If nothing is found at the cloud level, return default_value
         (None unless specified).

    Note: Region-specific config is currently only supported for the
    "kubernetes" cloud. For other clouds, this function behaves identically
    to get_nested().
    """
    loaded_config = _get_loaded_config()
    return config_utils.get_cloud_config_value_from_dict(
        dict_config=loaded_config,
        cloud=cloud,
        keys=keys,
        region=region,
        default_value=default_value,
        override_configs=override_configs)
365
+
366
+
367
def get_workspace_cloud(cloud: str,
                        workspace: Optional[str] = None) -> config_utils.Config:
    """Returns the config of `cloud` within a workspace.

    Args:
        cloud: Cloud name; it is lower-cased before the lookup.
        workspace: Workspace to read from. Defaults to the active workspace.

    Returns:
        The value stored under workspaces/<workspace>/<cloud>, or an empty
        Config when the workspace or the cloud entry is missing.
    """
    # TODO(zhwu): Instead of just returning the workspace specific config, we
    # should return the config that already merges the global config, so that
    # the caller does not need to manually merge the global config with
    # the workspace specific config.
    target_workspace = (get_active_workspace()
                        if workspace is None else workspace)
    workspace_clouds = get_nested(keys=('workspaces', target_workspace),
                                  default_value=None)
    if workspace_clouds is not None:
        return workspace_clouds.get(cloud.lower(), config_utils.Config())
    return config_utils.Config()
383
+
384
+
385
@contextlib.contextmanager
def local_active_workspace_ctx(workspace: str) -> Iterator[None]:
    """Temporarily set the active workspace IN CURRENT THREAD.

    Note: having this function thread-local is error-prone, as wrapping some
    operations with this will not have the underlying threads to get the
    correct active workspace. However, we cannot make it global either, as
    backend_utils.refresh_cluster_status() will be called in multiple threads,
    and they may have different active workspaces for different threads.

    # TODO(zhwu): make this function global by default and able to be set
    # it to thread-local with an argument.

    The previous workspace is restored when the `with` block exits, including
    when it exits with an exception.

    Args:
        workspace: The workspace to set as active.
    """
    original_workspace = get_active_workspace()
    if original_workspace == workspace:
        # No change, do nothing.
        yield
        return
    _active_workspace_context.workspace = workspace
    logger.debug(f'Set context workspace: {workspace}')
    try:
        yield
    finally:
        # Restore in `finally`: without it, an exception raised inside the
        # `with` body would leave this thread pinned to `workspace`.
        logger.debug(f'Reset context workspace: {original_workspace}')
        _active_workspace_context.workspace = original_workspace
414
+
415
+
416
def get_active_workspace(force_user_workspace: bool = False) -> str:
    """Returns the name of the active workspace.

    Args:
        force_user_workspace: If True, ignore any workspace set on
            `_active_workspace_context` and read the config instead.

    Returns:
        The context workspace if one is set (and not bypassed), else the
        `active_workspace` config value, else the default workspace.
    """
    pinned_workspace = getattr(_active_workspace_context, 'workspace', None)
    use_pinned = pinned_workspace is not None and not force_user_workspace
    if use_pinned:
        logger.debug(f'Got context workspace: {pinned_workspace}')
        return pinned_workspace
    configured_workspace = get_nested(keys=('active_workspace',),
                                      default_value=None)
    if configured_workspace is not None:
        logger.debug(f'Got active workspace: {configured_workspace}')
        return configured_workspace
    logger.debug(f'No active workspace found, using default workspace: '
                 f'{constants.SKYPILOT_DEFAULT_WORKSPACE}')
    return constants.SKYPILOT_DEFAULT_WORKSPACE
430
+
431
+
235
432
def set_nested(keys: Tuple[str, ...], value: Any) -> Dict[str, Any]:
    """Returns a deep-copied config with the nested key set to value.

    The loaded config itself is left untouched; only the copy is modified.
    Like get_nested(), if any key is not found, this will not raise an error.
    """
    config_copy = copy.deepcopy(_get_loaded_config())
    config_copy.set_nested(keys, value)
    return dict(**config_copy)
243
440
 
244
441
 
245
442
def to_dict() -> config_utils.Config:
    """Returns a deep-copied version of the current config."""
    current_config = _get_loaded_config()
    return copy.deepcopy(current_config)
248
445
 
249
446
 
250
447
  def _get_config_file_path(envvar: str) -> Optional[str]:
@@ -281,11 +478,11 @@ def overlay_skypilot_config(
281
478
 
282
479
def safe_reload_config() -> None:
    """Reloads the config, safe to be called concurrently.

    The reload runs while holding a file lock on the path returned by
    get_skypilot_config_lock_path().
    """
    lock_path = get_skypilot_config_lock_path()
    with filelock.FileLock(lock_path):
        reload_config()
286
483
 
287
484
 
288
- def _reload_config() -> None:
485
+ def reload_config(init_db: bool = False) -> None:
289
486
  internal_config_path = os.environ.get(ENV_VAR_SKYPILOT_CONFIG)
290
487
  if internal_config_path is not None:
291
488
  # {ENV_VAR_SKYPILOT_CONFIG} is used internally.
@@ -297,19 +494,25 @@ def _reload_config() -> None:
297
494
  return
298
495
 
299
496
  if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
300
- _reload_config_as_server()
497
+ _reload_config_as_server(init_db=init_db)
301
498
  else:
302
499
  _reload_config_as_client()
303
500
 
304
501
 
305
- def parse_config_file(config_path: str) -> config_utils.Config:
502
+ def parse_and_validate_config_file(config_path: str) -> config_utils.Config:
306
503
  config = config_utils.Config()
307
504
  try:
308
- config_dict = common_utils.read_yaml(config_path)
505
+ config_dict = yaml_utils.read_yaml(config_path)
309
506
  config = config_utils.Config.from_dict(config_dict)
507
+ # pop the db url from the config, and set it to the env var.
508
+ # this is to avoid db url (considered a sensitive value)
509
+ # being printed with the rest of the config.
510
+ db_url = config.pop_nested(('db',), None)
511
+ if db_url:
512
+ os.environ[constants.ENV_VAR_DB_CONNECTION_URI] = db_url
310
513
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
311
514
  logger.debug(f'Config loaded from {config_path}:\n'
312
- f'{common_utils.dump_yaml_str(dict(config))}')
515
+ f'{yaml_utils.dump_yaml_str(dict(config))}')
313
516
  except yaml.YAMLError as e:
314
517
  logger.error(f'Error in loading config file ({config_path}):', e)
315
518
  if config:
@@ -320,10 +523,10 @@ def parse_config_file(config_path: str) -> config_utils.Config:
320
523
 
321
524
 
322
525
  def _parse_dotlist(dotlist: List[str]) -> config_utils.Config:
323
- """Parse a comma-separated list of key-value pairs into a dictionary.
526
+ """Parse a single key-value pair into a dictionary.
324
527
 
325
528
  Args:
326
- dotlist: A comma-separated list of key-value pairs.
529
+ dotlist: A single key-value pair.
327
530
 
328
531
  Returns:
329
532
  A config_utils.Config object with the parsed key-value pairs.
@@ -338,17 +541,16 @@ def _parse_dotlist(dotlist: List[str]) -> config_utils.Config:
338
541
  if len(key) == 0 or len(value) == 0:
339
542
  raise ValueError(f'Invalid config override: {arg}. '
340
543
  'Please use the format: key=value')
341
- value = yaml.safe_load(value)
544
+ value = yaml_utils.safe_load(value)
342
545
  nested_keys = tuple(key.split('.'))
343
546
  config.set_nested(nested_keys, value)
344
547
  return config
345
548
 
346
549
 
347
550
  def _reload_config_from_internal_file(internal_config_path: str) -> None:
348
- global _dict, _loaded_config_path
349
551
  # Reset the global variables, to avoid using stale values.
350
- _dict = config_utils.Config()
351
- _loaded_config_path = None
552
+ _set_loaded_config(config_utils.Config())
553
+ _set_loaded_config_path(None)
352
554
 
353
555
  config_path = os.path.expanduser(internal_config_path)
354
556
  if not os.path.exists(config_path):
@@ -359,42 +561,99 @@ def _reload_config_from_internal_file(internal_config_path: str) -> None:
359
561
  'exist. Please double check the path or unset the env var: '
360
562
  f'unset {ENV_VAR_SKYPILOT_CONFIG}')
361
563
  logger.debug(f'Using config path: {config_path}')
362
- _dict = parse_config_file(config_path)
363
- _loaded_config_path = config_path
564
+ _set_loaded_config(parse_and_validate_config_file(config_path))
565
+ _set_loaded_config_path(config_path)
364
566
 
365
567
 
366
- def _reload_config_as_server() -> None:
367
- global _dict
368
- # Reset the global variables, to avoid using stale values.
369
- _dict = config_utils.Config()
568
def _create_table(engine: sqlalchemy.engine.Engine):
    """Initialize the config database by running its migrations.

    Args:
        engine: Engine passed to migration_utils.safe_alembic_upgrade().
    """
    db_name = migration_utils.SKYPILOT_CONFIG_DB_NAME
    target_version = migration_utils.SKYPILOT_CONFIG_VERSION
    migration_utils.safe_alembic_upgrade(engine, db_name, target_version)
370
573
 
371
- overrides: List[config_utils.Config] = []
372
- server_config = get_server_config()
373
- if server_config:
374
- overrides.append(server_config)
375
574
 
376
- # layer the configs on top of each other based on priority
377
- overlaid_server_config: config_utils.Config = config_utils.Config()
378
- for override in overrides:
379
- overlaid_server_config = overlay_skypilot_config(
380
- original_config=overlaid_server_config, override_configs=override)
575
def _initialize_and_get_db() -> sqlalchemy.engine.Engine:
    """Return the config database engine, creating it on first use.

    Only the API Server may call this, during initialization; client-side
    code must never call it. The engine is cached in `_SQLALCHEMY_ENGINE`.

    Returns:
        The cached engine.
    """
    assert os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None, (
        'initialize_and_get_db() can only be called by the API Server')

    global _SQLALCHEMY_ENGINE

    if _SQLALCHEMY_ENGINE is None:
        with _SQLALCHEMY_ENGINE_LOCK:
            # Check again while holding the lock: another caller may have
            # created the engine between the check above and acquiring it.
            if _SQLALCHEMY_ENGINE is None:
                # We only store config in the DB when using Postgres,
                # so no need to pass in db_name here.
                new_engine = db_utils.get_engine(None)

                # Run migrations if needed
                _create_table(new_engine)

                _SQLALCHEMY_ENGINE = new_engine
    return _SQLALCHEMY_ENGINE
602
+
603
+
604
def _reload_config_as_server(init_db: bool = False) -> None:
    """Rebuild the loaded config for a process running as the API server.

    The server config file is loaded first. If a database connection URI is
    present in the environment, the config stored in the database under
    API_SERVER_CONFIG_KEY is overlaid on top of it.

    Args:
        init_db: If True, initialize the database engine before reading the
            stored config.

    Raises:
        ValueError: If a db connection is configured and the config file
            contains more than one key.
    """
    # Reset the global variables, to avoid using stale values.
    _set_loaded_config(config_utils.Config())
    _set_loaded_config_path(None)

    config_file_path = _resolve_server_config_path()
    effective_config = _get_config_from_path(config_file_path)
    # Get the db url from the env var. _get_config_from_path should have moved
    # the db url specified in config file to the env var.
    connection_uri = os.environ.get(constants.ENV_VAR_DB_CONNECTION_URI)

    if connection_uri:
        if len(effective_config.keys()) > 1:
            raise ValueError(
                'If db config is specified, no other config is allowed')
        logger.debug('retrieving config from database')

        if init_db:
            _initialize_and_get_db()

        # Read the stored config yaml for the API server, if any.
        assert _SQLALCHEMY_ENGINE is not None
        stored_config: Optional[config_utils.Config] = None
        with orm.Session(_SQLALCHEMY_ENGINE) as session:
            record = session.query(config_yaml_table).filter_by(
                key=API_SERVER_CONFIG_KEY).first()
            if record:
                stored_config = config_utils.Config(
                    yaml_utils.safe_load(record.value))
                # Drop any db entry from the stored config.
                stored_config.pop_nested(('db',), None)

        if stored_config:
            effective_config = overlay_skypilot_config(effective_config,
                                                       stored_config)
    if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
        logger.debug(f'server config: \n'
                     f'{yaml_utils.dump_yaml_str(dict(effective_config))}')
    _set_loaded_config(effective_config)
    _set_loaded_config_path(config_file_path)
386
643
 
387
644
 
388
645
  def _reload_config_as_client() -> None:
389
- global _dict
390
646
  # Reset the global variables, to avoid using stale values.
391
- _dict = config_utils.Config()
647
+ _set_loaded_config(config_utils.Config())
648
+ _set_loaded_config_path(None)
392
649
 
393
650
  overrides: List[config_utils.Config] = []
394
- user_config = get_user_config()
651
+ user_config_path = resolve_user_config_path()
652
+ user_config = _get_config_from_path(user_config_path)
395
653
  if user_config:
396
654
  overrides.append(user_config)
397
- project_config = _get_project_config()
655
+ project_config_path = _resolve_project_config_path()
656
+ project_config = _get_config_from_path(project_config_path)
398
657
  if project_config:
399
658
  overrides.append(project_config)
400
659
 
@@ -406,42 +665,65 @@ def _reload_config_as_client() -> None:
406
665
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
407
666
  logger.debug(
408
667
  f'client config (before task and CLI overrides): \n'
409
- f'{common_utils.dump_yaml_str(dict(overlaid_client_config))}')
410
- _dict = overlaid_client_config
668
+ f'{yaml_utils.dump_yaml_str(dict(overlaid_client_config))}')
669
+ _set_loaded_config(overlaid_client_config)
670
+ _set_loaded_config_path([user_config_path, project_config_path])
411
671
 
412
672
 
413
673
def loaded_config_path() -> Optional[str]:
    """Returns a human-readable description of the loaded config path(s).

    Returns:
        - None if no config file is loaded and the config is not overridden.
        - '<overridden>' if no config file is loaded but the config is
          overridden.
        - The path itself if exactly one distinct path is loaded.
        - '<merged (p1, p2, ...)>' or '<overridden (p1, p2, ...)>' if several
          distinct paths are loaded, listed in the order they were recorded.
    """
    # De-duplicate with dict.fromkeys() rather than set(): a set of strings
    # iterates in an order that can differ between processes, which would
    # make the returned description nondeterministic.
    paths = list(
        dict.fromkeys(
            p for p in _get_loaded_config_path() if p is not None))
    if not paths:
        return '<overridden>' if _is_config_overridden() else None
    if len(paths) == 1:
        return paths[0]

    header = 'overridden' if _is_config_overridden() else 'merged'
    path_str = ', '.join(paths)
    return f'<{header} ({path_str})>'
685
+
686
+
687
def loaded_config_path_serialized() -> Optional[str]:
    """Returns the json serialized config path list, exactly as stored."""
    config_ctx = _get_config_context()
    return config_ctx.config_path
419
690
 
420
691
 
421
692
  # Load on import, synchronization is guaranteed by python interpreter.
422
- _reload_config()
693
+ reload_config(init_db=True)
423
694
 
424
695
 
425
696
def loaded() -> bool:
    """Returns if the user configurations are loaded.

    This is the truthiness of the currently loaded config object.
    """
    current_config = _get_loaded_config()
    return bool(current_config)
428
699
 
429
700
 
430
701
  @contextlib.contextmanager
431
702
  def override_skypilot_config(
432
- override_configs: Optional[Dict[str, Any]]) -> Iterator[None]:
703
+ override_configs: Optional[Dict[str, Any]],
704
+ override_config_path_serialized: Optional[str] = None
705
+ ) -> Iterator[None]:
433
706
  """Overrides the user configurations."""
434
- global _dict, _config_overridden
435
707
  # TODO(SKY-1215): allow admin user to extend the disallowed keys or specify
436
708
  # allowed keys.
437
709
  if not override_configs:
438
710
  # If no override configs (None or empty dict), do nothing.
439
711
  yield
440
712
  return
441
- original_config = _dict
713
+ original_config = _get_loaded_config()
714
+ original_config_path = loaded_config_path_serialized()
442
715
  override_configs = config_utils.Config(override_configs)
716
+ if override_config_path_serialized is None:
717
+ override_config_path = []
718
+ else:
719
+ override_config_path = json.loads(override_config_path_serialized)
720
+
443
721
  disallowed_diff_keys = []
444
722
  for key in constants.SKIPPED_CLIENT_OVERRIDE_KEYS:
723
+ if key == ('db',):
724
+ # since db key is popped out of server config, the key is expected
725
+ # to be different between client and server.
726
+ continue
445
727
  value = override_configs.pop_nested(key, default_value=None)
446
728
  if (value is not None and
447
729
  value != original_config.get_nested(key, default_value=None)):
@@ -455,12 +737,25 @@ def override_skypilot_config(
455
737
  'and will be ignored. Remove these keys to disable this warning. '
456
738
  'If you want to specify it, please modify it on server side or '
457
739
  'contact your administrator.')
458
- config = _dict.get_nested(
740
+ config = original_config.get_nested(
459
741
  keys=tuple(),
460
742
  default_value=None,
461
743
  override_configs=dict(override_configs),
462
744
  allowed_override_keys=None,
463
745
  disallowed_override_keys=constants.SKIPPED_CLIENT_OVERRIDE_KEYS)
746
+ workspace = config.get_nested(
747
+ keys=('active_workspace',),
748
+ default_value=constants.SKYPILOT_DEFAULT_WORKSPACE)
749
+ if (workspace != constants.SKYPILOT_DEFAULT_WORKSPACE and workspace
750
+ not in get_nested(keys=('workspaces',), default_value={})):
751
+ raise ValueError(f'Workspace {workspace} does not exist. '
752
+ 'Use `sky check` to see if it is defined on the API '
753
+ 'server and try again.')
754
+ # Initialize the active workspace context to the workspace specified, so
755
+ # that a new request is not affected by the previous request's workspace.
756
+ global _active_workspace_context
757
+ _active_workspace_context = threading.local()
758
+
464
759
  try:
465
760
  common_utils.validate_schema(
466
761
  config,
@@ -469,8 +764,10 @@ def override_skypilot_config(
469
764
  'https://docs.skypilot.co/en/latest/reference/config.html. ' # pylint: disable=line-too-long
470
765
  'Error: ',
471
766
  skip_none=False)
472
- _config_overridden = True
473
- _dict = config
767
+ _set_config_overridden(True)
768
+ _set_loaded_config(config)
769
+ _set_loaded_config_path(_get_loaded_config_path() +
770
+ override_config_path)
474
771
  yield
475
772
  except exceptions.InvalidSkyPilotConfigError as e:
476
773
  with ux_utils.print_exception_no_traceback():
@@ -478,20 +775,59 @@ def override_skypilot_config(
478
775
  'Failed to override the SkyPilot config on API '
479
776
  'server with your local SkyPilot config:\n'
480
777
  '=== SkyPilot config on API server ===\n'
481
- f'{common_utils.dump_yaml_str(dict(original_config))}\n'
778
+ f'{yaml_utils.dump_yaml_str(dict(original_config))}\n'
482
779
  '=== Your local SkyPilot config ===\n'
483
- f'{common_utils.dump_yaml_str(dict(override_configs))}\n'
780
+ f'{yaml_utils.dump_yaml_str(dict(override_configs))}\n'
484
781
  f'Details: {e}') from e
485
782
  finally:
486
- _dict = original_config
487
- _config_overridden = False
783
+ _set_loaded_config(original_config)
784
+ _set_config_overridden(False)
785
+ _set_loaded_config_path_serialized(original_config_path)
786
+
787
+
788
@contextlib.contextmanager
def replace_skypilot_config(new_configs: config_utils.Config) -> Iterator[None]:
    """Replaces the global config with the new configs.

    While the context is active, the loaded config is `new_configs`, and the
    config is also written to a temporary YAML file whose path is exported
    through the ENV_VAR_SKYPILOT_CONFIG env var. On exit, including exit via
    an exception, the original config, config path and env var are restored.
    If `new_configs` equals the current config, nothing is changed.

    This function is concurrent safe when it is:
    1. called in different processes;
    2. or called in a same process but with different context, refer to
      sky_utils.context for more details.

    Args:
        new_configs: The config to install for the duration of the context.
    """
    original_config = _get_loaded_config()
    original_config_path = loaded_config_path_serialized()
    original_env_var = os.environ.get(ENV_VAR_SKYPILOT_CONFIG)
    if new_configs != original_config:
        try:
            # Modify the global config of current process or context
            _set_loaded_config(new_configs)
            # NOTE: the file is created with delete=False and is not removed
            # here, so it stays on disk after the context exits.
            with tempfile.NamedTemporaryFile(delete=False,
                                             mode='w',
                                             prefix='mutated-skypilot-config-',
                                             suffix='.yaml') as temp_file:
                yaml_utils.dump_yaml(temp_file.name, dict(**new_configs))
            # Modify the env var of current process or context so that the
            # new config will be used by spawned sub-processes.
            # Note that this code modifies os.environ directly because it
            # will be hijacked to be context-aware if a context is active.
            os.environ[ENV_VAR_SKYPILOT_CONFIG] = temp_file.name
            _set_loaded_config_path(temp_file.name)
            yield
        finally:
            # Restore the original config and env var. Done in `finally` so
            # that a failure inside the `with` body (or while writing the
            # temp file) cannot leave the replaced config in place.
            _set_loaded_config(original_config)
            _set_loaded_config_path_serialized(original_config_path)
            if original_env_var:
                os.environ[ENV_VAR_SKYPILOT_CONFIG] = original_env_var
            else:
                os.environ.pop(ENV_VAR_SKYPILOT_CONFIG, None)
    else:
        yield
488
824
 
489
825
 
490
826
  def _compose_cli_config(cli_config: Optional[List[str]]) -> config_utils.Config:
491
827
  """Composes the skypilot CLI config.
492
828
  CLI config can either be:
493
829
  - A path to a config file
494
- - A comma-separated list of key-value pairs
830
+ - A single key-value pair
495
831
  """
496
832
 
497
833
  if not cli_config:
@@ -506,14 +842,15 @@ def _compose_cli_config(cli_config: Optional[List[str]]) -> config_utils.Config:
506
842
  'Cannot use multiple --config flags with a config file.')
507
843
  config_source = maybe_config_path
508
844
  # cli_config is a path to a config file
509
- parsed_config = parse_config_file(maybe_config_path)
510
- else: # cli_config is a comma-separated list of key-value pairs
845
+ parsed_config = parse_and_validate_config_file(maybe_config_path)
846
+ else: # cli_config is a single key-value pair
511
847
  parsed_config = _parse_dotlist(cli_config)
512
848
  _validate_config(parsed_config, config_source)
513
849
  except ValueError as e:
514
850
  raise ValueError(f'Invalid config override: {cli_config}. '
515
851
  f'Check if config file exists or if the dotlist '
516
- f'is formatted as: key1=value1,key2=value2') from e
852
+ f'is formatted as: key1=value1,key2=value2.\n'
853
+ f'Details: {e}') from e
517
854
  logger.debug('CLI overrides config syntax check passed.')
518
855
 
519
856
  return parsed_config
@@ -529,11 +866,81 @@ def apply_cli_config(cli_config: Optional[List[str]]) -> Dict[str, Any]:
529
866
  cli_config: A path to a config file or a comma-separated
530
867
  list of key-value pairs.
531
868
  """
532
- global _dict
533
869
  parsed_config = _compose_cli_config(cli_config)
534
870
  if sky_logging.logging_enabled(logger, sky_logging.DEBUG):
535
871
  logger.debug(f'applying following CLI overrides: \n'
536
- f'{common_utils.dump_yaml_str(dict(parsed_config))}')
537
- _dict = overlay_skypilot_config(original_config=_dict,
538
- override_configs=parsed_config)
872
+ f'{yaml_utils.dump_yaml_str(dict(parsed_config))}')
873
+ _set_loaded_config(
874
+ overlay_skypilot_config(original_config=_get_loaded_config(),
875
+ override_configs=parsed_config))
539
876
  return parsed_config
877
+
878
+
879
def update_api_server_config_no_lock(config: config_utils.Config) -> None:
    """Persists a new API server config and reloads it.

    If a db connection URI is set in the environment, the config is saved to
    the database. Otherwise it is dumped to the server config file and, when
    running in Kubernetes, mirrored to the ConfigMap. The config is reloaded
    afterwards in both cases.

    Note: when running as the API server, the 'db' key is popped from
    `config` in place.

    Args:
        config: The config to save and sync.

    Raises:
        ValueError: If not called by the API Server (outside of pytest), if
            the config tries to change the db url of a running server, or if
            the database dialect is unsupported.
    """

    def is_running_pytest() -> bool:
        return 'PYTEST_CURRENT_TEST' in os.environ

    # Only allow this function to be called by the API Server in production.
    if not is_running_pytest() and os.environ.get(
            constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
        raise ValueError('This function can only be called by the API Server.')

    global_config_path = _resolve_server_config_path()
    if global_config_path is None:
        # Fallback to ~/.sky/config.yaml, and make sure it exists.
        global_config_path = os.path.expanduser(get_user_config_path())
        # touch() fails if the parent directory is missing, so create it
        # first.
        parent_dir = os.path.dirname(global_config_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        pathlib.Path(global_config_path).touch(exist_ok=True)

    db_updated = False
    if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
        existing_db_url = os.environ.get(constants.ENV_VAR_DB_CONNECTION_URI)
        new_db_url = config.pop_nested(('db',), None)
        if new_db_url and new_db_url != existing_db_url:
            raise ValueError('Cannot change db url while server is running')
        if existing_db_url:

            def _set_config_yaml_to_db(key: str, config: config_utils.Config):
                # reload_config(init_db=True) is called when this module is
                # imported, so the database engine must already be initialized.
                assert _SQLALCHEMY_ENGINE is not None
                config_str = yaml_utils.dump_yaml_str(dict(config))
                with orm.Session(_SQLALCHEMY_ENGINE) as session:
                    if (_SQLALCHEMY_ENGINE.dialect.name ==
                            db_utils.SQLAlchemyDialect.SQLITE.value):
                        insert_func = sqlite.insert
                    elif (_SQLALCHEMY_ENGINE.dialect.name ==
                          db_utils.SQLAlchemyDialect.POSTGRESQL.value):
                        insert_func = postgresql.insert
                    else:
                        raise ValueError('Unsupported database dialect')
                    # Upsert: insert the row, or overwrite the value if the
                    # key already exists.
                    insert_stmnt = insert_func(config_yaml_table).values(
                        key=key, value=config_str)
                    do_update_stmt = insert_stmnt.on_conflict_do_update(
                        index_elements=[config_yaml_table.c.key],
                        set_={config_yaml_table.c.value: config_str})
                    session.execute(do_update_stmt)
                    session.commit()

            logger.debug('saving api_server config to db')
            _set_config_yaml_to_db(API_SERVER_CONFIG_KEY, config)
            db_updated = True

    if not db_updated:
        # save to the local file (PVC in Kubernetes, local file otherwise)
        yaml_utils.dump_yaml(global_config_path, dict(config))

        if config_map_utils.is_running_in_kubernetes():
            # In Kubernetes, sync the PVC config to ConfigMap for user
            # convenience.
            # PVC file is the source of truth, ConfigMap is just a mirror for
            # easy access.
            config_map_utils.patch_configmap_with_config(
                config, global_config_path)

    reload_config()