skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/workspaces/core.py ADDED
@@ -0,0 +1,655 @@
1
+ """Workspace management core."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Callable, Dict, List, Tuple
5
+
6
+ import filelock
7
+
8
+ from sky import check as sky_check
9
+ from sky import exceptions
10
+ from sky import models
11
+ from sky import sky_logging
12
+ from sky import skypilot_config
13
+ from sky.backends import backend_utils
14
+ from sky.skylet import constants
15
+ from sky.usage import usage_lib
16
+ from sky.users import permission
17
+ from sky.users import rbac
18
+ from sky.utils import annotations
19
+ from sky.utils import common_utils
20
+ from sky.utils import config_utils
21
+ from sky.utils import locks
22
+ from sky.utils import resource_checker
23
+ from sky.utils import schemas
24
+ from sky.workspaces import utils as workspaces_utils
25
+
26
+ logger = sky_logging.init_logger(__name__)
27
+
28
+ # Timeout (in seconds) for acquiring the workspace configuration lock, which prevents race conditions between concurrent updates
29
+ _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS = 60
30
+
31
+
32
+ @dataclass
33
+ class WorkspaceConfigComparison:
34
+ """Result of comparing current and new workspace configurations.
35
+
36
+ This class encapsulates the results of analyzing differences between
37
+ workspace configurations, particularly focusing on user access changes
38
+ and their implications for resource validation.
39
+
40
+ Attributes:
41
+ only_user_access_changes: True if only allowed_users or private changed
42
+ private_changed: True if private setting changed
43
+ private_old: Old private setting value
44
+ private_new: New private setting value
45
+ allowed_users_changed: True if allowed_users changed
46
+ allowed_users_old: Old allowed users list
47
+ allowed_users_new: New allowed users list
48
+ removed_users: Users removed from allowed_users
49
+ added_users: Users added to allowed_users
50
+ """
51
+ only_user_access_changes: bool
52
+ private_changed: bool
53
+ private_old: bool
54
+ private_new: bool
55
+ allowed_users_changed: bool
56
+ allowed_users_old: List[str]
57
+ allowed_users_new: List[str]
58
+ removed_users: List[str]
59
+ added_users: List[str]
60
+
61
+
62
+ # =========================
63
+ # = Workspace Management =
64
+ # =========================
65
+
66
+
67
+ def get_workspaces() -> Dict[str, Any]:
68
+ """Returns the workspace config."""
69
+ return workspaces_for_user(common_utils.get_current_user().id)
70
+
71
+
72
+ def _update_workspaces_config(
73
+ workspace_modifier_fn: Callable[[Dict[str, Any]],
74
+ None]) -> Dict[str, Any]:
75
+ """Update the workspaces configuration in the config file.
76
+
77
+ This function uses file locking to prevent race conditions when multiple
78
+ processes try to update the workspace configuration simultaneously.
79
+
80
+ Args:
81
+ workspace_modifier_fn: A function that takes the current workspaces
82
+ dict and modifies it in-place. This ensures all read-modify-write
83
+ operations happen atomically inside the lock.
84
+
85
+ Returns:
86
+ The updated workspaces configuration.
87
+ """
88
+ lock_path = skypilot_config.get_skypilot_config_lock_path()
89
+ try:
90
+ with filelock.FileLock(lock_path,
91
+ _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS):
92
+ # Read the current config inside the lock to ensure we have
93
+ # the latest state
94
+ current_config = skypilot_config.to_dict()
95
+ current_workspaces = current_config.get('workspaces', {}).copy()
96
+
97
+ # Apply the modification inside the lock
98
+ workspace_modifier_fn(current_workspaces)
99
+
100
+ # Update the config with the modified workspaces
101
+ current_config['workspaces'] = current_workspaces
102
+
103
+ # Write the configuration back to the file
104
+ skypilot_config.update_api_server_config_no_lock(current_config)
105
+
106
+ return current_workspaces
107
+ except filelock.Timeout as e:
108
+ raise RuntimeError(
109
+ f'Failed to update workspace configuration due to a timeout '
110
+ f'when trying to acquire the lock at {lock_path}. This may '
111
+ 'indicate another SkyPilot process is currently updating the '
112
+ 'configuration. Please try again or manually remove the lock '
113
+ f'file if you believe it is stale.') from e
114
+
115
+
116
def _validate_workspace_config(workspace_name: str,
                               workspace_config: Dict[str, Any]) -> None:
    """Check a single workspace's configuration against the config schema.

    Args:
        workspace_name: Name of the workspace; only used in the error prefix.
        workspace_config: The configuration dict to validate.

    Raises:
        ValueError: If the configuration does not satisfy the per-workspace
            schema.
    """
    # The per-workspace schema is the 'additionalProperties' entry of the
    # top-level 'workspaces' property.
    full_schema = schemas.get_config_schema()
    per_workspace_schema = (
        full_schema['properties']['workspaces']['additionalProperties'])
    try:
        common_utils.validate_schema(
            workspace_config, per_workspace_schema,
            f'Invalid configuration for workspace {workspace_name!r}: ')
    except exceptions.InvalidSkyPilotConfigError as schema_err:
        # Re-raise as ValueError because (a) it is more user-friendly and
        # (b) it will not be swallowed by a caller's try/except for
        # InvalidSkyPilotConfigError, which could cause confusion.
        raise ValueError(str(schema_err)) from schema_err
131
+
132
+
133
def _compare_workspace_configs(
    current_config: Dict[str, Any],
    new_config: Dict[str, Any],
) -> WorkspaceConfigComparison:
    """Summarize how two versions of a workspace configuration differ.

    Args:
        current_config: The workspace configuration currently in effect.
        new_config: The proposed workspace configuration.

    Returns:
        A WorkspaceConfigComparison describing whether 'private' and the
        effective allowed users changed, which users were added/removed, and
        whether anything other than the access fields differs.
    """

    def _without_access_fields(cfg: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of cfg with the user-access keys dropped."""
        return {
            key: value
            for key, value in cfg.items()
            if key not in ('private', 'allowed_users')
        }

    # A missing 'private' key means the workspace is public.
    was_private = current_config.get('private', False)
    is_private = new_config.get('private', False)

    # Admin user ids are appended to both user lists below.
    admin_ids = permission.permission_service.get_users_for_role(
        rbac.RoleName.ADMIN.value)

    # Effective users before the change: the workspace's users when it is
    # private, otherwise none; admins are appended either way.
    if was_private:
        users_before = workspaces_utils.get_workspace_users(current_config)
    else:
        users_before = []
    users_before += admin_ids

    # Same computation for the proposed configuration.
    if is_private:
        users_after = workspaces_utils.get_workspace_users(new_config)
    else:
        users_after = []
    users_after += admin_ids

    # Set arithmetic gives the added/removed users irrespective of order or
    # duplicates in the lists.
    before_set = set(users_before)
    after_set = set(users_after)
    dropped = list(before_set.difference(after_set))
    gained = list(after_set.difference(before_set))

    # True when every key other than the access fields is unchanged.
    access_only = (_without_access_fields(current_config) ==
                   _without_access_fields(new_config))

    return WorkspaceConfigComparison(
        only_user_access_changes=access_only,
        private_changed=(was_private != is_private),
        private_old=was_private,
        private_new=is_private,
        allowed_users_changed=(before_set != after_set),
        allowed_users_old=users_before,
        allowed_users_new=users_after,
        removed_users=dropped,
        added_users=gained)
194
+
195
+
196
def _validate_workspace_config_changes_with_lock(
        workspace_name: str, current_config: Dict[str, Any],
        new_config: Dict[str, Any]) -> None:
    """Run _validate_workspace_config_changes under the workspace lock.

    Args:
        workspace_name: The name of the workspace being changed.
        current_config: The workspace configuration currently in effect.
        new_config: The proposed workspace configuration.

    Raises:
        RuntimeError: If the workspace lock cannot be acquired within
            backend_utils.WORKSPACE_LOCK_TIMEOUT_SECONDS.
        ValueError: Propagated from the validation itself when the change is
            rejected.
    """
    lock_id = backend_utils.workspace_lock_id(workspace_name)
    timeout_seconds = backend_utils.WORKSPACE_LOCK_TIMEOUT_SECONDS
    try:
        with locks.get_lock(lock_id, timeout_seconds):
            # The resource-based validation happens while the lock is held.
            _validate_workspace_config_changes(workspace_name, current_config,
                                               new_config)
    except locks.LockTimeout as timeout_err:
        # The timeout details are embedded in the message, so the exception
        # chain is deliberately suppressed with "from None".
        message = (f'Failed to validate workspace {workspace_name!r} due to '
                   'a timeout when trying to access database. Please '
                   f'try again or manually remove the lock at {lock_id}. '
                   f'{common_utils.format_exception(timeout_err)}')
        raise RuntimeError(message) from None
212
+
213
+
214
def _validate_workspace_config_changes(workspace_name: str,
                                       current_config: Dict[str, Any],
                                       new_config: Dict[str, Any]) -> None:
    """Validate workspace configuration changes based on active resources.

    This function implements the logic:
    - If only allowed_users or private changed:
        - If private changed from true to false: allow it
        - If private changed from false to true: check that all active
          resources belong to allowed_users
        - If private didn't change: when users were removed, check active
          resources against the new allowed users
    - Otherwise: check that workspace has no active resources

    Args:
        workspace_name: The name of the workspace.
        current_config: The current workspace configuration.
        new_config: The new workspace configuration.

    Raises:
        ValueError: If the configuration change is not allowed due to active
            resources.
    """
    config_comparison = _compare_workspace_configs(current_config, new_config)

    # Case 1: something other than the access fields changed. The workspace
    # must have no active resources at all.
    if not config_comparison.only_user_access_changes:
        logger.info(
            f'Non-user-access configuration changes detected for'
            f' workspace {workspace_name!r}. Checking that workspace has'
            f' no active resources.')
        resource_checker.check_no_active_resources_for_workspaces([
            (workspace_name, 'update')
        ])
        return

    # Case 2: the 'private' flag flipped.
    if config_comparison.private_changed:
        if (config_comparison.private_old and
                not config_comparison.private_new):
            # Private -> public is always allowed.
            logger.info(
                f'Workspace {workspace_name!r} changed from private to'
                f' public.')
            return
        if (not config_comparison.private_old and
                config_comparison.private_new):
            # Public -> private: every active resource must belong to one of
            # the new allowed users.
            logger.info(
                f'Workspace {workspace_name!r} changed from public to'
                f' private. Checking that all active resources belong'
                f' to allowed users.')
            error_summary, missed_users_names, _ = (
                resource_checker.check_users_workspaces_active_resources(
                    config_comparison.allowed_users_new, [workspace_name]))
            if error_summary:
                error_msg = (f'Cannot change workspace {workspace_name!r}'
                             f' to private ')
                if missed_users_names:
                    missed_users_list = ', '.join(missed_users_names)
                    if len(missed_users_names) == 1:
                        error_msg += (f'because the user '
                                      f'{missed_users_list!r} has '
                                      f'{error_summary}')
                    else:
                        error_msg += (f'because the users '
                                      f'{missed_users_list!r} have '
                                      f'{error_summary}')
                error_msg += (' but not in the allowed_users list.'
                              ' Please either add the users to allowed_users'
                              ' or ask them to terminate their resources.')
                raise ValueError(error_msg)
        return

    # Case 3: 'private' is unchanged; only removals from the allowed users
    # need a resource check.
    if not (config_comparison.allowed_users_changed and
            config_comparison.removed_users):
        return

    logger.info(
        f'Checking that removed users'
        f' {config_comparison.removed_users} do not have'
        f' active resources in workspace {workspace_name!r}.')
    error_summary, missed_users_names, missed_user_dict = (
        resource_checker.check_users_workspaces_active_resources(
            config_comparison.allowed_users_new, [workspace_name]))
    if not error_summary:
        return

    # Names of the removed users that the checker reported; missed_user_dict
    # is indexed by user id and its values are joined as display names.
    error_user_names = [
        missed_user_dict[user_id]
        for user_id in config_comparison.removed_users
        if user_id in missed_user_dict
    ]

    if error_user_names:
        error_users_list = ', '.join(error_user_names)
        if len(error_user_names) == 1:
            error_msg = (f'Cannot remove user {error_users_list!r} '
                         f'from workspace {workspace_name!r} because the '
                         f'user has {error_summary}')
        else:
            error_msg = (f'Cannot remove users {error_users_list!r}'
                         f' from workspace {workspace_name!r} because the'
                         f' users have {error_summary}')
    else:
        # The checker reported active resources, but none of their owners is
        # among the removed users. Previously this produced the message
        # "Cannot remove users '' ..."; name the owners reported by the
        # checker instead so the error is actionable.
        # NOTE(review): assumes missed_users_names is non-empty whenever
        # error_summary is set -- confirm against resource_checker.
        error_users_list = ', '.join(missed_users_names)
        error_msg = (f'Cannot update allowed_users of workspace '
                     f'{workspace_name!r} because the users '
                     f'{error_users_list!r} have {error_summary}')
    error_msg += (', but not in the allowed_users list.'
                  ' Please either add the users to allowed_users or'
                  ' ask them to terminate their resources.')
    raise ValueError(error_msg)
324
+
325
+
326
@usage_lib.entrypoint
def update_workspace(workspace_name: str,
                     config: Dict[str, Any]) -> Dict[str, Any]:
    """Replace the configuration of one workspace.

    The new configuration is schema-validated, then compared against the
    current one to decide which active-resource checks apply, and finally
    written to the config file together with the workspace permission policy.

    Args:
        workspace_name: The name of the workspace to update.
        config: The new configuration for the workspace.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the workspace configuration is invalid, or if there are
            active clusters or managed jobs that prevent the configuration
            change.
            The validation logic depends on what changed:
            - If only allowed_users or private changed:
                - Private true->false: Always allowed
                - Private false->true: All active resources must belong to
                  allowed_users
                - allowed_users changes: Removed users must not have active
                  resources
            - Other changes: Workspace must have no active resources
        FileNotFoundError: If the config file cannot be found.
        PermissionError: If the config file cannot be written.
    """
    _validate_workspace_config(workspace_name, config)

    # Look up what is currently stored for this workspace; a workspace that
    # is not present yet is compared against an empty configuration.
    existing_workspaces = skypilot_config.get_nested(('workspaces',),
                                                     default_value={})
    existing_config = existing_workspaces.get(workspace_name, {})

    _validate_workspace_config_changes_with_lock(workspace_name,
                                                 existing_config, config)

    def update_workspace_fn(workspaces: Dict[str, Any]) -> None:
        """Store the new config and refresh the workspace policy."""
        workspaces[workspace_name] = config
        allowed = workspaces_utils.get_workspace_users(config)
        permission.permission_service.update_workspace_policy(
            workspace_name, allowed)

    # The callback runs inside the config file lock.
    updated_workspaces = _update_workspaces_config(update_workspace_fn)

    # Best-effort `sky check` for the workspace: a failure is logged but does
    # not undo or fail the update.
    try:
        sky_check.check(quiet=True, workspace=workspace_name)
    except Exception as check_err:  # pylint: disable=broad-except
        logger.warning(f'Workspace {workspace_name} configuration saved but '
                       f'validation check failed: {check_err}')

    return updated_workspaces
382
+
383
+
384
@usage_lib.entrypoint
def create_workspace(workspace_name: str,
                     config: Dict[str, Any]) -> Dict[str, Any]:
    """Add a new workspace to the configuration.

    Args:
        workspace_name: The name of the workspace to create.
        config: The configuration for the new workspace.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the workspace already exists or configuration is
            invalid.
        FileNotFoundError: If the config file cannot be found.
        PermissionError: If the config file cannot be written.
    """
    # The name must be a non-empty string.
    if not (workspace_name and isinstance(workspace_name, str)):
        raise ValueError('Workspace name must be a non-empty string.')

    _validate_workspace_config(workspace_name, config)

    def create_workspace_fn(workspaces: Dict[str, Any]) -> None:
        """Insert the workspace and register its policy (runs in the lock)."""
        # Existence is checked inside the lock, against the freshly read
        # workspaces dict.
        if workspace_name in workspaces:
            raise ValueError(f'Workspace {workspace_name!r} already exists. '
                             'Use update instead.')
        workspaces[workspace_name] = config
        allowed = workspaces_utils.get_workspace_users(config)
        permission.permission_service.add_workspace_policy(
            workspace_name, allowed)

    created_workspaces = _update_workspaces_config(create_workspace_fn)

    # Best-effort `sky check` for the new workspace: a failure is logged but
    # does not fail the creation.
    try:
        sky_check.check(quiet=True, workspace=workspace_name)
    except Exception as check_err:  # pylint: disable=broad-except
        logger.warning(f'Workspace {workspace_name} configuration saved but '
                       f'validation check failed: {check_err}')

    return created_workspaces
430
+
431
+
432
@usage_lib.entrypoint
def delete_workspace(workspace_name: str) -> Dict[str, Any]:
    """Remove a workspace from the configuration.

    Args:
        workspace_name: The name of the workspace to delete.

    Returns:
        The updated workspaces configuration.

    Raises:
        ValueError: If the workspace doesn't exist, is the default workspace,
            or has active clusters or managed jobs.
        FileNotFoundError: If the config file cannot be found.
        PermissionError: If the config file cannot be written.
    """
    default_workspace = constants.SKYPILOT_DEFAULT_WORKSPACE
    # The default workspace can never be deleted.
    if workspace_name == default_workspace:
        raise ValueError(f'Cannot delete the default workspace '
                         f'{constants.SKYPILOT_DEFAULT_WORKSPACE!r}.')

    # Fail early if get_workspaces() does not list this workspace.
    if workspace_name not in get_workspaces():
        raise ValueError(f'Workspace {workspace_name!r} does not exist.')

    # Check for active clusters and managed jobs in the workspace.
    resource_checker.check_no_active_resources_for_workspaces([
        (workspace_name, 'delete')
    ])

    def delete_workspace_fn(workspaces: Dict[str, Any]) -> None:
        """Drop the workspace and its policy (runs inside the lock)."""
        # Re-check inside the lock against the freshly read workspaces dict.
        if workspace_name not in workspaces:
            raise ValueError(f'Workspace {workspace_name!r} does not exist.')
        del workspaces[workspace_name]
        permission.permission_service.remove_workspace_policy(workspace_name)

    remaining_workspaces = _update_workspaces_config(delete_workspace_fn)
    return remaining_workspaces
472
+
473
+
474
+ # =========================
475
+ # = Config Management =
476
+ # =========================
477
+
478
+
479
@usage_lib.entrypoint
def get_config() -> Dict[str, Any]:
    """Return the whole SkyPilot configuration.

    Returns:
        The complete SkyPilot configuration as a dictionary, as produced by
        skypilot_config.to_dict().
    """
    full_config = skypilot_config.to_dict()
    return full_config
487
+
488
+
489
@usage_lib.entrypoint
def update_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Replace the entire SkyPilot configuration.

    Steps, in order: schema validation; early return when nothing changed;
    rejection of API server endpoint changes; per-workspace validation of
    modified workspaces; active-resource check for deleted workspaces;
    writing the config and syncing workspace policies under the config file
    lock; and a final best-effort `sky check`.

    Args:
        config: The new configuration to save.

    Returns:
        The updated configuration (the same object that was passed in).

    Raises:
        ValueError: If the configuration is invalid, or if there are
            active clusters or managed jobs in workspaces being modified.
        RuntimeError: If the config file lock cannot be acquired in time.
        FileNotFoundError: If the config file cannot be found.
        PermissionError: If the config file cannot be written.
    """
    # Validate the configuration using the schema.
    try:
        common_utils.validate_schema(config, schemas.get_config_schema(),
                                     'Invalid SkyPilot configuration: ')
    except exceptions.InvalidSkyPilotConfigError as schema_err:
        raise ValueError(str(schema_err)) from schema_err

    existing_config = skypilot_config.to_dict()
    # If there are no changes to the config, we can return early.
    if existing_config == config:
        return config

    # The API server endpoint must stay the same.
    old_endpoint = existing_config.get('api_server', {}).get('endpoint')
    requested_endpoint = config.get('api_server', {}).get('endpoint')
    if old_endpoint != requested_endpoint:
        raise ValueError('API server endpoint should not be changed to avoid '
                         'unexpected behavior.')

    existing_workspaces = existing_config.get('workspaces', {})
    requested_workspaces = config.get('workspaces', {})

    # Policy changes to apply once the config has been written, keyed by
    # workspace name. Deleted workspaces only need their name.
    policies_to_add: Dict[str, List[str]] = {}
    policies_to_update: Dict[str, List[str]] = {}

    # Classify every workspace in the new config as added, changed or
    # untouched. Changed workspaces are validated right away.
    for name, requested_ws_config in requested_workspaces.items():
        if name not in existing_workspaces:
            policies_to_add[name] = workspaces_utils.get_workspace_users(
                requested_ws_config)
            continue

        existing_ws_config = existing_workspaces[name]
        if existing_ws_config == requested_ws_config:
            continue

        _validate_workspace_config(name, requested_ws_config)
        _validate_workspace_config_changes_with_lock(name, existing_ws_config,
                                                     requested_ws_config)
        policies_to_update[name] = workspaces_utils.get_workspace_users(
            requested_ws_config)

    # Workspaces present now but absent from the new config are deletions.
    deleted_names: List[str] = [
        name for name in existing_workspaces
        if name not in requested_workspaces
    ]
    if constants.SKYPILOT_DEFAULT_WORKSPACE in deleted_names:
        raise ValueError(f'Cannot delete the default workspace '
                         f'{constants.SKYPILOT_DEFAULT_WORKSPACE!r}.')

    # Check all deleted workspaces for active resources in one call.
    workspaces_to_check: List[Tuple[str, str]] = [
        (name, 'delete') for name in deleted_names
    ]
    resource_checker.check_no_active_resources_for_workspaces(
        workspaces_to_check)

    # Use file locking to prevent race conditions while writing.
    lock_path = skypilot_config.get_skypilot_config_lock_path()
    try:
        with filelock.FileLock(lock_path,
                               _WORKSPACE_CONFIG_LOCK_TIMEOUT_SECONDS):
            # Convert to config_utils.Config and save.
            skypilot_config.update_api_server_config_no_lock(
                config_utils.Config.from_dict(config))

            # Sync permission policies: additions, then updates, then
            # removals.
            permission_service = permission.permission_service
            for name, users in policies_to_add.items():
                permission_service.add_workspace_policy(name, users)
            for name, users in policies_to_update.items():
                permission_service.update_workspace_policy(name, users)
            for name in deleted_names:
                permission_service.remove_workspace_policy(name)
    except filelock.Timeout as timeout_err:
        message = (
            f'Failed to update configuration due to a timeout '
            f'when trying to acquire the lock at {lock_path}. This may '
            'indicate another SkyPilot process is currently updating the '
            'configuration. Please try again or manually remove the lock '
            f'file if you believe it is stale.')
        raise RuntimeError(message) from timeout_err

    # Best-effort `sky check`: a failure is logged but does not fail the
    # update.
    try:
        sky_check.check(quiet=True)
    except Exception as check_err:  # pylint: disable=broad-except
        logger.warning(f'Configuration saved but '
                       f'validation check failed: {check_err}')

    return config
604
+
605
+
606
def reject_request_for_unauthorized_workspace(user: models.User) -> None:
    """Raise if the user may not access the currently active workspace.

    Args:
        user: The user making the request.

    Raises:
        PermissionDeniedError: If the permission service reports that the
            user has no access to the active workspace.
    """
    active_workspace = skypilot_config.get_active_workspace()
    has_access = permission.permission_service.check_workspace_permission(
        user.id, active_workspace)
    if has_access:
        return
    raise exceptions.PermissionDeniedError(
        f'User {user.name} ({user.id}) does not have '
        f'permission to access workspace {active_workspace!r}')
622
+
623
+
624
def is_workspace_private(workspace_config: Dict[str, Any]) -> bool:
    """Tell whether a workspace configuration marks the workspace private.

    Args:
        workspace_config: The workspace configuration dictionary.

    Returns:
        The value stored under 'private' when that key is present; False
        (i.e. public) when it is absent.
    """
    if 'private' in workspace_config:
        return workspace_config['private']
    # A missing 'private' key means the workspace is public.
    return False
634
+
635
+
636
@annotations.lru_cache(scope='request', maxsize=1)
def workspaces_for_user(user_id: str) -> Dict[str, Any]:
    """Collect the workspaces a given user is permitted to access.

    Args:
        user_id: The user id to check.

    Returns:
        A map from workspace name to workspace configuration, restricted to
        the workspaces for which the permission service grants access.
    """
    all_workspaces = skypilot_config.get_nested(('workspaces',),
                                                default_value={})
    # The default workspace is always considered, even when it is not listed
    # in the config; it then gets an empty configuration.
    all_workspaces.setdefault(constants.SKYPILOT_DEFAULT_WORKSPACE, {})

    return {
        name: ws_config
        for name, ws_config in all_workspaces.items()
        if permission.permission_service.check_workspace_permission(
            user_id, name)
    }