skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (512)
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/common.py +24 -1
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/hyperbolic.py +8 -0
  7. sky/adaptors/kubernetes.py +149 -18
  8. sky/adaptors/nebius.py +170 -17
  9. sky/adaptors/primeintellect.py +1 -0
  10. sky/adaptors/runpod.py +68 -0
  11. sky/adaptors/seeweb.py +167 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/admin_policy.py +187 -4
  14. sky/authentication.py +179 -225
  15. sky/backends/__init__.py +4 -2
  16. sky/backends/backend.py +22 -9
  17. sky/backends/backend_utils.py +1299 -380
  18. sky/backends/cloud_vm_ray_backend.py +1715 -518
  19. sky/backends/docker_utils.py +1 -1
  20. sky/backends/local_docker_backend.py +11 -6
  21. sky/backends/wheel_utils.py +37 -9
  22. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  23. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  24. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  25. sky/{clouds/service_catalog → catalog}/common.py +89 -48
  26. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  27. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  28. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
  29. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
  31. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  33. sky/catalog/data_fetchers/fetch_nebius.py +335 -0
  34. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  35. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  36. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  37. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  38. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  39. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  40. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  41. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  42. sky/catalog/hyperbolic_catalog.py +136 -0
  43. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  44. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  45. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  46. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  47. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  48. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  49. sky/catalog/primeintellect_catalog.py +95 -0
  50. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  51. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  52. sky/catalog/seeweb_catalog.py +184 -0
  53. sky/catalog/shadeform_catalog.py +165 -0
  54. sky/catalog/ssh_catalog.py +167 -0
  55. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  56. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  57. sky/check.py +491 -203
  58. sky/cli.py +5 -6005
  59. sky/client/{cli.py → cli/command.py} +2477 -1885
  60. sky/client/cli/deprecation_utils.py +99 -0
  61. sky/client/cli/flags.py +359 -0
  62. sky/client/cli/table_utils.py +320 -0
  63. sky/client/common.py +70 -32
  64. sky/client/oauth.py +82 -0
  65. sky/client/sdk.py +1203 -297
  66. sky/client/sdk_async.py +833 -0
  67. sky/client/service_account_auth.py +47 -0
  68. sky/cloud_stores.py +73 -0
  69. sky/clouds/__init__.py +13 -0
  70. sky/clouds/aws.py +358 -93
  71. sky/clouds/azure.py +105 -83
  72. sky/clouds/cloud.py +127 -36
  73. sky/clouds/cudo.py +68 -50
  74. sky/clouds/do.py +66 -48
  75. sky/clouds/fluidstack.py +63 -44
  76. sky/clouds/gcp.py +339 -110
  77. sky/clouds/hyperbolic.py +293 -0
  78. sky/clouds/ibm.py +70 -49
  79. sky/clouds/kubernetes.py +563 -162
  80. sky/clouds/lambda_cloud.py +74 -54
  81. sky/clouds/nebius.py +206 -80
  82. sky/clouds/oci.py +88 -66
  83. sky/clouds/paperspace.py +61 -44
  84. sky/clouds/primeintellect.py +317 -0
  85. sky/clouds/runpod.py +164 -74
  86. sky/clouds/scp.py +89 -83
  87. sky/clouds/seeweb.py +466 -0
  88. sky/clouds/shadeform.py +400 -0
  89. sky/clouds/ssh.py +263 -0
  90. sky/clouds/utils/aws_utils.py +10 -4
  91. sky/clouds/utils/gcp_utils.py +87 -11
  92. sky/clouds/utils/oci_utils.py +38 -14
  93. sky/clouds/utils/scp_utils.py +177 -124
  94. sky/clouds/vast.py +99 -77
  95. sky/clouds/vsphere.py +51 -40
  96. sky/core.py +349 -139
  97. sky/dag.py +15 -0
  98. sky/dashboard/out/404.html +1 -1
  99. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  100. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
  102. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  103. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  105. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  106. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
  111. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  112. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  113. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  115. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  116. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  118. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  119. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  121. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  122. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  124. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  126. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  127. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  128. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  129. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  131. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  133. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  134. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  136. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  139. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  141. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  143. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
  144. sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  148. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
  149. sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  151. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  152. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  153. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  154. sky/dashboard/out/clusters/[cluster].html +1 -1
  155. sky/dashboard/out/clusters.html +1 -1
  156. sky/dashboard/out/config.html +1 -0
  157. sky/dashboard/out/index.html +1 -1
  158. sky/dashboard/out/infra/[context].html +1 -0
  159. sky/dashboard/out/infra.html +1 -0
  160. sky/dashboard/out/jobs/[job].html +1 -1
  161. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  162. sky/dashboard/out/jobs.html +1 -1
  163. sky/dashboard/out/users.html +1 -0
  164. sky/dashboard/out/volumes.html +1 -0
  165. sky/dashboard/out/workspace/new.html +1 -0
  166. sky/dashboard/out/workspaces/[name].html +1 -0
  167. sky/dashboard/out/workspaces.html +1 -0
  168. sky/data/data_utils.py +137 -1
  169. sky/data/mounting_utils.py +269 -84
  170. sky/data/storage.py +1451 -1807
  171. sky/data/storage_utils.py +43 -57
  172. sky/exceptions.py +132 -2
  173. sky/execution.py +206 -63
  174. sky/global_user_state.py +2374 -586
  175. sky/jobs/__init__.py +5 -0
  176. sky/jobs/client/sdk.py +242 -65
  177. sky/jobs/client/sdk_async.py +143 -0
  178. sky/jobs/constants.py +9 -8
  179. sky/jobs/controller.py +839 -277
  180. sky/jobs/file_content_utils.py +80 -0
  181. sky/jobs/log_gc.py +201 -0
  182. sky/jobs/recovery_strategy.py +398 -152
  183. sky/jobs/scheduler.py +315 -189
  184. sky/jobs/server/core.py +829 -255
  185. sky/jobs/server/server.py +156 -115
  186. sky/jobs/server/utils.py +136 -0
  187. sky/jobs/state.py +2092 -701
  188. sky/jobs/utils.py +1242 -160
  189. sky/logs/__init__.py +21 -0
  190. sky/logs/agent.py +108 -0
  191. sky/logs/aws.py +243 -0
  192. sky/logs/gcp.py +91 -0
  193. sky/metrics/__init__.py +0 -0
  194. sky/metrics/utils.py +443 -0
  195. sky/models.py +78 -1
  196. sky/optimizer.py +164 -70
  197. sky/provision/__init__.py +90 -4
  198. sky/provision/aws/config.py +147 -26
  199. sky/provision/aws/instance.py +135 -50
  200. sky/provision/azure/instance.py +10 -5
  201. sky/provision/common.py +13 -1
  202. sky/provision/cudo/cudo_machine_type.py +1 -1
  203. sky/provision/cudo/cudo_utils.py +14 -8
  204. sky/provision/cudo/cudo_wrapper.py +72 -71
  205. sky/provision/cudo/instance.py +10 -6
  206. sky/provision/do/instance.py +10 -6
  207. sky/provision/do/utils.py +4 -3
  208. sky/provision/docker_utils.py +114 -23
  209. sky/provision/fluidstack/instance.py +13 -8
  210. sky/provision/gcp/__init__.py +1 -0
  211. sky/provision/gcp/config.py +301 -19
  212. sky/provision/gcp/constants.py +218 -0
  213. sky/provision/gcp/instance.py +36 -8
  214. sky/provision/gcp/instance_utils.py +18 -4
  215. sky/provision/gcp/volume_utils.py +247 -0
  216. sky/provision/hyperbolic/__init__.py +12 -0
  217. sky/provision/hyperbolic/config.py +10 -0
  218. sky/provision/hyperbolic/instance.py +437 -0
  219. sky/provision/hyperbolic/utils.py +373 -0
  220. sky/provision/instance_setup.py +93 -14
  221. sky/provision/kubernetes/__init__.py +5 -0
  222. sky/provision/kubernetes/config.py +9 -52
  223. sky/provision/kubernetes/constants.py +17 -0
  224. sky/provision/kubernetes/instance.py +789 -247
  225. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  226. sky/provision/kubernetes/network.py +27 -17
  227. sky/provision/kubernetes/network_utils.py +40 -43
  228. sky/provision/kubernetes/utils.py +1192 -531
  229. sky/provision/kubernetes/volume.py +282 -0
  230. sky/provision/lambda_cloud/instance.py +22 -16
  231. sky/provision/nebius/constants.py +50 -0
  232. sky/provision/nebius/instance.py +19 -6
  233. sky/provision/nebius/utils.py +196 -91
  234. sky/provision/oci/instance.py +10 -5
  235. sky/provision/paperspace/instance.py +10 -7
  236. sky/provision/paperspace/utils.py +1 -1
  237. sky/provision/primeintellect/__init__.py +10 -0
  238. sky/provision/primeintellect/config.py +11 -0
  239. sky/provision/primeintellect/instance.py +454 -0
  240. sky/provision/primeintellect/utils.py +398 -0
  241. sky/provision/provisioner.py +110 -36
  242. sky/provision/runpod/__init__.py +5 -0
  243. sky/provision/runpod/instance.py +27 -6
  244. sky/provision/runpod/utils.py +51 -18
  245. sky/provision/runpod/volume.py +180 -0
  246. sky/provision/scp/__init__.py +15 -0
  247. sky/provision/scp/config.py +93 -0
  248. sky/provision/scp/instance.py +531 -0
  249. sky/provision/seeweb/__init__.py +11 -0
  250. sky/provision/seeweb/config.py +13 -0
  251. sky/provision/seeweb/instance.py +807 -0
  252. sky/provision/shadeform/__init__.py +11 -0
  253. sky/provision/shadeform/config.py +12 -0
  254. sky/provision/shadeform/instance.py +351 -0
  255. sky/provision/shadeform/shadeform_utils.py +83 -0
  256. sky/provision/ssh/__init__.py +18 -0
  257. sky/provision/vast/instance.py +13 -8
  258. sky/provision/vast/utils.py +10 -7
  259. sky/provision/vsphere/common/vim_utils.py +1 -2
  260. sky/provision/vsphere/instance.py +15 -10
  261. sky/provision/vsphere/vsphere_utils.py +9 -19
  262. sky/py.typed +0 -0
  263. sky/resources.py +844 -118
  264. sky/schemas/__init__.py +0 -0
  265. sky/schemas/api/__init__.py +0 -0
  266. sky/schemas/api/responses.py +225 -0
  267. sky/schemas/db/README +4 -0
  268. sky/schemas/db/env.py +90 -0
  269. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  270. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  271. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  272. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  273. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  274. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  275. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  276. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  277. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  278. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  279. sky/schemas/db/script.py.mako +28 -0
  280. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  281. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  282. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  283. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  284. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  285. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  286. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  287. sky/schemas/generated/__init__.py +0 -0
  288. sky/schemas/generated/autostopv1_pb2.py +36 -0
  289. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  290. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  291. sky/schemas/generated/jobsv1_pb2.py +86 -0
  292. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  293. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  294. sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
  295. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  296. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  297. sky/schemas/generated/servev1_pb2.py +58 -0
  298. sky/schemas/generated/servev1_pb2.pyi +115 -0
  299. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  300. sky/serve/autoscalers.py +357 -5
  301. sky/serve/client/impl.py +310 -0
  302. sky/serve/client/sdk.py +47 -139
  303. sky/serve/client/sdk_async.py +130 -0
  304. sky/serve/constants.py +10 -8
  305. sky/serve/controller.py +64 -19
  306. sky/serve/load_balancer.py +106 -60
  307. sky/serve/load_balancing_policies.py +115 -1
  308. sky/serve/replica_managers.py +273 -162
  309. sky/serve/serve_rpc_utils.py +179 -0
  310. sky/serve/serve_state.py +554 -251
  311. sky/serve/serve_utils.py +733 -220
  312. sky/serve/server/core.py +66 -711
  313. sky/serve/server/impl.py +1093 -0
  314. sky/serve/server/server.py +21 -18
  315. sky/serve/service.py +133 -48
  316. sky/serve/service_spec.py +135 -16
  317. sky/serve/spot_placer.py +3 -0
  318. sky/server/auth/__init__.py +0 -0
  319. sky/server/auth/authn.py +50 -0
  320. sky/server/auth/loopback.py +38 -0
  321. sky/server/auth/oauth2_proxy.py +200 -0
  322. sky/server/common.py +475 -181
  323. sky/server/config.py +81 -23
  324. sky/server/constants.py +44 -6
  325. sky/server/daemons.py +229 -0
  326. sky/server/html/token_page.html +185 -0
  327. sky/server/metrics.py +160 -0
  328. sky/server/requests/executor.py +528 -138
  329. sky/server/requests/payloads.py +351 -17
  330. sky/server/requests/preconditions.py +21 -17
  331. sky/server/requests/process.py +112 -29
  332. sky/server/requests/request_names.py +120 -0
  333. sky/server/requests/requests.py +817 -224
  334. sky/server/requests/serializers/decoders.py +82 -31
  335. sky/server/requests/serializers/encoders.py +140 -22
  336. sky/server/requests/threads.py +106 -0
  337. sky/server/rest.py +417 -0
  338. sky/server/server.py +1290 -284
  339. sky/server/state.py +20 -0
  340. sky/server/stream_utils.py +345 -57
  341. sky/server/uvicorn.py +217 -3
  342. sky/server/versions.py +270 -0
  343. sky/setup_files/MANIFEST.in +5 -0
  344. sky/setup_files/alembic.ini +156 -0
  345. sky/setup_files/dependencies.py +136 -31
  346. sky/setup_files/setup.py +44 -42
  347. sky/sky_logging.py +102 -5
  348. sky/skylet/attempt_skylet.py +1 -0
  349. sky/skylet/autostop_lib.py +129 -8
  350. sky/skylet/configs.py +27 -20
  351. sky/skylet/constants.py +171 -19
  352. sky/skylet/events.py +105 -21
  353. sky/skylet/job_lib.py +335 -104
  354. sky/skylet/log_lib.py +297 -18
  355. sky/skylet/log_lib.pyi +44 -1
  356. sky/skylet/ray_patches/__init__.py +17 -3
  357. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  358. sky/skylet/ray_patches/cli.py.diff +19 -0
  359. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  360. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  361. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  362. sky/skylet/ray_patches/updater.py.diff +18 -0
  363. sky/skylet/ray_patches/worker.py.diff +41 -0
  364. sky/skylet/services.py +564 -0
  365. sky/skylet/skylet.py +63 -4
  366. sky/skylet/subprocess_daemon.py +103 -29
  367. sky/skypilot_config.py +506 -99
  368. sky/ssh_node_pools/__init__.py +1 -0
  369. sky/ssh_node_pools/core.py +135 -0
  370. sky/ssh_node_pools/server.py +233 -0
  371. sky/task.py +621 -137
  372. sky/templates/aws-ray.yml.j2 +10 -3
  373. sky/templates/azure-ray.yml.j2 +1 -1
  374. sky/templates/do-ray.yml.j2 +1 -1
  375. sky/templates/gcp-ray.yml.j2 +57 -0
  376. sky/templates/hyperbolic-ray.yml.j2 +67 -0
  377. sky/templates/jobs-controller.yaml.j2 +27 -24
  378. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  379. sky/templates/kubernetes-ray.yml.j2 +607 -51
  380. sky/templates/lambda-ray.yml.j2 +1 -1
  381. sky/templates/nebius-ray.yml.j2 +33 -12
  382. sky/templates/paperspace-ray.yml.j2 +1 -1
  383. sky/templates/primeintellect-ray.yml.j2 +71 -0
  384. sky/templates/runpod-ray.yml.j2 +9 -1
  385. sky/templates/scp-ray.yml.j2 +3 -50
  386. sky/templates/seeweb-ray.yml.j2 +108 -0
  387. sky/templates/shadeform-ray.yml.j2 +72 -0
  388. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  389. sky/templates/websocket_proxy.py +178 -18
  390. sky/usage/usage_lib.py +18 -11
  391. sky/users/__init__.py +0 -0
  392. sky/users/model.conf +15 -0
  393. sky/users/permission.py +387 -0
  394. sky/users/rbac.py +121 -0
  395. sky/users/server.py +720 -0
  396. sky/users/token_service.py +218 -0
  397. sky/utils/accelerator_registry.py +34 -5
  398. sky/utils/admin_policy_utils.py +84 -38
  399. sky/utils/annotations.py +16 -5
  400. sky/utils/asyncio_utils.py +78 -0
  401. sky/utils/auth_utils.py +153 -0
  402. sky/utils/benchmark_utils.py +60 -0
  403. sky/utils/cli_utils/status_utils.py +159 -86
  404. sky/utils/cluster_utils.py +31 -9
  405. sky/utils/command_runner.py +354 -68
  406. sky/utils/command_runner.pyi +93 -3
  407. sky/utils/common.py +35 -8
  408. sky/utils/common_utils.py +310 -87
  409. sky/utils/config_utils.py +87 -5
  410. sky/utils/context.py +402 -0
  411. sky/utils/context_utils.py +222 -0
  412. sky/utils/controller_utils.py +264 -89
  413. sky/utils/dag_utils.py +31 -12
  414. sky/utils/db/__init__.py +0 -0
  415. sky/utils/db/db_utils.py +470 -0
  416. sky/utils/db/migration_utils.py +133 -0
  417. sky/utils/directory_utils.py +12 -0
  418. sky/utils/env_options.py +13 -0
  419. sky/utils/git.py +567 -0
  420. sky/utils/git_clone.sh +460 -0
  421. sky/utils/infra_utils.py +195 -0
  422. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  423. sky/utils/kubernetes/config_map_utils.py +133 -0
  424. sky/utils/kubernetes/create_cluster.sh +13 -27
  425. sky/utils/kubernetes/delete_cluster.sh +10 -7
  426. sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
  427. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  428. sky/utils/kubernetes/generate_kind_config.py +6 -66
  429. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  430. sky/utils/kubernetes/gpu_labeler.py +5 -5
  431. sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
  432. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  433. sky/utils/kubernetes/ssh_utils.py +221 -0
  434. sky/utils/kubernetes_enums.py +8 -15
  435. sky/utils/lock_events.py +94 -0
  436. sky/utils/locks.py +368 -0
  437. sky/utils/log_utils.py +300 -6
  438. sky/utils/perf_utils.py +22 -0
  439. sky/utils/resource_checker.py +298 -0
  440. sky/utils/resources_utils.py +249 -32
  441. sky/utils/rich_utils.py +213 -37
  442. sky/utils/schemas.py +905 -147
  443. sky/utils/serialize_utils.py +16 -0
  444. sky/utils/status_lib.py +10 -0
  445. sky/utils/subprocess_utils.py +38 -15
  446. sky/utils/tempstore.py +70 -0
  447. sky/utils/timeline.py +24 -52
  448. sky/utils/ux_utils.py +84 -15
  449. sky/utils/validator.py +11 -1
  450. sky/utils/volume.py +86 -0
  451. sky/utils/yaml_utils.py +111 -0
  452. sky/volumes/__init__.py +13 -0
  453. sky/volumes/client/__init__.py +0 -0
  454. sky/volumes/client/sdk.py +149 -0
  455. sky/volumes/server/__init__.py +0 -0
  456. sky/volumes/server/core.py +258 -0
  457. sky/volumes/server/server.py +122 -0
  458. sky/volumes/volume.py +212 -0
  459. sky/workspaces/__init__.py +0 -0
  460. sky/workspaces/core.py +655 -0
  461. sky/workspaces/server.py +101 -0
  462. sky/workspaces/utils.py +56 -0
  463. skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
  464. skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
  465. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
  466. sky/benchmark/benchmark_state.py +0 -256
  467. sky/benchmark/benchmark_utils.py +0 -641
  468. sky/clouds/service_catalog/constants.py +0 -7
  469. sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
  470. sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
  471. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  472. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  473. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  474. sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
  475. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  476. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  477. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  478. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  479. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  480. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  481. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  482. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
  483. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  484. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  485. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  486. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
  487. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  488. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  489. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  490. sky/jobs/dashboard/dashboard.py +0 -223
  491. sky/jobs/dashboard/static/favicon.ico +0 -0
  492. sky/jobs/dashboard/templates/index.html +0 -831
  493. sky/jobs/server/dashboard_utils.py +0 -69
  494. sky/skylet/providers/scp/__init__.py +0 -2
  495. sky/skylet/providers/scp/config.py +0 -149
  496. sky/skylet/providers/scp/node_provider.py +0 -578
  497. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  498. sky/utils/db_utils.py +0 -100
  499. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  500. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  501. skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
  502. skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
  503. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  504. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  505. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  506. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  507. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  508. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  509. /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  510. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  511. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  512. {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,387 @@
1
+ """Permission service for SkyPilot API Server."""
2
+ import contextlib
3
+ import hashlib
4
+ import logging
5
+ import os
6
+ from typing import Generator, List
7
+
8
+ import casbin
9
+ import filelock
10
+ import sqlalchemy_adapter
11
+
12
+ from sky import global_user_state
13
+ from sky import models
14
+ from sky import sky_logging
15
+ from sky.skylet import constants
16
+ from sky.users import rbac
17
+ from sky.utils import annotations
18
+ from sky.utils import common_utils
19
+ from sky.utils.db import db_utils
20
+
21
# Set the level of casbin's internal loggers to sky_logging.ERROR so that
# their lower-severity records are not emitted.
logging.getLogger('casbin.policy').setLevel(sky_logging.ERROR)
logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
logging.getLogger('casbin.model').setLevel(sky_logging.ERROR)
logging.getLogger('casbin.rbac').setLevel(sky_logging.ERROR)
logger = sky_logging.init_logger(__name__)

# File lock for the policy update: location of the lock file and a timeout
# value in seconds.
# NOTE(review): both constants are presumably consumed by _policy_lock(),
# which is defined outside this view -- confirm.
POLICY_UPDATE_LOCK_PATH = os.path.expanduser('~/.sky/.policy_update.lock')
POLICY_UPDATE_LOCK_TIMEOUT_SECONDS = 20

# Module-level reference to the PermissionService that created the casbin
# enforcer. PermissionService._lazy_initialize() assigns it once and later
# instances copy their enforcer from it.
_enforcer_instance = None
32
+
33
+
34
+ class PermissionService:
35
+ """Permission service for SkyPilot API Server."""
36
+
37
def __init__(self) -> None:
    """Create the service without building the casbin enforcer yet."""
    # Filled in on demand by _lazy_initialize().
    self.enforcer = None
39
+
40
def _lazy_initialize(self):
    """Attach a casbin enforcer to this service the first time it is needed.

    Does nothing when ``self.enforcer`` is already set. Otherwise, while
    holding the policy lock, either copies the enforcer from the
    module-level ``_enforcer_instance`` or, when none exists yet, builds a
    new enforcer from the database and ``model.conf``, records this object
    as ``_enforcer_instance`` and runs the policy and basic-auth
    initialization steps.
    """
    global _enforcer_instance

    if self.enforcer is not None:
        return

    with _policy_lock():
        if _enforcer_instance is not None:
            # An enforcer was already created through another service
            # object; reuse it instead of building a second one.
            self.enforcer = _enforcer_instance.enforcer
            return

        db_engine = global_user_state.initialize_and_get_db()
        # Make sure the tables declared by the casbin SQLAlchemy adapter
        # exist before the adapter is constructed.
        db_utils.add_all_tables_to_db_sqlalchemy(
            sqlalchemy_adapter.Base.metadata, db_engine)
        policy_adapter = sqlalchemy_adapter.Adapter(
            db_engine, db_class=sqlalchemy_adapter.CasbinRule)
        # model.conf is looked up in the directory of this module.
        model_conf = os.path.join(os.path.dirname(__file__), 'model.conf')
        self.enforcer = casbin.Enforcer(model_conf, policy_adapter)
        # Only set the enforcer instance once the enforcer
        # is successfully initialized, if we change it and then fail
        # we will set it to None and all subsequent calls will fail.
        # NOTE(review): the instance is recorded before the two
        # initialization steps below run, so an exception raised by them
        # leaves it set -- confirm this ordering is intended.
        _enforcer_instance = self
        self._maybe_initialize_policies()
        self._maybe_initialize_basic_auth_user()
63
+
64
def _maybe_initialize_basic_auth_user(self) -> None:
    """Create the initial basic-auth admin user when one is configured.

    Reads the environment variable named by
    ``constants.SKYPILOT_INITIAL_BASIC_AUTH``, expected in the form
    ``<username>:<password>`` (split on the first ``:``). When the variable
    is unset or empty nothing happens. When the value is malformed (no
    ``:``, or an empty username or password) a warning is logged and the
    value is ignored. Otherwise, unless a user with the derived id already
    exists, the user is stored, granted the admin role in the enforcer and
    the policy is saved.
    """
    basic_auth = os.environ.get(constants.SKYPILOT_INITIAL_BASIC_AUTH)
    if not basic_auth:
        return

    # partition() never raises: a value without ':' produces an empty
    # password and is rejected below, instead of failing with ValueError
    # on tuple unpacking as split(':', 1) would.
    username, _, password = basic_auth.partition(':')
    if not username or not password:
        # The value itself is deliberately not logged; it may contain a
        # credential.
        logger.warning(
            f'Ignoring {constants.SKYPILOT_INITIAL_BASIC_AUTH}: expected '
            'the form <username>:<password> with both parts non-empty')
        return

    # The user id is a truncated MD5 hex digest of the username.
    user_hash = hashlib.md5(
        username.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
    if global_user_state.get_user(user_hash):
        logger.debug(f'Basic auth user {username} already exists')
        return

    # NOTE(review): the password is handed to models.User exactly as read
    # from the environment -- confirm whether it is expected to be hashed
    # already or is hashed downstream.
    global_user_state.add_or_update_user(
        models.User(id=user_hash, name=username, password=password))
    self.enforcer.add_grouping_policy(user_hash, rbac.RoleName.ADMIN.value)
    self.enforcer.save_policy()
    logger.info(f'Basic auth user {username} initialized')
83
+
84
def _maybe_initialize_policies(self) -> None:
    """Make sure the configured permission policies are present.

    Loads the current policy, derives the rules expected from the role
    blocklists and the workspace permissions, and rebuilds the permission
    policies when any expected rule is missing. Afterwards every known
    user is passed through ``_add_user_if_not_exists_no_lock``. The policy
    is saved only when something was changed.
    """
    logger.debug(f'Initializing policies in process: {os.getpid()}')
    self._load_policy_no_lock()

    # Permission policies currently held by the enforcer.
    current_rules = self.enforcer.get_policy()

    # Rules derived from each role's blocklist: (role, path, method).
    role_rules = []
    for role, spec in rbac.get_role_permissions().items():
        role_perms = spec['permissions']
        if not role_perms or 'blocklist' not in role_perms:
            continue
        for entry in role_perms['blocklist']:
            role_rules.append((role, entry['path'], entry['method']))

    # Rules derived from the workspace policy: (user, workspace).
    workspace_policy_permissions = rbac.get_workspace_policy_permissions()
    logger.debug(f'Workspace policy permissions from config: '
                 f'{workspace_policy_permissions}')
    workspace_rules = []
    for workspace_name, users in workspace_policy_permissions.items():
        for user in users:
            workspace_rules.append((user, workspace_name))
            logger.debug(f'Expected workspace policy: user={user}, '
                         f'workspace={workspace_name}')

    # Expected rules in the list form they are compared in against the
    # enforcer's policies.
    expected_rules = [[role, path, method]
                      for role, path, method in role_rules]
    expected_rules.extend(
        [user, workspace_name, '*'] for user, workspace_name in workspace_rules)

    policy_updated = False
    if all(rule in current_rules for rule in expected_rules):
        logger.debug('Policies already exist, skipping initialization')
    else:
        logger.debug('Policies not found or incomplete, initializing...')
        # Only clear p policies (permission policies),
        # keep g policies (role policies)
        self.enforcer.remove_filtered_policy(0)
        for role, path, method in role_rules:
            logger.debug(f'Adding role policy: role={role}, '
                         f'path={path}, method={method}')
            self.enforcer.add_policy(role, path, method)
            policy_updated = True
        for user, workspace_name in workspace_rules:
            logger.debug(f'Initializing workspace policy: user={user}, '
                         f'workspace={workspace_name}')
            self.enforcer.add_policy(user, workspace_name, '*')
            policy_updated = True
        logger.debug('Policies initialized successfully')

    # Always ensure users have default roles (this is idempotent). The
    # helper is invoked for every user, whether or not an earlier call
    # already reported a change.
    for known_user in global_user_state.get_all_users():
        if self._add_user_if_not_exists_no_lock(known_user.id):
            policy_updated = True

    if policy_updated:
        self.enforcer.save_policy()
161
+
162
def add_user_if_not_exists(self, user_id: str) -> None:
    """Give ``user_id`` the default role if it currently has no role.

    The check-and-add runs while holding the policy update lock.

    Args:
        user_id: ID of the user to register with the enforcer.
    """
    self._lazy_initialize()
    with _policy_lock():
        # Whether a role was actually added is not reported to callers.
        _ = self._add_user_if_not_exists_no_lock(user_id)
167
+
168
def _add_user_if_not_exists_no_lock(self, user_id: str) -> bool:
    """Assign the default role to a user that has none, without locking.

    Callers are expected to hold the policy lock where one is needed.

    Args:
        user_id: ID of the user to check.

    Returns:
        True if a role assignment was added, False if the user already
        had at least one role.
    """
    if self.enforcer.get_roles_for_user(user_id):
        return False
    self.enforcer.add_grouping_policy(user_id, rbac.get_default_role())
    return True
179
+
180
def delete_user(self, user_id: str) -> None:
    """Delete all role relationships of a user.

    The policy is reloaded under the policy lock, every role currently
    assigned to the user is removed, and the policy is saved. Previously
    only the first role was removed, which left stale assignments behind
    for a user holding more than one role.

    Args:
        user_id: ID of the user whose role assignments are removed.
    """
    self._lazy_initialize()
    with _policy_lock():
        # Work on the latest stored policy.
        self._load_policy_no_lock()
        # Query the enforcer directly instead of going through
        # get_user_roles; the lock is already held here.
        current_roles = self.enforcer.get_roles_for_user(user_id)
        if not current_roles:
            logger.debug(f'User {user_id} has no roles')
            return
        # Iterate over a copy in case the enforcer returns a live view
        # that changes as assignments are removed.
        for role in list(current_roles):
            self.enforcer.remove_grouping_policy(user_id, role)
        self.enforcer.save_policy()
193
+
194
def update_role(self, user_id: str, new_role: str) -> None:
    """Replace the user's (first) role with ``new_role``.

    Runs under the policy lock on a freshly loaded policy. If the user's
    first role already equals ``new_role`` nothing is changed or saved.
    If the user has no role, ``new_role`` is simply added.

    Args:
        user_id: ID of the user to update.
        new_role: Role name to assign.
    """
    self._lazy_initialize()
    with _policy_lock():
        self._load_policy_no_lock()
        # Query the enforcer directly instead of going through
        # get_user_roles; the lock is already held here.
        existing_roles = self.enforcer.get_roles_for_user(user_id)
        if existing_roles:
            # TODO(hailong): how to handle multiple roles?
            previous_role = existing_roles[0]
            if previous_role == new_role:
                logger.debug(f'User {user_id} already has role {new_role}')
                return
            self.enforcer.remove_grouping_policy(user_id, previous_role)
        else:
            logger.debug(f'User {user_id} has no roles')

        # Assign the new role and persist the change.
        self.enforcer.add_grouping_policy(user_id, new_role)
        self.enforcer.save_policy()
215
+
216
def get_user_roles(self, user_id: str) -> List[str]:
    """Return the roles the enforcer reports for a user.

    The policy is reloaded from storage (without taking the policy lock)
    before the lookup, so the answer reflects the latest stored state.

    NOTE(review): the result is whatever the enforcer's
    ``get_roles_for_user`` returns; whether that includes inherited roles
    depends on the enforcer -- confirm before relying on it.

    Args:
        user_id: The user ID to get roles for.

    Returns:
        A list of role names for the user (empty if the user has none).
    """
    self._lazy_initialize()
    self._load_policy_no_lock()
    roles = self.enforcer.get_roles_for_user(user_id)
    return roles
232
+
233
def get_users_for_role(self, role: str) -> List[str]:
    """Return the users the enforcer reports for ``role``.

    The policy is reloaded from storage (without taking the policy lock)
    before the lookup.

    Args:
        role: Role name to look up.

    Returns:
        A list of user IDs assigned to the role.
    """
    self._lazy_initialize()
    self._load_policy_no_lock()
    members = self.enforcer.get_users_for_role(role)
    return members
238
+
239
def check_endpoint_permission(self, user_id: str, path: str,
                              method: str) -> bool:
    """Ask the enforcer whether ``user_id`` may call ``method`` on ``path``.

    Args:
        user_id: ID of the requesting user.
        path: Request path of the endpoint.
        method: HTTP method of the request.

    Returns:
        The enforcer's decision for ``(user_id, path, method)``.
    """
    # Deliberately neither reloads the policy nor takes the policy lock:
    # this is a read-only check on the hot path of every request, and
    # updating the endpoint policy is not supported. A stale policy is
    # acceptable as long as it is eventually consistent.
    self._lazy_initialize()
    allowed = self.enforcer.enforce(user_id, path, method)
    return allowed
250
+
251
def _load_policy_no_lock(self) -> None:
    """Reload the enforcer's policy from storage without taking the lock.

    Callers that need mutual exclusion must hold the policy lock.
    """
    self.enforcer.load_policy()
254
+
255
def load_policy(self):
    """Reload the policy from storage while holding the policy lock."""
    self._lazy_initialize()
    # Serialize the reload against concurrent policy updates.
    with _policy_lock():
        self._load_policy_no_lock()
260
+
261
# Few users work with multiple workspaces today, so a cache of 5 entries
# per request should be more than enough.
@annotations.lru_cache(scope='request', maxsize=5)
def check_workspace_permission(self, user_id: str,
                               workspace_name: str) -> bool:
    """Check whether a user may access a workspace.

    Outside the API server every access is allowed, because the check
    has already been done on the API server. On the API server, admins
    are always allowed; everyone else is allowed only if the enforcer
    accepts ``(user_id, workspace_name, '*')``.

    Private workspaces therefore require an explicit policy for the
    user, while public workspaces are granted through a wildcard ('*')
    policy.

    Args:
        user_id: ID of the user requesting access.
        workspace_name: Name of the workspace being accessed.

    Returns:
        True if access is allowed, False otherwise.
    """
    self._lazy_initialize()
    if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
        # Not running on the API server: allow everything.
        return True

    roles = self.get_user_roles(user_id)
    if rbac.RoleName.ADMIN.value in roles:
        return True

    # The Casbin model matcher already handles the wildcard '*' case:
    # m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj &&
    # r.act == p.act
    # so a policy ('*', workspace_name, '*') matches any user.
    allowed = self.enforcer.enforce(user_id, workspace_name, '*')
    logger.debug(f'Workspace permission check: user={user_id}, '
                 f'workspace={workspace_name}, result={allowed}')
    return allowed
293
+
294
def check_service_account_token_permission(self, user_id: str,
                                           token_owner_id: str,
                                           action: str) -> bool:
    """Check whether a user may act on a service account token.

    The decision does not depend on ``action``: owners may always manage
    their own tokens, admins may manage any token, and everyone else is
    refused.

    Args:
        user_id: The ID of the user requesting the action
        token_owner_id: The ID of the user who owns the token
        action: The action being performed (e.g., 'delete', 'view');
            currently unused.

    Returns:
        True if the user has permission, False otherwise
    """
    del action  # Accepted for interface completeness only.

    # Owners can always manage their own tokens.
    is_owner = user_id == token_owner_id
    if is_owner:
        return True

    # Anyone else needs the admin role.
    requester_roles = self.get_user_roles(user_id)
    return rbac.RoleName.ADMIN.value in requester_roles
322
+
323
def add_workspace_policy(self, workspace_name: str,
                         users: List[str]) -> None:
    """Add workspace policy.

    Under the policy lock, the policy is reloaded from storage, one
    ``(user, workspace_name, '*')`` rule is added per user, and the
    policy is saved.

    Args:
        workspace_name: Name of the workspace
        users: List of user IDs that should have access.
            For public workspaces, this should be ['*'].
            For private workspaces, this should be specific user IDs.
    """
    self._lazy_initialize()
    with _policy_lock():
        # Refresh the in-memory policy before modifying and saving it,
        # as update_workspace_policy, update_role and delete_user do, so
        # that changes made by other processes are not overwritten by a
        # stale view. (Assumes save_policy persists the whole in-memory
        # policy set -- TODO confirm.)
        self._load_policy_no_lock()
        for user in users:
            logger.debug(f'Adding workspace policy: user={user}, '
                         f'workspace={workspace_name}')
            self.enforcer.add_policy(user, workspace_name, '*')
        self.enforcer.save_policy()
340
+
341
def update_workspace_policy(self, workspace_name: str,
                            users: List[str]) -> None:
    """Replace the access rules of a workspace.

    Under the policy lock, the policy is reloaded, every existing rule
    whose object is ``workspace_name`` is removed, one
    ``(user, workspace_name, '*')`` rule is added per user, and the
    policy is saved.

    Args:
        workspace_name: Name of the workspace
        users: List of user IDs that should have access.
            For public workspaces, this should be ['*'].
            For private workspaces, this should be specific user IDs.
    """
    self._lazy_initialize()
    with _policy_lock():
        self._load_policy_no_lock()
        # Drop whatever rules currently exist for this workspace ...
        self.enforcer.remove_filtered_policy(1, workspace_name)
        # ... and install the new set.
        for member in users:
            logger.debug(f'Updating workspace policy: user={member}, '
                         f'workspace={workspace_name}')
            self.enforcer.add_policy(member, workspace_name, '*')
        self.enforcer.save_policy()
362
+
363
def remove_workspace_policy(self, workspace_name: str) -> None:
    """Remove workspace policy.

    Under the policy lock, the policy is reloaded from storage, every
    rule whose object is ``workspace_name`` is removed, and the policy is
    saved.

    Args:
        workspace_name: Name of the workspace whose rules are removed.
    """
    self._lazy_initialize()
    with _policy_lock():
        # Refresh the in-memory policy before modifying and saving it,
        # as update_workspace_policy, update_role and delete_user do, so
        # that changes made by other processes are not overwritten by a
        # stale view. (Assumes save_policy persists the whole in-memory
        # policy set -- TODO confirm.)
        self._load_policy_no_lock()
        self.enforcer.remove_filtered_policy(1, workspace_name)
        self.enforcer.save_policy()
369
+
370
+
371
@contextlib.contextmanager
def _policy_lock() -> Generator[None, None, None]:
    """Hold the policy update file lock for the duration of the block.

    Raises:
        RuntimeError: If a ``filelock.Timeout`` is raised, i.e. the lock at
            ``POLICY_UPDATE_LOCK_PATH`` could not be acquired within
            ``POLICY_UPDATE_LOCK_TIMEOUT_SECONDS``.
    """
    lock = filelock.FileLock(POLICY_UPDATE_LOCK_PATH,
                             POLICY_UPDATE_LOCK_TIMEOUT_SECONDS)
    try:
        with lock:
            yield
    except filelock.Timeout as e:
        message = ('Failed to reload policy due to a timeout '
                   'when trying to acquire the lock at '
                   f'{POLICY_UPDATE_LOCK_PATH}. '
                   'Please try again or manually remove the lock '
                   'file if you believe it is stale.')
        raise RuntimeError(message) from e
384
+
385
+
386
# Module-level singleton of PermissionService, created when this module is
# imported, for other modules to use.
permission_service = PermissionService()
sky/users/rbac.py ADDED
@@ -0,0 +1,121 @@
1
+ """RBAC (Role-Based Access Control) functionality for SkyPilot API Server."""
2
+
3
+ import enum
4
+ from typing import Dict, List
5
+
6
+ from sky import sky_logging
7
+ from sky import skypilot_config
8
+ from sky.skylet import constants
9
+ from sky.workspaces import utils as workspaces_utils
10
+
11
# Module-level logger, initialized with this module's name.
logger = sky_logging.init_logger(__name__)
12
+
13
# Default blocklist for the 'user' role: endpoints (path + HTTP method) that
# a plain user may not call. Covers the workspace config/update/create/delete
# operations and the user delete/create/import/export operations.
_DEFAULT_USER_BLOCKLIST = [
    {'path': '/workspaces/config', 'method': 'POST'},
    {'path': '/workspaces/update', 'method': 'POST'},
    {'path': '/workspaces/create', 'method': 'POST'},
    {'path': '/workspaces/delete', 'method': 'POST'},
    {'path': '/users/delete', 'method': 'POST'},
    {'path': '/users/create', 'method': 'POST'},
    {'path': '/users/import', 'method': 'POST'},
    {'path': '/users/export', 'method': 'GET'},
]
40
+
41
+
42
# Roles known to the RBAC layer.
class RoleName(str, enum.Enum):
    """Closed set of role names; members are also plain ``str`` values."""
    # Role that check_* helpers elsewhere treat as always permitted.
    ADMIN = 'admin'
    # Regular role, subject to the configured blocklist.
    USER = 'user'
46
+
47
+
48
def get_supported_roles() -> List[str]:
    """Return the string value of every ``RoleName`` member, in order."""
    return [member.value for member in RoleName]
50
+
51
+
52
def get_default_role() -> str:
    """Return the configured default role.

    Reads ``rbac.default_role`` from the SkyPilot config and falls back to
    the admin role when it is not set.
    """
    fallback_role = RoleName.ADMIN.value
    return skypilot_config.get_nested(('rbac', 'default_role'),
                                      default_value=fallback_role)
55
+
56
+
57
def get_role_permissions(
) -> Dict[str, Dict[str, Dict[str, List[Dict[str, str]]]]]:
    """Get all role permissions from config.

    Role names from the ``rbac.roles`` config section are lowercased.
    Names that are not supported roles are logged with a warning and
    left out of the result. If the config does not define the 'user'
    role, it is added with the default user blocklist.

    The result is a new dictionary; the mapping read from the config is
    not modified. (The previous implementation inserted the lowercased
    keys into the mapping while iterating over it, which raises
    ``RuntimeError`` when a new key is added, and it also returned the
    invalid roles it had warned about.)

    Returns:
        Dictionary containing all roles and their permissions configuration.
        Example:
        {
            'admin': {
                'permissions': {
                    'blocklist': []
                }
            },
            'user': {
                'permissions': {
                    'blocklist': [
                        {'path': '/workspaces/config', 'method': 'POST'},
                        {'path': '/workspaces/update', 'method': 'POST'}
                    ]
                }
            }
        }
    """
    # Get all roles from the config; tolerate an explicitly empty section.
    config_permissions = skypilot_config.get_nested(
        ('rbac', 'roles'), default_value={}) or {}
    supported_roles = get_supported_roles()

    role_permissions = {}
    for role, permissions in config_permissions.items():
        role_name = role.lower()
        if role_name not in supported_roles:
            logger.warning(f'Invalid role: {role_name}')
            continue
        role_permissions[role_name] = permissions

    # Add default roles if not present
    if 'user' not in role_permissions:
        role_permissions['user'] = {
            'permissions': {
                'blocklist': _DEFAULT_USER_BLOCKLIST
            }
        }
    return role_permissions
98
+
99
+
100
def get_workspace_policy_permissions() -> Dict[str, List[str]]:
    """Get workspace policy permissions from config.

    Reads the ``workspaces`` config section, makes sure the default
    workspace has an entry (an empty config if it was missing), and maps
    each workspace name to the users returned by
    ``workspaces_utils.get_workspace_users`` for its config.

    Returns:
        A dictionary of workspace policy permissions.
        Example:
        {
            'workspace1': ['user1-id', 'user2-id'],
            'workspace2': ['user3-id', 'user4-id'],
            'default': ['*']
        }
    """
    workspaces = skypilot_config.get_nested(('workspaces',),
                                            default_value={})
    # The default workspace always takes part, even when not configured.
    default_workspace = constants.SKYPILOT_DEFAULT_WORKSPACE
    if default_workspace not in workspaces:
        workspaces[default_workspace] = {}

    policy_by_workspace = {
        name: workspaces_utils.get_workspace_users(config)
        for name, config in workspaces.items()
    }
    logger.debug(f'Workspace policy permissions: {policy_by_workspace}')
    return policy_by_workspace