skypilot-nightly 1.0.0.dev20250502__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (546) hide show
  1. sky/__init__.py +22 -6
  2. sky/adaptors/aws.py +81 -16
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/hyperbolic.py +8 -0
  8. sky/adaptors/ibm.py +5 -2
  9. sky/adaptors/kubernetes.py +149 -18
  10. sky/adaptors/nebius.py +173 -30
  11. sky/adaptors/primeintellect.py +1 -0
  12. sky/adaptors/runpod.py +68 -0
  13. sky/adaptors/seeweb.py +183 -0
  14. sky/adaptors/shadeform.py +89 -0
  15. sky/admin_policy.py +187 -4
  16. sky/authentication.py +179 -225
  17. sky/backends/__init__.py +4 -2
  18. sky/backends/backend.py +22 -9
  19. sky/backends/backend_utils.py +1323 -397
  20. sky/backends/cloud_vm_ray_backend.py +1749 -1029
  21. sky/backends/docker_utils.py +1 -1
  22. sky/backends/local_docker_backend.py +11 -6
  23. sky/backends/task_codegen.py +633 -0
  24. sky/backends/wheel_utils.py +55 -9
  25. sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
  26. sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
  27. sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
  28. sky/{clouds/service_catalog → catalog}/common.py +90 -49
  29. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
  30. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  31. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +116 -80
  32. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
  33. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +70 -16
  34. sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
  35. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
  36. sky/catalog/data_fetchers/fetch_nebius.py +338 -0
  37. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  38. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  39. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  40. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
  41. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  42. sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
  43. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
  44. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
  45. sky/catalog/hyperbolic_catalog.py +136 -0
  46. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
  47. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
  48. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
  49. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
  50. sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
  51. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
  52. sky/catalog/primeintellect_catalog.py +95 -0
  53. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
  54. sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
  55. sky/catalog/seeweb_catalog.py +184 -0
  56. sky/catalog/shadeform_catalog.py +165 -0
  57. sky/catalog/ssh_catalog.py +167 -0
  58. sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
  59. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
  60. sky/check.py +533 -185
  61. sky/cli.py +5 -5975
  62. sky/client/{cli.py → cli/command.py} +2591 -1956
  63. sky/client/cli/deprecation_utils.py +99 -0
  64. sky/client/cli/flags.py +359 -0
  65. sky/client/cli/table_utils.py +322 -0
  66. sky/client/cli/utils.py +79 -0
  67. sky/client/common.py +78 -32
  68. sky/client/oauth.py +82 -0
  69. sky/client/sdk.py +1219 -319
  70. sky/client/sdk_async.py +827 -0
  71. sky/client/service_account_auth.py +47 -0
  72. sky/cloud_stores.py +82 -3
  73. sky/clouds/__init__.py +13 -0
  74. sky/clouds/aws.py +564 -164
  75. sky/clouds/azure.py +105 -83
  76. sky/clouds/cloud.py +140 -40
  77. sky/clouds/cudo.py +68 -50
  78. sky/clouds/do.py +66 -48
  79. sky/clouds/fluidstack.py +63 -44
  80. sky/clouds/gcp.py +339 -110
  81. sky/clouds/hyperbolic.py +293 -0
  82. sky/clouds/ibm.py +70 -49
  83. sky/clouds/kubernetes.py +570 -162
  84. sky/clouds/lambda_cloud.py +74 -54
  85. sky/clouds/nebius.py +210 -81
  86. sky/clouds/oci.py +88 -66
  87. sky/clouds/paperspace.py +61 -44
  88. sky/clouds/primeintellect.py +317 -0
  89. sky/clouds/runpod.py +164 -74
  90. sky/clouds/scp.py +89 -86
  91. sky/clouds/seeweb.py +477 -0
  92. sky/clouds/shadeform.py +400 -0
  93. sky/clouds/ssh.py +263 -0
  94. sky/clouds/utils/aws_utils.py +10 -4
  95. sky/clouds/utils/gcp_utils.py +87 -11
  96. sky/clouds/utils/oci_utils.py +38 -14
  97. sky/clouds/utils/scp_utils.py +231 -167
  98. sky/clouds/vast.py +99 -77
  99. sky/clouds/vsphere.py +51 -40
  100. sky/core.py +375 -173
  101. sky/dag.py +15 -0
  102. sky/dashboard/out/404.html +1 -1
  103. sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
  104. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  105. sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
  106. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  107. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  110. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  111. sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
  112. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  113. sky/dashboard/out/_next/static/chunks/3800-7b45f9fbb6308557.js +1 -0
  114. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  115. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  116. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  117. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  118. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  119. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  120. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  121. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  122. sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
  123. sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
  124. sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
  125. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  126. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  127. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  128. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  129. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  130. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  131. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  132. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  133. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  134. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  135. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  136. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  137. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  138. sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
  139. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  140. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
  141. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
  142. sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
  143. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  144. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  145. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
  146. sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
  147. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
  148. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
  149. sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
  150. sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
  151. sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
  152. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  153. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-84a40f8c7c627fe4.js +1 -0
  154. sky/dashboard/out/_next/static/chunks/pages/workspaces-531b2f8c4bf89f82.js +1 -0
  155. sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
  156. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  157. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  158. sky/dashboard/out/clusters/[cluster].html +1 -1
  159. sky/dashboard/out/clusters.html +1 -1
  160. sky/dashboard/out/config.html +1 -0
  161. sky/dashboard/out/index.html +1 -1
  162. sky/dashboard/out/infra/[context].html +1 -0
  163. sky/dashboard/out/infra.html +1 -0
  164. sky/dashboard/out/jobs/[job].html +1 -1
  165. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  166. sky/dashboard/out/jobs.html +1 -1
  167. sky/dashboard/out/users.html +1 -0
  168. sky/dashboard/out/volumes.html +1 -0
  169. sky/dashboard/out/workspace/new.html +1 -0
  170. sky/dashboard/out/workspaces/[name].html +1 -0
  171. sky/dashboard/out/workspaces.html +1 -0
  172. sky/data/data_utils.py +137 -1
  173. sky/data/mounting_utils.py +269 -84
  174. sky/data/storage.py +1460 -1807
  175. sky/data/storage_utils.py +43 -57
  176. sky/exceptions.py +126 -2
  177. sky/execution.py +216 -63
  178. sky/global_user_state.py +2390 -586
  179. sky/jobs/__init__.py +7 -0
  180. sky/jobs/client/sdk.py +300 -58
  181. sky/jobs/client/sdk_async.py +161 -0
  182. sky/jobs/constants.py +15 -8
  183. sky/jobs/controller.py +848 -275
  184. sky/jobs/file_content_utils.py +128 -0
  185. sky/jobs/log_gc.py +193 -0
  186. sky/jobs/recovery_strategy.py +402 -152
  187. sky/jobs/scheduler.py +314 -189
  188. sky/jobs/server/core.py +836 -255
  189. sky/jobs/server/server.py +156 -115
  190. sky/jobs/server/utils.py +136 -0
  191. sky/jobs/state.py +2109 -706
  192. sky/jobs/utils.py +1306 -215
  193. sky/logs/__init__.py +21 -0
  194. sky/logs/agent.py +108 -0
  195. sky/logs/aws.py +243 -0
  196. sky/logs/gcp.py +91 -0
  197. sky/metrics/__init__.py +0 -0
  198. sky/metrics/utils.py +453 -0
  199. sky/models.py +78 -1
  200. sky/optimizer.py +164 -70
  201. sky/provision/__init__.py +90 -4
  202. sky/provision/aws/config.py +147 -26
  203. sky/provision/aws/instance.py +136 -50
  204. sky/provision/azure/instance.py +11 -6
  205. sky/provision/common.py +13 -1
  206. sky/provision/cudo/cudo_machine_type.py +1 -1
  207. sky/provision/cudo/cudo_utils.py +14 -8
  208. sky/provision/cudo/cudo_wrapper.py +72 -71
  209. sky/provision/cudo/instance.py +10 -6
  210. sky/provision/do/instance.py +10 -6
  211. sky/provision/do/utils.py +4 -3
  212. sky/provision/docker_utils.py +140 -33
  213. sky/provision/fluidstack/instance.py +13 -8
  214. sky/provision/gcp/__init__.py +1 -0
  215. sky/provision/gcp/config.py +301 -19
  216. sky/provision/gcp/constants.py +218 -0
  217. sky/provision/gcp/instance.py +36 -8
  218. sky/provision/gcp/instance_utils.py +18 -4
  219. sky/provision/gcp/volume_utils.py +247 -0
  220. sky/provision/hyperbolic/__init__.py +12 -0
  221. sky/provision/hyperbolic/config.py +10 -0
  222. sky/provision/hyperbolic/instance.py +437 -0
  223. sky/provision/hyperbolic/utils.py +373 -0
  224. sky/provision/instance_setup.py +101 -20
  225. sky/provision/kubernetes/__init__.py +5 -0
  226. sky/provision/kubernetes/config.py +9 -52
  227. sky/provision/kubernetes/constants.py +17 -0
  228. sky/provision/kubernetes/instance.py +919 -280
  229. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  230. sky/provision/kubernetes/network.py +27 -17
  231. sky/provision/kubernetes/network_utils.py +44 -43
  232. sky/provision/kubernetes/utils.py +1221 -534
  233. sky/provision/kubernetes/volume.py +343 -0
  234. sky/provision/lambda_cloud/instance.py +22 -16
  235. sky/provision/nebius/constants.py +50 -0
  236. sky/provision/nebius/instance.py +19 -6
  237. sky/provision/nebius/utils.py +237 -137
  238. sky/provision/oci/instance.py +10 -5
  239. sky/provision/paperspace/instance.py +10 -7
  240. sky/provision/paperspace/utils.py +1 -1
  241. sky/provision/primeintellect/__init__.py +10 -0
  242. sky/provision/primeintellect/config.py +11 -0
  243. sky/provision/primeintellect/instance.py +454 -0
  244. sky/provision/primeintellect/utils.py +398 -0
  245. sky/provision/provisioner.py +117 -36
  246. sky/provision/runpod/__init__.py +5 -0
  247. sky/provision/runpod/instance.py +27 -6
  248. sky/provision/runpod/utils.py +51 -18
  249. sky/provision/runpod/volume.py +214 -0
  250. sky/provision/scp/__init__.py +15 -0
  251. sky/provision/scp/config.py +93 -0
  252. sky/provision/scp/instance.py +707 -0
  253. sky/provision/seeweb/__init__.py +11 -0
  254. sky/provision/seeweb/config.py +13 -0
  255. sky/provision/seeweb/instance.py +812 -0
  256. sky/provision/shadeform/__init__.py +11 -0
  257. sky/provision/shadeform/config.py +12 -0
  258. sky/provision/shadeform/instance.py +351 -0
  259. sky/provision/shadeform/shadeform_utils.py +83 -0
  260. sky/provision/ssh/__init__.py +18 -0
  261. sky/provision/vast/instance.py +13 -8
  262. sky/provision/vast/utils.py +10 -7
  263. sky/provision/volume.py +164 -0
  264. sky/provision/vsphere/common/ssl_helper.py +1 -1
  265. sky/provision/vsphere/common/vapiconnect.py +2 -1
  266. sky/provision/vsphere/common/vim_utils.py +4 -4
  267. sky/provision/vsphere/instance.py +15 -10
  268. sky/provision/vsphere/vsphere_utils.py +17 -20
  269. sky/py.typed +0 -0
  270. sky/resources.py +845 -119
  271. sky/schemas/__init__.py +0 -0
  272. sky/schemas/api/__init__.py +0 -0
  273. sky/schemas/api/responses.py +227 -0
  274. sky/schemas/db/README +4 -0
  275. sky/schemas/db/env.py +90 -0
  276. sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
  277. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  278. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  279. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  280. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  281. sky/schemas/db/global_user_state/006_provision_log.py +41 -0
  282. sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
  283. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  284. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  285. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  286. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  287. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  288. sky/schemas/db/script.py.mako +28 -0
  289. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  290. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  291. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  292. sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
  293. sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
  294. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  295. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  296. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  297. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  298. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  299. sky/schemas/generated/__init__.py +0 -0
  300. sky/schemas/generated/autostopv1_pb2.py +36 -0
  301. sky/schemas/generated/autostopv1_pb2.pyi +43 -0
  302. sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
  303. sky/schemas/generated/jobsv1_pb2.py +86 -0
  304. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  305. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  306. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  307. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  308. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  309. sky/schemas/generated/servev1_pb2.py +58 -0
  310. sky/schemas/generated/servev1_pb2.pyi +115 -0
  311. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  312. sky/serve/autoscalers.py +357 -5
  313. sky/serve/client/impl.py +310 -0
  314. sky/serve/client/sdk.py +47 -139
  315. sky/serve/client/sdk_async.py +130 -0
  316. sky/serve/constants.py +12 -9
  317. sky/serve/controller.py +68 -17
  318. sky/serve/load_balancer.py +106 -60
  319. sky/serve/load_balancing_policies.py +116 -2
  320. sky/serve/replica_managers.py +434 -249
  321. sky/serve/serve_rpc_utils.py +179 -0
  322. sky/serve/serve_state.py +569 -257
  323. sky/serve/serve_utils.py +775 -265
  324. sky/serve/server/core.py +66 -711
  325. sky/serve/server/impl.py +1093 -0
  326. sky/serve/server/server.py +21 -18
  327. sky/serve/service.py +192 -89
  328. sky/serve/service_spec.py +144 -20
  329. sky/serve/spot_placer.py +3 -0
  330. sky/server/auth/__init__.py +0 -0
  331. sky/server/auth/authn.py +50 -0
  332. sky/server/auth/loopback.py +38 -0
  333. sky/server/auth/oauth2_proxy.py +202 -0
  334. sky/server/common.py +478 -182
  335. sky/server/config.py +85 -23
  336. sky/server/constants.py +44 -6
  337. sky/server/daemons.py +295 -0
  338. sky/server/html/token_page.html +185 -0
  339. sky/server/metrics.py +160 -0
  340. sky/server/middleware_utils.py +166 -0
  341. sky/server/requests/executor.py +558 -138
  342. sky/server/requests/payloads.py +364 -24
  343. sky/server/requests/preconditions.py +21 -17
  344. sky/server/requests/process.py +112 -29
  345. sky/server/requests/request_names.py +121 -0
  346. sky/server/requests/requests.py +822 -226
  347. sky/server/requests/serializers/decoders.py +82 -31
  348. sky/server/requests/serializers/encoders.py +140 -22
  349. sky/server/requests/threads.py +117 -0
  350. sky/server/rest.py +455 -0
  351. sky/server/server.py +1309 -285
  352. sky/server/state.py +20 -0
  353. sky/server/stream_utils.py +327 -61
  354. sky/server/uvicorn.py +217 -3
  355. sky/server/versions.py +270 -0
  356. sky/setup_files/MANIFEST.in +11 -1
  357. sky/setup_files/alembic.ini +160 -0
  358. sky/setup_files/dependencies.py +139 -31
  359. sky/setup_files/setup.py +44 -42
  360. sky/sky_logging.py +114 -7
  361. sky/skylet/attempt_skylet.py +106 -24
  362. sky/skylet/autostop_lib.py +129 -8
  363. sky/skylet/configs.py +29 -20
  364. sky/skylet/constants.py +216 -25
  365. sky/skylet/events.py +101 -21
  366. sky/skylet/job_lib.py +345 -164
  367. sky/skylet/log_lib.py +297 -18
  368. sky/skylet/log_lib.pyi +44 -1
  369. sky/skylet/providers/ibm/node_provider.py +12 -8
  370. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  371. sky/skylet/ray_patches/__init__.py +17 -3
  372. sky/skylet/ray_patches/autoscaler.py.diff +18 -0
  373. sky/skylet/ray_patches/cli.py.diff +19 -0
  374. sky/skylet/ray_patches/command_runner.py.diff +17 -0
  375. sky/skylet/ray_patches/log_monitor.py.diff +20 -0
  376. sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
  377. sky/skylet/ray_patches/updater.py.diff +18 -0
  378. sky/skylet/ray_patches/worker.py.diff +41 -0
  379. sky/skylet/runtime_utils.py +21 -0
  380. sky/skylet/services.py +568 -0
  381. sky/skylet/skylet.py +72 -4
  382. sky/skylet/subprocess_daemon.py +104 -29
  383. sky/skypilot_config.py +506 -99
  384. sky/ssh_node_pools/__init__.py +1 -0
  385. sky/ssh_node_pools/core.py +135 -0
  386. sky/ssh_node_pools/server.py +233 -0
  387. sky/task.py +685 -163
  388. sky/templates/aws-ray.yml.j2 +11 -3
  389. sky/templates/azure-ray.yml.j2 +2 -1
  390. sky/templates/cudo-ray.yml.j2 +1 -0
  391. sky/templates/do-ray.yml.j2 +2 -1
  392. sky/templates/fluidstack-ray.yml.j2 +1 -0
  393. sky/templates/gcp-ray.yml.j2 +62 -1
  394. sky/templates/hyperbolic-ray.yml.j2 +68 -0
  395. sky/templates/ibm-ray.yml.j2 +2 -1
  396. sky/templates/jobs-controller.yaml.j2 +27 -24
  397. sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
  398. sky/templates/kubernetes-ray.yml.j2 +611 -50
  399. sky/templates/lambda-ray.yml.j2 +2 -1
  400. sky/templates/nebius-ray.yml.j2 +34 -12
  401. sky/templates/oci-ray.yml.j2 +1 -0
  402. sky/templates/paperspace-ray.yml.j2 +2 -1
  403. sky/templates/primeintellect-ray.yml.j2 +72 -0
  404. sky/templates/runpod-ray.yml.j2 +10 -1
  405. sky/templates/scp-ray.yml.j2 +4 -50
  406. sky/templates/seeweb-ray.yml.j2 +171 -0
  407. sky/templates/shadeform-ray.yml.j2 +73 -0
  408. sky/templates/sky-serve-controller.yaml.j2 +22 -2
  409. sky/templates/vast-ray.yml.j2 +1 -0
  410. sky/templates/vsphere-ray.yml.j2 +1 -0
  411. sky/templates/websocket_proxy.py +212 -37
  412. sky/usage/usage_lib.py +31 -15
  413. sky/users/__init__.py +0 -0
  414. sky/users/model.conf +15 -0
  415. sky/users/permission.py +397 -0
  416. sky/users/rbac.py +121 -0
  417. sky/users/server.py +720 -0
  418. sky/users/token_service.py +218 -0
  419. sky/utils/accelerator_registry.py +35 -5
  420. sky/utils/admin_policy_utils.py +84 -38
  421. sky/utils/annotations.py +38 -5
  422. sky/utils/asyncio_utils.py +78 -0
  423. sky/utils/atomic.py +1 -1
  424. sky/utils/auth_utils.py +153 -0
  425. sky/utils/benchmark_utils.py +60 -0
  426. sky/utils/cli_utils/status_utils.py +159 -86
  427. sky/utils/cluster_utils.py +31 -9
  428. sky/utils/command_runner.py +354 -68
  429. sky/utils/command_runner.pyi +93 -3
  430. sky/utils/common.py +35 -8
  431. sky/utils/common_utils.py +314 -91
  432. sky/utils/config_utils.py +74 -5
  433. sky/utils/context.py +403 -0
  434. sky/utils/context_utils.py +242 -0
  435. sky/utils/controller_utils.py +383 -89
  436. sky/utils/dag_utils.py +31 -12
  437. sky/utils/db/__init__.py +0 -0
  438. sky/utils/db/db_utils.py +485 -0
  439. sky/utils/db/kv_cache.py +149 -0
  440. sky/utils/db/migration_utils.py +137 -0
  441. sky/utils/directory_utils.py +12 -0
  442. sky/utils/env_options.py +13 -0
  443. sky/utils/git.py +567 -0
  444. sky/utils/git_clone.sh +460 -0
  445. sky/utils/infra_utils.py +195 -0
  446. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  447. sky/utils/kubernetes/config_map_utils.py +133 -0
  448. sky/utils/kubernetes/create_cluster.sh +15 -29
  449. sky/utils/kubernetes/delete_cluster.sh +10 -7
  450. sky/utils/kubernetes/deploy_ssh_node_pools.py +1177 -0
  451. sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
  452. sky/utils/kubernetes/generate_kind_config.py +6 -66
  453. sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
  454. sky/utils/kubernetes/gpu_labeler.py +18 -8
  455. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  456. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  457. sky/utils/kubernetes/kubernetes_deploy_utils.py +284 -114
  458. sky/utils/kubernetes/rsync_helper.sh +11 -3
  459. sky/utils/kubernetes/ssh-tunnel.sh +379 -0
  460. sky/utils/kubernetes/ssh_utils.py +221 -0
  461. sky/utils/kubernetes_enums.py +8 -15
  462. sky/utils/lock_events.py +94 -0
  463. sky/utils/locks.py +416 -0
  464. sky/utils/log_utils.py +82 -107
  465. sky/utils/perf_utils.py +22 -0
  466. sky/utils/resource_checker.py +298 -0
  467. sky/utils/resources_utils.py +249 -32
  468. sky/utils/rich_utils.py +217 -39
  469. sky/utils/schemas.py +955 -160
  470. sky/utils/serialize_utils.py +16 -0
  471. sky/utils/status_lib.py +10 -0
  472. sky/utils/subprocess_utils.py +29 -15
  473. sky/utils/tempstore.py +70 -0
  474. sky/utils/thread_utils.py +91 -0
  475. sky/utils/timeline.py +26 -53
  476. sky/utils/ux_utils.py +84 -15
  477. sky/utils/validator.py +11 -1
  478. sky/utils/volume.py +165 -0
  479. sky/utils/yaml_utils.py +111 -0
  480. sky/volumes/__init__.py +13 -0
  481. sky/volumes/client/__init__.py +0 -0
  482. sky/volumes/client/sdk.py +150 -0
  483. sky/volumes/server/__init__.py +0 -0
  484. sky/volumes/server/core.py +270 -0
  485. sky/volumes/server/server.py +124 -0
  486. sky/volumes/volume.py +215 -0
  487. sky/workspaces/__init__.py +0 -0
  488. sky/workspaces/core.py +655 -0
  489. sky/workspaces/server.py +101 -0
  490. sky/workspaces/utils.py +56 -0
  491. sky_templates/README.md +3 -0
  492. sky_templates/__init__.py +3 -0
  493. sky_templates/ray/__init__.py +0 -0
  494. sky_templates/ray/start_cluster +183 -0
  495. sky_templates/ray/stop_cluster +75 -0
  496. skypilot_nightly-1.0.0.dev20251203.dist-info/METADATA +676 -0
  497. skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
  498. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +1 -1
  499. skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
  500. sky/benchmark/benchmark_state.py +0 -256
  501. sky/benchmark/benchmark_utils.py +0 -641
  502. sky/clouds/service_catalog/constants.py +0 -7
  503. sky/dashboard/out/_next/static/GWvVBSCS7FmUiVmjaL1a7/_buildManifest.js +0 -1
  504. sky/dashboard/out/_next/static/chunks/236-2db3ee3fba33dd9e.js +0 -6
  505. sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
  506. sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
  507. sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
  508. sky/dashboard/out/_next/static/chunks/845-9e60713e0c441abc.js +0 -1
  509. sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
  510. sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
  511. sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
  512. sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
  513. sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
  514. sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
  515. sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
  516. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6ac338bc2239cb45.js +0 -1
  517. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
  518. sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
  519. sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
  520. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-1c519e1afc523dc9.js +0 -1
  521. sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
  522. sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
  523. sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
  524. sky/jobs/dashboard/dashboard.py +0 -223
  525. sky/jobs/dashboard/static/favicon.ico +0 -0
  526. sky/jobs/dashboard/templates/index.html +0 -831
  527. sky/jobs/server/dashboard_utils.py +0 -69
  528. sky/skylet/providers/scp/__init__.py +0 -2
  529. sky/skylet/providers/scp/config.py +0 -149
  530. sky/skylet/providers/scp/node_provider.py +0 -578
  531. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  532. sky/utils/db_utils.py +0 -100
  533. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  534. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  535. skypilot_nightly-1.0.0.dev20250502.dist-info/METADATA +0 -361
  536. skypilot_nightly-1.0.0.dev20250502.dist-info/RECORD +0 -396
  537. skypilot_nightly-1.0.0.dev20250502.dist-info/top_level.txt +0 -1
  538. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  539. /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
  540. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  541. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  542. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  543. /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
  544. /sky/dashboard/out/_next/static/{GWvVBSCS7FmUiVmjaL1a7 → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
  545. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
  546. {skypilot_nightly-1.0.0.dev20250502.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,397 @@
1
+ """Permission service for SkyPilot API Server."""
2
+ import contextlib
3
+ import hashlib
4
+ import logging
5
+ import os
6
+ from typing import Generator, List, Optional
7
+
8
+ import casbin
9
+ import filelock
10
+ import sqlalchemy_adapter
11
+
12
+ from sky import global_user_state
13
+ from sky import models
14
+ from sky import sky_logging
15
+ from sky.skylet import constants
16
+ from sky.users import rbac
17
+ from sky.utils import annotations
18
+ from sky.utils import common_utils
19
+ from sky.utils.db import db_utils
20
+
21
# Raise the level of casbin's own loggers to ERROR.
for _casbin_logger_name in ('casbin.policy', 'casbin.role', 'casbin.model',
                            'casbin.rbac'):
    logging.getLogger(_casbin_logger_name).setLevel(sky_logging.ERROR)
del _casbin_logger_name
logger = sky_logging.init_logger(__name__)

# File lock guarding policy updates, and how long (in seconds) to wait for it.
POLICY_UPDATE_LOCK_PATH = os.path.expanduser('~/.sky/.policy_update.lock')
POLICY_UPDATE_LOCK_TIMEOUT_SECONDS = 20

# The PermissionService whose enforcer is shared by later instances; assigned
# by PermissionService._lazy_initialize().
_enforcer_instance: Optional['PermissionService'] = None
32
+
33
+
34
class PermissionService:
    """Permission service for SkyPilot API Server.

    Wraps a casbin enforcer that is created lazily on first use and shared
    through the module-level ``_enforcer_instance``. Methods that modify the
    policy run under ``_policy_lock()``.
    """

    def __init__(self):
        # Created on first use by _lazy_initialize().
        self.enforcer: Optional[casbin.Enforcer] = None

    def _lazy_initialize(self):
        """Create (or adopt) the shared enforcer if this instance has none.

        Takes ``_policy_lock()`` itself, so it must not be called for the
        first time while the caller already holds that lock.
        """
        global _enforcer_instance
        if self.enforcer is not None:
            return
        with _policy_lock():
            if _enforcer_instance is None:
                engine = global_user_state.initialize_and_get_db()
                db_utils.add_all_tables_to_db_sqlalchemy(
                    sqlalchemy_adapter.Base.metadata, engine)
                adapter = sqlalchemy_adapter.Adapter(
                    engine, db_class=sqlalchemy_adapter.CasbinRule)
                model_path = os.path.join(os.path.dirname(__file__),
                                          'model.conf')
                enforcer = casbin.Enforcer(model_path, adapter)
                self.enforcer = enforcer
                # Only publish the shared instance once the enforcer has
                # been constructed successfully; publishing earlier and then
                # failing would leave a broken instance for every later call.
                _enforcer_instance = self
                self._maybe_initialize_policies()
                self._maybe_initialize_basic_auth_user()
            else:
                assert _enforcer_instance is not None
                self.enforcer = _enforcer_instance.enforcer

    def _ensure_enforcer(self) -> casbin.Enforcer:
        """Ensure enforcer is initialized and return it."""
        self._lazy_initialize()
        assert self.enforcer is not None, (
            'Enforcer should be initialized after _lazy_initialize()')
        return self.enforcer

    def _maybe_initialize_basic_auth_user(self) -> None:
        """Initialize basic auth user if it is enabled.

        Reads ``<username>:<password>`` from the environment variable named
        by ``constants.SKYPILOT_INITIAL_BASIC_AUTH``. If the user does not
        exist yet it is created and granted the admin role.
        """
        basic_auth = os.environ.get(constants.SKYPILOT_INITIAL_BASIC_AUTH)
        if not basic_auth:
            return
        # partition() never raises; a value without ':' is reported and
        # ignored instead of failing enforcer initialization with ValueError.
        username, separator, password = basic_auth.partition(':')
        if not separator:
            # Do not log the value itself: it may contain a credential.
            logger.warning(
                f'Ignoring {constants.SKYPILOT_INITIAL_BASIC_AUTH}: expected '
                'the format <username>:<password>')
            return
        if not (username and password):
            return
        user_hash = hashlib.md5(
            username.encode()).hexdigest()[:common_utils.USER_HASH_LENGTH]
        user_info = global_user_state.get_user(user_hash)
        if user_info:
            logger.debug(f'Basic auth user {username} already exists')
            return
        global_user_state.add_or_update_user(
            models.User(id=user_hash, name=username, password=password))
        enforcer = self._ensure_enforcer()
        enforcer.add_grouping_policy(user_hash, rbac.RoleName.ADMIN.value)
        enforcer.save_policy()
        logger.info(f'Basic auth user {username} initialized')

    def _maybe_initialize_policies(self) -> None:
        """Initialize policies if they don't already exist.

        Builds the expected permission policies from the role blocklists and
        the workspace configuration. If any of them is missing from the
        stored policy, all permission policies are cleared and re-added.
        Every known user is then given the default role if it has none.
        """
        logger.debug(f'Initializing policies in process: {os.getpid()}')
        self._load_policy_no_lock()

        policy_updated = False

        enforcer = self._ensure_enforcer()
        # Tuples in a set make each membership test below a single lookup.
        existing_policies = {tuple(policy) for policy in enforcer.get_policy()}

        # Expected (role, path, method) policies from the role blocklists.
        role_policies = []
        for role, permissions in rbac.get_role_permissions().items():
            role_config = permissions['permissions']
            if role_config and 'blocklist' in role_config:
                for item in role_config['blocklist']:
                    role_policies.append((role, item['path'], item['method']))

        # Expected (user, workspace, '*') policies from the workspace config.
        workspace_policy_permissions = rbac.get_workspace_policy_permissions()
        logger.debug(f'Workspace policy permissions from config: '
                     f'{workspace_policy_permissions}')
        workspace_policies = []
        for workspace_name, users in workspace_policy_permissions.items():
            for user in users:
                workspace_policies.append((user, workspace_name, '*'))
                logger.debug(f'Expected workspace policy: user={user}, '
                             f'workspace={workspace_name}')

        # Check if all expected policies already exist.
        policies_exist = all(expected in existing_policies
                             for expected in role_policies +
                             workspace_policies)

        if not policies_exist:
            # Only clear and reinitialize if policies don't exist or are
            # incomplete.
            logger.debug('Policies not found or incomplete, initializing...')
            # Only clear p policies (permission policies),
            # keep g policies (role policies).
            enforcer.remove_filtered_policy(0)
            for role, path, method in role_policies:
                logger.debug(f'Adding role policy: role={role}, '
                             f'path={path}, method={method}')
                enforcer.add_policy(role, path, method)
                policy_updated = True

            for user, workspace_name, _ in workspace_policies:
                logger.debug(f'Initializing workspace policy: user={user}, '
                             f'workspace={workspace_name}')
                enforcer.add_policy(user, workspace_name, '*')
                policy_updated = True
            logger.debug('Policies initialized successfully')
        else:
            logger.debug('Policies already exist, skipping initialization')

        # Always ensure users have default roles (this is idempotent).
        all_users = global_user_state.get_all_users()
        for existing_user in all_users:
            user_added = self._add_user_if_not_exists_no_lock(existing_user.id)
            policy_updated = policy_updated or user_added

        if policy_updated:
            enforcer.save_policy()

    def add_user_if_not_exists(self, user_id: str) -> None:
        """Add user role relationship."""
        # Initialize before taking the lock; see _lazy_initialize().
        self._lazy_initialize()
        with _policy_lock():
            self._add_user_if_not_exists_no_lock(user_id)

    def _add_user_if_not_exists_no_lock(self, user_id: str) -> bool:
        """Add user role relationship without lock.

        Returns:
            True if the user was added, False otherwise.
        """
        enforcer = self._ensure_enforcer()
        user_roles = enforcer.get_roles_for_user(user_id)
        if not user_roles:
            enforcer.add_grouping_policy(user_id, rbac.get_default_role())
            return True
        return False

    def delete_user(self, user_id: str) -> None:
        """Delete every role relationship of a user."""
        # Initialize before taking the lock: _lazy_initialize() acquires
        # _policy_lock() itself through a second FileLock object, so a first
        # initialization under the lock would wait on ourselves.
        self._lazy_initialize()
        with _policy_lock():
            # Get current roles
            self._load_policy_no_lock()
            # Avoid calling get_user_roles, as it reloads the policy again.
            enforcer = self._ensure_enforcer()
            current_roles = enforcer.get_roles_for_user(user_id)
            if not current_roles:
                logger.debug(f'User {user_id} has no roles')
                return
            # Remove all bindings, not just the first one, so that a deleted
            # user keeps no role.
            for role in list(current_roles):
                enforcer.remove_grouping_policy(user_id, role)
            enforcer.save_policy()

    def update_role(self, user_id: str, new_role: str) -> None:
        """Update user role relationship.

        After the call ``new_role`` is the user's only role.
        """
        # Initialize before taking the lock; see delete_user().
        self._lazy_initialize()
        with _policy_lock():
            # Get current roles
            self._load_policy_no_lock()
            # Avoid calling get_user_roles, as it reloads the policy again.
            enforcer = self._ensure_enforcer()
            current_roles = list(enforcer.get_roles_for_user(user_id))
            if not current_roles:
                logger.debug(f'User {user_id} has no roles')
            elif current_roles == [new_role]:
                logger.debug(f'User {user_id} already has role {new_role}')
                return

            # Drop every other role: checking only the first one could leave
            # a stronger role (e.g. admin) in place after a downgrade.
            for role in current_roles:
                if role != new_role:
                    enforcer.remove_grouping_policy(user_id, role)
            if new_role not in current_roles:
                enforcer.add_grouping_policy(user_id, new_role)
            enforcer.save_policy()

    def get_user_roles(self, user_id: str) -> List[str]:
        """Get the roles assigned to a user.

        The policy is reloaded from storage first. The result is whatever
        the enforcer's ``get_roles_for_user`` returns; in casbin that is the
        set of directly assigned roles (inherited roles are resolved by
        ``get_implicit_roles_for_user``, which is not used here).

        Args:
            user_id: The user ID to get roles for.

        Returns:
            A list of role names that the user has.
        """
        self._load_policy_no_lock()
        enforcer = self._ensure_enforcer()
        return enforcer.get_roles_for_user(user_id)

    def get_users_for_role(self, role: str) -> List[str]:
        """Get all users for a role."""
        self._load_policy_no_lock()
        enforcer = self._ensure_enforcer()
        return enforcer.get_users_for_role(role)

    def check_endpoint_permission(self, user_id: str, path: str,
                                  method: str) -> bool:
        """Check whether a user may call ``method`` on ``path``."""
        # We intentionally don't load the policy here, as it is a hot path, and
        # we don't support updating the policy.
        # We don't hold the lock for checking permission, as it is read only and
        # it is a hot path in every request. It is ok to have a stale policy,
        # as long as it is eventually consistent.
        enforcer = self._ensure_enforcer()
        return enforcer.enforce(user_id, path, method)

    def _load_policy_no_lock(self):
        """Load policy from storage."""
        enforcer = self._ensure_enforcer()
        enforcer.load_policy()

    def load_policy(self):
        """Load policy from storage with lock."""
        # Initialize before taking the lock; see delete_user().
        self._lazy_initialize()
        with _policy_lock():
            self._load_policy_no_lock()

    # Right now, not a lot of users are using multiple workspaces,
    # so 5 should be more than enough.
    @annotations.lru_cache(scope='request', maxsize=5)
    def check_workspace_permission(self, user_id: str,
                                   workspace_name: str) -> bool:
        """Check workspace permission.

        This method checks if a user has permission to access a specific
        workspace.

        For private workspaces, the user must have explicit permission.

        For public workspaces, the permission is granted via a wildcard policy
        ('*').
        """
        if os.getenv(constants.ENV_VAR_IS_SKYPILOT_SERVER) is None:
            # When it is not on API server, we allow all users to access all
            # workspaces, as the workspace check has been done on API server.
            return True
        user_roles = self.get_user_roles(user_id)
        if rbac.RoleName.ADMIN.value in user_roles:
            return True
        # The Casbin model matcher already handles the wildcard '*' case:
        # m = (g(r.sub, p.sub)|| p.sub == '*') && r.obj == p.obj &&
        # r.act == p.act
        # This means if there's a policy ('*', workspace_name, '*'), it will
        # match any user
        enforcer = self._ensure_enforcer()
        result = enforcer.enforce(user_id, workspace_name, '*')
        logger.debug(f'Workspace permission check: user={user_id}, '
                     f'workspace={workspace_name}, result={result}')
        return result

    def check_service_account_token_permission(self, user_id: str,
                                               token_owner_id: str,
                                               action: str) -> bool:
        """Check service account token permission.

        This method checks if a user has permission to perform an action on
        a service account token owned by another user.

        Args:
            user_id: The ID of the user requesting the action
            token_owner_id: The ID of the user who owns the token
            action: The action being performed (e.g., 'delete', 'view').
                Currently unused: the decision does not depend on it.

        Returns:
            True if the user has permission, False otherwise
        """
        del action
        # Users can always manage their own tokens
        if user_id == token_owner_id:
            return True

        # Admins can manage any token; regular users cannot manage tokens
        # owned by others.
        user_roles = self.get_user_roles(user_id)
        return rbac.RoleName.ADMIN.value in user_roles

    def add_workspace_policy(self, workspace_name: str,
                             users: List[str]) -> None:
        """Add workspace policy.

        Args:
            workspace_name: Name of the workspace
            users: List of user IDs that should have access.
                For public workspaces, this should be ['*'].
                For private workspaces, this should be specific user IDs.
        """
        # Initialize before taking the lock; see delete_user().
        self._lazy_initialize()
        with _policy_lock():
            # Reload first, as update_workspace_policy() does, so that
            # save_policy() does not write back a stale in-memory policy.
            self._load_policy_no_lock()
            enforcer = self._ensure_enforcer()
            for user in users:
                logger.debug(f'Adding workspace policy: user={user}, '
                             f'workspace={workspace_name}')
                enforcer.add_policy(user, workspace_name, '*')
            enforcer.save_policy()

    def update_workspace_policy(self, workspace_name: str,
                                users: List[str]) -> None:
        """Update workspace policy.

        Args:
            workspace_name: Name of the workspace
            users: List of user IDs that should have access.
                For public workspaces, this should be ['*'].
                For private workspaces, this should be specific user IDs.
        """
        # Initialize before taking the lock; see delete_user().
        self._lazy_initialize()
        with _policy_lock():
            self._load_policy_no_lock()
            enforcer = self._ensure_enforcer()
            # Remove all existing policies for this workspace
            enforcer.remove_filtered_policy(1, workspace_name)
            # Add new policies
            for user in users:
                logger.debug(f'Updating workspace policy: user={user}, '
                             f'workspace={workspace_name}')
                enforcer.add_policy(user, workspace_name, '*')
            enforcer.save_policy()

    def remove_workspace_policy(self, workspace_name: str) -> None:
        """Remove workspace policy."""
        # Initialize before taking the lock; see delete_user().
        self._lazy_initialize()
        with _policy_lock():
            # Reload first so that save_policy() does not write back a stale
            # in-memory policy.
            self._load_policy_no_lock()
            enforcer = self._ensure_enforcer()
            enforcer.remove_filtered_policy(1, workspace_name)
            enforcer.save_policy()
379
+
380
+
381
@contextlib.contextmanager
def _policy_lock() -> Generator[None, None, None]:
    """Context manager for policy update lock.

    Acquires a file lock at ``POLICY_UPDATE_LOCK_PATH``, waiting up to
    ``POLICY_UPDATE_LOCK_TIMEOUT_SECONDS``, and releases it on exit.

    Raises:
        RuntimeError: If the lock cannot be acquired before the timeout.
    """
    lock = filelock.FileLock(POLICY_UPDATE_LOCK_PATH,
                             POLICY_UPDATE_LOCK_TIMEOUT_SECONDS)
    # Only the acquisition is inside the try: a filelock.Timeout raised by
    # the guarded body must propagate unchanged rather than be reported as a
    # failure to acquire this lock.
    try:
        lock.acquire()
    except filelock.Timeout as e:
        raise RuntimeError('Failed to reload policy due to a timeout '
                           'when trying to acquire the lock at '
                           f'{POLICY_UPDATE_LOCK_PATH}. '
                           'Please try again or manually remove the lock '
                           'file if you believe it is stale.') from e
    try:
        yield
    finally:
        lock.release()
394
+
395
+
396
+ # Singleton instance of PermissionService for other modules to use.
397
+ permission_service = PermissionService()
sky/users/rbac.py ADDED
@@ -0,0 +1,121 @@
1
+ """RBAC (Role-Based Access Control) functionality for SkyPilot API Server."""
2
+
3
+ import enum
4
+ from typing import Dict, List
5
+
6
+ from sky import sky_logging
7
+ from sky import skypilot_config
8
+ from sky.skylet import constants
9
+ from sky.workspaces import utils as workspaces_utils
10
+
11
+ logger = sky_logging.init_logger(__name__)
12
+
13
# Default blocklist for the 'user' role: workspace create/update/delete/config
# operations and user create/delete/import/export endpoints.
_DEFAULT_USER_BLOCKLIST = [{
    'path': blocked_path,
    'method': blocked_method
} for blocked_path, blocked_method in (
    ('/workspaces/config', 'POST'),
    ('/workspaces/update', 'POST'),
    ('/workspaces/create', 'POST'),
    ('/workspaces/delete', 'POST'),
    ('/users/delete', 'POST'),
    ('/users/create', 'POST'),
    ('/users/import', 'POST'),
    ('/users/export', 'GET'),
)]
40
+
41
+
42
class RoleName(str, enum.Enum):
    """Roles known to the RBAC layer.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """
    ADMIN = 'admin'
    USER = 'user'
46
+
47
+
48
def get_supported_roles() -> List[str]:
    """Return the string value of every member of ``RoleName``."""
    supported: List[str] = []
    for member in RoleName:
        supported.append(member.value)
    return supported
50
+
51
+
52
def get_default_role() -> str:
    """Return the configured ``rbac.default_role``.

    Falls back to the admin role when the config does not set one.
    """
    config_key = ('rbac', 'default_role')
    fallback_role = RoleName.ADMIN.value
    return skypilot_config.get_nested(config_key, default_value=fallback_role)
55
+
56
+
57
def get_role_permissions(
) -> Dict[str, Dict[str, Dict[str, List[Dict[str, str]]]]]:
    """Get all role permissions from config.

    Role names from the config are lower-cased. Roles that are not in
    ``get_supported_roles()`` are logged and left out of the result. If the
    config defines no 'user' role, it gets the default blocklist.

    Returns:
        Dictionary containing all roles and their permissions configuration.
        Example:
        {
            'admin': {
                'permissions': {
                    'blocklist': []
                }
            },
            'user': {
                'permissions': {
                    'blocklist': [
                        {'path': '/workspaces/config', 'method': 'POST'},
                        {'path': '/workspaces/update', 'method': 'POST'}
                    ]
                }
            }
        }
    """
    # Get all roles from the config. `or {}` covers a key that is present
    # but empty.
    configured_roles = skypilot_config.get_nested(
        ('rbac', 'roles'), default_value={}) or {}
    supported_roles = get_supported_roles()
    # Build a fresh dict: inserting normalized keys into the mapping being
    # iterated raises "dictionary changed size during iteration", and would
    # also modify the object handed out by the config layer.
    role_permissions = {}
    for role, permissions in configured_roles.items():
        role_name = str(role).lower()
        if role_name not in supported_roles:
            logger.warning(f'Invalid role: {role_name}')
            continue
        role_permissions[role_name] = permissions
    # Add default roles if not present
    if 'user' not in role_permissions:
        role_permissions['user'] = {
            'permissions': {
                # Copy so callers cannot alter the module-level default.
                'blocklist': [dict(item) for item in _DEFAULT_USER_BLOCKLIST]
            }
        }
    return role_permissions
98
+
99
+
100
def get_workspace_policy_permissions() -> Dict[str, List[str]]:
    """Get workspace policy permissions from config.

    The default workspace is always part of the result, even when the
    config does not list it.

    Returns:
        A dictionary of workspace policy permissions.
        Example:
        {
            'workspace1': ['user1-id', 'user2-id'],
            'workspace2': ['user3-id', 'user4-id'],
            'default': ['*']
        }
    """
    # Work on a shallow copy so that adding the default workspace does not
    # write into the mapping returned by the config lookup.
    current_workspaces = dict(
        skypilot_config.get_nested(('workspaces',), default_value={}) or {})
    current_workspaces.setdefault(constants.SKYPILOT_DEFAULT_WORKSPACE, {})
    workspaces_to_policy = {
        workspace_name: workspaces_utils.get_workspace_users(workspace_config)
        for workspace_name, workspace_config in current_workspaces.items()
    }
    logger.debug(f'Workspace policy permissions: {workspaces_to_policy}')
    return workspaces_to_policy