skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (429)
  1. sky/__init__.py +12 -2
  2. sky/adaptors/aws.py +27 -22
  3. sky/adaptors/common.py +25 -2
  4. sky/adaptors/coreweave.py +278 -0
  5. sky/adaptors/do.py +8 -2
  6. sky/adaptors/gcp.py +11 -0
  7. sky/adaptors/ibm.py +5 -2
  8. sky/adaptors/kubernetes.py +64 -0
  9. sky/adaptors/nebius.py +3 -1
  10. sky/adaptors/primeintellect.py +1 -0
  11. sky/adaptors/seeweb.py +183 -0
  12. sky/adaptors/shadeform.py +89 -0
  13. sky/adaptors/slurm.py +478 -0
  14. sky/admin_policy.py +20 -0
  15. sky/authentication.py +157 -263
  16. sky/backends/__init__.py +3 -2
  17. sky/backends/backend.py +11 -3
  18. sky/backends/backend_utils.py +630 -185
  19. sky/backends/cloud_vm_ray_backend.py +1111 -928
  20. sky/backends/local_docker_backend.py +9 -5
  21. sky/backends/task_codegen.py +971 -0
  22. sky/backends/wheel_utils.py +18 -0
  23. sky/catalog/__init__.py +8 -3
  24. sky/catalog/aws_catalog.py +4 -0
  25. sky/catalog/common.py +19 -1
  26. sky/catalog/data_fetchers/fetch_aws.py +102 -80
  27. sky/catalog/data_fetchers/fetch_gcp.py +30 -3
  28. sky/catalog/data_fetchers/fetch_nebius.py +9 -6
  29. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  30. sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
  31. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  32. sky/catalog/kubernetes_catalog.py +36 -32
  33. sky/catalog/primeintellect_catalog.py +95 -0
  34. sky/catalog/runpod_catalog.py +5 -1
  35. sky/catalog/seeweb_catalog.py +184 -0
  36. sky/catalog/shadeform_catalog.py +165 -0
  37. sky/catalog/slurm_catalog.py +243 -0
  38. sky/check.py +87 -46
  39. sky/client/cli/command.py +1004 -434
  40. sky/client/cli/flags.py +4 -2
  41. sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
  42. sky/client/cli/utils.py +79 -0
  43. sky/client/common.py +12 -2
  44. sky/client/sdk.py +188 -65
  45. sky/client/sdk_async.py +34 -33
  46. sky/cloud_stores.py +82 -3
  47. sky/clouds/__init__.py +8 -0
  48. sky/clouds/aws.py +337 -129
  49. sky/clouds/azure.py +24 -18
  50. sky/clouds/cloud.py +47 -13
  51. sky/clouds/cudo.py +16 -13
  52. sky/clouds/do.py +9 -7
  53. sky/clouds/fluidstack.py +12 -5
  54. sky/clouds/gcp.py +14 -7
  55. sky/clouds/hyperbolic.py +12 -5
  56. sky/clouds/ibm.py +12 -5
  57. sky/clouds/kubernetes.py +80 -45
  58. sky/clouds/lambda_cloud.py +12 -5
  59. sky/clouds/nebius.py +23 -9
  60. sky/clouds/oci.py +19 -12
  61. sky/clouds/paperspace.py +4 -1
  62. sky/clouds/primeintellect.py +317 -0
  63. sky/clouds/runpod.py +85 -24
  64. sky/clouds/scp.py +12 -8
  65. sky/clouds/seeweb.py +477 -0
  66. sky/clouds/shadeform.py +400 -0
  67. sky/clouds/slurm.py +578 -0
  68. sky/clouds/ssh.py +6 -3
  69. sky/clouds/utils/scp_utils.py +61 -50
  70. sky/clouds/vast.py +43 -27
  71. sky/clouds/vsphere.py +14 -16
  72. sky/core.py +296 -195
  73. sky/dashboard/out/404.html +1 -1
  74. sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
  75. sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
  76. sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
  77. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  78. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  79. sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
  80. sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
  81. sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
  82. sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
  83. sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
  84. sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
  85. sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
  86. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  87. sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
  88. sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
  89. sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
  90. sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
  91. sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
  92. sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
  93. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  94. sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
  95. sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
  96. sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
  97. sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
  98. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
  99. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
  100. sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
  101. sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
  102. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
  103. sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
  104. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
  105. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
  106. sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
  107. sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
  108. sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
  109. sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
  110. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
  111. sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
  112. sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
  113. sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
  114. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  115. sky/dashboard/out/clusters/[cluster].html +1 -1
  116. sky/dashboard/out/clusters.html +1 -1
  117. sky/dashboard/out/config.html +1 -1
  118. sky/dashboard/out/index.html +1 -1
  119. sky/dashboard/out/infra/[context].html +1 -1
  120. sky/dashboard/out/infra.html +1 -1
  121. sky/dashboard/out/jobs/[job].html +1 -1
  122. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  123. sky/dashboard/out/jobs.html +1 -1
  124. sky/dashboard/out/plugins/[...slug].html +1 -0
  125. sky/dashboard/out/users.html +1 -1
  126. sky/dashboard/out/volumes.html +1 -1
  127. sky/dashboard/out/workspace/new.html +1 -1
  128. sky/dashboard/out/workspaces/[name].html +1 -1
  129. sky/dashboard/out/workspaces.html +1 -1
  130. sky/data/data_utils.py +92 -1
  131. sky/data/mounting_utils.py +177 -30
  132. sky/data/storage.py +200 -19
  133. sky/data/storage_utils.py +10 -45
  134. sky/exceptions.py +18 -7
  135. sky/execution.py +74 -31
  136. sky/global_user_state.py +605 -191
  137. sky/jobs/__init__.py +2 -0
  138. sky/jobs/client/sdk.py +101 -4
  139. sky/jobs/client/sdk_async.py +31 -5
  140. sky/jobs/constants.py +15 -8
  141. sky/jobs/controller.py +726 -284
  142. sky/jobs/file_content_utils.py +128 -0
  143. sky/jobs/log_gc.py +193 -0
  144. sky/jobs/recovery_strategy.py +250 -100
  145. sky/jobs/scheduler.py +271 -173
  146. sky/jobs/server/core.py +367 -114
  147. sky/jobs/server/server.py +81 -35
  148. sky/jobs/server/utils.py +89 -35
  149. sky/jobs/state.py +1498 -620
  150. sky/jobs/utils.py +771 -306
  151. sky/logs/agent.py +40 -5
  152. sky/logs/aws.py +9 -19
  153. sky/metrics/utils.py +282 -39
  154. sky/models.py +2 -0
  155. sky/optimizer.py +7 -6
  156. sky/provision/__init__.py +38 -1
  157. sky/provision/aws/config.py +34 -13
  158. sky/provision/aws/instance.py +5 -2
  159. sky/provision/azure/instance.py +5 -3
  160. sky/provision/common.py +22 -0
  161. sky/provision/cudo/instance.py +4 -3
  162. sky/provision/do/instance.py +4 -3
  163. sky/provision/docker_utils.py +112 -28
  164. sky/provision/fluidstack/instance.py +6 -5
  165. sky/provision/gcp/config.py +6 -1
  166. sky/provision/gcp/instance.py +4 -2
  167. sky/provision/hyperbolic/instance.py +4 -2
  168. sky/provision/instance_setup.py +66 -20
  169. sky/provision/kubernetes/__init__.py +2 -0
  170. sky/provision/kubernetes/config.py +7 -44
  171. sky/provision/kubernetes/constants.py +0 -1
  172. sky/provision/kubernetes/instance.py +609 -213
  173. sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
  174. sky/provision/kubernetes/network.py +12 -8
  175. sky/provision/kubernetes/network_utils.py +8 -25
  176. sky/provision/kubernetes/utils.py +422 -422
  177. sky/provision/kubernetes/volume.py +150 -18
  178. sky/provision/lambda_cloud/instance.py +16 -13
  179. sky/provision/nebius/instance.py +6 -2
  180. sky/provision/nebius/utils.py +103 -86
  181. sky/provision/oci/instance.py +4 -2
  182. sky/provision/paperspace/instance.py +4 -3
  183. sky/provision/primeintellect/__init__.py +10 -0
  184. sky/provision/primeintellect/config.py +11 -0
  185. sky/provision/primeintellect/instance.py +454 -0
  186. sky/provision/primeintellect/utils.py +398 -0
  187. sky/provision/provisioner.py +45 -15
  188. sky/provision/runpod/__init__.py +2 -0
  189. sky/provision/runpod/instance.py +4 -3
  190. sky/provision/runpod/volume.py +69 -13
  191. sky/provision/scp/instance.py +307 -130
  192. sky/provision/seeweb/__init__.py +11 -0
  193. sky/provision/seeweb/config.py +13 -0
  194. sky/provision/seeweb/instance.py +812 -0
  195. sky/provision/shadeform/__init__.py +11 -0
  196. sky/provision/shadeform/config.py +12 -0
  197. sky/provision/shadeform/instance.py +351 -0
  198. sky/provision/shadeform/shadeform_utils.py +83 -0
  199. sky/provision/slurm/__init__.py +12 -0
  200. sky/provision/slurm/config.py +13 -0
  201. sky/provision/slurm/instance.py +572 -0
  202. sky/provision/slurm/utils.py +583 -0
  203. sky/provision/vast/instance.py +9 -4
  204. sky/provision/vast/utils.py +10 -6
  205. sky/provision/volume.py +164 -0
  206. sky/provision/vsphere/common/ssl_helper.py +1 -1
  207. sky/provision/vsphere/common/vapiconnect.py +2 -1
  208. sky/provision/vsphere/common/vim_utils.py +3 -2
  209. sky/provision/vsphere/instance.py +8 -6
  210. sky/provision/vsphere/vsphere_utils.py +8 -1
  211. sky/resources.py +11 -3
  212. sky/schemas/api/responses.py +107 -6
  213. sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
  214. sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
  215. sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
  216. sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
  217. sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
  218. sky/schemas/db/serve_state/002_yaml_content.py +34 -0
  219. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  220. sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
  221. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  222. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  223. sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
  224. sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
  225. sky/schemas/generated/jobsv1_pb2.py +86 -0
  226. sky/schemas/generated/jobsv1_pb2.pyi +254 -0
  227. sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
  228. sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
  229. sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
  230. sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
  231. sky/schemas/generated/servev1_pb2.py +58 -0
  232. sky/schemas/generated/servev1_pb2.pyi +115 -0
  233. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  234. sky/serve/autoscalers.py +2 -0
  235. sky/serve/client/impl.py +55 -21
  236. sky/serve/constants.py +4 -3
  237. sky/serve/controller.py +17 -11
  238. sky/serve/load_balancing_policies.py +1 -1
  239. sky/serve/replica_managers.py +219 -142
  240. sky/serve/serve_rpc_utils.py +179 -0
  241. sky/serve/serve_state.py +63 -54
  242. sky/serve/serve_utils.py +145 -109
  243. sky/serve/server/core.py +46 -25
  244. sky/serve/server/impl.py +311 -162
  245. sky/serve/server/server.py +21 -19
  246. sky/serve/service.py +84 -68
  247. sky/serve/service_spec.py +45 -7
  248. sky/server/auth/loopback.py +38 -0
  249. sky/server/auth/oauth2_proxy.py +12 -7
  250. sky/server/common.py +47 -24
  251. sky/server/config.py +62 -28
  252. sky/server/constants.py +9 -1
  253. sky/server/daemons.py +109 -38
  254. sky/server/metrics.py +76 -96
  255. sky/server/middleware_utils.py +166 -0
  256. sky/server/plugins.py +222 -0
  257. sky/server/requests/executor.py +384 -145
  258. sky/server/requests/payloads.py +83 -19
  259. sky/server/requests/preconditions.py +15 -13
  260. sky/server/requests/request_names.py +123 -0
  261. sky/server/requests/requests.py +511 -157
  262. sky/server/requests/serializers/decoders.py +48 -17
  263. sky/server/requests/serializers/encoders.py +102 -20
  264. sky/server/requests/serializers/return_value_serializers.py +60 -0
  265. sky/server/requests/threads.py +117 -0
  266. sky/server/rest.py +116 -24
  267. sky/server/server.py +497 -179
  268. sky/server/server_utils.py +30 -0
  269. sky/server/stream_utils.py +219 -45
  270. sky/server/uvicorn.py +30 -19
  271. sky/setup_files/MANIFEST.in +6 -1
  272. sky/setup_files/alembic.ini +8 -0
  273. sky/setup_files/dependencies.py +64 -19
  274. sky/setup_files/setup.py +44 -44
  275. sky/sky_logging.py +13 -5
  276. sky/skylet/attempt_skylet.py +116 -24
  277. sky/skylet/configs.py +3 -1
  278. sky/skylet/constants.py +139 -29
  279. sky/skylet/events.py +74 -14
  280. sky/skylet/executor/__init__.py +1 -0
  281. sky/skylet/executor/slurm.py +189 -0
  282. sky/skylet/job_lib.py +143 -105
  283. sky/skylet/log_lib.py +252 -8
  284. sky/skylet/log_lib.pyi +47 -7
  285. sky/skylet/providers/ibm/node_provider.py +12 -8
  286. sky/skylet/providers/ibm/vpc_provider.py +13 -12
  287. sky/skylet/runtime_utils.py +21 -0
  288. sky/skylet/services.py +524 -0
  289. sky/skylet/skylet.py +27 -2
  290. sky/skylet/subprocess_daemon.py +104 -28
  291. sky/skypilot_config.py +99 -79
  292. sky/ssh_node_pools/constants.py +12 -0
  293. sky/ssh_node_pools/core.py +40 -3
  294. sky/ssh_node_pools/deploy/__init__.py +4 -0
  295. sky/ssh_node_pools/deploy/deploy.py +952 -0
  296. sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
  297. sky/ssh_node_pools/deploy/utils.py +173 -0
  298. sky/ssh_node_pools/server.py +20 -21
  299. sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
  300. sky/task.py +221 -104
  301. sky/templates/aws-ray.yml.j2 +1 -0
  302. sky/templates/azure-ray.yml.j2 +1 -0
  303. sky/templates/cudo-ray.yml.j2 +1 -0
  304. sky/templates/do-ray.yml.j2 +1 -0
  305. sky/templates/fluidstack-ray.yml.j2 +1 -0
  306. sky/templates/gcp-ray.yml.j2 +1 -0
  307. sky/templates/hyperbolic-ray.yml.j2 +1 -0
  308. sky/templates/ibm-ray.yml.j2 +2 -1
  309. sky/templates/jobs-controller.yaml.j2 +3 -0
  310. sky/templates/kubernetes-ray.yml.j2 +204 -55
  311. sky/templates/lambda-ray.yml.j2 +1 -0
  312. sky/templates/nebius-ray.yml.j2 +3 -0
  313. sky/templates/oci-ray.yml.j2 +1 -0
  314. sky/templates/paperspace-ray.yml.j2 +1 -0
  315. sky/templates/primeintellect-ray.yml.j2 +72 -0
  316. sky/templates/runpod-ray.yml.j2 +1 -0
  317. sky/templates/scp-ray.yml.j2 +1 -0
  318. sky/templates/seeweb-ray.yml.j2 +171 -0
  319. sky/templates/shadeform-ray.yml.j2 +73 -0
  320. sky/templates/slurm-ray.yml.j2 +85 -0
  321. sky/templates/vast-ray.yml.j2 +2 -0
  322. sky/templates/vsphere-ray.yml.j2 +1 -0
  323. sky/templates/websocket_proxy.py +188 -43
  324. sky/usage/usage_lib.py +16 -4
  325. sky/users/model.conf +1 -1
  326. sky/users/permission.py +84 -44
  327. sky/users/rbac.py +31 -3
  328. sky/utils/accelerator_registry.py +6 -3
  329. sky/utils/admin_policy_utils.py +18 -5
  330. sky/utils/annotations.py +128 -6
  331. sky/utils/asyncio_utils.py +78 -0
  332. sky/utils/atomic.py +1 -1
  333. sky/utils/auth_utils.py +153 -0
  334. sky/utils/cli_utils/status_utils.py +12 -7
  335. sky/utils/cluster_utils.py +28 -6
  336. sky/utils/command_runner.py +283 -30
  337. sky/utils/command_runner.pyi +63 -7
  338. sky/utils/common.py +3 -1
  339. sky/utils/common_utils.py +55 -7
  340. sky/utils/config_utils.py +1 -14
  341. sky/utils/context.py +127 -40
  342. sky/utils/context_utils.py +73 -18
  343. sky/utils/controller_utils.py +229 -70
  344. sky/utils/db/db_utils.py +95 -18
  345. sky/utils/db/kv_cache.py +149 -0
  346. sky/utils/db/migration_utils.py +24 -7
  347. sky/utils/env_options.py +4 -0
  348. sky/utils/git.py +559 -1
  349. sky/utils/kubernetes/create_cluster.sh +15 -30
  350. sky/utils/kubernetes/delete_cluster.sh +10 -7
  351. sky/utils/kubernetes/generate_kind_config.py +6 -66
  352. sky/utils/kubernetes/gpu_labeler.py +13 -3
  353. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
  354. sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
  355. sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
  356. sky/utils/kubernetes/rsync_helper.sh +11 -3
  357. sky/utils/kubernetes/ssh-tunnel.sh +7 -376
  358. sky/utils/kubernetes_enums.py +7 -15
  359. sky/utils/lock_events.py +4 -4
  360. sky/utils/locks.py +128 -31
  361. sky/utils/log_utils.py +0 -319
  362. sky/utils/resource_checker.py +13 -10
  363. sky/utils/resources_utils.py +53 -29
  364. sky/utils/rich_utils.py +8 -4
  365. sky/utils/schemas.py +138 -52
  366. sky/utils/subprocess_utils.py +17 -4
  367. sky/utils/thread_utils.py +91 -0
  368. sky/utils/timeline.py +2 -1
  369. sky/utils/ux_utils.py +35 -1
  370. sky/utils/volume.py +88 -4
  371. sky/utils/yaml_utils.py +9 -0
  372. sky/volumes/client/sdk.py +48 -10
  373. sky/volumes/server/core.py +59 -22
  374. sky/volumes/server/server.py +46 -17
  375. sky/volumes/volume.py +54 -42
  376. sky/workspaces/core.py +57 -21
  377. sky/workspaces/server.py +13 -12
  378. sky_templates/README.md +3 -0
  379. sky_templates/__init__.py +3 -0
  380. sky_templates/ray/__init__.py +0 -0
  381. sky_templates/ray/start_cluster +183 -0
  382. sky_templates/ray/stop_cluster +75 -0
  383. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
  384. skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
  385. skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
  386. sky/client/cli/git.py +0 -549
  387. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  388. sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
  389. sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
  390. sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
  391. sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
  392. sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
  393. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
  394. sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
  395. sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
  396. sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
  397. sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
  398. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  399. sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
  400. sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
  401. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  402. sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
  403. sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
  404. sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
  405. sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
  406. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  407. sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
  408. sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
  409. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  410. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
  411. sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
  412. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
  413. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
  414. sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
  415. sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
  416. sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
  417. sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
  418. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  419. sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
  420. sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
  421. sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
  422. sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
  423. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
  424. skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
  425. skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
  426. /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
  427. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
  428. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
  429. {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/server/metrics.py CHANGED
@@ -1,74 +1,33 @@
1
1
  """Instrumentation for the API server."""
2
2
 
3
- import contextlib
4
- import functools
3
+ import asyncio
4
+ import multiprocessing
5
5
  import os
6
+ import threading
6
7
  import time
8
+ from typing import List
7
9
 
8
10
  import fastapi
9
11
  from prometheus_client import generate_latest
10
12
  from prometheus_client import multiprocess
11
13
  import prometheus_client as prom
14
+ import psutil
12
15
  import starlette.middleware.base
13
16
  import uvicorn
14
17
 
18
+ from sky import core
15
19
  from sky import sky_logging
16
- from sky.skylet import constants
17
-
18
- # Whether the metrics are enabled, cannot be changed at runtime.
19
- METRICS_ENABLED = os.environ.get(constants.ENV_VAR_SERVER_METRICS_ENABLED,
20
- 'false').lower() == 'true'
20
+ from sky.metrics import utils as metrics_utils
21
21
 
22
22
  logger = sky_logging.init_logger(__name__)
23
23
 
24
- # Total number of API server requests, grouped by path, method, and status.
25
- SKY_APISERVER_REQUESTS_TOTAL = prom.Counter(
26
- 'sky_apiserver_requests_total',
27
- 'Total number of API server requests',
28
- ['path', 'method', 'status'],
29
- )
30
-
31
- # Time spent processing API server requests, grouped by path, method, and
32
- # status.
33
- SKY_APISERVER_REQUEST_DURATION_SECONDS = prom.Histogram(
34
- 'sky_apiserver_request_duration_seconds',
35
- 'Time spent processing API server requests',
36
- ['path', 'method', 'status'],
37
- buckets=(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 20.0, 30.0,
38
- 60.0, 120.0, float('inf')),
39
- )
40
-
41
- # Time spent processing requests in executor.
42
- SKY_APISERVER_REQUEST_EXECUTION_DURATION_SECONDS = prom.Histogram(
43
- 'sky_apiserver_request_execution_duration_seconds',
44
- 'Time spent executing requests in executor',
45
- ['request', 'worker'],
46
- buckets=(0.5, 1, 2.5, 5.0, 10.0, 15.0, 25.0, 40.0, 60.0, 90.0, 120.0, 180.0,
47
- float('inf')),
48
- )
49
-
50
- # Time spent processing a piece of code, refer to time_it().
51
- SKY_APISERVER_CODE_DURATION_SECONDS = prom.Histogram(
52
- 'sky_apiserver_code_duration_seconds',
53
- 'Time spent processing code',
54
- ['name', 'group'],
55
- buckets=(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 20.0, 30.0,
56
- 60.0, 120.0, float('inf')),
57
- )
58
-
59
- SKY_APISERVER_EVENT_LOOP_LAG_SECONDS = prom.Histogram(
60
- 'sky_apiserver_event_loop_lag_seconds',
61
- 'Scheduling delay of the server event loop',
62
- ['pid'],
63
- buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 20.0,
64
- 60.0, float('inf')),
65
- )
66
-
67
24
  metrics_app = fastapi.FastAPI()
68
25
 
69
26
 
27
+ # Serve /metrics in dedicated thread to avoid blocking the event loop
28
+ # of metrics server.
70
29
  @metrics_app.get('/metrics')
71
- async def metrics() -> fastapi.Response:
30
+ def metrics() -> fastapi.Response:
72
31
  """Expose aggregated Prometheus metrics from all worker processes."""
73
32
  if os.environ.get('PROMETHEUS_MULTIPROC_DIR'):
74
33
  # In multiprocess mode, we need to collect metrics from all processes.
@@ -82,6 +41,42 @@ async def metrics() -> fastapi.Response:
82
41
  headers={'Cache-Control': 'no-cache'})
83
42
 
84
43
 
44
@metrics_app.get('/gpu-metrics')
async def gpu_metrics() -> fastapi.Response:
    """Gets the GPU metrics from multiple external k8s clusters.

    Metrics are fetched concurrently for every context returned by
    ``core.get_all_contexts()`` except ``'in-cluster'``. A context whose
    fetch raises an ``Exception`` is logged and skipped, so a single failing
    context does not fail the whole response.

    Returns:
        A plain-text response containing the metrics of all successfully
        queried contexts, separated by blank lines.
    """
    # Filter first and keep the filtered list around: the results of
    # gather() are positionally aligned with this list, not with the
    # unfiltered list of contexts.
    external_contexts = [
        context for context in core.get_all_contexts()
        if context != 'in-cluster'
    ]
    tasks = [
        asyncio.create_task(metrics_utils.get_metrics_for_context(context))
        for context in external_contexts
    ]

    results = await asyncio.gather(*tasks, return_exceptions=True)

    all_metrics: List[str] = []
    for context, result in zip(external_contexts, results):
        if isinstance(result, Exception):
            logger.error(
                f'Failed to get metrics for context {context}: {result}')
        elif isinstance(result, BaseException):
            # Avoid changing behavior for non-Exception BaseExceptions
            # like KeyboardInterrupt/SystemExit: re-raise them.
            raise result
        else:
            all_metrics.append(result)

    combined_metrics = '\n\n'.join(all_metrics)

    # Return as plain text for Prometheus compatibility
    return fastapi.Response(
        content=combined_metrics,
        media_type='text/plain; version=0.0.4; charset=utf-8')
78
+
79
+
85
80
  def build_metrics_server(host: str, port: int) -> uvicorn.Server:
86
81
  metrics_config = uvicorn.Config(
87
82
  'sky.server.metrics:metrics_app',
@@ -125,56 +120,41 @@ class PrometheusMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
125
120
  status_code_group = '5xx'
126
121
  raise
127
122
  finally:
128
- SKY_APISERVER_REQUESTS_TOTAL.labels(path=path,
129
- method=method,
130
- status=status_code_group).inc()
123
+ metrics_utils.SKY_APISERVER_REQUESTS_TOTAL.labels(
124
+ path=path, method=method, status=status_code_group).inc()
131
125
  if not streaming:
132
126
  duration = time.time() - start_time
133
- SKY_APISERVER_REQUEST_DURATION_SECONDS.labels(
127
+ metrics_utils.SKY_APISERVER_REQUEST_DURATION_SECONDS.labels(
134
128
  path=path, method=method,
135
129
  status=status_code_group).observe(duration)
136
130
 
137
131
  return response
138
132
 
139
133
 
140
- @contextlib.contextmanager
141
- def time_it(name: str, group: str = 'default'):
142
- """Context manager to measure and record code execution duration."""
143
- if not METRICS_ENABLED:
144
- yield
145
- else:
146
- start_time = time.time()
147
- try:
148
- yield
149
- finally:
150
- duration = time.time() - start_time
151
- SKY_APISERVER_CODE_DURATION_SECONDS.labels(
152
- name=name, group=group).observe(duration)
153
-
154
-
155
- def time_me(func):
156
- """Measure the duration of decorated function."""
157
-
158
- @functools.wraps(func)
159
- def wrapper(*args, **kwargs):
160
- if not METRICS_ENABLED:
161
- return func(*args, **kwargs)
162
- name = f'{func.__module__}/{func.__name__}'
163
- with time_it(name, group='function'):
164
- return func(*args, **kwargs)
165
-
166
- return wrapper
167
-
168
-
169
- def time_me_async(func):
170
- """Measure the duration of decorated async function."""
171
-
172
- @functools.wraps(func)
173
- async def async_wrapper(*args, **kwargs):
174
- if not METRICS_ENABLED:
175
- return await func(*args, **kwargs)
176
- name = f'{func.__module__}/{func.__name__}'
177
- with time_it(name, group='function'):
178
- return await func(*args, **kwargs)
179
-
180
- return async_wrapper
134
# Peak RSS (in bytes) of the current process within the current time bucket;
# updated by process_monitor().
peak_rss_bytes = 0


def process_monitor(process_type: str, stop: threading.Event):
    """Periodically publishes memory and CPU metrics of the current process.

    Samples the process about once per second until ``stop`` is set and
    exports:
      - the peak RSS observed within the current 30-second bucket, and
      - the cumulative user and system CPU times.

    Args:
        process_type: Value of the ``type`` label attached to the metrics.
        stop: Event that terminates the monitoring loop once it is set.
    """
    global peak_rss_bytes
    pid = multiprocessing.current_process().pid
    proc = psutil.Process(pid)
    last_bucket_end = time.time()
    bucket_peak = 0
    while not stop.is_set():
        if time.time() - last_bucket_end >= 30:
            # Reset peak RSS for the next time bucket.
            last_bucket_end = time.time()
            bucket_peak = 0
        # Accumulate into bucket_peak so the gauge reports the maximum RSS
        # seen in this bucket rather than just the latest sample.
        bucket_peak = max(bucket_peak, proc.memory_info().rss)
        peak_rss_bytes = bucket_peak
        metrics_utils.SKY_APISERVER_PROCESS_PEAK_RSS.labels(
            pid=pid, type=process_type).set(peak_rss_bytes)
        ctimes = proc.cpu_times()
        metrics_utils.SKY_APISERVER_PROCESS_CPU_TOTAL.labels(
            pid=pid, type=process_type, mode='user').set(ctimes.user)
        metrics_utils.SKY_APISERVER_PROCESS_CPU_TOTAL.labels(
            pid=pid, type=process_type, mode='system').set(ctimes.system)
        # Wait up to one second between samples, but wake up immediately
        # when stop is set so the monitor does not delay shutdown.
        stop.wait(1)
@@ -0,0 +1,166 @@
1
+ """Utilities for building middlewares."""
2
+ import enum
3
+ import http
4
+ from typing import Type
5
+
6
+ import fastapi
7
+ import starlette.middleware.base
8
+ import starlette.types
9
+
10
+ from sky import sky_logging
11
+
12
+ logger = sky_logging.init_logger(__name__)
13
+
14
+
15
class WebSocketDecision(enum.Enum):
    """Outcome of running a websocket handshake through HTTP middleware."""

    # The handshake may proceed to the wrapped application.
    ACCEPT = 'accept'
    # The middleware answered with HTTP 401.
    UNAUTHORIZED = 'unauthorized'
    # The middleware answered with HTTP 403.
    FORBIDDEN = 'forbidden'
    # Any other rejection, including an exception raised by the middleware.
    ERROR = 'error'
20
+
21
+
22
def websocket_aware(
        middleware_cls: Type[starlette.middleware.base.BaseHTTPMiddleware]):
    """Class decorator that lets an HTTP middleware also gate WebSockets.

    For a websocket scope, an HTTP-style request resembling the upgrade
    request of the websocket handshake is assembled and passed to the wrapped
    middleware's ``dispatch``. The connection is handed to the inner app only
    if the middleware invoked ``call_next`` and the resulting status code is
    in the range [200, 400). Otherwise a ``websocket.close`` event is sent
    with code 4401 (HTTP 401), 4403 (HTTP 403) or 1011 (anything else,
    including an exception raised by the middleware).

    Note: for websocket connections the middleware's response is only
    inspected for its status code, and the synthetic HTTP scope is a shallow
    copy of the websocket scope (sharing the same ``state`` dict), so other
    mutations made by the middleware on the request and response are
    discarded.

    Args:
        middleware_cls: The middleware class to adapt.

    Returns:
        A wrapper class that carries ``middleware_cls``'s ``__name__``,
        ``__qualname__``, ``__module__`` and ``__doc__``.
    """

    class WebSocketAwareMiddleware:
        """WebSocket-aware middleware wrapper."""

        def __init__(self, app: starlette.types.ASGIApp, *args, **kwargs):
            self.app = app
            self.middleware = middleware_cls(app, *args, **kwargs)

        async def __call__(self, scope: starlette.types.Scope,
                           receive: starlette.types.Receive,
                           send: starlette.types.Send):
            if scope.get('type') != 'websocket':
                # Delegate other scopes to the underlying HTTP middleware.
                await self.middleware(scope, receive, send)
                return
            await self._handle_websocket(scope, receive, send)

        async def dispatch(
                self, request: fastapi.Request,
                call_next: starlette.middleware.base.RequestResponseEndpoint):
            """Forward to the wrapped middleware's dispatch for compatibility."""
            return await self.middleware.dispatch(request, call_next)

        async def _handle_websocket(self, scope: starlette.types.Scope,
                                    receive: starlette.types.Receive,
                                    send: starlette.types.Send):
            """Run the HTTP middleware, then accept or close the websocket."""
            decision = await self._run_websocket_dispatch(scope)
            if decision == WebSocketDecision.ACCEPT:
                await self.app(scope, receive, send)
                return

            # Close code and reason for each rejection; anything not listed
            # here is reported as an internal error.
            rejections = {
                WebSocketDecision.UNAUTHORIZED: (4401, 'Unauthorized'),
                WebSocketDecision.FORBIDDEN: (4403, 'Forbidden'),
            }
            code, reason = rejections.get(decision,
                                          (1011, 'Internal Server Error'))
            await send({
                'type': 'websocket.close',
                'code': code,
                'reason': reason,
            })

        async def _run_websocket_dispatch(
                self, scope: starlette.types.Scope) -> WebSocketDecision:
            """Dispatch a synthetic HTTP request and classify the result."""
            synthetic_scope = self._build_http_scope(scope)
            synthetic_receive = self._http_receive_adapter()
            request = fastapi.Request(synthetic_scope,
                                      receive=synthetic_receive)
            forwarded = False
            placeholder = fastapi.Response(status_code=http.HTTPStatus.OK)

            async def call_next(req):
                # Record whether the underlying HTTP middleware chose to
                # continue the chain; only then may the websocket connection
                # proceed.
                nonlocal forwarded
                del req
                forwarded = True
                return placeholder

            try:
                response = await self.dispatch(request, call_next)
            except Exception as e:  # pylint: disable=broad-except
                logger.error('Exception occurred in middleware dispatch for '
                             f'WebSocket scope: {e}')
                return WebSocketDecision.ERROR

            # A dispatch that returns nothing is treated like the stub 200.
            effective = placeholder if response is None else response
            status_code = effective.status_code

            if forwarded and 200 <= status_code < 400:
                return WebSocketDecision.ACCEPT
            if status_code == http.HTTPStatus.UNAUTHORIZED:
                return WebSocketDecision.UNAUTHORIZED
            if status_code == http.HTTPStatus.FORBIDDEN:
                return WebSocketDecision.FORBIDDEN
            return WebSocketDecision.ERROR

        @staticmethod
        def _build_http_scope(
                scope: starlette.types.Scope) -> starlette.types.Scope:
            """Derive an HTTP GET scope from a websocket scope.

            The websocket scope gains a ``state`` dict if it has none, and
            that same dict object is referenced by the returned scope.
            """
            shared_state = scope.setdefault('state', {})
            ws_scheme = scope.get('scheme', 'ws')
            # ws -> http, wss -> https; any other scheme passes through.
            http_scheme = {'ws': 'http', 'wss': 'https'}.get(ws_scheme,
                                                              ws_scheme)
            http_scope = dict(scope)
            http_scope.update(
                type='http',
                scheme=http_scheme,
                method='GET',
                http_version=scope.get('http_version', '1.1'),
                state=shared_state,
            )
            return http_scope

        @staticmethod
        def _http_receive_adapter() -> starlette.types.Receive:
            """Build a receive callable that mimics a body-less HTTP request.

            The first call yields a single empty ``http.request`` event;
            every later call yields ``http.disconnect``.
            """
            pending = [{
                'type': 'http.request',
                'body': b'',
                'more_body': False,
            }]

            async def receive():
                if pending:
                    return pending.pop()
                return {
                    'type': 'http.disconnect',
                }

            return receive

    # Make the wrapper present itself as the wrapped middleware class.
    for attr in ('__name__', '__qualname__', '__module__', '__doc__'):
        setattr(WebSocketAwareMiddleware, attr, getattr(middleware_cls, attr))
    return WebSocketAwareMiddleware
sky/server/plugins.py ADDED
@@ -0,0 +1,222 @@
1
+ """Load plugins for the SkyPilot API server."""
2
+ import abc
3
+ import dataclasses
4
+ import importlib
5
+ import os
6
+ from typing import Dict, List, Optional, Tuple
7
+
8
+ from fastapi import FastAPI
9
+
10
+ from sky import sky_logging
11
+ from sky.skylet import constants as skylet_constants
12
+ from sky.utils import common_utils
13
+ from sky.utils import config_utils
14
+ from sky.utils import yaml_utils
15
+
16
+ logger = sky_logging.init_logger(__name__)
17
+
18
+ _DEFAULT_PLUGINS_CONFIG_PATH = '~/.sky/plugins.yaml'
19
+ _PLUGINS_CONFIG_ENV_VAR = (
20
+ f'{skylet_constants.SKYPILOT_SERVER_ENV_VAR_PREFIX}PLUGINS_CONFIG')
21
+
22
+
23
class ExtensionContext:
    """Context handed to each plugin while it installs itself.

    Attributes:
        app: The FastAPI application instance (may be None).
        rbac_rules: ``(role, RBACRule)`` pairs registered by plugins, in
            registration order. Example:
            [
                ('user', RBACRule(path='/plugins/api/xx/*', method='POST')),
                ('user', RBACRule(path='/plugins/api/xx/*', method='DELETE'))
            ]
    """

    def __init__(self, app: Optional[FastAPI] = None):
        self.app = app
        self.rbac_rules: List[Tuple[str, RBACRule]] = []

    def register_rbac_rule(self,
                           path: str,
                           method: str,
                           description: Optional[str] = None,
                           role: str = 'user') -> None:
        """Record an RBAC rule on behalf of a plugin.

        Plugins call this during their install phase to declare endpoints
        that should be restricted to admin users.

        Args:
            path: The path pattern to restrict (supports wildcards with
                keyMatch2). Example: '/plugins/api/credentials/*'
            method: The HTTP method to restrict. Example: 'POST', 'DELETE'
            description: Optional description of what this rule protects.
            role: The role the rule is attached to (default: 'user'). Rules
                added to the 'user' role block regular users but allow
                admins.

        Example:
            def install(self, ctx: ExtensionContext):
                # Only admin can upload credentials
                ctx.register_rbac_rule(
                    path='/plugins/api/credentials/*',
                    method='POST',
                    description='Only admin can upload credentials'
                )
        """
        self.rbac_rules.append(
            (role, RBACRule(path=path, method=method,
                            description=description)))
        # The description is appended to the log line only when it is given.
        suffix = f' - {description}' if description else ''
        logger.debug(
            f'Registered RBAC rule for {role}: {method} {path}{suffix}')
73
+
74
+
75
@dataclasses.dataclass
class RBACRule:
    """A single RBAC restriction on a plugin endpoint.

    Attributes:
        path: The path pattern to match (supports wildcards with keyMatch2).
            Example: '/plugins/api/credentials/*'
        method: The HTTP method to restrict. Example: 'POST', 'DELETE'
        description: Optional description of what this rule protects.
    """
    path: str
    method: str
    description: Optional[str] = dataclasses.field(default=None)
88
+
89
+
90
class BasePlugin(abc.ABC):
    """Abstract base that every SkyPilot server plugin must subclass."""

    @property
    def js_extension_path(self) -> Optional[str]:
        """Optional API route to the JavaScript extension to load.

        The base implementation returns None.
        """
        return None

    @abc.abstractmethod
    def install(self, extension_context: ExtensionContext):
        """Hook called by the API server to let the plugin install itself.

        Subclasses must override this method.
        """
        raise NotImplementedError

    def shutdown(self):
        """Hook called by the API server to let the plugin shut down.

        The base implementation does nothing.
        """
106
+
107
+
108
+ def _config_schema():
109
+ plugin_schema = {
110
+ 'type': 'object',
111
+ 'required': ['class'],
112
+ 'additionalProperties': False,
113
+ 'properties': {
114
+ 'class': {
115
+ 'type': 'string',
116
+ },
117
+ 'parameters': {
118
+ 'type': 'object',
119
+ 'required': [],
120
+ 'additionalProperties': True,
121
+ },
122
+ },
123
+ }
124
+ return {
125
+ 'type': 'object',
126
+ 'required': [],
127
+ 'additionalProperties': False,
128
+ 'properties': {
129
+ 'plugins': {
130
+ 'type': 'array',
131
+ 'items': plugin_schema,
132
+ 'default': [],
133
+ },
134
+ },
135
+ }
136
+
137
+
138
def _load_plugin_config() -> Optional[config_utils.Config]:
    """Read and validate the plugins config file, if one exists.

    The path comes from the environment variable named by
    ``_PLUGINS_CONFIG_ENV_VAR`` and falls back to
    ``_DEFAULT_PLUGINS_CONFIG_PATH``; ``~`` is expanded.

    Returns:
        The validated config, or None when the file does not exist. An empty
        file is treated as an empty config.
    """
    configured_path = os.environ.get(_PLUGINS_CONFIG_ENV_VAR,
                                     _DEFAULT_PLUGINS_CONFIG_PATH)
    resolved_path = os.path.expanduser(configured_path)
    if os.path.exists(resolved_path):
        raw_config = yaml_utils.read_yaml(resolved_path) or {}
        common_utils.validate_schema(raw_config,
                                     _config_schema(),
                                     err_msg_prefix='Invalid plugins config: ')
        return config_utils.Config.from_dict(raw_config)
    return None
150
+
151
+
152
# Installed plugin instances, keyed by the dotted class path given in the
# plugins config.
_PLUGINS: Dict[str, BasePlugin] = {}
# Extension context passed to the most recent `load_plugins()` call.
_EXTENSION_CONTEXT: Optional[ExtensionContext] = None


def load_plugins(extension_context: ExtensionContext):
    """Load and initialize plugins from the config.

    The context is remembered at module level. Every entry under ``plugins``
    in the config is then imported, instantiated with its ``parameters`` as
    keyword arguments, installed against ``extension_context`` and recorded
    in ``_PLUGINS`` under its class path. Nothing is loaded when the config
    is missing or empty.

    Args:
        extension_context: Context handed to each plugin's ``install``.

    Raises:
        ValueError: If a ``class`` value is not of the form
            ``<module path>.<class name>``.
        ImportError: If the plugin's module cannot be imported.
        AttributeError: If the module has no attribute named after the class.
        TypeError: If the resolved attribute is not a ``BasePlugin`` subclass.
    """
    global _EXTENSION_CONTEXT
    _EXTENSION_CONTEXT = extension_context

    config = _load_plugin_config()
    if not config:
        return

    for plugin_config in config.get('plugins', []):
        class_path = plugin_config['class']
        # rpartition never fails to unpack, so a path without a module part
        # is reported explicitly instead of as a tuple-unpacking error.
        module_path, separator, class_name = class_path.rpartition('.')
        if not separator or not module_path:
            raise ValueError(
                f'Invalid plugin class path {class_path!r}: expected a fully '
                'qualified name of the form "<module path>.<class name>".')
        try:
            module = importlib.import_module(module_path)
        except ImportError as e:
            raise ImportError(
                f'Failed to import plugin module: {module_path}. '
                'Please check if the module is installed in your Python '
                'environment.') from e
        try:
            plugin_cls = getattr(module, class_name)
        except AttributeError as e:
            raise AttributeError(
                f'Could not find plugin {class_name} class in module '
                f'{module_path}. ') from e
        # issubclass() itself raises an unhelpful TypeError when its first
        # argument is not a class, so check that first and report both cases
        # with the same message.
        if not (isinstance(plugin_cls, type) and
                issubclass(plugin_cls, BasePlugin)):
            raise TypeError(
                f'Plugin {class_path} must inherit from BasePlugin.')
        parameters = plugin_config.get('parameters') or {}
        plugin = plugin_cls(**parameters)
        plugin.install(extension_context)
        _PLUGINS[class_path] = plugin
188
+
189
+
190
def get_plugins() -> List[BasePlugin]:
    """Return the registered plugin instances as a new list.

    The list itself is a fresh shallow copy of the registry's values; the
    plugin objects in it are the registered instances, not copies.
    """
    return [*_PLUGINS.values()]
193
+
194
+
195
def get_plugin_rbac_rules() -> Dict[str, List[Dict[str, str]]]:
    """Group the RBAC rules registered on the ExtensionContext by role.

    Only each rule's ``path`` and ``method`` are included in the result.

    Returns:
        Dictionary mapping role names to lists of blocklist rules; empty if
        no extension context has been set or no rules were registered.
        Example:
            {
                'user': [
                    {'path': '/plugins/api/credentials/*', 'method': 'POST'},
                    {'path': '/plugins/api/credentials/*', 'method': 'DELETE'}
                ]
            }
    """
    grouped: Dict[str, List[Dict[str, str]]] = {}
    if not _EXTENSION_CONTEXT:
        return grouped

    # Collect rules registered via ExtensionContext
    for role, rule in _EXTENSION_CONTEXT.rbac_rules:
        grouped.setdefault(role, []).append({
            'path': rule.path,
            'method': rule.method,
        })
    return grouped