@openhands/extensions 0.0.1-alpha → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347) hide show
  1. package/.agents/skills/custom-codereview-guide.md +25 -0
  2. package/.github/pull_request_template.md +38 -0
  3. package/.github/release.yml +14 -0
  4. package/.github/workflows/check-extensions.yml +72 -0
  5. package/.github/workflows/npm-publish.yml +89 -0
  6. package/.github/workflows/pr.yml +30 -0
  7. package/.github/workflows/release.yml +24 -0
  8. package/.github/workflows/tests.yml +25 -0
  9. package/.github/workflows/vulnerability-scan.yml +87 -0
  10. package/.release-please-manifest.json +3 -0
  11. package/AGENTS.md +132 -0
  12. package/README.md +10 -0
  13. package/analysis_results.md +162 -0
  14. package/marketplaces/large-codebase.json +66 -0
  15. package/marketplaces/openhands-extensions.json +682 -0
  16. package/package.json +4 -10
  17. package/plugins/README.md +30 -0
  18. package/plugins/city-weather/.plugin/plugin.json +13 -0
  19. package/plugins/city-weather/README.md +145 -0
  20. package/plugins/city-weather/commands/now.md +56 -0
  21. package/plugins/cobol-modernization/.plugin/plugin.json +19 -0
  22. package/plugins/cobol-modernization/README.md +201 -0
  23. package/plugins/cobol-modernization/references/troubleshooting.md +18 -0
  24. package/plugins/cobol-modernization/skills/build-setup/SKILL.md +78 -0
  25. package/plugins/cobol-modernization/skills/build-setup/scripts/install-gnucobol.sh +32 -0
  26. package/plugins/cobol-modernization/skills/cobol-modernization-overview/SKILL.md +113 -0
  27. package/plugins/cobol-modernization/skills/mainfraime-removal/SKILL.md +62 -0
  28. package/plugins/cobol-modernization/skills/mainfraime-removal/references/cics-transformation-examples.md +45 -0
  29. package/plugins/cobol-modernization/skills/mainframe-planning/SKILL.md +78 -0
  30. package/plugins/cobol-modernization/skills/to-java-migration/SKILL.md +59 -0
  31. package/plugins/cobol-modernization/skills/to-java-migration/references/cobol-to-java-example.md +58 -0
  32. package/plugins/cobol-modernization/skills/to-java-migration/references/datatype-mappings.md +19 -0
  33. package/plugins/issue-duplicate-checker/.plugin/plugin.json +13 -0
  34. package/plugins/issue-duplicate-checker/README.md +51 -0
  35. package/plugins/issue-duplicate-checker/action.yml +349 -0
  36. package/plugins/issue-duplicate-checker/scripts/auto_close_duplicate_issues.py +569 -0
  37. package/plugins/issue-duplicate-checker/scripts/issue_duplicate_check_openhands.py +681 -0
  38. package/plugins/issue-duplicate-checker/scripts/post_duplicate_notice.js +220 -0
  39. package/plugins/issue-duplicate-checker/scripts/remove_duplicate_candidate_label.js +27 -0
  40. package/plugins/magic-test/.plugin/plugin.json +13 -0
  41. package/plugins/magic-test/skills/magic-word/SKILL.md +33 -0
  42. package/plugins/migration-scoring/.plugin/plugin.json +19 -0
  43. package/plugins/migration-scoring/README.md +244 -0
  44. package/plugins/migration-scoring/skills/migration-mapping/SKILL.md +72 -0
  45. package/plugins/migration-scoring/skills/migration-report/SKILL.md +118 -0
  46. package/plugins/migration-scoring/skills/migration-scoring-overview/SKILL.md +126 -0
  47. package/plugins/migration-scoring/skills/score-quality/SKILL.md +54 -0
  48. package/plugins/migration-scoring/skills/score-quality/references/scoring-criteria.md +30 -0
  49. package/plugins/migration-scoring/skills/score-style/SKILL.md +106 -0
  50. package/plugins/onboarding/.plugin/plugin.json +20 -0
  51. package/plugins/onboarding/README.md +30 -0
  52. package/plugins/onboarding/references/criteria.md +144 -0
  53. package/plugins/onboarding/skills/agent-readiness-report/README.md +23 -0
  54. package/plugins/onboarding/skills/agent-readiness-report/SKILL.md +122 -0
  55. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_agent_instructions.sh +88 -0
  56. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_build_env.sh +114 -0
  57. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_feedback_loops.sh +133 -0
  58. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_policy.sh +113 -0
  59. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_workflows.sh +127 -0
  60. package/plugins/onboarding/skills/improve-agent-readiness/README.md +19 -0
  61. package/plugins/onboarding/skills/improve-agent-readiness/SKILL.md +167 -0
  62. package/plugins/onboarding/skills/setup-agents-md/README.md +15 -0
  63. package/plugins/onboarding/skills/setup-agents-md/SKILL.md +150 -0
  64. package/plugins/onboarding/skills/setup-openhands/README.md +20 -0
  65. package/plugins/onboarding/skills/setup-openhands/SKILL.md +56 -0
  66. package/plugins/onboarding/skills/setup-pr-review/README.md +23 -0
  67. package/plugins/onboarding/skills/setup-pr-review/SKILL.md +72 -0
  68. package/plugins/openhands/.plugin/plugin.json +13 -0
  69. package/plugins/openhands/README.md +52 -0
  70. package/plugins/openhands/SKILL.md +61 -0
  71. package/plugins/openhands/commands/create.md +55 -0
  72. package/plugins/openhands/commands/openhands-cloud.md +8 -0
  73. package/plugins/openhands/scripts/run.sh +69 -0
  74. package/plugins/pr-review/.plugin/plugin.json +13 -0
  75. package/plugins/pr-review/README.md +393 -0
  76. package/plugins/pr-review/action.yml +298 -0
  77. package/plugins/pr-review/scripts/agent_script.py +1282 -0
  78. package/plugins/pr-review/scripts/evaluate_review.py +655 -0
  79. package/plugins/pr-review/scripts/prompt.py +260 -0
  80. package/plugins/pr-review/workflows/pr-review-by-openhands.yml +51 -0
  81. package/plugins/pr-review/workflows/pr-review-evaluation.yml +85 -0
  82. package/plugins/qa-changes/.plugin/plugin.json +11 -0
  83. package/plugins/qa-changes/README.md +185 -0
  84. package/plugins/qa-changes/action.yml +181 -0
  85. package/plugins/qa-changes/scripts/agent_script.py +406 -0
  86. package/plugins/qa-changes/scripts/evaluate_qa_changes.py +385 -0
  87. package/plugins/qa-changes/scripts/prompt.py +174 -0
  88. package/plugins/qa-changes/workflows/qa-changes-by-openhands.yml +50 -0
  89. package/plugins/qa-changes/workflows/qa-changes-evaluation.yml +85 -0
  90. package/plugins/release-notes/.plugin/plugin.json +19 -0
  91. package/plugins/release-notes/README.md +283 -0
  92. package/plugins/release-notes/SKILL.md +83 -0
  93. package/plugins/release-notes/action.yml +117 -0
  94. package/plugins/release-notes/commands/release-notes.md +8 -0
  95. package/plugins/release-notes/scripts/agent_script.py +292 -0
  96. package/plugins/release-notes/scripts/generate_release_notes.py +733 -0
  97. package/plugins/release-notes/scripts/prompt.py +90 -0
  98. package/plugins/release-notes/scripts/validate_release_notes.py +328 -0
  99. package/plugins/release-notes/workflows/release-notes.yml +76 -0
  100. package/plugins/vulnerability-remediation/.plugin/plugin.json +19 -0
  101. package/plugins/vulnerability-remediation/README.md +217 -0
  102. package/plugins/vulnerability-remediation/action.yml +187 -0
  103. package/plugins/vulnerability-remediation/scripts/scan_and_remediate.py +561 -0
  104. package/plugins/vulnerability-remediation/workflows/vulnerability-scan.yml +87 -0
  105. package/pyproject.toml +12 -0
  106. package/release-please-config.json +16 -0
  107. package/scripts/sync_extensions.py +494 -0
  108. package/scripts/sync_openhands_sdk_skill.py +264 -0
  109. package/skills/README.md +159 -0
  110. package/skills/add-javadoc/.plugin/plugin.json +18 -0
  111. package/skills/add-javadoc/README.md +40 -0
  112. package/skills/add-javadoc/SKILL.md +35 -0
  113. package/skills/add-javadoc/references/example.md +32 -0
  114. package/skills/add-skill/.plugin/plugin.json +18 -0
  115. package/skills/add-skill/README.md +67 -0
  116. package/skills/add-skill/SKILL.md +47 -0
  117. package/skills/add-skill/scripts/fetch_skill.py +259 -0
  118. package/skills/agent-creator/.plugin/plugin.json +20 -0
  119. package/skills/agent-creator/README.md +104 -0
  120. package/skills/agent-creator/SKILL.md +190 -0
  121. package/skills/agent-creator/commands/agent-creator.md +8 -0
  122. package/skills/agent-creator/references/fallback.md +117 -0
  123. package/skills/agent-memory/.plugin/plugin.json +18 -0
  124. package/skills/agent-memory/README.md +35 -0
  125. package/skills/agent-memory/SKILL.md +30 -0
  126. package/skills/agent-memory/commands/remember.md +8 -0
  127. package/skills/agent-sdk-builder/.plugin/plugin.json +18 -0
  128. package/skills/agent-sdk-builder/README.md +40 -0
  129. package/skills/agent-sdk-builder/SKILL.md +37 -0
  130. package/skills/agent-sdk-builder/commands/agent-builder.md +8 -0
  131. package/skills/azure-devops/.plugin/plugin.json +18 -0
  132. package/skills/azure-devops/README.md +55 -0
  133. package/skills/azure-devops/SKILL.md +50 -0
  134. package/skills/bitbucket/.plugin/plugin.json +17 -0
  135. package/skills/bitbucket/README.md +50 -0
  136. package/skills/bitbucket/SKILL.md +45 -0
  137. package/skills/code-review/.plugin/plugin.json +19 -0
  138. package/skills/code-review/README.md +18 -0
  139. package/skills/code-review/SKILL.md +208 -0
  140. package/skills/code-review/commands/codereview-roasted.md +8 -0
  141. package/skills/code-review/commands/codereview.md +8 -0
  142. package/skills/code-review/references/risk-evaluation.md +41 -0
  143. package/skills/code-review/references/supply-chain-security.md +31 -0
  144. package/skills/code-simplifier/.plugin/plugin.json +21 -0
  145. package/skills/code-simplifier/README.md +30 -0
  146. package/skills/code-simplifier/SKILL.md +91 -0
  147. package/skills/code-simplifier/commands/simplify.md +8 -0
  148. package/skills/code-simplifier/references/code-quality-review.md +86 -0
  149. package/skills/code-simplifier/references/code-reuse-review.md +63 -0
  150. package/skills/code-simplifier/references/efficiency-review.md +81 -0
  151. package/skills/datadog/.plugin/plugin.json +19 -0
  152. package/skills/datadog/README.md +100 -0
  153. package/skills/datadog/SKILL.md +95 -0
  154. package/skills/deno/.plugin/plugin.json +18 -0
  155. package/skills/deno/README.md +5 -0
  156. package/skills/deno/SKILL.md +99 -0
  157. package/skills/deno/references/README.md +6 -0
  158. package/skills/discord/.plugin/plugin.json +18 -0
  159. package/skills/discord/README.md +31 -0
  160. package/skills/discord/SKILL.md +109 -0
  161. package/skills/discord/__init__.py +0 -0
  162. package/skills/discord/references/REFERENCE.md +78 -0
  163. package/skills/discord/scripts/__init__.py +0 -0
  164. package/skills/discord/scripts/_http.py +127 -0
  165. package/skills/discord/scripts/post_webhook.py +106 -0
  166. package/skills/discord/scripts/send_message.py +102 -0
  167. package/skills/docker/.plugin/plugin.json +17 -0
  168. package/skills/docker/README.md +34 -0
  169. package/skills/docker/SKILL.md +29 -0
  170. package/skills/evidence-based-citations/.plugin/plugin.json +20 -0
  171. package/skills/evidence-based-citations/README.md +31 -0
  172. package/skills/evidence-based-citations/SKILL.md +59 -0
  173. package/skills/flarglebargle/.plugin/plugin.json +16 -0
  174. package/skills/flarglebargle/README.md +14 -0
  175. package/skills/flarglebargle/SKILL.md +9 -0
  176. package/skills/frontend-design/.plugin/plugin.json +21 -0
  177. package/skills/frontend-design/LICENSE.txt +177 -0
  178. package/skills/frontend-design/README.md +42 -0
  179. package/skills/frontend-design/SKILL.md +42 -0
  180. package/skills/github/.plugin/plugin.json +19 -0
  181. package/skills/github/README.md +42 -0
  182. package/skills/github/SKILL.md +106 -0
  183. package/skills/github-pr-review/.plugin/plugin.json +18 -0
  184. package/skills/github-pr-review/README.md +145 -0
  185. package/skills/github-pr-review/SKILL.md +148 -0
  186. package/skills/github-pr-review/commands/github-pr-review.md +8 -0
  187. package/skills/github-pr-reviewer/.plugin/plugin.json +20 -0
  188. package/skills/github-pr-reviewer/README.md +34 -0
  189. package/skills/github-pr-reviewer/SKILL.md +89 -0
  190. package/skills/github-pr-reviewer/commands/pr-reviewer:setup.md +8 -0
  191. package/skills/github-repo-monitor/.plugin/plugin.json +22 -0
  192. package/skills/github-repo-monitor/README.md +70 -0
  193. package/skills/github-repo-monitor/SKILL.md +316 -0
  194. package/skills/github-repo-monitor/commands/github-monitor:poll.md +8 -0
  195. package/skills/github-repo-monitor/references/github-api.md +241 -0
  196. package/skills/github-repo-monitor/references/state-schema.md +160 -0
  197. package/skills/github-repo-monitor/scripts/main.py +915 -0
  198. package/skills/github-repo-monitor/tests/test_main.py +400 -0
  199. package/skills/gitlab/.plugin/plugin.json +17 -0
  200. package/skills/gitlab/README.md +37 -0
  201. package/skills/gitlab/SKILL.md +32 -0
  202. package/skills/incident-retrospective/.plugin/plugin.json +21 -0
  203. package/skills/incident-retrospective/README.md +34 -0
  204. package/skills/incident-retrospective/SKILL.md +98 -0
  205. package/skills/incident-retrospective/commands/incident-retro:setup.md +8 -0
  206. package/skills/iterate/.plugin/plugin.json +13 -0
  207. package/skills/iterate/README.md +25 -0
  208. package/skills/iterate/SKILL.md +399 -0
  209. package/skills/iterate/commands/babysit.md +8 -0
  210. package/skills/iterate/commands/iterate.md +8 -0
  211. package/skills/iterate/commands/verify.md +8 -0
  212. package/skills/iterate/references/heuristics.md +58 -0
  213. package/skills/iterate/references/verification.md +96 -0
  214. package/skills/jupyter/.plugin/plugin.json +18 -0
  215. package/skills/jupyter/README.md +55 -0
  216. package/skills/jupyter/SKILL.md +50 -0
  217. package/skills/kubernetes/.plugin/plugin.json +18 -0
  218. package/skills/kubernetes/README.md +53 -0
  219. package/skills/kubernetes/SKILL.md +48 -0
  220. package/skills/learn-from-code-review/.plugin/plugin.json +19 -0
  221. package/skills/learn-from-code-review/README.md +64 -0
  222. package/skills/learn-from-code-review/SKILL.md +186 -0
  223. package/skills/learn-from-code-review/commands/learn-from-reviews.md +8 -0
  224. package/skills/linear/.plugin/plugin.json +19 -0
  225. package/skills/linear/README.md +58 -0
  226. package/skills/linear/SKILL.md +213 -0
  227. package/skills/linear-triage/.plugin/plugin.json +21 -0
  228. package/skills/linear-triage/README.md +34 -0
  229. package/skills/linear-triage/SKILL.md +91 -0
  230. package/skills/linear-triage/commands/linear-triage:setup.md +8 -0
  231. package/skills/notion/.plugin/plugin.json +17 -0
  232. package/skills/notion/README.md +114 -0
  233. package/skills/notion/SKILL.md +109 -0
  234. package/skills/npm/.plugin/plugin.json +17 -0
  235. package/skills/npm/README.md +14 -0
  236. package/skills/npm/SKILL.md +9 -0
  237. package/skills/openhands-api/.plugin/plugin.json +22 -0
  238. package/skills/openhands-api/README.md +48 -0
  239. package/skills/openhands-api/SKILL.md +399 -0
  240. package/skills/openhands-api/references/README.md +33 -0
  241. package/skills/openhands-api/references/TROUBLESHOOTING.md +81 -0
  242. package/skills/openhands-api/references/example_prompt.md +12 -0
  243. package/skills/openhands-api/scripts/openhands_api.py +606 -0
  244. package/skills/openhands-api/scripts/openhands_api.ts +252 -0
  245. package/skills/openhands-automation/.plugin/plugin.json +19 -0
  246. package/skills/openhands-automation/README.md +89 -0
  247. package/skills/openhands-automation/SKILL.md +875 -0
  248. package/skills/openhands-automation/commands/automation:create.md +8 -0
  249. package/skills/openhands-automation/references/ab-testing.md +185 -0
  250. package/skills/openhands-automation/references/custom-automation.md +644 -0
  251. package/skills/openhands-sdk/.plugin/plugin.json +20 -0
  252. package/skills/openhands-sdk/README.md +22 -0
  253. package/skills/openhands-sdk/SKILL.md +229 -0
  254. package/skills/openhands-sdk/commands/sdk.md +8 -0
  255. package/skills/pdflatex/.plugin/plugin.json +18 -0
  256. package/skills/pdflatex/README.md +39 -0
  257. package/skills/pdflatex/SKILL.md +34 -0
  258. package/skills/prd/.plugin/plugin.json +19 -0
  259. package/skills/prd/README.md +28 -0
  260. package/skills/prd/SKILL.md +237 -0
  261. package/skills/prd/commands/prd.md +8 -0
  262. package/skills/qa-changes/README.md +18 -0
  263. package/skills/qa-changes/SKILL.md +229 -0
  264. package/skills/qa-changes/commands/qa-changes.md +8 -0
  265. package/skills/release-notes/README.md +24 -0
  266. package/skills/release-notes/SKILL.md +19 -0
  267. package/skills/release-notes/commands/release-notes.md +8 -0
  268. package/skills/research-brief/.plugin/plugin.json +20 -0
  269. package/skills/research-brief/README.md +34 -0
  270. package/skills/research-brief/SKILL.md +99 -0
  271. package/skills/research-brief/commands/research-brief:setup.md +8 -0
  272. package/skills/security/.plugin/plugin.json +18 -0
  273. package/skills/security/README.md +38 -0
  274. package/skills/security/SKILL.md +33 -0
  275. package/skills/skill-creator/.plugin/plugin.json +17 -0
  276. package/skills/skill-creator/LICENSE.txt +202 -0
  277. package/skills/skill-creator/README.md +182 -0
  278. package/skills/skill-creator/SKILL.md +545 -0
  279. package/skills/skill-creator/references/output-patterns.md +82 -0
  280. package/skills/skill-creator/references/workflows.md +28 -0
  281. package/skills/skill-creator/scripts/init_skill.py +303 -0
  282. package/skills/skill-creator/scripts/quick_validate.py +95 -0
  283. package/skills/slack-channel-monitor/.plugin/plugin.json +21 -0
  284. package/skills/slack-channel-monitor/README.md +91 -0
  285. package/skills/slack-channel-monitor/SKILL.md +276 -0
  286. package/skills/slack-channel-monitor/commands/slack-monitor:poll.md +8 -0
  287. package/skills/slack-channel-monitor/references/slack-api.md +207 -0
  288. package/skills/slack-channel-monitor/references/state-schema.md +180 -0
  289. package/skills/slack-channel-monitor/scripts/main.py +962 -0
  290. package/skills/slack-standup-digest/.plugin/plugin.json +21 -0
  291. package/skills/slack-standup-digest/README.md +34 -0
  292. package/skills/slack-standup-digest/SKILL.md +92 -0
  293. package/skills/slack-standup-digest/commands/standup-digest:setup.md +8 -0
  294. package/skills/spark-version-upgrade/.plugin/plugin.json +20 -0
  295. package/skills/spark-version-upgrade/README.md +54 -0
  296. package/skills/spark-version-upgrade/SKILL.md +233 -0
  297. package/skills/ssh/.plugin/plugin.json +18 -0
  298. package/skills/ssh/README.md +140 -0
  299. package/skills/ssh/SKILL.md +135 -0
  300. package/skills/swift-linux/.plugin/plugin.json +17 -0
  301. package/skills/swift-linux/README.md +86 -0
  302. package/skills/swift-linux/SKILL.md +81 -0
  303. package/skills/theme-factory/.plugin/plugin.json +19 -0
  304. package/skills/theme-factory/LICENSE.txt +202 -0
  305. package/skills/theme-factory/README.md +58 -0
  306. package/skills/theme-factory/SKILL.md +59 -0
  307. package/skills/theme-factory/theme-showcase.pdf +0 -0
  308. package/skills/theme-factory/themes/arctic-frost.md +19 -0
  309. package/skills/theme-factory/themes/botanical-garden.md +19 -0
  310. package/skills/theme-factory/themes/desert-rose.md +19 -0
  311. package/skills/theme-factory/themes/forest-canopy.md +19 -0
  312. package/skills/theme-factory/themes/golden-hour.md +19 -0
  313. package/skills/theme-factory/themes/midnight-galaxy.md +19 -0
  314. package/skills/theme-factory/themes/modern-minimalist.md +19 -0
  315. package/skills/theme-factory/themes/ocean-depths.md +19 -0
  316. package/skills/theme-factory/themes/sunset-boulevard.md +19 -0
  317. package/skills/theme-factory/themes/tech-innovation.md +19 -0
  318. package/skills/uv/.plugin/plugin.json +18 -0
  319. package/skills/uv/README.md +5 -0
  320. package/skills/uv/SKILL.md +95 -0
  321. package/skills/uv/references/README.md +5 -0
  322. package/skills/vercel/.plugin/plugin.json +18 -0
  323. package/skills/vercel/README.md +108 -0
  324. package/skills/vercel/SKILL.md +103 -0
  325. package/tests/test_add_skill_installs_to_agents_dir.py +42 -0
  326. package/tests/test_catalogs.py +109 -0
  327. package/tests/test_code_review_risk_evaluation.py +94 -0
  328. package/tests/test_issue_duplicate_checker.py +240 -0
  329. package/tests/test_openhands_api_python.py +152 -0
  330. package/tests/test_plugin_manifest.py +83 -0
  331. package/tests/test_pr_review_diff_payload.py +202 -0
  332. package/tests/test_pr_review_feedback.py +263 -0
  333. package/tests/test_pr_review_prompt.py +152 -0
  334. package/tests/test_pr_review_review_context.py +253 -0
  335. package/tests/test_qa_changes.py +232 -0
  336. package/tests/test_qa_changes_evaluation.py +259 -0
  337. package/tests/test_release_notes_generator.py +990 -0
  338. package/tests/test_sdk_loading.py +150 -0
  339. package/tests/test_skill_plugin_loading.py +149 -0
  340. package/tests/test_skills_have_readme.py +66 -0
  341. package/tests/test_sync_extensions.py +292 -0
  342. package/tests/test_workflow_sync.py +46 -0
  343. package/utils/analysis/README.md +7 -0
  344. package/utils/analysis/laminar_signals/README.md +211 -0
  345. package/utils/analysis/laminar_signals/analyze.py +780 -0
  346. package/utils/analysis/laminar_signals/templates/default.j2 +49 -0
  347. package/utils/analysis/laminar_signals/templates/pr_review.j2 +61 -0
@@ -0,0 +1,8 @@
1
+ ---
2
+ # auto-generated by sync_extensions.py
3
+ description: This skill should be used when the user asks to "create an automation", "schedule a task", "set up a cron job", "webhook integration", "event-triggered automation", or mentions automations, scheduled tasks, cron scheduling, or webhook events in OpenHands Cloud.
4
+ ---
5
+
6
+ Read and follow the complete instructions in the SKILL.md file located in this skill's directory.
7
+
8
+ $ARGUMENTS
@@ -0,0 +1,185 @@
1
+ # A/B Testing for Plugin Automations
2
+
3
+ Run A/B tests on plugin automations by defining **variants** — each with its own plugin set and selection weight — instead of a single `plugins` list. The automation service generates a tarball with all variant configs; at runtime, the SDK script selects a variant via weighted random and loads its plugins.
4
+
5
+ > **Scope:** A/B testing is currently supported on the **plugin preset** endpoint only (`POST /v1/preset/plugin`). See [OpenHands/automation#147](https://github.com/OpenHands/automation/issues/147) for the roadmap to server-level variant support across all automation types.
6
+
7
+ ---
8
+
9
+ ## Quick Start
10
+
11
+ Replace `plugins` with `variants` and add an `experiment_id`:
12
+
13
+ ```bash
14
+ curl -X POST "${OPENHANDS_HOST}/api/automation/v1/preset/plugin" \
15
+ -H "Authorization: Bearer ${OPENHANDS_API_KEY}" \
16
+ -H "Content-Type: application/json" \
17
+ -d '{
18
+ "name": "Code Review A/B Test",
19
+ "experiment_id": "review-model-comparison",
20
+ "variants": [
21
+ {
22
+ "name": "control",
23
+ "weight": 50,
24
+ "plugins": [{"source": "github:owner/review-plugin", "ref": "v1.0.0"}]
25
+ },
26
+ {
27
+ "name": "treatment",
28
+ "weight": 50,
29
+ "plugins": [{"source": "github:owner/review-plugin", "ref": "v2.0.0-beta"}]
30
+ }
31
+ ],
32
+ "prompt": "Review this pull request for code quality and potential bugs.",
33
+ "trigger": {
34
+ "type": "event",
35
+ "source": "github",
36
+ "on": "pull_request.opened"
37
+ }
38
+ }'
39
+ ```
40
+
41
+ ## How It Works
42
+
43
+ 1. **At creation time**, the service generates a single tarball containing an `experiment_config.json` with all variant definitions (names, weights, plugin configs) alongside the SDK entrypoint and prompt.
44
+
45
+ 2. **At runtime**, `sdk_main.py` reads `experiment_config.json`, performs weighted-random selection across variants, and loads the selected variant's plugins.
46
+
47
+ 3. **Experiment metadata** (`experiment_id` and `variant` name) is attached as conversation tags, allowing you to filter and compare runs by variant in the UI.
48
+
49
+ ## API Reference
50
+
51
+ ### Request Fields
52
+
53
+ `plugins` and `variants` are **mutually exclusive** — provide exactly one.
54
+
55
+ | Field | Required | Description |
56
+ |-------|----------|-------------|
57
+ | `variants` | Yes* | List of experiment variants (2–10). Replaces `plugins`. |
58
+ | `experiment_id` | Yes* | Human-readable experiment identifier (1–200 chars). Required when using `variants`. |
59
+
60
+ *Required only for A/B tests. Standard plugin automations use `plugins` instead.
61
+
62
+ All other fields (`name`, `prompt`, `trigger`, `timeout`, `repos`, `model`) are identical to the standard plugin preset request.
63
+
64
+ ### Variant Object
65
+
66
+ | Field | Required | Type | Description |
67
+ |-------|----------|------|-------------|
68
+ | `name` | Yes | string | Unique variant name (1–100 chars) |
69
+ | `weight` | Yes | integer | Relative selection weight (> 0) |
70
+ | `plugins` | Yes | array | Plugin source(s) for this variant (at least one) |
71
+
72
+ ### Validation Rules
73
+
74
+ - Exactly **one of** `plugins` or `variants` must be provided (not both, not neither)
75
+ - `experiment_id` is **required** with `variants`, **forbidden** with `plugins`
76
+ - At least **2** variants, at most **10**
77
+ - Variant **names must be unique** within an experiment
78
+ - Each variant must have at least **1 plugin**
79
+ - Weights are relative — `[50, 50]` and `[1, 1]` both give 50/50 selection
80
+
81
+ ## Variant Selection
82
+
83
+ Selection uses Python's `random.choices` with the configured weights. The probability of selecting variant *i* is:
84
+
85
+ ```
86
+ P(variant_i) = weight_i / sum(all_weights)
87
+ ```
88
+
89
+ Examples:
90
+ - `[50, 50]` → 50% / 50%
91
+ - `[80, 20]` → 80% / 20%
92
+ - `[1, 1, 1]` → 33.3% each
93
+ - `[70, 20, 10]` → 70% / 20% / 10%
94
+
95
+ Selection happens independently on every run — there is no cross-run state or session stickiness.
96
+
97
+ ## Examples
98
+
99
+ ### Compare two plugin versions
100
+
101
+ ```json
102
+ {
103
+ "name": "Plugin v2 Rollout",
104
+ "experiment_id": "plugin-v2-rollout",
105
+ "variants": [
106
+ {
107
+ "name": "stable",
108
+ "weight": 80,
109
+ "plugins": [{"source": "github:myorg/my-plugin", "ref": "v1.4.2"}]
110
+ },
111
+ {
112
+ "name": "canary",
113
+ "weight": 20,
114
+ "plugins": [{"source": "github:myorg/my-plugin", "ref": "v2.0.0-rc1"}]
115
+ }
116
+ ],
117
+ "prompt": "Run the standard analysis workflow.",
118
+ "trigger": {"type": "cron", "schedule": "0 9 * * 1-5"}
119
+ }
120
+ ```
121
+
122
+ ### Test different plugin combinations
123
+
124
+ ```json
125
+ {
126
+ "name": "Review Pipeline Experiment",
127
+ "experiment_id": "review-pipeline-2026",
128
+ "variants": [
129
+ {
130
+ "name": "basic",
131
+ "weight": 1,
132
+ "plugins": [{"source": "github:myorg/code-review"}]
133
+ },
134
+ {
135
+ "name": "enhanced",
136
+ "weight": 1,
137
+ "plugins": [
138
+ {"source": "github:myorg/code-review"},
139
+ {"source": "github:myorg/security-scanner"}
140
+ ]
141
+ }
142
+ ],
143
+ "prompt": "Review the PR and report findings.",
144
+ "trigger": {
145
+ "type": "event",
146
+ "source": "github",
147
+ "on": "pull_request.opened",
148
+ "filter": "contains(pull_request.labels[].name, 'needs-review')"
149
+ }
150
+ }
151
+ ```
152
+
153
+ ### Three-way comparison
154
+
155
+ ```json
156
+ {
157
+ "name": "Scanner Comparison",
158
+ "experiment_id": "scanner-eval-q3",
159
+ "variants": [
160
+ {"name": "scanner-a", "weight": 1, "plugins": [{"source": "github:myorg/scanner-a"}]},
161
+ {"name": "scanner-b", "weight": 1, "plugins": [{"source": "github:myorg/scanner-b"}]},
162
+ {"name": "scanner-c", "weight": 1, "plugins": [{"source": "github:myorg/scanner-c"}]}
163
+ ],
164
+ "prompt": "Scan the repository and produce a findings report.",
165
+ "trigger": {"type": "cron", "schedule": "0 2 * * 0"}
166
+ }
167
+ ```
168
+
169
+ ## Observability
170
+
171
+ Each experiment run tags the conversation with:
172
+
173
+ | Tag | Value |
174
+ |-----|-------|
175
+ | `experiment_id` | The `experiment_id` from the request |
176
+ | `variant` | The name of the selected variant |
177
+
178
+ Use these tags to filter runs in the OpenHands UI and compare outcomes across variants.
179
+
180
+ ## Limitations
181
+
182
+ - **Plugin preset only** — A/B testing is not yet supported for prompt presets or custom automations. See [#147](https://github.com/OpenHands/automation/issues/147) for the server-level variant selection roadmap.
183
+ - **No session stickiness** — each run independently selects a variant. There is no user- or session-based assignment.
184
+ - **No built-in metrics** — the platform records which variant ran (via tags) but does not compute statistical significance. Export run data for external analysis.
185
+ - **Single prompt** — all variants share the same prompt. To test different prompts, use separate automations.