@openhands/extensions 0.0.1-alpha → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347) hide show
  1. package/.agents/skills/custom-codereview-guide.md +25 -0
  2. package/.github/pull_request_template.md +38 -0
  3. package/.github/release.yml +14 -0
  4. package/.github/workflows/check-extensions.yml +72 -0
  5. package/.github/workflows/npm-publish.yml +89 -0
  6. package/.github/workflows/pr.yml +30 -0
  7. package/.github/workflows/release.yml +24 -0
  8. package/.github/workflows/tests.yml +25 -0
  9. package/.github/workflows/vulnerability-scan.yml +87 -0
  10. package/.release-please-manifest.json +3 -0
  11. package/AGENTS.md +132 -0
  12. package/README.md +10 -0
  13. package/analysis_results.md +162 -0
  14. package/marketplaces/large-codebase.json +66 -0
  15. package/marketplaces/openhands-extensions.json +682 -0
  16. package/package.json +4 -10
  17. package/plugins/README.md +30 -0
  18. package/plugins/city-weather/.plugin/plugin.json +13 -0
  19. package/plugins/city-weather/README.md +145 -0
  20. package/plugins/city-weather/commands/now.md +56 -0
  21. package/plugins/cobol-modernization/.plugin/plugin.json +19 -0
  22. package/plugins/cobol-modernization/README.md +201 -0
  23. package/plugins/cobol-modernization/references/troubleshooting.md +18 -0
  24. package/plugins/cobol-modernization/skills/build-setup/SKILL.md +78 -0
  25. package/plugins/cobol-modernization/skills/build-setup/scripts/install-gnucobol.sh +32 -0
  26. package/plugins/cobol-modernization/skills/cobol-modernization-overview/SKILL.md +113 -0
  27. package/plugins/cobol-modernization/skills/mainfraime-removal/SKILL.md +62 -0
  28. package/plugins/cobol-modernization/skills/mainfraime-removal/references/cics-transformation-examples.md +45 -0
  29. package/plugins/cobol-modernization/skills/mainframe-planning/SKILL.md +78 -0
  30. package/plugins/cobol-modernization/skills/to-java-migration/SKILL.md +59 -0
  31. package/plugins/cobol-modernization/skills/to-java-migration/references/cobol-to-java-example.md +58 -0
  32. package/plugins/cobol-modernization/skills/to-java-migration/references/datatype-mappings.md +19 -0
  33. package/plugins/issue-duplicate-checker/.plugin/plugin.json +13 -0
  34. package/plugins/issue-duplicate-checker/README.md +51 -0
  35. package/plugins/issue-duplicate-checker/action.yml +349 -0
  36. package/plugins/issue-duplicate-checker/scripts/auto_close_duplicate_issues.py +569 -0
  37. package/plugins/issue-duplicate-checker/scripts/issue_duplicate_check_openhands.py +681 -0
  38. package/plugins/issue-duplicate-checker/scripts/post_duplicate_notice.js +220 -0
  39. package/plugins/issue-duplicate-checker/scripts/remove_duplicate_candidate_label.js +27 -0
  40. package/plugins/magic-test/.plugin/plugin.json +13 -0
  41. package/plugins/magic-test/skills/magic-word/SKILL.md +33 -0
  42. package/plugins/migration-scoring/.plugin/plugin.json +19 -0
  43. package/plugins/migration-scoring/README.md +244 -0
  44. package/plugins/migration-scoring/skills/migration-mapping/SKILL.md +72 -0
  45. package/plugins/migration-scoring/skills/migration-report/SKILL.md +118 -0
  46. package/plugins/migration-scoring/skills/migration-scoring-overview/SKILL.md +126 -0
  47. package/plugins/migration-scoring/skills/score-quality/SKILL.md +54 -0
  48. package/plugins/migration-scoring/skills/score-quality/references/scoring-criteria.md +30 -0
  49. package/plugins/migration-scoring/skills/score-style/SKILL.md +106 -0
  50. package/plugins/onboarding/.plugin/plugin.json +20 -0
  51. package/plugins/onboarding/README.md +30 -0
  52. package/plugins/onboarding/references/criteria.md +144 -0
  53. package/plugins/onboarding/skills/agent-readiness-report/README.md +23 -0
  54. package/plugins/onboarding/skills/agent-readiness-report/SKILL.md +122 -0
  55. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_agent_instructions.sh +88 -0
  56. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_build_env.sh +114 -0
  57. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_feedback_loops.sh +133 -0
  58. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_policy.sh +113 -0
  59. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_workflows.sh +127 -0
  60. package/plugins/onboarding/skills/improve-agent-readiness/README.md +19 -0
  61. package/plugins/onboarding/skills/improve-agent-readiness/SKILL.md +167 -0
  62. package/plugins/onboarding/skills/setup-agents-md/README.md +15 -0
  63. package/plugins/onboarding/skills/setup-agents-md/SKILL.md +150 -0
  64. package/plugins/onboarding/skills/setup-openhands/README.md +20 -0
  65. package/plugins/onboarding/skills/setup-openhands/SKILL.md +56 -0
  66. package/plugins/onboarding/skills/setup-pr-review/README.md +23 -0
  67. package/plugins/onboarding/skills/setup-pr-review/SKILL.md +72 -0
  68. package/plugins/openhands/.plugin/plugin.json +13 -0
  69. package/plugins/openhands/README.md +52 -0
  70. package/plugins/openhands/SKILL.md +61 -0
  71. package/plugins/openhands/commands/create.md +55 -0
  72. package/plugins/openhands/commands/openhands-cloud.md +8 -0
  73. package/plugins/openhands/scripts/run.sh +69 -0
  74. package/plugins/pr-review/.plugin/plugin.json +13 -0
  75. package/plugins/pr-review/README.md +393 -0
  76. package/plugins/pr-review/action.yml +298 -0
  77. package/plugins/pr-review/scripts/agent_script.py +1282 -0
  78. package/plugins/pr-review/scripts/evaluate_review.py +655 -0
  79. package/plugins/pr-review/scripts/prompt.py +260 -0
  80. package/plugins/pr-review/workflows/pr-review-by-openhands.yml +51 -0
  81. package/plugins/pr-review/workflows/pr-review-evaluation.yml +85 -0
  82. package/plugins/qa-changes/.plugin/plugin.json +11 -0
  83. package/plugins/qa-changes/README.md +185 -0
  84. package/plugins/qa-changes/action.yml +181 -0
  85. package/plugins/qa-changes/scripts/agent_script.py +406 -0
  86. package/plugins/qa-changes/scripts/evaluate_qa_changes.py +385 -0
  87. package/plugins/qa-changes/scripts/prompt.py +174 -0
  88. package/plugins/qa-changes/workflows/qa-changes-by-openhands.yml +50 -0
  89. package/plugins/qa-changes/workflows/qa-changes-evaluation.yml +85 -0
  90. package/plugins/release-notes/.plugin/plugin.json +19 -0
  91. package/plugins/release-notes/README.md +283 -0
  92. package/plugins/release-notes/SKILL.md +83 -0
  93. package/plugins/release-notes/action.yml +117 -0
  94. package/plugins/release-notes/commands/release-notes.md +8 -0
  95. package/plugins/release-notes/scripts/agent_script.py +292 -0
  96. package/plugins/release-notes/scripts/generate_release_notes.py +733 -0
  97. package/plugins/release-notes/scripts/prompt.py +90 -0
  98. package/plugins/release-notes/scripts/validate_release_notes.py +328 -0
  99. package/plugins/release-notes/workflows/release-notes.yml +76 -0
  100. package/plugins/vulnerability-remediation/.plugin/plugin.json +19 -0
  101. package/plugins/vulnerability-remediation/README.md +217 -0
  102. package/plugins/vulnerability-remediation/action.yml +187 -0
  103. package/plugins/vulnerability-remediation/scripts/scan_and_remediate.py +561 -0
  104. package/plugins/vulnerability-remediation/workflows/vulnerability-scan.yml +87 -0
  105. package/pyproject.toml +12 -0
  106. package/release-please-config.json +16 -0
  107. package/scripts/sync_extensions.py +494 -0
  108. package/scripts/sync_openhands_sdk_skill.py +264 -0
  109. package/skills/README.md +159 -0
  110. package/skills/add-javadoc/.plugin/plugin.json +18 -0
  111. package/skills/add-javadoc/README.md +40 -0
  112. package/skills/add-javadoc/SKILL.md +35 -0
  113. package/skills/add-javadoc/references/example.md +32 -0
  114. package/skills/add-skill/.plugin/plugin.json +18 -0
  115. package/skills/add-skill/README.md +67 -0
  116. package/skills/add-skill/SKILL.md +47 -0
  117. package/skills/add-skill/scripts/fetch_skill.py +259 -0
  118. package/skills/agent-creator/.plugin/plugin.json +20 -0
  119. package/skills/agent-creator/README.md +104 -0
  120. package/skills/agent-creator/SKILL.md +190 -0
  121. package/skills/agent-creator/commands/agent-creator.md +8 -0
  122. package/skills/agent-creator/references/fallback.md +117 -0
  123. package/skills/agent-memory/.plugin/plugin.json +18 -0
  124. package/skills/agent-memory/README.md +35 -0
  125. package/skills/agent-memory/SKILL.md +30 -0
  126. package/skills/agent-memory/commands/remember.md +8 -0
  127. package/skills/agent-sdk-builder/.plugin/plugin.json +18 -0
  128. package/skills/agent-sdk-builder/README.md +40 -0
  129. package/skills/agent-sdk-builder/SKILL.md +37 -0
  130. package/skills/agent-sdk-builder/commands/agent-builder.md +8 -0
  131. package/skills/azure-devops/.plugin/plugin.json +18 -0
  132. package/skills/azure-devops/README.md +55 -0
  133. package/skills/azure-devops/SKILL.md +50 -0
  134. package/skills/bitbucket/.plugin/plugin.json +17 -0
  135. package/skills/bitbucket/README.md +50 -0
  136. package/skills/bitbucket/SKILL.md +45 -0
  137. package/skills/code-review/.plugin/plugin.json +19 -0
  138. package/skills/code-review/README.md +18 -0
  139. package/skills/code-review/SKILL.md +208 -0
  140. package/skills/code-review/commands/codereview-roasted.md +8 -0
  141. package/skills/code-review/commands/codereview.md +8 -0
  142. package/skills/code-review/references/risk-evaluation.md +41 -0
  143. package/skills/code-review/references/supply-chain-security.md +31 -0
  144. package/skills/code-simplifier/.plugin/plugin.json +21 -0
  145. package/skills/code-simplifier/README.md +30 -0
  146. package/skills/code-simplifier/SKILL.md +91 -0
  147. package/skills/code-simplifier/commands/simplify.md +8 -0
  148. package/skills/code-simplifier/references/code-quality-review.md +86 -0
  149. package/skills/code-simplifier/references/code-reuse-review.md +63 -0
  150. package/skills/code-simplifier/references/efficiency-review.md +81 -0
  151. package/skills/datadog/.plugin/plugin.json +19 -0
  152. package/skills/datadog/README.md +100 -0
  153. package/skills/datadog/SKILL.md +95 -0
  154. package/skills/deno/.plugin/plugin.json +18 -0
  155. package/skills/deno/README.md +5 -0
  156. package/skills/deno/SKILL.md +99 -0
  157. package/skills/deno/references/README.md +6 -0
  158. package/skills/discord/.plugin/plugin.json +18 -0
  159. package/skills/discord/README.md +31 -0
  160. package/skills/discord/SKILL.md +109 -0
  161. package/skills/discord/__init__.py +0 -0
  162. package/skills/discord/references/REFERENCE.md +78 -0
  163. package/skills/discord/scripts/__init__.py +0 -0
  164. package/skills/discord/scripts/_http.py +127 -0
  165. package/skills/discord/scripts/post_webhook.py +106 -0
  166. package/skills/discord/scripts/send_message.py +102 -0
  167. package/skills/docker/.plugin/plugin.json +17 -0
  168. package/skills/docker/README.md +34 -0
  169. package/skills/docker/SKILL.md +29 -0
  170. package/skills/evidence-based-citations/.plugin/plugin.json +20 -0
  171. package/skills/evidence-based-citations/README.md +31 -0
  172. package/skills/evidence-based-citations/SKILL.md +59 -0
  173. package/skills/flarglebargle/.plugin/plugin.json +16 -0
  174. package/skills/flarglebargle/README.md +14 -0
  175. package/skills/flarglebargle/SKILL.md +9 -0
  176. package/skills/frontend-design/.plugin/plugin.json +21 -0
  177. package/skills/frontend-design/LICENSE.txt +177 -0
  178. package/skills/frontend-design/README.md +42 -0
  179. package/skills/frontend-design/SKILL.md +42 -0
  180. package/skills/github/.plugin/plugin.json +19 -0
  181. package/skills/github/README.md +42 -0
  182. package/skills/github/SKILL.md +106 -0
  183. package/skills/github-pr-review/.plugin/plugin.json +18 -0
  184. package/skills/github-pr-review/README.md +145 -0
  185. package/skills/github-pr-review/SKILL.md +148 -0
  186. package/skills/github-pr-review/commands/github-pr-review.md +8 -0
  187. package/skills/github-pr-reviewer/.plugin/plugin.json +20 -0
  188. package/skills/github-pr-reviewer/README.md +34 -0
  189. package/skills/github-pr-reviewer/SKILL.md +89 -0
  190. package/skills/github-pr-reviewer/commands/pr-reviewer:setup.md +8 -0
  191. package/skills/github-repo-monitor/.plugin/plugin.json +22 -0
  192. package/skills/github-repo-monitor/README.md +70 -0
  193. package/skills/github-repo-monitor/SKILL.md +316 -0
  194. package/skills/github-repo-monitor/commands/github-monitor:poll.md +8 -0
  195. package/skills/github-repo-monitor/references/github-api.md +241 -0
  196. package/skills/github-repo-monitor/references/state-schema.md +160 -0
  197. package/skills/github-repo-monitor/scripts/main.py +915 -0
  198. package/skills/github-repo-monitor/tests/test_main.py +400 -0
  199. package/skills/gitlab/.plugin/plugin.json +17 -0
  200. package/skills/gitlab/README.md +37 -0
  201. package/skills/gitlab/SKILL.md +32 -0
  202. package/skills/incident-retrospective/.plugin/plugin.json +21 -0
  203. package/skills/incident-retrospective/README.md +34 -0
  204. package/skills/incident-retrospective/SKILL.md +98 -0
  205. package/skills/incident-retrospective/commands/incident-retro:setup.md +8 -0
  206. package/skills/iterate/.plugin/plugin.json +13 -0
  207. package/skills/iterate/README.md +25 -0
  208. package/skills/iterate/SKILL.md +399 -0
  209. package/skills/iterate/commands/babysit.md +8 -0
  210. package/skills/iterate/commands/iterate.md +8 -0
  211. package/skills/iterate/commands/verify.md +8 -0
  212. package/skills/iterate/references/heuristics.md +58 -0
  213. package/skills/iterate/references/verification.md +96 -0
  214. package/skills/jupyter/.plugin/plugin.json +18 -0
  215. package/skills/jupyter/README.md +55 -0
  216. package/skills/jupyter/SKILL.md +50 -0
  217. package/skills/kubernetes/.plugin/plugin.json +18 -0
  218. package/skills/kubernetes/README.md +53 -0
  219. package/skills/kubernetes/SKILL.md +48 -0
  220. package/skills/learn-from-code-review/.plugin/plugin.json +19 -0
  221. package/skills/learn-from-code-review/README.md +64 -0
  222. package/skills/learn-from-code-review/SKILL.md +186 -0
  223. package/skills/learn-from-code-review/commands/learn-from-reviews.md +8 -0
  224. package/skills/linear/.plugin/plugin.json +19 -0
  225. package/skills/linear/README.md +58 -0
  226. package/skills/linear/SKILL.md +213 -0
  227. package/skills/linear-triage/.plugin/plugin.json +21 -0
  228. package/skills/linear-triage/README.md +34 -0
  229. package/skills/linear-triage/SKILL.md +91 -0
  230. package/skills/linear-triage/commands/linear-triage:setup.md +8 -0
  231. package/skills/notion/.plugin/plugin.json +17 -0
  232. package/skills/notion/README.md +114 -0
  233. package/skills/notion/SKILL.md +109 -0
  234. package/skills/npm/.plugin/plugin.json +17 -0
  235. package/skills/npm/README.md +14 -0
  236. package/skills/npm/SKILL.md +9 -0
  237. package/skills/openhands-api/.plugin/plugin.json +22 -0
  238. package/skills/openhands-api/README.md +48 -0
  239. package/skills/openhands-api/SKILL.md +399 -0
  240. package/skills/openhands-api/references/README.md +33 -0
  241. package/skills/openhands-api/references/TROUBLESHOOTING.md +81 -0
  242. package/skills/openhands-api/references/example_prompt.md +12 -0
  243. package/skills/openhands-api/scripts/openhands_api.py +606 -0
  244. package/skills/openhands-api/scripts/openhands_api.ts +252 -0
  245. package/skills/openhands-automation/.plugin/plugin.json +19 -0
  246. package/skills/openhands-automation/README.md +89 -0
  247. package/skills/openhands-automation/SKILL.md +875 -0
  248. package/skills/openhands-automation/commands/automation:create.md +8 -0
  249. package/skills/openhands-automation/references/ab-testing.md +185 -0
  250. package/skills/openhands-automation/references/custom-automation.md +644 -0
  251. package/skills/openhands-sdk/.plugin/plugin.json +20 -0
  252. package/skills/openhands-sdk/README.md +22 -0
  253. package/skills/openhands-sdk/SKILL.md +229 -0
  254. package/skills/openhands-sdk/commands/sdk.md +8 -0
  255. package/skills/pdflatex/.plugin/plugin.json +18 -0
  256. package/skills/pdflatex/README.md +39 -0
  257. package/skills/pdflatex/SKILL.md +34 -0
  258. package/skills/prd/.plugin/plugin.json +19 -0
  259. package/skills/prd/README.md +28 -0
  260. package/skills/prd/SKILL.md +237 -0
  261. package/skills/prd/commands/prd.md +8 -0
  262. package/skills/qa-changes/README.md +18 -0
  263. package/skills/qa-changes/SKILL.md +229 -0
  264. package/skills/qa-changes/commands/qa-changes.md +8 -0
  265. package/skills/release-notes/README.md +24 -0
  266. package/skills/release-notes/SKILL.md +19 -0
  267. package/skills/release-notes/commands/release-notes.md +8 -0
  268. package/skills/research-brief/.plugin/plugin.json +20 -0
  269. package/skills/research-brief/README.md +34 -0
  270. package/skills/research-brief/SKILL.md +99 -0
  271. package/skills/research-brief/commands/research-brief:setup.md +8 -0
  272. package/skills/security/.plugin/plugin.json +18 -0
  273. package/skills/security/README.md +38 -0
  274. package/skills/security/SKILL.md +33 -0
  275. package/skills/skill-creator/.plugin/plugin.json +17 -0
  276. package/skills/skill-creator/LICENSE.txt +202 -0
  277. package/skills/skill-creator/README.md +182 -0
  278. package/skills/skill-creator/SKILL.md +545 -0
  279. package/skills/skill-creator/references/output-patterns.md +82 -0
  280. package/skills/skill-creator/references/workflows.md +28 -0
  281. package/skills/skill-creator/scripts/init_skill.py +303 -0
  282. package/skills/skill-creator/scripts/quick_validate.py +95 -0
  283. package/skills/slack-channel-monitor/.plugin/plugin.json +21 -0
  284. package/skills/slack-channel-monitor/README.md +91 -0
  285. package/skills/slack-channel-monitor/SKILL.md +276 -0
  286. package/skills/slack-channel-monitor/commands/slack-monitor:poll.md +8 -0
  287. package/skills/slack-channel-monitor/references/slack-api.md +207 -0
  288. package/skills/slack-channel-monitor/references/state-schema.md +180 -0
  289. package/skills/slack-channel-monitor/scripts/main.py +962 -0
  290. package/skills/slack-standup-digest/.plugin/plugin.json +21 -0
  291. package/skills/slack-standup-digest/README.md +34 -0
  292. package/skills/slack-standup-digest/SKILL.md +92 -0
  293. package/skills/slack-standup-digest/commands/standup-digest:setup.md +8 -0
  294. package/skills/spark-version-upgrade/.plugin/plugin.json +20 -0
  295. package/skills/spark-version-upgrade/README.md +54 -0
  296. package/skills/spark-version-upgrade/SKILL.md +233 -0
  297. package/skills/ssh/.plugin/plugin.json +18 -0
  298. package/skills/ssh/README.md +140 -0
  299. package/skills/ssh/SKILL.md +135 -0
  300. package/skills/swift-linux/.plugin/plugin.json +17 -0
  301. package/skills/swift-linux/README.md +86 -0
  302. package/skills/swift-linux/SKILL.md +81 -0
  303. package/skills/theme-factory/.plugin/plugin.json +19 -0
  304. package/skills/theme-factory/LICENSE.txt +202 -0
  305. package/skills/theme-factory/README.md +58 -0
  306. package/skills/theme-factory/SKILL.md +59 -0
  307. package/skills/theme-factory/theme-showcase.pdf +0 -0
  308. package/skills/theme-factory/themes/arctic-frost.md +19 -0
  309. package/skills/theme-factory/themes/botanical-garden.md +19 -0
  310. package/skills/theme-factory/themes/desert-rose.md +19 -0
  311. package/skills/theme-factory/themes/forest-canopy.md +19 -0
  312. package/skills/theme-factory/themes/golden-hour.md +19 -0
  313. package/skills/theme-factory/themes/midnight-galaxy.md +19 -0
  314. package/skills/theme-factory/themes/modern-minimalist.md +19 -0
  315. package/skills/theme-factory/themes/ocean-depths.md +19 -0
  316. package/skills/theme-factory/themes/sunset-boulevard.md +19 -0
  317. package/skills/theme-factory/themes/tech-innovation.md +19 -0
  318. package/skills/uv/.plugin/plugin.json +18 -0
  319. package/skills/uv/README.md +5 -0
  320. package/skills/uv/SKILL.md +95 -0
  321. package/skills/uv/references/README.md +5 -0
  322. package/skills/vercel/.plugin/plugin.json +18 -0
  323. package/skills/vercel/README.md +108 -0
  324. package/skills/vercel/SKILL.md +103 -0
  325. package/tests/test_add_skill_installs_to_agents_dir.py +42 -0
  326. package/tests/test_catalogs.py +109 -0
  327. package/tests/test_code_review_risk_evaluation.py +94 -0
  328. package/tests/test_issue_duplicate_checker.py +240 -0
  329. package/tests/test_openhands_api_python.py +152 -0
  330. package/tests/test_plugin_manifest.py +83 -0
  331. package/tests/test_pr_review_diff_payload.py +202 -0
  332. package/tests/test_pr_review_feedback.py +263 -0
  333. package/tests/test_pr_review_prompt.py +152 -0
  334. package/tests/test_pr_review_review_context.py +253 -0
  335. package/tests/test_qa_changes.py +232 -0
  336. package/tests/test_qa_changes_evaluation.py +259 -0
  337. package/tests/test_release_notes_generator.py +990 -0
  338. package/tests/test_sdk_loading.py +150 -0
  339. package/tests/test_skill_plugin_loading.py +149 -0
  340. package/tests/test_skills_have_readme.py +66 -0
  341. package/tests/test_sync_extensions.py +292 -0
  342. package/tests/test_workflow_sync.py +46 -0
  343. package/utils/analysis/README.md +7 -0
  344. package/utils/analysis/laminar_signals/README.md +211 -0
  345. package/utils/analysis/laminar_signals/analyze.py +780 -0
  346. package/utils/analysis/laminar_signals/templates/default.j2 +49 -0
  347. package/utils/analysis/laminar_signals/templates/pr_review.j2 +61 -0
@@ -0,0 +1,181 @@
1
+ ---
2
+ name: OpenHands QA Changes
3
+ description: Automated QA validation of PR changes using OpenHands agent
4
+ author: OpenHands
5
+
6
+ branding:
7
+ icon: check-circle
8
+ color: green
9
+
10
+ inputs:
11
+ llm-model:
12
+ description: LLM model to use for QA validation.
13
+ required: false
14
+ default: anthropic/claude-sonnet-4-5-20250929
15
+ llm-base-url:
16
+ description: LLM base URL (optional, for custom LLM endpoints)
17
+ required: false
18
+ default: ''
19
+ extensions-repo:
20
+ description: GitHub repository for extensions (owner/repo)
21
+ required: false
22
+ default: OpenHands/extensions
23
+ extensions-version:
24
+ description: Git ref to use for extensions (tag, branch, or commit SHA)
25
+ required: false
26
+ default: main
27
+ max-budget:
28
+ description: Maximum LLM cost in dollars. The agent stops when this budget is exceeded.
29
+ required: false
30
+ default: '10.0'
31
+ timeout-minutes:
32
+ description: Maximum wall-clock time in minutes for the QA job.
33
+ required: false
34
+ default: '30'
35
+ max-iterations:
36
+ description: Maximum number of agent iterations (each iteration is one LLM call + action).
37
+ required: false
38
+ default: '500'
39
+ llm-api-key:
40
+ description: LLM API key (required)
41
+ required: true
42
+ github-token:
43
+ description: GitHub token for API access (required)
44
+ required: true
45
+ lmnr-api-key:
46
+ description: Laminar API key for observability (optional)
47
+ required: false
48
+ default: ''
49
+
50
+ runs:
51
+ using: composite
52
+ steps:
53
+ - name: Preflight fork/secrets guard
54
+ id: preflight
55
+ shell: bash
56
+ env:
57
+ IS_FORK_PR: ${{ github.event.pull_request.head.repo.full_name != github.repository }}
58
+ LLM_API_KEY: ${{ inputs.llm-api-key }}
59
+ run: |
60
+ if [ "$IS_FORK_PR" = "true" ] && [ -z "$LLM_API_KEY" ]; then
61
+ echo "::notice title=Skipping QA Changes for fork PR::This workflow runs in pull_request context, so fork PRs do not receive repository secrets such as LLM_API_KEY. Skipping automated QA instead of failing. A maintainer can run QA locally or via a separate trusted workflow."
62
+ {
63
+ echo "## QA Changes skipped"
64
+ echo
65
+ echo "This PR comes from a fork, and pull_request workflows do not receive repository secrets such as LLM_API_KEY."
66
+ echo "Skipping automated QA instead of failing."
67
+ echo
68
+ echo "A maintainer can run QA locally or via a separate trusted workflow if needed."
69
+ } >> "$GITHUB_STEP_SUMMARY"
70
+ echo "skip=true" >> "$GITHUB_OUTPUT"
71
+ exit 0
72
+ fi
73
+
74
+ echo "skip=false" >> "$GITHUB_OUTPUT"
75
+
76
+ - name: Checkout extensions repository
77
+ if: steps.preflight.outputs.skip != 'true'
78
+ uses: actions/checkout@v4
79
+ with:
80
+ repository: ${{ inputs.extensions-repo }}
81
+ ref: ${{ inputs.extensions-version }}
82
+ path: extensions
83
+
84
+ - name: Checkout PR repository  # clone the code under test into pr-repo/
85
+ if: steps.preflight.outputs.skip != 'true'
86
+ uses: actions/checkout@v4
87
+ with:
88
+ repository: ${{ github.event.pull_request.head.repo.full_name }}  # head repo, so a fork PR is cloned from the fork
89
+ ref: ${{ github.event.pull_request.head.ref }}  # NOTE(review): this is the head branch name, not a pinned commit SHA - confirm that checking out the branch tip at run time is intended
90
+ fetch-depth: 0  # full history rather than a shallow clone
91
+ persist-credentials: false  # do not leave the checkout token in pr-repo's git config
92
+ path: pr-repo
93
+ submodules: recursive
94
+
95
+ - name: Set up Python
96
+ if: steps.preflight.outputs.skip != 'true'
97
+ uses: actions/setup-python@v5
98
+ with:
99
+ python-version: '3.12'
100
+
101
+ - name: Install uv
102
+ if: steps.preflight.outputs.skip != 'true'
103
+ uses: astral-sh/setup-uv@v6
104
+ with:
105
+ enable-cache: false
106
+
107
+ - name: Install system dependencies
108
+ if: steps.preflight.outputs.skip != 'true'
109
+ shell: bash
110
+ run: |
111
+ sudo apt-get update
112
+ # gh: GitHub CLI for posting QA reports
113
+ # tmux: required by the OpenHands agent runtime
114
+ sudo apt-get install -y gh tmux
115
+
116
+ - name: Check required configuration  # fail fast on missing secrets, then log the PR context
117
+ if: steps.preflight.outputs.skip != 'true'
118
+ shell: bash
119
+ env:
120
+ LLM_API_KEY: ${{ inputs.llm-api-key }}
121
+ GITHUB_TOKEN: ${{ inputs.github-token }}
122
+ PR_TITLE: ${{ github.event.pull_request.title }}  # author-controlled text: hand it to bash as data via env, never expand it inside the script
123
+ run: |
124
+ if [ -z "$LLM_API_KEY" ]; then
125
+ echo "Error: llm-api-key is required."
126
+ exit 1
127
+ fi
128
+ if [ -z "$GITHUB_TOKEN" ]; then
129
+ echo "Error: github-token is required."
130
+ exit 1
131
+ fi
132
+ echo "PR Number: ${{ github.event.pull_request.number }}"
133
+ echo "PR Title: $PR_TITLE"  # quoted env expansion, so shell metacharacters in the title are printed, not executed
134
+ echo "Repository: ${{ github.repository }}"
135
+ echo "LLM model: ${{ inputs.llm-model }}"
136
+
137
+ - name: Run QA validation
138
+ if: steps.preflight.outputs.skip != 'true'
139
+ shell: bash
140
+ env:
141
+ LLM_MODEL: ${{ inputs.llm-model }}
142
+ LLM_BASE_URL: ${{ inputs.llm-base-url }}
143
+ LLM_API_KEY: ${{ inputs.llm-api-key }}
144
+ GITHUB_TOKEN: ${{ inputs.github-token }}
145
+ LMNR_PROJECT_API_KEY: ${{ inputs.lmnr-api-key }}
146
+ MAX_BUDGET: ${{ inputs.max-budget }}
147
+ MAX_ITERATIONS: ${{ inputs.max-iterations }}
148
+ TIMEOUT_MINUTES: ${{ inputs.timeout-minutes }}
149
+ PR_NUMBER: ${{ github.event.pull_request.number }}
150
+ PR_TITLE: ${{ github.event.pull_request.title }}
151
+ PR_BODY: ${{ github.event.pull_request.body }}
152
+ PR_BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
153
+ PR_HEAD_BRANCH: ${{ github.event.pull_request.head.ref }}
154
+ REPO_NAME: ${{ github.repository }}
155
+ run: |
156
+ cd pr-repo
157
+ # timeout-minutes is not supported on composite action steps,
158
+ # so we enforce the time limit via the coreutils timeout command.
159
+ TIMEOUT_SECONDS=$((TIMEOUT_MINUTES * 60))
160
+ timeout "${TIMEOUT_SECONDS}" \
161
+ uv run --no-project --with openhands-sdk --with openhands-tools --with lmnr \
162
+ python ../extensions/plugins/qa-changes/scripts/agent_script.py
163
+
164
+ - name: Upload logs as artifact
165
+ uses: actions/upload-artifact@v4
166
+ if: always() && steps.preflight.outputs.skip != 'true'  # upload even when an earlier step failed, but not when preflight skipped the run
167
+ with:  # NOTE(review): the paths below are relative to the workspace root, while the QA script is started after `cd pr-repo` - confirm logs are not written under pr-repo/ instead
168
+ name: openhands-qa-changes-logs
169
+ path: |
170
+ *.log
171
+ output/
172
+ retention-days: 7
173
+
174
+ - name: Upload Laminar trace info for evaluation
175
+ uses: actions/upload-artifact@v4
176
+ if: success() && steps.preflight.outputs.skip != 'true'
177
+ with:
178
+ name: qa-changes-trace-${{ github.event.pull_request.number }}
179
+ path: pr-repo/laminar_trace_info.json
180
+ retention-days: 30
181
+ if-no-files-found: ignore
@@ -0,0 +1,406 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ QA Changes Agent
4
+
5
+ This script runs an OpenHands agent to QA a pull request by actually
6
+ setting up the environment, exercising the changed behavior as a real
7
+ user would, and posting a structured report as a PR review.
8
+
9
+ Unlike the pr-review agent which reads the diff and posts inline code
10
+ review comments, the QA agent *runs the actual software* to verify the
11
+ change works as described. It does not re-run the test suite (CI's job)
12
+ or analyze code style/logic (code review's job).
13
+
14
+ The agent uses the /qa-changes skill for its methodology and posts
15
+ results as a GitHub code review.
16
+
17
+ Environment Variables:
18
+ LLM_API_KEY: API key for the LLM (required)
19
+ LLM_MODEL: Language model to use (default: anthropic/claude-sonnet-4-5-20250929)
20
+ LLM_BASE_URL: Optional base URL for LLM API
21
+ GITHUB_TOKEN: GitHub token for API access (required)
22
+ MAX_BUDGET: Maximum LLM cost in dollars (default: 10.0)
23
+ MAX_ITERATIONS: Maximum agent iterations (default: 500)
24
+ PR_NUMBER: Pull request number (required)
25
+ PR_TITLE: Pull request title (required)
26
+ PR_BODY: Pull request body (optional)
27
+ PR_BASE_BRANCH: Base branch name (required)
28
+ PR_HEAD_BRANCH: Head branch name (required)
29
+ REPO_NAME: Repository name in format owner/repo (required)
30
+ """
31
+ from __future__ import annotations
32
+
33
+ import json
34
+ import os
35
+ import sys
36
+ import urllib.error
37
+ import urllib.request
38
+ from pathlib import Path
39
+ from typing import Any
40
+
41
+ from lmnr import Laminar
42
+ from openhands.sdk import LLM, Agent, AgentContext, Conversation, get_logger
43
+ from openhands.sdk.skills import load_project_skills
44
+ from openhands.sdk.conversation import get_agent_final_response
45
+ from openhands.sdk.git.utils import run_git_command
46
+ from openhands.sdk.plugin import PluginSource
47
+ from openhands.tools.preset.default import get_default_condenser, get_default_tools
48
+
49
# prompt.py lives next to this script rather than in an installed package,
# so the script's own directory goes to the front of sys.path before the
# import below.
script_dir = Path(__file__).parent
sys.path.insert(0, str(script_dir))

from prompt import format_prompt  # noqa: E402

logger = get_logger(__name__)

# Upper bound, in characters, on the diff text kept by truncate_diff().
MAX_TOTAL_DIFF = 100_000
# Fallbacks for the MAX_BUDGET (dollars) and MAX_ITERATIONS settings.
DEFAULT_MAX_BUDGET = 10.0
DEFAULT_MAX_ITERATIONS = 500
62
+
63
+
64
+ def _get_required_env(name: str) -> str:
65
+ value = os.getenv(name)
66
+ if not value:
67
+ raise ValueError(f"{name} environment variable is required")
68
+ return value
69
+
70
+
71
def _call_github_api(
    url: str,
    method: str = "GET",
    data: dict[str, Any] | None = None,
    accept: str = "application/vnd.github+json",
) -> Any:
    """Make an authenticated GitHub API request and return the decoded body.

    Args:
        url: Absolute URL, or a path (e.g. ``/repos/o/r/pulls/1``) that is
            appended to ``https://api.github.com``.
        method: HTTP method to use.
        data: Optional request body, sent as JSON. An empty dict is still
            sent (as ``{}``); only ``None`` means "no body".
        accept: Value for the ``Accept`` header. When it contains ``"diff"``
            the response is returned as text instead of being parsed as JSON.

    Returns:
        The response text for diff requests; otherwise the parsed JSON
        document, or ``None`` when the response has no body.

    Raises:
        ValueError: If ``GITHUB_TOKEN`` is not set.
        RuntimeError: If the request fails (HTTP error status, connection
            problem, timeout) or the response is not valid UTF-8 JSON.
    """
    token = _get_required_env("GITHUB_TOKEN")

    if not url.startswith("http"):
        url = f"https://api.github.com{url}"

    request = urllib.request.Request(url, method=method)
    request.add_header("Accept", accept)
    request.add_header("Authorization", f"Bearer {token}")
    request.add_header("X-GitHub-Api-Version", "2022-11-28")

    # Compare against None so that an explicit empty payload is not dropped.
    if data is not None:
        request.add_header("Content-Type", "application/json")
        request.data = json.dumps(data).encode("utf-8")

    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            raw_data = response.read()
            if "diff" in accept:
                return raw_data.decode("utf-8", errors="replace")
            # A body-less success response is not a JSON error; report it
            # as "nothing returned" instead of failing the parse.
            if not raw_data.strip():
                return None
            return json.loads(raw_data.decode("utf-8"))
    except urllib.error.HTTPError as e:
        details = (e.read() or b"").decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"GitHub API request to {url} failed: HTTP {e.code} {e.reason}. {details}"
        ) from e
    except urllib.error.URLError as e:
        raise RuntimeError(
            f"GitHub API request to {url} failed: {e.reason}"
        ) from e
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        raise RuntimeError(
            f"GitHub API response from {url} returned invalid JSON: {e}"
        ) from e
    except OSError as e:
        # Timeouts and connection resets raised while reading the body are
        # plain OSErrors, not URLErrors; wrap them like other failures.
        raise RuntimeError(
            f"GitHub API request to {url} failed: {e}"
        ) from e
111
+
112
+
113
def get_pr_diff(pr_number: str) -> str:
    """Return the diff of pull request *pr_number* as text.

    The repository is read from the ``REPO_NAME`` environment variable and
    the diff is requested from the GitHub API with a diff ``Accept`` header.
    """
    repo_slug = _get_required_env("REPO_NAME")
    endpoint = f"/repos/{repo_slug}/pulls/{pr_number}"
    return _call_github_api(endpoint, accept="application/vnd.github.v3.diff")
120
+
121
+
122
def truncate_diff(diff_text: str, max_total: int = MAX_TOTAL_DIFF) -> str:
    """Cap *diff_text* at *max_total* characters.

    Text within the limit is returned unchanged. Longer text is cut to its
    first *max_total* characters and followed by a notice stating the
    original and retained lengths.
    """
    full_length = len(diff_text)
    if full_length <= max_total:
        return diff_text

    notice = (
        f"\n\n... [diff truncated, {full_length:,} chars total, "
        f"showing first {max_total:,}] ..."
    )
    return diff_text[:max_total] + notice
131
+
132
+
133
def get_head_commit_sha(repo_dir: Path | None = None) -> str:
    """Return the output of ``git rev-parse HEAD`` with whitespace stripped.

    Args:
        repo_dir: Directory to run git in; the current working directory
            is used when omitted.
    """
    target_dir = Path.cwd() if repo_dir is None else repo_dir
    sha_output = run_git_command(["git", "rev-parse", "HEAD"], target_dir)
    return sha_output.strip()
137
+
138
+
139
def validate_environment() -> dict[str, Any]:
    """Validate required environment variables and return the run config.

    Required variables must be set and non-empty; otherwise the missing
    names are logged and the process exits with status 1.

    Optional settings (``LLM_MODEL``, ``MAX_BUDGET``, ``MAX_ITERATIONS``)
    fall back to their defaults when unset *or empty*. A variable that is
    exported with an empty value would otherwise bypass the default and
    produce an empty model name or an unparseable number.

    Returns:
        A dict with keys ``api_key``, ``github_token``, ``model``,
        ``base_url``, ``max_budget`` (float), ``max_iterations`` (int) and
        ``pr_info`` (a dict of the PR number, title, body, repo name and
        base/head branch names).
    """
    required_vars = [
        "LLM_API_KEY",
        "GITHUB_TOKEN",
        "PR_NUMBER",
        "PR_TITLE",
        "PR_BASE_BRANCH",
        "PR_HEAD_BRANCH",
        "REPO_NAME",
    ]
    missing_vars = [var for var in required_vars if not os.getenv(var)]
    if missing_vars:
        logger.error(f"Missing required environment variables: {missing_vars}")
        sys.exit(1)

    # `or` (rather than a getenv default) so blank values also fall back.
    raw_budget = (os.getenv("MAX_BUDGET") or "").strip() or str(DEFAULT_MAX_BUDGET)
    raw_iterations = (
        (os.getenv("MAX_ITERATIONS") or "").strip() or str(DEFAULT_MAX_ITERATIONS)
    )
    try:
        max_budget = float(raw_budget)
        max_iterations = int(raw_iterations)
    except ValueError:
        # Fail the same way as for missing variables instead of a traceback.
        logger.error(
            "MAX_BUDGET must be a number and MAX_ITERATIONS an integer "
            f"(got MAX_BUDGET={raw_budget!r}, MAX_ITERATIONS={raw_iterations!r})"
        )
        sys.exit(1)

    return {
        "api_key": os.getenv("LLM_API_KEY"),
        "github_token": os.getenv("GITHUB_TOKEN"),
        "model": os.getenv("LLM_MODEL") or "anthropic/claude-sonnet-4-5-20250929",
        "base_url": os.getenv("LLM_BASE_URL"),
        "max_budget": max_budget,
        "max_iterations": max_iterations,
        "pr_info": {
            "number": os.getenv("PR_NUMBER"),
            "title": os.getenv("PR_TITLE"),
            "body": os.getenv("PR_BODY", ""),
            "repo_name": os.getenv("REPO_NAME"),
            "base_branch": os.getenv("PR_BASE_BRANCH"),
            "head_branch": os.getenv("PR_HEAD_BRANCH"),
        },
    }
171
+
172
+
173
def get_qa_plugin_source() -> PluginSource:
    """Build a PluginSource for the qa-changes plugin.

    The plugin root is taken to be the parent of this script's directory
    (i.e. ``plugins/qa-changes/``).
    """
    return PluginSource(source=str(script_dir.parent))
177
+
178
+
179
class BudgetExceeded(RuntimeError):
    """Signals that accumulated LLM cost has gone past the configured cap."""

    pass
181
+
182
+
183
+ def _make_budget_callback(
184
+ conversation_holder: list,
185
+ max_budget: float,
186
+ ) -> Any:
187
+ """Return an event callback that stops the agent when cost exceeds budget.
188
+
189
+ Uses a mutable list so the callback can reference the Conversation object
190
+ that hasn't been created yet at callback-definition time.
191
+ """
192
+ def _check_budget(event: Any) -> None:
193
+ if not conversation_holder:
194
+ return
195
+ conv = conversation_holder[0]
196
+ metrics = conv.conversation_stats.get_combined_metrics()
197
+ cost = metrics.accumulated_cost
198
+ if cost > max_budget:
199
+ raise BudgetExceeded(
200
+ f"Budget exceeded: ${cost:.2f} spent, "
201
+ f"${max_budget:.2f} limit"
202
+ )
203
+
204
+ return _check_budget
205
+
206
+
207
def create_agent_and_conversation(
    config: dict[str, Any],
    secrets: dict[str, str],
) -> tuple[Agent, Conversation]:
    """Build the QA agent and a conversation that loads the qa-changes plugin.

    Args:
        config: Run configuration; the keys read here are ``model``,
            ``api_key``, ``base_url``, ``max_budget`` and ``max_iterations``.
        secrets: Secret values passed through to the Conversation.

    Returns:
        The ``(agent, conversation)`` pair. The conversation uses the
        current working directory as its workspace and carries a callback
        that enforces ``max_budget``.
    """
    llm_kwargs: dict[str, Any] = dict(
        model=config["model"],
        api_key=config["api_key"],
        usage_id="qa_changes_agent",
        drop_params=True,
    )
    endpoint = config["base_url"]
    if endpoint:
        # Only override the endpoint when one was actually configured.
        llm_kwargs["base_url"] = endpoint

    llm = LLM(**llm_kwargs)
    qa_plugin = get_qa_plugin_source()

    # Project-specific skills come from the workspace (the current directory).
    workspace_dir = os.getcwd()
    project_skills = load_project_skills(workspace_dir)
    skill_names = [s.name for s in project_skills]
    logger.info(f"Loaded {len(project_skills)} project skills: {skill_names}")

    context = AgentContext(
        load_public_skills=True,
        skills=project_skills,
    )
    tools = get_default_tools(enable_browser=True)
    # The condenser gets its own copy of the LLM under a separate usage_id.
    condenser = get_default_condenser(
        llm=llm.model_copy(update={"usage_id": "condenser"})
    )
    agent = Agent(
        llm=llm,
        tools=tools,
        agent_context=context,
        system_prompt_kwargs={"cli_mode": True},
        condenser=condenser,
    )

    max_budget = config["max_budget"]
    max_iterations = config["max_iterations"]
    logger.info(f"Budget: ${max_budget:.2f}, max iterations: {max_iterations}")

    # The callback has to be passed to Conversation(), yet it needs the
    # Conversation itself; a shared list filled in afterwards bridges that.
    holder: list = []
    conversation = Conversation(
        agent=agent,
        workspace=workspace_dir,
        secrets=secrets,
        plugins=[qa_plugin],
        max_iteration_per_run=max_iterations,
        callbacks=[_make_budget_callback(holder, max_budget)],
    )
    holder.append(conversation)

    return agent, conversation
267
+
268
+
269
def run_qa(
    conversation: Conversation,
    prompt: str,
) -> Conversation:
    """Send *prompt* to the agent, run the conversation, and return it.

    After the run, the length of the agent's final response is logged when
    a final response is available.
    """
    logger.info("Starting QA validation...")
    logger.info("Agent will set up environment and exercise changed behavior")

    conversation.send_message(prompt)
    conversation.run()

    final_text = get_agent_final_response(conversation.state.events)
    if not final_text:
        return conversation

    logger.info(f"Agent final response: {len(final_text)} characters")
    return conversation
285
+
286
+
287
def log_cost_summary(conversation: Conversation) -> None:
    """Print the conversation's combined cost and token usage to stdout.

    Token counts are printed only when usage data is present, and the
    cache read/write lines only when their counts are above zero.
    """
    totals = conversation.conversation_stats.get_combined_metrics()
    print("\n=== QA Changes Cost Summary ===")
    print(f"Total Cost: ${totals.accumulated_cost:.6f}")

    if not totals.accumulated_token_usage:
        return

    tokens = totals.accumulated_token_usage
    print(f"Prompt Tokens: {tokens.prompt_tokens}")
    print(f"Completion Tokens: {tokens.completion_tokens}")
    if tokens.cache_read_tokens > 0:
        print(f"Cache Read Tokens: {tokens.cache_read_tokens}")
    if tokens.cache_write_tokens > 0:
        print(f"Cache Write Tokens: {tokens.cache_write_tokens}")
300
+
301
+
302
def save_trace_context(
    pr_info: dict[str, Any],
    commit_id: str,
    model: str,
) -> None:
    """Record the current Laminar trace so a later workflow can pick it up.

    Tags the trace with PR metadata and writes the trace ID, span context,
    PR number, repo name, commit and model to ``laminar_trace_info.json``
    in the current working directory. When no trace ID or span context is
    available, a warning is logged and nothing is written.
    """
    trace_id = Laminar.get_trace_id()
    active_span = Laminar.get_laminar_span_context()
    if active_span:
        span_context = active_span.model_dump(mode="json")
    else:
        span_context = None

    if not (trace_id and active_span):
        logger.warning(
            "No Laminar trace ID found - observability may not be enabled"
        )
        return

    # Attach the PR metadata from inside a child span of the captured context.
    with Laminar.start_as_current_span(
        name="qa-changes-metadata",
        parent_span_context=active_span,
    ):
        pr_url = f"https://github.com/{pr_info['repo_name']}/pull/{pr_info['number']}"
        metadata = {
            "pr_number": pr_info["number"],
            "repo_name": pr_info["repo_name"],
            "pr_url": pr_url,
            "workflow_phase": "qa-changes",
            "model": model,
        }
        Laminar.set_trace_metadata(metadata)

    trace_record = {
        "trace_id": str(trace_id),
        "span_context": span_context,
        "pr_number": pr_info["number"],
        "repo_name": pr_info["repo_name"],
        "commit_id": commit_id,
        "model": model,
    }
    with open("laminar_trace_info.json", "w") as out_file:
        json.dump(trace_record, out_file, indent=2)

    logger.info(f"Laminar trace ID: {trace_id}")
    logger.info(f"Model used: {model}")
    if span_context:
        logger.info("Laminar span context captured for trace continuation")
    print("\n=== Laminar Trace ===")
    print(f"Trace ID: {trace_id}")

    Laminar.flush()
358
+
359
+
360
def main():
    """Run the QA agent end to end.

    Validates the environment, fetches and truncates the PR diff, builds
    the prompt, runs the agent, prints the cost summary and saves the
    Laminar trace context. Any exception raised after environment
    validation is logged with its traceback and the process exits with
    status 1.
    """
    logger.info("Starting QA changes process...")

    config = validate_environment()
    pr_info = config["pr_info"]

    logger.info(f"QA for PR #{pr_info['number']}: {pr_info['title']}")

    try:
        pr_diff = truncate_diff(get_pr_diff(pr_info["number"]))
        logger.info(f"Got PR diff with {len(pr_diff)} characters")

        commit_id = get_head_commit_sha()
        logger.info(f"HEAD commit SHA: {commit_id}")

        prompt = format_prompt(
            title=pr_info.get("title", "N/A"),
            body=pr_info.get("body") or "No description provided",
            repo_name=pr_info.get("repo_name", "N/A"),
            base_branch=pr_info.get("base_branch", "main"),
            head_branch=pr_info.get("head_branch", "N/A"),
            pr_number=pr_info["number"],
            commit_id=commit_id,
            diff=pr_diff,
        )

        secrets: dict[str, str] = {}
        if config["api_key"]:
            secrets["LLM_API_KEY"] = config["api_key"]
        if config["github_token"]:
            secrets["GITHUB_TOKEN"] = config["github_token"]

        _, conversation = create_agent_and_conversation(config, secrets)
        try:
            conversation = run_qa(conversation, prompt)
        finally:
            # Report spend even when the run aborts (for example when the
            # budget callback raises): that is when the numbers matter most.
            log_cost_summary(conversation)
        save_trace_context(pr_info, commit_id, config["model"])

        logger.info("QA validation completed successfully")

    except Exception as e:
        # Top-level boundary: keep the traceback in the CI log, then fail.
        logger.exception(f"QA validation failed: {e}")
        sys.exit(1)
403
+
404
+
405
# Run the agent only when executed as a script, not when imported.
if __name__ == "__main__":
    main()