@openhands/extensions 0.0.1-alpha → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347) hide show
  1. package/.agents/skills/custom-codereview-guide.md +25 -0
  2. package/.github/pull_request_template.md +38 -0
  3. package/.github/release.yml +14 -0
  4. package/.github/workflows/check-extensions.yml +72 -0
  5. package/.github/workflows/npm-publish.yml +89 -0
  6. package/.github/workflows/pr.yml +30 -0
  7. package/.github/workflows/release.yml +24 -0
  8. package/.github/workflows/tests.yml +25 -0
  9. package/.github/workflows/vulnerability-scan.yml +87 -0
  10. package/.release-please-manifest.json +3 -0
  11. package/AGENTS.md +132 -0
  12. package/README.md +10 -0
  13. package/analysis_results.md +162 -0
  14. package/marketplaces/large-codebase.json +66 -0
  15. package/marketplaces/openhands-extensions.json +682 -0
  16. package/package.json +4 -10
  17. package/plugins/README.md +30 -0
  18. package/plugins/city-weather/.plugin/plugin.json +13 -0
  19. package/plugins/city-weather/README.md +145 -0
  20. package/plugins/city-weather/commands/now.md +56 -0
  21. package/plugins/cobol-modernization/.plugin/plugin.json +19 -0
  22. package/plugins/cobol-modernization/README.md +201 -0
  23. package/plugins/cobol-modernization/references/troubleshooting.md +18 -0
  24. package/plugins/cobol-modernization/skills/build-setup/SKILL.md +78 -0
  25. package/plugins/cobol-modernization/skills/build-setup/scripts/install-gnucobol.sh +32 -0
  26. package/plugins/cobol-modernization/skills/cobol-modernization-overview/SKILL.md +113 -0
  27. package/plugins/cobol-modernization/skills/mainfraime-removal/SKILL.md +62 -0
  28. package/plugins/cobol-modernization/skills/mainfraime-removal/references/cics-transformation-examples.md +45 -0
  29. package/plugins/cobol-modernization/skills/mainframe-planning/SKILL.md +78 -0
  30. package/plugins/cobol-modernization/skills/to-java-migration/SKILL.md +59 -0
  31. package/plugins/cobol-modernization/skills/to-java-migration/references/cobol-to-java-example.md +58 -0
  32. package/plugins/cobol-modernization/skills/to-java-migration/references/datatype-mappings.md +19 -0
  33. package/plugins/issue-duplicate-checker/.plugin/plugin.json +13 -0
  34. package/plugins/issue-duplicate-checker/README.md +51 -0
  35. package/plugins/issue-duplicate-checker/action.yml +349 -0
  36. package/plugins/issue-duplicate-checker/scripts/auto_close_duplicate_issues.py +569 -0
  37. package/plugins/issue-duplicate-checker/scripts/issue_duplicate_check_openhands.py +681 -0
  38. package/plugins/issue-duplicate-checker/scripts/post_duplicate_notice.js +220 -0
  39. package/plugins/issue-duplicate-checker/scripts/remove_duplicate_candidate_label.js +27 -0
  40. package/plugins/magic-test/.plugin/plugin.json +13 -0
  41. package/plugins/magic-test/skills/magic-word/SKILL.md +33 -0
  42. package/plugins/migration-scoring/.plugin/plugin.json +19 -0
  43. package/plugins/migration-scoring/README.md +244 -0
  44. package/plugins/migration-scoring/skills/migration-mapping/SKILL.md +72 -0
  45. package/plugins/migration-scoring/skills/migration-report/SKILL.md +118 -0
  46. package/plugins/migration-scoring/skills/migration-scoring-overview/SKILL.md +126 -0
  47. package/plugins/migration-scoring/skills/score-quality/SKILL.md +54 -0
  48. package/plugins/migration-scoring/skills/score-quality/references/scoring-criteria.md +30 -0
  49. package/plugins/migration-scoring/skills/score-style/SKILL.md +106 -0
  50. package/plugins/onboarding/.plugin/plugin.json +20 -0
  51. package/plugins/onboarding/README.md +30 -0
  52. package/plugins/onboarding/references/criteria.md +144 -0
  53. package/plugins/onboarding/skills/agent-readiness-report/README.md +23 -0
  54. package/plugins/onboarding/skills/agent-readiness-report/SKILL.md +122 -0
  55. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_agent_instructions.sh +88 -0
  56. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_build_env.sh +114 -0
  57. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_feedback_loops.sh +133 -0
  58. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_policy.sh +113 -0
  59. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_workflows.sh +127 -0
  60. package/plugins/onboarding/skills/improve-agent-readiness/README.md +19 -0
  61. package/plugins/onboarding/skills/improve-agent-readiness/SKILL.md +167 -0
  62. package/plugins/onboarding/skills/setup-agents-md/README.md +15 -0
  63. package/plugins/onboarding/skills/setup-agents-md/SKILL.md +150 -0
  64. package/plugins/onboarding/skills/setup-openhands/README.md +20 -0
  65. package/plugins/onboarding/skills/setup-openhands/SKILL.md +56 -0
  66. package/plugins/onboarding/skills/setup-pr-review/README.md +23 -0
  67. package/plugins/onboarding/skills/setup-pr-review/SKILL.md +72 -0
  68. package/plugins/openhands/.plugin/plugin.json +13 -0
  69. package/plugins/openhands/README.md +52 -0
  70. package/plugins/openhands/SKILL.md +61 -0
  71. package/plugins/openhands/commands/create.md +55 -0
  72. package/plugins/openhands/commands/openhands-cloud.md +8 -0
  73. package/plugins/openhands/scripts/run.sh +69 -0
  74. package/plugins/pr-review/.plugin/plugin.json +13 -0
  75. package/plugins/pr-review/README.md +393 -0
  76. package/plugins/pr-review/action.yml +298 -0
  77. package/plugins/pr-review/scripts/agent_script.py +1282 -0
  78. package/plugins/pr-review/scripts/evaluate_review.py +655 -0
  79. package/plugins/pr-review/scripts/prompt.py +260 -0
  80. package/plugins/pr-review/workflows/pr-review-by-openhands.yml +51 -0
  81. package/plugins/pr-review/workflows/pr-review-evaluation.yml +85 -0
  82. package/plugins/qa-changes/.plugin/plugin.json +11 -0
  83. package/plugins/qa-changes/README.md +185 -0
  84. package/plugins/qa-changes/action.yml +181 -0
  85. package/plugins/qa-changes/scripts/agent_script.py +406 -0
  86. package/plugins/qa-changes/scripts/evaluate_qa_changes.py +385 -0
  87. package/plugins/qa-changes/scripts/prompt.py +174 -0
  88. package/plugins/qa-changes/workflows/qa-changes-by-openhands.yml +50 -0
  89. package/plugins/qa-changes/workflows/qa-changes-evaluation.yml +85 -0
  90. package/plugins/release-notes/.plugin/plugin.json +19 -0
  91. package/plugins/release-notes/README.md +283 -0
  92. package/plugins/release-notes/SKILL.md +83 -0
  93. package/plugins/release-notes/action.yml +117 -0
  94. package/plugins/release-notes/commands/release-notes.md +8 -0
  95. package/plugins/release-notes/scripts/agent_script.py +292 -0
  96. package/plugins/release-notes/scripts/generate_release_notes.py +733 -0
  97. package/plugins/release-notes/scripts/prompt.py +90 -0
  98. package/plugins/release-notes/scripts/validate_release_notes.py +328 -0
  99. package/plugins/release-notes/workflows/release-notes.yml +76 -0
  100. package/plugins/vulnerability-remediation/.plugin/plugin.json +19 -0
  101. package/plugins/vulnerability-remediation/README.md +217 -0
  102. package/plugins/vulnerability-remediation/action.yml +187 -0
  103. package/plugins/vulnerability-remediation/scripts/scan_and_remediate.py +561 -0
  104. package/plugins/vulnerability-remediation/workflows/vulnerability-scan.yml +87 -0
  105. package/pyproject.toml +12 -0
  106. package/release-please-config.json +16 -0
  107. package/scripts/sync_extensions.py +494 -0
  108. package/scripts/sync_openhands_sdk_skill.py +264 -0
  109. package/skills/README.md +159 -0
  110. package/skills/add-javadoc/.plugin/plugin.json +18 -0
  111. package/skills/add-javadoc/README.md +40 -0
  112. package/skills/add-javadoc/SKILL.md +35 -0
  113. package/skills/add-javadoc/references/example.md +32 -0
  114. package/skills/add-skill/.plugin/plugin.json +18 -0
  115. package/skills/add-skill/README.md +67 -0
  116. package/skills/add-skill/SKILL.md +47 -0
  117. package/skills/add-skill/scripts/fetch_skill.py +259 -0
  118. package/skills/agent-creator/.plugin/plugin.json +20 -0
  119. package/skills/agent-creator/README.md +104 -0
  120. package/skills/agent-creator/SKILL.md +190 -0
  121. package/skills/agent-creator/commands/agent-creator.md +8 -0
  122. package/skills/agent-creator/references/fallback.md +117 -0
  123. package/skills/agent-memory/.plugin/plugin.json +18 -0
  124. package/skills/agent-memory/README.md +35 -0
  125. package/skills/agent-memory/SKILL.md +30 -0
  126. package/skills/agent-memory/commands/remember.md +8 -0
  127. package/skills/agent-sdk-builder/.plugin/plugin.json +18 -0
  128. package/skills/agent-sdk-builder/README.md +40 -0
  129. package/skills/agent-sdk-builder/SKILL.md +37 -0
  130. package/skills/agent-sdk-builder/commands/agent-builder.md +8 -0
  131. package/skills/azure-devops/.plugin/plugin.json +18 -0
  132. package/skills/azure-devops/README.md +55 -0
  133. package/skills/azure-devops/SKILL.md +50 -0
  134. package/skills/bitbucket/.plugin/plugin.json +17 -0
  135. package/skills/bitbucket/README.md +50 -0
  136. package/skills/bitbucket/SKILL.md +45 -0
  137. package/skills/code-review/.plugin/plugin.json +19 -0
  138. package/skills/code-review/README.md +18 -0
  139. package/skills/code-review/SKILL.md +208 -0
  140. package/skills/code-review/commands/codereview-roasted.md +8 -0
  141. package/skills/code-review/commands/codereview.md +8 -0
  142. package/skills/code-review/references/risk-evaluation.md +41 -0
  143. package/skills/code-review/references/supply-chain-security.md +31 -0
  144. package/skills/code-simplifier/.plugin/plugin.json +21 -0
  145. package/skills/code-simplifier/README.md +30 -0
  146. package/skills/code-simplifier/SKILL.md +91 -0
  147. package/skills/code-simplifier/commands/simplify.md +8 -0
  148. package/skills/code-simplifier/references/code-quality-review.md +86 -0
  149. package/skills/code-simplifier/references/code-reuse-review.md +63 -0
  150. package/skills/code-simplifier/references/efficiency-review.md +81 -0
  151. package/skills/datadog/.plugin/plugin.json +19 -0
  152. package/skills/datadog/README.md +100 -0
  153. package/skills/datadog/SKILL.md +95 -0
  154. package/skills/deno/.plugin/plugin.json +18 -0
  155. package/skills/deno/README.md +5 -0
  156. package/skills/deno/SKILL.md +99 -0
  157. package/skills/deno/references/README.md +6 -0
  158. package/skills/discord/.plugin/plugin.json +18 -0
  159. package/skills/discord/README.md +31 -0
  160. package/skills/discord/SKILL.md +109 -0
  161. package/skills/discord/__init__.py +0 -0
  162. package/skills/discord/references/REFERENCE.md +78 -0
  163. package/skills/discord/scripts/__init__.py +0 -0
  164. package/skills/discord/scripts/_http.py +127 -0
  165. package/skills/discord/scripts/post_webhook.py +106 -0
  166. package/skills/discord/scripts/send_message.py +102 -0
  167. package/skills/docker/.plugin/plugin.json +17 -0
  168. package/skills/docker/README.md +34 -0
  169. package/skills/docker/SKILL.md +29 -0
  170. package/skills/evidence-based-citations/.plugin/plugin.json +20 -0
  171. package/skills/evidence-based-citations/README.md +31 -0
  172. package/skills/evidence-based-citations/SKILL.md +59 -0
  173. package/skills/flarglebargle/.plugin/plugin.json +16 -0
  174. package/skills/flarglebargle/README.md +14 -0
  175. package/skills/flarglebargle/SKILL.md +9 -0
  176. package/skills/frontend-design/.plugin/plugin.json +21 -0
  177. package/skills/frontend-design/LICENSE.txt +177 -0
  178. package/skills/frontend-design/README.md +42 -0
  179. package/skills/frontend-design/SKILL.md +42 -0
  180. package/skills/github/.plugin/plugin.json +19 -0
  181. package/skills/github/README.md +42 -0
  182. package/skills/github/SKILL.md +106 -0
  183. package/skills/github-pr-review/.plugin/plugin.json +18 -0
  184. package/skills/github-pr-review/README.md +145 -0
  185. package/skills/github-pr-review/SKILL.md +148 -0
  186. package/skills/github-pr-review/commands/github-pr-review.md +8 -0
  187. package/skills/github-pr-reviewer/.plugin/plugin.json +20 -0
  188. package/skills/github-pr-reviewer/README.md +34 -0
  189. package/skills/github-pr-reviewer/SKILL.md +89 -0
  190. package/skills/github-pr-reviewer/commands/pr-reviewer:setup.md +8 -0
  191. package/skills/github-repo-monitor/.plugin/plugin.json +22 -0
  192. package/skills/github-repo-monitor/README.md +70 -0
  193. package/skills/github-repo-monitor/SKILL.md +316 -0
  194. package/skills/github-repo-monitor/commands/github-monitor:poll.md +8 -0
  195. package/skills/github-repo-monitor/references/github-api.md +241 -0
  196. package/skills/github-repo-monitor/references/state-schema.md +160 -0
  197. package/skills/github-repo-monitor/scripts/main.py +915 -0
  198. package/skills/github-repo-monitor/tests/test_main.py +400 -0
  199. package/skills/gitlab/.plugin/plugin.json +17 -0
  200. package/skills/gitlab/README.md +37 -0
  201. package/skills/gitlab/SKILL.md +32 -0
  202. package/skills/incident-retrospective/.plugin/plugin.json +21 -0
  203. package/skills/incident-retrospective/README.md +34 -0
  204. package/skills/incident-retrospective/SKILL.md +98 -0
  205. package/skills/incident-retrospective/commands/incident-retro:setup.md +8 -0
  206. package/skills/iterate/.plugin/plugin.json +13 -0
  207. package/skills/iterate/README.md +25 -0
  208. package/skills/iterate/SKILL.md +399 -0
  209. package/skills/iterate/commands/babysit.md +8 -0
  210. package/skills/iterate/commands/iterate.md +8 -0
  211. package/skills/iterate/commands/verify.md +8 -0
  212. package/skills/iterate/references/heuristics.md +58 -0
  213. package/skills/iterate/references/verification.md +96 -0
  214. package/skills/jupyter/.plugin/plugin.json +18 -0
  215. package/skills/jupyter/README.md +55 -0
  216. package/skills/jupyter/SKILL.md +50 -0
  217. package/skills/kubernetes/.plugin/plugin.json +18 -0
  218. package/skills/kubernetes/README.md +53 -0
  219. package/skills/kubernetes/SKILL.md +48 -0
  220. package/skills/learn-from-code-review/.plugin/plugin.json +19 -0
  221. package/skills/learn-from-code-review/README.md +64 -0
  222. package/skills/learn-from-code-review/SKILL.md +186 -0
  223. package/skills/learn-from-code-review/commands/learn-from-reviews.md +8 -0
  224. package/skills/linear/.plugin/plugin.json +19 -0
  225. package/skills/linear/README.md +58 -0
  226. package/skills/linear/SKILL.md +213 -0
  227. package/skills/linear-triage/.plugin/plugin.json +21 -0
  228. package/skills/linear-triage/README.md +34 -0
  229. package/skills/linear-triage/SKILL.md +91 -0
  230. package/skills/linear-triage/commands/linear-triage:setup.md +8 -0
  231. package/skills/notion/.plugin/plugin.json +17 -0
  232. package/skills/notion/README.md +114 -0
  233. package/skills/notion/SKILL.md +109 -0
  234. package/skills/npm/.plugin/plugin.json +17 -0
  235. package/skills/npm/README.md +14 -0
  236. package/skills/npm/SKILL.md +9 -0
  237. package/skills/openhands-api/.plugin/plugin.json +22 -0
  238. package/skills/openhands-api/README.md +48 -0
  239. package/skills/openhands-api/SKILL.md +399 -0
  240. package/skills/openhands-api/references/README.md +33 -0
  241. package/skills/openhands-api/references/TROUBLESHOOTING.md +81 -0
  242. package/skills/openhands-api/references/example_prompt.md +12 -0
  243. package/skills/openhands-api/scripts/openhands_api.py +606 -0
  244. package/skills/openhands-api/scripts/openhands_api.ts +252 -0
  245. package/skills/openhands-automation/.plugin/plugin.json +19 -0
  246. package/skills/openhands-automation/README.md +89 -0
  247. package/skills/openhands-automation/SKILL.md +875 -0
  248. package/skills/openhands-automation/commands/automation:create.md +8 -0
  249. package/skills/openhands-automation/references/ab-testing.md +185 -0
  250. package/skills/openhands-automation/references/custom-automation.md +644 -0
  251. package/skills/openhands-sdk/.plugin/plugin.json +20 -0
  252. package/skills/openhands-sdk/README.md +22 -0
  253. package/skills/openhands-sdk/SKILL.md +229 -0
  254. package/skills/openhands-sdk/commands/sdk.md +8 -0
  255. package/skills/pdflatex/.plugin/plugin.json +18 -0
  256. package/skills/pdflatex/README.md +39 -0
  257. package/skills/pdflatex/SKILL.md +34 -0
  258. package/skills/prd/.plugin/plugin.json +19 -0
  259. package/skills/prd/README.md +28 -0
  260. package/skills/prd/SKILL.md +237 -0
  261. package/skills/prd/commands/prd.md +8 -0
  262. package/skills/qa-changes/README.md +18 -0
  263. package/skills/qa-changes/SKILL.md +229 -0
  264. package/skills/qa-changes/commands/qa-changes.md +8 -0
  265. package/skills/release-notes/README.md +24 -0
  266. package/skills/release-notes/SKILL.md +19 -0
  267. package/skills/release-notes/commands/release-notes.md +8 -0
  268. package/skills/research-brief/.plugin/plugin.json +20 -0
  269. package/skills/research-brief/README.md +34 -0
  270. package/skills/research-brief/SKILL.md +99 -0
  271. package/skills/research-brief/commands/research-brief:setup.md +8 -0
  272. package/skills/security/.plugin/plugin.json +18 -0
  273. package/skills/security/README.md +38 -0
  274. package/skills/security/SKILL.md +33 -0
  275. package/skills/skill-creator/.plugin/plugin.json +17 -0
  276. package/skills/skill-creator/LICENSE.txt +202 -0
  277. package/skills/skill-creator/README.md +182 -0
  278. package/skills/skill-creator/SKILL.md +545 -0
  279. package/skills/skill-creator/references/output-patterns.md +82 -0
  280. package/skills/skill-creator/references/workflows.md +28 -0
  281. package/skills/skill-creator/scripts/init_skill.py +303 -0
  282. package/skills/skill-creator/scripts/quick_validate.py +95 -0
  283. package/skills/slack-channel-monitor/.plugin/plugin.json +21 -0
  284. package/skills/slack-channel-monitor/README.md +91 -0
  285. package/skills/slack-channel-monitor/SKILL.md +276 -0
  286. package/skills/slack-channel-monitor/commands/slack-monitor:poll.md +8 -0
  287. package/skills/slack-channel-monitor/references/slack-api.md +207 -0
  288. package/skills/slack-channel-monitor/references/state-schema.md +180 -0
  289. package/skills/slack-channel-monitor/scripts/main.py +962 -0
  290. package/skills/slack-standup-digest/.plugin/plugin.json +21 -0
  291. package/skills/slack-standup-digest/README.md +34 -0
  292. package/skills/slack-standup-digest/SKILL.md +92 -0
  293. package/skills/slack-standup-digest/commands/standup-digest:setup.md +8 -0
  294. package/skills/spark-version-upgrade/.plugin/plugin.json +20 -0
  295. package/skills/spark-version-upgrade/README.md +54 -0
  296. package/skills/spark-version-upgrade/SKILL.md +233 -0
  297. package/skills/ssh/.plugin/plugin.json +18 -0
  298. package/skills/ssh/README.md +140 -0
  299. package/skills/ssh/SKILL.md +135 -0
  300. package/skills/swift-linux/.plugin/plugin.json +17 -0
  301. package/skills/swift-linux/README.md +86 -0
  302. package/skills/swift-linux/SKILL.md +81 -0
  303. package/skills/theme-factory/.plugin/plugin.json +19 -0
  304. package/skills/theme-factory/LICENSE.txt +202 -0
  305. package/skills/theme-factory/README.md +58 -0
  306. package/skills/theme-factory/SKILL.md +59 -0
  307. package/skills/theme-factory/theme-showcase.pdf +0 -0
  308. package/skills/theme-factory/themes/arctic-frost.md +19 -0
  309. package/skills/theme-factory/themes/botanical-garden.md +19 -0
  310. package/skills/theme-factory/themes/desert-rose.md +19 -0
  311. package/skills/theme-factory/themes/forest-canopy.md +19 -0
  312. package/skills/theme-factory/themes/golden-hour.md +19 -0
  313. package/skills/theme-factory/themes/midnight-galaxy.md +19 -0
  314. package/skills/theme-factory/themes/modern-minimalist.md +19 -0
  315. package/skills/theme-factory/themes/ocean-depths.md +19 -0
  316. package/skills/theme-factory/themes/sunset-boulevard.md +19 -0
  317. package/skills/theme-factory/themes/tech-innovation.md +19 -0
  318. package/skills/uv/.plugin/plugin.json +18 -0
  319. package/skills/uv/README.md +5 -0
  320. package/skills/uv/SKILL.md +95 -0
  321. package/skills/uv/references/README.md +5 -0
  322. package/skills/vercel/.plugin/plugin.json +18 -0
  323. package/skills/vercel/README.md +108 -0
  324. package/skills/vercel/SKILL.md +103 -0
  325. package/tests/test_add_skill_installs_to_agents_dir.py +42 -0
  326. package/tests/test_catalogs.py +109 -0
  327. package/tests/test_code_review_risk_evaluation.py +94 -0
  328. package/tests/test_issue_duplicate_checker.py +240 -0
  329. package/tests/test_openhands_api_python.py +152 -0
  330. package/tests/test_plugin_manifest.py +83 -0
  331. package/tests/test_pr_review_diff_payload.py +202 -0
  332. package/tests/test_pr_review_feedback.py +263 -0
  333. package/tests/test_pr_review_prompt.py +152 -0
  334. package/tests/test_pr_review_review_context.py +253 -0
  335. package/tests/test_qa_changes.py +232 -0
  336. package/tests/test_qa_changes_evaluation.py +259 -0
  337. package/tests/test_release_notes_generator.py +990 -0
  338. package/tests/test_sdk_loading.py +150 -0
  339. package/tests/test_skill_plugin_loading.py +149 -0
  340. package/tests/test_skills_have_readme.py +66 -0
  341. package/tests/test_sync_extensions.py +292 -0
  342. package/tests/test_workflow_sync.py +46 -0
  343. package/utils/analysis/README.md +7 -0
  344. package/utils/analysis/laminar_signals/README.md +211 -0
  345. package/utils/analysis/laminar_signals/analyze.py +780 -0
  346. package/utils/analysis/laminar_signals/templates/default.j2 +49 -0
  347. package/utils/analysis/laminar_signals/templates/pr_review.j2 +61 -0
@@ -0,0 +1,655 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ PR Review Evaluation Script
4
+
5
+ This script runs when a PR is merged or closed to evaluate how well
6
+ the review comments were addressed. It creates an evaluation trace
7
+ in Laminar that can be processed by a signal to determine review
8
+ effectiveness.
9
+
10
+ The evaluation flow:
11
+ 1. Read the original trace ID from the artifact
12
+ 2. Fetch PR review comments and thread discussion from GitHub
13
+ 3. Fetch the final patch/diff
14
+ 4. Create an evaluation span with all context
15
+ 5. Optionally score the original trace
16
+
17
+ Environment Variables:
18
+ LMNR_PROJECT_API_KEY: Laminar project API key (required)
19
+ GITHUB_TOKEN: GitHub token for API access (required)
20
+ PR_NUMBER: Pull request number (required)
21
+ REPO_NAME: Repository name in format owner/repo (required)
22
+ PR_MERGED: Whether the PR was merged ('true' or 'false')
23
+ """
24
+
25
+ import json
26
+
27
+ # Configure logging
28
+ import logging
29
+ import os
30
+ import sys
31
+ import urllib.error
32
+ import urllib.request
33
+ from pathlib import Path
34
+
35
+ from lmnr import Laminar, LaminarClient
36
+
37
# Configure process-wide logging at import time: INFO level, "LEVEL: message"
# formatting. `logger` is the module-level logger used by the helpers below.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)

# Hidden HTML comment used to recognise feedback-request comments/reviews:
# extract_review_feedback() only considers bodies containing this marker.
FEEDBACK_COMMENT_MARKER = "<!-- openhands-pr-review-feedback -->"

# GraphQL query used by fetch_pr_reviews(). It returns up to 100 reviews per
# page (cursor-paginated through `pageInfo`) together with per-reaction user
# counts, which _normalize_review_reactions() folds into thumbs-up/down totals.
REVIEWS_QUERY = """
query($owner: String!, $repo: String!, $pr_number: Int!, $cursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr_number) {
reviews(first: 100, after: $cursor) {
pageInfo {
hasNextPage
endCursor
}
nodes {
id
body
state
submittedAt
author { login }
reactionGroups {
content
users {
totalCount
}
}
}
}
}
}
}
"""
69
+
70
+
71
+
72
def _get_required_env(name: str) -> str:
    """Return the value of environment variable *name*.

    Raises:
        ValueError: If the variable is unset or set to an empty string.
    """
    found = os.environ.get(name)
    if found:
        return found
    raise ValueError(f"{name} environment variable is required")
78
+
79
+
80
def _get_github_headers() -> dict[str, str]:
    """Build the HTTP headers sent with every GitHub API request.

    The bearer token is read from the required GITHUB_TOKEN environment
    variable; a ValueError propagates from _get_required_env when it is
    missing or empty.
    """
    bearer = "Bearer " + _get_required_env("GITHUB_TOKEN")
    headers = {"Accept": "application/vnd.github.v3+json"}
    headers["Authorization"] = bearer
    headers["X-GitHub-Api-Version"] = "2022-11-28"
    return headers
88
+
89
+
90
def _get_agent_usernames() -> set[str]:
    """Get the set of agent usernames used to identify agent comments.

    Configurable via the AGENT_USERNAMES environment variable
    (comma-separated). Entries are whitespace-stripped and empty entries are
    dropped.

    Defaults to 'openhands-agent,all-hands-bot,github-actions[bot]'. The
    default is also used when the variable is set but yields no usernames
    (empty string, only whitespace, or only commas). CI systems commonly pass
    unset inputs as empty strings, and an empty agent set would make every
    comment look human-authored.
    """
    default_usernames = "openhands-agent,all-hands-bot,github-actions[bot]"
    raw = os.getenv("AGENT_USERNAMES") or default_usernames
    usernames = {name.strip() for name in raw.split(",") if name.strip()}
    if not usernames:
        # Value such as " , " parsed to nothing: fall back to the defaults.
        usernames = set(default_usernames.split(","))
    return usernames
101
+
102
+
103
+ def _handle_github_api_error(e: urllib.error.HTTPError, context: str) -> None:
104
+ """Handle GitHub API errors with rate limit awareness."""
105
+ if e.code == 429:
106
+ retry_after = e.headers.get("Retry-After", "60")
107
+ logger.warning(f"Rate limited by GitHub API. Retry after {retry_after}s")
108
+ logger.error(f"Failed to {context}: HTTP {e.code}")
109
+
110
+
111
def _fetch_paginated_github_list(
    url: str, context: str, per_page: int = 100
) -> list[dict]:
    """Collect every item from a page-number-paginated GitHub REST list endpoint.

    GitHub list endpoints return only 30 items per request by default, so a
    single request silently truncates long threads. This helper requests
    ``per_page`` items at a time and keeps asking for the next page until a
    short (or empty) page is returned.

    On an HTTP error the error is logged via _handle_github_api_error and the
    items collected so far are returned (an empty list if the first page
    failed).
    """
    headers = _get_github_headers()
    separator = "&" if "?" in url else "?"
    items: list[dict] = []
    page = 1

    while True:
        page_url = f"{url}{separator}per_page={per_page}&page={page}"
        request = urllib.request.Request(page_url, headers=headers)
        try:
            with urllib.request.urlopen(request, timeout=60) as response:
                batch = json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            _handle_github_api_error(e, context)
            return items

        # A non-list payload is not a page of results; stop rather than
        # extend the result with unexpected data.
        if not isinstance(batch, list) or not batch:
            return items

        items.extend(batch)
        if len(batch) < per_page:
            # Short page: this was the last one.
            return items
        page += 1


def fetch_pr_review_comments(repo: str, pr_number: str) -> list[dict]:
    """Fetch all review comments on a PR.

    This includes inline code review comments, not regular PR comments.
    All pages are fetched. Returns an empty list when the first request
    fails.
    """
    url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments"
    return _fetch_paginated_github_list(url, "fetch review comments")


def fetch_pr_issue_comments(repo: str, pr_number: str) -> list[dict]:
    """Fetch all issue-style comments on a PR (the main thread).

    All pages are fetched. Returns an empty list when the first request
    fails.
    """
    url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
    return _fetch_paginated_github_list(url, "fetch issue comments")
136
+
137
+
138
def _call_github_graphql(query: str, variables: dict) -> dict:
    """POST a query to the GitHub GraphQL endpoint and return its `data` part.

    Returns an empty dict when the HTTP request fails, when the response
    carries GraphQL `errors`, or when `data` is missing/null.
    """
    headers = _get_github_headers()
    body = json.dumps({"query": query, "variables": variables}).encode("utf-8")
    request = urllib.request.Request(
        "https://api.github.com/graphql",
        data=body,
        headers=headers,
        method="POST",
    )
    request.add_header("Content-Type", "application/json")

    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            raw = response.read()
            payload = json.loads(raw.decode("utf-8"))
    except urllib.error.HTTPError as e:
        _handle_github_api_error(e, "fetch GraphQL data")
        return {}

    errors = payload.get("errors")
    if errors:
        logger.error("GitHub GraphQL returned errors: %s", errors)
        return {}

    data = payload.get("data")
    return data if data else {}
159
+
160
+
161
+ def _normalize_review_reactions(reaction_groups: list[dict] | None) -> dict[str, int]:
162
+ """Map GraphQL reaction groups to GitHub-style thumbs-up/down counters."""
163
+ thumbs_up = 0
164
+ thumbs_down = 0
165
+
166
+ for group in reaction_groups or []:
167
+ total_count = ((group.get("users") or {}).get("totalCount")) or 0
168
+ content = group.get("content")
169
+ if content == "THUMBS_UP":
170
+ thumbs_up = total_count
171
+ elif content == "THUMBS_DOWN":
172
+ thumbs_down = total_count
173
+
174
+ return {
175
+ "+1": thumbs_up,
176
+ "-1": thumbs_down,
177
+ "total_count": thumbs_up + thumbs_down,
178
+ }
179
+
180
+
181
def fetch_pr_reviews(repo: str, pr_number: str) -> list[dict]:
    """Fetch all reviews on a PR, including thumbs-up/down reaction counts.

    Reviews are read through the GraphQL API, following the pagination
    cursor until the last page, and reshaped into REST-like dicts with the
    keys `id`, `user` ({"login": ...}), `body`, `state`, `submitted_at` and
    `reactions`.

    Args:
        repo: Repository in "owner/name" form.
        pr_number: Pull request number (converted with int()).

    Returns:
        The list of normalized reviews. If a page cannot be fetched
        (_call_github_graphql returns an empty dict), the reviews collected
        so far are returned.

    Raises:
        ValueError: If `repo` has no "/" or `pr_number` is not an integer.
    """
    owner, repo_name = repo.split("/", 1)
    reviews: list[dict] = []
    cursor = None

    while True:
        data = _call_github_graphql(
            REVIEWS_QUERY,
            {
                "owner": owner,
                "repo": repo_name,
                "pr_number": int(pr_number),
                "cursor": cursor,
            },
        )
        # GraphQL may return an explicit null for any of these objects;
        # `.get(key, {})` would hand back None, so coalesce with `or {}`.
        repository = data.get("repository") or {}
        pull_request = repository.get("pullRequest") or {}
        reviews_data = pull_request.get("reviews") or {}

        for review in reviews_data.get("nodes") or []:
            if not review:
                # Connection nodes are nullable; skip empty entries.
                continue
            author = review.get("author") or {}
            reviews.append(
                {
                    "id": review.get("id"),
                    "user": {"login": author.get("login")},
                    "body": review.get("body") or "",
                    "state": review.get("state"),
                    "submitted_at": review.get("submittedAt"),
                    "reactions": _normalize_review_reactions(
                        review.get("reactionGroups")
                    ),
                }
            )

        page_info = reviews_data.get("pageInfo") or {}
        if not page_info.get("hasNextPage"):
            break
        cursor = page_info.get("endCursor")
        if not cursor:
            # No cursor to continue from; stop instead of re-requesting
            # the first page forever.
            break

    return reviews
227
+
228
+
229
def fetch_pr_diff(repo: str, pr_number: str) -> str:
    """Download the PR's diff as text.

    Requests the pull request resource with the diff media type. Bytes that
    are not valid UTF-8 are replaced rather than raising. Returns an empty
    string when the request fails with an HTTP error.
    """
    diff_headers = {
        **_get_github_headers(),
        "Accept": "application/vnd.github.v3.diff",
    }
    request = urllib.request.Request(
        f"https://api.github.com/repos/{repo}/pulls/{pr_number}",
        headers=diff_headers,
    )
    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            raw = response.read()
            return raw.decode("utf-8", errors="replace")
    except urllib.error.HTTPError as e:
        _handle_github_api_error(e, "fetch PR diff")
    return ""
241
+
242
+
243
def fetch_pr_info(repo: str, pr_number: str) -> dict:
    """Retrieve the pull request's metadata as a decoded JSON object.

    Returns an empty dict when the request fails with an HTTP error.
    """
    endpoint = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"
    request = urllib.request.Request(endpoint, headers=_get_github_headers())
    try:
        with urllib.request.urlopen(request, timeout=60) as response:
            text = response.read().decode("utf-8")
            return json.loads(text)
    except urllib.error.HTTPError as e:
        _handle_github_api_error(e, "fetch PR info")
    return {}
253
+
254
+
255
+ def extract_agent_comments(
256
+ review_comments: list[dict], issue_comments: list[dict], reviews: list[dict]
257
+ ) -> list[dict]:
258
+ """Extract comments made by the review agent.
259
+
260
+ Agent usernames are configurable via AGENT_USERNAMES environment variable.
261
+ """
262
+ agent_users = _get_agent_usernames()
263
+ agent_comments = []
264
+
265
+ # Review comments (inline code comments)
266
+ for comment in review_comments:
267
+ if comment.get("user", {}).get("login") in agent_users:
268
+ agent_comments.append(
269
+ {
270
+ "type": "review_comment",
271
+ "id": comment.get("id"),
272
+ "body": comment.get("body", ""),
273
+ "path": comment.get("path"),
274
+ "line": comment.get("line") or comment.get("original_line"),
275
+ "created_at": comment.get("created_at"),
276
+ }
277
+ )
278
+
279
+ # Issue comments (main thread)
280
+ for comment in issue_comments:
281
+ if comment.get("user", {}).get("login") in agent_users:
282
+ agent_comments.append(
283
+ {
284
+ "type": "issue_comment",
285
+ "id": comment.get("id"),
286
+ "body": comment.get("body", ""),
287
+ "created_at": comment.get("created_at"),
288
+ }
289
+ )
290
+
291
+ # Review bodies
292
+ for review in reviews:
293
+ if review.get("user", {}).get("login") in agent_users and review.get("body"):
294
+ agent_comments.append(
295
+ {
296
+ "type": "review",
297
+ "id": review.get("id"),
298
+ "body": review.get("body", ""),
299
+ "state": review.get("state"),
300
+ "created_at": review.get("submitted_at"),
301
+ }
302
+ )
303
+
304
+ return agent_comments
305
+
306
+
307
+ def extract_human_responses(
308
+ review_comments: list[dict],
309
+ issue_comments: list[dict],
310
+ agent_users: set[str] | None = None,
311
+ ) -> list[dict]:
312
+ """Extract comments/responses from humans (non-agent users).
313
+
314
+ Agent usernames are configurable via AGENT_USERNAMES environment variable.
315
+ """
316
+ if agent_users is None:
317
+ agent_users = _get_agent_usernames()
318
+
319
+ human_responses = []
320
+
321
+ for comment in review_comments:
322
+ if comment.get("user", {}).get("login") not in agent_users:
323
+ human_responses.append(
324
+ {
325
+ "type": "review_comment",
326
+ "user": comment.get("user", {}).get("login"),
327
+ "body": comment.get("body", ""),
328
+ "in_reply_to_id": comment.get("in_reply_to_id"),
329
+ "created_at": comment.get("created_at"),
330
+ }
331
+ )
332
+
333
+ for comment in issue_comments:
334
+ if comment.get("user", {}).get("login") not in agent_users:
335
+ human_responses.append(
336
+ {
337
+ "type": "issue_comment",
338
+ "user": comment.get("user", {}).get("login"),
339
+ "body": comment.get("body", ""),
340
+ "created_at": comment.get("created_at"),
341
+ }
342
+ )
343
+
344
+ return human_responses
345
+
346
+
347
def extract_review_feedback(
    issue_comments: list[dict],
    reviews: list[dict] | None = None,
    agent_users: set[str] | None = None,
) -> list[dict]:
    """Extract thumbs-up/down feedback from review bodies or legacy comments.

    Only items that contain FEEDBACK_COMMENT_MARKER in their body and were
    authored by an agent user are considered. Issue comments are scanned
    first, then reviews.

    Args:
        issue_comments: Main-thread issue comment objects.
        reviews: Review objects; None is treated as an empty list.
        agent_users: Logins to treat as the agent. If None, they are
            resolved with _get_agent_usernames().

    Returns:
        List of dicts with "comment_id", "created_at", "thumbs_up",
        "thumbs_down" and "total" (sum of both reaction counts).
    """
    if agent_users is None:
        agent_users = _get_agent_usernames()

    feedback: list[dict] = []

    for comment in [*issue_comments, *(reviews or [])]:
        if FEEDBACK_COMMENT_MARKER not in (comment.get("body") or ""):
            continue
        # "user" may be present but null; guard with `or {}` so such items
        # are skipped instead of raising AttributeError.
        author = (comment.get("user") or {}).get("login")
        if author not in agent_users:
            continue

        # Missing or null reaction data counts as zero votes.
        reactions = comment.get("reactions") or {}
        thumbs_up = reactions.get("+1", 0) or 0
        thumbs_down = reactions.get("-1", 0) or 0
        feedback.append(
            {
                "comment_id": comment.get("id"),
                # Issue comments carry "created_at"; reviews carry
                # "submitted_at".
                "created_at": comment.get("created_at")
                or comment.get("submitted_at"),
                "thumbs_up": thumbs_up,
                "thumbs_down": thumbs_down,
                "total": thumbs_up + thumbs_down,
            }
        )

    return feedback
375
+
376
+
377
def truncate_text(text: str, max_chars: int = 50000) -> str:
    """Cap *text* at ``max_chars`` characters, marking any truncation.

    The 50k default is meant to stay well under typical API payload limits
    while keeping enough context for evaluation, so the evaluation trace
    stays a manageable size for Laminar processing.

    Args:
        text: Text to cap.
        max_chars: Number of leading characters to keep.

    Returns:
        ``text`` unchanged when it already fits; otherwise its first
        ``max_chars`` characters followed by a note stating the original
        length (the note itself is appended on top of ``max_chars``).
    """
    original_length = len(text)
    if original_length > max_chars:
        notice = f"\n\n... [truncated, {original_length} total chars]"
        return text[:max_chars] + notice
    return text
387
+
388
+
389
def load_trace_info(trace_file_path: str | None = None) -> dict:
    """Load trace info from artifact file.

    Args:
        trace_file_path: Path to trace info JSON file. If None (or empty),
            "laminar_trace_info.json" in the current directory is used.

    Returns:
        The parsed JSON content (expected to hold trace_id, span_context
        and other metadata). Empty dict if the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    trace_info_path = (
        Path(trace_file_path) if trace_file_path else Path("laminar_trace_info.json")
    )

    # Open directly instead of checking exists() first: this avoids the
    # check-then-open race. JSON is decoded as UTF-8 explicitly rather than
    # with the platform's locale encoding.
    try:
        with trace_info_path.open(encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        logger.warning(
            "No trace info file found - evaluation will create standalone trace"
        )
        return {}

    logger.info(f"Original trace ID: {data.get('trace_id')}")
    if data.get("span_context"):
        logger.info("Found span context - will add evaluation to original trace")
    else:
        logger.info("No span context - evaluation will create standalone trace")

    return data
417
+
418
+
419
def fetch_pr_data(repo: str, pr_number: str) -> dict:
    """Gather everything the evaluation needs about one pull request.

    Fetches the raw review comments, issue comments, reviews, diff and PR
    info, then derives the agent comments, human responses and review
    feedback from them.

    Args:
        repo: Repository in format owner/repo
        pr_number: PR number

    Returns:
        Dictionary with review_comments, issue_comments, reviews,
        final_diff, pr_info, agent_comments, human_responses and
        review_feedback.
    """
    logger.info("Fetching PR data from GitHub...")

    # Dict displays evaluate their values top to bottom, so the fetches run
    # in the listed order.
    pr_data: dict = {
        "review_comments": fetch_pr_review_comments(repo, pr_number),
        "issue_comments": fetch_pr_issue_comments(repo, pr_number),
        "reviews": fetch_pr_reviews(repo, pr_number),
        "final_diff": fetch_pr_diff(repo, pr_number),
        "pr_info": fetch_pr_info(repo, pr_number),
    }
    inline = pr_data["review_comments"]
    thread = pr_data["issue_comments"]
    submitted = pr_data["reviews"]

    logger.info(f"Found {len(inline)} review comments")
    logger.info(f"Found {len(thread)} issue comments")
    logger.info(f"Found {len(submitted)} reviews")

    # Derived views over the raw payloads.
    pr_data["agent_comments"] = extract_agent_comments(inline, thread, submitted)
    pr_data["human_responses"] = extract_human_responses(inline, thread)
    pr_data["review_feedback"] = extract_review_feedback(thread, submitted)

    logger.info(f"Agent made {len(pr_data['agent_comments'])} comments")
    logger.info(f"Humans made {len(pr_data['human_responses'])} responses")
    logger.info(f"Found {len(pr_data['review_feedback'])} review feedback prompts")

    return pr_data
460
+
461
+
462
def calculate_engagement_score(
    agent_comments: list[dict],
    human_responses: list[dict],
    pr_merged: bool,
) -> float:
    """Score how much humans engaged with the agent's review.

    The score is the sum of two parts:
    - Response part (0-0.5): number of human responses divided by number
      of agent comments, capped at 1.0 and scaled by 0.5. It is 0 when
      the agent made no comments.
    - Merge bonus (0.3): added when the PR was merged.

    Args:
        agent_comments: List of agent comments
        human_responses: List of human responses
        pr_merged: Whether the PR was merged

    Returns:
        Engagement score between 0.0 and 0.8
    """
    response_part = 0.0
    if agent_comments:
        ratio = len(human_responses) / len(agent_comments)
        response_part = min(ratio, 1.0) * 0.5

    merge_bonus = 0.3 if pr_merged else 0.0
    return response_part + merge_bonus
489
+
490
+
491
def create_evaluation_span(
    pr_number: str,
    repo_name: str,
    pr_merged: bool,
    pr_data: dict,
    trace_info: dict,
) -> str | None:
    """Record the PR evaluation as a Laminar span and report its trace ID.

    The span takes the full evaluation context as input and a short summary
    as output. The span context from ``trace_info`` (None if absent) is
    passed as the parent span context.

    Args:
        pr_number: PR number
        repo_name: Repository name
        pr_merged: Whether PR was merged
        pr_data: Dictionary from fetch_pr_data()
        trace_info: Dictionary from load_trace_info()

    Returns:
        Evaluation trace ID as a string, or None if not available
    """
    Laminar.initialize()

    pr_info = pr_data["pr_info"]
    agent_comments = pr_data["agent_comments"]
    human_responses = pr_data["human_responses"]
    review_feedback = pr_data["review_feedback"]
    final_diff = pr_data["final_diff"]
    original_trace_id = trace_info.get("trace_id")

    # Full context recorded as the span input; the diff is capped in size.
    evaluation_context = {
        "pr_number": pr_number,
        "repo_name": repo_name,
        "pr_merged": pr_merged,
        "pr_title": pr_info.get("title", ""),
        "pr_state": pr_info.get("state", ""),
        "original_trace_id": original_trace_id,
        "agent_comments": agent_comments,
        "human_responses": human_responses,
        "review_feedback": review_feedback,
        "final_diff": truncate_text(final_diff),
        "total_review_comments": len(pr_data["review_comments"]),
        "total_issue_comments": len(pr_data["issue_comments"]),
    }

    # Trace metadata is passed as strings; a missing original trace ID is
    # recorded as "none".
    trace_metadata = {
        "original_trace_id": original_trace_id or "none",
        "evaluation_type": "pr_review_effectiveness",
        "pr_number": pr_number,
        "repo_name": repo_name,
        "pr_merged": str(pr_merged),
    }

    # Compact summary logged and stored as the span output.
    summary = {
        "pr": f"{repo_name}#{pr_number}",
        "merged": pr_merged,
        "agent_comments_count": len(agent_comments),
        "human_responses_count": len(human_responses),
        "review_feedback": review_feedback,
        "diff_length": len(final_diff),
    }

    with Laminar.start_as_current_span(
        name="pr_review_evaluation",
        input=evaluation_context,
        tags=["pr-review-evaluation"],
        parent_span_context=trace_info.get("span_context"),
    ):
        Laminar.set_trace_metadata(trace_metadata)
        logger.info(f"Evaluation summary: {json.dumps(summary)}")
        Laminar.set_span_output({"summary": summary, "ready_for_signal": True})

        # Read the trace ID while the span is still current.
        eval_trace_id = Laminar.get_trace_id()

    Laminar.flush()
    if not eval_trace_id:
        return None
    return str(eval_trace_id)
564
+
565
+
566
def main(trace_file_path: str | None = None):
    """Run the PR review evaluation end to end.

    Reads PR_NUMBER and REPO_NAME (required) and PR_MERGED (optional,
    "true" case-insensitively means merged) from the environment, loads the
    trace info, fetches the PR data and creates the evaluation span. If an
    original trace ID is known, an engagement score and tags are also
    attached to that trace on a best-effort basis. A summary is printed at
    the end.

    Args:
        trace_file_path: Optional path to trace info JSON file.
    """
    logger.info("Starting PR review evaluation...")

    pr_number = _get_required_env("PR_NUMBER")
    repo_name = _get_required_env("REPO_NAME")
    merged_flag = os.getenv("PR_MERGED", "false")
    pr_merged = merged_flag.lower() == "true"

    logger.info(f"Evaluating PR #{pr_number} in {repo_name}")
    logger.info(f"PR was merged: {pr_merged}")

    trace_info = load_trace_info(trace_file_path)
    pr_data = fetch_pr_data(repo_name, pr_number)
    eval_trace_id = create_evaluation_span(
        pr_number, repo_name, pr_merged, pr_data, trace_info
    )

    original_trace_id = trace_info.get("trace_id")
    agent_comments = pr_data["agent_comments"]
    human_responses = pr_data["human_responses"]
    review_feedback = pr_data["review_feedback"]
    agent_count = len(agent_comments)
    human_count = len(human_responses)

    # Score engagement on the original trace for immediate feedback.
    # Best-effort: a failure here is logged but never aborts the evaluation.
    if original_trace_id:
        try:
            client = LaminarClient()
            engagement_score = calculate_engagement_score(
                agent_comments, human_responses, pr_merged
            )
            score_metadata = {
                "agent_comments": agent_count,
                "human_responses": human_count,
                "pr_merged": pr_merged,
                "review_feedback": review_feedback,
                "score_type": "engagement",
            }

            client.evaluators.score(
                name="review_engagement",
                trace_id=original_trace_id,
                score=engagement_score,
                metadata=score_metadata,
            )
            logger.info(
                f"Added engagement score {engagement_score:.2f} "
                f"to original trace {original_trace_id}"
            )

            client.tags.tag(original_trace_id, ["evaluated", f"pr-{pr_number}"])
            logger.info(f"Tagged original trace {original_trace_id}")

        except Exception as e:
            logger.warning(f"Failed to score original trace: {e}")

    # Print evaluation summary
    print("\n=== PR Review Evaluation ===")
    print(f"PR: {repo_name}#{pr_number}")
    print(f"Merged: {pr_merged}")
    print(f"Agent Comments: {agent_count}")
    print(f"Human Responses: {human_count}")
    if review_feedback:
        # Total the reactions across all feedback prompts.
        thumbs_up = 0
        thumbs_down = 0
        for entry in review_feedback:
            thumbs_up += entry["thumbs_up"]
            thumbs_down += entry["thumbs_down"]
        print(f"Review Feedback: 👍 {thumbs_up} / 👎 {thumbs_down}")
    if original_trace_id:
        print(f"Original Review Trace: {original_trace_id}")
    if eval_trace_id:
        print(f"Evaluation Trace: {eval_trace_id}")

    logger.info("PR review evaluation completed successfully")
639
+
640
+
641
# Script entry point: parse the optional --trace-file argument, run the
# evaluation, and exit with status 1 if it fails.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Evaluate PR review effectiveness")
    parser.add_argument(
        "--trace-file",
        help="Path to trace info JSON file (default: laminar_trace_info.json)",
    )
    args = parser.parse_args()

    try:
        main(trace_file_path=args.trace_file)
    except Exception as e:
        # logger.exception logs at ERROR level like logger.error, but also
        # attaches the traceback, so the failing call can be found in the
        # job log instead of only the exception message.
        logger.exception(f"Evaluation failed: {e}")
        sys.exit(1)