@openhands/extensions 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (347) hide show
  1. package/.agents/skills/custom-codereview-guide.md +25 -0
  2. package/.github/pull_request_template.md +38 -0
  3. package/.github/release.yml +14 -0
  4. package/.github/workflows/check-extensions.yml +72 -0
  5. package/.github/workflows/npm-publish.yml +89 -0
  6. package/.github/workflows/pr.yml +30 -0
  7. package/.github/workflows/release.yml +24 -0
  8. package/.github/workflows/tests.yml +25 -0
  9. package/.github/workflows/vulnerability-scan.yml +87 -0
  10. package/.release-please-manifest.json +3 -0
  11. package/AGENTS.md +132 -0
  12. package/README.md +10 -0
  13. package/analysis_results.md +162 -0
  14. package/marketplaces/large-codebase.json +66 -0
  15. package/marketplaces/openhands-extensions.json +682 -0
  16. package/package.json +4 -10
  17. package/plugins/README.md +30 -0
  18. package/plugins/city-weather/.plugin/plugin.json +13 -0
  19. package/plugins/city-weather/README.md +145 -0
  20. package/plugins/city-weather/commands/now.md +56 -0
  21. package/plugins/cobol-modernization/.plugin/plugin.json +19 -0
  22. package/plugins/cobol-modernization/README.md +201 -0
  23. package/plugins/cobol-modernization/references/troubleshooting.md +18 -0
  24. package/plugins/cobol-modernization/skills/build-setup/SKILL.md +78 -0
  25. package/plugins/cobol-modernization/skills/build-setup/scripts/install-gnucobol.sh +32 -0
  26. package/plugins/cobol-modernization/skills/cobol-modernization-overview/SKILL.md +113 -0
  27. package/plugins/cobol-modernization/skills/mainfraime-removal/SKILL.md +62 -0
  28. package/plugins/cobol-modernization/skills/mainfraime-removal/references/cics-transformation-examples.md +45 -0
  29. package/plugins/cobol-modernization/skills/mainframe-planning/SKILL.md +78 -0
  30. package/plugins/cobol-modernization/skills/to-java-migration/SKILL.md +59 -0
  31. package/plugins/cobol-modernization/skills/to-java-migration/references/cobol-to-java-example.md +58 -0
  32. package/plugins/cobol-modernization/skills/to-java-migration/references/datatype-mappings.md +19 -0
  33. package/plugins/issue-duplicate-checker/.plugin/plugin.json +13 -0
  34. package/plugins/issue-duplicate-checker/README.md +51 -0
  35. package/plugins/issue-duplicate-checker/action.yml +349 -0
  36. package/plugins/issue-duplicate-checker/scripts/auto_close_duplicate_issues.py +569 -0
  37. package/plugins/issue-duplicate-checker/scripts/issue_duplicate_check_openhands.py +681 -0
  38. package/plugins/issue-duplicate-checker/scripts/post_duplicate_notice.js +220 -0
  39. package/plugins/issue-duplicate-checker/scripts/remove_duplicate_candidate_label.js +27 -0
  40. package/plugins/magic-test/.plugin/plugin.json +13 -0
  41. package/plugins/magic-test/skills/magic-word/SKILL.md +33 -0
  42. package/plugins/migration-scoring/.plugin/plugin.json +19 -0
  43. package/plugins/migration-scoring/README.md +244 -0
  44. package/plugins/migration-scoring/skills/migration-mapping/SKILL.md +72 -0
  45. package/plugins/migration-scoring/skills/migration-report/SKILL.md +118 -0
  46. package/plugins/migration-scoring/skills/migration-scoring-overview/SKILL.md +126 -0
  47. package/plugins/migration-scoring/skills/score-quality/SKILL.md +54 -0
  48. package/plugins/migration-scoring/skills/score-quality/references/scoring-criteria.md +30 -0
  49. package/plugins/migration-scoring/skills/score-style/SKILL.md +106 -0
  50. package/plugins/onboarding/.plugin/plugin.json +20 -0
  51. package/plugins/onboarding/README.md +30 -0
  52. package/plugins/onboarding/references/criteria.md +144 -0
  53. package/plugins/onboarding/skills/agent-readiness-report/README.md +23 -0
  54. package/plugins/onboarding/skills/agent-readiness-report/SKILL.md +122 -0
  55. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_agent_instructions.sh +88 -0
  56. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_build_env.sh +114 -0
  57. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_feedback_loops.sh +133 -0
  58. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_policy.sh +113 -0
  59. package/plugins/onboarding/skills/agent-readiness-report/scripts/scan_workflows.sh +127 -0
  60. package/plugins/onboarding/skills/improve-agent-readiness/README.md +19 -0
  61. package/plugins/onboarding/skills/improve-agent-readiness/SKILL.md +167 -0
  62. package/plugins/onboarding/skills/setup-agents-md/README.md +15 -0
  63. package/plugins/onboarding/skills/setup-agents-md/SKILL.md +150 -0
  64. package/plugins/onboarding/skills/setup-openhands/README.md +20 -0
  65. package/plugins/onboarding/skills/setup-openhands/SKILL.md +56 -0
  66. package/plugins/onboarding/skills/setup-pr-review/README.md +23 -0
  67. package/plugins/onboarding/skills/setup-pr-review/SKILL.md +72 -0
  68. package/plugins/openhands/.plugin/plugin.json +13 -0
  69. package/plugins/openhands/README.md +52 -0
  70. package/plugins/openhands/SKILL.md +61 -0
  71. package/plugins/openhands/commands/create.md +55 -0
  72. package/plugins/openhands/commands/openhands-cloud.md +8 -0
  73. package/plugins/openhands/scripts/run.sh +69 -0
  74. package/plugins/pr-review/.plugin/plugin.json +13 -0
  75. package/plugins/pr-review/README.md +393 -0
  76. package/plugins/pr-review/action.yml +298 -0
  77. package/plugins/pr-review/scripts/agent_script.py +1282 -0
  78. package/plugins/pr-review/scripts/evaluate_review.py +655 -0
  79. package/plugins/pr-review/scripts/prompt.py +260 -0
  80. package/plugins/pr-review/workflows/pr-review-by-openhands.yml +51 -0
  81. package/plugins/pr-review/workflows/pr-review-evaluation.yml +85 -0
  82. package/plugins/qa-changes/.plugin/plugin.json +11 -0
  83. package/plugins/qa-changes/README.md +185 -0
  84. package/plugins/qa-changes/action.yml +181 -0
  85. package/plugins/qa-changes/scripts/agent_script.py +406 -0
  86. package/plugins/qa-changes/scripts/evaluate_qa_changes.py +385 -0
  87. package/plugins/qa-changes/scripts/prompt.py +174 -0
  88. package/plugins/qa-changes/workflows/qa-changes-by-openhands.yml +50 -0
  89. package/plugins/qa-changes/workflows/qa-changes-evaluation.yml +85 -0
  90. package/plugins/release-notes/.plugin/plugin.json +19 -0
  91. package/plugins/release-notes/README.md +283 -0
  92. package/plugins/release-notes/SKILL.md +83 -0
  93. package/plugins/release-notes/action.yml +117 -0
  94. package/plugins/release-notes/commands/release-notes.md +8 -0
  95. package/plugins/release-notes/scripts/agent_script.py +292 -0
  96. package/plugins/release-notes/scripts/generate_release_notes.py +733 -0
  97. package/plugins/release-notes/scripts/prompt.py +90 -0
  98. package/plugins/release-notes/scripts/validate_release_notes.py +328 -0
  99. package/plugins/release-notes/workflows/release-notes.yml +76 -0
  100. package/plugins/vulnerability-remediation/.plugin/plugin.json +19 -0
  101. package/plugins/vulnerability-remediation/README.md +217 -0
  102. package/plugins/vulnerability-remediation/action.yml +187 -0
  103. package/plugins/vulnerability-remediation/scripts/scan_and_remediate.py +561 -0
  104. package/plugins/vulnerability-remediation/workflows/vulnerability-scan.yml +87 -0
  105. package/pyproject.toml +12 -0
  106. package/release-please-config.json +16 -0
  107. package/scripts/sync_extensions.py +494 -0
  108. package/scripts/sync_openhands_sdk_skill.py +264 -0
  109. package/skills/README.md +159 -0
  110. package/skills/add-javadoc/.plugin/plugin.json +18 -0
  111. package/skills/add-javadoc/README.md +40 -0
  112. package/skills/add-javadoc/SKILL.md +35 -0
  113. package/skills/add-javadoc/references/example.md +32 -0
  114. package/skills/add-skill/.plugin/plugin.json +18 -0
  115. package/skills/add-skill/README.md +67 -0
  116. package/skills/add-skill/SKILL.md +47 -0
  117. package/skills/add-skill/scripts/fetch_skill.py +259 -0
  118. package/skills/agent-creator/.plugin/plugin.json +20 -0
  119. package/skills/agent-creator/README.md +104 -0
  120. package/skills/agent-creator/SKILL.md +190 -0
  121. package/skills/agent-creator/commands/agent-creator.md +8 -0
  122. package/skills/agent-creator/references/fallback.md +117 -0
  123. package/skills/agent-memory/.plugin/plugin.json +18 -0
  124. package/skills/agent-memory/README.md +35 -0
  125. package/skills/agent-memory/SKILL.md +30 -0
  126. package/skills/agent-memory/commands/remember.md +8 -0
  127. package/skills/agent-sdk-builder/.plugin/plugin.json +18 -0
  128. package/skills/agent-sdk-builder/README.md +40 -0
  129. package/skills/agent-sdk-builder/SKILL.md +37 -0
  130. package/skills/agent-sdk-builder/commands/agent-builder.md +8 -0
  131. package/skills/azure-devops/.plugin/plugin.json +18 -0
  132. package/skills/azure-devops/README.md +55 -0
  133. package/skills/azure-devops/SKILL.md +50 -0
  134. package/skills/bitbucket/.plugin/plugin.json +17 -0
  135. package/skills/bitbucket/README.md +50 -0
  136. package/skills/bitbucket/SKILL.md +45 -0
  137. package/skills/code-review/.plugin/plugin.json +19 -0
  138. package/skills/code-review/README.md +18 -0
  139. package/skills/code-review/SKILL.md +208 -0
  140. package/skills/code-review/commands/codereview-roasted.md +8 -0
  141. package/skills/code-review/commands/codereview.md +8 -0
  142. package/skills/code-review/references/risk-evaluation.md +41 -0
  143. package/skills/code-review/references/supply-chain-security.md +31 -0
  144. package/skills/code-simplifier/.plugin/plugin.json +21 -0
  145. package/skills/code-simplifier/README.md +30 -0
  146. package/skills/code-simplifier/SKILL.md +91 -0
  147. package/skills/code-simplifier/commands/simplify.md +8 -0
  148. package/skills/code-simplifier/references/code-quality-review.md +86 -0
  149. package/skills/code-simplifier/references/code-reuse-review.md +63 -0
  150. package/skills/code-simplifier/references/efficiency-review.md +81 -0
  151. package/skills/datadog/.plugin/plugin.json +19 -0
  152. package/skills/datadog/README.md +100 -0
  153. package/skills/datadog/SKILL.md +95 -0
  154. package/skills/deno/.plugin/plugin.json +18 -0
  155. package/skills/deno/README.md +5 -0
  156. package/skills/deno/SKILL.md +99 -0
  157. package/skills/deno/references/README.md +6 -0
  158. package/skills/discord/.plugin/plugin.json +18 -0
  159. package/skills/discord/README.md +31 -0
  160. package/skills/discord/SKILL.md +109 -0
  161. package/skills/discord/__init__.py +0 -0
  162. package/skills/discord/references/REFERENCE.md +78 -0
  163. package/skills/discord/scripts/__init__.py +0 -0
  164. package/skills/discord/scripts/_http.py +127 -0
  165. package/skills/discord/scripts/post_webhook.py +106 -0
  166. package/skills/discord/scripts/send_message.py +102 -0
  167. package/skills/docker/.plugin/plugin.json +17 -0
  168. package/skills/docker/README.md +34 -0
  169. package/skills/docker/SKILL.md +29 -0
  170. package/skills/evidence-based-citations/.plugin/plugin.json +20 -0
  171. package/skills/evidence-based-citations/README.md +31 -0
  172. package/skills/evidence-based-citations/SKILL.md +59 -0
  173. package/skills/flarglebargle/.plugin/plugin.json +16 -0
  174. package/skills/flarglebargle/README.md +14 -0
  175. package/skills/flarglebargle/SKILL.md +9 -0
  176. package/skills/frontend-design/.plugin/plugin.json +21 -0
  177. package/skills/frontend-design/LICENSE.txt +177 -0
  178. package/skills/frontend-design/README.md +42 -0
  179. package/skills/frontend-design/SKILL.md +42 -0
  180. package/skills/github/.plugin/plugin.json +19 -0
  181. package/skills/github/README.md +42 -0
  182. package/skills/github/SKILL.md +106 -0
  183. package/skills/github-pr-review/.plugin/plugin.json +18 -0
  184. package/skills/github-pr-review/README.md +145 -0
  185. package/skills/github-pr-review/SKILL.md +148 -0
  186. package/skills/github-pr-review/commands/github-pr-review.md +8 -0
  187. package/skills/github-pr-reviewer/.plugin/plugin.json +20 -0
  188. package/skills/github-pr-reviewer/README.md +34 -0
  189. package/skills/github-pr-reviewer/SKILL.md +89 -0
  190. package/skills/github-pr-reviewer/commands/pr-reviewer:setup.md +8 -0
  191. package/skills/github-repo-monitor/.plugin/plugin.json +22 -0
  192. package/skills/github-repo-monitor/README.md +70 -0
  193. package/skills/github-repo-monitor/SKILL.md +316 -0
  194. package/skills/github-repo-monitor/commands/github-monitor:poll.md +8 -0
  195. package/skills/github-repo-monitor/references/github-api.md +241 -0
  196. package/skills/github-repo-monitor/references/state-schema.md +160 -0
  197. package/skills/github-repo-monitor/scripts/main.py +915 -0
  198. package/skills/github-repo-monitor/tests/test_main.py +400 -0
  199. package/skills/gitlab/.plugin/plugin.json +17 -0
  200. package/skills/gitlab/README.md +37 -0
  201. package/skills/gitlab/SKILL.md +32 -0
  202. package/skills/incident-retrospective/.plugin/plugin.json +21 -0
  203. package/skills/incident-retrospective/README.md +34 -0
  204. package/skills/incident-retrospective/SKILL.md +98 -0
  205. package/skills/incident-retrospective/commands/incident-retro:setup.md +8 -0
  206. package/skills/iterate/.plugin/plugin.json +13 -0
  207. package/skills/iterate/README.md +25 -0
  208. package/skills/iterate/SKILL.md +399 -0
  209. package/skills/iterate/commands/babysit.md +8 -0
  210. package/skills/iterate/commands/iterate.md +8 -0
  211. package/skills/iterate/commands/verify.md +8 -0
  212. package/skills/iterate/references/heuristics.md +58 -0
  213. package/skills/iterate/references/verification.md +96 -0
  214. package/skills/jupyter/.plugin/plugin.json +18 -0
  215. package/skills/jupyter/README.md +55 -0
  216. package/skills/jupyter/SKILL.md +50 -0
  217. package/skills/kubernetes/.plugin/plugin.json +18 -0
  218. package/skills/kubernetes/README.md +53 -0
  219. package/skills/kubernetes/SKILL.md +48 -0
  220. package/skills/learn-from-code-review/.plugin/plugin.json +19 -0
  221. package/skills/learn-from-code-review/README.md +64 -0
  222. package/skills/learn-from-code-review/SKILL.md +186 -0
  223. package/skills/learn-from-code-review/commands/learn-from-reviews.md +8 -0
  224. package/skills/linear/.plugin/plugin.json +19 -0
  225. package/skills/linear/README.md +58 -0
  226. package/skills/linear/SKILL.md +213 -0
  227. package/skills/linear-triage/.plugin/plugin.json +21 -0
  228. package/skills/linear-triage/README.md +34 -0
  229. package/skills/linear-triage/SKILL.md +91 -0
  230. package/skills/linear-triage/commands/linear-triage:setup.md +8 -0
  231. package/skills/notion/.plugin/plugin.json +17 -0
  232. package/skills/notion/README.md +114 -0
  233. package/skills/notion/SKILL.md +109 -0
  234. package/skills/npm/.plugin/plugin.json +17 -0
  235. package/skills/npm/README.md +14 -0
  236. package/skills/npm/SKILL.md +9 -0
  237. package/skills/openhands-api/.plugin/plugin.json +22 -0
  238. package/skills/openhands-api/README.md +48 -0
  239. package/skills/openhands-api/SKILL.md +399 -0
  240. package/skills/openhands-api/references/README.md +33 -0
  241. package/skills/openhands-api/references/TROUBLESHOOTING.md +81 -0
  242. package/skills/openhands-api/references/example_prompt.md +12 -0
  243. package/skills/openhands-api/scripts/openhands_api.py +606 -0
  244. package/skills/openhands-api/scripts/openhands_api.ts +252 -0
  245. package/skills/openhands-automation/.plugin/plugin.json +19 -0
  246. package/skills/openhands-automation/README.md +89 -0
  247. package/skills/openhands-automation/SKILL.md +875 -0
  248. package/skills/openhands-automation/commands/automation:create.md +8 -0
  249. package/skills/openhands-automation/references/ab-testing.md +185 -0
  250. package/skills/openhands-automation/references/custom-automation.md +644 -0
  251. package/skills/openhands-sdk/.plugin/plugin.json +20 -0
  252. package/skills/openhands-sdk/README.md +22 -0
  253. package/skills/openhands-sdk/SKILL.md +229 -0
  254. package/skills/openhands-sdk/commands/sdk.md +8 -0
  255. package/skills/pdflatex/.plugin/plugin.json +18 -0
  256. package/skills/pdflatex/README.md +39 -0
  257. package/skills/pdflatex/SKILL.md +34 -0
  258. package/skills/prd/.plugin/plugin.json +19 -0
  259. package/skills/prd/README.md +28 -0
  260. package/skills/prd/SKILL.md +237 -0
  261. package/skills/prd/commands/prd.md +8 -0
  262. package/skills/qa-changes/README.md +18 -0
  263. package/skills/qa-changes/SKILL.md +229 -0
  264. package/skills/qa-changes/commands/qa-changes.md +8 -0
  265. package/skills/release-notes/README.md +24 -0
  266. package/skills/release-notes/SKILL.md +19 -0
  267. package/skills/release-notes/commands/release-notes.md +8 -0
  268. package/skills/research-brief/.plugin/plugin.json +20 -0
  269. package/skills/research-brief/README.md +34 -0
  270. package/skills/research-brief/SKILL.md +99 -0
  271. package/skills/research-brief/commands/research-brief:setup.md +8 -0
  272. package/skills/security/.plugin/plugin.json +18 -0
  273. package/skills/security/README.md +38 -0
  274. package/skills/security/SKILL.md +33 -0
  275. package/skills/skill-creator/.plugin/plugin.json +17 -0
  276. package/skills/skill-creator/LICENSE.txt +202 -0
  277. package/skills/skill-creator/README.md +182 -0
  278. package/skills/skill-creator/SKILL.md +545 -0
  279. package/skills/skill-creator/references/output-patterns.md +82 -0
  280. package/skills/skill-creator/references/workflows.md +28 -0
  281. package/skills/skill-creator/scripts/init_skill.py +303 -0
  282. package/skills/skill-creator/scripts/quick_validate.py +95 -0
  283. package/skills/slack-channel-monitor/.plugin/plugin.json +21 -0
  284. package/skills/slack-channel-monitor/README.md +91 -0
  285. package/skills/slack-channel-monitor/SKILL.md +276 -0
  286. package/skills/slack-channel-monitor/commands/slack-monitor:poll.md +8 -0
  287. package/skills/slack-channel-monitor/references/slack-api.md +207 -0
  288. package/skills/slack-channel-monitor/references/state-schema.md +180 -0
  289. package/skills/slack-channel-monitor/scripts/main.py +962 -0
  290. package/skills/slack-standup-digest/.plugin/plugin.json +21 -0
  291. package/skills/slack-standup-digest/README.md +34 -0
  292. package/skills/slack-standup-digest/SKILL.md +92 -0
  293. package/skills/slack-standup-digest/commands/standup-digest:setup.md +8 -0
  294. package/skills/spark-version-upgrade/.plugin/plugin.json +20 -0
  295. package/skills/spark-version-upgrade/README.md +54 -0
  296. package/skills/spark-version-upgrade/SKILL.md +233 -0
  297. package/skills/ssh/.plugin/plugin.json +18 -0
  298. package/skills/ssh/README.md +140 -0
  299. package/skills/ssh/SKILL.md +135 -0
  300. package/skills/swift-linux/.plugin/plugin.json +17 -0
  301. package/skills/swift-linux/README.md +86 -0
  302. package/skills/swift-linux/SKILL.md +81 -0
  303. package/skills/theme-factory/.plugin/plugin.json +19 -0
  304. package/skills/theme-factory/LICENSE.txt +202 -0
  305. package/skills/theme-factory/README.md +58 -0
  306. package/skills/theme-factory/SKILL.md +59 -0
  307. package/skills/theme-factory/theme-showcase.pdf +0 -0
  308. package/skills/theme-factory/themes/arctic-frost.md +19 -0
  309. package/skills/theme-factory/themes/botanical-garden.md +19 -0
  310. package/skills/theme-factory/themes/desert-rose.md +19 -0
  311. package/skills/theme-factory/themes/forest-canopy.md +19 -0
  312. package/skills/theme-factory/themes/golden-hour.md +19 -0
  313. package/skills/theme-factory/themes/midnight-galaxy.md +19 -0
  314. package/skills/theme-factory/themes/modern-minimalist.md +19 -0
  315. package/skills/theme-factory/themes/ocean-depths.md +19 -0
  316. package/skills/theme-factory/themes/sunset-boulevard.md +19 -0
  317. package/skills/theme-factory/themes/tech-innovation.md +19 -0
  318. package/skills/uv/.plugin/plugin.json +18 -0
  319. package/skills/uv/README.md +5 -0
  320. package/skills/uv/SKILL.md +95 -0
  321. package/skills/uv/references/README.md +5 -0
  322. package/skills/vercel/.plugin/plugin.json +18 -0
  323. package/skills/vercel/README.md +108 -0
  324. package/skills/vercel/SKILL.md +103 -0
  325. package/tests/test_add_skill_installs_to_agents_dir.py +42 -0
  326. package/tests/test_catalogs.py +109 -0
  327. package/tests/test_code_review_risk_evaluation.py +94 -0
  328. package/tests/test_issue_duplicate_checker.py +240 -0
  329. package/tests/test_openhands_api_python.py +152 -0
  330. package/tests/test_plugin_manifest.py +83 -0
  331. package/tests/test_pr_review_diff_payload.py +202 -0
  332. package/tests/test_pr_review_feedback.py +263 -0
  333. package/tests/test_pr_review_prompt.py +152 -0
  334. package/tests/test_pr_review_review_context.py +253 -0
  335. package/tests/test_qa_changes.py +232 -0
  336. package/tests/test_qa_changes_evaluation.py +259 -0
  337. package/tests/test_release_notes_generator.py +990 -0
  338. package/tests/test_sdk_loading.py +150 -0
  339. package/tests/test_skill_plugin_loading.py +149 -0
  340. package/tests/test_skills_have_readme.py +66 -0
  341. package/tests/test_sync_extensions.py +292 -0
  342. package/tests/test_workflow_sync.py +46 -0
  343. package/utils/analysis/README.md +7 -0
  344. package/utils/analysis/laminar_signals/README.md +211 -0
  345. package/utils/analysis/laminar_signals/analyze.py +780 -0
  346. package/utils/analysis/laminar_signals/templates/default.j2 +49 -0
  347. package/utils/analysis/laminar_signals/templates/pr_review.j2 +61 -0
@@ -0,0 +1,259 @@
1
+ """Tests for the qa-changes evaluation script (evaluate_qa_changes.py)."""
2
+
3
+ import importlib.util
4
+ import json
5
+ import sys
6
+ import types
7
+ from pathlib import Path
8
+
9
+ import pytest
10
+
11
# Location of the qa-changes plugin scripts: two directory levels above this
# test file, then plugins/qa-changes/scripts.
_SCRIPTS_DIR = Path(__file__).parent.parent.joinpath(
    "plugins", "qa-changes", "scripts"
)
14
+
15
+
16
def _load_eval_module():
    """Import evaluate_qa_changes.py from the plugin scripts directory.

    The real ``lmnr`` package needs a project key, so a stand-in module with
    no-op ``Laminar`` and ``LaminarClient`` classes is placed in
    ``sys.modules`` while the script is executed. Whatever was registered
    under ``"lmnr"`` beforehand is put back afterwards, even if the import
    raises.

    Returns:
        The freshly executed ``evaluate_qa_changes`` module object.
    """

    class _FakeLaminar:
        # Each method mirrors a Laminar call and deliberately does nothing.

        @staticmethod
        def initialize():
            return None

        @staticmethod
        def get_trace_id():
            return None

        @staticmethod
        def get_laminar_span_context():
            return None

        @staticmethod
        def set_trace_metadata(meta):
            return None

        @staticmethod
        def set_span_output(output):
            return None

        @staticmethod
        def flush():
            return None

        @staticmethod
        def start_as_current_span(**kwargs):
            # Hand back a context manager that does nothing on enter/exit.
            from contextlib import nullcontext

            return nullcontext()

    class _FakeClient:
        # Namespaces are nested classes so attribute access such as
        # ``client.evaluators.score(...)`` resolves to a no-op.

        class evaluators:
            @staticmethod
            def score(**kwargs):
                return None

        class tags:
            @staticmethod
            def tag(trace_id, tags):
                return None

    fake_lmnr = types.ModuleType("lmnr")
    fake_lmnr.Laminar = _FakeLaminar
    fake_lmnr.LaminarClient = _FakeClient

    # Remember what was registered so the finally-clause can restore it.
    displaced = sys.modules.get("lmnr")
    sys.modules["lmnr"] = fake_lmnr

    try:
        spec = importlib.util.spec_from_file_location(
            "evaluate_qa_changes", _SCRIPTS_DIR / "evaluate_qa_changes.py"
        )
        loaded = importlib.util.module_from_spec(spec)
        # Register before executing, as the importlib recipe recommends.
        sys.modules[spec.name] = loaded
        spec.loader.exec_module(loaded)
        return loaded
    finally:
        if displaced is not None:
            sys.modules["lmnr"] = displaced
        else:
            sys.modules.pop("lmnr", None)
79
+
80
+
81
@pytest.fixture(scope="module")
def eval_mod():
    """Provide the evaluate_qa_changes module, loaded once per test module."""
    module = _load_eval_module()
    return module
84
+
85
+
86
+ # ===================================================================
87
+ # extract_qa_report
88
+ # ===================================================================
89
+
90
+
91
class TestExtractQaReport:
    """Checks which issue comments extract_qa_report returns."""

    @staticmethod
    def _comment(login, comment_id, body, created_at):
        # Build one comment dict in the shape the tests feed to the extractor.
        return {
            "user": {"login": login},
            "id": comment_id,
            "body": body,
            "created_at": created_at,
        }

    def test_extracts_agent_comments(self, eval_mod):
        thread = [
            self._comment("openhands-agent", 1, "QA report", "2024-01-01"),
            self._comment("human-dev", 2, "looks good", "2024-01-02"),
            self._comment("all-hands-bot", 3, "another report", "2024-01-03"),
        ]
        reports = eval_mod.extract_qa_report(thread)
        # Only the two bot-authored comments should come back, in order.
        assert len(reports) == 2
        first, second = reports
        assert first["id"] == 1
        assert first["type"] == "qa_report"
        assert second["id"] == 3

    def test_empty_comments(self, eval_mod):
        no_comments = []
        assert eval_mod.extract_qa_report(no_comments) == []

    def test_no_agent_comments(self, eval_mod):
        thread = [self._comment("human", 1, "test", "2024-01-01")]
        assert eval_mod.extract_qa_report(thread) == []
112
+
113
+
114
+ # ===================================================================
115
+ # extract_human_responses
116
+ # ===================================================================
117
+
118
+
119
class TestExtractHumanResponses:
    """Checks which issue comments extract_human_responses returns."""

    @staticmethod
    def _comment(login, comment_id, body, created_at):
        # Build one comment dict in the shape the tests feed to the extractor.
        return {
            "user": {"login": login},
            "id": comment_id,
            "body": body,
            "created_at": created_at,
        }

    def test_extracts_human_comments(self, eval_mod):
        thread = [
            self._comment("openhands-agent", 1, "QA report", "2024-01-01"),
            self._comment("dev-alice", 2, "thanks", "2024-01-02"),
            self._comment("dev-bob", 3, "agreed", "2024-01-03"),
        ]
        responses = eval_mod.extract_human_responses(thread)
        # The agent's own comment is dropped; the two humans remain in order.
        assert len(responses) == 2
        alice, bob = responses
        assert alice["user"] == "dev-alice"
        assert bob["user"] == "dev-bob"

    def test_empty_comments(self, eval_mod):
        no_comments = []
        assert eval_mod.extract_human_responses(no_comments) == []

    def test_all_agent_comments(self, eval_mod):
        thread = [self._comment("openhands-agent", 1, "report", "2024-01-01")]
        assert eval_mod.extract_human_responses(thread) == []

    def test_custom_agent_users(self, eval_mod):
        thread = [
            self._comment("my-bot", 1, "report", "2024-01-01"),
            self._comment("human", 2, "ok", "2024-01-02"),
        ]
        # An explicit agent_users set replaces the default agent logins.
        responses = eval_mod.extract_human_responses(
            thread, agent_users={"my-bot"}
        )
        assert len(responses) == 1
        assert responses[0]["user"] == "human"
148
+
149
+
150
+ # ===================================================================
151
+ # truncate_text
152
+ # ===================================================================
153
+
154
+
155
class TestTruncateText:
    """Checks truncate_text at, below, and above its character limit."""

    def test_short_text_unchanged(self, eval_mod):
        short = "hello"
        assert eval_mod.truncate_text(short) == short

    def test_exact_limit(self, eval_mod):
        at_limit = "x" * 100
        assert eval_mod.truncate_text(at_limit, max_chars=100) == at_limit

    def test_over_limit(self, eval_mod):
        too_long = "x" * 200
        shortened = eval_mod.truncate_text(too_long, max_chars=100)
        # The first 100 characters survive, followed by a notice that
        # mentions truncation and the original length.
        assert shortened.startswith("x" * 100)
        assert "truncated" in shortened
        assert "200" in shortened

    def test_default_limit_is_50k(self, eval_mod):
        at_default_limit = "x" * 50000
        assert eval_mod.truncate_text(at_default_limit) == at_default_limit
        one_over = "x" * 50001
        assert "truncated" in eval_mod.truncate_text(one_over)
175
+
176
+
177
+ # ===================================================================
178
+ # calculate_engagement_score
179
+ # ===================================================================
180
+
181
+
182
class TestCalculateEngagementScore:
    """Checks the score calculate_engagement_score assigns to each scenario."""

    @staticmethod
    def _one_report():
        # A fresh list per call so tests never share mutable state.
        return [{"type": "qa_report", "body": "report"}]

    @staticmethod
    def _one_reply():
        return [{"type": "issue_comment", "body": "thanks"}]

    def test_no_comments_no_merge(self, eval_mod):
        result = eval_mod.calculate_engagement_score([], [], False)
        assert result == 0.0

    def test_qa_report_posted_no_responses(self, eval_mod):
        result = eval_mod.calculate_engagement_score(
            self._one_report(), [], False
        )
        assert result == pytest.approx(0.3)

    def test_qa_report_with_responses(self, eval_mod):
        result = eval_mod.calculate_engagement_score(
            self._one_report(), self._one_reply(), False
        )
        # Report contributes 0.3; response ratio (capped at 1.0) adds 1.0 * 0.2.
        assert result == pytest.approx(0.5)

    def test_merged_bonus(self, eval_mod):
        result = eval_mod.calculate_engagement_score([], [], True)
        assert result == pytest.approx(0.3)

    def test_full_engagement(self, eval_mod):
        result = eval_mod.calculate_engagement_score(
            self._one_report(), self._one_reply(), True
        )
        # Report 0.3 + response 0.2 + merged 0.3.
        assert result == pytest.approx(0.8)

    def test_many_responses_capped(self, eval_mod):
        replies = [{"body": f"msg {i}"} for i in range(10)]
        result = eval_mod.calculate_engagement_score(
            self._one_report(), replies, False
        )
        # Ten replies to one report still cap the ratio at 1.0: 0.3 + 0.2.
        assert result == pytest.approx(0.5)

    def test_multiple_qa_comments_with_fewer_responses(self, eval_mod):
        reports = [{"body": f"qa {i}"} for i in range(4)]
        replies = [{"body": "reply"}]
        result = eval_mod.calculate_engagement_score(reports, replies, False)
        # One reply across four reports: 0.3 + (1/4) * 0.2 = 0.35.
        assert result == pytest.approx(0.35)
223
+
224
+
225
+ # ===================================================================
226
+ # load_trace_info
227
+ # ===================================================================
228
+
229
+
230
class TestLoadTraceInfo:
    """Checks load_trace_info against missing and well-formed trace files."""

    @staticmethod
    def _write_trace(directory, payload):
        # Serialize payload to trace.json under directory; return its path.
        target = directory / "trace.json"
        target.write_text(json.dumps(payload))
        return str(target)

    def test_file_not_found_returns_empty(self, eval_mod, tmp_path):
        missing = tmp_path / "nonexistent.json"
        assert eval_mod.load_trace_info(str(missing)) == {}

    def test_loads_valid_trace_file(self, eval_mod, tmp_path):
        payload = {
            "trace_id": "abc-123",
            "span_context": {"trace_id": "abc", "span_id": "def"},
            "pr_number": "42",
            "repo_name": "org/repo",
            "commit_id": "deadbeef",
            "model": "claude-sonnet",
        }
        trace_path = self._write_trace(tmp_path, payload)

        loaded = eval_mod.load_trace_info(trace_path)
        assert loaded["trace_id"] == "abc-123"
        assert loaded["pr_number"] == "42"
        assert loaded["span_context"]["trace_id"] == "abc"

    def test_trace_without_span_context(self, eval_mod, tmp_path):
        trace_path = self._write_trace(tmp_path, {"trace_id": "abc-123"})

        loaded = eval_mod.load_trace_info(trace_path)
        assert loaded["trace_id"] == "abc-123"
        # A file with no span_context key must not gain a non-None value.
        assert loaded.get("span_context") is None