oh-my-opencode 4.3.1 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. package/.agents/command/get-unpublished-changes.md +148 -0
  2. package/.agents/command/omomomo.md +37 -0
  3. package/.agents/command/publish.md +376 -0
  4. package/.agents/command/remove-deadcode.md +221 -0
  5. package/.agents/command/security-research.md +16 -0
  6. package/.agents/skills/get-unpublished-changes/SKILL.md +24 -0
  7. package/.agents/skills/github-triage/SKILL.md +587 -0
  8. package/.agents/skills/github-triage/scripts/gh_fetch.py +398 -0
  9. package/.agents/skills/hyperplan/SKILL.md +450 -0
  10. package/.agents/skills/omomomo/SKILL.md +36 -0
  11. package/.agents/skills/pre-publish-review/SKILL.md +407 -0
  12. package/.agents/skills/publish/SKILL.md +428 -0
  13. package/.agents/skills/remove-deadcode/SKILL.md +216 -0
  14. package/.agents/skills/security-research/SKILL.md +204 -0
  15. package/.agents/skills/work-with-pr/SKILL.md +360 -0
  16. package/.agents/skills/work-with-pr-workspace/evals/evals.json +76 -0
  17. package/.agents/skills/work-with-pr-workspace/iteration-1/benchmark.json +138 -0
  18. package/.agents/skills/work-with-pr-workspace/iteration-1/benchmark.md +42 -0
  19. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/eval_metadata.json +57 -0
  20. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/grading.json +15 -0
  21. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/code-changes.md +454 -0
  22. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/execution-plan.md +136 -0
  23. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/pr-description.md +47 -0
  24. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/verification-strategy.md +163 -0
  25. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/timing.json +1 -0
  26. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/grading.json +15 -0
  27. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/code-changes.md +615 -0
  28. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/execution-plan.md +99 -0
  29. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/pr-description.md +50 -0
  30. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/verification-strategy.md +111 -0
  31. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/timing.json +1 -0
  32. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/eval_metadata.json +37 -0
  33. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/grading.json +11 -0
  34. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md +205 -0
  35. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/execution-plan.md +78 -0
  36. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/pr-description.md +42 -0
  37. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/verification-strategy.md +87 -0
  38. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/timing.json +1 -0
  39. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/grading.json +11 -0
  40. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/code-changes.md +334 -0
  41. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/execution-plan.md +86 -0
  42. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/pr-description.md +23 -0
  43. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/verification-strategy.md +119 -0
  44. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/timing.json +1 -0
  45. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/eval_metadata.json +32 -0
  46. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/grading.json +10 -0
  47. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/code-changes.md +221 -0
  48. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/execution-plan.md +104 -0
  49. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/pr-description.md +41 -0
  50. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/verification-strategy.md +84 -0
  51. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/timing.json +1 -0
  52. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/grading.json +10 -0
  53. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/code-changes.md +342 -0
  54. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/execution-plan.md +131 -0
  55. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/pr-description.md +39 -0
  56. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/verification-strategy.md +128 -0
  57. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/timing.json +1 -0
  58. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/eval_metadata.json +32 -0
  59. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/grading.json +10 -0
  60. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/code-changes.md +143 -0
  61. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/execution-plan.md +82 -0
  62. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/pr-description.md +51 -0
  63. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/verification-strategy.md +69 -0
  64. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/timing.json +1 -0
  65. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/grading.json +10 -0
  66. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/code-changes.md +252 -0
  67. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/execution-plan.md +83 -0
  68. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/pr-description.md +33 -0
  69. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/verification-strategy.md +101 -0
  70. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/timing.json +1 -0
  71. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/eval_metadata.json +32 -0
  72. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/grading.json +10 -0
  73. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/code-changes.md +387 -0
  74. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/execution-plan.md +112 -0
  75. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/pr-description.md +51 -0
  76. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/verification-strategy.md +75 -0
  77. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/timing.json +1 -0
  78. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/grading.json +10 -0
  79. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/code-changes.md +529 -0
  80. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/execution-plan.md +127 -0
  81. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/pr-description.md +42 -0
  82. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/verification-strategy.md +120 -0
  83. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/timing.json +1 -0
  84. package/.agents/skills/work-with-pr-workspace/iteration-1/review.html +1326 -0
  85. package/.opencode/command/get-unpublished-changes.md +148 -0
  86. package/.opencode/command/omomomo.md +37 -0
  87. package/.opencode/command/publish.md +376 -0
  88. package/.opencode/command/remove-deadcode.md +221 -0
  89. package/.opencode/command/security-research.md +16 -0
  90. package/.opencode/skills/github-triage/SKILL.md +587 -0
  91. package/.opencode/skills/github-triage/scripts/gh_fetch.py +398 -0
  92. package/.opencode/skills/hyperplan/SKILL.md +450 -0
  93. package/.opencode/skills/pre-publish-review/SKILL.md +407 -0
  94. package/.opencode/skills/work-with-pr/SKILL.md +360 -0
  95. package/.opencode/skills/work-with-pr-workspace/evals/evals.json +76 -0
  96. package/.opencode/skills/work-with-pr-workspace/iteration-1/benchmark.json +138 -0
  97. package/.opencode/skills/work-with-pr-workspace/iteration-1/benchmark.md +42 -0
  98. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/eval_metadata.json +57 -0
  99. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/grading.json +15 -0
  100. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/code-changes.md +454 -0
  101. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/execution-plan.md +136 -0
  102. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/pr-description.md +47 -0
  103. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/verification-strategy.md +163 -0
  104. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/timing.json +1 -0
  105. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/grading.json +15 -0
  106. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/code-changes.md +615 -0
  107. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/execution-plan.md +99 -0
  108. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/pr-description.md +50 -0
  109. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/verification-strategy.md +111 -0
  110. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/timing.json +1 -0
  111. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/eval_metadata.json +37 -0
  112. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/grading.json +11 -0
  113. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md +205 -0
  114. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/execution-plan.md +78 -0
  115. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/pr-description.md +42 -0
  116. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/verification-strategy.md +87 -0
  117. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/timing.json +1 -0
  118. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/grading.json +11 -0
  119. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/code-changes.md +334 -0
  120. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/execution-plan.md +86 -0
  121. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/pr-description.md +23 -0
  122. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/verification-strategy.md +119 -0
  123. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/timing.json +1 -0
  124. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/eval_metadata.json +32 -0
  125. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/grading.json +10 -0
  126. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/code-changes.md +221 -0
  127. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/execution-plan.md +104 -0
  128. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/pr-description.md +41 -0
  129. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/verification-strategy.md +84 -0
  130. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/timing.json +1 -0
  131. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/grading.json +10 -0
  132. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/code-changes.md +342 -0
  133. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/execution-plan.md +131 -0
  134. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/pr-description.md +39 -0
  135. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/verification-strategy.md +128 -0
  136. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/timing.json +1 -0
  137. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/eval_metadata.json +32 -0
  138. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/grading.json +10 -0
  139. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/code-changes.md +143 -0
  140. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/execution-plan.md +82 -0
  141. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/pr-description.md +51 -0
  142. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/verification-strategy.md +69 -0
  143. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/timing.json +1 -0
  144. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/grading.json +10 -0
  145. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/code-changes.md +252 -0
  146. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/execution-plan.md +83 -0
  147. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/pr-description.md +33 -0
  148. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/verification-strategy.md +101 -0
  149. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/timing.json +1 -0
  150. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/eval_metadata.json +32 -0
  151. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/grading.json +10 -0
  152. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/code-changes.md +387 -0
  153. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/execution-plan.md +112 -0
  154. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/pr-description.md +51 -0
  155. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/verification-strategy.md +75 -0
  156. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/timing.json +1 -0
  157. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/grading.json +10 -0
  158. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/code-changes.md +529 -0
  159. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/execution-plan.md +127 -0
  160. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/pr-description.md +42 -0
  161. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/verification-strategy.md +120 -0
  162. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/timing.json +1 -0
  163. package/.opencode/skills/work-with-pr-workspace/iteration-1/review.html +1326 -0
  164. package/README.ja.md +1 -1
  165. package/README.ko.md +1 -1
  166. package/README.md +1 -1
  167. package/README.ru.md +1 -1
  168. package/README.zh-cn.md +1 -1
  169. package/dist/agents/atlas/agent.d.ts +6 -6
  170. package/dist/agents/prometheus/gemini.d.ts +0 -11
  171. package/dist/agents/prometheus/gpt.d.ts +0 -10
  172. package/dist/agents/prometheus/system-prompt.d.ts +2 -20
  173. package/dist/agents/types.d.ts +1 -16
  174. package/dist/cli/index.js +60 -20
  175. package/dist/config/schema/agent-names.d.ts +3 -3
  176. package/dist/config/schema/agent-overrides.d.ts +208 -208
  177. package/dist/config/schema/categories.d.ts +28 -28
  178. package/dist/config/schema/fallback-models.d.ts +20 -20
  179. package/dist/config/schema/oh-my-opencode-config.d.ts +208 -208
  180. package/dist/features/background-agent/parent-wake-dedupe.d.ts +19 -0
  181. package/dist/features/background-agent/parent-wake-notifier.d.ts +8 -19
  182. package/dist/help/schema/acp.d.ts +95 -0
  183. package/dist/help/schema/doctor.d.ts +147 -0
  184. package/dist/help/schema/sandbox.d.ts +74 -0
  185. package/dist/help/schema/status.d.ts +139 -0
  186. package/dist/hooks/keyword-detector/analyze/default.d.ts +1 -1
  187. package/dist/hooks/keyword-detector/hyperplan/default.d.ts +1 -1
  188. package/dist/hooks/keyword-detector/search/default.d.ts +1 -1
  189. package/dist/hooks/keyword-detector/team/default.d.ts +2 -7
  190. package/dist/hooks/keyword-detector/ultrawork/default.d.ts +1 -9
  191. package/dist/hooks/keyword-detector/ultrawork/gemini.d.ts +1 -16
  192. package/dist/hooks/keyword-detector/ultrawork/gpt.d.ts +1 -10
  193. package/dist/hooks/keyword-detector/ultrawork/planner.d.ts +1 -5
  194. package/dist/hooks/ralph-loop/no-progress-turn-detector.d.ts +7 -0
  195. package/dist/hooks/ralph-loop/pending-verification-handler.d.ts +1 -0
  196. package/dist/hooks/ralph-loop/types.d.ts +1 -0
  197. package/dist/hooks/runtime-fallback/error-classifier.d.ts +1 -0
  198. package/dist/hooks/tool-pair-validator/hook.d.ts +6 -1
  199. package/dist/index.js +51976 -50299
  200. package/dist/plugin-handlers/provider-config-handler.d.ts +1 -0
  201. package/dist/shared/migration/model-versions.d.ts +6 -0
  202. package/dist/shared/prompt-async-gate/pending-tool-turn.d.ts +1 -0
  203. package/dist/shared/prompt-async-gate/types.d.ts +4 -3
  204. package/package.json +19 -13
  205. package/dist/agents/atlas/default-prompt-sections.d.ts +0 -6
  206. package/dist/agents/atlas/default.d.ts +0 -2
  207. package/dist/agents/atlas/gemini-prompt-sections.d.ts +0 -6
  208. package/dist/agents/atlas/gemini.d.ts +0 -2
  209. package/dist/agents/atlas/gpt-prompt-sections.d.ts +0 -6
  210. package/dist/agents/atlas/gpt.d.ts +0 -2
  211. package/dist/agents/atlas/kimi-prompt-sections.d.ts +0 -6
  212. package/dist/agents/atlas/kimi.d.ts +0 -2
  213. package/dist/agents/atlas/opus-4-7-prompt-sections.d.ts +0 -6
  214. package/dist/agents/atlas/opus-4-7.d.ts +0 -2
  215. package/dist/agents/atlas/shared-prompt.d.ts +0 -9
  216. package/dist/agents/prometheus/behavioral-summary.d.ts +0 -6
  217. package/dist/agents/prometheus/high-accuracy-mode.d.ts +0 -6
  218. package/dist/agents/prometheus/identity-constraints.d.ts +0 -7
  219. package/dist/agents/prometheus/interview-mode.d.ts +0 -7
  220. package/dist/agents/prometheus/plan-generation.d.ts +0 -7
  221. package/dist/agents/prometheus/plan-template.d.ts +0 -7
  222. package/dist/agents/prometheus/spec-driven-mode.d.ts +0 -7
@@ -0,0 +1,204 @@
1
+ ---
2
+ name: security-research
3
+ description: "Team Mode security research skill. Orchestrates 3 vulnerability hunters and 2 PoC engineers to audit a codebase in parallel, prove exploitability, classify root causes, and calibrate severity by actual exploitability. Use for security review, vulnerability research, exploitability audit, pre-release security check, threat model validation, and `/security-research`. Triggers: 'security-research', 'security research', 'security review', 'vulnerability audit', 'exploitability audit', '보안 리뷰', '취약점 감사'."
4
+ ---
5
+
6
+ # Security Research - Team Mode Vulnerability Audit
7
+
8
+ Use this skill to run a parallel security audit that separates real exploitability from generic concern. The team has 3 vulnerability hunters and 2 PoC engineers.
9
+
10
+ ## Hard Preconditions
11
+
12
+ Before starting, verify:
13
+
14
+ 1. `team_*` tools are available. If not, stop and tell the user:
15
+ `security-research requires team-mode. Set team_mode.enabled: true in your oh-my-openagent config, restart opencode, then retry.`
16
+ 2. You are in the main session, not a background subagent.
17
+ 3. You have a concrete target: repository, diff range, PR, release candidate, path list, or threat surface.
18
+
19
+ If the user provided no target, audit the current repository and current branch diff against its upstream or merge base. If there is no diff, audit the security-sensitive surfaces in the working tree.
20
+
21
+ ## Severity Standard
22
+
23
+ Use these references as the scoring frame:
24
+
25
+ - CWE for root-cause weakness classification: https://cwe.mitre.org/
26
+ - OWASP WSTG for test methodology: https://owasp.org/www-project-web-security-testing-guide/
27
+ - OWASP ASVS for control verification: https://owasp.org/www-project-application-security-verification-standard/
28
+ - CVSS v4.0 for exploitability and impact scoring: https://www.first.org/cvss/v4.0/specification-document
29
+
30
+ Rules:
31
+
32
+ - No severity without an attack path.
33
+ - No critical or high finding without concrete exploit preconditions and impact.
34
+ - Keep CWE category separate from severity.
35
+ - Prefer a small, reproducible PoC over theoretical language.
36
+ - Never run destructive exploits against real services or third-party systems.
37
+ - Use local fixtures, toy payloads, dry runs, or static proof when real execution would be unsafe.
38
+
39
+ ## Team Roster
40
+
41
+ Create one Team Mode run with these 5 members:
42
+
43
+ | Member | Kind | Category | Role |
44
+ |--------|------|----------|------|
45
+ | `surface-hunter` | category | `deep` | Map entry points, trust boundaries, and reachable attack surfaces. |
46
+ | `auth-data-hunter` | category | `ultrabrain` | Hunt auth, authorization, data isolation, injection, and secret handling flaws. |
47
+ | `runtime-supply-hunter` | category | `unspecified-high` | Hunt filesystem, subprocess, archive, dependency, hook, MCP, and config risks. |
48
+ | `poc-engineer-a` | category | `unspecified-high` | Build minimal PoCs for the strongest candidate findings. |
49
+ | `poc-engineer-b` | category | `deep` | Independently reproduce, falsify, or downgrade candidate findings. |
50
+
51
+ Call `team_create` with an inline spec:
52
+
53
+ ```typescript
54
+ team_create({
55
+ inline_spec: {
56
+ name: "security-research",
57
+ description: "Parallel exploitability-driven security research team.",
58
+ members: [
59
+ {
60
+ name: "surface-hunter",
61
+ kind: "category",
62
+ category: "deep",
63
+ prompt: "You map attack surface. Enumerate entry points, trust boundaries, attacker-controlled inputs, data sinks, privilege transitions, and sensitive assets. Return evidence with file paths and exact functions. Do not assign severity unless you can name an attack path."
64
+ },
65
+ {
66
+ name: "auth-data-hunter",
67
+ kind: "category",
68
+ category: "ultrabrain",
69
+ prompt: "You hunt auth, authorization, tenant/data isolation, injection, SSRF, credential exposure, and confused-deputy flaws. Reason from attacker capability to impact. Return only findings with concrete exploit preconditions, CWE candidates, and verification steps."
70
+ },
71
+ {
72
+ name: "runtime-supply-hunter",
73
+ kind: "category",
74
+ category: "unspecified-high",
75
+ prompt: "You hunt filesystem, subprocess, archive extraction, dependency, hook execution, MCP, config, and environment-variable risks. Check path traversal, command injection, unsafe downloads, permission boundaries, and supply-chain assumptions. Cite file paths and commands used."
76
+ },
77
+ {
78
+ name: "poc-engineer-a",
79
+ kind: "category",
80
+ category: "unspecified-high",
81
+ prompt: "You build minimal safe PoCs for candidate findings. Use toy inputs and local-only execution. Your job is to prove or disprove exploitability, not to broaden scope. Report exact reproduction steps and expected output."
82
+ },
83
+ {
84
+ name: "poc-engineer-b",
85
+ kind: "category",
86
+ category: "deep",
87
+ prompt: "You independently reproduce candidate findings and try to falsify them. Downgrade anything without a working path. If a PoC is unsafe to run, design a safe static or dry-run proof and explain the limit."
88
+ }
89
+ ]
90
+ }
91
+ })
92
+ ```
93
+
94
+ If a category is unavailable, retry once by replacing only that category with `unspecified-high`. Do not reduce the team below 5 members.
95
+
96
+ ## Workflow
97
+
98
+ ### Phase 0: Scope and Baseline
99
+
100
+ Collect:
101
+
102
+ - Target scope and reason for audit.
103
+ - Branch, base ref, diff, and changed files if this is a change review.
104
+ - Security-sensitive directories and files if this is a full-repo audit.
105
+ - Existing tests and commands that exercise relevant surfaces.
106
+ - Any user-stated constraints, such as no network calls or no destructive tests.
107
+
108
+ Use `rg`, `git diff`, `git log`, LSP, and existing tests before assigning work.
109
+
110
+ ### Phase 1: Independent Hunter Pass
111
+
112
+ Send the same prompt to each of the 3 hunters:
113
+
114
+ ```text
115
+ Audit target:
116
+ {target summary}
117
+
118
+ Context:
119
+ {diff, file list, security-sensitive paths, known constraints}
120
+
121
+ Task:
122
+ Find candidate vulnerabilities in your assigned role. For each candidate include:
123
+ - title
124
+ - affected file/function
125
+ - attacker capability
126
+ - attack path
127
+ - impact
128
+ - CWE candidate
129
+ - exact evidence
130
+ - safe verification idea
131
+
132
+ Reject generic hardening advice. Return only candidates with a plausible path.
133
+ ```
134
+
135
+ Wait for all hunters.
136
+
137
+ ### Phase 2: PoC Pass
138
+
139
+ Deduplicate hunter candidates. Send the strongest candidates to both PoC engineers.
140
+
141
+ Each PoC engineer must return:
142
+
143
+ - Reproduced, falsified, or unsafe-to-run.
144
+ - Exact commands, fixtures, or static proof.
145
+ - Observed output or reason it fails.
146
+ - Severity recommendation using exploitability and impact.
147
+ - Downgrade rationale for anything not reproduced.
148
+
149
+ ### Phase 3: Cross-Check
150
+
151
+ Send the PoC results back to all 5 members.
152
+
153
+ Ask every member:
154
+
155
+ - Which findings survive?
156
+ - Which findings should be downgraded or removed?
157
+ - What is the smallest, most specific remediation for each surviving finding?
158
+ - What regression test would prevent recurrence?
159
+
160
+ ### Phase 4: Final Report
161
+
162
+ Produce this report:
163
+
164
+ ```markdown
165
+ ## Security Research Result
166
+
167
+ ### Verdict
168
+ PASS | PASS WITH FINDINGS | BLOCK
169
+
170
+ ### Scope
171
+ - Target:
172
+ - Base/diff:
173
+ - Commands run:
174
+
175
+ ### Findings
176
+ | Severity | Title | CWE | Exploitability | Impact | PoC | Fix |
177
+ |----------|-------|-----|----------------|--------|-----|-----|
178
+
179
+ ### Finding Details
180
+ For each finding:
181
+ - Evidence:
182
+ - Attack path:
183
+ - PoC:
184
+ - Severity rationale:
185
+ - Minimal fix:
186
+ - Regression check:
187
+
188
+ ### Downgraded or Rejected Candidates
189
+ | Candidate | Reason |
190
+ |-----------|--------|
191
+
192
+ ### Residual Risk
193
+ - What was not tested and why.
194
+ ```
195
+
196
+ ## Output Rules
197
+
198
+ - Lead with the verdict.
199
+ - Do not bury blocking issues.
200
+ - Do not report speculative findings as vulnerabilities.
201
+ - Do not claim CVSS precision unless you actually scored the metrics.
202
+ - Include exact file paths and commands for every surviving finding.
203
+ - If no findings survive PoC, say that plainly and list residual risk.
204
+
@@ -0,0 +1,360 @@
1
+ ---
2
+ name: work-with-pr
3
+ description: "Full PR lifecycle: git worktree → implement → atomic commits → PR creation → verification loop (CI + review-work + Cubic approval) → merge. Keeps iterating until ALL gates pass and PR is merged. Worktree auto-cleanup after merge. Use whenever implementation work needs to land as a PR. Triggers: 'create a PR', 'implement and PR', 'work on this and make a PR', 'implement issue', 'land this as a PR', 'work-with-pr', 'PR workflow', 'implement end to end', even when user just says 'implement X' if the context implies PR delivery."
4
+ ---
5
+
6
+ # Work With PR — Full PR Lifecycle
7
+
8
+ You are executing a complete PR lifecycle: from isolated worktree setup through implementation, PR creation, and an unbounded verification loop until the PR is merged. The loop has three gates — CI, review-work, and Cubic — and you keep fixing and pushing until all three pass simultaneously.
9
+
10
+ <architecture>
11
+
12
+ ```
13
+ Phase 0: Setup → Branch + worktree in sibling directory
14
+ Phase 1: Implement → Do the work, atomic commits
15
+ Phase 2: PR Creation → Push, create PR targeting dev
16
+ Phase 3: Verify Loop → Unbounded iteration until ALL gates pass:
17
+ ├─ Gate A: CI → gh pr checks (bun test, typecheck, build)
18
+ ├─ Gate B: review-work → 5-agent parallel review
19
+ └─ Gate C: Cubic → cubic-dev-ai[bot] "No issues found"
20
+ Phase 4: Merge → Merge commit, worktree cleanup
21
+ ```
22
+
23
+ </architecture>
24
+
25
+ ---
26
+
27
+ ## Phase 0: Setup
28
+
29
+ Create an isolated worktree so the user's main working directory stays clean. This matters because the user may have uncommitted work, and switching branches in place could overwrite or disrupt it.
30
+
31
+ <setup>
32
+
33
+ ### 1. Resolve repository context
34
+
35
+ ```bash
36
+ REPO=$(gh repo view --json nameWithOwner -q .nameWithOwner)
37
+ REPO_NAME=$(basename "$PWD")
38
+ BASE_BRANCH="dev" # CI blocks PRs to master
39
+ ```
40
+
41
+ ### 2. Create branch
42
+
43
+ If user provides a branch name, use it. Otherwise, derive from the task:
44
+
45
+ ```bash
46
+ # Auto-generate: feature/short-description or fix/short-description
47
+ BRANCH_NAME="feature/$(echo "$TASK_SUMMARY" | tr '[:upper:] ' '[:lower:]-' | head -c 50)"
48
+ git fetch origin "$BASE_BRANCH"
49
+ git branch "$BRANCH_NAME" "origin/$BASE_BRANCH"
50
+ ```
51
+
52
+ ### 3. Create worktree
53
+
54
+ Place worktrees as siblings to the repo — not inside it. This avoids git nested repo issues and keeps the working tree clean.
55
+
56
+ ```bash
57
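+ ORIGINAL_DIR="$PWD"; WORKTREE_PATH="$(dirname "$ORIGINAL_DIR")/${REPO_NAME}-wt/${BRANCH_NAME}"  # absolute path, still valid after cd; ORIGINAL_DIR is used by Phase 4 cleanup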
58
+ mkdir -p "$(dirname "$WORKTREE_PATH")"
59
+ git worktree add "$WORKTREE_PATH" "$BRANCH_NAME"
60
+ ```
61
+
62
+ ### 4. Set working context
63
+
64
+ All subsequent work happens inside the worktree. Install dependencies if needed:
65
+
66
+ ```bash
67
+ cd "$WORKTREE_PATH"
68
+ # If bun project:
69
+ [ -f "bun.lock" ] && bun install
70
+ ```
71
+
72
+ </setup>
73
+
74
+ ---
75
+
76
+ ## Phase 1: Implement
77
+
78
+ Do the actual implementation work inside the worktree. The agent using this skill does the work directly — no subagent delegation for the implementation itself.
79
+
80
+ **Scope discipline**: For bug fixes, stay minimal. Fix the bug, add a test for it, done. Do not refactor surrounding code, add config options, or "improve" things that aren't broken. The verification loop will catch regressions — trust the process.
81
+
82
+ <implementation>
83
+
84
+ ### Commit strategy
85
+
86
+ Use the git-master skill's atomic commit principles. The reason for atomic commits: if CI fails on one change, you can isolate and fix it without unwinding everything.
87
+
88
+ ```
89
+ 3+ files changed → 2+ commits minimum
90
+ 5+ files changed → 3+ commits minimum
91
+ 10+ files changed → 5+ commits minimum
92
+ ```
93
+
94
+ Each commit should pair implementation with its tests. Load `git-master` skill when committing:
95
+
96
+ ```
97
+ task(category="quick", load_skills=["git-master"], prompt="Commit the changes atomically following git-master conventions. Repository is at {WORKTREE_PATH}.")
98
+ ```
99
+
100
+ ### Pre-push local validation
101
+
102
+ Before pushing, run the same checks CI will run. Catching failures locally saves a full CI round-trip (~3-5 min):
103
+
104
+ ```bash
105
+ bun run typecheck
106
+ bun test
107
+ bun run build
108
+ ```
109
+
110
+ Fix any failures before pushing. Each fix-commit cycle should be atomic.
111
+
112
+ </implementation>
113
+
114
+ ---
115
+
116
+ ## Phase 2: PR Creation
117
+
118
+ <pr_creation>
119
+
120
+ ### Push and create PR
121
+
122
+ ```bash
123
+ git push -u origin "$BRANCH_NAME"
124
+ ```
125
+
126
+ Create the PR using the project's template structure:
127
+
128
+ ```bash
129
+ gh pr create \
130
+ --base "$BASE_BRANCH" \
131
+ --head "$BRANCH_NAME" \
132
+ --title "$PR_TITLE" \
133
+ --body "$(cat <<'EOF'
134
+ ## Summary
135
+ [1-3 sentences describing what this PR does and why]
136
+
137
+ ## Changes
138
+ [Bullet list of key changes]
139
+
140
+ ## Testing
141
+ - `bun run typecheck` ✅
142
+ - `bun test` ✅
143
+ - `bun run build` ✅
144
+
145
+ ## Related Issues
146
+ [Link to issue if applicable]
147
+ EOF
148
+ )"
149
+ ```
150
+
151
+ Capture the PR number:
152
+
153
+ ```bash
154
+ PR_NUMBER=$(gh pr view --json number -q .number)
155
+ ```
156
+
157
+ </pr_creation>
158
+
159
+ ---
160
+
161
+ ## Phase 3: Verification Loop
162
+
163
+ This is the core of the skill. Three gates must ALL pass for the PR to be ready. The loop has no iteration cap — keep going until done. Gate ordering is intentional: CI is cheapest/fastest, review-work is most thorough, Cubic is external and asynchronous.
164
+
165
+ <verify_loop>
166
+
167
+ ```
168
+ while true:
169
+ 1. Wait for CI → Gate A
170
+ 2. If CI fails → read logs, fix, commit, push, continue
171
+ 3. Run review-work → Gate B
172
+ 4. If review fails → fix blocking issues, commit, push, continue
173
+ 5. Check Cubic → Gate C
174
+ 6. If Cubic has issues → fix issues, commit, push, continue
175
+ 7. All three pass → break
176
+ ```
177
+
178
+ ### Gate A: CI Checks
179
+
180
+ CI is the fastest feedback loop. Wait for it to complete, then parse results.
181
+
182
+ ```bash
183
+ # Wait for checks to start (GitHub needs a moment after push)
184
+ # Then watch for completion
185
+ gh pr checks "$PR_NUMBER" --watch --fail-fast
186
+ ```
187
+
188
+ **On failure**: Get the failed run logs to understand what broke:
189
+
190
+ ```bash
191
+ # Find the failed run
192
+ RUN_ID=$(gh run list --branch "$BRANCH_NAME" --status failure --json databaseId --jq '.[0].databaseId')
193
+
194
+ # Get failed job logs
195
+ gh run view "$RUN_ID" --log-failed
196
+ ```
197
+
198
+ Read the logs, fix the issue, commit atomically, push, and re-enter the loop.
199
+
200
+ ### Gate B: review-work
201
+
202
+ The review-work skill launches 5 parallel sub-agents (goal verification, QA, code quality, security, context mining). All 5 must pass.
203
+
204
+ Invoke review-work after CI passes — there's no point reviewing code that doesn't build:
205
+
206
+ ```
207
+ task(
208
+ category="unspecified-high",
209
+ load_skills=["review-work"],
210
+ run_in_background=false,
211
+ description="Post-implementation review of PR changes",
212
+ prompt="Review the implementation work on branch {BRANCH_NAME}. The worktree is at {WORKTREE_PATH}. Goal: {ORIGINAL_GOAL}. Constraints: {CONSTRAINTS}. Run command: bun run dev (or as appropriate)."
213
+ )
214
+ ```
215
+
216
+ **On failure**: review-work reports blocking issues with specific files and line numbers. Fix each blocking issue, commit, push, and re-enter the loop from Gate A (since code changed, CI must re-run).
217
+
218
+ ### Gate C: Cubic Approval
219
+
220
+ Cubic (`cubic-dev-ai[bot]`) is an automated review bot that comments on PRs. It does NOT use GitHub's APPROVED review state — instead it posts comments with issue counts and confidence scores.
221
+
222
+ **Approval signal**: The latest Cubic comment contains `**No issues found**` and confidence `**5/5**`.
223
+
224
+ **Issue signal**: The comment lists issues with file-level detail.
225
+
226
+ ```bash
227
+ # Get the latest Cubic review
228
+ CUBIC_REVIEW=$(gh api "repos/${REPO}/pulls/${PR_NUMBER}/reviews" \
229
+ --jq '[.[] | select(.user.login == "cubic-dev-ai[bot]")] | last | .body')
230
+
231
+ # Check if approved
232
+ if echo "$CUBIC_REVIEW" | grep -q "No issues found"; then
233
+ echo "Cubic: APPROVED"
234
+ else
235
+ echo "Cubic: ISSUES FOUND"
236
+ echo "$CUBIC_REVIEW"
237
+ fi
238
+ ```
239
+
240
+ **On issues**: Cubic's review body contains structured issue descriptions. Parse them, determine which are valid (some may be false positives), fix the valid ones, commit, push, re-enter from Gate A.
241
+
242
+ Cubic reviews are triggered automatically on PR updates. After pushing a fix, wait for the new review to appear before checking again. Use `gh api` polling with a conditional loop:
243
+
244
+ ```bash
245
+ # Wait for new Cubic review after push
246
+ PUSH_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
247
+ while true; do
248
+ LATEST_REVIEW_TIME=$(gh api "repos/${REPO}/pulls/${PR_NUMBER}/reviews" \
249
+ --jq '[.[] | select(.user.login == "cubic-dev-ai[bot]")] | last | .submitted_at')
250
+ if [[ "$LATEST_REVIEW_TIME" > "$PUSH_TIME" ]]; then
251
+ break
252
+ fi
253
+ # Use gh api call itself as the delay mechanism — each call takes ~1-2s
254
+ # For longer waits, use: timeout 30 gh pr checks "$PR_NUMBER" --watch 2>/dev/null || true
255
+ done
256
+ ```
257
+
258
+ ### Iteration discipline
259
+
260
+ Each iteration through the loop:
261
+ 1. Fix ONLY the issues identified by the failing gate
262
+ 2. Commit atomically (one logical fix per commit)
263
+ 3. Push
264
+ 4. Re-enter from Gate A (code changed → full re-verification)
265
+
266
+ Avoid the temptation to "improve" unrelated code during fix iterations. Scope creep in the fix loop makes debugging harder and can introduce new failures.
267
+
268
+ </verify_loop>
269
+
270
+ ---
271
+
272
+ ## Phase 4: Merge & Cleanup
273
+
274
+ Once all three gates pass:
275
+
276
+ <merge_cleanup>
277
+
278
+ ### Merge the PR
279
+
280
+ ```bash
281
+ # This repository requires merge commits. Never use --squash or --rebase here.
282
+ gh pr merge "$PR_NUMBER" --merge --delete-branch
283
+ ```
284
+
285
+ ### Sync .omo state back to main repo
286
+
287
+ Before removing the worktree, copy `.omo/` state back. When `.omo/` is gitignored, files written there during worktree execution are not committed or merged — they would be lost on worktree removal.
288
+
289
+ ```bash
290
+ # Sync .omo state from worktree to main repo (preserves task state, plans, notepads)
291
+ if [ -d "$WORKTREE_PATH/.omo" ]; then
292
+ mkdir -p "$ORIGINAL_DIR/.omo"
293
+ cp -r "$WORKTREE_PATH/.omo/." "$ORIGINAL_DIR/.omo/" 2>/dev/null || true
294
+ fi
295
+ ```
296
+
297
+ ### Clean up the worktree
298
+
299
+ The worktree served its purpose — remove it to avoid disk bloat:
300
+
301
+ ```bash
302
+ cd "$ORIGINAL_DIR" # Return to original working directory
303
+ git worktree remove "$WORKTREE_PATH"
304
+ # Prune any stale worktree references
305
+ git worktree prune
306
+ ```
307
+
308
+ ### Report completion
309
+
310
+ Summarize what happened:
311
+
312
+ ```
313
+ ## PR Merged ✅
314
+
315
+ - **PR**: #{PR_NUMBER} — {PR_TITLE}
316
+ - **Branch**: {BRANCH_NAME} → {BASE_BRANCH}
317
+ - **Iterations**: {N} verification loops
318
+ - **Gates passed**: CI ✅ | review-work ✅ | Cubic ✅
319
+ - **Worktree**: cleaned up
320
+ ```
321
+
322
+ </merge_cleanup>
323
+
324
+ ---
325
+
326
+ ## Failure Recovery
327
+
328
+ <failure_recovery>
329
+
330
+ If you hit an unrecoverable error (e.g., a merge conflict with the base branch that you cannot resolve, or an infrastructure failure):
331
+
332
+ 1. **Do NOT delete the worktree** — the user may want to inspect or continue manually
333
+ 2. Report what happened, what was attempted, and where things stand
334
+ 3. Include the worktree path so the user can resume
335
+
336
+ For merge conflicts:
337
+
338
+ ```bash
339
+ cd "$WORKTREE_PATH"
340
+ git fetch origin "$BASE_BRANCH"
341
+ git rebase "origin/$BASE_BRANCH"
342
+ # Resolve conflicts, then push with `git push --force-with-lease` (the rebase rewrote history) and re-enter the loop from Gate A
343
+ ```
344
+
345
+ </failure_recovery>
346
+
347
+ ---
348
+
349
+ ## Anti-Patterns
350
+
351
+ | Violation | Why it fails | Severity |
352
+ |-----------|-------------|----------|
353
+ | Working in main worktree instead of isolated worktree | Pollutes user's working directory, may destroy uncommitted work | CRITICAL |
354
+ | Pushing directly to dev/master | Bypasses review entirely | CRITICAL |
355
+ | Skipping CI gate after code changes | review-work and Cubic may pass on stale code | CRITICAL |
356
+ | Fixing unrelated code during verification loop | Scope creep causes new failures | HIGH |
357
+ | Deleting worktree on failure | User loses ability to inspect/resume | HIGH |
358
+ | Ignoring Cubic false positives without justification | Cubic issues should be evaluated, not blindly dismissed | MEDIUM |
359
+ | Giant single commits | Harder to isolate failures, violates git-master principles | MEDIUM |
360
+ | Not running local checks before push | Wastes CI time on obvious failures | MEDIUM |
@@ -0,0 +1,76 @@
1
+ {
2
+ "skill_name": "work-with-pr",
3
+ "evals": [
4
+ {
5
+ "id": 1,
6
+ "prompt": "I need to add a `max_background_agents` config option to oh-my-opencode that limits how many background agents can run simultaneously. It should be in the plugin config schema with a default of 5. Add validation and make sure the background manager respects it. Create a PR for this.",
7
+ "expected_output": "Agent creates worktree, implements config option with schema validation, adds tests, creates PR, iterates through verification gates until merged",
8
+ "files": [],
9
+ "assertions": [
10
+ {"id": "worktree-isolation", "text": "Plan uses git worktree in a sibling directory (not main working directory)"},
11
+ {"id": "branch-from-dev", "text": "Branch is created from origin/dev (not master/main)"},
12
+ {"id": "atomic-commits", "text": "Plan specifies multiple atomic commits for multi-file changes"},
13
+ {"id": "local-validation", "text": "Runs bun run typecheck, bun test, and bun run build before pushing"},
14
+ {"id": "pr-targets-dev", "text": "PR is created targeting dev branch (not master)"},
15
+ {"id": "three-gates", "text": "Verification loop includes all 3 gates: CI, review-work, and Cubic"},
16
+ {"id": "gate-ordering", "text": "Gates are checked in order: CI first, then review-work, then Cubic"},
17
+ {"id": "cubic-check-method", "text": "Cubic check uses gh api to check cubic-dev-ai[bot] reviews for 'No issues found'"},
18
+ {"id": "worktree-cleanup", "text": "Plan includes worktree cleanup after merge"},
19
+ {"id": "real-file-references", "text": "Code changes reference actual files in the codebase (config schema, background manager)"}
20
+ ]
21
+ },
22
+ {
23
+ "id": 2,
24
+ "prompt": "The atlas hook has a bug where it crashes when boulder.json is missing the worktree_path field. Fix it and land the fix as a PR. Make sure CI passes.",
25
+ "expected_output": "Agent creates worktree for the fix branch, adds null check and test for missing worktree_path, creates PR, iterates verification loop",
26
+ "files": [],
27
+ "assertions": [
28
+ {"id": "worktree-isolation", "text": "Plan uses git worktree in a sibling directory"},
29
+ {"id": "minimal-fix", "text": "Fix is minimal — adds null check, doesn't refactor unrelated code"},
30
+ {"id": "test-added", "text": "Test case added for the missing worktree_path scenario"},
31
+ {"id": "three-gates", "text": "Verification loop includes all 3 gates: CI, review-work, Cubic"},
32
+ {"id": "real-atlas-files", "text": "References actual atlas hook files in src/hooks/atlas/"},
33
+ {"id": "fix-branch-naming", "text": "Branch name follows fix/ prefix convention"}
34
+ ]
35
+ },
36
+ {
37
+ "id": 3,
38
+ "prompt": "Refactor src/tools/delegate-task/constants.ts to split DEFAULT_CATEGORIES and CATEGORY_MODEL_REQUIREMENTS into separate files. Keep backward compatibility with the barrel export. Make a PR.",
39
+ "expected_output": "Agent creates worktree, splits file with atomic commits, ensures imports still work via barrel, creates PR, runs through all gates",
40
+ "files": [],
41
+ "assertions": [
42
+ {"id": "worktree-isolation", "text": "Plan uses git worktree in a sibling directory"},
43
+ {"id": "multiple-atomic-commits", "text": "Uses 2+ commits for the multi-file refactor"},
44
+ {"id": "barrel-export", "text": "Maintains backward compatibility via barrel re-export in constants.ts or index.ts"},
45
+ {"id": "three-gates", "text": "Verification loop includes all 3 gates"},
46
+ {"id": "real-constants-file", "text": "References actual src/tools/delegate-task/constants.ts file and its exports"}
47
+ ]
48
+ },
49
+ {
50
+ "id": 4,
51
+ "prompt": "implement issue #100 - we need to add a new built-in MCP for arxiv paper search. just the basic search endpoint, nothing fancy. pr it",
52
+ "expected_output": "Agent creates worktree, implements arxiv MCP following existing MCP patterns (websearch, context7, grep_app), creates PR with proper template, verification loop runs",
53
+ "files": [],
54
+ "assertions": [
55
+ {"id": "worktree-isolation", "text": "Plan uses git worktree in a sibling directory"},
56
+ {"id": "follows-mcp-pattern", "text": "New MCP follows existing pattern from src/mcp/ (websearch, context7, grep_app)"},
57
+ {"id": "three-gates", "text": "Verification loop includes all 3 gates"},
58
+ {"id": "pr-targets-dev", "text": "PR targets dev branch"},
59
+ {"id": "local-validation", "text": "Runs local checks before pushing"}
60
+ ]
61
+ },
62
+ {
63
+ "id": 5,
64
+ "prompt": "The comment-checker hook is too aggressive - it's flagging legitimate comments that happen to contain 'Note:' as AI slop. Relax the regex pattern and add test cases for the false positives. Work on a separate branch and make a PR.",
65
+ "expected_output": "Agent creates worktree, fixes regex, adds specific test cases for false positive scenarios, creates PR, all three gates pass",
66
+ "files": [],
67
+ "assertions": [
68
+ {"id": "worktree-isolation", "text": "Plan uses git worktree in a sibling directory"},
69
+ {"id": "real-comment-checker-files", "text": "References actual comment-checker hook files in the codebase"},
70
+ {"id": "regression-tests", "text": "Adds test cases specifically for 'Note:' false positive scenarios"},
71
+ {"id": "three-gates", "text": "Verification loop includes all 3 gates"},
72
+ {"id": "minimal-change", "text": "Only modifies regex and adds tests — no unrelated changes"}
73
+ ]
74
+ }
75
+ ]
76
+ }