oh-my-opencode 4.3.1 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. package/.agents/command/get-unpublished-changes.md +148 -0
  2. package/.agents/command/omomomo.md +37 -0
  3. package/.agents/command/publish.md +376 -0
  4. package/.agents/command/remove-deadcode.md +221 -0
  5. package/.agents/command/security-research.md +16 -0
  6. package/.agents/skills/get-unpublished-changes/SKILL.md +24 -0
  7. package/.agents/skills/github-triage/SKILL.md +587 -0
  8. package/.agents/skills/github-triage/scripts/gh_fetch.py +398 -0
  9. package/.agents/skills/hyperplan/SKILL.md +450 -0
  10. package/.agents/skills/omomomo/SKILL.md +36 -0
  11. package/.agents/skills/pre-publish-review/SKILL.md +407 -0
  12. package/.agents/skills/publish/SKILL.md +428 -0
  13. package/.agents/skills/remove-deadcode/SKILL.md +216 -0
  14. package/.agents/skills/security-research/SKILL.md +204 -0
  15. package/.agents/skills/work-with-pr/SKILL.md +360 -0
  16. package/.agents/skills/work-with-pr-workspace/evals/evals.json +76 -0
  17. package/.agents/skills/work-with-pr-workspace/iteration-1/benchmark.json +138 -0
  18. package/.agents/skills/work-with-pr-workspace/iteration-1/benchmark.md +42 -0
  19. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/eval_metadata.json +57 -0
  20. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/grading.json +15 -0
  21. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/code-changes.md +454 -0
  22. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/execution-plan.md +136 -0
  23. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/pr-description.md +47 -0
  24. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/verification-strategy.md +163 -0
  25. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/timing.json +1 -0
  26. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/grading.json +15 -0
  27. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/code-changes.md +615 -0
  28. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/execution-plan.md +99 -0
  29. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/pr-description.md +50 -0
  30. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/verification-strategy.md +111 -0
  31. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/timing.json +1 -0
  32. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/eval_metadata.json +37 -0
  33. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/grading.json +11 -0
  34. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md +205 -0
  35. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/execution-plan.md +78 -0
  36. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/pr-description.md +42 -0
  37. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/verification-strategy.md +87 -0
  38. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/timing.json +1 -0
  39. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/grading.json +11 -0
  40. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/code-changes.md +334 -0
  41. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/execution-plan.md +86 -0
  42. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/pr-description.md +23 -0
  43. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/verification-strategy.md +119 -0
  44. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/timing.json +1 -0
  45. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/eval_metadata.json +32 -0
  46. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/grading.json +10 -0
  47. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/code-changes.md +221 -0
  48. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/execution-plan.md +104 -0
  49. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/pr-description.md +41 -0
  50. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/verification-strategy.md +84 -0
  51. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/timing.json +1 -0
  52. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/grading.json +10 -0
  53. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/code-changes.md +342 -0
  54. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/execution-plan.md +131 -0
  55. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/pr-description.md +39 -0
  56. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/verification-strategy.md +128 -0
  57. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/timing.json +1 -0
  58. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/eval_metadata.json +32 -0
  59. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/grading.json +10 -0
  60. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/code-changes.md +143 -0
  61. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/execution-plan.md +82 -0
  62. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/pr-description.md +51 -0
  63. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/verification-strategy.md +69 -0
  64. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/timing.json +1 -0
  65. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/grading.json +10 -0
  66. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/code-changes.md +252 -0
  67. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/execution-plan.md +83 -0
  68. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/pr-description.md +33 -0
  69. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/verification-strategy.md +101 -0
  70. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/timing.json +1 -0
  71. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/eval_metadata.json +32 -0
  72. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/grading.json +10 -0
  73. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/code-changes.md +387 -0
  74. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/execution-plan.md +112 -0
  75. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/pr-description.md +51 -0
  76. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/verification-strategy.md +75 -0
  77. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/timing.json +1 -0
  78. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/grading.json +10 -0
  79. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/code-changes.md +529 -0
  80. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/execution-plan.md +127 -0
  81. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/pr-description.md +42 -0
  82. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/verification-strategy.md +120 -0
  83. package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/timing.json +1 -0
  84. package/.agents/skills/work-with-pr-workspace/iteration-1/review.html +1326 -0
  85. package/.opencode/command/get-unpublished-changes.md +148 -0
  86. package/.opencode/command/omomomo.md +37 -0
  87. package/.opencode/command/publish.md +376 -0
  88. package/.opencode/command/remove-deadcode.md +221 -0
  89. package/.opencode/command/security-research.md +16 -0
  90. package/.opencode/skills/github-triage/SKILL.md +587 -0
  91. package/.opencode/skills/github-triage/scripts/gh_fetch.py +398 -0
  92. package/.opencode/skills/hyperplan/SKILL.md +450 -0
  93. package/.opencode/skills/pre-publish-review/SKILL.md +407 -0
  94. package/.opencode/skills/work-with-pr/SKILL.md +360 -0
  95. package/.opencode/skills/work-with-pr-workspace/evals/evals.json +76 -0
  96. package/.opencode/skills/work-with-pr-workspace/iteration-1/benchmark.json +138 -0
  97. package/.opencode/skills/work-with-pr-workspace/iteration-1/benchmark.md +42 -0
  98. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/eval_metadata.json +57 -0
  99. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/grading.json +15 -0
  100. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/code-changes.md +454 -0
  101. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/execution-plan.md +136 -0
  102. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/pr-description.md +47 -0
  103. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/verification-strategy.md +163 -0
  104. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/timing.json +1 -0
  105. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/grading.json +15 -0
  106. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/code-changes.md +615 -0
  107. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/execution-plan.md +99 -0
  108. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/pr-description.md +50 -0
  109. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/verification-strategy.md +111 -0
  110. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/timing.json +1 -0
  111. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/eval_metadata.json +37 -0
  112. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/grading.json +11 -0
  113. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md +205 -0
  114. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/execution-plan.md +78 -0
  115. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/pr-description.md +42 -0
  116. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/verification-strategy.md +87 -0
  117. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/timing.json +1 -0
  118. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/grading.json +11 -0
  119. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/code-changes.md +334 -0
  120. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/execution-plan.md +86 -0
  121. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/pr-description.md +23 -0
  122. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/verification-strategy.md +119 -0
  123. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/timing.json +1 -0
  124. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/eval_metadata.json +32 -0
  125. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/grading.json +10 -0
  126. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/code-changes.md +221 -0
  127. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/execution-plan.md +104 -0
  128. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/pr-description.md +41 -0
  129. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/verification-strategy.md +84 -0
  130. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/timing.json +1 -0
  131. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/grading.json +10 -0
  132. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/code-changes.md +342 -0
  133. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/execution-plan.md +131 -0
  134. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/pr-description.md +39 -0
  135. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/verification-strategy.md +128 -0
  136. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/timing.json +1 -0
  137. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/eval_metadata.json +32 -0
  138. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/grading.json +10 -0
  139. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/code-changes.md +143 -0
  140. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/execution-plan.md +82 -0
  141. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/pr-description.md +51 -0
  142. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/verification-strategy.md +69 -0
  143. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/timing.json +1 -0
  144. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/grading.json +10 -0
  145. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/code-changes.md +252 -0
  146. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/execution-plan.md +83 -0
  147. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/pr-description.md +33 -0
  148. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/verification-strategy.md +101 -0
  149. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/timing.json +1 -0
  150. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/eval_metadata.json +32 -0
  151. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/grading.json +10 -0
  152. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/code-changes.md +387 -0
  153. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/execution-plan.md +112 -0
  154. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/pr-description.md +51 -0
  155. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/verification-strategy.md +75 -0
  156. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/timing.json +1 -0
  157. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/grading.json +10 -0
  158. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/code-changes.md +529 -0
  159. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/execution-plan.md +127 -0
  160. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/pr-description.md +42 -0
  161. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/verification-strategy.md +120 -0
  162. package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/timing.json +1 -0
  163. package/.opencode/skills/work-with-pr-workspace/iteration-1/review.html +1326 -0
  164. package/README.ja.md +1 -1
  165. package/README.ko.md +1 -1
  166. package/README.md +1 -1
  167. package/README.ru.md +1 -1
  168. package/README.zh-cn.md +1 -1
  169. package/dist/agents/atlas/agent.d.ts +6 -6
  170. package/dist/agents/prometheus/gemini.d.ts +0 -11
  171. package/dist/agents/prometheus/gpt.d.ts +0 -10
  172. package/dist/agents/prometheus/system-prompt.d.ts +2 -20
  173. package/dist/agents/types.d.ts +1 -16
  174. package/dist/cli/index.js +60 -20
  175. package/dist/config/schema/agent-names.d.ts +3 -3
  176. package/dist/config/schema/agent-overrides.d.ts +208 -208
  177. package/dist/config/schema/categories.d.ts +28 -28
  178. package/dist/config/schema/fallback-models.d.ts +20 -20
  179. package/dist/config/schema/oh-my-opencode-config.d.ts +208 -208
  180. package/dist/features/background-agent/parent-wake-dedupe.d.ts +19 -0
  181. package/dist/features/background-agent/parent-wake-notifier.d.ts +8 -19
  182. package/dist/help/schema/acp.d.ts +95 -0
  183. package/dist/help/schema/doctor.d.ts +147 -0
  184. package/dist/help/schema/sandbox.d.ts +74 -0
  185. package/dist/help/schema/status.d.ts +139 -0
  186. package/dist/hooks/keyword-detector/analyze/default.d.ts +1 -1
  187. package/dist/hooks/keyword-detector/hyperplan/default.d.ts +1 -1
  188. package/dist/hooks/keyword-detector/search/default.d.ts +1 -1
  189. package/dist/hooks/keyword-detector/team/default.d.ts +2 -7
  190. package/dist/hooks/keyword-detector/ultrawork/default.d.ts +1 -9
  191. package/dist/hooks/keyword-detector/ultrawork/gemini.d.ts +1 -16
  192. package/dist/hooks/keyword-detector/ultrawork/gpt.d.ts +1 -10
  193. package/dist/hooks/keyword-detector/ultrawork/planner.d.ts +1 -5
  194. package/dist/hooks/ralph-loop/no-progress-turn-detector.d.ts +7 -0
  195. package/dist/hooks/ralph-loop/pending-verification-handler.d.ts +1 -0
  196. package/dist/hooks/ralph-loop/types.d.ts +1 -0
  197. package/dist/hooks/runtime-fallback/error-classifier.d.ts +1 -0
  198. package/dist/hooks/tool-pair-validator/hook.d.ts +6 -1
  199. package/dist/index.js +51976 -50299
  200. package/dist/plugin-handlers/provider-config-handler.d.ts +1 -0
  201. package/dist/shared/migration/model-versions.d.ts +6 -0
  202. package/dist/shared/prompt-async-gate/pending-tool-turn.d.ts +1 -0
  203. package/dist/shared/prompt-async-gate/types.d.ts +4 -3
  204. package/package.json +19 -13
  205. package/dist/agents/atlas/default-prompt-sections.d.ts +0 -6
  206. package/dist/agents/atlas/default.d.ts +0 -2
  207. package/dist/agents/atlas/gemini-prompt-sections.d.ts +0 -6
  208. package/dist/agents/atlas/gemini.d.ts +0 -2
  209. package/dist/agents/atlas/gpt-prompt-sections.d.ts +0 -6
  210. package/dist/agents/atlas/gpt.d.ts +0 -2
  211. package/dist/agents/atlas/kimi-prompt-sections.d.ts +0 -6
  212. package/dist/agents/atlas/kimi.d.ts +0 -2
  213. package/dist/agents/atlas/opus-4-7-prompt-sections.d.ts +0 -6
  214. package/dist/agents/atlas/opus-4-7.d.ts +0 -2
  215. package/dist/agents/atlas/shared-prompt.d.ts +0 -9
  216. package/dist/agents/prometheus/behavioral-summary.d.ts +0 -6
  217. package/dist/agents/prometheus/high-accuracy-mode.d.ts +0 -6
  218. package/dist/agents/prometheus/identity-constraints.d.ts +0 -7
  219. package/dist/agents/prometheus/interview-mode.d.ts +0 -7
  220. package/dist/agents/prometheus/plan-generation.d.ts +0 -7
  221. package/dist/agents/prometheus/plan-template.d.ts +0 -7
  222. package/dist/agents/prometheus/spec-driven-mode.d.ts +0 -7
@@ -0,0 +1,50 @@
1
+ # PR Description
2
+
3
+ **Title:** feat: add `maxBackgroundAgents` config to limit total simultaneous background agents
4
+
5
+ **Body:**
6
+
7
+ ## Summary
8
+
9
+ - Add `maxBackgroundAgents` field to `BackgroundTaskConfigSchema` that enforces a global ceiling on total running background agents across all models/providers
10
+ - Modify `ConcurrencyManager` to track global count and enforce the limit alongside existing per-model limits
11
+ - Add schema validation tests and concurrency enforcement tests
12
+
13
+ ## Motivation
14
+
15
+ Currently, concurrency is only limited per model/provider key (default 5 per key). On resource-constrained machines or when using many different models, the total number of background agents can grow unbounded (5 per model x N models). This config option lets users set a hard ceiling.
16
+
17
+ ## Changes
18
+
19
+ ### Schema (`src/config/schema/background-task.ts`)
20
+ - Added `maxBackgroundAgents: z.number().int().min(1).optional()` to `BackgroundTaskConfigSchema`
21
+ - Grouped with existing limit fields (`maxDepth`, `maxDescendants`)
22
+
23
+ ### ConcurrencyManager (`src/features/background-agent/concurrency.ts`)
24
+ - Added `globalCount` tracking total active agents across all concurrency keys
25
+ - Added `getGlobalLimit()` reading `maxBackgroundAgents` from config (defaults to `Infinity` = no global limit)
26
+ - Modified `acquire()` to check both per-model AND global capacity
27
+ - Modified `release()` to decrement global count and drain cross-model waiters blocked by global limit
28
+ - Modified `clear()` to reset global state
29
+ - Added `getGlobalCount()` / `getGlobalQueueLength()` for testing
30
+
31
+ ### Tests
32
+ - `src/config/schema/background-task.test.ts`: 6 test cases for schema validation (valid, min boundary, below min, negative, non-integer, undefined)
33
+ - `src/features/background-agent/concurrency.test.ts`: 8 test cases for global limit enforcement (cross-model blocking, release unblocking, per-model vs global interaction, no-config default, clear reset)
34
+
35
+ ## Config Example
36
+
37
+ ```jsonc
38
+ {
39
+ "background_task": {
40
+ "maxBackgroundAgents": 5,
41
+ "defaultConcurrency": 3
42
+ }
43
+ }
44
+ ```
45
+
46
+ ## Backward Compatibility
47
+
48
+ - When `maxBackgroundAgents` is not set (default), no global limit is enforced - behavior is identical to before
49
+ - Existing `defaultConcurrency`, `providerConcurrency`, and `modelConcurrency` continue to work unchanged
50
+ - No config migration needed
@@ -0,0 +1,111 @@
1
+ # Verification Strategy
2
+
3
+ ## 1. Static Analysis
4
+
5
+ ### TypeScript Typecheck
6
+ ```bash
7
+ bun run typecheck
8
+ ```
9
+ - Verify no type errors introduced
10
+ - `BackgroundTaskConfig` type is inferred from Zod schema, so adding the field automatically updates the type
11
+ - All existing consumers of `BackgroundTaskConfig` remain compatible (new field is optional)
12
+
13
+ ### LSP Diagnostics
14
+ Check changed files for errors:
15
+ - `src/config/schema/background-task.ts`
16
+ - `src/features/background-agent/concurrency.ts`
17
+ - `src/config/schema/background-task.test.ts`
18
+ - `src/features/background-agent/concurrency.test.ts`
19
+
20
+ ## 2. Unit Tests
21
+
22
+ ### Schema Validation Tests
23
+ ```bash
24
+ bun test src/config/schema/background-task.test.ts
25
+ ```
26
+
27
+ | Test Case | Input | Expected |
28
+ |-----------|-------|----------|
29
+ | Valid value (10) | `{ maxBackgroundAgents: 10 }` | Parses to `10` |
30
+ | Minimum boundary (1) | `{ maxBackgroundAgents: 1 }` | Parses to `1` |
31
+ | Below minimum (0) | `{ maxBackgroundAgents: 0 }` | Throws `ZodError` |
32
+ | Negative (-1) | `{ maxBackgroundAgents: -1 }` | Throws `ZodError` |
33
+ | Non-integer (2.5) | `{ maxBackgroundAgents: 2.5 }` | Throws `ZodError` |
34
+ | Not provided | `{}` | Field is `undefined` |
35
+
36
+ ### ConcurrencyManager Tests
37
+ ```bash
38
+ bun test src/features/background-agent/concurrency.test.ts
39
+ ```
40
+
41
+ | Test Case | Setup | Expected |
42
+ |-----------|-------|----------|
43
+ | No config = no global limit | No `maxBackgroundAgents` | `getGlobalLimit()` returns `Infinity` |
44
+ | Config respected | `maxBackgroundAgents: 3` | `getGlobalLimit()` returns `3` |
45
+ | Cross-model blocking | Global limit 2, acquire model-a + model-b, try model-c | model-c blocks |
46
+ | Under-limit allows | Global limit 3, acquire 3 different models | All succeed |
47
+ | Per-model + global interaction | Per-model 1, global 3, acquire model-a twice | Blocked by per-model, not global |
48
+ | Release unblocks | Global limit 1, acquire model-a, queue model-b, release model-a | model-b proceeds |
49
+ | No global limit = no enforcement | No config, acquire 6 different models | All succeed |
50
+ | Clear resets global count | Acquire 2, clear | `getGlobalCount()` is 0 |
51
+
52
+ ### Existing Test Regression
53
+ ```bash
54
+ bun test src/features/background-agent/concurrency.test.ts
55
+ bun test src/config/schema/background-task.test.ts
56
+ bun test src/config/schema.test.ts
57
+ ```
58
+ All existing tests must continue to pass unchanged.
59
+
60
+ ## 3. Integration Verification
61
+
62
+ ### Config Loading Path
63
+ Verify the config flows correctly through the system:
64
+
65
+ 1. **Schema → Type**: `BackgroundTaskConfig` type auto-includes `maxBackgroundAgents` via `z.infer`
66
+ 2. **Config file → Schema**: `loadConfigFromPath()` in `plugin-config.ts` uses `OhMyOpenCodeConfigSchema.safeParse()` which includes `BackgroundTaskConfigSchema`
67
+ 3. **Config → Manager**: `create-managers.ts` passes `pluginConfig.background_task` to `BackgroundManager` constructor
68
+ 4. **Manager → ConcurrencyManager**: `BackgroundManager` constructor passes config to `new ConcurrencyManager(config)`
69
+ 5. **ConcurrencyManager → Enforcement**: `acquire()` reads `config.maxBackgroundAgents` via `getGlobalLimit()`
70
+
71
+ No changes needed in steps 2-4 since the field is optional and the existing plumbing passes the entire `BackgroundTaskConfig` object.
72
+
73
+ ### Manual Config Test
74
+ Create a test config to verify parsing:
75
+ ```bash
76
+ echo '{ "background_task": { "maxBackgroundAgents": 3 } }' | bun -e "
77
+ const { BackgroundTaskConfigSchema } = require('./src/config/schema/background-task');
78
+ const result = BackgroundTaskConfigSchema.safeParse(JSON.parse(require('fs').readFileSync('/dev/stdin', 'utf-8')).background_task);
79
+ console.log(result.success, result.data);
80
+ "
81
+ ```
82
+
83
+ ## 4. Build Verification
84
+
85
+ ```bash
86
+ bun run build
87
+ ```
88
+ - Verify build succeeds
89
+ - Schema JSON output includes the new field (if applicable)
90
+
91
+ ## 5. Edge Cases to Verify
92
+
93
+ | Edge Case | Expected Behavior |
94
+ |-----------|-------------------|
95
+ | `maxBackgroundAgents` not set | No global limit enforced (backward compatible) |
96
+ | `maxBackgroundAgents: 1` | Only 1 background agent at a time across all models |
97
+ | `maxBackgroundAgents` > sum of all per-model limits | Global limit never triggers (per-model limits are tighter) |
98
+ | Per-model limit tighter than global | Per-model limit blocks first |
99
+ | Global limit tighter than per-model | Global limit blocks first |
100
+ | Release from one model unblocks different model | Global slot freed, different model's waiter proceeds |
101
+ | Manager shutdown with global waiters | `clear()` rejects all waiters and resets global count |
102
+ | Concurrent acquire/release | No race conditions (single-threaded JS event loop) |
103
+
104
+ ## 6. CI Pipeline
105
+
106
+ The existing CI workflow (`ci.yml`) will run:
107
+ - `bun run typecheck` - type checking
108
+ - `bun test` - all tests including new ones
109
+ - `bun run build` - build verification
110
+
111
+ No CI changes needed.
@@ -0,0 +1 @@
1
+ {"total_tokens": null, "duration_ms": 365000, "total_duration_seconds": 365}
@@ -0,0 +1,37 @@
1
+ {
2
+ "eval_id": 2,
3
+ "eval_name": "bugfix-atlas-null-check",
4
+ "prompt": "The atlas hook has a bug where it crashes when boulder.json is missing the worktree_path field. Fix it and land the fix as a PR. Make sure CI passes.",
5
+ "assertions": [
6
+ {
7
+ "id": "worktree-isolation",
8
+ "text": "Plan uses git worktree in a sibling directory",
9
+ "type": "manual"
10
+ },
11
+ {
12
+ "id": "minimal-fix",
13
+ "text": "Fix is minimal — adds null check, doesn't refactor unrelated code",
14
+ "type": "manual"
15
+ },
16
+ {
17
+ "id": "test-added",
18
+ "text": "Test case added for the missing worktree_path scenario",
19
+ "type": "manual"
20
+ },
21
+ {
22
+ "id": "three-gates",
23
+ "text": "Verification loop includes all 3 gates: CI, review-work, Cubic",
24
+ "type": "manual"
25
+ },
26
+ {
27
+ "id": "real-atlas-files",
28
+ "text": "References actual atlas hook files in src/hooks/atlas/",
29
+ "type": "manual"
30
+ },
31
+ {
32
+ "id": "fix-branch-naming",
33
+ "text": "Branch name follows fix/ prefix convention",
34
+ "type": "manual"
35
+ }
36
+ ]
37
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "run_id": "eval-2-with_skill",
3
+ "expectations": [
4
+ {"text": "Plan uses git worktree in a sibling directory", "passed": true, "evidence": "../omo-wt/fix-atlas-worktree-path-crash"},
5
+ {"text": "Fix is minimal — adds null check, doesn't refactor unrelated code", "passed": true, "evidence": "3 targeted changes: readBoulderState sanitization, idle-event guard, tests"},
6
+ {"text": "Test case added for the missing worktree_path scenario", "passed": true, "evidence": "Tests for missing and null worktree_path"},
7
+ {"text": "Verification loop includes all 3 gates", "passed": true, "evidence": "Gate A (CI), Gate B (review-work), Gate C (Cubic)"},
8
+ {"text": "References actual atlas hook files", "passed": true, "evidence": "src/hooks/atlas/idle-event.ts, src/features/boulder-state/storage.ts"},
9
+ {"text": "Branch name follows fix/ prefix convention", "passed": true, "evidence": "fix/atlas-worktree-path-crash"}
10
+ ]
11
+ }
@@ -0,0 +1,205 @@
1
+ # Code Changes
2
+
3
+ ## File 1: `src/features/boulder-state/storage.ts`
4
+
5
+ **Change**: Add `worktree_path` sanitization in `readBoulderState()`
6
+
7
+ ```typescript
8
+ // BEFORE (lines 29-32):
9
+ if (!Array.isArray(parsed.session_ids)) {
10
+ parsed.session_ids = []
11
+ }
12
+ return parsed as BoulderState
13
+
14
+ // AFTER:
15
+ if (!Array.isArray(parsed.session_ids)) {
16
+ parsed.session_ids = []
17
+ }
18
+ if (parsed.worktree_path !== undefined && typeof parsed.worktree_path !== "string") {
19
+ parsed.worktree_path = undefined
20
+ }
21
+ return parsed as BoulderState
22
+ ```
23
+
24
+ **Rationale**: `readBoulderState` casts raw `JSON.parse()` output as `BoulderState` without validating individual fields. When boulder.json has `"worktree_path": null` (valid JSON from manual edits, corrupted state, or external tools), the runtime type is `null` but TypeScript type says `string | undefined`. This sanitization ensures downstream code always gets the correct type.
25
+
26
+ ---
27
+
28
+ ## File 2: `src/hooks/atlas/idle-event.ts`
29
+
30
+ **Change**: Add defensive string type guard before passing `worktree_path` to continuation functions.
31
+
32
+ ```typescript
33
+ // BEFORE (lines 83-88 in scheduleRetry):
34
+ await injectContinuation({
35
+ ctx,
36
+ sessionID,
37
+ sessionState,
38
+ options,
39
+ planName: currentBoulder.plan_name,
40
+ progress: currentProgress,
41
+ agent: currentBoulder.agent,
42
+ worktreePath: currentBoulder.worktree_path,
43
+ })
44
+
45
+ // AFTER:
46
+ await injectContinuation({
47
+ ctx,
48
+ sessionID,
49
+ sessionState,
50
+ options,
51
+ planName: currentBoulder.plan_name,
52
+ progress: currentProgress,
53
+ agent: currentBoulder.agent,
54
+ worktreePath: typeof currentBoulder.worktree_path === "string" ? currentBoulder.worktree_path : undefined,
55
+ })
56
+ ```
57
+
58
+ ```typescript
59
+ // BEFORE (lines 184-188 in handleAtlasSessionIdle):
60
+ await injectContinuation({
61
+ ctx,
62
+ sessionID,
63
+ sessionState,
64
+ options,
65
+ planName: boulderState.plan_name,
66
+ progress,
67
+ agent: boulderState.agent,
68
+ worktreePath: boulderState.worktree_path,
69
+ })
70
+
71
+ // AFTER:
72
+ await injectContinuation({
73
+ ctx,
74
+ sessionID,
75
+ sessionState,
76
+ options,
77
+ planName: boulderState.plan_name,
78
+ progress,
79
+ agent: boulderState.agent,
80
+ worktreePath: typeof boulderState.worktree_path === "string" ? boulderState.worktree_path : undefined,
81
+ })
82
+ ```
83
+
84
+ **Rationale**: Belt-and-suspenders defense. Even though `readBoulderState` now sanitizes, direct `writeBoulderState` calls elsewhere could still produce invalid state. The `typeof` check is zero-cost and prevents any possibility of `null` or non-string values leaking through.
85
+
86
+ ---
87
+
88
+ ## File 3: `src/hooks/atlas/index.test.ts`
89
+
90
+ **Change**: Add test cases for missing `worktree_path` scenarios within the existing `session.idle handler` describe block.
91
+
92
+ ```typescript
93
+ test("should inject continuation when boulder.json has no worktree_path field", async () => {
94
+ // given - boulder state WITHOUT worktree_path
95
+ const planPath = join(TEST_DIR, "test-plan.md")
96
+ writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
97
+
98
+ const state: BoulderState = {
99
+ active_plan: planPath,
100
+ started_at: "2026-01-02T10:00:00Z",
101
+ session_ids: [MAIN_SESSION_ID],
102
+ plan_name: "test-plan",
103
+ }
104
+ writeBoulderState(TEST_DIR, state)
105
+
106
+ const readState = readBoulderState(TEST_DIR)
107
+ expect(readState?.worktree_path).toBeUndefined()
108
+
109
+ const mockInput = createMockPluginInput()
110
+ const hook = createAtlasHook(mockInput)
111
+
112
+ // when
113
+ await hook.handler({
114
+ event: {
115
+ type: "session.idle",
116
+ properties: { sessionID: MAIN_SESSION_ID },
117
+ },
118
+ })
119
+
120
+ // then - continuation injected, no worktree context in prompt
121
+ expect(mockInput._promptMock).toHaveBeenCalled()
122
+ const callArgs = mockInput._promptMock.mock.calls[0][0]
123
+ expect(callArgs.body.parts[0].text).not.toContain("[Worktree:")
124
+ expect(callArgs.body.parts[0].text).toContain("1 remaining")
125
+ })
126
+
127
+ test("should handle boulder.json with worktree_path: null without crashing", async () => {
128
+ // given - manually write boulder.json with worktree_path: null (corrupted state)
129
+ const planPath = join(TEST_DIR, "test-plan.md")
130
+ writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
131
+
132
+ const boulderPath = join(SISYPHUS_DIR, "boulder.json")
133
+ writeFileSync(boulderPath, JSON.stringify({
134
+ active_plan: planPath,
135
+ started_at: "2026-01-02T10:00:00Z",
136
+ session_ids: [MAIN_SESSION_ID],
137
+ plan_name: "test-plan",
138
+ worktree_path: null,
139
+ }, null, 2))
140
+
141
+ const mockInput = createMockPluginInput()
142
+ const hook = createAtlasHook(mockInput)
143
+
144
+ // when
145
+ await hook.handler({
146
+ event: {
147
+ type: "session.idle",
148
+ properties: { sessionID: MAIN_SESSION_ID },
149
+ },
150
+ })
151
+
152
+ // then - should inject continuation without crash, no "[Worktree: null]"
153
+ expect(mockInput._promptMock).toHaveBeenCalled()
154
+ const callArgs = mockInput._promptMock.mock.calls[0][0]
155
+ expect(callArgs.body.parts[0].text).not.toContain("[Worktree: null]")
156
+ expect(callArgs.body.parts[0].text).not.toContain("[Worktree: undefined]")
157
+ })
158
+ ```
159
+
160
+ ---
161
+
162
+ ## File 4: `src/features/boulder-state/storage.test.ts` (addition to existing)
163
+
164
+ **Change**: Add `readBoulderState` sanitization test.
165
+
166
+ ```typescript
167
+ describe("#given boulder.json with worktree_path: null", () => {
168
+ test("#then readBoulderState should sanitize null to undefined", () => {
169
+ // given
170
+ const boulderPath = join(TEST_DIR, ".sisyphus", "boulder.json")
171
+ writeFileSync(boulderPath, JSON.stringify({
172
+ active_plan: "/path/to/plan.md",
173
+ started_at: "2026-01-02T10:00:00Z",
174
+ session_ids: ["session-1"],
175
+ plan_name: "test-plan",
176
+ worktree_path: null,
177
+ }, null, 2))
178
+
179
+ // when
180
+ const state = readBoulderState(TEST_DIR)
181
+
182
+ // then
183
+ expect(state).not.toBeNull()
184
+ expect(state!.worktree_path).toBeUndefined()
185
+ })
186
+
187
+ test("#then readBoulderState should preserve valid worktree_path string", () => {
188
+ // given
189
+ const boulderPath = join(TEST_DIR, ".sisyphus", "boulder.json")
190
+ writeFileSync(boulderPath, JSON.stringify({
191
+ active_plan: "/path/to/plan.md",
192
+ started_at: "2026-01-02T10:00:00Z",
193
+ session_ids: ["session-1"],
194
+ plan_name: "test-plan",
195
+ worktree_path: "/valid/worktree/path",
196
+ }, null, 2))
197
+
198
+ // when
199
+ const state = readBoulderState(TEST_DIR)
200
+
201
+ // then
202
+ expect(state?.worktree_path).toBe("/valid/worktree/path")
203
+ })
204
+ })
205
+ ```
@@ -0,0 +1,78 @@
1
+ # Execution Plan — Fix atlas hook crash on missing worktree_path
2
+
3
+ ## Phase 0: Setup
4
+
5
+ 1. **Create worktree from origin/dev**:
6
+ ```bash
7
+ git fetch origin dev
8
+ git worktree add ../omo-wt/fix-atlas-worktree-path-crash origin/dev
9
+ ```
10
+ 2. **Create feature branch**:
11
+ ```bash
12
+ cd ../omo-wt/fix-atlas-worktree-path-crash
13
+ git checkout -b fix/atlas-worktree-path-crash
14
+ ```
15
+
16
+ ## Phase 1: Implement
17
+
18
+ ### Step 1: Fix `readBoulderState()` in `src/features/boulder-state/storage.ts`
19
+ - Add `worktree_path` sanitization after JSON parse
20
+ - Ensure `worktree_path` is `string | undefined`, never `null` or other types
21
+ - This is the root cause: raw `JSON.parse` + `as BoulderState` cast allows type violations at runtime
22
+
23
+ ### Step 2: Add defensive guard in `src/hooks/atlas/idle-event.ts`
24
+ - Before passing `boulderState.worktree_path` to `injectContinuation`, validate it's a string
25
+ - Apply same guard in the `scheduleRetry` callback (line 86)
26
+ - Ensures even if `readBoulderState` is bypassed, the idle handler won't crash
27
+
28
+ ### Step 3: Add test coverage in `src/hooks/atlas/index.test.ts`
29
+ - Add test: boulder.json without `worktree_path` field → session.idle works
30
+ - Add test: boulder.json with `worktree_path: null` → session.idle works (no `[Worktree: null]` in prompt)
31
+ - Add test: `readBoulderState` sanitizes `null` worktree_path to `undefined`
32
+ - Follow existing given/when/then test pattern
33
+
34
+ ### Step 4: Local validation
35
+ ```bash
36
+ bun run typecheck
37
+ bun test src/hooks/atlas/
38
+ bun test src/features/boulder-state/
39
+ bun run build
40
+ ```
41
+
42
+ ### Step 5: Atomic commit
43
+ ```bash
44
+ git add src/features/boulder-state/storage.ts src/hooks/atlas/idle-event.ts src/hooks/atlas/index.test.ts
45
+ git commit -m "fix(atlas): prevent crash when boulder.json missing worktree_path field
46
+
47
+ readBoulderState() performs unsafe cast of parsed JSON as BoulderState.
48
+ When worktree_path is absent or null in boulder.json, downstream code
49
+ in idle-event.ts could receive null where string|undefined is expected.
50
+
51
+ - Sanitize worktree_path in readBoulderState (reject non-string values)
52
+ - Add defensive typeof check in idle-event before passing to continuation
53
+ - Add test coverage for missing and null worktree_path scenarios"
54
+ ```
55
+
56
+ ## Phase 2: PR Creation
57
+
58
+ ```bash
59
+ git push -u origin fix/atlas-worktree-path-crash
60
+ gh pr create \
61
+ --base dev \
62
+ --title "fix(atlas): prevent crash when boulder.json missing worktree_path" \
63
+ --body-file /tmp/pull-request-atlas-worktree-fix.md
64
+ ```
65
+
66
+ ## Phase 3: Verify Loop
67
+
68
+ - **Gate A (CI)**: `gh pr checks --watch` — wait for all checks green
69
+ - **Gate B (review-work)**: Run 5-agent review (Oracle goal, Oracle quality, Oracle security, QA execution, context mining)
70
+ - **Gate C (Cubic)**: Wait for cubic-dev-ai[bot] to respond "No issues found"
71
+ - On any failure: fix-commit-push, re-enter verify loop
72
+
73
+ ## Phase 4: Merge
74
+
75
+ ```bash
76
+ gh pr merge --squash --delete-branch
77
+ git worktree remove ../omo-wt/fix-atlas-worktree-path-crash
78
+ ```
@@ -0,0 +1,42 @@
1
+ # PR Title
2
+
3
+ ```
4
+ fix(atlas): prevent crash when boulder.json missing worktree_path
5
+ ```
6
+
7
+ # PR Body
8
+
9
+ ## Summary
10
+
11
+ - Fix runtime type violation in atlas hook when `boulder.json` lacks `worktree_path` field
12
+ - Add `worktree_path` sanitization in `readBoulderState()` to reject non-string values (e.g., `null` from manual edits)
13
+ - Add defensive `typeof` guards in `idle-event.ts` before passing worktree path to continuation injection
14
+ - Add test coverage for missing and null `worktree_path` scenarios
15
+
16
+ ## Problem
17
+
18
+ `readBoulderState()` in `src/features/boulder-state/storage.ts` casts raw `JSON.parse()` output directly as `BoulderState` via `return parsed as BoulderState`. This bypasses TypeScript's type system entirely at runtime.
19
+
20
+ When `boulder.json` is missing the `worktree_path` field (common for boulders created before worktree support was added, or created without `--worktree` flag), `boulderState.worktree_path` is `undefined` which is handled correctly. However, when boulder.json has `"worktree_path": null` (possible from manual edits, external tooling, or corrupted state), the runtime type becomes `null` which violates the TypeScript type `string | undefined`.
21
+
22
+ This `null` value propagates through:
23
+ 1. `idle-event.ts:handleAtlasSessionIdle()` → `injectContinuation()` → `injectBoulderContinuation()`
24
+ 2. `idle-event.ts:scheduleRetry()` callback → same chain
25
+
26
+ While the `boulder-continuation-injector.ts` handles falsy values via `worktreePath ? ... : ""`, the type mismatch can cause subtle downstream issues and violates the contract of the `BoulderState` interface.
27
+
28
+ ## Changes
29
+
30
+ | File | Change |
31
+ |------|--------|
32
+ | `src/features/boulder-state/storage.ts` | Sanitize `worktree_path` in `readBoulderState()` — reject non-string values |
33
+ | `src/hooks/atlas/idle-event.ts` | Add `typeof` guards before passing worktree_path to continuation (2 call sites) |
34
+ | `src/hooks/atlas/index.test.ts` | Add 2 tests: missing worktree_path + null worktree_path in session.idle |
35
+ | `src/features/boulder-state/storage.test.ts` | Add 2 tests: sanitization of null + preservation of valid string |
36
+
37
+ ## Testing
38
+
39
+ - `bun test src/hooks/atlas/` — all existing + new tests pass
40
+ - `bun test src/features/boulder-state/` — all existing + new tests pass
41
+ - `bun run typecheck` — clean
42
+ - `bun run build` — clean
@@ -0,0 +1,87 @@
1
+ # Verification Strategy
2
+
3
+ ## Gate A: CI (`gh pr checks --watch`)
4
+
5
+ ### What CI runs (from `ci.yml`)
6
+ 1. **Tests (split)**: Mock-heavy tests in isolation + batch tests
7
+ 2. **Typecheck**: `bun run typecheck` (tsc --noEmit)
8
+ 3. **Build**: `bun run build` (ESM + declarations + schema)
9
+
10
+ ### Pre-push local validation
11
+ Before pushing, run the exact CI steps locally to catch failures early:
12
+
13
+ ```bash
14
+ # Targeted test runs first (fast feedback)
15
+ bun test src/features/boulder-state/storage.test.ts
16
+ bun test src/hooks/atlas/index.test.ts
17
+
18
+ # Full test suite
19
+ bun test
20
+
21
+ # Type check
22
+ bun run typecheck
23
+
24
+ # Build
25
+ bun run build
26
+ ```
27
+
28
+ ### Failure handling
29
+ - **Test failure**: Read test output, fix code, create new commit (never amend pushed commits), push
30
+ - **Typecheck failure**: Run `lsp_diagnostics` on changed files, fix type errors, commit, push
31
+ - **Build failure**: Check build output for missing exports or circular deps, fix, commit, push
32
+
33
+ After each fix-commit-push: `gh pr checks --watch` to re-enter gate
34
+
35
+ ## Gate B: review-work (5-agent review)
36
+
37
+ ### The 5 parallel agents
38
+ 1. **Oracle (goal/constraint verification)**: Checks the fix matches the stated problem — `worktree_path` crash resolved, no scope creep
39
+ 2. **Oracle (code quality)**: Validates code follows existing patterns — factory pattern, given/when/then tests, < 200 LOC, no catch-all files
40
+ 3. **Oracle (security)**: Ensures no new security issues — JSON parse injection, path traversal in worktree_path
41
+ 4. **QA agent (hands-on execution)**: Actually runs the tests, checks `lsp_diagnostics` on changed files, verifies the fix in action
42
+ 5. **Context mining agent**: Checks GitHub issues, git history, related PRs for context alignment
43
+
44
+ ### Expected focus areas for this PR
45
+ - Oracle (goal): Does the sanitization in `readBoulderState` actually prevent the crash? Is the `typeof` guard necessary or redundant?
46
+ - Oracle (quality): Are the new tests following the given/when/then pattern? Do they use the same mock setup as existing tests?
47
+ - Oracle (security): Is the `worktree_path` value ever used in path operations without sanitization? (Answer: no, it's only used in template strings)
48
+ - QA: Run `bun test src/hooks/atlas/index.test.ts` — does the null worktree_path test actually trigger the bug before fix?
49
+
50
+ ### Failure handling
51
+ - Each oracle produces a PASS/FAIL verdict with specific issues
52
+ - On FAIL: read the specific issue, fix in the worktree, commit, push, re-run review-work
53
+ - All 5 agents must PASS
54
+
55
+ ## Gate C: Cubic (`cubic-dev-ai[bot]`)
56
+
57
+ ### What Cubic checks
58
+ - Automated code review bot that analyzes the PR diff
59
+ - Looks for: type safety issues, missing error handling, test coverage gaps, anti-patterns
60
+
61
+ ### Expected result
62
+ - "No issues found" for this small, focused fix
63
+ - 3 files changed (storage.ts, idle-event.ts, index.test.ts) + 1 test file
64
+
65
+ ### Failure handling
66
+ - If Cubic flags an issue: evaluate if it's a real concern or false positive
67
+ - Real concern: fix, commit, push
68
+ - False positive: comment explaining why the flagged pattern is intentional
69
+ - Wait for Cubic to re-review after push
70
+
71
+ ## Post-verification: Merge
72
+
73
+ Once all 3 gates pass:
74
+ ```bash
75
+ gh pr merge --squash --delete-branch
76
+ git worktree remove ../omo-wt/fix-atlas-worktree-path-crash
77
+ ```
78
+
79
+ On merge failure (conflicts):
80
+ ```bash
81
+ cd ../omo-wt/fix-atlas-worktree-path-crash
82
+ git fetch origin dev
83
+ git rebase origin/dev
84
+ # Resolve conflicts if any
85
+ git push --force-with-lease
86
+ # Re-enter verify loop from Gate A
87
+ ```
@@ -0,0 +1 @@
1
+ {"total_tokens": null, "duration_ms": 506000, "total_duration_seconds": 506}
@@ -0,0 +1,11 @@
1
+ {
2
+ "run_id": "eval-2-without_skill",
3
+ "expectations": [
4
+ {"text": "Plan uses git worktree in a sibling directory", "passed": false, "evidence": "No worktree. Steps go directly to creating branch and modifying files."},
5
+ {"text": "Fix is minimal — adds null check, doesn't refactor unrelated code", "passed": true, "evidence": "Focused fix though also adds try/catch in setTimeout (reasonable secondary fix)"},
6
+ {"text": "Test case added for the missing worktree_path scenario", "passed": true, "evidence": "Detailed test plan for missing/null/malformed boulder.json"},
7
+ {"text": "Verification loop includes all 3 gates", "passed": false, "evidence": "Only mentions CI pipeline (step 5). No review-work or Cubic."},
8
+ {"text": "References actual atlas hook files", "passed": true, "evidence": "References idle-event.ts, storage.ts with line numbers"},
9
+ {"text": "Branch name follows fix/ prefix convention", "passed": true, "evidence": "fix/atlas-hook-missing-worktree-path"}
10
+ ]
11
+ }