oh-my-opencode 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/command/get-unpublished-changes.md +148 -0
- package/.agents/command/omomomo.md +37 -0
- package/.agents/command/publish.md +376 -0
- package/.agents/command/remove-deadcode.md +221 -0
- package/.agents/command/security-research.md +16 -0
- package/.agents/skills/get-unpublished-changes/SKILL.md +24 -0
- package/.agents/skills/github-triage/SKILL.md +587 -0
- package/.agents/skills/github-triage/scripts/gh_fetch.py +398 -0
- package/.agents/skills/hyperplan/SKILL.md +450 -0
- package/.agents/skills/omomomo/SKILL.md +36 -0
- package/.agents/skills/pre-publish-review/SKILL.md +407 -0
- package/.agents/skills/publish/SKILL.md +428 -0
- package/.agents/skills/remove-deadcode/SKILL.md +216 -0
- package/.agents/skills/security-research/SKILL.md +204 -0
- package/.agents/skills/work-with-pr/SKILL.md +360 -0
- package/.agents/skills/work-with-pr-workspace/evals/evals.json +76 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/benchmark.json +138 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/benchmark.md +42 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/eval_metadata.json +57 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/grading.json +15 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/code-changes.md +454 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/execution-plan.md +136 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/pr-description.md +47 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/verification-strategy.md +163 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/grading.json +15 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/code-changes.md +615 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/execution-plan.md +99 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/pr-description.md +50 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/verification-strategy.md +111 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/eval_metadata.json +37 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/grading.json +11 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md +205 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/execution-plan.md +78 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/pr-description.md +42 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/verification-strategy.md +87 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/grading.json +11 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/code-changes.md +334 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/execution-plan.md +86 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/pr-description.md +23 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/verification-strategy.md +119 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/eval_metadata.json +32 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/code-changes.md +221 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/execution-plan.md +104 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/pr-description.md +41 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/verification-strategy.md +84 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/code-changes.md +342 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/execution-plan.md +131 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/pr-description.md +39 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/verification-strategy.md +128 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/eval_metadata.json +32 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/code-changes.md +143 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/execution-plan.md +82 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/pr-description.md +51 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/verification-strategy.md +69 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/code-changes.md +252 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/execution-plan.md +83 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/pr-description.md +33 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/verification-strategy.md +101 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/eval_metadata.json +32 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/code-changes.md +387 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/execution-plan.md +112 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/pr-description.md +51 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/verification-strategy.md +75 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/code-changes.md +529 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/execution-plan.md +127 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/pr-description.md +42 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/verification-strategy.md +120 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/review.html +1326 -0
- package/.opencode/command/get-unpublished-changes.md +148 -0
- package/.opencode/command/omomomo.md +37 -0
- package/.opencode/command/publish.md +376 -0
- package/.opencode/command/remove-deadcode.md +221 -0
- package/.opencode/command/security-research.md +16 -0
- package/.opencode/skills/github-triage/SKILL.md +587 -0
- package/.opencode/skills/github-triage/scripts/gh_fetch.py +398 -0
- package/.opencode/skills/hyperplan/SKILL.md +450 -0
- package/.opencode/skills/pre-publish-review/SKILL.md +407 -0
- package/.opencode/skills/work-with-pr/SKILL.md +360 -0
- package/.opencode/skills/work-with-pr-workspace/evals/evals.json +76 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/benchmark.json +138 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/benchmark.md +42 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/eval_metadata.json +57 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/grading.json +15 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/code-changes.md +454 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/execution-plan.md +136 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/pr-description.md +47 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/verification-strategy.md +163 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/grading.json +15 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/code-changes.md +615 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/execution-plan.md +99 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/pr-description.md +50 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/verification-strategy.md +111 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/eval_metadata.json +37 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/grading.json +11 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md +205 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/execution-plan.md +78 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/pr-description.md +42 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/verification-strategy.md +87 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/grading.json +11 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/code-changes.md +334 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/execution-plan.md +86 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/pr-description.md +23 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/verification-strategy.md +119 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/eval_metadata.json +32 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/code-changes.md +221 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/execution-plan.md +104 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/pr-description.md +41 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/verification-strategy.md +84 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/code-changes.md +342 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/execution-plan.md +131 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/pr-description.md +39 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/verification-strategy.md +128 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/eval_metadata.json +32 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/code-changes.md +143 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/execution-plan.md +82 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/pr-description.md +51 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/verification-strategy.md +69 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/code-changes.md +252 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/execution-plan.md +83 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/pr-description.md +33 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/verification-strategy.md +101 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/eval_metadata.json +32 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/code-changes.md +387 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/execution-plan.md +112 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/pr-description.md +51 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/verification-strategy.md +75 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/code-changes.md +529 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/execution-plan.md +127 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/pr-description.md +42 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/verification-strategy.md +120 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/review.html +1326 -0
- package/README.ja.md +1 -1
- package/README.ko.md +1 -1
- package/README.md +1 -1
- package/README.ru.md +1 -1
- package/README.zh-cn.md +1 -1
- package/dist/agents/atlas/agent.d.ts +6 -6
- package/dist/agents/prometheus/gemini.d.ts +0 -11
- package/dist/agents/prometheus/gpt.d.ts +0 -10
- package/dist/agents/prometheus/system-prompt.d.ts +2 -20
- package/dist/agents/types.d.ts +1 -16
- package/dist/cli/index.js +178 -129
- package/dist/config/schema/agent-names.d.ts +3 -3
- package/dist/config/schema/agent-overrides.d.ts +208 -208
- package/dist/config/schema/categories.d.ts +28 -28
- package/dist/config/schema/fallback-models.d.ts +20 -20
- package/dist/config/schema/oh-my-opencode-config.d.ts +208 -208
- package/dist/features/background-agent/parent-wake-notifier.d.ts +8 -1
- package/dist/help/schema/acp.d.ts +95 -0
- package/dist/help/schema/doctor.d.ts +147 -0
- package/dist/help/schema/sandbox.d.ts +74 -0
- package/dist/help/schema/status.d.ts +139 -0
- package/dist/hooks/keyword-detector/analyze/default.d.ts +1 -1
- package/dist/hooks/keyword-detector/hyperplan/default.d.ts +1 -1
- package/dist/hooks/keyword-detector/search/default.d.ts +1 -1
- package/dist/hooks/keyword-detector/team/default.d.ts +2 -7
- package/dist/hooks/keyword-detector/ultrawork/default.d.ts +1 -9
- package/dist/hooks/keyword-detector/ultrawork/gemini.d.ts +1 -16
- package/dist/hooks/keyword-detector/ultrawork/gpt.d.ts +1 -10
- package/dist/hooks/keyword-detector/ultrawork/planner.d.ts +1 -5
- package/dist/hooks/ralph-loop/no-progress-turn-detector.d.ts +7 -0
- package/dist/hooks/ralph-loop/pending-verification-handler.d.ts +1 -0
- package/dist/hooks/ralph-loop/types.d.ts +1 -0
- package/dist/hooks/runtime-fallback/error-classifier.d.ts +1 -0
- package/dist/index.js +52205 -50528
- package/dist/shared/prompt-async-gate/pending-tool-turn.d.ts +1 -0
- package/dist/shared/prompt-async-gate/types.d.ts +4 -3
- package/package.json +19 -13
- package/dist/agents/atlas/default-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/default.d.ts +0 -2
- package/dist/agents/atlas/gemini-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/gemini.d.ts +0 -2
- package/dist/agents/atlas/gpt-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/gpt.d.ts +0 -2
- package/dist/agents/atlas/kimi-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/kimi.d.ts +0 -2
- package/dist/agents/atlas/opus-4-7-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/opus-4-7.d.ts +0 -2
- package/dist/agents/atlas/shared-prompt.d.ts +0 -9
- package/dist/agents/prometheus/behavioral-summary.d.ts +0 -6
- package/dist/agents/prometheus/high-accuracy-mode.d.ts +0 -6
- package/dist/agents/prometheus/identity-constraints.d.ts +0 -7
- package/dist/agents/prometheus/interview-mode.d.ts +0 -7
- package/dist/agents/prometheus/plan-generation.d.ts +0 -7
- package/dist/agents/prometheus/plan-template.d.ts +0 -7
- package/dist/agents/prometheus/spec-driven-mode.d.ts +0 -7
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# PR Description
|
|
2
|
+
|
|
3
|
+
**Title:** feat: add `maxBackgroundAgents` config to limit total simultaneous background agents
|
|
4
|
+
|
|
5
|
+
**Body:**
|
|
6
|
+
|
|
7
|
+
## Summary
|
|
8
|
+
|
|
9
|
+
- Add `maxBackgroundAgents` field to `BackgroundTaskConfigSchema` that enforces a global ceiling on total running background agents across all models/providers
|
|
10
|
+
- Modify `ConcurrencyManager` to track global count and enforce the limit alongside existing per-model limits
|
|
11
|
+
- Add schema validation tests and concurrency enforcement tests
|
|
12
|
+
|
|
13
|
+
## Motivation
|
|
14
|
+
|
|
15
|
+
Currently, concurrency is limited only per model/provider key (default 5 per key). On resource-constrained machines, or when many different models are in use, the total number of background agents can grow without bound (5 per model × N models). This config option lets users set a hard ceiling.
|
|
16
|
+
|
|
17
|
+
## Changes
|
|
18
|
+
|
|
19
|
+
### Schema (`src/config/schema/background-task.ts`)
|
|
20
|
+
- Added `maxBackgroundAgents: z.number().int().min(1).optional()` to `BackgroundTaskConfigSchema`
|
|
21
|
+
- Grouped with existing limit fields (`maxDepth`, `maxDescendants`)
|
|
22
|
+
|
|
23
|
+
### ConcurrencyManager (`src/features/background-agent/concurrency.ts`)
|
|
24
|
+
- Added `globalCount` tracking total active agents across all concurrency keys
|
|
25
|
+
- Added `getGlobalLimit()` reading `maxBackgroundAgents` from config (defaults to `Infinity` = no global limit)
|
|
26
|
+
- Modified `acquire()` to check both per-model AND global capacity
|
|
27
|
+
- Modified `release()` to decrement global count and drain cross-model waiters blocked by global limit
|
|
28
|
+
- Modified `clear()` to reset global state
|
|
29
|
+
- Added `getGlobalCount()` / `getGlobalQueueLength()` for testing
|
|
30
|
+
|
|
31
|
+
### Tests
|
|
32
|
+
- `src/config/schema/background-task.test.ts`: 6 test cases for schema validation (valid, min boundary, below min, negative, non-integer, undefined)
|
|
33
|
+
- `src/features/background-agent/concurrency.test.ts`: 8 test cases for global limit enforcement (cross-model blocking, release unblocking, per-model vs global interaction, no-config default, clear reset)
|
|
34
|
+
|
|
35
|
+
## Config Example
|
|
36
|
+
|
|
37
|
+
```jsonc
|
|
38
|
+
{
|
|
39
|
+
"background_task": {
|
|
40
|
+
"maxBackgroundAgents": 5,
|
|
41
|
+
"defaultConcurrency": 3
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Backward Compatibility
|
|
47
|
+
|
|
48
|
+
- When `maxBackgroundAgents` is not set (default), no global limit is enforced - behavior is identical to before
|
|
49
|
+
- Existing `defaultConcurrency`, `providerConcurrency`, and `modelConcurrency` continue to work unchanged
|
|
50
|
+
- No config migration needed
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# Verification Strategy
|
|
2
|
+
|
|
3
|
+
## 1. Static Analysis
|
|
4
|
+
|
|
5
|
+
### TypeScript Typecheck
|
|
6
|
+
```bash
|
|
7
|
+
bun run typecheck
|
|
8
|
+
```
|
|
9
|
+
- Verify that no type errors are introduced
|
|
10
|
+
- `BackgroundTaskConfig` type is inferred from Zod schema, so adding the field automatically updates the type
|
|
11
|
+
- All existing consumers of `BackgroundTaskConfig` remain compatible (new field is optional)
|
|
12
|
+
|
|
13
|
+
### LSP Diagnostics
|
|
14
|
+
Check changed files for errors:
|
|
15
|
+
- `src/config/schema/background-task.ts`
|
|
16
|
+
- `src/features/background-agent/concurrency.ts`
|
|
17
|
+
- `src/config/schema/background-task.test.ts`
|
|
18
|
+
- `src/features/background-agent/concurrency.test.ts`
|
|
19
|
+
|
|
20
|
+
## 2. Unit Tests
|
|
21
|
+
|
|
22
|
+
### Schema Validation Tests
|
|
23
|
+
```bash
|
|
24
|
+
bun test src/config/schema/background-task.test.ts
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
| Test Case | Input | Expected |
|
|
28
|
+
|-----------|-------|----------|
|
|
29
|
+
| Valid value (10) | `{ maxBackgroundAgents: 10 }` | Parses to `10` |
|
|
30
|
+
| Minimum boundary (1) | `{ maxBackgroundAgents: 1 }` | Parses to `1` |
|
|
31
|
+
| Below minimum (0) | `{ maxBackgroundAgents: 0 }` | Throws `ZodError` |
|
|
32
|
+
| Negative (-1) | `{ maxBackgroundAgents: -1 }` | Throws `ZodError` |
|
|
33
|
+
| Non-integer (2.5) | `{ maxBackgroundAgents: 2.5 }` | Throws `ZodError` |
|
|
34
|
+
| Not provided | `{}` | Field is `undefined` |
|
|
35
|
+
|
|
36
|
+
### ConcurrencyManager Tests
|
|
37
|
+
```bash
|
|
38
|
+
bun test src/features/background-agent/concurrency.test.ts
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
| Test Case | Setup | Expected |
|
|
42
|
+
|-----------|-------|----------|
|
|
43
|
+
| No config = no global limit | No `maxBackgroundAgents` | `getGlobalLimit()` returns `Infinity` |
|
|
44
|
+
| Config respected | `maxBackgroundAgents: 3` | `getGlobalLimit()` returns `3` |
|
|
45
|
+
| Cross-model blocking | Global limit 2, acquire model-a + model-b, try model-c | model-c blocks |
|
|
46
|
+
| Under-limit allows | Global limit 3, acquire 3 different models | All succeed |
|
|
47
|
+
| Per-model + global interaction | Per-model 1, global 3, acquire model-a twice | Blocked by per-model, not global |
|
|
48
|
+
| Release unblocks | Global limit 1, acquire model-a, queue model-b, release model-a | model-b proceeds |
|
|
49
|
+
| No global limit = no enforcement | No config, acquire 6 different models | All succeed |
|
|
50
|
+
| Clear resets global count | Acquire 2, clear | `getGlobalCount()` is 0 |
|
|
51
|
+
|
|
52
|
+
### Existing Test Regression
|
|
53
|
+
```bash
|
|
54
|
+
bun test src/features/background-agent/concurrency.test.ts
|
|
55
|
+
bun test src/config/schema/background-task.test.ts
|
|
56
|
+
bun test src/config/schema.test.ts
|
|
57
|
+
```
|
|
58
|
+
All existing tests must continue to pass unchanged.
|
|
59
|
+
|
|
60
|
+
## 3. Integration Verification
|
|
61
|
+
|
|
62
|
+
### Config Loading Path
|
|
63
|
+
Verify the config flows correctly through the system:
|
|
64
|
+
|
|
65
|
+
1. **Schema → Type**: `BackgroundTaskConfig` type auto-includes `maxBackgroundAgents` via `z.infer`
|
|
66
|
+
2. **Config file → Schema**: `loadConfigFromPath()` in `plugin-config.ts` uses `OhMyOpenCodeConfigSchema.safeParse()` which includes `BackgroundTaskConfigSchema`
|
|
67
|
+
3. **Config → Manager**: `create-managers.ts` passes `pluginConfig.background_task` to `BackgroundManager` constructor
|
|
68
|
+
4. **Manager → ConcurrencyManager**: `BackgroundManager` constructor passes config to `new ConcurrencyManager(config)`
|
|
69
|
+
5. **ConcurrencyManager → Enforcement**: `acquire()` reads `config.maxBackgroundAgents` via `getGlobalLimit()`
|
|
70
|
+
|
|
71
|
+
No changes needed in steps 2-4 since the field is optional and the existing plumbing passes the entire `BackgroundTaskConfig` object.
|
|
72
|
+
|
|
73
|
+
### Manual Config Test
|
|
74
|
+
Create a test config to verify parsing:
|
|
75
|
+
```bash
|
|
76
|
+
echo '{ "background_task": { "maxBackgroundAgents": 3 } }' | bun -e "
|
|
77
|
+
const { BackgroundTaskConfigSchema } = require('./src/config/schema/background-task');
|
|
78
|
+
const result = BackgroundTaskConfigSchema.safeParse(JSON.parse(require('fs').readFileSync('/dev/stdin', 'utf-8')).background_task);
|
|
79
|
+
console.log(result.success, result.data);
|
|
80
|
+
"
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## 4. Build Verification
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
bun run build
|
|
87
|
+
```
|
|
88
|
+
- Verify build succeeds
|
|
89
|
+
- Verify the schema JSON output includes the new field (if applicable)
|
|
90
|
+
|
|
91
|
+
## 5. Edge Cases to Verify
|
|
92
|
+
|
|
93
|
+
| Edge Case | Expected Behavior |
|
|
94
|
+
|-----------|-------------------|
|
|
95
|
+
| `maxBackgroundAgents` not set | No global limit enforced (backward compatible) |
|
|
96
|
+
| `maxBackgroundAgents: 1` | Only 1 background agent at a time across all models |
|
|
97
|
+
| `maxBackgroundAgents` > sum of all per-model limits | Global limit never triggers (per-model limits are tighter) |
|
|
98
|
+
| Per-model limit tighter than global | Per-model limit blocks first |
|
|
99
|
+
| Global limit tighter than per-model | Global limit blocks first |
|
|
100
|
+
| Release from one model unblocks different model | Global slot freed, different model's waiter proceeds |
|
|
101
|
+
| Manager shutdown with global waiters | `clear()` rejects all waiters and resets global count |
|
|
102
|
+
| Concurrent acquire/release | No race conditions (single-threaded JS event loop) |
|
|
103
|
+
|
|
104
|
+
## 6. CI Pipeline
|
|
105
|
+
|
|
106
|
+
The existing CI workflow (`ci.yml`) will run:
|
|
107
|
+
- `bun run typecheck` - type checking
|
|
108
|
+
- `bun test` - all tests including new ones
|
|
109
|
+
- `bun run build` - build verification
|
|
110
|
+
|
|
111
|
+
No CI changes needed.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"total_tokens": null, "duration_ms": 365000, "total_duration_seconds": 365}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"eval_id": 2,
|
|
3
|
+
"eval_name": "bugfix-atlas-null-check",
|
|
4
|
+
"prompt": "The atlas hook has a bug where it crashes when boulder.json is missing the worktree_path field. Fix it and land the fix as a PR. Make sure CI passes.",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{
|
|
7
|
+
"id": "worktree-isolation",
|
|
8
|
+
"text": "Plan uses git worktree in a sibling directory",
|
|
9
|
+
"type": "manual"
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"id": "minimal-fix",
|
|
13
|
+
"text": "Fix is minimal — adds null check, doesn't refactor unrelated code",
|
|
14
|
+
"type": "manual"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "test-added",
|
|
18
|
+
"text": "Test case added for the missing worktree_path scenario",
|
|
19
|
+
"type": "manual"
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"id": "three-gates",
|
|
23
|
+
"text": "Verification loop includes all 3 gates: CI, review-work, Cubic",
|
|
24
|
+
"type": "manual"
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"id": "real-atlas-files",
|
|
28
|
+
"text": "References actual atlas hook files in src/hooks/atlas/",
|
|
29
|
+
"type": "manual"
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"id": "fix-branch-naming",
|
|
33
|
+
"text": "Branch name follows fix/ prefix convention",
|
|
34
|
+
"type": "manual"
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"run_id": "eval-2-with_skill",
|
|
3
|
+
"expectations": [
|
|
4
|
+
{"text": "Plan uses git worktree in a sibling directory", "passed": true, "evidence": "../omo-wt/fix-atlas-worktree-path-crash"},
|
|
5
|
+
{"text": "Fix is minimal — adds null check, doesn't refactor unrelated code", "passed": true, "evidence": "3 targeted changes: readBoulderState sanitization, idle-event guard, tests"},
|
|
6
|
+
{"text": "Test case added for the missing worktree_path scenario", "passed": true, "evidence": "Tests for missing and null worktree_path"},
|
|
7
|
+
{"text": "Verification loop includes all 3 gates", "passed": true, "evidence": "Gate A (CI), Gate B (review-work), Gate C (Cubic)"},
|
|
8
|
+
{"text": "References actual atlas hook files", "passed": true, "evidence": "src/hooks/atlas/idle-event.ts, src/features/boulder-state/storage.ts"},
|
|
9
|
+
{"text": "Branch name follows fix/ prefix convention", "passed": true, "evidence": "fix/atlas-worktree-path-crash"}
|
|
10
|
+
]
|
|
11
|
+
}
|
package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# Code Changes
|
|
2
|
+
|
|
3
|
+
## File 1: `src/features/boulder-state/storage.ts`
|
|
4
|
+
|
|
5
|
+
**Change**: Add `worktree_path` sanitization in `readBoulderState()`
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
// BEFORE (lines 29-32):
|
|
9
|
+
if (!Array.isArray(parsed.session_ids)) {
|
|
10
|
+
parsed.session_ids = []
|
|
11
|
+
}
|
|
12
|
+
return parsed as BoulderState
|
|
13
|
+
|
|
14
|
+
// AFTER:
|
|
15
|
+
if (!Array.isArray(parsed.session_ids)) {
|
|
16
|
+
parsed.session_ids = []
|
|
17
|
+
}
|
|
18
|
+
if (parsed.worktree_path !== undefined && typeof parsed.worktree_path !== "string") {
|
|
19
|
+
parsed.worktree_path = undefined
|
|
20
|
+
}
|
|
21
|
+
return parsed as BoulderState
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
**Rationale**: `readBoulderState` casts the raw `JSON.parse()` output to `BoulderState` without validating individual fields. When boulder.json contains `"worktree_path": null` (valid JSON that can result from manual edits, corrupted state, or external tools), the runtime value is `null` but the TypeScript type says `string | undefined`. This sanitization ensures downstream code always receives a value of the declared type.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## File 2: `src/hooks/atlas/idle-event.ts`
|
|
29
|
+
|
|
30
|
+
**Change**: Add defensive string type guard before passing `worktree_path` to continuation functions.
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
// BEFORE (lines 83-88 in scheduleRetry):
|
|
34
|
+
await injectContinuation({
|
|
35
|
+
ctx,
|
|
36
|
+
sessionID,
|
|
37
|
+
sessionState,
|
|
38
|
+
options,
|
|
39
|
+
planName: currentBoulder.plan_name,
|
|
40
|
+
progress: currentProgress,
|
|
41
|
+
agent: currentBoulder.agent,
|
|
42
|
+
worktreePath: currentBoulder.worktree_path,
|
|
43
|
+
})
|
|
44
|
+
|
|
45
|
+
// AFTER:
|
|
46
|
+
await injectContinuation({
|
|
47
|
+
ctx,
|
|
48
|
+
sessionID,
|
|
49
|
+
sessionState,
|
|
50
|
+
options,
|
|
51
|
+
planName: currentBoulder.plan_name,
|
|
52
|
+
progress: currentProgress,
|
|
53
|
+
agent: currentBoulder.agent,
|
|
54
|
+
worktreePath: typeof currentBoulder.worktree_path === "string" ? currentBoulder.worktree_path : undefined,
|
|
55
|
+
})
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
```typescript
|
|
59
|
+
// BEFORE (lines 184-188 in handleAtlasSessionIdle):
|
|
60
|
+
await injectContinuation({
|
|
61
|
+
ctx,
|
|
62
|
+
sessionID,
|
|
63
|
+
sessionState,
|
|
64
|
+
options,
|
|
65
|
+
planName: boulderState.plan_name,
|
|
66
|
+
progress,
|
|
67
|
+
agent: boulderState.agent,
|
|
68
|
+
worktreePath: boulderState.worktree_path,
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
// AFTER:
|
|
72
|
+
await injectContinuation({
|
|
73
|
+
ctx,
|
|
74
|
+
sessionID,
|
|
75
|
+
sessionState,
|
|
76
|
+
options,
|
|
77
|
+
planName: boulderState.plan_name,
|
|
78
|
+
progress,
|
|
79
|
+
agent: boulderState.agent,
|
|
80
|
+
worktreePath: typeof boulderState.worktree_path === "string" ? boulderState.worktree_path : undefined,
|
|
81
|
+
})
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Rationale**: Belt-and-suspenders defense. Even though `readBoulderState` now sanitizes, direct `writeBoulderState` calls elsewhere could still produce invalid state. The `typeof` check is zero-cost and prevents any possibility of `null` or non-string values leaking through.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## File 3: `src/hooks/atlas/index.test.ts`
|
|
89
|
+
|
|
90
|
+
**Change**: Add test cases for missing `worktree_path` scenarios within the existing `session.idle handler` describe block.
|
|
91
|
+
|
|
92
|
+
```typescript
|
|
93
|
+
test("should inject continuation when boulder.json has no worktree_path field", async () => {
|
|
94
|
+
// given - boulder state WITHOUT worktree_path
|
|
95
|
+
const planPath = join(TEST_DIR, "test-plan.md")
|
|
96
|
+
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
|
|
97
|
+
|
|
98
|
+
const state: BoulderState = {
|
|
99
|
+
active_plan: planPath,
|
|
100
|
+
started_at: "2026-01-02T10:00:00Z",
|
|
101
|
+
session_ids: [MAIN_SESSION_ID],
|
|
102
|
+
plan_name: "test-plan",
|
|
103
|
+
}
|
|
104
|
+
writeBoulderState(TEST_DIR, state)
|
|
105
|
+
|
|
106
|
+
const readState = readBoulderState(TEST_DIR)
|
|
107
|
+
expect(readState?.worktree_path).toBeUndefined()
|
|
108
|
+
|
|
109
|
+
const mockInput = createMockPluginInput()
|
|
110
|
+
const hook = createAtlasHook(mockInput)
|
|
111
|
+
|
|
112
|
+
// when
|
|
113
|
+
await hook.handler({
|
|
114
|
+
event: {
|
|
115
|
+
type: "session.idle",
|
|
116
|
+
properties: { sessionID: MAIN_SESSION_ID },
|
|
117
|
+
},
|
|
118
|
+
})
|
|
119
|
+
|
|
120
|
+
// then - continuation injected, no worktree context in prompt
|
|
121
|
+
expect(mockInput._promptMock).toHaveBeenCalled()
|
|
122
|
+
const callArgs = mockInput._promptMock.mock.calls[0][0]
|
|
123
|
+
expect(callArgs.body.parts[0].text).not.toContain("[Worktree:")
|
|
124
|
+
expect(callArgs.body.parts[0].text).toContain("1 remaining")
|
|
125
|
+
})
|
|
126
|
+
|
|
127
|
+
test("should handle boulder.json with worktree_path: null without crashing", async () => {
|
|
128
|
+
// given - manually write boulder.json with worktree_path: null (corrupted state)
|
|
129
|
+
const planPath = join(TEST_DIR, "test-plan.md")
|
|
130
|
+
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
|
|
131
|
+
|
|
132
|
+
const boulderPath = join(SISYPHUS_DIR, "boulder.json")
|
|
133
|
+
writeFileSync(boulderPath, JSON.stringify({
|
|
134
|
+
active_plan: planPath,
|
|
135
|
+
started_at: "2026-01-02T10:00:00Z",
|
|
136
|
+
session_ids: [MAIN_SESSION_ID],
|
|
137
|
+
plan_name: "test-plan",
|
|
138
|
+
worktree_path: null,
|
|
139
|
+
}, null, 2))
|
|
140
|
+
|
|
141
|
+
const mockInput = createMockPluginInput()
|
|
142
|
+
const hook = createAtlasHook(mockInput)
|
|
143
|
+
|
|
144
|
+
// when
|
|
145
|
+
await hook.handler({
|
|
146
|
+
event: {
|
|
147
|
+
type: "session.idle",
|
|
148
|
+
properties: { sessionID: MAIN_SESSION_ID },
|
|
149
|
+
},
|
|
150
|
+
})
|
|
151
|
+
|
|
152
|
+
// then - should inject continuation without crash, no "[Worktree: null]"
|
|
153
|
+
expect(mockInput._promptMock).toHaveBeenCalled()
|
|
154
|
+
const callArgs = mockInput._promptMock.mock.calls[0][0]
|
|
155
|
+
expect(callArgs.body.parts[0].text).not.toContain("[Worktree: null]")
|
|
156
|
+
expect(callArgs.body.parts[0].text).not.toContain("[Worktree: undefined]")
|
|
157
|
+
})
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## File 4: `src/features/boulder-state/storage.test.ts` (addition to existing)
|
|
163
|
+
|
|
164
|
+
**Change**: Add `readBoulderState` sanitization test.
|
|
165
|
+
|
|
166
|
+
```typescript
|
|
167
|
+
describe("#given boulder.json with worktree_path: null", () => {
|
|
168
|
+
test("#then readBoulderState should sanitize null to undefined", () => {
|
|
169
|
+
// given
|
|
170
|
+
const boulderPath = join(TEST_DIR, ".sisyphus", "boulder.json")
|
|
171
|
+
writeFileSync(boulderPath, JSON.stringify({
|
|
172
|
+
active_plan: "/path/to/plan.md",
|
|
173
|
+
started_at: "2026-01-02T10:00:00Z",
|
|
174
|
+
session_ids: ["session-1"],
|
|
175
|
+
plan_name: "test-plan",
|
|
176
|
+
worktree_path: null,
|
|
177
|
+
}, null, 2))
|
|
178
|
+
|
|
179
|
+
// when
|
|
180
|
+
const state = readBoulderState(TEST_DIR)
|
|
181
|
+
|
|
182
|
+
// then
|
|
183
|
+
expect(state).not.toBeNull()
|
|
184
|
+
expect(state!.worktree_path).toBeUndefined()
|
|
185
|
+
})
|
|
186
|
+
|
|
187
|
+
test("#then readBoulderState should preserve valid worktree_path string", () => {
|
|
188
|
+
// given
|
|
189
|
+
const boulderPath = join(TEST_DIR, ".sisyphus", "boulder.json")
|
|
190
|
+
writeFileSync(boulderPath, JSON.stringify({
|
|
191
|
+
active_plan: "/path/to/plan.md",
|
|
192
|
+
started_at: "2026-01-02T10:00:00Z",
|
|
193
|
+
session_ids: ["session-1"],
|
|
194
|
+
plan_name: "test-plan",
|
|
195
|
+
worktree_path: "/valid/worktree/path",
|
|
196
|
+
}, null, 2))
|
|
197
|
+
|
|
198
|
+
// when
|
|
199
|
+
const state = readBoulderState(TEST_DIR)
|
|
200
|
+
|
|
201
|
+
// then
|
|
202
|
+
expect(state?.worktree_path).toBe("/valid/worktree/path")
|
|
203
|
+
})
|
|
204
|
+
})
|
|
205
|
+
```
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Execution Plan — Fix atlas hook crash on missing worktree_path
|
|
2
|
+
|
|
3
|
+
## Phase 0: Setup
|
|
4
|
+
|
|
5
|
+
1. **Create worktree from origin/dev**:
|
|
6
|
+
```bash
|
|
7
|
+
git fetch origin dev
|
|
8
|
+
git worktree add ../omo-wt/fix-atlas-worktree-path-crash origin/dev
|
|
9
|
+
```
|
|
10
|
+
2. **Create feature branch**:
|
|
11
|
+
```bash
|
|
12
|
+
cd ../omo-wt/fix-atlas-worktree-path-crash
|
|
13
|
+
git checkout -b fix/atlas-worktree-path-crash
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Phase 1: Implement
|
|
17
|
+
|
|
18
|
+
### Step 1: Fix `readBoulderState()` in `src/features/boulder-state/storage.ts`
|
|
19
|
+
- Add `worktree_path` sanitization after JSON parse
|
|
20
|
+
- Ensure `worktree_path` is `string | undefined`, never `null` or other types
|
|
21
|
+
- This is the root cause: raw `JSON.parse` + `as BoulderState` cast allows type violations at runtime
|
|
22
|
+
|
|
23
|
+
### Step 2: Add defensive guard in `src/hooks/atlas/idle-event.ts`
|
|
24
|
+
- Before passing `boulderState.worktree_path` to `injectContinuation`, validate it's a string
|
|
25
|
+
- Apply same guard in the `scheduleRetry` callback (line 86)
|
|
26
|
+
- Ensures even if `readBoulderState` is bypassed, the idle handler won't crash
|
|
27
|
+
|
|
28
|
+
### Step 3: Add test coverage in `src/hooks/atlas/index.test.ts`
|
|
29
|
+
- Add test: boulder.json without `worktree_path` field → session.idle works
|
|
30
|
+
- Add test: boulder.json with `worktree_path: null` → session.idle works (no `[Worktree: null]` in prompt)
|
|
31
|
+
- Add test (in `src/features/boulder-state/storage.test.ts`): `readBoulderState` sanitizes `null` worktree_path to `undefined`
|
|
32
|
+
- Follow existing given/when/then test pattern
|
|
33
|
+
|
|
34
|
+
### Step 4: Local validation
|
|
35
|
+
```bash
|
|
36
|
+
bun run typecheck
|
|
37
|
+
bun test src/hooks/atlas/
|
|
38
|
+
bun test src/features/boulder-state/
|
|
39
|
+
bun run build
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### Step 5: Atomic commit
|
|
43
|
+
```bash
|
|
44
|
+
git add src/features/boulder-state/storage.ts src/features/boulder-state/storage.test.ts src/hooks/atlas/idle-event.ts src/hooks/atlas/index.test.ts
|
|
45
|
+
git commit -m "fix(atlas): prevent crash when boulder.json missing worktree_path field
|
|
46
|
+
|
|
47
|
+
readBoulderState() performs unsafe cast of parsed JSON as BoulderState.
|
|
48
|
+
When worktree_path is absent or null in boulder.json, downstream code
|
|
49
|
+
in idle-event.ts could receive null where string|undefined is expected.
|
|
50
|
+
|
|
51
|
+
- Sanitize worktree_path in readBoulderState (reject non-string values)
|
|
52
|
+
- Add defensive typeof check in idle-event before passing to continuation
|
|
53
|
+
- Add test coverage for missing and null worktree_path scenarios"
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Phase 2: PR Creation
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
git push -u origin fix/atlas-worktree-path-crash
|
|
60
|
+
gh pr create \
|
|
61
|
+
--base dev \
|
|
62
|
+
--title "fix(atlas): prevent crash when boulder.json missing worktree_path" \
|
|
63
|
+
--body-file /tmp/pull-request-atlas-worktree-fix.md
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Phase 3: Verify Loop
|
|
67
|
+
|
|
68
|
+
- **Gate A (CI)**: `gh pr checks --watch` — wait for all checks green
|
|
69
|
+
- **Gate B (review-work)**: Run 5-agent review (Oracle goal, Oracle quality, Oracle security, QA execution, context mining)
|
|
70
|
+
- **Gate C (Cubic)**: Wait for cubic-dev-ai[bot] to respond "No issues found"
|
|
71
|
+
- On any failure: fix-commit-push, re-enter verify loop
|
|
72
|
+
|
|
73
|
+
## Phase 4: Merge
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
gh pr merge --squash --delete-branch
|
|
77
|
+
git worktree remove ../omo-wt/fix-atlas-worktree-path-crash
|
|
78
|
+
```
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# PR Title
|
|
2
|
+
|
|
3
|
+
```
|
|
4
|
+
fix(atlas): prevent crash when boulder.json missing worktree_path
|
|
5
|
+
```
|
|
6
|
+
|
|
7
|
+
# PR Body
|
|
8
|
+
|
|
9
|
+
## Summary
|
|
10
|
+
|
|
11
|
+
- Fix runtime type violation in atlas hook when `boulder.json` has a non-string `worktree_path` (e.g. `null`); a missing field was already handled correctly
|
|
12
|
+
- Add `worktree_path` sanitization in `readBoulderState()` to reject non-string values (e.g., `null` from manual edits)
|
|
13
|
+
- Add defensive `typeof` guards in `idle-event.ts` before passing worktree path to continuation injection
|
|
14
|
+
- Add test coverage for missing and null `worktree_path` scenarios
|
|
15
|
+
|
|
16
|
+
## Problem
|
|
17
|
+
|
|
18
|
+
`readBoulderState()` in `src/features/boulder-state/storage.ts` casts raw `JSON.parse()` output directly as `BoulderState` via `return parsed as BoulderState`. This bypasses TypeScript's type system entirely at runtime.
|
|
19
|
+
|
|
20
|
+
When `boulder.json` is missing the `worktree_path` field (common for boulders created before worktree support was added, or created without the `--worktree` flag), `boulderState.worktree_path` is `undefined`, which is handled correctly. However, when `boulder.json` has `"worktree_path": null` (possible from manual edits, external tooling, or corrupted state), the runtime value is `null`, which violates the declared TypeScript type `string | undefined`.
|
|
21
|
+
|
|
22
|
+
This `null` value propagates through:
|
|
23
|
+
1. `idle-event.ts:handleAtlasSessionIdle()` → `injectContinuation()` → `injectBoulderContinuation()`
|
|
24
|
+
2. `idle-event.ts:scheduleRetry()` callback → same chain
|
|
25
|
+
|
|
26
|
+
While the `boulder-continuation-injector.ts` handles falsy values via `worktreePath ? ... : ""`, the type mismatch can cause subtle downstream issues and violates the contract of the `BoulderState` interface.
|
|
27
|
+
|
|
28
|
+
## Changes
|
|
29
|
+
|
|
30
|
+
| File | Change |
|
|
31
|
+
|------|--------|
|
|
32
|
+
| `src/features/boulder-state/storage.ts` | Sanitize `worktree_path` in `readBoulderState()` — reject non-string values |
|
|
33
|
+
| `src/hooks/atlas/idle-event.ts` | Add `typeof` guards before passing worktree_path to continuation (2 call sites) |
|
|
34
|
+
| `src/hooks/atlas/index.test.ts` | Add 2 tests: missing worktree_path + null worktree_path in session.idle |
|
|
35
|
+
| `src/features/boulder-state/storage.test.ts` | Add 2 tests: sanitization of null + preservation of valid string |
|
|
36
|
+
|
|
37
|
+
## Testing
|
|
38
|
+
|
|
39
|
+
- `bun test src/hooks/atlas/` — all existing + new tests pass
|
|
40
|
+
- `bun test src/features/boulder-state/` — all existing + new tests pass
|
|
41
|
+
- `bun run typecheck` — clean
|
|
42
|
+
- `bun run build` — clean
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Verification Strategy
|
|
2
|
+
|
|
3
|
+
## Gate A: CI (`gh pr checks --watch`)
|
|
4
|
+
|
|
5
|
+
### What CI runs (from `ci.yml`)
|
|
6
|
+
1. **Tests (split)**: Mock-heavy tests in isolation + batch tests
|
|
7
|
+
2. **Typecheck**: `bun run typecheck` (tsc --noEmit)
|
|
8
|
+
3. **Build**: `bun run build` (ESM + declarations + schema)
|
|
9
|
+
|
|
10
|
+
### Pre-push local validation
|
|
11
|
+
Before pushing, run the exact CI steps locally to catch failures early:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Targeted test runs first (fast feedback)
|
|
15
|
+
bun test src/features/boulder-state/storage.test.ts
|
|
16
|
+
bun test src/hooks/atlas/index.test.ts
|
|
17
|
+
|
|
18
|
+
# Full test suite
|
|
19
|
+
bun test
|
|
20
|
+
|
|
21
|
+
# Type check
|
|
22
|
+
bun run typecheck
|
|
23
|
+
|
|
24
|
+
# Build
|
|
25
|
+
bun run build
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Failure handling
|
|
29
|
+
- **Test failure**: Read test output, fix code, create new commit (never amend pushed commits), push
|
|
30
|
+
- **Typecheck failure**: Run `lsp_diagnostics` on changed files, fix type errors, commit, push
|
|
31
|
+
- **Build failure**: Check build output for missing exports or circular deps, fix, commit, push
|
|
32
|
+
|
|
33
|
+
After each fix-commit-push: `gh pr checks --watch` to re-enter gate
|
|
34
|
+
|
|
35
|
+
## Gate B: review-work (5-agent review)
|
|
36
|
+
|
|
37
|
+
### The 5 parallel agents
|
|
38
|
+
1. **Oracle (goal/constraint verification)**: Checks the fix matches the stated problem — `worktree_path` crash resolved, no scope creep
|
|
39
|
+
2. **Oracle (code quality)**: Validates code follows existing patterns — factory pattern, given/when/then tests, < 200 LOC, no catch-all files
|
|
40
|
+
3. **Oracle (security)**: Ensures no new security issues — JSON parse injection, path traversal in worktree_path
|
|
41
|
+
4. **QA agent (hands-on execution)**: Actually runs the tests, checks `lsp_diagnostics` on changed files, verifies the fix in action
|
|
42
|
+
5. **Context mining agent**: Checks GitHub issues, git history, related PRs for context alignment
|
|
43
|
+
|
|
44
|
+
### Expected focus areas for this PR
|
|
45
|
+
- Oracle (goal): Does the sanitization in `readBoulderState` actually prevent the crash? Is the `typeof` guard necessary or redundant?
|
|
46
|
+
- Oracle (quality): Are the new tests following the given/when/then pattern? Do they use the same mock setup as existing tests?
|
|
47
|
+
- Oracle (security): Is the `worktree_path` value ever used in path operations without sanitization? (Answer: no, it's only used in template strings)
|
|
48
|
+
- QA: Run `bun test src/hooks/atlas/index.test.ts` — does the null worktree_path test actually trigger the bug before fix?
|
|
49
|
+
|
|
50
|
+
### Failure handling
|
|
51
|
+
- Each oracle produces a PASS/FAIL verdict with specific issues
|
|
52
|
+
- On FAIL: read the specific issue, fix in the worktree, commit, push, re-run review-work
|
|
53
|
+
- All 5 agents must PASS
|
|
54
|
+
|
|
55
|
+
## Gate C: Cubic (`cubic-dev-ai[bot]`)
|
|
56
|
+
|
|
57
|
+
### What Cubic checks
|
|
58
|
+
- Automated code review bot that analyzes the PR diff
|
|
59
|
+
- Looks for: type safety issues, missing error handling, test coverage gaps, anti-patterns
|
|
60
|
+
|
|
61
|
+
### Expected result
|
|
62
|
+
- "No issues found" for this small, focused fix
|
|
63
|
+
- 4 files changed: 2 source files (storage.ts, idle-event.ts) + 2 test files (index.test.ts, storage.test.ts)
|
|
64
|
+
|
|
65
|
+
### Failure handling
|
|
66
|
+
- If Cubic flags an issue: evaluate if it's a real concern or false positive
|
|
67
|
+
- Real concern: fix, commit, push
|
|
68
|
+
- False positive: comment explaining why the flagged pattern is intentional
|
|
69
|
+
- Wait for Cubic to re-review after push
|
|
70
|
+
|
|
71
|
+
## Post-verification: Merge
|
|
72
|
+
|
|
73
|
+
Once all 3 gates pass:
|
|
74
|
+
```bash
|
|
75
|
+
gh pr merge --squash --delete-branch
|
|
76
|
+
git worktree remove ../omo-wt/fix-atlas-worktree-path-crash
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
On merge failure (conflicts):
|
|
80
|
+
```bash
|
|
81
|
+
cd ../omo-wt/fix-atlas-worktree-path-crash
|
|
82
|
+
git fetch origin dev
|
|
83
|
+
git rebase origin/dev
|
|
84
|
+
# Resolve conflicts if any
|
|
85
|
+
git push --force-with-lease
|
|
86
|
+
# Re-enter verify loop from Gate A
|
|
87
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"total_tokens": null, "duration_ms": 506000, "total_duration_seconds": 506}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"run_id": "eval-2-without_skill",
|
|
3
|
+
"expectations": [
|
|
4
|
+
{"text": "Plan uses git worktree in a sibling directory", "passed": false, "evidence": "No worktree. Steps go directly to creating branch and modifying files."},
|
|
5
|
+
{"text": "Fix is minimal — adds null check, doesn't refactor unrelated code", "passed": true, "evidence": "Focused fix though also adds try/catch in setTimeout (reasonable secondary fix)"},
|
|
6
|
+
{"text": "Test case added for the missing worktree_path scenario", "passed": true, "evidence": "Detailed test plan for missing/null/malformed boulder.json"},
|
|
7
|
+
{"text": "Verification loop includes all 3 gates", "passed": false, "evidence": "Only mentions CI pipeline (step 5). No review-work or Cubic."},
|
|
8
|
+
{"text": "References actual atlas hook files", "passed": true, "evidence": "References idle-event.ts, storage.ts with line numbers"},
|
|
9
|
+
{"text": "Branch name follows fix/ prefix convention", "passed": true, "evidence": "fix/atlas-hook-missing-worktree-path"}
|
|
10
|
+
]
|
|
11
|
+
}
|