oh-my-opencode 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/command/get-unpublished-changes.md +148 -0
- package/.agents/command/omomomo.md +37 -0
- package/.agents/command/publish.md +376 -0
- package/.agents/command/remove-deadcode.md +221 -0
- package/.agents/command/security-research.md +16 -0
- package/.agents/skills/get-unpublished-changes/SKILL.md +24 -0
- package/.agents/skills/github-triage/SKILL.md +587 -0
- package/.agents/skills/github-triage/scripts/gh_fetch.py +398 -0
- package/.agents/skills/hyperplan/SKILL.md +450 -0
- package/.agents/skills/omomomo/SKILL.md +36 -0
- package/.agents/skills/pre-publish-review/SKILL.md +407 -0
- package/.agents/skills/publish/SKILL.md +428 -0
- package/.agents/skills/remove-deadcode/SKILL.md +216 -0
- package/.agents/skills/security-research/SKILL.md +204 -0
- package/.agents/skills/work-with-pr/SKILL.md +360 -0
- package/.agents/skills/work-with-pr-workspace/evals/evals.json +76 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/benchmark.json +138 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/benchmark.md +42 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/eval_metadata.json +57 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/grading.json +15 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/code-changes.md +454 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/execution-plan.md +136 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/pr-description.md +47 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/verification-strategy.md +163 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/grading.json +15 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/code-changes.md +615 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/execution-plan.md +99 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/pr-description.md +50 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/verification-strategy.md +111 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/eval_metadata.json +37 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/grading.json +11 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md +205 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/execution-plan.md +78 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/pr-description.md +42 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/verification-strategy.md +87 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/grading.json +11 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/code-changes.md +334 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/execution-plan.md +86 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/pr-description.md +23 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/verification-strategy.md +119 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/eval_metadata.json +32 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/code-changes.md +221 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/execution-plan.md +104 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/pr-description.md +41 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/verification-strategy.md +84 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/code-changes.md +342 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/execution-plan.md +131 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/pr-description.md +39 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/verification-strategy.md +128 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/eval_metadata.json +32 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/code-changes.md +143 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/execution-plan.md +82 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/pr-description.md +51 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/verification-strategy.md +69 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/code-changes.md +252 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/execution-plan.md +83 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/pr-description.md +33 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/verification-strategy.md +101 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/eval_metadata.json +32 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/code-changes.md +387 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/execution-plan.md +112 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/pr-description.md +51 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/verification-strategy.md +75 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/grading.json +10 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/code-changes.md +529 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/execution-plan.md +127 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/pr-description.md +42 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/verification-strategy.md +120 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/timing.json +1 -0
- package/.agents/skills/work-with-pr-workspace/iteration-1/review.html +1326 -0
- package/.opencode/command/get-unpublished-changes.md +148 -0
- package/.opencode/command/omomomo.md +37 -0
- package/.opencode/command/publish.md +376 -0
- package/.opencode/command/remove-deadcode.md +221 -0
- package/.opencode/command/security-research.md +16 -0
- package/.opencode/skills/github-triage/SKILL.md +587 -0
- package/.opencode/skills/github-triage/scripts/gh_fetch.py +398 -0
- package/.opencode/skills/hyperplan/SKILL.md +450 -0
- package/.opencode/skills/pre-publish-review/SKILL.md +407 -0
- package/.opencode/skills/work-with-pr/SKILL.md +360 -0
- package/.opencode/skills/work-with-pr-workspace/evals/evals.json +76 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/benchmark.json +138 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/benchmark.md +42 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/eval_metadata.json +57 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/grading.json +15 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/code-changes.md +454 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/execution-plan.md +136 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/pr-description.md +47 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/outputs/verification-strategy.md +163 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/grading.json +15 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/code-changes.md +615 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/execution-plan.md +99 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/pr-description.md +50 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/outputs/verification-strategy.md +111 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-1/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/eval_metadata.json +37 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/grading.json +11 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/code-changes.md +205 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/execution-plan.md +78 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/pr-description.md +42 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/outputs/verification-strategy.md +87 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/grading.json +11 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/code-changes.md +334 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/execution-plan.md +86 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/pr-description.md +23 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/outputs/verification-strategy.md +119 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-2/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/eval_metadata.json +32 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/code-changes.md +221 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/execution-plan.md +104 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/pr-description.md +41 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/outputs/verification-strategy.md +84 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/code-changes.md +342 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/execution-plan.md +131 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/pr-description.md +39 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/outputs/verification-strategy.md +128 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-3/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/eval_metadata.json +32 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/code-changes.md +143 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/execution-plan.md +82 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/pr-description.md +51 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/outputs/verification-strategy.md +69 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/code-changes.md +252 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/execution-plan.md +83 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/pr-description.md +33 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/outputs/verification-strategy.md +101 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-4/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/eval_metadata.json +32 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/code-changes.md +387 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/execution-plan.md +112 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/pr-description.md +51 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/outputs/verification-strategy.md +75 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/with_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/grading.json +10 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/code-changes.md +529 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/execution-plan.md +127 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/pr-description.md +42 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/outputs/verification-strategy.md +120 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/eval-5/without_skill/timing.json +1 -0
- package/.opencode/skills/work-with-pr-workspace/iteration-1/review.html +1326 -0
- package/README.ja.md +1 -1
- package/README.ko.md +1 -1
- package/README.md +1 -1
- package/README.ru.md +1 -1
- package/README.zh-cn.md +1 -1
- package/dist/agents/atlas/agent.d.ts +6 -6
- package/dist/agents/prometheus/gemini.d.ts +0 -11
- package/dist/agents/prometheus/gpt.d.ts +0 -10
- package/dist/agents/prometheus/system-prompt.d.ts +2 -20
- package/dist/agents/types.d.ts +1 -16
- package/dist/cli/index.js +178 -129
- package/dist/config/schema/agent-names.d.ts +3 -3
- package/dist/config/schema/agent-overrides.d.ts +208 -208
- package/dist/config/schema/categories.d.ts +28 -28
- package/dist/config/schema/fallback-models.d.ts +20 -20
- package/dist/config/schema/oh-my-opencode-config.d.ts +208 -208
- package/dist/features/background-agent/parent-wake-notifier.d.ts +8 -1
- package/dist/help/schema/acp.d.ts +95 -0
- package/dist/help/schema/doctor.d.ts +147 -0
- package/dist/help/schema/sandbox.d.ts +74 -0
- package/dist/help/schema/status.d.ts +139 -0
- package/dist/hooks/keyword-detector/analyze/default.d.ts +1 -1
- package/dist/hooks/keyword-detector/hyperplan/default.d.ts +1 -1
- package/dist/hooks/keyword-detector/search/default.d.ts +1 -1
- package/dist/hooks/keyword-detector/team/default.d.ts +2 -7
- package/dist/hooks/keyword-detector/ultrawork/default.d.ts +1 -9
- package/dist/hooks/keyword-detector/ultrawork/gemini.d.ts +1 -16
- package/dist/hooks/keyword-detector/ultrawork/gpt.d.ts +1 -10
- package/dist/hooks/keyword-detector/ultrawork/planner.d.ts +1 -5
- package/dist/hooks/ralph-loop/no-progress-turn-detector.d.ts +7 -0
- package/dist/hooks/ralph-loop/pending-verification-handler.d.ts +1 -0
- package/dist/hooks/ralph-loop/types.d.ts +1 -0
- package/dist/hooks/runtime-fallback/error-classifier.d.ts +1 -0
- package/dist/index.js +52205 -50528
- package/dist/shared/prompt-async-gate/pending-tool-turn.d.ts +1 -0
- package/dist/shared/prompt-async-gate/types.d.ts +4 -3
- package/package.json +19 -13
- package/dist/agents/atlas/default-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/default.d.ts +0 -2
- package/dist/agents/atlas/gemini-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/gemini.d.ts +0 -2
- package/dist/agents/atlas/gpt-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/gpt.d.ts +0 -2
- package/dist/agents/atlas/kimi-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/kimi.d.ts +0 -2
- package/dist/agents/atlas/opus-4-7-prompt-sections.d.ts +0 -6
- package/dist/agents/atlas/opus-4-7.d.ts +0 -2
- package/dist/agents/atlas/shared-prompt.d.ts +0 -9
- package/dist/agents/prometheus/behavioral-summary.d.ts +0 -6
- package/dist/agents/prometheus/high-accuracy-mode.d.ts +0 -6
- package/dist/agents/prometheus/identity-constraints.d.ts +0 -7
- package/dist/agents/prometheus/interview-mode.d.ts +0 -7
- package/dist/agents/prometheus/plan-generation.d.ts +0 -7
- package/dist/agents/prometheus/plan-template.d.ts +0 -7
- package/dist/agents/prometheus/spec-driven-mode.d.ts +0 -7
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# Verification Strategy
|
|
2
|
+
|
|
3
|
+
## Pre-Push Local Validation
|
|
4
|
+
|
|
5
|
+
Before every push, run all three checks sequentially:
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
bun run typecheck && bun test && bun run build
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Specific test files to watch:
|
|
12
|
+
```bash
|
|
13
|
+
bun test src/config/schema/background-task.test.ts
|
|
14
|
+
bun test src/features/background-agent/concurrency.test.ts
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Gate A: CI (`ci.yml`)
|
|
20
|
+
|
|
21
|
+
### What CI runs
|
|
22
|
+
1. **Tests (split):** mock-heavy tests run in isolation (separate `bun test` processes), rest in batch
|
|
23
|
+
2. **Typecheck:** `bun run typecheck` (tsc --noEmit)
|
|
24
|
+
3. **Build:** `bun run build` (ESM + declarations + schema)
|
|
25
|
+
4. **Schema auto-commit:** if generated schema changed, CI commits it
|
|
26
|
+
|
|
27
|
+
### How to monitor
|
|
28
|
+
```bash
|
|
29
|
+
gh pr checks <PR_NUMBER> --watch
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Common failure scenarios and fixes
|
|
33
|
+
|
|
34
|
+
| Failure | Likely Cause | Fix |
|
|
35
|
+
|---------|-------------|-----|
|
|
36
|
+
| Typecheck error | New field not matching existing type imports | Verify `BackgroundTaskConfig` type is auto-inferred from schema, no manual type updates needed |
|
|
37
|
+
| Test failure | Test assertion wrong or missing import | Fix test, re-push |
|
|
38
|
+
| Build failure | Import cycle or missing export | Check barrel exports in `src/config/schema.ts` (already re-exports via `export *`) |
|
|
39
|
+
| Schema auto-commit | Generated JSON schema changed | Pull the auto-commit, rebase if needed |
|
|
40
|
+
|
|
41
|
+
### Recovery
|
|
42
|
+
```bash
|
|
43
|
+
# Read CI logs
|
|
44
|
+
gh run view <RUN_ID> --log-failed
|
|
45
|
+
|
|
46
|
+
# Fix, commit, push
|
|
47
|
+
git add -A && git commit -m "fix: address CI failure" && git push
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Gate B: review-work (5 parallel agents)
|
|
53
|
+
|
|
54
|
+
### What it checks
|
|
55
|
+
Run `/review-work` which launches 5 background sub-agents:
|
|
56
|
+
|
|
57
|
+
| Agent | Role | What it checks for this PR |
|
|
58
|
+
|-------|------|---------------------------|
|
|
59
|
+
| Oracle (goal) | Goal/constraint verification | Does `maxBackgroundAgents` actually limit agents? Is default 5? Is min 1? |
|
|
60
|
+
| Oracle (quality) | Code quality | Follows existing patterns? No catch-all files? Under 200 LOC? given/when/then tests? |
|
|
61
|
+
| Oracle (security) | Security review | No injection vectors, no unsafe defaults, proper input validation via Zod |
|
|
62
|
+
| Hephaestus (QA) | Hands-on QA execution | Actually runs tests, checks typecheck, verifies build |
|
|
63
|
+
| Hephaestus (context) | Context mining | Checks git history, related issues, ensures no duplicate/conflicting PRs |
|
|
64
|
+
|
|
65
|
+
### Pass criteria
|
|
66
|
+
All 5 agents must pass. Any single failure blocks.
|
|
67
|
+
|
|
68
|
+
### Common failure scenarios and fixes
|
|
69
|
+
|
|
70
|
+
| Agent | Likely Issue | Fix |
|
|
71
|
+
|-------|-------------|-----|
|
|
72
|
+
| Oracle (goal) | Global limit not enforced in all exit paths (completion, cancel, error, interrupt) | Audit every status transition in `manager.ts` that should call `releaseGlobal()` |
|
|
73
|
+
| Oracle (quality) | Test style not matching given/when/then | Restructure tests with `#given`/`#when`/`#then` describe nesting |
|
|
74
|
+
| Oracle (quality) | File exceeds 200 LOC | `concurrency.ts` is 137 LOC + ~25 new = ~162 LOC, safe. `manager.ts` is already large but we're adding ~20 lines to existing methods, not creating new responsibility |
|
|
75
|
+
| Oracle (security) | Integer overflow or negative values | Zod `.int().min(1)` handles this at config parse time |
|
|
76
|
+
| Hephaestus (QA) | Test actually fails when run | Run tests locally first, fix before push |
|
|
77
|
+
|
|
78
|
+
### Recovery
|
|
79
|
+
```bash
|
|
80
|
+
# Review agent output
|
|
81
|
+
background_output(task_id="<review-work-task-id>")
|
|
82
|
+
|
|
83
|
+
# Fix identified issues
|
|
84
|
+
# ... edit files ...
|
|
85
|
+
git add -A && git commit -m "fix: address review-work feedback" && git push
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Gate C: Cubic (`cubic-dev-ai[bot]`)
|
|
91
|
+
|
|
92
|
+
### What it checks
|
|
93
|
+
Cubic is an automated code review bot that analyzes the PR diff. It must respond with "No issues found" for the gate to pass.
|
|
94
|
+
|
|
95
|
+
### Common failure scenarios and fixes
|
|
96
|
+
|
|
97
|
+
| Issue | Likely Cause | Fix |
|
|
98
|
+
|-------|-------------|-----|
|
|
99
|
+
| "Missing error handling" | `releaseGlobal()` not called in some error path | Add `releaseGlobal()` to the missed path |
|
|
100
|
+
| "Inconsistent naming" | Field name doesn't match convention | Use `maxBackgroundAgents` (camelCase in schema, `max_background_agents` in JSONC config) |
|
|
101
|
+
| "Missing documentation" | No JSDoc on new public methods | Add JSDoc comments to `canSpawnGlobally()`, `acquireGlobal()`, `releaseGlobal()`, `getMaxBackgroundAgents()` |
|
|
102
|
+
| "Test coverage gap" | Missing edge case test | Add the specific test case Cubic identifies |
|
|
103
|
+
|
|
104
|
+
### Recovery
|
|
105
|
+
```bash
|
|
106
|
+
# Read Cubic's review
|
|
107
|
+
gh api repos/code-yeongyu/oh-my-openagent/pulls/<PR_NUMBER>/reviews
|
|
108
|
+
|
|
109
|
+
# Address each comment
|
|
110
|
+
# ... edit files ...
|
|
111
|
+
git add -A && git commit -m "fix: address Cubic review feedback" && git push
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Verification Loop Pseudocode
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
iteration = 0
|
|
120
|
+
while true:
|
|
121
|
+
iteration++
|
|
122
|
+
log("Verification iteration ${iteration}")
|
|
123
|
+
|
|
124
|
+
# Gate A: CI (cheapest, check first)
|
|
125
|
+
push_and_wait_for_ci()
|
|
126
|
+
if ci_failed:
|
|
127
|
+
read_ci_logs()
|
|
128
|
+
fix_and_commit()
|
|
129
|
+
continue
|
|
130
|
+
|
|
131
|
+
# Gate B: review-work (5 agents, more expensive)
|
|
132
|
+
run_review_work()
|
|
133
|
+
if any_agent_failed:
|
|
134
|
+
read_agent_feedback()
|
|
135
|
+
fix_and_commit()
|
|
136
|
+
continue
|
|
137
|
+
|
|
138
|
+
# Gate C: Cubic (external bot, wait for it)
|
|
139
|
+
wait_for_cubic_review()
|
|
140
|
+
if cubic_has_issues:
|
|
141
|
+
read_cubic_comments()
|
|
142
|
+
fix_and_commit()
|
|
143
|
+
continue
|
|
144
|
+
|
|
145
|
+
# All gates passed
|
|
146
|
+
break
|
|
147
|
+
|
|
148
|
+
# Merge
|
|
149
|
+
gh pr merge <PR_NUMBER> --squash --delete-branch
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
No iteration cap. Loop continues until all three gates pass simultaneously in a single iteration.
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Risk Assessment
|
|
157
|
+
|
|
158
|
+
| Risk | Probability | Mitigation |
|
|
159
|
+
|------|------------|------------|
|
|
160
|
+
| Slot leak (global count never decremented) | Medium | Audit every exit path: `tryCompleteTask`, `cancelTask`, `handleEvent(session.error)`, `startTask` prompt error, `resume` prompt error |
|
|
161
|
+
| Race condition on global count | Low | `globalRunningCount` is synchronous (single-threaded JS), no async gap between check and increment in `launch()` |
|
|
162
|
+
| Breaking existing behavior | Low | Default is 5, same as existing per-model default. Users with <5 total agents see no change |
|
|
163
|
+
| `manager.ts` exceeding 200 LOC | Already exceeded | File is already ~1500 LOC (exempt due to being a core orchestration class with many methods). Our changes add ~20 lines to existing methods, not a new responsibility |
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"total_tokens": null, "duration_ms": 292000, "total_duration_seconds": 292}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"run_id": "eval-1-without_skill",
|
|
3
|
+
"expectations": [
|
|
4
|
+
{"text": "Plan uses git worktree in a sibling directory", "passed": false, "evidence": "Uses git checkout -b, no worktree isolation"},
|
|
5
|
+
{"text": "Branch is created from origin/dev", "passed": true, "evidence": "git checkout -b feat/max-background-agents dev"},
|
|
6
|
+
{"text": "Plan specifies multiple atomic commits for multi-file changes", "passed": false, "evidence": "Steps listed sequentially but no atomic commit strategy mentioned"},
|
|
7
|
+
{"text": "Runs bun run typecheck, bun test, and bun run build before pushing", "passed": true, "evidence": "Step 6 runs typecheck and tests, Step 8 implies push after verification"},
|
|
8
|
+
{"text": "PR is created targeting dev branch", "passed": true, "evidence": "Step 8 mentions creating PR"},
|
|
9
|
+
{"text": "Verification loop includes all 3 gates: CI, review-work, and Cubic", "passed": false, "evidence": "Only mentions CI pipeline in step 6. No review-work or Cubic."},
|
|
10
|
+
{"text": "Gates are checked in order: CI first, then review-work, then Cubic", "passed": false, "evidence": "No gate ordering - only CI mentioned"},
|
|
11
|
+
{"text": "Cubic check uses gh api to check cubic-dev-ai[bot] reviews", "passed": false, "evidence": "No mention of Cubic at all"},
|
|
12
|
+
{"text": "Plan includes worktree cleanup after merge", "passed": false, "evidence": "No worktree used, no cleanup needed"},
|
|
13
|
+
{"text": "Code changes reference actual files in the codebase", "passed": true, "evidence": "References actual files with detailed design decisions"}
|
|
14
|
+
]
|
|
15
|
+
}
|