create-merlin-brain 4.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/bin/install.cjs +113 -14
- package/files/CLAUDE.md +43 -3
- package/files/agents/code-review.md +190 -0
- package/files/agents/codex-code-review.md +32 -0
- package/files/agents/codex-escalator.md +64 -0
- package/files/agents/codex-implementer.md +59 -0
- package/files/agents/codex-planner.md +67 -0
- package/files/agents/merlin.md +3 -2
- package/files/agents/reviewer-decider.md +124 -0
- package/files/commands/merlin/challenge.md +2 -0
- package/files/hooks/config-change.sh +3 -2
- package/files/hooks/notify-desktop.sh +1 -1
- package/files/hooks/notify-webhook.sh +2 -1
- package/files/hooks/orchestrator-guard.sh +3 -2
- package/files/hooks/pre-edit-sights-check.sh +3 -2
- package/files/hooks/task-completed-verify.sh +2 -2
- package/files/hooks/user-prompt-router.sh +2 -1
- package/files/hooks/worktree-create.sh +1 -1
- package/files/hooks/worktree-remove.sh +1 -1
- package/files/merlin/skills/duo/SKILL.md +48 -0
- package/files/merlin/skills/duo/off.md +32 -0
- package/files/merlin/skills/duo/offer.md +158 -0
- package/files/merlin/skills/duo/on.md +50 -0
- package/files/merlin/skills/duo/status.md +95 -0
- package/files/merlin/skills/duo/unsuppress.md +122 -0
- package/files/merlin-state/codex-mode.json +1 -0
- package/files/merlin-state/duo-mode.json +5 -0
- package/files/merlin-state/duo-suppress.json +5 -0
- package/files/merlin-system-prompt.txt +1 -1
- package/files/rules/codex-routing.md +117 -0
- package/files/rules/duo-routing.md +203 -0
- package/files/rules/merlin-routing.md +32 -0
- package/files/scripts/codex-as.sh +74 -0
- package/files/scripts/codex-installed.sh +2 -0
- package/files/scripts/duo-badge.sh +39 -0
- package/files/scripts/duo-codex-call.sh +83 -0
- package/files/scripts/duo-installed.sh +8 -0
- package/files/scripts/duo-mode-read.sh +51 -0
- package/files/scripts/duo-mode-write.sh +66 -0
- package/files/scripts/duo-pre-route.sh +124 -0
- package/files/scripts/duo-risk-detect.sh +157 -0
- package/package.json +1 -1
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# duo/status — report current duo state
|
|
2
|
+
|
|
3
|
+
## Steps
|
|
4
|
+
|
|
5
|
+
**Step 1 — Parse duo-mode.json with 24h expiry logic.**
|
|
6
|
+
```bash
|
|
7
|
+
python3 - <<'PYEOF'
|
|
8
|
+
import json, os, sys
|
|
9
|
+
from datetime import datetime, timezone, timedelta
|
|
10
|
+
|
|
11
|
+
state_path = os.path.expanduser("~/.claude/merlin-state/duo-mode.json")

# State assumed when the file is missing, unreadable, or not a JSON object.
DEFAULT_STATE = {"enabled": False, "sinceISO": None, "lastToggleReason": None}

# Duo mode is reported as AUTO-EXPIRED once it has been on longer than this.
EXPIRY_WINDOW = timedelta(hours=24)


def load_duo_state(path):
    """Return the duo-mode state stored at *path*.

    Falls back to a copy of DEFAULT_STATE when the file cannot be opened,
    is not valid JSON, or does not contain a JSON object.
    """
    try:
        with open(path) as fh:
            data = json.load(fh)
    except (OSError, ValueError):  # JSONDecodeError is a ValueError
        return dict(DEFAULT_STATE)
    return data if isinstance(data, dict) else dict(DEFAULT_STATE)


def classify_duo_state(data, now=None):
    """Return ``(status_label, age_str, expired)`` for a state dict.

    *now* defaults to the current UTC time; it is a parameter so the expiry
    logic can be exercised with a fixed clock.

    - ``("OFF", "", False)`` when duo is not enabled or has no ``sinceISO``.
    - ``("AUTO-EXPIRED", "", True)`` when ``sinceISO`` is older than
      EXPIRY_WINDOW, cannot be parsed, or cannot be compared with *now*
      (for example a timestamp without a UTC offset).
    - ``("ON", "hh:mm", False)`` otherwise.
    """
    since_iso = data.get("sinceISO")
    if not (data.get("enabled", False) and since_iso):
        return "OFF", "", False

    if now is None:
        now = datetime.now(timezone.utc)
    try:
        since_dt = datetime.fromisoformat(since_iso.replace("Z", "+00:00"))
        delta = now - since_dt
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Non-string value, malformed timestamp, or naive/aware mismatch.
        return "AUTO-EXPIRED", "", True

    if delta > EXPIRY_WINDOW:
        return "AUTO-EXPIRED", "", True

    # A timestamp slightly in the future (clock skew) would otherwise render
    # as a negative, malformed age; report it as zero instead.
    total_s = max(0, int(delta.total_seconds()))
    age_str = f"{total_s // 3600:02d}:{(total_s % 3600) // 60:02d}"
    return "ON", age_str, False


def main():
    """Print the key=value status lines consumed by Step 4."""
    data = load_duo_state(state_path)
    status_label, age_str, expired = classify_duo_state(data)
    since_iso = data.get("sinceISO")
    reason = data.get("lastToggleReason") or "never enabled"

    print(f"status={status_label}")
    print(f"since={since_iso or 'n/a'}")
    print(f"age={age_str or 'n/a'}")
    print(f"reason={reason}")
    print(f"expired={expired}")


# `python3 -` runs the heredoc as __main__, so this executes in the skill.
if __name__ == "__main__":
    main()
|
|
46
|
+
PYEOF
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Step 2 — Check Codex install gate.**
|
|
50
|
+
```bash
|
|
51
|
+
~/.claude/scripts/duo-installed.sh 2>/dev/null && echo "gate=pass" || echo "gate=fail"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Step 3 — Parse duo-suppress.json.**
|
|
55
|
+
```bash
|
|
56
|
+
python3 - <<'PYEOF'
|
|
57
|
+
import json, os
|
|
58
|
+
suppress_path = os.path.expanduser("~/.claude/merlin-state/duo-suppress.json")


def default_suppress_state():
    """Return the state assumed when the suppress file is missing or invalid."""
    return {"session_skip": False, "never_for_intents": [], "task_hashes_declined": []}


def load_suppress_state(path):
    """Return the suppression state stored at *path*.

    Falls back to the default state when the file cannot be opened, is not
    valid JSON, or does not contain a JSON object.
    """
    try:
        with open(path) as fh:
            data = json.load(fh)
    except (OSError, ValueError):  # JSONDecodeError is a ValueError
        return default_suppress_state()
    return data if isinstance(data, dict) else default_suppress_state()


def summarize_suppress_state(d):
    """Return the three key=value summary lines for a suppression state dict.

    A missing, null, or non-list collection is counted as empty so a
    hand-edited file cannot crash the status report.
    """
    def count(key):
        value = d.get(key)
        return len(value) if isinstance(value, list) else 0

    return [
        f"session_skip={d.get('session_skip', False)}",
        f"intents_count={count('never_for_intents')}",
        f"hashes_count={count('task_hashes_declined')}",
    ]


# `python3 -` runs the heredoc as __main__, so this executes in the skill.
if __name__ == "__main__":
    for line in summarize_suppress_state(load_suppress_state(suppress_path)):
        print(line)
|
|
67
|
+
PYEOF
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**Step 4 — Emit status output.**
|
|
71
|
+
|
|
72
|
+
Use the values above to emit ONE of these formats:
|
|
73
|
+
|
|
74
|
+
If status=ON:
|
|
75
|
+
```
|
|
76
|
+
⟡🔮↔🔮 MERLIN·DUO › Duo: ON · since {sinceISO} · age {hh:mm} · reason: {lastToggleReason}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
If status=OFF:
|
|
80
|
+
```
|
|
81
|
+
⟡🔮 MERLIN › Duo: OFF (last reason: {lastToggleReason})
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
If status=AUTO-EXPIRED:
|
|
85
|
+
```
|
|
86
|
+
⟡🔮 MERLIN › Duo: AUTO-EXPIRED (was on since {sinceISO}, exceeded 24h). Treating as off.
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Then append install gate and suppression lines:
|
|
90
|
+
```
|
|
91
|
+
Codex install gate: ✓ pass (or ✗ fail — duo would silent-fallback to solo)
|
|
92
|
+
Session skip: {true|false}
|
|
93
|
+
Suppressed intents: {N} (if N > 0, add: — run Skill("merlin:duo", args="unsuppress") to clear)
|
|
94
|
+
Recently declined task hashes: {N}
|
|
95
|
+
```
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# duo/unsuppress — interactive suppression memory clearer
|
|
2
|
+
|
|
3
|
+
## Steps
|
|
4
|
+
|
|
5
|
+
**Step 1 — Read suppression state with exclusive lock.**
|
|
6
|
+
```bash
|
|
7
|
+
SUPPRESS_FILE="${HOME}/.claude/merlin-state/duo-suppress.json"
LOCK_FILE="${HOME}/.claude/merlin-state/.duo-suppress.lock"

# Hold an exclusive lock on LOCK_FILE while the Python script (fed on stdin)
# runs. The path is passed to python3 as a real argument instead of being
# spliced into a `flock -c "<string>"` command line, so quotes or spaces in
# $HOME cannot break or alter the command.
flock -x "$LOCK_FILE" python3 - "$SUPPRESS_FILE" <<'PYEOF'
|
|
11
|
+
import json, sys
|
|
12
|
+
|
|
13
|
+
# Step 3 shows at most this many declined task hashes before "...and X more".
MAX_HASHES_SHOWN = 5


def default_state():
    """Return the state assumed when the suppress file is missing or invalid."""
    return {"session_skip": False, "never_for_intents": [], "task_hashes_declined": []}


def read_state(path):
    """Return the suppression state stored at *path*.

    Falls back to the default state when the file cannot be opened, is not
    valid JSON, or does not contain a JSON object.
    """
    try:
        with open(path) as fh:
            data = json.load(fh)
    except (OSError, ValueError):  # JSONDecodeError is a ValueError
        return default_state()
    return data if isinstance(data, dict) else default_state()


def describe_state(data):
    """Return the key=value lines describing a suppression state dict.

    Emits the skip flag, both counts, every never-for intent, and the first
    MAX_HASHES_SHOWN declined hashes (Step 3 displays those, so they have to
    be printed here). Null or non-list collections are treated as empty.
    """
    def as_list(key):
        value = data.get(key)
        return value if isinstance(value, list) else []

    intents = as_list("never_for_intents")
    hashes = as_list("task_hashes_declined")

    lines = [
        f"session_skip={data.get('session_skip', False)}",
        f"intents_count={len(intents)}",
        f"hashes_count={len(hashes)}",
    ]
    lines.extend(f"intent_{i}={intent}" for i, intent in enumerate(intents))
    lines.extend(
        f"hash_{i}={task_hash}"
        for i, task_hash in enumerate(hashes[:MAX_HASHES_SHOWN])
    )
    return lines


# `python3 - <path>` runs the heredoc as __main__ with the path in argv[1].
if __name__ == "__main__":
    state_path = sys.argv[1]
    for line in describe_state(read_state(state_path)):
        print(line)
|
|
28
|
+
PYEOF
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**Step 2 — Check for empty state.**
|
|
32
|
+
If `session_skip=false` AND `intents_count=0` AND `hashes_count=0`:
|
|
33
|
+
```
|
|
34
|
+
⟡🔮 MERLIN › Nothing suppressed. Duo offers fire on all qualifying tasks.
|
|
35
|
+
```
|
|
36
|
+
Stop here.
|
|
37
|
+
|
|
38
|
+
**Step 3 — Display current suppression state.**
|
|
39
|
+
```
|
|
40
|
+
⟡🔮 MERLIN › Duo suppression memory:
|
|
41
|
+
• Session skip: {true|false}
|
|
42
|
+
• Never-for intents ({N}): [{list of intents, or "none"}]
|
|
43
|
+
• Recently declined task hashes ({N}): [{first 5 hashes then "...and X more" if >5}]
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Step 4 — Ask user for choice.**
|
|
47
|
+
```
|
|
48
|
+
Reply with:
|
|
49
|
+
• "clear all" — wipe everything
|
|
50
|
+
• "clear session" — un-set session_skip only
|
|
51
|
+
• "clear intents" — wipe never_for_intents only
|
|
52
|
+
• "clear hashes" — wipe task_hashes_declined only
|
|
53
|
+
• "clear <fingerprint>" — remove one specific never_for_intents entry (paste from list above)
|
|
54
|
+
• "cancel" — leave as-is
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Step 5 — Apply chosen change atomically.**
|
|
58
|
+
After user replies, execute the matching branch with atomic write:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
SUPPRESS_FILE="${HOME}/.claude/merlin-state/duo-suppress.json"
LOCK_FILE="${HOME}/.claude/merlin-state/.duo-suppress.lock"
# Substitute the user's reply verbatim. Single quotes stop the shell from
# expanding $, backticks, or backslashes in the reply; write any embedded
# single quote as '\''.
USER_CHOICE='<user reply>'

# Hold an exclusive lock on LOCK_FILE while the Python script (fed on stdin)
# runs. The reply is passed to python3 as a real argument: it is free-form
# user text, and splicing it into a `flock -c "<string>"` command line would
# let a quote in the reply break out and run arbitrary shell.
flock -x "$LOCK_FILE" python3 - "$SUPPRESS_FILE" "$USER_CHOICE" <<'PYEOF'
|
|
66
|
+
import json, sys, os, tempfile
|
|
67
|
+
|
|
68
|
+
def empty_state():
    """Return a fresh, fully cleared suppression state."""
    return {"session_skip": False, "never_for_intents": [], "task_hashes_declined": []}


def load_state(path):
    """Return the suppression state stored at *path*.

    Falls back to the cleared state when the file cannot be opened, is not
    valid JSON, or does not contain a JSON object.
    """
    try:
        with open(path) as fh:
            data = json.load(fh)
    except (OSError, ValueError):  # JSONDecodeError is a ValueError
        return empty_state()
    return data if isinstance(data, dict) else empty_state()


def apply_choice(data, raw_choice):
    """Apply the user's reply to a suppression state.

    Returns ``(status, new_state, cleared)``:

    - status ``"ok"``: *new_state* should be written; *cleared* names what
      was removed (empty when a fingerprint matched nothing).
    - status ``"cancelled"`` or ``"unrecognized"``: nothing should be written.

    Keywords are matched case-insensitively. A fingerprint is compared
    exactly as typed, apart from surrounding whitespace. *data* itself is
    never mutated.
    """
    text = raw_choice.strip()
    choice = text.lower()
    updated = dict(data)

    if choice == "clear all":
        return "ok", empty_state(), ["session_skip", "never_for_intents", "task_hashes_declined"]
    if choice == "clear session":
        updated["session_skip"] = False
        return "ok", updated, ["session_skip"]
    if choice == "clear intents":
        updated["never_for_intents"] = []
        return "ok", updated, ["never_for_intents"]
    if choice == "clear hashes":
        updated["task_hashes_declined"] = []
        return "ok", updated, ["task_hashes_declined"]
    if choice.startswith("clear "):
        # Slice the *stripped* reply: slicing the raw argument put the cut in
        # the wrong place whenever the reply had leading whitespace.
        fingerprint = text[len("clear "):].strip()
        intents = updated.get("never_for_intents")
        if not isinstance(intents, list):
            intents = []
        kept = [x for x in intents if x != fingerprint]
        updated["never_for_intents"] = kept
        cleared = [f"intent:{fingerprint}"] if len(kept) < len(intents) else []
        return "ok", updated, cleared
    if choice == "cancel":
        return "cancelled", data, []
    return "unrecognized", data, []


def write_state(path, data):
    """Atomically replace *path* with *data* serialized as JSON.

    The temp file is created next to *path* so ``os.replace`` stays on one
    filesystem, and it is removed again if writing or replacing fails.
    """
    tmp = tempfile.NamedTemporaryFile(
        mode="w", dir=os.path.dirname(path) or ".", delete=False, suffix=".tmp"
    )
    try:
        with tmp:
            json.dump(data, tmp, indent=2)
        os.replace(tmp.name, path)
    except BaseException:
        try:
            os.unlink(tmp.name)
        except OSError:
            pass
        raise


def main():
    """Read argv (state path, user reply), apply the reply, report the result."""
    state_path = sys.argv[1]
    status, data, cleared = apply_choice(load_state(state_path), sys.argv[2])

    if status == "cancelled":
        print("cancelled")
        return
    if status == "unrecognized":
        print("unrecognized — no changes made")
        return

    write_state(state_path, data)
    print(f"cleared={','.join(cleared) if cleared else 'nothing'}")


# `python3 - <path> <reply>` runs the heredoc as __main__.
if __name__ == "__main__":
    main()
|
|
111
|
+
PYEOF
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
**Step 6 — Emit confirmation.**
|
|
115
|
+
Show what was cleared, e.g.:
|
|
116
|
+
```
|
|
117
|
+
⟡🔮 MERLIN › Cleared: {cleared list}. Duo offers will fire normally for affected tasks.
|
|
118
|
+
```
|
|
119
|
+
If user said "cancel":
|
|
120
|
+
```
|
|
121
|
+
⟡🔮 MERLIN › No changes made.
|
|
122
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"enabled": false, "sinceISO": null, "lastToggleReason": null}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
MANDATORY: You are Merlin, an orchestrator. Before processing any user request, call merlin_get_selected_repo, merlin_get_project_status, merlin_get_rules and merlin_get_brief. Route ALL implementation work to specialist agents via Skill("merlin:workflow") or merlin_route(). NEVER write, edit, or debug code yourself. Prefix every action with the ⟡🔮 MERLIN ›
|
|
1
|
+
MANDATORY: You are Merlin, an orchestrator. Before processing any user request, call merlin_get_selected_repo, merlin_get_project_status, merlin_get_rules and merlin_get_brief. Route ALL implementation work to specialist agents via Skill("merlin:workflow") or merlin_route(). NEVER write, edit, or debug code yourself. Prefix every action with the badge from ~/.claude/scripts/duo-badge.sh (solo: ⟡🔮 MERLIN ›, duo: ⟡🔮↔🔮 MERLIN·DUO ›). Run independent agents in PARALLEL. Before editing code, call merlin_get_context first.
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Codex Execution Layer — Routing Rules
|
|
2
|
+
|
|
3
|
+
This document defines how Merlin integrates with OpenAI Codex for code execution.
|
|
4
|
+
|
|
5
|
+
## Install Gate (MANDATORY)
|
|
6
|
+
|
|
7
|
+
Before ANY Codex routing, check if Codex is installed:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
~/.claude/scripts/codex-installed.sh && echo installed || echo not-installed
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
- If the check FAILS (exit 1): silently fall back to Claude specialist — NO mention of Codex to the user
|
|
14
|
+
- If the check PASSES (exit 0): proceed with Codex routing
|
|
15
|
+
|
|
16
|
+
**Never skip the install gate. Never mention Codex if it's not installed.**
|
|
17
|
+
|
|
18
|
+
## Scenarios
|
|
19
|
+
|
|
20
|
+
| Scenario | Trigger | Action |
|
|
21
|
+
|---|---|---|
|
|
22
|
+
| **1. Failed-fix Escalation** | After `merlin_run_verification` fails OR user says "still broken" / "didn't work" / "same error" / "that didn't fix it" | Route to `codex-escalator` with bundle: {original_issue, claude_diagnosis, claude_diff, failure_evidence}. ONE attempt only — if Codex also fails, stop and report both attempts. |
|
|
23
|
+
| **2. Big-feature Dual-plan** | `feature-dev` or `refactor` workflow starts (NOT bug-fix, NOT quick) | Run `merlin-planner` AND `codex-planner` in PARALLEL. Route both plans to `challenger-arbiter` for synthesis. Execute unified plan with `codex-implementer` for coding; Claude orchestrates and verifies. |
|
|
24
|
+
| **3. Manual Codex Mode** | Natural language toggle (see phrases below) | While enabled, EVERY implementation/edit/refactor routes to `codex-implementer`. Planning, orchestration, and verification stay with Claude. |
|
|
25
|
+
|
|
26
|
+
## Scenario 3: Manual Codex-Execution Mode
|
|
27
|
+
|
|
28
|
+
### Turn-On Phrases
|
|
29
|
+
- "use codex to code"
|
|
30
|
+
- "let codex do the coding"
|
|
31
|
+
- "code with codex"
|
|
32
|
+
- "codex hands"
|
|
33
|
+
- "switch to codex for this"
|
|
34
|
+
- "codex execute"
|
|
35
|
+
|
|
36
|
+
### Turn-Off Phrases
|
|
37
|
+
- "back to claude"
|
|
38
|
+
- "stop codex"
|
|
39
|
+
- "claude does the coding"
|
|
40
|
+
- "disable codex"
|
|
41
|
+
|
|
42
|
+
### State Management
|
|
43
|
+
|
|
44
|
+
When turned ON, write to `~/.claude/merlin-state/codex-mode.json`:
|
|
45
|
+
```json
|
|
46
|
+
{"enabled": true, "sinceISO": "<ISO timestamp>", "lastToggleReason": "user said X"}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
When turned OFF:
|
|
50
|
+
```json
|
|
51
|
+
{"enabled": false, "sinceISO": null, "lastToggleReason": "user said X"}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Auto-Expire
|
|
55
|
+
|
|
56
|
+
If `sinceISO` is more than 24 hours old, treat as disabled. This approximates session-sticky behavior: the mode is not tied to a session boundary, but it lapses on its own within a day instead of persisting indefinitely.
|
|
57
|
+
|
|
58
|
+
## Skill Injection Mechanism
|
|
59
|
+
|
|
60
|
+
`codex-implementer` uses `codex-as.sh` which:
|
|
61
|
+
1. Reads the Merlin specialist's `.md` file (e.g., `~/.claude/agents/implementation-dev.md`)
|
|
62
|
+
2. Strips YAML frontmatter
|
|
63
|
+
3. Extracts the prompt body
|
|
64
|
+
4. Prepends it to the Codex invocation
|
|
65
|
+
|
|
66
|
+
This gives Codex the SAME system prompt, instructions, and constraints that the Claude specialist would have. Same patterns, same guardrails, different brain.
|
|
67
|
+
|
|
68
|
+
## Verification Authority
|
|
69
|
+
|
|
70
|
+
**Claude ALWAYS verifies**, regardless of who wrote the code:
|
|
71
|
+
- After `codex-escalator` completes → run `merlin_run_verification()`
|
|
72
|
+
- After `codex-implementer` completes → run `merlin_run_verification()`
|
|
73
|
+
- After dual-plan execution step → Claude verifies before proceeding
|
|
74
|
+
|
|
75
|
+
This is the "brain/hands split" — Codex may execute, but Claude certifies.
|
|
76
|
+
|
|
77
|
+
## Curated Specialists
|
|
78
|
+
|
|
79
|
+
Codex can embody these roles via `codex-as.sh`:
|
|
80
|
+
- `implementation-dev` — General implementation
|
|
81
|
+
- `dry-refactor` — DRY cleanup and refactoring
|
|
82
|
+
- `hardening-guard` — Security hardening
|
|
83
|
+
- `ui-builder` — React/UI components
|
|
84
|
+
- `android-expert` — Android/Kotlin
|
|
85
|
+
- `apple-swift-expert` — iOS/macOS Swift
|
|
86
|
+
- `desktop-app-expert` — Electron/Tauri
|
|
87
|
+
- `merlin-frontend` — Frontend specialist
|
|
88
|
+
- `animation-expert` — Motion/animation
|
|
89
|
+
- `code-review` — Production-readiness code review
|
|
90
|
+
|
|
91
|
+
**Excluded from codex-as.sh:**
|
|
92
|
+
- `reviewer-decider` — Claude-only by design (sequential gate authority)
|
|
93
|
+
|
|
94
|
+
Any other specialist stays with Claude.
|
|
95
|
+
|
|
96
|
+
## Code Review Routing
|
|
97
|
+
|
|
98
|
+
Natural language intent: "code review" / "production readiness review" / "review the codebase" / "check for AI smells" / "review this folder" / "do a full review"
|
|
99
|
+
|
|
100
|
+
Routing logic:
|
|
101
|
+
1. Check codex-mode.json state (enabled + within 24h) AND `codex-installed.sh` returns 0
|
|
102
|
+
2. If both true → route to `codex-code-review` agent (Codex gpt-5.4)
|
|
103
|
+
3. Otherwise → route to `code-review` agent (Claude Opus)
|
|
104
|
+
|
|
105
|
+
Both produce the same CODE_REVIEW.md report format. User can override by saying "use claude for code review" or "use codex for code review".
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Duo Mode — see `duo-routing.md`
|
|
110
|
+
|
|
111
|
+
When duo is enabled (`~/.claude/merlin-state/duo-mode.json: enabled=true`, within 24h, install gate passes), duo routing in `duo-routing.md` SUPERSEDES the rules in this file. Codex-mode does not need to be on for duo to work — duo manages its own state.
|
|
112
|
+
|
|
113
|
+
When duo is OFF, the rules in this file (codex-mode escalation, dual-plan, manual codex hands) apply normally.
|
|
114
|
+
|
|
115
|
+
### Curated specialists exclusion (P0 safety)
|
|
116
|
+
|
|
117
|
+
`reviewer-decider` is **Claude-only** and MUST NOT be added to the curated specialists list for `codex-as.sh`. Codex impersonating the gate that checks Codex defeats the sequential safety story.
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# Duo Mode — Routing Rules
|
|
2
|
+
|
|
3
|
+
This document is the single source of truth for duo mode. Do not duplicate routing logic elsewhere — cross-reference this file instead.
|
|
4
|
+
|
|
5
|
+
## What duo is
|
|
6
|
+
|
|
7
|
+
Duo mode runs two brains on the same task: Claude and Codex. Tasks that produce independent artifacts (plans, reports, markdown, test files) run in **parallel** — both brains work simultaneously and an arbiter merges outputs. Tasks that write or modify source files run **sequentially** — one author at a time, followed by a Claude reviewer and a structured gate decision. This eliminates file-conflict risk while still getting two perspectives on every code change.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Activation
|
|
12
|
+
|
|
13
|
+
**State file:** `~/.claude/merlin-state/duo-mode.json`
|
|
14
|
+
|
|
15
|
+
**Schema:**
|
|
16
|
+
```json
|
|
17
|
+
{"enabled": bool, "sinceISO": "ISO8601 | null", "lastToggleReason": "string | null"}
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
**Default (first install):**
|
|
21
|
+
```json
|
|
22
|
+
{"enabled": false, "sinceISO": null, "lastToggleReason": null}
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**Auto-expire:** If `sinceISO` is more than 24 hours old at read time, treat as disabled. The file is NOT rewritten on read — expiry is a read-time interpretation. Approximately session-sticky.
|
|
26
|
+
|
|
27
|
+
**Toggle:** `Skill("merlin:duo", args="on|off|status|unsuppress")` or natural language (see `merlin-routing.md` for intent triggers).
|
|
28
|
+
|
|
29
|
+
**Install gate:** Run `~/.claude/scripts/duo-installed.sh` before any duo activation. If gate returns non-zero:
|
|
30
|
+
- Silently fall back to solo routing for this task
|
|
31
|
+
- Do NOT write `enabled: true` to the state file
|
|
32
|
+
- Do NOT mention duo, Codex, or the gate failure to the user
|
|
33
|
+
- Proceed as if duo was never considered
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Precedence
|
|
38
|
+
|
|
39
|
+
When multiple modes are enabled simultaneously:
|
|
40
|
+
|
|
41
|
+
1. **Duo active** (enabled=true, within 24h, gate passes) → duo rules in this file win
|
|
42
|
+
2. **Codex-mode active** (enabled=true, within 24h, codex gate passes) → `codex-routing.md` rules apply
|
|
43
|
+
3. **Neither** → solo routing (existing `merlin-routing.md` + `codex-routing.md` rules)
|
|
44
|
+
|
|
45
|
+
Note: duo does NOT require `codex-mode.json` to be enabled. Duo manages its own state independently.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Badge contract (UNMISSABLE)
|
|
50
|
+
|
|
51
|
+
Compute the badge via `~/.claude/scripts/duo-badge.sh` before every action prefix.
|
|
52
|
+
|
|
53
|
+
| Condition | Badge |
|
|
54
|
+
|---|---|
|
|
55
|
+
| Duo enabled + within 24h + gate passes | `⟡🔮↔🔮 MERLIN·DUO ›` |
|
|
56
|
+
| Any other state | `⟡🔮 MERLIN ›` |
|
|
57
|
+
|
|
58
|
+
**Text-only fallback:** If env `MERLIN_BADGE_TEXTONLY=1`, `duo-badge.sh` returns `[DUO] MERLIN ›` or `MERLIN ›` for terminals that mangle `↔` or emoji.
|
|
59
|
+
|
|
60
|
+
Every Merlin action MUST prefix with the computed badge. If the badge is missing, the action is non-compliant. See badge audit: `.planning/duo/BADGE-AUDIT.md`.
|
|
61
|
+
|
|
62
|
+
Special case — `duo off` when codex-mode is still active:
|
|
63
|
+
```
|
|
64
|
+
⟡🔮 MERLIN › Duo off. (codex-mode is still active.)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Routing Matrix — PARALLEL execution
|
|
70
|
+
|
|
71
|
+
Both brains run independently on the same task. Results are routed to the arbiter for merging or deduplication. **Parallel tasks ONLY produce reports, plans, markdown, or test files — never edits to existing source code.** Source edits are always sequential (see below).
|
|
72
|
+
|
|
73
|
+
| Task | Brain A | Brain B | Decider |
|
|
74
|
+
|---|---|---|---|
|
|
75
|
+
| Planning (feature-dev / refactor / product-dev) | `merlin-planner` | `codex-planner` | `challenger-arbiter` |
|
|
76
|
+
| Documentation | `docs-keeper` (Claude) | `docs-keeper` via `codex-as.sh` | `challenger-arbiter` |
|
|
77
|
+
| Code review | `code-review` | `codex-code-review` | merge + dedupe (in arbiter) |
|
|
78
|
+
| Testing | `tests-qa` (Claude) | `tests-qa` via `codex-as.sh` | merge + dedupe (in arbiter) |
|
|
79
|
+
|
|
80
|
+
**UX during parallel runs:**
|
|
81
|
+
```
|
|
82
|
+
⟡🔮↔🔮 MERLIN·DUO › Planning ×2 (claude + codex)…
|
|
83
|
+
⟡🔮↔🔮 MERLIN·DUO › Arbiter merging plans…
|
|
84
|
+
⟡🔮↔🔮 MERLIN·DUO › Plan ready.
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Routing Matrix — SEQUENTIAL execution
|
|
90
|
+
|
|
91
|
+
One author at a time to prevent file conflicts. Sequential only for tasks that write or modify source files.
|
|
92
|
+
|
|
93
|
+
| Task | Author | Reviewer | Decider |
|
|
94
|
+
|---|---|---|---|
|
|
95
|
+
| Code write (new files) | `codex-implementer` | `code-review` (Claude) | `reviewer-decider` (Claude) |
|
|
96
|
+
| Code modification | `codex-implementer` | `code-review` (Claude) | `reviewer-decider` (Claude) |
|
|
97
|
+
|
|
98
|
+
**Sequential decision flow:**
|
|
99
|
+
|
|
100
|
+
1. Author (`codex-implementer`) writes diff
|
|
101
|
+
2. Reviewer (`code-review`, Claude) reviews diff
|
|
102
|
+
3. `reviewer-decider` returns `{decision: approve|revise|reject, reasoning, required_changes?}`
|
|
103
|
+
4. **On `approve`:** proceed to verification
|
|
104
|
+
5. **On `revise`:** author iterates ONCE with `required_changes`. Re-review. If revise again → escalate to reject.
|
|
105
|
+
6. **On `reject`** OR second revise without resolution: Claude reviewer takes over and writes the fix directly
|
|
106
|
+
|
|
107
|
+
Maximum iterations per task: 2 (one initial pass + one revise loop). `reviewer-decider` enforces `iteration_count` ≤ 2.
|
|
108
|
+
|
|
109
|
+
**UX during sequential runs:**
|
|
110
|
+
```
|
|
111
|
+
⟡🔮↔🔮 MERLIN·DUO › [1/3] Codex writing diff…
|
|
112
|
+
⟡🔮↔🔮 MERLIN·DUO › [2/3] Claude reviewing…
|
|
113
|
+
⟡🔮↔🔮 MERLIN·DUO › [3/3] Decision: approve · proceeding to verify
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
If decider returns `revise`:
|
|
117
|
+
```
|
|
118
|
+
⟡🔮↔🔮 MERLIN·DUO › Iteration 2/2: codex revising…
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## reviewer-decider is Claude-only (P0 safety)
|
|
124
|
+
|
|
125
|
+
`reviewer-decider` MUST NOT be invoked via `codex-as.sh`. Codex impersonating the gate that checks Codex's own output destroys the sequential safety story. The curated specialists list in `codex-routing.md` explicitly excludes `reviewer-decider` — this exclusion is permanent and must not be removed.
|
|
126
|
+
|
|
127
|
+
See also: `codex-routing.md` § "Curated specialists exclusion".
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Auto-offer for risky tasks
|
|
132
|
+
|
|
133
|
+
When duo is OFF, Merlin runs a pre-route hook at the start of every routing decision:
|
|
134
|
+
|
|
135
|
+
1. Call `~/.claude/scripts/duo-pre-route.sh --task "..." [--workflow X] [--files a,b] [--loc N]`
|
|
136
|
+
2. Hook reads `duo-mode.json`, calls `duo-installed.sh`, calls `duo-risk-detect.sh`, checks `duo-suppress.json`
|
|
137
|
+
3. Hook outputs exactly one of:
|
|
138
|
+
- `mode=duo` → proceed with duo routing (duo was already on)
|
|
139
|
+
- `mode=offer` → invoke `Skill("merlin:duo", args="offer")` first; user response determines mode
|
|
140
|
+
- `mode=solo` → fall through to existing solo/codex-mode rules
|
|
141
|
+
|
|
142
|
+
**Risk threshold:** `duo-risk-detect.sh` scores the task 0–100. `suggest_duo: true` when score ≥ 50. Threshold tunable via env `MERLIN_DUO_OFFER_THRESHOLD`.
|
|
143
|
+
|
|
144
|
+
**Offer fires only when ALL of the following are true:**
|
|
145
|
+
- `duo-installed.sh` exits 0 (Codex installed)
|
|
146
|
+
- Duo is currently OFF (or auto-expired)
|
|
147
|
+
- Risk score ≥ threshold
|
|
148
|
+
- Not suppressed (no `session_skip`, task hash not in `task_hashes_declined`, intent not in `never_for_intents`)
|
|
149
|
+
|
|
150
|
+
**Offer is one-shot per task.** Never offer mid-flight. If `duo-pre-route.sh` errors or times out (>500ms), skip the offer silently and log to `duo-decisions.log`.
|
|
151
|
+
|
|
152
|
+
**Suppression memory:** `~/.claude/merlin-state/duo-suppress.json`. Reset via `Skill("merlin:duo", args="unsuppress")`. `never_for_intents` entries auto-expire after 7 days. `session_skip` clears when file mtime is > 12h old.
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Codex-broken-runtime fallback (P0)
|
|
157
|
+
|
|
158
|
+
`duo-installed.sh` only checks PATH presence. If Codex is on PATH but fails at runtime:
|
|
159
|
+
|
|
160
|
+
1. Wrap all Codex invocations with a 60s timeout:
|
|
161
|
+
- With GNU coreutils: `timeout 60s codex …` (the command is named `gtimeout` when coreutils is installed via Homebrew on macOS)
|
|
162
|
+
- Without coreutils: `perl -e 'alarm 60; exec @ARGV' codex …`
|
|
163
|
+
2. On Codex error or timeout: log to `~/.claude/merlin-state/duo-decisions.log` with severity `codex_runtime_failure`. Fall back to Claude for that step:
|
|
164
|
+
- Parallel branch: drop the codex result; arbiter receives one input
|
|
165
|
+
- Sequential branch: Claude takes the author role
|
|
166
|
+
3. After 3 consecutive Codex failures in a session: surface the following message, write `enabled: false` to `duo-mode.json` with `lastToggleReason: "codex runtime failures"`, and revert to solo:
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
⟡🔮 MERLIN › Codex appears unhealthy. Reverting to solo for this session. Run 'codex doctor' to diagnose.
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Verification authority (UNCHANGED)
|
|
175
|
+
|
|
176
|
+
Claude ALWAYS runs `merlin_run_verification()` after a duo flow completes, regardless of who wrote the code. This is the brain/hands principle — Codex (or any specialist) may execute, but Claude certifies.
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Workflow integration
|
|
181
|
+
|
|
182
|
+
| Workflow | When duo ON | When duo OFF |
|
|
183
|
+
|---|---|---|
|
|
184
|
+
| `feature-dev` | Parallel planning + sequential coding | Existing codex-mode dual-plan or solo |
|
|
185
|
+
| `refactor` | Parallel planning + sequential coding | Existing codex-mode dual-plan or solo |
|
|
186
|
+
| `product-dev` | Parallel planning + sequential coding | Solo |
|
|
187
|
+
| `bug-fix` | Solo by default; opt-in via "duo this" | Solo |
|
|
188
|
+
| `quick` | Solo by default; opt-in via "duo this" | Solo |
|
|
189
|
+
| Code review intent | Parallel review pair | `code-review` or `codex-code-review` (codex-routing rules) |
|
|
190
|
+
| Tests intent | Parallel test generation pair | Solo |
|
|
191
|
+
| Docs intent | Parallel docs pair | Solo |
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Cross-references
|
|
196
|
+
|
|
197
|
+
- Codex execution layer: `~/.claude/rules/codex-routing.md`
|
|
198
|
+
- Intent triggers (toggle phrases, workflow routing): `~/.claude/rules/merlin-routing.md`
|
|
199
|
+
- Skill prompts: `~/.claude/skills/merlin/duo/` (SKILL.md, on.md, off.md, status.md, offer.md, unsuppress.md)
|
|
200
|
+
- Reviewer-decider agent: `~/.claude/agents/reviewer-decider.md`
|
|
201
|
+
- Badge audit: `.planning/duo/BADGE-AUDIT.md`
|
|
202
|
+
- State files: `~/.claude/merlin-state/duo-mode.json`, `~/.claude/merlin-state/duo-suppress.json`
|
|
203
|
+
- Decision audit log: `~/.claude/merlin-state/duo-decisions.log` (JSONL, append-only)
|
|
@@ -38,6 +38,7 @@ Call `merlin_smart_route(task="...")` FIRST (searches 500+ community agents). Th
|
|
|
38
38
|
| Database migrations | `merlin-migrator` |
|
|
39
39
|
| API design | `merlin-api-designer` |
|
|
40
40
|
| Code review | `merlin-reviewer` |
|
|
41
|
+
| Production code review / AI-smell audit | `code-review` (or `codex-code-review` in Codex mode) |
|
|
41
42
|
| Performance | `merlin-performance` |
|
|
42
43
|
|
|
43
44
|
## Collaborative Intents — Auto-Detect from Natural Language
|
|
@@ -57,6 +58,10 @@ Call `merlin_smart_route(task="...")` FIRST (searches 500+ community agents). Th
|
|
|
57
58
|
| "remind me" / "add a todo" | `Skill("merlin:add-todo")` |
|
|
58
59
|
| "check todos" / "pending items" | `Skill("merlin:check-todos")` |
|
|
59
60
|
| New project, no PROJECT.md | `Skill("merlin:map-codebase")` then `Skill("merlin:new-project")` |
|
|
61
|
+
| "duo on" / "enable duo" / "go duo" | `Skill("merlin:duo", args="on")` |
|
|
62
|
+
| "duo off" / "disable duo" / "back to solo" | `Skill("merlin:duo", args="off")` |
|
|
63
|
+
| "duo status" / "am I in duo" / "is duo on" | `Skill("merlin:duo", args="status")` |
|
|
64
|
+
| "stop duo for this kind of task" / "never duo for X" | `Skill("merlin:duo", args="unsuppress")` |
|
|
60
65
|
|
|
61
66
|
## Planning Intents
|
|
62
67
|
|
|
@@ -98,3 +103,30 @@ At natural moments, surface ONE relevant capability:
|
|
|
98
103
|
- Session end → auto-invoke `Skill("merlin:standup")`.
|
|
99
104
|
- After implementation → auto-run `merlin_run_verification()`.
|
|
100
105
|
- Agent failure → diagnose, retry with different config, NEVER fall back to coding yourself.
|
|
106
|
+
|
|
107
|
+
## Codex Execution Layer
|
|
108
|
+
|
|
109
|
+
See `~/.claude/rules/codex-routing.md` for full details.
|
|
110
|
+
|
|
111
|
+
**Install Gate:** Before any Codex routing, run `~/.claude/scripts/codex-installed.sh`. If it fails, silently fall back to Claude.
|
|
112
|
+
|
|
113
|
+
**Scenarios:**
|
|
114
|
+
1. **Failed-fix escalation** — after verification fails, escalate to `codex-escalator`
|
|
115
|
+
2. **Big-feature dual-plan** — feature-dev/refactor workflows run `merlin-planner` + `codex-planner` in parallel, then `challenger-arbiter` synthesizes
|
|
116
|
+
3. **Manual Codex mode** — user toggles with natural language, all implementation routes to `codex-implementer`
|
|
117
|
+
|
|
118
|
+
**State file:** `~/.claude/merlin-state/codex-mode.json`
|
|
119
|
+
|
|
120
|
+
### Additional Collaborative Intents
|
|
121
|
+
|
|
122
|
+
| User says | Action |
|
|
123
|
+
|---|---|
|
|
124
|
+
| "use codex to code" / "let codex do the coding" / "code with codex" / "codex hands" / "switch to codex for this" / "codex execute" | Write `{"enabled": true, "sinceISO": "<now>", "lastToggleReason": "user said X"}` to `~/.claude/merlin-state/codex-mode.json`. Route implementation to `codex-implementer`. |
|
|
125
|
+
| "back to claude" / "stop codex" / "claude does the coding" / "disable codex" | Write `{"enabled": false, ...}` to `~/.claude/merlin-state/codex-mode.json`. Resume normal Claude routing. |
|
|
126
|
+
|
|
127
|
+
### Additional Workflow Routing Notes
|
|
128
|
+
|
|
129
|
+
- `feature-dev` and `refactor` workflows: If Codex installed, use dual-plan flow (merlin-planner + codex-planner → challenger-arbiter → codex-implementer execution)
|
|
130
|
+
- `bug-fix` and `quick`: No dual-plan — normal flow, but failed-fix escalation to codex-escalator is available
|
|
131
|
+
|
|
132
|
+
> When duo mode is active, `feature-dev`, `refactor`, and `product-dev` workflows automatically use parallel planning + sequential coding. See `duo-routing.md`.
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# codex-as.sh — invoke Codex as a Merlin specialist agent
# Usage: codex-as.sh <agent-name> <task-text> [--model <model-name>]
#
# Reads ~/.claude/agents/<agent-name>.md, strips its YAML frontmatter, appends
# the task text under a "## Task" heading, and replaces this process with
# `codex exec` on the combined prompt.
#
# Exit status: 0 (silently) when codex is not on PATH; 1 on a usage error, a
# refused agent name, or a missing agent file; otherwise whatever codex returns.

set -euo pipefail

# Install gate: if codex is not installed, exit silently
command -v codex >/dev/null 2>&1 || exit 0

AGENT_NAME=""
TASK_TEXT=""
# Kept as an array so a model name is passed as exactly one argument, with no
# word-splitting or globbing.
MODEL_ARGS=()

usage() {
  echo "Usage: codex-as.sh <agent-name> <task-text> [--model <model-name>]" >&2
}

# Parse arguments: the first two positionals are agent name and task text;
# any further positionals are ignored.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --model)
      if [[ -n "${2:-}" ]]; then
        MODEL_ARGS=(--model "$2")
        shift 2
      else
        echo "Error: --model requires a value" >&2
        exit 1
      fi
      ;;
    *)
      if [[ -z "$AGENT_NAME" ]]; then
        AGENT_NAME="$1"
      elif [[ -z "$TASK_TEXT" ]]; then
        TASK_TEXT="$1"
      fi
      shift
      ;;
  esac
done

# Both positionals are required: running a write-enabled agent with an empty
# task would hand Codex the specialist prompt and nothing to do.
if [[ -z "$AGENT_NAME" || -z "$TASK_TEXT" ]]; then
  usage
  exit 1
fi

# The name is interpolated into a path below, so refuse path separators.
# This also stops "./reviewer-decider" from slipping past the check that follows.
if [[ "$AGENT_NAME" == */* ]]; then
  echo "Error: Invalid agent name: $AGENT_NAME" >&2
  exit 1
fi

# reviewer-decider is Claude-only (see codex-routing.md / duo-routing.md):
# Codex must never impersonate the gate that reviews Codex's own output.
# Compared case-insensitively because the agents directory may live on a
# case-insensitive filesystem.
AGENT_KEY=$(printf '%s' "$AGENT_NAME" | tr '[:upper:]' '[:lower:]')
if [[ "$AGENT_KEY" == "reviewer-decider" ]]; then
  echo "Error: reviewer-decider is Claude-only and cannot be run via codex-as.sh" >&2
  exit 1
fi

AGENT_FILE="$HOME/.claude/agents/${AGENT_NAME}.md"

if [[ ! -f "$AGENT_FILE" ]]; then
  echo "Error: Agent file not found: $AGENT_FILE" >&2
  exit 1
fi

# Extract prompt body by stripping YAML frontmatter.
# Frontmatter is recognised only when the very first line is `---`; it ends at
# the next `---` line. A file without frontmatter is used whole, and `---`
# horizontal rules later in the body are kept.
PROMPT_BODY=$(awk '
  NR == 1 && /^---[[:space:]]*$/ { in_frontmatter = 1; next }
  in_frontmatter {
    if (/^---[[:space:]]*$/) in_frontmatter = 0
    next
  }
  { print }
' "$AGENT_FILE")

# Build the full prompt: agent system prompt + separator + task
FULL_PROMPT="${PROMPT_BODY}

---

## Task

${TASK_TEXT}"

# Invoke codex with --write to allow file modifications
# NOTE(review): confirm the installed codex CLI accepts `--write`.
# The ${arr[@]+...} form expands to nothing when no --model was given; a plain
# "${MODEL_ARGS[@]}" on an empty array is an "unbound variable" error under
# `set -u` on bash older than 4.4.
exec codex exec --write --cd "$PWD" ${MODEL_ARGS[@]+"${MODEL_ARGS[@]}"} "$FULL_PROMPT"
|