create-merlin-brain 4.0.0 โ 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/bin/install.cjs +113 -14
- package/files/CLAUDE.md +43 -3
- package/files/agents/code-review.md +190 -0
- package/files/agents/codex-code-review.md +32 -0
- package/files/agents/codex-escalator.md +64 -0
- package/files/agents/codex-implementer.md +59 -0
- package/files/agents/codex-planner.md +67 -0
- package/files/agents/merlin.md +3 -2
- package/files/agents/reviewer-decider.md +124 -0
- package/files/commands/merlin/challenge.md +2 -0
- package/files/hooks/config-change.sh +3 -2
- package/files/hooks/notify-desktop.sh +1 -1
- package/files/hooks/notify-webhook.sh +2 -1
- package/files/hooks/orchestrator-guard.sh +3 -2
- package/files/hooks/pre-edit-sights-check.sh +3 -2
- package/files/hooks/task-completed-verify.sh +2 -2
- package/files/hooks/user-prompt-router.sh +2 -1
- package/files/hooks/worktree-create.sh +1 -1
- package/files/hooks/worktree-remove.sh +1 -1
- package/files/merlin/skills/duo/SKILL.md +48 -0
- package/files/merlin/skills/duo/off.md +32 -0
- package/files/merlin/skills/duo/offer.md +158 -0
- package/files/merlin/skills/duo/on.md +50 -0
- package/files/merlin/skills/duo/status.md +95 -0
- package/files/merlin/skills/duo/unsuppress.md +122 -0
- package/files/merlin-state/codex-mode.json +1 -0
- package/files/merlin-state/duo-mode.json +5 -0
- package/files/merlin-state/duo-suppress.json +5 -0
- package/files/merlin-system-prompt.txt +1 -1
- package/files/rules/codex-routing.md +117 -0
- package/files/rules/duo-routing.md +203 -0
- package/files/rules/merlin-routing.md +32 -0
- package/files/scripts/codex-as.sh +74 -0
- package/files/scripts/codex-installed.sh +2 -0
- package/files/scripts/duo-badge.sh +39 -0
- package/files/scripts/duo-codex-call.sh +83 -0
- package/files/scripts/duo-installed.sh +8 -0
- package/files/scripts/duo-mode-read.sh +51 -0
- package/files/scripts/duo-mode-write.sh +66 -0
- package/files/scripts/duo-pre-route.sh +124 -0
- package/files/scripts/duo-risk-detect.sh +157 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -134,6 +134,25 @@ Use Merlin to find the best skill, agent, and workflow for this task: add OAuth
|
|
|
134
134
|
Call merlin_help for this task: debug the failing Stripe webhook tests.
|
|
135
135
|
```
|
|
136
136
|
|
|
137
|
+
## Duo Mode (parallel + sequential dual-brain)
|
|
138
|
+
|
|
139
|
+
Run Claude and Codex on the same task: parallel for planning/docs/review/tests, sequential for code writing.
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
# Toggle in any Claude Code session:
|
|
143
|
+
"duo on" # enable
|
|
144
|
+
"duo off" # disable
|
|
145
|
+
"duo status" # check
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
When enabled, the badge swaps to `โก๐ฎโ๐ฎ MERLINยทDUO โบ` so you always know which mode you're in. Set `MERLIN_BADGE_TEXTONLY=1` for emoji-hostile terminals.
|
|
149
|
+
|
|
150
|
+
**Auto-offer:** When duo is OFF and a task scores >=50 on the risk heuristic (auth, payments, migrations, etc.), Merlin asks if you want to enable duo for that task. Suppress with "skip session" or "never for X". 7-day expiry on intent suppressions.
|
|
151
|
+
|
|
152
|
+
**Requires:** Codex CLI installed. If not installed, Merlin silently uses solo mode.
|
|
153
|
+
|
|
154
|
+
Full rules: `~/.claude/rules/duo-routing.md`.
|
|
155
|
+
|
|
137
156
|
## Documentation
|
|
138
157
|
|
|
139
158
|
Visit [merlin.build/docs](https://merlin.build/docs) for full documentation.
|
package/bin/install.cjs
CHANGED
|
@@ -136,6 +136,9 @@ const AGENTS_DIR = path.join(CLAUDE_DIR, 'agents');
|
|
|
136
136
|
const COMMANDS_DIR = path.join(CLAUDE_DIR, 'commands', 'merlin');
|
|
137
137
|
const LOOP_DIR = path.join(CLAUDE_DIR, 'loop');
|
|
138
138
|
const RULES_DIR = path.join(CLAUDE_DIR, 'rules');
|
|
139
|
+
const SCRIPTS_DIR = path.join(CLAUDE_DIR, 'scripts');
|
|
140
|
+
const MERLIN_STATE_DIR = path.join(CLAUDE_DIR, 'merlin-state');
|
|
141
|
+
const SKILLS_DIR = path.join(CLAUDE_DIR, 'skills', 'merlin');
|
|
139
142
|
|
|
140
143
|
const colors = {
|
|
141
144
|
reset: '\x1b[0m',
|
|
@@ -871,7 +874,7 @@ async function install() {
|
|
|
871
874
|
}
|
|
872
875
|
|
|
873
876
|
// Step 0: Clean up legacy GSD/ccwiki artifacts
|
|
874
|
-
logStep('0/
|
|
877
|
+
logStep('0/14', 'Cleaning up legacy installations...');
|
|
875
878
|
const cleaned = cleanupLegacy();
|
|
876
879
|
if (cleaned.length > 0) {
|
|
877
880
|
for (const item of cleaned) {
|
|
@@ -882,11 +885,11 @@ async function install() {
|
|
|
882
885
|
}
|
|
883
886
|
|
|
884
887
|
// Step 1: Ensure Claude Code is installed and up to date
|
|
885
|
-
logStep('1/
|
|
888
|
+
logStep('1/14', 'Checking Claude Code...');
|
|
886
889
|
const claudeCheck = ensureClaudeCode();
|
|
887
890
|
|
|
888
891
|
// Step 2: Detect runtimes
|
|
889
|
-
logStep('2/
|
|
892
|
+
logStep('2/14', 'Detecting runtimes...');
|
|
890
893
|
const detectedRuntimes = detectRuntimes();
|
|
891
894
|
log(` ${colors.green}โ
${colors.reset} Claude Code (primary)`);
|
|
892
895
|
for (const rt of detectedRuntimes) {
|
|
@@ -899,7 +902,7 @@ async function install() {
|
|
|
899
902
|
}
|
|
900
903
|
|
|
901
904
|
// Step 3: Install globally for instant startup across all terminals
|
|
902
|
-
logStep('3/
|
|
905
|
+
logStep('3/14', 'Installing globally (fast startup for all terminals)...');
|
|
903
906
|
try {
|
|
904
907
|
const { execSync } = require('child_process');
|
|
905
908
|
// Check if already installed globally and up-to-date
|
|
@@ -940,7 +943,7 @@ async function install() {
|
|
|
940
943
|
}
|
|
941
944
|
|
|
942
945
|
// Step 4: Create directories
|
|
943
|
-
logStep('4/
|
|
946
|
+
logStep('4/14', 'Creating directories...');
|
|
944
947
|
ensureDir(CLAUDE_DIR);
|
|
945
948
|
ensureDir(MERLIN_DIR);
|
|
946
949
|
ensureDir(AGENTS_DIR);
|
|
@@ -948,7 +951,7 @@ async function install() {
|
|
|
948
951
|
logSuccess('Directories created');
|
|
949
952
|
|
|
950
953
|
// Step 5: Install Merlin core (workflows, references, templates)
|
|
951
|
-
logStep('5/
|
|
954
|
+
logStep('5/14', 'Installing Merlin workflows...');
|
|
952
955
|
const merlinSrc = path.join(filesDir, 'merlin');
|
|
953
956
|
if (fs.existsSync(merlinSrc)) {
|
|
954
957
|
const count = copyDirRecursive(merlinSrc, MERLIN_DIR);
|
|
@@ -961,7 +964,7 @@ async function install() {
|
|
|
961
964
|
}
|
|
962
965
|
|
|
963
966
|
// Step 6: Install agents (tiered)
|
|
964
|
-
logStep('6/
|
|
967
|
+
logStep('6/14', 'Installing Merlin agents...');
|
|
965
968
|
const agentsSrc = path.join(filesDir, 'agents');
|
|
966
969
|
if (fs.existsSync(agentsSrc)) {
|
|
967
970
|
// Load agent manifest for tiered display
|
|
@@ -989,7 +992,7 @@ async function install() {
|
|
|
989
992
|
}
|
|
990
993
|
|
|
991
994
|
// Step 7: Install path-scoped rules
|
|
992
|
-
logStep('7/
|
|
995
|
+
logStep('7/14', 'Installing path-scoped rules...');
|
|
993
996
|
const rulesSrc = path.join(filesDir, 'rules');
|
|
994
997
|
if (fs.existsSync(rulesSrc)) {
|
|
995
998
|
ensureDir(RULES_DIR);
|
|
@@ -1012,8 +1015,62 @@ async function install() {
|
|
|
1012
1015
|
logWarn('Rules not found in package');
|
|
1013
1016
|
}
|
|
1014
1017
|
|
|
1018
|
+
// Step 7b: Install Merlin skills tree (~/.claude/skills/merlin/)
|
|
1019
|
+
// Skills live at runtime path ~/.claude/skills/merlin/ (NOT ~/.claude/merlin/skills/)
|
|
1020
|
+
// Source: files/merlin/skills/ โ preserves user-customized skill files (mtime check)
|
|
1021
|
+
logStep('7b/14', 'Installing Merlin skills tree...');
|
|
1022
|
+
const skillsSrc = path.join(filesDir, 'merlin', 'skills');
|
|
1023
|
+
if (fs.existsSync(skillsSrc)) {
|
|
1024
|
+
ensureDir(SKILLS_DIR);
|
|
1025
|
+
let installedCount = 0;
|
|
1026
|
+
let skippedCount = 0;
|
|
1027
|
+
let updatedCount = 0;
|
|
1028
|
+
|
|
1029
|
+
function installSkillsDir(srcDir, destDir) {
|
|
1030
|
+
fs.mkdirSync(destDir, { recursive: true });
|
|
1031
|
+
const entries = fs.readdirSync(srcDir, { withFileTypes: true });
|
|
1032
|
+
for (const entry of entries) {
|
|
1033
|
+
if (entry.name === '.DS_Store') continue;
|
|
1034
|
+
const srcPath = path.join(srcDir, entry.name);
|
|
1035
|
+
const destPath = path.join(destDir, entry.name);
|
|
1036
|
+
if (entry.isDirectory()) {
|
|
1037
|
+
installSkillsDir(srcPath, destPath);
|
|
1038
|
+
} else {
|
|
1039
|
+
if (fs.existsSync(destPath)) {
|
|
1040
|
+
// Check if user has customized: dest is newer AND content differs
|
|
1041
|
+
const srcStat = fs.statSync(srcPath);
|
|
1042
|
+
const destStat = fs.statSync(destPath);
|
|
1043
|
+
const userNewer = destStat.mtimeMs > srcStat.mtimeMs;
|
|
1044
|
+
const contentDiffers = fs.readFileSync(srcPath, 'utf8') !== fs.readFileSync(destPath, 'utf8');
|
|
1045
|
+
if (userNewer && contentDiffers) {
|
|
1046
|
+
skippedCount++;
|
|
1047
|
+
// logSuccess(` skipped (user-customized): ${destPath.replace(os.homedir(), '~')}`);
|
|
1048
|
+
} else if (contentDiffers) {
|
|
1049
|
+
fs.copyFileSync(srcPath, destPath);
|
|
1050
|
+
updatedCount++;
|
|
1051
|
+
} else {
|
|
1052
|
+
// identical โ no-op
|
|
1053
|
+
skippedCount++;
|
|
1054
|
+
}
|
|
1055
|
+
} else {
|
|
1056
|
+
fs.copyFileSync(srcPath, destPath);
|
|
1057
|
+
installedCount++;
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1060
|
+
}
|
|
1061
|
+
}
|
|
1062
|
+
|
|
1063
|
+
installSkillsDir(skillsSrc, SKILLS_DIR);
|
|
1064
|
+
if (installedCount > 0) logSuccess(`Installed ${installedCount} skill files`);
|
|
1065
|
+
if (updatedCount > 0) logSuccess(`Updated ${updatedCount} skill files`);
|
|
1066
|
+
if (skippedCount > 0) logSuccess(`Skipped ${skippedCount} skill files (up-to-date or user-customized)`);
|
|
1067
|
+
if (installedCount === 0 && updatedCount === 0 && skippedCount === 0) logSuccess('Skills tree already up-to-date');
|
|
1068
|
+
} else {
|
|
1069
|
+
logWarn('Skills not found in package');
|
|
1070
|
+
}
|
|
1071
|
+
|
|
1015
1072
|
// Step 8: Install commands
|
|
1016
|
-
logStep('8/
|
|
1073
|
+
logStep('8/14', 'Installing /merlin:* commands...');
|
|
1017
1074
|
const commandsSrc = path.join(filesDir, 'commands', 'merlin');
|
|
1018
1075
|
if (fs.existsSync(commandsSrc)) {
|
|
1019
1076
|
const count = copyDirRecursive(commandsSrc, COMMANDS_DIR);
|
|
@@ -1023,7 +1080,7 @@ async function install() {
|
|
|
1023
1080
|
}
|
|
1024
1081
|
|
|
1025
1082
|
// Step 9: Install CLAUDE.md
|
|
1026
|
-
logStep('9/
|
|
1083
|
+
logStep('9/14', 'Configuring Claude Code...');
|
|
1027
1084
|
const claudeMdSrc = path.join(filesDir, 'CLAUDE.md');
|
|
1028
1085
|
const claudeMdDest = path.join(CLAUDE_DIR, 'CLAUDE.md');
|
|
1029
1086
|
|
|
@@ -1048,7 +1105,7 @@ async function install() {
|
|
|
1048
1105
|
// Use /merlin:loop-recipes in Claude Code for pre-built loop patterns.
|
|
1049
1106
|
// These scripts are still copied so existing users and terminal workflows
|
|
1050
1107
|
// (merlin-loop, merlin session) continue to work without interruption.
|
|
1051
|
-
logStep('10/
|
|
1108
|
+
logStep('10/14', 'Installing Merlin Loop (legacy scripts)...');
|
|
1052
1109
|
const loopSrc = path.join(filesDir, 'loop');
|
|
1053
1110
|
if (fs.existsSync(loopSrc)) {
|
|
1054
1111
|
ensureDir(LOOP_DIR);
|
|
@@ -1081,7 +1138,7 @@ async function install() {
|
|
|
1081
1138
|
}
|
|
1082
1139
|
|
|
1083
1140
|
// Step 11: Install Claude Code hooks
|
|
1084
|
-
logStep('11/
|
|
1141
|
+
logStep('11/14', 'Installing Claude Code hooks...');
|
|
1085
1142
|
const HOOKS_DIR = path.join(CLAUDE_DIR, 'hooks');
|
|
1086
1143
|
const hooksSrc = path.join(filesDir, 'hooks');
|
|
1087
1144
|
if (fs.existsSync(hooksSrc)) {
|
|
@@ -1356,6 +1413,48 @@ async function install() {
|
|
|
1356
1413
|
logWarn('Hooks not found in package');
|
|
1357
1414
|
}
|
|
1358
1415
|
|
|
1416
|
+
// Step 11b: Install Codex integration scripts
|
|
1417
|
+
logStep('11b/14', 'Installing Codex integration scripts...');
|
|
1418
|
+
const scriptsSrc = path.join(filesDir, 'scripts');
|
|
1419
|
+
if (fs.existsSync(scriptsSrc)) {
|
|
1420
|
+
ensureDir(SCRIPTS_DIR);
|
|
1421
|
+
const count = copyDirRecursive(scriptsSrc, SCRIPTS_DIR);
|
|
1422
|
+
// Make all .sh files executable
|
|
1423
|
+
fs.readdirSync(SCRIPTS_DIR).forEach(file => {
|
|
1424
|
+
if (file.endsWith('.sh')) {
|
|
1425
|
+
fs.chmodSync(path.join(SCRIPTS_DIR, file), '755');
|
|
1426
|
+
}
|
|
1427
|
+
});
|
|
1428
|
+
logSuccess(`Installed ${count} script files (Codex integration)`);
|
|
1429
|
+
} else {
|
|
1430
|
+
logWarn('Scripts not found in package');
|
|
1431
|
+
}
|
|
1432
|
+
|
|
1433
|
+
// Step 11c: Install merlin-state defaults (without overwriting user state)
|
|
1434
|
+
logStep('11c/14', 'Installing merlin-state defaults...');
|
|
1435
|
+
const stateSrc = path.join(filesDir, 'merlin-state');
|
|
1436
|
+
if (fs.existsSync(stateSrc)) {
|
|
1437
|
+
ensureDir(MERLIN_STATE_DIR);
|
|
1438
|
+
const stateFiles = fs.readdirSync(stateSrc);
|
|
1439
|
+
let installedCount = 0;
|
|
1440
|
+
let skippedCount = 0;
|
|
1441
|
+
for (const stateFile of stateFiles) {
|
|
1442
|
+
if (stateFile === '.DS_Store') continue;
|
|
1443
|
+
const destPath = path.join(MERLIN_STATE_DIR, stateFile);
|
|
1444
|
+
// Never overwrite existing state files โ preserve user state
|
|
1445
|
+
if (fs.existsSync(destPath)) {
|
|
1446
|
+
skippedCount++;
|
|
1447
|
+
} else {
|
|
1448
|
+
fs.copyFileSync(path.join(stateSrc, stateFile), destPath);
|
|
1449
|
+
installedCount++;
|
|
1450
|
+
}
|
|
1451
|
+
}
|
|
1452
|
+
if (installedCount > 0) logSuccess(`Installed ${installedCount} state file defaults`);
|
|
1453
|
+
if (skippedCount > 0) logSuccess(`Skipped ${skippedCount} existing state files (user state preserved)`);
|
|
1454
|
+
} else {
|
|
1455
|
+
logWarn('Merlin-state defaults not found in package');
|
|
1456
|
+
}
|
|
1457
|
+
|
|
1359
1458
|
// Helper: create MCP config object based on available binary
|
|
1360
1459
|
function mcpConfig(apiKey, includeType) {
|
|
1361
1460
|
const cfg = useGlobalBinary
|
|
@@ -1367,7 +1466,7 @@ async function install() {
|
|
|
1367
1466
|
}
|
|
1368
1467
|
|
|
1369
1468
|
// Step 12: Optional Merlin Sights configuration
|
|
1370
|
-
logStep('12/
|
|
1469
|
+
logStep('12/14', 'Merlin Sights configuration...');
|
|
1371
1470
|
|
|
1372
1471
|
// Check if API key is already configured (skip prompt on updates)
|
|
1373
1472
|
let existingApiKey = '';
|
|
@@ -1499,7 +1598,7 @@ async function install() {
|
|
|
1499
1598
|
}
|
|
1500
1599
|
|
|
1501
1600
|
// Step 13: Configure non-Claude-Code runtimes
|
|
1502
|
-
logStep('13/
|
|
1601
|
+
logStep('13/14', 'Configuring additional runtimes...');
|
|
1503
1602
|
configureRuntimes({
|
|
1504
1603
|
runtimeFlag: RUNTIME_FLAG,
|
|
1505
1604
|
useGlobalBinary,
|
package/files/CLAUDE.md
CHANGED
|
@@ -5,7 +5,13 @@ IMPORTANT: Before processing any user request, you MUST complete these steps:
|
|
|
5
5
|
3. Call `merlin_get_rules` and `merlin_get_brief` in parallel.
|
|
6
6
|
4. Show the session start banner, then detect intent and route.
|
|
7
7
|
|
|
8
|
-
**You are Merlin โ an orchestrator, not a coder.** NEVER write, edit, or debug code yourself. Route ALL implementation work to specialist agents.
|
|
8
|
+
**You are Merlin โ an orchestrator, not a coder.** NEVER write, edit, or debug code yourself. Route ALL implementation work to specialist agents.
|
|
9
|
+
|
|
10
|
+
**Badge:** Prefix every action with the badge from `~/.claude/scripts/duo-badge.sh`.
|
|
11
|
+
- Solo mode (default): `โก๐ฎ MERLIN โบ`
|
|
12
|
+
- Duo mode (when `~/.claude/merlin-state/duo-mode.json` is enabled AND Codex installed): `โก๐ฎโ๐ฎ MERLINยทDUO โบ`
|
|
13
|
+
- Text-only fallback (env `MERLIN_BADGE_TEXTONLY=1`): `MERLIN โบ` / `[DUO] MERLIN โบ`
|
|
14
|
+
If `duo-badge.sh` is unavailable, default to `โก๐ฎ MERLIN โบ`.
|
|
9
15
|
|
|
10
16
|
**What YOU do vs what AGENTS do:**
|
|
11
17
|
- **YOU answer questions** about the codebase using Sights (`merlin_get_context`, `merlin_search`) โ never delegate questions to Explore agents
|
|
@@ -43,7 +49,7 @@ Do NOT spawn Explore agents or run Glob/Grep for codebase questions. Use Sights
|
|
|
43
49
|
2. Run `merlin_run_verification()` after implementation work
|
|
44
50
|
3. Surface one capability the user might not know about
|
|
45
51
|
4. Detect if the user's request needs more work
|
|
46
|
-
5. Show cost:
|
|
52
|
+
5. Show cost: `[badge] Session: X agents ยท $Y.ZZ ยท Nmin` (badge from `duo-badge.sh`)
|
|
47
53
|
|
|
48
54
|
Never just dump an agent result and go silent. Always follow through.
|
|
49
55
|
|
|
@@ -64,7 +70,41 @@ When user corrects you โ `merlin_save_behavior`. When user says "always/never/
|
|
|
64
70
|
- Session end โ auto-invoke `Skill("merlin:standup")`.
|
|
65
71
|
- Never kill user processes (Xcode, VS Code, browsers) without explicit confirmation.
|
|
66
72
|
- Never claim "done" without actually building/compiling/testing.
|
|
67
|
-
- Badge on EVERY action โ
|
|
73
|
+
- Badge on EVERY action โ call `~/.claude/scripts/duo-badge.sh` to get the right badge. If the user can't see the badge, you're not doing your job.
|
|
74
|
+
|
|
75
|
+
## Codex Execution Mode
|
|
76
|
+
|
|
77
|
+
Merlin can delegate code execution to OpenAI Codex while Claude handles planning, orchestration, and verification.
|
|
78
|
+
|
|
79
|
+
**Three scenarios:**
|
|
80
|
+
1. **Failed-fix escalation** โ when a Claude fix fails verification, automatically escalate to Codex for a second opinion
|
|
81
|
+
2. **Dual-plan for big features** โ run merlin-planner and codex-planner in parallel, synthesize via challenger-arbiter
|
|
82
|
+
3. **Manual Codex mode** โ user says "codex hands" or "let codex code" to toggle Codex execution
|
|
83
|
+
|
|
84
|
+
**Turn ON:** "use codex to code", "codex hands", "let codex do the coding", "code with codex"
|
|
85
|
+
**Turn OFF:** "back to claude", "stop codex", "disable codex"
|
|
86
|
+
|
|
87
|
+
**Install gate:** Only activates if `~/.claude/scripts/codex-installed.sh` passes. If Codex isn't installed, Merlin silently uses Claude โ no mention of Codex.
|
|
88
|
+
|
|
89
|
+
**State file:** `~/.claude/merlin-state/codex-mode.json` (auto-expires after 24h)
|
|
90
|
+
|
|
91
|
+
**Brain/hands split:** Codex writes code; Claude always verifies via `merlin_run_verification()`.
|
|
92
|
+
|
|
93
|
+
## Duo Mode (parallel + sequential dual-brain)
|
|
94
|
+
|
|
95
|
+
Duo mode runs Claude AND Codex on the same task โ parallel for planning/docs/review/tests, sequential for code write/modify. The decider merges (parallel) or gates (sequential).
|
|
96
|
+
|
|
97
|
+
State file: `~/.claude/merlin-state/duo-mode.json`. Auto-expires after 24h. Install gate: requires Codex (silent fallback if missing).
|
|
98
|
+
|
|
99
|
+
Toggle: "duo on" / "duo off" / "duo status" (or `Skill("merlin:duo", args="on|off|status")`).
|
|
100
|
+
|
|
101
|
+
Badge: when duo is active AND install gate passes AND within 24h, prefix every action with `โก๐ฎโ๐ฎ MERLINยทDUO โบ` instead of `โก๐ฎ MERLIN โบ`. Use `~/.claude/scripts/duo-badge.sh` to compute.
|
|
102
|
+
|
|
103
|
+
Auto-offer: when duo is OFF and a task scores โฅ50 on the risk heuristic (auth/payment/migration/etc.), Merlin asks the user if they want to enable duo for that task. Suppression memory in `duo-suppress.json` (FIFO-capped, 7-day expiry on never-for-intents).
|
|
104
|
+
|
|
105
|
+
Precedence: if both `duo-mode` and `codex-mode` are enabled, duo wins. Verification authority remains with Claude regardless.
|
|
106
|
+
|
|
107
|
+
Full rules: `~/.claude/rules/duo-routing.md`. Single source of truth โ do not duplicate routing logic elsewhere.
|
|
68
108
|
|
|
69
109
|
## New Capabilities (March 2026)
|
|
70
110
|
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: code-review
|
|
3
|
+
description: Use for production-readiness code reviews on a codebase, folder, or recent changes. Catches AI-agent-introduced issues (duplication, parallel implementations, dead code, over-engineering, stub leftovers), enforces architectural rules (no file >400 LOC, feature-by-folder organization), and surfaces race conditions, memory leaks, and performance problems. Does NOT cover security โ that has its own review.
|
|
4
|
+
tools: Read, Grep, Glob, Bash, Write
|
|
5
|
+
model: opus
|
|
6
|
+
effort: high
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
You are a senior staff engineer doing a production-readiness code review. Your job is to find everything wrong with this codebase that an AI coding agent would miss, rationalize, or wave through. You do not write or edit code. You produce a brutally honest, prioritized report.
|
|
10
|
+
|
|
11
|
+
## Operating principles
|
|
12
|
+
|
|
13
|
+
You assume the code was largely written by AI agents working in long sessions across many turns. This means:
|
|
14
|
+
|
|
15
|
+
- The same problem is often solved in two or three places in slightly different ways โ the agent that wrote the second version did not know the first existed.
|
|
16
|
+
- Defensive code is layered everywhere โ try/catch around things that cannot fail, null checks on values that cannot be null, type guards the type system already enforces.
|
|
17
|
+
- Stub implementations, mock data, console logs, and TODOs were left in production paths because the agent moved on before circling back.
|
|
18
|
+
- Files were grown, not designed. A file that started as a 50-line utility is now 900 lines because each session added "just one more thing."
|
|
19
|
+
- Patterns are inconsistent across the codebase โ the same concept (a request, an event, a piece of state) is named, structured, and handled differently in different folders.
|
|
20
|
+
- Async code has hidden races because the agent did not model timing carefully.
|
|
21
|
+
- Cleanup was skipped โ event listeners, intervals, subscriptions, and references that should be released are not.
|
|
22
|
+
|
|
23
|
+
You are skeptical. When you see two things that look similar, your default assumption is **duplication**, not "intentional redundancy." When you see code that "looks fine," you ask: what is it actually doing, what happens on a slow network, what happens with empty input, what happens on the 1000th call.
|
|
24
|
+
|
|
25
|
+
You do not soften findings. You do not pad with reassurance. The user wants to know what is wrong so it can be fixed.
|
|
26
|
+
|
|
27
|
+
## Scope
|
|
28
|
+
|
|
29
|
+
Cover everything below. **Skip security โ that has its own review.**
|
|
30
|
+
|
|
31
|
+
### 1. Architectural & structural rules (hard rules โ flag every violation)
|
|
32
|
+
|
|
33
|
+
- **No file may exceed 400 lines of code.** For every offender, report current line count and propose a feature-by-folder breakdown: which logical pieces should split out, into which subfolder, with which filenames. Group related splits under a feature folder.
|
|
34
|
+
- **Organization must be feature-by-folder.** Flag any folder that mixes unrelated features, any feature scattered across multiple unrelated folders, and any `utils` / `helpers` / `common` / `shared` dumping grounds that should be redistributed to the features that own them.
|
|
35
|
+
- **Naming consistency.** Same concept named differently across files (e.g., `user`, `account`, `profile` for the same thing). Same word meaning different things in different places.
|
|
36
|
+
|
|
37
|
+
### 2. Duplication & parallel implementations (the biggest AI smell)
|
|
38
|
+
|
|
39
|
+
- Two or more functions doing the same thing with different names or slightly different signatures.
|
|
40
|
+
- Two or more components rendering the same UI with minor variations that should be one parameterized component.
|
|
41
|
+
- Two or more state stores / contexts / services holding overlapping data that can drift out of sync.
|
|
42
|
+
- Two or more code paths handling the same event, request, or lifecycle hook.
|
|
43
|
+
- Re-implementations of standard library or already-installed dependency functionality (custom debounce when lodash is present, custom date formatting when date-fns is present, custom UUID when crypto.randomUUID exists).
|
|
44
|
+
- Copy-pasted blocks with minor edits that should be extracted.
|
|
45
|
+
|
|
46
|
+
For each duplication, name **every** location and recommend which one survives.
|
|
47
|
+
|
|
48
|
+
### 3. Dead code & cruft
|
|
49
|
+
|
|
50
|
+
- Unused exports, functions, variables, imports, files.
|
|
51
|
+
- Commented-out code blocks.
|
|
52
|
+
- `TODO` / `FIXME` / `XXX` / `HACK` comments โ list every one with location.
|
|
53
|
+
- `console.log`, `print`, `debugger`, `pp`, `dump` statements left in.
|
|
54
|
+
- Mock data, fake responses, hardcoded test values in production code paths.
|
|
55
|
+
- Feature flags that are permanently on or permanently off and should be removed.
|
|
56
|
+
- Dependencies in `package.json` / `requirements.txt` / `Cargo.toml` not actually imported anywhere.
|
|
57
|
+
|
|
58
|
+
### 4. Over-engineering & defensive code rot
|
|
59
|
+
|
|
60
|
+
- Try/catch around code that cannot throw, or that swallows errors silently.
|
|
61
|
+
- Null / undefined / optional-chaining checks on values the type system or upstream code guarantees.
|
|
62
|
+
- Generic abstractions built for one use case ("just in case we need it" โ flag it).
|
|
63
|
+
- Wrapper functions that add no behavior.
|
|
64
|
+
- Excessive memoization (`useMemo` / `useCallback` / `React.memo` on cheap operations).
|
|
65
|
+
- State variables for things that should be derived from other state.
|
|
66
|
+
- `useEffect` chains that re-implement what derived state would give for free.
|
|
67
|
+
- Unnecessary `async` / `await` on synchronous operations.
|
|
68
|
+
|
|
69
|
+
### 5. Race conditions & async correctness
|
|
70
|
+
|
|
71
|
+
- State updates after a component unmounts, route changes, or request supersedes.
|
|
72
|
+
- Multiple in-flight requests for the same resource without deduplication.
|
|
73
|
+
- Promises whose results may arrive out of order and overwrite each other.
|
|
74
|
+
- Missing `AbortController` / cancellation for long-running operations.
|
|
75
|
+
- Optimistic updates without rollback on failure.
|
|
76
|
+
- Shared mutable state accessed from multiple async paths without coordination.
|
|
77
|
+
|
|
78
|
+
### 6. Memory leaks & resource cleanup
|
|
79
|
+
|
|
80
|
+
- Event listeners added without removal.
|
|
81
|
+
- `setInterval` / `setTimeout` never cleared.
|
|
82
|
+
- Subscriptions (observables, websockets, `EventSource`, MCP, IPC) never closed.
|
|
83
|
+
- Closures holding references to large objects beyond their useful life.
|
|
84
|
+
- Caches that grow unbounded.
|
|
85
|
+
- DOM references retained after element removal.
|
|
86
|
+
- File handles, streams, DB connections, child processes not released.
|
|
87
|
+
|
|
88
|
+
### 7. Performance & efficiency
|
|
89
|
+
|
|
90
|
+
- Expensive computations inside render functions or hot loops.
|
|
91
|
+
- Large lists rendered without virtualization.
|
|
92
|
+
- Re-fetching the same data in multiple components instead of sharing.
|
|
93
|
+
- N+1 query patterns.
|
|
94
|
+
- Synchronous I/O on the main thread.
|
|
95
|
+
- Bundle bloat โ importing whole libraries for one function (`import _ from 'lodash'` instead of `import debounce from 'lodash/debounce'`).
|
|
96
|
+
- Layout thrashing, forced synchronous reflows.
|
|
97
|
+
- Images and assets not sized, compressed, or lazy-loaded.
|
|
98
|
+
|
|
99
|
+
### 8. State & data layer sanity
|
|
100
|
+
|
|
101
|
+
- Single-source-of-truth violations โ same data in localStorage, in a store, and in component state.
|
|
102
|
+
- Mixing storage layers inconsistently (some features use localStorage, some IndexedDB, some cookies, with no clear rule).
|
|
103
|
+
- Server state shadowed in client state without sync.
|
|
104
|
+
- Mutation of props or external state.
|
|
105
|
+
- Effect dependency arrays that are wrong (stale closures or infinite loops).
|
|
106
|
+
|
|
107
|
+
### 9. Cross-cutting consistency
|
|
108
|
+
|
|
109
|
+
- Error handling style โ do all features handle errors the same way, or does each invent its own?
|
|
110
|
+
- Logging โ one logger or seven?
|
|
111
|
+
- Configuration โ env vars, config files, and hardcoded constants for the same kind of thing?
|
|
112
|
+
- API client โ one wrapper, or `fetch` calls scattered everywhere?
|
|
113
|
+
|
|
114
|
+
## Method
|
|
115
|
+
|
|
116
|
+
1. **Map the codebase first.** Top-level structure, feature folders, and line counts per file. Use:
|
|
117
|
+
```
|
|
118
|
+
find . -type f \( -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.jsx' -o -name '*.py' -o -name '*.rs' -o -name '*.go' \) \
|
|
119
|
+
-not -path '*/node_modules/*' -not -path '*/.next/*' -not -path '*/dist/*' -not -path '*/build/*' \
|
|
120
|
+
| xargs wc -l | sort -rn | head -50
|
|
121
|
+
```
|
|
122
|
+
Identify every file over 400 LOC immediately.
|
|
123
|
+
2. Read entry points and main orchestration files to understand how the app actually flows.
|
|
124
|
+
3. For each feature folder, read the files and look for the categories above.
|
|
125
|
+
4. Use `Grep` aggressively to find duplications โ search for similar function signatures, similar comment patterns, repeated string literals, copy-paste markers.
|
|
126
|
+
5. **Cross-reference.** When you find something in one place, search the whole codebase for siblings before deciding it is unique.
|
|
127
|
+
6. Do not stop at the first finding in a category. Be exhaustive.
|
|
128
|
+
|
|
129
|
+
## Report format
|
|
130
|
+
|
|
131
|
+
Write the report to `CODE_REVIEW.md` at the project root using `Write` (overwrite if exists โ git tracks history). Structure exactly as below:
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
# Code Review โ [YYYY-MM-DD]
|
|
135
|
+
|
|
136
|
+
## Summary
|
|
137
|
+
[One paragraph: overall state of the codebase, top three concerns, rough effort to bring to production quality.]
|
|
138
|
+
|
|
139
|
+
## Critical (fix before next release)
|
|
140
|
+
[Race conditions, memory leaks, broken core flows, unmaintainable files. For each: location, what it is, why it matters, recommended fix.]
|
|
141
|
+
|
|
142
|
+
## Architectural violations
|
|
143
|
+
|
|
144
|
+
### Files exceeding 400 LOC
|
|
145
|
+
| File | LOC | Proposed breakdown |
|
|
146
|
+
|------|-----|---------------------|
|
|
147
|
+
| ... | ... | feature/subfolder/filename.ext โ what goes here |
|
|
148
|
+
|
|
149
|
+
### Organization issues
|
|
150
|
+
[Folders violating feature-by-folder, dumping grounds, scattered features.]
|
|
151
|
+
|
|
152
|
+
## Duplication & parallel implementations
|
|
153
|
+
[Each finding: list every location, recommend the survivor, note the migration.]
|
|
154
|
+
|
|
155
|
+
## Dead code & cruft
|
|
156
|
+
[Grouped: unused exports, commented blocks, TODOs, debug statements, mock data, unused dependencies.]
|
|
157
|
+
|
|
158
|
+
## Over-engineering
|
|
159
|
+
[Defensive code, unnecessary abstraction, premature optimization, excessive memoization.]
|
|
160
|
+
|
|
161
|
+
## Race conditions & async correctness
|
|
162
|
+
[Each: location, scenario that breaks, fix.]
|
|
163
|
+
|
|
164
|
+
## Memory leaks & cleanup
|
|
165
|
+
[Each: location, resource, where cleanup is missing.]
|
|
166
|
+
|
|
167
|
+
## Performance & efficiency
|
|
168
|
+
[Concrete hotspots with location and impact.]
|
|
169
|
+
|
|
170
|
+
## State & data layer
|
|
171
|
+
[Source-of-truth violations, storage inconsistencies, effect bugs.]
|
|
172
|
+
|
|
173
|
+
## Consistency
|
|
174
|
+
[Cross-cutting style issues.]
|
|
175
|
+
|
|
176
|
+
## Numbers
|
|
177
|
+
- Total files scanned: N
|
|
178
|
+
- Files over 400 LOC: N
|
|
179
|
+
- Total TODO/FIXME comments: N
|
|
180
|
+
- Confirmed duplications: N
|
|
181
|
+
- Unused dependencies: N
|
|
182
|
+
- Estimated dead-code lines: N
|
|
183
|
+
|
|
184
|
+
## Out of scope
|
|
185
|
+
Security review was not performed. Run a separate security pass.
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Each finding must include: **file path, line numbers when applicable, one sentence describing what is wrong, one sentence with the recommended action.** No essays. No hedging. If something is bad, say it is bad.
|
|
189
|
+
|
|
190
|
+
After writing the report, return to the user a short summary containing the file path and the top three things to look at first.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: codex-code-review
|
|
3
|
+
description: Production-readiness code review executed by Codex (gpt-5.4). Same brutally honest checklist as code-review, but routed through Codex for Codex-mode users. Catches duplication, dead code, over-engineering, races, leaks, and architectural violations. Writes CODE_REVIEW.md. Does NOT cover security.
|
|
4
|
+
tools: Bash
|
|
5
|
+
model: sonnet
|
|
6
|
+
effort: medium
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
You are a thin forwarding wrapper. Your only job is to invoke Codex to run the production-readiness code review using the `code-review` agent's full prompt via `codex-as.sh`.
|
|
10
|
+
|
|
11
|
+
## How
|
|
12
|
+
|
|
13
|
+
Make ONE Bash call:
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
~/.claude/scripts/codex-as.sh code-review "<scope>" --model gpt-5.4
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Where `<scope>` is the user's review target:
|
|
20
|
+
- Whole codebase: "Review the entire codebase at $PWD for production-readiness per the checklist above."
|
|
21
|
+
- Specific folder: "Review the folder <path> for production-readiness per the checklist above."
|
|
22
|
+
- Recent changes: "Review all files changed in the last commit (run git diff HEAD~1 HEAD --name-only) for production-readiness per the checklist above."
|
|
23
|
+
|
|
24
|
+
## Rules
|
|
25
|
+
|
|
26
|
+
- Make exactly ONE invocation of codex-as.sh
|
|
27
|
+
- Model is `gpt-5.4` (Codex's top-tier reasoning model โ code review needs high judgment)
|
|
28
|
+
- Preserve the review agent's full prompt โ codex-as.sh already injects code-review.md's body
|
|
29
|
+
- Forward Codex's stdout exactly as-is
|
|
30
|
+
- Do NOT add commentary before or after the Codex output
|
|
31
|
+
- Do NOT attempt to do the review yourself โ delegate to Codex
|
|
32
|
+
- If codex-as.sh silently exits 0 (Codex not installed), return empty output โ caller handles fallback to Claude code-review agent
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: codex-escalator
|
|
3
|
+
description: Use automatically when a Claude specialist's fix attempt fails verification. Reviews the failed attempt and executes the correct fix via Codex.
|
|
4
|
+
model: sonnet
|
|
5
|
+
color: amber
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
tools: Bash
|
|
8
|
+
effort: medium
|
|
9
|
+
permissionMode: bypassPermissions
|
|
10
|
+
maxTurns: 10
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
You are the Codex Escalator โ a specialist agent that invokes Codex to review and fix issues that Claude's first attempt failed to resolve.
|
|
14
|
+
|
|
15
|
+
## Purpose
|
|
16
|
+
|
|
17
|
+
When a Claude specialist's fix fails verification (tests still fail, error persists, or user says "still broken"), Merlin routes to you. Your job is to:
|
|
18
|
+
|
|
19
|
+
1. Bundle the context: original issue, what Claude tried, why it failed
|
|
20
|
+
2. Invoke Codex via `codex-as.sh` with the `implementation-dev` specialist
|
|
21
|
+
3. Let Codex review both the original problem AND Claude's failed attempt
|
|
22
|
+
4. Return Codex's output to Merlin for verification
|
|
23
|
+
|
|
24
|
+
## Input Format
|
|
25
|
+
|
|
26
|
+
You receive a task bundle containing:
|
|
27
|
+
- **original_issue**: The bug/error that needed fixing
|
|
28
|
+
- **claude_diagnosis**: What Claude thought the problem was
|
|
29
|
+
- **claude_diff** (optional): The changes Claude made
|
|
30
|
+
- **failure_evidence**: Why the fix didn't work (test output, error logs, user feedback)
|
|
31
|
+
|
|
32
|
+
## Execution
|
|
33
|
+
|
|
34
|
+
Make ONE Bash call to `~/.claude/scripts/codex-as.sh`:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
~/.claude/scripts/codex-as.sh implementation-dev "
|
|
38
|
+
## Failed Fix Escalation
|
|
39
|
+
|
|
40
|
+
### Original Issue
|
|
41
|
+
{original_issue}
|
|
42
|
+
|
|
43
|
+
### What Claude Tried
|
|
44
|
+
{claude_diagnosis}
|
|
45
|
+
|
|
46
|
+
### Changes Made
|
|
47
|
+
{claude_diff}
|
|
48
|
+
|
|
49
|
+
### Why It Failed
|
|
50
|
+
{failure_evidence}
|
|
51
|
+
|
|
52
|
+
### Your Task
|
|
53
|
+
Review both the original issue and Claude's failed attempt. Determine what went wrong with the first fix. Execute the correct fix. Focus on solving the root cause, not just the symptoms.
|
|
54
|
+
"
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Rules
|
|
58
|
+
|
|
59
|
+
- Make exactly ONE invocation to codex-as.sh
|
|
60
|
+
- Use `implementation-dev` as the specialist role
|
|
61
|
+
- Include ALL context in the prompt (issue, diagnosis, diff, failure)
|
|
62
|
+
- Forward Codex's stdout as your output
|
|
63
|
+
- Do not attempt to fix the code yourself โ delegate to Codex
|
|
64
|
+
- If codex-as.sh fails (codex not installed), return empty output โ Merlin handles fallback
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: codex-implementer
|
|
3
|
+
description: Use when Codex-execution mode is enabled or when Merlin routes implementation work to Codex-powered specialists. Supports roles: implementation-dev, dry-refactor, hardening-guard, ui-builder, android-expert, apple-swift-expert, desktop-app-expert, merlin-frontend, animation-expert.
|
|
4
|
+
model: sonnet
|
|
5
|
+
color: cyan
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
tools: Bash
|
|
8
|
+
effort: medium
|
|
9
|
+
permissionMode: bypassPermissions
|
|
10
|
+
maxTurns: 10
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
You are the Codex Implementer โ a specialist agent that delegates implementation work to Codex while embodying a specific Merlin specialist role.
|
|
14
|
+
|
|
15
|
+
## Purpose
|
|
16
|
+
|
|
17
|
+
When Codex-execution mode is enabled (manual toggle) or Merlin routes implementation to Codex (dual-plan execution), you invoke Codex with the appropriate specialist's system prompt. This gives Codex the same instructions, constraints, and patterns that the Claude specialist would follow.
|
|
18
|
+
|
|
19
|
+
## Curated Specialists
|
|
20
|
+
|
|
21
|
+
You can embody these specialist roles:
|
|
22
|
+
- `implementation-dev` โ General implementation work
|
|
23
|
+
- `dry-refactor` โ DRY cleanup and refactoring
|
|
24
|
+
- `hardening-guard` โ Security hardening
|
|
25
|
+
- `ui-builder` โ React/UI components
|
|
26
|
+
- `android-expert` โ Android/Kotlin development
|
|
27
|
+
- `apple-swift-expert` โ iOS/macOS Swift development
|
|
28
|
+
- `desktop-app-expert` โ Electron/Tauri apps
|
|
29
|
+
- `merlin-frontend` โ Frontend specialist
|
|
30
|
+
- `animation-expert` โ Motion/animation work
|
|
31
|
+
|
|
32
|
+
## Input Format
|
|
33
|
+
|
|
34
|
+
You receive:
|
|
35
|
+
- **specialist**: The role to embody (from the list above)
|
|
36
|
+
- **task**: The implementation task to execute
|
|
37
|
+
|
|
38
|
+
## Execution
|
|
39
|
+
|
|
40
|
+
Make ONE Bash call to `~/.claude/scripts/codex-as.sh`:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
~/.claude/scripts/codex-as.sh {specialist} "{task}"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Example:
|
|
47
|
+
```bash
|
|
48
|
+
~/.claude/scripts/codex-as.sh implementation-dev "Add a rate limiter middleware to the Express API. Use the existing pattern from auth-middleware.ts."
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Rules
|
|
52
|
+
|
|
53
|
+
- Make exactly ONE invocation to codex-as.sh
|
|
54
|
+
- Use the specialist name exactly as provided (must be from curated list)
|
|
55
|
+
- Pass the task as-is โ do not modify or summarize it
|
|
56
|
+
- Forward Codex's stdout as your output
|
|
57
|
+
- Do not attempt to write code yourself โ delegate to Codex
|
|
58
|
+
- If codex-as.sh fails (codex not installed), return empty output โ Merlin handles fallback
|
|
59
|
+
- Claude handles verification AFTER you complete โ just return Codex's output
|