dual-brain 3.7.2 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -5
- package/hooks/cost-logger.mjs +3 -1
- package/hooks/enforce-tier.mjs +26 -3
- package/hooks/failure-detector.mjs +76 -14
- package/hooks/test-orchestrator.mjs +359 -1
- package/package.json +4 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Dual-Brain Orchestrator
|
|
2
2
|
|
|
3
|
-
One command. Both brains. Auto-detected. Auto-configured.
|
|
3
|
+
One command. Both brains. Auto-detected. Auto-configured. Default profile: **auto**.
|
|
4
4
|
|
|
5
5
|
Dual-provider orchestration for Claude Code across Claude and OpenAI subscriptions. Routes search to cheap models, execution to mid-tier, thinking to the most capable. Dispatches work to GPT via Codex CLI. Dual-brain analysis for high-risk decisions.
|
|
6
6
|
|
|
@@ -36,9 +36,9 @@ npx -y dual-brain
|
|
|
36
36
|
|
|
37
37
|
## How it works
|
|
38
38
|
|
|
39
|
-
**Two hooks
|
|
39
|
+
**Two advisory hooks** are registered in `.claude/settings.json` and fire on each tool use. They detect and recommend — they do not execute actions without user confirmation:
|
|
40
40
|
|
|
41
|
-
- **enforce-tier.mjs** (PreToolUse on Agent): Classifies tasks,
|
|
41
|
+
- **enforce-tier.mjs** (PreToolUse on Agent): Classifies tasks, recommends the correct model tier, detects duplicates, suggests cross-provider routing
|
|
42
42
|
- **cost-logger.mjs** (PostToolUse on all tools): Logs usage to daily rotated files for cost tracking
|
|
43
43
|
|
|
44
44
|
**Three tiers route work by complexity:**
|
|
@@ -49,7 +49,7 @@ npx -y dual-brain
|
|
|
49
49
|
| Execute | Sonnet | GPT-5.4 | edits, tests, git ops |
|
|
50
50
|
| Think | Opus | GPT-5.5 | architecture, review, planning |
|
|
51
51
|
|
|
52
|
-
**Dual-brain**
|
|
52
|
+
**Dual-brain** is recommended automatically for high-risk decisions — hooks detect the risk level and suggest dual-brain analysis, where both providers think on the same problem independently.
|
|
53
53
|
|
|
54
54
|
## Scripts
|
|
55
55
|
|
|
@@ -63,7 +63,7 @@ npx -y dual-brain
|
|
|
63
63
|
| `hooks/gpt-work-dispatcher.mjs` | Dispatch execution tasks to GPT via Codex CLI |
|
|
64
64
|
| `hooks/session-report.mjs` | Session-end summary: activity, compliance, quality |
|
|
65
65
|
| `hooks/health-check.mjs` | Verify all hooks and dependencies are working |
|
|
66
|
-
| `hooks/test-orchestrator.mjs` | Self-test harness (
|
|
66
|
+
| `hooks/test-orchestrator.mjs` | Self-test harness (29 tests) |
|
|
67
67
|
| `hooks/setup-wizard.mjs` | Interactive config (optional — for custom plans) |
|
|
68
68
|
| `hooks/install-git-hooks.mjs` | Git pre-commit hook for quality gate |
|
|
69
69
|
|
|
@@ -85,6 +85,20 @@ After install, edit these files:
|
|
|
85
85
|
- `review-rules.md` — project-specific rules for GPT code review
|
|
86
86
|
- `settings.json` — hook registrations (auto-generated, safe to extend)
|
|
87
87
|
|
|
88
|
+
## Profiles
|
|
89
|
+
|
|
90
|
+
The active profile controls routing posture, budgets, and quality gate behavior. Default: **auto**.
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
npx dual-brain mode cost-saver # switch profile
|
|
94
|
+
npx dual-brain status # check current profile and provider health
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
- **auto** (default): Adapts routing based on task risk, provider health, and outcomes. Auto-escalates tier on repeated failures.
|
|
98
|
+
- **balanced**: Best model per tier, normal budgets, reviews at medium+ risk.
|
|
99
|
+
- **cost-saver**: Prefer cheaper models, lower budgets, skip GPT for non-critical work.
|
|
100
|
+
- **quality-first**: Dual-brain for medium+ risk, higher budgets, stricter reviews.
|
|
101
|
+
|
|
88
102
|
## Requirements
|
|
89
103
|
|
|
90
104
|
- Node 20+
|
package/hooks/cost-logger.mjs
CHANGED
|
@@ -265,9 +265,11 @@ async function main() {
|
|
|
265
265
|
// Record failures for adaptive routing (failure-loop detection)
|
|
266
266
|
if (status === 'error' && toolName === 'Agent') {
|
|
267
267
|
try {
|
|
268
|
-
const { recordFailure } = await import('./failure-detector.mjs');
|
|
268
|
+
const { recordFailure, pruneOldFailures } = await import('./failure-detector.mjs');
|
|
269
269
|
const promptHash = createHash('md5').update(JSON.stringify(toolInput)).digest('hex').slice(0, 12);
|
|
270
270
|
recordFailure(promptHash, tier, payload?.error || 'agent_error');
|
|
271
|
+
// Best-effort cleanup of stale failure entries (>24h old)
|
|
272
|
+
try { pruneOldFailures(); } catch {}
|
|
271
273
|
} catch {}
|
|
272
274
|
}
|
|
273
275
|
|
package/hooks/enforce-tier.mjs
CHANGED
|
@@ -10,6 +10,17 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
10
10
|
const CONFIG_FILE = resolve(__dirname, '..', 'orchestrator.json');
|
|
11
11
|
const PROFILE_FILE = resolve(__dirname, '..', 'dual-brain.profile.json');
|
|
12
12
|
const DRIFT_STATE = resolve(__dirname, '.drift-warned');
|
|
13
|
+
const BURST_FILE = resolve(__dirname, '.burst-state');
|
|
14
|
+
|
|
15
|
+
/**
 * Count Agent dispatches inside a rolling 90-second window and report whether
 * the current dispatch is part of a "burst" (3 or more dispatches in the window).
 *
 * State is persisted in BURST_FILE as `{ count, window_start }` so that
 * separate hook invocations share the same window. The persisted state is only
 * trusted when it is well-formed: `count` must be a non-negative integer and
 * `window_start` a finite epoch-ms value that is not in the future and not
 * older than the window. Anything else (missing file, invalid JSON, `null`,
 * missing fields, a future timestamp) starts a fresh window, so a damaged
 * state file can never pin the hook in burst mode.
 *
 * @returns {boolean} true when this dispatch is the 3rd or later in the window
 */
function detectBurst() {
  const WINDOW_MS = 90_000;
  const BURST_THRESHOLD = 3;
  const now = Date.now();

  let state = { count: 0, window_start: now };
  try {
    const saved = JSON.parse(readFileSync(BURST_FILE, 'utf8'));
    const hasValidCount = Number.isInteger(saved?.count) && saved.count >= 0;
    const hasValidStart = Number.isFinite(saved?.window_start) && saved.window_start <= now;
    if (hasValidCount && hasValidStart && now - saved.window_start <= WINDOW_MS) {
      state = { count: saved.count, window_start: saved.window_start };
    }
  } catch {
    // Missing or unreadable state file: keep the fresh window.
  }

  state.count += 1;

  try {
    writeFileSync(BURST_FILE, JSON.stringify(state));
  } catch {
    // Persistence is best-effort; a failed write must not break the hook.
  }

  return state.count >= BURST_THRESHOLD;
}
|
|
13
24
|
|
|
14
25
|
function loadProfile() {
|
|
15
26
|
try {
|
|
@@ -205,12 +216,23 @@ try {
|
|
|
205
216
|
// Compute prompt hash early for duplicate detection and logging
|
|
206
217
|
const promptHash = createHash('sha256').update(text).digest('hex').slice(0, 12);
|
|
207
218
|
|
|
219
|
+
// Burst detection — suppress noise during wave launches (3+ agents in 90s)
|
|
220
|
+
const burstMode = detectBurst();
|
|
221
|
+
|
|
208
222
|
// Check for duplicate agent dispatch before tier classification
|
|
209
223
|
const duplicate = checkDuplicate(promptHash);
|
|
210
224
|
let duplicateWarning = null;
|
|
211
225
|
if (duplicate) {
|
|
212
226
|
const minutesAgo = Math.round((Date.now() - Date.parse(duplicate.timestamp)) / 60000);
|
|
213
|
-
|
|
227
|
+
if (burstMode) {
|
|
228
|
+
// In burst mode, only warn on exact hash matches (same description+prompt)
|
|
229
|
+
if (duplicate.prompt_hash === promptHash) {
|
|
230
|
+
duplicateWarning = `**[Wave] [Duplicate Warning]** A similar agent task was dispatched ${minutesAgo} minute${minutesAgo !== 1 ? 's' : ''} ago. Reuse the prior result unless the scope changed.`;
|
|
231
|
+
}
|
|
232
|
+
// Otherwise suppress — similar-but-different agents in a wave are expected
|
|
233
|
+
} else {
|
|
234
|
+
duplicateWarning = `**[Duplicate Warning]** A similar agent task was dispatched ${minutesAgo} minute${minutesAgo !== 1 ? 's' : ''} ago. Reuse the prior result unless the scope changed.`;
|
|
235
|
+
}
|
|
214
236
|
}
|
|
215
237
|
|
|
216
238
|
let config;
|
|
@@ -294,7 +316,7 @@ try {
|
|
|
294
316
|
}
|
|
295
317
|
|
|
296
318
|
// Failure loop detection
|
|
297
|
-
const failureCheck = checkFailureLoop(promptHash);
|
|
319
|
+
const failureCheck = checkFailureLoop(promptHash, tier);
|
|
298
320
|
let failureMessage = null;
|
|
299
321
|
if (failureCheck.isLoop) {
|
|
300
322
|
if (failureCheck.suggestion === 'promote_tier' && tier === 'execute') {
|
|
@@ -315,7 +337,8 @@ try {
|
|
|
315
337
|
}
|
|
316
338
|
|
|
317
339
|
// Compute balance hint now that tier is resolved
|
|
318
|
-
|
|
340
|
+
// In burst mode, skip balance hints — one hint per wave is enough
|
|
341
|
+
if (!burstMode) {
|
|
319
342
|
const currentProvider = detectProvider(currentModel);
|
|
320
343
|
if (currentProvider === 'claude') {
|
|
321
344
|
const balance = quickPressureCheck(tier);
|
|
@@ -3,22 +3,38 @@
|
|
|
3
3
|
* failure-detector.mjs — Detects repeated failure loops for adaptive routing.
|
|
4
4
|
*
|
|
5
5
|
* Exports:
|
|
6
|
-
* checkFailureLoop(promptHash) → { isLoop, count, suggestion }
|
|
6
|
+
* checkFailureLoop(promptHash, tier?) → { isLoop, count, weightedScore, suggestion }
|
|
7
7
|
* recordFailure(promptHash, tier, reason) → void
|
|
8
|
+
* pruneOldFailures() → { pruned, remaining }
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
|
-
import { readFileSync, appendFileSync } from 'fs';
|
|
11
|
+
import { readFileSync, appendFileSync, writeFileSync, renameSync, unlinkSync } from 'fs';
|
|
11
12
|
import { dirname, join } from 'path';
|
|
12
13
|
import { fileURLToPath } from 'url';
|
|
13
14
|
|
|
15
|
+
|
|
14
16
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
15
17
|
const LEDGER_FILE = join(__dirname, 'decision-ledger.jsonl');
|
|
16
18
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
+
/**
 * Compute a decay weight for a failure based on its age.
 *
 *   age <= 30 min          -> 1.0
 *   30 min < age <= 60 min -> 0.5
 *   60 min < age <= 120 min -> 0.25
 *   age > 120 min          -> 0
 *
 * A timestamp slightly in the future (negative age, e.g. clock skew) is
 * treated as fresh and weighs 1.0. A non-finite age — which is what an
 * unparseable timestamp (`Date.parse` returning NaN) produces — weighs 0 so
 * malformed ledger entries cannot push a prompt toward a failure loop.
 *
 * @param {number} timestampMs - When the failure happened, in epoch milliseconds.
 * @param {number} now - Current time, in epoch milliseconds.
 * @returns {number} Weight in the set {0, 0.25, 0.5, 1.0}.
 */
function decayWeight(timestampMs, now) {
  const ageMin = (now - timestampMs) / (60 * 1000);
  if (!Number.isFinite(ageMin)) return 0;
  if (ageMin <= 30) return 1.0;
  if (ageMin <= 60) return 0.5;
  if (ageMin <= 120) return 0.25;
  return 0;
}
|
|
30
|
+
|
|
31
|
+
function checkFailureLoop(promptHash, tier) {
|
|
32
|
+
if (!promptHash) return { isLoop: false, count: 0, weightedScore: 0, suggestion: null };
|
|
19
33
|
|
|
20
|
-
const
|
|
21
|
-
|
|
34
|
+
const now = Date.now();
|
|
35
|
+
const twoHoursAgo = now - 2 * 60 * 60 * 1000;
|
|
36
|
+
let count = 0;
|
|
37
|
+
let weightedScore = 0;
|
|
22
38
|
let lastTier = null;
|
|
23
39
|
|
|
24
40
|
try {
|
|
@@ -27,22 +43,26 @@ function checkFailureLoop(promptHash) {
|
|
|
27
43
|
try {
|
|
28
44
|
const entry = JSON.parse(line);
|
|
29
45
|
if (entry.prompt_hash !== promptHash) continue;
|
|
30
|
-
|
|
31
|
-
if (
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
46
|
+
const entryTime = Date.parse(entry.timestamp);
|
|
47
|
+
if (entryTime < twoHoursAgo) continue;
|
|
48
|
+
if (entry.success !== false) continue;
|
|
49
|
+
// If tier is provided, only count matching tiers
|
|
50
|
+
if (tier && entry.tier && entry.tier !== tier) continue;
|
|
51
|
+
|
|
52
|
+
count++;
|
|
53
|
+
weightedScore += decayWeight(entryTime, now);
|
|
54
|
+
lastTier = entry.tier;
|
|
35
55
|
} catch {}
|
|
36
56
|
}
|
|
37
57
|
} catch {}
|
|
38
58
|
|
|
39
|
-
if (
|
|
59
|
+
if (weightedScore < 2.0) return { isLoop: false, count, weightedScore, suggestion: null };
|
|
40
60
|
|
|
41
61
|
const suggestion = lastTier === 'execute'
|
|
42
62
|
? 'promote_tier'
|
|
43
63
|
: 'escalate_to_dual_brain';
|
|
44
64
|
|
|
45
|
-
return { isLoop: true, count
|
|
65
|
+
return { isLoop: true, count, weightedScore, suggestion };
|
|
46
66
|
}
|
|
47
67
|
|
|
48
68
|
function recordFailure(promptHash, tier, reason) {
|
|
@@ -59,4 +79,46 @@ function recordFailure(promptHash, tier, reason) {
|
|
|
59
79
|
} catch {}
|
|
60
80
|
}
|
|
61
81
|
|
|
62
|
-
|
|
82
|
+
/**
 * Remove failure entries older than 24 hours from the ledger.
 *
 * Only lines that parse as JSON with `type === 'failure'` and a timestamp
 * older than the cutoff are dropped. Every other line — non-failure entries,
 * failures with an unparseable timestamp, and lines that are not valid JSON —
 * is kept to avoid data loss.
 *
 * The ledger is rewritten only when at least one entry was pruned, via a
 * temp file plus rename so readers never see a half-written ledger. Skipping
 * the rewrite in the common "nothing to prune" case also avoids needlessly
 * replacing a file that other hook processes append to.
 * NOTE(review): entries appended by another process between the read and the
 * rename are still lost when a rewrite does happen; there is no locking here.
 *
 * Any error (including a missing ledger) is treated as best-effort failure
 * and reported as `{ pruned: 0, remaining: 0 }`.
 *
 * @returns {{ pruned: number, remaining: number }} Number of entries removed
 *   and number of lines left in the ledger.
 */
function pruneOldFailures() {
  const cutoff = Date.now() - 24 * 60 * 60 * 1000;
  const tmpFile = `${LEDGER_FILE}.tmp.${process.pid}`;
  const kept = [];
  let pruned = 0;

  try {
    const lines = readFileSync(LEDGER_FILE, 'utf8').split('\n').filter(Boolean);
    for (const line of lines) {
      let isStaleFailure = false;
      try {
        const entry = JSON.parse(line);
        isStaleFailure = entry.type === 'failure' && Date.parse(entry.timestamp) < cutoff;
      } catch {
        // Unparseable (or non-object) line: keep it rather than lose data.
      }

      if (isStaleFailure) {
        pruned++;
      } else {
        kept.push(line);
      }
    }

    if (pruned > 0) {
      writeFileSync(tmpFile, kept.length > 0 ? `${kept.join('\n')}\n` : '');
      renameSync(tmpFile, LEDGER_FILE);
    }
  } catch (err) {
    // ENOENT means the ledger does not exist, so no temp file was created.
    if (err?.code !== 'ENOENT') {
      try { unlinkSync(tmpFile); } catch {}
    }
    return { pruned: 0, remaining: 0 };
  }

  return { pruned, remaining: kept.length };
}
|
|
123
|
+
|
|
124
|
+
export { checkFailureLoop, recordFailure, pruneOldFailures };
|
|
@@ -27,6 +27,7 @@ const COST_LOGGER = resolve(HOOKS, 'cost-logger.mjs');
|
|
|
27
27
|
const DUAL_BRAIN = resolve(HOOKS, 'dual-brain-review.mjs');
|
|
28
28
|
const ORCHESTRATOR = resolve(HOOKS, '..', 'orchestrator.json');
|
|
29
29
|
const USAGE_JSONL = resolve(HOOKS, `usage-${new Date().toISOString().slice(0, 10)}.jsonl`);
|
|
30
|
+
const BURST_FILE = resolve(HOOKS, '.burst-state');
|
|
30
31
|
|
|
31
32
|
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
32
33
|
|
|
@@ -335,7 +336,7 @@ test('profiles: consistent across modules', () => {
|
|
|
335
336
|
test('failure-detector: ignores followed=false', () => {
|
|
336
337
|
const src = readFileSync(resolve(__dirname, 'failure-detector.mjs'), 'utf8');
|
|
337
338
|
if (src.includes('followed === false')) return 'still conflates followed=false with failure';
|
|
338
|
-
if (!src.includes('success === false')) return 'missing success
|
|
339
|
+
if (!src.includes('success === false') && !src.includes('success !== false')) return 'missing success check';
|
|
339
340
|
return true;
|
|
340
341
|
});
|
|
341
342
|
|
|
@@ -649,6 +650,363 @@ test('adaptive: cost-logger records Agent errors', () => {
|
|
|
649
650
|
}
|
|
650
651
|
});
|
|
651
652
|
|
|
653
|
+
// ─── Test 30: enforce-tier: burst detection activates on 3+ agents ─────────
|
|
654
|
+
test('enforce-tier: burst detection activates on 3+ agents', () => {
  // Seed the window one dispatch below the threshold; one hook run should push it to 3.
  const seededState = JSON.stringify({ count: 2, window_start: Date.now() });

  try {
    writeFileSync(BURST_FILE, seededState);

    const hookInput = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: `burst activation test ${Date.now()}`, model: 'sonnet' },
    });
    const outcome = run(ENFORCE_TIER, hookInput);
    if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
    if (!outcome.parsed) return 'no valid JSON output';

    // The hook must have persisted an incremented counter.
    if (!existsSync(BURST_FILE)) return '.burst-state file was removed unexpectedly';

    let persisted;
    try {
      persisted = JSON.parse(readFileSync(BURST_FILE, 'utf8'));
    } catch (parseError) {
      return `.burst-state not valid JSON: ${parseError.message}`;
    }

    return persisted.count < 3 ? `expected count >= 3, got: ${persisted.count}` : true;
  } finally {
    // Always remove the seeded state so later tests start outside a burst.
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
|
|
676
|
+
|
|
677
|
+
// ─── Test 31: enforce-tier: burst mode suppresses duplicate warnings ───────
|
|
678
|
+
test('enforce-tier: burst mode suppresses duplicate warnings', () => {
  try {
    // Begin inside an active burst window (count already past the threshold).
    writeFileSync(BURST_FILE, JSON.stringify({ count: 5, window_start: Date.now() }));

    const hookInput = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: 'burst duplicate test identical prompt', model: 'sonnet' },
    });

    // Dispatch the same payload twice; only the repeat is inspected.
    run(ENFORCE_TIER, hookInput);
    const repeat = run(ENFORCE_TIER, hookInput);
    if (repeat.status !== 0) return `non-zero exit: ${repeat.status}`;
    if (!repeat.parsed) return 'no valid JSON output';

    // Acceptable outcomes in burst mode: no duplicate warning, or one tagged [Wave].
    const message = repeat.parsed.systemMessage || '';
    const mentionsDuplicate = message.toLowerCase().includes('duplicate');
    const isWaveTagged = message.includes('[Wave]');
    if (mentionsDuplicate && !isWaveTagged) {
      return `expected no duplicate warning or [Wave]-prefixed in burst mode, got: ${message}`;
    }
    return true;
  } finally {
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
|
|
704
|
+
|
|
705
|
+
// ─── Test 32: enforce-tier: non-burst mode still warns on duplicates ───────
|
|
706
|
+
test('enforce-tier: non-burst mode still warns on duplicates', () => {
  try {
    // window_start of 0 is far outside the 90s window, so the burst counter starts over.
    writeFileSync(BURST_FILE, JSON.stringify({ count: 0, window_start: 0 }));

    const hookInput = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: 'non-burst duplicate test identical prompt', model: 'sonnet' },
    });

    // Dispatch the same payload twice; the repeat is expected to carry the warning.
    run(ENFORCE_TIER, hookInput);
    const repeat = run(ENFORCE_TIER, hookInput);
    if (repeat.status !== 0) return `non-zero exit: ${repeat.status}`;
    if (!repeat.parsed) return 'no valid JSON output';

    const message = repeat.parsed.systemMessage || '';
    const mentionsDuplicate = message.toLowerCase().includes('duplicate');
    return mentionsDuplicate
      ? true
      : `expected duplicate warning in non-burst mode, got: ${message || '(empty)'}`;
  } finally {
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
|
|
730
|
+
|
|
731
|
+
// ─── Test 33: install preserves existing hooks ─────────────────────────────
|
|
732
|
+
test('install: preserves existing hooks', () => {
  const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
  const mentions = (needle) => installSrc.includes(needle);

  // Static source checks, evaluated in order; the first unmet one is reported.
  const requirements = [
    // The installer needs a way to recognise its own hook commands.
    [mentions('DUAL_BRAIN_CMDS'), 'install.mjs missing DUAL_BRAIN_CMDS constant for filtering'],
    // It should filter rather than replace the hook list.
    [mentions('.filter'), 'install.mjs missing .filter() call — may clobber non-dual-brain hooks'],
    // Existing entries are expected to be carried into the merged result.
    [mentions('existingEntries'), 'install.mjs missing existingEntries variable — may not preserve other hooks'],
    // Existing settings must be referenced before anything is written.
    [
      mentions('existing') && mentions('settings.json'),
      'install.mjs does not read existing settings.json before writing',
    ],
  ];

  const unmet = requirements.find(([satisfied]) => !satisfied);
  return unmet ? unmet[1] : true;
});
|
|
753
|
+
|
|
754
|
+
// ─── Test 34: gitignore entries don't conflict with data-tools ─────────────
|
|
755
|
+
test('install: gitignore entries scoped to dual-brain', () => {
  const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');

  // Capture the contents of the `entries` array literal that follows generateGitignoreEntries.
  const entriesArray = installSrc.match(/generateGitignoreEntries[\s\S]*?const entries\s*=\s*\[([\s\S]*?)\]/);
  if (!entriesArray) return 'could not find generateGitignoreEntries entries array';

  // Pull out every single-quoted string inside that array.
  const entryStrings = Array.from(entriesArray[1].matchAll(/'([^']+)'/g), (hit) => hit[1]);
  if (entryStrings.length === 0) return 'no gitignore entries found in install.mjs';

  // Reject catch-all patterns that would also ignore files owned by other tools.
  const broadPatterns = ['*.json', '*.jsonl', '*.mjs', '.claude/', '.claude/hooks/'];
  const tooBroad = entryStrings.find((entry) => broadPatterns.includes(entry));
  if (tooBroad !== undefined) {
    return `gitignore entry "${tooBroad}" is too broad — could match data-tools files`;
  }

  // Every entry has to mention at least one dual-brain-specific name fragment.
  const validScopes = ['dual-brain', 'usage-', 'usage.jsonl', 'decision-ledger', 'drift-warned', 'budget-alerted', 'summary-', 'reviews/', '.launched'];
  const unscoped = entryStrings.find(
    (entry) => !validScopes.some((scope) => entry.includes(scope)),
  );
  if (unscoped !== undefined) {
    return `gitignore entry "${unscoped}" may not be scoped to dual-brain files`;
  }

  return true;
});
|
|
787
|
+
|
|
788
|
+
// ─── Test 35: hooks use isolated file paths ────────────────────────────────
|
|
789
|
+
test('hooks: output files use dual-brain-namespaced paths', () => {
  const readHook = (hookFile) => readFileSync(resolve(__dirname, hookFile), 'utf8');

  // Hooks whose write calls are scanned for generic filenames.
  const hookFiles = ['enforce-tier.mjs', 'cost-logger.mjs', 'summary-checkpoint.mjs'];

  // Filenames generic enough to collide with files written by other tools.
  const genericNames = ['config.json', 'state.json', 'log.jsonl', 'data.json', 'output.json'];

  for (const hookFile of hookFiles) {
    const src = readHook(hookFile);

    // First argument of every writeFileSync / appendFileSync / renameSync call.
    // (For renameSync that is the source path, not the destination.)
    const writeTargets = [...src.matchAll(/(?:writeFileSync|appendFileSync|renameSync)\(\s*([^,)]+)/g)].map(m => m[1].trim());

    if (writeTargets.length === 0) return `${hookFile}: no write targets found`;

    // Only string-literal arguments can be flagged here; targets passed as
    // variables are covered by the constant-name checks further down.
    for (const target of writeTargets) {
      for (const bad of genericNames) {
        if (target.includes(`'${bad}'`) || target.includes(`"${bad}"`))
          return `${hookFile}: writes to generic filename "${bad}" — could collide with other tools`;
      }
    }
  }

  // Verify the file path constants in enforce-tier use dual-brain-scoped names
  const enforceSrc = readHook('enforce-tier.mjs');
  if (!enforceSrc.includes('dual-brain.profile.json'))
    return 'enforce-tier.mjs PROFILE_FILE does not reference dual-brain namespace';
  if (!enforceSrc.includes('.drift-warned'))
    return 'enforce-tier.mjs DRIFT_STATE does not use scoped filename';
  if (!enforceSrc.includes('.burst-state'))
    return 'enforce-tier.mjs BURST_FILE does not use scoped filename';

  // Verify cost-logger writes to usage-dated files, not generic names
  const costSrc = readHook('cost-logger.mjs');
  if (!costSrc.includes('usage-'))
    return 'cost-logger.mjs does not write to usage-prefixed files';
  if (!costSrc.includes('dual-brain.profile.json'))
    return 'cost-logger.mjs PROFILE_FILE does not reference dual-brain namespace';

  return true;
});
|
|
835
|
+
|
|
836
|
+
// ─── Test 36: failure decay weights recent failures higher ─────────────────
|
|
837
|
+
test('failure decay: recent failures score high', () => {
  // This test temporarily replaces the real ledger with a fixture, then restores it.
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    // Two failures 5 minutes old: each weighs 1.0, so the score reaches the 2.0 loop threshold.
    const hash = 'decay_recent_' + Date.now();
    const fiveMinAgo = new Date(Date.now() - 5 * 60 * 1000).toISOString();
    const entry = JSON.stringify({
      type: 'failure', timestamp: fiveMinAgo, prompt_hash: hash,
      tier: 'execute', reason: 'test_decay', success: false,
    });
    writeFileSync(LEDGER, entry + '\n' + entry + '\n', 'utf8');

    // Run in a child process (cwd = HOOKS) so the relative import resolves to the module under test.
    const script = `
      import { checkFailureLoop } from './failure-detector.mjs';
      const result = checkFailureLoop('${hash}');
      process.stdout.write(JSON.stringify(result));
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

    if (proc.status !== 0) return `script failed: ${proc.stderr}`;
    let result;
    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
    if (!result.isLoop) return `expected isLoop=true for recent failures, got: ${JSON.stringify(result)}`;
    if (typeof result.weightedScore !== 'number' || result.weightedScore < 2.0)
      return `expected weightedScore >= 2.0, got: ${result.weightedScore}`;
    return true;
  } finally {
    if (backup !== null) {
      writeFileSync(LEDGER, backup, 'utf8');
    } else {
      // No ledger existed before the test: remove the fixture instead of
      // leaving an empty file behind. Fall back to truncating if removal fails.
      try { unlinkSync(LEDGER); } catch {
        try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
      }
    }
  }
});
|
|
872
|
+
|
|
873
|
+
// ─── Test 37: failure decay reduces old failure weight ─────────────────────
|
|
874
|
+
test('failure decay: old failures score low', () => {
  // This test temporarily replaces the real ledger with a fixture, then restores it.
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    // Two failures 90 minutes old: each weighs 0.25, so the score (0.5) stays under the 2.0 threshold.
    const hash = 'decay_old_' + Date.now();
    const ninetyMinAgo = new Date(Date.now() - 90 * 60 * 1000).toISOString();
    const entry = JSON.stringify({
      type: 'failure', timestamp: ninetyMinAgo, prompt_hash: hash,
      tier: 'execute', reason: 'test_decay_old', success: false,
    });
    writeFileSync(LEDGER, entry + '\n' + entry + '\n', 'utf8');

    // Run in a child process (cwd = HOOKS) so the relative import resolves to the module under test.
    const script = `
      import { checkFailureLoop } from './failure-detector.mjs';
      const result = checkFailureLoop('${hash}');
      process.stdout.write(JSON.stringify(result));
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

    if (proc.status !== 0) return `script failed: ${proc.stderr}`;
    let result;
    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
    if (result.isLoop) return `expected isLoop=false for old failures (weightedScore should be ~0.5), got: ${JSON.stringify(result)}`;
    if (typeof result.weightedScore !== 'number')
      return `expected weightedScore in result, got: ${JSON.stringify(result)}`;
    if (result.weightedScore >= 2.0)
      return `expected weightedScore < 2.0 for 90-min-old failures, got: ${result.weightedScore}`;
    return true;
  } finally {
    if (backup !== null) {
      writeFileSync(LEDGER, backup, 'utf8');
    } else {
      // No ledger existed before the test: remove the fixture instead of
      // leaving an empty file behind. Fall back to truncating if removal fails.
      try { unlinkSync(LEDGER); } catch {
        try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
      }
    }
  }
});
|
|
911
|
+
|
|
912
|
+
// ─── Test 38: failure scoping by tier ──────────────────────────────────────
|
|
913
|
+
test('failure decay: scoping by tier', () => {
  // This test temporarily replaces the real ledger with a fixture, then restores it.
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    // Same prompt hash, two fresh failures each for 'execute' and 'search', none for 'think'.
    const hash = 'tier_scope_' + Date.now();
    const now = new Date().toISOString();
    const mkEntry = (tier) => JSON.stringify({
      type: 'failure', timestamp: now, prompt_hash: hash,
      tier, reason: 'test_tier_scope', success: false,
    });
    const content = [
      mkEntry('execute'), mkEntry('execute'),
      mkEntry('search'), mkEntry('search'),
    ].join('\n') + '\n';
    writeFileSync(LEDGER, content, 'utf8');

    // Query the detector for one tier in a child process; returns the parsed
    // result, or `{ error }` describing why no result could be obtained.
    const checkTier = (tier) => {
      const script = `
        import { checkFailureLoop } from './failure-detector.mjs';
        const result = checkFailureLoop('${hash}', '${tier}');
        process.stdout.write(JSON.stringify(result));
      `;
      const proc = spawnSync(process.execPath, [
        '--input-type=module',
        '-e', script,
      ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
      if (proc.status !== 0) return { error: `script failed for tier=${tier}: ${proc.stderr}` };
      try { return JSON.parse(proc.stdout.trim()); } catch { return { error: `output not JSON for tier=${tier}: ${proc.stdout}` }; }
    };

    const execResult = checkTier('execute');
    if (execResult.error) return execResult.error;
    if (!execResult.isLoop) return `expected isLoop=true for execute tier, got: ${JSON.stringify(execResult)}`;

    const searchResult = checkTier('search');
    if (searchResult.error) return searchResult.error;
    if (!searchResult.isLoop) return `expected isLoop=true for search tier, got: ${JSON.stringify(searchResult)}`;

    const thinkResult = checkTier('think');
    if (thinkResult.error) return thinkResult.error;
    if (thinkResult.isLoop) return `expected isLoop=false for think tier (no think failures), got: ${JSON.stringify(thinkResult)}`;

    return true;
  } finally {
    if (backup !== null) {
      writeFileSync(LEDGER, backup, 'utf8');
    } else {
      // No ledger existed before the test: remove the fixture instead of
      // leaving an empty file behind. Fall back to truncating if removal fails.
      try { unlinkSync(LEDGER); } catch {
        try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
      }
    }
  }
});
|
|
962
|
+
|
|
963
|
+
// ─── Test 39: pruneOldFailures removes stale entries ───────────────────────
|
|
964
|
+
test('failure decay: pruneOldFailures removes stale entries', () => {
  // This test temporarily replaces the real ledger with a fixture, then restores it.
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    // Two failures past the 24h cutoff and two well inside it.
    const twentyFiveHoursAgo = new Date(Date.now() - 25 * 60 * 60 * 1000).toISOString();
    const oneHourAgo = new Date(Date.now() - 1 * 60 * 60 * 1000).toISOString();
    const staleEntry = JSON.stringify({
      type: 'failure', timestamp: twentyFiveHoursAgo, prompt_hash: 'stale',
      tier: 'execute', reason: 'old', success: false,
    });
    const recentEntry = JSON.stringify({
      type: 'failure', timestamp: oneHourAgo, prompt_hash: 'recent',
      tier: 'execute', reason: 'new', success: false,
    });
    const content = [staleEntry, staleEntry, recentEntry, recentEntry].join('\n') + '\n';
    writeFileSync(LEDGER, content, 'utf8');

    // Run in a child process (cwd = HOOKS) so the relative import resolves to the module under test.
    const script = `
      import { pruneOldFailures } from './failure-detector.mjs';
      pruneOldFailures();
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

    if (proc.status !== 0) return `pruneOldFailures script failed: ${proc.stderr}`;
    if (!existsSync(LEDGER)) return 'ledger file was deleted instead of pruned';

    // Only the two recent entries may survive, and each must still be valid JSON.
    const lines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
    if (lines.length !== 2) return `expected 2 entries after prune, got: ${lines.length}`;

    for (const line of lines) {
      let entry;
      try { entry = JSON.parse(line); } catch { return `pruned ledger has invalid JSON: ${line}`; }
      if (entry.prompt_hash !== 'recent')
        return `expected only recent entries to remain, found prompt_hash=${entry.prompt_hash}`;
    }
    return true;
  } finally {
    if (backup !== null) {
      writeFileSync(LEDGER, backup, 'utf8');
    } else {
      // No ledger existed before the test: remove the fixture instead of
      // leaving an empty file behind. Fall back to truncating if removal fails.
      try { unlinkSync(LEDGER); } catch {
        try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
      }
    }
  }
});
|
|
1009
|
+
|
|
652
1010
|
// ─── Summary ─────────────────────────────────────────────────────────────────
|
|
653
1011
|
const total = passed + failed;
|
|
654
1012
|
console.log(`\n${passed}/${total} tests passed`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dual-brain",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.8.1",
|
|
4
4
|
"description": "Dual-provider orchestration for Claude Code — tiered routing, budget balancing, and GPT dual-brain review across Claude + OpenAI subscriptions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -22,6 +22,9 @@
|
|
|
22
22
|
"type": "git",
|
|
23
23
|
"url": "https://github.com/1xmint/dual-brain.git"
|
|
24
24
|
},
|
|
25
|
+
"scripts": {
|
|
26
|
+
"test": "node hooks/test-orchestrator.mjs"
|
|
27
|
+
},
|
|
25
28
|
"engines": {
|
|
26
29
|
"node": ">=20.0.0"
|
|
27
30
|
},
|