dual-brain 3.7.1 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -5
- package/hooks/enforce-tier.mjs +25 -2
- package/hooks/test-orchestrator.mjs +391 -0
- package/package.json +4 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Dual-Brain Orchestrator
|
|
2
2
|
|
|
3
|
-
One command. Both brains. Auto-detected. Auto-configured.
|
|
3
|
+
One command. Both brains. Auto-detected. Auto-configured. Default profile: **auto**.
|
|
4
4
|
|
|
5
5
|
Dual-provider orchestration for Claude Code across Claude and OpenAI subscriptions. Routes search to cheap models, execution to mid-tier, thinking to the most capable. Dispatches work to GPT via Codex CLI. Dual-brain analysis for high-risk decisions.
|
|
6
6
|
|
|
@@ -36,9 +36,9 @@ npx -y dual-brain
|
|
|
36
36
|
|
|
37
37
|
## How it works
|
|
38
38
|
|
|
39
|
-
**Two hooks
|
|
39
|
+
**Two advisory hooks** are registered in `.claude/settings.json` and fire on each tool use. They detect and recommend — they do not execute actions without user confirmation:
|
|
40
40
|
|
|
41
|
-
- **enforce-tier.mjs** (PreToolUse on Agent): Classifies tasks,
|
|
41
|
+
- **enforce-tier.mjs** (PreToolUse on Agent): Classifies tasks, recommends the correct model tier, detects duplicates, suggests cross-provider routing
|
|
42
42
|
- **cost-logger.mjs** (PostToolUse on all tools): Logs usage to daily rotated files for cost tracking
|
|
43
43
|
|
|
44
44
|
**Three tiers route work by complexity:**
|
|
@@ -49,7 +49,7 @@ npx -y dual-brain
|
|
|
49
49
|
| Execute | Sonnet | GPT-5.4 | edits, tests, git ops |
|
|
50
50
|
| Think | Opus | GPT-5.5 | architecture, review, planning |
|
|
51
51
|
|
|
52
|
-
**Dual-brain**
|
|
52
|
+
**Dual-brain** is recommended automatically for high-risk decisions — hooks detect the risk level and suggest dual-brain analysis, where both providers think on the same problem independently.
|
|
53
53
|
|
|
54
54
|
## Scripts
|
|
55
55
|
|
|
@@ -63,7 +63,7 @@ npx -y dual-brain
|
|
|
63
63
|
| `hooks/gpt-work-dispatcher.mjs` | Dispatch execution tasks to GPT via Codex CLI |
|
|
64
64
|
| `hooks/session-report.mjs` | Session-end summary: activity, compliance, quality |
|
|
65
65
|
| `hooks/health-check.mjs` | Verify all hooks and dependencies are working |
|
|
66
|
-
| `hooks/test-orchestrator.mjs` | Self-test harness (
|
|
66
|
+
| `hooks/test-orchestrator.mjs` | Self-test harness (32 tests) |
|
|
67
67
|
| `hooks/setup-wizard.mjs` | Interactive config (optional — for custom plans) |
|
|
68
68
|
| `hooks/install-git-hooks.mjs` | Git pre-commit hook for quality gate |
|
|
69
69
|
|
|
@@ -85,6 +85,20 @@ After install, edit these files:
|
|
|
85
85
|
- `review-rules.md` — project-specific rules for GPT code review
|
|
86
86
|
- `settings.json` — hook registrations (auto-generated, safe to extend)
|
|
87
87
|
|
|
88
|
+
## Profiles
|
|
89
|
+
|
|
90
|
+
The active profile controls routing posture, budgets, and quality gate behavior. Default: **auto**.
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
npx dual-brain mode cost-saver # switch profile
|
|
94
|
+
npx dual-brain status # check current profile and provider health
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
- **auto** (default): Adapts routing based on task risk, provider health, and outcomes. Auto-escalates tier on repeated failures.
|
|
98
|
+
- **balanced**: Best model per tier, normal budgets, reviews at medium+ risk.
|
|
99
|
+
- **cost-saver**: Prefer cheaper models, lower budgets, skip GPT for non-critical work.
|
|
100
|
+
- **quality-first**: Dual-brain for medium+ risk, higher budgets, stricter reviews.
|
|
101
|
+
|
|
88
102
|
## Requirements
|
|
89
103
|
|
|
90
104
|
- Node 20+
|
package/hooks/enforce-tier.mjs
CHANGED
|
@@ -10,6 +10,17 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
10
10
|
const CONFIG_FILE = resolve(__dirname, '..', 'orchestrator.json');
|
|
11
11
|
const PROFILE_FILE = resolve(__dirname, '..', 'dual-brain.profile.json');
|
|
12
12
|
const DRIFT_STATE = resolve(__dirname, '.drift-warned');
|
|
13
|
+
const BURST_FILE = resolve(__dirname, '.burst-state');
|
|
14
|
+
|
|
15
|
+
/**
 * Count this call against the current 90-second window and report whether a
 * burst ("wave") of Agent dispatches is in progress.
 *
 * The counter is persisted in BURST_FILE as `{ count, window_start }` so that
 * it survives across separate hook processes. Reading and writing the file is
 * best-effort: a missing, unreadable, or malformed state file simply starts a
 * fresh window, and a failed write is ignored.
 *
 * @returns {boolean} true once this is at least the 3rd call in the window.
 */
function detectBurst() {
  const now = Date.now();
  let state = { count: 0, window_start: now };

  try {
    const stored = JSON.parse(readFileSync(BURST_FILE, 'utf8'));
    // Only trust well-formed state. Valid JSON such as `null`, `[]` or
    // `{"count":"x"}` would otherwise throw below or turn the counter into NaN.
    if (
      stored !== null &&
      typeof stored === 'object' &&
      Number.isFinite(stored.count) &&
      Number.isFinite(stored.window_start)
    ) {
      state = { count: stored.count, window_start: stored.window_start };
    }
  } catch {
    // No usable state on disk: keep the fresh window created above.
  }

  // Start a new window when the old one expired, or when its start lies in
  // the future (clock adjustment), which would otherwise never expire.
  const elapsed = now - state.window_start;
  if (elapsed < 0 || elapsed > 90_000) state = { count: 0, window_start: now };

  state.count++;

  try {
    writeFileSync(BURST_FILE, JSON.stringify(state));
  } catch {
    // Best-effort persistence: burst detection must never break the hook.
  }

  return state.count >= 3;
}
|
|
13
24
|
|
|
14
25
|
function loadProfile() {
|
|
15
26
|
try {
|
|
@@ -205,12 +216,23 @@ try {
|
|
|
205
216
|
// Compute prompt hash early for duplicate detection and logging
|
|
206
217
|
const promptHash = createHash('sha256').update(text).digest('hex').slice(0, 12);
|
|
207
218
|
|
|
219
|
+
// Burst detection — suppress noise during wave launches (3+ agents in 90s)
|
|
220
|
+
const burstMode = detectBurst();
|
|
221
|
+
|
|
208
222
|
// Check for duplicate agent dispatch before tier classification
|
|
209
223
|
const duplicate = checkDuplicate(promptHash);
|
|
210
224
|
let duplicateWarning = null;
|
|
211
225
|
if (duplicate) {
|
|
212
226
|
const minutesAgo = Math.round((Date.now() - Date.parse(duplicate.timestamp)) / 60000);
|
|
213
|
-
|
|
227
|
+
if (burstMode) {
|
|
228
|
+
// In burst mode, only warn on exact hash matches (same description+prompt)
|
|
229
|
+
if (duplicate.prompt_hash === promptHash) {
|
|
230
|
+
duplicateWarning = `**[Wave] [Duplicate Warning]** A similar agent task was dispatched ${minutesAgo} minute${minutesAgo !== 1 ? 's' : ''} ago. Reuse the prior result unless the scope changed.`;
|
|
231
|
+
}
|
|
232
|
+
// Otherwise suppress — similar-but-different agents in a wave are expected
|
|
233
|
+
} else {
|
|
234
|
+
duplicateWarning = `**[Duplicate Warning]** A similar agent task was dispatched ${minutesAgo} minute${minutesAgo !== 1 ? 's' : ''} ago. Reuse the prior result unless the scope changed.`;
|
|
235
|
+
}
|
|
214
236
|
}
|
|
215
237
|
|
|
216
238
|
let config;
|
|
@@ -315,7 +337,8 @@ try {
|
|
|
315
337
|
}
|
|
316
338
|
|
|
317
339
|
// Compute balance hint now that tier is resolved
|
|
318
|
-
|
|
340
|
+
// In burst mode, skip balance hints — one hint per wave is enough
|
|
341
|
+
if (!burstMode) {
|
|
319
342
|
const currentProvider = detectProvider(currentModel);
|
|
320
343
|
if (currentProvider === 'claude') {
|
|
321
344
|
const balance = quickPressureCheck(tier);
|
|
@@ -10,8 +10,10 @@
|
|
|
10
10
|
|
|
11
11
|
import { execSync, spawnSync } from 'child_process';
|
|
12
12
|
import {
|
|
13
|
+
appendFileSync,
|
|
13
14
|
existsSync,
|
|
14
15
|
readFileSync,
|
|
16
|
+
unlinkSync,
|
|
15
17
|
writeFileSync,
|
|
16
18
|
} from 'fs';
|
|
17
19
|
import { dirname, resolve } from 'path';
|
|
@@ -25,6 +27,7 @@ const COST_LOGGER = resolve(HOOKS, 'cost-logger.mjs');
|
|
|
25
27
|
const DUAL_BRAIN = resolve(HOOKS, 'dual-brain-review.mjs');
|
|
26
28
|
const ORCHESTRATOR = resolve(HOOKS, '..', 'orchestrator.json');
|
|
27
29
|
const USAGE_JSONL = resolve(HOOKS, `usage-${new Date().toISOString().slice(0, 10)}.jsonl`);
|
|
30
|
+
const BURST_FILE = resolve(HOOKS, '.burst-state');
|
|
28
31
|
|
|
29
32
|
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
30
33
|
|
|
@@ -337,6 +340,394 @@ test('failure-detector: ignores followed=false', () => {
|
|
|
337
340
|
return true;
|
|
338
341
|
});
|
|
339
342
|
|
|
343
|
+
// ─── Test 17: enforce-tier: malformed stdin ─────────────────────────────────
|
|
344
|
+
test('enforce-tier: malformed stdin', () => {
  // Input that is not JSON at all: the hook is expected to exit with status 0
  // and still produce output that `run` could parse.
  const outcome = run(ENFORCE_TIER, 'this is not json at all {{{');
  if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
  return outcome.parsed ? true : 'no valid JSON output';
});
|
|
350
|
+
|
|
351
|
+
// ─── Test 18: enforce-tier: missing tool_input ──────────────────────────────
|
|
352
|
+
test('enforce-tier: missing tool_input', () => {
  // An Agent event without any `tool_input` must be tolerated: clean exit
  // plus parseable output.
  const request = { tool_name: 'Agent' };
  const { parsed: reply, status: exitCode } = run(ENFORCE_TIER, JSON.stringify(request));

  if (exitCode !== 0) return `non-zero exit: ${exitCode}`;
  if (!reply) return 'no valid JSON output';
  return true;
});
|
|
359
|
+
|
|
360
|
+
// ─── Test 19: enforce-tier: non-Agent tool passthrough ──────────────────────
|
|
361
|
+
test('enforce-tier: non-Agent tool passthrough', () => {
  // A tool other than Agent should pass straight through: the hook's reply
  // must be an object with no keys at all.
  const request = { tool_name: 'Read', tool_input: { file_path: '/foo' } };
  const { parsed: reply, status: exitCode } = run(ENFORCE_TIER, JSON.stringify(request));

  if (exitCode !== 0) return `non-zero exit: ${exitCode}`;
  if (!reply) return 'no valid JSON output';

  const replyIsEmpty = Object.keys(reply).length === 0;
  return replyIsEmpty ? true : `expected {}, got: ${JSON.stringify(reply)}`;
});
|
|
370
|
+
|
|
371
|
+
// ─── Test 20: cost-logger: malformed stdin ──────────────────────────────────
|
|
372
|
+
test('cost-logger: malformed stdin', () => {
  // The logger must survive unparseable input: exit 0 and parseable output.
  const outcome = runStream(COST_LOGGER, 'not json garbage >>>');

  if (outcome.status !== 0) {
    return `non-zero exit: ${outcome.status}`;
  }
  if (!outcome.parsed) {
    return 'no valid JSON output';
  }
  return true;
});
|
|
378
|
+
|
|
379
|
+
// ─── Test 21: cost-logger: missing fields ───────────────────────────────────
|
|
380
|
+
test('cost-logger: missing fields', () => {
  // Non-empty lines currently stored in today's usage log.
  const readEntries = () => readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);

  // Remember how many entries exist so the new one can be detected and removed.
  const countBefore = existsSync(USAGE_JSONL) ? readEntries().length : 0;

  // An empty event object must still be accepted and logged.
  const { parsed: reply, status: exitCode } = runStream(COST_LOGGER, '{}');
  if (exitCode !== 0) return `non-zero exit: ${exitCode}`;
  if (!reply) return 'no valid JSON output';

  if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
  const entries = readEntries();
  if (entries.length <= countBefore) return 'no new line was appended to daily usage log';

  // Best-effort cleanup: rewrite the log with only the pre-existing entries.
  try {
    const preserved = entries.slice(0, countBefore).join('\n');
    writeFileSync(USAGE_JSONL, preserved === '' ? '' : preserved + '\n', 'utf8');
  } catch {}

  return true;
});
|
|
402
|
+
|
|
403
|
+
// ─── Test 22: cost-logger: error status recorded ────────────────────────────
|
|
404
|
+
test('cost-logger: error status recorded', () => {
  // Non-empty lines currently stored in today's usage log.
  const readEntries = () => readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);

  const countBefore = existsSync(USAGE_JSONL) ? readEntries().length : 0;

  // An Agent event carrying an `error` field should be logged with status "error".
  const event = {
    tool_name: 'Agent',
    tool_input: { prompt: 'test' },
    error: 'something failed',
  };
  const { parsed: reply, status: exitCode } = runStream(COST_LOGGER, JSON.stringify(event));
  if (exitCode !== 0) return `non-zero exit: ${exitCode}`;
  if (!reply) return 'no valid JSON output';

  if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
  const entries = readEntries();
  if (entries.length <= countBefore) return 'no new line was appended to daily usage log';

  // The newest entry is the one the logger just appended.
  const newest = entries[entries.length - 1];
  let record;
  try {
    record = JSON.parse(newest);
  } catch {
    return `last line not valid JSON: ${newest}`;
  }
  if (record.status !== 'error') return `expected status "error", got: "${record.status}"`;

  // Best-effort cleanup: rewrite the log with only the pre-existing entries.
  try {
    const preserved = entries.slice(0, countBefore).join('\n');
    writeFileSync(USAGE_JSONL, preserved === '' ? '' : preserved + '\n', 'utf8');
  } catch {}

  return true;
});
|
|
436
|
+
|
|
437
|
+
// ─── Test 23: enforce-tier: cost-saver demotes think ────────────────────────
|
|
438
|
+
test('enforce-tier: cost-saver demotes think', () => {
  const profilePath = resolve(__dirname, '..', 'dual-brain.profile.json');

  // Snapshot the current profile (null when the file cannot be read) so it
  // can be put back afterwards.
  let savedProfile = null;
  try {
    savedProfile = readFileSync(profilePath, 'utf8');
  } catch {
    savedProfile = null;
  }

  try {
    writeFileSync(profilePath, JSON.stringify({ active: 'cost-saver' }));

    // Execute-style prompt sent with the opus model under the cost-saver
    // profile. NOTE(review): this test only asserts a clean exit and
    // parseable output; it does not inspect the tier the hook chose.
    const request = {
      tool_name: 'Agent',
      tool_input: { prompt: 'edit the README file', model: 'opus' },
    };
    const outcome = run(ENFORCE_TIER, JSON.stringify(request));

    if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
    return outcome.parsed ? true : 'no valid JSON output';
  } finally {
    // Put the profile back: rewrite the snapshot, or remove the file this
    // test created when there was nothing to snapshot.
    if (savedProfile === null) {
      try { unlinkSync(profilePath); } catch {}
    } else {
      writeFileSync(profilePath, savedProfile);
    }
  }
});
|
|
460
|
+
|
|
461
|
+
// ─── Test 24: enforce-tier: quality-first promotes execute ──────────────────
|
|
462
|
+
test('enforce-tier: quality-first promotes execute', () => {
  const profilePath = resolve(__dirname, '..', 'dual-brain.profile.json');

  // Snapshot the current profile (null when the file cannot be read).
  let savedProfile = null;
  try {
    savedProfile = readFileSync(profilePath, 'utf8');
  } catch {
    savedProfile = null;
  }

  try {
    writeFileSync(profilePath, JSON.stringify({ active: 'quality-first' }));

    // Think-style wording sent with the sonnet model; under quality-first the
    // hook's systemMessage is expected to mention "think".
    const request = {
      tool_name: 'Agent',
      tool_input: { prompt: 'review architecture and plan the migration', model: 'sonnet' },
    };
    const { parsed: reply, status: exitCode } = run(ENFORCE_TIER, JSON.stringify(request));

    if (exitCode !== 0) return `non-zero exit: ${exitCode}`;
    if (!reply) return 'no valid JSON output';
    if (!reply.systemMessage) return `expected systemMessage, got: ${JSON.stringify(reply)}`;

    const mentionsThink = reply.systemMessage.toLowerCase().includes('think');
    return mentionsThink
      ? true
      : `expected "think" in systemMessage, got: ${reply.systemMessage}`;
  } finally {
    // Put the profile back, or remove the file if none existed before.
    if (savedProfile === null) {
      try { unlinkSync(profilePath); } catch {}
    } else {
      writeFileSync(profilePath, savedProfile);
    }
  }
});
|
|
486
|
+
|
|
487
|
+
// ─── Test 25: enforce-tier: auto profile with high-risk file ────────────────
|
|
488
|
+
test('enforce-tier: auto profile with high-risk file', () => {
  const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');

  // Snapshot the profile exactly as it is on disk (null when unreadable) so
  // the test leaves the user's configuration untouched.
  let originalProfile;
  try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }

  try {
    writeFileSync(profileFile, JSON.stringify({ active: 'auto' }));

    // The description names an auth/credentials path; under the auto profile
    // the hook's systemMessage is expected to mention "think" or "dual-brain".
    const payload = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { description: 'update src/auth/credentials.mjs', prompt: 'change the token logic', model: 'sonnet' },
    });
    const { parsed, status } = run(ENFORCE_TIER, payload);
    if (status !== 0) return `non-zero exit: ${status}`;
    if (!parsed) return 'no valid JSON output';
    if (!parsed.systemMessage) return `expected systemMessage, got: ${JSON.stringify(parsed)}`;

    const msg = parsed.systemMessage.toLowerCase();
    if (!msg.includes('think') && !msg.includes('dual-brain'))
      return `expected "think" or "dual-brain" in systemMessage, got: ${parsed.systemMessage}`;
    return true;
  } finally {
    // Restore the snapshot instead of forcing `auto`: unconditionally writing
    // { active: 'auto' } would overwrite the user's real profile (and any
    // other fields in it), or create the file where none existed.
    if (originalProfile !== null) {
      writeFileSync(profileFile, originalProfile);
    } else {
      try { unlinkSync(profileFile); } catch {}
    }
  }
});
|
|
512
|
+
|
|
513
|
+
// ─── Test 26: adaptive: recordFailure writes to ledger ─────────────────────
|
|
514
|
+
test('adaptive: recordFailure writes to ledger', () => {
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');

  // Keep the ledger's current content (null when the file is absent) for the restore step.
  let backup = null;
  if (existsSync(LEDGER)) {
    backup = readFileSync(LEDGER, 'utf8');
  }

  try {
    // Call recordFailure in a child Node process whose working directory is
    // HOOKS, so the relative import resolves against that directory.
    const script = `
      import { recordFailure } from './failure-detector.mjs';
      recordFailure('testhash123', 'execute', 'test_error');
    `;
    const child = spawnSync(
      process.execPath,
      ['--input-type=module', '-e', script],
      { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
    );

    if (child.status !== 0) return `recordFailure script failed: ${child.stderr}`;
    if (!existsSync(LEDGER)) return 'ledger file not created';

    // The most recent ledger line must be the failure that was just recorded.
    const entries = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
    const newest = entries[entries.length - 1];
    let record;
    try {
      record = JSON.parse(newest);
    } catch {
      return `last line not valid JSON: ${newest}`;
    }

    if (record.prompt_hash !== 'testhash123') return `expected prompt_hash=testhash123, got: ${record.prompt_hash}`;
    if (record.success !== false) return `expected success=false, got: ${record.success}`;
    return true;
  } finally {
    // Restore the previous content, or blank the ledger if there was none.
    if (backup === null) {
      try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
    } else {
      writeFileSync(LEDGER, backup, 'utf8');
    }
  }
});
|
|
543
|
+
|
|
544
|
+
// ─── Test 27: adaptive: checkFailureLoop detects 2+ failures ───────────────
|
|
545
|
+
test('adaptive: checkFailureLoop detects 2+ failures', () => {
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  // Keep the ledger's current content (null when absent) for the restore step.
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    // Unique hash per run so entries already in the ledger cannot interfere.
    const hash = 'looptest_' + Date.now();
    const now = new Date().toISOString();
    const failEntry = JSON.stringify({
      type: 'failure', timestamp: now, prompt_hash: hash,
      tier: 'execute', reason: 'test', success: false,
    });

    // Append two fresh failures after the existing content. If that content
    // does not end with a newline, add one first; otherwise the first injected
    // entry would be glued onto the last existing line and only one valid
    // failure would be present.
    const existing = backup || '';
    const separator = existing === '' || existing.endsWith('\n') ? '' : '\n';
    const content = existing + separator + failEntry + '\n' + failEntry + '\n';
    writeFileSync(LEDGER, content, 'utf8');

    // Query checkFailureLoop in a child Node process running from HOOKS and
    // read its JSON result from stdout.
    const script = `
      import { checkFailureLoop } from './failure-detector.mjs';
      const result = checkFailureLoop('${hash}');
      process.stdout.write(JSON.stringify(result));
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

    if (proc.status !== 0) return `checkFailureLoop script failed: ${proc.stderr}`;
    let result;
    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
    if (!result.isLoop) return `expected isLoop=true, got: ${JSON.stringify(result)}`;
    if (result.count < 2) return `expected count>=2, got: ${result.count}`;
    if (result.suggestion !== 'promote_tier' && result.suggestion !== 'escalate_to_dual_brain')
      return `unexpected suggestion: ${result.suggestion}`;
    return true;
  } finally {
    // Restore the previous content, or blank the ledger if there was none.
    if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
    else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
  }
});
|
|
582
|
+
|
|
583
|
+
// ─── Test 28: adaptive: checkFailureLoop ignores old failures ──────────────
|
|
584
|
+
test('adaptive: checkFailureLoop ignores old failures', () => {
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');

  // Keep the ledger's current content (null when absent) for the restore step.
  let backup = null;
  if (existsSync(LEDGER)) {
    backup = readFileSync(LEDGER, 'utf8');
  }

  try {
    const hash = 'oldtest_' + Date.now();
    const THREE_HOURS_MS = 3 * 60 * 60 * 1000;
    const staleTimestamp = new Date(Date.now() - THREE_HOURS_MS).toISOString();

    // Replace the ledger with two failures that are both three hours old.
    const staleEntry = JSON.stringify({
      type: 'failure',
      timestamp: staleTimestamp,
      prompt_hash: hash,
      tier: 'execute',
      reason: 'old_test',
      success: false,
    });
    writeFileSync(LEDGER, staleEntry + '\n' + staleEntry + '\n', 'utf8');

    // Query checkFailureLoop in a child Node process running from HOOKS.
    const script = `
      import { checkFailureLoop } from './failure-detector.mjs';
      const result = checkFailureLoop('${hash}');
      process.stdout.write(JSON.stringify(result));
    `;
    const child = spawnSync(
      process.execPath,
      ['--input-type=module', '-e', script],
      { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
    );
    if (child.status !== 0) return `checkFailureLoop script failed: ${child.stderr}`;

    let verdict;
    try {
      verdict = JSON.parse(child.stdout.trim());
    } catch {
      return `output not JSON: ${child.stdout}`;
    }

    // Failures this old must not be reported as a loop.
    return verdict.isLoop
      ? `expected isLoop=false for old failures, got: ${JSON.stringify(verdict)}`
      : true;
  } finally {
    // Restore the previous content, or blank the ledger if there was none.
    if (backup === null) {
      try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
    } else {
      writeFileSync(LEDGER, backup, 'utf8');
    }
  }
});
|
|
617
|
+
|
|
618
|
+
// ─── Test 29: adaptive: cost-logger records Agent errors ───────────────────
|
|
619
|
+
test('adaptive: cost-logger records Agent errors', () => {
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  // Non-empty lines currently stored in the ledger.
  const readEntries = () => readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);

  // Keep the ledger's current content (null when absent) for the restore step.
  let backup = null;
  if (existsSync(LEDGER)) {
    backup = readFileSync(LEDGER, 'utf8');
  }

  try {
    const countBefore = existsSync(LEDGER) ? readEntries().length : 0;

    // An Agent event with an `error` field should make the logger append a
    // failure entry to the ledger.
    const event = {
      tool_name: 'Agent',
      tool_input: { prompt: 'failing task hash test' },
      error: 'test failure',
    };
    const { status: exitCode } = runStream(COST_LOGGER, JSON.stringify(event));
    if (exitCode !== 0) return `non-zero exit: ${exitCode}`;

    if (!existsSync(LEDGER)) return 'ledger file not created';
    const entries = readEntries();
    if (entries.length <= countBefore) return 'no new failure entry appended to ledger';

    const newest = entries[entries.length - 1];
    let record;
    try {
      record = JSON.parse(newest);
    } catch {
      return `last line not valid JSON: ${newest}`;
    }

    if (record.success !== false) return `expected success=false, got: ${record.success}`;
    if (record.type !== 'failure') return `expected type=failure, got: ${record.type}`;
    return true;
  } finally {
    // Restore the previous content, or blank the ledger if there was none.
    if (backup === null) {
      try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
    } else {
      writeFileSync(LEDGER, backup, 'utf8');
    }
  }
});
|
|
652
|
+
|
|
653
|
+
// ─── Test 30: enforce-tier: burst detection activates on 3+ agents ─────────
|
|
654
|
+
test('enforce-tier: burst detection activates on 3+ agents', () => {
  try {
    // Seed the state file with two dispatches in a window that starts now, so
    // the next hook invocation should be the third.
    const seed = { count: 2, window_start: Date.now() };
    writeFileSync(BURST_FILE, JSON.stringify(seed));

    // Timestamped prompt keeps this request distinct between runs.
    const request = {
      tool_name: 'Agent',
      tool_input: { prompt: `burst activation test ${Date.now()}`, model: 'sonnet' },
    };
    const { parsed: reply, status: exitCode } = run(ENFORCE_TIER, JSON.stringify(request));
    if (exitCode !== 0) return `non-zero exit: ${exitCode}`;
    if (!reply) return 'no valid JSON output';

    // The hook should have rewritten the state file with a count of at least 3.
    if (!existsSync(BURST_FILE)) return '.burst-state file was removed unexpectedly';

    let persisted;
    try {
      persisted = JSON.parse(readFileSync(BURST_FILE, 'utf8'));
    } catch (e) {
      return `.burst-state not valid JSON: ${e.message}`;
    }

    return persisted.count < 3 ? `expected count >= 3, got: ${persisted.count}` : true;
  } finally {
    // Remove the state file so later tests start outside burst mode.
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
|
|
676
|
+
|
|
677
|
+
// ─── Test 31: enforce-tier: burst mode suppresses duplicate warnings ───────
|
|
678
|
+
test('enforce-tier: burst mode suppresses duplicate warnings', () => {
  try {
    // Seed a window that is already well past the burst threshold.
    writeFileSync(BURST_FILE, JSON.stringify({ count: 5, window_start: Date.now() }));

    const request = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: 'burst duplicate test identical prompt', model: 'sonnet' },
    });

    // Send the same request twice: the first call is only there so that the
    // second one is a repeat; its result is not inspected.
    run(ENFORCE_TIER, request);
    const { parsed: reply, status: exitCode } = run(ENFORCE_TIER, request);

    if (exitCode !== 0) return `non-zero exit: ${exitCode}`;
    if (!reply) return 'no valid JSON output';

    // Acceptable outcomes in burst mode: no duplicate warning at all, or one
    // that carries the [Wave] marker.
    const msg = reply.systemMessage || '';
    const mentionsDuplicate = msg.toLowerCase().includes('duplicate');
    const hasWaveMarker = msg.includes('[Wave]');
    if (!mentionsDuplicate || hasWaveMarker) return true;
    return `expected no duplicate warning or [Wave]-prefixed in burst mode, got: ${msg}`;
  } finally {
    // Remove the state file so later tests start outside burst mode.
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
|
|
704
|
+
|
|
705
|
+
// ─── Test 32: enforce-tier: non-burst mode still warns on duplicates ───────
|
|
706
|
+
test('enforce-tier: non-burst mode still warns on duplicates', () => {
  try {
    // A window_start of 0 (the epoch) is far older than 90 seconds, so the
    // seeded state counts as expired.
    writeFileSync(BURST_FILE, JSON.stringify({ count: 0, window_start: 0 }));

    const request = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: 'non-burst duplicate test identical prompt', model: 'sonnet' },
    });

    // Send the same request twice; only the repeat is inspected.
    run(ENFORCE_TIER, request);
    const { parsed: reply, status: exitCode } = run(ENFORCE_TIER, request);

    if (exitCode !== 0) return `non-zero exit: ${exitCode}`;
    if (!reply) return 'no valid JSON output';

    // Outside burst mode the repeat must produce a duplicate warning.
    const msg = reply.systemMessage || '';
    const mentionsDuplicate = msg.toLowerCase().includes('duplicate');
    return mentionsDuplicate
      ? true
      : `expected duplicate warning in non-burst mode, got: ${msg || '(empty)'}`;
  } finally {
    // Remove the state file so it does not leak into later runs.
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
|
|
730
|
+
|
|
340
731
|
// ─── Summary ─────────────────────────────────────────────────────────────────
|
|
341
732
|
const total = passed + failed;
|
|
342
733
|
console.log(`\n${passed}/${total} tests passed`);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dual-brain",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.8.0",
|
|
4
4
|
"description": "Dual-provider orchestration for Claude Code — tiered routing, budget balancing, and GPT dual-brain review across Claude + OpenAI subscriptions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -22,6 +22,9 @@
|
|
|
22
22
|
"type": "git",
|
|
23
23
|
"url": "https://github.com/1xmint/dual-brain.git"
|
|
24
24
|
},
|
|
25
|
+
"scripts": {
|
|
26
|
+
"test": "node hooks/test-orchestrator.mjs"
|
|
27
|
+
},
|
|
25
28
|
"engines": {
|
|
26
29
|
"node": ">=20.0.0"
|
|
27
30
|
},
|