dual-brain 3.8.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -63,7 +63,7 @@ npx -y dual-brain
63
63
  | `hooks/gpt-work-dispatcher.mjs` | Dispatch execution tasks to GPT via Codex CLI |
64
64
  | `hooks/session-report.mjs` | Session-end summary: activity, compliance, quality |
65
65
  | `hooks/health-check.mjs` | Verify all hooks and dependencies are working |
66
- | `hooks/test-orchestrator.mjs` | Self-test harness (29 tests) |
66
+ | `hooks/test-orchestrator.mjs` | Self-test harness (39 tests) |
67
67
  | `hooks/setup-wizard.mjs` | Interactive config (optional — for custom plans) |
68
68
  | `hooks/install-git-hooks.mjs` | Git pre-commit hook for quality gate |
69
69
 
@@ -437,16 +437,40 @@ function showProfilePicker(rl) {
437
437
 
438
438
  rl.question(' Choice: ', (answer) => {
439
439
  const names = Object.keys(PROFILES);
440
- const idx = parseInt(answer, 10) - 1;
440
+ const trimmed = answer.trim();
441
+ let selectedName = null;
442
+
443
+ // Try numeric selection first
444
+ const idx = parseInt(trimmed, 10) - 1;
441
445
  if (idx >= 0 && idx < names.length) {
446
+ selectedName = names[idx];
447
+ }
448
+
449
+ // Try natural language alias resolution
450
+ if (!selectedName && trimmed && trimmed !== 'q') {
451
+ const PANEL_ALIASES = {
452
+ 'auto': 'auto', 'adaptive': 'auto', 'smart': 'auto', 'default': 'auto', 'normal': 'auto',
453
+ 'balanced': 'balanced', 'even': 'balanced', 'equal': 'balanced',
454
+ 'cost-saver': 'cost-saver', 'cheap': 'cost-saver', 'save': 'cost-saver', 'conservative': 'cost-saver', 'frugal': 'cost-saver', 'budget': 'cost-saver',
455
+ 'quality-first': 'quality-first', 'aggressive': 'quality-first', 'quality': 'quality-first', 'max': 'quality-first', 'full': 'quality-first', 'both': 'quality-first',
456
+ };
457
+ const cleaned = trimmed.toLowerCase()
458
+ .replace(/^(go|be|use|switch to|set|mode)\s+/i, '')
459
+ .replace(/\s+mode$/i, '');
460
+ selectedName = PANEL_ALIASES[cleaned] || null;
461
+ }
462
+
463
+ if (selectedName) {
442
464
  let customOverrides = null;
443
465
  try {
444
466
  const existing = JSON.parse(readFileSync(PROFILE_FILE, 'utf8'));
445
467
  if (existing.custom_overrides?.budgets) customOverrides = { budgets: existing.custom_overrides.budgets };
446
468
  } catch {}
447
- saveProfile(names[idx], customOverrides);
448
- const pf = PROFILES[names[idx]];
469
+ saveProfile(selectedName, customOverrides);
470
+ const pf = PROFILES[selectedName];
449
471
  console.log(` ✅ Switched to ${pf.emoji} ${pf.uiLabel}`);
472
+ } else if (trimmed && trimmed !== 'q') {
473
+ console.log(` Unknown profile: ${trimmed}. Try: cheap, aggressive, quality, balanced, auto`);
450
474
  }
451
475
  resolve();
452
476
  });
@@ -8,7 +8,6 @@
8
8
  * Output contract: must print "{}" to stdout and exit 0 within ~100 ms.
9
9
  */
10
10
 
11
- import { createHash } from "crypto";
12
11
  import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from "fs";
13
12
  import { dirname, join } from "path";
14
13
  import { fileURLToPath } from "url";
@@ -265,9 +264,11 @@ async function main() {
265
264
  // Record failures for adaptive routing (failure-loop detection)
266
265
  if (status === 'error' && toolName === 'Agent') {
267
266
  try {
268
- const { recordFailure } = await import('./failure-detector.mjs');
269
- const promptHash = createHash('md5').update(JSON.stringify(toolInput)).digest('hex').slice(0, 12);
267
+ const { computePromptHash, recordFailure, pruneOldFailures } = await import('./failure-detector.mjs');
268
+ const promptHash = computePromptHash(toolInput);
270
269
  recordFailure(promptHash, tier, payload?.error || 'agent_error');
270
+ // Best-effort cleanup of stale failure entries (>24h old)
271
+ try { pruneOldFailures(); } catch {}
271
272
  } catch {}
272
273
  }
273
274
 
@@ -1,10 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
  import { readFileSync, writeFileSync, appendFileSync, renameSync } from 'fs';
3
- import { createHash } from 'crypto';
4
3
  import { dirname, resolve, join } from 'path';
5
4
  import { fileURLToPath } from 'url';
6
5
  import { classifyRisk, extractPaths } from './risk-classifier.mjs';
7
- import { checkFailureLoop, recordFailure } from './failure-detector.mjs';
6
+ import { computePromptHash, checkFailureLoop, recordFailure } from './failure-detector.mjs';
8
7
 
9
8
  const __dirname = dirname(fileURLToPath(import.meta.url));
10
9
  const CONFIG_FILE = resolve(__dirname, '..', 'orchestrator.json');
@@ -214,7 +213,7 @@ try {
214
213
  const currentModel = (ti.model || '').toLowerCase();
215
214
 
216
215
  // Compute prompt hash early for duplicate detection and logging
217
- const promptHash = createHash('sha256').update(text).digest('hex').slice(0, 12);
216
+ const promptHash = computePromptHash(ti);
218
217
 
219
218
  // Burst detection — suppress noise during wave launches (3+ agents in 90s)
220
219
  const burstMode = detectBurst();
@@ -3,22 +3,52 @@
3
3
  * failure-detector.mjs — Detects repeated failure loops for adaptive routing.
4
4
  *
5
5
  * Exports:
6
- * checkFailureLoop(promptHash) → { isLoop, count, suggestion }
6
+ * checkFailureLoop(promptHash, tier?) → { isLoop, count, weightedScore, suggestion }
7
7
  * recordFailure(promptHash, tier, reason) → void
8
+ * pruneOldFailures() → { pruned, remaining }
8
9
  */
9
10
 
10
- import { readFileSync, appendFileSync } from 'fs';
11
+ import { createHash } from 'crypto';
12
+ import { readFileSync, appendFileSync, writeFileSync, renameSync, unlinkSync } from 'fs';
11
13
  import { dirname, join } from 'path';
12
14
  import { fileURLToPath } from 'url';
13
15
 
16
+
14
17
  const __dirname = dirname(fileURLToPath(import.meta.url));
15
18
  const LEDGER_FILE = join(__dirname, 'decision-ledger.jsonl');
16
19
 
17
- function checkFailureLoop(promptHash) {
18
- if (!promptHash) return { isLoop: false, count: 0, suggestion: null };
20
/**
 * Canonical prompt hash used by all hooks for failure-loop correlation.
 * Both enforce-tier (PreToolUse) and cost-logger (PostToolUse) must use this
 * same function so that recorded failures can be matched during escalation.
 *
 * The hash is the first 12 hex characters of the SHA-256 digest of
 * `description` immediately followed by `prompt`. A missing or falsy field
 * contributes the empty string, so `undefined`, `null` and `{}` all hash
 * the empty string.
 *
 * @param {object} toolInput — the raw tool_input from the hook payload
 * @returns {string} 12-char hex hash
 */
function computePromptHash(toolInput) {
  // Coerce each part to a string before joining. With two non-string
  // primitives (e.g. numbers) `+` would add them and hand a number to
  // update(), which throws. String inputs hash exactly as before.
  const description = String(toolInput?.description || '');
  const prompt = String(toolInput?.prompt || '');
  return createHash('sha256')
    .update(description + prompt)
    .digest('hex')
    .slice(0, 12);
}
32
+
33
/**
 * Compute a decay weight based on failure age.
 *
 *   age ≤ 30 min        → 1.0
 *   30 < age ≤ 60 min   → 0.5
 *   60 < age ≤ 120 min  → 0.25
 *   age > 120 min       → 0
 *
 * A timestamp in the future (negative age) is treated as brand new and
 * weighs 1.0. A timestamp that is not a finite number (for example the NaN
 * that `Date.parse` returns for a malformed value) weighs 0, so it cannot
 * contribute to a failure-loop score.
 *
 * @param {number} timestampMs — failure time in epoch milliseconds
 * @param {number} now — reference time in epoch milliseconds
 * @returns {number} weight: 0, 0.25, 0.5 or 1.0
 */
function decayWeight(timestampMs, now) {
  const ageMin = (now - timestampMs) / (60 * 1000);

  // Outside the two-hour window, or an unusable timestamp: no weight.
  if (!Number.isFinite(ageMin) || ageMin > 120) return 0;

  if (ageMin <= 30) return 1.0;
  if (ageMin <= 60) return 0.5;
  return 0.25; // 60-120 min
}
44
+
45
+ function checkFailureLoop(promptHash, tier) {
46
+ if (!promptHash) return { isLoop: false, count: 0, weightedScore: 0, suggestion: null };
19
47
 
20
- const twoHoursAgo = Date.now() - 2 * 60 * 60 * 1000;
21
- let failures = 0;
48
+ const now = Date.now();
49
+ const twoHoursAgo = now - 2 * 60 * 60 * 1000;
50
+ let count = 0;
51
+ let weightedScore = 0;
22
52
  let lastTier = null;
23
53
 
24
54
  try {
@@ -27,22 +57,26 @@ function checkFailureLoop(promptHash) {
27
57
  try {
28
58
  const entry = JSON.parse(line);
29
59
  if (entry.prompt_hash !== promptHash) continue;
30
- if (Date.parse(entry.timestamp) < twoHoursAgo) continue;
31
- if (entry.success === false) {
32
- failures++;
33
- lastTier = entry.tier;
34
- }
60
+ const entryTime = Date.parse(entry.timestamp);
61
+ if (entryTime < twoHoursAgo) continue;
62
+ if (entry.success !== false) continue;
63
+ // If tier is provided, only count matching tiers
64
+ if (tier && entry.tier && entry.tier !== tier) continue;
65
+
66
+ count++;
67
+ weightedScore += decayWeight(entryTime, now);
68
+ lastTier = entry.tier;
35
69
  } catch {}
36
70
  }
37
71
  } catch {}
38
72
 
39
- if (failures < 2) return { isLoop: false, count: failures, suggestion: null };
73
+ if (weightedScore < 2.0) return { isLoop: false, count, weightedScore, suggestion: null };
40
74
 
41
75
  const suggestion = lastTier === 'execute'
42
76
  ? 'promote_tier'
43
77
  : 'escalate_to_dual_brain';
44
78
 
45
- return { isLoop: true, count: failures, suggestion };
79
+ return { isLoop: true, count, weightedScore, suggestion };
46
80
  }
47
81
 
48
82
  function recordFailure(promptHash, tier, reason) {
@@ -59,4 +93,46 @@ function recordFailure(promptHash, tier, reason) {
59
93
  } catch {}
60
94
  }
61
95
 
62
- export { checkFailureLoop, recordFailure };
96
/**
 * Remove stale failure entries from the ledger.
 *
 * An entry is dropped only when it parses as JSON, has `type === 'failure'`,
 * and its `timestamp` parses to a time older than the retention window.
 * Everything else is kept verbatim: other entry types, failures with an
 * unparseable timestamp, and lines that are not valid JSON.
 *
 * The ledger is rewritten only when at least one entry was pruned, using an
 * atomic write (tmp file + rename) to avoid corruption. Skipping the rewrite
 * when nothing is stale avoids needless I/O and avoids replacing a file that
 * another hook may have appended to since it was read.
 *
 * @param {string} [ledgerFile=LEDGER_FILE] — path of the JSONL ledger
 * @param {number} [maxAgeMs=86400000] — retention window in ms (default 24 h)
 * @returns {{ pruned: number, remaining: number }} counts of removed and kept
 *   lines; `{ pruned: 0, remaining: 0 }` if the ledger is missing or any
 *   read/write step fails
 */
function pruneOldFailures(ledgerFile = LEDGER_FILE, maxAgeMs = 24 * 60 * 60 * 1000) {
  const cutoff = Date.now() - maxAgeMs;
  const tmpFile = `${ledgerFile}.tmp.${process.pid}`;
  const kept = [];
  let pruned = 0;

  try {
    const lines = readFileSync(ledgerFile, 'utf8').split('\n').filter(Boolean);

    for (const line of lines) {
      let entry = null;
      try {
        entry = JSON.parse(line);
      } catch {
        // Keep unparseable lines to avoid data loss
      }

      // NaN timestamps compare false, so malformed failures are retained.
      const isStale = entry?.type === 'failure' && Date.parse(entry.timestamp) < cutoff;
      if (isStale) {
        pruned++;
      } else {
        kept.push(line);
      }
    }

    if (pruned > 0) {
      writeFileSync(tmpFile, kept.length > 0 ? kept.join('\n') + '\n' : '');
      renameSync(tmpFile, ledgerFile);
    }
  } catch (err) {
    // A missing ledger never got as far as creating the tmp file; for any
    // other failure make a best-effort attempt to remove a leftover one.
    if (err.code !== 'ENOENT') {
      try { unlinkSync(tmpFile); } catch {}
    }
    return { pruned: 0, remaining: 0 };
  }

  return { pruned, remaining: kept.length };
}
137
+
138
+ export { computePromptHash, checkFailureLoop, recordFailure, pruneOldFailures };
@@ -20,6 +20,25 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
20
20
  const PROFILE_FILE = join(__dirname, '..', 'dual-brain.profile.json');
21
21
  const CONFIG_FILE = join(__dirname, '..', 'orchestrator.json');
22
22
 
23
/**
 * Natural-language aliases for routing profiles.
 * Keys are lowercase user-facing words; values are canonical profile names.
 * Each canonical name also maps to itself.
 */
const ALIASES = {
  // auto
  'auto': 'auto', 'adaptive': 'auto', 'smart': 'auto', 'default': 'auto', 'normal': 'auto',
  // balanced
  'balanced': 'balanced', 'even': 'balanced', 'equal': 'balanced',
  // cost-saver
  'cost-saver': 'cost-saver', 'cheap': 'cost-saver', 'save': 'cost-saver', 'conservative': 'cost-saver', 'frugal': 'cost-saver', 'budget': 'cost-saver',
  // quality-first
  'quality-first': 'quality-first', 'aggressive': 'quality-first', 'quality': 'quality-first', 'max': 'quality-first', 'full': 'quality-first', 'both': 'quality-first',
};

/**
 * Resolve free-form user input to a canonical profile name.
 *
 * The input is lowercased and trimmed, then one leading filler word
 * ("go", "be", "use", "switch to", "set", "mode") and a trailing " mode"
 * are stripped before the alias lookup, so "Go Aggressive Mode" resolves
 * like "aggressive".
 *
 * @param {string} input — raw text typed by the user
 * @returns {string|null} canonical profile name, or null when the input is
 *   empty, not a string, or not a known alias
 */
function resolveProfileName(input) {
  if (typeof input !== 'string' || !input) return null;

  const cleaned = input.toLowerCase().trim()
    .replace(/^(go|be|use|switch to|set|mode)\s+/i, '')
    .replace(/\s+mode$/i, '');

  // Own-property check: a plain `ALIASES[cleaned]` would also hit inherited
  // keys such as "constructor" or "toString" and return a function.
  return Object.prototype.hasOwnProperty.call(ALIASES, cleaned) ? ALIASES[cleaned] : null;
}
41
+
23
42
  const PROFILES = {
24
43
  auto: {
25
44
  description: 'Adapts routing based on task risk, provider health, and outcomes',
@@ -140,12 +159,22 @@ function getActiveProfile() {
140
159
  }
141
160
 
142
161
  function setActiveProfile(name, customOverrides = null) {
143
- if (!PROFILES[name]) {
144
- return { ok: false, error: `Unknown profile: ${name}. Available: ${Object.keys(PROFILES).join(', ')}` };
162
+ let resolved = name;
163
+ if (!PROFILES[resolved]) {
164
+ const alias = resolveProfileName(name);
165
+ if (alias) {
166
+ resolved = alias;
167
+ } else {
168
+ const aliasHint = Object.entries(ALIASES)
169
+ .filter(([k, v]) => k !== v)
170
+ .map(([k, v]) => `${k} → ${v}`)
171
+ .join(', ');
172
+ return { ok: false, error: `Unknown profile: ${name}. Available: ${Object.keys(PROFILES).join(', ')}. Aliases: ${aliasHint}` };
173
+ }
145
174
  }
146
175
 
147
176
  const data = {
148
- active: name,
177
+ active: resolved,
149
178
  switched_at: new Date().toISOString(),
150
179
  };
151
180
  if (customOverrides) data.custom_overrides = customOverrides;
@@ -154,7 +183,7 @@ function setActiveProfile(name, customOverrides = null) {
154
183
  const tmp = PROFILE_FILE + '.tmp.' + process.pid;
155
184
  writeFileSync(tmp, JSON.stringify(data, null, 2) + '\n');
156
185
  renameSync(tmp, PROFILE_FILE);
157
- return { ok: true, profile: PROFILES[name] };
186
+ return { ok: true, profile: PROFILES[resolved], resolvedName: resolved };
158
187
  } catch (err) {
159
188
  return { ok: false, error: `Failed to write profile: ${err.message}` };
160
189
  }
@@ -216,6 +245,8 @@ function getProfileOverrides(system) {
216
245
 
217
246
  export {
218
247
  PROFILES,
248
+ ALIASES,
249
+ resolveProfileName,
219
250
  getActiveProfile,
220
251
  setActiveProfile,
221
252
  setBudgetOverrides,
@@ -336,7 +336,7 @@ test('profiles: consistent across modules', () => {
336
336
  test('failure-detector: ignores followed=false', () => {
337
337
  const src = readFileSync(resolve(__dirname, 'failure-detector.mjs'), 'utf8');
338
338
  if (src.includes('followed === false')) return 'still conflates followed=false with failure';
339
- if (!src.includes('success === false')) return 'missing success===false check';
339
+ if (!src.includes('success === false') && !src.includes('success !== false')) return 'missing success check';
340
340
  return true;
341
341
  });
342
342
 
@@ -728,6 +728,349 @@ test('enforce-tier: non-burst mode still warns on duplicates', () => {
728
728
  }
729
729
  });
730
730
 
731
+ // ─── Test 33: install preserves existing hooks ─────────────────────────────
732
+ test('install: preserves existing hooks', () => {
733
+ const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
734
+
735
+ // install.mjs must define DUAL_BRAIN_CMDS to identify its own hooks
736
+ if (!installSrc.includes('DUAL_BRAIN_CMDS'))
737
+ return 'install.mjs missing DUAL_BRAIN_CMDS constant for filtering';
738
+
739
+ // It must filter out only dual-brain hooks (not all hooks) before merging
740
+ if (!installSrc.includes('.filter'))
741
+ return 'install.mjs missing .filter() call — may clobber non-dual-brain hooks';
742
+
743
+ // The merge logic should spread existingEntries first, then add dual-brain hooks
744
+ if (!installSrc.includes('existingEntries'))
745
+ return 'install.mjs missing existingEntries variable — may not preserve other hooks';
746
+
747
+ // Verify it reads existing settings before overwriting
748
+ if (!installSrc.includes('existing') || !installSrc.includes('settings.json'))
749
+ return 'install.mjs does not read existing settings.json before writing';
750
+
751
+ return true;
752
+ });
753
+
754
+ // ─── Test 34: gitignore entries don't conflict with data-tools ─────────────
755
+ test('install: gitignore entries scoped to dual-brain', () => {
756
+ const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
757
+
758
+ // Extract the generateGitignoreEntries function body
759
+ const fnMatch = installSrc.match(/generateGitignoreEntries[\s\S]*?const entries\s*=\s*\[([\s\S]*?)\]/);
760
+ if (!fnMatch) return 'could not find generateGitignoreEntries entries array';
761
+
762
+ const entriesBlock = fnMatch[1];
763
+
764
+ // Extract individual entry strings
765
+ const entryStrings = [...entriesBlock.matchAll(/'([^']+)'/g)].map(m => m[1]);
766
+ if (entryStrings.length === 0) return 'no gitignore entries found in install.mjs';
767
+
768
+ // Each entry must be scoped — no broad patterns like *.json, *.jsonl, .claude/hooks/
769
+ const broadPatterns = ['*.json', '*.jsonl', '*.mjs', '.claude/', '.claude/hooks/'];
770
+ for (const entry of entryStrings) {
771
+ for (const bad of broadPatterns) {
772
+ if (entry === bad)
773
+ return `gitignore entry "${entry}" is too broad — could match data-tools files`;
774
+ }
775
+ }
776
+
777
+ // Each entry should reference dual-brain-specific names
778
+ const validScopes = ['dual-brain', 'usage-', 'usage.jsonl', 'decision-ledger', 'drift-warned', 'budget-alerted', 'summary-', 'reviews/', '.launched'];
779
+ for (const entry of entryStrings) {
780
+ const isScoped = validScopes.some(scope => entry.includes(scope));
781
+ if (!isScoped)
782
+ return `gitignore entry "${entry}" may not be scoped to dual-brain files`;
783
+ }
784
+
785
+ return true;
786
+ });
787
+
788
+ // ─── Test 35: hooks use isolated file paths ────────────────────────────────
789
+ test('hooks: output files use dual-brain-namespaced paths', () => {
790
+ const validNames = ['dual-brain', 'usage-', 'usage.jsonl', 'decision-ledger', 'summary-checkpoint', '.drift-warned', '.burst-state', '.budget-alerted', 'orchestrator.json', '.launched'];
791
+
792
+ const hookFiles = {
793
+ 'enforce-tier.mjs': ['DRIFT_STATE', 'BURST_FILE', 'PROFILE_FILE'],
794
+ 'cost-logger.mjs': ['usage-', 'PROFILE_FILE'],
795
+ 'summary-checkpoint.mjs': ['usage-summary-', 'usage-'],
796
+ };
797
+
798
+ for (const [hookFile, expectedRefs] of Object.entries(hookFiles)) {
799
+ const src = readFileSync(resolve(__dirname, hookFile), 'utf8');
800
+
801
+ // Find all file paths the hook writes to (writeFileSync / appendFileSync targets)
802
+ const writeTargets = [...src.matchAll(/(?:writeFileSync|appendFileSync|renameSync)\(\s*([^,)]+)/g)].map(m => m[1].trim());
803
+
804
+ if (writeTargets.length === 0) return `${hookFile}: no write targets found`;
805
+
806
+ // Verify none of the write targets use generic names
807
+ // They should resolve to variables defined with dual-brain-specific names
808
+ const genericNames = ['config.json', 'state.json', 'log.jsonl', 'data.json', 'output.json'];
809
+ for (const target of writeTargets) {
810
+ for (const bad of genericNames) {
811
+ if (target.includes(`'${bad}'`) || target.includes(`"${bad}"`))
812
+ return `${hookFile}: writes to generic filename "${bad}" — could collide with other tools`;
813
+ }
814
+ }
815
+ }
816
+
817
+ // Verify the actual file path constants in enforce-tier use dual-brain-scoped names
818
+ const enforceSrc = readFileSync(resolve(__dirname, 'enforce-tier.mjs'), 'utf8');
819
+ if (!enforceSrc.includes('dual-brain.profile.json'))
820
+ return 'enforce-tier.mjs PROFILE_FILE does not reference dual-brain namespace';
821
+ if (!enforceSrc.includes('.drift-warned'))
822
+ return 'enforce-tier.mjs DRIFT_STATE does not use scoped filename';
823
+ if (!enforceSrc.includes('.burst-state'))
824
+ return 'enforce-tier.mjs BURST_FILE does not use scoped filename';
825
+
826
+ // Verify cost-logger writes to usage-dated files, not generic names
827
+ const costSrc = readFileSync(resolve(__dirname, 'cost-logger.mjs'), 'utf8');
828
+ if (!costSrc.includes('usage-'))
829
+ return 'cost-logger.mjs does not write to usage-prefixed files';
830
+ if (!costSrc.includes('dual-brain.profile.json'))
831
+ return 'cost-logger.mjs PROFILE_FILE does not reference dual-brain namespace';
832
+
833
+ return true;
834
+ });
835
+
836
+ // ─── Test 36: failure decay weights recent failures higher ─────────────────
837
+ test('failure decay: recent failures score high', () => {
838
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
839
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
840
+
841
+ try {
842
+ const hash = 'decay_recent_' + Date.now();
843
+ const fiveMinAgo = new Date(Date.now() - 5 * 60 * 1000).toISOString();
844
+ const entry = JSON.stringify({
845
+ type: 'failure', timestamp: fiveMinAgo, prompt_hash: hash,
846
+ tier: 'execute', reason: 'test_decay', success: false,
847
+ });
848
+ writeFileSync(LEDGER, entry + '\n' + entry + '\n', 'utf8');
849
+
850
+ const script = `
851
+ import { checkFailureLoop } from './failure-detector.mjs';
852
+ const result = checkFailureLoop('${hash}');
853
+ process.stdout.write(JSON.stringify(result));
854
+ `;
855
+ const proc = spawnSync(process.execPath, [
856
+ '--input-type=module',
857
+ '-e', script,
858
+ ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
859
+
860
+ if (proc.status !== 0) return `script failed: ${proc.stderr}`;
861
+ let result;
862
+ try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
863
+ if (!result.isLoop) return `expected isLoop=true for recent failures, got: ${JSON.stringify(result)}`;
864
+ if (typeof result.weightedScore !== 'number' || result.weightedScore < 2.0)
865
+ return `expected weightedScore >= 2.0, got: ${result.weightedScore}`;
866
+ return true;
867
+ } finally {
868
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
869
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
870
+ }
871
+ });
872
+
873
+ // ─── Test 37: failure decay reduces old failure weight ─────────────────────
874
+ test('failure decay: old failures score low', () => {
875
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
876
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
877
+
878
+ try {
879
+ const hash = 'decay_old_' + Date.now();
880
+ const ninetyMinAgo = new Date(Date.now() - 90 * 60 * 1000).toISOString();
881
+ const entry = JSON.stringify({
882
+ type: 'failure', timestamp: ninetyMinAgo, prompt_hash: hash,
883
+ tier: 'execute', reason: 'test_decay_old', success: false,
884
+ });
885
+ writeFileSync(LEDGER, entry + '\n' + entry + '\n', 'utf8');
886
+
887
+ const script = `
888
+ import { checkFailureLoop } from './failure-detector.mjs';
889
+ const result = checkFailureLoop('${hash}');
890
+ process.stdout.write(JSON.stringify(result));
891
+ `;
892
+ const proc = spawnSync(process.execPath, [
893
+ '--input-type=module',
894
+ '-e', script,
895
+ ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
896
+
897
+ if (proc.status !== 0) return `script failed: ${proc.stderr}`;
898
+ let result;
899
+ try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
900
+ if (result.isLoop) return `expected isLoop=false for old failures (weightedScore should be ~0.5), got: ${JSON.stringify(result)}`;
901
+ if (typeof result.weightedScore !== 'number')
902
+ return `expected weightedScore in result, got: ${JSON.stringify(result)}`;
903
+ if (result.weightedScore >= 2.0)
904
+ return `expected weightedScore < 2.0 for 90-min-old failures, got: ${result.weightedScore}`;
905
+ return true;
906
+ } finally {
907
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
908
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
909
+ }
910
+ });
911
+
912
+ // ─── Test 38: failure scoping by tier ──────────────────────────────────────
913
+ test('failure decay: scoping by tier', () => {
914
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
915
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
916
+
917
+ try {
918
+ const hash = 'tier_scope_' + Date.now();
919
+ const now = new Date().toISOString();
920
+ const mkEntry = (tier) => JSON.stringify({
921
+ type: 'failure', timestamp: now, prompt_hash: hash,
922
+ tier, reason: 'test_tier_scope', success: false,
923
+ });
924
+ const content = [
925
+ mkEntry('execute'), mkEntry('execute'),
926
+ mkEntry('search'), mkEntry('search'),
927
+ ].join('\n') + '\n';
928
+ writeFileSync(LEDGER, content, 'utf8');
929
+
930
+ const checkTier = (tier) => {
931
+ const script = `
932
+ import { checkFailureLoop } from './failure-detector.mjs';
933
+ const result = checkFailureLoop('${hash}', '${tier}');
934
+ process.stdout.write(JSON.stringify(result));
935
+ `;
936
+ const proc = spawnSync(process.execPath, [
937
+ '--input-type=module',
938
+ '-e', script,
939
+ ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
940
+ if (proc.status !== 0) return { error: `script failed for tier=${tier}: ${proc.stderr}` };
941
+ try { return JSON.parse(proc.stdout.trim()); } catch { return { error: `output not JSON for tier=${tier}: ${proc.stdout}` }; }
942
+ };
943
+
944
+ const execResult = checkTier('execute');
945
+ if (execResult.error) return execResult.error;
946
+ if (!execResult.isLoop) return `expected isLoop=true for execute tier, got: ${JSON.stringify(execResult)}`;
947
+
948
+ const searchResult = checkTier('search');
949
+ if (searchResult.error) return searchResult.error;
950
+ if (!searchResult.isLoop) return `expected isLoop=true for search tier, got: ${JSON.stringify(searchResult)}`;
951
+
952
+ const thinkResult = checkTier('think');
953
+ if (thinkResult.error) return thinkResult.error;
954
+ if (thinkResult.isLoop) return `expected isLoop=false for think tier (no think failures), got: ${JSON.stringify(thinkResult)}`;
955
+
956
+ return true;
957
+ } finally {
958
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
959
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
960
+ }
961
+ });
962
+
963
+ // ─── Test 39: pruneOldFailures removes stale entries ───────────────────────
964
+ test('failure decay: pruneOldFailures removes stale entries', () => {
965
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
966
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
967
+
968
+ try {
969
+ const twentyFiveHoursAgo = new Date(Date.now() - 25 * 60 * 60 * 1000).toISOString();
970
+ const oneHourAgo = new Date(Date.now() - 1 * 60 * 60 * 1000).toISOString();
971
+ const staleEntry = JSON.stringify({
972
+ type: 'failure', timestamp: twentyFiveHoursAgo, prompt_hash: 'stale',
973
+ tier: 'execute', reason: 'old', success: false,
974
+ });
975
+ const recentEntry = JSON.stringify({
976
+ type: 'failure', timestamp: oneHourAgo, prompt_hash: 'recent',
977
+ tier: 'execute', reason: 'new', success: false,
978
+ });
979
+ const content = [staleEntry, staleEntry, recentEntry, recentEntry].join('\n') + '\n';
980
+ writeFileSync(LEDGER, content, 'utf8');
981
+
982
+ const script = `
983
+ import { pruneOldFailures } from './failure-detector.mjs';
984
+ pruneOldFailures();
985
+ `;
986
+ const proc = spawnSync(process.execPath, [
987
+ '--input-type=module',
988
+ '-e', script,
989
+ ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
990
+
991
+ if (proc.status !== 0) return `pruneOldFailures script failed: ${proc.stderr}`;
992
+ if (!existsSync(LEDGER)) return 'ledger file was deleted instead of pruned';
993
+
994
+ const lines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
995
+ if (lines.length !== 2) return `expected 2 entries after prune, got: ${lines.length}`;
996
+
997
+ for (const line of lines) {
998
+ let entry;
999
+ try { entry = JSON.parse(line); } catch { return `pruned ledger has invalid JSON: ${line}`; }
1000
+ if (entry.prompt_hash !== 'recent')
1001
+ return `expected only recent entries to remain, found prompt_hash=${entry.prompt_hash}`;
1002
+ }
1003
+ return true;
1004
+ } finally {
1005
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
1006
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
1007
+ }
1008
+ });
1009
+
1010
+ // ─── Test 40: adaptive loop end-to-end hash match ─────────────────────────
1011
+ test('adaptive loop: end-to-end hash match', () => {
1012
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
1013
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
1014
+
1015
+ try {
1016
+ // Start with a clean ledger so prior failures don't interfere
1017
+ writeFileSync(LEDGER, '', 'utf8');
1018
+
1019
+ // Step 1: Define a specific Agent payload used consistently across all steps
1020
+ const toolInput = { prompt: 'fix the auth bug', description: 'patch auth module' };
1021
+ const agentPayload = JSON.stringify({ tool_name: 'Agent', tool_input: toolInput });
1022
+
1023
+ // Step 2: Run enforce-tier with this payload (computes and may log a promptHash)
1024
+ const firstRun = run(ENFORCE_TIER, agentPayload);
1025
+ if (firstRun.status !== 0) return `first enforce-tier run failed with status: ${firstRun.status}`;
1026
+ if (!firstRun.parsed) return `first enforce-tier run produced no valid JSON`;
1027
+
1028
+ // Step 3: Simulate 2 failures via cost-logger with the SAME tool_input
1029
+ const errorPayload = JSON.stringify({
1030
+ tool_name: 'Agent',
1031
+ tool_input: toolInput,
1032
+ error: 'test failure',
1033
+ });
1034
+
1035
+ const fail1 = runStream(COST_LOGGER, errorPayload);
1036
+ if (fail1.status !== 0) return `first cost-logger failure run failed with status: ${fail1.status}`;
1037
+
1038
+ const fail2 = runStream(COST_LOGGER, errorPayload);
1039
+ if (fail2.status !== 0) return `second cost-logger failure run failed with status: ${fail2.status}`;
1040
+
1041
+ // Verify cost-logger actually wrote failure entries to the ledger
1042
+ if (!existsSync(LEDGER)) return 'ledger file not created after cost-logger failures';
1043
+ const ledgerLines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
1044
+ const failureEntries = ledgerLines
1045
+ .map(l => { try { return JSON.parse(l); } catch { return null; } })
1046
+ .filter(e => e && e.type === 'failure' && e.success === false);
1047
+ if (failureEntries.length < 2)
1048
+ return `expected >= 2 failure entries in ledger, got: ${failureEntries.length}`;
1049
+
1050
+ // Step 4: Run enforce-tier again with the same Agent payload
1051
+ const secondRun = run(ENFORCE_TIER, agentPayload);
1052
+ if (secondRun.status !== 0) return `second enforce-tier run failed with status: ${secondRun.status}`;
1053
+ if (!secondRun.parsed) return `second enforce-tier run produced no valid JSON`;
1054
+
1055
+ // Step 5: The second enforce-tier run should detect the failure loop
1056
+ // and mention escalation or failure loop in its systemMessage
1057
+ const msg = (secondRun.parsed.systemMessage || '').toLowerCase();
1058
+ if (!msg.includes('failure') && !msg.includes('escalat') && !msg.includes('loop') && !msg.includes('dual-brain'))
1059
+ return `expected failure loop / escalation in second enforce-tier systemMessage, got: "${secondRun.parsed.systemMessage || '(empty)'}"`;
1060
+
1061
+ // Bonus: verify the hashes match — the failure entries recorded by cost-logger
1062
+ // should have the same prompt_hash that enforce-tier uses for checkFailureLoop
1063
+ const failureHashes = [...new Set(failureEntries.map(e => e.prompt_hash))];
1064
+ if (failureHashes.length !== 1)
1065
+ return `expected all failure entries to share one hash, got ${failureHashes.length} distinct hashes: ${failureHashes.join(', ')}`;
1066
+
1067
+ return true;
1068
+ } finally {
1069
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
1070
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
1071
+ }
1072
+ });
1073
+
731
1074
  // ─── Summary ─────────────────────────────────────────────────────────────────
732
1075
  const total = passed + failed;
733
1076
  console.log(`\n${passed}/${total} tests passed`);
package/install.mjs CHANGED
@@ -58,7 +58,8 @@ if (flag('--help') || flag('-h')) {
58
58
  --help Show this help
59
59
 
60
60
  🎛️ Routing modes:
61
- ⚖️ Default Auto-routes, uses both providers evenly
61
+ 🤖 Auto (default) Adapts routing based on risk, health, outcomes
62
+ ⚖️ Balanced Auto-routes, uses both providers evenly
62
63
  🛡️ Conservative Fewer GPT dispatches, sticks to Claude
63
64
  🚀 Aggressive Maximizes both subscriptions, dual-brain for medium+
64
65
 
@@ -453,7 +454,7 @@ const PROFILES = {
453
454
  function loadProfile(workspace) {
454
455
  try {
455
456
  const data = JSON.parse(readFileSync(profilePath(workspace), 'utf8'));
456
- const name = data.active && PROFILES[data.active] ? data.active : 'balanced';
457
+ const name = data.active && PROFILES[data.active] ? data.active : 'auto';
457
458
  const profile = PROFILES[name];
458
459
  const custom = data.custom_overrides || {};
459
460
  return {
@@ -464,7 +465,7 @@ function loadProfile(workspace) {
464
465
  switched_at: data.switched_at || null,
465
466
  };
466
467
  } catch {
467
- return { name: 'balanced', ...PROFILES.balanced, switched_at: null };
468
+ return { name: 'auto', ...PROFILES.auto, switched_at: null };
468
469
  }
469
470
  }
470
471
 
@@ -497,8 +498,8 @@ function cmdMode() {
497
498
 
498
499
  if (!modeArg || modeArg === 'list') {
499
500
  const current = loadProfile(workspace);
500
- const PEMOJIS = { balanced: '⚖️ ', 'cost-saver': '🛡️', 'quality-first': '🚀' };
501
- const UI_NAMES = { balanced: 'Default', 'cost-saver': 'Conservative', 'quality-first': 'Aggressive' };
501
+ const PEMOJIS = { auto: '🤖', balanced: '⚖️ ', 'cost-saver': '🛡️', 'quality-first': '🚀' };
502
+ const UI_NAMES = { auto: 'Auto (default)', balanced: 'Balanced', 'cost-saver': 'Conservative', 'quality-first': 'Aggressive' };
502
503
  console.log('');
503
504
  console.log(' 🎛️ Routing modes:');
504
505
  console.log('');
@@ -513,13 +514,28 @@ function cmdMode() {
513
514
  return;
514
515
  }
515
516
 
516
- if (!PROFILES[modeArg]) {
517
- console.error(` Unknown profile: ${modeArg}`);
518
- console.error(` Available: ${Object.keys(PROFILES).join(', ')}`);
519
- process.exit(1);
517
+ let resolvedMode = modeArg;
518
+ if (!PROFILES[resolvedMode]) {
519
+ // Try natural language alias resolution
520
+ const cleaned = resolvedMode.toLowerCase().trim()
521
+ .replace(/^(go|be|use|switch to|set|mode)\s+/i, '')
522
+ .replace(/\s+mode$/i, '');
523
+ const MODE_ALIASES = {
524
+ 'auto': 'auto', 'adaptive': 'auto', 'smart': 'auto', 'default': 'auto', 'normal': 'auto',
525
+ 'balanced': 'balanced', 'even': 'balanced', 'equal': 'balanced',
526
+ 'cost-saver': 'cost-saver', 'cheap': 'cost-saver', 'save': 'cost-saver', 'conservative': 'cost-saver', 'frugal': 'cost-saver', 'budget': 'cost-saver',
527
+ 'quality-first': 'quality-first', 'aggressive': 'quality-first', 'quality': 'quality-first', 'max': 'quality-first', 'full': 'quality-first', 'both': 'quality-first',
528
+ };
529
+ resolvedMode = MODE_ALIASES[cleaned] || null;
530
+ if (!resolvedMode) {
531
+ console.error(` Unknown profile: ${modeArg}`);
532
+ console.error(` Available: ${Object.keys(PROFILES).join(', ')}`);
533
+ console.error(` Aliases: cheap, aggressive, quality, budget, frugal, smart, adaptive, ...`);
534
+ process.exit(1);
535
+ }
520
536
  }
521
537
 
522
- const profile = PROFILES[modeArg];
538
+ const profile = PROFILES[resolvedMode];
523
539
 
524
540
  let customOverrides = null;
525
541
  try {
@@ -529,12 +545,12 @@ function cmdMode() {
529
545
  }
530
546
  } catch {}
531
547
 
532
- saveProfile(workspace, modeArg, customOverrides);
548
+ saveProfile(workspace, resolvedMode, customOverrides);
533
549
 
534
- const PEMOJIS = { balanced: '⚖️ ', 'cost-saver': '🛡️', 'quality-first': '🚀' };
535
- const UI_NAMES = { balanced: 'Default', 'cost-saver': 'Conservative', 'quality-first': 'Aggressive' };
550
+ const PEMOJIS = { auto: '🤖', balanced: '⚖️ ', 'cost-saver': '🛡️', 'quality-first': '🚀' };
551
+ const UI_NAMES = { auto: 'Auto (default)', balanced: 'Balanced', 'cost-saver': 'Conservative', 'quality-first': 'Aggressive' };
536
552
  console.log('');
537
- console.log(` ✅ Mode switched: ${PEMOJIS[modeArg] || ''} ${UI_NAMES[modeArg] || modeArg}`);
553
+ console.log(` ✅ Mode switched: ${PEMOJIS[resolvedMode] || ''} ${UI_NAMES[resolvedMode] || resolvedMode}`);
538
554
  console.log(` ${profile.description}`);
539
555
  console.log('');
540
556
  console.log(' 🧭 Routing changes:');
@@ -586,7 +602,7 @@ function cmdBudget() {
586
602
  };
587
603
 
588
604
  const data = {
589
- active: existing.active || 'balanced',
605
+ active: existing.active || 'auto',
590
606
  switched_at: existing.switched_at || new Date().toISOString(),
591
607
  custom_overrides: customOverrides,
592
608
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dual-brain",
3
- "version": "3.8.0",
3
+ "version": "3.9.0",
4
4
  "description": "Dual-provider orchestration for Claude Code — tiered routing, budget balancing, and GPT dual-brain review across Claude + OpenAI subscriptions",
5
5
  "type": "module",
6
6
  "bin": {