dual-brain 3.8.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -265,9 +265,11 @@ async function main() {
265
265
  // Record failures for adaptive routing (failure-loop detection)
266
266
  if (status === 'error' && toolName === 'Agent') {
267
267
  try {
268
- const { recordFailure } = await import('./failure-detector.mjs');
268
+ const { recordFailure, pruneOldFailures } = await import('./failure-detector.mjs');
269
269
  const promptHash = createHash('md5').update(JSON.stringify(toolInput)).digest('hex').slice(0, 12);
270
270
  recordFailure(promptHash, tier, payload?.error || 'agent_error');
271
+ // Best-effort cleanup of stale failure entries (>24h old)
272
+ try { pruneOldFailures(); } catch {}
271
273
  } catch {}
272
274
  }
273
275
 
@@ -316,7 +316,7 @@ try {
316
316
  }
317
317
 
318
318
  // Failure loop detection
319
- const failureCheck = checkFailureLoop(promptHash);
319
+ const failureCheck = checkFailureLoop(promptHash, tier);
320
320
  let failureMessage = null;
321
321
  if (failureCheck.isLoop) {
322
322
  if (failureCheck.suggestion === 'promote_tier' && tier === 'execute') {
@@ -3,22 +3,38 @@
3
3
  * failure-detector.mjs — Detects repeated failure loops for adaptive routing.
4
4
  *
5
5
  * Exports:
6
- * checkFailureLoop(promptHash) → { isLoop, count, suggestion }
6
+ * checkFailureLoop(promptHash, tier?) → { isLoop, count, weightedScore, suggestion }
7
7
  * recordFailure(promptHash, tier, reason) → void
8
+ * pruneOldFailures() → { pruned, remaining }
8
9
  */
9
10
 
10
- import { readFileSync, appendFileSync } from 'fs';
11
+ import { readFileSync, appendFileSync, writeFileSync, renameSync, unlinkSync } from 'fs';
11
12
  import { dirname, join } from 'path';
12
13
  import { fileURLToPath } from 'url';
13
14
 
15
+
14
16
  const __dirname = dirname(fileURLToPath(import.meta.url));
15
17
  const LEDGER_FILE = join(__dirname, 'decision-ledger.jsonl');
16
18
 
17
- function checkFailureLoop(promptHash) {
18
- if (!promptHash) return { isLoop: false, count: 0, suggestion: null };
19
+ /**
20
+ * Compute a decay weight based on failure age.
21
+ * 0-30 min → 1.0, 30-60 min → 0.5, 60-120 min → 0.25, >120 min → 0 (excluded by window)
22
+ */
23
+ function decayWeight(timestampMs, now) {
24
+ const ageMs = now - timestampMs;
25
+ const ageMin = ageMs / (60 * 1000);
26
+ if (ageMin <= 30) return 1.0;
27
+ if (ageMin <= 60) return 0.5;
28
+ return 0.25; // 60-120 min
29
+ }
30
+
31
+ function checkFailureLoop(promptHash, tier) {
32
+ if (!promptHash) return { isLoop: false, count: 0, weightedScore: 0, suggestion: null };
19
33
 
20
- const twoHoursAgo = Date.now() - 2 * 60 * 60 * 1000;
21
- let failures = 0;
34
+ const now = Date.now();
35
+ const twoHoursAgo = now - 2 * 60 * 60 * 1000;
36
+ let count = 0;
37
+ let weightedScore = 0;
22
38
  let lastTier = null;
23
39
 
24
40
  try {
@@ -27,22 +43,26 @@ function checkFailureLoop(promptHash) {
27
43
  try {
28
44
  const entry = JSON.parse(line);
29
45
  if (entry.prompt_hash !== promptHash) continue;
30
- if (Date.parse(entry.timestamp) < twoHoursAgo) continue;
31
- if (entry.success === false) {
32
- failures++;
33
- lastTier = entry.tier;
34
- }
46
+ const entryTime = Date.parse(entry.timestamp);
47
+ if (entryTime < twoHoursAgo) continue;
48
+ if (entry.success !== false) continue;
49
+ // If tier is provided, only count matching tiers
50
+ if (tier && entry.tier && entry.tier !== tier) continue;
51
+
52
+ count++;
53
+ weightedScore += decayWeight(entryTime, now);
54
+ lastTier = entry.tier;
35
55
  } catch {}
36
56
  }
37
57
  } catch {}
38
58
 
39
- if (failures < 2) return { isLoop: false, count: failures, suggestion: null };
59
+ if (weightedScore < 2.0) return { isLoop: false, count, weightedScore, suggestion: null };
40
60
 
41
61
  const suggestion = lastTier === 'execute'
42
62
  ? 'promote_tier'
43
63
  : 'escalate_to_dual_brain';
44
64
 
45
- return { isLoop: true, count: failures, suggestion };
65
+ return { isLoop: true, count, weightedScore, suggestion };
46
66
  }
47
67
 
48
68
  function recordFailure(promptHash, tier, reason) {
@@ -59,4 +79,46 @@ function recordFailure(promptHash, tier, reason) {
59
79
  } catch {}
60
80
  }
61
81
 
62
- export { checkFailureLoop, recordFailure };
82
+ /**
83
+ * Remove failure entries older than 24 hours from the ledger.
84
+ * Uses atomic write (tmp file + rename) to avoid corruption.
85
+ */
86
+ function pruneOldFailures() {
87
+ const twentyFourHoursAgo = Date.now() - 24 * 60 * 60 * 1000;
88
+ let pruned = 0;
89
+ let remaining = 0;
90
+ const kept = [];
91
+
92
+ try {
93
+ const lines = readFileSync(LEDGER_FILE, 'utf8').split('\n').filter(Boolean);
94
+ for (const line of lines) {
95
+ try {
96
+ const entry = JSON.parse(line);
97
+ const entryTime = Date.parse(entry.timestamp);
98
+ if (entry.type === 'failure' && entryTime < twentyFourHoursAgo) {
99
+ pruned++;
100
+ } else {
101
+ kept.push(line);
102
+ remaining++;
103
+ }
104
+ } catch {
105
+ // Keep unparseable lines to avoid data loss
106
+ kept.push(line);
107
+ remaining++;
108
+ }
109
+ }
110
+
111
+ const tmpFile = LEDGER_FILE + `.tmp.${process.pid}`;
112
+ writeFileSync(tmpFile, kept.length > 0 ? kept.join('\n') + '\n' : '');
113
+ renameSync(tmpFile, LEDGER_FILE);
114
+ } catch (err) {
115
+ if (err.code !== 'ENOENT') {
116
+ try { unlinkSync(LEDGER_FILE + `.tmp.${process.pid}`); } catch {}
117
+ }
118
+ return { pruned: 0, remaining: 0 };
119
+ }
120
+
121
+ return { pruned, remaining };
122
+ }
123
+
124
+ export { checkFailureLoop, recordFailure, pruneOldFailures };
@@ -336,7 +336,7 @@ test('profiles: consistent across modules', () => {
336
336
  test('failure-detector: ignores followed=false', () => {
337
337
  const src = readFileSync(resolve(__dirname, 'failure-detector.mjs'), 'utf8');
338
338
  if (src.includes('followed === false')) return 'still conflates followed=false with failure';
339
- if (!src.includes('success === false')) return 'missing success===false check';
339
+ if (!src.includes('success === false') && !src.includes('success !== false')) return 'missing success check';
340
340
  return true;
341
341
  });
342
342
 
@@ -728,6 +728,285 @@ test('enforce-tier: non-burst mode still warns on duplicates', () => {
728
728
  }
729
729
  });
730
730
 
731
+ // ─── Test 33: install preserves existing hooks ─────────────────────────────
732
+ test('install: preserves existing hooks', () => {
733
+ const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
734
+
735
+ // install.mjs must define DUAL_BRAIN_CMDS to identify its own hooks
736
+ if (!installSrc.includes('DUAL_BRAIN_CMDS'))
737
+ return 'install.mjs missing DUAL_BRAIN_CMDS constant for filtering';
738
+
739
+ // It must filter out only dual-brain hooks (not all hooks) before merging
740
+ if (!installSrc.includes('.filter'))
741
+ return 'install.mjs missing .filter() call — may clobber non-dual-brain hooks';
742
+
743
+ // The merge logic should spread existingEntries first, then add dual-brain hooks
744
+ if (!installSrc.includes('existingEntries'))
745
+ return 'install.mjs missing existingEntries variable — may not preserve other hooks';
746
+
747
+ // Verify it reads existing settings before overwriting
748
+ if (!installSrc.includes('existing') || !installSrc.includes('settings.json'))
749
+ return 'install.mjs does not read existing settings.json before writing';
750
+
751
+ return true;
752
+ });
753
+
754
+ // ─── Test 34: gitignore entries don't conflict with data-tools ─────────────
755
+ test('install: gitignore entries scoped to dual-brain', () => {
756
+ const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
757
+
758
+ // Extract the generateGitignoreEntries function body
759
+ const fnMatch = installSrc.match(/generateGitignoreEntries[\s\S]*?const entries\s*=\s*\[([\s\S]*?)\]/);
760
+ if (!fnMatch) return 'could not find generateGitignoreEntries entries array';
761
+
762
+ const entriesBlock = fnMatch[1];
763
+
764
+ // Extract individual entry strings
765
+ const entryStrings = [...entriesBlock.matchAll(/'([^']+)'/g)].map(m => m[1]);
766
+ if (entryStrings.length === 0) return 'no gitignore entries found in install.mjs';
767
+
768
+ // Each entry must be scoped — no broad patterns like *.json, *.jsonl, .claude/hooks/
769
+ const broadPatterns = ['*.json', '*.jsonl', '*.mjs', '.claude/', '.claude/hooks/'];
770
+ for (const entry of entryStrings) {
771
+ for (const bad of broadPatterns) {
772
+ if (entry === bad)
773
+ return `gitignore entry "${entry}" is too broad — could match data-tools files`;
774
+ }
775
+ }
776
+
777
+ // Each entry should reference dual-brain-specific names
778
+ const validScopes = ['dual-brain', 'usage-', 'usage.jsonl', 'decision-ledger', 'drift-warned', 'budget-alerted', 'summary-', 'reviews/', '.launched'];
779
+ for (const entry of entryStrings) {
780
+ const isScoped = validScopes.some(scope => entry.includes(scope));
781
+ if (!isScoped)
782
+ return `gitignore entry "${entry}" may not be scoped to dual-brain files`;
783
+ }
784
+
785
+ return true;
786
+ });
787
+
788
+ // ─── Test 35: hooks use isolated file paths ────────────────────────────────
789
+ test('hooks: output files use dual-brain-namespaced paths', () => {
790
+ const validNames = ['dual-brain', 'usage-', 'usage.jsonl', 'decision-ledger', 'summary-checkpoint', '.drift-warned', '.burst-state', '.budget-alerted', 'orchestrator.json', '.launched'];
791
+
792
+ const hookFiles = {
793
+ 'enforce-tier.mjs': ['DRIFT_STATE', 'BURST_FILE', 'PROFILE_FILE'],
794
+ 'cost-logger.mjs': ['usage-', 'PROFILE_FILE'],
795
+ 'summary-checkpoint.mjs': ['usage-summary-', 'usage-'],
796
+ };
797
+
798
+ for (const [hookFile, expectedRefs] of Object.entries(hookFiles)) {
799
+ const src = readFileSync(resolve(__dirname, hookFile), 'utf8');
800
+
801
+ // Find all file paths the hook writes to (writeFileSync / appendFileSync targets)
802
+ const writeTargets = [...src.matchAll(/(?:writeFileSync|appendFileSync|renameSync)\(\s*([^,)]+)/g)].map(m => m[1].trim());
803
+
804
+ if (writeTargets.length === 0) return `${hookFile}: no write targets found`;
805
+
806
+ // Verify none of the write targets use generic names
807
+ // They should resolve to variables defined with dual-brain-specific names
808
+ const genericNames = ['config.json', 'state.json', 'log.jsonl', 'data.json', 'output.json'];
809
+ for (const target of writeTargets) {
810
+ for (const bad of genericNames) {
811
+ if (target.includes(`'${bad}'`) || target.includes(`"${bad}"`))
812
+ return `${hookFile}: writes to generic filename "${bad}" — could collide with other tools`;
813
+ }
814
+ }
815
+ }
816
+
817
+ // Verify the actual file path constants in enforce-tier use dual-brain-scoped names
818
+ const enforceSrc = readFileSync(resolve(__dirname, 'enforce-tier.mjs'), 'utf8');
819
+ if (!enforceSrc.includes('dual-brain.profile.json'))
820
+ return 'enforce-tier.mjs PROFILE_FILE does not reference dual-brain namespace';
821
+ if (!enforceSrc.includes('.drift-warned'))
822
+ return 'enforce-tier.mjs DRIFT_STATE does not use scoped filename';
823
+ if (!enforceSrc.includes('.burst-state'))
824
+ return 'enforce-tier.mjs BURST_FILE does not use scoped filename';
825
+
826
+ // Verify cost-logger writes to usage-dated files, not generic names
827
+ const costSrc = readFileSync(resolve(__dirname, 'cost-logger.mjs'), 'utf8');
828
+ if (!costSrc.includes('usage-'))
829
+ return 'cost-logger.mjs does not write to usage-prefixed files';
830
+ if (!costSrc.includes('dual-brain.profile.json'))
831
+ return 'cost-logger.mjs PROFILE_FILE does not reference dual-brain namespace';
832
+
833
+ return true;
834
+ });
835
+
836
+ // ─── Test 36: failure decay weights recent failures higher ─────────────────
837
+ test('failure decay: recent failures score high', () => {
838
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
839
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
840
+
841
+ try {
842
+ const hash = 'decay_recent_' + Date.now();
843
+ const fiveMinAgo = new Date(Date.now() - 5 * 60 * 1000).toISOString();
844
+ const entry = JSON.stringify({
845
+ type: 'failure', timestamp: fiveMinAgo, prompt_hash: hash,
846
+ tier: 'execute', reason: 'test_decay', success: false,
847
+ });
848
+ writeFileSync(LEDGER, entry + '\n' + entry + '\n', 'utf8');
849
+
850
+ const script = `
851
+ import { checkFailureLoop } from './failure-detector.mjs';
852
+ const result = checkFailureLoop('${hash}');
853
+ process.stdout.write(JSON.stringify(result));
854
+ `;
855
+ const proc = spawnSync(process.execPath, [
856
+ '--input-type=module',
857
+ '-e', script,
858
+ ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
859
+
860
+ if (proc.status !== 0) return `script failed: ${proc.stderr}`;
861
+ let result;
862
+ try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
863
+ if (!result.isLoop) return `expected isLoop=true for recent failures, got: ${JSON.stringify(result)}`;
864
+ if (typeof result.weightedScore !== 'number' || result.weightedScore < 2.0)
865
+ return `expected weightedScore >= 2.0, got: ${result.weightedScore}`;
866
+ return true;
867
+ } finally {
868
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
869
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
870
+ }
871
+ });
872
+
873
+ // ─── Test 37: failure decay reduces old failure weight ─────────────────────
874
+ test('failure decay: old failures score low', () => {
875
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
876
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
877
+
878
+ try {
879
+ const hash = 'decay_old_' + Date.now();
880
+ const ninetyMinAgo = new Date(Date.now() - 90 * 60 * 1000).toISOString();
881
+ const entry = JSON.stringify({
882
+ type: 'failure', timestamp: ninetyMinAgo, prompt_hash: hash,
883
+ tier: 'execute', reason: 'test_decay_old', success: false,
884
+ });
885
+ writeFileSync(LEDGER, entry + '\n' + entry + '\n', 'utf8');
886
+
887
+ const script = `
888
+ import { checkFailureLoop } from './failure-detector.mjs';
889
+ const result = checkFailureLoop('${hash}');
890
+ process.stdout.write(JSON.stringify(result));
891
+ `;
892
+ const proc = spawnSync(process.execPath, [
893
+ '--input-type=module',
894
+ '-e', script,
895
+ ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
896
+
897
+ if (proc.status !== 0) return `script failed: ${proc.stderr}`;
898
+ let result;
899
+ try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
900
+ if (result.isLoop) return `expected isLoop=false for old failures (weightedScore should be ~0.5), got: ${JSON.stringify(result)}`;
901
+ if (typeof result.weightedScore !== 'number')
902
+ return `expected weightedScore in result, got: ${JSON.stringify(result)}`;
903
+ if (result.weightedScore >= 2.0)
904
+ return `expected weightedScore < 2.0 for 90-min-old failures, got: ${result.weightedScore}`;
905
+ return true;
906
+ } finally {
907
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
908
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
909
+ }
910
+ });
911
+
912
+ // ─── Test 38: failure scoping by tier ──────────────────────────────────────
913
+ test('failure decay: scoping by tier', () => {
914
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
915
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
916
+
917
+ try {
918
+ const hash = 'tier_scope_' + Date.now();
919
+ const now = new Date().toISOString();
920
+ const mkEntry = (tier) => JSON.stringify({
921
+ type: 'failure', timestamp: now, prompt_hash: hash,
922
+ tier, reason: 'test_tier_scope', success: false,
923
+ });
924
+ const content = [
925
+ mkEntry('execute'), mkEntry('execute'),
926
+ mkEntry('search'), mkEntry('search'),
927
+ ].join('\n') + '\n';
928
+ writeFileSync(LEDGER, content, 'utf8');
929
+
930
+ const checkTier = (tier) => {
931
+ const script = `
932
+ import { checkFailureLoop } from './failure-detector.mjs';
933
+ const result = checkFailureLoop('${hash}', '${tier}');
934
+ process.stdout.write(JSON.stringify(result));
935
+ `;
936
+ const proc = spawnSync(process.execPath, [
937
+ '--input-type=module',
938
+ '-e', script,
939
+ ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
940
+ if (proc.status !== 0) return { error: `script failed for tier=${tier}: ${proc.stderr}` };
941
+ try { return JSON.parse(proc.stdout.trim()); } catch { return { error: `output not JSON for tier=${tier}: ${proc.stdout}` }; }
942
+ };
943
+
944
+ const execResult = checkTier('execute');
945
+ if (execResult.error) return execResult.error;
946
+ if (!execResult.isLoop) return `expected isLoop=true for execute tier, got: ${JSON.stringify(execResult)}`;
947
+
948
+ const searchResult = checkTier('search');
949
+ if (searchResult.error) return searchResult.error;
950
+ if (!searchResult.isLoop) return `expected isLoop=true for search tier, got: ${JSON.stringify(searchResult)}`;
951
+
952
+ const thinkResult = checkTier('think');
953
+ if (thinkResult.error) return thinkResult.error;
954
+ if (thinkResult.isLoop) return `expected isLoop=false for think tier (no think failures), got: ${JSON.stringify(thinkResult)}`;
955
+
956
+ return true;
957
+ } finally {
958
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
959
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
960
+ }
961
+ });
962
+
963
+ // ─── Test 39: pruneOldFailures removes stale entries ───────────────────────
964
+ test('failure decay: pruneOldFailures removes stale entries', () => {
965
+ const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
966
+ const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
967
+
968
+ try {
969
+ const twentyFiveHoursAgo = new Date(Date.now() - 25 * 60 * 60 * 1000).toISOString();
970
+ const oneHourAgo = new Date(Date.now() - 1 * 60 * 60 * 1000).toISOString();
971
+ const staleEntry = JSON.stringify({
972
+ type: 'failure', timestamp: twentyFiveHoursAgo, prompt_hash: 'stale',
973
+ tier: 'execute', reason: 'old', success: false,
974
+ });
975
+ const recentEntry = JSON.stringify({
976
+ type: 'failure', timestamp: oneHourAgo, prompt_hash: 'recent',
977
+ tier: 'execute', reason: 'new', success: false,
978
+ });
979
+ const content = [staleEntry, staleEntry, recentEntry, recentEntry].join('\n') + '\n';
980
+ writeFileSync(LEDGER, content, 'utf8');
981
+
982
+ const script = `
983
+ import { pruneOldFailures } from './failure-detector.mjs';
984
+ pruneOldFailures();
985
+ `;
986
+ const proc = spawnSync(process.execPath, [
987
+ '--input-type=module',
988
+ '-e', script,
989
+ ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
990
+
991
+ if (proc.status !== 0) return `pruneOldFailures script failed: ${proc.stderr}`;
992
+ if (!existsSync(LEDGER)) return 'ledger file was deleted instead of pruned';
993
+
994
+ const lines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
995
+ if (lines.length !== 2) return `expected 2 entries after prune, got: ${lines.length}`;
996
+
997
+ for (const line of lines) {
998
+ let entry;
999
+ try { entry = JSON.parse(line); } catch { return `pruned ledger has invalid JSON: ${line}`; }
1000
+ if (entry.prompt_hash !== 'recent')
1001
+ return `expected only recent entries to remain, found prompt_hash=${entry.prompt_hash}`;
1002
+ }
1003
+ return true;
1004
+ } finally {
1005
+ if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
1006
+ else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
1007
+ }
1008
+ });
1009
+
731
1010
  // ─── Summary ─────────────────────────────────────────────────────────────────
732
1011
  const total = passed + failed;
733
1012
  console.log(`\n${passed}/${total} tests passed`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dual-brain",
3
- "version": "3.8.0",
3
+ "version": "3.8.1",
4
4
  "description": "Dual-provider orchestration for Claude Code — tiered routing, budget balancing, and GPT dual-brain review across Claude + OpenAI subscriptions",
5
5
  "type": "module",
6
6
  "bin": {