dual-brain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/AGENTS.md +97 -0
  2. package/CLAUDE.md +147 -0
  3. package/LICENSE +21 -0
  4. package/README.md +197 -0
  5. package/agents/implementer.md +22 -0
  6. package/agents/researcher.md +25 -0
  7. package/agents/verifier.md +30 -0
  8. package/bin/dual-brain.mjs +2868 -0
  9. package/hooks/auto-update-wrapper.mjs +102 -0
  10. package/hooks/auto-update.sh +67 -0
  11. package/hooks/budget-balancer.mjs +679 -0
  12. package/hooks/control-panel.mjs +1195 -0
  13. package/hooks/cost-logger.mjs +286 -0
  14. package/hooks/cost-report.mjs +351 -0
  15. package/hooks/decision-ledger.mjs +299 -0
  16. package/hooks/dual-brain-review.mjs +404 -0
  17. package/hooks/dual-brain-think.mjs +393 -0
  18. package/hooks/enforce-tier.mjs +469 -0
  19. package/hooks/failure-detector.mjs +138 -0
  20. package/hooks/gpt-work-dispatcher.mjs +512 -0
  21. package/hooks/head-guard.mjs +105 -0
  22. package/hooks/health-check.mjs +444 -0
  23. package/hooks/install-git-hooks.mjs +106 -0
  24. package/hooks/model-registry.mjs +859 -0
  25. package/hooks/plan-generator.mjs +544 -0
  26. package/hooks/profiles.mjs +254 -0
  27. package/hooks/quality-gate.mjs +355 -0
  28. package/hooks/risk-classifier.mjs +41 -0
  29. package/hooks/session-report.mjs +514 -0
  30. package/hooks/setup-wizard.mjs +130 -0
  31. package/hooks/summary-checkpoint.mjs +432 -0
  32. package/hooks/task-classifier.mjs +328 -0
  33. package/hooks/test-orchestrator.mjs +1077 -0
  34. package/hooks/vibe-memory.mjs +463 -0
  35. package/hooks/vibe-router.mjs +387 -0
  36. package/hooks/wave-orchestrator.mjs +1397 -0
  37. package/install.mjs +1541 -0
  38. package/mcp-server/README.md +81 -0
  39. package/mcp-server/index.mjs +388 -0
  40. package/orchestrator.json +215 -0
  41. package/package.json +108 -0
  42. package/playbooks/debug.json +49 -0
  43. package/playbooks/refactor.json +57 -0
  44. package/playbooks/security-audit.json +57 -0
  45. package/playbooks/security.json +38 -0
  46. package/playbooks/test-gen.json +48 -0
  47. package/plugin.json +22 -0
  48. package/review-rules.md +17 -0
  49. package/shell-hook.sh +26 -0
  50. package/skills/go.md +22 -0
  51. package/skills/review.md +19 -0
  52. package/skills/status.md +13 -0
  53. package/skills/think.md +22 -0
  54. package/src/brief.mjs +266 -0
  55. package/src/decide.mjs +635 -0
  56. package/src/decompose.mjs +331 -0
  57. package/src/detect.mjs +345 -0
  58. package/src/dispatch.mjs +942 -0
  59. package/src/health.mjs +253 -0
  60. package/src/index.mjs +44 -0
  61. package/src/install-hooks.mjs +100 -0
  62. package/src/playbook.mjs +257 -0
  63. package/src/profile.mjs +990 -0
  64. package/src/redact.mjs +192 -0
  65. package/src/repo.mjs +292 -0
  66. package/src/session.mjs +1036 -0
  67. package/src/tui.mjs +197 -0
  68. package/src/update-check.mjs +35 -0
@@ -0,0 +1,1077 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * test-orchestrator.mjs — Self-test harness for all dual-brain orchestrator hooks.
4
+ *
5
+ * Usage: node .claude/hooks/test-orchestrator.mjs
6
+ *
7
+ * Runs a suite of fast tests against the hook scripts, prints PASS/FAIL per
8
+ * test, and exits with code 0 if all pass, 1 if any fail.
9
+ */
10
+
11
+ import { execSync, spawnSync } from 'child_process';
12
+ import {
13
+ appendFileSync,
14
+ existsSync,
15
+ readFileSync,
16
+ unlinkSync,
17
+ writeFileSync,
18
+ } from 'fs';
19
+ import { dirname, resolve } from 'path';
20
+ import { fileURLToPath } from 'url';
21
+
22
// Directory that holds this harness; the hook scripts under test live beside it.
const __dirname = dirname(fileURLToPath(import.meta.url));
const HOOKS = __dirname;

// Hook scripts exercised by the suite.
const ENFORCE_TIER = resolve(HOOKS, 'enforce-tier.mjs');
const COST_LOGGER = resolve(HOOKS, 'cost-logger.mjs');
const DUAL_BRAIN = resolve(HOOKS, 'dual-brain-review.mjs');

// Shared configuration file, one directory above the hooks.
const ORCHESTRATOR = resolve(HOOKS, '..', 'orchestrator.json');

// Daily usage log named after the current UTC date (first 10 chars of the ISO
// timestamp, i.e. YYYY-MM-DD), plus the burst-state file used by enforce-tier tests.
const USAGE_JSONL = resolve(HOOKS, `usage-${new Date().toISOString().substring(0, 10)}.jsonl`);
const BURST_FILE = resolve(HOOKS, '.burst-state');
31
+
32
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
33
+
34
/**
 * Run a hook script, delivering `stdinData` through a shell pipe so that
 * `readFileSync('/dev/stdin')` inside the script can read it.
 *
 * The command executed is `printf '%s' '<payload>' | '<node>' '<script>'` via
 * `sh -c`, so /dev/stdin is a real pipe file descriptor rather than a
 * spawnSync input buffer. The payload, the node binary path and the script
 * path are all single-quoted, so paths containing spaces or shell
 * metacharacters are passed through intact.
 *
 * @param {string} scriptPath - Path of the hook script to execute.
 * @param {string} [stdinData] - Text piped to the script's stdin ('' if omitted).
 * @param {Object} [extraEnv] - Variables merged over `process.env` for the child.
 * @returns {{raw: string, stderr: string, parsed: *, status: (number|null)}}
 *   `raw` is the captured stdout, `parsed` is stdout parsed as JSON (null when
 *   it is not valid JSON) and `status` is the exit status reported by spawnSync.
 */
function run(scriptPath, stdinData, extraEnv = {}) {
  // POSIX single-quote escaping: close the quote, emit an escaped quote, reopen.
  const shQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  const shellCmd =
    `printf '%s' ${shQuote(stdinData || '')} | ${shQuote(process.execPath)} ${shQuote(scriptPath)}`;

  const proc = spawnSync('sh', ['-c', shellCmd], {
    encoding: 'utf8',
    stdio: ['ignore', 'pipe', 'pipe'],
    env: { ...process.env, ...extraEnv },
    timeout: 8_000,
  });

  let parsed = null;
  try {
    parsed = JSON.parse((proc.stdout || '').trim());
  } catch {
    // Non-JSON (or empty) output is reported to the caller as parsed === null.
  }
  return { raw: proc.stdout || '', stderr: proc.stderr || '', parsed, status: proc.status };
}
57
+
58
/**
 * Run a hook that consumes stdin as a stream (a for-await loop over
 * process.stdin, as cost-logger does). The payload is handed to the child via
 * spawnSync's `input` option, which works for stream-based reads.
 *
 * @param {string} scriptPath - Path of the hook script to execute with node.
 * @param {string} [stdinData] - Text written to the child's stdin ('' if omitted).
 * @param {Object} [extraEnv] - Variables merged over `process.env` for the child.
 * @returns {{raw: string, stderr: string, parsed: *, status: (number|null)}}
 *   `raw` is the captured stdout and `parsed` is stdout parsed as JSON
 *   (null when it is not valid JSON).
 */
function runStream(scriptPath, stdinData, extraEnv = {}) {
  const spawnOptions = {
    input: stdinData || '',
    encoding: 'utf8',
    stdio: ['pipe', 'pipe', 'pipe'],
    env: { ...process.env, ...extraEnv },
    timeout: 8_000,
  };
  const child = spawnSync(process.execPath, [scriptPath], spawnOptions);

  const raw = child.stdout || '';
  let parsed;
  try {
    parsed = JSON.parse(raw.trim());
  } catch {
    parsed = null;
  }
  return { raw, stderr: child.stderr || '', parsed, status: child.status };
}
74
+
75
// Running tallies of test outcomes, updated by test().
let passed = 0;
let failed = 0;

/**
 * Execute one named test and print a PASS/FAIL line for it.
 *
 * The callback passes only by returning exactly `true`. Any other return value
 * is a failure; when that value is truthy it is appended to the FAIL line as
 * the reason. A thrown error is also reported as a failure.
 *
 * @param {string} name - Label printed next to PASS/FAIL.
 * @param {Function} fn - Synchronous test body.
 */
function test(name, fn) {
  try {
    const outcome = fn();
    if (outcome !== true) {
      const detail = outcome ? ` — ${outcome}` : '';
      console.log(`FAIL ${name}${detail}`);
      failed += 1;
      return;
    }
    console.log(`PASS ${name}`);
    passed += 1;
  } catch (err) {
    const reason = err?.message ?? String(err);
    console.log(`FAIL ${name} — threw: ${reason}`);
    failed += 1;
  }
}
93
+
94
// ─── Test 1: enforce-tier: search with opus ───────────────────────────────────
test('enforce-tier: search with opus', () => {
  // An Explore-style lookup requested on opus must produce a systemMessage
  // that mentions haiku.
  const toolInput = { prompt: 'find auth files', model: 'opus', subagent_type: 'Explore' };
  const { parsed } = run(ENFORCE_TIER, JSON.stringify({ tool_name: 'Agent', tool_input: toolInput }));
  if (!parsed) return 'no valid JSON output';

  const message = parsed.systemMessage;
  if (!message) return `expected systemMessage, got: ${JSON.stringify(parsed)}`;
  return message.toLowerCase().includes('haiku')
    ? true
    : `expected "haiku" in systemMessage, got: ${message}`;
});
107
+
108
// ─── Test 2: enforce-tier: correct tier ──────────────────────────────────────
test('enforce-tier: correct tier', () => {
  // The timestamp keeps the prompt unique from run to run.
  const toolInput = { prompt: `unique test prompt ${Date.now()}`, model: 'sonnet' };
  const { parsed } = run(ENFORCE_TIER, JSON.stringify({ tool_name: 'Agent', tool_input: toolInput }));
  if (!parsed) return 'no valid JSON output';

  // {} or some other warning (e.g. drift) is acceptable; only a message
  // containing "Tier Enforcer" counts as a tier mismatch.
  const message = parsed.systemMessage;
  const isTierMismatch = Boolean(message) && message.includes('Tier Enforcer');
  return isTierMismatch ? `unexpected tier mismatch: ${message}` : true;
});
121
+
122
// ─── Test 3: enforce-tier: think task on haiku ───────────────────────────────
test('enforce-tier: think on haiku', () => {
  // A review-style prompt on haiku is expected to come back with a warning.
  const toolInput = { prompt: 'review security', model: 'haiku' };
  const { parsed } = run(ENFORCE_TIER, JSON.stringify({ tool_name: 'Agent', tool_input: toolInput }));
  if (!parsed) return 'no valid JSON output';
  return parsed.systemMessage
    ? true
    : `expected systemMessage warning, got: ${JSON.stringify(parsed)}`;
});
134
+
135
// ─── Test 4: enforce-tier: non-Agent tool ────────────────────────────────────
test('enforce-tier: non-Agent tool', () => {
  // A Bash call must pass through with an empty JSON object as output.
  const bashCall = { tool_name: 'Bash', tool_input: { command: 'ls' } };
  const { parsed } = run(ENFORCE_TIER, JSON.stringify(bashCall));
  if (!parsed) return 'no valid JSON output';
  const keyCount = Object.keys(parsed).length;
  return keyCount === 0 ? true : `expected {}, got: ${JSON.stringify(parsed)}`;
});
147
+
148
// ─── Test 5: enforce-tier: missing config (exercised via an unknown model) ────
test('enforce-tier: missing config', () => {
  // The hook's config path is hardcoded and cannot easily be redirected from
  // here, so this test does not actually remove or corrupt the config.
  // Instead it sends a model name that should match no known tier and checks
  // only that the hook exits 0 and still emits parseable JSON (either {} or a
  // systemMessage).
  const toolInput = { prompt: 'do something', model: 'unknown-model-xyz' };
  const outcome = run(ENFORCE_TIER, JSON.stringify({ tool_name: 'Agent', tool_input: toolInput }));
  if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
  return outcome.parsed ? true : 'no valid JSON output';
});
166
+
167
// ─── Test 6: cost-logger: logs entry ─────────────────────────────────────────
test('cost-logger: logs entry', () => {
  // Snapshot today's usage log up front so it can be restored however the
  // test ends (pass, failed assertion, or exception).
  const hadLog = existsSync(USAGE_JSONL);
  const snapshot = hadLog ? readFileSync(USAGE_JSONL, 'utf8') : '';
  const linesBefore = snapshot.split('\n').filter(Boolean).length;

  try {
    const payload = JSON.stringify({
      tool_name: 'Read',
      tool_input: { file_path: '/some/file.ts' },
    });
    // cost-logger uses for-await on process.stdin → use runStream (spawnSync input pipe)
    const { parsed, status } = runStream(COST_LOGGER, payload);

    if (status !== 0) return `non-zero exit: ${status}`;
    if (!parsed || Object.keys(parsed).length !== 0)
      return `expected {}, got: ${JSON.stringify(parsed)}`;

    if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';

    const lines = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);
    if (lines.length <= linesBefore) return 'no new line was appended to daily usage log';

    // Validate the new entry is valid JSON with the expected fields.
    const lastLine = lines[lines.length - 1];
    let entry;
    try { entry = JSON.parse(lastLine); } catch { return `last line not valid JSON: ${lastLine}`; }
    if (!entry.timestamp) return 'entry missing timestamp';
    if (!entry.tier) return 'entry missing tier';
    if (!entry.tool) return 'entry missing tool';

    return true;
  } finally {
    // Remove whatever this test appended by restoring the exact prior content.
    // If the log did not exist before, it is left empty, and no file is
    // created when the hook never wrote one.
    try {
      if (hadLog || existsSync(USAGE_JSONL)) writeFileSync(USAGE_JSONL, snapshot, 'utf8');
    } catch {
      // Best-effort cleanup; don't fail the test over it.
    }
  }
});
210
+
211
// ─── Test 7: dual-brain: valid output ────────────────────────────────────────
test('dual-brain: valid output', () => {
  // Run dual-brain-review.mjs in a temp git repo with no changes so the test
  // is deterministic and never triggers codex/API calls on a dirty working tree.
  const tmpDir = (spawnSync('mktemp', ['-d'], { encoding: 'utf8' }).stdout || '').trim();
  if (!tmpDir) return 'could not create temp directory (mktemp -d failed)';
  try {
    // Identity and signing are supplied per-command so the empty commit also
    // succeeds on machines with no global git identity or with forced signing.
    const gitSteps = [
      ['init', '-q', tmpDir],
      [
        '-C', tmpDir,
        '-c', 'user.name=dual-brain-test',
        '-c', 'user.email=dual-brain-test@example.invalid',
        '-c', 'commit.gpgsign=false',
        'commit', '--allow-empty', '-m', 'init', '-q',
      ],
    ];
    for (const args of gitSteps) {
      const git = spawnSync('git', args, { encoding: 'utf8', stdio: 'pipe' });
      if (git.status !== 0) {
        const why = git.error?.message ?? (git.stderr || '').trim();
        return `git setup failed (git ${args.join(' ')}): ${why}`;
      }
    }

    const proc = spawnSync(process.execPath, [DUAL_BRAIN], {
      cwd: tmpDir,
      encoding: 'utf8',
      timeout: 10_000,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    // status null means the process was killed (timeout/signal) — treat as fail
    if (proc.status == null) return `process killed or timed out (signal/null status)`;
    if (proc.status !== 0) return `non-zero exit: ${proc.status}`;
    let parsed = null;
    try { parsed = JSON.parse((proc.stdout || '').trim()); } catch {}
    if (!parsed) return `no valid JSON output; raw: ${(proc.stdout || '').slice(0, 200)}`;
    if (typeof parsed.review !== 'string') return `expected review string, got: ${JSON.stringify(parsed)}`;
    return true;
  } finally {
    spawnSync('rm', ['-rf', tmpDir], { stdio: 'pipe' });
  }
});
239
+
240
// ─── Test 8: orchestrator.json: valid JSON ────────────────────────────────────
test('orchestrator.json: valid JSON', () => {
  if (!existsSync(ORCHESTRATOR)) return 'orchestrator.json not found';

  let config;
  try {
    const text = readFileSync(ORCHESTRATOR, 'utf8');
    config = JSON.parse(text);
  } catch (err) {
    return `invalid JSON: ${err.message}`;
  }

  // Required top-level sections, checked in this order.
  for (const section of ['quality_gate', 'tiers', 'subscriptions']) {
    if (!config[section]) return `missing ${section} section`;
  }
  return true;
});
254
+
255
// ─── Test 9: enforce-tier: think on gpt-4.1-mini ─────────────────────────────
test('enforce-tier: think on gpt-4.1-mini', () => {
  // Review/audit wording on gpt-4.1-mini must yield a warning mentioning "think".
  const toolInput = {
    description: 'review security architecture',
    prompt: 'audit auth',
    model: 'gpt-4.1-mini',
  };
  const { parsed } = run(ENFORCE_TIER, JSON.stringify({ tool_name: 'Agent', tool_input: toolInput }));
  if (!parsed) return 'no valid JSON output';

  const message = parsed.systemMessage;
  if (!message) return `expected systemMessage warning, got: ${JSON.stringify(parsed)}`;
  return message.toLowerCase().includes('think')
    ? true
    : `expected "think" in systemMessage, got: ${message}`;
});
265
+
266
// ─── Test 10: orchestrator.json: model_intelligence ──────────────────────────
test('orchestrator.json: model_intelligence', () => {
  const config = JSON.parse(readFileSync(ORCHESTRATOR, 'utf8'));
  const intelligence = config.model_intelligence;
  if (!intelligence) return 'model_intelligence key missing';

  // Each model must have an entry.
  for (const model of ['opus', 'sonnet', 'haiku']) {
    if (!intelligence[model]) return `model_intelligence missing ${model} entry`;
  }
  return true;
});
276
+
277
// ─── Test 11: orchestrator.json: pricing_verified ────────────────────────────
test('orchestrator.json: pricing_verified', () => {
  const config = JSON.parse(readFileSync(ORCHESTRATOR, 'utf8'));
  const verifiedOn = config.pricing_verified;
  if (!verifiedOn) return 'pricing_verified field missing';

  // Date.parse yields NaN for anything it cannot interpret as a date.
  const millis = Date.parse(verifiedOn);
  return Number.isNaN(millis)
    ? `pricing_verified is not a valid date: ${verifiedOn}`
    : true;
});
284
+
285
// ─── Test 12: budget-balancer: loads and runs ────────────────────────────────
test('budget-balancer: loads and runs', () => {
  // Launch the balancer from two directories above the hooks folder and
  // expect its report header on stdout.
  const scriptPath = resolve(__dirname, 'budget-balancer.mjs');
  const workingDir = resolve(__dirname, '..', '..');
  const child = spawnSync(process.execPath, [scriptPath], {
    encoding: 'utf8',
    timeout: 10_000,
    cwd: workingDir,
    stdio: ['pipe', 'pipe', 'pipe'],
  });

  if (child.status !== 0) return `exit code ${child.status}: ${child.stderr}`;
  return child.stdout.includes('Provider Balance') ? true : 'missing output header';
});
297
+
298
// ─── Test 13: orchestrator.json: providers configured ────────────────────────
test('orchestrator.json: providers configured', () => {
  const config = JSON.parse(readFileSync(ORCHESTRATOR, 'utf8'));

  // [value that must be truthy, message reported when it is not]
  const requirements = [
    [config.providers?.claude?.enabled, 'claude provider not enabled'],
    [config.providers?.openai?.enabled, 'openai provider not enabled'],
    [config.routing?.strategy, 'routing strategy missing'],
  ];
  const unmet = requirements.find(([value]) => !value);
  return unmet ? unmet[1] : true;
});
306
+
307
// ─── Test 14: orchestrator.json: dual_thinking configured ────────────────────
test('orchestrator.json: dual_thinking configured', () => {
  const config = JSON.parse(readFileSync(ORCHESTRATOR, 'utf8'));

  // [value that must be truthy, message reported when it is not]
  const requirements = [
    [config.dual_thinking?.enabled, 'dual_thinking not enabled'],
    [config.dual_thinking?.auto_triggers?.length, 'no auto_triggers'],
    [config.dual_thinking?.sensitive_paths?.length, 'no sensitive_paths'],
  ];
  const unmet = requirements.find(([value]) => !value);
  return unmet ? unmet[1] : true;
});
315
+
316
// ─── Test 15: profile consistency across modules ────────────────────────────
test('profiles: consistent across modules', () => {
  const profileNames = ['auto', 'balanced', 'cost-saver', 'quality-first'];
  // Textual check only: a profile counts as present when the source contains
  // it as a bare (`name:`) or single-quoted (`'name':`) object key.
  const mentions = (source, name) =>
    source.includes(`${name}:`) || source.includes(`'${name}':`);

  const profilesSrc = readFileSync(resolve(__dirname, 'profiles.mjs'), 'utf8');
  const absentFromProfiles = profileNames.find((name) => !mentions(profilesSrc, name));
  if (absentFromProfiles !== undefined) return `profiles.mjs missing: ${absentFromProfiles}`;

  const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
  const absentFromInstall = profileNames.find((name) => !mentions(installSrc, name));
  if (absentFromInstall !== undefined) return `install.mjs missing profile: ${absentFromInstall}`;

  const enforceSrc = readFileSync(resolve(__dirname, 'enforce-tier.mjs'), 'utf8');
  return enforceSrc.includes('auto:')
    ? true
    : 'enforce-tier.mjs missing auto in PROFILE_SETTINGS';
});
334
+
335
// ─── Test 16: failure-detector only counts real failures ─────────────────────
test('failure-detector: ignores followed=false', () => {
  // Source-level check: the detector must not test `followed === false`, and
  // must contain a comparison of `success` against false.
  const detectorSrc = readFileSync(resolve(__dirname, 'failure-detector.mjs'), 'utf8');
  if (detectorSrc.includes('followed === false'))
    return 'still conflates followed=false with failure';

  const checksSuccess =
    detectorSrc.includes('success === false') || detectorSrc.includes('success !== false');
  return checksSuccess ? true : 'missing success check';
});
342
+
343
// ─── Test 17: enforce-tier: malformed stdin ─────────────────────────────────
test('enforce-tier: malformed stdin', () => {
  // Garbage on stdin must still give exit code 0 and parseable JSON output.
  const outcome = run(ENFORCE_TIER, 'this is not json at all {{{');
  if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
  return outcome.parsed ? true : 'no valid JSON output';
});
350
+
351
// ─── Test 18: enforce-tier: missing tool_input ──────────────────────────────
test('enforce-tier: missing tool_input', () => {
  // An Agent call with no tool_input must still give exit code 0 and JSON output.
  const outcome = run(ENFORCE_TIER, JSON.stringify({ tool_name: 'Agent' }));
  if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
  return outcome.parsed ? true : 'no valid JSON output';
});
359
+
360
// ─── Test 19: enforce-tier: non-Agent tool passthrough ──────────────────────
test('enforce-tier: non-Agent tool passthrough', () => {
  // A Read call must exit 0 and produce an empty JSON object.
  const readCall = { tool_name: 'Read', tool_input: { file_path: '/foo' } };
  const outcome = run(ENFORCE_TIER, JSON.stringify(readCall));
  if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
  if (!outcome.parsed) return 'no valid JSON output';

  const keyCount = Object.keys(outcome.parsed).length;
  return keyCount === 0 ? true : `expected {}, got: ${JSON.stringify(outcome.parsed)}`;
});
370
+
371
// ─── Test 20: cost-logger: malformed stdin ──────────────────────────────────
test('cost-logger: malformed stdin', () => {
  // Non-JSON input must still give exit code 0 and parseable JSON output.
  const outcome = runStream(COST_LOGGER, 'not json garbage >>>');
  if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
  return outcome.parsed ? true : 'no valid JSON output';
});
378
+
379
// ─── Test 21: cost-logger: missing fields ───────────────────────────────────
test('cost-logger: missing fields', () => {
  // Snapshot today's usage log up front so it can be restored however the
  // test ends (pass, failed assertion, or exception).
  const hadLog = existsSync(USAGE_JSONL);
  const snapshot = hadLog ? readFileSync(USAGE_JSONL, 'utf8') : '';
  const linesBefore = snapshot.split('\n').filter(Boolean).length;

  try {
    // An empty object has no tool_name/tool_input; a line must still be logged.
    const { parsed, status } = runStream(COST_LOGGER, '{}');
    if (status !== 0) return `non-zero exit: ${status}`;
    if (!parsed) return 'no valid JSON output';

    if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
    const lines = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);
    if (lines.length <= linesBefore) return 'no new line was appended to daily usage log';

    return true;
  } finally {
    // Remove whatever this test appended by restoring the exact prior content.
    // If the log did not exist before, it is left empty, and no file is
    // created when the hook never wrote one.
    try {
      if (hadLog || existsSync(USAGE_JSONL)) writeFileSync(USAGE_JSONL, snapshot, 'utf8');
    } catch {
      // Best-effort cleanup; don't fail the test over it.
    }
  }
});
402
+
403
// ─── Test 22: cost-logger: error status recorded ────────────────────────────
test('cost-logger: error status recorded', () => {
  // Snapshot today's usage log up front so it can be restored however the
  // test ends (pass, failed assertion, or exception).
  const hadLog = existsSync(USAGE_JSONL);
  const snapshot = hadLog ? readFileSync(USAGE_JSONL, 'utf8') : '';
  const linesBefore = snapshot.split('\n').filter(Boolean).length;

  try {
    // A payload carrying an `error` field must be logged with status "error".
    const payload = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: 'test' },
      error: 'something failed',
    });
    const { parsed, status } = runStream(COST_LOGGER, payload);
    if (status !== 0) return `non-zero exit: ${status}`;
    if (!parsed) return 'no valid JSON output';

    if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
    const lines = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);
    if (lines.length <= linesBefore) return 'no new line was appended to daily usage log';

    const lastLine = lines[lines.length - 1];
    let entry;
    try { entry = JSON.parse(lastLine); } catch { return `last line not valid JSON: ${lastLine}`; }
    if (entry.status !== 'error') return `expected status "error", got: "${entry.status}"`;

    return true;
  } finally {
    // Remove whatever this test appended by restoring the exact prior content.
    // If the log did not exist before, it is left empty, and no file is
    // created when the hook never wrote one.
    try {
      if (hadLog || existsSync(USAGE_JSONL)) writeFileSync(USAGE_JSONL, snapshot, 'utf8');
    } catch {
      // Best-effort cleanup; don't fail the test over it.
    }
  }
});
436
+
437
// ─── Test 23: enforce-tier: cost-saver demotes think ────────────────────────
test('enforce-tier: cost-saver demotes think', () => {
  const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');

  // Remember the current profile file (null when it cannot be read) so the
  // finally block can put things back.
  let originalProfile = null;
  try {
    originalProfile = readFileSync(profileFile, 'utf8');
  } catch {
    originalProfile = null;
  }

  try {
    writeFileSync(profileFile, JSON.stringify({ active: 'cost-saver' }));
    // "edit the README file" is execute-like text with no think words. Under
    // cost-saver (demote_think=true) the work is meant to stay on the execute
    // tier, so opus here only has to exit 0 with valid JSON.
    const toolInput = {
      prompt: '<!-- dual-brain-dispatch: test23 -->edit the README file',
      model: 'opus',
    };
    const outcome = run(ENFORCE_TIER, JSON.stringify({ tool_name: 'Agent', tool_input: toolInput }));
    if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
    return outcome.parsed ? true : 'no valid JSON output';
  } finally {
    if (originalProfile === null) {
      try { unlinkSync(profileFile); } catch {}
    } else {
      writeFileSync(profileFile, originalProfile);
    }
  }
});
460
+
461
// ─── Test 24: enforce-tier: quality-first promotes execute ──────────────────
test('enforce-tier: quality-first promotes execute', () => {
  const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');

  // Remember the current profile file (null when it cannot be read) so the
  // finally block can put things back.
  let originalProfile = null;
  try {
    originalProfile = readFileSync(profileFile, 'utf8');
  } catch {
    originalProfile = null;
  }

  try {
    writeFileSync(profileFile, JSON.stringify({ active: 'quality-first' }));
    // Think-like wording on sonnet: under quality-first (promote_execute=true)
    // the hook is expected to answer with a message mentioning "think".
    const toolInput = { prompt: 'review architecture and plan the migration', model: 'sonnet' };
    const outcome = run(ENFORCE_TIER, JSON.stringify({ tool_name: 'Agent', tool_input: toolInput }));
    if (outcome.status !== 0) return `non-zero exit: ${outcome.status}`;
    if (!outcome.parsed) return 'no valid JSON output';

    const message = outcome.parsed.systemMessage;
    if (!message) return `expected systemMessage, got: ${JSON.stringify(outcome.parsed)}`;
    return message.toLowerCase().includes('think')
      ? true
      : `expected "think" in systemMessage, got: ${message}`;
  } finally {
    if (originalProfile === null) {
      try { unlinkSync(profileFile); } catch {}
    } else {
      writeFileSync(profileFile, originalProfile);
    }
  }
});
486
+
487
// ─── Test 25: enforce-tier: auto profile with high-risk file ────────────────
test('enforce-tier: auto profile with high-risk file', () => {
  const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');
  // Capture the user's current profile (null when the file cannot be read) so
  // it can be restored afterwards.
  let originalProfile;
  try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }
  try {
    writeFileSync(profileFile, JSON.stringify({ active: 'auto' }));
    // Description with an auth/credentials path: the hook is expected to
    // respond with a message mentioning "think" or "dual-brain".
    const payload = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { description: 'update src/auth/credentials.mjs', prompt: '<!-- dual-brain-dispatch: test25 -->change the token logic', model: 'sonnet' },
    });
    const { parsed, status } = run(ENFORCE_TIER, payload);
    if (status !== 0) return `non-zero exit: ${status}`;
    if (!parsed) return 'no valid JSON output';
    if (!parsed.systemMessage) return `expected systemMessage, got: ${JSON.stringify(parsed)}`;
    const msg = parsed.systemMessage.toLowerCase();
    if (!msg.includes('think') && !msg.includes('dual-brain'))
      return `expected "think" or "dual-brain" in systemMessage, got: ${parsed.systemMessage}`;
    return true;
  } finally {
    // Restore exactly what was there before, matching the other profile tests:
    // rewrite the saved content, or delete the file this test created. A
    // self-test must not silently switch the user's active profile to "auto".
    if (originalProfile !== null) writeFileSync(profileFile, originalProfile);
    else try { unlinkSync(profileFile); } catch {}
  }
});
512
+
513
// ─── Test 26: adaptive: recordFailure writes to ledger ─────────────────────
test('adaptive: recordFailure writes to ledger', () => {
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  // Keep the ledger's current content (null when absent) for restoration.
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    // Inline ES module, run from the hooks directory so the relative import resolves.
    const script = `
      import { recordFailure } from './failure-detector.mjs';
      recordFailure('testhash123', 'execute', 'test_error');
    `;
    const nodeArgs = ['--input-type=module', '-e', script];
    const child = spawnSync(process.execPath, nodeArgs, {
      encoding: 'utf8',
      timeout: 5000,
      cwd: HOOKS,
    });

    if (child.status !== 0) return `recordFailure script failed: ${child.stderr}`;
    if (!existsSync(LEDGER)) return 'ledger file not created';

    // The entry just recorded should be the last non-empty line.
    const ledgerLines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
    const lastLine = ledgerLines[ledgerLines.length - 1];
    let entry;
    try {
      entry = JSON.parse(lastLine);
    } catch {
      return `last line not valid JSON: ${lastLine}`;
    }
    if (entry.prompt_hash !== 'testhash123')
      return `expected prompt_hash=testhash123, got: ${entry.prompt_hash}`;
    return entry.success === false ? true : `expected success=false, got: ${entry.success}`;
  } finally {
    if (backup === null) {
      try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
    } else {
      writeFileSync(LEDGER, backup, 'utf8');
    }
  }
});
543
+
544
// ─── Test 27: adaptive: checkFailureLoop detects 2+ failures ───────────────
test('adaptive: checkFailureLoop detects 2+ failures', () => {
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  // Keep the ledger's current content (null when absent) for restoration.
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    // Timestamped hash so entries from earlier runs can never match.
    const hash = 'looptest_' + Date.now();
    const now = new Date().toISOString();
    const failEntry = JSON.stringify({
      type: 'failure', timestamp: now, prompt_hash: hash,
      tier: 'execute', reason: 'test', success: false,
    });
    // Append two fresh failures after the existing ledger content. If that
    // content does not end with a newline, add one first so the first injected
    // entry starts on its own line instead of being fused onto the last record.
    const existing = backup || '';
    const separator = existing && !existing.endsWith('\n') ? '\n' : '';
    const content = existing + separator + failEntry + '\n' + failEntry + '\n';
    writeFileSync(LEDGER, content, 'utf8');

    // Inline ES module, run from the hooks directory so the relative import resolves.
    const script = `
      import { checkFailureLoop } from './failure-detector.mjs';
      const result = checkFailureLoop('${hash}');
      process.stdout.write(JSON.stringify(result));
    `;
    const proc = spawnSync(process.execPath, [
      '--input-type=module',
      '-e', script,
    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });

    if (proc.status !== 0) return `checkFailureLoop script failed: ${proc.stderr}`;
    let result;
    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
    if (!result.isLoop) return `expected isLoop=true, got: ${JSON.stringify(result)}`;
    if (result.count < 2) return `expected count>=2, got: ${result.count}`;
    if (result.suggestion !== 'promote_tier' && result.suggestion !== 'escalate_to_dual_brain')
      return `unexpected suggestion: ${result.suggestion}`;
    return true;
  } finally {
    if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
    else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
  }
});
582
+
583
// ─── Test 28: adaptive: checkFailureLoop ignores old failures ──────────────
test('adaptive: checkFailureLoop ignores old failures', () => {
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  // Keep the ledger's current content (null when absent) for restoration.
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;

  try {
    const hash = 'oldtest_' + Date.now();
    const THREE_HOURS_MS = 3 * 60 * 60 * 1000;
    const threeHoursAgo = new Date(Date.now() - THREE_HOURS_MS).toISOString();
    const oldEntry = JSON.stringify({
      type: 'failure',
      timestamp: threeHoursAgo,
      prompt_hash: hash,
      tier: 'execute',
      reason: 'old_test',
      success: false,
    });
    // For this test the ledger holds nothing but the two stale failures.
    writeFileSync(LEDGER, `${oldEntry}\n${oldEntry}\n`, 'utf8');

    // Inline ES module, run from the hooks directory so the relative import resolves.
    const script = `
      import { checkFailureLoop } from './failure-detector.mjs';
      const result = checkFailureLoop('${hash}');
      process.stdout.write(JSON.stringify(result));
    `;
    const nodeArgs = ['--input-type=module', '-e', script];
    const child = spawnSync(process.execPath, nodeArgs, {
      encoding: 'utf8',
      timeout: 5000,
      cwd: HOOKS,
    });

    if (child.status !== 0) return `checkFailureLoop script failed: ${child.stderr}`;
    let result;
    try {
      result = JSON.parse(child.stdout.trim());
    } catch {
      return `output not JSON: ${child.stdout}`;
    }
    return result.isLoop
      ? `expected isLoop=false for old failures, got: ${JSON.stringify(result)}`
      : true;
  } finally {
    if (backup === null) {
      try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
    } else {
      writeFileSync(LEDGER, backup, 'utf8');
    }
  }
});
617
+
618
// ─── Test 29: adaptive: cost-logger records Agent errors ───────────────────
test('adaptive: cost-logger records Agent errors', () => {
  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
  // Keep the ledger's current content (null when absent) for restoration.
  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
  // cost-logger also appends a line to the daily usage log for an Agent-error
  // payload (the 'cost-logger: error status recorded' test relies on that), so
  // snapshot that file too; otherwise this test leaves an entry behind.
  const usageBackup = existsSync(USAGE_JSONL) ? readFileSync(USAGE_JSONL, 'utf8') : null;

  try {
    const linesBefore = (backup ?? '').split('\n').filter(Boolean).length;

    const payload = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: 'failing task hash test' },
      error: 'test failure',
    });
    const { status } = runStream(COST_LOGGER, payload);
    if (status !== 0) return `non-zero exit: ${status}`;

    if (!existsSync(LEDGER)) return 'ledger file not created';
    const lines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
    if (lines.length <= linesBefore) return 'no new failure entry appended to ledger';

    // The failure record is expected to be the last non-empty ledger line.
    const newEntry = lines[lines.length - 1];
    let entry;
    try { entry = JSON.parse(newEntry); } catch { return `last line not valid JSON: ${newEntry}`; }
    if (entry.success !== false) return `expected success=false, got: ${entry.success}`;
    if (entry.type !== 'failure') return `expected type=failure, got: ${entry.type}`;
    return true;
  } finally {
    // Usage log first, best-effort, so a problem here cannot prevent the
    // ledger restore below.
    try {
      if (usageBackup !== null) writeFileSync(USAGE_JSONL, usageBackup, 'utf8');
      else if (existsSync(USAGE_JSONL)) writeFileSync(USAGE_JSONL, '', 'utf8');
    } catch {
      // Best-effort cleanup; don't fail the test over it.
    }
    if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
    else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
  }
});
652
+
653
// ─── Test 30: enforce-tier: burst detection activates on 3+ agents ─────────
// Pre-seeds the burst state at count 2 inside the current window, runs
// enforce-tier once with an Agent payload, and expects the persisted count to
// have reached at least 3. Returns true on success or a failure message.
test('enforce-tier: burst detection activates on 3+ agents', () => {
  try {
    // Write burst state at count 2, within window
    writeFileSync(BURST_FILE, JSON.stringify({ count: 2, window_start: Date.now() }));
    const payload = JSON.stringify({
      tool_name: 'Agent',
      // Date.now() keeps the prompt unique from one run of the suite to the next.
      tool_input: { prompt: `burst activation test ${Date.now()}`, model: 'sonnet' },
    });
    const { parsed, status } = run(ENFORCE_TIER, payload);
    if (status !== 0) return `non-zero exit: ${status}`;
    if (!parsed) return 'no valid JSON output';

    // Read burst state — count should have incremented to >= 3
    if (!existsSync(BURST_FILE)) return '.burst-state file was removed unexpectedly';
    let state;
    try {
      state = JSON.parse(readFileSync(BURST_FILE, 'utf8'));
    } catch (e) {
      return `.burst-state not valid JSON: ${e.message}`;
    }

    // A plain `state.count < 3` comparison is false for undefined, NaN and
    // other non-numeric values, which would let a malformed state file pass.
    // Require an actual number that is >= 3 (and tolerate a JSON `null` state).
    const count = state !== null && typeof state === 'object' ? state.count : undefined;
    if (typeof count !== 'number' || !(count >= 3)) return `expected count >= 3, got: ${count}`;
    return true;
  } finally {
    // Best-effort cleanup; the file may already be gone.
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
676
+
677
// ─── Test 31: enforce-tier: burst mode suppresses duplicate warnings ───────
// With burst state pre-set (count 5, fresh window), sends the same Agent
// payload twice. The second response must either carry no duplicate warning
// or carry one that is marked as wave-related.
test('enforce-tier: burst mode suppresses duplicate warnings', () => {
  try {
    writeFileSync(BURST_FILE, JSON.stringify({ count: 5, window_start: Date.now() }));
    const repeatedCall = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: 'burst duplicate test identical prompt', model: 'sonnet' },
    });

    // The first invocation only primes state; its output is not inspected.
    run(ENFORCE_TIER, repeatedCall);
    const { parsed, status } = run(ENFORCE_TIER, repeatedCall);
    if (status !== 0) return `non-zero exit: ${status}`;
    if (!parsed) return 'no valid JSON output';

    const message = parsed.systemMessage || '';
    const lowered = message.toLowerCase();
    const mentionsDuplicate = lowered.includes('duplicate') || lowered.includes('similar task');
    // The wave markers are matched case-sensitively against the raw message.
    const waveTagged = message.includes('[Wave]') || message.includes('wave detected');
    if (mentionsDuplicate && !waveTagged) {
      return `expected no duplicate warning or [Wave]-prefixed in burst mode, got: ${message}`;
    }
    return true;
  } finally {
    // Best-effort removal of the burst state written above.
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
704
+
705
// ─── Test 32: enforce-tier: non-burst mode still warns on duplicates ───────
// With an expired burst window, sends the same Agent payload twice and expects
// the second response's systemMessage to contain a duplicate warning.
test('enforce-tier: non-burst mode still warns on duplicates', () => {
  try {
    // window_start = 0 places the window start at the epoch, i.e. long expired.
    writeFileSync(BURST_FILE, JSON.stringify({ count: 0, window_start: 0 }));
    const repeatedCall = JSON.stringify({
      tool_name: 'Agent',
      tool_input: { prompt: 'non-burst duplicate test identical prompt', model: 'sonnet' },
    });

    // The first invocation only primes state; its output is not inspected.
    run(ENFORCE_TIER, repeatedCall);
    const { parsed, status } = run(ENFORCE_TIER, repeatedCall);
    if (status !== 0) return `non-zero exit: ${status}`;
    if (!parsed) return 'no valid JSON output';

    const message = parsed.systemMessage || '';
    const lowered = message.toLowerCase();
    const warned = lowered.includes('similar task') || lowered.includes('duplicate');
    return warned
      ? true
      : `expected duplicate warning in non-burst mode, got: ${message || '(empty)'}`;
  } finally {
    // Best-effort removal of the burst state written above.
    try { unlinkSync(BURST_FILE); } catch {}
  }
});
730
+
731
// ─── Test 33: install preserves existing hooks ─────────────────────────────
// Static text check of ../install.mjs: looks for the identifiers and calls
// that the hook-merging logic is expected to contain. Nothing is executed.
test('install: preserves existing hooks', () => {
  const installerCode = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
  const mentions = (needle) => installerCode.includes(needle);

  // [condition that must hold, message returned when it does not], in check order.
  const requirements = [
    [
      mentions('DUAL_BRAIN_CMDS'),
      'install.mjs missing DUAL_BRAIN_CMDS constant for filtering',
    ],
    [
      mentions('.filter'),
      'install.mjs missing .filter() call — may clobber non-dual-brain hooks',
    ],
    [
      mentions('existingPre') || mentions('existingEntries'),
      'install.mjs missing existing hook preservation — may not preserve other hooks',
    ],
    [
      mentions('existing') && mentions('settings.json'),
      'install.mjs does not read existing settings.json before writing',
    ],
  ];

  const unmet = requirements.find(([satisfied]) => !satisfied);
  return unmet ? unmet[1] : true;
});
753
+
754
// ─── Test 34: gitignore entries don't conflict with data-tools ─────────────
// Extracts the single-quoted strings of the `entries` array that follows
// `generateGitignoreEntries` in ../install.mjs and checks that none is an
// overly broad pattern and that each contains a dual-brain-specific marker.
test('install: gitignore entries scoped to dual-brain', () => {
  const installerCode = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');

  const arrayMatch = installerCode.match(/generateGitignoreEntries[\s\S]*?const entries\s*=\s*\[([\s\S]*?)\]/);
  if (!arrayMatch) return 'could not find generateGitignoreEntries entries array';

  // Only single-quoted literals are picked up by this pattern.
  const entryStrings = Array.from(arrayMatch[1].matchAll(/'([^']+)'/g), m => m[1]);
  if (entryStrings.length === 0) return 'no gitignore entries found in install.mjs';

  // Exact-match blacklist of patterns that would also cover other tools' files.
  const broadPatterns = new Set(['*.json', '*.jsonl', '*.mjs', '.claude/', '.claude/hooks/']);
  const tooBroad = entryStrings.find(entry => broadPatterns.has(entry));
  if (tooBroad !== undefined) {
    return `gitignore entry "${tooBroad}" is too broad — could match data-tools files`;
  }

  // Every entry must contain at least one of these substrings.
  const validScopes = ['dual-brain', 'usage-', 'usage.jsonl', 'decision-ledger', 'drift-warned', 'budget-alerted', 'summary-', 'reviews/', '.launched'];
  const unscoped = entryStrings.find(entry => !validScopes.some(scope => entry.includes(scope)));
  if (unscoped !== undefined) {
    return `gitignore entry "${unscoped}" may not be scoped to dual-brain files`;
  }

  return true;
});
787
+
788
// ─── Test 35: hooks use isolated file paths ────────────────────────────────
// Static text check of three hook sources. For each hook it collects the first
// argument of every writeFileSync/appendFileSync/renameSync call and rejects
// quoted generic filenames; it then confirms that enforce-tier.mjs and
// cost-logger.mjs mention the dual-brain-scoped file names.
// Returns true on success or a failure message.
test('hooks: output files use dual-brain-namespaced paths', () => {
  const hookFiles = ['enforce-tier.mjs', 'cost-logger.mjs', 'summary-checkpoint.mjs'];
  const genericNames = ['config.json', 'state.json', 'log.jsonl', 'data.json', 'output.json'];
  // Source text per hook, kept so the namespace checks below need no second read.
  const sources = new Map();

  for (const hookFile of hookFiles) {
    const src = readFileSync(resolve(__dirname, hookFile), 'utf8');
    sources.set(hookFile, src);

    // The capture stops at the first ',' or ')', so a generic name is only
    // caught when it is the quoted literal passed directly as the first
    // argument, not when it is nested inside another call.
    const writeTargets = [...src.matchAll(/(?:writeFileSync|appendFileSync|renameSync)\(\s*([^,)]+)/g)].map(m => m[1].trim());

    if (writeTargets.length === 0) return `${hookFile}: no write targets found`;

    for (const target of writeTargets) {
      const bad = genericNames.find(name => target.includes(`'${name}'`) || target.includes(`"${name}"`));
      if (bad !== undefined)
        return `${hookFile}: writes to generic filename "${bad}" — could collide with other tools`;
    }
  }

  // Verify the actual file path constants in enforce-tier use dual-brain-scoped names
  const enforceSrc = sources.get('enforce-tier.mjs');
  if (!enforceSrc.includes('dual-brain.profile.json'))
    return 'enforce-tier.mjs PROFILE_FILE does not reference dual-brain namespace';
  if (!enforceSrc.includes('.drift-warned'))
    return 'enforce-tier.mjs DRIFT_STATE does not use scoped filename';
  if (!enforceSrc.includes('.burst-state'))
    return 'enforce-tier.mjs BURST_FILE does not use scoped filename';

  // Verify cost-logger writes to usage-dated files, not generic names
  const costSrc = sources.get('cost-logger.mjs');
  if (!costSrc.includes('usage-'))
    return 'cost-logger.mjs does not write to usage-prefixed files';
  if (!costSrc.includes('dual-brain.profile.json'))
    return 'cost-logger.mjs PROFILE_FILE does not reference dual-brain namespace';

  return true;
});
835
+
836
// ─── Test 36: failure decay weights recent failures higher ─────────────────
// Seeds the ledger with two failures stamped five minutes ago and expects
// checkFailureLoop() to report isLoop=true with a numeric weightedScore >= 2.0.
test('failure decay: recent failures score high', () => {
  const ledgerPath = resolve(HOOKS, 'decision-ledger.jsonl');
  // Snapshot of the ledger before the test; null means the file did not exist.
  const savedLedger = existsSync(ledgerPath) ? readFileSync(ledgerPath, 'utf8') : null;

  try {
    const promptHash = 'decay_recent_' + Date.now();
    const recentStamp = new Date(Date.now() - 5 * 60 * 1000).toISOString();
    const recentLine = JSON.stringify({
      type: 'failure',
      timestamp: recentStamp,
      prompt_hash: promptHash,
      tier: 'execute',
      reason: 'test_decay',
      success: false,
    });
    writeFileSync(ledgerPath, recentLine + '\n' + recentLine + '\n', 'utf8');

    // Child process with cwd = HOOKS, where the relative import is resolved.
    const probe = `
      import { checkFailureLoop } from './failure-detector.mjs';
      const result = checkFailureLoop('${promptHash}');
      process.stdout.write(JSON.stringify(result));
    `;
    const child = spawnSync(
      process.execPath,
      ['--input-type=module', '-e', probe],
      { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
    );
    if (child.status !== 0) return `script failed: ${child.stderr}`;

    let verdict;
    try {
      verdict = JSON.parse(child.stdout.trim());
    } catch {
      return `output not JSON: ${child.stdout}`;
    }
    if (!verdict.isLoop) {
      return `expected isLoop=true for recent failures, got: ${JSON.stringify(verdict)}`;
    }
    const scoreOk = typeof verdict.weightedScore === 'number' && verdict.weightedScore >= 2.0;
    return scoreOk ? true : `expected weightedScore >= 2.0, got: ${verdict.weightedScore}`;
  } finally {
    // Restore the snapshot; with no prior ledger, leave an empty file (best effort).
    if (savedLedger === null) {
      try { writeFileSync(ledgerPath, '', 'utf8'); } catch {}
    } else {
      writeFileSync(ledgerPath, savedLedger, 'utf8');
    }
  }
});
872
+
873
// ─── Test 37: failure decay reduces old failure weight ─────────────────────
// Seeds the ledger with two failures stamped ninety minutes ago and expects
// checkFailureLoop() to report isLoop=false with a numeric weightedScore < 2.0.
test('failure decay: old failures score low', () => {
  const ledgerPath = resolve(HOOKS, 'decision-ledger.jsonl');
  // Snapshot of the ledger before the test; null means the file did not exist.
  const savedLedger = existsSync(ledgerPath) ? readFileSync(ledgerPath, 'utf8') : null;

  try {
    const promptHash = 'decay_old_' + Date.now();
    const agedStamp = new Date(Date.now() - 90 * 60 * 1000).toISOString();
    const agedLine = JSON.stringify({
      type: 'failure',
      timestamp: agedStamp,
      prompt_hash: promptHash,
      tier: 'execute',
      reason: 'test_decay_old',
      success: false,
    });
    writeFileSync(ledgerPath, agedLine + '\n' + agedLine + '\n', 'utf8');

    // Child process with cwd = HOOKS, where the relative import is resolved.
    const probe = `
      import { checkFailureLoop } from './failure-detector.mjs';
      const result = checkFailureLoop('${promptHash}');
      process.stdout.write(JSON.stringify(result));
    `;
    const child = spawnSync(
      process.execPath,
      ['--input-type=module', '-e', probe],
      { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
    );
    if (child.status !== 0) return `script failed: ${child.stderr}`;

    let verdict;
    try {
      verdict = JSON.parse(child.stdout.trim());
    } catch {
      return `output not JSON: ${child.stdout}`;
    }
    if (verdict.isLoop) {
      return `expected isLoop=false for old failures (weightedScore should be ~0.5), got: ${JSON.stringify(verdict)}`;
    }
    if (typeof verdict.weightedScore !== 'number') {
      return `expected weightedScore in result, got: ${JSON.stringify(verdict)}`;
    }
    return verdict.weightedScore >= 2.0
      ? `expected weightedScore < 2.0 for 90-min-old failures, got: ${verdict.weightedScore}`
      : true;
  } finally {
    // Restore the snapshot; with no prior ledger, leave an empty file (best effort).
    if (savedLedger === null) {
      try { writeFileSync(ledgerPath, '', 'utf8'); } catch {}
    } else {
      writeFileSync(ledgerPath, savedLedger, 'utf8');
    }
  }
});
911
+
912
// ─── Test 38: failure scoping by tier ──────────────────────────────────────
// Seeds two 'execute' and two 'search' failures under one prompt hash, then
// queries checkFailureLoop(hash, tier) per tier: a loop is expected for
// 'execute' and 'search' and none for 'think', which has no entries.
test('failure decay: scoping by tier', () => {
  const ledgerPath = resolve(HOOKS, 'decision-ledger.jsonl');
  // Snapshot of the ledger before the test; null means the file did not exist.
  const savedLedger = existsSync(ledgerPath) ? readFileSync(ledgerPath, 'utf8') : null;

  try {
    const promptHash = 'tier_scope_' + Date.now();
    const stamp = new Date().toISOString();
    const failureLine = (tier) => JSON.stringify({
      type: 'failure',
      timestamp: stamp,
      prompt_hash: promptHash,
      tier,
      reason: 'test_tier_scope',
      success: false,
    });
    const seededTiers = ['execute', 'execute', 'search', 'search'];
    writeFileSync(ledgerPath, seededTiers.map(failureLine).join('\n') + '\n', 'utf8');

    // Runs the detector for one tier in a child process (cwd = HOOKS).
    // Yields the parsed result, or { error } describing what went wrong.
    const queryTier = (tier) => {
      const probe = `
        import { checkFailureLoop } from './failure-detector.mjs';
        const result = checkFailureLoop('${promptHash}', '${tier}');
        process.stdout.write(JSON.stringify(result));
      `;
      const child = spawnSync(
        process.execPath,
        ['--input-type=module', '-e', probe],
        { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
      );
      if (child.status !== 0) return { error: `script failed for tier=${tier}: ${child.stderr}` };
      try {
        return JSON.parse(child.stdout.trim());
      } catch {
        return { error: `output not JSON for tier=${tier}: ${child.stdout}` };
      }
    };

    // [tier, whether a loop is expected, message prefix when the expectation fails]
    const expectations = [
      ['execute', true, 'expected isLoop=true for execute tier, got: '],
      ['search', true, 'expected isLoop=true for search tier, got: '],
      ['think', false, 'expected isLoop=false for think tier (no think failures), got: '],
    ];
    for (const [tier, shouldLoop, complaint] of expectations) {
      const outcome = queryTier(tier);
      if (outcome.error) return outcome.error;
      if (Boolean(outcome.isLoop) !== shouldLoop) return complaint + JSON.stringify(outcome);
    }

    return true;
  } finally {
    // Restore the snapshot; with no prior ledger, leave an empty file (best effort).
    if (savedLedger === null) {
      try { writeFileSync(ledgerPath, '', 'utf8'); } catch {}
    } else {
      writeFileSync(ledgerPath, savedLedger, 'utf8');
    }
  }
});
962
+
963
// ─── Test 39: pruneOldFailures removes stale entries ───────────────────────
// Seeds two failures stamped 25 hours ago and two stamped 1 hour ago, calls
// pruneOldFailures() in a child process, and expects only the two 'recent'
// entries to remain in the ledger.
test('failure decay: pruneOldFailures removes stale entries', () => {
  const ledgerPath = resolve(HOOKS, 'decision-ledger.jsonl');
  // Snapshot of the ledger before the test; null means the file did not exist.
  const savedLedger = existsSync(ledgerPath) ? readFileSync(ledgerPath, 'utf8') : null;

  try {
    const HOUR_MS = 60 * 60 * 1000;
    const staleStamp = new Date(Date.now() - 25 * HOUR_MS).toISOString();
    const freshStamp = new Date(Date.now() - 1 * HOUR_MS).toISOString();
    const staleLine = JSON.stringify({
      type: 'failure',
      timestamp: staleStamp,
      prompt_hash: 'stale',
      tier: 'execute',
      reason: 'old',
      success: false,
    });
    const freshLine = JSON.stringify({
      type: 'failure',
      timestamp: freshStamp,
      prompt_hash: 'recent',
      tier: 'execute',
      reason: 'new',
      success: false,
    });
    writeFileSync(ledgerPath, [staleLine, staleLine, freshLine, freshLine].join('\n') + '\n', 'utf8');

    // Child process with cwd = HOOKS, where the relative import is resolved.
    const probe = `
      import { pruneOldFailures } from './failure-detector.mjs';
      pruneOldFailures();
    `;
    const child = spawnSync(
      process.execPath,
      ['--input-type=module', '-e', probe],
      { encoding: 'utf8', timeout: 5000, cwd: HOOKS },
    );
    if (child.status !== 0) return `pruneOldFailures script failed: ${child.stderr}`;
    if (!existsSync(ledgerPath)) return 'ledger file was deleted instead of pruned';

    const survivors = readFileSync(ledgerPath, 'utf8').split('\n').filter(Boolean);
    if (survivors.length !== 2) return `expected 2 entries after prune, got: ${survivors.length}`;

    for (const rawLine of survivors) {
      let record;
      try {
        record = JSON.parse(rawLine);
      } catch {
        return `pruned ledger has invalid JSON: ${rawLine}`;
      }
      if (record.prompt_hash !== 'recent') {
        return `expected only recent entries to remain, found prompt_hash=${record.prompt_hash}`;
      }
    }
    return true;
  } finally {
    // Restore the snapshot; with no prior ledger, leave an empty file (best effort).
    if (savedLedger === null) {
      try { writeFileSync(ledgerPath, '', 'utf8'); } catch {}
    } else {
      writeFileSync(ledgerPath, savedLedger, 'utf8');
    }
  }
});
1009
+
1010
// ─── Test 40: adaptive loop end-to-end hash match ─────────────────────────
// Drives enforce-tier → cost-logger (twice, with an error) → enforce-tier using
// one and the same tool_input, then checks that the failures were logged, that
// the second enforce-tier response reacts to them, and that every logged
// failure carries the same prompt_hash.
test('adaptive loop: end-to-end hash match', () => {
  const ledgerPath = resolve(HOOKS, 'decision-ledger.jsonl');
  // Snapshot of the ledger before the test; null means the file did not exist.
  const savedLedger = existsSync(ledgerPath) ? readFileSync(ledgerPath, 'utf8') : null;

  try {
    // Empty the ledger so earlier failures cannot influence the outcome.
    writeFileSync(ledgerPath, '', 'utf8');

    // One tool_input shared by every step.
    const toolInput = { prompt: '<!-- dual-brain-dispatch: test40 -->fix the auth bug', description: 'patch auth module' };
    const agentCall = JSON.stringify({ tool_name: 'Agent', tool_input: toolInput });
    const failedCall = JSON.stringify({
      tool_name: 'Agent',
      tool_input: toolInput,
      error: 'test failure',
    });

    // First enforce-tier pass.
    const firstRun = run(ENFORCE_TIER, agentCall);
    if (firstRun.status !== 0) return `first enforce-tier run failed with status: ${firstRun.status}`;
    if (!firstRun.parsed) return `first enforce-tier run produced no valid JSON`;

    // Two failures reported through cost-logger, stopping at the first bad exit.
    for (const ordinal of ['first', 'second']) {
      const attempt = runStream(COST_LOGGER, failedCall);
      if (attempt.status !== 0) {
        return `${ordinal} cost-logger failure run failed with status: ${attempt.status}`;
      }
    }

    // The ledger must now hold at least two failure records; unparsable lines are skipped.
    if (!existsSync(ledgerPath)) return 'ledger file not created after cost-logger failures';
    const failureEntries = [];
    for (const rawLine of readFileSync(ledgerPath, 'utf8').split('\n').filter(Boolean)) {
      let record = null;
      try { record = JSON.parse(rawLine); } catch { record = null; }
      if (record && record.type === 'failure' && record.success === false) {
        failureEntries.push(record);
      }
    }
    if (failureEntries.length < 2) {
      return `expected >= 2 failure entries in ledger, got: ${failureEntries.length}`;
    }

    // Second enforce-tier pass with the identical payload.
    const secondRun = run(ENFORCE_TIER, agentCall);
    if (secondRun.status !== 0) return `second enforce-tier run failed with status: ${secondRun.status}`;
    if (!secondRun.parsed) return `second enforce-tier run produced no valid JSON`;

    // Any one of these substrings in the lower-cased message is accepted.
    // 'dual-brain' is among them, which makes this check fairly permissive.
    const lowered = (secondRun.parsed.systemMessage || '').toLowerCase();
    const acceptedHints = ['failure', 'escalat', 'loop', 'dual-brain'];
    if (!acceptedHints.some(hint => lowered.includes(hint))) {
      return `expected failure loop / escalation in second enforce-tier systemMessage, got: "${secondRun.parsed.systemMessage || '(empty)'}"`;
    }

    // All failure records written by cost-logger should share a single prompt_hash.
    const distinctHashes = Array.from(new Set(failureEntries.map(e => e.prompt_hash)));
    if (distinctHashes.length !== 1) {
      return `expected all failure entries to share one hash, got ${distinctHashes.length} distinct hashes: ${distinctHashes.join(', ')}`;
    }

    return true;
  } finally {
    // Restore the snapshot; with no prior ledger, leave an empty file (best effort).
    if (savedLedger === null) {
      try { writeFileSync(ledgerPath, '', 'utf8'); } catch {}
    } else {
      writeFileSync(ledgerPath, savedLedger, 'utf8');
    }
  }
});
1073
+
1074
// ─── Summary ─────────────────────────────────────────────────────────────────
// Report how many tests passed out of those counted, then exit with status 1
// when at least one failed and 0 otherwise.
const total = passed + failed;
console.log(`\n${passed}/${total} tests passed`);
process.exit(Number(failed > 0));