dual-brain 3.7.0 → 3.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/cost-logger.mjs +12 -2
- package/hooks/dual-brain-review.mjs +1 -1
- package/hooks/enforce-tier.mjs +1 -1
- package/hooks/failure-detector.mjs +1 -1
- package/hooks/quality-gate.mjs +3 -9
- package/hooks/test-orchestrator.mjs +339 -0
- package/install.mjs +6 -0
- package/package.json +1 -1
package/hooks/cost-logger.mjs
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
* Output contract: must print "{}" to stdout and exit 0 within ~100 ms.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
import { createHash } from "crypto";
|
|
11
12
|
import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
12
13
|
import { dirname, join } from "path";
|
|
13
14
|
import { fileURLToPath } from "url";
|
|
@@ -25,8 +26,8 @@ mkdirSync(__dirname, { recursive: true });
|
|
|
25
26
|
function loadActiveProfile() {
|
|
26
27
|
try {
|
|
27
28
|
const data = JSON.parse(readFileSync(PROFILE_FILE, 'utf8'));
|
|
28
|
-
return data.active || '
|
|
29
|
-
} catch { return '
|
|
29
|
+
return data.active || 'auto';
|
|
30
|
+
} catch { return 'auto'; }
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
const SESSION_ID = process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || null;
|
|
@@ -261,6 +262,15 @@ async function main() {
|
|
|
261
262
|
updateSummary(entryObj);
|
|
262
263
|
} catch {}
|
|
263
264
|
|
|
265
|
+
// Record failures for adaptive routing (failure-loop detection)
|
|
266
|
+
if (status === 'error' && toolName === 'Agent') {
|
|
267
|
+
try {
|
|
268
|
+
const { recordFailure } = await import('./failure-detector.mjs');
|
|
269
|
+
const promptHash = createHash('md5').update(JSON.stringify(toolInput)).digest('hex').slice(0, 12);
|
|
270
|
+
recordFailure(promptHash, tier, payload?.error || 'agent_error');
|
|
271
|
+
} catch {}
|
|
272
|
+
}
|
|
273
|
+
|
|
264
274
|
const budgetMsg = await checkBudget();
|
|
265
275
|
|
|
266
276
|
// PostToolUse hooks must emit a JSON object to stdout
|
package/hooks/dual-brain-review.mjs
CHANGED
|
@@ -135,7 +135,7 @@ function hasIssues(text) {
|
|
|
135
135
|
if (hasIssueIndicators) return true;
|
|
136
136
|
|
|
137
137
|
// No concrete issues — check if review explicitly says it's clean
|
|
138
|
-
const good = ['lgtm', 'looks good', 'no issues', 'no problems', 'no concerns', 'all good', 'clean'];
|
|
138
|
+
const good = ['lgtm', 'looks good', 'no issues', 'no problems', 'no concerns', 'all good', 'clean', 'approved', 'ship it', 'ready to merge', 'good to go', 'looks fine', 'no blockers'];
|
|
139
139
|
if (good.some(g => lower.includes(g))) return false;
|
|
140
140
|
|
|
141
141
|
// Ambiguous — default to flagging for human review
|
package/hooks/enforce-tier.mjs
CHANGED
|
@@ -4,7 +4,7 @@ import { createHash } from 'crypto';
|
|
|
4
4
|
import { dirname, resolve, join } from 'path';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
6
|
import { classifyRisk, extractPaths } from './risk-classifier.mjs';
|
|
7
|
-
import { checkFailureLoop } from './failure-detector.mjs';
|
|
7
|
+
import { checkFailureLoop, recordFailure } from './failure-detector.mjs';
|
|
8
8
|
|
|
9
9
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
10
10
|
const CONFIG_FILE = resolve(__dirname, '..', 'orchestrator.json');
|
package/hooks/failure-detector.mjs
CHANGED
|
@@ -28,7 +28,7 @@ function checkFailureLoop(promptHash) {
|
|
|
28
28
|
const entry = JSON.parse(line);
|
|
29
29
|
if (entry.prompt_hash !== promptHash) continue;
|
|
30
30
|
if (Date.parse(entry.timestamp) < twoHoursAgo) continue;
|
|
31
|
-
if (entry.success === false
|
|
31
|
+
if (entry.success === false) {
|
|
32
32
|
failures++;
|
|
33
33
|
lastTier = entry.tier;
|
|
34
34
|
}
|
package/hooks/quality-gate.mjs
CHANGED
|
@@ -21,9 +21,10 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
|
21
21
|
import { dirname, extname, join, resolve } from 'path';
|
|
22
22
|
import { fileURLToPath } from 'url';
|
|
23
23
|
|
|
24
|
+
import { getProfileOverrides as _getProfileOverrides } from './profiles.mjs';
|
|
25
|
+
|
|
24
26
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
25
27
|
const ORCHESTRATOR_CONFIG = resolve(__dirname, '..', 'orchestrator.json');
|
|
26
|
-
const PROFILE_FILE = resolve(__dirname, '..', 'dual-brain.profile.json');
|
|
27
28
|
const REVIEWS_DIR = resolve(__dirname, '..', 'reviews');
|
|
28
29
|
const DUAL_BRAIN = resolve(__dirname, 'dual-brain-review.mjs');
|
|
29
30
|
|
|
@@ -31,14 +32,7 @@ const RISK_LEVELS = ['low', 'medium', 'high', 'critical'];
|
|
|
31
32
|
|
|
32
33
|
function loadProfileGateSettings() {
|
|
33
34
|
try {
|
|
34
|
-
|
|
35
|
-
const name = data.active || 'balanced';
|
|
36
|
-
const defaults = {
|
|
37
|
-
balanced: { sensitivity_floor: 'medium', dual_brain_minimum: 'high' },
|
|
38
|
-
'cost-saver': { sensitivity_floor: 'high', dual_brain_minimum: 'critical' },
|
|
39
|
-
'quality-first': { sensitivity_floor: 'low', dual_brain_minimum: 'medium' },
|
|
40
|
-
};
|
|
41
|
-
return defaults[name] || defaults.balanced;
|
|
35
|
+
return _getProfileOverrides('quality-gate');
|
|
42
36
|
} catch {
|
|
43
37
|
return { sensitivity_floor: 'medium', dual_brain_minimum: 'high' };
|
|
44
38
|
}
|
package/hooks/test-orchestrator.mjs
CHANGED
|
@@ -10,8 +10,10 @@
|
|
|
10
10
|
|
|
11
11
|
import { execSync, spawnSync } from 'child_process';
|
|
12
12
|
import {
|
|
13
|
+
appendFileSync,
|
|
13
14
|
existsSync,
|
|
14
15
|
readFileSync,
|
|
16
|
+
unlinkSync,
|
|
15
17
|
writeFileSync,
|
|
16
18
|
} from 'fs';
|
|
17
19
|
import { dirname, resolve } from 'path';
|
|
@@ -310,6 +312,343 @@ test('orchestrator.json: dual_thinking configured', () => {
|
|
|
310
312
|
return true;
|
|
311
313
|
});
|
|
312
314
|
|
|
315
|
+
// ─── Test 15: profile consistency across modules ────────────────────────────
|
|
316
|
+
test('profiles: consistent across modules', () => {
|
|
317
|
+
const profilesSrc = readFileSync(resolve(__dirname, 'profiles.mjs'), 'utf8');
|
|
318
|
+
const profileNames = ['auto', 'balanced', 'cost-saver', 'quality-first'];
|
|
319
|
+
for (const name of profileNames) {
|
|
320
|
+
if (!profilesSrc.includes(`${name}:`) && !profilesSrc.includes(`'${name}':`)) return `profiles.mjs missing: ${name}`;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
|
|
324
|
+
for (const name of profileNames) {
|
|
325
|
+
if (!installSrc.includes(`${name}:`) && !installSrc.includes(`'${name}':`)) return `install.mjs missing profile: ${name}`;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
const enforceSrc = readFileSync(resolve(__dirname, 'enforce-tier.mjs'), 'utf8');
|
|
329
|
+
if (!enforceSrc.includes('auto:')) return 'enforce-tier.mjs missing auto in PROFILE_SETTINGS';
|
|
330
|
+
|
|
331
|
+
return true;
|
|
332
|
+
});
|
|
333
|
+
|
|
334
|
+
// ─── Test 16: failure-detector only counts real failures ─────────────────────
|
|
335
|
+
test('failure-detector: ignores followed=false', () => {
|
|
336
|
+
const src = readFileSync(resolve(__dirname, 'failure-detector.mjs'), 'utf8');
|
|
337
|
+
if (src.includes('followed === false')) return 'still conflates followed=false with failure';
|
|
338
|
+
if (!src.includes('success === false')) return 'missing success===false check';
|
|
339
|
+
return true;
|
|
340
|
+
});
|
|
341
|
+
|
|
342
|
+
// ─── Test 17: enforce-tier: malformed stdin ─────────────────────────────────
|
|
343
|
+
test('enforce-tier: malformed stdin', () => {
|
|
344
|
+
const { parsed, status } = run(ENFORCE_TIER, 'this is not json at all {{{');
|
|
345
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
346
|
+
if (!parsed) return 'no valid JSON output';
|
|
347
|
+
return true;
|
|
348
|
+
});
|
|
349
|
+
|
|
350
|
+
// ─── Test 18: enforce-tier: missing tool_input ──────────────────────────────
|
|
351
|
+
test('enforce-tier: missing tool_input', () => {
|
|
352
|
+
const payload = JSON.stringify({ tool_name: 'Agent' });
|
|
353
|
+
const { parsed, status } = run(ENFORCE_TIER, payload);
|
|
354
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
355
|
+
if (!parsed) return 'no valid JSON output';
|
|
356
|
+
return true;
|
|
357
|
+
});
|
|
358
|
+
|
|
359
|
+
// ─── Test 19: enforce-tier: non-Agent tool passthrough ──────────────────────
|
|
360
|
+
test('enforce-tier: non-Agent tool passthrough', () => {
|
|
361
|
+
const payload = JSON.stringify({ tool_name: 'Read', tool_input: { file_path: '/foo' } });
|
|
362
|
+
const { parsed, status } = run(ENFORCE_TIER, payload);
|
|
363
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
364
|
+
if (!parsed) return 'no valid JSON output';
|
|
365
|
+
if (Object.keys(parsed).length !== 0)
|
|
366
|
+
return `expected {}, got: ${JSON.stringify(parsed)}`;
|
|
367
|
+
return true;
|
|
368
|
+
});
|
|
369
|
+
|
|
370
|
+
// ─── Test 20: cost-logger: malformed stdin ──────────────────────────────────
|
|
371
|
+
test('cost-logger: malformed stdin', () => {
|
|
372
|
+
const { parsed, status } = runStream(COST_LOGGER, 'not json garbage >>>');
|
|
373
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
374
|
+
if (!parsed) return 'no valid JSON output';
|
|
375
|
+
return true;
|
|
376
|
+
});
|
|
377
|
+
|
|
378
|
+
// ─── Test 21: cost-logger: missing fields ───────────────────────────────────
|
|
379
|
+
test('cost-logger: missing fields', () => {
|
|
380
|
+
let linesBefore = 0;
|
|
381
|
+
if (existsSync(USAGE_JSONL)) {
|
|
382
|
+
linesBefore = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean).length;
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
const { parsed, status } = runStream(COST_LOGGER, '{}');
|
|
386
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
387
|
+
if (!parsed) return 'no valid JSON output';
|
|
388
|
+
|
|
389
|
+
if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
|
|
390
|
+
const lines = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);
|
|
391
|
+
if (lines.length <= linesBefore) return 'no new line was appended to daily usage log';
|
|
392
|
+
|
|
393
|
+
// Clean up the test line
|
|
394
|
+
try {
|
|
395
|
+
const kept = lines.slice(0, linesBefore).join('\n');
|
|
396
|
+
writeFileSync(USAGE_JSONL, kept ? kept + '\n' : '', 'utf8');
|
|
397
|
+
} catch {}
|
|
398
|
+
|
|
399
|
+
return true;
|
|
400
|
+
});
|
|
401
|
+
|
|
402
|
+
// ─── Test 22: cost-logger: error status recorded ────────────────────────────
|
|
403
|
+
test('cost-logger: error status recorded', () => {
|
|
404
|
+
let linesBefore = 0;
|
|
405
|
+
if (existsSync(USAGE_JSONL)) {
|
|
406
|
+
linesBefore = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean).length;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
const payload = JSON.stringify({
|
|
410
|
+
tool_name: 'Agent',
|
|
411
|
+
tool_input: { prompt: 'test' },
|
|
412
|
+
error: 'something failed',
|
|
413
|
+
});
|
|
414
|
+
const { parsed, status } = runStream(COST_LOGGER, payload);
|
|
415
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
416
|
+
if (!parsed) return 'no valid JSON output';
|
|
417
|
+
|
|
418
|
+
if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
|
|
419
|
+
const lines = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);
|
|
420
|
+
if (lines.length <= linesBefore) return 'no new line was appended to daily usage log';
|
|
421
|
+
|
|
422
|
+
const lastLine = lines[lines.length - 1];
|
|
423
|
+
let entry;
|
|
424
|
+
try { entry = JSON.parse(lastLine); } catch { return `last line not valid JSON: ${lastLine}`; }
|
|
425
|
+
if (entry.status !== 'error') return `expected status "error", got: "${entry.status}"`;
|
|
426
|
+
|
|
427
|
+
// Clean up the test line
|
|
428
|
+
try {
|
|
429
|
+
const kept = lines.slice(0, linesBefore).join('\n');
|
|
430
|
+
writeFileSync(USAGE_JSONL, kept ? kept + '\n' : '', 'utf8');
|
|
431
|
+
} catch {}
|
|
432
|
+
|
|
433
|
+
return true;
|
|
434
|
+
});
|
|
435
|
+
|
|
436
|
+
// ─── Test 23: enforce-tier: cost-saver demotes think ────────────────────────
|
|
437
|
+
test('enforce-tier: cost-saver demotes think', () => {
|
|
438
|
+
const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');
|
|
439
|
+
let originalProfile;
|
|
440
|
+
try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }
|
|
441
|
+
try {
|
|
442
|
+
writeFileSync(profileFile, JSON.stringify({ active: 'cost-saver' }));
|
|
443
|
+
// "edit the README file" — execute-like text, no think words
|
|
444
|
+
// cost-saver's demote_think=true demotes think→execute when text lacks think words
|
|
445
|
+
const payload = JSON.stringify({
|
|
446
|
+
tool_name: 'Agent',
|
|
447
|
+
tool_input: { prompt: 'edit the README file', model: 'opus' },
|
|
448
|
+
});
|
|
449
|
+
const { parsed, status } = run(ENFORCE_TIER, payload);
|
|
450
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
451
|
+
if (!parsed) return 'no valid JSON output';
|
|
452
|
+
// With demote_think, the tier stays execute, so opus on execute work exits 0 with valid JSON
|
|
453
|
+
return true;
|
|
454
|
+
} finally {
|
|
455
|
+
if (originalProfile !== null) writeFileSync(profileFile, originalProfile);
|
|
456
|
+
else try { unlinkSync(profileFile); } catch {}
|
|
457
|
+
}
|
|
458
|
+
});
|
|
459
|
+
|
|
460
|
+
// ─── Test 24: enforce-tier: quality-first promotes execute ──────────────────
|
|
461
|
+
test('enforce-tier: quality-first promotes execute', () => {
|
|
462
|
+
const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');
|
|
463
|
+
let originalProfile;
|
|
464
|
+
try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }
|
|
465
|
+
try {
|
|
466
|
+
writeFileSync(profileFile, JSON.stringify({ active: 'quality-first' }));
|
|
467
|
+
// Think-like description on sonnet model — quality-first's promote_execute=true
|
|
468
|
+
// promotes to think when text matches think words
|
|
469
|
+
const payload = JSON.stringify({
|
|
470
|
+
tool_name: 'Agent',
|
|
471
|
+
tool_input: { prompt: 'review architecture and plan the migration', model: 'sonnet' },
|
|
472
|
+
});
|
|
473
|
+
const { parsed, status } = run(ENFORCE_TIER, payload);
|
|
474
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
475
|
+
if (!parsed) return 'no valid JSON output';
|
|
476
|
+
if (!parsed.systemMessage) return `expected systemMessage, got: ${JSON.stringify(parsed)}`;
|
|
477
|
+
if (!parsed.systemMessage.toLowerCase().includes('think'))
|
|
478
|
+
return `expected "think" in systemMessage, got: ${parsed.systemMessage}`;
|
|
479
|
+
return true;
|
|
480
|
+
} finally {
|
|
481
|
+
if (originalProfile !== null) writeFileSync(profileFile, originalProfile);
|
|
482
|
+
else try { unlinkSync(profileFile); } catch {}
|
|
483
|
+
}
|
|
484
|
+
});
|
|
485
|
+
|
|
486
|
+
// ─── Test 25: enforce-tier: auto profile with high-risk file ────────────────
|
|
487
|
+
test('enforce-tier: auto profile with high-risk file', () => {
|
|
488
|
+
const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');
|
|
489
|
+
let originalProfile;
|
|
490
|
+
try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }
|
|
491
|
+
try {
|
|
492
|
+
writeFileSync(profileFile, JSON.stringify({ active: 'auto' }));
|
|
493
|
+
// Description with auth/credentials path → risk classifier detects critical risk → promote to think
|
|
494
|
+
const payload = JSON.stringify({
|
|
495
|
+
tool_name: 'Agent',
|
|
496
|
+
tool_input: { description: 'update src/auth/credentials.mjs', prompt: 'change the token logic', model: 'sonnet' },
|
|
497
|
+
});
|
|
498
|
+
const { parsed, status } = run(ENFORCE_TIER, payload);
|
|
499
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
500
|
+
if (!parsed) return 'no valid JSON output';
|
|
501
|
+
if (!parsed.systemMessage) return `expected systemMessage, got: ${JSON.stringify(parsed)}`;
|
|
502
|
+
const msg = parsed.systemMessage.toLowerCase();
|
|
503
|
+
if (!msg.includes('think') && !msg.includes('dual-brain'))
|
|
504
|
+
return `expected "think" or "dual-brain" in systemMessage, got: ${parsed.systemMessage}`;
|
|
505
|
+
return true;
|
|
506
|
+
} finally {
|
|
507
|
+
// Always restore profile to auto so subsequent tests aren't affected
|
|
508
|
+
writeFileSync(profileFile, JSON.stringify({ active: 'auto' }));
|
|
509
|
+
}
|
|
510
|
+
});
|
|
511
|
+
|
|
512
|
+
// ─── Test 26: adaptive: recordFailure writes to ledger ─────────────────────
|
|
513
|
+
test('adaptive: recordFailure writes to ledger', () => {
|
|
514
|
+
const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
|
|
515
|
+
const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
|
|
516
|
+
|
|
517
|
+
try {
|
|
518
|
+
const script = `
|
|
519
|
+
import { recordFailure } from './failure-detector.mjs';
|
|
520
|
+
recordFailure('testhash123', 'execute', 'test_error');
|
|
521
|
+
`;
|
|
522
|
+
const proc = spawnSync(process.execPath, [
|
|
523
|
+
'--input-type=module',
|
|
524
|
+
'-e', script,
|
|
525
|
+
], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
|
|
526
|
+
|
|
527
|
+
if (proc.status !== 0) return `recordFailure script failed: ${proc.stderr}`;
|
|
528
|
+
if (!existsSync(LEDGER)) return 'ledger file not created';
|
|
529
|
+
|
|
530
|
+
const lines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
|
|
531
|
+
const lastLine = lines[lines.length - 1];
|
|
532
|
+
let entry;
|
|
533
|
+
try { entry = JSON.parse(lastLine); } catch { return `last line not valid JSON: ${lastLine}`; }
|
|
534
|
+
if (entry.prompt_hash !== 'testhash123') return `expected prompt_hash=testhash123, got: ${entry.prompt_hash}`;
|
|
535
|
+
if (entry.success !== false) return `expected success=false, got: ${entry.success}`;
|
|
536
|
+
return true;
|
|
537
|
+
} finally {
|
|
538
|
+
if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
|
|
539
|
+
else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
|
|
540
|
+
}
|
|
541
|
+
});
|
|
542
|
+
|
|
543
|
+
// ─── Test 27: adaptive: checkFailureLoop detects 2+ failures ───────────────
|
|
544
|
+
test('adaptive: checkFailureLoop detects 2+ failures', () => {
|
|
545
|
+
const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
|
|
546
|
+
const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
|
|
547
|
+
|
|
548
|
+
try {
|
|
549
|
+
const hash = 'looptest_' + Date.now();
|
|
550
|
+
const now = new Date().toISOString();
|
|
551
|
+
const failEntry = JSON.stringify({
|
|
552
|
+
type: 'failure', timestamp: now, prompt_hash: hash,
|
|
553
|
+
tier: 'execute', reason: 'test', success: false,
|
|
554
|
+
});
|
|
555
|
+
const content = (backup || '') + failEntry + '\n' + failEntry + '\n';
|
|
556
|
+
writeFileSync(LEDGER, content, 'utf8');
|
|
557
|
+
|
|
558
|
+
const script = `
|
|
559
|
+
import { checkFailureLoop } from './failure-detector.mjs';
|
|
560
|
+
const result = checkFailureLoop('${hash}');
|
|
561
|
+
process.stdout.write(JSON.stringify(result));
|
|
562
|
+
`;
|
|
563
|
+
const proc = spawnSync(process.execPath, [
|
|
564
|
+
'--input-type=module',
|
|
565
|
+
'-e', script,
|
|
566
|
+
], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
|
|
567
|
+
|
|
568
|
+
if (proc.status !== 0) return `checkFailureLoop script failed: ${proc.stderr}`;
|
|
569
|
+
let result;
|
|
570
|
+
try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
|
|
571
|
+
if (!result.isLoop) return `expected isLoop=true, got: ${JSON.stringify(result)}`;
|
|
572
|
+
if (result.count < 2) return `expected count>=2, got: ${result.count}`;
|
|
573
|
+
if (result.suggestion !== 'promote_tier' && result.suggestion !== 'escalate_to_dual_brain')
|
|
574
|
+
return `unexpected suggestion: ${result.suggestion}`;
|
|
575
|
+
return true;
|
|
576
|
+
} finally {
|
|
577
|
+
if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
|
|
578
|
+
else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
|
|
579
|
+
}
|
|
580
|
+
});
|
|
581
|
+
|
|
582
|
+
// ─── Test 28: adaptive: checkFailureLoop ignores old failures ──────────────
|
|
583
|
+
test('adaptive: checkFailureLoop ignores old failures', () => {
|
|
584
|
+
const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
|
|
585
|
+
const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
|
|
586
|
+
|
|
587
|
+
try {
|
|
588
|
+
const hash = 'oldtest_' + Date.now();
|
|
589
|
+
const threeHoursAgo = new Date(Date.now() - 3 * 60 * 60 * 1000).toISOString();
|
|
590
|
+
const oldEntry = JSON.stringify({
|
|
591
|
+
type: 'failure', timestamp: threeHoursAgo, prompt_hash: hash,
|
|
592
|
+
tier: 'execute', reason: 'old_test', success: false,
|
|
593
|
+
});
|
|
594
|
+
writeFileSync(LEDGER, oldEntry + '\n' + oldEntry + '\n', 'utf8');
|
|
595
|
+
|
|
596
|
+
const script = `
|
|
597
|
+
import { checkFailureLoop } from './failure-detector.mjs';
|
|
598
|
+
const result = checkFailureLoop('${hash}');
|
|
599
|
+
process.stdout.write(JSON.stringify(result));
|
|
600
|
+
`;
|
|
601
|
+
const proc = spawnSync(process.execPath, [
|
|
602
|
+
'--input-type=module',
|
|
603
|
+
'-e', script,
|
|
604
|
+
], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
|
|
605
|
+
|
|
606
|
+
if (proc.status !== 0) return `checkFailureLoop script failed: ${proc.stderr}`;
|
|
607
|
+
let result;
|
|
608
|
+
try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
|
|
609
|
+
if (result.isLoop) return `expected isLoop=false for old failures, got: ${JSON.stringify(result)}`;
|
|
610
|
+
return true;
|
|
611
|
+
} finally {
|
|
612
|
+
if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
|
|
613
|
+
else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
|
|
614
|
+
}
|
|
615
|
+
});
|
|
616
|
+
|
|
617
|
+
// ─── Test 29: adaptive: cost-logger records Agent errors ───────────────────
|
|
618
|
+
test('adaptive: cost-logger records Agent errors', () => {
|
|
619
|
+
const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
|
|
620
|
+
const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
|
|
621
|
+
|
|
622
|
+
try {
|
|
623
|
+
let linesBefore = 0;
|
|
624
|
+
if (existsSync(LEDGER)) {
|
|
625
|
+
linesBefore = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean).length;
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
const payload = JSON.stringify({
|
|
629
|
+
tool_name: 'Agent',
|
|
630
|
+
tool_input: { prompt: 'failing task hash test' },
|
|
631
|
+
error: 'test failure',
|
|
632
|
+
});
|
|
633
|
+
const { status } = runStream(COST_LOGGER, payload);
|
|
634
|
+
if (status !== 0) return `non-zero exit: ${status}`;
|
|
635
|
+
|
|
636
|
+
if (!existsSync(LEDGER)) return 'ledger file not created';
|
|
637
|
+
const lines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
|
|
638
|
+
if (lines.length <= linesBefore) return 'no new failure entry appended to ledger';
|
|
639
|
+
|
|
640
|
+
const newEntry = lines[lines.length - 1];
|
|
641
|
+
let entry;
|
|
642
|
+
try { entry = JSON.parse(newEntry); } catch { return `last line not valid JSON: ${newEntry}`; }
|
|
643
|
+
if (entry.success !== false) return `expected success=false, got: ${entry.success}`;
|
|
644
|
+
if (entry.type !== 'failure') return `expected type=failure, got: ${entry.type}`;
|
|
645
|
+
return true;
|
|
646
|
+
} finally {
|
|
647
|
+
if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
|
|
648
|
+
else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
|
|
649
|
+
}
|
|
650
|
+
});
|
|
651
|
+
|
|
313
652
|
// ─── Summary ─────────────────────────────────────────────────────────────────
|
|
314
653
|
const total = passed + failed;
|
|
315
654
|
console.log(`\n${passed}/${total} tests passed`);
|
package/install.mjs
CHANGED
|
@@ -424,6 +424,12 @@ function profilePath(workspace) {
|
|
|
424
424
|
}
|
|
425
425
|
|
|
426
426
|
const PROFILES = {
|
|
427
|
+
auto: {
|
|
428
|
+
description: 'Adapts routing based on task risk, provider health, and outcomes',
|
|
429
|
+
routing: { prefer_provider: 'auto', think_threshold: 'adaptive', gpt_dispatch_bias: 0 },
|
|
430
|
+
budgets: { session_warn_usd: 5, session_limit_usd: 10, daily_warn_usd: 20, daily_limit_usd: 50 },
|
|
431
|
+
quality_gate: { sensitivity_floor: 'medium', dual_brain_minimum: 'high' },
|
|
432
|
+
},
|
|
427
433
|
balanced: {
|
|
428
434
|
description: 'Auto-routes by complexity, uses both providers evenly',
|
|
429
435
|
routing: { prefer_provider: 'auto', think_threshold: 'normal', gpt_dispatch_bias: 0 },
|
package/package.json
CHANGED