npm - dual-brain - Versions diffs - 3.7.0 → 3.7.2 - Mend

dual-brain 3.7.0 → 3.7.2

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

package/hooks/cost-logger.mjs +12 -2
package/hooks/dual-brain-review.mjs +1 -1
package/hooks/enforce-tier.mjs +1 -1
package/hooks/failure-detector.mjs +1 -1
package/hooks/quality-gate.mjs +3 -9
package/hooks/test-orchestrator.mjs +339 -0
package/install.mjs +6 -0
package/package.json +1 -1

package/hooks/cost-logger.mjs CHANGED Viewed

@@ -8,6 +8,7 @@
  * Output contract: must print "{}" to stdout and exit 0 within ~100 ms.
  */
+import { createHash } from "crypto";
 import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from "fs";
 import { dirname, join } from "path";
 import { fileURLToPath } from "url";
@@ -25,8 +26,8 @@ mkdirSync(__dirname, { recursive: true });
 function loadActiveProfile() {
   try {
     const data = JSON.parse(readFileSync(PROFILE_FILE, 'utf8'));
-    return data.active || 'balanced';
-  } catch { return 'balanced'; }
+    return data.active || 'auto';
+  } catch { return 'auto'; }
 }
 const SESSION_ID = process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || null;
@@ -261,6 +262,15 @@ async function main() {
     updateSummary(entryObj);
   } catch {}
+  // Record failures for adaptive routing (failure-loop detection)
+  if (status === 'error' && toolName === 'Agent') {
+    try {
+      const { recordFailure } = await import('./failure-detector.mjs');
+      const promptHash = createHash('md5').update(JSON.stringify(toolInput)).digest('hex').slice(0, 12);
+      recordFailure(promptHash, tier, payload?.error || 'agent_error');
+    } catch {}
+  }
   const budgetMsg = await checkBudget();
   // PostToolUse hooks must emit a JSON object to stdout

package/hooks/dual-brain-review.mjs CHANGED Viewed

@@ -135,7 +135,7 @@ function hasIssues(text) {
   if (hasIssueIndicators) return true;
   // No concrete issues — check if review explicitly says it's clean
-  const good = ['lgtm', 'looks good', 'no issues', 'no problems', 'no concerns', 'all good', 'clean'];
+  const good = ['lgtm', 'looks good', 'no issues', 'no problems', 'no concerns', 'all good', 'clean', 'approved', 'ship it', 'ready to merge', 'good to go', 'looks fine', 'no blockers'];
   if (good.some(g => lower.includes(g))) return false;
   // Ambiguous — default to flagging for human review

package/hooks/enforce-tier.mjs CHANGED Viewed

@@ -4,7 +4,7 @@ import { createHash } from 'crypto';
 import { dirname, resolve, join } from 'path';
 import { fileURLToPath } from 'url';
 import { classifyRisk, extractPaths } from './risk-classifier.mjs';
-import { checkFailureLoop } from './failure-detector.mjs';
+import { checkFailureLoop, recordFailure } from './failure-detector.mjs';
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const CONFIG_FILE = resolve(__dirname, '..', 'orchestrator.json');

package/hooks/failure-detector.mjs CHANGED Viewed

@@ -28,7 +28,7 @@ function checkFailureLoop(promptHash) {
         const entry = JSON.parse(line);
         if (entry.prompt_hash !== promptHash) continue;
         if (Date.parse(entry.timestamp) < twoHoursAgo) continue;
-        if (entry.success === false || entry.followed === false) {
+        if (entry.success === false) {
           failures++;
           lastTier = entry.tier;
         }

package/hooks/quality-gate.mjs CHANGED Viewed

@@ -21,9 +21,10 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
 import { dirname, extname, join, resolve } from 'path';
 import { fileURLToPath } from 'url';
+import { getProfileOverrides as _getProfileOverrides } from './profiles.mjs';
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ORCHESTRATOR_CONFIG = resolve(__dirname, '..', 'orchestrator.json');
-const PROFILE_FILE = resolve(__dirname, '..', 'dual-brain.profile.json');
 const REVIEWS_DIR = resolve(__dirname, '..', 'reviews');
 const DUAL_BRAIN = resolve(__dirname, 'dual-brain-review.mjs');
@@ -31,14 +32,7 @@ const RISK_LEVELS = ['low', 'medium', 'high', 'critical'];
 function loadProfileGateSettings() {
   try {
-    const data = JSON.parse(readFileSync(PROFILE_FILE, 'utf8'));
-    const name = data.active || 'balanced';
-    const defaults = {
-      balanced:        { sensitivity_floor: 'medium', dual_brain_minimum: 'high' },
-      'cost-saver':    { sensitivity_floor: 'high',   dual_brain_minimum: 'critical' },
-      'quality-first': { sensitivity_floor: 'low',    dual_brain_minimum: 'medium' },
-    };
-    return defaults[name] || defaults.balanced;
+    return _getProfileOverrides('quality-gate');
   } catch {
     return { sensitivity_floor: 'medium', dual_brain_minimum: 'high' };
   }

package/hooks/test-orchestrator.mjs CHANGED Viewed

@@ -10,8 +10,10 @@
 import { execSync, spawnSync } from 'child_process';
 import {
+  appendFileSync,
   existsSync,
   readFileSync,
+  unlinkSync,
   writeFileSync,
 } from 'fs';
 import { dirname, resolve } from 'path';
@@ -310,6 +312,343 @@ test('orchestrator.json: dual_thinking configured', () => {
   return true;
 });
+// ─── Test 15: profile consistency across modules ────────────────────────────
+test('profiles: consistent across modules', () => {
+  const profilesSrc = readFileSync(resolve(__dirname, 'profiles.mjs'), 'utf8');
+  const profileNames = ['auto', 'balanced', 'cost-saver', 'quality-first'];
+  for (const name of profileNames) {
+    if (!profilesSrc.includes(`${name}:`) && !profilesSrc.includes(`'${name}':`)) return `profiles.mjs missing: ${name}`;
+  }
+  const installSrc = readFileSync(resolve(__dirname, '..', 'install.mjs'), 'utf8');
+  for (const name of profileNames) {
+    if (!installSrc.includes(`${name}:`) && !installSrc.includes(`'${name}':`)) return `install.mjs missing profile: ${name}`;
+  }
+  const enforceSrc = readFileSync(resolve(__dirname, 'enforce-tier.mjs'), 'utf8');
+  if (!enforceSrc.includes('auto:')) return 'enforce-tier.mjs missing auto in PROFILE_SETTINGS';
+  return true;
+});
+// ─── Test 16: failure-detector only counts real failures ─────────────────────
+test('failure-detector: ignores followed=false', () => {
+  const src = readFileSync(resolve(__dirname, 'failure-detector.mjs'), 'utf8');
+  if (src.includes('followed === false')) return 'still conflates followed=false with failure';
+  if (!src.includes('success === false')) return 'missing success===false check';
+  return true;
+});
+// ─── Test 17: enforce-tier: malformed stdin ─────────────────────────────────
+test('enforce-tier: malformed stdin', () => {
+  const { parsed, status } = run(ENFORCE_TIER, 'this is not json at all {{{');
+  if (status !== 0) return `non-zero exit: ${status}`;
+  if (!parsed) return 'no valid JSON output';
+  return true;
+});
+// ─── Test 18: enforce-tier: missing tool_input ──────────────────────────────
+test('enforce-tier: missing tool_input', () => {
+  const payload = JSON.stringify({ tool_name: 'Agent' });
+  const { parsed, status } = run(ENFORCE_TIER, payload);
+  if (status !== 0) return `non-zero exit: ${status}`;
+  if (!parsed) return 'no valid JSON output';
+  return true;
+});
+// ─── Test 19: enforce-tier: non-Agent tool passthrough ──────────────────────
+test('enforce-tier: non-Agent tool passthrough', () => {
+  const payload = JSON.stringify({ tool_name: 'Read', tool_input: { file_path: '/foo' } });
+  const { parsed, status } = run(ENFORCE_TIER, payload);
+  if (status !== 0) return `non-zero exit: ${status}`;
+  if (!parsed) return 'no valid JSON output';
+  if (Object.keys(parsed).length !== 0)
+    return `expected {}, got: ${JSON.stringify(parsed)}`;
+  return true;
+});
+// ─── Test 20: cost-logger: malformed stdin ──────────────────────────────────
+test('cost-logger: malformed stdin', () => {
+  const { parsed, status } = runStream(COST_LOGGER, 'not json garbage >>>');
+  if (status !== 0) return `non-zero exit: ${status}`;
+  if (!parsed) return 'no valid JSON output';
+  return true;
+});
+// ─── Test 21: cost-logger: missing fields ───────────────────────────────────
+test('cost-logger: missing fields', () => {
+  let linesBefore = 0;
+  if (existsSync(USAGE_JSONL)) {
+    linesBefore = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean).length;
+  }
+  const { parsed, status } = runStream(COST_LOGGER, '{}');
+  if (status !== 0) return `non-zero exit: ${status}`;
+  if (!parsed) return 'no valid JSON output';
+  if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
+  const lines = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);
+  if (lines.length <= linesBefore) return 'no new line was appended to daily usage log';
+  // Clean up the test line
+  try {
+    const kept = lines.slice(0, linesBefore).join('\n');
+    writeFileSync(USAGE_JSONL, kept ? kept + '\n' : '', 'utf8');
+  } catch {}
+  return true;
+});
+// ─── Test 22: cost-logger: error status recorded ────────────────────────────
+test('cost-logger: error status recorded', () => {
+  let linesBefore = 0;
+  if (existsSync(USAGE_JSONL)) {
+    linesBefore = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean).length;
+  }
+  const payload = JSON.stringify({
+    tool_name: 'Agent',
+    tool_input: { prompt: 'test' },
+    error: 'something failed',
+  });
+  const { parsed, status } = runStream(COST_LOGGER, payload);
+  if (status !== 0) return `non-zero exit: ${status}`;
+  if (!parsed) return 'no valid JSON output';
+  if (!existsSync(USAGE_JSONL)) return 'daily usage log was not created';
+  const lines = readFileSync(USAGE_JSONL, 'utf8').split('\n').filter(Boolean);
+  if (lines.length <= linesBefore) return 'no new line was appended to daily usage log';
+  const lastLine = lines[lines.length - 1];
+  let entry;
+  try { entry = JSON.parse(lastLine); } catch { return `last line not valid JSON: ${lastLine}`; }
+  if (entry.status !== 'error') return `expected status "error", got: "${entry.status}"`;
+  // Clean up the test line
+  try {
+    const kept = lines.slice(0, linesBefore).join('\n');
+    writeFileSync(USAGE_JSONL, kept ? kept + '\n' : '', 'utf8');
+  } catch {}
+  return true;
+});
+// ─── Test 23: enforce-tier: cost-saver demotes think ────────────────────────
+test('enforce-tier: cost-saver demotes think', () => {
+  const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');
+  let originalProfile;
+  try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }
+  try {
+    writeFileSync(profileFile, JSON.stringify({ active: 'cost-saver' }));
+    // "edit the README file" — execute-like text, no think words
+    // cost-saver's demote_think=true demotes think→execute when text lacks think words
+    const payload = JSON.stringify({
+      tool_name: 'Agent',
+      tool_input: { prompt: 'edit the README file', model: 'opus' },
+    });
+    const { parsed, status } = run(ENFORCE_TIER, payload);
+    if (status !== 0) return `non-zero exit: ${status}`;
+    if (!parsed) return 'no valid JSON output';
+    // With demote_think, the tier stays execute, so opus on execute work exits 0 with valid JSON
+    return true;
+  } finally {
+    if (originalProfile !== null) writeFileSync(profileFile, originalProfile);
+    else try { unlinkSync(profileFile); } catch {}
+  }
+});
+// ─── Test 24: enforce-tier: quality-first promotes execute ──────────────────
+test('enforce-tier: quality-first promotes execute', () => {
+  const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');
+  let originalProfile;
+  try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }
+  try {
+    writeFileSync(profileFile, JSON.stringify({ active: 'quality-first' }));
+    // Think-like description on sonnet model — quality-first's promote_execute=true
+    // promotes to think when text matches think words
+    const payload = JSON.stringify({
+      tool_name: 'Agent',
+      tool_input: { prompt: 'review architecture and plan the migration', model: 'sonnet' },
+    });
+    const { parsed, status } = run(ENFORCE_TIER, payload);
+    if (status !== 0) return `non-zero exit: ${status}`;
+    if (!parsed) return 'no valid JSON output';
+    if (!parsed.systemMessage) return `expected systemMessage, got: ${JSON.stringify(parsed)}`;
+    if (!parsed.systemMessage.toLowerCase().includes('think'))
+      return `expected "think" in systemMessage, got: ${parsed.systemMessage}`;
+    return true;
+  } finally {
+    if (originalProfile !== null) writeFileSync(profileFile, originalProfile);
+    else try { unlinkSync(profileFile); } catch {}
+  }
+});
+// ─── Test 25: enforce-tier: auto profile with high-risk file ────────────────
+test('enforce-tier: auto profile with high-risk file', () => {
+  const profileFile = resolve(__dirname, '..', 'dual-brain.profile.json');
+  let originalProfile;
+  try { originalProfile = readFileSync(profileFile, 'utf8'); } catch { originalProfile = null; }
+  try {
+    writeFileSync(profileFile, JSON.stringify({ active: 'auto' }));
+    // Description with auth/credentials path → risk classifier detects critical risk → promote to think
+    const payload = JSON.stringify({
+      tool_name: 'Agent',
+      tool_input: { description: 'update src/auth/credentials.mjs', prompt: 'change the token logic', model: 'sonnet' },
+    });
+    const { parsed, status } = run(ENFORCE_TIER, payload);
+    if (status !== 0) return `non-zero exit: ${status}`;
+    if (!parsed) return 'no valid JSON output';
+    if (!parsed.systemMessage) return `expected systemMessage, got: ${JSON.stringify(parsed)}`;
+    const msg = parsed.systemMessage.toLowerCase();
+    if (!msg.includes('think') && !msg.includes('dual-brain'))
+      return `expected "think" or "dual-brain" in systemMessage, got: ${parsed.systemMessage}`;
+    return true;
+  } finally {
+    // Always restore profile to auto so subsequent tests aren't affected
+    writeFileSync(profileFile, JSON.stringify({ active: 'auto' }));
+  }
+});
+// ─── Test 26: adaptive: recordFailure writes to ledger ─────────────────────
+test('adaptive: recordFailure writes to ledger', () => {
+  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
+  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
+  try {
+    const script = `
+      import { recordFailure } from './failure-detector.mjs';
+      recordFailure('testhash123', 'execute', 'test_error');
+    `;
+    const proc = spawnSync(process.execPath, [
+      '--input-type=module',
+      '-e', script,
+    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
+    if (proc.status !== 0) return `recordFailure script failed: ${proc.stderr}`;
+    if (!existsSync(LEDGER)) return 'ledger file not created';
+    const lines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
+    const lastLine = lines[lines.length - 1];
+    let entry;
+    try { entry = JSON.parse(lastLine); } catch { return `last line not valid JSON: ${lastLine}`; }
+    if (entry.prompt_hash !== 'testhash123') return `expected prompt_hash=testhash123, got: ${entry.prompt_hash}`;
+    if (entry.success !== false) return `expected success=false, got: ${entry.success}`;
+    return true;
+  } finally {
+    if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
+    else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
+  }
+});
+// ─── Test 27: adaptive: checkFailureLoop detects 2+ failures ───────────────
+test('adaptive: checkFailureLoop detects 2+ failures', () => {
+  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
+  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
+  try {
+    const hash = 'looptest_' + Date.now();
+    const now = new Date().toISOString();
+    const failEntry = JSON.stringify({
+      type: 'failure', timestamp: now, prompt_hash: hash,
+      tier: 'execute', reason: 'test', success: false,
+    });
+    const content = (backup || '') + failEntry + '\n' + failEntry + '\n';
+    writeFileSync(LEDGER, content, 'utf8');
+    const script = `
+      import { checkFailureLoop } from './failure-detector.mjs';
+      const result = checkFailureLoop('${hash}');
+      process.stdout.write(JSON.stringify(result));
+    `;
+    const proc = spawnSync(process.execPath, [
+      '--input-type=module',
+      '-e', script,
+    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
+    if (proc.status !== 0) return `checkFailureLoop script failed: ${proc.stderr}`;
+    let result;
+    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
+    if (!result.isLoop) return `expected isLoop=true, got: ${JSON.stringify(result)}`;
+    if (result.count < 2) return `expected count>=2, got: ${result.count}`;
+    if (result.suggestion !== 'promote_tier' && result.suggestion !== 'escalate_to_dual_brain')
+      return `unexpected suggestion: ${result.suggestion}`;
+    return true;
+  } finally {
+    if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
+    else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
+  }
+});
+// ─── Test 28: adaptive: checkFailureLoop ignores old failures ──────────────
+test('adaptive: checkFailureLoop ignores old failures', () => {
+  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
+  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
+  try {
+    const hash = 'oldtest_' + Date.now();
+    const threeHoursAgo = new Date(Date.now() - 3 * 60 * 60 * 1000).toISOString();
+    const oldEntry = JSON.stringify({
+      type: 'failure', timestamp: threeHoursAgo, prompt_hash: hash,
+      tier: 'execute', reason: 'old_test', success: false,
+    });
+    writeFileSync(LEDGER, oldEntry + '\n' + oldEntry + '\n', 'utf8');
+    const script = `
+      import { checkFailureLoop } from './failure-detector.mjs';
+      const result = checkFailureLoop('${hash}');
+      process.stdout.write(JSON.stringify(result));
+    `;
+    const proc = spawnSync(process.execPath, [
+      '--input-type=module',
+      '-e', script,
+    ], { encoding: 'utf8', timeout: 5000, cwd: HOOKS });
+    if (proc.status !== 0) return `checkFailureLoop script failed: ${proc.stderr}`;
+    let result;
+    try { result = JSON.parse(proc.stdout.trim()); } catch { return `output not JSON: ${proc.stdout}`; }
+    if (result.isLoop) return `expected isLoop=false for old failures, got: ${JSON.stringify(result)}`;
+    return true;
+  } finally {
+    if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
+    else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
+  }
+});
+// ─── Test 29: adaptive: cost-logger records Agent errors ───────────────────
+test('adaptive: cost-logger records Agent errors', () => {
+  const LEDGER = resolve(HOOKS, 'decision-ledger.jsonl');
+  const backup = existsSync(LEDGER) ? readFileSync(LEDGER, 'utf8') : null;
+  try {
+    let linesBefore = 0;
+    if (existsSync(LEDGER)) {
+      linesBefore = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean).length;
+    }
+    const payload = JSON.stringify({
+      tool_name: 'Agent',
+      tool_input: { prompt: 'failing task hash test' },
+      error: 'test failure',
+    });
+    const { status } = runStream(COST_LOGGER, payload);
+    if (status !== 0) return `non-zero exit: ${status}`;
+    if (!existsSync(LEDGER)) return 'ledger file not created';
+    const lines = readFileSync(LEDGER, 'utf8').split('\n').filter(Boolean);
+    if (lines.length <= linesBefore) return 'no new failure entry appended to ledger';
+    const newEntry = lines[lines.length - 1];
+    let entry;
+    try { entry = JSON.parse(newEntry); } catch { return `last line not valid JSON: ${newEntry}`; }
+    if (entry.success !== false) return `expected success=false, got: ${entry.success}`;
+    if (entry.type !== 'failure') return `expected type=failure, got: ${entry.type}`;
+    return true;
+  } finally {
+    if (backup !== null) writeFileSync(LEDGER, backup, 'utf8');
+    else try { writeFileSync(LEDGER, '', 'utf8'); } catch {}
+  }
+});
 // ─── Summary ─────────────────────────────────────────────────────────────────
 const total = passed + failed;
 console.log(`\n${passed}/${total} tests passed`);

package/install.mjs CHANGED Viewed

@@ -424,6 +424,12 @@ function profilePath(workspace) {
 }
 const PROFILES = {
+  auto: {
+    description: 'Adapts routing based on task risk, provider health, and outcomes',
+    routing: { prefer_provider: 'auto', think_threshold: 'adaptive', gpt_dispatch_bias: 0 },
+    budgets: { session_warn_usd: 5, session_limit_usd: 10, daily_warn_usd: 20, daily_limit_usd: 50 },
+    quality_gate: { sensitivity_floor: 'medium', dual_brain_minimum: 'high' },
+  },
   balanced: {
     description: 'Auto-routes by complexity, uses both providers evenly',
     routing: { prefer_provider: 'auto', think_threshold: 'normal', gpt_dispatch_bias: 0 },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "dual-brain",
-  "version": "3.7.0",
+  "version": "3.7.2",
   "description": "Dual-provider orchestration for Claude Code — tiered routing, budget balancing, and GPT dual-brain review across Claude + OpenAI subscriptions",
   "type": "module",
   "bin": {