npm - dual-brain - Versions diffs - 0.2.24 → 0.2.25 - Mend

dual-brain 0.2.24 → 0.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "dual-brain",
-  "version": "0.2.24",
+  "version": "0.2.25",
   "description": "AI orchestration across Claude + OpenAI subscriptions — smart routing, budget awareness, and dual-brain collaboration",
   "type": "module",
   "bin": {
@@ -47,7 +47,9 @@
     "./envelope": "./src/envelope.mjs",
     "./session-lock": "./src/session-lock.mjs",
     "./governance": "./src/governance.mjs",
-    "./context-intel": "./src/context-intel.mjs"
+    "./context-intel": "./src/context-intel.mjs",
+    "./signal": "./src/signal.mjs",
+    "./routing-advisor": "./src/routing-advisor.mjs"
   },
   "keywords": [
     "claude-code",
@@ -134,6 +136,8 @@
     "src/session-lock.mjs",
     "src/governance.mjs",
     "src/context-intel.mjs",
+    "src/signal.mjs",
+    "src/routing-advisor.mjs",
     "bin/*.mjs",
     "hooks/enforce-tier.mjs",
     "hooks/cost-logger.mjs",

package/src/outcome.mjs CHANGED Viewed

@@ -1,6 +1,7 @@
-import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync } from 'fs';
+import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync, readdirSync } from 'fs';
 import { join } from 'path';
 import { randomUUID } from 'crypto';
+import { execSync } from 'child_process';
 const STOP_WORDS = new Set([
   'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'from',
@@ -204,6 +205,77 @@ export async function getRelevantOutcomes(prompt, files = [], cwd, options = {})
   }
 }
+/**
+ * Score how well the files touched by recent outcomes have survived.
+ *
+ * Looks at the last 20 `outcome_*.json` files (sorted by name) under
+ * `<cwd>/.dualbrain/outcomes`. For every record that has a non-empty
+ * `result.filesChanged` array and no `survivalScore` yet, the score is the share of
+ * listed files that still exist and do not show up in `git diff --name-only`.
+ * The score is written back into the record's JSON file.
+ *
+ * @param {string} cwd  Project directory; relative entries in `filesChanged` are resolved against it.
+ * @returns {Promise<Array<{id: *, survivalScore: number}>>} Records scored by this call; `[]` on any failure.
+ */
+export async function checkFileSurvival(cwd) {
+  try {
+    const dir = join(cwd, '.dualbrain', 'outcomes');
+    if (!existsSync(dir)) return [];
+    // Collect up to the last 20 individual outcome JSON files
+    let files;
+    try {
+      files = readdirSync(dir)
+        .filter(f => f.startsWith('outcome_') && f.endsWith('.json'))
+        .sort()
+        .slice(-20);
+    } catch {
+      return [];
+    }
+    // Get current git-modified files (best-effort)
+    const modifiedFiles = new Set();
+    try {
+      // --relative makes git print paths relative to cwd instead of the repository root,
+      // which is what the join(cwd, f) lookups below assume.
+      const gitOut = execSync('git diff --name-only --relative', { cwd, stdio: ['ignore', 'pipe', 'pipe'] }).toString();
+      for (const f of gitOut.split('\n').map(l => l.trim()).filter(Boolean)) {
+        modifiedFiles.add(f);
+        modifiedFiles.add(join(cwd, f));
+      }
+    } catch {
+      // git unavailable — proceed without modified-file check
+    }
+    const scored = [];
+    for (const fname of files) {
+      const fpath = join(dir, fname);
+      let record;
+      try {
+        record = JSON.parse(readFileSync(fpath, 'utf8'));
+      } catch {
+        continue;
+      }
+      // Skip unusable records, ones already scored, and ones with no filesChanged list
+      if (record === null || typeof record !== 'object') continue;
+      if (record.survivalScore !== undefined) continue;
+      const changedFiles = record.result?.filesChanged;
+      if (!Array.isArray(changedFiles) || changedFiles.length === 0) continue;
+      let survived = 0;
+      for (const f of changedFiles) {
+        // A malformed (non-string) entry counts as "not survived" rather than aborting the whole pass
+        if (typeof f !== 'string') continue;
+        const absPath = f.startsWith('/') ? f : join(cwd, f);
+        const exists = existsSync(absPath);
+        const modified = modifiedFiles.has(f) || modifiedFiles.has(absPath);
+        if (exists && !modified) survived++;
+      }
+      const survivalScore = survived / changedFiles.length;
+      record.survivalScore = survivalScore;
+      try {
+        writeFileSync(fpath, JSON.stringify(record, null, 2), 'utf8');
+      } catch {
+        // write failed — do not report a score that was not persisted
+        continue;
+      }
+      scored.push({ id: record.id, survivalScore });
+    }
+    return scored;
+  } catch {
+    return [];
+  }
+}
 export async function getOutcomeStats(cwd, days = 7) {
   try {
     const allFiles = last7DaysFiles(cwd).slice(0, days);

package/src/pipeline.mjs CHANGED Viewed

@@ -10,7 +10,7 @@ import { detectTask } from './detect.mjs';
 import { decideRoute, getWorkStyle, WORK_STYLES } from './decide.mjs';
 import { dispatch } from './dispatch.mjs';
 import { loadProfile } from './profile.mjs';
-import { mkdirSync, writeFileSync } from 'node:fs';
+import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { buildContextPack as buildContextPackIntel } from './context.mjs';
 import { compilePacket } from './context-intel.mjs';
@@ -708,6 +708,18 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
     // profile unavailable — proceed
   }
+  // Auto-disable if ROI is bad (< 30% hit rate after 10+ observations)
+  {
+    const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');
+    let metrics = { hits: 0, misses: 0, totalTokens: 0 };
+    try { metrics = JSON.parse(readFileSync(metricsPath, 'utf8')); } catch {}
+    if (metrics.hits + metrics.misses >= 10 && metrics.hits / (metrics.hits + metrics.misses) < 0.3) {
+      const verbose = opts.verbose ?? false;
+      if (verbose) process.stderr.write('[dual-brain] pre-dispatch think disabled: hit rate below 30%\n');
+      return { refined: false, reason: 'think ROI too low, auto-disabled' };
+    }
+  }
   try {
     log('[dual-brain] pre-dispatch think: refining work spec...');
@@ -756,12 +768,14 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
     if (!parsed || typeof parsed.confidence !== 'number' || parsed.confidence <= 0.7) {
       const reason = !parsed ? 'unparseable response' : `confidence ${parsed.confidence} <= 0.7`;
       log(`[dual-brain] pre-dispatch think: skipped (${reason})`);
+      _recordThinkMetrics(false, cwd);
       return { refined: false };
     }
     const ws = parsed.workSpec;
     if (!ws || !ws.objective) {
       log('[dual-brain] pre-dispatch think: skipped (no workSpec.objective)');
+      _recordThinkMetrics(false, cwd);
       return { refined: false };
     }
@@ -774,19 +788,44 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
     log(`[dual-brain] think refined: "${newObjective.slice(0, 60)}..." (confidence: ${parsed.confidence})`);
+    _recordThinkMetrics(true, cwd);
     return {
-      refined:  true,
-      prompt:   newObjective,
-      files:    newFiles,
-      decision: newDecision,
+      refined:    true,
+      prompt:     newObjective,
+      files:      newFiles,
+      decision:   newDecision,
+      confidence: parsed.confidence,
     };
   } catch (err) {
     // Non-blocking on any failure
     log(`[dual-brain] pre-dispatch think: skipped (error: ${err.message})`);
+    _recordThinkMetrics(false, cwd);
     return { refined: false };
   }
 }
+/**
+ * Record a think hit or miss into think-metrics.json (non-blocking).
+ *
+ * The stored counters are sanitised before being incremented: a file that is missing,
+ * unparseable, not a JSON object, or that lacks numeric `hits` / `misses` / `totalTokens`
+ * is treated as having zero for the affected counters. Other fields already present in
+ * the stored object are kept.
+ *
+ * @param {boolean} hit  — true if the think agent produced a usable refinement
+ * @param {string}  cwd  — project directory containing `.dualbrain/`
+ */
+function _recordThinkMetrics(hit, cwd) {
+  try {
+    const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');
+    let stored = null;
+    try { stored = JSON.parse(readFileSync(metricsPath, 'utf8')); } catch { /* no usable file yet */ }
+    const isObject = stored !== null && typeof stored === 'object' && !Array.isArray(stored);
+    const metrics = isObject ? stored : {};
+    // undefined++ yields NaN, which JSON.stringify writes as null — reset bad counters instead
+    for (const key of ['hits', 'misses', 'totalTokens']) {
+      if (!Number.isFinite(metrics[key])) metrics[key] = 0;
+    }
+    if (hit) {
+      metrics.hits++;
+    } else {
+      metrics.misses++;
+    }
+    metrics.totalTokens += 3000; // budget per think call
+    metrics.lastUpdated = new Date().toISOString();
+    mkdirSync(join(cwd, '.dualbrain'), { recursive: true });
+    writeFileSync(metricsPath, JSON.stringify(metrics, null, 2) + '\n');
+  } catch { /* non-blocking */ }
+}
 // ─── Main entry point ─────────────────────────────────────────────────────────
 /**
@@ -1230,6 +1269,22 @@ export async function runPipeline(trigger, prompt, options = {}) {
         run._thinkRefinedPrompt  = thinkRefinement.prompt;
         run._thinkRefinedFiles   = thinkRefinement.files;
         decision                 = thinkRefinement.decision;
+        // Cascade: if think agent is highly confident and task is simple, downgrade worker model
+        if (thinkRefinement.decision) {
+          const thinkConf = thinkRefinement.confidence || 0;
+          const currentModel = decision.model || 'sonnet';
+          if (thinkConf >= 0.9 && currentModel !== 'haiku') {
+            // High confidence from thinker = clear spec = cheaper model can execute
+            const prevModel = decision.model;
+            decision.model = 'haiku';
+            if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded ${prevModel || 'sonnet'} to haiku\n`);
+          } else if (thinkConf >= 0.75 && currentModel === 'opus') {
+            // Moderate confidence but spec is clear enough for sonnet
+            decision.model = 'sonnet';
+            if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded opus to sonnet\n`);
+          }
+        }
       }
     }

package/src/routing-advisor.mjs ADDED Viewed

@@ -0,0 +1,138 @@
+// routing-advisor.mjs — EMA + epsilon-greedy routing advisor
+// Learns which model works best for which task type from outcome signals.
+import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync } from 'node:fs';
+import { join } from 'node:path';
+// EMA smoothing factor: weight given to the newest reward in recordReward.
+const ALPHA = 0.3;
+// Lower bound for the exploration probability computed in adviseModel.
+const MIN_EPSILON = 0.1;
+// A cell with fewer total observations than this is answered from the static priors only.
+const MIN_OBSERVATIONS = 5;
+// Weight of the static prior in blended(), expressed as a number of pseudo-observations.
+const PRIOR_WEIGHT = 5;
+// Hand-set prior scores keyed as `${tier}:${model}`; staticPrior() falls back to 0.5 for missing keys.
+const STATIC_PRIORS = {
+  'search:haiku': 0.85,  'search:sonnet': 0.70,  'search:opus': 0.50,
+  'execute:haiku': 0.55, 'execute:sonnet': 0.80,  'execute:opus': 0.85,
+  'think:haiku': 0.30,   'think:sonnet': 0.70,    'think:opus': 0.90,
+  'review:haiku': 0.40,  'review:sonnet': 0.75,   'review:opus': 0.85,
+};
+// Candidate models adviseModel may return for each tier. Note that STATIC_PRIORS also lists
+// pairs (search:opus, think:haiku, review:haiku) that are not candidates here.
+const VALID_MODELS = {
+  search:  ['haiku', 'sonnet'],
+  execute: ['haiku', 'sonnet', 'opus'],
+  think:   ['sonnet', 'opus'],
+  review:  ['sonnet', 'opus'],
+};
+/** Path of the persisted routing state for a project directory (process.cwd() when cwd is falsy). */
+function stateFile(cwd) { return join(cwd || process.cwd(), '.dualbrain', 'routing-state.json'); }
+/**
+ * Load the persisted routing state.
+ *
+ * Always returns a plain object. A missing or unparseable file, or a file whose JSON
+ * root is not an object (null, array, number, string), yields `{}` so callers can
+ * index into the result without throwing.
+ *
+ * @param {string} [cwd]  Project directory.
+ * @returns {Object} State keyed by cell key.
+ */
+function loadState(cwd) {
+  try {
+    const p = stateFile(cwd);
+    if (!existsSync(p)) return {};
+    const parsed = JSON.parse(readFileSync(p, 'utf8'));
+    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
+    return isPlainObject ? parsed : {};
+  } catch { return {}; }
+}
+/**
+ * Persist the routing state as pretty-printed JSON.
+ * Writes to a sibling `.tmp` file first and renames it over the real state file.
+ * Any failure is swallowed; the function never throws.
+ */
+function saveState(state, cwd) {
+  try {
+    const baseDir = join(cwd || process.cwd(), '.dualbrain');
+    if (existsSync(baseDir) === false) {
+      mkdirSync(baseDir, { recursive: true });
+    }
+    const target = stateFile(cwd);
+    const scratch = target + '.tmp';
+    const payload = JSON.stringify(state, null, 2);
+    writeFileSync(scratch, payload, 'utf8');
+    renameSync(scratch, target);
+  } catch {
+    /* non-throwing */
+  }
+}
+// Prior score for a tier/model pair; pairs absent from STATIC_PRIORS get a neutral 0.5.
+const staticPrior = (tier, model) => {
+  const known = STATIC_PRIORS[`${tier}:${model}`];
+  return known ?? 0.5;
+};
+// Sum of the observation counts of every model stored under one cell key.
+const cellObs = (state, key) => {
+  let total = 0;
+  for (const m of Object.values(state[key] ?? {})) {
+    total = total + (m.observations ?? 0);
+  }
+  return total;
+};
+// Weighted mix of the learned EMA (weight n) and the static prior (weight PRIOR_WEIGHT).
+const blended = (ema, n, tier, model) => {
+  const learnedShare = n / (n + PRIOR_WEIGHT);
+  const priorShare = PRIOR_WEIGHT / (n + PRIOR_WEIGHT);
+  return learnedShare * ema + priorShare * staticPrior(tier, model);
+};
+/**
+ * Recommend a model for a task using the learned routing state.
+ *
+ * Only `tier` and `intent` of the profile are read here. Unknown or missing tiers are
+ * treated as 'execute'; a missing intent is treated as 'implement'.
+ *  - Fewer than MIN_OBSERVATIONS recorded for the cell: return the candidate with the
+ *    highest static prior (reason 'insufficient data, using heuristic').
+ *  - Otherwise, with probability epsilon pick a random candidate (reason 'exploration'),
+ *    else pick the candidate with the highest blended score (reason 'exploitation').
+ * Never throws; on any error it returns sonnet with reason 'error fallback'.
+ *
+ * @param {{intent?: string, tier?: string, risk?: *, files?: *, complexity?: *}} taskProfile
+ * @param {string} [cwd]  Project directory holding `.dualbrain/routing-state.json`.
+ * @returns {{model: string, reason: string, confidence: number, explored: boolean}}
+ */
+export function adviseModel(taskProfile, cwd) {
+  try {
+    const { tier, intent } = taskProfile ?? {};
+    // Array.isArray rejects inherited keys such as 'constructor', which a plain
+    // truthiness test on VALID_MODELS[tier] would accept as a valid tier.
+    const validTier = Array.isArray(VALID_MODELS[tier]) ? tier : 'execute';
+    const cellKey = `${validTier}:${intent ?? 'implement'}`;
+    const models = VALID_MODELS[validTier];
+    const state = loadState(cwd);
+    const totalObs = cellObs(state, cellKey);
+    if (totalObs < MIN_OBSERVATIONS) {
+      // Heuristic: pick highest static prior (earlier candidate wins ties)
+      const best = models.reduce((a, b) => staticPrior(validTier, a) >= staticPrior(validTier, b) ? a : b);
+      return { model: best, reason: 'insufficient data, using heuristic', confidence: 0.3, explored: false };
+    }
+    // Exploration rate decays with the number of observations but never drops below MIN_EPSILON
+    const epsilon = Math.max(MIN_EPSILON, 0.5 * Math.pow(0.9, totalObs));
+    const explored = Math.random() < epsilon;
+    if (explored) {
+      const model = models[Math.floor(Math.random() * models.length)];
+      return { model, reason: 'exploration', confidence: epsilon, explored: true };
+    }
+    // Exploitation: pick highest blended score
+    const cell = state[cellKey] ?? {};
+    let bestModel = models[0];
+    let bestScore = -Infinity;
+    for (const m of models) {
+      const entry = cell[m];
+      // Models never tried in this cell are scored purely from their static prior
+      const ema = entry?.ema ?? staticPrior(validTier, m);
+      const n = entry?.observations ?? 0;
+      const score = blended(ema, n, validTier, m);
+      if (score > bestScore) { bestScore = score; bestModel = m; }
+    }
+    return { model: bestModel, reason: 'exploitation', confidence: 1 - epsilon, explored: false };
+  } catch {
+    return { model: 'sonnet', reason: 'error fallback', confidence: 0.1, explored: false };
+  }
+}
+/**
+ * Fold one observed reward into the EMA of a cell/model pair and persist the state.
+ *
+ * Rewards that are not finite numbers are ignored; finite rewards are clamped to [0, 1].
+ * The first reward for a pair seeds the EMA with that reward. Never throws.
+ *
+ * @param {string} cellKey  Cell identifier, `${tier}:${intent}` as built by adviseModel.
+ * @param {string} model    Model the reward applies to.
+ * @param {number} reward   Number in [0, 1].
+ * @param {string} [cwd]    Project directory.
+ */
+export function recordReward(cellKey, model, reward, cwd) {
+  try {
+    // JSON.stringify writes NaN as null, so a bad reward would corrupt the stored EMA
+    if (typeof reward !== 'number' || !Number.isFinite(reward)) return;
+    const bounded = Math.min(1, Math.max(0, reward));
+    const state = loadState(cwd);
+    if (!state[cellKey]) state[cellKey] = {};
+    const entry = state[cellKey][model] ?? { ema: bounded, observations: 0 };
+    // Stored entries with a missing or non-numeric ema restart from the current reward
+    const previousEma = Number.isFinite(entry.ema) ? entry.ema : bounded;
+    entry.ema = ALPHA * bounded + (1 - ALPHA) * previousEma;
+    entry.observations = (Number.isFinite(entry.observations) ? entry.observations : 0) + 1;
+    entry.lastUpdated = new Date().toISOString();
+    entry.lastReward = bounded;
+    state[cellKey][model] = entry;
+    saveState(state, cwd);
+  } catch {
+    // non-throwing
+  }
+}
+/**
+ * Summarise the persisted routing state.
+ *
+ * @param {string} [cwd]  Project directory.
+ * @returns {{cells: Object, totalObservations: number, topPerformers: Array, worstPerformers: Array}}
+ *   `cells` mirrors the state with only `ema` and `observations` per model; the two
+ *   performer lists hold up to five `{cell, model, ema, observations}` rows taken from
+ *   the top and the bottom of the EMA ranking. On any error every field is empty / zero.
+ */
+export function getRoutingStats(cwd) {
+  try {
+    const state = loadState(cwd);
+    const cells = {};
+    const rows = [];
+    let totalObservations = 0;
+    for (const cellKey of Object.keys(state)) {
+      const perModel = state[cellKey];
+      cells[cellKey] ??= {};
+      for (const model of Object.keys(perModel)) {
+        const entry = perModel[model];
+        const observations = entry.observations ?? 0;
+        const ema = entry.ema;
+        cells[cellKey][model] = { ema, observations };
+        totalObservations += observations;
+        rows.push({ cell: cellKey, model, ema, observations });
+      }
+    }
+    // Highest EMA first
+    rows.sort((a, b) => b.ema - a.ema);
+    const topPerformers = rows.slice(0, 5);
+    const worstPerformers = rows.slice(-5).reverse();
+    return { cells, totalObservations, topPerformers, worstPerformers };
+  } catch {
+    return { cells: {}, totalObservations: 0, topPerformers: [], worstPerformers: [] };
+  }
+}
+/**
+ * Forget everything the advisor has learned by persisting an empty state object.
+ * Never throws.
+ * @param {string} [cwd]  Project directory.
+ */
+export function resetAdvisor(cwd) {
+  const emptyState = {};
+  try {
+    saveState(emptyState, cwd);
+  } catch {
+    /* non-throwing */
+  }
+}

package/src/signal.mjs ADDED Viewed

@@ -0,0 +1,114 @@
+// signal.mjs — Compound outcome signal scoring
+// Combines multiple weak signals into one reliable reward score.
+import { existsSync } from 'node:fs';
+import { join } from 'node:path';
+import { execSync } from 'node:child_process';
+// Expected run duration per tier, in milliseconds.
+export const EXPECTED_DURATION_MS = { search: 15000, execute: 45000, think: 30000, review: 40000 };
+/**
+ * Score a run's duration relative to the expected duration of its tier.
+ *
+ * With ratio = durationMs / expected:
+ *   ratio < 0.2        → 0.5
+ *   0.2 ≤ ratio < 0.5  → rises linearly from 0.5 to 1.0
+ *   0.5 ≤ ratio ≤ 1.5  → 1.0
+ *   1.5 < ratio ≤ 3.0  → falls linearly from 1.0 to 0.3
+ *   ratio > 3.0        → 0.3
+ *
+ * @param {number} durationMs  Measured duration in milliseconds.
+ * @param {string} tier        Tier name; tiers without a numeric entry use the 'execute' expectation.
+ * @returns {number|null} Score in [0.3, 1.0], or null when the duration is NaN, negative or unusable.
+ */
+export function scoreDurationRatio(durationMs, tier) {
+  try {
+    // typeof check keeps inherited keys such as 'constructor' from being used as an expectation
+    const forTier = EXPECTED_DURATION_MS[tier];
+    const expected = typeof forTier === 'number' ? forTier : EXPECTED_DURATION_MS.execute;
+    const ratio = durationMs / expected;
+    // Division never throws for bad numbers, it yields NaN — report "no signal" explicitly
+    if (Number.isNaN(ratio) || ratio < 0) return null;
+    if (ratio >= 0.5 && ratio <= 1.5) return 1.0;
+    if (ratio < 0.2) return 0.5;
+    if (ratio > 3.0) return 0.3;
+    if (ratio < 0.5) return 0.5 + ((ratio - 0.2) / (0.5 - 0.2)) * 0.5;
+    // ratio 1.5–3.0
+    return 1.0 - ((ratio - 1.5) / (3.0 - 1.5)) * 0.7;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Fraction of an outcome's changed files that still exist and have no pending git changes.
+ *
+ * Entries of `outcome.filesChanged` may be relative to `cwd` or POSIX-style absolute paths.
+ * Non-string entries count as not survived. If git cannot be run, only existence is checked.
+ *
+ * @param {{filesChanged?: string[]}} outcome
+ * @param {string} cwd  Project directory.
+ * @returns {number|null} Value in [0, 1]; 1.0 when there are no files to check; null on failure.
+ */
+export function measureFileSurvival(outcome, cwd) {
+  try {
+    const files = Array.isArray(outcome.filesChanged)
+      ? outcome.filesChanged
+      : [];
+    if (files.length === 0) return 1.0;
+    const changed = new Set();
+    try {
+      // --relative: paths relative to cwd rather than the repository root.
+      // stdin is not needed, so it is ignored instead of left as an open pipe.
+      const gitOut = execSync('git diff --name-only --relative', { cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] });
+      for (const name of gitOut.split('\n').map(f => f.trim()).filter(Boolean)) {
+        // Keep both spellings so relative and absolute entries can be matched
+        changed.add(name);
+        changed.add(join(cwd, name));
+      }
+    } catch {
+      // git unavailable or not a repository — fall back to the existence check alone
+    }
+    const survived = files.filter(f => {
+      if (typeof f !== 'string') return false;
+      // join() would glue an absolute path onto cwd, so absolute entries are used as-is
+      const abs = f.startsWith('/') ? f : join(cwd, f);
+      return existsSync(abs) && !changed.has(f) && !changed.has(abs);
+    });
+    return survived.length / files.length;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Combine several weak outcome signals into one reward.
+ *
+ * Signals and base weights: exit success (0.3), duration ratio (0.25), token efficiency
+ * (0.25), file survival (0.2, taken from `context.fileSurvival`). A signal that is
+ * unavailable is null and its weight is redistributed over the remaining signals.
+ *
+ * @param {Object} outcome  Reads `tier`, `success`, `status`, `durationMs`, `filesChanged`
+ *   (array or count) and `tokensUsed.output`.
+ * @param {{fileSurvival?: number}} [context]
+ * @returns {{reward: number, confidence: number, signals: {exitSuccess: number,
+ *   durationRatio: number|null, tokenEfficiency: number|null, fileSurvival: number|null}}}
+ *   `reward` and `confidence` are in [0, 1]; `confidence` is the summed weight of the
+ *   available signals. On an internal error everything is 0 / null.
+ */
+export function scoreOutcome(outcome, context = {}) {
+  try {
+    const tier = outcome.tier ?? 'execute';
+    const signals = [];
+    // Signal 1: exit success (weight 0.3)
+    let exitVal;
+    if (outcome.success === true) exitVal = 1.0;
+    else if (outcome.status === 'partial') exitVal = 0.4;
+    else exitVal = 0.0;
+    signals.push({ name: 'exitSuccess', value: exitVal, weight: 0.3 });
+    // Signal 2: duration ratio (weight 0.25)
+    const durationMs = outcome.durationMs ?? 0;
+    const rawDuration = durationMs > 0 ? scoreDurationRatio(durationMs, tier) : null;
+    // Anything that is not a finite number is treated as "signal unavailable"
+    const durVal = Number.isFinite(rawDuration) ? rawDuration : null;
+    signals.push({ name: 'durationRatio', value: durVal, weight: 0.25 });
+    // Signal 3: token efficiency (weight 0.25)
+    let effVal = null;
+    const filesChanged = outcome.filesChanged;
+    // Accept a list or a count; any other shape counts as zero files rather than yielding NaN
+    let fileCount = 0;
+    if (Array.isArray(filesChanged)) fileCount = filesChanged.length;
+    else if (Number.isFinite(filesChanged)) fileCount = filesChanged;
+    // A think-tier run that changed no files is not judged on files per token
+    if (!(fileCount === 0 && tier === 'think')) {
+      // Without a reported output-token count, estimate one token per 100 ms of run time
+      const tokensUsed =
+        outcome.tokensUsed?.output ??
+        (durationMs > 0 ? Math.round(durationMs / 100) : null);
+      if (Number.isFinite(tokensUsed)) {
+        // Files changed per 1000 tokens (token count floored at 1000)
+        const efficiency = fileCount / Math.max(1, tokensUsed / 1000);
+        if (efficiency > 2) effVal = 1.0;
+        else if (efficiency >= 0.5) effVal = 0.5 + ((efficiency - 0.5) / 1.5) * 0.5;
+        else if (efficiency < 0.1) effVal = 0.2;
+        else effVal = 0.2 + ((efficiency - 0.1) / 0.4) * 0.3;
+      }
+    }
+    signals.push({ name: 'tokenEfficiency', value: effVal, weight: 0.25 });
+    // Signal 4: file survival (weight 0.2) — delayed, may be null
+    const rawSurvival = context?.fileSurvival;
+    const survivalVal = Number.isFinite(rawSurvival) ? rawSurvival : null;
+    signals.push({ name: 'fileSurvival', value: survivalVal, weight: 0.2 });
+    // Compound score with weight redistribution
+    const active = signals.filter(s => s.value !== null);
+    const totalWeight = active.reduce((sum, s) => sum + s.weight, 0);
+    const reward = totalWeight > 0
+      ? active.reduce((sum, s) => sum + (s.value * s.weight / totalWeight), 0)
+      : 0;
+    const confidence = totalWeight;
+    return {
+      reward: Math.min(1, Math.max(0, reward)),
+      confidence: Math.min(1, confidence),
+      signals: {
+        exitSuccess: exitVal,
+        durationRatio: durVal,
+        tokenEfficiency: effVal,
+        fileSurvival: survivalVal,
+      },
+    };
+  } catch {
+    // exitSuccess is numeric on the normal path, so the fallback reports 0 rather than false
+    return { reward: 0, confidence: 0, signals: { exitSuccess: 0, durationRatio: null, tokenEfficiency: null, fileSurvival: null } };
+  }
+}