npm - dual-brain - Versions diffs - 0.2.23 → 0.2.25 - Mend

dual-brain 0.2.23 → 0.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/hooks/enforce-tier.mjs CHANGED Viewed

@@ -290,7 +290,19 @@ const THINK_WORDS = /\b(plan|design|architect|review|audit|security|code[-\s]?re
 const WRITE_INTENT_WORDS = /\b(edit|fix|change|update|create|write|modify|implement|refactor|add|remove|delete|build|install|configure|patch|apply|move|rename|migrate|replace|rewrite|generate|scaffold|init(?:ialize)?|setup|deploy|run\s+tests?|commit|push|install|uninstall)\b/i;
 // Dispatch marker prefix stamped by src/dispatch.mjs for all legitimate dispatches.
-const DISPATCH_MARKER_RE = /<!--\s*dual-brain-dispatch:\s*[a-z0-9]+\s*-->/i;
+const DISPATCH_MARKER_RE = /<!--\s*dual-brain-dispatch:[a-z0-9|:.\-]+\s*-->/i;
+/**
+ * Parse the dispatch marker stamped on a prompt into its fields.
+ *
+ * Accepts the enriched form `<!-- dual-brain-dispatch:<runId>|key:val|... -->`
+ * and is as tolerant about whitespace and letter case as DISPATCH_MARKER_RE, so
+ * a marker that the detection regex accepts is also parseable here.
+ *
+ * @param {unknown} prompt  Prompt text that may begin with a dispatch marker.
+ * @returns {null | ({ runId: string } & Record<string, string>)}
+ *   `null` when `prompt` is not a string or carries no marker; otherwise the
+ *   run id plus every `key:value` pair whose key and value are both non-empty.
+ */
+function parseDispatchMarker(prompt) {
+  if (typeof prompt !== 'string') return null;
+  const match = prompt.match(/<!--\s*dual-brain-dispatch:\s*([^>]+?)\s*-->/i);
+  if (!match) return null;
+  const [runId, ...pairs] = match[1].split('|').map((part) => part.trim());
+  const fields = { runId };
+  for (const pair of pairs) {
+    // Split on the FIRST colon only so values that contain ':' stay intact.
+    const sep = pair.indexOf(':');
+    if (sep <= 0) continue;
+    const key = pair.slice(0, sep).trim();
+    const val = pair.slice(sep + 1).trim();
+    // Empty values (e.g. `req:` when no required tier was computed) are dropped.
+    if (key && val) fields[key] = val;
+  }
+  return fields;
+}
 /**
  * Determine whether a prompt is purely read-only (no write keywords at all).
@@ -357,6 +369,22 @@ try {
     // Non-blocking governance warning — will be included in final output
   }
+  // ── Over-provisioning check via enriched dispatch marker ───────────────────
+  // If the marker carries governance scores, validate that the model tier isn't
+  // higher than the task actually requires (closes the brainstorm-opus loophole).
+  const markerFields = parseDispatchMarker(rawPrompt);
+  if (markerFields?.req && markerFields?.model) {
+    const reqTier = parseInt(markerFields.req, 10);
+    const modelTier = getGovernanceTier(markerFields.model);
+    if (!isNaN(reqTier) && modelTier > reqTier && reqTier <= 2) {
+      process.stdout.write(JSON.stringify({
+        systemMessage: `[governance] Over-provisioned: task requires tier ${reqTier} but using tier ${modelTier} model (${markerFields.model}). Consider downgrading.`,
+      }));
+      process.exit(0);
+    }
+  }
+  // ── End over-provisioning check ────────────────────────────────────────────
   // Compute prompt hash early for duplicate detection and logging
   const promptHash = computePromptHash(ti);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "dual-brain",
-  "version": "0.2.23",
+  "version": "0.2.25",
   "description": "AI orchestration across Claude + OpenAI subscriptions — smart routing, budget awareness, and dual-brain collaboration",
   "type": "module",
   "bin": {
@@ -47,7 +47,9 @@
     "./envelope": "./src/envelope.mjs",
     "./session-lock": "./src/session-lock.mjs",
     "./governance": "./src/governance.mjs",
-    "./context-intel": "./src/context-intel.mjs"
+    "./context-intel": "./src/context-intel.mjs",
+    "./signal": "./src/signal.mjs",
+    "./routing-advisor": "./src/routing-advisor.mjs"
   },
   "keywords": [
     "claude-code",
@@ -134,6 +136,8 @@
     "src/session-lock.mjs",
     "src/governance.mjs",
     "src/context-intel.mjs",
+    "src/signal.mjs",
+    "src/routing-advisor.mjs",
     "bin/*.mjs",
     "hooks/enforce-tier.mjs",
     "hooks/cost-logger.mjs",

package/src/dispatch.mjs CHANGED Viewed

@@ -18,6 +18,7 @@ import { getFailoverOrder } from './decide.mjs';
 import { getTemplate, renderPrompt, quickRender } from './templates.mjs';
 import { compilePacket, shapeForRole } from './context-intel.mjs';
 import { buildContextPack } from './context.mjs';
+import { scoreTask, computeRequiredTier } from './governance.mjs';
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const USAGE_DIR = join(__dirname, '..', '.dualbrain', 'usage');
@@ -706,8 +707,8 @@ function _renderTemplatedPrompt(prompt, decision, context = {}) {
 // Prepend a marker to every prompt that goes through the official dispatch pipeline.
 // The enforce-tier hook checks for this marker to distinguish legitimate dispatches
 // from raw Agent calls made by the HEAD that bypass the dual-brain pipeline.
-// Format: <!-- dual-brain-dispatch: <runId> -->
-// runId is a short timestamp-based ID that ties back to this dispatch session.
+// Format: <!-- dual-brain-dispatch:<runId>|tier:<tier>|model:<model>|risk:<risk>|req:<requiredTier> -->
+// runId is a short timestamp-based ID; governance fields enable over-provisioning validation.
 let _dispatchRunId = null;
@@ -719,9 +720,14 @@ function _getDispatchRunId() {
   return _dispatchRunId;
 }
-function _prependDispatchMarker(prompt) {
+function _prependDispatchMarker(prompt, decision = {}) {
   const runId = _getDispatchRunId();
-  return `<!-- dual-brain-dispatch: ${runId} -->\n${prompt}`;
+  const tier = decision.tier || 'execute';
+  const model = decision.model || 'sonnet';
+  const risk = decision.risk || 'medium';
+  const requiredTier = decision._requiredTier || '';
+  const marker = `<!-- dual-brain-dispatch:${runId}|tier:${tier}|model:${model}|risk:${risk}|req:${requiredTier} -->`;
+  return `${marker}\n${prompt}`;
 }
 // ─── Related session age label ────────────────────────────────────────────────
@@ -845,7 +851,12 @@ async function dispatch(input = {}) {
   // Stamp the prompt with the dispatch marker so enforce-tier.mjs can recognise
   // that this agent call came through the official pipeline.
-  prompt = _prependDispatchMarker(prompt);
+  // Compute required tier for governance validation
+  try {
+    const scores = scoreTask({ intent: decision.tier, risk: decision.risk, files, objective: prompt.slice(0, 200) });
+    decision = { ...decision, _requiredTier: computeRequiredTier(scores) };
+  } catch { /* non-blocking */ }
+  prompt = _prependDispatchMarker(prompt, decision);
   // ── Situation brief injection ────────────────────────────────────────────────
   // Prepend a compact project-state summary when provided by the pipeline.
@@ -1149,7 +1160,7 @@ async function dispatch(input = {}) {
     }
     // ── End auto-review annotation ────────────────────────────────────────────
-    return {
+    const nativeResult = {
       status:        success ? 'completed' : 'failed',
       type:          'native-agent',
       provider:      currentProvider,
@@ -1166,6 +1177,11 @@ async function dispatch(input = {}) {
       authVerified:  true,
       error: success ? null : errorText.slice(0, 200),
     };
+    try {
+      const { recordDispatchOutcome } = await import('./outcome.mjs');
+      recordDispatchOutcome(input, nativeResult);
+    } catch { /* never block */ }
+    return nativeResult;
   }
   const command = buildCommand(effectiveDecision, prompt, files, cwd);
@@ -1268,7 +1284,7 @@ async function dispatch(input = {}) {
   }
   // ── End auto-review annotation ──────────────────────────────────────────────
-  return {
+  const subResult = {
     status:      success ? 'completed' : 'failed',
     provider:    subProvider,
     model:       subModel,
@@ -1283,6 +1299,11 @@ async function dispatch(input = {}) {
     authVerified: true,
     error: success ? null : errorText.slice(0, 200),
   };
+  try {
+    const { recordDispatchOutcome } = await import('./outcome.mjs');
+    recordDispatchOutcome(input, subResult);
+  } catch { /* never block */ }
+  return subResult;
 }
 // ─── Dual-brain dispatch (parallel) ───────────────────────────────────────────
@@ -1295,7 +1316,12 @@ async function dispatchDualBrain(input = {}) {
   prompt = redact(prompt);
   // Stamp with dispatch marker so enforce-tier.mjs allows this Agent call
-  prompt = _prependDispatchMarker(prompt);
+  // Compute required tier for governance validation
+  try {
+    const scores = scoreTask({ intent: decision.tier, risk: decision.risk, files, objective: prompt.slice(0, 200) });
+    decision = { ...decision, _requiredTier: computeRequiredTier(scores) };
+  } catch { /* non-blocking */ }
+  prompt = _prependDispatchMarker(prompt, decision);
   // ── Situation brief injection ────────────────────────────────────────────────
   const _dualBrainBrief = typeof input.situationBrief === 'string' && input.situationBrief.trim()

package/src/outcome.mjs CHANGED Viewed

@@ -1,6 +1,7 @@
-import { mkdirSync, appendFileSync, readFileSync, existsSync } from 'fs';
+import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync, readdirSync } from 'fs';
 import { join } from 'path';
 import { randomUUID } from 'crypto';
+import { execSync } from 'child_process';
 const STOP_WORDS = new Set([
   'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'from',
@@ -44,6 +45,36 @@ function last7DaysFiles(cwd) {
   return files;
 }
+/**
+ * Persist one outcome record per dispatch as `outcome_<id>.json` in the
+ * outcomes directory for `cwd`. Never throws: any failure yields `null`.
+ *
+ * @param {object} dispatchInput  The input object that was handed to dispatch
+ *   (`cwd`, `decision` and `prompt` are read from it).
+ * @param {object} result         The dispatch result (`status`, `durationMs`,
+ *   `filesChanged`, `error`/`errors`, plus `tier`/`model`/`provider` fallbacks).
+ * @returns {object|null} The record that was written, or `null` on failure.
+ */
+export function recordDispatchOutcome(dispatchInput, result) {
+  try {
+    const cwd = dispatchInput.cwd ?? process.cwd();
+    const decision = dispatchInput.decision ?? {};
+    ensureDir(cwd);
+    // Timestamp prefix keeps file names sortable by time; the random suffix
+    // stops two dispatches finishing in the same millisecond from overwriting
+    // each other's record.
+    const id = `out_${Date.now().toString(36)}_${randomUUID().slice(0, 8)}`;
+    const changedFiles = Array.isArray(result.filesChanged) ? result.filesChanged : [];
+    const rawErrors = result.errors ?? (result.error ? [result.error] : []);
+    const errors = (Array.isArray(rawErrors) ? rawErrors : [rawErrors]).slice(0, 3);
+    const record = {
+      id,
+      timestamp: new Date().toISOString(),
+      prompt: String(dispatchInput.prompt ?? '').slice(0, 200),
+      tier: decision.tier ?? result.tier ?? 'execute',
+      model: decision.model ?? result.model ?? 'unknown',
+      provider: decision.provider ?? result.provider ?? 'unknown',
+      success: result.status === 'success' || result.status === 'completed',
+      status: result.status ?? 'unknown',
+      durationMs: result.durationMs ?? 0,
+      filesChanged: changedFiles.length,
+      errors,
+      lesson: '',
+    };
+    // checkFileSurvival reads the file LIST from `record.result.filesChanged`;
+    // the numeric count above is not enough for it to score this record.
+    if (changedFiles.length > 0) {
+      record.result = { filesChanged: changedFiles };
+    }
+    const filePath = join(outcomesDir(cwd), `outcome_${id}.json`);
+    writeFileSync(filePath, JSON.stringify(record, null, 2), 'utf8');
+    return record;
+  } catch {
+    return null;
+  }
+}
 export function computeRoutingScore(plan, result, verification) {
   let score = 3;
   if (result.success && result.duration < 60_000) score += 1;
@@ -174,6 +205,77 @@ export async function getRelevantOutcomes(prompt, files = [], cwd, options = {})
   }
 }
+/**
+ * Score how many files touched by recent dispatches are still present and
+ * unmodified, and write that score back into each outcome record.
+ *
+ * Looks at the last 20 `outcome_*.json` files (by name order) under
+ * `<cwd>/.dualbrain/outcomes`, skips records that already carry a
+ * `survivalScore` or list no files, and treats a file as "survived" when it
+ * exists and is not reported by `git diff --name-only`.
+ *
+ * @param {string} cwd  Project directory that owns the `.dualbrain` folder.
+ * @returns {Promise<Array<{ id: string, survivalScore: number }>>}
+ *   The records scored by this call; `[]` on any failure.
+ */
+export async function checkFileSurvival(cwd) {
+  try {
+    const dir = join(cwd, '.dualbrain', 'outcomes');
+    if (!existsSync(dir)) return [];
+    // Collect up to the last 20 individual outcome JSON files
+    let files;
+    try {
+      files = readdirSync(dir)
+        .filter((name) => name.startsWith('outcome_') && name.endsWith('.json'))
+        .sort()
+        .slice(-20);
+    } catch {
+      return [];
+    }
+    // Get current git-modified files (best-effort). `--relative` makes git
+    // print paths relative to `cwd` instead of the repository root, so they
+    // line up with the cwd-relative paths stored in the outcome records.
+    const modifiedFiles = new Set();
+    try {
+      const gitOut = execSync('git diff --name-only --relative', { cwd, stdio: ['ignore', 'pipe', 'pipe'] }).toString();
+      for (const f of gitOut.split('\n').map((line) => line.trim()).filter(Boolean)) {
+        modifiedFiles.add(f);
+        modifiedFiles.add(join(cwd, f));
+      }
+    } catch {
+      // git unavailable — proceed without modified-file check
+    }
+    const scored = [];
+    for (const fname of files) {
+      const fpath = join(dir, fname);
+      let record;
+      try {
+        record = JSON.parse(readFileSync(fpath, 'utf8'));
+      } catch {
+        continue;
+      }
+      // A file holding `null` or a bare value is not a record — skip it rather
+      // than letting one bad file abort the whole pass.
+      if (record === null || typeof record !== 'object') continue;
+      // Skip if already scored or no filesChanged list
+      if (record.survivalScore !== undefined) continue;
+      // The list may live under `result.filesChanged` or directly on the record.
+      let listed = [];
+      if (Array.isArray(record.result?.filesChanged)) listed = record.result.filesChanged;
+      else if (Array.isArray(record.filesChanged)) listed = record.filesChanged;
+      const changedFiles = listed.filter((f) => typeof f === 'string' && f.length > 0);
+      if (changedFiles.length === 0) continue;
+      let survived = 0;
+      for (const f of changedFiles) {
+        const absPath = f.startsWith('/') ? f : join(cwd, f);
+        const exists = existsSync(absPath);
+        const modified = modifiedFiles.has(f) || modifiedFiles.has(absPath);
+        if (exists && !modified) survived++;
+      }
+      const survivalScore = survived / changedFiles.length;
+      record.survivalScore = survivalScore;
+      try {
+        writeFileSync(fpath, JSON.stringify(record, null, 2), 'utf8');
+      } catch {
+        // write failed — skip
+        continue;
+      }
+      scored.push({ id: record.id, survivalScore });
+    }
+    return scored;
+  } catch {
+    return [];
+  }
+}
 export async function getOutcomeStats(cwd, days = 7) {
   try {
     const allFiles = last7DaysFiles(cwd).slice(0, days);

package/src/pipeline.mjs CHANGED Viewed

@@ -10,8 +10,10 @@ import { detectTask } from './detect.mjs';
 import { decideRoute, getWorkStyle, WORK_STYLES } from './decide.mjs';
 import { dispatch } from './dispatch.mjs';
 import { loadProfile } from './profile.mjs';
-import { mkdirSync, writeFileSync } from 'node:fs';
+import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
 import { join } from 'node:path';
+import { buildContextPack as buildContextPackIntel } from './context.mjs';
+import { compilePacket } from './context-intel.mjs';
 // Lazy-load collaboration module
 let _collab = null;
@@ -648,6 +650,182 @@ function runGate(run, gateName, gateFn) {
   return result.passed;
 }
+// ─── Pre-dispatch think (Position 1: context intelligence) ───────────────────
+/**
+ * Optionally spawn a cheap think agent to produce a refined work spec before
+ * the real dispatch. Non-blocking on any failure.
+ *
+ * Returns `{ refined: false }` without dispatching when the call is recursive,
+ * the tier is `search`, the decision confidence is >= 0.9, the work style is
+ * `cost-saver`, or the recorded think metrics show a hit rate below 30% after
+ * at least 10 observations (that last case also sets `reason`).
+ *
+ * The think agent's reply is model output, so every field taken from its
+ * `workSpec` is type-checked before it is merged into the dispatch inputs.
+ *
+ * @param {string}   prompt
+ * @param {string[]} files
+ * @param {object}   decision   — from plan._decision
+ * @param {string}   cwd
+ * @param {object}   profile
+ * @param {object}   [opts]
+ * @param {boolean}  [opts._skipPreDispatchThink]  — set true on recursive calls
+ * @param {Function} [opts.log]                    — logging function
+ * @param {boolean}  [opts.verbose]                — report auto-disable on stderr
+ * @returns {Promise<{ refined: boolean, prompt?: string, files?: string[], decision?: object, confidence?: number, reason?: string }>}
+ */
+async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}) {
+  const log = opts.log ?? (() => {});
+  // Guard: never recurse
+  if (opts._skipPreDispatchThink) {
+    log('[dual-brain] pre-dispatch think: skipped (recursive call)');
+    return { refined: false };
+  }
+  // Guard: only execute/think tiers. `search` is the only tier below level 2,
+  // so this single check covers the "tier >= 2" requirement as well.
+  const tier = decision?.tier ?? 'execute';
+  if (tier === 'search') {
+    log('[dual-brain] pre-dispatch think: skipped (search tier)');
+    return { refined: false };
+  }
+  // Guard: decision confidence must be < 0.9
+  const confidence = decision?.confidence ?? 0.5;
+  if (confidence >= 0.9) {
+    log('[dual-brain] pre-dispatch think: skipped (confidence >= 0.9)');
+    return { refined: false };
+  }
+  // Guard: not cost-saver work style
+  try {
+    const style = getWorkStyle(profile);
+    if (style.key === 'cost-saver') {
+      log('[dual-brain] pre-dispatch think: skipped (cost-saver profile)');
+      return { refined: false };
+    }
+  } catch {
+    // profile unavailable — proceed
+  }
+  // Auto-disable if ROI is bad (< 30% hit rate after 10+ observations)
+  {
+    const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');
+    let metrics = { hits: 0, misses: 0, totalTokens: 0 };
+    try { metrics = JSON.parse(readFileSync(metricsPath, 'utf8')); } catch {}
+    if (metrics.hits + metrics.misses >= 10 && metrics.hits / (metrics.hits + metrics.misses) < 0.3) {
+      const verbose = opts.verbose ?? false;
+      if (verbose) process.stderr.write('[dual-brain] pre-dispatch think disabled: hit rate below 30%\n');
+      return { refined: false, reason: 'think ROI too low, auto-disabled' };
+    }
+  }
+  try {
+    log('[dual-brain] pre-dispatch think: refining work spec...');
+    // Build the thinker context pack
+    const pack = await buildContextPackIntel(prompt, files, cwd);
+    // Compile to a thinker-shaped prompt (sonnet, 3000 token budget)
+    const thinkerPrompt = compilePacket(pack, 'thinker', 'sonnet', 3000);
+    // Dispatch to a think agent — use sonnet, tier=think, skip all extras
+    const thinkDecision = {
+      provider: 'claude',
+      model: 'sonnet',
+      tier: 'think',
+      confidence: 1,   // internal call — fully confident
+    };
+    const thinkResult = await dispatch({
+      decision: thinkDecision,
+      prompt: thinkerPrompt,
+      files: [],
+      cwd,
+      dryRun: false,
+      verbose: false,
+      profile,
+      _skipPreDispatchThink: true,
+      _skipRelatedContext: true,
+    });
+    // Parse the think result — expect JSON with { decision, confidence, workSpec }
+    let parsed = null;
+    try {
+      const raw = typeof thinkResult === 'string'
+        ? thinkResult
+        : (thinkResult?.output ?? thinkResult?.result ?? thinkResult?.text ?? JSON.stringify(thinkResult));
+      // Extract JSON from possible prose wrapping
+      const jsonMatch = raw.match(/\{[\s\S]*\}/);
+      if (jsonMatch) {
+        parsed = JSON.parse(jsonMatch[0]);
+      }
+    } catch {
+      // JSON parse failed — proceed unchanged
+    }
+    if (!parsed || typeof parsed.confidence !== 'number' || parsed.confidence <= 0.7) {
+      const reason = !parsed ? 'unparseable response' : `confidence ${parsed.confidence} <= 0.7`;
+      log(`[dual-brain] pre-dispatch think: skipped (${reason})`);
+      _recordThinkMetrics(false, cwd);
+      return { refined: false };
+    }
+    const ws = parsed.workSpec;
+    // The objective becomes the dispatched prompt, so it must be real text.
+    if (!ws || typeof ws.objective !== 'string' || ws.objective.trim() === '') {
+      log('[dual-brain] pre-dispatch think: skipped (no workSpec.objective)');
+      _recordThinkMetrics(false, cwd);
+      return { refined: false };
+    }
+    // Apply refinements. Only string entries of real arrays are merged: a bare
+    // string here would otherwise be spread into single characters.
+    const onlyStrings = (value) =>
+      (Array.isArray(value) ? value.filter((item) => typeof item === 'string' && item !== '') : []);
+    const newObjective = ws.objective;
+    const baseFiles    = Array.isArray(files) ? files : [];
+    const newFiles     = [...new Set([...baseFiles, ...onlyStrings(ws.files)])];
+    const criteria     = onlyStrings(ws.criteria);
+    const newDecision  = criteria.length
+      ? { ...decision, acceptanceCriteria: [...(decision?.acceptanceCriteria ?? []), ...criteria] }
+      : decision;
+    log(`[dual-brain] think refined: "${newObjective.slice(0, 60)}..." (confidence: ${parsed.confidence})`);
+    _recordThinkMetrics(true, cwd);
+    return {
+      refined:    true,
+      prompt:     newObjective,
+      files:      newFiles,
+      decision:   newDecision,
+      confidence: parsed.confidence,
+    };
+  } catch (err) {
+    // Non-blocking on any failure
+    log(`[dual-brain] pre-dispatch think: skipped (error: ${err.message})`);
+    _recordThinkMetrics(false, cwd);
+    return { refined: false };
+  }
+}
+/**
+ * Record a think hit or miss into think-metrics.json (non-blocking).
+ *
+ * The stored file is treated as untrusted: if it is missing, unparseable, not
+ * an object, or has non-numeric counters, the affected counters restart at 0
+ * instead of turning into NaN (which would serialise as `null`).
+ *
+ * @param {boolean} hit  — true if the think agent produced a usable refinement
+ * @param {string}  cwd
+ */
+function _recordThinkMetrics(hit, cwd) {
+  try {
+    const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');
+    let stored = null;
+    try { stored = JSON.parse(readFileSync(metricsPath, 'utf8')); } catch { /* missing or unreadable — start fresh */ }
+    const base = stored !== null && typeof stored === 'object' && !Array.isArray(stored) ? stored : {};
+    const asCount = (value) => (Number.isFinite(value) && value >= 0 ? value : 0);
+    const metrics = {
+      hits: 0,
+      misses: 0,
+      totalTokens: 0,
+      ...base,
+    };
+    metrics.hits = asCount(base.hits) + (hit ? 1 : 0);
+    metrics.misses = asCount(base.misses) + (hit ? 0 : 1);
+    metrics.totalTokens = asCount(base.totalTokens) + 3000; // budget per think call
+    metrics.lastUpdated = new Date().toISOString();
+    mkdirSync(join(cwd, '.dualbrain'), { recursive: true });
+    writeFileSync(metricsPath, JSON.stringify(metrics, null, 2) + '\n');
+  } catch { /* non-blocking */ }
+}
 // ─── Main entry point ─────────────────────────────────────────────────────────
 /**
@@ -1070,7 +1248,49 @@ export async function runPipeline(trigger, prompt, options = {}) {
       }
     }
-    const decision = { ...run.plan._decision };
+    let decision = { ...run.plan._decision };
+    // ── Pre-dispatch think (Position 1: context intelligence) ────────────────
+    // For tier-2+ non-trivial tasks with decision confidence < 0.9, spawn a
+    // cheap sonnet think agent to produce a refined work spec before the real
+    // dispatch. Non-blocking — if it fails or confidence is low, proceed as-is.
+    {
+      const thinkRefinement = await preDispatchThink(
+        effectivePrompt,
+        files,
+        decision,
+        cwd,
+        run.context?.profile ?? {},
+        { log, _skipPreDispatchThink: options._skipPreDispatchThink }
+      );
+      if (thinkRefinement.refined) {
+        // Mutate locals so both collab and direct paths use the refined inputs
+        // (effectivePrompt is const — store refinement in a mutable local)
+        run._thinkRefinedPrompt  = thinkRefinement.prompt;
+        run._thinkRefinedFiles   = thinkRefinement.files;
+        decision                 = thinkRefinement.decision;
+        // Cascade: if think agent is highly confident and task is simple, downgrade worker model
+        if (thinkRefinement.decision) {
+          const thinkConf = thinkRefinement.confidence || 0;
+          const currentModel = decision.model || 'sonnet';
+          if (thinkConf >= 0.9 && currentModel !== 'haiku') {
+            // High confidence from thinker = clear spec = cheaper model can execute
+            const prevModel = decision.model;
+            decision.model = 'haiku';
+            if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded ${prevModel || 'sonnet'} to haiku\n`);
+          } else if (thinkConf >= 0.75 && currentModel === 'opus') {
+            // Moderate confidence but spec is clear enough for sonnet
+            decision.model = 'sonnet';
+            if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded opus to sonnet\n`);
+          }
+        }
+      }
+    }
+    // Resolve the (possibly refined) prompt and file list for dispatch
+    const dispatchPrompt = run._thinkRefinedPrompt ?? effectivePrompt;
+    const dispatchFiles  = run._thinkRefinedFiles  ?? files;
     // ── HEAD judgment injection into agent prompts ─────────────────────────────
     // HEAD's obligations, noticings, and uncertainties flow to the work agent
@@ -1130,13 +1350,13 @@ export async function runPipeline(trigger, prompt, options = {}) {
       // Inject collaboration context + HEAD judgment into prompt
       const collabContext = collab.buildAgentContext(session, primaryId);
-      const promptParts = [collabContext, headJudgmentBlock, effectivePrompt].filter(Boolean);
+      const promptParts = [collabContext, headJudgmentBlock, dispatchPrompt].filter(Boolean);
       const collabPrompt = promptParts.join('\n\n');
       run.result = await dispatch({
         decision,
         prompt: collabPrompt,
-        files,
+        files: dispatchFiles,
         cwd,
         dryRun: false,
         verbose,
@@ -1192,13 +1412,13 @@ export async function runPipeline(trigger, prompt, options = {}) {
       try { collab.persistEvents(session, cwd); } catch {}
     } else {
       const directPrompt = headJudgmentBlock
-        ? `${headJudgmentBlock}\n\n${effectivePrompt}`
-        : effectivePrompt;
+        ? `${headJudgmentBlock}\n\n${dispatchPrompt}`
+        : dispatchPrompt;
       run.result = await dispatch({
         decision,
         prompt: directPrompt,
-        files,
+        files: dispatchFiles,
         cwd,
         dryRun: false,
         verbose,

package/src/routing-advisor.mjs ADDED Viewed

@@ -0,0 +1,138 @@
+// routing-advisor.mjs — EMA + epsilon-greedy routing advisor
+// Learns which model works best for which task type from outcome signals.
+import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync } from 'node:fs';
+import { join } from 'node:path';
+// EMA smoothing factor: weight given to the newest reward in recordReward.
+const ALPHA = 0.3;
+// Lower bound for the exploration probability computed in adviseModel.
+const MIN_EPSILON = 0.1;
+// Below this many total observations for a cell, adviseModel ignores learned
+// state and picks by static prior.
+const MIN_OBSERVATIONS = 5;
+// Pseudo-observation count given to the static prior when it is blended with
+// the learned EMA (see `blended`).
+const PRIOR_WEIGHT = 5;
+// Hand-set starting scores keyed by `<tier>:<model>`; lookups that miss fall
+// back to 0.5 in `staticPrior`.
+const STATIC_PRIORS = {
+  'search:haiku': 0.85,  'search:sonnet': 0.70,  'search:opus': 0.50,
+  'execute:haiku': 0.55, 'execute:sonnet': 0.80,  'execute:opus': 0.85,
+  'think:haiku': 0.30,   'think:sonnet': 0.70,    'think:opus': 0.90,
+  'review:haiku': 0.40,  'review:sonnet': 0.75,   'review:opus': 0.85,
+};
+// Models adviseModel is allowed to return for each tier.
+const VALID_MODELS = {
+  search:  ['haiku', 'sonnet'],
+  execute: ['haiku', 'sonnet', 'opus'],
+  think:   ['sonnet', 'opus'],
+  review:  ['sonnet', 'opus'],
+};
+/** Path of the persisted routing state for `cwd` (defaults to the process cwd). */
+function stateFile(cwd) { return join(cwd || process.cwd(), '.dualbrain', 'routing-state.json'); }
+/**
+ * Read the routing state from disk.
+ * @param {string} [cwd]
+ * @returns {object} The stored state, or `{}` when the file is missing,
+ *   unreadable, not valid JSON, or does not hold a plain object. Callers index
+ *   into the result, so `null` or an array must never be handed back.
+ */
+function loadState(cwd) {
+  try {
+    const p = stateFile(cwd);
+    if (!existsSync(p)) return {};
+    const parsed = JSON.parse(readFileSync(p, 'utf8'));
+    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
+    return isPlainObject ? parsed : {};
+  } catch { return {}; }
+}
+/**
+ * Write the routing state to disk via a temp file followed by a rename, so a
+ * reader never observes a half-written file. Never throws.
+ * @param {object} state
+ * @param {string} [cwd]
+ */
+function saveState(state, cwd) {
+  try {
+    const dir = join(cwd || process.cwd(), '.dualbrain');
+    // `recursive: true` already tolerates an existing directory.
+    mkdirSync(dir, { recursive: true });
+    const p = stateFile(cwd);
+    // Per-process temp name so two processes saving at once do not write into
+    // the same temp file.
+    const tmp = `${p}.${process.pid}.tmp`;
+    writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf8');
+    renameSync(tmp, p);
+  } catch { /* non-throwing */ }
+}
+/** Static prior for a tier/model pair; 0.5 when the pair has no entry. */
+function staticPrior(tier, model) {
+  const prior = STATIC_PRIORS[`${tier}:${model}`];
+  return prior ?? 0.5;
+}
+/** Total observation count across every model recorded under `state[key]`. */
+function cellObs(state, key) {
+  let total = 0;
+  for (const entry of Object.values(state[key] ?? {})) {
+    total += entry.observations ?? 0;
+  }
+  return total;
+}
+/**
+ * Weighted mix of the learned EMA and the static prior: the EMA counts as `n`
+ * observations and the prior as PRIOR_WEIGHT observations.
+ */
+function blended(ema, n, tier, model) {
+  const learnedShare = n / (n + PRIOR_WEIGHT);
+  const priorShare = PRIOR_WEIGHT / (n + PRIOR_WEIGHT);
+  return learnedShare * ema + priorShare * staticPrior(tier, model);
+}
+/**
+ * Recommend a model for a task using learned outcomes with epsilon-greedy
+ * exploration.
+ *
+ * - Fewer than MIN_OBSERVATIONS for the cell: pick the valid model with the
+ *   highest static prior.
+ * - Otherwise explore (uniform random valid model) with probability
+ *   `max(MIN_EPSILON, 0.5 * 0.9^observations)`, else exploit the model with
+ *   the highest prior-blended EMA.
+ *
+ * @param {{ intent?: string, tier?: string, risk?: string, files?: string[], complexity?: * }} taskProfile
+ *   Only `tier` and `intent` are read here; an unknown tier is treated as
+ *   `execute` and a missing intent as `implement`.
+ * @param {string} [cwd]
+ * @returns {{ model: string, reason: string, confidence: number, explored: boolean }}
+ *   Never throws; on an internal error returns the `sonnet` fallback.
+ */
+export function adviseModel(taskProfile, cwd) {
+  try {
+    const { tier, intent } = taskProfile ?? {};
+    // Require a real model list: a plain truthiness test would also accept
+    // inherited object keys such as "constructor".
+    const validTier = typeof tier === 'string' && Array.isArray(VALID_MODELS[tier]) ? tier : 'execute';
+    const cellKey = `${validTier}:${intent ?? 'implement'}`;
+    const models = VALID_MODELS[validTier];
+    const state = loadState(cwd);
+    const totalObs = cellObs(state, cellKey);
+    if (totalObs < MIN_OBSERVATIONS) {
+      // Heuristic: pick highest static prior
+      const best = models.reduce((a, b) => staticPrior(validTier, a) >= staticPrior(validTier, b) ? a : b);
+      return { model: best, reason: 'insufficient data, using heuristic', confidence: 0.3, explored: false };
+    }
+    const epsilon = Math.max(MIN_EPSILON, 0.5 * Math.pow(0.9, totalObs));
+    const explored = Math.random() < epsilon;
+    if (explored) {
+      const model = models[Math.floor(Math.random() * models.length)];
+      return { model, reason: 'exploration', confidence: epsilon, explored: true };
+    }
+    // Exploitation: pick highest blended score
+    const cell = state[cellKey] ?? {};
+    let bestModel = models[0];
+    let bestScore = -Infinity;
+    for (const m of models) {
+      const entry = cell[m];
+      // Stored values come from a JSON file; anything non-numeric falls back
+      // to "no learned data" so it cannot turn the score into NaN.
+      const ema = Number.isFinite(entry?.ema) ? entry.ema : staticPrior(validTier, m);
+      const n = Number.isFinite(entry?.observations) ? entry.observations : 0;
+      const score = blended(ema, n, validTier, m);
+      if (score > bestScore) { bestScore = score; bestModel = m; }
+    }
+    return { model: bestModel, reason: 'exploitation', confidence: 1 - epsilon, explored: false };
+  } catch {
+    return { model: 'sonnet', reason: 'error fallback', confidence: 0.1, explored: false };
+  }
+}
+/**
+ * Fold one reward into the EMA for `model` within `cellKey` and persist it.
+ *
+ * @param {string} cellKey  Cell identifier, `<tier>:<intent>` as built by adviseModel.
+ * @param {string} model
+ * @param {number} reward   Expected in [0, 1]. Non-finite values are ignored
+ *   (one NaN would otherwise stick in the EMA forever); out-of-range values
+ *   are clamped.
+ * @param {string} [cwd]
+ * @returns {void} Never throws.
+ */
+export function recordReward(cellKey, model, reward, cwd) {
+  try {
+    if (typeof reward !== 'number' || !Number.isFinite(reward)) return;
+    const bounded = Math.min(1, Math.max(0, reward));
+    const state = loadState(cwd);
+    const storedCell = state[cellKey];
+    const cell = storedCell !== null && typeof storedCell === 'object' ? storedCell : {};
+    const storedEntry = cell[model];
+    const prev = storedEntry !== null && typeof storedEntry === 'object' ? storedEntry : {};
+    // First observation (or unusable stored value): seed the EMA with the reward.
+    const prevEma = Number.isFinite(prev.ema) ? prev.ema : bounded;
+    const prevObs = Number.isFinite(prev.observations) ? prev.observations : 0;
+    cell[model] = {
+      ...prev,
+      ema: ALPHA * bounded + (1 - ALPHA) * prevEma,
+      observations: prevObs + 1,
+      lastUpdated: new Date().toISOString(),
+      lastReward: bounded,
+    };
+    state[cellKey] = cell;
+    saveState(state, cwd);
+  } catch {
+    // non-throwing
+  }
+}
+/**
+ * Summarise the persisted routing state.
+ *
+ * Malformed cells or entries (anything that is not an object) are skipped so a
+ * single bad value cannot blank the whole report, and entries without a
+ * numeric EMA are left out of the rankings.
+ *
+ * @param {string} [cwd]
+ * @returns {{ cells: object, totalObservations: number, topPerformers: object[], worstPerformers: object[] }}
+ *   `cells` maps cellKey → model → `{ ema, observations }`; the performer lists
+ *   hold up to five `{ cell, model, ema, observations }` rows each, ranked by
+ *   EMA (highest first for top, lowest first for worst). Never throws.
+ */
+export function getRoutingStats(cwd) {
+  try {
+    const state = loadState(cwd);
+    const cells = {};
+    const ranked = [];
+    let totalObservations = 0;
+    const isObject = (value) => value !== null && typeof value === 'object';
+    for (const [cellKey, models] of Object.entries(state)) {
+      // `__proto__` as a key would make the writes below land on Object.prototype.
+      if (cellKey === '__proto__' || !isObject(models)) continue;
+      cells[cellKey] = {};
+      for (const [model, entry] of Object.entries(models)) {
+        if (model === '__proto__' || !isObject(entry)) continue;
+        const obs = Number.isFinite(entry.observations) ? entry.observations : 0;
+        cells[cellKey][model] = { ema: entry.ema, observations: obs };
+        totalObservations += obs;
+        if (Number.isFinite(entry.ema)) {
+          ranked.push({ cell: cellKey, model, ema: entry.ema, observations: obs });
+        }
+      }
+    }
+    ranked.sort((a, b) => b.ema - a.ema);
+    return {
+      cells,
+      totalObservations,
+      topPerformers: ranked.slice(0, 5),
+      worstPerformers: ranked.slice(-5).reverse(),
+    };
+  } catch {
+    return { cells: {}, totalObservations: 0, topPerformers: [], worstPerformers: [] };
+  }
+}
+/**
+ * Forget everything the advisor has learned by persisting an empty state.
+ * @param {string} [cwd]
+ */
+export function resetAdvisor(cwd) {
+  const emptyState = {};
+  try {
+    saveState(emptyState, cwd);
+  } catch {
+    // non-throwing
+  }
+}

package/src/signal.mjs ADDED Viewed

@@ -0,0 +1,114 @@
+// signal.mjs — Compound outcome signal scoring
+// Combines multiple weak signals into one reliable reward score.
+import { existsSync } from 'node:fs';
+import { isAbsolute, join, relative } from 'node:path';
+import { execSync } from 'node:child_process';
+/** Expected wall-clock duration per tier, in milliseconds. */
+export const EXPECTED_DURATION_MS = { search: 15000, execute: 45000, think: 30000, review: 40000 };
+/**
+ * Score how close a run's duration was to the expectation for its tier.
+ *
+ * With `ratio = durationMs / expected`:
+ *   ratio < 0.2        → 0.5
+ *   0.2 ≤ ratio < 0.5  → linear ramp 0.5 → 1.0
+ *   0.5 ≤ ratio ≤ 1.5  → 1.0
+ *   1.5 < ratio ≤ 3.0  → linear ramp 1.0 → 0.3
+ *   ratio > 3.0        → 0.3
+ *
+ * @param {number} durationMs
+ * @param {string} tier  Unknown tiers use the `execute` expectation.
+ * @returns {number|null} Score in [0.3, 1.0], or `null` when the duration is
+ *   not a usable non-negative number (rather than leaking NaN to callers).
+ */
+export function scoreDurationRatio(durationMs, tier) {
+  try {
+    // typeof check: an inherited key such as "constructor" must not be
+    // mistaken for a configured tier.
+    const configured = EXPECTED_DURATION_MS[tier];
+    const expected = typeof configured === 'number' ? configured : EXPECTED_DURATION_MS.execute;
+    const ratio = durationMs / expected;
+    if (!Number.isFinite(ratio) || ratio < 0) return null;
+    if (ratio >= 0.5 && ratio <= 1.5) return 1.0;
+    if (ratio < 0.2) return 0.5;
+    if (ratio > 3.0) return 0.3;
+    if (ratio < 0.5) return 0.5 + ((ratio - 0.2) / (0.5 - 0.2)) * 0.5;
+    // ratio 1.5–3.0
+    return 1.0 - ((ratio - 1.5) / (3.0 - 1.5)) * 0.7;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Fraction of an outcome's changed files that still exist and are not listed
+ * by `git diff --name-only`.
+ *
+ * @param {{ filesChanged?: string[] }} outcome  Entries may be absolute or
+ *   relative to `cwd`. A missing or non-array `filesChanged` counts as "no files".
+ * @param {string} cwd  Directory the relative paths (and git) are resolved against.
+ * @returns {number|null} Value in [0, 1]; `1.0` when there are no files to
+ *   check; `null` if the check itself fails. When git is unavailable every
+ *   existing file counts as survived.
+ */
+export function measureFileSurvival(outcome, cwd) {
+  try {
+    const files = Array.isArray(outcome.filesChanged)
+      ? outcome.filesChanged
+      : [];
+    if (files.length === 0) return 1.0;
+    let changed;
+    try {
+      // `--relative` prints paths relative to `cwd` rather than the repository
+      // root, so they are comparable with the cwd-relative names below.
+      changed = new Set(
+        execSync('git diff --name-only --relative', { cwd, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] })
+          .split('\n')
+          .map((line) => line.trim())
+          .filter(Boolean)
+      );
+    } catch {
+      changed = new Set();
+    }
+    const survived = files.filter((f) => {
+      // Joining an absolute path onto cwd would point at a non-existent file.
+      const abs = isAbsolute(f) ? f : join(cwd, f);
+      // git always prints forward slashes; normalise the cwd-relative form.
+      const rel = relative(cwd, abs).replace(/\\/g, '/');
+      return existsSync(abs) && !changed.has(f) && !changed.has(rel);
+    });
+    return survived.length / files.length;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Combine several weak outcome signals into one reward.
+ *
+ * Signals and base weights: exit success (0.3), duration ratio (0.25), token
+ * efficiency (0.25), file survival (0.2). A signal that is unavailable — or
+ * not a finite number — is `null` and its weight is redistributed over the
+ * remaining signals.
+ *
+ * @param {object} outcome  Reads `tier`, `success`, `status`, `durationMs`,
+ *   `filesChanged` (count or array) and `tokensUsed.output`.
+ * @param {{ fileSurvival?: number|null }} [context]  Delayed survival score, if known.
+ * @returns {{ reward: number, confidence: number, signals: { exitSuccess: number, durationRatio: number|null, tokenEfficiency: number|null, fileSurvival: number|null } }}
+ *   `reward` is in [0, 1]; `confidence` is the summed weight of the signals
+ *   that were available. On an internal error both are 0.
+ */
+export function scoreOutcome(outcome, context = {}) {
+  try {
+    const finiteOrNull = (value) => (Number.isFinite(value) ? value : null);
+    const tier = outcome.tier ?? 'execute';
+    const signals = [];
+    // Signal 1: exit success (weight 0.3)
+    let exitVal;
+    if (outcome.success === true) exitVal = 1.0;
+    else if (outcome.status === 'partial') exitVal = 0.4;
+    else exitVal = 0.0;
+    signals.push({ name: 'exitSuccess', value: exitVal, weight: 0.3 });
+    // Signal 2: duration ratio (weight 0.25)
+    const durationMs = outcome.durationMs ?? 0;
+    const durVal = durationMs > 0 ? finiteOrNull(scoreDurationRatio(durationMs, tier)) : null;
+    signals.push({ name: 'durationRatio', value: durVal, weight: 0.25 });
+    // Signal 3: token efficiency (weight 0.25)
+    let effVal = null;
+    const filesChanged = outcome.filesChanged ?? 0;
+    const fileCount = typeof filesChanged === 'number' ? filesChanged : filesChanged.length;
+    // A think-tier run that changed no files is not penalised for it.
+    if (!(fileCount === 0 && tier === 'think')) {
+      // Without a reported token count, estimate one from the duration.
+      const tokensUsed =
+        outcome.tokensUsed?.output ??
+        (durationMs > 0 ? Math.round(durationMs / 100) : null);
+      if (tokensUsed !== null) {
+        const efficiency = fileCount / Math.max(1, tokensUsed / 1000);
+        // Non-numeric inputs give NaN here; leave the signal unavailable.
+        if (Number.isFinite(efficiency)) {
+          if (efficiency > 2) effVal = 1.0;
+          else if (efficiency >= 0.5) effVal = 0.5 + ((efficiency - 0.5) / 1.5) * 0.5;
+          else if (efficiency < 0.1) effVal = 0.2;
+          else effVal = 0.2 + ((efficiency - 0.1) / 0.4) * 0.3;
+        }
+      }
+    }
+    signals.push({ name: 'tokenEfficiency', value: effVal, weight: 0.25 });
+    // Signal 4: file survival (weight 0.2) — delayed, may be null
+    const survivalVal = finiteOrNull(context?.fileSurvival);
+    signals.push({ name: 'fileSurvival', value: survivalVal, weight: 0.2 });
+    // Compound score with weight redistribution
+    const active = signals.filter((s) => s.value !== null);
+    const totalWeight = active.reduce((sum, s) => sum + s.weight, 0);
+    const reward = totalWeight > 0
+      ? active.reduce((sum, s) => sum + (s.value * s.weight / totalWeight), 0)
+      : 0;
+    const confidence = totalWeight;
+    return {
+      reward: Math.min(1, Math.max(0, reward)),
+      confidence: Math.min(1, confidence),
+      signals: {
+        exitSuccess: exitVal,
+        durationRatio: durVal,
+        tokenEfficiency: effVal,
+        fileSurvival: survivalVal,
+      },
+    };
+  } catch {
+    // Same shape and value types as the normal path.
+    return { reward: 0, confidence: 0, signals: { exitSuccess: 0, durationRatio: null, tokenEfficiency: null, fileSurvival: null } };
+  }
+}