npm - dual-brain - Versions diffs - 3.1.0 → 3.2.0 - Mend

dual-brain 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/CLAUDE.md +13 -0
package/hooks/budget-balancer.mjs +45 -6
package/hooks/cost-logger.mjs +51 -26
package/hooks/decision-ledger.mjs +299 -0
package/hooks/enforce-tier.mjs +103 -10
package/hooks/gpt-work-dispatcher.mjs +50 -6
package/hooks/profiles.mjs +203 -0
package/hooks/quality-gate.mjs +34 -6
package/hooks/summary-checkpoint.mjs +231 -0
package/install.mjs +367 -9
package/package.json +2 -2
package/hooks/usage-2026-05-14.jsonl +0 -5

package/CLAUDE.md CHANGED Viewed

@@ -32,9 +32,22 @@ Before ending a session with code changes:
 Gate statuses: `pass` (safe to end), `issues_found` (fix first), `needs_human_review` (GPT unavailable).
+## Profiles
+Active profile controls routing posture, budgets, and quality gate behavior.
+Profile persists to `.claude/dual-brain.profile.json` (gitignored).
+- **balanced** (default): Best model per tier, normal budgets, reviews at medium+ risk
+- **cost-saver**: Prefer cheaper models, lower budgets, skip GPT for non-critical
+- **quality-first**: Dual-brain for medium+ risk, higher budgets, stricter reviews
+Switch profiles: `npx dual-brain mode cost-saver`
+Check status: `npx dual-brain status`
 ## Available Tools
 - `node .claude/hooks/cost-report.mjs` — activity and cost estimates
 - `node .claude/hooks/health-check.mjs` — verify system health
 - `node .claude/hooks/budget-balancer.mjs` — provider balance status
+- `node .claude/hooks/decision-ledger.mjs` — routing outcome insights
 - `node .claude/hooks/test-orchestrator.mjs` — run self-tests

package/hooks/budget-balancer.mjs CHANGED Viewed

@@ -48,13 +48,39 @@ const WINDOW_BUDGETS = {
   },
 };
-/** Estimated tokens consumed per call, by tier */
-const TOKENS_PER_CALL = {
+/** Static fallback tokens per call, by tier */
+const TOKENS_PER_CALL_DEFAULT = {
   search:  2_500,
   execute: 5_500,
   think:  11_000,
 };
+/**
+ * Build the per-tier tokens-per-call table.
+ *
+ * Reads `usage-summary-YYYY-MM-DD.json` from this module's directory, where
+ * the date is the first 10 characters of `new Date().toISOString()` (UTC).
+ * For each tier the static default is replaced by
+ * `Math.round(avg_input + avg_output)` taken from the first entry, in the
+ * order `claude:TIER` then `openai:TIER`, whose `count` is at least 5.
+ *
+ * @returns {{search: number, execute: number, think: number}} A fresh object.
+ *   It is a plain copy of TOKENS_PER_CALL_DEFAULT when the summary file is
+ *   missing, unreadable or not valid JSON (any exception is caught).
+ *
+ * NOTE(review): if a qualifying entry lacks `avg_input` or `avg_output`, the
+ * sum is NaN and NaN is stored for that tier; confirm the summary writer
+ * always emits both fields.
+ */
+function getTokensPerCall() {
+  try {
+    const today = new Date().toISOString().slice(0, 10);
+    const summaryPath = join(__dirname, `usage-summary-${today}.json`);
+    const summary = JSON.parse(readFileSync(summaryPath, 'utf8'));
+    const avgs = summary.token_averages || {};
+    const result = { ...TOKENS_PER_CALL_DEFAULT };
+    for (const tier of ['search', 'execute', 'think']) {
+      // Providers are tried in this order; the first with count >= 5 wins, so claude takes precedence over openai
+      for (const provider of ['claude', 'openai']) {
+        const key = `${provider}:${tier}`;
+        if (avgs[key]?.count >= 5) {
+          result[tier] = Math.round(avgs[key].avg_input + avgs[key].avg_output);
+          break;
+        }
+      }
+    }
+    return result;
+  } catch {
+    return { ...TOKENS_PER_CALL_DEFAULT };
+  }
+}
+const TOKENS_PER_CALL = getTokensPerCall();
 /** Default pressure thresholds (fraction 0–1) */
 const DEFAULT_THRESHOLDS = {
   warm:      0.65,
@@ -286,13 +312,26 @@ function chooseProvider(taskProfile = {}) {
     score -= PRESSURE_PENALTY[tierStatus.state] ?? 0;
     // Latency penalty (OpenAI only — Codex has higher startup overhead)
+    // Uses adaptive threshold from observed Codex startup times when available
     if (provider === "openai") {
-      if (estimatedDurationMs < 180_000) {
-        score -= 25; // < 3 min: overhead not worth it
+      let minTaskMs = 180_000;
+      try {
+        const today = new Date().toISOString().slice(0, 10);
+        const summaryPath = join(__dirname, `usage-summary-${today}.json`);
+        const summary = JSON.parse(readFileSync(summaryPath, 'utf8'));
+        const latencies = (summary.codex_latencies || []).map(l => l.startup_ms).filter(Boolean);
+        if (latencies.length >= 5) {
+          const sorted = latencies.sort((a, b) => a - b);
+          const p75 = sorted[Math.floor(sorted.length * 0.75)];
+          minTaskMs = Math.max(90_000, p75 * 4);
+        }
+      } catch {}
+      if (estimatedDurationMs < minTaskMs) {
+        score -= 25;
       } else if (estimatedDurationMs < 600_000) {
-        score -= 10; // < 10 min: minor penalty
+        score -= 10;
       }
-      // >= 10 min: no penalty
     }
     // Underused bonus

package/hooks/cost-logger.mjs CHANGED Viewed

@@ -12,19 +12,25 @@ import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from "fs";
 import { dirname, join } from "path";
 import { fileURLToPath } from "url";
-// ---------------------------------------------------------------------------
-// Paths
-// ---------------------------------------------------------------------------
 const __dirname = dirname(fileURLToPath(import.meta.url));
+const PROFILE_FILE = join(__dirname, '..', 'dual-brain.profile.json');
 function usageFile(date) { // Returns the path of `usage-DATE.jsonl` inside this module's directory
   const d = date || new Date().toISOString().slice(0, 10); // falsy `date` falls back to today's YYYY-MM-DD taken from toISOString() (UTC)
   return join(__dirname, `usage-${d}.jsonl`);
 }
-// Ensure the hooks dir exists (idempotent, defensive)
 mkdirSync(__dirname, { recursive: true });
+function loadActiveProfile() { // Returns the `active` profile name stored in PROFILE_FILE, or 'balanced'
+  try {
+    const data = JSON.parse(readFileSync(PROFILE_FILE, 'utf8'));
+    return data.active || 'balanced'; // missing or falsy `active` falls back to 'balanced'
+  } catch { return 'balanced'; } // unreadable file or invalid JSON also yields 'balanced'
+}
+const SESSION_ID = process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || null;
 // ---------------------------------------------------------------------------
 // Tier classification
 // ---------------------------------------------------------------------------
@@ -135,14 +141,21 @@ function classify(toolName, toolInput = {}, agentModel = null) {
 // Budget alerts
 // ---------------------------------------------------------------------------
-function checkBudget() {
+async function checkBudget() {
   let config;
   try {
     config = JSON.parse(readFileSync(join(__dirname, '..', 'orchestrator.json'), 'utf8'));
   } catch { return null; }
-  const budgets = config.budgets;
+  // Merge profile budget overrides on top of config defaults
+  let budgets = config.budgets;
   if (!budgets) return null;
+  try {
+    const profileData = JSON.parse(readFileSync(PROFILE_FILE, 'utf8'));
+    if (profileData.custom_overrides?.budgets) {
+      budgets = { ...budgets, ...profileData.custom_overrides.budgets };
+    }
+  } catch {}
   // Rate limit alerts
   const cooldownFile = join(__dirname, '.budget-alerted');
@@ -152,18 +165,24 @@ function checkBudget() {
     if (Date.now() - Date.parse(lastAlert) < cooldownMin * 60 * 1000) return null;
   } catch {}
-  // Calculate today's estimated cost
-  const todayFile = usageFile();
-  let records = [];
+  // Use summary checkpoint for fast budget check (O(1) instead of full scan)
+  let totalCost = 0;
   try {
-    records = readFileSync(todayFile, 'utf8').split('\n').filter(Boolean).map(l => {
-      try { return JSON.parse(l); } catch { return null; }
-    }).filter(Boolean);
-  } catch { return null; }
-  // Simple cost estimate using tier heuristics
-  const RATES = { search: 0.003, execute: 0.012, think: 0.055 };
-  const totalCost = records.reduce((sum, r) => sum + (RATES[r.tier] || RATES.execute), 0);
+    const { readSummary } = await import('./summary-checkpoint.mjs');
+    const summary = readSummary();
+    totalCost = summary.totals.cost_estimate;
+  } catch {
+    // Fallback: scan the log (only if summary unavailable)
+    const todayFile = usageFile();
+    let records = [];
+    try {
+      records = readFileSync(todayFile, 'utf8').split('\n').filter(Boolean).map(l => {
+        try { return JSON.parse(l); } catch { return null; }
+      }).filter(Boolean);
+    } catch { return null; }
+    const RATES = { search: 0.003, execute: 0.012, think: 0.055 };
+    totalCost = records.reduce((sum, r) => sum + (RATES[r.tier] || RATES.execute), 0);
+  }
   let msg = null;
   if (budgets.daily_limit_usd && totalCost >= budgets.daily_limit_usd) {
@@ -215,8 +234,8 @@ async function main() {
   const status = (payload?.error || payload?.tool_response?.error || payload?.is_error) ? 'error' : 'ok';
-  const entry = JSON.stringify({
-    schema_version: 2,
+  const entryObj = {
+    schema_version: 3,
     timestamp: new Date().toISOString(),
     tier,
     tool: toolName,
@@ -224,19 +243,25 @@ async function main() {
     provider: detectProvider(model),
     dispatcher: 'claude-code',
     status,
-    session_id: process.env.CLAUDE_SESSION_ID || null,
+    session_id: SESSION_ID,
+    profile: loadActiveProfile(),
     input_tokens: inputTokens,
     output_tokens: outputTokens,
-  });
+  };
+  const entry = JSON.stringify(entryObj);
   try {
     appendFileSync(usageFile(), entry + "\n", { encoding: "utf8", flag: "a" });
-  } catch {
-    // Disk write failed — silently ignore so the hook never blocks the IDE
-  }
+  } catch {}
+  // Update summary checkpoint (non-blocking, best-effort)
+  try {
+    const { updateSummary } = await import('./summary-checkpoint.mjs');
+    updateSummary(entryObj);
+  } catch {}
-  // Check budget thresholds and emit a systemMessage if over limit
-  const budgetMsg = checkBudget();
+  const budgetMsg = await checkBudget();
   // PostToolUse hooks must emit a JSON object to stdout
   if (budgetMsg) {

package/hooks/decision-ledger.mjs ADDED Viewed

@@ -0,0 +1,299 @@
+#!/usr/bin/env node
+/**
+ * decision-ledger.mjs — Routing outcome tracking for the Dual-Brain Orchestrator.
+ *
+ * Records every routing decision with its context, and later enriches it with
+ * outcome data (duration, success, retries, user overrides, follow-up fixes).
+ *
+ * Over time, this builds a per-repo knowledge base of which provider/model
+ * performs best for which task shapes.
+ *
+ * Exported API:
+ *   recordDecision(decision)     → log a routing decision, returns decision_id
+ *   recordOutcome(id, outcome)   → enrich a decision with its outcome
+ *   getInsights(opts?)           → aggregate patterns from the ledger
+ *
+ * CLI:
+ *   node .claude/hooks/decision-ledger.mjs                # show insights
+ *   node .claude/hooks/decision-ledger.mjs --json         # JSON output
+ *   node .claude/hooks/decision-ledger.mjs --recent 20    # last N decisions
+ */
+import { appendFileSync, existsSync, readFileSync } from 'fs';
+import { dirname, join } from 'path';
+import { fileURLToPath } from 'url';
+import { randomBytes } from 'crypto';
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const LEDGER_FILE = join(__dirname, 'decision-ledger.jsonl');
+function genId() { // Returns a fresh id: 6 random bytes from `crypto.randomBytes`, hex-encoded (12 characters)
+  return randomBytes(6).toString('hex');
+}
+function recordDecision(decision = {}) { /*
+   * Append one `type: 'decision'` record to LEDGER_FILE and return its id.
+   *
+   * @param {object} [decision] Routing details; every field is optional.
+   * @returns {string} The id generated by genId. It is returned even when the
+   *   append fails, because write errors are swallowed below.
+   *
+   * Defaults: profile 'balanced', tier 'execute', provider 'claude',
+   * model 'unknown'; every other missing field is stored as null.
+   * `followed` uses `??`, so an explicit `false` is kept. The remaining
+   * fields use `||`, so falsy inputs such as 0 or '' are stored as null too.
+   */
+  const id = genId();
+  const entry = JSON.stringify({
+    type: 'decision',
+    id,
+    timestamp: new Date().toISOString(),
+    session_id: decision.session_id || process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || null, // caller value, then env var, then parent pid as a string
+    profile: decision.profile || 'balanced',
+    // Routing context
+    tier: decision.tier || 'execute',
+    provider: decision.provider || 'claude',
+    model: decision.model || 'unknown',
+    recommended_model: decision.recommended_model || null,
+    followed: decision.followed ?? null,
+    // Task shape
+    task_type: decision.task_type || null,
+    prompt_hash: decision.prompt_hash || null,
+    estimated_duration_ms: decision.estimated_duration_ms || null,
+    file_count: decision.file_count || null,
+    context_coupling: decision.context_coupling || null,
+    isolation: decision.isolation || null,
+    // Provider state at decision time
+    claude_pressure: decision.claude_pressure || null,
+    openai_pressure: decision.openai_pressure || null,
+  });
+  try {
+    appendFileSync(LEDGER_FILE, entry + '\n');
+  } catch {} // best-effort: a failed write is ignored
+  return id;
+}
+function recordOutcome(decisionId, outcome = {}) { /*
+   * Append one `type: 'outcome'` record to LEDGER_FILE, linked to a decision
+   * through its `decision_id` field.
+   *
+   * @param decisionId Written as-is into `decision_id`; it is not checked
+   *   against the ledger. Expected to be an id returned by recordDecision.
+   * @param {object} [outcome] Outcome details; every field is optional.
+   * @returns {undefined} Nothing is returned; write errors are swallowed.
+   *
+   * `success`, `tests_passed`, `tests_failed`, `user_override` and
+   * `followup_fix_needed` use `??`, so explicit `false` or 0 values are kept.
+   * `retries` falls back to 0. The remaining fields use `||`, so a 0 (for
+   * example a 0 ms duration or 0 tokens) is stored as null.
+   */
+  const entry = JSON.stringify({
+    type: 'outcome',
+    decision_id: decisionId,
+    timestamp: new Date().toISOString(),
+    // Timing
+    actual_duration_ms: outcome.actual_duration_ms || null,
+    codex_startup_ms: outcome.codex_startup_ms || null,
+    // Quality signals
+    success: outcome.success ?? null,
+    tests_passed: outcome.tests_passed ?? null,
+    tests_failed: outcome.tests_failed ?? null,
+    retries: outcome.retries || 0,
+    user_override: outcome.user_override ?? false,
+    followup_fix_needed: outcome.followup_fix_needed ?? false,
+    // Cost
+    actual_input_tokens: outcome.actual_input_tokens || null,
+    actual_output_tokens: outcome.actual_output_tokens || null,
+    estimated_cost_usd: outcome.estimated_cost_usd || null,
+    // Files
+    files_changed: outcome.files_changed || null,
+    files_read: outcome.files_read || null,
+  });
+  try {
+    appendFileSync(LEDGER_FILE, entry + '\n');
+  } catch {} // best-effort: a failed write is ignored
+}
+function loadLedger() { /*
+   * Read LEDGER_FILE and split its JSONL records by their `type` field.
+   *
+   * @returns {{decisions: object[], outcomes: object[]}} Records in file
+   *   order. Both arrays are empty when the file is missing or unreadable.
+   */
+  if (!existsSync(LEDGER_FILE)) return { decisions: [], outcomes: [] };
+  let raw;
+  try { raw = readFileSync(LEDGER_FILE, 'utf8'); } catch { return { decisions: [], outcomes: [] }; }
+  const decisions = [];
+  const outcomes = [];
+  for (const line of raw.split('\n').filter(Boolean)) { // empty lines are dropped before parsing
+    try {
+      const entry = JSON.parse(line);
+      if (entry.type === 'decision') decisions.push(entry);
+      else if (entry.type === 'outcome') outcomes.push(entry); // records of any other type are ignored
+    } catch {} // a line that fails to parse is skipped
+  }
+  return { decisions, outcomes };
+}
+function mergeDecisionsWithOutcomes(decisions, outcomes) { /*
+   * Attach to each decision the outcome whose `decision_id` equals its `id`.
+   *
+   * @returns {object[]} New array of shallow copies of the decisions, each
+   *   with an `outcome` property (null when no outcome matches). The input
+   *   arrays and their elements are not mutated.
+   *
+   * NOTE(review): outcomeMap is a plain object, so an `id` equal to an
+   * inherited property name (e.g. "constructor") would resolve to the
+   * prototype member instead of null. Ids produced by genId are hex strings
+   * and cannot collide; confirm no other writer produces ids.
+   */
+  const outcomeMap = {};
+  for (const o of outcomes) {
+    outcomeMap[o.decision_id] = o; // a later outcome for the same id replaces an earlier one
+  }
+  return decisions.map(d => ({
+    ...d,
+    outcome: outcomeMap[d.id] || null,
+  }));
+}
+function getInsights(opts = {}) { /*
+   * Aggregate the ledger into summary statistics.
+   *
+   * @param {object} [opts] Accepted but not read anywhere in this function.
+   * @returns {object} Object with the keys total_decisions, with_outcomes,
+   *   compliance_rate, provider_stats, tier_stats, task_patterns and
+   *   recommendations.
+   *
+   * Only decisions that have a matching outcome feed provider_stats,
+   * tier_stats and task_patterns. compliance_rate is computed over all
+   * decisions, with or without an outcome.
+   */
+  const { decisions, outcomes } = loadLedger();
+  const merged = mergeDecisionsWithOutcomes(decisions, outcomes);
+  const withOutcomes = merged.filter(d => d.outcome);
+  // Provider win rates
+  const providerStats = {};
+  for (const d of withOutcomes) {
+    const key = d.provider;
+    if (!providerStats[key]) providerStats[key] = { total: 0, success: 0, overrides: 0, followups: 0, totalDuration: 0, counted: 0 };
+    providerStats[key].total++;
+    if (d.outcome.success) providerStats[key].success++; // a null (unknown) success is not counted as a success
+    if (d.outcome.user_override) providerStats[key].overrides++;
+    if (d.outcome.followup_fix_needed) providerStats[key].followups++;
+    if (d.outcome.actual_duration_ms) { // `counted` tracks only outcomes that carry a non-zero duration
+      providerStats[key].totalDuration += d.outcome.actual_duration_ms;
+      providerStats[key].counted++;
+    }
+  }
+  // Tier performance
+  const tierStats = {};
+  for (const d of withOutcomes) {
+    const key = `${d.provider}:${d.tier}`;
+    if (!tierStats[key]) tierStats[key] = { total: 0, success: 0, avgDuration: 0, counted: 0 };
+    tierStats[key].total++;
+    if (d.outcome.success) tierStats[key].success++;
+    if (d.outcome.actual_duration_ms) {
+      tierStats[key].counted++;
+      tierStats[key].avgDuration += (d.outcome.actual_duration_ms - tierStats[key].avgDuration) / tierStats[key].counted; // incremental running mean
+    }
+  }
+  // Task type patterns
+  const taskPatterns = {};
+  for (const d of withOutcomes) {
+    if (!d.task_type) continue; // decisions without a task_type are left out of the patterns
+    const key = d.task_type;
+    if (!taskPatterns[key]) taskPatterns[key] = {};
+    const pk = d.provider;
+    if (!taskPatterns[key][pk]) taskPatterns[key][pk] = { total: 0, success: 0 };
+    taskPatterns[key][pk].total++;
+    if (d.outcome.success) taskPatterns[key][pk].success++;
+  }
+  // Compliance rate
+  const total = decisions.length;
+  const followedCount = decisions.filter(d => d.followed === true).length; // NOTE(review): followed null (unknown) counts as not followed; confirm intended
+  const compliance = total > 0 ? Math.round((followedCount / total) * 100) : 0; // whole-number percentage, 0 for an empty ledger
+  // Recommendations
+  const recommendations = [];
+  for (const [task, providers] of Object.entries(taskPatterns)) {
+    const sorted = Object.entries(providers)
+      .map(([p, s]) => ({ provider: p, rate: s.total > 0 ? s.success / s.total : 0, total: s.total }))
+      .filter(x => x.total >= 3) // providers with fewer than 3 outcomes for this task type are ignored
+      .sort((a, b) => b.rate - a.rate);
+    if (sorted.length >= 2 && sorted[0].rate > sorted[1].rate + 0.1) { // the leader must beat the runner-up by more than 0.1 in success rate
+      recommendations.push(`${sorted[0].provider} wins ${task} tasks (${Math.round(sorted[0].rate * 100)}% vs ${Math.round(sorted[1].rate * 100)}%)`);
+    }
+  }
+  return {
+    total_decisions: total,
+    with_outcomes: withOutcomes.length,
+    compliance_rate: compliance,
+    provider_stats: providerStats,
+    tier_stats: tierStats,
+    task_patterns: taskPatterns,
+    recommendations,
+  };
+}
+// ─── CLI ────────────────────────────────────────────────────────────────────
+function printInsights() { /*
+   * Print the result of getInsights() to stdout as a box-drawn table.
+   *
+   * Prints a short "nothing recorded" notice and returns early when the
+   * ledger holds no decisions. The box interior is W (52) characters wide;
+   * each text line is padded or truncated to W - 2 characters.
+   */
+  const insights = getInsights();
+  if (insights.total_decisions === 0) {
+    console.log('');
+    console.log('  No routing decisions recorded yet.');
+    console.log('  The decision ledger builds over time as you use Claude Code.');
+    console.log('');
+    return;
+  }
+  const W = 52;
+  const pad = (s, len = W - 2) => { // right-pads with spaces, or cuts the text off at `len` characters
+    s = String(s);
+    return s.length >= len ? s.slice(0, len) : s + ' '.repeat(len - s.length);
+  };
+  const ln = (s) => `║ ${pad(s)} ║`;
+  const br = (l, r) => l + '═'.repeat(W) + r;
+  const sep = () => '╠' + '═'.repeat(W) + '╣';
+  const lines = [];
+  lines.push(br('╔', '╗'));
+  lines.push(ln('Decision Ledger Insights'));
+  lines.push(sep());
+  lines.push(ln(`Total decisions:  ${insights.total_decisions}`));
+  lines.push(ln(`With outcomes:    ${insights.with_outcomes}`));
+  lines.push(ln(`Compliance rate:  ${insights.compliance_rate}%`));
+  lines.push(sep());
+  // Provider stats
+  lines.push(ln('Provider Performance'));
+  for (const [provider, stats] of Object.entries(insights.provider_stats)) {
+    const rate = stats.total > 0 ? Math.round((stats.success / stats.total) * 100) : 0;
+    const avgMs = stats.counted > 0 ? Math.round(stats.totalDuration / stats.counted / 1000) : '?'; // despite the name this holds whole seconds (ms / 1000); '?' when no durations were recorded
+    lines.push(ln(`  ${provider}: ${rate}% success, ${stats.overrides} overrides, avg ${avgMs}s`));
+    if (stats.followups > 0) {
+      lines.push(ln(`    ${stats.followups} follow-up fixes needed`));
+    }
+  }
+  // Recommendations
+  if (insights.recommendations.length > 0) {
+    lines.push(sep());
+    lines.push(ln('Recommendations'));
+    for (const rec of insights.recommendations) {
+      lines.push(ln(`  ${rec}`)); // long recommendation strings are truncated by pad()
+    }
+  }
+  lines.push(br('╚', '╝'));
+  console.log('');
+  for (const l of lines) console.log(`  ${l}`);
+  console.log('');
+}
+function printRecent(n) { /*
+   * Print the last `n` ledger decisions to stdout, one per line, in file order.
+   *
+   * @param {number} n Number of trailing decisions to show, used as slice(-n).
+   *
+   * NOTE(review): slice(-n) returns the whole list for n = 0 and skips the
+   * first |n| entries for a negative n. The CLI below never passes 0, but a
+   * negative value typed after --recent reaches this function unchanged.
+   */
+  const { decisions, outcomes } = loadLedger();
+  const merged = mergeDecisionsWithOutcomes(decisions, outcomes);
+  const recent = merged.slice(-n);
+  if (recent.length === 0) {
+    console.log('  No decisions recorded yet.');
+    return;
+  }
+  console.log('');
+  for (const d of recent) {
+    const time = d.timestamp?.slice(11, 19) || '??:??:??'; // characters 11-18 of the ISO timestamp, i.e. HH:MM:SS
+    const status = d.outcome?.success ? '✓' : d.outcome ? '✗' : '?'; // check = truthy success, cross = outcome without success, ? = no outcome yet
+    const dur = d.outcome?.actual_duration_ms ? `${Math.round(d.outcome.actual_duration_ms / 1000)}s` : '';
+    console.log(`  ${status} ${time} ${d.provider}/${d.model} [${d.tier}] ${dur}`);
+  }
+  console.log('');
+}
+/*
+ * CLI entry. Runs only when this module's own file path equals
+ * process.argv[1]; importing the module does not trigger it.
+ *
+ *   --json        print getInsights() as indented JSON
+ *   --recent N    print the last N decisions (20 when N is missing, 0 or
+ *                 not a number)
+ *   (no flag)     print the boxed insights table
+ *
+ * --json takes precedence when both flags are given.
+ *
+ * NOTE(review): the path check is an exact string comparison, so it
+ * presumably fails when the script is started through a symlink or a
+ * differently spelled path; verify against how the package launches it.
+ * NOTE(review): parseInt is called without a radix argument.
+ */
+if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
+  const args = process.argv.slice(2);
+  if (args.includes('--json')) {
+    console.log(JSON.stringify(getInsights(), null, 2));
+  } else if (args.includes('--recent')) {
+    const idx = args.indexOf('--recent');
+    const n = parseInt(args[idx + 1]) || 20;
+    printRecent(n);
+  } else {
+    printInsights();
+  }
+}
+export { recordDecision, recordOutcome, getInsights, loadLedger };