dual-brain 0.2.24 → 0.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dual-brain",
3
- "version": "0.2.24",
3
+ "version": "0.2.25",
4
4
  "description": "AI orchestration across Claude + OpenAI subscriptions — smart routing, budget awareness, and dual-brain collaboration",
5
5
  "type": "module",
6
6
  "bin": {
@@ -47,7 +47,9 @@
47
47
  "./envelope": "./src/envelope.mjs",
48
48
  "./session-lock": "./src/session-lock.mjs",
49
49
  "./governance": "./src/governance.mjs",
50
- "./context-intel": "./src/context-intel.mjs"
50
+ "./context-intel": "./src/context-intel.mjs",
51
+ "./signal": "./src/signal.mjs",
52
+ "./routing-advisor": "./src/routing-advisor.mjs"
51
53
  },
52
54
  "keywords": [
53
55
  "claude-code",
@@ -134,6 +136,8 @@
134
136
  "src/session-lock.mjs",
135
137
  "src/governance.mjs",
136
138
  "src/context-intel.mjs",
139
+ "src/signal.mjs",
140
+ "src/routing-advisor.mjs",
137
141
  "bin/*.mjs",
138
142
  "hooks/enforce-tier.mjs",
139
143
  "hooks/cost-logger.mjs",
package/src/outcome.mjs CHANGED
@@ -1,6 +1,7 @@
1
- import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync } from 'fs';
1
+ import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync, readdirSync } from 'fs';
2
2
  import { join } from 'path';
3
3
  import { randomUUID } from 'crypto';
4
+ import { execSync } from 'child_process';
4
5
 
5
6
  const STOP_WORDS = new Set([
6
7
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'from',
@@ -204,6 +205,77 @@ export async function getRelevantOutcomes(prompt, files = [], cwd, options = {})
204
205
  }
205
206
  }
206
207
 
208
/**
 * Score how many files touched by recent outcomes are still in place.
 *
 * Reads up to the last 20 `outcome_*.json` files (by sorted filename) from
 * `<cwd>/.dualbrain/outcomes`. For each record that has no `survivalScore`
 * yet and carries a non-empty `result.filesChanged` array, a file "survives"
 * when it still exists on disk and is not listed by `git diff --name-only`.
 * The resulting fraction is written back into the record as `survivalScore`.
 *
 * Best-effort throughout: unreadable records, failed writes and a missing or
 * failing git are skipped rather than reported.
 *
 * @param {string} cwd - project root that contains `.dualbrain/outcomes`
 * @returns {Promise<Array<{id: *, survivalScore: number}>>} one entry per
 *   record scored and persisted during this call; `[]` on any failure
 */
export async function checkFileSurvival(cwd) {
  try {
    const dir = join(cwd, '.dualbrain', 'outcomes');
    if (!existsSync(dir)) return [];

    // Collect up to the last 20 individual outcome JSON files
    let files;
    try {
      files = readdirSync(dir)
        .filter((f) => f.startsWith('outcome_') && f.endsWith('.json'))
        .sort()
        .slice(-20);
    } catch {
      return [];
    }

    // Get current git-modified files (best-effort). `--relative` makes git
    // print paths relative to `cwd` instead of the repository root, so the
    // join below stays correct when `cwd` is a subdirectory of the repo.
    const modifiedFiles = new Set();
    try {
      const gitOut = execSync('git diff --name-only --relative', {
        cwd,
        stdio: ['ignore', 'pipe', 'pipe'],
      }).toString();
      for (const line of gitOut.split('\n')) {
        const rel = line.trim();
        if (!rel) continue;
        modifiedFiles.add(rel);
        modifiedFiles.add(join(cwd, rel));
      }
    } catch {
      // git unavailable or not a repository — proceed without modified-file check
    }

    const scored = [];

    for (const fname of files) {
      const fpath = join(dir, fname);
      let record;
      try {
        record = JSON.parse(readFileSync(fpath, 'utf8'));
      } catch {
        continue;
      }

      // A file holding `null` or a primitive is valid JSON but not a record;
      // skip it instead of letting one bad file abort the whole batch.
      if (record === null || typeof record !== 'object') continue;

      // Skip if already scored or no filesChanged list
      if (record.survivalScore !== undefined) continue;
      const changedFiles = record.result?.filesChanged;
      if (!Array.isArray(changedFiles) || changedFiles.length === 0) continue;

      let survived = 0;
      for (const f of changedFiles) {
        // Non-string entries cannot name a file; they count as not survived.
        if (typeof f !== 'string' || f.length === 0) continue;
        const absPath = f.startsWith('/') ? f : join(cwd, f);
        const modified = modifiedFiles.has(f) || modifiedFiles.has(absPath);
        if (existsSync(absPath) && !modified) survived++;
      }

      const survivalScore = survived / changedFiles.length;
      record.survivalScore = survivalScore;

      try {
        writeFileSync(fpath, JSON.stringify(record, null, 2), 'utf8');
      } catch {
        // write failed — do not report a score that was not persisted
        continue;
      }

      scored.push({ id: record.id, survivalScore });
    }

    return scored;
  } catch {
    return [];
  }
}
278
+
207
279
  export async function getOutcomeStats(cwd, days = 7) {
208
280
  try {
209
281
  const allFiles = last7DaysFiles(cwd).slice(0, days);
package/src/pipeline.mjs CHANGED
@@ -10,7 +10,7 @@ import { detectTask } from './detect.mjs';
10
10
  import { decideRoute, getWorkStyle, WORK_STYLES } from './decide.mjs';
11
11
  import { dispatch } from './dispatch.mjs';
12
12
  import { loadProfile } from './profile.mjs';
13
- import { mkdirSync, writeFileSync } from 'node:fs';
13
+ import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
14
14
  import { join } from 'node:path';
15
15
  import { buildContextPack as buildContextPackIntel } from './context.mjs';
16
16
  import { compilePacket } from './context-intel.mjs';
@@ -708,6 +708,18 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
708
708
  // profile unavailable — proceed
709
709
  }
710
710
 
711
+ // Auto-disable if ROI is bad (< 30% hit rate after 10+ observations)
712
+ {
713
+ const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');
714
+ let metrics = { hits: 0, misses: 0, totalTokens: 0 };
715
+ try { metrics = JSON.parse(readFileSync(metricsPath, 'utf8')); } catch {}
716
+ if (metrics.hits + metrics.misses >= 10 && metrics.hits / (metrics.hits + metrics.misses) < 0.3) {
717
+ const verbose = opts.verbose ?? false;
718
+ if (verbose) process.stderr.write('[dual-brain] pre-dispatch think disabled: hit rate below 30%\n');
719
+ return { refined: false, reason: 'think ROI too low, auto-disabled' };
720
+ }
721
+ }
722
+
711
723
  try {
712
724
  log('[dual-brain] pre-dispatch think: refining work spec...');
713
725
 
@@ -756,12 +768,14 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
756
768
  if (!parsed || typeof parsed.confidence !== 'number' || parsed.confidence <= 0.7) {
757
769
  const reason = !parsed ? 'unparseable response' : `confidence ${parsed.confidence} <= 0.7`;
758
770
  log(`[dual-brain] pre-dispatch think: skipped (${reason})`);
771
+ _recordThinkMetrics(false, cwd);
759
772
  return { refined: false };
760
773
  }
761
774
 
762
775
  const ws = parsed.workSpec;
763
776
  if (!ws || !ws.objective) {
764
777
  log('[dual-brain] pre-dispatch think: skipped (no workSpec.objective)');
778
+ _recordThinkMetrics(false, cwd);
765
779
  return { refined: false };
766
780
  }
767
781
 
@@ -774,19 +788,44 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
774
788
 
775
789
  log(`[dual-brain] think refined: "${newObjective.slice(0, 60)}..." (confidence: ${parsed.confidence})`);
776
790
 
791
+ _recordThinkMetrics(true, cwd);
777
792
  return {
778
- refined: true,
779
- prompt: newObjective,
780
- files: newFiles,
781
- decision: newDecision,
793
+ refined: true,
794
+ prompt: newObjective,
795
+ files: newFiles,
796
+ decision: newDecision,
797
+ confidence: parsed.confidence,
782
798
  };
783
799
  } catch (err) {
784
800
  // Non-blocking on any failure
785
801
  log(`[dual-brain] pre-dispatch think: skipped (error: ${err.message})`);
802
+ _recordThinkMetrics(false, cwd);
786
803
  return { refined: false };
787
804
  }
788
805
  }
789
806
 
807
/**
 * Record a think hit or miss into `<cwd>/.dualbrain/think-metrics.json`.
 *
 * Non-blocking: every failure (unreadable file, unwritable directory) is
 * swallowed so a metrics problem can never break the pipeline.
 *
 * Stored counters are normalized before they are incremented. A metrics file
 * that is missing fields, holds non-numeric values, or is not a JSON object
 * is treated as zeroed counters, so one bad write cannot leave the counters
 * stuck at NaN (which JSON serializes as `null`).
 *
 * @param {boolean} hit — true if the think agent produced a usable refinement
 * @param {string} cwd — project root containing (or to contain) `.dualbrain`
 * @returns {void}
 */
function _recordThinkMetrics(hit, cwd) {
  const TOKENS_PER_THINK = 3000; // budget per think call
  const asCount = (v) => (typeof v === 'number' && Number.isFinite(v) && v >= 0 ? v : 0);

  try {
    const dir = join(cwd, '.dualbrain');
    const metricsPath = join(dir, 'think-metrics.json');

    let stored = {};
    try {
      const parsed = JSON.parse(readFileSync(metricsPath, 'utf8'));
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        stored = parsed;
      }
    } catch {
      // missing or unparseable metrics file — start from zero
    }

    const metrics = {
      ...stored, // keep any extra fields already present in the file
      hits: asCount(stored.hits) + (hit ? 1 : 0),
      misses: asCount(stored.misses) + (hit ? 0 : 1),
      totalTokens: asCount(stored.totalTokens) + TOKENS_PER_THINK,
      lastUpdated: new Date().toISOString(),
    };

    mkdirSync(dir, { recursive: true });
    writeFileSync(metricsPath, JSON.stringify(metrics, null, 2) + '\n');
  } catch { /* non-blocking */ }
}
828
+
790
829
  // ─── Main entry point ─────────────────────────────────────────────────────────
791
830
 
792
831
  /**
@@ -1230,6 +1269,22 @@ export async function runPipeline(trigger, prompt, options = {}) {
1230
1269
  run._thinkRefinedPrompt = thinkRefinement.prompt;
1231
1270
  run._thinkRefinedFiles = thinkRefinement.files;
1232
1271
  decision = thinkRefinement.decision;
1272
+
1273
+ // Cascade: if think agent is highly confident and task is simple, downgrade worker model
1274
+ if (thinkRefinement.decision) {
1275
+ const thinkConf = thinkRefinement.confidence || 0;
1276
+ const currentModel = decision.model || 'sonnet';
1277
+ if (thinkConf >= 0.9 && currentModel !== 'haiku') {
1278
+ // High confidence from thinker = clear spec = cheaper model can execute
1279
+ const prevModel = decision.model;
1280
+ decision.model = 'haiku';
1281
+ if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded ${prevModel || 'sonnet'} to haiku\n`);
1282
+ } else if (thinkConf >= 0.75 && currentModel === 'opus') {
1283
+ // Moderate confidence but spec is clear enough for sonnet
1284
+ decision.model = 'sonnet';
1285
+ if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded opus to sonnet\n`);
1286
+ }
1287
+ }
1233
1288
  }
1234
1289
  }
1235
1290
 
package/src/routing-advisor.mjs ADDED
@@ -0,0 +1,138 @@
1
+ // routing-advisor.mjs — EMA + epsilon-greedy routing advisor
2
+ // Learns which model works best for which task type from outcome signals.
3
+
4
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+
7
/** EMA smoothing factor: weight of the newest reward in the running average. */
const ALPHA = 0.3;
/** Floor for the exploration probability. */
const MIN_EPSILON = 0.1;
/** Cells with fewer total observations than this use the static heuristic. */
const MIN_OBSERVATIONS = 5;
/** Pseudo-observation count given to the static prior when blending with the EMA. */
const PRIOR_WEIGHT = 5;

/** Hand-set prior score per `tier:model` pair. */
const STATIC_PRIORS = {
  'search:haiku': 0.85, 'search:sonnet': 0.70, 'search:opus': 0.50,
  'execute:haiku': 0.55, 'execute:sonnet': 0.80, 'execute:opus': 0.85,
  'think:haiku': 0.30, 'think:sonnet': 0.70, 'think:opus': 0.90,
  'review:haiku': 0.40, 'review:sonnet': 0.75, 'review:opus': 0.85,
};

/** Models that may be recommended for each tier. */
const VALID_MODELS = {
  search: ['haiku', 'sonnet'],
  execute: ['haiku', 'sonnet', 'opus'],
  think: ['sonnet', 'opus'],
  review: ['sonnet', 'opus'],
};

/** Names that must never be used as dynamic keys on plain objects. */
const UNSAFE_KEYS = new Set(['__proto__', 'constructor', 'prototype']);

const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
const isRecord = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
const isSafeKey = (k) => typeof k === 'string' && k.length > 0 && !UNSAFE_KEYS.has(k);
const finiteOr = (v, fallback) => (typeof v === 'number' && Number.isFinite(v) ? v : fallback);

/** Path of the persisted routing state for a project root (defaults to process.cwd()). */
function stateFile(cwd) { return join(cwd || process.cwd(), '.dualbrain', 'routing-state.json'); }

/**
 * Load the persisted routing state.
 * Returns `{}` when the file is missing, unreadable, unparseable, or does not
 * contain a JSON object (e.g. `null` or an array), so callers can always index it.
 */
function loadState(cwd) {
  try {
    const p = stateFile(cwd);
    if (!existsSync(p)) return {};
    const parsed = JSON.parse(readFileSync(p, 'utf8'));
    return isRecord(parsed) ? parsed : {};
  } catch { return {}; }
}

/**
 * Persist the routing state. Writes to a `.tmp` sibling and renames it over
 * the real file so a reader never sees a half-written state. Never throws.
 */
function saveState(state, cwd) {
  try {
    const dir = join(cwd || process.cwd(), '.dualbrain');
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
    const p = stateFile(cwd);
    const tmp = `${p}.tmp`;
    writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf8');
    renameSync(tmp, p);
  } catch { /* non-throwing */ }
}

/** Static prior for a tier/model pair; 0.5 when the pair is not listed. */
const staticPrior = (tier, model) => STATIC_PRIORS[`${tier}:${model}`] ?? 0.5;

/** The per-model entries stored under `key`, or `{}` when absent or malformed. */
const cellOf = (state, key) => (hasOwn(state, key) && isRecord(state[key]) ? state[key] : {});

/** Total observation count across all models recorded in a cell. */
const cellObs = (state, key) =>
  Object.values(cellOf(state, key)).reduce((sum, m) => sum + finiteOr(m?.observations, 0), 0);

/** Weighted mix of learned EMA and static prior; the prior counts as PRIOR_WEIGHT observations. */
const blended = (ema, n, tier, model) =>
  (n / (n + PRIOR_WEIGHT)) * ema + (PRIOR_WEIGHT / (n + PRIOR_WEIGHT)) * staticPrior(tier, model);

/**
 * Recommend a model for a task using epsilon-greedy selection over blended scores.
 *
 * - Unknown or missing `tier` falls back to `'execute'`; missing `intent` to `'implement'`.
 * - With fewer than MIN_OBSERVATIONS in the `tier:intent` cell, the model with
 *   the highest static prior is returned (confidence 0.3).
 * - Otherwise, with probability `max(MIN_EPSILON, 0.5 * 0.9^observations)` a
 *   uniformly random valid model is returned (`explored: true`); else the
 *   model with the highest blended score.
 * - Any unexpected error yields `{ model: 'sonnet', reason: 'error fallback' }`.
 *
 * @param {{intent?: string, tier?: string, risk?: *, files?: *, complexity?: *}} taskProfile
 * @param {string} [cwd] - project root; defaults to process.cwd()
 * @returns {{model: string, reason: string, confidence: number, explored: boolean}}
 */
export function adviseModel(taskProfile, cwd) {
  try {
    const { tier, intent } = taskProfile ?? {};
    // Own-property check: an inherited name such as 'constructor' is not a tier.
    const validTier = typeof tier === 'string' && hasOwn(VALID_MODELS, tier) ? tier : 'execute';
    const cellKey = `${validTier}:${intent ?? 'implement'}`;
    const models = VALID_MODELS[validTier];

    const state = loadState(cwd);
    const totalObs = cellObs(state, cellKey);

    if (totalObs < MIN_OBSERVATIONS) {
      // Heuristic: pick highest static prior
      const best = models.reduce((a, b) => (staticPrior(validTier, a) >= staticPrior(validTier, b) ? a : b));
      return { model: best, reason: 'insufficient data, using heuristic', confidence: 0.3, explored: false };
    }

    const epsilon = Math.max(MIN_EPSILON, 0.5 * Math.pow(0.9, totalObs));
    const explored = Math.random() < epsilon;

    if (explored) {
      const model = models[Math.floor(Math.random() * models.length)];
      return { model, reason: 'exploration', confidence: epsilon, explored: true };
    }

    // Exploitation: pick highest blended score
    const cell = cellOf(state, cellKey);
    let bestModel = models[0];
    let bestScore = -Infinity;
    for (const m of models) {
      const entry = hasOwn(cell, m) ? cell[m] : undefined;
      // Corrupt (non-numeric) stored values fall back to the prior / zero
      // instead of producing a NaN score that can never win.
      const ema = finiteOr(entry?.ema, staticPrior(validTier, m));
      const n = finiteOr(entry?.observations, 0);
      const score = blended(ema, n, validTier, m);
      if (score > bestScore) { bestScore = score; bestModel = m; }
    }

    return { model: bestModel, reason: 'exploitation', confidence: 1 - epsilon, explored: false };
  } catch {
    return { model: 'sonnet', reason: 'error fallback', confidence: 0.1, explored: false };
  }
}

/**
 * Fold one reward into the EMA for `cellKey`/`model` and persist the state.
 *
 * Ignored without side effects: non-finite or non-numeric rewards (a single
 * NaN would otherwise poison the EMA permanently) and keys that are empty,
 * not strings, or reserved names (`__proto__`, `constructor`, `prototype`)
 * which would otherwise write onto shared prototypes. Rewards outside
 * [0, 1] are clamped into that range. Never throws.
 *
 * @param {string} cellKey - `tier:intent` cell identifier
 * @param {string} model
 * @param {number} reward - number in [0, 1]
 * @param {string} [cwd] - project root; defaults to process.cwd()
 * @returns {void}
 */
export function recordReward(cellKey, model, reward, cwd) {
  try {
    if (!isSafeKey(cellKey) || !isSafeKey(model)) return;
    if (typeof reward !== 'number' || !Number.isFinite(reward)) return;
    const clamped = Math.min(1, Math.max(0, reward));

    const state = loadState(cwd);
    if (!hasOwn(state, cellKey) || !isRecord(state[cellKey])) state[cellKey] = {};
    const cell = state[cellKey];

    const previous = hasOwn(cell, model) && isRecord(cell[model]) ? cell[model] : {};
    // First observation seeds the EMA with the reward itself.
    const previousEma = finiteOr(previous.ema, clamped);
    cell[model] = {
      ...previous,
      ema: ALPHA * clamped + (1 - ALPHA) * previousEma,
      observations: finiteOr(previous.observations, 0) + 1,
      lastUpdated: new Date().toISOString(),
      lastReward: clamped,
    };
    saveState(state, cwd);
  } catch {
    // non-throwing
  }
}

/**
 * Summarize the persisted routing state.
 *
 * Malformed cells/entries and reserved key names are skipped. Entries are
 * ranked by EMA (non-numeric EMAs rank last); `topPerformers` is the first
 * five and `worstPerformers` the last five in reverse, so with fewer than
 * ten entries the two lists overlap.
 *
 * @param {string} [cwd] - project root; defaults to process.cwd()
 * @returns {{cells: Object, totalObservations: number, topPerformers: Array, worstPerformers: Array}}
 */
export function getRoutingStats(cwd) {
  try {
    const state = loadState(cwd);
    const cells = {};
    const flat = [];
    let totalObservations = 0;
    for (const [cellKey, models] of Object.entries(state)) {
      if (!isSafeKey(cellKey) || !isRecord(models)) continue;
      cells[cellKey] = {};
      for (const [model, entry] of Object.entries(models)) {
        if (!isSafeKey(model) || !isRecord(entry)) continue;
        const obs = finiteOr(entry.observations, 0);
        cells[cellKey][model] = { ema: entry.ema, observations: obs };
        totalObservations += obs;
        flat.push({ cell: cellKey, model, ema: entry.ema, observations: obs });
      }
    }
    const rank = (item) => finiteOr(item.ema, -Infinity);
    // `|| 0` turns the NaN from (-Infinity) - (-Infinity) into "equal".
    flat.sort((a, b) => rank(b) - rank(a) || 0);
    return { cells, totalObservations, topPerformers: flat.slice(0, 5), worstPerformers: flat.slice(-5).reverse() };
  } catch {
    return { cells: {}, totalObservations: 0, topPerformers: [], worstPerformers: [] };
  }
}

/**
 * Discard all learned routing state by persisting an empty state. Never throws.
 * @param {string} [cwd] - project root; defaults to process.cwd()
 * @returns {void}
 */
export function resetAdvisor(cwd) {
  try {
    saveState({}, cwd);
  } catch {
    // non-throwing
  }
}
package/src/signal.mjs ADDED
@@ -0,0 +1,114 @@
1
+ // signal.mjs — Compound outcome signal scoring
2
+ // Combines multiple weak signals into one reliable reward score.
3
+
4
+ import { existsSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+ import { execSync } from 'node:child_process';
7
+
8
/** Expected wall-clock duration per tier, in milliseconds. */
export const EXPECTED_DURATION_MS = { search: 15000, execute: 45000, think: 30000, review: 40000 };

/**
 * Score how close a run's duration was to the expected duration for its tier.
 *
 * With `ratio = durationMs / expected`:
 * - 0.5 ≤ ratio ≤ 1.5  → 1.0
 * - ratio < 0.2        → 0.5
 * - 0.2 ≤ ratio < 0.5  → linear from 0.5 up to 1.0
 * - 1.5 < ratio ≤ 3.0  → linear from 1.0 down to 0.3
 * - ratio > 3.0        → 0.3
 *
 * An unknown tier uses the `execute` expectation.
 *
 * @param {number} durationMs - measured duration in milliseconds
 * @param {string} tier - one of the keys of EXPECTED_DURATION_MS
 * @returns {number|null} score in [0.3, 1.0], or `null` when `durationMs` is
 *   not a finite, non-negative number (no usable signal)
 */
export function scoreDurationRatio(durationMs, tier) {
  try {
    // A NaN/undefined/negative duration carries no signal; returning NaN here
    // would silently poison any average the score is folded into.
    if (typeof durationMs !== 'number' || !Number.isFinite(durationMs) || durationMs < 0) {
      return null;
    }

    // typeof guard rejects inherited members (e.g. tier === 'constructor').
    const tierExpected = EXPECTED_DURATION_MS[tier];
    const expected = typeof tierExpected === 'number' && tierExpected > 0
      ? tierExpected
      : EXPECTED_DURATION_MS.execute;

    const ratio = durationMs / expected;
    if (ratio >= 0.5 && ratio <= 1.5) return 1.0;
    if (ratio < 0.2) return 0.5;
    if (ratio > 3.0) return 0.3;
    if (ratio < 0.5) return 0.5 + ((ratio - 0.2) / (0.5 - 0.2)) * 0.5;
    // ratio 1.5–3.0
    return 1.0 - ((ratio - 1.5) / (3.0 - 1.5)) * 0.7;
  } catch {
    return null;
  }
}
24
+
25
/**
 * Fraction of an outcome's changed files that are still in place.
 *
 * A file "survives" when it exists on disk and is not listed by
 * `git diff --name-only`. Entries may be absolute paths or paths relative to
 * `cwd`. Entries that are not non-empty strings count as not survived. If git
 * is unavailable or `cwd` is not a repository, only existence is checked.
 *
 * @param {{filesChanged?: string[]}} outcome
 * @param {string} cwd - directory that relative entries are resolved against
 * @returns {number|null} value in [0, 1]; 1.0 when there are no files to
 *   check; `null` when the inputs cannot be processed (e.g. `outcome` is null)
 */
export function measureFileSurvival(outcome, cwd) {
  try {
    const files = Array.isArray(outcome.filesChanged) ? outcome.filesChanged : [];
    if (files.length === 0) return 1.0;

    // Modified paths are stored both relative and absolute so either form of
    // entry can be matched. `--relative` makes git print paths relative to
    // `cwd` rather than the repository root.
    const changed = new Set();
    try {
      const gitOut = execSync('git diff --name-only --relative', {
        cwd,
        encoding: 'utf8',
        stdio: ['ignore', 'pipe', 'pipe'],
      });
      for (const line of gitOut.split('\n')) {
        const rel = line.trim();
        if (!rel) continue;
        changed.add(rel);
        changed.add(join(cwd, rel));
      }
    } catch {
      // git unavailable or not a repository — fall back to existence only
    }

    let survived = 0;
    for (const f of files) {
      if (typeof f !== 'string' || f.length === 0) continue;
      // Joining an absolute path onto cwd would point at a non-existent
      // location and wrongly count the file as lost.
      const abs = f.startsWith('/') ? f : join(cwd, f);
      if (existsSync(abs) && !changed.has(f) && !changed.has(abs)) survived++;
    }
    return survived / files.length;
  } catch {
    return null;
  }
}
53
+
54
/**
 * Combine several weak outcome signals into one reward score.
 *
 * Signals and weights:
 * 1. exitSuccess (0.30): 1.0 when `outcome.success === true`, 0.4 when
 *    `outcome.status === 'partial'`, otherwise 0.0.
 * 2. durationRatio (0.25): `scoreDurationRatio(durationMs, tier)` when
 *    `durationMs > 0`, otherwise unavailable.
 * 3. tokenEfficiency (0.25): files changed per 1000 tokens. Tokens come from
 *    `outcome.tokensUsed.output`, or are estimated as `durationMs / 100` when
 *    that is not reported. Skipped for the `think` tier with zero files.
 * 4. fileSurvival (0.20): taken from `context.fileSurvival`; may be absent.
 *
 * Unavailable (null or non-finite) signals are dropped and the remaining
 * weights are renormalized. `confidence` is the total weight of the signals
 * that were available.
 *
 * @param {Object} outcome - fields read: success, status, tier, durationMs,
 *   filesChanged (array or count), tokensUsed.output
 * @param {{fileSurvival?: number}|null} [context]
 * @returns {{reward: number, confidence: number, signals: Object}} reward and
 *   confidence in [0, 1]; on unexpected errors both are 0
 */
export function scoreOutcome(outcome, context = {}) {
  try {
    const asSignal = (v) => (typeof v === 'number' && Number.isFinite(v) ? v : null);
    const tier = outcome.tier ?? 'execute';
    const signals = [];

    // Signal 1: exit success (weight 0.3)
    let exitVal;
    if (outcome.success === true) exitVal = 1.0;
    else if (outcome.status === 'partial') exitVal = 0.4;
    else exitVal = 0.0;
    signals.push({ name: 'exitSuccess', value: exitVal, weight: 0.3 });

    // Signal 2: duration ratio (weight 0.25)
    const durationMs = outcome.durationMs ?? 0;
    const durVal = durationMs > 0 ? asSignal(scoreDurationRatio(durationMs, tier)) : null;
    signals.push({ name: 'durationRatio', value: durVal, weight: 0.25 });

    // Signal 3: token efficiency (weight 0.25)
    let effVal = null;
    const filesChanged = outcome.filesChanged;
    // Accept an array of paths or a plain count; anything else means "no
    // files" rather than an undefined length that turns the reward into NaN.
    let fileCount = 0;
    if (Array.isArray(filesChanged)) fileCount = filesChanged.length;
    else if (typeof filesChanged === 'number' && Number.isFinite(filesChanged) && filesChanged >= 0) fileCount = filesChanged;

    if (!(fileCount === 0 && tier === 'think')) {
      const reportedTokens = asSignal(outcome.tokensUsed?.output);
      let tokensUsed = null;
      if (reportedTokens !== null) tokensUsed = reportedTokens;
      else if (durationMs > 0) tokensUsed = Math.round(durationMs / 100);

      if (tokensUsed !== null) {
        const efficiency = fileCount / Math.max(1, tokensUsed / 1000);
        if (efficiency > 2) effVal = 1.0;
        else if (efficiency >= 0.5) effVal = 0.5 + ((efficiency - 0.5) / 1.5) * 0.5;
        else if (efficiency < 0.1) effVal = 0.2;
        else effVal = 0.2 + ((efficiency - 0.1) / 0.4) * 0.3;
        effVal = asSignal(effVal);
      }
    }
    signals.push({ name: 'tokenEfficiency', value: effVal, weight: 0.25 });

    // Signal 4: file survival (weight 0.2) — delayed, may be null.
    // `context?.` tolerates an explicit null context (the default parameter
    // only covers undefined).
    const survivalVal = asSignal(context?.fileSurvival);
    signals.push({ name: 'fileSurvival', value: survivalVal, weight: 0.2 });

    // Compound score with weight redistribution
    const active = signals.filter((s) => s.value !== null);
    const totalWeight = active.reduce((sum, s) => sum + s.weight, 0);
    const reward = totalWeight > 0
      ? active.reduce((sum, s) => sum + (s.value * s.weight / totalWeight), 0)
      : 0;
    const confidence = totalWeight;

    return {
      reward: Math.min(1, Math.max(0, reward)),
      confidence: Math.min(1, confidence),
      signals: {
        exitSuccess: exitVal,
        durationRatio: durVal,
        tokenEfficiency: effVal,
        fileSurvival: survivalVal,
      },
    };
  } catch {
    return { reward: 0, confidence: 0, signals: { exitSuccess: false, durationRatio: null, tokenEfficiency: null, fileSurvival: null } };
  }
}