dual-brain 0.2.23 → 0.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -290,7 +290,19 @@ const THINK_WORDS = /\b(plan|design|architect|review|audit|security|code[-\s]?re
290
290
  const WRITE_INTENT_WORDS = /\b(edit|fix|change|update|create|write|modify|implement|refactor|add|remove|delete|build|install|configure|patch|apply|move|rename|migrate|replace|rewrite|generate|scaffold|init(?:ialize)?|setup|deploy|run\s+tests?|commit|push|install|uninstall)\b/i;
291
291
 
292
292
  // Dispatch marker prefix stamped by src/dispatch.mjs for all legitimate dispatches.
293
- const DISPATCH_MARKER_RE = /<!--\s*dual-brain-dispatch:\s*[a-z0-9]+\s*-->/i;
293
+ const DISPATCH_MARKER_RE = /<!--\s*dual-brain-dispatch:[a-z0-9|:.\-]+\s*-->/i;
294
+
295
/**
 * Extract the fields of a dual-brain dispatch marker from a prompt.
 *
 * Marker format:
 *   <!-- dual-brain-dispatch:<runId>|tier:<tier>|model:<model>|risk:<risk>|req:<requiredTier> -->
 *
 * Whitespace around the payload is tolerated, matching DISPATCH_MARKER_RE, so a
 * marker that passes the recognition check can also be parsed here.
 *
 * @param {unknown} prompt - Prompt text that may start with a dispatch marker.
 * @returns {Record<string, string> | null} `{ runId, ...fields }`, or null when
 *   the prompt is not a string or carries no marker. Fields with an empty value
 *   (e.g. `req:`) are omitted.
 */
function parseDispatchMarker(prompt) {
  if (typeof prompt !== 'string') return null;

  const match = prompt.match(/<!--\s*dual-brain-dispatch:\s*([^>]+?)\s*-->/i);
  if (!match) return null;

  const [runId, ...pairs] = match[1].split('|');
  const fields = { runId: runId.trim() };

  for (const pair of pairs) {
    // Split on the FIRST colon only so values that themselves contain ':'
    // are kept whole instead of being truncated.
    const sep = pair.indexOf(':');
    if (sep <= 0) continue;
    const key = pair.slice(0, sep).trim();
    const val = pair.slice(sep + 1).trim();
    if (key && val) fields[key] = val;
  }

  return fields;
}
294
306
 
295
307
  /**
296
308
  * Determine whether a prompt is purely read-only (no write keywords at all).
@@ -357,6 +369,22 @@ try {
357
369
  // Non-blocking governance warning — will be included in final output
358
370
  }
359
371
 
372
+ // ── Over-provisioning check via enriched dispatch marker ───────────────────
373
+ // If the marker carries governance scores, validate that the model tier isn't
374
+ // higher than the task actually requires (closes the brainstorm-opus loophole).
375
+ const markerFields = parseDispatchMarker(rawPrompt);
376
+ if (markerFields?.req && markerFields?.model) {
377
+ const reqTier = parseInt(markerFields.req, 10);
378
+ const modelTier = getGovernanceTier(markerFields.model);
379
+ if (!isNaN(reqTier) && modelTier > reqTier && reqTier <= 2) {
380
+ process.stdout.write(JSON.stringify({
381
+ systemMessage: `[governance] Over-provisioned: task requires tier ${reqTier} but using tier ${modelTier} model (${markerFields.model}). Consider downgrading.`,
382
+ }));
383
+ process.exit(0);
384
+ }
385
+ }
386
+ // ── End over-provisioning check ────────────────────────────────────────────
387
+
360
388
  // Compute prompt hash early for duplicate detection and logging
361
389
  const promptHash = computePromptHash(ti);
362
390
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dual-brain",
3
- "version": "0.2.23",
3
+ "version": "0.2.25",
4
4
  "description": "AI orchestration across Claude + OpenAI subscriptions — smart routing, budget awareness, and dual-brain collaboration",
5
5
  "type": "module",
6
6
  "bin": {
@@ -47,7 +47,9 @@
47
47
  "./envelope": "./src/envelope.mjs",
48
48
  "./session-lock": "./src/session-lock.mjs",
49
49
  "./governance": "./src/governance.mjs",
50
- "./context-intel": "./src/context-intel.mjs"
50
+ "./context-intel": "./src/context-intel.mjs",
51
+ "./signal": "./src/signal.mjs",
52
+ "./routing-advisor": "./src/routing-advisor.mjs"
51
53
  },
52
54
  "keywords": [
53
55
  "claude-code",
@@ -134,6 +136,8 @@
134
136
  "src/session-lock.mjs",
135
137
  "src/governance.mjs",
136
138
  "src/context-intel.mjs",
139
+ "src/signal.mjs",
140
+ "src/routing-advisor.mjs",
137
141
  "bin/*.mjs",
138
142
  "hooks/enforce-tier.mjs",
139
143
  "hooks/cost-logger.mjs",
package/src/dispatch.mjs CHANGED
@@ -18,6 +18,7 @@ import { getFailoverOrder } from './decide.mjs';
18
18
  import { getTemplate, renderPrompt, quickRender } from './templates.mjs';
19
19
  import { compilePacket, shapeForRole } from './context-intel.mjs';
20
20
  import { buildContextPack } from './context.mjs';
21
+ import { scoreTask, computeRequiredTier } from './governance.mjs';
21
22
 
22
23
  const __dirname = dirname(fileURLToPath(import.meta.url));
23
24
  const USAGE_DIR = join(__dirname, '..', '.dualbrain', 'usage');
@@ -706,8 +707,8 @@ function _renderTemplatedPrompt(prompt, decision, context = {}) {
706
707
  // Prepend a marker to every prompt that goes through the official dispatch pipeline.
707
708
  // The enforce-tier hook checks for this marker to distinguish legitimate dispatches
708
709
  // from raw Agent calls made by the HEAD that bypass the dual-brain pipeline.
709
- // Format: <!-- dual-brain-dispatch: <runId> -->
710
- // runId is a short timestamp-based ID that ties back to this dispatch session.
710
+ // Format: <!-- dual-brain-dispatch:<runId>|tier:<tier>|model:<model>|risk:<risk>|req:<requiredTier> -->
711
+ // runId is a short timestamp-based ID; governance fields enable over-provisioning validation.
711
712
 
712
713
  let _dispatchRunId = null;
713
714
 
@@ -719,9 +720,14 @@ function _getDispatchRunId() {
719
720
  return _dispatchRunId;
720
721
  }
721
722
 
722
/**
 * Stamp a prompt with the dispatch marker so the enforce-tier hook can tell it
 * came through the official pipeline, and attach governance fields to it.
 *
 * Format: <!-- dual-brain-dispatch:<runId>|tier:<tier>|model:<model>|risk:<risk>|req:<requiredTier> -->
 *
 * @param {string} prompt - Prompt to stamp.
 * @param {object|null} [decision] - Routing decision; `tier`, `model`, `risk`
 *   and `_requiredTier` are read. Missing values fall back to
 *   `execute` / `sonnet` / `medium` / empty.
 * @returns {string} The marker line, a newline, then the original prompt.
 */
function _prependDispatchMarker(prompt, decision = {}) {
  // A default parameter only covers `undefined`; guard explicit null as well.
  const source = decision ?? {};
  const runId = _getDispatchRunId();

  // The marker is matched elsewhere with the character class [a-z0-9|:.\-]
  // (case-insensitive) and split on '|' and ':'. Any other character inside a
  // field value would make the marker unrecognisable or shift its fields, so
  // collapse such runs to '-'.
  const clean = (value) => String(value).replace(/[^A-Za-z0-9.\-]+/g, '-');

  const tier = clean(source.tier || 'execute');
  const model = clean(source.model || 'sonnet');
  const risk = clean(source.risk || 'medium');
  const requiredTier = clean(source._requiredTier || '');

  const marker = `<!-- dual-brain-dispatch:${runId}|tier:${tier}|model:${model}|risk:${risk}|req:${requiredTier} -->`;
  return `${marker}\n${prompt}`;
}
726
732
 
727
733
  // ─── Related session age label ────────────────────────────────────────────────
@@ -845,7 +851,12 @@ async function dispatch(input = {}) {
845
851
 
846
852
  // Stamp the prompt with the dispatch marker so enforce-tier.mjs can recognise
847
853
  // that this agent call came through the official pipeline.
848
- prompt = _prependDispatchMarker(prompt);
854
+ // Compute required tier for governance validation
855
+ try {
856
+ const scores = scoreTask({ intent: decision.tier, risk: decision.risk, files, objective: prompt.slice(0, 200) });
857
+ decision = { ...decision, _requiredTier: computeRequiredTier(scores) };
858
+ } catch { /* non-blocking */ }
859
+ prompt = _prependDispatchMarker(prompt, decision);
849
860
 
850
861
  // ── Situation brief injection ────────────────────────────────────────────────
851
862
  // Prepend a compact project-state summary when provided by the pipeline.
@@ -1149,7 +1160,7 @@ async function dispatch(input = {}) {
1149
1160
  }
1150
1161
  // ── End auto-review annotation ────────────────────────────────────────────
1151
1162
 
1152
- return {
1163
+ const nativeResult = {
1153
1164
  status: success ? 'completed' : 'failed',
1154
1165
  type: 'native-agent',
1155
1166
  provider: currentProvider,
@@ -1166,6 +1177,11 @@ async function dispatch(input = {}) {
1166
1177
  authVerified: true,
1167
1178
  error: success ? null : errorText.slice(0, 200),
1168
1179
  };
1180
+ try {
1181
+ const { recordDispatchOutcome } = await import('./outcome.mjs');
1182
+ recordDispatchOutcome(input, nativeResult);
1183
+ } catch { /* never block */ }
1184
+ return nativeResult;
1169
1185
  }
1170
1186
 
1171
1187
  const command = buildCommand(effectiveDecision, prompt, files, cwd);
@@ -1268,7 +1284,7 @@ async function dispatch(input = {}) {
1268
1284
  }
1269
1285
  // ── End auto-review annotation ──────────────────────────────────────────────
1270
1286
 
1271
- return {
1287
+ const subResult = {
1272
1288
  status: success ? 'completed' : 'failed',
1273
1289
  provider: subProvider,
1274
1290
  model: subModel,
@@ -1283,6 +1299,11 @@ async function dispatch(input = {}) {
1283
1299
  authVerified: true,
1284
1300
  error: success ? null : errorText.slice(0, 200),
1285
1301
  };
1302
+ try {
1303
+ const { recordDispatchOutcome } = await import('./outcome.mjs');
1304
+ recordDispatchOutcome(input, subResult);
1305
+ } catch { /* never block */ }
1306
+ return subResult;
1286
1307
  }
1287
1308
 
1288
1309
  // ─── Dual-brain dispatch (parallel) ───────────────────────────────────────────
@@ -1295,7 +1316,12 @@ async function dispatchDualBrain(input = {}) {
1295
1316
  prompt = redact(prompt);
1296
1317
 
1297
1318
  // Stamp with dispatch marker so enforce-tier.mjs allows this Agent call
1298
- prompt = _prependDispatchMarker(prompt);
1319
+ // Compute required tier for governance validation
1320
+ try {
1321
+ const scores = scoreTask({ intent: decision.tier, risk: decision.risk, files, objective: prompt.slice(0, 200) });
1322
+ decision = { ...decision, _requiredTier: computeRequiredTier(scores) };
1323
+ } catch { /* non-blocking */ }
1324
+ prompt = _prependDispatchMarker(prompt, decision);
1299
1325
 
1300
1326
  // ── Situation brief injection ────────────────────────────────────────────────
1301
1327
  const _dualBrainBrief = typeof input.situationBrief === 'string' && input.situationBrief.trim()
package/src/outcome.mjs CHANGED
@@ -1,6 +1,7 @@
1
- import { mkdirSync, appendFileSync, readFileSync, existsSync } from 'fs';
1
+ import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync, readdirSync } from 'fs';
2
2
  import { join } from 'path';
3
3
  import { randomUUID } from 'crypto';
4
+ import { execSync } from 'child_process';
4
5
 
5
6
  const STOP_WORDS = new Set([
6
7
  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'from',
@@ -44,6 +45,36 @@ function last7DaysFiles(cwd) {
44
45
  return files;
45
46
  }
46
47
 
48
/**
 * Persist a compact outcome record for one dispatch as
 * `outcome_<id>.json` inside the outcomes directory.
 *
 * Best-effort: any failure is swallowed and reported as `null` so that
 * recording can never break the dispatch that triggered it.
 *
 * @param {object} dispatchInput - Input given to dispatch; `cwd`, `decision`
 *   and `prompt` are read.
 * @param {object} result - Dispatch result; `status`, `tier`, `model`,
 *   `provider`, `durationMs`, `filesChanged`, `errors` / `error` are read.
 * @returns {object|null} The record that was written, or null on failure.
 */
export function recordDispatchOutcome(dispatchInput, result) {
  try {
    const cwd = dispatchInput.cwd ?? process.cwd();
    const decision = dispatchInput.decision ?? {};
    ensureDir(cwd);

    // Timestamp first so file names still sort chronologically; the random
    // suffix keeps two dispatches that finish in the same millisecond (e.g.
    // parallel runs) from overwriting each other's record.
    const id = `out_${Date.now().toString(36)}_${randomUUID().slice(0, 8)}`;

    // Normalise errors to an array: slicing a bare string would keep only its
    // first three characters.
    let errors;
    if (Array.isArray(result.errors)) errors = result.errors;
    else if (result.errors != null) errors = [result.errors];
    else errors = result.error ? [result.error] : [];

    const record = {
      id,
      timestamp: new Date().toISOString(),
      prompt: String(dispatchInput.prompt ?? '').slice(0, 200),
      tier: decision.tier ?? result.tier ?? 'execute',
      model: decision.model ?? result.model ?? 'unknown',
      provider: decision.provider ?? result.provider ?? 'unknown',
      success: result.status === 'success' || result.status === 'completed',
      status: result.status ?? 'unknown',
      durationMs: result.durationMs ?? 0,
      filesChanged: result.filesChanged?.length ?? 0,
      errors: errors.slice(0, 3),
      lesson: '',
    };

    const filePath = join(outcomesDir(cwd), `outcome_${id}.json`);
    writeFileSync(filePath, JSON.stringify(record, null, 2), 'utf8');
    return record;
  } catch {
    return null;
  }
}
77
+
47
78
  export function computeRoutingScore(plan, result, verification) {
48
79
  let score = 3;
49
80
  if (result.success && result.duration < 60_000) score += 1;
@@ -174,6 +205,77 @@ export async function getRelevantOutcomes(prompt, files = [], cwd, options = {})
174
205
  }
175
206
  }
176
207
 
208
/**
 * Score how many files touched by recent dispatch outcomes "survived":
 * a file survives when it still exists and is not listed by `git diff`.
 *
 * Looks at the last 20 `outcome_*.json` files (by sorted name) under
 * `<cwd>/.dualbrain/outcomes`, skips records that already carry a
 * `survivalScore` or have no list of changed files, and writes the computed
 * score back into each scored record.
 *
 * @param {string} cwd - Project directory containing `.dualbrain/outcomes`.
 * @returns {Promise<Array<{ id: string, survivalScore: number }>>} Newly scored
 *   records; an empty array when nothing could be scored or on any failure.
 */
export async function checkFileSurvival(cwd) {
  try {
    const dir = join(cwd, '.dualbrain', 'outcomes');
    if (!existsSync(dir)) return [];

    // Collect up to the last 20 individual outcome JSON files
    let files;
    try {
      files = readdirSync(dir)
        .filter((name) => name.startsWith('outcome_') && name.endsWith('.json'))
        .sort()
        .slice(-20);
    } catch {
      return [];
    }

    // Get current git-modified files (best-effort). `--relative` makes git
    // report paths relative to `cwd`, so they line up with the cwd-relative
    // paths stored in the records even when `cwd` is a repo subdirectory.
    const modifiedFiles = new Set();
    try {
      const gitOut = execSync('git diff --name-only --relative', {
        cwd,
        stdio: ['ignore', 'pipe', 'pipe'],
      }).toString();
      for (const f of gitOut.split('\n').map((l) => l.trim()).filter(Boolean)) {
        modifiedFiles.add(f);
        modifiedFiles.add(join(cwd, f));
      }
    } catch {
      // git unavailable — proceed without modified-file check
    }

    const scored = [];

    for (const fname of files) {
      const fpath = join(dir, fname);
      let record;
      try {
        record = JSON.parse(readFileSync(fpath, 'utf8'));
      } catch {
        continue;
      }

      // A payload such as `null` or an array is not a record; skip it rather
      // than letting one bad file abort the whole scan.
      if (record === null || typeof record !== 'object' || Array.isArray(record)) continue;

      // Skip if already scored or no filesChanged list
      if (record.survivalScore !== undefined) continue;
      // Accept the list either nested under `result` or at the top level.
      const changedFiles = Array.isArray(record.result?.filesChanged)
        ? record.result.filesChanged
        : record.filesChanged;
      if (!Array.isArray(changedFiles) || changedFiles.length === 0) continue;

      let survived = 0;
      for (const f of changedFiles) {
        if (typeof f !== 'string') continue; // malformed entry counts as not survived
        const absPath = f.startsWith('/') ? f : join(cwd, f);
        const modified = modifiedFiles.has(f) || modifiedFiles.has(absPath);
        if (existsSync(absPath) && !modified) survived++;
      }

      const survivalScore = survived / changedFiles.length;
      record.survivalScore = survivalScore;

      try {
        writeFileSync(fpath, JSON.stringify(record, null, 2), 'utf8');
      } catch {
        // write failed — skip
        continue;
      }

      scored.push({ id: record.id, survivalScore });
    }

    return scored;
  } catch {
    return [];
  }
}
278
+
177
279
  export async function getOutcomeStats(cwd, days = 7) {
178
280
  try {
179
281
  const allFiles = last7DaysFiles(cwd).slice(0, days);
package/src/pipeline.mjs CHANGED
@@ -10,8 +10,10 @@ import { detectTask } from './detect.mjs';
10
10
  import { decideRoute, getWorkStyle, WORK_STYLES } from './decide.mjs';
11
11
  import { dispatch } from './dispatch.mjs';
12
12
  import { loadProfile } from './profile.mjs';
13
- import { mkdirSync, writeFileSync } from 'node:fs';
13
+ import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
14
14
  import { join } from 'node:path';
15
+ import { buildContextPack as buildContextPackIntel } from './context.mjs';
16
+ import { compilePacket } from './context-intel.mjs';
15
17
 
16
18
  // Lazy-load collaboration module
17
19
  let _collab = null;
@@ -648,6 +650,182 @@ function runGate(run, gateName, gateFn) {
648
650
  return result.passed;
649
651
  }
650
652
 
653
+ // ─── Pre-dispatch think (Position 1: context intelligence) ───────────────────
654
+
655
/**
 * Optionally spawn a cheap think agent to produce a refined work spec before
 * the real dispatch. Non-blocking on any failure: every error path resolves to
 * `{ refined: false }` instead of rejecting.
 *
 * The think step is skipped when the call is recursive, the tier is `search`,
 * the decision confidence is already >= 0.9, the work style is `cost-saver`,
 * or the recorded hit rate is below 30% after 10+ observations.
 *
 * @param {string} prompt
 * @param {string[]} files
 * @param {object} decision — from plan._decision
 * @param {string} cwd
 * @param {object} profile
 * @param {object} [opts]
 * @param {boolean} [opts._skipPreDispatchThink] — set true on recursive calls
 * @param {Function} [opts.log] — logging function
 * @param {boolean} [opts.verbose] — also report auto-disable on stderr
 * @returns {Promise<{ refined: boolean, prompt?: string, files?: string[], decision?: object, confidence?: number, reason?: string }>}
 */
async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}) {
  // The default parameter only covers `undefined`; tolerate explicit null too.
  const options = opts ?? {};
  const log = options.log ?? (() => {});

  // Guard: never recurse
  if (options._skipPreDispatchThink) {
    log('[dual-brain] pre-dispatch think: skipped (recursive call)');
    return { refined: false };
  }

  // Guard: only execute/think tiers. `search` is the only tier below level 2,
  // so this single check covers the tier-level requirement.
  const tier = decision?.tier ?? 'execute';
  if (tier === 'search') {
    log('[dual-brain] pre-dispatch think: skipped (search tier)');
    return { refined: false };
  }

  // Guard: decision confidence must be < 0.9
  const confidence = decision?.confidence ?? 0.5;
  if (confidence >= 0.9) {
    log('[dual-brain] pre-dispatch think: skipped (confidence >= 0.9)');
    return { refined: false };
  }

  // Guard: not cost-saver work style
  try {
    const style = getWorkStyle(profile);
    if (style.key === 'cost-saver') {
      log('[dual-brain] pre-dispatch think: skipped (cost-saver profile)');
      return { refined: false };
    }
  } catch {
    // profile unavailable — proceed
  }

  // Auto-disable if ROI is bad (< 30% hit rate after 10+ observations)
  if (_thinkRoiTooLow(cwd)) {
    const verbose = options.verbose ?? false;
    if (verbose) process.stderr.write('[dual-brain] pre-dispatch think disabled: hit rate below 30%\n');
    return { refined: false, reason: 'think ROI too low, auto-disabled' };
  }

  try {
    log('[dual-brain] pre-dispatch think: refining work spec...');

    // Build the thinker context pack
    const pack = await buildContextPackIntel(prompt, files, cwd);

    // Compile to a thinker-shaped prompt (sonnet, 3000 token budget)
    const thinkerPrompt = compilePacket(pack, 'thinker', 'sonnet', 3000);

    // Dispatch to a think agent — use sonnet, tier=think, skip all extras
    const thinkResult = await dispatch({
      decision: {
        provider: 'claude',
        model: 'sonnet',
        tier: 'think',
        confidence: 1, // internal call — fully confident
      },
      prompt: thinkerPrompt,
      files: [],
      cwd,
      dryRun: false,
      verbose: false,
      profile,
      _skipPreDispatchThink: true,
      _skipRelatedContext: true,
    });

    // Expect JSON with { decision, confidence, workSpec }
    const parsed = _parseThinkResponse(thinkResult);

    if (!parsed || typeof parsed.confidence !== 'number' || parsed.confidence <= 0.7) {
      const reason = !parsed ? 'unparseable response' : `confidence ${parsed.confidence} <= 0.7`;
      log(`[dual-brain] pre-dispatch think: skipped (${reason})`);
      _recordThinkMetrics(false, cwd);
      return { refined: false };
    }

    const ws = parsed.workSpec;
    if (!ws || typeof ws.objective !== 'string' || ws.objective.trim() === '') {
      log('[dual-brain] pre-dispatch think: skipped (no workSpec.objective)');
      _recordThinkMetrics(false, cwd);
      return { refined: false };
    }

    // Apply refinements. Only real arrays of strings are merged: spreading a
    // bare string would split it into single characters.
    const newObjective = ws.objective;
    const onlyStrings = (value) => (Array.isArray(value) ? value.filter((v) => typeof v === 'string') : []);
    const newFiles = [...new Set([...onlyStrings(files), ...onlyStrings(ws.files)])];
    const extraCriteria = Array.isArray(ws.criteria) ? ws.criteria : [];
    const newDecision = extraCriteria.length
      ? { ...decision, acceptanceCriteria: [...(decision?.acceptanceCriteria ?? []), ...extraCriteria] }
      : decision;

    log(`[dual-brain] think refined: "${newObjective.slice(0, 60)}..." (confidence: ${parsed.confidence})`);

    _recordThinkMetrics(true, cwd);
    return {
      refined: true,
      prompt: newObjective,
      files: newFiles,
      decision: newDecision,
      confidence: parsed.confidence,
    };
  } catch (err) {
    // Non-blocking on any failure
    log(`[dual-brain] pre-dispatch think: skipped (error: ${err?.message ?? err})`);
    _recordThinkMetrics(false, cwd);
    return { refined: false };
  }
}

// Reads think-metrics.json and reports whether the hit rate is below 30% after
// at least 10 observations. A missing, unreadable or malformed file counts as
// "no data" (false).
function _thinkRoiTooLow(cwd) {
  try {
    const raw = JSON.parse(readFileSync(join(cwd, '.dualbrain', 'think-metrics.json'), 'utf8'));
    const hits = Number.isFinite(raw?.hits) ? raw.hits : 0;
    const misses = Number.isFinite(raw?.misses) ? raw.misses : 0;
    const total = hits + misses;
    return total >= 10 && hits / total < 0.3;
  } catch {
    return false;
  }
}

// Pulls the first-`{`-to-last-`}` span out of a think result (string or object
// with output/result/text) and parses it as JSON; returns null when that fails.
function _parseThinkResponse(thinkResult) {
  try {
    const raw = typeof thinkResult === 'string'
      ? thinkResult
      : (thinkResult?.output ?? thinkResult?.result ?? thinkResult?.text ?? JSON.stringify(thinkResult));
    const jsonMatch = raw.match(/\{[\s\S]*\}/);
    return jsonMatch ? JSON.parse(jsonMatch[0]) : null;
  } catch {
    return null;
  }
}
806
+
807
/**
 * Record a think hit or miss into `<cwd>/.dualbrain/think-metrics.json`
 * (non-blocking: every failure is swallowed).
 *
 * Counters that are missing or not finite non-negative numbers are reset to 0
 * before incrementing, so a damaged file cannot turn the metrics into NaN.
 *
 * @param {boolean} hit — true if the think agent produced a usable refinement
 * @param {string} cwd
 */
function _recordThinkMetrics(hit, cwd) {
  try {
    const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');

    let stored = null;
    try {
      stored = JSON.parse(readFileSync(metricsPath, 'utf8'));
    } catch {
      // missing or unparseable file — start from zero
    }
    const base = stored !== null && typeof stored === 'object' && !Array.isArray(stored) ? stored : {};
    const counter = (value) => (Number.isFinite(value) && value >= 0 ? value : 0);

    const metrics = {
      ...base,
      hits: counter(base.hits),
      misses: counter(base.misses),
      totalTokens: counter(base.totalTokens),
    };

    if (hit) {
      metrics.hits += 1;
    } else {
      metrics.misses += 1;
    }
    metrics.totalTokens += 3000; // budget per think call
    metrics.lastUpdated = new Date().toISOString();

    mkdirSync(join(cwd, '.dualbrain'), { recursive: true });
    writeFileSync(metricsPath, JSON.stringify(metrics, null, 2) + '\n');
  } catch { /* non-blocking */ }
}
828
+
651
829
  // ─── Main entry point ─────────────────────────────────────────────────────────
652
830
 
653
831
  /**
@@ -1070,7 +1248,49 @@ export async function runPipeline(trigger, prompt, options = {}) {
1070
1248
  }
1071
1249
  }
1072
1250
 
1073
- const decision = { ...run.plan._decision };
1251
+ let decision = { ...run.plan._decision };
1252
+
1253
+ // ── Pre-dispatch think (Position 1: context intelligence) ────────────────
1254
+ // For tier-2+ non-trivial tasks with decision confidence < 0.9, spawn a
1255
+ // cheap sonnet think agent to produce a refined work spec before the real
1256
+ // dispatch. Non-blocking — if it fails or confidence is low, proceed as-is.
1257
+ {
1258
+ const thinkRefinement = await preDispatchThink(
1259
+ effectivePrompt,
1260
+ files,
1261
+ decision,
1262
+ cwd,
1263
+ run.context?.profile ?? {},
1264
+ { log, _skipPreDispatchThink: options._skipPreDispatchThink }
1265
+ );
1266
+ if (thinkRefinement.refined) {
1267
+ // Mutate locals so both collab and direct paths use the refined inputs
1268
+ // (effectivePrompt is const — store refinement in a mutable local)
1269
+ run._thinkRefinedPrompt = thinkRefinement.prompt;
1270
+ run._thinkRefinedFiles = thinkRefinement.files;
1271
+ decision = thinkRefinement.decision;
1272
+
1273
+ // Cascade: if think agent is highly confident and task is simple, downgrade worker model
1274
+ if (thinkRefinement.decision) {
1275
+ const thinkConf = thinkRefinement.confidence || 0;
1276
+ const currentModel = decision.model || 'sonnet';
1277
+ if (thinkConf >= 0.9 && currentModel !== 'haiku') {
1278
+ // High confidence from thinker = clear spec = cheaper model can execute
1279
+ const prevModel = decision.model;
1280
+ decision.model = 'haiku';
1281
+ if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded ${prevModel || 'sonnet'} to haiku\n`);
1282
+ } else if (thinkConf >= 0.75 && currentModel === 'opus') {
1283
+ // Moderate confidence but spec is clear enough for sonnet
1284
+ decision.model = 'sonnet';
1285
+ if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded opus to sonnet\n`);
1286
+ }
1287
+ }
1288
+ }
1289
+ }
1290
+
1291
+ // Resolve the (possibly refined) prompt and file list for dispatch
1292
+ const dispatchPrompt = run._thinkRefinedPrompt ?? effectivePrompt;
1293
+ const dispatchFiles = run._thinkRefinedFiles ?? files;
1074
1294
 
1075
1295
  // ── HEAD judgment injection into agent prompts ─────────────────────────────
1076
1296
  // HEAD's obligations, noticings, and uncertainties flow to the work agent
@@ -1130,13 +1350,13 @@ export async function runPipeline(trigger, prompt, options = {}) {
1130
1350
 
1131
1351
  // Inject collaboration context + HEAD judgment into prompt
1132
1352
  const collabContext = collab.buildAgentContext(session, primaryId);
1133
- const promptParts = [collabContext, headJudgmentBlock, effectivePrompt].filter(Boolean);
1353
+ const promptParts = [collabContext, headJudgmentBlock, dispatchPrompt].filter(Boolean);
1134
1354
  const collabPrompt = promptParts.join('\n\n');
1135
1355
 
1136
1356
  run.result = await dispatch({
1137
1357
  decision,
1138
1358
  prompt: collabPrompt,
1139
- files,
1359
+ files: dispatchFiles,
1140
1360
  cwd,
1141
1361
  dryRun: false,
1142
1362
  verbose,
@@ -1192,13 +1412,13 @@ export async function runPipeline(trigger, prompt, options = {}) {
1192
1412
  try { collab.persistEvents(session, cwd); } catch {}
1193
1413
  } else {
1194
1414
  const directPrompt = headJudgmentBlock
1195
- ? `${headJudgmentBlock}\n\n${effectivePrompt}`
1196
- : effectivePrompt;
1415
+ ? `${headJudgmentBlock}\n\n${dispatchPrompt}`
1416
+ : dispatchPrompt;
1197
1417
 
1198
1418
  run.result = await dispatch({
1199
1419
  decision,
1200
1420
  prompt: directPrompt,
1201
- files,
1421
+ files: dispatchFiles,
1202
1422
  cwd,
1203
1423
  dryRun: false,
1204
1424
  verbose,
@@ -0,0 +1,138 @@
1
+ // routing-advisor.mjs — EMA + epsilon-greedy routing advisor
2
+ // Learns which model works best for which task type from outcome signals.
3
+
4
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+
7
// Weight given to the newest reward when updating a model's EMA.
const ALPHA = 0.3;
// Floor for the exploration probability.
const MIN_EPSILON = 0.1;
// Below this many observations in a cell, advice comes from the static priors.
const MIN_OBSERVATIONS = 5;
// Pseudo-observation count of the static prior when blending it with the EMA.
const PRIOR_WEIGHT = 5;

const STATIC_PRIORS = {
  'search:haiku': 0.85, 'search:sonnet': 0.70, 'search:opus': 0.50,
  'execute:haiku': 0.55, 'execute:sonnet': 0.80, 'execute:opus': 0.85,
  'think:haiku': 0.30, 'think:sonnet': 0.70, 'think:opus': 0.90,
  'review:haiku': 0.40, 'review:sonnet': 0.75, 'review:opus': 0.85,
};

const VALID_MODELS = {
  search: ['haiku', 'sonnet'],
  execute: ['haiku', 'sonnet', 'opus'],
  think: ['sonnet', 'opus'],
  review: ['sonnet', 'opus'],
};

// True for non-null, non-array objects (the only shape the state file may hold).
const isRecord = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
// Returns `value` when it is a finite number, otherwise `fallback`.
const finiteOr = (value, fallback) => (typeof value === 'number' && Number.isFinite(value) ? value : fallback);
// Rejects keys that cannot be stored as ordinary own properties.
const isSafeKey = (key) => typeof key === 'string' && key !== '' && key !== '__proto__';

function stateFile(cwd) { return join(cwd || process.cwd(), '.dualbrain', 'routing-state.json'); }

// Loads the persisted state; anything that is not a JSON object (missing file,
// parse error, `null`, array, scalar) yields an empty state.
function loadState(cwd) {
  try {
    const p = stateFile(cwd);
    if (!existsSync(p)) return {};
    const parsed = JSON.parse(readFileSync(p, 'utf8'));
    return isRecord(parsed) ? parsed : {};
  } catch { return {}; }
}

// Writes the state via a temp file + rename so readers never see a partial file.
function saveState(state, cwd) {
  try {
    const dir = join(cwd || process.cwd(), '.dualbrain');
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
    const p = stateFile(cwd);
    const tmp = p + '.tmp';
    writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf8');
    renameSync(tmp, p);
  } catch { /* non-throwing */ }
}

const staticPrior = (tier, model) => STATIC_PRIORS[`${tier}:${model}`] ?? 0.5;

// Total observation count across all models of one cell; malformed entries count as 0.
const cellObs = (state, key) => {
  const cell = state[key];
  if (!isRecord(cell)) return 0;
  return Object.values(cell).reduce((sum, entry) => sum + Math.max(0, finiteOr(entry?.observations, 0)), 0);
};

// Weighted mix of learned EMA and static prior; the prior counts as PRIOR_WEIGHT observations.
const blended = (ema, n, tier, model) =>
  (n / (n + PRIOR_WEIGHT)) * ema + (PRIOR_WEIGHT / (n + PRIOR_WEIGHT)) * staticPrior(tier, model);

/**
 * Recommend a model for a task using the learned per-cell scores.
 *
 * With fewer than MIN_OBSERVATIONS observations in the `<tier>:<intent>` cell
 * the highest static prior wins. Otherwise a random valid model is explored
 * with probability epsilon (decaying with observations, floored at
 * MIN_EPSILON); in all other cases the model with the best blended score is
 * chosen.
 *
 * @param {{ intent?: string, tier?: string, risk?: string, files?: string[], complexity?: unknown }} taskProfile
 * @param {string} [cwd] - Project directory holding `.dualbrain/routing-state.json`.
 * @returns {{ model: string, reason: string, confidence: number, explored: boolean }}
 */
export function adviseModel(taskProfile, cwd) {
  try {
    const { tier, intent } = taskProfile ?? {};
    // Own-property check so names like "constructor" are not mistaken for tiers.
    const validTier = typeof tier === 'string' && Object.hasOwn(VALID_MODELS, tier) ? tier : 'execute';
    const cellKey = `${validTier}:${intent ?? 'implement'}`;
    const models = VALID_MODELS[validTier];

    const state = loadState(cwd);
    const totalObs = cellObs(state, cellKey);

    if (totalObs < MIN_OBSERVATIONS) {
      // Heuristic: pick highest static prior
      const best = models.reduce((a, b) => (staticPrior(validTier, a) >= staticPrior(validTier, b) ? a : b));
      return { model: best, reason: 'insufficient data, using heuristic', confidence: 0.3, explored: false };
    }

    const epsilon = Math.max(MIN_EPSILON, 0.5 * Math.pow(0.9, totalObs));
    const explored = Math.random() < epsilon;

    if (explored) {
      const model = models[Math.floor(Math.random() * models.length)];
      return { model, reason: 'exploration', confidence: epsilon, explored: true };
    }

    // Exploitation: pick highest blended score
    const cell = isRecord(state[cellKey]) ? state[cellKey] : {};
    let bestModel = models[0];
    let bestScore = -Infinity;
    for (const m of models) {
      const entry = isRecord(cell[m]) ? cell[m] : undefined;
      const ema = finiteOr(entry?.ema, staticPrior(validTier, m));
      const n = Math.max(0, finiteOr(entry?.observations, 0));
      const score = blended(ema, n, validTier, m);
      if (score > bestScore) { bestScore = score; bestModel = m; }
    }

    return { model: bestModel, reason: 'exploitation', confidence: 1 - epsilon, explored: false };
  } catch {
    return { model: 'sonnet', reason: 'error fallback', confidence: 0.1, explored: false };
  }
}

/**
 * Fold one reward into the EMA for `model` within `cellKey` and persist it.
 *
 * Non-throwing. Rewards that are not finite numbers are ignored; finite
 * rewards are clamped into [0, 1] so one bad sample cannot poison the average.
 *
 * @param {string} cellKey - `<tier>:<intent>` cell identifier.
 * @param {string} model - Model name the reward belongs to.
 * @param {number} reward - Number in [0, 1].
 * @param {string} [cwd] - Project directory holding the state file.
 * @returns {void}
 */
export function recordReward(cellKey, model, reward, cwd) {
  try {
    if (!isSafeKey(cellKey) || !isSafeKey(model)) return;
    if (typeof reward !== 'number' || !Number.isFinite(reward)) return;
    const value = Math.min(1, Math.max(0, reward));

    const state = loadState(cwd);
    if (!Object.hasOwn(state, cellKey) || !isRecord(state[cellKey])) state[cellKey] = {};
    const cell = state[cellKey];

    const previous = Object.hasOwn(cell, model) ? cell[model] : undefined;
    const entry = isRecord(previous) && Number.isFinite(previous.ema)
      ? previous
      : { ema: value, observations: 0 };

    entry.ema = ALPHA * value + (1 - ALPHA) * entry.ema;
    entry.observations = Math.max(0, finiteOr(entry.observations, 0)) + 1;
    entry.lastUpdated = new Date().toISOString();
    entry.lastReward = value;
    cell[model] = entry;
    saveState(state, cwd);
  } catch {
    // non-throwing
  }
}

/**
 * Summarise the learned routing state.
 *
 * @param {string} [cwd] - Project directory holding the state file.
 * @returns {{ cells: object, totalObservations: number, topPerformers: object[], worstPerformers: object[] }}
 *   Per-cell `{ ema, observations }` plus the five highest- and lowest-EMA
 *   (cell, model) pairs. All-empty on failure.
 */
export function getRoutingStats(cwd) {
  try {
    const state = loadState(cwd);
    const cells = {};
    const flat = [];
    let totalObservations = 0;

    for (const [cellKey, models] of Object.entries(state)) {
      if (!isRecord(models)) continue; // ignore malformed cells instead of failing the whole report
      cells[cellKey] ??= {};
      for (const [model, entry] of Object.entries(models)) {
        if (!isRecord(entry)) continue;
        const obs = entry.observations ?? 0;
        cells[cellKey][model] = { ema: entry.ema, observations: obs };
        totalObservations += finiteOr(obs, 0);
        flat.push({ cell: cellKey, model, ema: entry.ema, observations: obs });
      }
    }

    // Entries without a numeric EMA sort last instead of producing NaN comparisons.
    const rank = (item) => finiteOr(item.ema, -1);
    flat.sort((a, b) => rank(b) - rank(a));
    return { cells, totalObservations, topPerformers: flat.slice(0, 5), worstPerformers: flat.slice(-5).reverse() };
  } catch {
    return { cells: {}, totalObservations: 0, topPerformers: [], worstPerformers: [] };
  }
}

/**
 * Forget everything learned by overwriting the state file with an empty state.
 *
 * @param {string} [cwd] - Project directory holding the state file.
 * @returns {void}
 */
export function resetAdvisor(cwd) {
  try {
    saveState({}, cwd);
  } catch {
    // non-throwing
  }
}
package/src/signal.mjs ADDED
@@ -0,0 +1,114 @@
1
+ // signal.mjs — Compound outcome signal scoring
2
+ // Combines multiple weak signals into one reliable reward score.
3
+
4
+ import { existsSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+ import { execSync } from 'node:child_process';
7
+
8
// Expected wall-clock duration per tier, in milliseconds; the baseline for the duration signal.
export const EXPECTED_DURATION_MS = { search: 15000, execute: 45000, think: 30000, review: 40000 };

/**
 * Score how close a run's duration is to the expected duration of its tier.
 *
 * 1.0 inside 0.5x–1.5x of expected; falls linearly to 0.5 at 0.2x (and stays
 * 0.5 below that); falls linearly to 0.3 at 3.0x (and stays 0.3 above that).
 *
 * @param {number} durationMs - Observed duration in milliseconds.
 * @param {string} tier - Tier name; unknown tiers use the `execute` baseline.
 * @returns {number|null} Score in [0.3, 1.0], or null when `durationMs` is not
 *   a finite, non-negative number.
 */
export function scoreDurationRatio(durationMs, tier) {
  const ms = Number(durationMs);
  if (!Number.isFinite(ms) || ms < 0) return null;

  const expected = typeof tier === 'string' && Object.hasOwn(EXPECTED_DURATION_MS, tier)
    ? EXPECTED_DURATION_MS[tier]
    : EXPECTED_DURATION_MS.execute;
  const ratio = ms / expected;

  if (ratio >= 0.5 && ratio <= 1.5) return 1.0;
  if (ratio < 0.2) return 0.5;
  if (ratio > 3.0) return 0.3;
  if (ratio < 0.5) return 0.5 + ((ratio - 0.2) / (0.5 - 0.2)) * 0.5;
  // ratio 1.5–3.0
  return 1.0 - ((ratio - 1.5) / (3.0 - 1.5)) * 0.7;
}

/**
 * Fraction of an outcome's changed files that still exist and are not listed
 * by `git diff`.
 *
 * @param {{ filesChanged?: string[] }} outcome - Outcome whose `filesChanged`
 *   is a list of paths (absolute, or relative to `cwd`).
 * @param {string} cwd - Directory the relative paths and git are resolved against.
 * @returns {number|null} Value in [0, 1]; 1.0 when there is no file list;
 *   null when the measurement itself fails.
 */
export function measureFileSurvival(outcome, cwd) {
  try {
    const files = Array.isArray(outcome.filesChanged) ? outcome.filesChanged : [];
    if (files.length === 0) return 1.0;

    let changed;
    try {
      // `--relative` reports paths relative to `cwd`, matching cwd-relative
      // entries even when `cwd` is a subdirectory of the repository.
      changed = new Set(
        execSync('git diff --name-only --relative', { cwd, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] })
          .split('\n')
          .map((line) => line.trim())
          .filter(Boolean),
      );
    } catch {
      changed = new Set();
    }

    const base = String(cwd);
    const survived = files.filter((f) => {
      if (typeof f !== 'string') return false; // malformed entry counts as not survived
      // Joining an absolute path onto cwd would point at a non-existent file.
      const abs = f.startsWith('/') ? f : join(cwd, f);
      const rel = abs.startsWith(`${base}/`) ? abs.slice(base.length + 1) : f;
      return existsSync(abs) && !changed.has(f) && !changed.has(rel);
    });
    return survived.length / files.length;
  } catch {
    return null;
  }
}

/**
 * Combine several weak signals into one reward score.
 *
 * Signals and weights: exit success 0.3, duration ratio 0.25, token
 * efficiency 0.25, file survival 0.2. Signals that are unavailable (null) are
 * dropped and the remaining weights are renormalised; `confidence` is the
 * total weight of the signals that were available.
 *
 * @param {object} outcome - Reads `tier`, `success`, `status`, `durationMs`,
 *   `filesChanged` (count or list) and `tokensUsed.output`.
 * @param {{ fileSurvival?: number|null }|null} [context] - Optional delayed signals.
 * @returns {{ reward: number, confidence: number, signals: { exitSuccess: number, durationRatio: number|null, tokenEfficiency: number|null, fileSurvival: number|null } }}
 *   `reward` and `confidence` are in [0, 1]. On failure both are 0 and every
 *   optional signal is null.
 */
export function scoreOutcome(outcome, context = {}) {
  try {
    const tier = outcome.tier ?? 'execute';
    const signals = [];

    // Signal 1: exit success (weight 0.3)
    let exitVal;
    if (outcome.success === true) exitVal = 1.0;
    else if (outcome.status === 'partial') exitVal = 0.4;
    else exitVal = 0.0;
    signals.push({ name: 'exitSuccess', value: exitVal, weight: 0.3 });

    // Signal 2: duration ratio (weight 0.25)
    const durationMs = outcome.durationMs ?? 0;
    const durVal = durationMs > 0 ? scoreDurationRatio(durationMs, tier) : null;
    signals.push({ name: 'durationRatio', value: durVal, weight: 0.25 });

    // Signal 3: token efficiency (weight 0.25)
    let effVal = null;
    const filesChanged = outcome.filesChanged ?? 0;
    const rawCount = typeof filesChanged === 'number' ? filesChanged : filesChanged.length;
    // A count that is not a finite number (e.g. an object without length)
    // is treated as 0 so it cannot turn the reward into NaN.
    const fileCount = Number.isFinite(rawCount) ? rawCount : 0;
    if (!(fileCount === 0 && tier === 'think')) {
      // Without a reported token count, estimate one from the duration.
      const reported = outcome.tokensUsed?.output;
      const tokensUsed = Number.isFinite(reported)
        ? reported
        : (durationMs > 0 ? Math.round(durationMs / 100) : null);
      if (tokensUsed !== null) {
        const efficiency = fileCount / Math.max(1, tokensUsed / 1000);
        if (efficiency > 2) effVal = 1.0;
        else if (efficiency >= 0.5) effVal = 0.5 + ((efficiency - 0.5) / 1.5) * 0.5;
        else if (efficiency < 0.1) effVal = 0.2;
        else effVal = 0.2 + ((efficiency - 0.1) / 0.4) * 0.3;
      }
    }
    signals.push({ name: 'tokenEfficiency', value: effVal, weight: 0.25 });

    // Signal 4: file survival (weight 0.2) — delayed, may be null.
    // `context` may be passed as explicit null; only finite numbers count.
    const rawSurvival = context?.fileSurvival;
    const survivalVal = typeof rawSurvival === 'number' && Number.isFinite(rawSurvival) ? rawSurvival : null;
    signals.push({ name: 'fileSurvival', value: survivalVal, weight: 0.2 });

    // Compound score with weight redistribution
    const active = signals.filter((s) => s.value !== null);
    const totalWeight = active.reduce((sum, s) => sum + s.weight, 0);
    const reward = totalWeight > 0
      ? active.reduce((sum, s) => sum + (s.value * s.weight / totalWeight), 0)
      : 0;
    const confidence = totalWeight;

    return {
      reward: Math.min(1, Math.max(0, reward)),
      confidence: Math.min(1, confidence),
      signals: {
        exitSuccess: exitVal,
        durationRatio: durVal,
        tokenEfficiency: effVal,
        fileSurvival: survivalVal,
      },
    };
  } catch {
    // exitSuccess is numeric on the success path; keep the fallback numeric too.
    return { reward: 0, confidence: 0, signals: { exitSuccess: 0, durationRatio: null, tokenEfficiency: null, fileSurvival: null } };
  }
}