dual-brain 0.2.23 → 0.2.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/enforce-tier.mjs +29 -1
- package/package.json +6 -2
- package/src/dispatch.mjs +34 -8
- package/src/outcome.mjs +103 -1
- package/src/pipeline.mjs +227 -7
- package/src/routing-advisor.mjs +138 -0
- package/src/signal.mjs +114 -0
package/hooks/enforce-tier.mjs
CHANGED
|
@@ -290,7 +290,19 @@ const THINK_WORDS = /\b(plan|design|architect|review|audit|security|code[-\s]?re
|
|
|
290
290
|
const WRITE_INTENT_WORDS = /\b(edit|fix|change|update|create|write|modify|implement|refactor|add|remove|delete|build|install|configure|patch|apply|move|rename|migrate|replace|rewrite|generate|scaffold|init(?:ialize)?|setup|deploy|run\s+tests?|commit|push|install|uninstall)\b/i;
|
|
291
291
|
|
|
292
292
|
// Dispatch marker prefix stamped by src/dispatch.mjs for all legitimate dispatches.
|
|
293
|
-
const DISPATCH_MARKER_RE = /<!--\s*dual-brain-dispatch
|
|
293
|
+
const DISPATCH_MARKER_RE = /<!--\s*dual-brain-dispatch:[a-z0-9|:.\-]+\s*-->/i;
|
|
294
|
+
|
|
295
|
+
/**
 * Parse the dispatch marker stamped into a prompt by src/dispatch.mjs.
 *
 * Marker shape: `<!-- dual-brain-dispatch:<runId>|key:value|key:value -->`
 *
 * @param {string} prompt - Raw prompt text that may contain a marker.
 * @returns {Object<string, string>|null} `{ runId, ...fields }` with every
 *   `key:value` pair found after the run id, or null when no marker is present.
 */
function parseDispatchMarker(prompt) {
  // Anything that is not a string cannot carry a marker (and has no usable .match).
  if (typeof prompt !== 'string') return null;

  // Same whitespace and case tolerance as DISPATCH_MARKER_RE, so a marker that
  // is detected is also parseable.
  const match = prompt.match(/<!--\s*dual-brain-dispatch:([^>]+?)\s*-->/i);
  if (!match) return null;

  const [runId, ...pairs] = match[1].trim().split('|');
  const fields = { runId };

  for (const pair of pairs) {
    // Split on the FIRST colon only so values that themselves contain ':' survive intact.
    const separator = pair.indexOf(':');
    if (separator <= 0) continue;
    const key = pair.slice(0, separator).trim();
    const value = pair.slice(separator + 1).trim();
    // Empty values (e.g. a trailing "req:") are treated as absent.
    if (key && value) fields[key] = value;
  }

  return fields;
}
|
|
294
306
|
|
|
295
307
|
/**
|
|
296
308
|
* Determine whether a prompt is purely read-only (no write keywords at all).
|
|
@@ -357,6 +369,22 @@ try {
|
|
|
357
369
|
// Non-blocking governance warning — will be included in final output
|
|
358
370
|
}
|
|
359
371
|
|
|
372
|
+
// ── Over-provisioning check via enriched dispatch marker ───────────────────
|
|
373
|
+
// If the marker carries governance scores, validate that the model tier isn't
|
|
374
|
+
// higher than the task actually requires (closes the brainstorm-opus loophole).
|
|
375
|
+
const markerFields = parseDispatchMarker(rawPrompt);
|
|
376
|
+
if (markerFields?.req && markerFields?.model) {
|
|
377
|
+
const reqTier = parseInt(markerFields.req, 10);
|
|
378
|
+
const modelTier = getGovernanceTier(markerFields.model);
|
|
379
|
+
if (!isNaN(reqTier) && modelTier > reqTier && reqTier <= 2) {
|
|
380
|
+
process.stdout.write(JSON.stringify({
|
|
381
|
+
systemMessage: `[governance] Over-provisioned: task requires tier ${reqTier} but using tier ${modelTier} model (${markerFields.model}). Consider downgrading.`,
|
|
382
|
+
}));
|
|
383
|
+
process.exit(0);
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
// ── End over-provisioning check ────────────────────────────────────────────
|
|
387
|
+
|
|
360
388
|
// Compute prompt hash early for duplicate detection and logging
|
|
361
389
|
const promptHash = computePromptHash(ti);
|
|
362
390
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dual-brain",
|
|
3
|
-
"version": "0.2.23",
|
|
3
|
+
"version": "0.2.25",
|
|
4
4
|
"description": "AI orchestration across Claude + OpenAI subscriptions — smart routing, budget awareness, and dual-brain collaboration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -47,7 +47,9 @@
|
|
|
47
47
|
"./envelope": "./src/envelope.mjs",
|
|
48
48
|
"./session-lock": "./src/session-lock.mjs",
|
|
49
49
|
"./governance": "./src/governance.mjs",
|
|
50
|
-
"./context-intel": "./src/context-intel.mjs"
|
|
50
|
+
"./context-intel": "./src/context-intel.mjs",
|
|
51
|
+
"./signal": "./src/signal.mjs",
|
|
52
|
+
"./routing-advisor": "./src/routing-advisor.mjs"
|
|
51
53
|
},
|
|
52
54
|
"keywords": [
|
|
53
55
|
"claude-code",
|
|
@@ -134,6 +136,8 @@
|
|
|
134
136
|
"src/session-lock.mjs",
|
|
135
137
|
"src/governance.mjs",
|
|
136
138
|
"src/context-intel.mjs",
|
|
139
|
+
"src/signal.mjs",
|
|
140
|
+
"src/routing-advisor.mjs",
|
|
137
141
|
"bin/*.mjs",
|
|
138
142
|
"hooks/enforce-tier.mjs",
|
|
139
143
|
"hooks/cost-logger.mjs",
|
package/src/dispatch.mjs
CHANGED
|
@@ -18,6 +18,7 @@ import { getFailoverOrder } from './decide.mjs';
|
|
|
18
18
|
import { getTemplate, renderPrompt, quickRender } from './templates.mjs';
|
|
19
19
|
import { compilePacket, shapeForRole } from './context-intel.mjs';
|
|
20
20
|
import { buildContextPack } from './context.mjs';
|
|
21
|
+
import { scoreTask, computeRequiredTier } from './governance.mjs';
|
|
21
22
|
|
|
22
23
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
23
24
|
const USAGE_DIR = join(__dirname, '..', '.dualbrain', 'usage');
|
|
@@ -706,8 +707,8 @@ function _renderTemplatedPrompt(prompt, decision, context = {}) {
|
|
|
706
707
|
// Prepend a marker to every prompt that goes through the official dispatch pipeline.
|
|
707
708
|
// The enforce-tier hook checks for this marker to distinguish legitimate dispatches
|
|
708
709
|
// from raw Agent calls made by the HEAD that bypass the dual-brain pipeline.
|
|
709
|
-
// Format: <!-- dual-brain-dispatch
|
|
710
|
-
// runId is a short timestamp-based ID
|
|
710
|
+
// Format: <!-- dual-brain-dispatch:<runId>|tier:<tier>|model:<model>|risk:<risk>|req:<requiredTier> -->
|
|
711
|
+
// runId is a short timestamp-based ID; governance fields enable over-provisioning validation.
|
|
711
712
|
|
|
712
713
|
let _dispatchRunId = null;
|
|
713
714
|
|
|
@@ -719,9 +720,14 @@ function _getDispatchRunId() {
|
|
|
719
720
|
return _dispatchRunId;
|
|
720
721
|
}
|
|
721
722
|
|
|
722
|
-
function _prependDispatchMarker(prompt) {
|
|
723
|
+
/**
 * Prefix a prompt with the dispatch marker that enforce-tier.mjs looks for.
 *
 * Format: `<!-- dual-brain-dispatch:<runId>|tier:<tier>|model:<model>|risk:<risk>|req:<requiredTier> -->`
 *
 * @param {string} prompt - Prompt text to stamp.
 * @param {object} [decision] - Routing decision; `tier`, `model`, `risk` and
 *   `_requiredTier` are read, each with a fallback when missing.
 * @returns {string} The marker line followed by the original prompt.
 */
function _prependDispatchMarker(prompt, decision = {}) {
  const runId = _getDispatchRunId();
  // A default parameter only covers `undefined`; tolerate an explicit null too.
  const source = decision ?? {};

  // The hook only recognises markers made of [a-z0-9|:.-], and it splits fields
  // on '|' and ':'. Replace any other character inside a value so an unusual
  // model id cannot make a legitimate dispatch look like a bypass.
  const markerSafe = (value) => String(value).replace(/[^a-z0-9.\-]/gi, '-');

  const tier = markerSafe(source.tier || 'execute');
  const model = markerSafe(source.model || 'sonnet');
  const risk = markerSafe(source.risk || 'medium');
  // `??` rather than `||`: a numeric required tier of 0 is kept instead of being blanked.
  const requiredTier = markerSafe(source._requiredTier ?? '');

  const marker = `<!-- dual-brain-dispatch:${runId}|tier:${tier}|model:${model}|risk:${risk}|req:${requiredTier} -->`;
  return `${marker}\n${prompt}`;
}
|
|
726
732
|
|
|
727
733
|
// ─── Related session age label ────────────────────────────────────────────────
|
|
@@ -845,7 +851,12 @@ async function dispatch(input = {}) {
|
|
|
845
851
|
|
|
846
852
|
// Stamp the prompt with the dispatch marker so enforce-tier.mjs can recognise
|
|
847
853
|
// that this agent call came through the official pipeline.
|
|
848
|
-
|
|
854
|
+
// Compute required tier for governance validation
|
|
855
|
+
try {
|
|
856
|
+
const scores = scoreTask({ intent: decision.tier, risk: decision.risk, files, objective: prompt.slice(0, 200) });
|
|
857
|
+
decision = { ...decision, _requiredTier: computeRequiredTier(scores) };
|
|
858
|
+
} catch { /* non-blocking */ }
|
|
859
|
+
prompt = _prependDispatchMarker(prompt, decision);
|
|
849
860
|
|
|
850
861
|
// ── Situation brief injection ────────────────────────────────────────────────
|
|
851
862
|
// Prepend a compact project-state summary when provided by the pipeline.
|
|
@@ -1149,7 +1160,7 @@ async function dispatch(input = {}) {
|
|
|
1149
1160
|
}
|
|
1150
1161
|
// ── End auto-review annotation ────────────────────────────────────────────
|
|
1151
1162
|
|
|
1152
|
-
|
|
1163
|
+
const nativeResult = {
|
|
1153
1164
|
status: success ? 'completed' : 'failed',
|
|
1154
1165
|
type: 'native-agent',
|
|
1155
1166
|
provider: currentProvider,
|
|
@@ -1166,6 +1177,11 @@ async function dispatch(input = {}) {
|
|
|
1166
1177
|
authVerified: true,
|
|
1167
1178
|
error: success ? null : errorText.slice(0, 200),
|
|
1168
1179
|
};
|
|
1180
|
+
try {
|
|
1181
|
+
const { recordDispatchOutcome } = await import('./outcome.mjs');
|
|
1182
|
+
recordDispatchOutcome(input, nativeResult);
|
|
1183
|
+
} catch { /* never block */ }
|
|
1184
|
+
return nativeResult;
|
|
1169
1185
|
}
|
|
1170
1186
|
|
|
1171
1187
|
const command = buildCommand(effectiveDecision, prompt, files, cwd);
|
|
@@ -1268,7 +1284,7 @@ async function dispatch(input = {}) {
|
|
|
1268
1284
|
}
|
|
1269
1285
|
// ── End auto-review annotation ──────────────────────────────────────────────
|
|
1270
1286
|
|
|
1271
|
-
|
|
1287
|
+
const subResult = {
|
|
1272
1288
|
status: success ? 'completed' : 'failed',
|
|
1273
1289
|
provider: subProvider,
|
|
1274
1290
|
model: subModel,
|
|
@@ -1283,6 +1299,11 @@ async function dispatch(input = {}) {
|
|
|
1283
1299
|
authVerified: true,
|
|
1284
1300
|
error: success ? null : errorText.slice(0, 200),
|
|
1285
1301
|
};
|
|
1302
|
+
try {
|
|
1303
|
+
const { recordDispatchOutcome } = await import('./outcome.mjs');
|
|
1304
|
+
recordDispatchOutcome(input, subResult);
|
|
1305
|
+
} catch { /* never block */ }
|
|
1306
|
+
return subResult;
|
|
1286
1307
|
}
|
|
1287
1308
|
|
|
1288
1309
|
// ─── Dual-brain dispatch (parallel) ───────────────────────────────────────────
|
|
@@ -1295,7 +1316,12 @@ async function dispatchDualBrain(input = {}) {
|
|
|
1295
1316
|
prompt = redact(prompt);
|
|
1296
1317
|
|
|
1297
1318
|
// Stamp with dispatch marker so enforce-tier.mjs allows this Agent call
|
|
1298
|
-
|
|
1319
|
+
// Compute required tier for governance validation
|
|
1320
|
+
try {
|
|
1321
|
+
const scores = scoreTask({ intent: decision.tier, risk: decision.risk, files, objective: prompt.slice(0, 200) });
|
|
1322
|
+
decision = { ...decision, _requiredTier: computeRequiredTier(scores) };
|
|
1323
|
+
} catch { /* non-blocking */ }
|
|
1324
|
+
prompt = _prependDispatchMarker(prompt, decision);
|
|
1299
1325
|
|
|
1300
1326
|
// ── Situation brief injection ────────────────────────────────────────────────
|
|
1301
1327
|
const _dualBrainBrief = typeof input.situationBrief === 'string' && input.situationBrief.trim()
|
package/src/outcome.mjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { mkdirSync, appendFileSync, readFileSync, existsSync } from 'fs';
|
|
1
|
+
import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync, readdirSync } from 'fs';
|
|
2
2
|
import { join } from 'path';
|
|
3
3
|
import { randomUUID } from 'crypto';
|
|
4
|
+
import { execSync } from 'child_process';
|
|
4
5
|
|
|
5
6
|
const STOP_WORDS = new Set([
|
|
6
7
|
'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'from',
|
|
@@ -44,6 +45,36 @@ function last7DaysFiles(cwd) {
|
|
|
44
45
|
return files;
|
|
45
46
|
}
|
|
46
47
|
|
|
48
|
+
/**
 * Persist a compact record of one dispatch as
 * `<outcomes dir>/outcome_<id>.json`. Best-effort: never throws.
 *
 * @param {object} dispatchInput - Input that was passed to dispatch(); `cwd`,
 *   `decision` and `prompt` are read.
 * @param {object} result - Dispatch result; `status`, `tier`, `model`,
 *   `provider`, `durationMs`, `filesChanged` and `errors`/`error` are read.
 * @returns {object|null} The record that was written, or null on any failure.
 */
export function recordDispatchOutcome(dispatchInput, result) {
  try {
    const cwd = dispatchInput.cwd ?? process.cwd();
    const decision = dispatchInput.decision ?? {};
    ensureDir(cwd);

    // Timestamp first keeps file names sortable by time; the random suffix stops
    // two dispatches finishing in the same millisecond from overwriting each other.
    const id = `out_${Date.now().toString(36)}_${randomUUID().slice(0, 8)}`;

    const changedFiles = Array.isArray(result.filesChanged) ? result.filesChanged : [];

    // Normalise to an array before slicing: slicing a bare string would keep
    // its first three characters rather than the first three errors.
    const rawErrors = result.errors ?? (result.error ? [result.error] : []);
    const errors = (Array.isArray(rawErrors) ? rawErrors : [rawErrors]).slice(0, 3);

    const record = {
      id,
      timestamp: new Date().toISOString(),
      prompt: (dispatchInput.prompt ?? '').slice(0, 200),
      tier: decision.tier ?? result.tier ?? 'execute',
      model: decision.model ?? result.model ?? 'unknown',
      provider: decision.provider ?? result.provider ?? 'unknown',
      success: result.status === 'success' || result.status === 'completed',
      status: result.status ?? 'unknown',
      durationMs: result.durationMs ?? 0,
      filesChanged: changedFiles.length,
      // checkFileSurvival() reads the path list from `result.filesChanged`;
      // keep it alongside the count so these records can be scored later.
      result: { filesChanged: changedFiles },
      errors,
      lesson: '',
    };

    const filePath = join(outcomesDir(cwd), `outcome_${id}.json`);
    writeFileSync(filePath, JSON.stringify(record, null, 2), 'utf8');
    return record;
  } catch {
    // Outcome logging must never break a dispatch.
    return null;
  }
}
|
|
77
|
+
|
|
47
78
|
export function computeRoutingScore(plan, result, verification) {
|
|
48
79
|
let score = 3;
|
|
49
80
|
if (result.success && result.duration < 60_000) score += 1;
|
|
@@ -174,6 +205,77 @@ export async function getRelevantOutcomes(prompt, files = [], cwd, options = {})
|
|
|
174
205
|
}
|
|
175
206
|
}
|
|
176
207
|
|
|
208
|
+
/**
 * Score how many files touched by recent outcomes still exist unmodified.
 *
 * Looks at the last 20 `outcome_*.json` files under `<cwd>/.dualbrain/outcomes`.
 * For each record that has a non-empty `result.filesChanged` list and no
 * `survivalScore` yet, computes the fraction of listed files that exist and
 * are not reported by `git diff`, writes the score back into the record file,
 * and reports it. Best-effort: never throws.
 *
 * @param {string} cwd - Project directory.
 * @returns {Promise<Array<{ id: string, survivalScore: number }>>} Newly scored records.
 */
export async function checkFileSurvival(cwd) {
  try {
    const dir = join(cwd, '.dualbrain', 'outcomes');
    if (!existsSync(dir)) return [];

    // Collect up to the last 20 individual outcome JSON files
    let outcomeFiles;
    try {
      outcomeFiles = readdirSync(dir)
        .filter((name) => name.startsWith('outcome_') && name.endsWith('.json'))
        .sort()
        .slice(-20);
    } catch {
      return [];
    }

    // Get current git-modified files (best-effort). `--relative` makes git print
    // paths relative to `cwd`; without it they are relative to the repository
    // root and would not line up with `join(cwd, path)` when cwd is a subdirectory.
    const modifiedFiles = new Set();
    try {
      const gitOut = execSync('git diff --name-only --relative', {
        cwd,
        stdio: ['ignore', 'pipe', 'pipe'],
      }).toString();
      for (const rel of gitOut.split('\n').map((line) => line.trim()).filter(Boolean)) {
        modifiedFiles.add(rel);
        modifiedFiles.add(join(cwd, rel));
      }
    } catch {
      // git unavailable or not a repository — proceed without modified-file check
    }

    const scored = [];

    for (const fname of outcomeFiles) {
      const fpath = join(dir, fname);

      let record;
      try {
        record = JSON.parse(readFileSync(fpath, 'utf8'));
      } catch {
        continue;
      }
      // Valid JSON that is not an object (e.g. `null`) must not abort the whole scan.
      if (record === null || typeof record !== 'object') continue;

      // Skip if already scored or no filesChanged list
      if (record.survivalScore !== undefined) continue;
      const changedFiles = record.result?.filesChanged;
      if (!Array.isArray(changedFiles) || changedFiles.length === 0) continue;

      let survived = 0;
      for (const file of changedFiles) {
        // A non-string entry cannot name a file; count it as not survived.
        if (typeof file !== 'string') continue;
        const absPath = file.startsWith('/') ? file : join(cwd, file);
        const modified = modifiedFiles.has(file) || modifiedFiles.has(absPath);
        if (existsSync(absPath) && !modified) survived++;
      }

      const survivalScore = survived / changedFiles.length;
      record.survivalScore = survivalScore;

      try {
        writeFileSync(fpath, JSON.stringify(record, null, 2), 'utf8');
      } catch {
        // write failed — do not report a score that was not persisted
        continue;
      }

      scored.push({ id: record.id, survivalScore });
    }

    return scored;
  } catch {
    return [];
  }
}
|
|
278
|
+
|
|
177
279
|
export async function getOutcomeStats(cwd, days = 7) {
|
|
178
280
|
try {
|
|
179
281
|
const allFiles = last7DaysFiles(cwd).slice(0, days);
|
package/src/pipeline.mjs
CHANGED
|
@@ -10,8 +10,10 @@ import { detectTask } from './detect.mjs';
|
|
|
10
10
|
import { decideRoute, getWorkStyle, WORK_STYLES } from './decide.mjs';
|
|
11
11
|
import { dispatch } from './dispatch.mjs';
|
|
12
12
|
import { loadProfile } from './profile.mjs';
|
|
13
|
-
import { mkdirSync, writeFileSync } from 'node:fs';
|
|
13
|
+
import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
|
|
14
14
|
import { join } from 'node:path';
|
|
15
|
+
import { buildContextPack as buildContextPackIntel } from './context.mjs';
|
|
16
|
+
import { compilePacket } from './context-intel.mjs';
|
|
15
17
|
|
|
16
18
|
// Lazy-load collaboration module
|
|
17
19
|
let _collab = null;
|
|
@@ -648,6 +650,182 @@ function runGate(run, gateName, gateFn) {
|
|
|
648
650
|
return result.passed;
|
|
649
651
|
}
|
|
650
652
|
|
|
653
|
+
// ─── Pre-dispatch think (Position 1: context intelligence) ───────────────────
|
|
654
|
+
|
|
655
|
+
/**
 * Optionally spawn a cheap think agent to produce a refined work spec before
 * the real dispatch. Non-blocking on any failure.
 *
 * Returns `{ refined: false }` without dispatching when: the call is recursive,
 * the tier is `search`, the routing decision is already >= 0.9 confident, the
 * work style is `cost-saver`, or the recorded think metrics show a hit rate
 * below 30% after at least 10 observations.
 *
 * @param {string} prompt
 * @param {string[]} files
 * @param {object} decision — from plan._decision
 * @param {string} cwd
 * @param {object} profile
 * @param {object} [opts]
 * @param {boolean} [opts._skipPreDispatchThink] — set true on recursive calls
 * @param {function} [opts.log] — logging function
 * @param {boolean} [opts.verbose] — report the ROI auto-disable on stderr
 * @returns {Promise<{ refined: boolean, reason?: string, prompt?: string, files?: string[], decision?: object, confidence?: number }>}
 */
async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}) {
  const log = opts.log ?? (() => {});

  // Guard: never recurse
  if (opts._skipPreDispatchThink) {
    log('[dual-brain] pre-dispatch think: skipped (recursive call)');
    return { refined: false };
  }

  // Guard: only execute/think tiers. `search` is the only tier below
  // governance level 2, so this single check covers the tier requirement.
  const tier = decision?.tier ?? 'execute';
  if (tier === 'search') {
    log('[dual-brain] pre-dispatch think: skipped (search tier)');
    return { refined: false };
  }

  // Guard: decision confidence must be < 0.9
  const confidence = decision?.confidence ?? 0.5;
  if (confidence >= 0.9) {
    log('[dual-brain] pre-dispatch think: skipped (confidence >= 0.9)');
    return { refined: false };
  }

  // Guard: not cost-saver work style
  try {
    const style = getWorkStyle(profile);
    if (style.key === 'cost-saver') {
      log('[dual-brain] pre-dispatch think: skipped (cost-saver profile)');
      return { refined: false };
    }
  } catch {
    // profile unavailable — proceed
  }

  // Auto-disable if ROI is bad (< 30% hit rate after 10+ observations).
  // Everything about the metrics file is read defensively: a missing, corrupt
  // or non-object file (e.g. `null`) must not throw out of this function.
  let hits = 0;
  let misses = 0;
  try {
    const stored = JSON.parse(readFileSync(join(cwd, '.dualbrain', 'think-metrics.json'), 'utf8'));
    hits = Number(stored?.hits) || 0;
    misses = Number(stored?.misses) || 0;
  } catch {
    // no usable metrics yet — treat as zero observations
  }
  const observations = hits + misses;
  if (observations >= 10 && hits / observations < 0.3) {
    const verbose = opts.verbose ?? false;
    if (verbose) process.stderr.write('[dual-brain] pre-dispatch think disabled: hit rate below 30%\n');
    return { refined: false, reason: 'think ROI too low, auto-disabled' };
  }

  try {
    log('[dual-brain] pre-dispatch think: refining work spec...');

    // Build the thinker context pack
    const pack = await buildContextPackIntel(prompt, files, cwd);

    // Compile to a thinker-shaped prompt (sonnet, 3000 token budget)
    const thinkerPrompt = compilePacket(pack, 'thinker', 'sonnet', 3000);

    // Dispatch to a think agent — use sonnet, tier=think, skip all extras
    const thinkResult = await dispatch({
      decision: {
        provider: 'claude',
        model: 'sonnet',
        tier: 'think',
        confidence: 1, // internal call — fully confident
      },
      prompt: thinkerPrompt,
      files: [],
      cwd,
      dryRun: false,
      verbose: false,
      profile,
      _skipPreDispatchThink: true,
      _skipRelatedContext: true,
    });

    // Parse the think result — expect JSON with { decision, confidence, workSpec }
    let parsed = null;
    try {
      const raw = typeof thinkResult === 'string'
        ? thinkResult
        : (thinkResult?.output ?? thinkResult?.result ?? thinkResult?.text ?? JSON.stringify(thinkResult));

      // Extract JSON from possible prose wrapping
      const jsonMatch = raw.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        parsed = JSON.parse(jsonMatch[0]);
      }
    } catch {
      // JSON parse failed — proceed unchanged
    }

    if (!parsed || typeof parsed.confidence !== 'number' || parsed.confidence <= 0.7) {
      const reason = !parsed ? 'unparseable response' : `confidence ${parsed.confidence} <= 0.7`;
      log(`[dual-brain] pre-dispatch think: skipped (${reason})`);
      _recordThinkMetrics(false, cwd);
      return { refined: false };
    }

    const ws = parsed.workSpec;
    // The objective becomes the dispatch prompt, so it must be real text.
    if (!ws || typeof ws.objective !== 'string' || !ws.objective.trim()) {
      log('[dual-brain] pre-dispatch think: skipped (no workSpec.objective)');
      _recordThinkMetrics(false, cwd);
      return { refined: false };
    }

    // Apply refinements. Model output is untrusted: accept list fields only when
    // they really are arrays, otherwise a string would be spread into characters.
    const newObjective = ws.objective;
    const extraFiles = Array.isArray(ws.files)
      ? ws.files.filter((file) => typeof file === 'string' && file)
      : [];
    const newFiles = [...new Set([...files, ...extraFiles])];
    const extraCriteria = Array.isArray(ws.criteria) ? ws.criteria : [];
    const newDecision = extraCriteria.length
      ? { ...decision, acceptanceCriteria: [...(decision?.acceptanceCriteria ?? []), ...extraCriteria] }
      : decision;

    log(`[dual-brain] think refined: "${newObjective.slice(0, 60)}..." (confidence: ${parsed.confidence})`);

    _recordThinkMetrics(true, cwd);
    return {
      refined: true,
      prompt: newObjective,
      files: newFiles,
      decision: newDecision,
      confidence: parsed.confidence,
    };
  } catch (err) {
    // Non-blocking on any failure
    log(`[dual-brain] pre-dispatch think: skipped (error: ${err.message})`);
    _recordThinkMetrics(false, cwd);
    return { refined: false };
  }
}
|
|
806
|
+
|
|
807
|
+
/**
 * Record a think hit or miss into `<cwd>/.dualbrain/think-metrics.json`
 * (non-blocking: every failure is swallowed).
 *
 * Counters missing from, or malformed in, an existing file are treated as 0 so
 * a partial or corrupt file can never turn the metrics into NaN/null.
 *
 * @param {boolean} hit — true if the think agent produced a usable refinement
 * @param {string} cwd
 */
function _recordThinkMetrics(hit, cwd) {
  try {
    const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');

    let stored = null;
    try {
      stored = JSON.parse(readFileSync(metricsPath, 'utf8'));
    } catch {
      // missing or unparseable file — start from zero
    }
    const previous = stored !== null && typeof stored === 'object' && !Array.isArray(stored) ? stored : {};

    // Only a finite, non-negative number is a usable counter.
    const counter = (value) => (Number.isFinite(value) && value >= 0 ? value : 0);

    const metrics = {
      ...previous,
      hits: counter(previous.hits) + (hit ? 1 : 0),
      misses: counter(previous.misses) + (hit ? 0 : 1),
      totalTokens: counter(previous.totalTokens) + 3000, // budget per think call
      lastUpdated: new Date().toISOString(),
    };

    mkdirSync(join(cwd, '.dualbrain'), { recursive: true });
    writeFileSync(metricsPath, JSON.stringify(metrics, null, 2) + '\n');
  } catch { /* non-blocking */ }
}
|
|
828
|
+
|
|
651
829
|
// ─── Main entry point ─────────────────────────────────────────────────────────
|
|
652
830
|
|
|
653
831
|
/**
|
|
@@ -1070,7 +1248,49 @@ export async function runPipeline(trigger, prompt, options = {}) {
|
|
|
1070
1248
|
}
|
|
1071
1249
|
}
|
|
1072
1250
|
|
|
1073
|
-
|
|
1251
|
+
let decision = { ...run.plan._decision };
|
|
1252
|
+
|
|
1253
|
+
// ── Pre-dispatch think (Position 1: context intelligence) ────────────────
|
|
1254
|
+
// For tier-2+ non-trivial tasks with decision confidence < 0.9, spawn a
|
|
1255
|
+
// cheap sonnet think agent to produce a refined work spec before the real
|
|
1256
|
+
// dispatch. Non-blocking — if it fails or confidence is low, proceed as-is.
|
|
1257
|
+
{
|
|
1258
|
+
const thinkRefinement = await preDispatchThink(
|
|
1259
|
+
effectivePrompt,
|
|
1260
|
+
files,
|
|
1261
|
+
decision,
|
|
1262
|
+
cwd,
|
|
1263
|
+
run.context?.profile ?? {},
|
|
1264
|
+
{ log, _skipPreDispatchThink: options._skipPreDispatchThink }
|
|
1265
|
+
);
|
|
1266
|
+
if (thinkRefinement.refined) {
|
|
1267
|
+
// Mutate locals so both collab and direct paths use the refined inputs
|
|
1268
|
+
// (effectivePrompt is const — store refinement in a mutable local)
|
|
1269
|
+
run._thinkRefinedPrompt = thinkRefinement.prompt;
|
|
1270
|
+
run._thinkRefinedFiles = thinkRefinement.files;
|
|
1271
|
+
decision = thinkRefinement.decision;
|
|
1272
|
+
|
|
1273
|
+
// Cascade: if think agent is highly confident and task is simple, downgrade worker model
|
|
1274
|
+
if (thinkRefinement.decision) {
|
|
1275
|
+
const thinkConf = thinkRefinement.confidence || 0;
|
|
1276
|
+
const currentModel = decision.model || 'sonnet';
|
|
1277
|
+
if (thinkConf >= 0.9 && currentModel !== 'haiku') {
|
|
1278
|
+
// High confidence from thinker = clear spec = cheaper model can execute
|
|
1279
|
+
const prevModel = decision.model;
|
|
1280
|
+
decision.model = 'haiku';
|
|
1281
|
+
if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded ${prevModel || 'sonnet'} to haiku\n`);
|
|
1282
|
+
} else if (thinkConf >= 0.75 && currentModel === 'opus') {
|
|
1283
|
+
// Moderate confidence but spec is clear enough for sonnet
|
|
1284
|
+
decision.model = 'sonnet';
|
|
1285
|
+
if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded opus to sonnet\n`);
|
|
1286
|
+
}
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1289
|
+
}
|
|
1290
|
+
|
|
1291
|
+
// Resolve the (possibly refined) prompt and file list for dispatch
|
|
1292
|
+
const dispatchPrompt = run._thinkRefinedPrompt ?? effectivePrompt;
|
|
1293
|
+
const dispatchFiles = run._thinkRefinedFiles ?? files;
|
|
1074
1294
|
|
|
1075
1295
|
// ── HEAD judgment injection into agent prompts ─────────────────────────────
|
|
1076
1296
|
// HEAD's obligations, noticings, and uncertainties flow to the work agent
|
|
@@ -1130,13 +1350,13 @@ export async function runPipeline(trigger, prompt, options = {}) {
|
|
|
1130
1350
|
|
|
1131
1351
|
// Inject collaboration context + HEAD judgment into prompt
|
|
1132
1352
|
const collabContext = collab.buildAgentContext(session, primaryId);
|
|
1133
|
-
const promptParts = [collabContext, headJudgmentBlock,
|
|
1353
|
+
const promptParts = [collabContext, headJudgmentBlock, dispatchPrompt].filter(Boolean);
|
|
1134
1354
|
const collabPrompt = promptParts.join('\n\n');
|
|
1135
1355
|
|
|
1136
1356
|
run.result = await dispatch({
|
|
1137
1357
|
decision,
|
|
1138
1358
|
prompt: collabPrompt,
|
|
1139
|
-
files,
|
|
1359
|
+
files: dispatchFiles,
|
|
1140
1360
|
cwd,
|
|
1141
1361
|
dryRun: false,
|
|
1142
1362
|
verbose,
|
|
@@ -1192,13 +1412,13 @@ export async function runPipeline(trigger, prompt, options = {}) {
|
|
|
1192
1412
|
try { collab.persistEvents(session, cwd); } catch {}
|
|
1193
1413
|
} else {
|
|
1194
1414
|
const directPrompt = headJudgmentBlock
|
|
1195
|
-
? `${headJudgmentBlock}\n\n${
|
|
1196
|
-
:
|
|
1415
|
+
? `${headJudgmentBlock}\n\n${dispatchPrompt}`
|
|
1416
|
+
: dispatchPrompt;
|
|
1197
1417
|
|
|
1198
1418
|
run.result = await dispatch({
|
|
1199
1419
|
decision,
|
|
1200
1420
|
prompt: directPrompt,
|
|
1201
|
-
files,
|
|
1421
|
+
files: dispatchFiles,
|
|
1202
1422
|
cwd,
|
|
1203
1423
|
dryRun: false,
|
|
1204
1424
|
verbose,
|
package/src/routing-advisor.mjs
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
// routing-advisor.mjs — EMA + epsilon-greedy routing advisor
|
|
2
|
+
// Learns which model works best for which task type from outcome signals.
|
|
3
|
+
|
|
4
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync } from 'node:fs';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
|
|
7
|
+
const ALPHA = 0.3;
|
|
8
|
+
const MIN_EPSILON = 0.1;
|
|
9
|
+
const MIN_OBSERVATIONS = 5;
|
|
10
|
+
const PRIOR_WEIGHT = 5;
|
|
11
|
+
|
|
12
|
+
const STATIC_PRIORS = {
|
|
13
|
+
'search:haiku': 0.85, 'search:sonnet': 0.70, 'search:opus': 0.50,
|
|
14
|
+
'execute:haiku': 0.55, 'execute:sonnet': 0.80, 'execute:opus': 0.85,
|
|
15
|
+
'think:haiku': 0.30, 'think:sonnet': 0.70, 'think:opus': 0.90,
|
|
16
|
+
'review:haiku': 0.40, 'review:sonnet': 0.75, 'review:opus': 0.85,
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
const VALID_MODELS = {
|
|
20
|
+
search: ['haiku', 'sonnet'],
|
|
21
|
+
execute: ['haiku', 'sonnet', 'opus'],
|
|
22
|
+
think: ['sonnet', 'opus'],
|
|
23
|
+
review: ['sonnet', 'opus'],
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
function stateFile(cwd) { return join(cwd || process.cwd(), '.dualbrain', 'routing-state.json'); }
|
|
27
|
+
|
|
28
|
+
function loadState(cwd) {
|
|
29
|
+
try {
|
|
30
|
+
const p = stateFile(cwd);
|
|
31
|
+
return existsSync(p) ? JSON.parse(readFileSync(p, 'utf8')) : {};
|
|
32
|
+
} catch { return {}; }
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function saveState(state, cwd) {
|
|
36
|
+
try {
|
|
37
|
+
const dir = join(cwd || process.cwd(), '.dualbrain');
|
|
38
|
+
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
|
|
39
|
+
const p = stateFile(cwd), tmp = p + '.tmp';
|
|
40
|
+
writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf8');
|
|
41
|
+
renameSync(tmp, p);
|
|
42
|
+
} catch { /* non-throwing */ }
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const staticPrior = (tier, model) => STATIC_PRIORS[`${tier}:${model}`] ?? 0.5;
|
|
46
|
+
const cellObs = (state, key) => Object.values(state[key] ?? {}).reduce((s, m) => s + (m.observations ?? 0), 0);
|
|
47
|
+
const blended = (ema, n, tier, model) =>
|
|
48
|
+
(n / (n + PRIOR_WEIGHT)) * ema + (PRIOR_WEIGHT / (n + PRIOR_WEIGHT)) * staticPrior(tier, model);
|
|
49
|
+
|
|
50
|
+
// taskProfile: { intent, tier, risk, files?, complexity? }
|
|
51
|
+
// Returns: { model, reason, confidence, explored }
|
|
52
|
+
export function adviseModel(taskProfile, cwd) {
|
|
53
|
+
try {
|
|
54
|
+
const { tier, intent } = taskProfile ?? {};
|
|
55
|
+
const validTier = tier && VALID_MODELS[tier] ? tier : 'execute';
|
|
56
|
+
const cellKey = `${validTier}:${intent ?? 'implement'}`;
|
|
57
|
+
const models = VALID_MODELS[validTier];
|
|
58
|
+
|
|
59
|
+
const state = loadState(cwd);
|
|
60
|
+
const totalObs = cellObs(state, cellKey);
|
|
61
|
+
|
|
62
|
+
if (totalObs < MIN_OBSERVATIONS) {
|
|
63
|
+
// Heuristic: pick highest static prior
|
|
64
|
+
const best = models.reduce((a, b) => staticPrior(validTier, a) >= staticPrior(validTier, b) ? a : b);
|
|
65
|
+
return { model: best, reason: 'insufficient data, using heuristic', confidence: 0.3, explored: false };
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const epsilon = Math.max(MIN_EPSILON, 0.5 * Math.pow(0.9, totalObs));
|
|
69
|
+
const explored = Math.random() < epsilon;
|
|
70
|
+
|
|
71
|
+
if (explored) {
|
|
72
|
+
const model = models[Math.floor(Math.random() * models.length)];
|
|
73
|
+
return { model, reason: 'exploration', confidence: epsilon, explored: true };
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Exploitation: pick highest blended score
|
|
77
|
+
const cell = state[cellKey] ?? {};
|
|
78
|
+
let bestModel = models[0];
|
|
79
|
+
let bestScore = -Infinity;
|
|
80
|
+
for (const m of models) {
|
|
81
|
+
const entry = cell[m];
|
|
82
|
+
const ema = entry?.ema ?? staticPrior(validTier, m);
|
|
83
|
+
const n = entry?.observations ?? 0;
|
|
84
|
+
const score = blended(ema, n, validTier, m);
|
|
85
|
+
if (score > bestScore) { bestScore = score; bestModel = m; }
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return { model: bestModel, reason: 'exploitation', confidence: 1 - epsilon, explored: false };
|
|
89
|
+
} catch {
|
|
90
|
+
return { model: 'sonnet', reason: 'error fallback', confidence: 0.1, explored: false };
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// reward: number in [0, 1]
|
|
95
|
+
/**
 * Fold one observed reward into the exponential moving average kept for a
 * (cell, model) pair and persist the updated state.
 *
 * The reward is coerced with `Number()` and clamped to [0, 1]. A reward that
 * is not a finite number (e.g. `undefined`, `NaN`) is ignored, because a
 * single NaN would otherwise poison the stored average for good. A stored
 * `ema` or `observations` value that is not a finite number is treated as
 * absent, so a corrupt state entry heals on the next update.
 *
 * Best-effort: never throws.
 *
 * @param {string} cellKey Cell identifier used as a key in the loaded state.
 * @param {string} model Model name the reward applies to.
 * @param {number} reward Observed reward, expected in [0, 1].
 * @param {string} cwd Passed through to `loadState` / `saveState`.
 * @returns {void}
 */
export function recordReward(cellKey, model, reward, cwd) {
  try {
    const numericReward = Number(reward);
    if (!Number.isFinite(numericReward)) return;
    const clampedReward = Math.min(1, Math.max(0, numericReward));

    const state = loadState(cwd);
    if (!state[cellKey]) state[cellKey] = {};

    const previous = state[cellKey][model];
    // The first observation seeds the average with the reward itself.
    const priorEma = Number.isFinite(previous?.ema) ? previous.ema : clampedReward;
    const priorCount = Number.isFinite(previous?.observations) ? previous.observations : 0;

    state[cellKey][model] = {
      ...previous,
      ema: ALPHA * clampedReward + (1 - ALPHA) * priorEma,
      observations: priorCount + 1,
      lastUpdated: new Date().toISOString(),
      lastReward: clampedReward,
    };
    saveState(state, cwd);
  } catch {
    // non-throwing: reward bookkeeping must never break the caller
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Summarise the persisted advisor state.
 *
 * @param {string} cwd Passed through to `loadState`.
 * @returns {{
 *   cells: Object<string, Object<string, { ema: number, observations: number }>>,
 *   totalObservations: number,
 *   topPerformers: Array<{ cell: string, model: string, ema: number, observations: number }>,
 *   worstPerformers: Array<{ cell: string, model: string, ema: number, observations: number }>
 * }} Per-cell figures plus the five highest-EMA and five lowest-EMA entries
 *   (lowest first). The two lists overlap when fewer than ten entries exist.
 *   On any error an empty summary is returned instead of throwing.
 */
export function getRoutingStats(cwd) {
  try {
    const cells = {};
    const ranked = [];
    let totalObservations = 0;

    Object.entries(loadState(cwd)).forEach(([cellKey, perModel]) => {
      if (cells[cellKey] == null) {
        cells[cellKey] = {};
      }
      Object.entries(perModel).forEach(([model, entry]) => {
        const observations = entry.observations ?? 0;
        const ema = entry.ema;
        cells[cellKey][model] = { ema, observations };
        totalObservations += observations;
        ranked.push({ cell: cellKey, model, ema, observations });
      });
    });

    // Highest EMA first.
    ranked.sort((left, right) => right.ema - left.ema);

    const topPerformers = ranked.slice(0, 5);
    const worstPerformers = ranked.slice(-5).reverse();
    return { cells, totalObservations, topPerformers, worstPerformers };
  } catch {
    return { cells: {}, totalObservations: 0, topPerformers: [], worstPerformers: [] };
  }
}
|
|
131
|
+
|
|
132
|
+
/**
 * Overwrite the persisted advisor state with an empty object.
 *
 * @param {string} cwd Passed through to `saveState`.
 * @returns {void}
 */
export function resetAdvisor(cwd) {
  const emptyState = {};
  try {
    saveState(emptyState, cwd);
  } catch {
    // Deliberately swallowed: resetting is best-effort and must never throw.
  }
}
|
package/src/signal.mjs
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// signal.mjs — Compound outcome signal scoring
|
|
2
|
+
// Combines multiple weak signals into one reliable reward score.
|
|
3
|
+
|
|
4
|
+
import { existsSync } from 'node:fs';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
import { execSync } from 'node:child_process';
|
|
7
|
+
|
|
8
|
+
// Expected wall-clock duration per tier, in milliseconds.
export const EXPECTED_DURATION_MS = { search: 15000, execute: 45000, think: 30000, review: 40000 };

/**
 * Score how close a run's duration is to the expected duration for its tier.
 *
 * With `ratio = durationMs / expected`:
 *   - 0.5 <= ratio <= 1.5  -> 1.0
 *   - ratio < 0.2          -> 0.5
 *   - 0.2 <= ratio < 0.5   -> linear from 0.5 up to 1.0
 *   - 1.5 < ratio <= 3.0   -> linear from 1.0 down to 0.3
 *   - ratio > 3.0          -> 0.3
 *
 * @param {number} durationMs Observed duration in milliseconds.
 * @param {string} tier Key of EXPECTED_DURATION_MS; any other value (including
 *   inherited object keys such as 'constructor') falls back to 'execute'.
 * @returns {number | null} Score in [0.3, 1.0], or `null` when the duration is
 *   not a finite, non-negative number or no usable expectation exists.
 */
export function scoreDurationRatio(durationMs, tier) {
  try {
    // Without this guard a missing duration produced NaN instead of null.
    if (typeof durationMs !== 'number' || !Number.isFinite(durationMs) || durationMs < 0) {
      return null;
    }

    // Own-property lookup: `EXPECTED_DURATION_MS['constructor']` is a function,
    // which `??` would not replace with the default.
    const isKnownTier = Object.prototype.hasOwnProperty.call(EXPECTED_DURATION_MS, tier);
    const expected = isKnownTier ? EXPECTED_DURATION_MS[tier] : EXPECTED_DURATION_MS.execute;
    if (typeof expected !== 'number' || !Number.isFinite(expected) || expected <= 0) {
      return null;
    }

    const ratio = durationMs / expected;
    if (ratio >= 0.5 && ratio <= 1.5) return 1.0;
    if (ratio < 0.2) return 0.5;
    if (ratio > 3.0) return 0.3;
    if (ratio < 0.5) return 0.5 + ((ratio - 0.2) / (0.5 - 0.2)) * 0.5;
    // ratio 1.5–3.0
    return 1.0 - ((ratio - 1.5) / (3.0 - 1.5)) * 0.7;
  } catch {
    return null;
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Fraction of an outcome's changed files that still exist under `cwd` and are
 * not reported as modified by `git diff`.
 *
 * File names are interpreted relative to `cwd`. Git is therefore asked for
 * cwd-relative names (`--relative`) so the comparison also works when `cwd`
 * is a subdirectory of the repository, and for NUL-separated verbatim names
 * (`-z`) so unusual file names are not C-quoted. Each file is normalised
 * (leading "./" removed, Windows separators converted) before the lookup.
 *
 * If git cannot be run (not installed, not a repository, ...) no file is
 * considered modified and only the existence check applies.
 *
 * @param {{ filesChanged?: string[] | number }} outcome Outcome record; only
 *   an array `filesChanged` is inspected.
 * @param {string} cwd Directory the file names are relative to.
 * @returns {number | null} Value in [0, 1]; 1.0 when there is no file list;
 *   `null` when the measurement itself fails.
 */
export function measureFileSurvival(outcome, cwd) {
  try {
    const files = Array.isArray(outcome.filesChanged) ? outcome.filesChanged : [];
    if (files.length === 0) return 1.0;

    let changed;
    try {
      const raw = execSync('git diff --name-only -z --relative', {
        cwd,
        encoding: 'utf8',
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      changed = new Set(raw.split('\0').filter(Boolean));
    } catch {
      // Best-effort: without git, fall back to the existence check alone.
      changed = new Set();
    }

    const onWindows = process.platform === 'win32';
    const survived = files.filter((file) => {
      // join() with one argument normalises "./a" and "a/../b" style paths.
      const normalized = join(file);
      const gitPath = onWindows ? normalized.replace(/\\/g, '/') : normalized;
      return existsSync(join(cwd, file)) && !changed.has(gitPath);
    });
    return survived.length / files.length;
  } catch {
    return null;
  }
}
|
|
53
|
+
|
|
54
|
+
/**
 * Combine several weak outcome signals into one reward in [0, 1].
 *
 * Signals and nominal weights:
 *   - exitSuccess     (0.30): 1.0 on `success === true`, 0.4 on
 *                             `status === 'partial'`, otherwise 0.0
 *   - durationRatio   (0.25): `scoreDurationRatio(durationMs, tier)` when
 *                             `durationMs > 0`
 *   - tokenEfficiency (0.25): files changed per 1000 output tokens; skipped
 *                             for a 'think' outcome that changed no files.
 *                             When no token count is given, `durationMs / 100`
 *                             is used as a stand-in.
 *   - fileSurvival    (0.20): taken from `context.fileSurvival`
 *
 * A signal whose value is not a finite number is reported as `null` and its
 * weight is redistributed over the remaining signals; `confidence` is the sum
 * of the weights that actually contributed.
 *
 * @param {object} outcome Outcome record (`tier`, `success`, `status`,
 *   `durationMs`, `filesChanged` as array or count, `tokensUsed.output`).
 * @param {{ fileSurvival?: number | null } | null} [context={}] Delayed signals.
 * @returns {{
 *   reward: number,
 *   confidence: number,
 *   signals: {
 *     exitSuccess: number,
 *     durationRatio: number | null,
 *     tokenEfficiency: number | null,
 *     fileSurvival: number | null
 *   }
 * }} Never throws; on failure reward and confidence are 0.
 */
export function scoreOutcome(outcome, context = {}) {
  // Non-finite values (NaN, undefined, ...) count as "signal unavailable".
  const finiteOrNull = (value) => (Number.isFinite(value) ? value : null);

  try {
    const tier = outcome.tier ?? 'execute';

    // Signal 1: exit success (weight 0.3)
    let exitVal;
    if (outcome.success === true) exitVal = 1.0;
    else if (outcome.status === 'partial') exitVal = 0.4;
    else exitVal = 0.0;

    // Signal 2: duration ratio (weight 0.25)
    const durationMs = outcome.durationMs ?? 0;
    const durVal = durationMs > 0 ? finiteOrNull(scoreDurationRatio(durationMs, tier)) : null;

    // Signal 3: token efficiency (weight 0.25)
    let effVal = null;
    const filesChanged = outcome.filesChanged ?? 0;
    const fileCount = typeof filesChanged === 'number' ? filesChanged : filesChanged.length;
    if (!(fileCount === 0 && tier === 'think')) {
      const tokensUsed =
        outcome.tokensUsed?.output ??
        (durationMs > 0 ? Math.round(durationMs / 100) : null);
      if (tokensUsed !== null) {
        const efficiency = fileCount / Math.max(1, tokensUsed / 1000);
        if (!Number.isFinite(efficiency)) effVal = null;
        else if (efficiency > 2) effVal = 1.0;
        else if (efficiency >= 0.5) effVal = 0.5 + ((efficiency - 0.5) / 1.5) * 0.5;
        else if (efficiency < 0.1) effVal = 0.2;
        else effVal = 0.2 + ((efficiency - 0.1) / 0.4) * 0.3;
      }
    }

    // Signal 4: file survival (weight 0.2) — delayed, may be null.
    // `context?.` tolerates an explicit null, which the default parameter does not cover.
    const survivalVal = finiteOrNull(context?.fileSurvival);

    const signals = [
      { name: 'exitSuccess', value: exitVal, weight: 0.3 },
      { name: 'durationRatio', value: durVal, weight: 0.25 },
      { name: 'tokenEfficiency', value: effVal, weight: 0.25 },
      { name: 'fileSurvival', value: survivalVal, weight: 0.2 },
    ];

    // Compound score with weight redistribution over the available signals.
    const active = signals.filter((s) => s.value !== null);
    const totalWeight = active.reduce((sum, s) => sum + s.weight, 0);
    const reward = totalWeight > 0
      ? active.reduce((sum, s) => sum + (s.value * s.weight / totalWeight), 0)
      : 0;
    const confidence = totalWeight;

    return {
      reward: Math.min(1, Math.max(0, reward)),
      confidence: Math.min(1, confidence),
      signals: {
        exitSuccess: exitVal,
        durationRatio: durVal,
        tokenEfficiency: effVal,
        fileSurvival: survivalVal,
      },
    };
  } catch {
    // exitSuccess is numeric here too, matching the shape of the normal path.
    return { reward: 0, confidence: 0, signals: { exitSuccess: 0, durationRatio: null, tokenEfficiency: null, fileSurvival: null } };
  }
}
|