dual-brain 0.2.24 → 0.2.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -2
- package/src/outcome.mjs +73 -1
- package/src/pipeline.mjs +60 -5
- package/src/routing-advisor.mjs +138 -0
- package/src/signal.mjs +114 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dual-brain",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.25",
|
|
4
4
|
"description": "AI orchestration across Claude + OpenAI subscriptions — smart routing, budget awareness, and dual-brain collaboration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -47,7 +47,9 @@
|
|
|
47
47
|
"./envelope": "./src/envelope.mjs",
|
|
48
48
|
"./session-lock": "./src/session-lock.mjs",
|
|
49
49
|
"./governance": "./src/governance.mjs",
|
|
50
|
-
"./context-intel": "./src/context-intel.mjs"
|
|
50
|
+
"./context-intel": "./src/context-intel.mjs",
|
|
51
|
+
"./signal": "./src/signal.mjs",
|
|
52
|
+
"./routing-advisor": "./src/routing-advisor.mjs"
|
|
51
53
|
},
|
|
52
54
|
"keywords": [
|
|
53
55
|
"claude-code",
|
|
@@ -134,6 +136,8 @@
|
|
|
134
136
|
"src/session-lock.mjs",
|
|
135
137
|
"src/governance.mjs",
|
|
136
138
|
"src/context-intel.mjs",
|
|
139
|
+
"src/signal.mjs",
|
|
140
|
+
"src/routing-advisor.mjs",
|
|
137
141
|
"bin/*.mjs",
|
|
138
142
|
"hooks/enforce-tier.mjs",
|
|
139
143
|
"hooks/cost-logger.mjs",
|
package/src/outcome.mjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync } from 'fs';
|
|
1
|
+
import { mkdirSync, appendFileSync, writeFileSync, readFileSync, existsSync, readdirSync } from 'fs';
|
|
2
2
|
import { join } from 'path';
|
|
3
3
|
import { randomUUID } from 'crypto';
|
|
4
|
+
import { execSync } from 'child_process';
|
|
4
5
|
|
|
5
6
|
const STOP_WORDS = new Set([
|
|
6
7
|
'the', 'a', 'an', 'is', 'are', 'was', 'were', 'to', 'from',
|
|
@@ -204,6 +205,77 @@ export async function getRelevantOutcomes(prompt, files = [], cwd, options = {})
|
|
|
204
205
|
}
|
|
205
206
|
}
|
|
206
207
|
|
|
208
|
+
/**
 * Score how many files touched by recent outcomes are still present and unmodified.
 *
 * Reads up to the last 20 `outcome_*.json` files (by sorted filename) under
 * `<cwd>/.dualbrain/outcomes`. For every record that has no `survivalScore`
 * yet and carries a non-empty `result.filesChanged` array, the score is the
 * fraction of those files that still exist on disk and are not listed by
 * `git diff --name-only`. The score is written back into the record's file.
 *
 * Never throws: any unexpected failure yields an empty array.
 *
 * @param {string} cwd - Directory that contains the `.dualbrain` folder.
 * @returns {Promise<Array<{id: *, survivalScore: number}>>} Records scored by this call.
 */
export async function checkFileSurvival(cwd) {
  try {
    const dir = join(cwd, '.dualbrain', 'outcomes');
    if (!existsSync(dir)) return [];

    // Collect up to the last 20 individual outcome JSON files
    let files;
    try {
      files = readdirSync(dir)
        .filter((f) => f.startsWith('outcome_') && f.endsWith('.json'))
        .sort()
        .slice(-20);
    } catch {
      return [];
    }

    // Get current git-modified files (best-effort). Both the name as printed
    // by git and the cwd-joined form are stored so either spelling matches.
    const modifiedFiles = new Set();
    try {
      const gitOut = execSync('git diff --name-only', { cwd, stdio: ['ignore', 'pipe', 'pipe'] }).toString();
      for (const f of gitOut.split('\n').map((l) => l.trim()).filter(Boolean)) {
        modifiedFiles.add(f);
        modifiedFiles.add(join(cwd, f));
      }
    } catch {
      // git unavailable — proceed without modified-file check
    }

    const scored = [];

    for (const fname of files) {
      const fpath = join(dir, fname);
      let record;
      try {
        record = JSON.parse(readFileSync(fpath, 'utf8'));
      } catch {
        continue;
      }

      // Valid JSON that is not an object (e.g. `null`) must only skip this
      // file; without this guard the property access below throws and the
      // outer catch discards every record already scored in this run.
      if (record === null || typeof record !== 'object') continue;

      // Skip if already scored or no filesChanged list
      if (record.survivalScore !== undefined) continue;
      const changedFiles = record.result?.filesChanged;
      if (!Array.isArray(changedFiles) || changedFiles.length === 0) continue;

      let survived = 0;
      for (const f of changedFiles) {
        // Malformed (non-string) entries cannot be checked; count them as not survived.
        if (typeof f !== 'string') continue;
        const absPath = f.startsWith('/') ? f : join(cwd, f);
        const exists = existsSync(absPath);
        const modified = modifiedFiles.has(f) || modifiedFiles.has(absPath);
        if (exists && !modified) survived++;
      }

      const survivalScore = survived / changedFiles.length;
      record.survivalScore = survivalScore;

      try {
        writeFileSync(fpath, JSON.stringify(record, null, 2), 'utf8');
      } catch {
        // write failed — do not report a score that was not persisted
        continue;
      }

      scored.push({ id: record.id, survivalScore });
    }

    return scored;
  } catch {
    return [];
  }
}
|
|
278
|
+
|
|
207
279
|
export async function getOutcomeStats(cwd, days = 7) {
|
|
208
280
|
try {
|
|
209
281
|
const allFiles = last7DaysFiles(cwd).slice(0, days);
|
package/src/pipeline.mjs
CHANGED
|
@@ -10,7 +10,7 @@ import { detectTask } from './detect.mjs';
|
|
|
10
10
|
import { decideRoute, getWorkStyle, WORK_STYLES } from './decide.mjs';
|
|
11
11
|
import { dispatch } from './dispatch.mjs';
|
|
12
12
|
import { loadProfile } from './profile.mjs';
|
|
13
|
-
import { mkdirSync, writeFileSync } from 'node:fs';
|
|
13
|
+
import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
|
|
14
14
|
import { join } from 'node:path';
|
|
15
15
|
import { buildContextPack as buildContextPackIntel } from './context.mjs';
|
|
16
16
|
import { compilePacket } from './context-intel.mjs';
|
|
@@ -708,6 +708,18 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
|
|
|
708
708
|
// profile unavailable — proceed
|
|
709
709
|
}
|
|
710
710
|
|
|
711
|
+
// Auto-disable if ROI is bad (< 30% hit rate after 10+ observations)
|
|
712
|
+
{
|
|
713
|
+
const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');
|
|
714
|
+
let metrics = { hits: 0, misses: 0, totalTokens: 0 };
|
|
715
|
+
try { metrics = JSON.parse(readFileSync(metricsPath, 'utf8')); } catch {}
|
|
716
|
+
if (metrics.hits + metrics.misses >= 10 && metrics.hits / (metrics.hits + metrics.misses) < 0.3) {
|
|
717
|
+
const verbose = opts.verbose ?? false;
|
|
718
|
+
if (verbose) process.stderr.write('[dual-brain] pre-dispatch think disabled: hit rate below 30%\n');
|
|
719
|
+
return { refined: false, reason: 'think ROI too low, auto-disabled' };
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
|
|
711
723
|
try {
|
|
712
724
|
log('[dual-brain] pre-dispatch think: refining work spec...');
|
|
713
725
|
|
|
@@ -756,12 +768,14 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
|
|
|
756
768
|
if (!parsed || typeof parsed.confidence !== 'number' || parsed.confidence <= 0.7) {
|
|
757
769
|
const reason = !parsed ? 'unparseable response' : `confidence ${parsed.confidence} <= 0.7`;
|
|
758
770
|
log(`[dual-brain] pre-dispatch think: skipped (${reason})`);
|
|
771
|
+
_recordThinkMetrics(false, cwd);
|
|
759
772
|
return { refined: false };
|
|
760
773
|
}
|
|
761
774
|
|
|
762
775
|
const ws = parsed.workSpec;
|
|
763
776
|
if (!ws || !ws.objective) {
|
|
764
777
|
log('[dual-brain] pre-dispatch think: skipped (no workSpec.objective)');
|
|
778
|
+
_recordThinkMetrics(false, cwd);
|
|
765
779
|
return { refined: false };
|
|
766
780
|
}
|
|
767
781
|
|
|
@@ -774,19 +788,44 @@ async function preDispatchThink(prompt, files, decision, cwd, profile, opts = {}
|
|
|
774
788
|
|
|
775
789
|
log(`[dual-brain] think refined: "${newObjective.slice(0, 60)}..." (confidence: ${parsed.confidence})`);
|
|
776
790
|
|
|
791
|
+
_recordThinkMetrics(true, cwd);
|
|
777
792
|
return {
|
|
778
|
-
refined:
|
|
779
|
-
prompt:
|
|
780
|
-
files:
|
|
781
|
-
decision:
|
|
793
|
+
refined: true,
|
|
794
|
+
prompt: newObjective,
|
|
795
|
+
files: newFiles,
|
|
796
|
+
decision: newDecision,
|
|
797
|
+
confidence: parsed.confidence,
|
|
782
798
|
};
|
|
783
799
|
} catch (err) {
|
|
784
800
|
// Non-blocking on any failure
|
|
785
801
|
log(`[dual-brain] pre-dispatch think: skipped (error: ${err.message})`);
|
|
802
|
+
_recordThinkMetrics(false, cwd);
|
|
786
803
|
return { refined: false };
|
|
787
804
|
}
|
|
788
805
|
}
|
|
789
806
|
|
|
807
|
+
/**
 * Record a think hit or miss into think-metrics.json (non-blocking).
 *
 * The file lives at `<cwd>/.dualbrain/think-metrics.json`. Existing content is
 * loaded, counters are normalised, the relevant counter is incremented, a
 * fixed 3000-token budget is added to `totalTokens`, and the file is rewritten.
 * Every failure is swallowed so metrics can never break the pipeline.
 *
 * @param {boolean} hit — true if the think agent produced a usable refinement
 * @param {string} cwd
 */
function _recordThinkMetrics(hit, cwd) {
  try {
    const metricsPath = join(cwd, '.dualbrain', 'think-metrics.json');

    let stored = null;
    try {
      stored = JSON.parse(readFileSync(metricsPath, 'utf8'));
    } catch {
      // missing or unreadable file — start from zeroed counters
    }

    // A file holding `null`, an array, or an object with missing/non-numeric
    // counters would otherwise turn the counters into NaN (serialised as
    // `null`) or make every later call throw and be silently dropped.
    const base = stored !== null && typeof stored === 'object' && !Array.isArray(stored) ? stored : {};
    const asCount = (value) => (Number.isFinite(value) && value >= 0 ? value : 0);
    const metrics = {
      ...base,
      hits: asCount(base.hits),
      misses: asCount(base.misses),
      totalTokens: asCount(base.totalTokens),
    };

    if (hit) {
      metrics.hits += 1;
    } else {
      metrics.misses += 1;
    }
    metrics.totalTokens += 3000; // budget per think call
    metrics.lastUpdated = new Date().toISOString();

    mkdirSync(join(cwd, '.dualbrain'), { recursive: true });
    writeFileSync(metricsPath, JSON.stringify(metrics, null, 2) + '\n');
  } catch { /* non-blocking */ }
}
|
|
828
|
+
|
|
790
829
|
// ─── Main entry point ─────────────────────────────────────────────────────────
|
|
791
830
|
|
|
792
831
|
/**
|
|
@@ -1230,6 +1269,22 @@ export async function runPipeline(trigger, prompt, options = {}) {
|
|
|
1230
1269
|
run._thinkRefinedPrompt = thinkRefinement.prompt;
|
|
1231
1270
|
run._thinkRefinedFiles = thinkRefinement.files;
|
|
1232
1271
|
decision = thinkRefinement.decision;
|
|
1272
|
+
|
|
1273
|
+
// Cascade: if think agent is highly confident and task is simple, downgrade worker model
|
|
1274
|
+
if (thinkRefinement.decision) {
|
|
1275
|
+
const thinkConf = thinkRefinement.confidence || 0;
|
|
1276
|
+
const currentModel = decision.model || 'sonnet';
|
|
1277
|
+
if (thinkConf >= 0.9 && currentModel !== 'haiku') {
|
|
1278
|
+
// High confidence from thinker = clear spec = cheaper model can execute
|
|
1279
|
+
const prevModel = decision.model;
|
|
1280
|
+
decision.model = 'haiku';
|
|
1281
|
+
if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded ${prevModel || 'sonnet'} to haiku\n`);
|
|
1282
|
+
} else if (thinkConf >= 0.75 && currentModel === 'opus') {
|
|
1283
|
+
// Moderate confidence but spec is clear enough for sonnet
|
|
1284
|
+
decision.model = 'sonnet';
|
|
1285
|
+
if (verbose || run?.verbose) process.stderr.write(`[dual-brain] cascade: think confidence ${thinkConf} → downgraded opus to sonnet\n`);
|
|
1286
|
+
}
|
|
1287
|
+
}
|
|
1233
1288
|
}
|
|
1234
1289
|
}
|
|
1235
1290
|
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
// routing-advisor.mjs — EMA + epsilon-greedy routing advisor
|
|
2
|
+
// Learns which model works best for which task type from outcome signals.
|
|
3
|
+
|
|
4
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync } from 'node:fs';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
|
|
7
|
+
// Smoothing factor applied to each new reward when updating a model's EMA.
const ALPHA = 0.3;
// Lower bound for the exploration probability once it has decayed.
const MIN_EPSILON = 0.1;
// Observations a cell needs before learned scores replace the static heuristic.
const MIN_OBSERVATIONS = 5;
// Pseudo-observation count given to the static prior when blending with the EMA.
const PRIOR_WEIGHT = 5;

// Hand-assigned starting scores, keyed as `<tier>:<model>`.
const STATIC_PRIORS = {
  'search:haiku': 0.85,
  'search:sonnet': 0.70,
  'search:opus': 0.50,

  'execute:haiku': 0.55,
  'execute:sonnet': 0.80,
  'execute:opus': 0.85,

  'think:haiku': 0.30,
  'think:sonnet': 0.70,
  'think:opus': 0.90,

  'review:haiku': 0.40,
  'review:sonnet': 0.75,
  'review:opus': 0.85,
};

// Models the advisor may choose from for each tier.
const VALID_MODELS = {
  search: ['haiku', 'sonnet'],
  execute: ['haiku', 'sonnet', 'opus'],
  think: ['sonnet', 'opus'],
  review: ['sonnet', 'opus'],
};
|
|
25
|
+
|
|
26
|
+
/**
 * Path of the persisted routing state for a project.
 * @param {string} [cwd] - Project directory; falls back to `process.cwd()` when falsy.
 * @returns {string}
 */
function stateFile(cwd) {
  return join(cwd || process.cwd(), '.dualbrain', 'routing-state.json');
}

/**
 * Load the routing state from disk.
 *
 * Always returns a plain object: a missing file, unreadable/invalid JSON, or
 * JSON that is not an object (`null`, arrays, primitives) all yield `{}`.
 * Callers index into the result directly, so a non-object here would make
 * them throw inside their own catch-all handlers and silently stop working.
 *
 * @param {string} [cwd]
 * @returns {object}
 */
function loadState(cwd) {
  try {
    const p = stateFile(cwd);
    if (!existsSync(p)) return {};
    const parsed = JSON.parse(readFileSync(p, 'utf8'));
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    return {};
  }
}

/**
 * Persist the routing state. Writes to `<state file>.tmp` and then renames it
 * over the real file so readers never observe a half-written file.
 * Failures are swallowed; this function never throws.
 *
 * @param {object} state
 * @param {string} [cwd]
 */
function saveState(state, cwd) {
  try {
    const dir = join(cwd || process.cwd(), '.dualbrain');
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
    const p = stateFile(cwd);
    const tmp = p + '.tmp';
    writeFileSync(tmp, JSON.stringify(state, null, 2), 'utf8');
    renameSync(tmp, p);
  } catch { /* non-throwing */ }
}
|
|
44
|
+
|
|
45
|
+
/** Static prior for a tier/model pair; 0.5 when the pair has no entry. */
function staticPrior(tier, model) {
  const lookupKey = `${tier}:${model}`;
  return STATIC_PRIORS[lookupKey] ?? 0.5;
}

/** Total observation count across every model recorded under `key` in `state`. */
function cellObs(state, key) {
  let total = 0;
  for (const modelEntry of Object.values(state[key] ?? {})) {
    total = total + (modelEntry.observations ?? 0);
  }
  return total;
}

/**
 * Weighted mix of the learned EMA and the static prior: the EMA's share is
 * n / (n + PRIOR_WEIGHT), the prior takes the remainder.
 */
function blended(ema, n, tier, model) {
  const learnedShare = n / (n + PRIOR_WEIGHT);
  const priorShare = PRIOR_WEIGHT / (n + PRIOR_WEIGHT);
  return learnedShare * ema + priorShare * staticPrior(tier, model);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Recommend a model for a task using epsilon-greedy selection over blended scores.
 *
 * - Unknown or missing tiers are treated as `execute`; a missing intent as `implement`.
 * - With fewer than MIN_OBSERVATIONS recorded for the `<tier>:<intent>` cell,
 *   the model with the highest static prior is returned.
 * - Otherwise, with probability epsilon = max(MIN_EPSILON, 0.5 * 0.9^observations)
 *   a uniformly random valid model is returned (exploration); else the model
 *   with the highest blended score (exploitation).
 *
 * Never throws: any failure returns the `sonnet` error fallback.
 *
 * @param {{intent?: string, tier?: string, risk?: *, files?: *, complexity?: *}} taskProfile
 * @param {string} [cwd] - Project directory holding `.dualbrain/routing-state.json`.
 * @returns {{model: string, reason: string, confidence: number, explored: boolean}}
 */
export function adviseModel(taskProfile, cwd) {
  try {
    const { tier, intent } = taskProfile ?? {};
    // Own-property check: a plain `VALID_MODELS[tier]` lookup also accepts
    // inherited keys such as 'constructor', which then fail further down and
    // end in the error fallback instead of defaulting to 'execute'.
    const isKnownTier = typeof tier === 'string' && Object.prototype.hasOwnProperty.call(VALID_MODELS, tier);
    const validTier = isKnownTier ? tier : 'execute';
    const cellKey = `${validTier}:${intent ?? 'implement'}`;
    const models = VALID_MODELS[validTier];

    const state = loadState(cwd);
    const totalObs = cellObs(state, cellKey);

    if (totalObs < MIN_OBSERVATIONS) {
      // Heuristic: pick highest static prior
      const best = models.reduce((a, b) => (staticPrior(validTier, a) >= staticPrior(validTier, b) ? a : b));
      return { model: best, reason: 'insufficient data, using heuristic', confidence: 0.3, explored: false };
    }

    const epsilon = Math.max(MIN_EPSILON, 0.5 * Math.pow(0.9, totalObs));
    const explored = Math.random() < epsilon;

    if (explored) {
      const model = models[Math.floor(Math.random() * models.length)];
      return { model, reason: 'exploration', confidence: epsilon, explored: true };
    }

    // Exploitation: pick highest blended score. Models without a recorded
    // entry score exactly their static prior (n = 0).
    const cell = state[cellKey] ?? {};
    let bestModel = models[0];
    let bestScore = -Infinity;
    for (const m of models) {
      const entry = cell[m];
      const ema = entry?.ema ?? staticPrior(validTier, m);
      const n = entry?.observations ?? 0;
      const score = blended(ema, n, validTier, m);
      if (score > bestScore) {
        bestScore = score;
        bestModel = m;
      }
    }

    return { model: bestModel, reason: 'exploitation', confidence: 1 - epsilon, explored: false };
  } catch {
    return { model: 'sonnet', reason: 'error fallback', confidence: 0.1, explored: false };
  }
}
|
|
93
|
+
|
|
94
|
+
/**
 * Fold one reward observation into the EMA for `model` within `cellKey` and persist it.
 *
 * The first observation for a model seeds the EMA with the reward itself.
 * Rewards that are not finite numbers are ignored entirely (they would
 * otherwise corrupt the average while still counting as an observation);
 * finite rewards are clamped to [0, 1].
 *
 * Never throws.
 *
 * @param {string} cellKey - Cell identifier, `<tier>:<intent>` as built by adviseModel.
 * @param {string} model
 * @param {number} reward - number in [0, 1]
 * @param {string} [cwd]
 */
export function recordReward(cellKey, model, reward, cwd) {
  try {
    if (typeof reward !== 'number' || !Number.isFinite(reward)) return;
    const clamped = Math.min(1, Math.max(0, reward));

    const state = loadState(cwd);
    if (!state[cellKey]) state[cellKey] = {};
    const entry = state[cellKey][model] ?? { ema: clamped, observations: 0 };

    // A previously stored non-numeric EMA (e.g. `null` left behind by a
    // serialised NaN) is treated like a fresh entry.
    const previousEma = typeof entry.ema === 'number' && Number.isFinite(entry.ema) ? entry.ema : clamped;

    entry.ema = ALPHA * clamped + (1 - ALPHA) * previousEma;
    entry.observations = (entry.observations ?? 0) + 1;
    entry.lastUpdated = new Date().toISOString();
    entry.lastReward = clamped;
    state[cellKey][model] = entry;
    saveState(state, cwd);
  } catch {
    // non-throwing
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Summarise the persisted routing state.
 *
 * @param {string} [cwd]
 * @returns {{cells: object, totalObservations: number, topPerformers: Array, worstPerformers: Array}}
 *   `cells` maps cell key → model → `{ ema, observations }`; the performer
 *   lists hold up to five `{ cell, model, ema, observations }` rows taken from
 *   the top and bottom of the EMA-descending ranking. On any failure all
 *   fields are empty/zero.
 */
export function getRoutingStats(cwd) {
  try {
    const state = loadState(cwd);
    const cells = {};
    const ranked = [];
    let totalObservations = 0;

    Object.entries(state).forEach(([cellKey, models]) => {
      cells[cellKey] ??= {};
      Object.entries(models).forEach(([model, entry]) => {
        const observations = entry.observations ?? 0;
        const { ema } = entry;
        cells[cellKey][model] = { ema, observations };
        totalObservations += observations;
        ranked.push({ cell: cellKey, model, ema, observations });
      });
    });

    // Highest EMA first.
    ranked.sort((left, right) => right.ema - left.ema);

    const topPerformers = ranked.slice(0, 5);
    const worstPerformers = ranked.slice(-5).reverse();
    return { cells, totalObservations, topPerformers, worstPerformers };
  } catch {
    return { cells: {}, totalObservations: 0, topPerformers: [], worstPerformers: [] };
  }
}
|
|
131
|
+
|
|
132
|
+
/**
 * Forget everything the advisor has learned by persisting an empty state.
 * Never throws.
 *
 * @param {string} [cwd]
 */
export function resetAdvisor(cwd) {
  const emptyState = {};
  try {
    saveState(emptyState, cwd);
  } catch {
    // non-throwing
  }
}
|
package/src/signal.mjs
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// signal.mjs — Compound outcome signal scoring
|
|
2
|
+
// Combines multiple weak signals into one reliable reward score.
|
|
3
|
+
|
|
4
|
+
import { existsSync } from 'node:fs';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
import { execSync } from 'node:child_process';
|
|
7
|
+
|
|
8
|
+
// Expected wall-clock duration per tier, in milliseconds.
export const EXPECTED_DURATION_MS = { search: 15000, execute: 45000, think: 30000, review: 40000 };

/**
 * Score a run's duration relative to the expected duration for its tier.
 *
 * With ratio = durationMs / expected:
 *   - 0.5 … 1.5  → 1.0
 *   - below 0.2  → 0.5
 *   - above 3.0  → 0.3
 *   - 0.2 … 0.5  → linear from 0.5 up to 1.0
 *   - 1.5 … 3.0  → linear from 1.0 down to 0.3
 *
 * @param {number} durationMs
 * @param {string} tier - Unknown tiers use the `execute` expectation.
 * @returns {number|null} Score in [0.3, 1.0], or `null` when no ratio can be computed.
 */
export function scoreDurationRatio(durationMs, tier) {
  try {
    // Own-property lookup so inherited keys (e.g. 'constructor') are treated
    // as unknown tiers instead of yielding a non-numeric expectation.
    const hasTier = Object.prototype.hasOwnProperty.call(EXPECTED_DURATION_MS, tier);
    const expected = hasTier ? EXPECTED_DURATION_MS[tier] : EXPECTED_DURATION_MS.execute;
    const ratio = durationMs / expected;

    // NaN fails every comparison below and would leak out as a NaN score;
    // report "signal unavailable" instead.
    if (Number.isNaN(ratio)) return null;

    if (ratio >= 0.5 && ratio <= 1.5) return 1.0;
    if (ratio < 0.2) return 0.5;
    if (ratio > 3.0) return 0.3;
    if (ratio < 0.5) return 0.5 + ((ratio - 0.2) / (0.5 - 0.2)) * 0.5;
    // ratio 1.5–3.0
    return 1.0 - ((ratio - 1.5) / (3.0 - 1.5)) * 0.7;
  } catch {
    return null;
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Fraction of an outcome's changed files that still exist and are not listed
 * by `git diff --name-only` run in `cwd`.
 *
 * @param {{filesChanged?: string[]}} outcome - Only an array `filesChanged` is considered.
 * @param {string} cwd - Directory used to resolve relative paths and to run git.
 * @returns {number|null} 1.0 when there are no files to check, a ratio in
 *   [0, 1] otherwise, or `null` if the measurement itself failed.
 */
export function measureFileSurvival(outcome, cwd) {
  try {
    const files = Array.isArray(outcome.filesChanged)
      ? outcome.filesChanged
      : [];
    if (files.length === 0) return 1.0;

    // Best-effort list of modified files; stored both as printed by git and
    // joined onto cwd so relative and absolute spellings both match.
    const changed = new Set();
    try {
      const gitOut = execSync('git diff --name-only', { cwd, encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] });
      for (const name of gitOut.split('\n').map((f) => f.trim()).filter(Boolean)) {
        changed.add(name);
        changed.add(join(cwd, name));
      }
    } catch {
      // git unavailable — only the existence check applies
    }

    const survived = files.filter((f) => {
      // join() would append an absolute path to cwd, producing a path that
      // never exists; keep absolute paths as-is (same rule as checkFileSurvival).
      const abs = f.startsWith('/') ? f : join(cwd, f);
      return existsSync(abs) && !changed.has(f) && !changed.has(abs);
    });
    return survived.length / files.length;
  } catch {
    return null;
  }
}
|
|
53
|
+
|
|
54
|
+
/**
 * Combine several weak signals about a finished task into one reward.
 *
 * Signals and base weights:
 *   - exitSuccess (0.3): 1.0 when `outcome.success === true`, 0.4 when
 *     `outcome.status === 'partial'`, otherwise 0.0.
 *   - durationRatio (0.25): `scoreDurationRatio` of `outcome.durationMs`;
 *     unavailable when the duration is missing or not positive.
 *   - tokenEfficiency (0.25): files changed per 1000 output tokens (tokens are
 *     estimated as durationMs / 100 when not reported); skipped for `think`
 *     tasks that changed no files.
 *   - fileSurvival (0.2): taken from `context.fileSurvival`, may be absent.
 *
 * Unavailable signals (null or NaN) are dropped and the remaining weights are
 * renormalised. `confidence` is the summed weight of the signals that were used.
 *
 * @param {object} outcome - Reads `tier`, `success`, `status`, `durationMs`,
 *   `filesChanged` (count or array) and `tokensUsed.output`.
 * @param {{fileSurvival?: number|null}} [context]
 * @returns {{reward: number, confidence: number, signals: {exitSuccess: *, durationRatio: ?number, tokenEfficiency: ?number, fileSurvival: *}}}
 */
export function scoreOutcome(outcome, context = {}) {
  try {
    const tier = outcome.tier ?? 'execute';
    const signals = [];

    // Signal 1: exit success (weight 0.3)
    let exitVal;
    if (outcome.success === true) exitVal = 1.0;
    else if (outcome.status === 'partial') exitVal = 0.4;
    else exitVal = 0.0;
    signals.push({ name: 'exitSuccess', value: exitVal, weight: 0.3 });

    // Signal 2: duration ratio (weight 0.25)
    const durationMs = outcome.durationMs ?? 0;
    const durVal = durationMs > 0 ? scoreDurationRatio(durationMs, tier) : null;
    signals.push({ name: 'durationRatio', value: durVal, weight: 0.25 });

    // Signal 3: token efficiency (weight 0.25)
    let effVal = null;
    const filesChanged = outcome.filesChanged ?? 0;
    // Accept a count or a list; any other shape has no usable length and
    // would otherwise turn the efficiency (and the whole reward) into NaN.
    let fileCount = 0;
    if (typeof filesChanged === 'number') {
      fileCount = Number.isNaN(filesChanged) ? 0 : filesChanged;
    } else if (Array.isArray(filesChanged)) {
      fileCount = filesChanged.length;
    }
    if (!(fileCount === 0 && tier === 'think')) {
      const tokensUsed =
        outcome.tokensUsed?.output ??
        (durationMs > 0 ? Math.round(durationMs / 100) : null);
      if (tokensUsed !== null) {
        const efficiency = fileCount / Math.max(1, tokensUsed / 1000);
        if (Number.isNaN(efficiency)) effVal = null; // unusable token count
        else if (efficiency > 2) effVal = 1.0;
        else if (efficiency >= 0.5) effVal = 0.5 + ((efficiency - 0.5) / 1.5) * 0.5;
        else if (efficiency < 0.1) effVal = 0.2;
        else effVal = 0.2 + ((efficiency - 0.1) / 0.4) * 0.3;
      }
    }
    signals.push({ name: 'tokenEfficiency', value: effVal, weight: 0.25 });

    // Signal 4: file survival (weight 0.2) — delayed, may be null
    const rawSurvival = context.fileSurvival ?? null;
    const survivalVal = Number.isNaN(rawSurvival) ? null : rawSurvival;
    signals.push({ name: 'fileSurvival', value: survivalVal, weight: 0.2 });

    // Compound score with weight redistribution. NaN is excluded as well as
    // null because Math.min/Math.max below do not clamp NaN.
    const active = signals.filter((s) => s.value !== null && !Number.isNaN(s.value));
    const totalWeight = active.reduce((sum, s) => sum + s.weight, 0);
    const reward = totalWeight > 0
      ? active.reduce((sum, s) => sum + (s.value * s.weight / totalWeight), 0)
      : 0;
    const confidence = totalWeight;

    return {
      reward: Math.min(1, Math.max(0, reward)),
      confidence: Math.min(1, confidence),
      signals: {
        exitSuccess: exitVal,
        durationRatio: durVal,
        tokenEfficiency: effVal,
        fileSurvival: survivalVal,
      },
    };
  } catch {
    // NOTE(review): exitSuccess is `false` here but numeric on the normal
    // path; kept as-is for backward compatibility — confirm no caller relies on it.
    return { reward: 0, confidence: 0, signals: { exitSuccess: false, durationRatio: null, tokenEfficiency: null, fileSurvival: null } };
  }
}
|