dual-brain 7.1.21 → 7.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/dual-brain.mjs +2580 -717
- package/hooks/budget-balancer.mjs +104 -266
- package/hooks/wave-orchestrator.mjs +29 -26
- package/package.json +14 -3
- package/scripts/verify-publish.mjs +26 -0
- package/src/context.mjs +389 -0
- package/src/decide.mjs +283 -60
- package/src/detect.mjs +133 -1
- package/src/dispatch.mjs +195 -30
- package/src/doctor.mjs +577 -0
- package/src/failure-memory.mjs +178 -0
- package/src/intelligence.mjs +423 -0
- package/src/nextstep.mjs +100 -0
- package/src/observer.mjs +241 -0
- package/src/outcome.mjs +256 -0
- package/src/pipeline.mjs +808 -0
- package/src/profile.mjs +357 -485
- package/src/receipt.mjs +131 -0
- package/src/session.mjs +358 -10
package/src/nextstep.mjs
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { execSync } from 'child_process';
|
|
2
|
+
import { existsSync } from 'fs';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
|
|
5
|
+
// Case-insensitive, whole-word match for auth/credential-related terms; suggestNextSteps tests it against each changed file path.
const AUTH_PAT = /\b(auth|credential|secret|token|password|encrypt|permission|oauth|jwt|api.?key)\b/i;
|
|
6
|
+
// Recognises test-related paths: "test"/"tests"/"spec"/"specs" delimited by anything that is
// not a letter (start/end of string, "/", ".", "_", "-", digits). This covers foo.test.js,
// tests/foo.js, __tests__/foo.js, test_foo.py and foo_spec.rb while still rejecting words
// that merely contain the letters (latest.js, contest.js, inspect.js).
const TEST_PAT = /(^|[^a-z])(tests?|specs?)([^a-z]|$)/i;
|
|
7
|
+
|
|
8
|
+
/**
 * Resolve the name of the currently checked-out git branch.
 *
 * @param {string} cwd - Directory in which git is executed.
 * @returns {string|null} Trimmed output of `git rev-parse --abbrev-ref HEAD`,
 *   or null when the command throws.
 */
function gitBranch(cwd) {
  // stdin and stderr are ignored; only stdout is captured.
  const spawnOptions = { cwd, stdio: ['ignore', 'pipe', 'ignore'] };
  let rawOutput;
  try {
    rawOutput = execSync('git rev-parse --abbrev-ref HEAD', spawnOptions);
  } catch {
    return null;
  }
  return rawOutput.toString().trim();
}
|
|
12
|
+
|
|
13
|
+
/**
 * Report whether the most recent commit changed the "version" field of the
 * package.json located at the root of `cwd`.
 *
 * Only added/removed diff lines are inspected. Unchanged context lines are
 * ignored, so editing a field next to "version" is not mistaken for a version
 * bump.
 *
 * @param {string} cwd - Repository directory in which git is executed.
 * @param {string[]} files - Changed file paths; git is only consulted when one
 *   of them contains "package.json".
 * @returns {boolean} True when an added or removed line of the
 *   `HEAD~1..HEAD` diff of package.json mentions "version"; false otherwise,
 *   including when git fails.
 */
function packageVersionChanged(cwd, files) {
  if (!files.some((f) => f.includes('package.json'))) return false;

  let diff;
  try {
    diff = execSync('git diff HEAD~1 HEAD -- package.json', { cwd, stdio: ['ignore', 'pipe', 'ignore'] }).toString();
  } catch {
    return false;
  }

  // The "+++"/"---" file headers also start with +/-, but never contain "version".
  return diff
    .split('\n')
    .some((line) => (line.startsWith('+') || line.startsWith('-')) && line.includes('"version"'));
}
|
|
18
|
+
|
|
19
|
+
/**
 * Check whether a changelog file sits directly inside `cwd`.
 *
 * @param {string} cwd - Directory to inspect.
 * @returns {boolean} True when CHANGELOG.md, CHANGELOG or changelog.md exists.
 */
function changelogExists(cwd) {
  for (const candidate of ['CHANGELOG.md', 'CHANGELOG', 'changelog.md']) {
    if (existsSync(join(cwd, candidate))) return true;
  }
  return false;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Build one next-step suggestion record.
 *
 * @param {number} priority - Sort weight; larger values are listed first.
 * @param {string} type - Suggestion category, used for de-duplication.
 * @param {string} message - Human-readable suggestion.
 * @param {string|null} command - Command to run, or null when there is none.
 * @param {string} reason - Why the step is suggested.
 * @returns {{priority: number, type: string, message: string, command: (string|null), reason: string}}
 */
function step(priority, type, message, command, reason) {
  const suggestion = {};
  suggestion.priority = priority;
  suggestion.type = type;
  suggestion.message = message;
  suggestion.command = command;
  suggestion.reason = reason;
  return suggestion;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Keep only the first step of each `type`, preserving input order.
 *
 * @param {Array<{type: string}>} steps - Steps in the order they should win.
 * @returns {Array<{type: string}>} New array with later duplicates removed.
 */
function dedup(steps) {
  const seenTypes = new Set();
  const unique = [];
  for (const candidate of steps) {
    if (seenTypes.has(candidate.type)) continue;
    seenTypes.add(candidate.type);
    unique.push(candidate);
  }
  return unique;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Escape the characters that stay special inside a double-quoted POSIX shell
 * string (`"`, `\`, `$` and backtick) so arbitrary text can be embedded in a
 * suggested command without terminating the quotes or triggering expansion.
 */
function escapeForDoubleQuotes(text) {
  return String(text).replace(/["\\$`]/g, '\\$&');
}

/**
 * Return a git ref usable as a single shell argument: plain names are passed
 * through unchanged, anything else is wrapped in escaped double quotes.
 */
function shellSafeRef(ref) {
  return /^[\w./-]+$/.test(ref) ? ref : `"${escapeForDoubleQuotes(ref)}"`;
}

/**
 * Suggest follow-up actions after a task has finished.
 *
 * The branch taken depends on `completedTask.trigger` first ('auto-commit',
 * then 'review'/'think'), then on whether the task failed, then on whether a
 * successful task changed any files. Suggestions are sorted by descending
 * priority and reduced to one per `type`.
 *
 * User-controlled text (the prompt and the branch name) is escaped before it
 * is placed in a suggested shell command. Prompts and branch names without
 * shell metacharacters produce exactly the same command strings as before.
 *
 * @param {{prompt?: string, files?: string[], trigger?: string}} [completedTask]
 * @param {{success?: boolean, filesChanged?: string[], error?: string}} [outcome]
 * @param {string} [cwd] - Repository directory; defaults to process.cwd().
 * @returns {Promise<{steps: object[], topSuggestion: string}>} Never rejects:
 *   any internal error yields `{ steps: [], topSuggestion: '→ Task complete' }`.
 */
export async function suggestNextSteps(completedTask = {}, outcome = {}, cwd = process.cwd()) {
  try {
    const { prompt = '', files = [], trigger } = completedTask;
    const { success = false, filesChanged = [], error = '' } = outcome;
    const steps = [];
    const branch = gitBranch(cwd);
    // An unknown branch (null) is treated like main: no PR suggestion.
    const onFeatureBranch = Boolean(branch) && branch !== 'main' && branch !== 'master';
    const allFiles = [...files, ...filesChanged];
    const hasAuth = allFiles.some((f) => AUTH_PAT.test(f));
    const hasTests = allFiles.some((f) => TEST_PAT.test(f));
    const n = filesChanged.length;
    const fileCount = (count) => `${count} file${count !== 1 ? 's' : ''}`;
    const quotedPrompt = escapeForDoubleQuotes(prompt);
    const changelogStale = () =>
      changelogExists(cwd) && !filesChanged.some((f) => /changelog/i.test(f));

    if (trigger === 'auto-commit') {
      steps.push(onFeatureBranch
        ? step(4, 'pr', `Open a pull request for branch "${branch}"`, `gh pr create --head ${shellSafeRef(branch)}`, `On feature branch — changes need review before merging`)
        : step(3, 'deploy', 'Deploy or tag a release', null, 'Committed to main — ready to ship or version'));
      if (packageVersionChanged(cwd, filesChanged)) {
        steps.push(step(4, 'publish', 'Publish the new package version to npm', 'npm publish', 'package.json version changed in this commit'));
      }
      if (changelogStale()) {
        steps.push(step(2, 'changelog', 'Update CHANGELOG with this change', null, 'CHANGELOG exists but was not updated'));
      }
    } else if (trigger === 'review' || trigger === 'think') {
      // A reported error or a problem-sounding prompt is treated as "issues found".
      const issues = error || /issue|problem|fail|error|warn/i.test(prompt);
      steps.push(issues
        ? step(5, 'fix', 'Fix the issues identified in the review', `dual-brain go "fix issues identified in review"`, 'Review found problems that need resolution')
        : step(3, 'continue', 'Ship it — the review looks good', null, 'Review completed without critical findings'));
    } else if (!success) {
      steps.push(step(5, 'fix', 'Retry with higher reasoning depth', `dual-brain go --tier think "${quotedPrompt}"`, 'Task failed — escalating tier may resolve it'));
      if (error && /test/i.test(error)) {
        steps.push(step(4, 'test', 'Look at test output for clues', null, 'Error references tests — check output to understand the failure'));
      }
      steps.push(step(3, 'review', 'Try a different approach — dual-brain think', `node .claude/hooks/dual-brain-think.mjs --question "${quotedPrompt}"`, 'GPT perspective may surface a different solution'));
    } else if (n > 0) {
      // Reaching this branch implies the task succeeded.
      if (!hasTests) {
        steps.push(step(5, 'test', `Run tests to verify the ${fileCount(n)} changed`, 'npm test', `${fileCount(n)} changed without test verification`));
      }
      if (hasAuth) {
        steps.push(step(5, 'review', 'Run a security review on auth/credential changes', `node .claude/hooks/dual-brain-think.mjs --question "Security review: ${quotedPrompt}"`, 'Auth or security-sensitive files were modified'));
      }
      if (n > 3) {
        steps.push(step(4, 'review', `Review the ${n}-file diff before committing`, 'git diff', `${n} files changed — quick diff review before committing`));
      }
      if (onFeatureBranch) {
        steps.push(step(4, 'pr', `Open a pull request for branch "${branch}"`, `gh pr create --head ${shellSafeRef(branch)}`, `Changes are on feature branch "${branch}" — ready for PR`));
      }
      if (hasTests) {
        steps.push(step(3, 'commit', 'Commit changes', 'git add -p && git commit', 'Tests passed — safe to commit'));
      }
      steps.push(step(2, 'continue', 'Check for edge cases in the changed code', null, 'Edge cases are often missed during implementation'));
      if (changelogStale()) {
        steps.push(step(2, 'changelog', 'Update CHANGELOG with this change', null, 'CHANGELOG exists but was not updated in this batch'));
      }
    }

    // Highest priority first; dedup then keeps the best step of each type.
    const sorted = dedup(steps.sort((a, b) => b.priority - a.priority));
    return {
      steps: sorted,
      topSuggestion: sorted.length > 0 ? `→ ${sorted[0].message}` : '→ Nothing urgent — task complete',
    };
  } catch {
    // Best effort: suggestions must never break the caller.
    return { steps: [], topSuggestion: '→ Task complete' };
  }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Render the first `limit` steps as a numbered list under a heading.
 *
 * @param {Array<{message: string}>} steps - Steps, most important first.
 * @param {number} [limit=3] - Maximum number of steps to show.
 * @returns {string} Formatted text, or '' when there are no steps.
 */
export function formatNextSteps(steps, limit = 3) {
  const total = steps?.length;
  if (!total) return '';

  const rows = [];
  steps.slice(0, limit).forEach((entry, index) => {
    rows.push(` ${index + 1}. ${entry.message}`);
  });
  return `📋 Next steps\n${rows.join('\n')}`;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Produce the one-line headline for a list of steps.
 *
 * @param {Array<{message: string}>} steps - Steps, most important first.
 * @returns {string} '→ ' followed by the first step's message, or
 *   '→ Task complete' when the list is missing or empty.
 */
export function getTopSuggestion(steps) {
  const hasSteps = Boolean(steps?.length);
  return hasSteps ? `→ ${steps[0].message}` : '→ Task complete';
}
|
package/src/observer.mjs
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import { execSync } from 'child_process';
|
|
2
|
+
import { existsSync, readdirSync, readFileSync } from 'fs';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
|
|
5
|
+
// Case-insensitive substring match (no word boundaries) for security-related terms; checkSecurity tests it against changed file paths.
const SEC_PATTERNS = /auth|login|password|token|secret|credential|session|jwt|oauth|permission|role|middleware/i;
|
|
6
|
+
// File-name suffixes treated as source files by checkNoTests (.mjs, .js, .ts, .py).
const SOURCE_EXT = /\.(mjs|js|ts|py)$/;
|
|
7
|
+
|
|
8
|
+
/**
 * Run a shell command and return its trimmed stdout.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @param {number} [timeout=5000] - Timeout in milliseconds passed to execSync.
 * @returns {string} Trimmed stdout, or '' when execSync throws.
 */
function exec(cmd, cwd, timeout = 5000) {
  const options = { cwd, encoding: 'utf8', timeout, stdio: ['pipe', 'pipe', 'pipe'] };
  let captured;
  try {
    captured = execSync(cmd, options).trim();
  } catch {
    captured = '';
  }
  return captured;
}
|
|
15
|
+
|
|
16
|
+
/**
 * List files that differ from HEAD, falling back to a plain `git diff` when
 * the HEAD comparison fails.
 *
 * @param {string} cwd - Repository directory.
 * @returns {string[]} Changed paths; empty when git prints nothing.
 */
function changedFiles(cwd) {
  const listing = exec('git diff --name-only HEAD 2>/dev/null || git diff --name-only', cwd);
  if (!listing) return [];
  return listing.split('\n').filter((entry) => entry.length > 0);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Flag changed files whose path matches SEC_PATTERNS.
 *
 * @param {string[]} files - Changed file paths.
 * @returns {object|null} A high-priority 'security-review' observation listing
 *   the matching files, or null when none match.
 */
function checkSecurity(files) {
  const sensitive = [];
  for (const path of files) {
    if (SEC_PATTERNS.test(path)) sensitive.push(path);
  }
  if (sensitive.length === 0) return null;

  return {
    type: 'security-review',
    priority: 'high',
    message: 'Auth-related files changed — want a security review?',
    action: 'dual-brain review',
    files: sensitive,
  };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Report changed source files for which no companion test file exists.
 *
 * A changed file counts as a source when it matches SOURCE_EXT and is not
 * itself a test (a `*.test.*` / `*.spec.*` file, a `test_*.py` / `*_test.py`
 * file, or anything under `test/`, `tests/` or `__tests__/`). For each source
 * these locations are probed next to it:
 *   - `<name>.test|spec.<mjs|js|ts>`
 *   - `__tests__/<name>.<mjs|js|ts>` and `__tests__/<name>.test|spec.<mjs|js|ts>`
 *   - for Python sources: `test_<name>.py`, `<name>_test.py`,
 *     `tests/test_<name>.py` (beside the file and at the repo root)
 *
 * @param {string[]} files - Changed paths relative to `cwd`, '/'-separated.
 * @param {string} cwd - Repository directory.
 * @returns {object|null} A medium-priority 'no-tests' observation listing the
 *   untested sources, or null when every source has a test.
 */
function checkNoTests(files, cwd) {
  const isTestFile = (f) =>
    /(^|\/)(__tests__|tests?)\//.test(f) ||
    /\.(test|spec)\.[^./]+$/.test(f) ||
    /(^|\/)test_[^/]*\.py$/.test(f) ||
    /_test\.py$/.test(f);

  // A changed test file must not be reported as "having no tests".
  const sources = files.filter((f) => SOURCE_EXT.test(f) && !isTestFile(f));
  if (!sources.length) return null;

  const untested = sources.filter((f) => {
    const segments = f.split('/');
    const fileName = segments.pop();
    const dir = join(cwd, segments.join('/'));
    const name = fileName.replace(SOURCE_EXT, '');

    const candidates = [];
    for (const ext of ['mjs', 'js', 'ts']) {
      for (const kind of ['test', 'spec']) {
        candidates.push(join(dir, `${name}.${kind}.${ext}`));
        candidates.push(join(dir, '__tests__', `${name}.${kind}.${ext}`));
      }
      candidates.push(join(dir, '__tests__', `${name}.${ext}`));
    }
    if (fileName.endsWith('.py')) {
      candidates.push(
        join(dir, `test_${name}.py`),
        join(dir, `${name}_test.py`),
        join(dir, 'tests', `test_${name}.py`),
        join(cwd, 'tests', `test_${name}.py`),
      );
    }
    return !candidates.some((candidate) => existsSync(candidate));
  });

  if (!untested.length) return null;
  return {
    type: 'no-tests',
    priority: 'medium',
    message: `${untested.length} changed file${untested.length > 1 ? 's have' : ' has'} no tests`,
    action: "dual-brain go 'add tests for changed files'",
    files: untested,
  };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Warn when the unstaged diff exceeds 500 changed lines.
 *
 * The total is the sum of every "<n> insertion" and "<n> deletion" figure
 * found in the output of `git diff --stat`.
 *
 * @param {string} cwd - Repository directory.
 * @returns {object|null} A medium-priority 'large-diff' observation, or null
 *   when the total is 500 or less or nothing could be parsed.
 */
function checkLargeDiff(cwd) {
  const statText = exec('git diff --stat', cwd);
  if (!statText) return null;

  const fragments = statText.match(/(\d+) insertion|(\d+) deletion/g);
  if (!fragments) return null;

  let total = 0;
  for (const fragment of fragments) {
    // Each fragment starts with the number, e.g. "120 insertion".
    total += Number.parseInt(fragment, 10);
  }
  if (total <= 500) return null;

  return {
    type: 'large-diff',
    priority: 'medium',
    message: `Large uncommitted changes (${total} lines) — consider committing`,
    action: "dual-brain go 'commit current changes'",
    files: [],
  };
}
|
|
75
|
+
|
|
76
|
+
/**
 * Flag uncommitted work when the last commit is at least 24 hours old.
 *
 * @param {string} cwd - Repository directory.
 * @param {string[]} files - Changed files; nothing is reported when empty.
 * @returns {object|null} A low-priority 'stale-branch' observation, or null.
 */
function checkStaleBranch(cwd, files) {
  if (!files.length) return null;

  // %ct is the committer date of the latest commit as a Unix timestamp (seconds).
  const lastCommitEpoch = exec('git log -1 --format=%ct', cwd);
  if (!lastCommitEpoch) return null;

  const ageSeconds = Date.now() / 1000 - Number.parseInt(lastCommitEpoch, 10);
  if (ageSeconds < 86400) return null;

  const ageHours = Math.round(ageSeconds / 3600);
  return {
    type: 'stale-branch',
    priority: 'low',
    message: `Last commit was ${ageHours}h ago with uncommitted work`,
    action: "dual-brain go 'commit current changes'",
    files: [],
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Report files that git lists as unmerged (`--diff-filter=U`).
 *
 * @param {string} cwd - Repository directory.
 * @returns {object|null} A high-priority 'conflict' observation listing the
 *   conflicted files, or null when there are none.
 */
function checkConflicts(cwd) {
  const conflicted = exec('git diff --name-only --diff-filter=U', cwd);
  if (!conflicted) return null;

  const files = conflicted.split('\n').filter(Boolean);
  if (!files.length) return null;

  // Subject/verb agreement: "1 file has" vs "2 files have".
  const subject = files.length > 1 ? `${files.length} files have` : `${files.length} file has`;
  return {
    type: 'conflict',
    priority: 'high',
    message: `${subject} merge conflicts`,
    action: "dual-brain go 'resolve merge conflicts'",
    files,
  };
}
|
|
105
|
+
|
|
106
|
+
/**
 * Look for the most recent failed task recorded under
 * `<cwd>/.dualbrain/outcomes/*.jsonl` within the last 24 hours.
 *
 * Files are visited in reverse name order and each file is read from its last
 * line backwards. Unparseable lines are skipped, and reading of a file stops
 * at the first record whose timestamp is older than the cutoff.
 *
 * The prompt is embedded in the suggested command inside single quotes, so
 * any single quote it contains is escaped with the `'\''` idiom. Records
 * whose `prompt` is not a non-empty string are ignored.
 *
 * @param {string} cwd - Repository directory.
 * @returns {object|null} A medium-priority 'unfinished-work' observation, or
 *   null when nothing qualifies or the directory cannot be read.
 */
function checkUnfinishedWork(cwd) {
  const outcomesDir = join(cwd, '.dualbrain', 'outcomes');
  if (!existsSync(outcomesDir)) return null;

  const cutoff = Date.now() - 86_400_000;
  let failed = null;

  try {
    const files = readdirSync(outcomesDir).filter((f) => f.endsWith('.jsonl')).sort().reverse();
    for (const file of files) {
      const lines = readFileSync(join(outcomesDir, file), 'utf8')
        .split('\n').filter(Boolean);
      for (let i = lines.length - 1; i >= 0; i -= 1) {
        try {
          const rec = JSON.parse(lines[i]);
          if (rec.timestamp && rec.timestamp < cutoff) break;
          if (rec.result && rec.result.success === false && typeof rec.prompt === 'string' && rec.prompt) {
            failed = rec;
            break;
          }
        } catch { /* skip malformed line */ }
      }
      if (failed) break;
    }
  } catch { return null; }

  if (!failed) return null;
  const prompt = failed.prompt.slice(0, 60);
  // Close the quote, emit an escaped quote, reopen: ' -> '\''
  const shellPrompt = failed.prompt.replaceAll("'", "'\\''");
  return {
    type: 'unfinished-work',
    priority: 'medium',
    message: `Last session had a failed task: '${prompt}' — resume?`,
    action: `dual-brain go '${shellPrompt}'`,
    files: [],
  };
}
|
|
142
|
+
|
|
143
|
+
/**
 * Run `npm test` and report whether the suite fails.
 *
 * Nothing is run unless `<cwd>/package.json` exists, parses as JSON and
 * declares a `test` script. Only a completed run with a non-zero exit code is
 * reported as failing. When the process was killed (for example by the
 * 30-second timeout) or could not be spawned, execSync's error carries no
 * numeric `status`, and the outcome is treated as unknown rather than as a
 * test failure.
 *
 * @param {string} cwd - Project directory.
 * @returns {Promise<object|null>} A high-priority 'failing-tests' observation,
 *   or null when tests pass, cannot be run, or did not finish.
 */
async function checkFailingTests(cwd) {
  const pkgPath = join(cwd, 'package.json');
  if (!existsSync(pkgPath)) return null;
  try {
    const pkg = JSON.parse(readFileSync(pkgPath, 'utf8'));
    if (!pkg.scripts?.test) return null;
  } catch { return null; }

  try {
    execSync('npm test --silent 2>&1', { cwd, encoding: 'utf8', timeout: 30000, stdio: 'pipe' });
    return null;
  } catch (err) {
    // status is the child's exit code; it is not a number when the child was killed by a signal.
    if (typeof err?.status !== 'number') return null;
    return {
      type: 'failing-tests',
      priority: 'high',
      message: 'Tests are failing — want me to investigate?',
      action: "dual-brain go 'fix failing tests'",
      files: [],
    };
  }
}
|
|
164
|
+
|
|
165
|
+
/**
 * Compose a one-line, comma-separated summary of the repository state.
 *
 * @param {string[]} files - Changed files.
 * @param {Array<{type: string, priority: string}>} observations
 * @returns {string} e.g. "3 files changed, 1 conflict, 2 high-priority suggestions".
 */
function buildSummary(files, observations) {
  const plural = (count) => (count > 1 ? 's' : '');

  let conflictCount = 0;
  let highCount = 0;
  for (const observation of observations) {
    if (observation.type === 'conflict') conflictCount += 1;
    if (observation.priority === 'high') highCount += 1;
  }

  const segments = [
    files.length ? `${files.length} file${plural(files.length)} changed` : 'no uncommitted changes',
  ];
  if (conflictCount) segments.push(`${conflictCount} conflict${plural(conflictCount)}`);
  if (highCount) segments.push(`${highCount} high-priority suggestion${plural(highCount)}`);
  return segments.join(', ');
}
|
|
175
|
+
|
|
176
|
+
/**
 * Run every repository check and collect the resulting observations.
 *
 * Checks run in a fixed order: security, conflicts, missing tests, large diff,
 * stale branch, unfinished work, and — only when `options.runTests` is truthy —
 * failing tests.
 *
 * @param {string} cwd - Repository directory.
 * @param {{runTests?: boolean}} [options]
 * @returns {Promise<{observations: object[], summary: string}>} Never rejects;
 *   any error yields an empty list with the summary 'unable to observe repo state'.
 */
export async function observe(cwd, options = {}) {
  try {
    const files = changedFiles(cwd);

    // Array elements are evaluated left to right, so the checks keep their order.
    const results = [
      checkSecurity(files),
      checkConflicts(cwd),
      checkNoTests(files, cwd),
      checkLargeDiff(cwd),
      checkStaleBranch(cwd, files),
      checkUnfinishedWork(cwd),
    ];
    if (options.runTests) {
      results.push(await checkFailingTests(cwd));
    }

    const observations = results.filter(Boolean);
    return { observations, summary: buildSummary(files, observations) };
  } catch {
    return { observations: [], summary: 'unable to observe repo state' };
  }
}
|
|
209
|
+
|
|
210
|
+
/**
 * Render observations as a list under a "Suggestions" heading, one line per
 * observation prefixed with a priority icon.
 *
 * A missing list (null/undefined) is treated like an empty one instead of
 * throwing.
 *
 * @param {Array<{priority: string, message: string}>} observations
 * @returns {string} The formatted block; shows "(none)" when there is nothing to list.
 */
export function formatObservations(observations) {
  if (!observations?.length) return '💡 Suggestions\n (none)';
  const icon = { high: '🔴', medium: '🟡', low: '🟢' };
  // Unknown priorities fall back to a neutral marker.
  const lines = observations.map((o) => ` ${icon[o.priority] || '⚪'} ${o.message}`);
  return `💡 Suggestions\n${lines.join('\n')}`;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Collect the observations from the git-based checks only: security,
 * conflicts, missing tests, large diff and stale branch, in that order.
 *
 * @param {string} cwd - Repository directory.
 * @returns {Promise<{observations: object[], summary: string}>} Never rejects;
 *   any error yields an empty list with the summary 'unable to observe repo state'.
 */
export async function getQuickState(cwd) {
  try {
    const files = changedFiles(cwd);

    // Array elements are evaluated left to right, so the checks keep their order.
    const observations = [
      checkSecurity(files),
      checkConflicts(cwd),
      checkNoTests(files, cwd),
      checkLargeDiff(cwd),
      checkStaleBranch(cwd, files),
    ].filter(Boolean);

    return { observations, summary: buildSummary(files, observations) };
  } catch {
    return { observations: [], summary: 'unable to observe repo state' };
  }
}
|
package/src/outcome.mjs
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import { mkdirSync, appendFileSync, readFileSync, existsSync } from 'fs';
|
|
2
|
+
import { join } from 'path';
|
|
3
|
+
import { randomUUID } from 'crypto';
|
|
4
|
+
|
|
5
|
+
/** Common English words that are ignored when extracting keywords from prompts. */
const STOP_WORDS = new Set([
  'the', 'a', 'an',
  'is', 'are', 'was', 'were',
  'to', 'from',
  'in', 'on', 'for', 'with',
  'and', 'or', 'but', 'not',
  'this', 'that', 'it',
]);
|
|
9
|
+
|
|
10
|
+
/**
 * Path of the directory that stores outcome logs for a project.
 *
 * @param {string} cwd - Project directory.
 * @returns {string} `<cwd>/.dualbrain/outcomes`.
 */
function outcomesDir(cwd) {
  const segments = [cwd, '.dualbrain', 'outcomes'];
  return join(...segments);
}
|
|
13
|
+
|
|
14
|
+
/**
 * Path of the outcome log for the current UTC date.
 *
 * @param {string} cwd - Project directory.
 * @returns {string} `<outcomes dir>/outcomes-YYYY-MM-DD.jsonl`.
 */
function todayFile(cwd) {
  // The first 10 characters of an ISO timestamp are the UTC date, YYYY-MM-DD.
  const [isoDay] = [new Date().toISOString().slice(0, 10)];
  const fileName = `outcomes-${isoDay}.jsonl`;
  return join(outcomesDir(cwd), fileName);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Create the outcomes directory, including missing parents.
 *
 * @param {string} cwd - Project directory.
 * @returns {void}
 */
function ensureDir(cwd) {
  const target = outcomesDir(cwd);
  mkdirSync(target, { recursive: true });
}
|
|
22
|
+
|
|
23
|
+
/**
 * Parse a JSON-lines outcome log.
 *
 * @param {string} filePath - Path of the .jsonl file.
 * @returns {Array} One parsed value per valid line. Blank and unparseable
 *   lines are skipped, and an unreadable file yields [].
 */
function readOutcomeFile(filePath) {
  let text;
  try {
    text = readFileSync(filePath, 'utf8');
  } catch {
    return [];
  }

  const records = [];
  for (const line of text.split('\n')) {
    if (!line) continue;
    try {
      records.push(JSON.parse(line));
    } catch {
      // Malformed line: ignore it and keep reading.
    }
  }
  return records;
}
|
|
35
|
+
|
|
36
|
+
/**
 * List the existing daily outcome logs for the most recent days, newest first.
 *
 * @param {string} cwd - Project directory.
 * @param {number} [days=7] - Number of UTC calendar days to look back,
 *   counting today. Values that are not non-negative integers fall back to 7.
 * @returns {string[]} Paths of the `outcomes-YYYY-MM-DD.jsonl` files that
 *   exist within the window.
 */
function last7DaysFiles(cwd, days = 7) {
  const span = Number.isInteger(days) && days >= 0 ? days : 7;
  const dir = outcomesDir(cwd);
  const files = [];
  for (let i = 0; i < span; i++) {
    // Step back whole days from now; the ISO prefix is the UTC date.
    const d = new Date(Date.now() - i * 86_400_000).toISOString().slice(0, 10);
    const f = join(dir, `outcomes-${d}.jsonl`);
    if (existsSync(f)) files.push(f);
  }
  return files;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Score how well a task was routed, from 1 (poor) to 5 (excellent).
 *
 * Starts at 3 and applies these adjustments:
 *   +1 success in under 60 s
 *   +1 files verified and tests passed
 *   -1 an error was reported
 *   -1 the run took longer than 180 s
 *   -2 the task failed with no challenger configured
 *
 * @param {{challengerPolicy?: string}} plan
 * @param {{success?: boolean, duration?: number, error?: *}} result
 * @param {{filesVerified?: boolean, testsPassed?: (boolean|null)}} verification
 * @returns {number} Integer clamped to the range 1-5.
 */
export function computeRoutingScore(plan, result, verification) {
  const challengerAbsent = plan.challengerPolicy === 'none' || !plan.challengerPolicy;

  const adjustments = [
    result.success && result.duration < 60_000 ? 1 : 0,
    verification.filesVerified && verification.testsPassed === true ? 1 : 0,
    result.error ? -1 : 0,
    result.duration > 180_000 ? -1 : 0,
    challengerAbsent && !result.success ? -2 : 0,
  ];

  const raw = adjustments.reduce((total, delta) => total + delta, 3);
  if (raw < 1) return 1;
  if (raw > 5) return 5;
  return raw;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Derive short, human-readable lessons from a finished task.
 *
 * @param {object} plan - Reads challengerPolicy, reasoningDepth, complexity,
 *   prompt, tier and risk.
 * @param {{success?: boolean, duration?: number}} result
 * @param {{filesVerified?: boolean}} verification
 * @returns {string[]} Zero or more lesson sentences, in a fixed order.
 */
export function generateLessons(plan, result, verification) {
  const lessons = [];
  const challengerUsed = Boolean(plan.challengerPolicy) && plan.challengerPolicy !== 'none';
  const failed = !result.success;

  if (failed && !challengerUsed) {
    lessons.push('Task failed without challenger — consider escalating similar tasks');
  }

  const lowComplexity = plan.complexity === 'simple' || plan.complexity === 'low';
  if (plan.reasoningDepth === 'ultra' && result.duration < 60_000 && lowComplexity) {
    lessons.push('Ultra reasoning unnecessary — task completed quickly at low complexity');
  }

  if (failed) {
    // The first four meaningful words of the prompt identify the failure pattern.
    const meaningful = [];
    for (const word of (plan.prompt || '').toLowerCase().split(/\s+/)) {
      if (word.length > 3 && !STOP_WORDS.has(word)) meaningful.push(word);
    }
    const keywords = meaningful.slice(0, 4).join(' ');
    if (keywords) {
      lessons.push(`Prior failure pattern: ${keywords} on ${plan.tier}`);
    }
  }

  if (challengerUsed && result.success && verification.filesVerified) {
    lessons.push(`Challenger caught issues — keep challenger policy for ${plan.risk} risk`);
  }

  return lessons;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Append a record of a finished task to today's outcome log.
 *
 * The record combines selected plan fields, the result, the verification
 * status, the routing score and the generated lessons. Missing fields are
 * filled with defaults.
 *
 * @param {object} plan
 * @param {object} result
 * @param {object} verification
 * @param {string} cwd - Project directory.
 * @returns {Promise<object|null>} The record that was written, or null when
 *   anything fails (the error is swallowed).
 */
export async function recordOutcome(plan, result, verification, cwd) {
  try {
    ensureDir(cwd);

    const routingScore = computeRoutingScore(plan, result, verification);
    const lessons = generateLessons(plan, result, verification);

    const resultSummary = {
      success: result.success ?? false,
      filesChanged: result.filesChanged ?? [],
      duration: result.duration ?? 0,
      error: result.error ?? null,
    };
    const verificationSummary = {
      filesVerified: verification.filesVerified ?? false,
      testsRun: verification.testsRun ?? false,
      testsPassed: verification.testsPassed ?? null,
    };
    const record = {
      id: randomUUID(),
      timestamp: Date.now(),
      prompt: plan.prompt ?? '',
      tier: plan.tier ?? '',
      primaryModel: plan.primaryModel ?? '',
      reasoningDepth: plan.reasoningDepth ?? '',
      challengerPolicy: plan.challengerPolicy ?? 'none',
      risk: plan.risk ?? '',
      result: resultSummary,
      verification: verificationSummary,
      routingScore,
      lessons,
    };

    // One JSON document per line.
    const serialized = `${JSON.stringify(record)}\n`;
    appendFileSync(todayFile(cwd), serialized, 'utf8');
    return record;
  } catch {
    return null;
  }
}
|
|
129
|
+
|
|
130
|
+
/**
 * Split text into lower-case keywords.
 *
 * @param {string} text - Input text; falsy values are treated as ''.
 * @returns {string[]} Words longer than three characters that are not stop
 *   words, in order of appearance (duplicates kept).
 */
function tokenize(text) {
  const keywords = [];
  for (const word of (text || '').toLowerCase().split(/\W+/)) {
    if (word.length <= 3) continue;
    if (STOP_WORDS.has(word)) continue;
    keywords.push(word);
  }
  return keywords;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Count the distinct keywords two prompts have in common.
 *
 * Each shared keyword counts once however often it is repeated in either
 * prompt, so repetition cannot inflate the relevance score.
 *
 * @param {string} a - First prompt.
 * @param {string} b - Second prompt.
 * @returns {number} Number of distinct keywords present in both prompts.
 */
function promptOverlap(a, b) {
  const wordsA = new Set(tokenize(a));
  const wordsB = new Set(tokenize(b));
  let shared = 0;
  for (const word of wordsB) {
    if (wordsA.has(word)) shared += 1;
  }
  return shared;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Count how many entries of `filesB` share a base name with any entry of
 * `filesA`. Only the part after the last '/' is compared.
 *
 * @param {string[]} [filesA=[]]
 * @param {string[]} [filesB=[]]
 * @returns {number} Number of entries in `filesB` whose base name occurs in `filesA`.
 */
function fileOverlap(filesA = [], filesB = []) {
  const baseName = (path) => path.split('/').pop();
  const knownNames = new Set(filesA.map(baseName));

  let matches = 0;
  for (const path of filesB) {
    if (knownNames.has(baseName(path))) matches += 1;
  }
  return matches;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Find recorded outcomes that resemble the given task.
 *
 * Every outcome in the files returned by last7DaysFiles is scored as its
 * prompt overlap plus its file overlap. Outcomes scoring at least 2 are
 * kept, sorted by descending score and limited to five.
 *
 * @param {string} prompt - Prompt of the task being planned.
 * @param {string[]} [files=[]] - Files the task is expected to touch.
 * @param {string} cwd - Project directory.
 * @param {object} [options={}] - Accepted but not read by this function.
 * @returns {Promise<object[]>} Summaries of matching outcomes; [] on any error.
 */
export async function getRelevantOutcomes(prompt, files = [], cwd, options = {}) {
  try {
    const matches = [];
    for (const path of last7DaysFiles(cwd)) {
      for (const outcome of readOutcomeFile(path)) {
        const touched = outcome.result?.filesChanged ?? [];
        const relevance = promptOverlap(prompt, outcome.prompt) + fileOverlap(files, touched);
        if (relevance >= 2) matches.push({ outcome, relevance });
      }
    }

    // Array sort is stable, so equally relevant outcomes keep their file order.
    matches.sort((left, right) => right.relevance - left.relevance);

    return matches.slice(0, 5).map(({ outcome, relevance }) => ({
      id: outcome.id,
      timestamp: outcome.timestamp,
      prompt: outcome.prompt,
      success: outcome.result?.success ?? false,
      routingScore: outcome.routingScore,
      lessons: outcome.lessons,
      relevanceScore: relevance,
    }));
  } catch {
    return [];
  }
}
|
|
176
|
+
|
|
177
|
+
/** Zeroed statistics returned when there is nothing to aggregate. */
function emptyOutcomeStats() {
  return {
    totalTasks: 0,
    successRate: 0,
    avgRoutingScore: 0,
    avgDuration: 0,
    challengerHelpRate: 0,
    topLessons: [],
    modelBreakdown: {},
  };
}

/**
 * Aggregate statistics over the outcomes recorded in the last `days` days.
 *
 * The window is a span of UTC calendar days counting today. The files
 * returned by last7DaysFiles are additionally filtered by the date embedded
 * in each file name, so sparse logs cannot pull in files older than the
 * requested window.
 *
 * @param {string} cwd - Project directory.
 * @param {number} [days=7] - Size of the look-back window in days.
 * @returns {Promise<{totalTasks: number, successRate: number,
 *   avgRoutingScore: number, avgDuration: number, challengerHelpRate: number,
 *   topLessons: string[], modelBreakdown: Object<string, {count: number, successRate: number}>}>}
 *   Zeroed statistics when no outcomes are found or on any error.
 */
export async function getOutcomeStats(cwd, days = 7) {
  try {
    // Oldest UTC date (YYYY-MM-DD) still inside the window; ISO dates compare lexicographically.
    const oldestDay = new Date(Date.now() - (days - 1) * 86_400_000).toISOString().slice(0, 10);
    const windowFiles = last7DaysFiles(cwd, days).filter((file) => {
      const stamp = /outcomes-(\d{4}-\d{2}-\d{2})\.jsonl$/.exec(file);
      return stamp !== null && stamp[1] >= oldestDay;
    });
    const outcomes = windowFiles.flatMap((file) => readOutcomeFile(file));

    if (outcomes.length === 0) return emptyOutcomeStats();

    const totalTasks = outcomes.length;
    const successes = outcomes.filter((o) => o.result?.success).length;
    const successRate = successes / totalTasks;

    // Records without a routing score count as the neutral value 3.
    const avgRoutingScore =
      outcomes.reduce((sum, o) => sum + (o.routingScore ?? 3), 0) / totalTasks;

    const avgDuration =
      outcomes.reduce((sum, o) => sum + (o.result?.duration ?? 0), 0) / totalTasks;

    const challengerUsed = outcomes.filter(
      (o) => o.challengerPolicy && o.challengerPolicy !== 'none',
    );
    const challengerHelped = challengerUsed.filter((o) => o.result?.success);
    const challengerHelpRate =
      challengerUsed.length > 0 ? challengerHelped.length / challengerUsed.length : 0;

    // Maps keep arbitrary lesson/model text from colliding with Object.prototype keys.
    const lessonCounts = new Map();
    for (const o of outcomes) {
      for (const lesson of o.lessons ?? []) {
        lessonCounts.set(lesson, (lessonCounts.get(lesson) ?? 0) + 1);
      }
    }
    const topLessons = [...lessonCounts.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([lesson]) => lesson);

    const modelTotals = new Map();
    for (const o of outcomes) {
      const model = o.primaryModel;
      if (!model) continue;
      const tally = modelTotals.get(model) ?? { count: 0, successCount: 0 };
      tally.count += 1;
      if (o.result?.success) tally.successCount += 1;
      modelTotals.set(model, tally);
    }
    const modelBreakdown = Object.fromEntries(
      [...modelTotals].map(([model, { count, successCount }]) => [
        model,
        { count, successRate: count > 0 ? successCount / count : 0 },
      ]),
    );

    return {
      totalTasks,
      successRate,
      avgRoutingScore,
      avgDuration,
      challengerHelpRate,
      topLessons,
      modelBreakdown,
    };
  } catch {
    // Best effort: statistics must never break the caller.
    return emptyOutcomeStats();
  }
}
|