dual-brain 7.1.21 → 7.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/dual-brain.mjs +2580 -717
- package/hooks/budget-balancer.mjs +104 -266
- package/hooks/wave-orchestrator.mjs +29 -26
- package/package.json +14 -3
- package/scripts/verify-publish.mjs +26 -0
- package/src/context.mjs +389 -0
- package/src/decide.mjs +283 -60
- package/src/detect.mjs +133 -1
- package/src/dispatch.mjs +195 -30
- package/src/doctor.mjs +577 -0
- package/src/failure-memory.mjs +178 -0
- package/src/intelligence.mjs +423 -0
- package/src/nextstep.mjs +100 -0
- package/src/observer.mjs +241 -0
- package/src/outcome.mjs +256 -0
- package/src/pipeline.mjs +808 -0
- package/src/profile.mjs +357 -485
- package/src/receipt.mjs +131 -0
- package/src/session.mjs +358 -10
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* failure-memory.mjs — Track task failures and enable automatic escalation.
|
|
4
|
+
*
|
|
5
|
+
* Exports: recordFailure, checkFailureHistory, formatEscalation,
|
|
6
|
+
* clearFailures, getFailureStats
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { readFileSync, appendFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
import { randomUUID } from 'crypto';
|
|
12
|
+
|
|
13
|
+
// Common English filler words that tokenize() drops before prompts are compared.
const STOP_WORDS = new Set([
  'the', 'a', 'an', 'is', 'in', 'on', 'at', 'to', 'for', 'of',
  'and', 'or', 'with', 'this', 'that', 'it', 'be', 'was', 'are', 'were',
  'has', 'have', 'had', 'do', 'does', 'did', 'not', 'from', 'by', 'as',
  'if', 'but', 'we', 'i', 'you',
]);

// 48 hours expressed in milliseconds.
const WINDOW_48H = 48 * 60 * 60 * 1000;

// Reasoning depths, lowest first; bumpDepth() walks this list upwards.
const DEPTH_ORDER = ['low', 'medium', 'high', 'ultra'];

// Model family keywords, weakest first; bumpModel() walks this list upwards.
const MODEL_ORDER = ['haiku', 'sonnet', 'opus'];
|
|
18
|
+
|
|
19
|
+
/**
 * Resolve the failure-log path for a project, creating the `.dualbrain`
 * directory first when it does not exist yet.
 *
 * @param {string} cwd - Project root directory.
 * @returns {string} Path of `<cwd>/.dualbrain/failures.jsonl`.
 */
function failuresPath(cwd) {
  const storeDir = join(cwd, '.dualbrain');
  if (existsSync(storeDir) === false) {
    mkdirSync(storeDir, { recursive: true });
  }
  return join(storeDir, 'failures.jsonl');
}
|
|
24
|
+
|
|
25
|
+
/**
 * Classify an error message into a coarse category by keyword matching.
 *
 * Checks run in the order written and the first match wins. The input may be
 * a string, an Error, or anything else: it is coerced with String(), and
 * null/undefined are treated as an empty message.
 *
 * @param {unknown} [error=''] - Error text or error object.
 * @returns {'test-failure'|'timeout'|'syntax-error'|'permission-error'|'not-found'|'unknown'}
 */
function categorizeError(error = '') {
  const text = String(error ?? '').toLowerCase();
  // `(?<!un)` keeps "unexpected token" from matching the "expect" keyword,
  // which would otherwise label every JS syntax error as a test failure.
  if (/test|assert|(?<!un)expect/.test(text)) return 'test-failure';
  if (/timeout|timed out/.test(text)) return 'timeout';
  if (/syntax|parse|unexpected token/.test(text)) return 'syntax-error';
  if (/permission|eacces/.test(text)) return 'permission-error';
  if (/not found|enoent/.test(text)) return 'not-found';
  return 'unknown';
}
|
|
34
|
+
|
|
35
|
+
/**
 * Split text into lower-cased keywords for similarity matching.
 *
 * Words are separated on runs of non-word characters; words of two characters
 * or fewer and entries of STOP_WORDS are dropped. null/undefined yield an
 * empty list and other non-strings are coerced with String(), so a stored
 * record with a missing prompt cannot crash the caller.
 *
 * @param {unknown} [text=''] - Text to tokenize.
 * @returns {string[]} Keywords in their original order (duplicates kept).
 */
function tokenize(text = '') {
  return String(text ?? '')
    .toLowerCase()
    .split(/\W+/)
    .filter((word) => word.length > 2 && !STOP_WORDS.has(word));
}
|
|
38
|
+
|
|
39
|
+
/**
 * Score how alike two tasks are, from 0 to 1.
 *
 * The base score is the number of shared keywords divided by the size of the
 * larger keyword set. When the two file lists have at least one entry in
 * common, the score is raised to a minimum of 0.5. Two prompts that both
 * tokenize to nothing score 0 regardless of files.
 *
 * @param {string} promptA
 * @param {string} promptB
 * @param {string[]} [filesA=[]]
 * @param {string[]} [filesB=[]]
 * @returns {number}
 */
function similarity(promptA, promptB, filesA = [], filesB = []) {
  const left = new Set(tokenize(promptA));
  const right = new Set(tokenize(promptB));
  if (left.size === 0 && right.size === 0) return 0;

  let overlap = 0;
  for (const word of left) {
    if (right.has(word)) overlap += 1;
  }
  const lexical = overlap / Math.max(left.size, right.size);

  const touchesSameFile = filesA.some((file) => filesB.includes(file));
  if (!touchesSameFile) return lexical;
  return Math.max(lexical, 0.5);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Load every failure record from the project's JSONL failure log.
 *
 * Blank lines, lines that are not valid JSON, and lines that parse to a falsy
 * value are skipped. Returns an empty list when the log file does not exist.
 *
 * @param {string} cwd - Project root directory.
 * @returns {object[]} Parsed records in file order.
 */
function readFailures(cwd) {
  const logFile = failuresPath(cwd);
  if (!existsSync(logFile)) return [];

  const parsed = [];
  for (const line of readFileSync(logFile, 'utf8').split('\n')) {
    if (!line) continue;
    let entry;
    try {
      entry = JSON.parse(line);
    } catch {
      continue; // corrupt line: ignore it and keep the rest of the log usable
    }
    if (entry) parsed.push(entry);
  }
  return parsed;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Overwrite the failure log with the given records, one JSON document per
 * line, followed by a trailing newline.
 *
 * @param {string} cwd - Project root directory.
 * @param {object[]} records - Records to persist.
 */
function writeAll(cwd, records) {
  const lines = records.map((record) => JSON.stringify(record));
  writeFileSync(failuresPath(cwd), `${lines.join('\n')}\n`);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Return the reasoning depth one step above `depth` in DEPTH_ORDER.
 * A depth that is unknown, or already the highest, maps to 'ultra'.
 *
 * @param {string} depth - Current reasoning depth.
 * @returns {string} The escalated depth.
 */
function bumpDepth(depth) {
  const position = DEPTH_ORDER.indexOf(depth);
  const isUnknown = position === -1;
  const isTop = position >= DEPTH_ORDER.length - 1;
  if (isUnknown || isTop) return 'ultra';
  return DEPTH_ORDER[position + 1];
}
|
|
67
|
+
|
|
68
|
+
/**
 * Return the model id one tier above the given model.
 *
 * The tier is found by looking for a MODEL_ORDER keyword inside the
 * lower-cased model name; a name with no keyword is treated as 'sonnet'.
 * null/undefined are accepted (failure records store `model: null` when the
 * plan had no model) and behave like an empty name.
 *
 * @param {string|null} [model=''] - Current model name or id.
 * @returns {string} `claude-<tier>-4-5`, capped at `claude-opus-4-5`.
 */
function bumpModel(model = '') {
  // A default parameter only covers undefined, so null is coerced explicitly.
  const name = String(model ?? '').toLowerCase();
  const family = MODEL_ORDER.find((key) => name.includes(key)) ?? 'sonnet';
  const rank = MODEL_ORDER.indexOf(family);
  if (rank >= MODEL_ORDER.length - 1) return 'claude-opus-4-5';
  return `claude-${MODEL_ORDER[rank + 1]}-4-5`;
}
|
|
74
|
+
|
|
75
|
+
// ─── Exports ──────────────────────────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
/**
 * Append a failure record for a task to the project's failure log.
 *
 * @param {string} prompt - The task prompt that failed.
 * @param {object|null} [plan={}] - Plan in effect; `model`, `reasoningDepth`,
 *   `tier`, `files` and `escalatedFrom` are copied into the record when present.
 * @param {unknown} [error=''] - Error text or Error object; stored via String().
 * @param {string} [cwd=process.cwd()] - Project root directory.
 * @returns {Promise<object>} The record that was written.
 */
export async function recordFailure(prompt, plan = {}, error = '', cwd = process.cwd()) {
  // Default parameters only cover undefined; tolerate an explicit null plan.
  const details = plan ?? {};
  // Categorize the same text that is stored, so an Error object is never
  // handed to the string-based categorizer.
  const errorText = String(error);

  const record = {
    id: randomUUID(),
    timestamp: Date.now(),
    prompt,
    promptWords: tokenize(prompt),
    model: details.model ?? null,
    reasoningDepth: details.reasoningDepth ?? null,
    tier: details.tier ?? null,
    error: errorText,
    errorCategory: categorizeError(errorText),
    files: details.files ?? [],
    escalatedFrom: details.escalatedFrom ?? null,
    resolved: false,
  };

  appendFileSync(failuresPath(cwd), `${JSON.stringify(record)}\n`);
  return record;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Look for unresolved failures from the last 48 hours that resemble the given
 * task and derive an escalation recommendation from how many there are.
 *
 * A stored failure matches when similarity() against its prompt and files is
 * at least 0.4. Escalation steps:
 *   - 1 match: keep the model, bump the reasoning depth one step.
 *   - 2 matches: depth 'ultra' and a bumped model (kept if already opus).
 *   - 3+ matches: as for 2, plus `useChallenger: true`.
 *
 * @param {string} prompt - Prompt of the task about to run.
 * @param {string[]} [files=[]] - Files the task touches.
 * @param {string} [cwd=process.cwd()] - Project root directory.
 * @returns {Promise<{hasPriorFailures: boolean, failureCount: number,
 *   lastFailure: object|null, escalation: object}>}
 */
export async function checkFailureHistory(prompt, files = [], cwd = process.cwd()) {
  const cutoff = Date.now() - WINDOW_48H;

  const matches = readFailures(cwd)
    .filter((record) => !record.resolved && record.timestamp >= cutoff)
    .filter((record) => similarity(prompt, record.prompt, files, record.files ?? []) >= 0.4)
    .sort((a, b) => b.timestamp - a.timestamp); // newest first

  const count = matches.length;
  const last = matches[0] ?? null;

  const escalation = {
    recommended: false,
    fromModel: null,
    toModel: null,
    fromDepth: null,
    toDepth: null,
    useChallenger: false,
    reason: '',
  };

  if (last) {
    escalation.recommended = true;
    escalation.fromModel = last.model;
    escalation.fromDepth = last.reasoningDepth;

    if (count === 1) {
      escalation.toDepth = bumpDepth(last.reasoningDepth ?? 'medium');
      escalation.toModel = last.model;
      escalation.useChallenger = false;
      escalation.reason = `1 prior failure on similar task, bumping depth to ${escalation.toDepth}`;
    } else {
      // Records store `model: null` when the plan had none; pass '' instead of
      // null so bumpModel falls back to its default tier rather than throwing.
      const alreadyOpus = typeof last.model === 'string' && last.model.includes('opus');
      escalation.toDepth = 'ultra';
      escalation.toModel = alreadyOpus ? last.model : bumpModel(last.model ?? '');
      escalation.useChallenger = count > 2;
      escalation.reason = count === 2
        ? `2 prior failures on similar task, escalating to Opus + ultrathink`
        : `${count} prior failures on similar task, forcing dual-brain`;
    }
  }

  return { hasPriorFailures: count > 0, failureCount: count, lastFailure: last, escalation };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Render an escalation recommendation as a short multi-line notice.
 *
 * @param {object|null|undefined} escalation - Object shaped like the
 *   `escalation` field returned by checkFailureHistory().
 * @returns {string} The notice, or '' when no escalation is recommended.
 */
export function formatEscalation(escalation) {
  if (!escalation || !escalation.recommended) return '';

  const before = [escalation.fromModel, escalation.fromDepth].filter(Boolean);
  const prev = before.length > 0 ? before.join(', ') || 'unknown' : 'unknown';

  const after = [escalation.toModel, escalation.toDepth];
  if (escalation.useChallenger) after.push('GPT challenger');
  const next = after.filter(Boolean).join(' + ');

  return `⚡ Strategy changed\n Previous: failed (${prev})\n Escalated: ${next}\n Reason: ${escalation.reason}`;
}
|
|
142
|
+
|
|
143
|
+
/**
 * Mark stored failures that resemble `prompt` as resolved.
 *
 * The prompt is reduced to its keywords first; every unresolved record whose
 * prompt scores at least 0.4 against those keywords is flagged
 * `resolved: true`. The log is rewritten only when at least one record changed.
 *
 * @param {string} prompt - Prompt of the task that has now succeeded.
 * @param {string} [cwd=process.cwd()] - Project root directory.
 * @returns {Promise<void>}
 */
export async function clearFailures(prompt, cwd = process.cwd()) {
  const records = readFailures(cwd);
  const keywordPrompt = tokenize(prompt).join(' ');

  let dirty = false;
  const next = [];
  for (const record of records) {
    const isMatch = !record.resolved && similarity(keywordPrompt, record.prompt) >= 0.4;
    if (isMatch) {
      dirty = true;
      next.push({ ...record, resolved: true });
    } else {
      next.push(record);
    }
  }

  if (dirty) writeAll(cwd, next);
}
|
|
157
|
+
|
|
158
|
+
/**
 * Summarize the project's failure log.
 *
 * @param {string} [cwd=process.cwd()] - Project root directory.
 * @returns {Promise<{total: number, resolved: number, unresolved: number,
 *   byCategory: Object<string, number>, avgEscalationsToResolve: number}>}
 */
export async function getFailureStats(cwd = process.cwd()) {
  const records = readFailures(cwd);
  const byCategory = {};
  let resolvedTotal = 0;
  let escalationTotal = 0;
  let escalatedRecords = 0;

  records.forEach((record) => {
    resolvedTotal += record.resolved ? 1 : 0;

    const seen = byCategory[record.errorCategory] ?? 0;
    byCategory[record.errorCategory] = seen + 1;

    // NOTE(review): both counters advance together, so the average below is
    // 1 whenever any record has `escalatedFrom` and 0 otherwise. Confirm what
    // "escalations to resolve" is meant to measure before relying on it.
    if (record.escalatedFrom) {
      escalationTotal += 1;
      escalatedRecords += 1;
    }
  });

  const average = escalatedRecords === 0
    ? 0
    : Number((escalationTotal / escalatedRecords).toFixed(2));

  return {
    total: records.length,
    resolved: resolvedTotal,
    unresolved: records.length - resolvedTotal,
    byCategory,
    avgEscalationsToResolve: average,
  };
}
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* intelligence.mjs — Situational awareness for every pipeline run.
|
|
3
|
+
* Reads project reality fresh, derives task context, and detects contradictions
|
|
4
|
+
* between what an agent plans to do and what is actually true.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
8
|
+
import { execSync } from 'node:child_process';
|
|
9
|
+
import { join } from 'node:path';
|
|
10
|
+
|
|
11
|
+
// Files reported as `protectedPaths` in the project brief; plans that target
// them are checked by detectContradictions().
const PROTECTED_PATHS = [
  'src/pipeline.mjs',
  'src/dispatch.mjs',
  'src/decide.mjs',
  '.claude/hooks/head-guard.mjs',
];
|
|
17
|
+
|
|
18
|
+
// ─── Git helpers ──────────────────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
/**
 * Run a shell command and return its trimmed stdout, or '' if it fails.
 * stdin and stderr are ignored; any error from execSync is swallowed on
 * purpose so callers can treat "no output" and "command failed" alike.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @returns {string}
 */
function safeExec(cmd, cwd) {
  let output = '';
  try {
    const options = { cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] };
    output = execSync(cmd, options).trim();
  } catch {
    output = '';
  }
  return output;
}
|
|
27
|
+
|
|
28
|
+
/**
 * List the paths reported by `git status --porcelain`.
 *
 * Each porcelain entry is a two-column status followed by a space and the
 * path. safeExec() trims the whole output, so when the first entry's status
 * starts with a space (e.g. " M file") that leading space is gone and a fixed
 * `slice(3)` would cut the first character off the path. The status is
 * therefore matched by pattern instead of by column.
 *
 * Surrounding double quotes that git adds to unusual paths are removed.
 * Rename entries are returned as git prints them ("old -> new").
 *
 * @param {string} cwd - Repository directory.
 * @returns {string[]} Paths with uncommitted changes; [] when clean or not a repo.
 */
function getDirtyFiles(cwd) {
  const raw = safeExec('git status --porcelain', cwd);
  if (!raw) return [];

  const paths = [];
  for (const line of raw.split('\n')) {
    // optional indent, 1–2 status characters, separator, then the path
    const match = /^\s*\S{1,2}\s+(.+)$/.exec(line);
    if (!match) continue;
    const path = match[1].trim().replace(/^"(.*)"$/, '$1');
    if (path) paths.push(path);
  }
  return paths;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Return the subject lines of the most recent commits, newest first.
 *
 * @param {string} cwd - Repository directory.
 * @param {number} [n=5] - How many commits to ask git for.
 * @returns {string[]} Commit subjects; [] when git prints nothing.
 */
function getRecentCommits(cwd, n = 5) {
  const log = safeExec(`git log -${n} --pretty=format:%s`, cwd);
  return log ? log.split('\n').filter((subject) => subject !== '') : [];
}
|
|
43
|
+
|
|
44
|
+
/**
 * Count commits on HEAD that are not on its upstream (`@{u}..HEAD`).
 *
 * @param {string} cwd - Repository directory.
 * @returns {number} The count, or 0 when git's output is not a number.
 */
function getAheadCount(cwd) {
  const count = parseInt(safeExec('git rev-list --count @{u}..HEAD', cwd), 10);
  return Number.isNaN(count) ? 0 : count;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Name of the checked-out branch, or 'unknown' when git prints nothing.
 *
 * @param {string} cwd - Repository directory.
 * @returns {string}
 */
function getCurrentBranch(cwd) {
  const name = safeExec('git branch --show-current', cwd);
  return name ? name : 'unknown';
}
|
|
53
|
+
|
|
54
|
+
// ─── Failure reader ───────────────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
/**
 * Summarize the unresolved failures among the last `limit` lines of
 * `.dualbrain/failures.jsonl`, newest first.
 *
 * The tail is taken before filtering, so fewer than `limit` entries come back
 * when some of those lines are resolved or unparsable.
 *
 * @param {string} cwd - Project root directory.
 * @param {number} [limit=10] - Number of trailing log lines to inspect.
 * @returns {{prompt: string, error: string, approach: string, timestamp: number}[]}
 */
function readRecentFailures(cwd, limit = 10) {
  const logPath = join(cwd, '.dualbrain', 'failures.jsonl');
  if (!existsSync(logPath)) return [];

  try {
    const tail = readFileSync(logPath, 'utf8')
      .split('\n')
      .filter(Boolean)
      .slice(-limit)
      .reverse();

    const summaries = [];
    for (const line of tail) {
      let record = null;
      try {
        record = JSON.parse(line);
      } catch {
        record = null;
      }
      if (!record || record.resolved) continue;

      const model = record.model ?? 'unknown';
      summaries.push({
        prompt: record.prompt ?? '',
        error: record.error ?? '',
        approach: record.tier ? `${record.tier}/${model}` : model,
        timestamp: record.timestamp ?? 0,
      });
    }
    return summaries;
  } catch {
    return [];
  }
}
|
|
78
|
+
|
|
79
|
+
// ─── Outcome reader ───────────────────────────────────────────────────────────
|
|
80
|
+
|
|
81
|
+
/**
 * Collect recent outcome records from `.dualbrain/outcomes/*.jsonl`.
 *
 * Only the three files that sort last by name are read. Unreadable files and
 * unparsable lines are skipped. Records are ordered by `timestamp`, newest
 * first, and capped at `limit`.
 *
 * @param {string} cwd - Project root directory.
 * @param {number} [limit=10] - Maximum number of records to return.
 * @returns {{task: string, success: boolean, timestamp: number}[]}
 */
function readRecentOutcomes(cwd, limit = 10) {
  const outcomesDir = join(cwd, '.dualbrain', 'outcomes');
  if (!existsSync(outcomesDir)) return [];

  try {
    const newestFiles = readdirSync(outcomesDir)
      .filter((name) => name.endsWith('.jsonl'))
      .sort()
      .reverse()
      .slice(0, 3);

    const collected = newestFiles.flatMap((name) => {
      let text;
      try {
        text = readFileSync(join(outcomesDir, name), 'utf8');
      } catch {
        return []; // skip unreadable file
      }
      return text.split('\n').filter(Boolean).flatMap((line) => {
        try {
          const entry = JSON.parse(line);
          return [{
            task: entry.prompt ?? '',
            success: entry.result?.success ?? false,
            timestamp: entry.timestamp ?? 0,
          }];
        } catch {
          return []; // skip bad line
        }
      });
    });

    collected.sort((a, b) => b.timestamp - a.timestamp);
    return collected.slice(0, limit);
  } catch {
    return [];
  }
}
|
|
115
|
+
|
|
116
|
+
// ─── Package.json reader ──────────────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
/**
 * Read and parse `<cwd>/package.json`.
 *
 * @param {string} cwd - Project root directory.
 * @returns {object} The parsed manifest, or {} when the file is missing or
 *   is not valid JSON.
 */
function readPackageJson(cwd) {
  let manifest = {};
  try {
    const text = readFileSync(join(cwd, 'package.json'), 'utf8');
    manifest = JSON.parse(text);
  } catch {
    manifest = {};
  }
  return manifest;
}
|
|
125
|
+
|
|
126
|
+
// ─── Exports ──────────────────────────────────────────────────────────────────
|
|
127
|
+
|
|
128
|
+
/**
 * Read project reality fresh. No cache. Returns a ProjectBrief.
 *
 * Combines package.json fields, git state (branch, dirty files, recent commit
 * subjects, commits ahead of upstream) and the recent failure/outcome logs.
 *
 * @param {string} [cwd=process.cwd()] - Project root directory.
 * @returns {object} ProjectBrief; `derivedAt` is the time of the read (ms).
 */
export function deriveProjectState(cwd = process.cwd()) {
  // A manifest containing a bare `null` parses successfully; treat it as empty.
  const pkg = readPackageJson(cwd) ?? {};

  // Coerce so a non-string version in a hand-edited manifest cannot throw.
  const version = String(pkg.version ?? '0.0.0');
  const versionMajor = parseInt(version.split('.')[0], 10) || 0;

  const dirtyFiles = getDirtyFiles(cwd);
  const recentCommits = getRecentCommits(cwd, 5);
  const branch = getCurrentBranch(cwd);
  const aheadOfRemote = getAheadCount(cwd);

  // package.json allows `bin` to be a single path string or a name→path map.
  // Object.values() on a string yields its characters, so handle it first.
  const entryPoint = typeof pkg.bin === 'string'
    ? pkg.bin
    : (Object.values(pkg.bin ?? {})[0] ?? pkg.main ?? '');

  const testCommand = pkg.scripts?.test ?? null;

  const recentFailures = readRecentFailures(cwd, 10);
  const recentOutcomes = readRecentOutcomes(cwd, 10);

  return {
    packageName: pkg.name ?? 'unknown',
    version,
    versionMajor,
    description: pkg.description ?? '',

    branch,
    dirty: dirtyFiles.length > 0,
    dirtyFiles,
    recentCommits,
    aheadOfRemote,

    brandName: 'dual-brain',
    cliCommand: 'dual-brain',

    moduleType: pkg.type === 'module' ? 'esm' : 'cjs',
    entryPoint,
    testCommand,

    // Copy so callers cannot mutate the module-level list.
    protectedPaths: [...PROTECTED_PATHS],

    recentFailures,
    recentOutcomes,

    derivedAt: Date.now(),
  };
}
|
|
178
|
+
|
|
179
|
+
/**
 * Derive task-scoped context from the current prompt and optional session events.
 *
 * - `filesInScope`: paths under src/, hooks/, bin/, scripts/ or .claude/ found
 *   in the task text and in the JSON text of each event.
 * - `priorAttempts` / `filesOutOfScope`: built from events that have
 *   `type === 'failure'` or a truthy `failed`.
 * - `escalationLevel`: 'critical' at 3+ failed attempts, 'elevated' at 1–2,
 *   otherwise 'normal'.
 * - `activeConstraints`: phrases in the task starting with must / never /
 *   always / do not / don't / only / "no <word>", up to 80 further characters
 *   or the next sentence terminator.
 *
 * @param {string} [task=''] - Task prompt.
 * @param {object[]} [recentEvents=[]] - Session events, falsy entries ignored.
 * @returns {object} TaskBrief.
 */
export function deriveTaskContext(task = '', recentEvents = []) {
  // NOTE(review): a path must be preceded by whitespace or start-of-text, so
  // paths that appear as quoted JSON values (e.g. "files":["src/a.mjs"]) in the
  // stringified events are not picked up — confirm whether that is intended.
  const FILE_RE = /(?:^|\s)((?:src|hooks|bin|scripts|\.claude)\/[\w./\-]+\.\w+)/g;
  const CONSTRAINT_RE = /\b(must|never|always|do not|don't|only|no\s+\w+)\b[^.!?]{0,80}/gi;

  const pathsIn = (text) => [...String(text).matchAll(FILE_RE)].map((hit) => hit[1]);

  const scoped = new Set(pathsIn(task));
  const priorAttempts = [];
  const touchedByFailures = [];

  for (const event of recentEvents ?? []) {
    if (!event) continue;

    const isFailure = event.type === 'failure' || event.failed;
    if (isFailure) {
      priorAttempts.push({
        approach: event.approach ?? event.tier ?? 'unknown',
        failed: true,
        reason: event.error ?? event.reason ?? '',
      });
      touchedByFailures.push(...(event.files ?? event.filesChanged ?? []));
    }

    for (const path of pathsIn(JSON.stringify(event))) scoped.add(path);
  }

  // Every entry in priorAttempts is a failed attempt.
  const failures = priorAttempts.length;
  let escalationLevel = 'normal';
  if (failures >= 3) {
    escalationLevel = 'critical';
  } else if (failures >= 1) {
    escalationLevel = 'elevated';
  }

  const activeConstraints = [...String(task).matchAll(CONSTRAINT_RE)].map((hit) => hit[0].trim());

  return {
    task,
    priorAttempts,
    activeConstraints,
    filesInScope: [...scoped],
    filesOutOfScope: [...new Set(touchedByFailures)],
    escalationLevel,
  };
}
|
|
235
|
+
|
|
236
|
+
/**
 * Detect contradictions between project reality, task context, and a proposed plan.
 * Returns an array of contradiction objects, each shaped
 * `{ type, severity: 'block'|'warn', message, evidence: { expected, actual } }`.
 *
 * Checks, in order:
 *   1. version_mismatch — assumed or described major version differs from the package.
 *   2. branding_error / name_mismatch — wrong product or package name.
 *   3. repeated_failure — plan description overlaps a recent failed prompt (>= 0.4).
 *   4. protected_file / scope_violation — target files outside the task's file scope.
 *
 * @param {object} projectBrief - As returned by deriveProjectState().
 * @param {object|null} taskBrief - As returned by deriveTaskContext().
 * @param {object|null} [plan={}] - `{ description, assumptions, targetFiles }`.
 * @returns {object[]}
 */
export function detectContradictions(projectBrief, taskBrief, plan = {}) {
  const contradictions = [];

  // The default parameter only covers undefined; accept an explicit null too.
  const safePlan = plan ?? {};
  const planDesc = String(safePlan.description ?? '');
  const planAssumptions = safePlan.assumptions ?? {};
  const targetFiles = Array.isArray(safePlan.targetFiles) ? safePlan.targetFiles : [];

  // 1. version_mismatch — explicit assumption (a bare string is read as a version)
  const assumedVersion = typeof planAssumptions === 'string'
    ? planAssumptions
    : (planAssumptions.version ?? planAssumptions.packageVersion ?? '');

  if (assumedVersion) {
    const assumedMajor = Number.parseInt(String(assumedVersion).split('.')[0], 10);
    if (!Number.isNaN(assumedMajor) && assumedMajor !== projectBrief.versionMajor) {
      contradictions.push({
        type: 'version_mismatch',
        severity: 'block',
        message: `Plan assumes major version ${assumedMajor} but package is v${projectBrief.versionMajor} (${projectBrief.version})`,
        evidence: { expected: projectBrief.version, actual: assumedVersion },
      });
    }
  }

  // version reference in description
  const versionInDesc = planDesc.match(/\bv?(\d+)\.\d+\.\d+\b/);
  if (versionInDesc) {
    const descMajor = Number.parseInt(versionInDesc[1], 10);
    if (!Number.isNaN(descMajor) && descMajor !== projectBrief.versionMajor) {
      // The match may already start with "v"; strip it so the message does not read "vv1.2.3".
      const referenced = versionInDesc[0].replace(/^v/, '');
      contradictions.push({
        type: 'version_mismatch',
        severity: 'warn',
        message: `Plan description references v${referenced} but package is v${projectBrief.version}`,
        evidence: { expected: projectBrief.version, actual: versionInDesc[0] },
      });
    }
  }

  // 2. branding_error — a known wrong name appears and the right one never does
  const WRONG_NAMES = ['data-tools', 'orchestrator', 'dual_brain', 'dualbrain', 'brain-dual'];
  const searchText = [planDesc, JSON.stringify(planAssumptions)].join(' ').toLowerCase();
  if (!searchText.includes('dual-brain')) {
    const wrongName = WRONG_NAMES.find((name) => searchText.includes(name));
    if (wrongName) {
      contradictions.push({
        type: 'branding_error',
        severity: 'block',
        message: `Plan references "${wrongName}" but correct package name is "${projectBrief.brandName}"`,
        evidence: { expected: projectBrief.brandName, actual: wrongName },
      });
    }
  }

  // check explicit packageName assumption
  const assumedName = typeof planAssumptions === 'object' ? planAssumptions.packageName : null;
  if (assumedName && assumedName !== projectBrief.packageName) {
    contradictions.push({
      type: 'name_mismatch',
      severity: 'block',
      message: `Plan assumes packageName "${assumedName}" but actual package is "${projectBrief.packageName}"`,
      evidence: { expected: projectBrief.packageName, actual: assumedName },
    });
  }

  // 3. repeated_failure — word overlap with recent failed prompts (first hit only)
  const planWords = new Set(
    planDesc.toLowerCase().split(/\W+/).filter((w) => w.length > 3)
  );
  for (const failure of (projectBrief.recentFailures ?? [])) {
    const failureWords = (failure.prompt ?? '').toLowerCase().split(/\W+/).filter((w) => w.length > 3);
    const overlap = failureWords.filter((w) => planWords.has(w)).length;
    const similarity = overlap / Math.max(planWords.size, failureWords.length, 1);
    if (similarity >= 0.4) {
      contradictions.push({
        type: 'repeated_failure',
        severity: 'warn',
        message: `Plan resembles a recent failed attempt: "${failure.prompt.slice(0, 80)}"`,
        evidence: { expected: 'novel approach', actual: failure.prompt.slice(0, 80) },
      });
      break;
    }
  }

  // 4. scope_violation + 5. protected_file
  const taskFiles = new Set(taskBrief?.filesInScope ?? []);
  const protectedPaths = projectBrief.protectedPaths ?? [];
  const protectedSet = new Set(protectedPaths);

  for (const f of targetFiles) {
    const isProtected = protectedSet.has(f) || protectedPaths.some((p) => f.endsWith(p));
    // NOTE(review): when the task names no files at all, every target counts as
    // in scope, so protected files are not flagged in that case — confirm intent.
    const inScope = taskFiles.has(f) || taskFiles.size === 0;
    if (inScope) continue;

    if (isProtected) {
      contradictions.push({
        type: 'protected_file',
        severity: 'block',
        message: `Plan targets protected file "${f}" without explicit scope justification`,
        evidence: { expected: 'file not in plan', actual: f },
      });
    } else {
      contradictions.push({
        type: 'scope_violation',
        severity: 'warn',
        message: `Plan targets "${f}" which is outside the task's stated file scope`,
        evidence: { expected: [...taskFiles].join(', '), actual: f },
      });
    }
  }

  return contradictions;
}
|
|
357
|
+
|
|
358
|
+
/**
 * Format a compact situational awareness summary (max 15 lines) for agent prompts.
 *
 * Lines, in order: PROJECT, BRANCH, RECENT (first two commit subjects),
 * FAILURES, PROTECTED (file names only), then — when a task brief is given —
 * TASK, PRIOR ATTEMPTS, ESCALATION and IN SCOPE. Lines whose data is empty
 * are omitted, except FAILURES which prints "none".
 *
 * @param {object} projectBrief - As returned by deriveProjectState().
 * @param {object} [taskBrief] - As returned by deriveTaskContext().
 * @returns {string} Newline-joined summary.
 */
export function formatBrief(projectBrief, taskBrief) {
  const state = projectBrief.dirty ? 'dirty' : 'clean';
  const out = [
    `PROJECT: ${projectBrief.packageName} v${projectBrief.version} (${projectBrief.moduleType})`,
    `BRANCH: ${projectBrief.branch} (${state}) | ${projectBrief.aheadOfRemote} ahead`,
  ];

  if (projectBrief.recentCommits?.length > 0) {
    const quoted = [];
    for (const subject of projectBrief.recentCommits.slice(0, 2)) {
      quoted.push(`"${subject.slice(0, 50)}"`);
    }
    out.push(`RECENT: ${quoted.join(' · ')}`);
  }

  const failures = projectBrief.recentFailures ?? [];
  if (failures.length === 0) {
    out.push('FAILURES: none');
  } else {
    const since = Date.now() - 24 * 60 * 60 * 1000;
    const recent24 = failures.filter((f) => f.timestamp >= since).length;

    // Use the text before the first ':' of each error as a rough category label.
    const labels = new Set();
    for (const failure of failures.slice(0, 5)) {
      const label = failure.error?.split(':')[0]?.trim();
      if (label) labels.add(label);
    }
    const categories = [...labels].slice(0, 2).join(', ');
    const suffix = categories ? ` (${categories})` : '';
    out.push(`FAILURES: ${recent24} in last 24h${suffix}`);
  }

  const protectedNames = (projectBrief.protectedPaths ?? [])
    .map((path) => path.split('/').pop())
    .join(', ');
  if (protectedNames) out.push(`PROTECTED: ${protectedNames}`);

  if (taskBrief) {
    const taskPreview = (taskBrief.task ?? '').slice(0, 80);
    if (taskPreview) out.push(`TASK: "${taskPreview}"`);

    const failed = (taskBrief.priorAttempts ?? []).filter((attempt) => attempt.failed);
    if (failed.length > 0) {
      const [first] = failed;
      const why = first.reason ? ` (${first.reason.slice(0, 40)})` : '';
      out.push(`PRIOR ATTEMPTS: ${failed.length} failed${why}`);
    }

    const level = taskBrief.escalationLevel;
    if (level && level !== 'normal') out.push(`ESCALATION: ${level}`);

    if (taskBrief.filesInScope?.length > 0) {
      out.push(`IN SCOPE: ${taskBrief.filesInScope.slice(0, 4).join(', ')}`);
    }
  }

  return out.slice(0, 15).join('\n');
}
|