dual-brain 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +33 -1
- package/hooks/budget-balancer.mjs +45 -6
- package/hooks/control-panel.mjs +489 -0
- package/hooks/cost-logger.mjs +51 -26
- package/hooks/decision-ledger.mjs +299 -0
- package/hooks/dual-brain-review.mjs +106 -17
- package/hooks/dual-brain-think.mjs +81 -17
- package/hooks/enforce-tier.mjs +103 -10
- package/hooks/gpt-work-dispatcher.mjs +50 -6
- package/hooks/profiles.mjs +203 -0
- package/hooks/quality-gate.mjs +34 -6
- package/hooks/summary-checkpoint.mjs +231 -0
- package/install.mjs +402 -33
- package/package.json +2 -2
- package/hooks/usage-2026-05-14.jsonl +0 -5
package/hooks/cost-logger.mjs
CHANGED
|
@@ -12,19 +12,25 @@ import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
|
12
12
|
import { dirname, join } from "path";
|
|
13
13
|
import { fileURLToPath } from "url";
|
|
14
14
|
|
|
15
|
-
// ---------------------------------------------------------------------------
|
|
16
|
-
// Paths
|
|
17
|
-
// ---------------------------------------------------------------------------
|
|
18
15
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
16
|
+
const PROFILE_FILE = join(__dirname, '..', 'dual-brain.profile.json');
|
|
19
17
|
|
|
20
18
|
/**
 * Build the path of a daily usage log located next to this module.
 *
 * @param {string} [date] - Day stamp used in the file name. When falsy, the
 *   first 10 characters of `new Date().toISOString()` are used instead.
 * @returns {string} `usage-<date>.jsonl` joined onto this module's directory.
 */
function usageFile(date) {
  let stamp = date;
  if (!stamp) {
    stamp = new Date().toISOString().slice(0, 10);
  }
  const fileName = `usage-${stamp}.jsonl`;
  return join(__dirname, fileName);
}
|
|
24
22
|
|
|
25
|
-
// Ensure the hooks dir exists (idempotent, defensive)
|
|
26
23
|
mkdirSync(__dirname, { recursive: true });
|
|
27
24
|
|
|
25
|
+
/**
 * Read the `active` entry from the profile JSON file.
 *
 * @returns {*} The parsed file's `active` value when it is truthy; otherwise
 *   `'balanced'`. A read or parse failure also yields `'balanced'`.
 */
function loadActiveProfile() {
  const fallback = 'balanced';
  try {
    const raw = readFileSync(PROFILE_FILE, 'utf8');
    const { active } = JSON.parse(raw);
    return active ? active : fallback;
  } catch {
    // Missing, unreadable or malformed profile file: use the default.
    return fallback;
  }
}
|
|
31
|
+
|
|
32
|
+
const SESSION_ID = process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || null;
|
|
33
|
+
|
|
28
34
|
// ---------------------------------------------------------------------------
|
|
29
35
|
// Tier classification
|
|
30
36
|
// ---------------------------------------------------------------------------
|
|
@@ -135,14 +141,21 @@ function classify(toolName, toolInput = {}, agentModel = null) {
|
|
|
135
141
|
// Budget alerts
|
|
136
142
|
// ---------------------------------------------------------------------------
|
|
137
143
|
|
|
138
|
-
function checkBudget() {
|
|
144
|
+
async function checkBudget() {
|
|
139
145
|
let config;
|
|
140
146
|
try {
|
|
141
147
|
config = JSON.parse(readFileSync(join(__dirname, '..', 'orchestrator.json'), 'utf8'));
|
|
142
148
|
} catch { return null; }
|
|
143
149
|
|
|
144
|
-
|
|
150
|
+
// Merge profile budget overrides on top of config defaults
|
|
151
|
+
let budgets = config.budgets;
|
|
145
152
|
if (!budgets) return null;
|
|
153
|
+
try {
|
|
154
|
+
const profileData = JSON.parse(readFileSync(PROFILE_FILE, 'utf8'));
|
|
155
|
+
if (profileData.custom_overrides?.budgets) {
|
|
156
|
+
budgets = { ...budgets, ...profileData.custom_overrides.budgets };
|
|
157
|
+
}
|
|
158
|
+
} catch {}
|
|
146
159
|
|
|
147
160
|
// Rate limit alerts
|
|
148
161
|
const cooldownFile = join(__dirname, '.budget-alerted');
|
|
@@ -152,18 +165,24 @@ function checkBudget() {
|
|
|
152
165
|
if (Date.now() - Date.parse(lastAlert) < cooldownMin * 60 * 1000) return null;
|
|
153
166
|
} catch {}
|
|
154
167
|
|
|
155
|
-
//
|
|
156
|
-
|
|
157
|
-
let records = [];
|
|
168
|
+
// Use summary checkpoint for fast budget check (O(1) instead of full scan)
|
|
169
|
+
let totalCost = 0;
|
|
158
170
|
try {
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
} catch {
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
171
|
+
const { readSummary } = await import('./summary-checkpoint.mjs');
|
|
172
|
+
const summary = readSummary();
|
|
173
|
+
totalCost = summary.totals.cost_estimate;
|
|
174
|
+
} catch {
|
|
175
|
+
// Fallback: scan the log (only if summary unavailable)
|
|
176
|
+
const todayFile = usageFile();
|
|
177
|
+
let records = [];
|
|
178
|
+
try {
|
|
179
|
+
records = readFileSync(todayFile, 'utf8').split('\n').filter(Boolean).map(l => {
|
|
180
|
+
try { return JSON.parse(l); } catch { return null; }
|
|
181
|
+
}).filter(Boolean);
|
|
182
|
+
} catch { return null; }
|
|
183
|
+
const RATES = { search: 0.003, execute: 0.012, think: 0.055 };
|
|
184
|
+
totalCost = records.reduce((sum, r) => sum + (RATES[r.tier] || RATES.execute), 0);
|
|
185
|
+
}
|
|
167
186
|
|
|
168
187
|
let msg = null;
|
|
169
188
|
if (budgets.daily_limit_usd && totalCost >= budgets.daily_limit_usd) {
|
|
@@ -215,8 +234,8 @@ async function main() {
|
|
|
215
234
|
|
|
216
235
|
const status = (payload?.error || payload?.tool_response?.error || payload?.is_error) ? 'error' : 'ok';
|
|
217
236
|
|
|
218
|
-
const
|
|
219
|
-
schema_version:
|
|
237
|
+
const entryObj = {
|
|
238
|
+
schema_version: 3,
|
|
220
239
|
timestamp: new Date().toISOString(),
|
|
221
240
|
tier,
|
|
222
241
|
tool: toolName,
|
|
@@ -224,19 +243,25 @@ async function main() {
|
|
|
224
243
|
provider: detectProvider(model),
|
|
225
244
|
dispatcher: 'claude-code',
|
|
226
245
|
status,
|
|
227
|
-
session_id:
|
|
246
|
+
session_id: SESSION_ID,
|
|
247
|
+
profile: loadActiveProfile(),
|
|
228
248
|
input_tokens: inputTokens,
|
|
229
249
|
output_tokens: outputTokens,
|
|
230
|
-
}
|
|
250
|
+
};
|
|
251
|
+
|
|
252
|
+
const entry = JSON.stringify(entryObj);
|
|
231
253
|
|
|
232
254
|
try {
|
|
233
255
|
appendFileSync(usageFile(), entry + "\n", { encoding: "utf8", flag: "a" });
|
|
234
|
-
} catch {
|
|
235
|
-
|
|
236
|
-
|
|
256
|
+
} catch {}
|
|
257
|
+
|
|
258
|
+
// Update summary checkpoint (non-blocking, best-effort)
|
|
259
|
+
try {
|
|
260
|
+
const { updateSummary } = await import('./summary-checkpoint.mjs');
|
|
261
|
+
updateSummary(entryObj);
|
|
262
|
+
} catch {}
|
|
237
263
|
|
|
238
|
-
|
|
239
|
-
const budgetMsg = checkBudget();
|
|
264
|
+
const budgetMsg = await checkBudget();
|
|
240
265
|
|
|
241
266
|
// PostToolUse hooks must emit a JSON object to stdout
|
|
242
267
|
if (budgetMsg) {
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* decision-ledger.mjs — Routing outcome tracking for the Dual-Brain Orchestrator.
|
|
4
|
+
*
|
|
5
|
+
* Records every routing decision with its context, and later enriches it with
|
|
6
|
+
* outcome data (duration, success, retries, user overrides, follow-up fixes).
|
|
7
|
+
*
|
|
8
|
+
* Over time, this builds a per-repo knowledge base of which provider/model
|
|
9
|
+
* performs best for which task shapes.
|
|
10
|
+
*
|
|
11
|
+
* Exported API:
|
|
12
|
+
* recordDecision(decision) → log a routing decision, returns decision_id
|
|
13
|
+
* recordOutcome(id, outcome) → enrich a decision with its outcome
|
|
14
|
+
* getInsights(opts?) → aggregate patterns from the ledger
|
|
15
|
+
*
|
|
16
|
+
* CLI:
|
|
17
|
+
* node .claude/hooks/decision-ledger.mjs # show insights
|
|
18
|
+
* node .claude/hooks/decision-ledger.mjs --json # JSON output
|
|
19
|
+
* node .claude/hooks/decision-ledger.mjs --recent 20 # last N decisions
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { appendFileSync, existsSync, readFileSync } from 'fs';
|
|
23
|
+
import { dirname, join } from 'path';
|
|
24
|
+
import { fileURLToPath } from 'url';
|
|
25
|
+
import { randomBytes } from 'crypto';
|
|
26
|
+
|
|
27
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
28
|
+
const LEDGER_FILE = join(__dirname, 'decision-ledger.jsonl');
|
|
29
|
+
|
|
30
|
+
/**
 * Create a random identifier for a ledger entry.
 *
 * @returns {string} 12 lowercase hex characters (6 random bytes).
 */
function genId() {
  const bytes = randomBytes(6);
  return bytes.toString('hex');
}
|
|
33
|
+
|
|
34
|
+
/**
 * Append one routing decision to the ledger file as a JSON line.
 *
 * Fields missing from `decision` are filled with the defaults visible below.
 * The write is best-effort: a failed append is ignored and the generated id
 * is returned regardless.
 *
 * @param {object} [decision] - Routing context supplied by the caller.
 * @returns {string} The id generated for this entry.
 */
function recordDecision(decision = {}) {
  const id = genId();
  const entry = JSON.stringify({
    type: 'decision',
    id,
    timestamp: new Date().toISOString(),
    session_id: decision.session_id || process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || null,
    profile: decision.profile || 'balanced',

    // Routing context
    tier: decision.tier || 'execute',
    provider: decision.provider || 'claude',
    model: decision.model || 'unknown',
    recommended_model: decision.recommended_model || null,
    followed: decision.followed ?? null,

    // Task shape
    task_type: decision.task_type || null,
    prompt_hash: decision.prompt_hash || null,
    // `??` rather than `||`: a reported 0 must be stored as 0, not as null.
    estimated_duration_ms: decision.estimated_duration_ms ?? null,
    file_count: decision.file_count ?? null,
    context_coupling: decision.context_coupling || null,
    isolation: decision.isolation || null,

    // Provider state at decision time
    claude_pressure: decision.claude_pressure || null,
    openai_pressure: decision.openai_pressure || null,
  });

  try {
    appendFileSync(LEDGER_FILE, entry + '\n');
  } catch {
    // Deliberately best-effort: a ledger write failure must not break routing.
  }

  return id;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Append an outcome record for an earlier decision to the ledger file.
 *
 * The record is linked to its decision through `decision_id`. The write is
 * best-effort: a failed append is ignored.
 *
 * @param {string} decisionId - Id of the decision this outcome belongs to.
 * @param {object} [outcome] - Measured results; absent fields get the
 *   defaults visible below.
 * @returns {void}
 */
function recordOutcome(decisionId, outcome = {}) {
  const entry = JSON.stringify({
    type: 'outcome',
    decision_id: decisionId,
    timestamp: new Date().toISOString(),

    // Timing — `??` keeps a measured 0 instead of turning it into null.
    actual_duration_ms: outcome.actual_duration_ms ?? null,
    codex_startup_ms: outcome.codex_startup_ms ?? null,

    // Quality signals
    success: outcome.success ?? null,
    tests_passed: outcome.tests_passed ?? null,
    tests_failed: outcome.tests_failed ?? null,
    retries: outcome.retries || 0,
    user_override: outcome.user_override ?? false,
    followup_fix_needed: outcome.followup_fix_needed ?? false,

    // Cost — a zero token count or zero cost is a real value, keep it.
    actual_input_tokens: outcome.actual_input_tokens ?? null,
    actual_output_tokens: outcome.actual_output_tokens ?? null,
    estimated_cost_usd: outcome.estimated_cost_usd ?? null,

    // Files
    files_changed: outcome.files_changed || null,
    files_read: outcome.files_read || null,
  });

  try {
    appendFileSync(LEDGER_FILE, entry + '\n');
  } catch {
    // Deliberately best-effort: a ledger write failure is ignored.
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Read the ledger file and split its entries by record type.
 *
 * Lines that are empty, are not valid JSON, or carry a `type` other than
 * `'decision'` / `'outcome'` are skipped. A missing or unreadable ledger
 * yields two empty lists.
 *
 * @returns {{decisions: object[], outcomes: object[]}} Entries in file order.
 */
function loadLedger() {
  const ledger = { decisions: [], outcomes: [] };
  if (!existsSync(LEDGER_FILE)) return ledger;

  let text;
  try {
    text = readFileSync(LEDGER_FILE, 'utf8');
  } catch {
    return ledger;
  }

  text.split('\n').forEach((row) => {
    if (!row) return;
    try {
      const record = JSON.parse(row);
      switch (record.type) {
        case 'decision':
          ledger.decisions.push(record);
          break;
        case 'outcome':
          ledger.outcomes.push(record);
          break;
        default:
          break;
      }
    } catch {
      // Unparseable line: ignore it and keep reading.
    }
  });

  return ledger;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Attach to every decision the outcome recorded for it, if any.
 *
 * Outcomes are indexed by `decision_id`; when several outcomes share an id
 * the last one wins. Ids are compared by their string form. A `Map` is used
 * for the index so that ids such as `constructor` or `__proto__` cannot match
 * members inherited from `Object.prototype`.
 *
 * @param {object[]} decisions - Decision entries (each with an `id`).
 * @param {object[]} outcomes - Outcome entries (each with a `decision_id`).
 * @returns {object[]} Copies of the decisions with an added `outcome`
 *   property (the matching outcome entry, or `null`).
 */
function mergeDecisionsWithOutcomes(decisions, outcomes) {
  const outcomeById = new Map();
  for (const o of outcomes) {
    outcomeById.set(String(o.decision_id), o);
  }
  return decisions.map((d) => ({
    ...d,
    outcome: outcomeById.get(String(d.id)) || null,
  }));
}
|
|
133
|
+
|
|
134
|
+
/**
 * Aggregate the ledger into summary statistics.
 *
 * Only decisions that have a recorded outcome contribute to the provider,
 * tier and task-type tables; the compliance rate is computed over all
 * decisions.
 *
 * @param {object} [opts] - Accepted but not read by this function.
 * @returns {{total_decisions: number, with_outcomes: number,
 *   compliance_rate: number, provider_stats: object, tier_stats: object,
 *   task_patterns: object, recommendations: string[]}} Aggregated figures.
 */
function getInsights(opts = {}) {
  const ledger = loadLedger();
  const resolved = mergeDecisionsWithOutcomes(ledger.decisions, ledger.outcomes)
    .filter((item) => item.outcome);

  // Return table[key], creating it with make() the first time it is needed.
  const slot = (table, key, make) => {
    if (!table[key]) table[key] = make();
    return table[key];
  };

  const providerStats = {};
  const tierStats = {};
  const taskPatterns = {};

  for (const item of resolved) {
    const { outcome } = item;
    const durationMs = outcome.actual_duration_ms;

    // Provider win rates
    const perProvider = slot(providerStats, item.provider, () => ({
      total: 0, success: 0, overrides: 0, followups: 0, totalDuration: 0, counted: 0,
    }));
    perProvider.total++;
    if (outcome.success) perProvider.success++;
    if (outcome.user_override) perProvider.overrides++;
    if (outcome.followup_fix_needed) perProvider.followups++;
    if (durationMs) {
      perProvider.totalDuration += durationMs;
      perProvider.counted++;
    }

    // Tier performance, keyed by "<provider>:<tier>"
    const perTier = slot(tierStats, `${item.provider}:${item.tier}`, () => ({
      total: 0, success: 0, avgDuration: 0, counted: 0,
    }));
    perTier.total++;
    if (outcome.success) perTier.success++;
    if (durationMs) {
      perTier.counted++;
      // Incremental mean over the entries that reported a duration.
      perTier.avgDuration += (durationMs - perTier.avgDuration) / perTier.counted;
    }

    // Task type patterns (entries without a task type are left out)
    if (item.task_type) {
      const byProvider = slot(taskPatterns, item.task_type, () => ({}));
      const cell = slot(byProvider, item.provider, () => ({ total: 0, success: 0 }));
      cell.total++;
      if (outcome.success) cell.success++;
    }
  }

  // Compliance rate: share of all decisions explicitly marked as followed.
  const total = ledger.decisions.length;
  let followedCount = 0;
  for (const entry of ledger.decisions) {
    if (entry.followed === true) followedCount++;
  }
  const compliance = total > 0 ? Math.round((followedCount / total) * 100) : 0;

  // Recommendations: needs two providers with >= 3 samples each and a
  // success-rate lead of more than 0.1 for the best one.
  const recommendations = [];
  for (const [task, providers] of Object.entries(taskPatterns)) {
    const ranked = [];
    for (const [name, tally] of Object.entries(providers)) {
      if (tally.total >= 3) {
        ranked.push({ provider: name, rate: tally.success / tally.total, total: tally.total });
      }
    }
    ranked.sort((a, b) => b.rate - a.rate);
    if (ranked.length < 2) continue;

    const [best, runnerUp] = ranked;
    if (best.rate > runnerUp.rate + 0.1) {
      recommendations.push(`${best.provider} wins ${task} tasks (${Math.round(best.rate * 100)}% vs ${Math.round(runnerUp.rate * 100)}%)`);
    }
  }

  return {
    total_decisions: total,
    with_outcomes: resolved.length,
    compliance_rate: compliance,
    provider_stats: providerStats,
    tier_stats: tierStats,
    task_patterns: taskPatterns,
    recommendations,
  };
}
|
|
206
|
+
|
|
207
|
+
// ─── CLI ────────────────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
/**
 * Print the ledger insights to stdout as a boxed text report.
 *
 * When no decisions have been recorded, a short notice is printed instead.
 *
 * @returns {void}
 */
function printInsights() {
  const report = getInsights();

  if (report.total_decisions === 0) {
    const notice = [
      '',
      ' No routing decisions recorded yet.',
      ' The decision ledger builds over time as you use Claude Code.',
      '',
    ];
    for (const text of notice) console.log(text);
    return;
  }

  const WIDTH = 52;
  // Clip or right-pad a value to exactly `size` characters.
  const fit = (value, size = WIDTH - 2) => String(value).slice(0, size).padEnd(size, ' ');
  const row = (text) => `║ ${fit(text)} ║`;
  const rule = '═'.repeat(WIDTH);
  const divider = `╠${rule}╣`;

  const out = [
    `╔${rule}╗`,
    row('Decision Ledger Insights'),
    divider,
    row(`Total decisions: ${report.total_decisions}`),
    row(`With outcomes: ${report.with_outcomes}`),
    row(`Compliance rate: ${report.compliance_rate}%`),
    divider,
    row('Provider Performance'),
  ];

  // One line per provider, plus a follow-up line when fixes were needed.
  Object.entries(report.provider_stats).forEach(([provider, stats]) => {
    const rate = stats.total > 0 ? Math.round((stats.success / stats.total) * 100) : 0;
    const avgMs = stats.counted > 0 ? Math.round(stats.totalDuration / stats.counted / 1000) : '?';
    out.push(row(` ${provider}: ${rate}% success, ${stats.overrides} overrides, avg ${avgMs}s`));
    if (stats.followups > 0) {
      out.push(row(` ${stats.followups} follow-up fixes needed`));
    }
  });

  if (report.recommendations.length > 0) {
    out.push(divider, row('Recommendations'));
    report.recommendations.forEach((rec) => out.push(row(` ${rec}`)));
  }

  out.push(`╚${rule}╝`);

  console.log('');
  out.forEach((text) => console.log(` ${text}`));
  console.log('');
}
|
|
263
|
+
|
|
264
|
+
/**
 * Print the most recent ledger decisions to stdout, one line each.
 *
 * Each line shows a status mark, the time-of-day part of the timestamp,
 * provider/model, tier and (when reported) the duration in seconds.
 * Status marks: `✓` success, `✗` failure, `?` no outcome recorded or the
 * outcome did not report success (stored as `null`).
 *
 * @param {number} n - How many entries to show; passed to `slice(-n)`.
 * @returns {void}
 */
function printRecent(n) {
  const { decisions, outcomes } = loadLedger();
  const merged = mergeDecisionsWithOutcomes(decisions, outcomes);
  const recent = merged.slice(-n);

  if (recent.length === 0) {
    console.log(' No decisions recorded yet.');
    return;
  }

  console.log('');
  for (const d of recent) {
    const time = d.timestamp?.slice(11, 19) || '??:??:??';
    // An unreported (null/undefined) success is unknown, not a failure.
    const success = d.outcome?.success;
    const status = success == null ? '?' : success ? '✓' : '✗';
    const dur = d.outcome?.actual_duration_ms ? `${Math.round(d.outcome.actual_duration_ms / 1000)}s` : '';
    console.log(` ${status} ${time} ${d.provider}/${d.model} [${d.tier}] ${dur}`);
  }
  console.log('');
}
|
|
283
|
+
|
|
284
|
+
// CLI entry
|
|
285
|
+
// Runs only when this file is the script passed to node, not when imported.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
  const args = process.argv.slice(2);

  if (args.includes('--json')) {
    console.log(JSON.stringify(getInsights(), null, 2));
  } else if (args.includes('--recent')) {
    const idx = args.indexOf('--recent');
    // Explicit radix; a missing, non-numeric, zero or negative count falls
    // back to 20 (a negative value would make slice(-n) drop the oldest
    // entries instead of showing the newest ones).
    const parsed = Number.parseInt(args[idx + 1], 10);
    const n = parsed > 0 ? parsed : 20;
    printRecent(n);
  } else {
    printInsights();
  }
}
|
|
298
|
+
|
|
299
|
+
export { recordDecision, recordOutcome, getInsights, loadLedger };
|
|
@@ -19,7 +19,11 @@ import { fileURLToPath } from 'url';
|
|
|
19
19
|
|
|
20
20
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
21
|
|
|
22
|
-
const
|
|
22
|
+
const REVIEW_PROMPT_R1 = `You are GPT-5.5 performing Round 1 of a dual-brain code review.
|
|
23
|
+
Claude (Opus) will independently review the same changes, then send you their findings
|
|
24
|
+
for a collaborative Round 2 discussion.
|
|
25
|
+
|
|
26
|
+
Review the current uncommitted changes for:
|
|
23
27
|
1. Correctness — logic errors, off-by-one, null/undefined risks
|
|
24
28
|
2. Security — injection, auth bypass, data exposure
|
|
25
29
|
3. Edge cases — what could break under unusual input
|
|
@@ -34,6 +38,24 @@ Required output:
|
|
|
34
38
|
|
|
35
39
|
Be concise. Flag only real issues, not style preferences. If the code looks good, say "LGTM" and note any minor suggestions. Output your review as plain text, not JSON.`;
|
|
36
40
|
|
|
41
|
+
const REVIEW_PROMPT_R2 = `You are GPT-5.5 in Round 2 of a collaborative code review with Claude (Opus).
|
|
42
|
+
You already reviewed this diff in Round 1. Claude has now independently reviewed the same changes.
|
|
43
|
+
This is a professional peer review dialogue — two senior engineers refining their assessment together.
|
|
44
|
+
|
|
45
|
+
Claude's review findings:
|
|
46
|
+
---CLAUDE_REVIEW---
|
|
47
|
+
|
|
48
|
+
Now respond as a peer reviewer:
|
|
49
|
+
1. CONFIRMED: Issues you both found — these are high-confidence findings
|
|
50
|
+
2. MISSED: Issues Claude caught that you missed — acknowledge them
|
|
51
|
+
3. DISAGREE: Claude's findings you think are false positives — explain why
|
|
52
|
+
4. ESCALATED: Issues that are MORE severe than either of you initially rated
|
|
53
|
+
5. VERDICT: Combined assessment — LGTM, minor issues, or blocks merge
|
|
54
|
+
|
|
55
|
+
Be direct. If Claude found something real that you missed, say so.
|
|
56
|
+
If Claude flagged something that isn't actually a problem, explain why with evidence.
|
|
57
|
+
The goal is the most accurate review, not defending your initial take.`;
|
|
58
|
+
|
|
37
59
|
function loadReviewRules() {
|
|
38
60
|
const rulesFile = resolve(__dirname, '..', 'review-rules.md');
|
|
39
61
|
try {
|
|
@@ -127,9 +149,9 @@ function exit(obj) {
|
|
|
127
149
|
|
|
128
150
|
/**
|
|
129
151
|
* Try GPT review via Codex CLI (uses ChatGPT subscription auth).
|
|
130
|
-
*
|
|
152
|
+
* Round 1: independent review. Round 2: respond to Claude's review.
|
|
131
153
|
*/
|
|
132
|
-
function tryCodexReview(diff) {
|
|
154
|
+
function tryCodexReview(diff, { round = 1, claudeReview = null } = {}) {
|
|
133
155
|
if (!CODEX_BIN) return null;
|
|
134
156
|
try {
|
|
135
157
|
spawnSync(CODEX_BIN, ['login', 'status'], {
|
|
@@ -145,7 +167,14 @@ function tryCodexReview(diff) {
|
|
|
145
167
|
? diff.slice(0, MAX_DIFF_CHARS) + '\n[truncated]'
|
|
146
168
|
: diff;
|
|
147
169
|
|
|
148
|
-
|
|
170
|
+
let basePrompt;
|
|
171
|
+
if (round === 2 && claudeReview) {
|
|
172
|
+
basePrompt = REVIEW_PROMPT_R2.replace('---CLAUDE_REVIEW---', claudeReview);
|
|
173
|
+
} else {
|
|
174
|
+
basePrompt = REVIEW_PROMPT_R1;
|
|
175
|
+
}
|
|
176
|
+
const fullPrompt = basePrompt + loadReviewRules();
|
|
177
|
+
|
|
149
178
|
const proc = spawnSync(CODEX_BIN, [
|
|
150
179
|
'exec', '--json', '--ephemeral',
|
|
151
180
|
'-c', `model="${model}"`,
|
|
@@ -159,7 +188,6 @@ function tryCodexReview(diff) {
|
|
|
159
188
|
});
|
|
160
189
|
const result = proc.stdout || '';
|
|
161
190
|
|
|
162
|
-
// Parse JSONL output, find agent_message items
|
|
163
191
|
const messages = result
|
|
164
192
|
.split('\n')
|
|
165
193
|
.filter(l => l.trim())
|
|
@@ -173,16 +201,17 @@ function tryCodexReview(diff) {
|
|
|
173
201
|
const usage = messages.find(m => m.type === 'turn.completed')?.usage;
|
|
174
202
|
|
|
175
203
|
if (agentMessages.length > 0) {
|
|
204
|
+
const reviewText = agentMessages.join('\n\n');
|
|
176
205
|
return {
|
|
177
|
-
|
|
206
|
+
round,
|
|
207
|
+
review: reviewText,
|
|
178
208
|
model,
|
|
179
209
|
auth_type: 'codex_subscription',
|
|
180
|
-
issues_found: hasIssues(
|
|
210
|
+
issues_found: hasIssues(reviewText),
|
|
181
211
|
tokens: usage || null,
|
|
182
212
|
};
|
|
183
213
|
}
|
|
184
214
|
|
|
185
|
-
// Check for errors
|
|
186
215
|
const errors = messages.filter(m => m.type === 'error' || m.type === 'turn.failed');
|
|
187
216
|
if (errors.length > 0) {
|
|
188
217
|
return {
|
|
@@ -205,7 +234,7 @@ function tryCodexReview(diff) {
|
|
|
205
234
|
/**
|
|
206
235
|
* Try GPT review via direct API call (needs OPENAI_API_KEY).
|
|
207
236
|
*/
|
|
208
|
-
async function tryApiReview(diff) {
|
|
237
|
+
async function tryApiReview(diff, { round = 1, claudeReview = null } = {}) {
|
|
209
238
|
const apiKey = process.env.OPENAI_API_KEY;
|
|
210
239
|
if (!apiKey) return null;
|
|
211
240
|
|
|
@@ -214,7 +243,14 @@ async function tryApiReview(diff) {
|
|
|
214
243
|
? diff.slice(0, MAX_DIFF_CHARS) + '\n[truncated]'
|
|
215
244
|
: diff;
|
|
216
245
|
|
|
217
|
-
|
|
246
|
+
let basePrompt;
|
|
247
|
+
if (round === 2 && claudeReview) {
|
|
248
|
+
basePrompt = REVIEW_PROMPT_R2.replace('---CLAUDE_REVIEW---', claudeReview);
|
|
249
|
+
} else {
|
|
250
|
+
basePrompt = REVIEW_PROMPT_R1;
|
|
251
|
+
}
|
|
252
|
+
const fullPrompt = basePrompt + loadReviewRules();
|
|
253
|
+
|
|
218
254
|
const controller = new AbortController();
|
|
219
255
|
const timer = setTimeout(() => controller.abort(), 30_000);
|
|
220
256
|
|
|
@@ -245,6 +281,7 @@ async function tryApiReview(diff) {
|
|
|
245
281
|
if (!text) return null;
|
|
246
282
|
|
|
247
283
|
return {
|
|
284
|
+
round,
|
|
248
285
|
review: text,
|
|
249
286
|
model,
|
|
250
287
|
auth_type: 'api_key',
|
|
@@ -256,7 +293,37 @@ async function tryApiReview(diff) {
|
|
|
256
293
|
}
|
|
257
294
|
}
|
|
258
295
|
|
|
296
|
+
/**
 * Parse `--key value`, `--key=value` and bare `--flag` command-line tokens.
 *
 * Tokens that do not start with `--` are ignored unless they are consumed as
 * the value of the preceding `--key`. A `--key` followed by another `--…`
 * token, or by nothing, is stored as `true`.
 *
 * @param {string[]} argv - Arguments, e.g. `process.argv.slice(2)`.
 * @returns {Object<string, string|boolean>} Parsed options keyed by name
 *   (without the leading dashes).
 */
function parseArgs(argv) {
  const parsed = {};

  for (let pos = 0; pos < argv.length; pos += 1) {
    const token = argv[pos];
    if (!token.startsWith('--')) continue;

    // "--key=value": everything after the first "=" is the value.
    const eq = token.indexOf('=');
    if (eq !== -1) {
      parsed[token.slice(2, eq)] = token.slice(eq + 1);
      continue;
    }

    const name = token.slice(2);
    const following = argv[pos + 1];
    const hasValue = following !== undefined && !following.startsWith('--');
    parsed[name] = hasValue ? following : true;
    if (hasValue) pos += 1; // the value token has been consumed
  }

  return parsed;
}
|
|
320
|
+
|
|
259
321
|
async function main() {
|
|
322
|
+
const args = parseArgs(process.argv.slice(2));
|
|
323
|
+
const round = args.round ? parseInt(args.round, 10) : 1;
|
|
324
|
+
const claudeReview = args['claude-review'] || null;
|
|
325
|
+
const opts = { round, claudeReview };
|
|
326
|
+
|
|
260
327
|
// 1. Get diff
|
|
261
328
|
let diff = runGit('git diff --staged') || '';
|
|
262
329
|
if (countLines(diff) < MIN_DIFF_LINES) {
|
|
@@ -264,12 +331,11 @@ async function main() {
|
|
|
264
331
|
if (countLines(headDiff) > countLines(diff)) diff = headDiff;
|
|
265
332
|
}
|
|
266
333
|
|
|
267
|
-
// Also gather content of untracked source files
|
|
268
334
|
try {
|
|
269
335
|
const untracked = runGit('git ls-files --others --exclude-standard') || '';
|
|
270
336
|
const sourceExts = /\.(ts|tsx|js|jsx|py|rs|go|java|rb|swift|kt|mjs|cjs)$/;
|
|
271
337
|
const untrackedSrc = untracked.split('\n').filter(f => f && sourceExts.test(f));
|
|
272
|
-
for (const f of untrackedSrc.slice(0, 10)) {
|
|
338
|
+
for (const f of untrackedSrc.slice(0, 10)) {
|
|
273
339
|
const content = runGit(`git diff --no-index /dev/null "${f}"`);
|
|
274
340
|
if (content) diff += '\n' + content;
|
|
275
341
|
}
|
|
@@ -279,13 +345,36 @@ async function main() {
|
|
|
279
345
|
exit({ review: 'No significant changes to review' });
|
|
280
346
|
}
|
|
281
347
|
|
|
282
|
-
// 2. Try Codex CLI first
|
|
283
|
-
const codexResult = tryCodexReview(diff);
|
|
284
|
-
if (codexResult)
|
|
348
|
+
// 2. Try Codex CLI first
|
|
349
|
+
const codexResult = tryCodexReview(diff, opts);
|
|
350
|
+
if (codexResult) {
|
|
351
|
+
if (round === 1) {
|
|
352
|
+
codexResult.instructions = `Round 1 complete. Now:
|
|
353
|
+
1. Provide YOUR independent code review of the same changes
|
|
354
|
+
2. Then call Round 2 to send your findings back to GPT:
|
|
355
|
+
node .claude/hooks/dual-brain-review.mjs --round 2 --claude-review "<your findings>"
|
|
356
|
+
3. GPT will respond — confirming shared findings, acknowledging misses, and pushing back on false positives
|
|
357
|
+
4. You then synthesize both rounds into the final review verdict`;
|
|
358
|
+
} else {
|
|
359
|
+
codexResult.instructions = `GPT has responded to your review. Synthesize into a FINAL REVIEW:
|
|
360
|
+
- CONFIRMED findings (both found) → high confidence, must fix
|
|
361
|
+
- GPT-only findings you agree with → add to your list
|
|
362
|
+
- Your findings GPT disputed → re-evaluate honestly
|
|
363
|
+
- Final verdict: LGTM, minor issues, or blocks merge`;
|
|
364
|
+
}
|
|
365
|
+
exit(codexResult);
|
|
366
|
+
}
|
|
285
367
|
|
|
286
368
|
// 3. Try direct API
|
|
287
|
-
const apiResult = await tryApiReview(diff);
|
|
288
|
-
if (apiResult)
|
|
369
|
+
const apiResult = await tryApiReview(diff, opts);
|
|
370
|
+
if (apiResult) {
|
|
371
|
+
if (round === 1) {
|
|
372
|
+
apiResult.instructions = `Round 1 complete. Provide YOUR independent review, then call Round 2 with --round 2 --claude-review "<findings>"`;
|
|
373
|
+
} else {
|
|
374
|
+
apiResult.instructions = `Synthesize both rounds into a final review verdict.`;
|
|
375
|
+
}
|
|
376
|
+
exit(apiResult);
|
|
377
|
+
}
|
|
289
378
|
|
|
290
379
|
// 4. No GPT available
|
|
291
380
|
exit({
|