dual-brain 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +97 -0
- package/CLAUDE.md +147 -0
- package/LICENSE +21 -0
- package/README.md +197 -0
- package/agents/implementer.md +22 -0
- package/agents/researcher.md +25 -0
- package/agents/verifier.md +30 -0
- package/bin/dual-brain.mjs +2868 -0
- package/hooks/auto-update-wrapper.mjs +102 -0
- package/hooks/auto-update.sh +67 -0
- package/hooks/budget-balancer.mjs +679 -0
- package/hooks/control-panel.mjs +1195 -0
- package/hooks/cost-logger.mjs +286 -0
- package/hooks/cost-report.mjs +351 -0
- package/hooks/decision-ledger.mjs +299 -0
- package/hooks/dual-brain-review.mjs +404 -0
- package/hooks/dual-brain-think.mjs +393 -0
- package/hooks/enforce-tier.mjs +469 -0
- package/hooks/failure-detector.mjs +138 -0
- package/hooks/gpt-work-dispatcher.mjs +512 -0
- package/hooks/head-guard.mjs +105 -0
- package/hooks/health-check.mjs +444 -0
- package/hooks/install-git-hooks.mjs +106 -0
- package/hooks/model-registry.mjs +859 -0
- package/hooks/plan-generator.mjs +544 -0
- package/hooks/profiles.mjs +254 -0
- package/hooks/quality-gate.mjs +355 -0
- package/hooks/risk-classifier.mjs +41 -0
- package/hooks/session-report.mjs +514 -0
- package/hooks/setup-wizard.mjs +130 -0
- package/hooks/summary-checkpoint.mjs +432 -0
- package/hooks/task-classifier.mjs +328 -0
- package/hooks/test-orchestrator.mjs +1077 -0
- package/hooks/vibe-memory.mjs +463 -0
- package/hooks/vibe-router.mjs +387 -0
- package/hooks/wave-orchestrator.mjs +1397 -0
- package/install.mjs +1541 -0
- package/mcp-server/README.md +81 -0
- package/mcp-server/index.mjs +388 -0
- package/orchestrator.json +215 -0
- package/package.json +108 -0
- package/playbooks/debug.json +49 -0
- package/playbooks/refactor.json +57 -0
- package/playbooks/security-audit.json +57 -0
- package/playbooks/security.json +38 -0
- package/playbooks/test-gen.json +48 -0
- package/plugin.json +22 -0
- package/review-rules.md +17 -0
- package/shell-hook.sh +26 -0
- package/skills/go.md +22 -0
- package/skills/review.md +19 -0
- package/skills/status.md +13 -0
- package/skills/think.md +22 -0
- package/src/brief.mjs +266 -0
- package/src/decide.mjs +635 -0
- package/src/decompose.mjs +331 -0
- package/src/detect.mjs +345 -0
- package/src/dispatch.mjs +942 -0
- package/src/health.mjs +253 -0
- package/src/index.mjs +44 -0
- package/src/install-hooks.mjs +100 -0
- package/src/playbook.mjs +257 -0
- package/src/profile.mjs +990 -0
- package/src/redact.mjs +192 -0
- package/src/repo.mjs +292 -0
- package/src/session.mjs +1036 -0
- package/src/tui.mjs +197 -0
- package/src/update-check.mjs +35 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* decision-ledger.mjs — Routing outcome tracking for the Dual-Brain Orchestrator.
|
|
4
|
+
*
|
|
5
|
+
* Records every routing decision with its context, and later enriches it with
|
|
6
|
+
* outcome data (duration, success, retries, user overrides, follow-up fixes).
|
|
7
|
+
*
|
|
8
|
+
* Over time, this builds a per-repo knowledge base of which provider/model
|
|
9
|
+
* performs best for which task shapes.
|
|
10
|
+
*
|
|
11
|
+
* Exported API:
|
|
12
|
+
* recordDecision(decision) → log a routing decision, returns decision_id
|
|
13
|
+
* recordOutcome(id, outcome) → enrich a decision with its outcome
|
|
14
|
+
* getInsights(opts?) → aggregate patterns from the ledger
|
|
15
|
+
*
|
|
16
|
+
* CLI:
|
|
17
|
+
* node .claude/hooks/decision-ledger.mjs # show insights
|
|
18
|
+
* node .claude/hooks/decision-ledger.mjs --json # JSON output
|
|
19
|
+
* node .claude/hooks/decision-ledger.mjs --recent 20 # last N decisions
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { appendFileSync, existsSync, readFileSync } from 'fs';
|
|
23
|
+
import { dirname, join } from 'path';
|
|
24
|
+
import { fileURLToPath } from 'url';
|
|
25
|
+
import { randomBytes } from 'crypto';
|
|
26
|
+
|
|
27
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
28
|
+
const LEDGER_FILE = join(__dirname, 'decision-ledger.jsonl');
|
|
29
|
+
|
|
30
|
+
/**
 * Generate a short random identifier for a ledger entry.
 *
 * @returns {string} 12 lowercase hex characters (6 random bytes).
 */
function genId() {
  const bytes = randomBytes(6);
  return bytes.toString('hex');
}
|
|
33
|
+
|
|
34
|
+
/**
 * Append a routing decision to the ledger file and return its generated id.
 *
 * String-valued fields fall back to their defaults when empty. Fields that
 * can legitimately be `0` or `false` use `??`, so those values are recorded
 * as given instead of being collapsed to `null`.
 *
 * Writing is best-effort: a failed append is ignored and the id is still
 * returned.
 *
 * @param {object} [decision] - Routing context for the decision.
 * @returns {string} The id assigned to the recorded decision.
 */
function recordDecision(decision = {}) {
  const id = genId();
  const entry = JSON.stringify({
    type: 'decision',
    id,
    timestamp: new Date().toISOString(),
    session_id: decision.session_id || process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || null,
    profile: decision.profile || 'balanced',

    // Routing context
    tier: decision.tier || 'execute',
    provider: decision.provider || 'claude',
    model: decision.model || 'unknown',
    recommended_model: decision.recommended_model || null,
    followed: decision.followed ?? null,

    // Task shape
    task_type: decision.task_type || null,
    prompt_hash: decision.prompt_hash || null,
    estimated_duration_ms: decision.estimated_duration_ms ?? null,
    file_count: decision.file_count ?? null,
    context_coupling: decision.context_coupling ?? null,
    isolation: decision.isolation ?? null,

    // Provider state at decision time
    claude_pressure: decision.claude_pressure ?? null,
    openai_pressure: decision.openai_pressure ?? null,
  });

  try {
    appendFileSync(LEDGER_FILE, entry + '\n');
  } catch {
    // Best-effort: a ledger write failure is deliberately not propagated.
  }

  return id;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Append an outcome record for a previously recorded decision.
 *
 * Numeric measurements use `??`, so a real `0` (zero tokens, zero cost,
 * zero files) is stored as `0` rather than being turned into `null`.
 *
 * Writing is best-effort: a failed append is ignored.
 *
 * @param {string} decisionId - Id returned by `recordDecision`.
 * @param {object} [outcome] - Observed timing, quality, cost and file data.
 * @returns {void}
 */
function recordOutcome(decisionId, outcome = {}) {
  const entry = JSON.stringify({
    type: 'outcome',
    decision_id: decisionId,
    timestamp: new Date().toISOString(),

    // Timing
    actual_duration_ms: outcome.actual_duration_ms ?? null,
    codex_startup_ms: outcome.codex_startup_ms ?? null,

    // Quality signals
    success: outcome.success ?? null,
    tests_passed: outcome.tests_passed ?? null,
    tests_failed: outcome.tests_failed ?? null,
    retries: outcome.retries ?? 0,
    user_override: outcome.user_override ?? false,
    followup_fix_needed: outcome.followup_fix_needed ?? false,

    // Cost
    actual_input_tokens: outcome.actual_input_tokens ?? null,
    actual_output_tokens: outcome.actual_output_tokens ?? null,
    estimated_cost_usd: outcome.estimated_cost_usd ?? null,

    // Files
    files_changed: outcome.files_changed ?? null,
    files_read: outcome.files_read ?? null,
  });

  try {
    appendFileSync(LEDGER_FILE, entry + '\n');
  } catch {
    // Best-effort: a ledger write failure is deliberately not propagated.
  }
}
|
|
102
|
+
|
|
103
|
+
/**
 * Read the ledger file and split its JSON lines into decisions and outcomes.
 *
 * A missing or unreadable file yields two empty lists. Lines that are not
 * valid JSON, or whose `type` is neither 'decision' nor 'outcome', are skipped.
 *
 * @returns {{decisions: object[], outcomes: object[]}}
 */
function loadLedger() {
  const ledger = { decisions: [], outcomes: [] };
  if (!existsSync(LEDGER_FILE)) return ledger;

  let text;
  try {
    text = readFileSync(LEDGER_FILE, 'utf8');
  } catch {
    return ledger;
  }

  text.split('\n').forEach((row) => {
    if (!row) return;
    let record;
    try {
      record = JSON.parse(row);
    } catch {
      return;
    }
    // `?.` covers a line that parses to `null`, which is skipped like any other non-entry.
    const kind = record?.type;
    if (kind === 'decision') {
      ledger.decisions.push(record);
    } else if (kind === 'outcome') {
      ledger.outcomes.push(record);
    }
  });

  return ledger;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Attach to each decision the outcome recorded for it, if any.
 *
 * When several outcomes share a decision id, the last one wins. The lookup
 * uses a Map so that ids such as "constructor" or "toString" cannot resolve
 * to inherited Object.prototype members and appear as a bogus outcome.
 *
 * @param {object[]} decisions - Decision entries (each with an `id`).
 * @param {object[]} outcomes - Outcome entries (each with a `decision_id`).
 * @returns {object[]} Copies of the decisions with an added `outcome`
 *   property (the outcome entry, or `null` when none exists).
 */
function mergeDecisionsWithOutcomes(decisions, outcomes) {
  const outcomeById = new Map();
  for (const o of outcomes) {
    outcomeById.set(o.decision_id, o);
  }
  return decisions.map((d) => ({
    ...d,
    outcome: outcomeById.get(d.id) ?? null,
  }));
}
|
|
133
|
+
|
|
134
|
+
/**
 * Aggregate the ledger into per-provider, per-tier and per-task statistics.
 *
 * Only decisions that have an outcome feed the provider/tier/task numbers.
 * The compliance rate is computed over all decisions and counts those whose
 * `followed` flag is exactly `true`.
 *
 * @param {object} [opts] - Accepted for API compatibility; not read here.
 * @returns {{total_decisions: number, with_outcomes: number, compliance_rate: number,
 *   provider_stats: object, tier_stats: object, task_patterns: object,
 *   recommendations: string[]}}
 */
function getInsights(opts = {}) {
  const ledger = loadLedger();
  const resolved = mergeDecisionsWithOutcomes(ledger.decisions, ledger.outcomes)
    .filter((entry) => entry.outcome);

  const providerStats = {};
  const tierStats = {};
  const taskPatterns = {};

  for (const entry of resolved) {
    const { provider, outcome } = entry;
    const duration = outcome.actual_duration_ms;

    // Provider win rates
    if (!providerStats[provider]) {
      providerStats[provider] = { total: 0, success: 0, overrides: 0, followups: 0, totalDuration: 0, counted: 0 };
    }
    const byProvider = providerStats[provider];
    byProvider.total += 1;
    if (outcome.success) byProvider.success += 1;
    if (outcome.user_override) byProvider.overrides += 1;
    if (outcome.followup_fix_needed) byProvider.followups += 1;
    if (duration) {
      byProvider.totalDuration += duration;
      byProvider.counted += 1;
    }

    // Tier performance (running mean of the durations seen so far)
    const tierKey = `${provider}:${entry.tier}`;
    if (!tierStats[tierKey]) {
      tierStats[tierKey] = { total: 0, success: 0, avgDuration: 0, counted: 0 };
    }
    const byTier = tierStats[tierKey];
    byTier.total += 1;
    if (outcome.success) byTier.success += 1;
    if (duration) {
      byTier.counted += 1;
      byTier.avgDuration += (duration - byTier.avgDuration) / byTier.counted;
    }

    // Task type patterns (entries without a task type are left out)
    const taskType = entry.task_type;
    if (taskType) {
      if (!taskPatterns[taskType]) taskPatterns[taskType] = {};
      const perTask = taskPatterns[taskType];
      if (!perTask[provider]) perTask[provider] = { total: 0, success: 0 };
      perTask[provider].total += 1;
      if (outcome.success) perTask[provider].success += 1;
    }
  }

  // Compliance rate
  const total = ledger.decisions.length;
  let followedCount = 0;
  for (const decision of ledger.decisions) {
    if (decision.followed === true) followedCount += 1;
  }
  const compliance = total > 0 ? Math.round((followedCount / total) * 100) : 0;

  // Recommendations: needs two providers with at least 3 samples each and a
  // success-rate gap of more than 10 points.
  const recommendations = [];
  for (const [task, providers] of Object.entries(taskPatterns)) {
    const ranked = [];
    for (const [name, stats] of Object.entries(providers)) {
      if (stats.total >= 3) {
        ranked.push({ provider: name, rate: stats.total > 0 ? stats.success / stats.total : 0, total: stats.total });
      }
    }
    ranked.sort((a, b) => b.rate - a.rate);
    if (ranked.length < 2) continue;
    const [best, runnerUp] = ranked;
    if (best.rate > runnerUp.rate + 0.1) {
      recommendations.push(`${best.provider} wins ${task} tasks (${Math.round(best.rate * 100)}% vs ${Math.round(runnerUp.rate * 100)}%)`);
    }
  }

  return {
    total_decisions: total,
    with_outcomes: resolved.length,
    compliance_rate: compliance,
    provider_stats: providerStats,
    tier_stats: tierStats,
    task_patterns: taskPatterns,
    recommendations,
  };
}
|
|
206
|
+
|
|
207
|
+
// ─── CLI ────────────────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
/**
 * Print the ledger insights as a boxed text report on stdout.
 *
 * Prints a short notice instead when no decisions have been recorded.
 *
 * @returns {void}
 */
function printInsights() {
  const insights = getInsights();

  if (insights.total_decisions === 0) {
    console.log('');
    console.log(' No routing decisions recorded yet.');
    console.log(' The decision ledger builds over time as you use Claude Code.');
    console.log('');
    return;
  }

  const W = 52;
  // Clip or right-pad a value so every row has the same inner width.
  const fit = (value, width = W - 2) => String(value).slice(0, width).padEnd(width, ' ');
  const row = (value) => `║ ${fit(value)} ║`;
  const border = (left, right) => left + '═'.repeat(W) + right;
  const divider = () => border('╠', '╣');

  const out = [
    border('╔', '╗'),
    row('Decision Ledger Insights'),
    divider(),
    row(`Total decisions: ${insights.total_decisions}`),
    row(`With outcomes: ${insights.with_outcomes}`),
    row(`Compliance rate: ${insights.compliance_rate}%`),
    divider(),
    row('Provider Performance'),
  ];

  // Provider stats
  Object.entries(insights.provider_stats).forEach(([provider, stats]) => {
    const rate = stats.total > 0 ? Math.round((stats.success / stats.total) * 100) : 0;
    const avgMs = stats.counted > 0 ? Math.round(stats.totalDuration / stats.counted / 1000) : '?';
    out.push(row(` ${provider}: ${rate}% success, ${stats.overrides} overrides, avg ${avgMs}s`));
    if (stats.followups > 0) {
      out.push(row(` ${stats.followups} follow-up fixes needed`));
    }
  });

  // Recommendations
  if (insights.recommendations.length > 0) {
    out.push(divider());
    out.push(row('Recommendations'));
    insights.recommendations.forEach((rec) => {
      out.push(row(` ${rec}`));
    });
  }

  out.push(border('╚', '╝'));

  console.log('');
  out.forEach((text) => {
    console.log(` ${text}`);
  });
  console.log('');
}
|
|
263
|
+
|
|
264
|
+
/**
 * Print the most recent decisions, one per line, with status and duration.
 *
 * Status is '✓' for a successful outcome, '✗' for an outcome without
 * success, and '?' when no outcome has been recorded.
 *
 * @param {number} n - How many trailing decisions to show.
 * @returns {void}
 */
function printRecent(n) {
  const ledger = loadLedger();
  const tail = mergeDecisionsWithOutcomes(ledger.decisions, ledger.outcomes).slice(-n);

  if (tail.length === 0) {
    console.log(' No decisions recorded yet.');
    return;
  }

  console.log('');
  tail.forEach((entry) => {
    const { outcome } = entry;
    const time = entry.timestamp?.slice(11, 19) || '??:??:??';

    let status = '?';
    if (outcome) {
      status = outcome.success ? '✓' : '✗';
    }

    let dur = '';
    if (outcome?.actual_duration_ms) {
      dur = `${Math.round(outcome.actual_duration_ms / 1000)}s`;
    }

    console.log(` ${status} ${time} ${entry.provider}/${entry.model} [${entry.tier}] ${dur}`);
  });
  console.log('');
}
|
|
283
|
+
|
|
284
|
+
// CLI entry
|
|
285
|
+
// Run the CLI only when this file is the script Node was started with.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
  const args = process.argv.slice(2);

  if (args.includes('--json')) {
    console.log(JSON.stringify(getInsights(), null, 2));
  } else if (args.includes('--recent')) {
    const idx = args.indexOf('--recent');
    // Parse as base 10 and fall back to 20 for a missing, non-numeric, zero
    // or negative count. A negative count would make `slice(-n)` drop the
    // first entries instead of selecting the last ones.
    const requested = Number.parseInt(args[idx + 1], 10);
    const n = Number.isInteger(requested) && requested > 0 ? requested : 20;
    printRecent(n);
  } else {
    printInsights();
  }
}
|
|
298
|
+
|
|
299
|
+
export { recordDecision, recordOutcome, getInsights, loadLedger };
|
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* dual-brain-review.mjs
|
|
4
|
+
*
|
|
5
|
+
* Sends git diffs to GPT for independent code review using the Codex CLI
|
|
6
|
+
* (uses your ChatGPT subscription — no API key needed).
|
|
7
|
+
*
|
|
8
|
+
* Falls back to direct OpenAI API if OPENAI_API_KEY is set.
|
|
9
|
+
* Falls back to "no GPT available" if neither works.
|
|
10
|
+
*
|
|
11
|
+
* Usage: node .claude/hooks/dual-brain-review.mjs
|
|
12
|
+
* Output: JSON to stdout — always valid, never crashes.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { spawnSync } from 'child_process';
|
|
16
|
+
import { readFileSync } from 'fs';
|
|
17
|
+
import { dirname, join, resolve } from 'path';
|
|
18
|
+
import { fileURLToPath } from 'url';
|
|
19
|
+
|
|
20
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
const IS_REPLIT = !!(process.env.REPL_ID || process.env.REPL_SLUG);
|
|
22
|
+
const SANDBOX = IS_REPLIT ? 'danger-full-access' : 'read-only';
|
|
23
|
+
|
|
24
|
+
const REVIEW_PROMPT_R1 = `You are GPT-5.5 performing Round 1 of a dual-brain code review.
|
|
25
|
+
Claude (Opus) will independently review the same changes, then send you their findings
|
|
26
|
+
for a collaborative Round 2 discussion.
|
|
27
|
+
|
|
28
|
+
Review the current uncommitted changes for:
|
|
29
|
+
1. Correctness — logic errors, off-by-one, null/undefined risks
|
|
30
|
+
2. Security — injection, auth bypass, data exposure
|
|
31
|
+
3. Edge cases — what could break under unusual input
|
|
32
|
+
4. Quality — naming, structure, unnecessary complexity
|
|
33
|
+
|
|
34
|
+
Required output:
|
|
35
|
+
- Findings only, ordered by severity
|
|
36
|
+
- File/line references when possible
|
|
37
|
+
- Whether tests cover the changed behavior
|
|
38
|
+
- Whether the change follows existing repo patterns
|
|
39
|
+
- Whether any issue should block merge
|
|
40
|
+
|
|
41
|
+
Be concise. Flag only real issues, not style preferences. If the code looks good, say "LGTM" and note any minor suggestions. Output your review as plain text, not JSON.`;
|
|
42
|
+
|
|
43
|
+
const REVIEW_PROMPT_R2 = `You are GPT-5.5 in Round 2 of a collaborative code review with Claude (Opus).
|
|
44
|
+
You already reviewed this diff in Round 1. Claude has now independently reviewed the same changes.
|
|
45
|
+
This is a professional peer review dialogue — two senior engineers refining their assessment together.
|
|
46
|
+
|
|
47
|
+
Claude's review findings:
|
|
48
|
+
---CLAUDE_REVIEW---
|
|
49
|
+
|
|
50
|
+
Now respond as a peer reviewer:
|
|
51
|
+
1. CONFIRMED: Issues you both found — these are high-confidence findings
|
|
52
|
+
2. MISSED: Issues Claude caught that you missed — acknowledge them
|
|
53
|
+
3. DISAGREE: Claude's findings you think are false positives — explain why
|
|
54
|
+
4. ESCALATED: Issues that are MORE severe than either of you initially rated
|
|
55
|
+
5. VERDICT: Combined assessment — LGTM, minor issues, or blocks merge
|
|
56
|
+
|
|
57
|
+
Be direct. If Claude found something real that you missed, say so.
|
|
58
|
+
If Claude flagged something that isn't actually a problem, explain why with evidence.
|
|
59
|
+
The goal is the most accurate review, not defending your initial take.`;
|
|
60
|
+
|
|
61
|
+
/**
 * Load the optional project-specific review rules as a prompt suffix.
 *
 * Reads `review-rules.md` from the directory above this file.
 *
 * @returns {string} The rules prefixed with an instruction line, or an empty
 *   string when the file is missing, unreadable or blank.
 */
function loadReviewRules() {
  const rulesFile = resolve(__dirname, '..', 'review-rules.md');

  let rules;
  try {
    rules = readFileSync(rulesFile, 'utf8').trim();
  } catch {
    return '';
  }

  return rules ? '\n\nAlso enforce these project-specific rules:\n' + rules : '';
}
|
|
71
|
+
|
|
72
|
+
const MAX_DIFF_CHARS = 15000;
|
|
73
|
+
const MIN_DIFF_LINES = 5;
|
|
74
|
+
const CODEX_TIMEOUT = 90;
|
|
75
|
+
|
|
76
|
+
/**
 * Locate a runnable Codex CLI binary.
 *
 * Search order: the `CODEX_BIN` environment variable, the binary found on
 * PATH (`which`, or `where` on Windows), then a few common install paths.
 *
 * `spawnSync` does not throw when a binary is missing; it reports the
 * failure through `result.error`. Each candidate is therefore accepted only
 * if `--version` actually ran and exited with status 0.
 *
 * @returns {string|null} Path (or command) of the Codex binary, or `null`
 *   when none could be executed.
 */
function findCodex() {
  const isRunnable = (bin) => {
    try {
      const probe = spawnSync(bin, ['--version'], { stdio: 'pipe', timeout: 3000 });
      return !probe.error && probe.status === 0;
    } catch {
      // spawnSync throws only for invalid arguments; treat as not runnable.
      return false;
    }
  };

  const candidates = [
    process.env.CODEX_BIN,
  ].filter(Boolean);
  for (const c of candidates) {
    if (isRunnable(c)) return c;
  }

  try {
    const locator = process.platform === 'win32' ? 'where' : 'which';
    const which = spawnSync(locator, ['codex'], { encoding: 'utf8', stdio: 'pipe', timeout: 3000 });
    if (!which.error && which.status === 0) {
      // `where` may list several matches; use the first non-empty line.
      const found = (which.stdout || '').split(/\r?\n/).map((l) => l.trim()).find(Boolean);
      if (found) return found;
    }
  } catch {
    // Lookup is best-effort; continue with the fixed fallback locations.
  }

  const home = process.env.HOME || process.env.USERPROFILE || '';
  const fallbacks = [
    join(home, '.local', 'bin', 'codex'),
    join(home, 'bin', 'codex'),
    '/usr/local/bin/codex',
  ];
  for (const p of fallbacks) {
    if (isRunnable(p)) return p;
  }
  return null;
}

const CODEX_BIN = findCodex();
|
|
100
|
+
|
|
101
|
+
/**
 * Run a git command synchronously and return its stdout.
 *
 * @param {string[]} args - Arguments passed to `git`.
 * @returns {string|null} Captured stdout when git exits with status 0;
 *   `null` when git cannot be started, times out (10 s), overflows the
 *   output buffer, or exits non-zero.
 */
function runGit(args) {
  try {
    const proc = spawnSync('git', args, {
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
      timeout: 10_000,
      // The default buffer is 1 MiB; a larger diff would otherwise fail and
      // be reported as "no output".
      maxBuffer: 64 * 1024 * 1024,
    });
    if (proc.error) return null;
    return proc.status === 0 ? proc.stdout : null;
  } catch { return null; }
}
|
|
111
|
+
|
|
112
|
+
/**
 * Decide from a `codex login status` result whether the CLI is logged in.
 *
 * An explicit "not logged in"-style phrase in stdout/stderr always means
 * false. Otherwise exit status 0, or a "logged in"-style phrase, means true.
 *
 * @param {{status?: number|null, stdout?: string, stderr?: string}|null|undefined} result
 *   Result object as returned by `spawnSync`.
 * @returns {boolean}
 */
function isCodexAuthenticated(result) {
  const combined = `${result?.stdout || ''}${result?.stderr || ''}`.toLowerCase();

  const saysLoggedOut = /\b(not\s+logged\s+in|unauthenticated|logged\s+out|no\s+auth)\b/.test(combined);
  if (saysLoggedOut) return false;

  if (result?.status === 0) return true;
  return /\b(logged\s+in|authenticated|signed\s+in)\b/.test(combined);
}
|
|
118
|
+
|
|
119
|
+
/**
 * Count the lines of a string that contain something other than whitespace.
 *
 * @param {string|null|undefined} str - Text to inspect; falsy counts as empty.
 * @returns {number} Number of non-blank lines.
 */
function countLines(str) {
  let nonBlank = 0;
  for (const line of (str || '').split('\n')) {
    if (line.trim().length > 0) nonBlank += 1;
  }
  return nonBlank;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Pick the OpenAI model configured for the 'think' tier.
 *
 * Reads `orchestrator.json` from the directory above this file and returns
 * the first model under `subscriptions.openai.models` whose `tier` is
 * 'think'.
 *
 * @returns {string} The configured model name, or 'gpt-5.5' when the config
 *   is missing, invalid, or lists no 'think' model.
 */
function getThinkModel() {
  try {
    const configPath = resolve(__dirname, '..', 'orchestrator.json');
    const config = JSON.parse(readFileSync(configPath, 'utf8'));
    const models = config?.subscriptions?.openai?.models ?? {};
    const match = Object.entries(models).find(([, info]) => info?.tier === 'think');
    if (match) return match[0];
  } catch {}
  return 'gpt-5.5';
}
|
|
133
|
+
|
|
134
|
+
/**
 * Heuristically decide whether a review text reports problems.
 *
 * Order of checks:
 *   1. Any concrete issue phrase -> true, even if "LGTM" also appears.
 *   2. Otherwise any explicit "all clear" phrase -> false.
 *   3. Otherwise (ambiguous) -> true, so a human looks at it.
 *
 * @param {string} text - Review text.
 * @returns {boolean} True when the review should be treated as having issues.
 */
function hasIssues(text) {
  const lower = text.toLowerCase();

  // Concrete issue indicators
  const issuePatterns = [
    /\b(bug|crash|vulnerability|incorrect|broken|dangerous|unsafe|injection|leak)\b/i,
    /\bshould\s+(fix|change|update|remove|replace|refactor)\b/i,
    /\bmust\s+(fix|change|update|remove|replace|refactor)\b/i,
    /\b(will\s+break|could\s+break|might\s+break|can\s+crash|could\s+crash)\b/i,
    /\b(missing\s+(check|validation|guard|null|error|handling))\b/i,
    /\b(race\s+condition|deadlock|overflow|underflow|out\s+of\s+bounds)\b/i,
  ];
  for (const pattern of issuePatterns) {
    if (pattern.test(text)) return true;
  }

  // Phrases that explicitly declare the change clean
  const good = ['lgtm', 'looks good', 'no issues', 'no problems', 'no concerns', 'all good', 'clean', 'approved', 'ship it', 'ready to merge', 'good to go', 'looks fine', 'no blockers'];
  for (const phrase of good) {
    if (lower.includes(phrase)) return false;
  }

  // Ambiguous: default to flagging for human review
  return true;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Write a result object as one JSON line on stdout and end the process
 * with exit code 0.
 *
 * @param {object} obj - Value to serialize.
 */
function exit(obj) {
  const payload = `${JSON.stringify(obj)}\n`;
  process.stdout.write(payload);
  process.exit(0);
}
|
|
163
|
+
|
|
164
|
+
/**
 * Try GPT review via Codex CLI (uses ChatGPT subscription auth).
 * Round 1: independent review. Round 2: respond to Claude's review.
 *
 * The prompt is passed as a CLI argument and the diff (truncated to
 * MAX_DIFF_CHARS) is written to the child's stdin.
 *
 * @param {string} diff - Diff text to review.
 * @param {{round?: number, claudeReview?: string|null}} [options]
 *   `claudeReview` is inserted into the Round 2 prompt when `round` is 2.
 * @returns {object|null} A review result (`round`, `review`, `model`,
 *   `auth_type`, `issues_found`, `tokens`), an error result (`review`,
 *   `error: true`, `auth_type`), or `null` when Codex is unavailable, not
 *   logged in, or produced no usable output.
 */
function tryCodexReview(diff, { round = 1, claudeReview = null } = {}) {
  if (!CODEX_BIN) return null;
  const login = spawnSync(CODEX_BIN, ['login', 'status'], {
    encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 5000,
  });
  if (!isCodexAuthenticated(login)) {
    return null;
  }

  try {
    const model = getThinkModel();
    const truncated = diff.length > MAX_DIFF_CHARS
      ? diff.slice(0, MAX_DIFF_CHARS) + '\n[truncated]'
      : diff;

    let basePrompt;
    if (round === 2 && claudeReview) {
      // Use a replacer function: with a string replacement, sequences such
      // as `$&`, `$'` or `$1` inside Claude's review would be expanded by
      // String.prototype.replace and corrupt the prompt.
      basePrompt = REVIEW_PROMPT_R2.replace('---CLAUDE_REVIEW---', () => String(claudeReview));
    } else {
      basePrompt = REVIEW_PROMPT_R1;
    }
    const fullPrompt = basePrompt + loadReviewRules();

    const proc = spawnSync(CODEX_BIN, [
      'exec', '--json', '--ephemeral',
      '-c', `model="${model}"`,
      '-s', SANDBOX,
      fullPrompt,
    ], {
      input: truncated,
      encoding: 'utf8',
      stdio: ['pipe', 'pipe', 'pipe'],
      timeout: CODEX_TIMEOUT * 1000,
    });
    const result = proc.stdout || '';

    // The output is parsed as one JSON event per line; other lines are ignored.
    const messages = result
      .split('\n')
      .filter(l => l.trim())
      .map(l => { try { return JSON.parse(l); } catch { return null; } })
      .filter(Boolean);

    const agentMessages = messages
      .filter(m => m.type === 'item.completed' && m.item?.type === 'agent_message')
      .map(m => m.item.text);

    const usage = messages.find(m => m.type === 'turn.completed')?.usage;

    if (agentMessages.length > 0) {
      const reviewText = agentMessages.join('\n\n');
      return {
        round,
        review: reviewText,
        model,
        auth_type: 'codex_subscription',
        issues_found: hasIssues(reviewText),
        tokens: usage || null,
      };
    }

    const errors = messages.filter(m => m.type === 'error' || m.type === 'turn.failed');
    if (errors.length > 0) {
      return {
        review: `Codex error: ${errors[0].message || errors[0].error?.message || 'unknown'}`,
        error: true,
        auth_type: 'codex_subscription',
      };
    }

    return null;
  } catch (err) {
    return {
      review: `Codex exec failed: ${err.message?.slice(0, 200) || 'unknown error'}`,
      error: true,
      auth_type: 'codex_subscription',
    };
  }
}
|
|
246
|
+
|
|
247
|
+
/**
 * Try GPT review via direct API call (needs OPENAI_API_KEY).
 *
 * The whole exchange, including reading the response body, is aborted
 * after 30 seconds.
 *
 * @param {string} diff - Diff text to review (truncated to MAX_DIFF_CHARS).
 * @param {{round?: number, claudeReview?: string|null}} [options]
 *   `claudeReview` is inserted into the Round 2 prompt when `round` is 2.
 * @returns {Promise<object|null>} A review result (`round`, `review`,
 *   `model`, `auth_type`, `issues_found`), or `null` when no API key is set,
 *   the request fails or times out, or the response has no text.
 */
async function tryApiReview(diff, { round = 1, claudeReview = null } = {}) {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) return null;

  const model = getThinkModel();
  const truncated = diff.length > MAX_DIFF_CHARS
    ? diff.slice(0, MAX_DIFF_CHARS) + '\n[truncated]'
    : diff;

  let basePrompt;
  if (round === 2 && claudeReview) {
    // Use a replacer function: with a string replacement, sequences such as
    // `$&`, `$'` or `$1` inside Claude's review would be expanded by
    // String.prototype.replace and corrupt the prompt.
    basePrompt = REVIEW_PROMPT_R2.replace('---CLAUDE_REVIEW---', () => String(claudeReview));
  } else {
    basePrompt = REVIEW_PROMPT_R1;
  }
  const fullPrompt = basePrompt + loadReviewRules();

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), 30_000);

  try {
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      signal: controller.signal,
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: fullPrompt },
          { role: 'user', content: `Review this diff:\n\n\`\`\`diff\n${truncated}\n\`\`\`` },
        ],
        temperature: 0,
        max_tokens: 1000,
      }),
    });

    if (!response.ok) return null;

    const data = await response.json();
    const text = data?.choices?.[0]?.message?.content ?? '';
    if (!text) return null;

    return {
      round,
      review: text,
      model,
      auth_type: 'api_key',
      issues_found: hasIssues(text),
    };
  } catch {
    // Network failure, abort, or invalid JSON: let the caller fall through.
    return null;
  } finally {
    // Cleared here so the timeout also covers reading the response body and
    // the timer is released on every exit path.
    clearTimeout(timer);
  }
}
|
|
308
|
+
|
|
309
|
+
/**
 * Parse `--key value`, `--key=value` and bare `--flag` arguments.
 *
 * A `--key` followed by a token that does not start with `--` takes that
 * token as its value; otherwise it is set to `true`. Tokens that do not
 * start with `--` and are not consumed as a value are ignored.
 *
 * @param {string[]} argv - Arguments (without the node and script paths).
 * @returns {Object<string, string|boolean>} Parsed options.
 */
function parseArgs(argv) {
  const parsed = {};

  for (let pos = 0; pos < argv.length; pos += 1) {
    const token = argv[pos];
    if (!token.startsWith('--')) continue;

    const eq = token.indexOf('=');
    if (eq !== -1) {
      parsed[token.slice(2, eq)] = token.slice(eq + 1);
      continue;
    }

    const following = argv[pos + 1];
    const takesValue = following !== undefined && !following.startsWith('--');
    parsed[token.slice(2)] = takesValue ? following : true;
    if (takesValue) pos += 1; // skip the token consumed as the value
  }

  return parsed;
}
|
|
333
|
+
|
|
334
|
+
/**
 * Entry point: collect the diff, ask GPT for a review, print JSON, exit.
 *
 * Steps:
 *   1. Take the staged diff, or the diff against HEAD when the staged diff
 *      is shorter than MIN_DIFF_LINES and the HEAD diff has more lines. Then
 *      append up to 10 untracked source files as "new file" diffs.
 *   2. Try the Codex CLI.
 *   3. Try the direct OpenAI API.
 *   4. Otherwise report that no GPT review is available.
 *
 * Every path ends in `exit(...)`, which prints one JSON line and
 * terminates the process with code 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));

  // A bare `--round` parses to `true` and a non-numeric value to NaN;
  // both are treated as Round 1.
  const parsedRound = args.round ? Number.parseInt(args.round, 10) : 1;
  const round = Number.isNaN(parsedRound) ? 1 : parsedRound;

  // Only a non-empty string is a usable review; a bare `--claude-review`
  // flag parses to `true` and is ignored.
  const rawClaudeReview = args['claude-review'];
  const claudeReview = typeof rawClaudeReview === 'string' && rawClaudeReview ? rawClaudeReview : null;
  const opts = { round, claudeReview };

  // 1. Get diff
  let diff = runGit(['diff', '--staged']) || '';
  if (countLines(diff) < MIN_DIFF_LINES) {
    const headDiff = runGit(['diff', 'HEAD']) || '';
    if (countLines(headDiff) > countLines(diff)) diff = headDiff;
  }

  try {
    const untracked = runGit(['ls-files', '--others', '--exclude-standard']) || '';
    const sourceExts = /\.(ts|tsx|js|jsx|py|rs|go|java|rb|swift|kt|mjs|cjs)$/;
    const untrackedSrc = untracked.split('\n').filter(f => f && sourceExts.test(f));
    for (const f of untrackedSrc.slice(0, 10)) {
      // `git diff --no-index` exits with status 1 when the files differ,
      // which is always the case for a non-empty new file, so status 1 must
      // be accepted here. `--` guards file names that start with '-'.
      const proc = spawnSync('git', ['diff', '--no-index', '--', '/dev/null', f], {
        encoding: 'utf8',
        stdio: ['pipe', 'pipe', 'pipe'],
        timeout: 10_000,
      });
      const finished = !proc.error && (proc.status === 0 || proc.status === 1);
      if (finished && proc.stdout) diff += '\n' + proc.stdout;
    }
  } catch {
    // Untracked files are a best-effort addition; review what we have.
  }

  if (countLines(diff) < MIN_DIFF_LINES) {
    exit({ review: 'No significant changes to review' });
  }

  // 2. Try Codex CLI first
  const codexResult = tryCodexReview(diff, opts);
  if (codexResult) {
    if (round === 1) {
      codexResult.instructions = `Round 1 complete. Now:
1. Provide YOUR independent code review of the same changes
2. Then call Round 2 to send your findings back to GPT:
node .claude/hooks/dual-brain-review.mjs --round 2 --claude-review "<your findings>"
3. GPT will respond — confirming shared findings, acknowledging misses, and pushing back on false positives
4. You then synthesize both rounds into the final review verdict`;
    } else {
      codexResult.instructions = `GPT has responded to your review. Synthesize into a FINAL REVIEW:
- CONFIRMED findings (both found) → high confidence, must fix
- GPT-only findings you agree with → add to your list
- Your findings GPT disputed → re-evaluate honestly
- Final verdict: LGTM, minor issues, or blocks merge`;
    }
    exit(codexResult);
  }

  // 3. Try direct API
  const apiResult = await tryApiReview(diff, opts);
  if (apiResult) {
    if (round === 1) {
      apiResult.instructions = `Round 1 complete. Provide YOUR independent review, then call Round 2 with --round 2 --claude-review "<findings>"`;
    } else {
      apiResult.instructions = `Synthesize both rounds into a final review verdict.`;
    }
    exit(apiResult);
  }

  // 4. No GPT available
  exit({
    review: 'No GPT review available. Install Codex CLI and login with your ChatGPT subscription, or set OPENAI_API_KEY.',
    skip_reason: 'no_gpt_auth',
  });
}

// Any unexpected failure is still reported as one JSON line with exit code 0,
// so callers always receive parseable output.
main().catch(err => {
  process.stdout.write(
    JSON.stringify({ review: `Unexpected error: ${err?.message ?? String(err)}`, error: true }) + '\n'
  );
  process.exit(0);
});
|