dual-brain 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +97 -0
- package/CLAUDE.md +147 -0
- package/LICENSE +21 -0
- package/README.md +197 -0
- package/agents/implementer.md +22 -0
- package/agents/researcher.md +25 -0
- package/agents/verifier.md +30 -0
- package/bin/dual-brain.mjs +2868 -0
- package/hooks/auto-update-wrapper.mjs +102 -0
- package/hooks/auto-update.sh +67 -0
- package/hooks/budget-balancer.mjs +679 -0
- package/hooks/control-panel.mjs +1195 -0
- package/hooks/cost-logger.mjs +286 -0
- package/hooks/cost-report.mjs +351 -0
- package/hooks/decision-ledger.mjs +299 -0
- package/hooks/dual-brain-review.mjs +404 -0
- package/hooks/dual-brain-think.mjs +393 -0
- package/hooks/enforce-tier.mjs +469 -0
- package/hooks/failure-detector.mjs +138 -0
- package/hooks/gpt-work-dispatcher.mjs +512 -0
- package/hooks/head-guard.mjs +105 -0
- package/hooks/health-check.mjs +444 -0
- package/hooks/install-git-hooks.mjs +106 -0
- package/hooks/model-registry.mjs +859 -0
- package/hooks/plan-generator.mjs +544 -0
- package/hooks/profiles.mjs +254 -0
- package/hooks/quality-gate.mjs +355 -0
- package/hooks/risk-classifier.mjs +41 -0
- package/hooks/session-report.mjs +514 -0
- package/hooks/setup-wizard.mjs +130 -0
- package/hooks/summary-checkpoint.mjs +432 -0
- package/hooks/task-classifier.mjs +328 -0
- package/hooks/test-orchestrator.mjs +1077 -0
- package/hooks/vibe-memory.mjs +463 -0
- package/hooks/vibe-router.mjs +387 -0
- package/hooks/wave-orchestrator.mjs +1397 -0
- package/install.mjs +1541 -0
- package/mcp-server/README.md +81 -0
- package/mcp-server/index.mjs +388 -0
- package/orchestrator.json +215 -0
- package/package.json +108 -0
- package/playbooks/debug.json +49 -0
- package/playbooks/refactor.json +57 -0
- package/playbooks/security-audit.json +57 -0
- package/playbooks/security.json +38 -0
- package/playbooks/test-gen.json +48 -0
- package/plugin.json +22 -0
- package/review-rules.md +17 -0
- package/shell-hook.sh +26 -0
- package/skills/go.md +22 -0
- package/skills/review.md +19 -0
- package/skills/status.md +13 -0
- package/skills/think.md +22 -0
- package/src/brief.mjs +266 -0
- package/src/decide.mjs +635 -0
- package/src/decompose.mjs +331 -0
- package/src/detect.mjs +345 -0
- package/src/dispatch.mjs +942 -0
- package/src/health.mjs +253 -0
- package/src/index.mjs +44 -0
- package/src/install-hooks.mjs +100 -0
- package/src/playbook.mjs +257 -0
- package/src/profile.mjs +990 -0
- package/src/redact.mjs +192 -0
- package/src/repo.mjs +292 -0
- package/src/session.mjs +1036 -0
- package/src/tui.mjs +197 -0
- package/src/update-check.mjs +35 -0
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { readFileSync, writeFileSync, appendFileSync, renameSync } from 'fs';
|
|
3
|
+
import { dirname, resolve, join } from 'path';
|
|
4
|
+
import { fileURLToPath } from 'url';
|
|
5
|
+
import { classifyRisk, extractPaths } from './risk-classifier.mjs';
|
|
6
|
+
import { computePromptHash, checkFailureLoop, recordFailure } from './failure-detector.mjs';
|
|
7
|
+
|
|
8
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
9
|
+
const CONFIG_FILE = resolve(__dirname, '..', 'orchestrator.json');
|
|
10
|
+
const PROFILE_FILE = resolve(__dirname, '..', 'dual-brain.profile.json');
|
|
11
|
+
const DRIFT_STATE = resolve(__dirname, '.drift-warned');
|
|
12
|
+
const BURST_FILE = resolve(__dirname, '.burst-state');
|
|
13
|
+
|
|
14
|
+
/**
 * Count Agent launches inside the current 90-second window.
 *
 * Reads the persisted counter from BURST_FILE, starts a fresh window when the
 * stored one began more than 90 seconds ago, increments the counter and writes
 * it back via a temp file + rename. File errors are swallowed on purpose:
 * burst detection is best-effort and must never break the hook.
 *
 * @returns {boolean} true once at least 3 launches were counted in the window.
 */
function detectBurst() {
  const now = Date.now();
  let state = { count: 0, window_start: now };

  try {
    const saved = JSON.parse(readFileSync(BURST_FILE, 'utf8'));
    // Only trust a well-formed record. Valid-but-unexpected JSON (null, {}, a
    // string) would otherwise yield a NaN counter or a TypeError on return.
    if (Number.isFinite(saved?.count) && Number.isFinite(saved?.window_start)) {
      state = { count: saved.count, window_start: saved.window_start };
    }
  } catch {}

  if (now - state.window_start > 90_000) {
    state = { count: 0, window_start: now };
  }
  state.count += 1;

  try {
    const tmp = `${BURST_FILE}.tmp.${process.pid}`;
    writeFileSync(tmp, JSON.stringify(state));
    renameSync(tmp, BURST_FILE);
  } catch {}

  return state.count >= 3;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Read the active routing profile name from PROFILE_FILE.
 *
 * @returns {string} the file's `active` value, or 'auto' when the file cannot
 *   be read or parsed, or when `active` is missing or falsy.
 */
function loadProfile() {
  const fallback = 'auto';
  try {
    const { active } = JSON.parse(readFileSync(PROFILE_FILE, 'utf8'));
    return active || fallback;
  } catch {
    return fallback;
  }
}
|
|
34
|
+
|
|
35
|
+
const PROFILE_SETTINGS = {
|
|
36
|
+
auto: { demote_think: false, promote_execute: false, bias: 0 },
|
|
37
|
+
balanced: { demote_think: false, promote_execute: false, bias: 0 },
|
|
38
|
+
'cost-saver': { demote_think: true, promote_execute: false, bias: -20 },
|
|
39
|
+
'quality-first': { demote_think: false, promote_execute: true, bias: 10 },
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
/**
 * Build a "pricing is stale" warning when `config.pricing_verified` is at
 * least 30 days old, at most once per UTC calendar day.
 *
 * The date of the last warning is stored in DRIFT_STATE; read/write failures
 * on that file are ignored, so the warning may repeat if it cannot be written.
 *
 * @param {object} config - parsed orchestrator config
 * @returns {string|null} the warning text, or null when there is no
 *   verification date, the date is unparseable, the pricing is fresh, or a
 *   warning was already issued today.
 */
function checkPricingDrift(config) {
  const verified = config.pricing_verified;
  if (!verified) return null;

  const age = Math.floor((Date.now() - Date.parse(verified)) / 86400000);
  // An unparseable date yields NaN, which would slip past `age < 30` and
  // produce a "NaN days ago" warning.
  if (!Number.isFinite(age) || age < 30) return null;

  // Rate limit: only warn once per day.
  const today = new Date().toISOString().slice(0, 10);
  try {
    if (readFileSync(DRIFT_STATE, 'utf8').trim() === today) return null;
  } catch {}

  try {
    writeFileSync(DRIFT_STATE, today);
  } catch {}

  return `**[Drift Warning]** Pricing was last verified ${age} days ago. Run \`node .claude/hooks/setup-wizard.mjs\` to update.`;
}
|
|
62
|
+
|
|
63
|
+
const SESSION_ID = process.env.CLAUDE_SESSION_ID || process.ppid?.toString() || null;
|
|
64
|
+
|
|
65
|
+
/**
 * Persist one tier recommendation in three places next to this script:
 *   1. a line in today's `usage-YYYY-MM-DD.jsonl`,
 *   2. the `recent_hashes` list in today's `usage-summary-YYYY-MM-DD.json`
 *      (entries older than 10 minutes are dropped),
 *   3. a `decision` line in `decision-ledger.jsonl`.
 *
 * Each of the three writes is independent and best-effort; failures are
 * silently ignored.
 *
 * @param {object} event - { tier, recommended, actual, promptHash, followed, profile }
 * @returns {void}
 */
function logRecommendation(event) {
  const isoDay = () => new Date().toISOString().slice(0, 10);

  const usageLog = join(__dirname, `usage-${isoDay()}.jsonl`);
  const profileName = event.profile || 'balanced';
  const record = {
    timestamp: new Date().toISOString(),
    type: 'tier_recommendation',
    detected_tier: event.tier,
    recommended_model: event.recommended,
    actual_model: event.actual,
    prompt_hash: event.promptHash,
    followed: event.followed,
    session_id: SESSION_ID,
    profile: profileName,
  };
  const usageLine = JSON.stringify(record);

  // 1. Usage log (append-only).
  try {
    appendFileSync(usageLog, usageLine + '\n');
  } catch {}

  // 2. Summary checkpoint, rewritten atomically so the next hook invocation
  //    can detect duplicates without scanning the log.
  try {
    const summaryFile = join(__dirname, `usage-summary-${isoDay()}.json`);
    let summary;
    try {
      summary = JSON.parse(readFileSync(summaryFile, 'utf8'));
    } catch {
      summary = { version: 1, recent_hashes: [] };
    }

    if (event.promptHash) {
      summary.recent_hashes = summary.recent_hashes || [];
      summary.recent_hashes.push({ hash: event.promptHash, ts: record.timestamp });
      const cutoff = Date.now() - 10 * 60 * 1000;
      summary.recent_hashes = summary.recent_hashes.filter((item) => Date.parse(item.ts) >= cutoff);
    }
    summary.updated_at = new Date().toISOString();

    const scratch = summaryFile + '.tmp.' + process.pid;
    writeFileSync(scratch, JSON.stringify(summary, null, 2) + '\n');
    renameSync(scratch, summaryFile);
  } catch {}

  // 3. Decision ledger (append-only).
  try {
    const decision = {
      type: 'decision',
      id: record.timestamp.replace(/\W/g, '').slice(-12),
      timestamp: record.timestamp,
      session_id: SESSION_ID,
      profile: profileName,
      tier: event.tier,
      provider: detectProvider(event.actual),
      model: event.actual || 'unknown',
      recommended_model: event.recommended,
      followed: event.followed,
      prompt_hash: event.promptHash,
    };
    appendFileSync(join(__dirname, 'decision-ledger.jsonl'), JSON.stringify(decision) + '\n');
  } catch {}
}
|
|
120
|
+
|
|
121
|
+
/**
 * Look for a dispatch with the same prompt hash within the last 10 minutes.
 *
 * Today's summary file is consulted first; if it is missing, unreadable or has
 * no match, today's usage log is scanned line by line.
 *
 * @param {string} promptHash - hash of the prompt being dispatched
 * @returns {object|null} `{ timestamp, prompt_hash }` for a summary hit, the
 *   full parsed log entry for a log hit, or null when nothing matches.
 */
function checkDuplicate(promptHash) {
  const WINDOW_MS = 10 * 60 * 1000;
  const isoDay = () => new Date().toISOString().slice(0, 10);

  // Fast path: the summary checkpoint keeps only recent hashes.
  try {
    const summaryPath = join(__dirname, `usage-summary-${isoDay()}.json`);
    const summary = JSON.parse(readFileSync(summaryPath, 'utf8'));
    const cutoff = Date.now() - WINDOW_MS;
    for (const recent of summary.recent_hashes || []) {
      if (recent.hash === promptHash && Date.parse(recent.ts) >= cutoff) {
        return { timestamp: recent.ts, prompt_hash: promptHash };
      }
    }
  } catch {}

  // Slow path: scan today's usage log; malformed lines are skipped.
  const logFile = join(__dirname, `usage-${isoDay()}.jsonl`);
  try {
    const raw = readFileSync(logFile, 'utf8');
    const cutoff = Date.now() - WINDOW_MS;
    for (const line of raw.split('\n')) {
      if (!line) continue;
      try {
        const entry = JSON.parse(line);
        const isRecentMatch =
          entry.type === 'tier_recommendation' &&
          entry.prompt_hash === promptHash &&
          Date.parse(entry.timestamp) > cutoff;
        if (isRecentMatch) return entry;
      } catch {}
    }
  } catch {}

  return null;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Map a model name to the provider that serves it.
 *
 * @param {*} model - model identifier; may be empty or 'main-session'
 * @returns {'openai'|'claude'} 'openai' when the lower-cased name contains
 *   'gpt', 'o1', 'o3' or 'o4'; 'claude' for everything else, including a
 *   missing model.
 */
function detectProvider(model) {
  if (!model || model === 'main-session') return 'claude';

  const name = String(model).toLowerCase();
  const openaiMarkers = ['gpt', 'o1', 'o3', 'o4'];
  const isOpenAi = openaiMarkers.some((marker) => name.includes(marker));

  // Anything not recognised as OpenAI is attributed to Claude.
  return isOpenAi ? 'openai' : 'claude';
}
|
|
159
|
+
|
|
160
|
+
/**
 * Count how many calls of the given tier each provider handled in the last
 * five hours.
 *
 * Today's summary file is used when it carries a `pressure` section. A summary
 * without one is not treated as "zero calls", because logRecommendation()
 * creates summaries that only contain `recent_hashes`; in that case, and when
 * the summary is missing or unreadable, today's usage log is scanned instead.
 *
 * @param {string} tier - 'search' | 'execute' | 'think'
 * @returns {{claudeCalls: number, openaiCalls: number}|null} the counts, or
 *   null when neither source could be read.
 */
function quickPressureCheck(tier) {
  const WINDOW_MS = 5 * 60 * 60 * 1000;
  const today = new Date().toISOString().slice(0, 10);

  // Fast path: pre-aggregated timestamps per provider and tier.
  try {
    const summaryPath = join(__dirname, `usage-summary-${today}.json`);
    const summary = JSON.parse(readFileSync(summaryPath, 'utf8'));
    if (summary?.pressure) {
      const cutoff = Date.now() - WINDOW_MS;
      const countRecent = (stamps) => (stamps || []).filter((t) => Date.parse(t) >= cutoff).length;
      return {
        claudeCalls: countRecent(summary.pressure.claude?.[tier]),
        openaiCalls: countRecent(summary.pressure.openai?.[tier]),
      };
    }
  } catch {}

  // Fallback: scan today's usage log; malformed lines are skipped.
  try {
    const logFile = join(__dirname, `usage-${today}.jsonl`);
    const lines = readFileSync(logFile, 'utf8').split('\n').filter(Boolean);
    const cutoff = Date.now() - WINDOW_MS;
    let claudeCalls = 0;
    let openaiCalls = 0;

    for (const line of lines) {
      try {
        const entry = JSON.parse(line);
        if (entry.tier !== tier) continue;
        // Written as a negated >= so entries with a missing or unparseable
        // timestamp (NaN) are excluded, matching the summary path.
        if (!(Date.parse(entry.timestamp) >= cutoff)) continue;
        // Same provider heuristic as the decision ledger (covers o1/o3/o4).
        const provider = entry.provider || detectProvider(entry.model);
        if (provider === 'claude') claudeCalls += 1;
        else openaiCalls += 1;
      } catch {}
    }
    return { claudeCalls, openaiCalls };
  } catch {
    return null;
  }
}
|
|
194
|
+
|
|
195
|
+
const SEARCH_WORDS = /\b(explore|search|find|grep|locate|where\s+is|list\s+files|read[-\s]?only|lookup|scan)\b/i;
|
|
196
|
+
const THINK_WORDS = /\b(plan|design|architect|review|audit|security|code[-\s]?review|threat[-\s]?model|complex[-\s]?debug)\b/i;
|
|
197
|
+
|
|
198
|
+
// ─── Write-intent enforcement ─────────────────────────────────────────────────
|
|
199
|
+
// Keywords that indicate an agent will mutate files or system state.
|
|
200
|
+
// `install` is listed once; `uninstall` needs its own alternative because the
// surrounding \b anchors stop `install` from matching inside it.
const WRITE_INTENT_WORDS = /\b(edit|fix|change|update|create|write|modify|implement|refactor|add|remove|delete|build|install|configure|patch|apply|move|rename|migrate|replace|rewrite|generate|scaffold|init(?:ialize)?|setup|deploy|run\s+tests?|commit|push|uninstall)\b/i;
|
|
201
|
+
|
|
202
|
+
// Dispatch marker prefix stamped by src/dispatch.mjs for all legitimate dispatches.
|
|
203
|
+
const DISPATCH_MARKER_RE = /<!--\s*dual-brain-dispatch:\s*[a-z0-9]+\s*-->/i;
|
|
204
|
+
|
|
205
|
+
/**
 * Report whether a prompt is free of write-intent keywords.
 *
 * @param {string} prompt - description/prompt text to inspect
 * @returns {boolean} true when WRITE_INTENT_WORDS does not match anywhere in
 *   the prompt, false as soon as a single keyword is present.
 */
function isReadOnly(prompt) {
  const mentionsWrite = WRITE_INTENT_WORDS.test(prompt);
  return mentionsWrite === false;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Find the first Claude model configured for a tier.
 *
 * @param {object} config - parsed orchestrator config; models are read from
 *   `config.subscriptions.claude.models`
 * @param {string} tier - tier name to look up
 * @returns {string|null} the key of the first model whose `tier` matches, in
 *   the object's own key order, or null when none matches.
 */
function preferredModel(config, tier) {
  const catalogue = config?.subscriptions?.claude?.models ?? {};
  const hit = Object.entries(catalogue).find(([, meta]) => meta?.tier === tier);
  return hit ? hit[0] : null;
}
|
|
219
|
+
|
|
220
|
+
// ─── Hook entry point ──────────────────────────────────────────────────────────
// Reads one hook payload (JSON) from stdin and, for `Agent` tool calls:
//   1. denies write-intent prompts that lack the dispatch marker (exit code 2),
//   2. resolves a tier (search / execute / think) for the prompt,
//   3. logs the recommendation and prints an advisory `systemMessage`, or `{}`
//      when there is nothing to say.
// Every path other than the deny path exits with code 0.
try {
  const input = JSON.parse(readFileSync('/dev/stdin', 'utf8'));

  if (input.tool_name !== 'Agent') {
    process.stdout.write('{}');
    process.exit(0);
  }

  const ti = input.tool_input || {};
  // Use the raw prompt for dispatch-marker and write-intent checks (before lowercasing).
  const rawPrompt = `${ti.description || ''} ${ti.prompt || ''}`;
  const text = rawPrompt.toLowerCase();
  const subType = (ti.subagent_type || '').toLowerCase();
  const currentModel = (ti.model || '').toLowerCase();

  // ── Dispatch pipeline gate ─────────────────────────────────────────────────
  // Block write-capable agents that did NOT come through src/dispatch.mjs.
  // Legitimate dispatches have a <!-- dual-brain-dispatch: <runId> --> marker
  // prepended to the prompt by dispatch() / dispatchDualBrain().
  //
  // Skip enforcement when already inside a subagent (agent_id present) —
  // nested agent spawns from within a work agent are fine.
  const hasMarker = DISPATCH_MARKER_RE.test(rawPrompt);
  const inSubagent = Boolean(input.agent_id);

  if (!inSubagent && !hasMarker && !isReadOnly(rawPrompt)) {
    // Write-intent detected in HEAD session without the dispatch marker → block.
    const denyReason =
      '[dual-brain] Write-capable agents must go through dispatch. Use: dual-brain go "task"';
    process.stdout.write(JSON.stringify({
      hookSpecificOutput: {
        hookEventName: 'PreToolUse',
        permissionDecision: 'deny',
        permissionDecisionReason: denyReason,
      },
    }));
    // NOTE(review): with a blocking exit code (2) the hook runner is documented
    // to surface stderr rather than stdout JSON — confirm against the hooks
    // reference. Mirroring the reason on stderr keeps it visible either way.
    process.stderr.write(`${denyReason}\n`);
    process.exit(2);
  }
  // (If hasMarker is true OR the prompt is read-only we fall through to normal
  // tier-routing logic below.)

  // Compute prompt hash early for duplicate detection and logging
  const promptHash = computePromptHash(ti);

  // Burst detection — suppress noise during wave launches (3+ agents in 90s)
  const burstMode = detectBurst();

  // Check for duplicate agent dispatch before tier classification
  const duplicate = checkDuplicate(promptHash);
  let duplicateWarning = null;
  if (duplicate) {
    const minutesAgo = Math.round((Date.now() - Date.parse(duplicate.timestamp)) / 60000);
    if (burstMode) {
      // In burst mode, only warn on exact hash matches (same description+prompt)
      if (duplicate.prompt_hash === promptHash) {
        duplicateWarning = `Heads up — a similar task ran ${minutesAgo} minute${minutesAgo !== 1 ? 's' : ''} ago (wave detected). Reuse that result if the scope hasn't changed.`;
      }
      // Otherwise suppress — similar-but-different agents in a wave are expected
    } else {
      duplicateWarning = `Heads up — a similar task ran ${minutesAgo} minute${minutesAgo !== 1 ? 's' : ''} ago. Reuse that result if the scope hasn't changed.`;
    }
  }

  // Without a readable config there is nothing to recommend: stay silent.
  let config;
  try {
    config = JSON.parse(readFileSync(CONFIG_FILE, 'utf8'));
  } catch {
    process.stdout.write('{}');
    process.exit(0);
  }

  const driftWarning = checkPricingDrift(config);

  const intelligence = config.model_intelligence || {};
  const defaults = config.routing_rules?.subagent_defaults || {};

  // An explicit subagent_type → tier mapping from the config wins over keywords.
  let tier = null;
  for (const [key, val] of Object.entries(defaults)) {
    if (subType === key.toLowerCase()) { tier = val; break; }
  }

  // Optional message parts, filled in as the tier gets resolved.
  let balanceHint = null;
  let failureMessage = null;
  let autoStatus = null;

  // Join every non-empty part; `msg` may be null when only warnings exist.
  const prependWarnings = (msg) =>
    [duplicateWarning, driftWarning, failureMessage, msg, autoStatus, balanceHint]
      .filter(Boolean)
      .join('\n\n');

  // Load profile early so all log entries can reference it
  const profileName = loadProfile();
  const profileSettings = PROFILE_SETTINGS[profileName] || PROFILE_SETTINGS.balanced;

  // Log one recommendation with the fields shared by every call site.
  const record = (loggedTier, recommended, followed) => logRecommendation({
    tier: loggedTier,
    recommended,
    actual: currentModel,
    promptHash,
    followed,
    profile: profileName,
  });

  // The model already matches the tier: log it, emit only pending warnings, stop.
  const finishFollowed = (loggedTier, recommended) => {
    record(loggedTier, recommended, true);
    const onlyWarnings = prependWarnings(null);
    process.stdout.write(onlyWarnings ? JSON.stringify({ systemMessage: onlyWarnings }) : '{}');
    process.exit(0);
  };

  const EXECUTE_WORDS = /\b(edit|write|fix|implement|modify|refactor|delete|commit|test|build|run|add|update|create)\b/i;
  const hasThink = THINK_WORDS.test(text);

  // Multi-tier detection — only when tier is not already resolved from subagent_defaults
  if (!tier) {
    const hasExecute = EXECUTE_WORDS.test(text);
    const hasSearch = SEARCH_WORDS.test(text);

    const detectedTiers = [
      hasSearch && 'search',
      hasExecute && 'execute',
      hasThink && 'think',
    ].filter(Boolean);

    if (detectedTiers.length > 1) {
      const splitMsg = `This spans ${detectedTiers.join(' + ')} work. Consider splitting: ` +
        (hasSearch ? 'search first (haiku), ' : '') +
        (hasExecute ? 'then execute edits (sonnet), ' : '') +
        (hasThink ? 'keep planning/review on the main session (opus).' : '');
      const fullMsg = prependWarnings(splitMsg.replace(/, $/, '.'));
      record(detectedTiers.join('+'), null, false);
      process.stdout.write(JSON.stringify({ systemMessage: fullMsg }));
      process.exit(0);
    }

    // At most one keyword family matched; default to execute when none did.
    if (hasThink) tier = 'think';
    else if (hasExecute) tier = 'execute';
    else if (hasSearch) tier = 'search';
    else tier = 'execute';
  }

  // Risk classification from file paths in description
  const filePaths = extractPaths(ti.description || '');
  const riskResult = classifyRisk(filePaths);

  // Bias high/critical risk toward think tier
  if ((riskResult.level === 'critical' || riskResult.level === 'high') && tier !== 'think') {
    tier = 'think';
    autoStatus = riskResult.level === 'critical'
      ? `This touches ${riskResult.reason.split(':')[0].toLowerCase()} — recommending dual-brain review for safety.`
      : `Promoting to think tier — this is ${riskResult.reason.split(':')[0].toLowerCase()}.`;
  }

  // Failure loop detection
  const failureCheck = checkFailureLoop(promptHash);
  if (failureCheck.isLoop) {
    if (failureCheck.suggestion === 'promote_tier' && tier === 'execute') {
      tier = 'think';
      autoStatus = 'Escalating to think tier — this has failed before, let\'s take a different approach.';
    } else if (failureCheck.suggestion === 'escalate_to_dual_brain') {
      autoStatus = 'Repeated failures detected — recommending dual-brain review to diagnose the issue.';
    }
    failureMessage = `⚠️ This has failed ${failureCheck.count} times in the last 2 hours. Consider a dual-brain think session to diagnose the root cause.`;
  }

  // Apply profile-driven tier adjustments
  if (profileSettings.demote_think && tier === 'think' && !hasThink) {
    tier = 'execute';
  }
  if (profileSettings.promote_execute && tier === 'execute' && hasThink) {
    tier = 'think';
  }

  // Compute balance hint now that tier is resolved
  // In burst mode, skip balance hints — one hint per wave is enough
  if (!burstMode && detectProvider(currentModel) === 'claude') {
    const balance = quickPressureCheck(tier);
    const biasThreshold = profileSettings.bias >= 0 ? 10 : 20;
    if (balance && balance.claudeCalls > balance.openaiCalls * 2 && balance.claudeCalls > biasThreshold) {
      // No leading newlines: prependWarnings() already separates parts with a
      // blank line, so a "\n\n" prefix would double the gap.
      balanceHint = `💡 Claude is handling most work right now (${balance.claudeCalls} ${tier} calls vs ${balance.openaiCalls} GPT). For isolated tasks, consider routing to GPT to balance subscriptions.`;
    }
  }

  const expected = preferredModel(config, tier);

  if (tier === 'think') {
    // An empty model counts as think-capable here.
    const thinkModels = ['opus', 'gpt-5.5', 'o1', 'o3'];
    const isThink = !currentModel || thinkModels.some(m => currentModel.includes(m));
    if (isThink) {
      finishFollowed(tier, expected);
    }
    // If we get here, a non-think model is being used for think work
    const thinkBestFor = intelligence[expected || 'opus']?.best_for;
    const thinkBestForSuffix = thinkBestFor ? ` (best for: ${thinkBestFor})` : '';
    const msg = `This looks like think-level work (architecture/review/planning) — better kept on the main session (${expected || 'opus'}${thinkBestForSuffix}) rather than delegated to ${currentModel}.`;
    record(tier, expected, false);
    process.stdout.write(JSON.stringify({ systemMessage: prependWarnings(msg) }));
  } else {
    // No configured model for this tier, or the right one is already in use.
    if (!expected || currentModel.includes(expected)) {
      finishFollowed(tier, expected);
    }
    const savings = tier === 'search' ? 'Haiku is 19x cheaper than Opus for read-only lookups.' : 'Sonnet is 5x cheaper than Opus for implementation work.';
    const bestFor = intelligence[expected]?.best_for;
    const bestForSuffix = bestFor ? ` (best for: ${bestFor})` : '';
    const msg = `This looks like ${tier} work — use ${expected}${bestForSuffix} instead of ${currentModel || 'opus (inherited)'}. ${savings}`;
    record(tier, expected, false);
    process.stdout.write(JSON.stringify({ systemMessage: prependWarnings(msg) }));
  }
} catch (err) {
  // Any unexpected failure (bad stdin JSON, helper error) degrades to an
  // advisory message instead of blocking the tool call.
  process.stdout.write(JSON.stringify({
    systemMessage: `[Tier Enforcer] Config error: ${err?.message?.slice(0, 100) || 'unknown'}. Falling back to main-session judgment.`
  }));
}
process.exit(0);
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* failure-detector.mjs — Detects repeated failure loops for adaptive routing.
|
|
4
|
+
*
|
|
5
|
+
* Exports:
* computePromptHash(toolInput) → 12-char hex prompt hash
|
|
6
|
+
* checkFailureLoop(promptHash, tier?) → { isLoop, count, weightedScore, suggestion }
|
|
7
|
+
* recordFailure(promptHash, tier, reason) → void
|
|
8
|
+
* pruneOldFailures() → { pruned, remaining }
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { createHash } from 'crypto';
|
|
12
|
+
import { readFileSync, appendFileSync, writeFileSync, renameSync, unlinkSync } from 'fs';
|
|
13
|
+
import { dirname, join } from 'path';
|
|
14
|
+
import { fileURLToPath } from 'url';
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
18
|
+
const LEDGER_FILE = join(__dirname, 'decision-ledger.jsonl');
|
|
19
|
+
|
|
20
|
+
/**
 * Canonical prompt hash shared by the hooks for failure-loop correlation.
 * Hooks that record failures and hooks that look them up must both call this
 * function, otherwise their hashes will not match.
 *
 * The hash covers `description` immediately followed by `prompt` (no
 * separator); a missing or falsy field contributes an empty string.
 *
 * @param {object} toolInput — the raw tool_input from the hook payload
 * @returns {string} first 12 hex characters of the SHA-256 digest
 */
function computePromptHash(toolInput) {
  const description = toolInput?.description || '';
  const prompt = toolInput?.prompt || '';
  const digest = createHash('sha256').update(description + prompt).digest('hex');
  return digest.slice(0, 12);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Weight a failure by its age so that older failures count for less.
 *
 *   age ≤ 30 min → 1.0
 *   age ≤ 60 min → 0.5
 *   otherwise    → 0.25
 *
 * The function has no upper age limit of its own; the caller is expected to
 * drop entries outside its time window before weighting them.
 *
 * @param {number} timestampMs - failure time in epoch milliseconds
 * @param {number} now - reference time in epoch milliseconds
 * @returns {number} 1.0, 0.5 or 0.25
 */
function decayWeight(timestampMs, now) {
  const MINUTE_MS = 60 * 1000;
  const ageMin = (now - timestampMs) / MINUTE_MS;

  const steps = [
    [30, 1.0],
    [60, 0.5],
  ];
  for (const [maxAgeMin, weight] of steps) {
    if (ageMin <= maxAgeMin) return weight;
  }
  return 0.25; // older than 60 min
}
|
|
44
|
+
|
|
45
|
+
/**
 * Decide whether a prompt is stuck in a failure loop.
 *
 * Scans the decision ledger for entries with the same prompt hash that are
 * marked `success: false` and fall within the last two hours, weighting each
 * by age via decayWeight(). A weighted score of 2.0 or more counts as a loop.
 * An unreadable ledger and malformed lines are treated as "no failures".
 *
 * @param {string} promptHash - hash from computePromptHash()
 * @param {string} [tier] - when given, entries recorded for a different tier
 *   are ignored (entries without a tier still count)
 * @returns {{isLoop: boolean, count: number, weightedScore: number, suggestion: (string|null)}}
 *   `suggestion` is 'promote_tier' when the most recent counted failure was on
 *   the execute tier, 'escalate_to_dual_brain' otherwise, and null when there
 *   is no loop.
 */
function checkFailureLoop(promptHash, tier) {
  if (!promptHash) return { isLoop: false, count: 0, weightedScore: 0, suggestion: null };

  const now = Date.now();
  const twoHoursAgo = now - 2 * 60 * 60 * 1000;
  let count = 0;
  let weightedScore = 0;
  let lastTier = null;

  try {
    const lines = readFileSync(LEDGER_FILE, 'utf8').split('\n').filter(Boolean);
    for (const line of lines) {
      try {
        const entry = JSON.parse(line);
        if (entry.prompt_hash !== promptHash) continue;
        if (entry.success !== false) continue;

        const entryTime = Date.parse(entry.timestamp);
        // Written as a negated >= so a missing or unparseable timestamp (NaN)
        // is rejected instead of slipping through the window check.
        if (!(entryTime >= twoHoursAgo)) continue;

        // If tier is provided, only count matching tiers
        if (tier && entry.tier && entry.tier !== tier) continue;

        count += 1;
        weightedScore += decayWeight(entryTime, now);
        lastTier = entry.tier;
      } catch {}
    }
  } catch {}

  if (weightedScore < 2.0) return { isLoop: false, count, weightedScore, suggestion: null };

  const suggestion = lastTier === 'execute'
    ? 'promote_tier'
    : 'escalate_to_dual_brain';

  return { isLoop: true, count, weightedScore, suggestion };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Append a failure record for a prompt to the decision ledger.
 * The write is best-effort: an append error is silently ignored.
 *
 * @param {string} promptHash - hash from computePromptHash()
 * @param {string} tier - tier the failed attempt ran on
 * @param {string} [reason] - short failure reason; stored as 'unknown' when falsy
 * @returns {void}
 */
function recordFailure(promptHash, tier, reason) {
  const record = {
    type: 'failure',
    timestamp: new Date().toISOString(),
    prompt_hash: promptHash,
    tier,
    reason: reason || 'unknown',
    success: false,
  };
  const line = `${JSON.stringify(record)}\n`;

  try {
    appendFileSync(LEDGER_FILE, line);
  } catch {}
}
|
|
95
|
+
|
|
96
|
+
/**
 * Drop `failure` entries older than 24 hours from the decision ledger.
 *
 * Lines that are not failures, are newer than the cutoff, or cannot be parsed
 * are kept as they are. The surviving lines are written to a per-process temp
 * file which is then renamed over the ledger.
 *
 * @returns {{pruned: number, remaining: number}} how many lines were removed
 *   and kept; both are 0 when the ledger is missing or the rewrite failed.
 */
function pruneOldFailures() {
  const cutoff = Date.now() - 24 * 60 * 60 * 1000;
  const tmpFile = `${LEDGER_FILE}.tmp.${process.pid}`;
  const kept = [];
  let pruned = 0;

  try {
    for (const line of readFileSync(LEDGER_FILE, 'utf8').split('\n')) {
      if (!line) continue;

      let expired = false;
      try {
        const entry = JSON.parse(line);
        expired = entry.type === 'failure' && Date.parse(entry.timestamp) < cutoff;
      } catch {
        // Keep unparseable lines to avoid data loss
      }

      if (expired) {
        pruned += 1;
      } else {
        kept.push(line);
      }
    }

    writeFileSync(tmpFile, kept.length > 0 ? `${kept.join('\n')}\n` : '');
    renameSync(tmpFile, LEDGER_FILE);
  } catch (err) {
    // A missing ledger never created a temp file; any other failure may have
    // left one behind, so try to remove it.
    if (err.code !== 'ENOENT') {
      try { unlinkSync(tmpFile); } catch {}
    }
    return { pruned: 0, remaining: 0 };
  }

  return { pruned, remaining: kept.length };
}
|
|
137
|
+
|
|
138
|
+
export { computePromptHash, checkFailureLoop, recordFailure, pruneOldFailures };
|