dual-brain 7.1.2 → 7.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/dual-brain.mjs +38 -28
- package/mcp-server/index.mjs +1 -1
- package/package.json +44 -4
- package/src/decide.mjs +32 -0
- package/src/index.mjs +1 -1
- package/src/profile.mjs +7 -4
- package/src/session.mjs +50 -10
- package/src/tui.mjs +10 -1
- package/hooks/agent-fleet.mjs +0 -659
- package/hooks/context-guard.mjs +0 -468
- package/hooks/dag-scheduler.mjs +0 -1249
- package/hooks/head-guard.sh +0 -41
- package/hooks/hook-dispatch.mjs +0 -254
- package/hooks/ledger-analysis.mjs +0 -337
- package/hooks/parallelism-scaler.mjs +0 -572
- package/hooks/quality-tiers.mjs +0 -642
- package/src/test.mjs +0 -1374
package/hooks/head-guard.sh
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bash
# head-guard.sh — DEPRECATED. Replaced by head-guard.mjs.
#
# Retained for reference only. Per the original authors' note, this script never
# worked correctly: it reads the tool name from the CLAUDE_TOOL_NAME environment
# variable, whereas Claude Code delivers tool info as JSON on stdin. The
# replacement (hooks/head-guard.mjs) reads stdin JSON, tells HEAD from subagent
# via `agent_id`, and returns the permissionDecision block format.
#
# Do not use this file. See hooks/head-guard.mjs instead.
#
# Exit status: 0 = allow, 2 = blocked (message written to stderr).

readonly BLOCK_MSG='[dual-brain] HEAD cannot use this tool directly. Dispatch via: dual-brain go "task description"'

# ── 1. Role check ────────────────────────────────────────────────────────────
# Restrictions apply only to a session explicitly marked as HEAD. An unset or
# empty DUAL_BRAIN_ROLE (non-dual-brain usage) and any other role (work agents)
# are allowed through untouched.
[[ "${DUAL_BRAIN_ROLE:-}" == "head" ]] || exit 0

# ── 2. Tool name check ───────────────────────────────────────────────────────
# HEAD may not edit files or run Bash directly; it reads with the Read tool and
# delegates everything else through `dual-brain go`.
tool_name="${CLAUDE_TOOL_NAME:-}"
if [[ "${tool_name}" =~ ^(Edit|Write|NotebookEdit|Bash)$ ]]; then
  printf '%s\n' "${BLOCK_MSG}" >&2
  exit 2
fi

# ── 3. Default: allow ────────────────────────────────────────────────────────
exit 0
|
package/hooks/hook-dispatch.mjs
DELETED
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* hook-dispatch.mjs — Single entry point for the HEAD to dispatch work.
|
|
4
|
-
*
|
|
5
|
-
* Classifies the task, checks budget, decides if a strategist is needed,
|
|
6
|
-
* then launches the appropriate agent and returns a compressed result.
|
|
7
|
-
*
|
|
8
|
-
* CLI: node hooks/hook-dispatch.mjs --task "..." [--files a.js,b.js]
|
|
9
|
-
* [--tier execute|think|search] [--force-provider claude|openai]
|
|
10
|
-
* [--dry-run] [--pipeline]
|
|
11
|
-
*
|
|
12
|
-
* Exports: dispatch
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
import { spawnSync } from 'node:child_process';
|
|
16
|
-
import { randomBytes } from 'node:crypto';
|
|
17
|
-
import { fileURLToPath } from 'node:url';
|
|
18
|
-
import { dirname } from 'node:path';
|
|
19
|
-
|
|
20
|
-
import { classifyTask, selectModelEffort } from './task-classifier.mjs';
|
|
21
|
-
import { chooseProvider, getProviderStatus } from './budget-balancer.mjs';
|
|
22
|
-
import { recordDecision, getInsights } from './decision-ledger.mjs';
|
|
23
|
-
|
|
24
|
-
// agent-fleet.mjs is treated as optional: when it cannot be loaded the binding
// stays null and pipeline mode falls back to a single-step plan.
let getAgentRecommendation = null;
try {
  const fleetModule = await import('./agent-fleet.mjs');
  ({ getAgentRecommendation } = fleetModule);
} catch {
  // Best-effort import — a missing or broken module is not an error here.
}

const __dirname = dirname(fileURLToPath(import.meta.url));

// ─── Constants ────────────────────────────────────────────────────────────────

// Hedging phrases in a task description that trigger the strategist check.
const UNCERTAINTY_MARKERS = /\b(maybe|not sure|could be|might need|possibly|unclear|unsure|not certain)\b/i;

// Human-readable duration estimate reported back for each tier.
const TIER_DURATION = { search: '~15s', execute: '~45s', think: '~90s' };
|
|
36
|
-
|
|
37
|
-
// ─── Strategist check ─────────────────────────────────────────────────────────
|
|
38
|
-
|
|
39
|
-
/**
 * Decide, from heuristics only, whether a task warrants strategic review.
 *
 * Checks run in order and the first match wins: critical risk, complex +
 * high risk, uncertainty wording in the description, more than five files,
 * and finally a >30% historical failure rate (with at least five samples)
 * for the task's intent in the decision ledger.
 *
 * @param {{risk: string, complexity: string, intent?: string}} taskProfile
 * @param {string[]} files - files in scope for the task
 * @param {string} description - free-text task description
 * @returns {{needed: boolean, reason: (string|null)}}
 */
function needsStrategist(taskProfile, files, description) {
  const flagged = (reason) => ({ needed: true, reason });

  if (taskProfile.risk === 'critical') {
    return flagged('critical risk requires strategic review');
  }
  if (taskProfile.complexity === 'complex' && taskProfile.risk === 'high') {
    return flagged('complex + high-risk task');
  }
  if (UNCERTAINTY_MARKERS.test(description)) {
    return flagged('uncertainty markers detected in description');
  }
  if (files.length > 5) {
    return flagged(`${files.length} files exceed 5-file threshold`);
  }

  // Ledger lookup is best-effort: any failure reading insights means "no signal".
  try {
    const taskType = taskProfile.intent;
    const perIntent = getInsights().task_patterns?.[taskType];
    if (perIntent) {
      for (const stats of Object.values(perIntent)) {
        const failRate = 1 - (stats.success / stats.total);
        if (stats.total >= 5 && failRate > 0.3) {
          return flagged(`${Math.round(failRate * 100)}% failure rate for ${taskType} tasks in ledger`);
        }
      }
    }
  } catch {}

  return { needed: false, reason: null };
}
|
|
65
|
-
|
|
66
|
-
// ─── Strategist (heuristic-only, no LLM call) ────────────────────────────────
|
|
67
|
-
|
|
68
|
-
/**
 * Build the advisory warnings attached to a task flagged for strategic review.
 * Purely heuristic — no model call is made.
 *
 * @param {{risk?: string, complexity?: string, intent?: string}} taskProfile
 * @returns {string[]} warnings in fixed order: risk, complexity, security
 */
function getStrategistWarnings(taskProfile) {
  const rules = [
    [taskProfile.risk === 'critical', 'Critical risk — validate with dual-brain review'],
    [taskProfile.complexity === 'complex', 'Complex task — consider decomposing'],
    [taskProfile.intent === 'security', 'Security-sensitive — require review before merge'],
  ];
  return rules
    .filter(([applies]) => applies)
    .map(([, message]) => message);
}
|
|
75
|
-
|
|
76
|
-
// ─── Core dispatch ────────────────────────────────────────────────────────────
|
|
77
|
-
|
|
78
|
-
/**
 * Classify a task, pick a provider/model, and either preview the plan or run it.
 *
 * @param {object} [options]
 * @param {string} options.task - task description (required)
 * @param {string[]} [options.files=[]] - files in scope
 * @param {?string} [options.tier=null] - force a tier instead of deriving it from intent
 * @param {?string} [options.forceProvider=null] - force 'claude' or 'openai'
 * @param {boolean} [options.dryRun=false] - return the plan without dispatching
 * @param {boolean} [options.pipeline=false] - return a multi-step pipeline plan (never dispatches)
 * @returns {Promise<object>} plan (`dispatched: false`) or result (`dispatched: true`,
 *   with `agentResult` truncated to 500 characters)
 * @throws {Error} when `task` is missing
 */
async function dispatch(options = {}) {
  const {
    task: description,
    files = [],
    tier: forceTier = null,
    forceProvider = null,
    dryRun = false,
    pipeline = false,
  } = options;

  if (!description) throw new Error('--task is required');

  const taskId = randomBytes(4).toString('hex');

  // 1. Classify
  const taskProfile = classifyTask(description, { files });
  const tier = forceTier || (
    taskProfile.intent === 'search' || taskProfile.intent === 'explain' ? 'search'
    : taskProfile.intent === 'architecture' || taskProfile.intent === 'security' || taskProfile.intent === 'planning' ? 'think'
    : 'execute'
  );

  // 2. Budget check
  const budgetRec = chooseProvider({ tier, contextCoupling: forceProvider === 'claude' ? 'high' : 'low' });
  const provider = forceProvider || budgetRec.provider;
  const status = getProviderStatus();
  const providerPressure = status[provider]?.[tier]?.effectivePressure ?? 0;
  const modelRec = selectModelEffort(taskProfile, { budgetPressure: providerPressure });
  const model = provider === 'claude' ? modelRec.claude.model : modelRec.openai.model;
  const effort = provider === 'claude' ? (modelRec.claude.effort || taskProfile.effort) : modelRec.openai.effort;

  // 3. Risk check (heuristic only, no LLM cost)
  const strategistCheck = needsStrategist(taskProfile, files, description);
  const warnings = strategistCheck.needed ? getStrategistWarnings(taskProfile) : [];

  // 4. Pipeline mode — plan only, nothing is launched
  if (pipeline) {
    const pipelineRec = getAgentRecommendation
      ? getAgentRecommendation(description, taskProfile.risk, taskProfile.complexity)
      : { pipeline: [tier === 'think' ? 'planner' : 'worker'], rationale: 'direct dispatch', preset: null };

    return {
      dispatched: false,
      dryRun: true,
      pipeline: true,
      taskId,
      recommendation: taskProfile,
      steps: pipelineRec.pipeline,
      rationale: pipelineRec.rationale,
      provider,
      model,
      strategistNeeded: strategistCheck.needed,
      reason: pipelineRec.rationale,
    };
  }

  // 5. Dry-run
  if (dryRun) {
    return {
      dispatched: false,
      dryRun: true,
      taskId,
      recommendation: taskProfile,
      provider,
      model,
      tier,
      effort,
      strategistNeeded: strategistCheck.needed,
      reason: strategistCheck.needed
        ? strategistCheck.reason
        : `${taskProfile.risk} risk, ${taskProfile.complexity} task → direct dispatch via ${provider}/${model}`,
    };
  }

  // 6. Build prompt for worker agent
  const workerPrompt = [
    warnings.length ? `Warnings: ${warnings.join('; ')}` : null,
    `Task: ${description}`,
    files.length ? `Files in scope: ${files.join(', ')}` : null,
    `Risk: ${taskProfile.risk} | Complexity: ${taskProfile.complexity} | Tier: ${tier}`,
    `Return a concise JSON result: { "done": bool, "filesChanged": [], "notes": "..." }`,
  ].filter(Boolean).join('\n');

  // 7. Dispatch to provider
  let agentResult = null;

  if (provider === 'claude') {
    const claudeModelId = model === 'opus' ? 'claude-opus-4-5'
      : model === 'haiku' ? 'claude-haiku-4-5'
      : 'claude-sonnet-4-5';
    const result = spawnSync(
      'claude',
      ['--model', claudeModelId, '--print', '-p', workerPrompt],
      { encoding: 'utf8', timeout: 120_000 },
    );
    if (result.error) {
      // spawnSync reports launch failures (e.g. CLI not on PATH) and timeouts
      // through `error`; status is null then, so surface the real cause.
      agentResult = `claude dispatch error: ${result.error.message}`;
    } else if (result.status === 0) {
      agentResult = result.stdout?.trim();
    } else {
      // status is null when the child was killed by a signal.
      const exitLabel = result.status ?? `signal ${result.signal}`;
      agentResult = `exit ${exitLabel}: ${result.stderr?.trim()}`;
    }
  } else {
    try {
      const { dispatchGptTask } = await import('./gpt-work-dispatcher.mjs');
      const gptResult = await dispatchGptTask({ task: workerPrompt, model, tier, files });
      agentResult = typeof gptResult === 'object' ? JSON.stringify(gptResult) : String(gptResult);
    } catch (err) {
      agentResult = `gpt dispatch error: ${err.message}`;
    }
  }

  // 8. Record decision
  const decisionId = recordDecision({
    task_type: taskProfile.intent,
    provider,
    tier,
    model,
    effort,
    reason: budgetRec.reason,
    followed: true,
  });

  const estimatedDuration = TIER_DURATION[tier] || '~60s';

  return {
    dispatched: true,
    taskId,
    decisionId,
    provider,
    model,
    tier,
    effort,
    warnings,
    reason: strategistCheck.needed
      ? `${strategistCheck.reason} — dispatched with warnings via ${provider}/${model}`
      : `${taskProfile.risk} risk, ${taskProfile.complexity} ${taskProfile.intent} → direct dispatch`,
    estimatedDuration,
    agentResult: agentResult?.slice(0, 500), // truncate for head context
  };
}
|
|
213
|
-
|
|
214
|
-
// ─── CLI ──────────────────────────────────────────────────────────────────────
|
|
215
|
-
|
|
216
|
-
// Run as a CLI only when this file is the entry script. fileURLToPath decodes
// percent-escapes and yields a platform path, so the comparison with argv[1]
// also holds for paths containing spaces and on Windows.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
  const args = process.argv.slice(2);

  // Flags that consume the following token as their value.
  const VALUE_FLAGS = ['--task', '--files', '--tier', '--force-provider'];

  // Value given as `--name value`; null when the flag is absent, is the last
  // token, or is followed by another flag.
  const flag = (name) => {
    const i = args.indexOf(name);
    if (i === -1) return null;
    const next = args[i + 1];
    return next !== undefined && !next.startsWith('--') ? next : null;
  };

  // Value given as `--name=value`, falling back to `--name value`.
  const flagVal = (name) => {
    const explicit = args.find((a) => a.startsWith(`${name}=`));
    if (explicit) return explicit.slice(name.length + 1);
    return flag(name);
  };

  // A bare positional argument may stand in for --task, but a token that is
  // merely the value of another flag must not be mistaken for the task.
  const positional = args.find((a, i) => !a.startsWith('--') && !VALUE_FLAGS.includes(args[i - 1]));
  const task = flagVal('--task') || positional;
  const filesArg = flagVal('--files');
  const files = filesArg ? String(filesArg).split(',').map((f) => f.trim()) : [];

  if (!task) {
    console.error('Usage: node hooks/hook-dispatch.mjs --task "description" [--files a.js,b.js] [--tier execute|think|search] [--force-provider claude|openai] [--dry-run] [--pipeline]');
    process.exit(1);
  }

  try {
    const result = await dispatch({
      task,
      files,
      tier: flagVal('--tier') || null,
      forceProvider: flagVal('--force-provider') || null,
      dryRun: args.includes('--dry-run'),
      pipeline: args.includes('--pipeline'),
    });
    console.log(JSON.stringify(result, null, 2));
  } catch (err) {
    console.error('dispatch error:', err.message);
    process.exit(1);
  }
}
|
|
253
|
-
|
|
254
|
-
export { dispatch };
|
package/hooks/ledger-analysis.mjs
DELETED
|
@@ -1,337 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* ledger-analysis.mjs — Analyze the decision ledger to improve routing over time.
|
|
4
|
-
*
|
|
5
|
-
* Reads decision-ledger.jsonl, detects patterns, and emits routing weight
|
|
6
|
-
* adjustments that the task-classifier can consume.
|
|
7
|
-
*
|
|
8
|
-
* CLI:
|
|
9
|
-
* node hooks/ledger-analysis.mjs # full analysis + write weights
|
|
10
|
-
* node hooks/ledger-analysis.mjs --summary # one-paragraph summary
|
|
11
|
-
* node hooks/ledger-analysis.mjs --since 7d # only last N days
|
|
12
|
-
* node hooks/ledger-analysis.mjs --dry-run # analyze but don't write
|
|
13
|
-
*/
|
|
14
|
-
|
|
15
|
-
import { existsSync, readFileSync, mkdirSync, writeFileSync } from 'fs';
|
|
16
|
-
import { dirname, join } from 'path';
|
|
17
|
-
import { fileURLToPath } from 'url';
|
|
18
|
-
|
|
19
|
-
// Directory containing this module; the ledger lives alongside it.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Input: JSONL log of decision/outcome entries.
const LEDGER_FILE = join(__dirname, 'decision-ledger.jsonl');

// Output: analysis results, written one level up under .dualbrain/.
const WEIGHTS_FILE = join(__dirname, '..', '.dualbrain', 'routing-weights.json');

// Fewer ledger records than this and the analysis reports insufficient data.
const MIN_SAMPLES = 10;

// Success rates strictly below this value are treated as failing.
const FAIL_THRESHOLD = 0.60;

// A provider holding at least this share of a tier's tasks counts as imbalanced.
const IMBALANCE_THRESHOLD = 0.80;
|
|
25
|
-
|
|
26
|
-
// ─── Ledger loading ───────────────────────────────────────────────────────────
|
|
27
|
-
|
|
28
|
-
/**
 * Read the decision ledger and pair every decision with its outcome.
 *
 * Lines that are not valid JSON are skipped. When `sinceMs` is non-zero,
 * entries whose timestamp is earlier than it are dropped before pairing.
 * If several outcomes reference the same decision, the last one read wins.
 *
 * @param {number} [sinceMs=0] - epoch-millisecond cutoff; 0 disables filtering
 * @returns {object[]} decision entries, each with an `outcome` field
 *   (the matching outcome entry, or null); [] if the ledger is missing or unreadable
 */
function loadLedger(sinceMs = 0) {
  if (!existsSync(LEDGER_FILE)) return [];

  let text;
  try {
    text = readFileSync(LEDGER_FILE, 'utf8');
  } catch {
    return [];
  }

  const decisionsById = {};
  const outcomesByDecisionId = {};

  text.split('\n').forEach((line) => {
    if (!line) return;
    try {
      const entry = JSON.parse(line);
      const isTooOld = sinceMs && new Date(entry.timestamp).getTime() < sinceMs;
      if (isTooOld) return;
      switch (entry.type) {
        case 'decision':
          decisionsById[entry.id] = entry;
          break;
        case 'outcome':
          outcomesByDecisionId[entry.decision_id] = entry;
          break;
        default:
          break;
      }
    } catch {}
  });

  const paired = [];
  for (const decision of Object.values(decisionsById)) {
    paired.push({ ...decision, outcome: outcomesByDecisionId[decision.id] || null });
  }
  return paired;
}
|
|
51
|
-
|
|
52
|
-
/**
 * Convert a relative window such as "7d" or "12h" into an absolute cutoff.
 *
 * @param {?string} flag - `<digits>d` (days) or `<digits>h` (hours)
 * @returns {number} epoch milliseconds of the cutoff, or 0 (no filtering) when
 *   the flag is missing, not a string, or not in the expected form
 */
function parseSince(flag) {
  // Non-string input (null, undefined, or a number/boolean passed in
  // programmatically) has no .match(); treat it as "no filter" instead of throwing.
  if (typeof flag !== 'string') return 0;

  const match = /^(\d+)([dh])$/.exec(flag);
  if (!match) return 0;

  const [, amount, unit] = match;
  const unitMs = unit === 'd' ? 86400000 : 3600000;
  return Date.now() - Number.parseInt(amount, 10) * unitMs;
}
|
|
59
|
-
|
|
60
|
-
// ─── Analysis functions ───────────────────────────────────────────────────────
|
|
61
|
-
|
|
62
|
-
/**
 * Tally success rates per provider/model and intent.
 *
 * Records with no outcome, or whose outcome has `success === null`, are ignored.
 *
 * @param {object[]} records - ledger records carrying an `outcome` field
 * @returns {Array<{provider: *, model: *, intent: string, success: number,
 *   total: number, rate: (number|null)}>} one entry per combo, in first-seen order
 */
function analyzeSuccessRates(records) {
  const tallies = new Map();

  for (const { outcome, provider, model, task_type: taskType } of records) {
    if (!outcome || outcome.success === null) continue;

    const intent = taskType || 'unknown';
    const key = `${provider}/${model}::${intent}`;

    let tally = tallies.get(key);
    if (!tally) {
      tally = { provider, model, intent, success: 0, total: 0 };
      tallies.set(key, tally);
    }
    tally.total += 1;
    if (outcome.success) tally.success += 1;
  }

  return Array.from(tallies.values(), (tally) => ({
    ...tally,
    rate: tally.total ? tally.success / tally.total : null,
  }));
}
|
|
78
|
-
|
|
79
|
-
/**
 * Suggest effort-level changes per intent from observed outcomes.
 *
 * Records are grouped by intent + effort (effort defaults to 'medium'); groups
 * with fewer than three scored outcomes are ignored. A group is:
 *   - over-prescribed when it succeeds almost always with almost no retries
 *     (high → medium, medium → low);
 *   - under-prescribed when its success rate is below FAIL_THRESHOLD and it
 *     averages more than 0.5 retries (low → medium, medium → high).
 *
 * @param {object[]} records - ledger records carrying an `outcome` field
 * @returns {Array<{intent: string, current: string, suggested: string,
 *   samples: number, reason: string}>}
 */
function analyzeEffortCalibration(records) {
  const EFFORT_ORDER = ['low', 'medium', 'high'];

  const buckets = {};
  for (const r of records) {
    if (!r.outcome || r.outcome.success === null) continue;
    const intent = r.task_type || 'unknown';
    const effort = r.effort || 'medium';
    const key = `${intent}::${effort}`;
    if (!buckets[key]) buckets[key] = { intent, effort, success: 0, total: 0, totalRetries: 0 };
    buckets[key].total++;
    if (r.outcome.success) buckets[key].success++;
    buckets[key].totalRetries += r.outcome.retries || 0;
  }

  const suggestions = [];

  for (const b of Object.values(buckets)) {
    if (b.total < 3) continue;
    const rate = b.success / b.total;
    const avgRetries = b.totalRetries / b.total;

    // Over-prescribed: high success, no retries → downgrade
    if (b.effort === 'high' && rate >= 0.90 && avgRetries < 0.1) {
      suggestions.push({ intent: b.intent, current: b.effort, suggested: 'medium', samples: b.total,
        reason: `${Math.round(rate * 100)}% success with no retries at high effort, downgrade safe` });
    } else if (b.effort === 'medium' && rate >= 0.95 && avgRetries < 0.05) {
      suggestions.push({ intent: b.intent, current: b.effort, suggested: 'low', samples: b.total,
        reason: `${Math.round(rate * 100)}% success with no retries at medium, downgrade safe` });
    }

    // Under-prescribed: low success + high retries → upgrade.
    // Only efforts on the known ladder with a step above them qualify; an
    // unrecognized effort has no position, so no "upgrade" can be derived for it.
    const idx = EFFORT_ORDER.indexOf(b.effort);
    const canUpgrade = idx !== -1 && idx < EFFORT_ORDER.length - 1;
    if (canUpgrade && rate < FAIL_THRESHOLD && avgRetries > 0.5) {
      suggestions.push({ intent: b.intent, current: b.effort, suggested: EFFORT_ORDER[idx + 1], samples: b.total,
        reason: `${Math.round(rate * 100)}% success with avg ${avgRetries.toFixed(1)} retries, upgrade needed` });
    }
  }

  return suggestions;
}
|
|
126
|
-
|
|
127
|
-
/**
 * Flag tiers where a single provider handles a dominant share of tasks.
 *
 * Tiers with fewer than five records are skipped. A provider whose share is at
 * least IMBALANCE_THRESHOLD produces a suggestion to route work elsewhere.
 *
 * @param {object[]} records - ledger records (`tier` defaults to 'execute',
 *   `provider` to 'unknown')
 * @returns {Array<{tier: string, dominant: string, dominantShare: number,
 *   suggestion: string, reason: string}>} dominantShare is a whole percentage
 */
function analyzeProviderBalance(records) {
  const tiers = {};
  for (const r of records) {
    const tier = r.tier || 'execute';
    if (!tiers[tier]) tiers[tier] = {};
    const p = r.provider || 'unknown';
    tiers[tier][p] = (tiers[tier][p] || 0) + 1;
  }

  const suggestions = [];
  for (const [tier, providers] of Object.entries(tiers)) {
    const total = Object.values(providers).reduce((a, b) => a + b, 0);
    if (total < 5) continue;
    for (const [p, count] of Object.entries(providers)) {
      const share = count / total;
      if (share < IMBALANCE_THRESHOLD) continue;

      // Prefer another provider actually seen in this tier. When the dominant
      // provider is the only one, fall back to its counterpart — never to the
      // dominant provider itself.
      const fallback = p === 'openai' ? 'claude' : 'openai';
      const other = Object.keys(providers).find((k) => k !== p) || fallback;
      const sharePct = Math.round(share * 100);
      suggestions.push({
        tier,
        dominant: p,
        dominantShare: sharePct,
        suggestion: `route more ${tier} tasks to ${other}`,
        reason: `${p} handling ${sharePct}% of ${tier}, ${other} ${tier} tier underused`,
      });
    }
  }

  return suggestions;
}
|
|
157
|
-
|
|
158
|
-
/**
 * Rank provider/model + intent combos by success per thousand tokens.
 *
 * Only records with a scored outcome and a non-zero token count contribute,
 * and only combos with at least three such records are reported.
 *
 * @param {object[]} records - ledger records carrying an `outcome` field
 * @returns {object[]} combos with `rate`, `avgTokens` and `efficiency`
 *   (success rate divided by average tokens in thousands), most efficient first
 */
function analyzeCostEfficiency(records) {
  const combos = new Map();

  for (const { outcome, provider, model, task_type: taskType } of records) {
    if (!outcome || outcome.success === null) continue;

    const tokens = (outcome.actual_input_tokens || 0) + (outcome.actual_output_tokens || 0);
    if (!tokens) continue;

    const intent = taskType || 'unknown';
    const key = `${provider}/${model}::${intent}`;

    let combo = combos.get(key);
    if (!combo) {
      combo = { provider, model, intent, totalTokens: 0, success: 0, total: 0 };
      combos.set(key, combo);
    }
    combo.totalTokens += tokens;
    combo.total += 1;
    if (outcome.success) combo.success += 1;
  }

  const scored = [];
  for (const combo of combos.values()) {
    if (combo.total < 3) continue;
    const { success, total, totalTokens } = combo;
    scored.push({
      ...combo,
      rate: success / total,
      avgTokens: Math.round(totalTokens / total),
      // Simple efficiency score: success_rate / normalized_tokens
      efficiency: total ? (success / total) / (totalTokens / total / 1000) : 0,
    });
  }

  scored.sort((left, right) => right.efficiency - left.efficiency);
  return scored;
}
|
|
184
|
-
|
|
185
|
-
// ─── Recommendation generator ─────────────────────────────────────────────────
|
|
186
|
-
|
|
187
|
-
/**
 * Turn the individual analyses into routing recommendations and warnings.
 *
 * Produces three kinds of recommendation — `model_preference` (best model for
 * an intent when it beats the worst by at least 15 points), `effort_adjustment`
 * and `provider_balance` — plus `high_failure_rate` warnings for any combo with
 * five or more samples and a success rate below FAIL_THRESHOLD.
 *
 * @param {object[]} records - ledger records carrying an `outcome` field
 * @returns {{recommendations: object[], warnings: object[]}}
 */
function generateRecommendations(records) {
  const recommendations = [];
  const warnings = [];

  // --- Model preference: find best model per intent ---
  const successRates = analyzeSuccessRates(records);
  const intentBuckets = {};
  for (const s of successRates) {
    if (!intentBuckets[s.intent]) intentBuckets[s.intent] = [];
    intentBuckets[s.intent].push(s);
  }

  for (const [intent, combos] of Object.entries(intentBuckets)) {
    // Flag consistently failing combos first: this must not depend on having a
    // second model to compare against, otherwise an intent served by a single
    // failing model is never reported.
    for (const c of combos) {
      if (c.total >= 5 && c.rate !== null && c.rate < FAIL_THRESHOLD) {
        warnings.push({ type: 'high_failure_rate', model: c.model, intent: c.intent,
          rate: Math.round(c.rate * 100) / 100, samples: c.total });
      }
    }

    // A preference needs at least two comparable combos.
    const qualified = combos.filter((c) => c.total >= 3 && c.rate !== null);
    if (qualified.length < 2) continue;
    qualified.sort((a, b) => b.rate - a.rate);
    const best = qualified[0];
    const worst = qualified[qualified.length - 1];

    // Recommend best if clearly better
    if (best.rate - worst.rate >= 0.15 && best.total >= 3) {
      recommendations.push({
        type: 'model_preference',
        intent,
        provider: best.provider,
        model: best.model,
        confidence: Math.round(best.rate * 100) / 100,
        reason: `${Math.round(best.rate * 100)}% success rate vs ${Math.round(worst.rate * 100)}% for ${worst.provider}/${worst.model}`,
      });
    }
  }

  // --- Effort calibration ---
  for (const s of analyzeEffortCalibration(records)) {
    recommendations.push({
      type: 'effort_adjustment',
      intent: s.intent,
      current: s.current,
      suggested: s.suggested,
      reason: s.reason,
    });
  }

  // --- Provider balance ---
  for (const s of analyzeProviderBalance(records)) {
    recommendations.push({
      type: 'provider_balance',
      suggestion: s.suggestion,
      reason: s.reason,
    });
  }

  return { recommendations, warnings };
}
|
|
251
|
-
|
|
252
|
-
// ─── Summary paragraph ────────────────────────────────────────────────────────
|
|
253
|
-
|
|
254
|
-
/**
 * Render the analysis as a single human-readable paragraph.
 *
 * @param {object[]} records - ledger records carrying an `outcome` field
 * @param {object[]} recommendations - only the count is reported
 * @param {Array<{model: *, intent: *, rate: number}>} warnings - listed individually
 * @returns {string}
 */
function buildSummary(records, recommendations, warnings) {
  const plural = (count) => (count !== 1 ? 's' : '');
  const pct = (fraction) => Math.round(fraction * 100);

  const total = records.length;
  let withOutcome = 0;
  let successes = 0;
  const perProvider = {};
  for (const record of records) {
    if (record.outcome) withOutcome += 1;
    if (record.outcome?.success) successes += 1;
    perProvider[record.provider] = (perProvider[record.provider] || 0) + 1;
  }
  const rate = withOutcome ? pct(successes / withOutcome) : 0;

  const providerSummary = Object.entries(perProvider)
    .map(([name, count]) => `${name} (${pct(count / total)}%)`)
    .join(', ');

  const recCount = recommendations.length;
  const warnCount = warnings.length;

  let failureNote = 'No critical failure patterns detected.';
  if (warnings.length) {
    const combos = warnings.map((w) => `${w.model}/${w.intent} (${pct(w.rate)}%)`);
    failureNote = `High-failure combos: ${combos.join(', ')}.`;
  }

  return [
    `Analyzed ${total} decisions (${withOutcome} with outcomes, ${rate}% success). `,
    `Provider split: ${providerSummary || 'n/a'}. `,
    `Generated ${recCount} routing recommendation${plural(recCount)} and `,
    `${warnCount} warning${plural(warnCount)}. `,
    failureNote,
  ].join('');
}
|
|
275
|
-
|
|
276
|
-
// ─── Main ─────────────────────────────────────────────────────────────────────
|
|
277
|
-
|
|
278
|
-
/**
 * Run the ledger analysis and report it on stdout.
 *
 * With fewer than MIN_SAMPLES records an "insufficient data" notice is printed
 * and nothing else happens. Otherwise prints either the one-paragraph summary
 * (`opts.summary`) or the full JSON report; the JSON report is also written to
 * WEIGHTS_FILE unless `opts.dryRun` is set. Status messages go to stderr.
 *
 * @param {{since?: ?string, summary?: boolean, dryRun?: boolean}} [opts]
 * @returns {void}
 */
function run(opts = {}) {
  const records = loadLedger(parseSince(opts.since));
  const sampleCount = records.length;

  if (sampleCount < MIN_SAMPLES) {
    const notice = opts.summary
      ? `Insufficient data: only ${sampleCount} entries (need ${MIN_SAMPLES}).`
      : JSON.stringify({ status: 'insufficient_data', samples: sampleCount, required: MIN_SAMPLES }, null, 2);
    console.log(notice);
    return;
  }

  const { recommendations, warnings } = generateRecommendations(records);
  const rankedCombos = analyzeCostEfficiency(records);

  const report = {
    generatedAt: new Date().toISOString(),
    sampleSize: sampleCount,
    sinceFilter: opts.since || null,
    recommendations,
    warnings,
    costEfficiency: rankedCombos.slice(0, 5), // top 5 efficient combos
  };

  if (opts.summary) {
    console.log(buildSummary(records, recommendations, warnings));
    return;
  }

  const serialized = JSON.stringify(report, null, 2);
  console.log(serialized);

  if (opts.dryRun) {
    console.error('\n[dry-run] Weights not written.');
    return;
  }

  try {
    mkdirSync(dirname(WEIGHTS_FILE), { recursive: true });
    writeFileSync(WEIGHTS_FILE, JSON.stringify(report, null, 2));
    console.error(`\nWrote routing weights → ${WEIGHTS_FILE}`);
  } catch (e) {
    console.error(`\nFailed to write weights: ${e.message}`);
  }
}
|
|
323
|
-
|
|
324
|
-
// ─── CLI ──────────────────────────────────────────────────────────────────────
|
|
325
|
-
|
|
326
|
-
// Run the analysis only when this file is the entry script.
if (process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1]) {
  const cliArgs = process.argv.slice(2);
  const sincePos = cliArgs.indexOf('--since');

  run({
    summary: cliArgs.includes('--summary'),
    dryRun: cliArgs.includes('--dry-run'),
    // The window (e.g. "7d") is the token right after --since.
    since: sincePos === -1 ? null : cliArgs[sincePos + 1],
  });
}
|