@kinqs/brainrouter-cli 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +109 -0
- package/README.md +185 -0
- package/dist/agent/agent.d.ts +765 -0
- package/dist/agent/agent.js +1977 -0
- package/dist/cli/cliPrompt.d.ts +15 -0
- package/dist/cli/cliPrompt.js +62 -0
- package/dist/cli/commands/_context.d.ts +53 -0
- package/dist/cli/commands/_context.js +14 -0
- package/dist/cli/commands/_helpers.d.ts +45 -0
- package/dist/cli/commands/_helpers.js +140 -0
- package/dist/cli/commands/guard.d.ts +6 -0
- package/dist/cli/commands/guard.js +292 -0
- package/dist/cli/commands/memory.d.ts +12 -0
- package/dist/cli/commands/memory.js +263 -0
- package/dist/cli/commands/obs.d.ts +6 -0
- package/dist/cli/commands/obs.js +208 -0
- package/dist/cli/commands/orchestration.d.ts +6 -0
- package/dist/cli/commands/orchestration.js +218 -0
- package/dist/cli/commands/session.d.ts +6 -0
- package/dist/cli/commands/session.js +191 -0
- package/dist/cli/commands/ui.d.ts +6 -0
- package/dist/cli/commands/ui.js +477 -0
- package/dist/cli/commands/workflow.d.ts +6 -0
- package/dist/cli/commands/workflow.js +691 -0
- package/dist/cli/repl.d.ts +12 -0
- package/dist/cli/repl.js +894 -0
- package/dist/config/config.d.ts +22 -0
- package/dist/config/config.js +105 -0
- package/dist/config/workspace.d.ts +7 -0
- package/dist/config/workspace.js +62 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +610 -0
- package/dist/memory/briefing.d.ts +46 -0
- package/dist/memory/briefing.js +152 -0
- package/dist/memory/consolidation.d.ts +60 -0
- package/dist/memory/consolidation.js +208 -0
- package/dist/memory/formatters.d.ts +38 -0
- package/dist/memory/formatters.js +102 -0
- package/dist/memory/mentions.d.ts +10 -0
- package/dist/memory/mentions.js +72 -0
- package/dist/orchestration/orchestrator.d.ts +36 -0
- package/dist/orchestration/orchestrator.js +71 -0
- package/dist/orchestration/roles.d.ts +11 -0
- package/dist/orchestration/roles.js +117 -0
- package/dist/orchestration/tools.d.ts +244 -0
- package/dist/orchestration/tools.js +528 -0
- package/dist/prompt/breadthHint.d.ts +48 -0
- package/dist/prompt/breadthHint.js +93 -0
- package/dist/prompt/compactor.d.ts +31 -0
- package/dist/prompt/compactor.js +112 -0
- package/dist/prompt/initAgentMd.d.ts +13 -0
- package/dist/prompt/initAgentMd.js +194 -0
- package/dist/prompt/skillRunner.d.ts +34 -0
- package/dist/prompt/skillRunner.js +146 -0
- package/dist/prompt/systemPrompt.d.ts +10 -0
- package/dist/prompt/systemPrompt.js +171 -0
- package/dist/runtime/clipboard.d.ts +17 -0
- package/dist/runtime/clipboard.js +52 -0
- package/dist/runtime/llmSemaphore.d.ts +30 -0
- package/dist/runtime/llmSemaphore.js +67 -0
- package/dist/runtime/loopRunner.d.ts +25 -0
- package/dist/runtime/loopRunner.js +79 -0
- package/dist/runtime/mcpClient.d.ts +156 -0
- package/dist/runtime/mcpClient.js +234 -0
- package/dist/runtime/mcpUtils.d.ts +36 -0
- package/dist/runtime/mcpUtils.js +64 -0
- package/dist/runtime/sandbox.d.ts +48 -0
- package/dist/runtime/sandbox.js +156 -0
- package/dist/runtime/tracing.d.ts +25 -0
- package/dist/runtime/tracing.js +91 -0
- package/dist/state/cliState.d.ts +59 -0
- package/dist/state/cliState.js +311 -0
- package/dist/state/goalStore.d.ts +174 -0
- package/dist/state/goalStore.js +410 -0
- package/dist/state/hookifyStore.d.ts +80 -0
- package/dist/state/hookifyStore.js +237 -0
- package/dist/state/hooksStore.d.ts +42 -0
- package/dist/state/hooksStore.js +71 -0
- package/dist/state/preferencesStore.d.ts +41 -0
- package/dist/state/preferencesStore.js +25 -0
- package/dist/state/sessionStore.d.ts +42 -0
- package/dist/state/sessionStore.js +193 -0
- package/dist/state/taskStore.d.ts +23 -0
- package/dist/state/taskStore.js +80 -0
- package/dist/state/workflowArtifacts.d.ts +33 -0
- package/dist/state/workflowArtifacts.js +139 -0
- package/package.json +71 -0
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
import { Agent } from '../agent/agent.js';
|
|
2
|
+
import { createSession, formatSessionSummary, getSession, listSessions, updateSession, } from './orchestrator.js';
|
|
3
|
+
import { buildRolePrompt, resolveRole } from './roles.js';
|
|
4
|
+
import { buildSystemPrompt, loadWorkspaceInstructionSummary } from '../prompt/systemPrompt.js';
|
|
5
|
+
import { readTranscriptEntries } from '../state/sessionStore.js';
|
|
6
|
+
import { callMcpTool, childSessionKey } from '../runtime/mcpUtils.js';
|
|
7
|
+
import { readPreferences } from '../state/preferencesStore.js';
|
|
8
|
+
// Threshold at or above which a child agent's final output is offloaded to the
// BrainRouter working-memory canvas rather than embedded directly in the
// parent's context. ~6k chars ≈ 1.5k tokens — enough room for short reports
// in-line, big enough that a 20k-char architecture analysis goes out-of-band.
const OFFLOAD_THRESHOLD_CHARS = 6000;
// Character budget handed to extractChildPreview() when building the
// parent-visible preview that stands in for an offloaded output.
const OFFLOAD_PREVIEW_CHARS = 800;
|
|
14
|
+
/**
 * Order the three access modes by power so spawn_agent can refuse to grant
 * a child more than the parent already has.
 */
const ACCESS_RANK = { read: 0, write: 1, shell: 2 };
/**
 * Clamp the access mode requested for a child to what the parent holds.
 *
 * @param {string} parent - Access mode of the spawning agent.
 * @param {string} requested - Access mode asked for on behalf of the child.
 * @returns {string} `requested` when it is a known mode no stronger than
 *   `parent`, `parent` when the request is stronger, and the least-privileged
 *   mode (`'read'`) when either value is not a recognised mode.
 */
export function clampAccess(parent, requested) {
    // Only own numeric entries count as ranks; names such as "constructor"
    // resolve to inherited members of the object and must not be accepted.
    const rankOf = (mode) => {
        const rank = typeof mode === 'string' ? ACCESS_RANK[mode] : undefined;
        return typeof rank === 'number' ? rank : undefined;
    };
    const parentRank = rankOf(parent);
    const requestedRank = rankOf(requested);
    // An unknown mode used to compare as "not <=", which handed the child the
    // parent's full access. Fail closed to the weakest mode instead.
    if (parentRank === undefined || requestedRank === undefined) {
        return 'read';
    }
    return requestedRank <= parentRank ? requested : parent;
}
|
|
22
|
+
/**
 * Build the parent-visible preview of an offloaded child output. The naive
 * `slice(0, N)` form hid the conclusion when children wrote long reports;
 * here we prefer an explicit summary section (the role overlays nudge each
 * child to start with one) and fall back to head+tail so both the framing
 * and the punchline survive the clamp.
 *
 * Exported for testability.
 *
 * @param {string} output - Full text produced by the child.
 * @param {number} maxChars - Upper bound on the preview length.
 * @returns {string} A preview that is never longer than `maxChars`
 *   (an empty string when `maxChars` is zero or negative).
 */
export function extractChildPreview(output, maxChars) {
    // Hard clamp with a trailing ellipsis; safe for any budget, including 0.
    const truncate = (text, limit) => {
        if (limit <= 0)
            return '';
        if (text.length <= limit)
            return text;
        return text.slice(0, limit - 1) + '…';
    };
    // 1. Pick a Markdown summary heading if present. The role overlays
    //    encourage children to write one of these.
    const HEADING_PATTERNS = [
        /^#{1,3}\s+(headline|tl;?dr|summary|key findings?|bottom line|conclusion)[^\n]*/im,
    ];
    for (const heading of HEADING_PATTERNS) {
        const match = heading.exec(output);
        if (match) {
            const bodyStart = match.index + match[0].length;
            // Section runs until the next level 1-3 heading or end of doc.
            const next = output.slice(bodyStart).search(/\n#{1,3}\s/);
            const end = next < 0 ? output.length : bodyStart + next;
            return truncate(output.slice(match.index, end).trim(), maxChars);
        }
    }
    // 2. Otherwise show head + tail so the conclusion isn't hidden.
    if (output.length <= maxChars)
        return output;
    const DIVIDER = '\n…\n';
    const head = Math.floor(maxChars * 0.6);
    // Six characters are reserved although the divider is three long; the
    // slack is kept so previews for normal budgets stay exactly as before.
    const tail = maxChars - head - 6;
    if (tail <= 0) {
        // With no room for a tail, `slice(-tail)` would return most or all of
        // the output (`slice(-0)` is the whole string). Clamp plainly instead.
        return truncate(output, maxChars);
    }
    return output.slice(0, head) + DIVIDER + output.slice(-tail);
}
|
|
57
|
+
// Tool names dispatched by executeOrchestrationTool(), kept in a Set for
// constant-time membership checks.
const ORCHESTRATION_TOOL_NAMES = new Set([
    // spawning
    'spawn_agent', 'spawn_agents',
    // observing
    'list_agents', 'wait_agent', 'wait_agents', 'read_agent_transcript',
    // lifecycle / routing
    'close_agent', 'route_agent',
]);
|
|
67
|
+
// Routing table for inferRoleFromTask, checked top to bottom. `verb` matches
// a leading verb as a whole word (optionally inflected with s / d / ed / ing);
// `keyword` matches an intent phrase anywhere in the task.
const ROLE_ROUTES = [
    {
        role: 'explorer',
        verb: /^(?:investigate|explore|map|survey|find|locate|inspect|audit|scan|read|look at|grep|trace)(?:s|d|ed|ing)?\b/,
        keyword: /\b(where is|where does|how does|what files|which files)\b/,
    },
    {
        role: 'architect',
        verb: /^(?:design|propose|architect|plan|outline|sketch|model|compare)(?:s|d|ed|ing)?\b/,
        keyword: /\b(architecture|design alternatives|tradeoff|spec)\b/,
    },
    {
        role: 'reviewer',
        verb: /^(?:review|critique|evaluate|assess|grade)(?:s|d|ed|ing)?\b/,
        keyword: /\b(code review|nitpick|smell|maintainability)\b/,
    },
    {
        role: 'verifier',
        verb: /^(?:test|verify|run tests|check|validate)(?:s|d|ed|ing)?\b/,
        keyword: /\b(typecheck|lint|build passes?|tests? pass)\b/,
    },
];
/**
 * Heuristic auto-router. Maps a free-text task to the best role based on
 * leading verbs and intent keywords. Pure text-classification — callers can
 * opt in via `route_agent` without first spending an LLM turn.
 *
 * The leading verb has to be a complete word: a bare prefix test used to send
 * "Readme: ..." to the explorer and "Checkout flow ..." to the verifier.
 *
 * @param {string} task - Free-text task description.
 * @returns {string} 'explorer', 'architect', 'reviewer', 'verifier', or
 *   'worker' when nothing more specific matches.
 */
export function inferRoleFromTask(task) {
    const t = task.trim().toLowerCase();
    const route = ROLE_ROUTES.find(({ verb, keyword }) => verb.test(t) || keyword.test(t));
    // Default — implementation work.
    return route ? route.role : 'worker';
}
|
|
93
|
+
/**
 * Report whether `name` is one of the orchestration tool names.
 *
 * @param {string} name - Tool name to look up.
 * @returns {boolean} True when the name is in ORCHESTRATION_TOOL_NAMES.
 */
export function isOrchestrationToolName(name) {
    const isKnown = ORCHESTRATION_TOOL_NAMES.has(name);
    return isKnown;
}
|
|
96
|
+
// Promises of child runs that are still in flight, keyed by child session id.
const runningPromises = new Map();
/**
 * Look up the in-flight promise registered for a child id.
 *
 * @param {string} id - Child session id.
 * @returns {Promise|undefined} The tracked promise, or undefined when no run
 *   is registered under that id.
 */
export function trackedPromiseFor(id) {
    const inFlight = runningPromises.get(id);
    return inFlight;
}
|
|
100
|
+
/**
 * Build the tool definition (name, description, JSON input schema) for
 * `spawn_agent`. A fresh object is returned on every call.
 */
export function createSpawnAgentTool() {
    const text = (description) => ({ type: 'string', description });
    const properties = {
        role: text('One of: explorer, architect, reviewer, worker, verifier.'),
        prompt: text('The bounded task prompt for the child agent.'),
        label: text('Optional short label for the child run.'),
        access: {
            type: 'string',
            enum: ['read', 'write', 'shell'],
            description: 'Override the role default access mode. Default: role default.',
        },
        wait: {
            type: 'boolean',
            description: 'If true, block until the child completes and return its final output. Default: false.',
        },
        timeoutMs: {
            type: 'integer',
            description: 'Optional timeout in milliseconds when wait=true. Default 120000.',
        },
        seedRecordIds: {
            type: 'array',
            items: { type: 'string' },
            description: 'Optional BrainRouter memory record IDs that the parent already recalled. The child agent is told to build on these instead of re-discovering them.',
        },
    };
    const inputSchema = { type: 'object', properties, required: ['role', 'prompt'] };
    return {
        name: 'spawn_agent',
        description: 'Spawn a child agent with a specific role (explorer, architect, reviewer, worker, verifier) and a bounded prompt. Returns the child agent id immediately; the child runs in the background.',
        inputSchema,
    };
}
|
|
123
|
+
/**
 * Build the tool definition for `list_agents`, which takes no arguments.
 */
export function createListAgentsTool() {
    const inputSchema = { type: 'object', properties: {} };
    const description = 'List all child agent sessions for the current workspace with status, role, and elapsed time.';
    return { name: 'list_agents', description, inputSchema };
}
|
|
130
|
+
/**
 * Build the tool definition for `wait_agent` (required `id`, optional
 * `timeoutMs`).
 */
export function createWaitAgentTool() {
    const properties = {
        id: { type: 'string', description: 'Child agent id returned by spawn_agent.' },
        timeoutMs: { type: 'integer', description: 'Maximum wait time in ms. Default 120000.' },
    };
    return {
        name: 'wait_agent',
        description: 'Wait for a child agent to complete. Returns final output, error, or timeout state.',
        inputSchema: { type: 'object', properties, required: ['id'] },
    };
}
|
|
144
|
+
/**
 * Build the tool definition for `read_agent_transcript` (required `id`,
 * optional `limit`).
 */
export function createReadAgentTranscriptTool() {
    const properties = {
        id: { type: 'string', description: 'Child agent id.' },
        limit: { type: 'integer', description: 'Max entries to return. Default 40.' },
    };
    return {
        name: 'read_agent_transcript',
        description: 'Read recent transcript entries (default 40) of a child agent session.',
        inputSchema: { type: 'object', properties, required: ['id'] },
    };
}
|
|
158
|
+
/**
 * Build the tool definition for `close_agent` (required `id`).
 */
export function createCloseAgentTool() {
    const idProperty = { type: 'string', description: 'Child agent id.' };
    return {
        name: 'close_agent',
        description: 'Mark a child agent session closed without deleting its transcript. Use this for cleanup.',
        inputSchema: {
            type: 'object',
            properties: { id: idProperty },
            required: ['id'],
        },
    };
}
|
|
169
|
+
/**
 * Build the tool definition for `spawn_agents`: a non-empty `agents` array
 * whose entries need a `prompt` and may carry role, label, access and
 * seedRecordIds.
 */
export function createSpawnAgentsTool() {
    const agentEntry = {
        type: 'object',
        properties: {
            role: { type: 'string', description: 'explorer | architect | reviewer | worker | verifier (omit to auto-route from the prompt).' },
            prompt: { type: 'string', description: 'Bounded task prompt for this child.' },
            label: { type: 'string' },
            access: { type: 'string', enum: ['read', 'write', 'shell'] },
            seedRecordIds: { type: 'array', items: { type: 'string' } },
        },
        required: ['prompt'],
    };
    const description = [
        'Spawn multiple child agents in parallel with one tool call. Returns all child ids immediately. ',
        'Use this for batched fan-out (e.g. 3 explorers covering different parts of the codebase) instead of N back-to-back spawn_agent calls.',
    ].join('');
    return {
        name: 'spawn_agents',
        description,
        inputSchema: {
            type: 'object',
            properties: {
                agents: { type: 'array', minItems: 1, items: agentEntry },
            },
            required: ['agents'],
        },
    };
}
|
|
197
|
+
/**
 * Build the tool definition for `wait_agents` (required non-empty `ids`,
 * optional `timeoutMs`).
 */
export function createWaitAgentsTool() {
    const description = [
        "Wait for multiple child agents in parallel. Returns each child's final status / output / error. ",
        'Use after spawn_agents to drain the whole batch before synthesizing.',
    ].join('');
    const properties = {
        ids: { type: 'array', items: { type: 'string' }, minItems: 1 },
        timeoutMs: { type: 'integer', description: 'Maximum total wait. Default 240000.' },
    };
    return {
        name: 'wait_agents',
        description,
        inputSchema: { type: 'object', properties, required: ['ids'] },
    };
}
|
|
212
|
+
/**
 * Build the tool definition for `route_agent` (required `task` string).
 */
export function createRouteAgentTool() {
    const description = [
        'Recommend a role (explorer/architect/reviewer/worker/verifier) for a task without spawning. ',
        'Useful when you want a sanity check on which role a free-text task should go to before calling spawn_agent.',
    ].join('');
    return {
        name: 'route_agent',
        description,
        inputSchema: {
            type: 'object',
            properties: { task: { type: 'string' } },
            required: ['task'],
        },
    };
}
|
|
224
|
+
/**
 * Dispatch an orchestration tool call to its handler.
 *
 * @param {string} name - Tool name.
 * @param {object} args - Tool arguments, forwarded to the handler.
 * @param {object} ctx - Orchestration context, forwarded to the handler.
 * @returns {Promise<string>} Whatever the selected handler returns.
 * @throws {Error} When `name` is not an orchestration tool.
 */
export async function executeOrchestrationTool(name, args, ctx) {
    // Thunks keep handler lookup lazy: only the selected handler is touched.
    const dispatch = new Map([
        ['spawn_agent', () => handleSpawn(args, ctx)],
        ['spawn_agents', () => handleSpawnBatch(args, ctx)],
        ['list_agents', () => handleList(ctx)],
        ['wait_agent', () => handleWait(args, ctx)],
        ['wait_agents', () => handleWaitBatch(args, ctx)],
        ['read_agent_transcript', () => handleReadTranscript(args, ctx)],
        ['close_agent', () => handleClose(args, ctx)],
        ['route_agent', () => handleRoute(args)],
    ]);
    const invoke = dispatch.get(name);
    if (invoke === undefined) {
        throw new Error(`Unknown orchestration tool: ${name}`);
    }
    return await invoke();
}
|
|
246
|
+
/**
 * Spawn every entry of `args.agents` and report one result per entry.
 *
 * Entries are spawned one after another; an entry without `role` is routed
 * with inferRoleFromTask(). A failure on one entry is recorded in place as
 * `{ index, status: 'failed', error }` instead of aborting the call, so the
 * ids of children that were already started are never lost.
 *
 * @param {{agents?: Array<object>}} args - Batch request.
 * @param {object} ctx - Orchestration context forwarded to handleSpawn.
 * @returns {Promise<string>} JSON: `{ spawned, agents }`, plus `failed` when
 *   at least one entry could not be spawned.
 * @throws {Error} When `agents` is missing or empty.
 */
async function handleSpawnBatch(args, ctx) {
    const list = Array.isArray(args?.agents) ? args.agents : [];
    if (list.length === 0)
        throw new Error('spawn_agents requires at least one entry in `agents`.');
    const results = [];
    let failed = 0;
    // Spawn sequentially so each gets a unique session id and createSession's
    // write isn't racy. The CHILDREN themselves still run in parallel — handleSpawn
    // kicks off the runTurn detached via runningPromises.set, then returns.
    for (const [index, entry] of list.entries()) {
        try {
            if (entry === null || typeof entry !== 'object') {
                throw new TypeError('each `agents` entry must be an object with a `prompt`.');
            }
            const role = entry.role ?? inferRoleFromTask(String(entry.prompt ?? ''));
            const out = await handleSpawn({ ...entry, role }, ctx);
            try {
                results.push(JSON.parse(out));
            }
            catch {
                // Not JSON: hand the raw text back rather than dropping it.
                results.push({ raw: out });
            }
        }
        catch (err) {
            failed += 1;
            results.push({ index, status: 'failed', error: err?.message ?? String(err) });
        }
    }
    const report = { spawned: results.length - failed };
    // `failed` is only added when needed so an all-success reply is unchanged.
    if (failed > 0)
        report.failed = failed;
    report.agents = results;
    return JSON.stringify(report, null, 2);
}
|
|
266
|
+
/**
 * Wait for several children at once and report one result per id.
 *
 * All waits run concurrently with the same `timeoutMs`. A wait that throws
 * (for example for an unknown id) is reported in place as
 * `{ id, status: 'error', error }` so one bad id no longer discards the
 * results of the other children.
 *
 * @param {{ids?: Array<string>, timeoutMs?: number}} args - Batch request.
 * @param {object} ctx - Orchestration context forwarded to handleWait.
 * @returns {Promise<string>} JSON `{ waited, agents }`.
 * @throws {Error} When `ids` is missing or empty.
 */
async function handleWaitBatch(args, ctx) {
    const DEFAULT_TIMEOUT_MS = 240_000;
    const ids = Array.isArray(args?.ids) ? args.ids.map(String) : [];
    if (ids.length === 0)
        throw new Error('wait_agents requires a non-empty `ids` array.');
    // A non-numeric or negative timeout would otherwise reach the timer as NaN
    // or a negative delay and expire almost immediately.
    const requested = Number(args?.timeoutMs ?? DEFAULT_TIMEOUT_MS);
    const timeoutMs = Number.isFinite(requested) && requested >= 0 ? requested : DEFAULT_TIMEOUT_MS;
    const settled = await Promise.all(ids.map(async (id) => {
        let single;
        try {
            single = await handleWait({ id, timeoutMs }, ctx);
        }
        catch (err) {
            return { id, status: 'error', error: err?.message ?? String(err) };
        }
        try {
            return JSON.parse(single);
        }
        catch {
            // Not JSON: hand the raw text back rather than dropping it.
            return { id, raw: single };
        }
    }));
    return JSON.stringify({ waited: settled.length, agents: settled }, null, 2);
}
|
|
282
|
+
/**
 * Recommend a role for a task without spawning anything.
 *
 * @param {{task?: string}} args - Tool arguments.
 * @returns {string} JSON with the task (cut to 200 characters), the inferred
 *   role and a short rationale.
 * @throws {Error} When `task` is missing or blank.
 */
function handleRoute(args) {
    const task = String(args?.task ?? '');
    if (task.trim().length === 0) {
        throw new Error('route_agent requires `task`.');
    }
    const role = inferRoleFromTask(task);
    const reply = {
        task: task.slice(0, 200),
        role,
        rationale: explainRoute(task, role),
    };
    return JSON.stringify(reply, null, 2);
}
|
|
290
|
+
/**
 * Return the canned rationale shown for a routing decision.
 *
 * @param {string} task - The routed task (not consulted by this function).
 * @param {string} role - Role chosen for the task.
 * @returns {string} Explanation for that role; any other value gets the
 *   worker default text.
 */
function explainRoute(task, role) {
    const rationales = new Map([
        ['explorer', 'Verbs like "investigate / explore / map / find" → read-only investigation child.'],
        ['architect', 'Verbs like "design / propose / plan / outline" → architect proposes ≥2 design alternatives.'],
        ['reviewer', 'Verbs like "review / critique / evaluate" → reviewer reads diff, returns severity-ordered findings.'],
        ['verifier', 'Verbs like "test / verify / typecheck" → verifier runs the suite and reports PASS/FAIL.'],
    ]);
    if (rationales.has(role)) {
        return rationales.get(role);
    }
    return 'Default → worker (write access for implementation).';
}
|
|
299
|
+
/**
 * Create a child session, start the child agent's turn in the background and
 * return a JSON description of it (or, with `args.wait`, the result of
 * handleWait for the new child).
 *
 * The background run records its outcome with updateSession(), offloads large
 * outputs through offloadChildOutput(), notifies `ctx.onChildComplete`, and —
 * for workers with the `autoReview` preference on — queues a reviewer child.
 *
 * @param {object} args - `role`, `prompt`, and optional `label`, `access`,
 *   `wait`, `timeoutMs`, `seedRecordIds`.
 * @param {object} ctx - Orchestration context (workspace, parent session,
 *   MCP client, LLM config, optional callbacks).
 * @returns {Promise<string>} JSON `{ id, role, access, status: 'running' }`,
 *   or the handleWait() result when `args.wait` is truthy.
 * @throws {Error} When the prompt is empty.
 */
async function handleSpawn(args, ctx) {
    const role = resolveRole(String(args.role));
    const prompt = String(args.prompt ?? '');
    if (!prompt.trim())
        throw new Error('spawn_agent requires a non-empty prompt.');
    // Honour the override only when it names a known mode. An unrecognised
    // value used to fall through the clamp and grant the parent's full
    // access; it now falls back to the role default.
    const hasValidOverride = typeof args.access === 'string'
        && Object.prototype.hasOwnProperty.call(ACCESS_RANK, args.access);
    const requested = hasValidOverride ? args.access : role.defaultAccess;
    const access = clampAccess(ctx.parentAccessMode ?? 'shell', requested);
    const record = createSession(ctx.workspaceRoot, {
        role: role.name,
        prompt,
        parentSessionKey: ctx.parentSessionKey,
        access,
        label: typeof args.label === 'string' ? args.label : undefined,
    });
    const childKey = childSessionKey(ctx.parentSessionKey, record.id);
    // At most 20 string ids are forwarded to the child.
    const seededIds = Array.isArray(args.seedRecordIds)
        ? args.seedRecordIds.filter((id) => typeof id === 'string').slice(0, 20)
        : [];
    const basePrompt = buildSystemPrompt({
        workspaceRoot: ctx.workspaceRoot,
        launchCwd: ctx.launchCwd,
        sessionKey: childKey,
        instructionSummary: loadWorkspaceInstructionSummary(ctx.workspaceRoot),
    });
    let systemPromptOverride = buildRolePrompt(role, basePrompt, '');
    if (seededIds.length > 0) {
        systemPromptOverride +=
            `\n\n## Parent-recalled BrainRouter records\n` +
                `The parent agent already recalled these memory record IDs: ${seededIds.join(', ')}. ` +
                `Call memory_recall (or memory_search) with the same intent before doing duplicate exploration, and prefer building on these records over re-deriving them.`;
    }
    const childAgent = new Agent(ctx.mcpClient, ctx.llmConfig, {
        workspaceRoot: ctx.workspaceRoot,
        launchCwd: ctx.launchCwd,
        sessionKey: childKey,
        // The role overlay is already embedded inside `systemPromptOverride` via
        // buildRolePrompt() above — passing it again as a separate field would
        // append a second copy and waste 1.5–3k tokens per child turn.
        roleOverlay: undefined,
        accessMode: access,
        silent: true,
        // Children NEED memory: skipping the briefing makes them amnesiac and the
        // parent LLM eventually learns inline work outperforms fan-out. With recall
        // enabled, children join the same cognitive context as the parent.
        enableRecall: true,
        systemPromptOverride,
        // Inherit the parent's OTEL trace context so spans nest under the
        // dispatching spawn_agent tool span instead of starting a fresh tree.
        parentTraceId: ctx.parentTraceId,
        parentSpanId: ctx.parentSpanId,
    });
    if (ctx.parentAgentId)
        childAgent.setParentAgentId(ctx.parentAgentId);
    updateSession(ctx.workspaceRoot, record.id, { status: 'running' });
    const runChild = async () => {
        let storedOutput;
        try {
            const output = await childAgent.runTurn(prompt, {
                onStatusUpdate: () => { },
                onToolStart: () => { },
                onToolEnd: (tool, result) => {
                    ctx.onChildToolEvent?.({
                        childId: record.id,
                        role: role.name,
                        tool,
                        ok: result.success,
                        summary: result.summary,
                    });
                },
            });
            // Working-memory offload: when a child returns a sizeable payload, push
            // the full body into the BrainRouter working canvas and keep only a
            // pointer plus a preview in the session record. The preview prefers an
            // explicit summary section and otherwise keeps head and tail (see
            // extractChildPreview) so the conclusion is not cut off.
            storedOutput = output;
            let workingRef;
            if (output && output.length >= OFFLOAD_THRESHOLD_CHARS) {
                workingRef = await offloadChildOutput(ctx, record.id, role.name, prompt, output);
                if (workingRef) {
                    const preview = extractChildPreview(output, OFFLOAD_PREVIEW_CHARS);
                    storedOutput =
                        `[offloaded to working memory ref=${workingRef}]\n` +
                            `Preview (${preview.length} chars of ${output.length}):\n` +
                            preview;
                }
            }
            updateSession(ctx.workspaceRoot, record.id, {
                status: 'completed',
                completedAt: new Date().toISOString(),
                finalOutput: storedOutput,
                usage: { ...childAgent.sessionUsage },
            });
            // Roll the offload savings into the parent's metrics so /tokens can
            // report what didn't have to land back in the parent's context window.
            if (workingRef && output.length > OFFLOAD_PREVIEW_CHARS) {
                ctx.recordOffload?.(output.length - OFFLOAD_PREVIEW_CHARS);
            }
            // Tell the REPL the child finished — otherwise the user sees the child's
            // tool calls scroll by and then silence, with no signal that it's safe
            // to ask the parent agent to continue.
            ctx.onChildComplete?.({
                childId: record.id,
                role: role.name,
                status: 'completed',
                preview: (storedOutput ?? '').replace(/\s+/g, ' ').slice(0, 160),
            });
        }
        catch (err) {
            const message = err?.message ?? String(err);
            updateSession(ctx.workspaceRoot, record.id, {
                status: 'failed',
                completedAt: new Date().toISOString(),
                error: message,
            });
            ctx.onChildComplete?.({
                childId: record.id,
                role: role.name,
                status: 'failed',
                error: message,
            });
            return;
        }
        // Auto-review: when the user has /auto-review on and a worker just
        // finished, queue a reviewer agent on the worker's output.
        //
        // This runs AFTER the worker's outcome is recorded and under its own
        // guard: a failure to start the reviewer must not flip the finished
        // worker to `failed` or emit a second completion event.
        if (role.name !== 'worker')
            return;
        try {
            const prefs = readPreferences(ctx.workspaceRoot);
            if (!prefs.autoReview)
                return;
            await handleSpawn({
                role: 'reviewer',
                prompt: `Auto-review the changes made by worker agent ${record.id}.\n\nOriginal task:\n${prompt}\n\nWorker output (or ref):\n${storedOutput}`,
                label: `auto-review-${record.id}`,
                access: 'read',
                seedRecordIds: seededIds,
            }, ctx);
        }
        catch (err) {
            // Surface the problem through the existing child event channel
            // instead of swallowing it.
            ctx.onChildToolEvent?.({
                childId: record.id,
                role: role.name,
                tool: 'auto_review',
                ok: false,
                summary: `auto-review could not be started: ${err?.message ?? String(err)}`,
            });
        }
    };
    // The `.finally` callback always runs after the synchronous `set` below, so
    // the map entry is removed even if the run settles without ever awaiting.
    const promise = runChild().finally(() => {
        runningPromises.delete(record.id);
    });
    runningPromises.set(record.id, promise);
    if (args.wait) {
        return await handleWait({ id: record.id, timeoutMs: args.timeoutMs ?? 120000 }, ctx);
    }
    return JSON.stringify({ id: record.id, role: role.name, access, status: 'running' }, null, 2);
}
|
|
453
|
+
/**
 * List the child sessions of the workspace as a JSON array of summaries
 * (without outputs).
 *
 * @param {object} ctx - Orchestration context; `workspaceRoot` is read.
 * @returns {string} Pretty-printed JSON array.
 */
function handleList(ctx) {
    // Call summarize with one argument only: passing it straight to map()
    // would feed the element index in as `includeOutput`.
    const summaries = listSessions(ctx.workspaceRoot).map((session) => summarize(session));
    return JSON.stringify(summaries, null, 2);
}
|
|
457
|
+
/**
 * Wait for a child run to settle, up to a timeout, then report its session.
 *
 * If a run is still tracked for `id`, it is raced against a timer; when the
 * timer wins the reply is `{ id, status: 'timeout' }`. Otherwise (or when no
 * run is tracked) the stored session is summarised with its output.
 *
 * @param {{id?: string, timeoutMs?: number}} args - Tool arguments.
 * @param {object} ctx - Orchestration context; `workspaceRoot` is read.
 * @returns {Promise<string>} Pretty-printed JSON.
 * @throws {Error} When `id` is missing or no session exists for it.
 */
async function handleWait(args, ctx) {
    const DEFAULT_TIMEOUT_MS = 120000;
    // Timer delays above this value overflow and fire almost immediately.
    const MAX_TIMER_DELAY_MS = 2_147_483_647;
    const id = String(args.id ?? '');
    if (!id)
        throw new Error('wait_agent requires an id.');
    // A non-numeric or negative timeout used to reach setTimeout as NaN or a
    // negative delay, expiring at once and reporting a false timeout.
    const requested = Number(args.timeoutMs ?? DEFAULT_TIMEOUT_MS);
    const timeoutMs = Number.isFinite(requested) && requested >= 0
        ? Math.min(requested, MAX_TIMER_DELAY_MS)
        : DEFAULT_TIMEOUT_MS;
    const promise = runningPromises.get(id);
    if (promise) {
        const TIMED_OUT = Symbol('timed-out');
        let timer;
        const timeout = new Promise((resolve) => {
            timer = setTimeout(() => resolve(TIMED_OUT), timeoutMs);
        });
        let outcome;
        try {
            outcome = await Promise.race([promise, timeout]);
        }
        finally {
            // Without this the pending timer keeps the event loop (and thus
            // the CLI process) alive for the rest of the timeout.
            clearTimeout(timer);
        }
        if (outcome === TIMED_OUT) {
            return JSON.stringify({ id, status: 'timeout' }, null, 2);
        }
    }
    const record = getSession(ctx.workspaceRoot, id);
    if (!record)
        throw new Error(`No child session with id ${id}.`);
    return JSON.stringify(summarize(record, true), null, 2);
}
|
|
478
|
+
/**
 * Return transcript entries of a child session as JSON.
 *
 * @param {{id?: string, limit?: number}} args - Child id and entry limit
 *   (40 when omitted); the limit is forwarded to readTranscriptEntries().
 * @param {object} ctx - Orchestration context; `workspaceRoot` is read.
 * @returns {string} Pretty-printed JSON `{ id, entries }`.
 * @throws {Error} When no session exists for `id`.
 */
function handleReadTranscript(args, ctx) {
    const id = String(args.id ?? '');
    const limit = Number(args.limit ?? 40);
    const record = getSession(ctx.workspaceRoot, id);
    if (!record) {
        throw new Error(`No child session with id ${id}.`);
    }
    // The transcript is stored under the child's own session key.
    const transcriptKey = childSessionKey(record.parentSessionKey, record.id);
    const reply = {
        id,
        entries: readTranscriptEntries(ctx.workspaceRoot, transcriptKey, limit),
    };
    return JSON.stringify(reply, null, 2);
}
|
|
488
|
+
/**
 * Mark a child session as closed and return its summary (with output).
 *
 * @param {{id?: string}} args - Child id.
 * @param {object} ctx - Orchestration context; `workspaceRoot` is read.
 * @returns {string} Pretty-printed JSON summary of the updated session.
 * @throws {Error} When no session exists for `id`.
 */
function handleClose(args, ctx) {
    const id = String(args.id ?? '');
    const existing = getSession(ctx.workspaceRoot, id);
    if (!existing) {
        throw new Error(`No child session with id ${id}.`);
    }
    const patch = { status: 'closed', completedAt: new Date().toISOString() };
    const closed = updateSession(ctx.workspaceRoot, id, patch);
    return JSON.stringify(summarize(closed, true), null, 2);
}
|
|
496
|
+
/**
 * Push a child's full output to working memory via the
 * `memory_working_offload` MCP tool and return the reference to it.
 *
 * Offloading is best-effort: the caller keeps the output inline whenever no
 * reference comes back. A thrown error or an empty response is therefore
 * treated like an error response rather than allowed to fail a child whose
 * turn already succeeded.
 *
 * @param {object} ctx - Orchestration context (`mcpClient`,
 *   `parentSessionKey`, `workspaceRoot` are read).
 * @param {string} childId - Child session id.
 * @param {string} role - Role name of the child.
 * @param {string} prompt - Child prompt; its first 240 characters are sent
 *   as the summary.
 * @param {string} output - Full child output to store.
 * @returns {Promise<string|undefined>} The first of `refNodeId`, `nodeId` or
 *   `ref` found in the parsed response, or undefined when the offload failed.
 */
async function offloadChildOutput(ctx, childId, role, prompt, output) {
    let res;
    try {
        res = await callMcpTool(ctx.mcpClient, 'memory_working_offload', {
            sessionKey: childSessionKey(ctx.parentSessionKey, childId),
            workspacePath: ctx.workspaceRoot,
            payload: output,
            title: `Child ${childId} (${role}) output`,
            summary: prompt.slice(0, 240),
            kind: `child-agent-${role}`,
        });
    }
    catch {
        // Deliberate fallback, not a swallowed failure: returning undefined
        // makes the caller store the output inline instead.
        return undefined;
    }
    if (!res || res.isError)
        return undefined;
    return res.parsed?.refNodeId ?? res.parsed?.nodeId ?? res.parsed?.ref ?? undefined;
}
|
|
509
|
+
function summarize(record, includeOutput = false) {
|
|
510
|
+
const base = {
|
|
511
|
+
id: record.id,
|
|
512
|
+
role: record.role,
|
|
513
|
+
status: record.status,
|
|
514
|
+
access: record.access,
|
|
515
|
+
label: record.label,
|
|
516
|
+
startedAt: record.startedAt,
|
|
517
|
+
updatedAt: record.updatedAt,
|
|
518
|
+
completedAt: record.completedAt,
|
|
519
|
+
summary: formatSessionSummary(record),
|
|
520
|
+
};
|
|
521
|
+
if (includeOutput) {
|
|
522
|
+
if (record.finalOutput)
|
|
523
|
+
base.finalOutput = record.finalOutput;
|
|
524
|
+
if (record.error)
|
|
525
|
+
base.error = record.error;
|
|
526
|
+
}
|
|
527
|
+
return base;
|
|
528
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detect "breadth" intent in a user's prompt so the agent loop can inject a
|
|
3
|
+
* fan-out hint asking the LLM to default to `spawn_agents` instead of doing
|
|
4
|
+
* everything sequentially in a single thread.
|
|
5
|
+
*
|
|
6
|
+
* Heuristic only — pure text, no LLM. False positives are cheap (the worst
|
|
7
|
+
* case is the model fans out when it doesn't strictly need to, which is
|
|
8
|
+
* usually still useful). False negatives mean the user gets a single
|
|
9
|
+
* sequential turn for a task that wanted parallelism.
|
|
10
|
+
*
|
|
11
|
+
* Signals we look for:
|
|
12
|
+
* - Quantifier breadth: "everything", "all of", "as much as", "every"
|
|
13
|
+
* - Time-budget breadth: "in 1 go", "in one shot", "all at once", "at once"
|
|
14
|
+
* - Coverage breadth: "thoroughly", "comprehensively", "extensively", "deep dive"
|
|
15
|
+
* - Verb breadth: "test more", "explore all", "map everything"
|
|
16
|
+
* - Multi-tool breadth: mention of ≥3 tool names or "tools" (plural intent)
|
|
17
|
+
*/
|
|
18
|
+
/** Result of scanning a prompt for breadth signals. */
export interface BreadthIntent {
    /**
     * Total weighted signal score. Presumably compared against
     * BREADTH_FAN_OUT_THRESHOLD (declared as 1.5 in this file) — confirm in
     * shouldSuggestFanOut.
     */
    score: number;
    /** Snippets that triggered (for debugging / explainability). */
    signals: string[];
}
|
|
24
|
+
/**
 * Scan a prompt for breadth signals and report the score together with the
 * signals that fired.
 */
export declare function detectBreadthIntent(prompt: string): BreadthIntent;
|
|
25
|
+
/**
 * Threshold above which we inject a fan-out hint into the system context.
 *
 * Calibration history:
 * - Original 1.8 missed common prompts that obviously want fan-out
 * ("test all the MCP tools" scored 1.5, just under). The dedicated
 * verb-object-broad pattern now scores 2.0 on its own, so the threshold
 * can stay slightly conservative without missing them.
 * - A single weaker signal (1.5) plus any 0.7+ companion still clears.
 * - False positives cost very little (LLM may fan out when it didn't
 * strictly need to); false negatives mean a sequential single-thread
 * turn that should have been parallel.
 *
 * NOTE(review): the declared value is 1.5, while the history above reads as
 * if a lone 1.5 signal should not clear the bar. Whether the comparison is
 * inclusive is not visible from this declaration — confirm against the
 * implementation.
 */
export declare const BREADTH_FAN_OUT_THRESHOLD = 1.5;
|
|
39
|
+
/**
 * Runs `detectBreadthIntent` on the prompt and reports whether its score
 * reaches `BREADTH_FAN_OUT_THRESHOLD` (inclusive comparison).
 *
 * @param prompt The user's prompt.
 * @returns `suggest` is true when the score is at or above the threshold;
 *   `intent` is the underlying detection result.
 */
export declare function shouldSuggestFanOut(prompt: string): {
    suggest: boolean;
    intent: BreadthIntent;
};
|
|
43
|
+
/**
 * Builds the system message we inject to nudge the agent toward
 * `spawn_agents`. The message is Markdown with three sections: the matched
 * signals and score, a recommended spawn / wait / synthesize template the
 * model can adapt rather than starting from a blank brief, and a list of
 * anti-patterns to avoid.
 *
 * @param prompt The user's prompt. The current implementation does not read it.
 * @param intent Detection result whose `signals` and `score` are echoed in
 *   the message.
 * @returns The hint text, with lines joined by `\n`.
 */
export declare function buildFanOutHint(prompt: string, intent: BreadthIntent): string;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detect "breadth" intent in a user's prompt so the agent loop can inject a
|
|
3
|
+
* fan-out hint asking the LLM to default to `spawn_agents` instead of doing
|
|
4
|
+
* everything sequentially in a single thread.
|
|
5
|
+
*
|
|
6
|
+
* Heuristic only — pure text, no LLM. False positives are cheap (the worst
|
|
7
|
+
* case is the model fans out when it doesn't strictly need to, which is
|
|
8
|
+
* usually still useful). False negatives mean the user gets a single
|
|
9
|
+
* sequential turn for a task that wanted parallelism.
|
|
10
|
+
*
|
|
11
|
+
* Signals we look for:
|
|
12
|
+
* - Quantifier breadth: "everything", "all of", "as much as", "every"
|
|
13
|
+
* - Time-budget breadth: "in 1 go", "in one shot", "all at once", "at once"
|
|
14
|
+
* - Coverage breadth: "thoroughly", "comprehensively", "extensively", "deep dive"
|
|
15
|
+
* - Verb breadth: "test more", "explore all", "map everything"
|
|
16
|
+
* - Multi-angle breadth: "multiple angles", "multiple approaches", "multiple directions", "multiple files", "multiple tools"
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Phrase-level breadth signals. Each entry pairs a case-insensitive pattern
 * with the weight it adds to the breadth score and the label reported when it
 * matches. Entries are tested independently, so one phrase can trigger several
 * of them (e.g. "test all" hits both `verb-breadth` and `verb-object-broad`).
 *
 * None of the patterns carries the `g` or `y` flag, so `RegExp.prototype.test`
 * is stateless and the literals can be shared across calls.
 */
const PHRASE_SIGNALS = [
    { pattern: /\b(every|everything|all of|each one|as many as|as much as|as much information)\b/i, weight: 1.5, label: 'quantifier-breadth' },
    { pattern: /\bin\s+(1|one)\s+(go|shot|turn|pass)\b/i, weight: 2.0, label: 'one-shot' },
    { pattern: /\b(at\s+once|all at once|in parallel|fan out|fan-out)\b/i, weight: 2.0, label: 'parallel' },
    { pattern: /\b(thoroughly|comprehensively|extensively|exhaustively|deep[- ]?dive|systematically|manually)\b/i, weight: 1.5, label: 'coverage' },
    { pattern: /\b(test\s+(more|all|every|out)|explore\s+(all|every)|map\s+(all|every)|cover\s+all)\b/i, weight: 1.5, label: 'verb-breadth' },
    { pattern: /\b(across|throughout|the whole|the entire)\b/i, weight: 0.7, label: 'spatial-breadth' },
    { pattern: /\bmultiple\s+(angles?|approaches?|directions?|files?|tools?)\b/i, weight: 1.5, label: 'multi-angle' },
    // "as much as I could" or "as much as possible"
    { pattern: /\bas much as\b.*\b(possible|could|I can|you can)\b/i, weight: 1.5, label: 'max-effort' },
    // Realistic broad prompts the original heuristic missed. The user's "test
    // all the MCP tools" / "review every file" / "audit the whole codebase"
    // category — each lands on its own without needing a second signal.
    { pattern: /\b(test|review|audit|check|verify|inspect|analyze|examine|cover)\s+(all|every|each|the\s+(whole|entire|full))\b/i, weight: 2.0, label: 'verb-object-broad' },
    // "every single line", "every single file", "everything in the X"
    { pattern: /\bevery\s+(single|whole)\b/i, weight: 2.0, label: 'emphatic-every' },
    // "make sure things work" + a broad object → effectively a fan-out request.
    // `pass(?:es)?` rather than `passes?`: the latter only matches
    // "passe"/"passes", so "make sure the tests pass" never triggered.
    { pattern: /\bmake\s+sure\s+.*\b(works?|pass(?:es)?|everything|all)\b/i, weight: 1.0, label: 'verification-blanket' },
    // "for everything" / "for each" — usually appended to a broad noun phrase
    { pattern: /\bfor\s+(everything|each|every|all)\b/i, weight: 1.0, label: 'distributive' },
];
|
|
39
|
+
/**
 * Scores a prompt against every entry in `PHRASE_SIGNALS`.
 *
 * @param {string} prompt - User prompt. `null` / `undefined` is treated as an
 *   empty string; any other value is converted with `toString()`.
 * @returns {{ score: number, signals: string[] }} The summed weight of all
 *   matching entries and their labels, in table order. Blank input yields
 *   `{ score: 0, signals: [] }`.
 */
export function detectBreadthIntent(prompt) {
    const source = (prompt ?? '').toString();
    const outcome = { score: 0, signals: [] };
    // Whitespace-only prompts carry no intent; skip the pattern scan entirely.
    if (source.trim() === '') {
        return outcome;
    }
    const matched = PHRASE_SIGNALS.filter((signal) => signal.pattern.test(source));
    for (const signal of matched) {
        outcome.score += signal.weight;
        outcome.signals.push(signal.label);
    }
    return outcome;
}
|
|
53
|
+
/**
 * Minimum breadth score at which we inject a fan-out hint into the system
 * context. The bound is inclusive: `shouldSuggestFanOut` compares with `>=`.
 *
 * Calibration history:
 *   - Original 1.8 missed common prompts that obviously want fan-out
 *     ("test all the MCP tools" scored 1.5, just under). The dedicated
 *     verb-object-broad pattern now scores 2.0 on its own, so those prompts
 *     clear the threshold without needing a second signal.
 *   - At the current 1.5, any single signal weighted 1.5 or 2.0 clears on its
 *     own; the lighter 0.7 and 1.0 signals need at least one companion.
 *   - False positives cost very little (LLM may fan out when it didn't
 *     strictly need to); false negatives mean a sequential single-thread
 *     turn that should have been parallel.
 */
export const BREADTH_FAN_OUT_THRESHOLD = 1.5;
|
|
67
|
+
/**
 * Decides whether a prompt is broad enough to warrant a fan-out hint.
 *
 * @param {string} prompt - User prompt, forwarded to `detectBreadthIntent`.
 * @returns {{ suggest: boolean, intent: { score: number, signals: string[] } }}
 *   `suggest` is true when the detected score is at or above
 *   `BREADTH_FAN_OUT_THRESHOLD`; `intent` is the detection result itself.
 */
export function shouldSuggestFanOut(prompt) {
    const intent = detectBreadthIntent(prompt);
    const suggest = intent.score >= BREADTH_FAN_OUT_THRESHOLD;
    return { suggest, intent };
}
|
|
71
|
+
/**
 * Builds the system message we inject to nudge the agent toward
 * `spawn_agents`. The message is Markdown with three sections: the matched
 * signals and score, a recommended spawn / wait / synthesize template the
 * model can adapt rather than starting from a blank brief, and a list of
 * anti-patterns to avoid.
 *
 * @param {string} prompt - The user's prompt. Not read; kept so the signature
 *   stays stable for callers.
 * @param {{ score: number, signals: string[] }} intent - Detection result
 *   whose labels and score are echoed in the message. A missing or partial
 *   value is tolerated: absent signals render as "none" and a non-numeric
 *   score renders as 0.0.
 * @returns {string} The hint text, with lines joined by `\n`.
 */
export function buildFanOutHint(prompt, intent) {
    // Be defensive about the intent shape so a malformed caller argument
    // degrades to a generic hint instead of throwing a TypeError mid-turn.
    const matched = Array.isArray(intent?.signals) ? intent.signals : [];
    const numericScore = Number(intent?.score);
    const score = Number.isFinite(numericScore) ? numericScore : 0;
    const signalList = matched.length > 0 ? matched.join(', ') : 'none';
    return [
        '## Fan-out hint (auto-detected)',
        '',
        `The user's request looks broad — matched signals: ${signalList} (score ${score.toFixed(1)}).`,
        'Instead of doing one tool call and stopping, **default to `spawn_agents` with 3–5 parallel children** covering distinct angles, then synthesize their outputs in a final answer.',
        '',
        '## Recommended fan-out template',
        '- `spawn_agents({ agents: [...] })` — pick 3-5 angles relevant to the request.',
        '- After spawning, `wait_agents({ ids: [...] })` to drain the batch.',
        '- Then synthesize: combine each child\'s preview/output into a single response.',
        '',
        '## Anti-patterns to avoid',
        '- Do NOT call a single tool, write a paragraph, then ask "which should we test next?". The user already said to do everything — execute, do not consult.',
        '- Do NOT serialize what can be parallelized. If two child tasks are independent, spawn them together in one `spawn_agents` call.',
        '- Do NOT skip the synthesis step. The user wants the merged result, not a list of pending child ids.',
    ].join('\n');
}
|