@wrongstack/core 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-bridge-DmBiCipY.d.ts +33 -0
- package/dist/compactor-DSl2FK7a.d.ts +17 -0
- package/dist/config-DXrqb41m.d.ts +193 -0
- package/dist/{provider-txgB0Oq9.d.ts → context-u0bryklF.d.ts} +540 -472
- package/dist/coordination/index.d.ts +892 -0
- package/dist/coordination/index.js +2869 -0
- package/dist/coordination/index.js.map +1 -0
- package/dist/defaults/index.d.ts +34 -2309
- package/dist/defaults/index.js +5610 -4608
- package/dist/defaults/index.js.map +1 -1
- package/dist/events-B6Q03pTu.d.ts +290 -0
- package/dist/execution/index.d.ts +260 -0
- package/dist/execution/index.js +1625 -0
- package/dist/execution/index.js.map +1 -0
- package/dist/index.d.ts +81 -11
- package/dist/index.js +7727 -6174
- package/dist/index.js.map +1 -1
- package/dist/infrastructure/index.d.ts +10 -0
- package/dist/infrastructure/index.js +575 -0
- package/dist/infrastructure/index.js.map +1 -0
- package/dist/input-reader-E-ffP2ee.d.ts +12 -0
- package/dist/kernel/index.d.ts +15 -4
- package/dist/kernel/index.js.map +1 -1
- package/dist/logger-BH6AE0W9.d.ts +24 -0
- package/dist/logger-BMQgxvdy.d.ts +12 -0
- package/dist/mcp-servers-BA1Ofmfj.d.ts +100 -0
- package/dist/memory-CEXuo7sz.d.ts +16 -0
- package/dist/mode-CV077NjV.d.ts +27 -0
- package/dist/models/index.d.ts +60 -0
- package/dist/models/index.js +621 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models-registry-DqzwpBQy.d.ts +46 -0
- package/dist/models-registry-Y2xbog0E.d.ts +95 -0
- package/dist/multi-agent-BDfkxL5C.d.ts +351 -0
- package/dist/observability/index.d.ts +353 -0
- package/dist/observability/index.js +691 -0
- package/dist/observability/index.js.map +1 -0
- package/dist/observability-BhnVLBLS.d.ts +67 -0
- package/dist/path-resolver-CPRj4bFY.d.ts +10 -0
- package/dist/path-resolver-Crkt8wTQ.d.ts +54 -0
- package/dist/plugin-CoYYZKdn.d.ts +447 -0
- package/dist/renderer-0A2ZEtca.d.ts +158 -0
- package/dist/sdd/index.d.ts +206 -0
- package/dist/sdd/index.js +864 -0
- package/dist/sdd/index.js.map +1 -0
- package/dist/secret-scrubber-3TLUkiCV.d.ts +31 -0
- package/dist/secret-scrubber-CwYliRWd.d.ts +54 -0
- package/dist/secret-vault-DoISxaKO.d.ts +19 -0
- package/dist/security/index.d.ts +46 -0
- package/dist/security/index.js +536 -0
- package/dist/security/index.js.map +1 -0
- package/dist/selector-BRqzvugb.d.ts +51 -0
- package/dist/session-reader-C3x96CDR.d.ts +150 -0
- package/dist/skill-Bx8jxznf.d.ts +72 -0
- package/dist/storage/index.d.ts +540 -0
- package/dist/storage/index.js +1802 -0
- package/dist/storage/index.js.map +1 -0
- package/dist/{system-prompt-vAB0F54-.d.ts → system-prompt-CG9jU5-5.d.ts} +9 -1
- package/dist/task-graph-BITvWt4t.d.ts +160 -0
- package/dist/tool-executor-CYdZdtno.d.ts +97 -0
- package/dist/types/index.d.ts +26 -4
- package/dist/types/index.js +1787 -4
- package/dist/types/index.js.map +1 -1
- package/dist/utils/index.d.ts +49 -2
- package/dist/utils/index.js +100 -2
- package/dist/utils/index.js.map +1 -1
- package/package.json +34 -2
- package/dist/mode-Pjt5vMS6.d.ts +0 -815
- package/dist/session-reader-9sOTgmeC.d.ts +0 -1087
|
@@ -0,0 +1,2869 @@
|
|
|
1
|
+
import { randomUUID, randomBytes } from 'crypto';
|
|
2
|
+
import * as fsp4 from 'fs/promises';
|
|
3
|
+
import * as path4 from 'path';
|
|
4
|
+
import { EventEmitter } from 'events';
|
|
5
|
+
|
|
6
|
+
// src/coordination/director.ts
|
|
7
|
+
/**
 * Atomically replace `targetPath` with `content`.
 *
 * The payload is written to a uniquely named temp file in the target's
 * directory, fsynced on a best-effort basis, and then renamed over the target,
 * so readers see either the old file or the complete new one.
 *
 * Permissions: when the target already exists its permission bits are kept;
 * otherwise `opts.mode` is applied (if given). Whenever an explicit mode is
 * going to be applied, the temp file is created owner-only (0o600) so the
 * content is never readable by group/other in the window before `chmod`.
 *
 * @param {string} targetPath - Destination file path; parent directories are created.
 * @param {string | Uint8Array} content - Data to write. Strings honour `opts.encoding`.
 * @param {{ encoding?: string, mode?: number }} [opts] - `encoding` defaults to "utf8";
 *   `mode` is used only when the target does not exist yet.
 * @returns {Promise<void>}
 * @throws Re-throws any write/chmod/rename failure after removing the temp file.
 */
async function atomicWrite(targetPath, content, opts = {}) {
  const dir = path4.dirname(targetPath);
  await fsp4.mkdir(dir, { recursive: true });

  // Decide the final permission bits before any data touches the disk.
  let mode;
  try {
    const existing = await fsp4.stat(targetPath);
    mode = existing.mode & 0o777;
  } catch {
    // Target missing (or unreadable): fall back to the caller's preference.
    mode = opts.mode;
  }

  const tmp = path4.join(
    dir,
    `.${path4.basename(targetPath)}.${randomBytes(6).toString("hex")}.tmp`
  );
  // "wx" fails if the temp name already exists instead of clobbering it.
  const writeOptions = { flag: "wx" };
  if (typeof content === "string") {
    writeOptions.encoding = opts.encoding ?? "utf8";
  }
  if (mode !== undefined) {
    // Owner-only until the explicit chmod below; keeps the file fsync-able.
    writeOptions.mode = 0o600;
  }

  try {
    await fsp4.writeFile(tmp, content, writeOptions);
    try {
      const fh = await fsp4.open(tmp, "r+");
      try {
        await fh.sync();
      } finally {
        await fh.close();
      }
    } catch {
      // fsync is best-effort: some filesystems/platforms refuse it.
    }
    if (mode !== undefined) {
      await fsp4.chmod(tmp, mode);
    }
    await fsp4.rename(tmp, targetPath);
  } catch (err) {
    try {
      await fsp4.unlink(tmp);
    } catch {
      // The temp file may never have been created; nothing to clean up.
    }
    throw err;
  }
}
|
|
45
|
+
/**
 * Make sure `dir` exists, creating any missing parent directories.
 *
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>} Resolves with no value once the directory exists.
 */
async function ensureDir(dir) {
  const mkdirOptions = { recursive: true };
  await fsp4.mkdir(dir, mkdirOptions);
}
|
|
48
|
+
|
|
49
|
+
// src/storage/director-state.ts
|
|
50
|
+
/**
 * Debounced on-disk checkpoint of a director run.
 *
 * Every `record*`/`setUsage` call replaces the in-memory snapshot with an
 * updated copy and arms a debounce timer; when it fires the snapshot is
 * serialised as pretty-printed JSON and handed to `atomicWrite` with mode
 * 0o600. Only one write runs at a time: a request arriving mid-write is
 * remembered and re-scheduled once the current write finishes.
 */
var DirectorStateCheckpoint = class {
  snapshot;
  filePath;
  timer = null;
  debounceMs;
  writing = false;
  rewriteRequested = false;
  /** Promise of the write in progress (or the most recently finished one). */
  inflight = Promise.resolve();

  /**
   * @param {string} filePath - Where the checkpoint JSON is written.
   * @param {{ directorRunId: unknown, maxSpawns: unknown, spawnDepth: unknown, maxSpawnDepth: unknown }} init
   *   Values copied verbatim into the initial snapshot.
   * @param {number} [debounceMs=250] - Delay between a mutation and its write.
   */
  constructor(filePath, init, debounceMs = 250) {
    this.filePath = filePath;
    this.debounceMs = debounceMs;
    this.snapshot = {
      version: 1,
      directorRunId: init.directorRunId,
      updatedAt: new Date().toISOString(),
      spawnCount: 0,
      maxSpawns: init.maxSpawns,
      spawnDepth: init.spawnDepth,
      maxSpawnDepth: init.maxSpawnDepth,
      subagents: [],
      tasks: []
    };
  }

  /** @returns the current snapshot object (not a copy). */
  current() {
    return this.snapshot;
  }

  /** Insert or replace the subagent entry with `sub.id` and store the new spawn count. */
  recordSpawn(sub, spawnCount) {
    this.snapshot = {
      ...this.snapshot,
      spawnCount,
      subagents: [...this.snapshot.subagents.filter((s) => s.id !== sub.id), sub]
    };
    this.bumpUpdatedAt();
    this.schedule();
  }

  /** Append `task`, or merge it over the existing entry with the same `taskId`. */
  recordTaskAssigned(task) {
    const exists = this.snapshot.tasks.some((t) => t.taskId === task.taskId);
    this.snapshot = {
      ...this.snapshot,
      tasks: exists
        ? this.snapshot.tasks.map((t) => t.taskId === task.taskId ? { ...t, ...task } : t)
        : [...this.snapshot.tasks, task]
    };
    this.bumpUpdatedAt();
    this.schedule();
  }

  /** Merge `patch` into the task with `taskId`; unknown ids leave the task list unchanged. */
  recordTaskStatus(taskId, patch) {
    this.snapshot = {
      ...this.snapshot,
      tasks: this.snapshot.tasks.map(
        (t) => t.taskId === taskId ? { ...t, ...patch } : t
      )
    };
    this.bumpUpdatedAt();
    this.schedule();
  }

  /** Replace the snapshot's `usage` field. */
  setUsage(usage) {
    this.snapshot = { ...this.snapshot, usage };
    this.bumpUpdatedAt();
    this.schedule();
  }

  /**
   * Write the latest snapshot now and resolve only once that write has
   * finished — used by Director.shutdown().
   *
   * If a debounced write is already running we wait for it first; otherwise
   * `persist()` would merely flag a rewrite and return, and the newest state
   * would be left to a timer that may never fire during shutdown.
   */
  async flush() {
    this.cancelTimer();
    while (this.writing) {
      await this.inflight;
    }
    // The write we waited on may have re-armed the debounce for a pending
    // rewrite; the persist below covers that request.
    this.cancelTimer();
    this.rewriteRequested = false;
    await this.persist();
  }

  /** Drop the pending debounce timer, if any. */
  cancelTimer() {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }

  bumpUpdatedAt() {
    this.snapshot = { ...this.snapshot, updatedAt: new Date().toISOString() };
  }

  /** Arm the debounce timer unless one is already pending. */
  schedule() {
    if (this.timer) return;
    this.timer = setTimeout(() => {
      this.timer = null;
      void this.persist();
    }, this.debounceMs);
  }

  /**
   * Start a write unless one is already running; in that case just note that
   * another write is needed and return without waiting.
   */
  async persist() {
    if (this.writing) {
      this.rewriteRequested = true;
      return;
    }
    this.writing = true;
    this.inflight = this.writeSnapshot();
    await this.inflight;
  }

  /** Perform one write. Never rejects: failures are logged as warnings. */
  async writeSnapshot() {
    try {
      await atomicWrite(this.filePath, JSON.stringify(this.snapshot, null, 2), {
        mode: 0o600
      });
    } catch (err) {
      console.warn(
        "[director-state] checkpoint write failed:",
        err instanceof Error ? err.message : String(err)
      );
    } finally {
      this.writing = false;
      if (this.rewriteRequested) {
        this.rewriteRequested = false;
        this.schedule();
      }
    }
  }
};
|
|
150
|
+
|
|
151
|
+
// src/coordination/in-memory-transport.ts
|
|
152
|
+
/**
 * In-process message transport: keeps a Set of handlers per agent id and
 * invokes them synchronously when a message is sent.
 */
var InMemoryBridgeTransport = class {
  subs = new Map();

  /**
   * Deliver `msg` to the handlers registered for `to`, or — when `to` is "*" —
   * to every agent except the one named by `msg.from`.
   * Exceptions thrown by a handler are ignored so the remaining handlers still run.
   *
   * @returns {Promise<void>} An already-resolved promise.
   */
  send(msg, to) {
    const deliver = (handlerSet) => {
      handlerSet.forEach((handler) => {
        try {
          handler(msg);
        } catch {
          // A failing subscriber must not block the others.
        }
      });
    };

    if (to !== "*") {
      const direct = this.subs.get(to);
      if (direct) deliver(direct);
      return Promise.resolve();
    }

    this.subs.forEach((handlerSet, id) => {
      if (id !== msg.from) deliver(handlerSet);
    });
    return Promise.resolve();
  }

  /**
   * Register `handler` for messages addressed to `agentId`.
   *
   * @returns {() => (boolean | undefined)} Disposer that removes the handler.
   */
  subscribe(agentId, handler) {
    let handlerSet = this.subs.get(agentId);
    if (handlerSet === undefined) {
      handlerSet = new Set();
      this.subs.set(agentId, handlerSet);
    }
    handlerSet.add(handler);
    return () => this.subs.get(agentId)?.delete(handler);
  }

  /** Forget every handler registered for `agentId`. */
  close(agentId) {
    this.subs.delete(agentId);
    return Promise.resolve();
  }
};
|
|
188
|
+
|
|
189
|
+
// src/coordination/agent-bridge.ts
|
|
190
|
+
/**
 * Agent-side endpoint on top of a bridge transport.
 *
 * Incoming messages whose `id` matches a pending `request()` settle that
 * request; all other non-heartbeat messages fan out to `subscribe()` handlers.
 */
var InMemoryAgentBridge = class {
  agentId;
  coordinatorId;
  transport;
  subscriptions = new Set();
  pendingRequests = new Map();
  stopped = false;
  timeoutMs;
  /** Guards request() so concurrent calls on the same id can't silently overwrite. */
  inflightGuards = new Set();

  /**
   * @param {{ agentId: string, coordinatorId: string, timeoutMs?: number }} config
   *   `timeoutMs` defaults to 30000.
   * @param {{ send: Function, subscribe: Function, close: Function }} transport
   */
  constructor(config, transport) {
    this.agentId = config.agentId;
    this.coordinatorId = config.coordinatorId;
    this.transport = transport;
    this.timeoutMs = config.timeoutMs ?? 3e4;
    this.transport.subscribe(this.agentId, (msg) => {
      if (msg.type === "heartbeat") return;
      const pending = this.pendingRequests.get(msg.id);
      if (pending) {
        // A reply: settle the matching request and do not fan out.
        clearTimeout(pending.timer);
        this.pendingRequests.delete(msg.id);
        this.inflightGuards.delete(msg.id);
        pending.resolve(msg);
        return;
      }
      for (const h of this.subscriptions) {
        try {
          h(msg);
        } catch {
          // One failing subscriber must not starve the rest.
        }
      }
    });
  }

  /** Stamp `msg.timestamp` and send it to `msg.to`, defaulting to the coordinator. */
  async send(msg) {
    msg.timestamp = Date.now();
    await this.transport.send(msg, msg.to ?? this.coordinatorId);
  }

  /** Stamp `msg`, set `msg.to = "*"` and send it to every other agent. */
  async broadcast(msg) {
    msg.timestamp = Date.now();
    msg.to = "*";
    await this.transport.send(msg, "*");
  }

  /** Register a handler for unsolicited messages; returns a disposer. */
  subscribe(handler) {
    this.subscriptions.add(handler);
    return () => this.subscriptions.delete(handler);
  }

  /**
   * Send `msg` and resolve with the first incoming message carrying the same `id`.
   *
   * @param {object} msg - Message to send; `msg.id` is the correlation id.
   * @param {number} [timeoutMs] - Overrides the bridge-wide timeout.
   * @returns {Promise<object>} The reply message.
   * @throws {Error} If the bridge is stopped, the id is already in flight,
   *   the send fails, the timeout elapses, or the bridge is stopped while waiting.
   */
  async request(msg, timeoutMs) {
    if (this.stopped) throw new Error("Bridge is stopped");
    const timeout = timeoutMs ?? this.timeoutMs;
    const correlationId = msg.id;
    if (this.inflightGuards.has(correlationId)) {
      throw new Error(
        `Bridge request id "${correlationId}" collides with an in-flight request \u2014 caller is reusing message ids`
      );
    }
    this.inflightGuards.add(correlationId);
    return new Promise((resolve, reject) => {
      const release = () => {
        this.inflightGuards.delete(correlationId);
        this.pendingRequests.delete(correlationId);
      };
      const timer = setTimeout(() => {
        release();
        reject(new Error(`Request ${correlationId} timed out after ${timeout}ms`));
      }, timeout);
      const fail = (e) => {
        clearTimeout(timer);
        release();
        reject(e);
      };
      this.pendingRequests.set(correlationId, { resolve, reject, timer });
      msg.timestamp = Date.now();
      try {
        this.transport.send(msg, msg.to ?? this.coordinatorId).catch(fail);
      } catch (e) {
        // A transport that throws synchronously must not leak the timer/guard.
        fail(e);
      }
    });
  }

  /**
   * Shut the bridge down: reject every request still waiting for a reply,
   * drop all subscriptions and close the transport for this agent.
   */
  async stop() {
    this.stopped = true;
    const abandoned = [...this.pendingRequests.values()];
    this.pendingRequests.clear();
    this.inflightGuards.clear();
    this.subscriptions.clear();
    for (const pending of abandoned) {
      clearTimeout(pending.timer);
      // Without this the caller's promise would never settle.
      pending.reject(new Error("Bridge is stopped"));
    }
    await this.transport.close(this.agentId);
  }
};
|
|
277
|
+
/**
 * Build a bridge message envelope with a fresh UUID, the current time and
 * "normal" priority.
 *
 * @param {string} type - Message type tag.
 * @param {string} from - Sender id.
 * @param {*} payload - Message body, stored as given.
 * @param {string} [to] - Recipient id; left undefined when omitted.
 * @returns {{ id: string, type: string, from: string, to: (string|undefined), payload: *, timestamp: number, priority: string }}
 */
function createMessage(type, from, payload, to) {
  const id = randomUUID();
  const timestamp = Date.now();
  return { id, type, from, to, payload, timestamp, priority: "normal" };
}
|
|
288
|
+
|
|
289
|
+
// src/coordination/director-prompts.ts
|
|
290
|
+
var DEFAULT_DIRECTOR_PREAMBLE = `You are the Director of a multi-agent fleet. You orchestrate worker
|
|
291
|
+
subagents by spawning them, assigning tasks, awaiting completions, and
|
|
292
|
+
rolling up their outputs into your next decision.
|
|
293
|
+
|
|
294
|
+
Core fleet tools available to you:
|
|
295
|
+
- spawn_subagent \u2014 create a worker with a chosen provider / model / role
|
|
296
|
+
- assign_task \u2014 hand a piece of work to a specific subagent
|
|
297
|
+
- await_tasks \u2014 block until named task ids complete (parallel-safe)
|
|
298
|
+
- ask_subagent \u2014 synchronously query a running subagent via the bridge
|
|
299
|
+
- roll_up \u2014 aggregate finished tasks into a markdown/json summary
|
|
300
|
+
- terminate_subagent \u2014 abort a stuck worker (use sparingly)
|
|
301
|
+
- fleet_status \u2014 snapshot of all subagents and pending tasks
|
|
302
|
+
- fleet_usage \u2014 token + cost breakdown per subagent and total
|
|
303
|
+
|
|
304
|
+
Working rules:
|
|
305
|
+
1. Decompose first. Before spawning, decide which sub-tasks are
|
|
306
|
+
independent and can run in parallel. Sequential work doesn't need a
|
|
307
|
+
subagent \u2014 do it yourself.
|
|
308
|
+
2. Match worker to job. Cheap/fast model for triage, capable model for
|
|
309
|
+
synthesis. Different providers per sibling is allowed and encouraged.
|
|
310
|
+
3. Always pair an assign with an await. Don't fire-and-forget; you owe
|
|
311
|
+
the user a single coherent answer at the end.
|
|
312
|
+
4. Roll up before deciding. After await_tasks resolves, call roll_up so
|
|
313
|
+
the results are folded back into your context in a compact form.
|
|
314
|
+
5. Budget is real. Check fleet_usage periodically. If a subagent is
|
|
315
|
+
thrashing, terminate it rather than letting cost climb silently.
|
|
316
|
+
6. Never claim a subagent's work as your own without verifying it. If a
|
|
317
|
+
result looks wrong, ask_subagent for clarification before passing it
|
|
318
|
+
to the user.`;
|
|
319
|
+
var DEFAULT_SUBAGENT_BASELINE = `You are a subagent operating under a Director. You were spawned to handle
|
|
320
|
+
a specific slice of a larger plan \u2014 do that slice well and report back.
|
|
321
|
+
|
|
322
|
+
Bridge contract:
|
|
323
|
+
- You have a parent (the Director). You may call \`request\` on the
|
|
324
|
+
parent bridge to ask a clarifying question. Use this sparingly; the
|
|
325
|
+
parent is also working.
|
|
326
|
+
- You MAY NOT request the parent's system prompt, tool list, or other
|
|
327
|
+
subagents' context. Those are not yours to read.
|
|
328
|
+
- Your final task output is what the Director sees. Be concise,
|
|
329
|
+
structured, and self-contained \u2014 assume the Director will paste your
|
|
330
|
+
output into its own context.`;
|
|
331
|
+
/**
 * Assemble the Director's system prompt from up to three sections, in order:
 * the preamble, the roster of spawnable roles, and the base prompt.
 * Each section is trimmed; empty or whitespace-only sections are dropped and
 * the remainder is joined with a blank line.
 *
 * @param {{ directorPreamble?: string, rosterSummary?: string, basePrompt?: string }} [parts]
 *   `directorPreamble` falls back to DEFAULT_DIRECTOR_PREAMBLE when null/undefined.
 * @returns {string} The composed prompt ("" when every section is empty).
 */
function composeDirectorPrompt(parts = {}) {
  const verbatim = (text) => text;
  const candidates = [
    [parts.directorPreamble ?? DEFAULT_DIRECTOR_PREAMBLE, verbatim],
    [parts.rosterSummary, (text) => `Available roles you can spawn:\n${text}`],
    [parts.basePrompt, verbatim]
  ];
  const kept = [];
  for (const [raw, render] of candidates) {
    if (!raw) continue;
    const trimmed = raw.trim();
    if (trimmed.length > 0) kept.push(render(trimmed));
  }
  return kept.join("\n\n");
}
|
|
344
|
+
/**
 * Assemble a subagent's system prompt. Sections appear in this order, each
 * trimmed and skipped when empty or whitespace-only: baseline, "Role:",
 * "Task:", shared-scratchpad notes, and a final override. Sections are joined
 * with a blank line.
 *
 * @param {{ baseline?: string, role?: string, task?: string, sharedScratchpad?: string, override?: string }} [parts]
 *   `baseline` falls back to DEFAULT_SUBAGENT_BASELINE when null/undefined;
 *   `sharedScratchpad` is the path quoted in the shared-notes section.
 * @returns {string} The composed prompt.
 */
function composeSubagentPrompt(parts = {}) {
  const verbatim = (text) => text;
  const scratchpadNotes = (mountPath) => [
    "Shared notes:",
    `A scratchpad shared with the rest of the fleet is mounted at \`${mountPath}\`.`,
    "- Write your final findings as markdown files there (e.g. `findings.md`, `security.md`).",
    "- Before starting, list the directory and read any sibling files relevant to your task \u2014 they may already contain context you can build on.",
    "- Use stable filenames (one file per concern); overwrite instead of appending so the Director sees the latest state."
  ].join("\n");

  const candidates = [
    [parts.baseline ?? DEFAULT_SUBAGENT_BASELINE, verbatim],
    [parts.role, (text) => `Role:\n${text}`],
    [parts.task, (text) => `Task:\n${text}`],
    [parts.sharedScratchpad, scratchpadNotes],
    [parts.override, verbatim]
  ];
  const kept = [];
  for (const [raw, render] of candidates) {
    if (!raw) continue;
    const trimmed = raw.trim();
    if (trimmed.length > 0) kept.push(render(trimmed));
  }
  return kept.join("\n\n");
}
|
|
370
|
+
/**
 * Render a roster as one bullet line per role:
 * `- <roleId>: <name>[ (<provider>/<model>)][ — <headline>]`.
 * The provider/model tag appears only when both are set; the headline is the
 * first non-blank line of `cfg.prompt`, trimmed and capped at 80 characters.
 *
 * @param {Record<string, { name: string, provider?: string, model?: string, prompt?: string }>} roster
 * @returns {string} The bullet lines joined with "\n" ("" for an empty roster).
 */
function rosterSummaryFromConfigs(roster) {
  const describeRole = ([roleId, cfg]) => {
    let tag = "";
    if (cfg.provider && cfg.model) {
      tag = ` (${cfg.provider}/${cfg.model})`;
    }

    let headline = "";
    if (cfg.prompt) {
      const firstLine = cfg.prompt.split("\n").find((line) => line.trim().length > 0);
      headline = (firstLine ?? "").trim().slice(0, 80);
    }
    const tail = headline ? ` \u2014 ${headline}` : "";

    return `- ${roleId}: ${cfg.name}${tag}${tail}`;
  };
  return Object.entries(roster).map(describeRole).join("\n");
}
|
|
380
|
+
|
|
381
|
+
// src/coordination/fleet-bus.ts
|
|
382
|
+
/**
 * Fan-in event bus for a fleet of subagents. Events forwarded from each
 * subagent's own bus are re-emitted here tagged with the subagent id, and
 * listeners can subscribe per subagent, per event type, or to everything.
 */
var FleetBus = class {
  byId = new Map();
  byType = new Map();
  any = new Set();

  /**
   * Hook a subagent's EventBus into the fleet by subscribing to an explicit
   * list of event types. Event types missing from the list are not forwarded,
   * so new kernel event types must be added here.
   *
   * @param {string} subagentId - Tag applied to every forwarded event.
   * @param {{ on(type: string, handler: Function): Function }} bus - The
   *   subagent's bus; `on` is expected to return an unsubscribe function.
   * @param {string} [taskId] - Copied onto every forwarded event.
   * @returns {() => void} Disposer that detaches every subscription; call it
   *   on subagent teardown so the listeners don't outlive the run.
   */
  attach(subagentId, bus, taskId) {
    const FORWARDED_TYPES = [
      "tool.started",
      "tool.executed",
      "tool.progress",
      "tool.confirm_needed",
      "iteration.started",
      "iteration.completed",
      "provider.text_delta",
      // Extended-thinking output, forwarded so consumers can show activity
      // between iteration.started and the first text_delta.
      "provider.thinking_delta",
      "provider.response",
      "provider.retry",
      "provider.error",
      "session.started",
      "session.ended",
      "session.damaged",
      "compaction.fired",
      "compaction.failed",
      "token.threshold"
    ];
    const offs = FORWARDED_TYPES.map(
      (type) => bus.on(type, (payload) => {
        this.emit({ subagentId, taskId, ts: Date.now(), type, payload });
      })
    );
    return () => {
      for (const off of offs) off();
    };
  }

  /** Subscribe to every event from one subagent. Returns a disposer. */
  subscribe(subagentId, handler) {
    return this.register(this.byId, subagentId, handler);
  }

  /** Subscribe to one event type across all subagents. Returns a disposer. */
  filter(type, handler) {
    return this.register(this.byType, type, handler);
  }

  /** Subscribe to literally everything. Returns a disposer. */
  onAny(handler) {
    this.any.add(handler);
    return () => {
      this.any.delete(handler);
    };
  }

  /**
   * Deliver `event` to the per-subagent, per-type and catch-all listeners, in
   * that order. A throwing listener is ignored so the others still run.
   */
  emit(event) {
    this.dispatch(this.byId.get(event.subagentId), event);
    this.dispatch(this.byType.get(event.type), event);
    this.dispatch(this.any, event);
  }

  /** Add `handler` under `key` in `index`; the disposer prunes emptied Sets. */
  register(index, key, handler) {
    let handlers = index.get(key);
    if (!handlers) {
      handlers = new Set();
      index.set(key, handlers);
    }
    handlers.add(handler);
    return () => {
      handlers.delete(handler);
      // Drop the bucket once empty so the index does not keep one Set per
      // subagent id / type ever seen. The identity check protects a newer
      // bucket created after this one was pruned.
      if (handlers.size === 0 && index.get(key) === handlers) {
        index.delete(key);
      }
    };
  }

  /** Invoke every handler in `handlers` (if any), isolating failures. */
  dispatch(handlers, event) {
    if (!handlers) return;
    for (const h of handlers) {
      try {
        h(event);
      } catch {
        // Listener errors must not interrupt event delivery.
      }
    }
  }
};
|
|
488
|
+
/**
 * Accumulates token usage, cost, tool-call and iteration counts per subagent
 * and fleet-wide by listening to `provider.response`, `tool.executed` and
 * `iteration.started` events on a fleet bus.
 */
var FleetUsageAggregator = class {
  bus;
  priceLookup;
  metaLookup;
  perSubagent = new Map();
  total = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0 };
  /** Disposers returned by `bus.filter`, kept so `dispose()` can detach. */
  offs = [];

  /**
   * @param {{ filter(type: string, handler: Function): Function }} bus
   * @param {(subagentId: string) => ({ input?: number, output?: number, cacheRead?: number, cacheWrite?: number } | undefined)} [priceLookup]
   *   Prices are applied per 1e6 tokens (see the cost computation below).
   * @param {(subagentId: string) => ({ provider?: string, model?: string } | undefined)} [metaLookup]
   *   Consulted once, when a subagent is first seen.
   */
  constructor(bus, priceLookup, metaLookup) {
    this.bus = bus;
    this.priceLookup = priceLookup;
    this.metaLookup = metaLookup;
    this.offs = [
      bus.filter("provider.response", (e) => this.onProviderResponse(e)),
      bus.filter("tool.executed", (e) => this.onToolExecuted(e)),
      bus.filter("iteration.started", (e) => this.onIterationStarted(e))
    ];
  }

  /** Detach from the bus. Safe to call more than once. */
  dispose() {
    const offs = this.offs;
    this.offs = [];
    for (const off of offs) off?.();
  }

  /** Live snapshot — returns copies, so callers cannot mutate the counters. */
  snapshot() {
    const perSubagent = {};
    for (const [id, snap] of this.perSubagent) {
      perSubagent[id] = { ...snap };
    }
    return { total: { ...this.total }, perSubagent };
  }

  /** Return the counters for `subagentId`, creating a zeroed record on first use. */
  ensure(subagentId) {
    let snap = this.perSubagent.get(subagentId);
    if (!snap) {
      const meta = this.metaLookup?.(subagentId);
      snap = {
        subagentId,
        provider: meta?.provider,
        model: meta?.model,
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        cost: 0,
        toolCalls: 0,
        iterations: 0,
        startedAt: Date.now(),
        lastEventAt: Date.now()
      };
      this.perSubagent.set(subagentId, snap);
    }
    return snap;
  }

  /** Fold a provider response's `payload.usage` into the counters and cost. */
  onProviderResponse(e) {
    const snap = this.ensure(e.subagentId);
    // Record activity first: a response without usage data is still an event.
    snap.lastEventAt = e.ts;
    const usage = e.payload?.usage;
    if (!usage) return;

    const price = this.priceLookup?.(e.subagentId);
    let delta = 0;
    for (const key of ["input", "output", "cacheRead", "cacheWrite"]) {
      const tokens = usage[key] ?? 0;
      snap[key] += tokens;
      this.total[key] += tokens;
      if (price) {
        delta += tokens / 1e6 * (price[key] ?? 0);
      }
    }
    if (price) {
      snap.cost += delta;
      this.total.cost += delta;
    }
  }

  onToolExecuted(e) {
    const snap = this.ensure(e.subagentId);
    snap.toolCalls += 1;
    snap.lastEventAt = e.ts;
  }

  onIterationStarted(e) {
    const snap = this.ensure(e.subagentId);
    snap.iterations += 1;
    snap.lastEventAt = e.ts;
  }
};
|
|
565
|
+
|
|
566
|
+
// src/types/errors.ts
|
|
567
|
+
/**
 * Base error for the package: carries a machine-readable `code`, the
 * originating `subsystem`, a `severity`, a `recoverable` flag and an optional
 * free-form `context` object.
 */
var WrongStackError = class extends Error {
  code;
  subsystem;
  severity;
  recoverable;
  context;

  /**
   * @param {{ message: string, code: string, subsystem: string, severity?: string,
   *   recoverable?: boolean, context?: object, cause?: unknown }} opts
   *   `severity` defaults to "error" and `recoverable` to false.
   */
  constructor(opts) {
    // Pass `cause` only when supplied; `{ cause: undefined }` would still
    // install an own `cause` property on the error.
    super(opts.message, opts.cause !== undefined ? { cause: opts.cause } : undefined);
    this.name = "WrongStackError";
    this.code = opts.code;
    this.subsystem = opts.subsystem;
    this.severity = opts.severity ?? "error";
    this.recoverable = opts.recoverable ?? false;
    this.context = opts.context;
  }

  /**
   * Render a one-line user-facing description: `<code>: <message>` followed by
   * the formatted context when it has something to show.
   * Subclasses should override for domain-specific formatting.
   *
   * @returns {string}
   */
  describe() {
    const rendered = this.context ? formatContext(this.context) : "";
    // An empty rendering (no defined context values) must not leave a trailing space.
    const suffix = rendered ? ` ${rendered}` : "";
    return `${this.code}: ${this.message}${suffix}`;
  }
};
|
|
591
|
+
/**
 * Render up to three defined entries of `ctx` as `[k1=v1 k2=v2 k3=v3]`.
 * Entries whose value is `undefined` are skipped; values are stringified with
 * `String()`.
 *
 * @param {object} ctx - Context object to render.
 * @returns {string} The bracketed list, or "" when nothing is defined.
 */
function formatContext(ctx) {
  const rendered = [];
  for (const [key, value] of Object.entries(ctx)) {
    if (value === undefined) continue;
    rendered.push(`${key}=${String(value)}`);
    if (rendered.length === 3) break;
  }
  return rendered.length === 0 ? "" : `[${rendered.join(" ")}]`;
}
|
|
595
|
+
|
|
596
|
+
// src/types/provider.ts
|
|
597
|
+
/**
 * Error raised for a failed provider call. Adds the HTTP `status`, a
 * `retryable` flag, the `providerId` and the optional parsed error `body` on
 * top of WrongStackError.
 */
var ProviderError = class extends WrongStackError {
  status;
  retryable;
  providerId;
  body;

  /**
   * @param {string} message - Error message.
   * @param {number} status - HTTP status; 0 is mapped to a network error.
   * @param {boolean} retryable - Stored as both `retryable` and `recoverable`.
   * @param {string} providerId - Provider identifier.
   * @param {{ body?: { type?: string, message?: string, requestId?: string }, cause?: unknown }} [opts]
   */
  constructor(message, status, retryable, providerId, opts = {}) {
    const { body, cause } = opts;
    const code = providerStatusToCode(status, body?.type);
    // 5xx (and above) is reported as an error; everything else as a warning.
    const severity = status >= 500 ? "error" : "warning";
    super({
      message,
      code,
      subsystem: "provider",
      severity,
      recoverable: retryable,
      context: { providerId, status },
      cause
    });
    this.name = "ProviderError";
    this.status = status;
    this.retryable = retryable;
    this.providerId = providerId;
    this.body = body;
  }

  /**
   * Render a one-line, user-facing description instead of dumping the raw
   * error body: `<providerId> <status summary>[: <body message>][ [req <id>]]`.
   * The body message is capped at 240 characters and the request id at 16
   * (with an ellipsis when shortened).
   *
   * Examples:
   *   "openai rate limited (429): Retry after 12s"
   *   "groq HTTP 500 (server error)"
   *
   * @returns {string}
   */
  describe() {
    const label = `${this.providerId} ${describeStatus(this.status, this.body?.type)}`;
    const detail = this.body?.message?.trim();

    let requestTag = "";
    const requestId = this.body?.requestId;
    if (requestId) {
      const shortId = requestId.slice(0, 16);
      const ellipsis = requestId.length > 16 ? "\u2026" : "";
      requestTag = ` [req ${shortId}${ellipsis}]`;
    }

    if (!detail) {
      return `${label}${requestTag}`;
    }
    return `${label}: ${truncate(detail, 240)}${requestTag}`;
  }
};
|
|
641
|
+
/**
 * Turn an HTTP status (and optional provider error type) into a short
 * human-readable label such as "rate limited (429)".
 *
 * Status 0 always means "network error". Otherwise the first rule in the
 * table whose error type OR status code matches wins, then 408, then the 5xx
 * range, then the raw type, and finally a bare "HTTP <status>".
 *
 * @param {number} status - HTTP status code (0 for transport failures).
 * @param {string} [type] - Provider-supplied error type.
 * @returns {string}
 */
function describeStatus(status, type) {
  if (status === 0) return "network error";

  // Order matters: earlier rows take precedence when several could match.
  const knownKinds = [
    ["overloaded_error", 529, "overloaded"],
    ["rate_limit_error", 429, "rate limited"],
    ["authentication_error", 401, "auth failed"],
    ["permission_error", 403, "forbidden"],
    ["not_found_error", 404, "not found"],
    ["invalid_request_error", 400, "invalid request"]
  ];
  const hit = knownKinds.find(([errType, code]) => type === errType || status === code);
  if (hit) {
    return `${hit[2]} (${status})`;
  }

  if (status === 408) return `timeout (${status})`;
  if (status >= 500 && status < 600) return `HTTP ${status} (server error)`;
  return type ? `${type} (${status})` : `HTTP ${status}`;
}
|
|
654
|
+
/**
 * Shorten `s` to at most `n` UTF-16 code units, ending with an ellipsis when
 * anything was cut.
 *
 * @param {string} s - Text to shorten.
 * @param {number} n - Maximum length of the result.
 * @returns {string} `s` itself when it already fits; "" when `n` leaves no
 *   room for any character; otherwise a prefix followed by "\u2026".
 */
function truncate(s, n) {
  if (s.length <= n) return s;
  // No room even for the ellipsis: slicing with a negative end would return
  // something longer than requested.
  if (n <= 0) return "";

  let end = n - 1;
  if (end > 0) {
    const last = s.charCodeAt(end - 1);
    // Don't leave a lone high surrogate (half of an astral character) behind.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }
  return `${s.slice(0, end)}\u2026`;
}
|
|
657
|
+
/**
 * Map an HTTP status (and optional provider error type) to one of the
 * PROVIDER_* error codes. Rules are checked top to bottom and the first match
 * wins; anything unmatched is reported as PROVIDER_INVALID_REQUEST.
 *
 * @param {number} status - HTTP status code (0 for transport failures).
 * @param {string} [type] - Provider-supplied error type.
 * @returns {string}
 */
function providerStatusToCode(status, type) {
  const typeOrStatus = (errType, code) => () => type === errType || status === code;
  // Order matters: e.g. 529 must be classified before the generic >= 500 rule.
  const rules = [
    [() => status === 0, "PROVIDER_NETWORK_ERROR"],
    [typeOrStatus("rate_limit_error", 429), "PROVIDER_RATE_LIMITED"],
    [typeOrStatus("authentication_error", 401), "PROVIDER_AUTH_FAILED"],
    [typeOrStatus("overloaded_error", 529), "PROVIDER_OVERLOADED"],
    [typeOrStatus("invalid_request_error", 400), "PROVIDER_INVALID_REQUEST"],
    [() => status === 408, "PROVIDER_NETWORK_ERROR"],
    [() => status >= 500, "PROVIDER_SERVER_ERROR"]
  ];
  for (const [applies, code] of rules) {
    if (applies()) return code;
  }
  return "PROVIDER_INVALID_REQUEST";
}
|
|
667
|
+
|
|
668
|
+
// src/coordination/subagent-budget.ts
|
|
669
|
+
/**
 * Thrown when a subagent exceeds one of its budget limits.
 * `kind` names the exhausted budget, `limit` is the configured ceiling and
 * `observed` the value that crossed it.
 */
var BudgetExceededError = class extends Error {
  kind;
  limit;
  observed;

  /**
   * @param {string} kind - Which budget was exceeded.
   * @param {number} limit - Configured limit.
   * @param {number} observed - Value observed when the limit was crossed.
   */
  constructor(kind, limit, observed) {
    const summary = `Budget exceeded: ${kind} (limit=${limit}, observed=${observed})`;
    super(summary);
    this.name = "BudgetExceededError";
    Object.assign(this, { kind, limit, observed });
  }
};
|
|
681
|
+
/**
 * Tracks a subagent's consumption (iterations, tool calls, tokens, cost,
 * wall-clock time) against optional limits and throws BudgetExceededError as
 * soon as a recorded value goes past its limit. Limits left undefined are not
 * enforced.
 */
var SubagentBudget = class {
  limits;
  iterations = 0;
  toolCalls = 0;
  tokenInput = 0;
  tokenOutput = 0;
  costUsd = 0;
  startTime = null;

  /**
   * @param {{ maxIterations?: number, maxToolCalls?: number, maxTokens?: number,
   *   maxCostUsd?: number, timeoutMs?: number }} [limits] - Copied and frozen.
   */
  constructor(limits = {}) {
    this.limits = Object.freeze({ ...limits });
  }

  /** Start (or restart) the wall-clock timer used by the timeout checks. */
  start() {
    this.startTime = Date.now();
  }

  /** Count one iteration. @throws {BudgetExceededError} kind "iterations". */
  recordIteration() {
    this.iterations++;
    if (this.limits.maxIterations !== void 0 && this.iterations > this.limits.maxIterations) {
      throw new BudgetExceededError("iterations", this.limits.maxIterations, this.iterations);
    }
  }

  /** Count one tool call. @throws {BudgetExceededError} kind "tool_calls". */
  recordToolCall() {
    this.toolCalls++;
    if (this.limits.maxToolCalls !== void 0 && this.toolCalls > this.limits.maxToolCalls) {
      throw new BudgetExceededError("tool_calls", this.limits.maxToolCalls, this.toolCalls);
    }
  }

  /**
   * Add a response's token usage and cost to the running totals.
   *
   * @param {{ input?: number, output?: number }} usage - Token counts; a
   *   missing field counts as 0.
   * @param {number} [costUsd=0] - Cost of this response.
   * @throws {BudgetExceededError} kind "tokens" or "cost".
   */
  recordUsage(usage, costUsd = 0) {
    // Missing fields must count as 0: adding `undefined` would turn the totals
    // into NaN, and every later `>` comparison against the limit would be
    // false — silently disabling the token budget.
    this.tokenInput += usage?.input ?? 0;
    this.tokenOutput += usage?.output ?? 0;
    this.costUsd += costUsd;
    const totalTokens = this.tokenInput + this.tokenOutput;
    if (this.limits.maxTokens !== void 0 && totalTokens > this.limits.maxTokens) {
      throw new BudgetExceededError("tokens", this.limits.maxTokens, totalTokens);
    }
    if (this.limits.maxCostUsd !== void 0 && this.costUsd > this.limits.maxCostUsd) {
      throw new BudgetExceededError("cost", this.limits.maxCostUsd, this.costUsd);
    }
  }

  /**
   * Throws if the wall-clock budget is exhausted. Call this from the iteration
   * loop so a hung tool can't keep a subagent running past its deadline.
   * Does nothing before `start()` or when no `timeoutMs` limit is set.
   *
   * @throws {BudgetExceededError} kind "timeout".
   */
  checkTimeout() {
    if (this.startTime === null || this.limits.timeoutMs === void 0) return;
    const elapsed = Date.now() - this.startTime;
    if (elapsed > this.limits.timeoutMs) {
      throw new BudgetExceededError("timeout", this.limits.timeoutMs, elapsed);
    }
  }

  /** Returns true if a timeout has occurred without throwing. Useful for races. */
  isTimedOut() {
    if (this.startTime === null || this.limits.timeoutMs === void 0) return false;
    return Date.now() - this.startTime > this.limits.timeoutMs;
  }

  /** @returns a plain-object summary of everything recorded so far. */
  usage() {
    return {
      iterations: this.iterations,
      toolCalls: this.toolCalls,
      tokens: {
        input: this.tokenInput,
        output: this.tokenOutput,
        total: this.tokenInput + this.tokenOutput
      },
      costUsd: this.costUsd,
      elapsedMs: this.startTime === null ? 0 : Date.now() - this.startTime
    };
  }
};
|
|
749
|
+
|
|
750
|
+
// src/coordination/multi-agent-coordinator.ts
|
|
751
|
+
/**
 * In-process coordinator that keeps a registry of subagents, queues tasks,
 * dispatches them to idle subagents (bounded by `config.maxConcurrent`,
 * default 4) and records a result for every task.
 *
 * Events emitted: `subagent.started`, `subagent.stopped`, `task.assigned`,
 * `task.completed`, `warning`, `done`.
 */
var DefaultMultiAgentCoordinator = class extends EventEmitter {
  coordinatorId;
  config;
  runner;
  subagents = /* @__PURE__ */ new Map();
  pendingTasks = [];
  completedResults = [];
  totalIterations = 0;
  inFlight = 0;
  /**
   * Subagents currently being stopped. Set on entry to `stop()`, cleared
   * once `recordCompletion` lands the terminal TaskResult. Used by
   * `runDispatched` and `findIdleSubagent` to refuse mid-flight dispatch
   * to a subagent the caller has already asked to terminate — closes the
   * assign+terminate race where a fresh task could land on a worker that
   * was about to be killed.
   */
  terminating = /* @__PURE__ */ new Set();
  /**
   * @param {object} config - Coordinator config; reads `coordinatorId`,
   *   `doneCondition`, `maxConcurrent` and `defaultBudget`.
   * @param {{runner?: Function}} [options] - Optional task runner
   *   `(task, runCtx) => Promise<{result, iterations, toolCalls}>`.
   */
  constructor(config, options = {}) {
    super();
    this.coordinatorId = config.coordinatorId;
    this.config = config;
    this.runner = options.runner;
  }
  /**
   * Replace the runner after construction. Used when the runner depends
   * on infrastructure (e.g. FleetBus) that isn't available until after
   * the coordinator's owning Director is built.
   */
  setRunner(runner) {
    this.runner = runner;
  }
  /**
   * Registers a new idle subagent and emits `subagent.started`.
   * @param {object} subagent - Subagent config; `id` is generated when falsy.
   * @returns {Promise<{subagentId: string, agentId: string}>}
   * @throws {Error} when a subagent with the same id is already registered.
   */
  async spawn(subagent) {
    const id = subagent.id || randomUUID();
    if (this.subagents.has(id)) {
      throw new Error(`Subagent id "${id}" already exists \u2014 refusing to overwrite`);
    }
    const context = {
      subagentId: id,
      tasks: [],
      // Wired later by the caller via setSubagentBridge() once the
      // bidirectional bridge is created. Readers must null-check / use
      // hasParentBridge() — the type now reflects this.
      parentBridge: null,
      doneCondition: this.config.doneCondition,
      maxConcurrent: this.config.maxConcurrent ?? 4
    };
    this.subagents.set(id, {
      config: { ...subagent, id },
      context,
      status: "idle",
      abortController: new AbortController()
    });
    this.emit("subagent.started", { subagent: { ...subagent, id } });
    return { subagentId: id, agentId: id };
  }
  /** Queues a task and immediately tries to dispatch it. */
  async assign(task) {
    this.pendingTasks.push(task);
    this.tryDispatchNext();
  }
  /**
   * Sends a message to a subagent through its parent bridge.
   * @throws {Error} when the subagent is unknown or has no bridge wired yet.
   */
  async delegate(to, msg) {
    const subagent = this.subagents.get(to);
    if (!subagent) throw new Error(`Subagent "${to}" not found`);
    if (!subagent.context.parentBridge) {
      throw new Error(`Subagent "${to}" has no parentBridge \u2014 call setSubagentBridge() first`);
    }
    await subagent.context.parentBridge.send(msg);
  }
  /**
   * Wire up the communication bridge for a subagent. Call after spawn() once
   * the caller has created the bidirectional connection.
   * @throws {Error} when the subagent is unknown.
   */
  setSubagentBridge(subagentId, bridge) {
    const subagent = this.subagents.get(subagentId);
    if (!subagent) throw new Error(`Subagent "${subagentId}" not found`);
    subagent.context.parentBridge = bridge;
  }
  /**
   * Aborts a subagent, marks it `stopped`, drops its bridge reference and
   * emits `subagent.stopped`. Unknown ids are ignored.
   */
  async stop(subagentId) {
    const subagent = this.subagents.get(subagentId);
    if (!subagent) return;
    this.terminating.add(subagentId);
    subagent.abortController.abort();
    subagent.status = "stopped";
    subagent.currentTask = void 0;
    subagent.context.parentBridge = null;
    this.emit("subagent.stopped", { subagentId, reason: "stopped by coordinator" });
  }
  /** Drains the pending queue as aborted, then stops every registered subagent. */
  async stopAll() {
    this.drainPendingAsAborted("Coordinator stopAll() drained the pending queue");
    await Promise.allSettled([...this.subagents.keys()].map((id) => this.stop(id)));
  }
  /** Returns a point-in-time summary of subagents, queue sizes and done state. */
  getStatus() {
    return {
      coordinatorId: this.coordinatorId,
      subagents: Array.from(this.subagents.entries()).map(([id, s]) => ({
        id,
        name: s.config.name,
        status: s.status,
        currentTask: s.currentTask
      })),
      pendingTasks: this.pendingTasks.length,
      completedTasks: this.completedResults.length,
      totalIterations: this.totalIterations,
      done: this.isDone()
    };
  }
  /**
   * Expose completed results — useful for callers awaiting all done.
   * Returns the coordinator's own array (not a copy).
   */
  results() {
    return this.completedResults;
  }
  /**
   * Manual completion — for callers that drive subagents without a runner
   * (e.g. external orchestrators). When a runner is configured the coordinator
   * calls this itself.
   */
  completeTask(result) {
    this.recordCompletion(result);
  }
  // --- internal dispatching ---------------------------------------------
  /**
   * Hands pending tasks to idle subagents while capacity allows. If tasks
   * are queued but no subagent can ever take them, the queue is drained as
   * aborted instead of waiting forever.
   */
  tryDispatchNext() {
    while (this.canDispatch()) {
      const subagentId = this.findIdleSubagent();
      if (!subagentId) {
        if (this.pendingTasks.length > 0 && !this.hasLiveSubagent()) {
          this.drainPendingAsAborted(
            "No live subagent available \u2014 all stopped or mid-termination"
          );
        }
        return;
      }
      const task = this.pendingTasks.shift();
      if (!task) return;
      // Fire-and-forget: any unexpected rejection is converted into a
      // "failed" result so the task is never lost silently.
      this.runDispatched(subagentId, task).catch((err) => {
        this.recordCompletion({
          subagentId,
          taskId: task.id,
          status: "failed",
          error: classifySubagentError(err),
          iterations: 0,
          toolCalls: 0,
          durationMs: 0
        });
      });
    }
  }
  /** True when there is queued work and the concurrency cap is not reached. */
  canDispatch() {
    const max = this.config.maxConcurrent ?? 4;
    return this.inFlight < max && this.pendingTasks.length > 0;
  }
  /** Returns the id of the first idle, non-terminating subagent, or null. */
  findIdleSubagent() {
    for (const [id, s] of this.subagents) {
      if (s.status === "idle" && !this.terminating.has(id)) return id;
    }
    return null;
  }
  /**
   * Returns true iff at least one spawned subagent could still
   * process a task. A "live" subagent is one that is not stopped
   * AND not mid-termination — `running` workers count because they
   * will eventually finish and become idle.
   *
   * When no subagent has ever been spawned, returns `true` so a
   * pre-spawn `assign()` simply queues (legacy behaviour). The
   * dead-end detection only fires after `stop()` has retired every
   * spawned worker.
   *
   * Used by `tryDispatchNext` to detect a dead-end pending queue.
   */
  hasLiveSubagent() {
    if (this.subagents.size === 0) return true;
    for (const [id, s] of this.subagents) {
      if (s.status !== "stopped" && !this.terminating.has(id)) return true;
    }
    return false;
  }
  /**
   * Drain every pending task with a synthetic `aborted_by_parent`
   * completion event. Same shape as the `stopAll()` drain — we go
   * around `recordCompletion` because pending tasks were never
   * counted in `inFlight` and routing them through would trip the
   * underflow guard on every task after the first.
   */
  drainPendingAsAborted(message) {
    const dropped = this.pendingTasks.splice(0, this.pendingTasks.length);
    for (const t of dropped) {
      const synthetic = {
        subagentId: t.subagentId ?? "unassigned",
        taskId: t.id,
        status: "stopped",
        error: {
          kind: "aborted_by_parent",
          message,
          retryable: false
        },
        iterations: 0,
        toolCalls: 0,
        durationMs: 0
      };
      this.completedResults.push(synthetic);
      this.emit("task.completed", { task: t, result: synthetic });
    }
  }
  /**
   * Runs one task on one subagent: marks the subagent running, builds its
   * budget (task overrides > subagent config > coordinator default), invokes
   * the runner under the timeout and records the outcome. Without a runner
   * the subagent is left `running` until the caller invokes `completeTask()`.
   */
  async runDispatched(subagentId, task) {
    const subagent = this.subagents.get(subagentId);
    if (!subagent) return;
    if (this.terminating.has(subagentId) || subagent.status === "stopped") {
      this.recordCompletion({
        subagentId,
        taskId: task.id,
        status: "stopped",
        error: {
          kind: "aborted_by_parent",
          message: "Subagent was terminated before task could start",
          retryable: false
        },
        iterations: 0,
        toolCalls: 0,
        durationMs: 0
      });
      return;
    }
    subagent.status = "running";
    subagent.currentTask = task.id;
    task.subagentId = subagentId;
    subagent.context.tasks.push(task);
    this.emit("task.assigned", { task, subagentId });
    const budget = new SubagentBudget({
      maxIterations: subagent.config.maxIterations ?? this.config.defaultBudget?.maxIterations,
      maxToolCalls: task.maxToolCalls ?? subagent.config.maxToolCalls ?? this.config.defaultBudget?.maxToolCalls,
      maxTokens: subagent.config.maxTokens ?? this.config.defaultBudget?.maxTokens,
      maxCostUsd: subagent.config.maxCostUsd ?? this.config.defaultBudget?.maxCostUsd,
      timeoutMs: task.timeoutMs ?? subagent.config.timeoutMs ?? this.config.defaultBudget?.timeoutMs
    });
    subagent.activeBudget = budget;
    if (!this.runner) {
      return;
    }
    this.inFlight++;
    const startTime = Date.now();
    const runCtx = {
      subagentId,
      config: subagent.config,
      budget,
      signal: subagent.abortController.signal,
      bridge: subagent.context.parentBridge ?? null
    };
    let result;
    budget.start();
    try {
      const outcome = await this.executeWithTimeout(this.runner, task, runCtx, budget);
      result = {
        subagentId,
        taskId: task.id,
        status: "success",
        result: outcome.result,
        iterations: outcome.iterations,
        toolCalls: outcome.toolCalls,
        durationMs: Date.now() - startTime
      };
    } catch (err) {
      // A timeout wins over "stopped": the timeout path itself aborts the
      // controller, so the aborted flag alone cannot tell the two apart.
      const status = err instanceof BudgetExceededError && err.kind === "timeout" ? "timeout" : subagent.abortController.signal.aborted ? "stopped" : "failed";
      const usage = budget.usage();
      result = {
        subagentId,
        taskId: task.id,
        status,
        error: classifySubagentError(err, {
          parentAborted: subagent.abortController.signal.aborted
        }),
        iterations: usage.iterations,
        toolCalls: usage.toolCalls,
        durationMs: Date.now() - startTime
      };
    }
    this.recordCompletion(result);
  }
  /**
   * Invokes the runner, racing it against `budget.limits.timeoutMs` when one
   * is set. On timeout the subagent's AbortController is aborted and the
   * returned promise rejects with a `"timeout"` BudgetExceededError whose
   * `observed` value is the elapsed time in milliseconds.
   */
  async executeWithTimeout(runner, task, ctx, budget) {
    const timeoutMs = budget.limits.timeoutMs;
    if (timeoutMs === void 0) return runner(task, ctx);
    const startedAt = Date.now();
    let timer = null;
    const timeoutPromise = new Promise((_, reject) => {
      timer = setTimeout(() => {
        this.subagents.get(ctx.subagentId)?.abortController.abort();
        // Report elapsed milliseconds, not the wall-clock timestamp, so the
        // value is comparable with `limit`.
        reject(new BudgetExceededError("timeout", timeoutMs, Date.now() - startedAt));
      }, timeoutMs);
    });
    try {
      return await Promise.race([runner(task, ctx), timeoutPromise]);
    } finally {
      if (timer) clearTimeout(timer);
    }
  }
  /**
   * Records a terminal result: updates counters, returns the subagent to
   * `idle` (unless it was stopped), emits `task.completed`, dispatches more
   * work and emits `done` when the done condition holds.
   *
   * When a runner is configured and nothing is in flight, the result is
   * still stored but only a `warning` (`inFlight_underflow`) is emitted.
   */
  recordCompletion(result) {
    this.completedResults.push(result);
    this.totalIterations += result.iterations;
    if (this.inFlight > 0) {
      this.inFlight--;
    } else if (this.runner) {
      this.emit("warning", {
        type: "inFlight_underflow",
        taskId: result.taskId,
        subagentId: result.subagentId
      });
      return;
    }
    const subagent = this.subagents.get(result.subagentId);
    if (subagent && subagent.status !== "stopped") {
      subagent.status = "idle";
      subagent.currentTask = void 0;
      // An aborted controller cannot be reused; give the next task a fresh one.
      if (subagent.abortController.signal.aborted) {
        subagent.abortController = new AbortController();
      }
    }
    this.terminating.delete(result.subagentId);
    this.emit("task.completed", {
      task: subagent?.context.tasks.find((t) => t.id === result.taskId) ?? { id: result.taskId },
      result
    });
    this.tryDispatchNext();
    if (this.isDone()) {
      this.emit("done", {
        results: this.completedResults,
        totalIterations: this.totalIterations
      });
    }
  }
  /**
   * Evaluates `config.doneCondition`: for `all_tasks_done`, true when nothing
   * is pending or in flight; otherwise true once `totalIterations` reaches
   * `doneCondition.maxIterations` (when that is set).
   */
  isDone() {
    if (this.config.doneCondition.type === "all_tasks_done") {
      return this.pendingTasks.length === 0 && this.inFlight === 0;
    }
    if (this.config.doneCondition.maxIterations !== void 0 && this.totalIterations >= this.config.doneCondition.maxIterations) {
      return true;
    }
    return false;
  }
};
|
|
1088
|
+
/**
 * Maps an arbitrary thrown value to a structured subagent error
 * `{ kind, message, retryable, cause }`.
 *
 * Resolution order: provider errors (delegated to
 * `providerErrorToSubagentError`), budget errors, the `parentAborted` hint,
 * then message-pattern matching; anything unmatched becomes `"unknown"`.
 * Every result produced directly by this function is non-retryable.
 *
 * @param {unknown} err - The thrown value.
 * @param {{parentAborted?: boolean}} [hints] - Extra context from the caller.
 * @returns {object} Structured error; `cause` is `{name, message, stack}` for
 *   Error instances and `undefined` otherwise.
 */
function classifySubagentError(err, hints = {}) {
  const isErrorInstance = err instanceof Error;
  const cause = isErrorInstance ? { name: err.name, message: err.message, stack: err.stack } : void 0;
  const baseMessage = isErrorInstance ? err.message : String(err);
  if (err instanceof ProviderError) {
    return providerErrorToSubagentError(err, baseMessage, cause);
  }
  const nonRetryable = (kind) => ({ kind, message: baseMessage, retryable: false, cause });
  if (err instanceof BudgetExceededError) {
    // Budgets are user-configured ceilings, not transient failures —
    // retrying with the same budget will hit the same ceiling. The
    // orchestrator must raise the budget or narrow the task first.
    const kindByBudget = {
      iterations: "budget_iterations",
      tool_calls: "budget_tool_calls",
      tokens: "budget_tokens",
      cost: "budget_cost",
      timeout: "budget_timeout"
    };
    return nonRetryable(kindByBudget[err.kind]);
  }
  if (hints.parentAborted) {
    return nonRetryable("aborted_by_parent");
  }
  const lower = baseMessage.toLowerCase();
  // Ordered rules: the first predicate that matches decides the kind.
  const messageRules = [
    ["aborted_by_parent", () => /agent aborted$/i.test(baseMessage)],
    ["budget_iterations", () => /agent exhausted iteration limit$/i.test(baseMessage)],
    ["empty_response", () => /empty response$/i.test(baseMessage)],
    ["tool_failed", () => /^tool failed: /i.test(baseMessage)],
    ["bridge_failed", () => lower.includes("bridge transport") || /bridge.*(closed|disconnect)/i.test(baseMessage)],
    ["context_overflow", () => /context length|max.*tokens?.*exceeded|prompt is too long/i.test(baseMessage)]
  ];
  for (const [kind, matches] of messageRules) {
    if (matches()) return nonRetryable(kind);
  }
  return nonRetryable("unknown");
}
|
|
1151
|
+
/**
 * Translates a provider error into a structured subagent error using its
 * HTTP `status` and, where available, `body.type`.
 *
 * @param {{status?: number, body?: {type?: string}, retryable?: boolean}} err
 * @param {string} message - Message to carry on the result.
 * @param {object|undefined} cause - Serialized cause to carry on the result.
 * @returns {object} `{ kind, message, retryable, cause }`, plus `backoffMs`
 *   for rate-limit (5000) and 5xx (3000) results.
 */
function providerErrorToSubagentError(err, message, cause) {
  const { status } = err;
  const bodyTypeIs = (expected) => err.body?.type === expected;

  const rateLimited = status === 429 || bodyTypeIs("rate_limit_error");
  if (rateLimited) {
    // Conservative default: 5s. Provider-specific code can override
    // by emitting an error whose body carries an explicit hint.
    return { kind: "provider_rate_limit", message, retryable: true, backoffMs: 5e3, cause };
  }

  const unauthorized = status === 401 || status === 403 || bodyTypeIs("authentication_error");
  if (unauthorized) {
    return { kind: "provider_auth", message, retryable: false, cause };
  }

  switch (status) {
    case 408:
    case 0:
      return { kind: "provider_timeout", message, retryable: true, cause };
    default:
      break;
  }

  const isServerError = status >= 500 && status < 600;
  if (isServerError) {
    return { kind: "provider_5xx", message, retryable: true, backoffMs: 3e3, cause };
  }

  // Anything else keeps whatever retry verdict the provider error carried.
  return { kind: "unknown", message, retryable: err.retryable, cause };
}
|
|
1181
|
+
|
|
1182
|
+
// src/coordination/director.ts
|
|
1183
|
+
/**
 * Error raised when a director would exceed one of its spawn budgets.
 *
 * `kind` selects the message: `"max_spawns"` reports the lifetime spawn
 * cap; any other kind is reported as a spawn-depth violation.
 */
var DirectorBudgetError = class extends Error {
  kind;
  limit;
  observed;
  /**
   * @param {string} kind - Which budget tripped.
   * @param {number} limit - The configured cap.
   * @param {number} observed - The spawn ordinal or depth that hit the cap.
   */
  constructor(kind, limit, observed) {
    let description;
    if (kind === "max_spawns") {
      description = `Director spawn budget exceeded: tried to spawn #${observed} but maxSpawns is ${limit}`;
    } else {
      description = `Director spawn depth budget exceeded: this director is at depth ${observed} and maxSpawnDepth is ${limit}`;
    }
    super(description);
    Object.assign(this, { name: "DirectorBudgetError", kind, limit, observed });
  }
};
|
|
1197
|
+
var Director = class {
|
|
1198
|
+
id;
|
|
1199
|
+
fleet;
|
|
1200
|
+
usage;
|
|
1201
|
+
/**
|
|
1202
|
+
* Director-side bridge endpoint. Subagents are wired to the same
|
|
1203
|
+
* in-memory transport so the director can `ask()` them synchronously
|
|
1204
|
+
* and they can `send()` progress back. Exposed so external code (e.g.
|
|
1205
|
+
* the TUI) can subscribe to inbound messages.
|
|
1206
|
+
*/
|
|
1207
|
+
bridge;
|
|
1208
|
+
transport;
|
|
1209
|
+
coordinator;
|
|
1210
|
+
/** Resolves with the matching `TaskResult` the first time the
|
|
1211
|
+
* coordinator emits `task.completed` for a given task id. Each entry
|
|
1212
|
+
* is created lazily on first poll/await and cleared once consumed. */
|
|
1213
|
+
taskWaiters = /* @__PURE__ */ new Map();
|
|
1214
|
+
/** Cache of completed results in case the consumer asks AFTER the
|
|
1215
|
+
* coordinator already fired the event — `awaitTasks(['t-1'])` after
|
|
1216
|
+
* t-1 finished should resolve immediately, not hang. */
|
|
1217
|
+
completed = /* @__PURE__ */ new Map();
|
|
1218
|
+
/** Per-subagent provider/model metadata, captured at spawn time so the
|
|
1219
|
+
* FleetUsageAggregator's metaLookup can surface readable rows. */
|
|
1220
|
+
subagentMeta = /* @__PURE__ */ new Map();
|
|
1221
|
+
priceLookups = /* @__PURE__ */ new Map();
|
|
1222
|
+
/** Bridge endpoints we created per subagent (so we can `stop()` them
|
|
1223
|
+
* on shutdown and free transport subscriptions). */
|
|
1224
|
+
subagentBridges = /* @__PURE__ */ new Map();
|
|
1225
|
+
/** Tracks per-spawn config + assigned task ids for manifest writing. */
|
|
1226
|
+
manifestEntries = /* @__PURE__ */ new Map();
|
|
1227
|
+
manifestPath;
|
|
1228
|
+
roster;
|
|
1229
|
+
directorPreamble;
|
|
1230
|
+
subagentBaseline;
|
|
1231
|
+
/** Absolute path to the fleet's shared scratchpad directory, or null
|
|
1232
|
+
* when none was configured. Exposed as a readonly getter for callers
|
|
1233
|
+
* that need to surface the path to the user (e.g. the CLI logging
|
|
1234
|
+
* the location after `--director` boots). */
|
|
1235
|
+
sharedScratchpadPath;
|
|
1236
|
+
/** Spawn cap (lifetime total). Infinity means unlimited. */
|
|
1237
|
+
maxSpawns;
|
|
1238
|
+
/** Nesting cap. The N-th director in a chain has `spawnDepth = N-1`. */
|
|
1239
|
+
maxSpawnDepth;
|
|
1240
|
+
/** This director's position in a director chain. Root director = 0. */
|
|
1241
|
+
spawnDepth;
|
|
1242
|
+
/** Live spawn counter for `maxSpawns` enforcement. */
|
|
1243
|
+
spawnCount = 0;
|
|
1244
|
+
/** Optional checkpoint mirror — writes the live task graph + roster to disk. */
|
|
1245
|
+
stateCheckpoint;
|
|
1246
|
+
/** Optional session writer for emitting task_* / agent_* lifecycle events. */
|
|
1247
|
+
sessionWriter;
|
|
1248
|
+
/** Debounce timer for periodic manifest writes. */
|
|
1249
|
+
manifestTimer = null;
|
|
1250
|
+
manifestDebounceMs;
|
|
1251
|
+
/** Resolves task descriptions back from `assign()` so completion events
|
|
1252
|
+
* can also carry a human-readable title. */
|
|
1253
|
+
taskDescriptions = /* @__PURE__ */ new Map();
|
|
1254
|
+
/** Snapshot of which subagent owns each task — drives state-checkpoint
|
|
1255
|
+
* status updates without re-walking the manifest. */
|
|
1256
|
+
taskOwners = /* @__PURE__ */ new Map();
|
|
1257
|
+
/**
|
|
1258
|
+
* Handle to the coordinator-side `task.completed` listener so we can
|
|
1259
|
+
* unsubscribe in `shutdown()`. Without this, repeated Director
|
|
1260
|
+
* construction (e.g. tests, hot reloads) accumulates listeners on a
|
|
1261
|
+
* cached coordinator and slowly drifts the EventEmitter past its
|
|
1262
|
+
* default cap.
|
|
1263
|
+
*/
|
|
1264
|
+
taskCompletedListener = null;
|
|
1265
|
+
constructor(opts) {
|
|
1266
|
+
this.id = opts.config.coordinatorId || randomUUID();
|
|
1267
|
+
this.manifestPath = opts.manifestPath;
|
|
1268
|
+
this.roster = opts.roster;
|
|
1269
|
+
this.directorPreamble = opts.directorPreamble ?? DEFAULT_DIRECTOR_PREAMBLE;
|
|
1270
|
+
this.subagentBaseline = opts.subagentBaseline ?? DEFAULT_SUBAGENT_BASELINE;
|
|
1271
|
+
this.sharedScratchpadPath = opts.sharedScratchpadPath ?? null;
|
|
1272
|
+
this.maxSpawns = opts.maxSpawns ?? Number.POSITIVE_INFINITY;
|
|
1273
|
+
this.maxSpawnDepth = opts.maxSpawnDepth ?? 2;
|
|
1274
|
+
this.spawnDepth = opts.spawnDepth ?? 0;
|
|
1275
|
+
this.sessionWriter = opts.sessionWriter ?? null;
|
|
1276
|
+
this.manifestDebounceMs = opts.manifestDebounceMs ?? 2e3;
|
|
1277
|
+
this.stateCheckpoint = opts.stateCheckpointPath ? new DirectorStateCheckpoint(opts.stateCheckpointPath, {
|
|
1278
|
+
directorRunId: this.id,
|
|
1279
|
+
maxSpawns: opts.maxSpawns,
|
|
1280
|
+
spawnDepth: this.spawnDepth,
|
|
1281
|
+
maxSpawnDepth: this.maxSpawnDepth
|
|
1282
|
+
}) : null;
|
|
1283
|
+
if (this.sharedScratchpadPath) {
|
|
1284
|
+
void fsp4.mkdir(this.sharedScratchpadPath, { recursive: true }).catch(() => void 0);
|
|
1285
|
+
}
|
|
1286
|
+
this.transport = new InMemoryBridgeTransport();
|
|
1287
|
+
this.bridge = new InMemoryAgentBridge(
|
|
1288
|
+
{ agentId: this.id, coordinatorId: this.id },
|
|
1289
|
+
this.transport
|
|
1290
|
+
);
|
|
1291
|
+
this.fleet = new FleetBus();
|
|
1292
|
+
this.usage = new FleetUsageAggregator(
|
|
1293
|
+
this.fleet,
|
|
1294
|
+
(id) => this.priceLookups.get(id),
|
|
1295
|
+
(id) => this.subagentMeta.get(id)
|
|
1296
|
+
);
|
|
1297
|
+
this.coordinator = new DefaultMultiAgentCoordinator(
|
|
1298
|
+
{ ...opts.config, coordinatorId: this.id },
|
|
1299
|
+
{ runner: opts.runner }
|
|
1300
|
+
);
|
|
1301
|
+
this.taskCompletedListener = (payload) => {
|
|
1302
|
+
const r = payload.result;
|
|
1303
|
+
this.completed.set(r.taskId, r);
|
|
1304
|
+
const waiter = this.taskWaiters.get(r.taskId);
|
|
1305
|
+
if (waiter) {
|
|
1306
|
+
waiter.resolve(r);
|
|
1307
|
+
this.taskWaiters.delete(r.taskId);
|
|
1308
|
+
}
|
|
1309
|
+
const title = this.taskDescriptions.get(r.taskId) ?? payload.task.description ?? r.taskId;
|
|
1310
|
+
const failed = r.status !== "success";
|
|
1311
|
+
const errorString = r.error ? `${r.error.kind}: ${r.error.message}` : void 0;
|
|
1312
|
+
this.stateCheckpoint?.recordTaskStatus(r.taskId, {
|
|
1313
|
+
status: failed ? r.status : "completed",
|
|
1314
|
+
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1315
|
+
iterations: r.iterations,
|
|
1316
|
+
toolCalls: r.toolCalls,
|
|
1317
|
+
durationMs: r.durationMs,
|
|
1318
|
+
error: errorString
|
|
1319
|
+
});
|
|
1320
|
+
this.stateCheckpoint?.setUsage(this.usage.snapshot());
|
|
1321
|
+
void this.appendSessionEvent(
|
|
1322
|
+
failed ? {
|
|
1323
|
+
type: "task_failed",
|
|
1324
|
+
ts: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1325
|
+
taskId: r.taskId,
|
|
1326
|
+
title,
|
|
1327
|
+
error: errorString ?? r.status
|
|
1328
|
+
} : {
|
|
1329
|
+
type: "task_completed",
|
|
1330
|
+
ts: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1331
|
+
taskId: r.taskId,
|
|
1332
|
+
title
|
|
1333
|
+
}
|
|
1334
|
+
);
|
|
1335
|
+
this.scheduleManifest();
|
|
1336
|
+
};
|
|
1337
|
+
this.coordinator.on("task.completed", this.taskCompletedListener);
|
|
1338
|
+
}
|
|
1339
|
+
/** Best-effort session-writer append. Swallows failures — the director
|
|
1340
|
+
* must not break a fleet run because the session JSONL handle closed. */
|
|
1341
|
+
async appendSessionEvent(event) {
|
|
1342
|
+
if (!this.sessionWriter) return;
|
|
1343
|
+
try {
|
|
1344
|
+
await this.sessionWriter.append(event);
|
|
1345
|
+
} catch {
|
|
1346
|
+
}
|
|
1347
|
+
}
|
|
1348
|
+
/** Debounced manifest writer. A burst of spawn/assign/complete events
|
|
1349
|
+
* collapses into one write. Set `manifestDebounceMs` to 0 to disable. */
|
|
1350
|
+
scheduleManifest() {
|
|
1351
|
+
if (!this.manifestPath || this.manifestDebounceMs <= 0) return;
|
|
1352
|
+
if (this.manifestTimer) return;
|
|
1353
|
+
this.manifestTimer = setTimeout(() => {
|
|
1354
|
+
this.manifestTimer = null;
|
|
1355
|
+
void this.writeManifest().catch(() => void 0);
|
|
1356
|
+
}, this.manifestDebounceMs);
|
|
1357
|
+
}
|
|
1358
|
+
/**
|
|
1359
|
+
* Spawn a subagent. Identical to the coordinator's `spawn()` but
|
|
1360
|
+
* captures provider/model metadata for the usage aggregator and
|
|
1361
|
+
* lets the FleetBus attach to the runner's EventBus when the task
|
|
1362
|
+
* actually runs (see `attachSubagentBus`).
|
|
1363
|
+
*
|
|
1364
|
+
* Caller-supplied `priceLookup` is optional but recommended — without
|
|
1365
|
+
* it the `cost` column in `usage.snapshot()` stays at 0.
|
|
1366
|
+
*/
|
|
1367
|
+
async spawn(config, priceLookup) {
|
|
1368
|
+
if (this.spawnDepth >= this.maxSpawnDepth) {
|
|
1369
|
+
throw new DirectorBudgetError("max_spawn_depth", this.maxSpawnDepth, this.spawnDepth);
|
|
1370
|
+
}
|
|
1371
|
+
if (this.spawnCount >= this.maxSpawns) {
|
|
1372
|
+
throw new DirectorBudgetError("max_spawns", this.maxSpawns, this.spawnCount + 1);
|
|
1373
|
+
}
|
|
1374
|
+
this.spawnCount += 1;
|
|
1375
|
+
const result = await this.coordinator.spawn(config);
|
|
1376
|
+
this.subagentMeta.set(result.subagentId, {
|
|
1377
|
+
provider: config.provider,
|
|
1378
|
+
model: config.model
|
|
1379
|
+
});
|
|
1380
|
+
if (priceLookup) this.priceLookups.set(result.subagentId, priceLookup);
|
|
1381
|
+
const subagentBridge = new InMemoryAgentBridge(
|
|
1382
|
+
{ agentId: result.subagentId, coordinatorId: this.id },
|
|
1383
|
+
this.transport
|
|
1384
|
+
);
|
|
1385
|
+
this.coordinator.setSubagentBridge(result.subagentId, subagentBridge);
|
|
1386
|
+
this.subagentBridges.set(result.subagentId, subagentBridge);
|
|
1387
|
+
this.manifestEntries.set(result.subagentId, {
|
|
1388
|
+
subagentId: result.subagentId,
|
|
1389
|
+
name: config.name,
|
|
1390
|
+
role: config.role,
|
|
1391
|
+
provider: config.provider,
|
|
1392
|
+
model: config.model,
|
|
1393
|
+
taskIds: []
|
|
1394
|
+
});
|
|
1395
|
+
const spawnedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1396
|
+
this.stateCheckpoint?.recordSpawn(
|
|
1397
|
+
{
|
|
1398
|
+
id: result.subagentId,
|
|
1399
|
+
name: config.name,
|
|
1400
|
+
role: config.role,
|
|
1401
|
+
provider: config.provider,
|
|
1402
|
+
model: config.model,
|
|
1403
|
+
spawnedAt
|
|
1404
|
+
},
|
|
1405
|
+
this.spawnCount
|
|
1406
|
+
);
|
|
1407
|
+
void this.appendSessionEvent({
|
|
1408
|
+
type: "agent_spawned",
|
|
1409
|
+
ts: spawnedAt,
|
|
1410
|
+
agentId: result.subagentId,
|
|
1411
|
+
role: config.role ?? config.name
|
|
1412
|
+
});
|
|
1413
|
+
this.scheduleManifest();
|
|
1414
|
+
return result.subagentId;
|
|
1415
|
+
}
|
|
1416
|
+
/**
|
|
1417
|
+
* Synchronously ask a subagent something via the bridge. Sends a
|
|
1418
|
+
* `task` message addressed to the subagent and awaits a matching
|
|
1419
|
+
* reply (matched by message id). Subagent runners that handle these
|
|
1420
|
+
* requests subscribe to `ctx.bridge` and reply with a message whose
|
|
1421
|
+
* `id` equals the incoming request's id (see `InMemoryAgentBridge`'s
|
|
1422
|
+
* `request<T>` implementation).
|
|
1423
|
+
*
|
|
1424
|
+
* Returns the response payload directly (the bridge wrapper is
|
|
1425
|
+
* unwrapped for ergonomics). Times out after `timeoutMs` (default
|
|
1426
|
+
* matches the bridge's own default of 30s) — surface those rejections
|
|
1427
|
+
* to the caller as actionable errors instead of letting tools hang.
|
|
1428
|
+
*/
|
|
1429
|
+
async ask(subagentId, payload, timeoutMs) {
|
|
1430
|
+
if (!this.subagentBridges.has(subagentId)) {
|
|
1431
|
+
throw new Error(
|
|
1432
|
+
`ask: unknown subagent "${subagentId}" (spawn() it first; current fleet: ${Array.from(this.subagentBridges.keys()).join(", ") || "(empty)"})`
|
|
1433
|
+
);
|
|
1434
|
+
}
|
|
1435
|
+
const msg = {
|
|
1436
|
+
id: randomUUID(),
|
|
1437
|
+
type: "task",
|
|
1438
|
+
from: this.id,
|
|
1439
|
+
to: subagentId,
|
|
1440
|
+
payload,
|
|
1441
|
+
timestamp: Date.now(),
|
|
1442
|
+
priority: "normal"
|
|
1443
|
+
};
|
|
1444
|
+
const reply = await this.bridge.request(msg, timeoutMs);
|
|
1445
|
+
return reply.payload;
|
|
1446
|
+
}
|
|
1447
|
+
/**
|
|
1448
|
+
* Read completed task results and format them as a structured text
|
|
1449
|
+
* block the director's LLM can paste into its own context. The
|
|
1450
|
+
* Director keeps every completed `TaskResult` in `completed` so this
|
|
1451
|
+
* is a pure read — no bridge round-trip, cheap to call.
|
|
1452
|
+
*
|
|
1453
|
+
* The returned string is intentionally markdown-flavored: headers per
|
|
1454
|
+
* subagent, a one-line meta row (iter / tools / ms), and the task's
|
|
1455
|
+
* result text. Pass `style: 'json'` for a programmatic shape instead
|
|
1456
|
+
* (useful when the director model is doing structured-output work).
|
|
1457
|
+
*/
|
|
1458
|
+
rollUp(taskIds, style = "markdown") {
|
|
1459
|
+
const rows = taskIds.map((id) => this.completed.get(id)).filter((r) => !!r);
|
|
1460
|
+
if (style === "json") {
|
|
1461
|
+
return JSON.stringify(
|
|
1462
|
+
rows.map((r) => ({
|
|
1463
|
+
taskId: r.taskId,
|
|
1464
|
+
subagentId: r.subagentId,
|
|
1465
|
+
status: r.status,
|
|
1466
|
+
iterations: r.iterations,
|
|
1467
|
+
toolCalls: r.toolCalls,
|
|
1468
|
+
durationMs: r.durationMs,
|
|
1469
|
+
result: r.result,
|
|
1470
|
+
error: r.error
|
|
1471
|
+
})),
|
|
1472
|
+
null,
|
|
1473
|
+
2
|
|
1474
|
+
);
|
|
1475
|
+
}
|
|
1476
|
+
if (rows.length === 0) {
|
|
1477
|
+
return "_No completed tasks for the requested ids \u2014 try waiting first._";
|
|
1478
|
+
}
|
|
1479
|
+
const lines = [];
|
|
1480
|
+
for (const r of rows) {
|
|
1481
|
+
const meta = this.subagentMeta.get(r.subagentId);
|
|
1482
|
+
const tag = meta?.provider && meta?.model ? ` \xB7 ${meta.provider}/${meta.model}` : "";
|
|
1483
|
+
lines.push(`### ${r.subagentId}${tag}`);
|
|
1484
|
+
lines.push(`_${r.status} \u2014 ${r.iterations} iter \xB7 ${r.toolCalls} tools \xB7 ${r.durationMs}ms_`);
|
|
1485
|
+
lines.push("");
|
|
1486
|
+
if (r.error) lines.push(`**Error:** ${r.error}`);
|
|
1487
|
+
else if (typeof r.result === "string") lines.push(r.result);
|
|
1488
|
+
else if (r.result !== void 0)
|
|
1489
|
+
lines.push("```json\n" + JSON.stringify(r.result, null, 2) + "\n```");
|
|
1490
|
+
else lines.push("_(no output)_");
|
|
1491
|
+
lines.push("");
|
|
1492
|
+
}
|
|
1493
|
+
return lines.join("\n").trimEnd();
|
|
1494
|
+
}
|
|
1495
|
+
/**
|
|
1496
|
+
* Write the fleet manifest to `manifestPath`. Returns the path written
|
|
1497
|
+
* or null when no path was configured. Captures every spawn + its
|
|
1498
|
+
* assigned tasks — paired with per-subagent JSONLs, this is enough to
|
|
1499
|
+
* replay an entire director run.
|
|
1500
|
+
*/
|
|
1501
|
+
async writeManifest() {
|
|
1502
|
+
if (!this.manifestPath) return null;
|
|
1503
|
+
const manifest = {
|
|
1504
|
+
directorRunId: this.id,
|
|
1505
|
+
writtenAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1506
|
+
children: Array.from(this.manifestEntries.values()).map((e) => ({
|
|
1507
|
+
...e,
|
|
1508
|
+
// Surface final status from `completed` when available — manifest
|
|
1509
|
+
// becomes much more useful for replay when it carries the
|
|
1510
|
+
// success/failure state.
|
|
1511
|
+
results: e.taskIds.map((tid) => {
|
|
1512
|
+
const r = this.completed.get(tid);
|
|
1513
|
+
return r ? {
|
|
1514
|
+
taskId: tid,
|
|
1515
|
+
status: r.status,
|
|
1516
|
+
iterations: r.iterations,
|
|
1517
|
+
toolCalls: r.toolCalls,
|
|
1518
|
+
durationMs: r.durationMs
|
|
1519
|
+
} : { taskId: tid, status: "pending" };
|
|
1520
|
+
})
|
|
1521
|
+
})),
|
|
1522
|
+
usage: this.usage.snapshot()
|
|
1523
|
+
};
|
|
1524
|
+
await fsp4.mkdir(path4.dirname(this.manifestPath), { recursive: true });
|
|
1525
|
+
await fsp4.writeFile(this.manifestPath, JSON.stringify(manifest, null, 2), { mode: 384 });
|
|
1526
|
+
return this.manifestPath;
|
|
1527
|
+
}
|
|
1528
|
+
/**
|
|
1529
|
+
* Tear down the director: stop every subagent, close every bridge
|
|
1530
|
+
* endpoint, and (when configured) write the final manifest. Idempotent
|
|
1531
|
+
* — calling shutdown twice is a no-op on the second invocation.
|
|
1532
|
+
*/
|
|
1533
|
+
async shutdown() {
|
|
1534
|
+
if (this.manifestTimer) {
|
|
1535
|
+
clearTimeout(this.manifestTimer);
|
|
1536
|
+
this.manifestTimer = null;
|
|
1537
|
+
}
|
|
1538
|
+
if (this.taskCompletedListener) {
|
|
1539
|
+
this.coordinator.off("task.completed", this.taskCompletedListener);
|
|
1540
|
+
this.taskCompletedListener = null;
|
|
1541
|
+
}
|
|
1542
|
+
await this.coordinator.stopAll();
|
|
1543
|
+
for (const b of this.subagentBridges.values()) {
|
|
1544
|
+
await b.stop().catch((err) => this.logShutdownError("subagent_bridge_stop", err));
|
|
1545
|
+
}
|
|
1546
|
+
this.subagentBridges.clear();
|
|
1547
|
+
await this.bridge.stop().catch((err) => this.logShutdownError("director_bridge_stop", err));
|
|
1548
|
+
if (this.manifestPath)
|
|
1549
|
+
await this.writeManifest().catch((err) => this.logShutdownError("manifest_write", err));
|
|
1550
|
+
if (this.stateCheckpoint) {
|
|
1551
|
+
this.stateCheckpoint.setUsage(this.usage.snapshot());
|
|
1552
|
+
await this.stateCheckpoint.flush().catch((err) => this.logShutdownError("state_checkpoint_flush", err));
|
|
1553
|
+
}
|
|
1554
|
+
}
|
|
1555
|
+
/**
|
|
1556
|
+
* Funnel for shutdown-phase errors. We can't throw — `shutdown()` is
|
|
1557
|
+
* called from process-exit paths where an uncaught throw would lose
|
|
1558
|
+
* the manifest write that comes after. But we MUST NOT silently
|
|
1559
|
+
* swallow either — a persistent bridge-close failure would otherwise
|
|
1560
|
+
* mask a real bug. `process.emitWarning` is the right tier:
|
|
1561
|
+
* surfaces on stderr by default, lets the host plug a warning
|
|
1562
|
+
* listener for structured collection, and never affects exit code.
|
|
1563
|
+
*/
|
|
1564
|
+
logShutdownError(phase, err) {
|
|
1565
|
+
const detail = err instanceof Error ? err.message : String(err);
|
|
1566
|
+
process.emitWarning(
|
|
1567
|
+
`Director shutdown phase "${phase}" failed: ${detail}`,
|
|
1568
|
+
"DirectorShutdownWarning"
|
|
1569
|
+
);
|
|
1570
|
+
}
|
|
1571
|
+
/**
|
|
1572
|
+
* Hand a task to the coordinator. Returns the assigned task id so
|
|
1573
|
+
* callers can wait on it via `awaitTasks([id])`. The coordinator's
|
|
1574
|
+
* concurrency limit applies — the task may queue before running.
|
|
1575
|
+
*/
|
|
1576
|
+
async assign(task) {
|
|
1577
|
+
const taskWithId = task.id ? task : { ...task, id: randomUUID() };
|
|
1578
|
+
if (task.subagentId) {
|
|
1579
|
+
const entry = this.manifestEntries.get(task.subagentId);
|
|
1580
|
+
if (entry) entry.taskIds.push(taskWithId.id);
|
|
1581
|
+
}
|
|
1582
|
+
await this.coordinator.assign(taskWithId);
|
|
1583
|
+
this.taskDescriptions.set(taskWithId.id, taskWithId.description);
|
|
1584
|
+
if (taskWithId.subagentId) this.taskOwners.set(taskWithId.id, taskWithId.subagentId);
|
|
1585
|
+
const assignedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1586
|
+
this.stateCheckpoint?.recordTaskAssigned({
|
|
1587
|
+
taskId: taskWithId.id,
|
|
1588
|
+
subagentId: taskWithId.subagentId,
|
|
1589
|
+
description: taskWithId.description,
|
|
1590
|
+
status: "running",
|
|
1591
|
+
assignedAt
|
|
1592
|
+
});
|
|
1593
|
+
void this.appendSessionEvent({
|
|
1594
|
+
type: "task_created",
|
|
1595
|
+
ts: assignedAt,
|
|
1596
|
+
taskId: taskWithId.id,
|
|
1597
|
+
title: taskWithId.description
|
|
1598
|
+
});
|
|
1599
|
+
this.scheduleManifest();
|
|
1600
|
+
return taskWithId.id;
|
|
1601
|
+
}
|
|
1602
|
+
/**
|
|
1603
|
+
* Block until every task id resolves. Returns results in the same
|
|
1604
|
+
* order as the input. If any task hasn't completed by the time this
|
|
1605
|
+
* is called, the promise hangs until it does — pair with a timeout
|
|
1606
|
+
* at the caller if that's a concern. Resolves immediately for ids
|
|
1607
|
+
* whose results were already cached.
|
|
1608
|
+
*/
|
|
1609
|
+
awaitTasks(taskIds) {
|
|
1610
|
+
return Promise.all(
|
|
1611
|
+
taskIds.map((id) => {
|
|
1612
|
+
const cached = this.completed.get(id);
|
|
1613
|
+
if (cached) return cached;
|
|
1614
|
+
const existing = this.taskWaiters.get(id);
|
|
1615
|
+
if (existing) return existing.promise;
|
|
1616
|
+
let resolve;
|
|
1617
|
+
const promise = new Promise((res) => {
|
|
1618
|
+
resolve = res;
|
|
1619
|
+
});
|
|
1620
|
+
this.taskWaiters.set(id, { promise, resolve });
|
|
1621
|
+
return promise;
|
|
1622
|
+
})
|
|
1623
|
+
);
|
|
1624
|
+
}
|
|
1625
|
+
async terminate(subagentId) {
|
|
1626
|
+
await this.coordinator.stop(subagentId);
|
|
1627
|
+
}
|
|
1628
|
+
async terminateAll() {
|
|
1629
|
+
await this.coordinator.stopAll();
|
|
1630
|
+
}
|
|
1631
|
+
status() {
|
|
1632
|
+
return this.coordinator.getStatus();
|
|
1633
|
+
}
|
|
1634
|
+
/**
|
|
1635
|
+
* Subscribe to coordinator events. Currently only `task.completed` is
|
|
1636
|
+
* exposed (the others are internal lifecycle). Returns an unsubscribe
|
|
1637
|
+
* function. External callers (e.g. the CLI's `MultiAgentHost`) use this
|
|
1638
|
+
* to drive their own pending/results tracking without poking the
|
|
1639
|
+
* coordinator directly.
|
|
1640
|
+
*/
|
|
1641
|
+
on(event, handler) {
|
|
1642
|
+
this.coordinator.on(event, handler);
|
|
1643
|
+
return () => {
|
|
1644
|
+
this.coordinator.off(event, handler);
|
|
1645
|
+
};
|
|
1646
|
+
}
|
|
1647
|
+
/**
|
|
1648
|
+
* Snapshot of every task that has resolved (success, failed, timeout,
|
|
1649
|
+
* stopped) since the director started. Returned in completion order
|
|
1650
|
+
* via the internal map's iteration order. Used by `/fleet status` to
|
|
1651
|
+
* paint the completed table without reaching into private state.
|
|
1652
|
+
*/
|
|
1653
|
+
completedResults() {
|
|
1654
|
+
return Array.from(this.completed.values());
|
|
1655
|
+
}
|
|
1656
|
+
snapshot() {
|
|
1657
|
+
return this.usage.snapshot();
|
|
1658
|
+
}
|
|
1659
|
+
/**
|
|
1660
|
+
* Look up provider/model metadata for a spawned subagent. Returns
|
|
1661
|
+
* undefined when the subagent id is unknown (not yet spawned, or
|
|
1662
|
+
* already torn down). Callers — notably the TUI fleet panel — use
|
|
1663
|
+
* this to render human-readable provider/model tags next to each
|
|
1664
|
+
* subagent row without reaching into private state.
|
|
1665
|
+
*/
|
|
1666
|
+
getSubagentMeta(id) {
|
|
1667
|
+
const usage = this.subagentMeta.get(id);
|
|
1668
|
+
const manifest = this.manifestEntries.get(id);
|
|
1669
|
+
if (!usage && !manifest) return void 0;
|
|
1670
|
+
return {
|
|
1671
|
+
provider: usage?.provider ?? manifest?.provider,
|
|
1672
|
+
model: usage?.model ?? manifest?.model,
|
|
1673
|
+
name: manifest?.name
|
|
1674
|
+
};
|
|
1675
|
+
}
|
|
1676
|
+
/**
|
|
1677
|
+
* Compose the leader/director-agent system prompt: fleet preamble +
|
|
1678
|
+
* (optional) roster summary + user base prompt. Pass the result to your
|
|
1679
|
+
* leader Agent's `ctx.systemPrompt` when constructing it.
|
|
1680
|
+
*
|
|
1681
|
+
* `basePrompt` defaults to `config.leaderSystemPrompt` so callers can
|
|
1682
|
+
* use the no-arg form when the multi-agent config already carries it.
|
|
1683
|
+
*/
|
|
1684
|
+
leaderSystemPrompt(basePrompt) {
|
|
1685
|
+
return composeDirectorPrompt({
|
|
1686
|
+
basePrompt: basePrompt ?? this.coordinator.config.leaderSystemPrompt,
|
|
1687
|
+
directorPreamble: this.directorPreamble,
|
|
1688
|
+
rosterSummary: this.roster ? rosterSummaryFromConfigs(this.roster) : void 0
|
|
1689
|
+
});
|
|
1690
|
+
}
|
|
1691
|
+
/**
|
|
1692
|
+
* Compose a subagent's system prompt for a given `SubagentConfig`:
|
|
1693
|
+
* baseline + role + task + per-spawn override. Returned by value — does
|
|
1694
|
+
* not mutate the config. Factories (the user-supplied `AgentFactory`)
|
|
1695
|
+
* should call this when building each subagent's Agent so the bridge
|
|
1696
|
+
* contract, role context, and override are all surfaced.
|
|
1697
|
+
*
|
|
1698
|
+
* When `taskBrief` is omitted the Task section is dropped. Pass the
|
|
1699
|
+
* actual task description here to reinforce it in the system prompt
|
|
1700
|
+
* (the runner already passes it as user input — duplicating in the
|
|
1701
|
+
* system prompt is optional but improves anchoring on small models).
|
|
1702
|
+
*/
|
|
1703
|
+
subagentSystemPrompt(config, taskBrief) {
|
|
1704
|
+
return composeSubagentPrompt({
|
|
1705
|
+
baseline: this.subagentBaseline,
|
|
1706
|
+
role: config.prompt,
|
|
1707
|
+
task: taskBrief,
|
|
1708
|
+
sharedScratchpad: this.sharedScratchpadPath ?? void 0,
|
|
1709
|
+
override: config.systemPromptOverride
|
|
1710
|
+
});
|
|
1711
|
+
}
|
|
1712
|
+
/**
|
|
1713
|
+
* Build the tool set the LLM-driven director uses to orchestrate.
|
|
1714
|
+
* Returns an array of `Tool` definitions; register these on the
|
|
1715
|
+
* director's `Agent` to expose `spawn_subagent`, `assign_task`, etc.
|
|
1716
|
+
* Each tool's `execute()` delegates straight to the matching method
|
|
1717
|
+
* above.
|
|
1718
|
+
*
|
|
1719
|
+
* Tools all carry `permission: 'auto'` — the *user* has already
|
|
1720
|
+
* approved running the director when they kicked off the run, so
|
|
1721
|
+
* gating individual orchestration calls behind a confirm prompt
|
|
1722
|
+
* would just be noise. The actual subagent tools they spawn are
|
|
1723
|
+
* still permission-checked normally.
|
|
1724
|
+
*/
|
|
1725
|
+
tools(roster) {
|
|
1726
|
+
const t = [
|
|
1727
|
+
makeSpawnTool(this, roster),
|
|
1728
|
+
makeAssignTool(this),
|
|
1729
|
+
makeAwaitTasksTool(this),
|
|
1730
|
+
makeAskTool(this),
|
|
1731
|
+
makeRollUpTool(this),
|
|
1732
|
+
makeTerminateTool(this),
|
|
1733
|
+
makeFleetStatusTool(this),
|
|
1734
|
+
makeFleetUsageTool(this)
|
|
1735
|
+
];
|
|
1736
|
+
return t;
|
|
1737
|
+
}
|
|
1738
|
+
};
|
|
1739
|
+
/**
 * Create the `spawn_subagent` tool bound to a director.
 *
 * `execute` builds a subagent config — starting from the roster entry when
 * `role` names one, otherwise from an ad-hoc `{ name }` — applies the
 * string/number overrides present in the input, and calls `director.spawn`.
 *
 * @param director Object exposing `spawn(config)`.
 * @param roster Optional map of role id to base subagent config.
 * @returns The tool definition. `execute` resolves to
 *   `{ subagentId, provider, model, name }` on success or `{ error, ... }`
 *   on failure; it does not throw for spawn failures.
 */
function makeSpawnTool(director, roster) {
  const inputSchema = {
    type: "object",
    properties: {
      role: {
        type: "string",
        description: "Roster role id (preferred). When set, the spawn uses the matching config from the roster and ignores other fields."
      },
      name: {
        type: "string",
        description: "Display name for the subagent. Required when not using roster."
      },
      provider: {
        type: "string",
        description: 'Provider id (e.g. "anthropic", "openai"). Defaults to the leader provider when omitted.'
      },
      model: {
        type: "string",
        description: "Model id within the provider. Defaults to the leader model when omitted."
      },
      systemPromptOverride: {
        type: "string",
        description: "Extra prompt text appended after the role-base prompt."
      },
      maxIterations: { type: "number" },
      maxToolCalls: { type: "number" },
      maxCostUsd: { type: "number" }
    },
    required: []
  };
  return {
    name: "spawn_subagent",
    description: "Create a new subagent under this director. Returns the subagent id. Use this when you need a worker with a specific provider, model, or role to handle a piece of the plan.",
    usageHint: "Either pass `role` (matches the roster) OR pass `name` + optional `provider`/`model`. Returns `{ subagentId }`.",
    permission: "auto",
    mutating: false,
    inputSchema,
    async execute(input) {
      const i = input ?? {};
      const role = typeof i.role === "string" ? i.role : void 0;
      // Own-property check: the role id comes from model output, and a plain
      // `roster[role]` would resolve inherited keys such as "constructor"
      // or "toString" to Object.prototype members.
      const base = role && roster && Object.prototype.hasOwnProperty.call(roster, role) ? roster[role] : void 0;
      if (role && !base) {
        return {
          error: `unknown role "${role}". roster has: ${roster ? Object.keys(roster).join(", ") : "(empty)"}`
        };
      }
      // Copy the roster entry so overrides never leak back into the roster.
      // Ad-hoc spawns only accept a string name; anything else falls back.
      const cfg = {
        ...base ?? { name: typeof i.name === "string" ? i.name : "subagent" }
      };
      if (typeof i.name === "string") cfg.name = i.name;
      if (typeof i.provider === "string") cfg.provider = i.provider;
      if (typeof i.model === "string") cfg.model = i.model;
      if (typeof i.systemPromptOverride === "string") {
        cfg.systemPromptOverride = i.systemPromptOverride;
      }
      if (typeof i.maxIterations === "number") cfg.maxIterations = i.maxIterations;
      if (typeof i.maxToolCalls === "number") cfg.maxToolCalls = i.maxToolCalls;
      if (typeof i.maxCostUsd === "number") cfg.maxCostUsd = i.maxCostUsd;
      try {
        const subagentId = await director.spawn(cfg);
        return { subagentId, provider: cfg.provider, model: cfg.model, name: cfg.name };
      } catch (err) {
        // Budget errors carry structured fields the caller can act on.
        if (err instanceof DirectorBudgetError) {
          return { error: err.message, kind: err.kind, limit: err.limit, observed: err.observed };
        }
        return { error: err instanceof Error ? err.message : String(err) };
      }
    }
  };
}
|
|
1808
|
+
/**
 * Create the `assign_task` tool bound to a director.
 *
 * @param director Object exposing `assign(task)`.
 * @returns The tool definition. `execute` resolves to
 *   `{ taskId, subagentId }`, or to `{ error }` when the required
 *   `subagentId` / `description` inputs are missing or blank. Rejections
 *   from `director.assign` propagate to the caller.
 */
function makeAssignTool(director) {
  const inputSchema = {
    type: "object",
    properties: {
      subagentId: { type: "string", description: "Target subagent id. Required." },
      description: {
        type: "string",
        description: "The task in natural language \u2014 what you want this subagent to do."
      },
      maxToolCalls: { type: "number", description: "Optional per-task tool-call budget override." },
      timeoutMs: { type: "number", description: "Optional per-task timeout in ms." }
    },
    required: ["subagentId", "description"]
  };
  return {
    name: "assign_task",
    description: "Hand a task to a previously spawned subagent. Returns the task id \u2014 pass it to `await_tasks` to block on completion.",
    permission: "auto",
    mutating: false,
    inputSchema,
    async execute(input) {
      // Input comes from model output: validate instead of crashing on a
      // missing object or forwarding an empty task to the director.
      const i = input ?? {};
      if (typeof i.subagentId !== "string" || !i.subagentId) {
        return { error: "`subagentId` is required." };
      }
      if (typeof i.description !== "string" || !i.description.trim()) {
        return { error: "`description` is required." };
      }
      const task = {
        id: randomUUID(),
        description: i.description,
        subagentId: i.subagentId,
        // Only numeric budgets are forwarded; anything else means "unset".
        maxToolCalls: typeof i.maxToolCalls === "number" ? i.maxToolCalls : void 0,
        timeoutMs: typeof i.timeoutMs === "number" ? i.timeoutMs : void 0
      };
      const taskId = await director.assign(task);
      return { taskId, subagentId: i.subagentId };
    }
  };
}
|
|
1842
|
+
/**
 * Create the `await_tasks` tool bound to a director.
 *
 * @param director Object exposing `awaitTasks(taskIds)`.
 * @returns The tool definition. `execute` resolves to `{ results }`, or to
 *   `{ error }` when `taskIds` is not an array of strings.
 */
function makeAwaitTasksTool(director) {
  const inputSchema = {
    type: "object",
    properties: {
      taskIds: {
        type: "array",
        items: { type: "string" },
        description: "One or more task ids returned by `assign_task`. The call blocks until every id resolves."
      }
    },
    required: ["taskIds"]
  };
  return {
    name: "await_tasks",
    description: "Block until every named task completes. Returns the array of TaskResult \u2014 use this to gather subagent output before deciding the next step.",
    permission: "auto",
    mutating: false,
    inputSchema,
    async execute(input) {
      // Reject malformed input up front; passing a non-array on would crash
      // inside the director with an unhelpful TypeError.
      const taskIds = input?.taskIds;
      if (!Array.isArray(taskIds) || !taskIds.every((id) => typeof id === "string")) {
        return { error: "`taskIds` must be an array of task id strings." };
      }
      const results = await director.awaitTasks(taskIds);
      return { results };
    }
  };
}
|
|
1867
|
+
/**
 * Create the `ask_subagent` tool bound to a director.
 *
 * @param director Object exposing `ask(subagentId, payload, timeoutMs)`.
 * @returns The tool definition. `execute` never throws: it resolves to
 *   `{ ok: true, answer }` or `{ ok: false, error }`.
 */
function makeAskTool(director) {
  const inputSchema = {
    type: "object",
    properties: {
      subagentId: {
        type: "string",
        description: "Subagent to ask. Must be a previously spawned id."
      },
      question: {
        type: "string",
        description: "The question or instruction. Sent as the bridge message payload."
      },
      timeoutMs: { type: "number", description: "Optional timeout in ms (default 30s)." }
    },
    required: ["subagentId", "question"]
  };
  return {
    name: "ask_subagent",
    description: "Synchronously ask a subagent a question. Blocks until the subagent replies via the bridge (or the timeout fires). Use this when you need a one-shot answer without spawning a fresh task.",
    permission: "auto",
    mutating: false,
    inputSchema,
    async execute(input) {
      const i = input ?? {};
      // Fail fast on missing inputs: an empty question would otherwise be
      // sent over the bridge and only surface as a timeout.
      if (typeof i.subagentId !== "string" || !i.subagentId) {
        return { ok: false, error: "`subagentId` is required." };
      }
      if (typeof i.question !== "string" || !i.question.trim()) {
        return { ok: false, error: "`question` is required." };
      }
      const timeoutMs = typeof i.timeoutMs === "number" ? i.timeoutMs : void 0;
      try {
        const answer = await director.ask(i.subagentId, { question: i.question }, timeoutMs);
        return { ok: true, answer };
      } catch (err) {
        return { ok: false, error: err instanceof Error ? err.message : String(err) };
      }
    }
  };
}
|
|
1900
|
+
/**
 * Create the `roll_up` tool bound to a director.
 *
 * @param director Object exposing `rollUp(taskIds, style)`.
 * @returns The tool definition. `execute` resolves to `{ summary, count }`
 *   where `count` is the number of ids requested, or to `{ error }` when
 *   `taskIds` is not an array of strings.
 */
function makeRollUpTool(director) {
  const inputSchema = {
    type: "object",
    properties: {
      taskIds: {
        type: "array",
        items: { type: "string" },
        description: "Completed task ids to aggregate. Pass the ids returned by previous `assign_task` calls."
      },
      style: {
        type: "string",
        enum: ["markdown", "json"],
        description: "Output flavor \u2014 markdown (default) for in-prompt summarization, json for structured downstream processing."
      }
    },
    required: ["taskIds"]
  };
  return {
    name: "roll_up",
    description: "Aggregate completed task results into a single formatted summary. Use this after `await_tasks` to fold subagent outputs back into the director's context before deciding the next step.",
    permission: "auto",
    mutating: false,
    inputSchema,
    async execute(input) {
      // Reject malformed input instead of crashing on `.map` / `.length`.
      const taskIds = input?.taskIds;
      if (!Array.isArray(taskIds) || !taskIds.every((id) => typeof id === "string")) {
        return { error: "`taskIds` must be an array of task id strings." };
      }
      // Anything other than the explicit "json" flavor is markdown, which
      // matches the schema's enum and its documented default.
      const style = input.style === "json" ? "json" : "markdown";
      const summary = director.rollUp(taskIds, style);
      return { summary, count: taskIds.length };
    }
  };
}
|
|
1930
|
+
/**
 * Create the `terminate_subagent` tool bound to a director.
 *
 * @param director Object exposing `terminate(subagentId)`.
 * @returns The tool definition (flagged `mutating: true`). `execute` awaits
 *   the termination and resolves to `{ ok: true }`; rejections from
 *   `director.terminate` propagate to the caller.
 */
function makeTerminateTool(director) {
  const subagentIdProperty = { type: "string", description: "Subagent to abort." };
  const tool = {
    name: "terminate_subagent",
    description: 'Forcibly abort a subagent. Use sparingly \u2014 prefer waiting on the natural budget to expire. The current task (if any) ends with status "stopped".',
    permission: "auto",
    mutating: true,
    inputSchema: {
      type: "object",
      properties: { subagentId: subagentIdProperty },
      required: ["subagentId"]
    },
    async execute(input) {
      const target = input.subagentId;
      await director.terminate(target);
      return { ok: true };
    }
  };
  return tool;
}
|
|
1951
|
+
/**
 * Create the `fleet_status` tool bound to a director.
 *
 * @param director Object exposing `status()`.
 * @returns The tool definition; it takes no input and `execute` resolves
 *   to whatever `director.status()` returns.
 */
function makeFleetStatusTool(director) {
  const noInput = { type: "object", properties: {}, required: [] };
  const tool = {
    name: "fleet_status",
    description: "Snapshot of the fleet \u2014 every subagent's current status, pending vs. completed task counts, and the running total iteration count. Cheap; call freely.",
    permission: "auto",
    mutating: false,
    inputSchema: noInput,
    async execute() {
      const report = director.status();
      return report;
    }
  };
  return tool;
}
|
|
1963
|
+
/**
 * Create the `fleet_usage` tool bound to a director.
 *
 * @param director Object exposing `snapshot()`.
 * @returns The tool definition; it takes no input and `execute` resolves
 *   to whatever `director.snapshot()` returns.
 */
function makeFleetUsageTool(director) {
  const noInput = { type: "object", properties: {}, required: [] };
  const tool = {
    name: "fleet_usage",
    description: "Token + cost breakdown across the fleet, per-subagent and totals. Use this to reason about which workers to assign costly tasks to or when to wrap up to stay within budget.",
    permission: "auto",
    mutating: false,
    inputSchema: noInput,
    async execute() {
      const usage = director.snapshot();
      return usage;
    }
  };
  return tool;
}
|
|
1975
|
+
/**
 * Create the `delegate` tool: spawn a subagent, assign it one task, and
 * wait for the result under a host-side wall-clock timeout.
 *
 * @param opts Options object.
 *   - `host`: exposes `ensureDirector()`, `promoteToDirector()` and,
 *     optionally, `getPromotionBlockReason()`.
 *   - `roster`: optional map of role id to base subagent config.
 *   - `defaultTimeoutMs`: host timeout used when the call does not supply a
 *     valid one (defaults to 4 hours).
 *   The whole object is also passed to `readSubagentPartial`.
 * @returns The tool definition. `execute` never throws: every outcome is an
 *   object with an `ok` flag, plus `stopReason`, `error` and/or task result
 *   fields where they apply.
 */
function createDelegateTool(opts) {
  // Largest delay setTimeout honours; above it Node falls back to ~1 ms.
  const MAX_TIMER_DELAY_MS = 2147483647;
  // The subagent's own timeout is kept this much shorter than the host
  // timeout so the subagent normally reports before the host gives up.
  const SUBAGENT_TIMEOUT_BUFFER_MS = 3e4;
  const defaultTimeoutMs = opts.defaultTimeoutMs ?? 4 * 60 * 60 * 1e3;
  const rosterIds = opts.roster ? Object.keys(opts.roster) : [];
  const inputSchema = {
    type: "object",
    properties: {
      task: {
        type: "string",
        description: "What the subagent should do \u2014 natural language, complete sentence(s). The subagent has its own tool slice, its own LLM call, and returns when its task is done."
      },
      role: {
        type: "string",
        description: rosterIds.length > 0 ? `Roster role (preferred). One of: ${rosterIds.join(", ")}. Picks a pre-tuned config (prompt, budgets, tools) for that role.` : "No roster is configured \u2014 pass `name` instead.",
        enum: rosterIds.length > 0 ? rosterIds : void 0
      },
      name: {
        type: "string",
        description: "Display name for the subagent when not using a roster role. Required when `role` is omitted."
      },
      provider: {
        type: "string",
        description: 'Provider id (e.g. "anthropic", "openai"). Defaults to the host provider when omitted.'
      },
      model: {
        type: "string",
        description: "Model id within the provider. Defaults to the host model when omitted."
      },
      systemPromptOverride: {
        type: "string",
        description: "Optional extra prompt text appended to the role baseline."
      },
      timeoutMs: {
        type: "number",
        description: `Wall-clock budget for this delegate in milliseconds. No hard cap \u2014 set as high as the task realistically needs (a monorepo audit can take hours, a single-file lint takes seconds). Default ${Math.round(defaultTimeoutMs / 1e3 / 60)} minutes.`
      },
      maxIterations: {
        type: "number",
        description: "Maximum LLM iterations the subagent may take. Unset = use the role/coordinator default. Raise this for tasks with many tool-think-tool cycles (deep code analysis, multi-file refactors)."
      },
      maxToolCalls: {
        type: "number",
        description: "Maximum number of tool invocations the subagent may make. Unset = use the role/coordinator default. Raise this for tasks that touch many files (large grep + read + report)."
      }
    },
    required: ["task"]
  };
  return {
    name: "delegate",
    description: "Hand a discrete piece of work to a dedicated subagent and wait for its result. The subagent has its own context, its own LLM call, and its own budget \u2014 use this when a task is self-contained, would otherwise blow up your context, or benefits from a specialized role (bug-hunter, security-scanner, refactor-planner, audit-log). YOU decide how big the budget is: pass `timeoutMs`, `maxIterations`, and `maxToolCalls` sized to the actual work. There is no hidden cap forcing a 3-minute / 80-iteration limit \u2014 if a monorepo audit needs 2 hours and 500 tool calls, ask for that. Call multiple delegates in parallel through the provider's parallel-tool-call surface to fan work out across roles.",
    usageHint: "Set `task` to a complete instruction. Either pick `role` from the roster or pass `name` + `provider` + `model`. For non-trivial work, also pass `timeoutMs` (the wall-clock budget you actually need), `maxIterations`, and `maxToolCalls` \u2014 defaults are intentionally generous (4 hours) but the right values depend on scope. Returns the subagent's `TaskResult` \u2014 including the textual `result`, iteration count, tool count, and duration. Auto-promotes the host into director mode on first call.",
    permission: "auto",
    mutating: false,
    inputSchema,
    async execute(input) {
      const i = input ?? {};
      if (typeof i.task !== "string" || !i.task.trim()) {
        return { ok: false, error: "`task` is required." };
      }
      let director = await opts.host.ensureDirector();
      if (!director) {
        director = await opts.host.promoteToDirector();
      }
      if (!director) {
        const reason = opts.host.getPromotionBlockReason?.();
        return {
          ok: false,
          error: reason ?? "Director could not be activated \u2014 multi-agent host already running in legacy non-director mode. Restart with `--director` for fleet support."
        };
      }
      // Only a positive, finite number is a usable timeout. NaN, negative,
      // zero or non-numeric input would otherwise poison the arithmetic
      // below or fire the host timeout immediately.
      const timeoutMs = typeof i.timeoutMs === "number" && Number.isFinite(i.timeoutMs) && i.timeoutMs > 0 ? i.timeoutMs : defaultTimeoutMs;
      let cfg;
      if (i.role) {
        // Own-property check: the role id comes from model output and must
        // not resolve inherited keys such as "constructor".
        const base = opts.roster && Object.prototype.hasOwnProperty.call(opts.roster, i.role) ? opts.roster[i.role] : void 0;
        if (!base) {
          return {
            ok: false,
            error: `Unknown role "${i.role}". Available: ${rosterIds.join(", ") || "(no roster configured)"}.`
          };
        }
        // Copy so per-call overrides never leak back into the roster.
        cfg = { ...base };
        if (i.systemPromptOverride) cfg.systemPromptOverride = i.systemPromptOverride;
        if (i.provider) cfg.provider = i.provider;
        if (i.model) cfg.model = i.model;
      } else {
        if (!i.name) {
          return {
            ok: false,
            error: "Either `role` (from the roster) or `name` is required."
          };
        }
        cfg = {
          name: i.name,
          provider: i.provider,
          model: i.model,
          systemPromptOverride: i.systemPromptOverride
        };
      }
      if (typeof i.maxIterations === "number" && i.maxIterations > 0) {
        cfg.maxIterations = i.maxIterations;
      }
      if (typeof i.maxToolCalls === "number" && i.maxToolCalls > 0) {
        cfg.maxToolCalls = i.maxToolCalls;
      }
      // Cap the subagent's own timeout below the host timeout (never under
      // 30 s), keeping a smaller roster-provided value when there is one.
      const desiredSubTimeout = Math.max(3e4, timeoutMs - SUBAGENT_TIMEOUT_BUFFER_MS);
      if (!cfg.timeoutMs || cfg.timeoutMs > desiredSubTimeout) {
        cfg.timeoutMs = desiredSubTimeout;
      }
      try {
        const subagentId = await director.spawn(cfg);
        const taskId = await director.assign({
          // Empty id: `assign` generates a fresh one for falsy ids.
          id: "",
          description: i.task,
          subagentId
        });
        let timer;
        const hostTimeout = new Promise((resolve) => {
          timer = setTimeout(
            () => resolve({ __timeout: true }),
            Math.min(timeoutMs, MAX_TIMER_DELAY_MS)
          );
        });
        let result;
        try {
          result = await Promise.race([
            director.awaitTasks([taskId]).then((r) => r[0]),
            hostTimeout
          ]);
        } finally {
          // Always release the timer; otherwise a multi-hour timeout stays
          // scheduled (and keeps the process alive) after every delegate.
          clearTimeout(timer);
        }
        if ("__timeout" in result) {
          const partial2 = await readSubagentPartial(opts, subagentId);
          return {
            ok: false,
            stopReason: "host_timeout",
            error: `Subagent did not finish within ${timeoutMs}ms.`,
            hint: "Reduce scope of the next delegate, raise timeoutMs, or use spawn_subagent + await_tasks for long-running work.",
            subagentId,
            taskId,
            partial: partial2
          };
        }
        const baseStopReason = result.status === "success" ? "end_turn" : result.status === "timeout" ? "subagent_timeout" : result.status === "stopped" ? "aborted" : "budget_exhausted";
        // Partial output is only collected for unsuccessful runs.
        const partial = result.status === "success" ? void 0 : await readSubagentPartial(opts, subagentId);
        const errorKind = result.error?.kind;
        const retryable = result.error?.retryable;
        const backoffMs = result.error?.backoffMs;
        const hint = hintForKind(errorKind, retryable, backoffMs);
        return {
          ok: result.status === "success",
          status: result.status,
          stopReason: baseStopReason,
          errorKind,
          retryable,
          backoffMs,
          subagentId: result.subagentId,
          taskId: result.taskId,
          result: result.result,
          error: result.error,
          iterations: result.iterations,
          toolCalls: result.toolCalls,
          durationMs: result.durationMs,
          ...partial ? { partial } : {},
          ...hint ? { hint } : {}
        };
      } catch (err) {
        return {
          ok: false,
          stopReason: "error",
          error: err instanceof Error ? err.message : String(err)
        };
      }
    }
  };
}
|
|
2140
|
+
/**
 * Map a classified subagent failure to a short remediation hint.
 *
 * @param {string | undefined} kind - Failure classification, e.g. "provider_rate_limit".
 * @param {boolean | undefined} retryable - Consulted only for kinds without a dedicated hint.
 * @param {number | undefined} backoffMs - Backoff quoted in the rate-limit / 5xx hints;
 *   falls back to 5000 and 3000 respectively when nullish.
 * @returns {string | undefined} The hint, or undefined when `kind` is falsy or the
 *   kind is unrecognized and not retryable.
 */
function hintForKind(kind, retryable, backoffMs) {
  if (!kind) return void 0;

  // The two provider hints interpolate the backoff, so they are built on demand.
  if (kind === "provider_rate_limit") {
    return `Provider rate-limited. Retry safe after ${backoffMs ?? 5e3}ms backoff. Consider a smaller model or fewer parallel delegates.`;
  }
  if (kind === "provider_5xx") {
    return `Provider server error. Retry safe after ${backoffMs ?? 3e3}ms backoff \u2014 usually transient.`;
  }

  // Every budget_* counter shares one message.
  const budgetHint = "Subagent exhausted its budget. Raise the matching `max*` field on the next delegate or narrow task scope.";
  const fixedHints = /* @__PURE__ */ new Map([
    ["provider_timeout", "Provider network timeout. Retry safe; reduce input size if it persists."],
    ["provider_auth", "Provider rejected credentials. Cannot retry \u2014 fix the API key / config and re-invoke."],
    ["context_overflow", "Subagent context exceeded the model limit. Narrow the task, use a larger-context model, or split into multiple delegates."],
    ["budget_iterations", budgetHint],
    ["budget_tool_calls", budgetHint],
    ["budget_tokens", budgetHint],
    ["budget_cost", budgetHint],
    ["budget_timeout", "Subagent hit its wall-clock budget. Raise `timeoutMs` on the next delegate or split the task."],
    ["aborted_by_parent", "Subagent was aborted (user Ctrl+C, parent unwound, or sibling failure cascade). Not retryable until the abort condition is resolved."],
    ["empty_response", "Subagent ended its turn with no text and no tool calls. Almost always a prompt / config issue \u2014 clarify the task or check the model."],
    ["tool_failed", "A tool inside the subagent returned ok:false. Inspect `partial.lastAssistantText` for the agent reasoning, then retry with corrected inputs."],
    ["bridge_failed", "Parent-child bridge transport failed. This is rare \u2014 restart the session and retry."]
  ]);
  const fixed = fixedHints.get(kind);
  if (fixed !== void 0) return fixed;

  // Unknown kind: only the retryable flag is left to go on.
  if (retryable) return "Failure classified as retryable. Try again with the same input.";
  return void 0;
}
|
|
2172
|
+
/**
 * Best-effort summary of a subagent's JSONL transcript, used to report what
 * the subagent had produced when it did not finish successfully.
 *
 * The transcript is looked up at `<sessionsRoot>/<run>/<subagentId>.jsonl`.
 * When `opts.directorRunId` is set only that run directory is tried; otherwise
 * every entry of `sessionsRoot` is tried in `readdir` order and the first
 * readable file wins.
 *
 * @param {{ sessionsRoot?: string, directorRunId?: string }} opts
 * @param {string} subagentId
 * @returns {Promise<{ lastAssistantText?: string, lastStopReason?: string,
 *   toolUsesObserved: number, events: number } | undefined>} undefined when
 *   no sessions root is configured or no transcript could be read.
 */
async function readSubagentPartial(opts, subagentId) {
  const root = opts.sessionsRoot;
  if (!root) return void 0;

  const transcriptIn = (runDir) => path4.join(root, runDir, `${subagentId}.jsonl`);
  let candidates;
  if (opts.directorRunId) {
    candidates = [transcriptIn(opts.directorRunId)];
  } else {
    try {
      const runDirs = await fsp4.readdir(root);
      candidates = runDirs.map((runDir) => transcriptIn(runDir));
    } catch {
      return void 0;
    }
  }

  for (const file of candidates) {
    let raw;
    try {
      raw = await fsp4.readFile(file, "utf8");
    } catch {
      // Not present in this run directory; try the next candidate.
      continue;
    }

    const records = raw.split("\n").filter((entry) => entry.trim());
    let lastAssistantText;
    let lastStopReason;
    let toolUses = 0;
    for (const record of records) {
      try {
        const ev = JSON.parse(record);
        if (ev.type === "tool_use") {
          toolUses += 1;
          continue;
        }
        if (ev.type !== "llm_response") continue;
        if (typeof ev.stopReason === "string") lastStopReason = ev.stopReason;
        if (!Array.isArray(ev.content)) continue;
        const pieces = [];
        for (const block of ev.content) {
          if (block.type === "text") pieces.push(block.text ?? "");
        }
        const txt = pieces.join("\n").trim();
        // Only non-empty text replaces the previous value.
        if (txt) lastAssistantText = txt;
      } catch {
        // Malformed records are skipped; they still count toward `events`.
      }
    }
    return {
      lastAssistantText,
      lastStopReason,
      toolUsesObserved: toolUses,
      events: records.length
    };
  }
  return void 0;
}
|
|
2221
|
+
|
|
2222
|
+
// src/coordination/agent-subagent-runner.ts
|
|
2223
|
+
/**
 * Build the runner that executes one assigned task on a freshly built subagent.
 *
 * The returned async function obtains `{ agent, events, dispose? }` from
 * `opts.factory(ctx.config)`, mirrors the agent's events into `ctx.budget`,
 * and runs the agent on the formatted task input. A budget violation aborts
 * the run and is re-thrown once cleanup has finished.
 *
 * @param {object} opts
 * @param {Function} opts.factory - Called with `ctx.config`; resolves to
 *   `{ agent, events, dispose? }`.
 * @param {Function} [opts.formatTaskInput] - `(task, config) => input`;
 *   defaults to `defaultFormatTaskInput`.
 * @param {object} [opts.fleetBus] - When present, `attach(subagentId, events,
 *   taskId)` is called and its return value is invoked during cleanup.
 * @returns {Function} `(task, ctx) => Promise<{ result, iterations, toolCalls }>`.
 *   The promise rejects with the recorded budget error, with the agent's own
 *   error when it reports "failed", or with an `Error` for "aborted",
 *   "max_iterations", an empty response, or a failed last tool with no final text.
 */
function makeAgentSubagentRunner(opts) {
  const format = opts.formatTaskInput ?? defaultFormatTaskInput;
  return async (task, ctx) => {
    const factoryResult = await opts.factory(ctx.config);
    const { agent, events } = factoryResult;
    const detachFleet = opts.fleetBus?.attach(ctx.subagentId, events, task.id);
    const aborter = new AbortController();
    let budgetError = null;
    // Abort the run and remember why. Errors that are not already a
    // BudgetExceededError are wrapped, keeping the original message as a suffix.
    const onBudgetError = (err) => {
      aborter.abort();
      budgetError = err instanceof BudgetExceededError ? err : new BudgetExceededError(
        "tool_calls",
        0,
        0
      );
      if (budgetError !== err && err instanceof Error) {
        budgetError.message += ` (caused by: ${err.message})`;
      }
    };
    // Name of the most recent tool that reported ok:false; cleared by a later ok:true.
    let lastToolFailed = null;
    const unsub = [];
    unsub.push(
      events.on("tool.executed", (e) => {
        try {
          ctx.budget.recordToolCall();
        } catch (eb) {
          onBudgetError(eb);
        }
        if (e.ok === false) {
          lastToolFailed = e.name;
        } else if (e.ok === true) {
          lastToolFailed = null;
        }
      }),
      events.on("provider.response", (e) => {
        try {
          ctx.budget.recordUsage(e.usage);
        } catch (e2) {
          onBudgetError(e2);
        }
      }),
      events.on("iteration.started", () => {
        try {
          ctx.budget.recordIteration();
          ctx.budget.checkTimeout();
        } catch (e) {
          onBudgetError(e);
        }
      }),
      // D3: cooperative timeout enforcement DURING a long tool call.
      // The iteration-loop checkTimeout() only fires between agent
      // iterations — a single `bash sleep 3600` call would otherwise
      // park inside one tool execution while the timeout silently
      // passes, relying solely on the coordinator's hard Promise.race
      // to interrupt. Tools that emit `tool.progress` (bash chunks,
      // fetch byte progress, spawn-stream stdout) give us a heartbeat
      // we can hang the check on. When the budget trips here:
      //   1. onBudgetError sets budgetError + aborter.abort()
      //   2. aborter signal propagates to agent.run → tool executor
      //   3. tool's own signal listener kills the child process
      // Cheap: O(1) per progress event, and the budget short-circuits
      // when timeoutMs is unset (most subagents have one set anyway).
      events.on("tool.progress", () => {
        try {
          ctx.budget.checkTimeout();
        } catch (e) {
          onBudgetError(e);
        }
      })
    );
    const onParentAbort = () => aborter.abort();
    // An "abort" listener never fires for a signal that is already aborted,
    // so a parent cancelled before this point must be propagated by hand.
    if (ctx.signal.aborted) aborter.abort();
    ctx.signal.addEventListener("abort", onParentAbort, { once: true });
    let result;
    try {
      result = await agent.run(format(task, ctx.config), { signal: aborter.signal });
    } finally {
      // Cleanup runs whether agent.run resolved or threw.
      detachFleet?.();
      ctx.signal.removeEventListener("abort", onParentAbort);
      for (const u of unsub) u();
      if (factoryResult.dispose) {
        try {
          await factoryResult.dispose();
        } catch {
          // Disposal is best-effort; its failure must not mask the run outcome.
        }
      }
    }
    // A recorded budget error takes precedence over whatever status the agent reported.
    if (budgetError) throw budgetError;
    if (result.status === "failed") {
      throw result.error instanceof Error ? result.error : new Error(String(result.error ?? "agent failed"));
    }
    if (result.status === "aborted") {
      throw new Error("agent aborted");
    }
    if (result.status === "max_iterations") {
      throw new Error("agent exhausted iteration limit");
    }
    const usage = ctx.budget.usage();
    const finalText = (result.finalText ?? "").trim();
    if (finalText.length === 0 && usage.toolCalls === 0) {
      throw new Error("empty response");
    }
    if (finalText.length === 0 && lastToolFailed !== null) {
      throw new Error(`tool failed: ${lastToolFailed}`);
    }
    return {
      result: result.finalText,
      iterations: result.iterations,
      toolCalls: usage.toolCalls
    };
  };
}
|
|
2334
|
+
/**
 * Default task formatter: hand the agent the task description verbatim.
 *
 * @param {{ description?: string | null }} task
 * @returns {string} The description, or "" when it is null or undefined.
 */
function defaultFormatTaskInput(task) {
  const { description } = task;
  if (description === null || description === void 0) {
    return "";
  }
  return description;
}
|
|
2337
|
+
/**
 * File-backed session store. Each session is a `<id>.jsonl` transcript inside
 * `dir`, optionally accompanied by a `<id>.summary.json` manifest that caches
 * the listing summary.
 */
var DefaultSessionStore = class {
  /** Directory holding the transcripts and manifests. */
  dir;
  /** Optional emitter; receives "session.damaged" events during replay. */
  events;
  /**
   * @param {{ dir: string, events?: { emit: Function } }} opts
   */
  constructor(opts) {
    this.dir = opts.dir;
    this.events = opts.events;
  }
  /**
   * Create a new session transcript and return a writer for it.
   *
   * @param {object} meta - Session metadata; `meta.id` is used as the session
   *   id when present, otherwise one is derived from the current timestamp
   *   plus two random bytes.
   * @returns {Promise<FileSessionWriter>}
   * @throws {Error} When the transcript cannot be opened (original error in `cause`).
   */
  async create(meta) {
    await ensureDir(this.dir);
    const startedAt = (/* @__PURE__ */ new Date()).toISOString();
    const id = meta.id ?? `${startedAt.replace(/[:.]/g, "-")}-${randomBytes(2).toString("hex")}`;
    const file = path4.join(this.dir, `${id}.jsonl`);
    let handle;
    try {
      // 0o600 (decimal 384): owner read/write only.
      handle = await fsp4.open(file, "a", 0o600);
    } catch (err) {
      throw new Error(
        `Failed to open session file: ${err instanceof Error ? err.message : String(err)}`,
        {
          cause: err
        }
      );
    }
    try {
      return new FileSessionWriter(id, handle, startedAt, meta, { dir: this.dir, filePath: file });
    } catch (err) {
      await handle.close().catch(() => {
      });
      throw err;
    }
  }
  /**
   * Load an existing session and reopen its transcript for appending.
   *
   * @param {string} id
   * @returns {Promise<{ writer: FileSessionWriter, data: object }>}
   * @throws {Error} When the transcript cannot be read or opened for append.
   */
  async resume(id) {
    const data = await this.load(id);
    const file = path4.join(this.dir, `${id}.jsonl`);
    let handle;
    try {
      handle = await fsp4.open(file, "a", 0o600);
    } catch (err) {
      throw new Error(
        `Failed to open session "${id}" for append: ${err instanceof Error ? err.message : String(err)}`,
        { cause: err }
      );
    }
    try {
      const writer = new FileSessionWriter(
        id,
        handle,
        (/* @__PURE__ */ new Date()).toISOString(),
        {
          id,
          model: data.metadata.model,
          provider: data.metadata.provider
        },
        { resumed: true, dir: this.dir, filePath: file }
      );
      return { writer, data };
    } catch (err) {
      // Same handling as create(): do not leak the handle if construction fails.
      await handle.close().catch(() => {
      });
      throw err;
    }
  }
  /**
   * Read and replay a transcript.
   *
   * Lines that are not valid JSON, or that lack a string `type` and `ts`,
   * are dropped.
   *
   * @param {string} id
   * @returns {Promise<{ metadata: object, events: object[], messages: object[], usage: object }>}
   */
  async load(id) {
    const file = path4.join(this.dir, `${id}.jsonl`);
    const raw = await fsp4.readFile(file, "utf8");
    const lines = raw.split("\n").filter((l) => l.trim());
    const events = [];
    for (const line of lines) {
      try {
        const parsed = JSON.parse(line);
        if (parsed !== null && typeof parsed === "object" && typeof parsed.type === "string" && typeof parsed.ts === "string") {
          events.push(parsed);
        }
      } catch {
        // Unparseable line (e.g. a torn write): skip it.
      }
    }
    const meta = this.metaFromEvents(id, events);
    const { messages, usage } = this.replay(events, id);
    return { metadata: meta, events, messages, usage };
  }
  /**
   * List session summaries, newest first (ties broken by id).
   *
   * @param {number} [limit=20] - Maximum number of summaries returned.
   * @returns {Promise<object[]>} Empty when the directory cannot be listed.
   */
  async list(limit = 20) {
    try {
      await ensureDir(this.dir);
      const files = await fsp4.readdir(this.dir);
      const ids = files.filter((f) => f.endsWith(".jsonl")).map((f) => f.replace(/\.jsonl$/, ""));
      const sessions = await Promise.all(ids.map((id) => this.summaryFor(id).catch(() => null)));
      const out = sessions.filter((s) => s !== null);
      out.sort((a, b) => {
        if (a.startedAt < b.startedAt) return 1;
        if (a.startedAt > b.startedAt) return -1;
        return a.id.localeCompare(b.id);
      });
      return out.slice(0, limit);
    } catch {
      return [];
    }
  }
  /**
   * Return the summary for one session, preferring the cached manifest and
   * rebuilding (and re-caching) it from the transcript when the manifest is
   * missing or unreadable.
   *
   * @param {string} id
   * @returns {Promise<object>}
   */
  async summaryFor(id) {
    const manifest = path4.join(this.dir, `${id}.summary.json`);
    try {
      const raw = await fsp4.readFile(manifest, "utf8");
      return JSON.parse(raw);
    } catch {
      const full = path4.join(this.dir, `${id}.jsonl`);
      const stat3 = await fsp4.stat(full);
      const summary = await this.summarize(id, stat3.mtime.toISOString());
      // Caching the manifest is best-effort: warn and still return the summary.
      await fsp4.writeFile(manifest, JSON.stringify(summary), { mode: 0o600 }).catch((err) => {
        console.warn(
          `[session-store] Failed to write manifest for "${id}":`,
          err instanceof Error ? err.message : String(err)
        );
      });
      return summary;
    }
  }
  /**
   * Delete a session's transcript and, if present, its manifest.
   *
   * @param {string} id
   * @returns {Promise<void>} Rejects when the transcript itself cannot be removed.
   */
  async delete(id) {
    await fsp4.unlink(path4.join(this.dir, `${id}.jsonl`));
    await fsp4.unlink(path4.join(this.dir, `${id}.summary.json`)).catch(() => void 0);
  }
  /**
   * Build a summary by loading the full transcript.
   *
   * @param {string} id
   * @param {string} mtime - ISO timestamp used as `startedAt` when loading fails.
   * @returns {Promise<object>} A "(damaged)" placeholder when the transcript cannot be loaded.
   */
  async summarize(id, mtime) {
    try {
      const data = await this.load(id);
      const firstUser = data.events.find((e) => e.type === "user_input");
      const title = firstUser ? userInputTitle(firstUser.content) : "(empty session)";
      return {
        id,
        title,
        startedAt: data.metadata.startedAt,
        model: data.metadata.model ?? "unknown",
        provider: data.metadata.provider ?? "unknown",
        tokenTotal: data.usage.input + data.usage.output
      };
    } catch {
      return {
        id,
        title: "(damaged)",
        startedAt: mtime,
        model: "unknown",
        provider: "unknown",
        tokenTotal: 0
      };
    }
  }
  /**
   * Derive session metadata from the first `session_start` / `session_end` events.
   *
   * @param {string} id
   * @param {object[]} events
   * @returns {{ id: string, startedAt: string, endedAt?: string, model?: string, provider?: string }}
   *   `startedAt` falls back to the Unix epoch when no `session_start` exists.
   */
  metaFromEvents(id, events) {
    const start = events.find((e) => e.type === "session_start");
    const end = events.find((e) => e.type === "session_end");
    return {
      id,
      startedAt: start?.ts ?? (/* @__PURE__ */ new Date(0)).toISOString(),
      endedAt: end?.ts,
      model: start?.model,
      provider: start?.provider
    };
  }
  /**
   * Rebuild the conversation and cumulative token usage from transcript events.
   *
   * `tool_result` events with no open `tool_use` are skipped and reported via
   * "session.damaged"; `tool_use` blocks still open at the end are reported too.
   * An `llm_response` record lacking `usage` or an array `content` contributes
   * zero usage / no tool uses instead of aborting the whole replay.
   *
   * @param {object[]} events
   * @param {string} [sessionId="unknown"] - Only used in emitted damage reports.
   * @returns {{ messages: object[], usage: { input: number, output: number, cacheRead: number, cacheWrite: number } }}
   */
  replay(events, sessionId = "unknown") {
    const messages = [];
    let usage = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
    // Ids of tool_use blocks that have not yet been answered by a tool_result.
    const openToolUses = /* @__PURE__ */ new Set();
    for (const e of events) {
      if (e.type === "user_input") {
        // A new user turn discards any tool uses left open by the previous one.
        openToolUses.clear();
        messages.push({ role: "user", content: e.content });
      } else if (e.type === "llm_response") {
        messages.push({ role: "assistant", content: e.content });
        if (Array.isArray(e.content)) {
          for (const b of e.content) {
            if (b?.type === "tool_use") openToolUses.add(b.id);
          }
        }
        const delta = e.usage ?? {};
        usage = {
          input: usage.input + (delta.input ?? 0),
          output: usage.output + (delta.output ?? 0),
          cacheRead: (usage.cacheRead ?? 0) + (delta.cacheRead ?? 0),
          cacheWrite: (usage.cacheWrite ?? 0) + (delta.cacheWrite ?? 0)
        };
      } else if (e.type === "tool_result") {
        if (!openToolUses.has(e.id)) {
          this.events?.emit("session.damaged", {
            sessionId,
            detail: `Orphan tool_result "${e.id}" has no matching tool_use`
          });
          continue;
        }
        openToolUses.delete(e.id);
        const content = [
          {
            type: "tool_result",
            tool_use_id: e.id,
            content: typeof e.content === "string" ? e.content : JSON.stringify(e.content),
            is_error: e.isError
          }
        ];
        // Consecutive tool results are merged into the preceding user message.
        const last = messages[messages.length - 1];
        if (last && last.role === "user") {
          if (Array.isArray(last.content)) {
            last.content.push(...content);
          } else if (typeof last.content === "string") {
            last.content = [{ type: "text", text: last.content }, ...content];
          } else {
            messages.push({ role: "user", content });
          }
        } else {
          messages.push({ role: "user", content });
        }
      }
    }
    if (openToolUses.size > 0) {
      this.events?.emit("session.damaged", {
        sessionId,
        detail: `${openToolUses.size} tool_use blocks without matching results \u2014 replay truncated`
      });
    }
    return { messages, usage };
  }
};
|
|
2545
|
+
/**
 * Append-only writer for one session transcript. Events are written as JSON
 * lines through an already-open file handle, while a running summary is kept
 * in memory and flushed to `<id>.summary.json` on close.
 */
var FileSessionWriter = class {
  id;
  handle;
  startedAt;
  meta;
  /** Once true, append() and writeSessionStart() become no-ops. */
  closed = false;
  /** Manifest path; "" when no directory was supplied (no manifest is written). */
  manifestFile;
  summary;
  tokenIn = 0;
  tokenOut = 0;
  /** Transcript path; "" when not supplied. */
  filePath;
  /** True once the session_start / session_resumed record has been attempted. */
  initDone = false;
  resumed;
  /** Append failures since the last emitted warning. */
  appendFailCount = 0;
  /** `Date.now()` of the last append warning; warnings are throttled to one per 5 s. */
  lastAppendWarnAt = 0;
  /**
   * @param {string} id - Session id.
   * @param {object} handle - Open file handle exposing `appendFile` and `close`.
   * @param {string} startedAt - ISO timestamp stored in the start record and the summary.
   * @param {{ model?: string, provider?: string }} meta
   * @param {{ resumed?: boolean, dir?: string, filePath?: string }} [opts]
   */
  constructor(id, handle, startedAt, meta, opts = {}) {
    this.id = id;
    this.handle = handle;
    this.startedAt = startedAt;
    this.meta = meta;
    this.resumed = opts.resumed ?? false;
    this.manifestFile = opts.dir ? path4.join(opts.dir, `${id}.summary.json`) : "";
    this.filePath = opts.filePath ?? "";
    this.summary = {
      id,
      title: "(empty session)",
      startedAt,
      model: meta.model ?? "unknown",
      provider: meta.provider ?? "unknown",
      tokenTotal: 0
    };
  }
  /** Public accessor for the JSONL path — required by SessionWriter so
   *  observability surfaces (`/fleet log`, FleetPanel) can locate the
   *  transcript without recomputing the path from session metadata. */
  get transcriptPath() {
    return this.filePath || void 0;
  }
  /**
   * Write the opening record (`session_start`, or `session_resumed` for a
   * resumed session) at most once. Does nothing when no file path is known;
   * write failures are ignored.
   *
   * @returns {Promise<void>}
   */
  async writeSessionStart() {
    if (this.initDone || this.closed) return;
    this.initDone = true;
    const record = `${JSON.stringify({
      type: this.resumed ? "session_resumed" : "session_start",
      ts: this.startedAt,
      id: this.id,
      model: this.meta.model ?? "unknown",
      provider: this.meta.provider ?? "unknown"
    })}\n`;
    try {
      if (this.filePath) {
        await fsp4.writeFile(this.filePath, record, { flag: "a", mode: 0o600 });
      }
    } catch {
      // Best-effort: a missing start record must not block the session.
    }
  }
  /**
   * Append one event as a JSON line, writing the opening record first if
   * needed. Write failures never reject; they are counted and reported
   * through a throttled `console.warn`.
   *
   * @param {object} event
   * @returns {Promise<void>}
   */
  async append(event) {
    if (this.closed) return;
    if (!this.initDone) {
      await this.writeSessionStart();
    }
    this.observeForSummary(event);
    try {
      await this.handle.appendFile(`${JSON.stringify(event)}\n`, "utf8");
    } catch (err) {
      this.appendFailCount++;
      const now = Date.now();
      if (now - this.lastAppendWarnAt > 5e3) {
        const suppressed = this.appendFailCount - 1;
        const tail = suppressed > 0 ? ` (+${suppressed} suppressed)` : "";
        console.warn(
          "[session] append failed:",
          err instanceof Error ? err.message : String(err),
          tail
        );
        this.lastAppendWarnAt = now;
        this.appendFailCount = 0;
      }
    }
  }
  /**
   * Watch events as they're appended and keep the summary state hot, so
   * `close()` can flush a `<id>.summary.json` manifest without re-reading
   * the JSONL.
   *
   * Missing usage counters count as zero, so one incomplete record can
   * neither turn `tokenTotal` into NaN nor make `append()` reject before the
   * event is written.
   *
   * @param {object} event
   */
  observeForSummary(event) {
    if (event.type === "user_input" && this.summary.title === "(empty session)") {
      // Only the first user input names the session.
      this.summary = { ...this.summary, title: userInputTitle(event.content) };
    } else if (event.type === "llm_response") {
      this.tokenIn += event.usage?.input ?? 0;
      this.tokenOut += event.usage?.output ?? 0;
      this.summary = { ...this.summary, tokenTotal: this.tokenIn + this.tokenOut };
    } else if (event.type === "session_end") {
      // A positive total reported at session end overrides the running count.
      const total = (event.usage?.input ?? 0) + (event.usage?.output ?? 0);
      if (total > 0) this.summary = { ...this.summary, tokenTotal: total };
    }
  }
  /**
   * Flush the summary manifest (when a manifest path is known) and close the
   * file handle. Idempotent; failures of either step are ignored.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (this.closed) return;
    this.closed = true;
    if (this.manifestFile) {
      try {
        await fsp4.writeFile(this.manifestFile, JSON.stringify(this.summary), { mode: 0o600 });
      } catch {
        // The manifest is only a cache; it can be rebuilt from the transcript.
      }
    }
    try {
      await this.handle.close();
    } catch {
      // Nothing useful can be done if the handle fails to close.
    }
  }
};
|
|
2659
|
+
/**
 * Derive a short session title from a user input.
 *
 * @param {string | Array<{ type: string, text?: string }>} content - Raw text,
 *   or content blocks of which only the "text" blocks are used.
 * @returns {string} At most 60 characters; "(non-text input)" when the blocks
 *   carry no text.
 */
function userInputTitle(content) {
  const maxLength = 60;
  if (typeof content === "string") {
    return content.slice(0, maxLength);
  }
  const textBlocks = content.filter((block) => block.type === "text");
  const joined = textBlocks.map((block) => block.text).join(" ");
  const title = joined === "" ? "(non-text input)" : joined;
  return title.slice(0, maxLength);
}
|
|
2664
|
+
|
|
2665
|
+
// src/coordination/director-session.ts
|
|
2666
|
+
/**
 * Create the factory that gives every subagent of one director run its own
 * session transcript.
 *
 * @param {object} opts
 * @param {string} [opts.directorRunId] - Run id; defaults to a filesystem-safe
 *   ISO timestamp suffixed with "-director".
 * @param {object} [opts.store] - Caller-supplied store; takes precedence over `sessionsRoot`.
 * @param {string} [opts.sessionsRoot] - Root under which a per-run directory
 *   `<sessionsRoot>/<runId>` is used.
 * @returns {{ dir: string, directorRunId: string, createSubagentSession: Function }}
 *   `dir` is "(caller-managed)" when only `store` was given.
 * @throws {Error} When neither `store` nor `sessionsRoot` is provided.
 */
function makeDirectorSessionFactory(opts) {
  const { store: callerStore, sessionsRoot } = opts;
  const runId = opts.directorRunId ?? `${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}-director`;

  if (!callerStore && !sessionsRoot) {
    throw new Error("makeDirectorSessionFactory requires either `store` or `sessionsRoot`");
  }

  const dir = sessionsRoot ? path4.join(sessionsRoot, runId) : "(caller-managed)";
  const store = callerStore ? callerStore : new DefaultSessionStore({ dir });

  return {
    dir,
    directorRunId: runId,
    /**
     * Open a session named after the subagent; missing title / provider /
     * model fall back to the subagent id and "unknown".
     */
    async createSubagentSession(request) {
      const { subagentId, provider, model, title } = request;
      const meta = {
        id: subagentId,
        title: title ?? subagentId,
        provider: provider ?? "unknown",
        model: model ?? "unknown"
      };
      return store.create(meta);
    }
  };
}
|
|
2692
|
+
|
|
2693
|
+
// src/coordination/fleet.ts
|
|
2694
|
+
/**
 * Roster definition for the "audit-log" role: identity fields plus the system
 * prompt, assembled here from one array entry per prompt line.
 */
var AUDIT_LOG_AGENT = {
  id: "audit-log",
  name: "Audit Log",
  role: "audit-log",
  prompt: [
    "You are the Audit Log agent. Your job is to analyze structured JSONL",
    "session logs and produce actionable markdown reports.",
    "",
    "Scope:",
    "- Parse session logs (iteration counts, tool calls, errors, usage)",
    "- Detect repeated failure patterns across multiple runs",
    "- Identify tool usage anomalies (over-use, failures, unexpected chains)",
    "- Track token consumption trends",
    "- Generate structured audit reports with severity ratings",
    "",
    "Input format you accept:",
    '{ "task": "analyze | report | trends", "sessionPath": "<path>", "focus": "errors | tools | usage | all" }',
    "",
    "Output: Markdown audit report with sections:",
    "- ## Summary (totals, error rate)",
    "- ## Top Errors (count + context)",
    "- ## Tool Usage (table with calls, failures, avg duration)",
    "- ## Anomalies (pattern \u2192 severity)",
    "",
    "Working rules:",
    "- Never fabricate numbers \u2014 read the actual logs first",
    "- Always include file:line references for errors",
    "- If sessionPath is missing, ask the director to provide it",
    "- Report confidence level: high (>90% accuracy), medium, low"
  ].join("\n")
  // No budgets are pinned on the role: the orchestrator (delegate tool or
  // spawn_subagent) sizes each task itself. A monorepo audit needs hours, a
  // single-file lint check needs seconds, and a fixed default would only be
  // something the orchestrator has to override.
};
|
|
2728
|
+
/**
 * Roster definition for the "bug-hunter" role: identity fields plus the system
 * prompt, assembled here from one array entry per prompt line.
 */
var BUG_HUNTER_AGENT = {
  id: "bug-hunter",
  name: "Bug Hunter",
  role: "bug-hunter",
  prompt: [
    "You are the Bug Hunter agent. Your job is to systematically scan",
    "source code for bugs, anti-patterns, and code smells using pattern matching",
    "and heuristics. Output a prioritized hit list with file:line references.",
    "",
    "Scope:",
    "- Detect common bug patterns (uncaught errors, resource leaks, race conditions)",
    "- Identify anti-patterns (callback hell, God objects, circular deps)",
    "- Find TypeScript-specific issues (unsafe any, missing null checks, branded types)",
    "- Flag security-sensitive constructs (eval, innerHTML, hardcoded secrets)",
    "- Rank findings: critical > high > medium > low",
    "",
    "Input format you accept:",
    '{ "task": "scan | hunt | check", "paths": ["src/**/*.ts"], "focus": "bugs | patterns | security | all", "severityThreshold": "medium" }',
    "",
    "Output: Markdown bug hunt report:",
    "- ## Critical (must fix first)",
    "- ## High (should fix)",
    "- ## Medium",
    "- ## Low (consider)",
    "Each entry: **[TYPE]** `file:line` \u2014 description + suggested fix",
    "",
    "Bug pattern reference you know:",
    "| Pattern | Regex hint | Severity |",
    "|---------|------------|----------|",
    "| Uncaught promise | /.then\\(.*\\)/ without catch | high |",
    "| Event leak | on\\( without off/removeListener | high |",
    "| Hardcoded secret | [a-zA-Z0-9/_-]{20,} in config files | critical |",
    "| unsafe any | : any\\b or <any> | medium |",
    "| innerHTML | innerHTML\\s*= | high |",
    "",
    "Working rules:",
    "- Never scan node_modules \u2014 it's noise",
    "- Always include file:line for every finding",
    "- If >30% of findings are false positives, note the confidence level",
    "- Ask director for clarification if paths are ambiguous"
  ].join("\n")
  // No budgets are pinned on the role; the orchestrator sets them per task.
};
|
|
2769
|
+
/**
 * Roster definition for the "refactor-planner" role: identity fields plus the
 * system prompt, assembled here from one array entry per prompt line.
 */
var REFACTOR_PLANNER_AGENT = {
  id: "refactor-planner",
  name: "Refactor Planner",
  role: "refactor-planner",
  prompt: [
    "You are the Refactor Planner agent. Your job is to analyze code",
    "structure and produce a concrete, phased refactoring plan with risk",
    "assessment, dependency ordering, and rollback strategy.",
    "",
    "Scope:",
    "- Map module-level dependencies (import graph)",
    "- Identify coupling hotspots (high fan-in/out modules)",
    "- Assess refactoring risk by complexity and test coverage",
    "- Generate phased plans with checkpoint milestones",
    "- Produce diff-friendly task lists (one task = one concern)",
    "",
    "Input format you accept:",
    '{ "task": "plan | assess | roadmap", "target": "src/core", "constraint": "no-breaking-changes | minimal-downtime | full-rewrite", "focus": "architecture | performance | maintainability" }',
    "",
    "Output: Markdown refactor plan:",
    "- ## Phase 1: Low Risk / High Payoff (do first)",
    "Table: | # | Task | Module | Risk | Est. Time |",
    "- ## Phase 2: Medium Risk",
    "- ## Phase 3: High Risk (requires full regression)",
    "- ## Dependency Graph (abbreviated ASCII)",
    "- ## Rollback Strategy",
    "- ## Exit Criteria (checkbox list)",
    "",
    "Risk scoring criteria:",
    "| Factor | Low | Medium | High |",
    "|--------|-----|--------|------|",
    "| Cyclomatic complexity | <10 | 10-20 | >20 |",
    "| Test coverage | >80% | 50-80% | <50% |",
    "| Fan-out (imports) | <5 | 5-15 | >15 |",
    "",
    "Working rules:",
    "- Always include rollback strategy \u2014 every refactor can fail",
    "- Merge tasks that take <1h into a single phase",
    "- Respect team constraints (reviewer availability, parallelization)",
    "- Never plan without analyzing the actual code first"
  ].join("\n")
  // No budgets are pinned on the role; the orchestrator sets them per task.
};
|
|
2810
|
+
/**
 * Fleet agent definition for the "security-scanner" role.
 *
 * Fields:
 *  - id / role: both "security-scanner".
 *  - name: human-readable label.
 *  - prompt: the system prompt describing the agent's scope, accepted input
 *    shape, report layout, detection tables and working rules.
 *
 * The "Injection patterns" table must only list genuinely safer constructs:
 * `new Function(str)` compiles arbitrary source exactly like `eval(str)`, so it
 * is deliberately NOT offered as an alternative to eval.
 */
var SECURITY_SCANNER_AGENT = {
  id: "security-scanner",
  name: "Security Scanner",
  role: "security-scanner",
  // NOTE: the template literal below is runtime text. `\u2014`, `\u2192`,
  // the escaped backticks and `\${x}` are intentional escapes; keep them.
  prompt: `You are the Security Scanner agent. Your job is to scan code,
configs, and dependencies for security issues from hardcoded secrets to
supply chain risks.

Scope:
- Detect hardcoded secrets: API keys, tokens, passwords, private keys
- Find injection vectors: eval, innerHTML, SQL concat, shell injection
- Identify insecure patterns: weak crypto, hardcoded IVs, disabled TLS
- Scan dependencies for known CVEs (via npm/pnpm audit)
- Flag supply chain risks: postinstall hooks, unverified scripts, .npmrc

Input format you accept:
{ "task": "scan | audit | secrets | dependencies", "paths": ["src", "config"], "depth": "quick | normal | deep" }

Output: Markdown security report:
- ## CRITICAL: Secrets Found (with code snippets)
- ## HIGH: Injection Vectors
- ## MEDIUM: Insecure Patterns
- ## Dependency Issues (CVE list)
- ## Summary table (severity \u2192 count)
- ## Remediation Checklist (with checkboxes)

Secret patterns you detect:
| Pattern | Example | Severity |
|---------|---------|----------|
| AWS Access Key | AKIAIOSFODNN7EXAMPLE | critical |
| AWS Secret Key | [a-zA-Z0-9/+=]{40} base64 | critical |
| GitHub Token | ghp_[a-zA-Z0-9]{36} | critical |
| Private Key PEM | -----BEGIN.*PRIVATE KEY----- | critical |
| JWT | eyJ[a-zA-Z0-9_-]+ | high |

Injection patterns:
| Construct | Safe alternative |
|-----------|-----------------|
| eval(str) | JSON.parse or a dedicated parser |
| innerHTML = x | textContent or sanitize |
| exec(\`cmd \${x}\`) | execFile with args array |

Working rules:
- Never scan node_modules \u2014 use npm audit instead
- Always provide remediation steps, not just findings
- Verify regex-based secrets before flagging (false positive risk)
- When in doubt, flag as medium rather than ignoring potential issues`
  // Budgets are set by the orchestrator per task — see fleet.ts header.
};
|
|
2859
|
+
// Lookup table of the built-in fleet agents, keyed by roster identifier.
// Insertion order here determines the order of ALL_FLEET_AGENTS below.
var FLEET_ROSTER = {
  "audit-log": AUDIT_LOG_AGENT,
  "bug-hunter": BUG_HUNTER_AGENT,
  "refactor-planner": REFACTOR_PLANNER_AGENT,
  "security-scanner": SECURITY_SCANNER_AGENT
};

// Flat list of every roster entry, in the roster's key order.
var ALL_FLEET_AGENTS = Object.keys(FLEET_ROSTER).map(function (rosterKey) {
  return FLEET_ROSTER[rosterKey];
});
|
|
2866
|
+
|
|
2867
|
+
export { ALL_FLEET_AGENTS, AUDIT_LOG_AGENT, BUG_HUNTER_AGENT, BudgetExceededError, DEFAULT_DIRECTOR_PREAMBLE, DEFAULT_SUBAGENT_BASELINE, DefaultMultiAgentCoordinator, Director, DirectorBudgetError, FLEET_ROSTER, FleetBus, FleetUsageAggregator, InMemoryAgentBridge, InMemoryBridgeTransport, REFACTOR_PLANNER_AGENT, SECURITY_SCANNER_AGENT, SubagentBudget, composeDirectorPrompt, composeSubagentPrompt, createDelegateTool, createMessage, makeAgentSubagentRunner, makeDirectorSessionFactory, rosterSummaryFromConfigs };
|
|
2868
|
+
//# sourceMappingURL=index.js.map
|
|
2869
|
+
//# sourceMappingURL=index.js.map
|