@forwardimpact/libeval 0.1.43 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-benchmark.js +2 -2
- package/bin/fit-eval.js +101 -21
- package/bin/fit-trace.js +14 -0
- package/package.json +1 -1
- package/src/commands/benchmark-run.js +1 -1
- package/src/commands/by-discussion.js +84 -0
- package/src/commands/callback.js +104 -0
- package/src/commands/discuss.js +116 -0
- package/src/commands/facilitate.js +2 -2
- package/src/commands/supervise.js +3 -3
- package/src/discuss-tools.js +203 -0
- package/src/discusser.js +332 -0
- package/src/facilitator.js +39 -333
- package/src/index.js +14 -0
- package/src/orchestration-loop.js +369 -0
- package/src/redaction.js +10 -0
- package/src/render/orchestrator-filter.js +1 -0
- package/src/trace-collector.js +4 -0
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OrchestrationLoop — N agent sessions + one lead LLM session. The
|
|
3
|
+
* Ask/Answer contract is enforced at turn boundaries via checkPendingAsk:
|
|
4
|
+
* one synthetic reminder, then a `protocol_violation` event plus a
|
|
5
|
+
* null-answer injection so the session advances instead of deadlocking.
|
|
6
|
+
*
|
|
7
|
+
* Mode-specific concepts (Conclude vs. Adjourn/Recess, lead role name,
|
|
8
|
+
* system prompts, tool sets) live in mode-specific wrappers
|
|
9
|
+
* (`Facilitator` for facilitate mode, `Discusser` for discuss mode). This
|
|
10
|
+
* file owns only the loop itself.
|
|
11
|
+
*/
|
|
12
|
+
import { SequenceCounter } from "./sequence-counter.js";
|
|
13
|
+
import {
|
|
14
|
+
createOrchestrationContext,
|
|
15
|
+
checkPendingAsk,
|
|
16
|
+
} from "./orchestration-toolkit.js";
|
|
17
|
+
import { createAsyncQueue, formatMessages } from "./orchestrator-helpers.js";
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Orchestrate N agent sessions coordinated by a single lead LLM session.
|
|
21
|
+
* Mode-neutral. Callers parameterise the lead participant's name and the
|
|
22
|
+
* `protocol_violation` mode tag so the same loop powers both facilitate
|
|
23
|
+
* and discuss modes without either knowing about the other.
|
|
24
|
+
*/
|
|
25
|
+
export class OrchestrationLoop {
  /**
   * @param {object} deps
   * @param {import("./agent-runner.js").AgentRunner} deps.leadRunner
   * @param {Array<{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}>} deps.agents
   * @param {import("./message-bus.js").MessageBus} deps.messageBus
   * @param {import("stream").Writable} deps.output
   * @param {string} [deps.leadName] - Canonical name of the lead participant on the messageBus (default "lead").
   * @param {"facilitated"|"discussion"|"supervised"} [deps.mode] - Mode tag emitted on `protocol_violation` events (default "facilitated").
   * @param {number} [deps.maxTurns] - Stored on the instance (default 20).
   * @param {object} [deps.ctx] - Shared orchestration context; a fresh one is created when omitted.
   * @param {object} [deps.eventQueue] - Queue feeding the lead loop; a fresh one is created when omitted.
   * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
   * @param {object} deps.redactor - Must expose `redactValue`; every emitted envelope passes through it.
   * @throws {Error} When `deps.redactor` is missing.
   */
  constructor({
    leadRunner,
    agents,
    messageBus,
    output,
    leadName,
    mode,
    maxTurns,
    ctx,
    eventQueue,
    taskAmend,
    redactor,
  }) {
    if (!redactor) throw new Error("redactor is required");
    this.redactor = redactor;
    this.leadRunner = leadRunner;
    this.leadName = leadName ?? "lead";
    this.mode = mode ?? "facilitated";
    this.agents = agents;
    this.messageBus = messageBus;
    this.output = output;
    // NOTE(review): maxTurns is stored here but never read inside this class;
    // presumably a wrapper enforces the limit — confirm, otherwise the loop is unbounded.
    this.maxTurns = maxTurns ?? 20;
    this.ctx = ctx ?? createOrchestrationContext();
    this.counter = new SequenceCounter();
    this.eventQueue = eventQueue ?? createAsyncQueue();
    this.leadTurns = 0;
    this.taskAmend = taskAmend ?? null;

    // Deferred promise: resolved via concludeResolve() once the session has
    // concluded, so waiting agent loops can be released through Promise.race.
    let resolve;
    const promise = new Promise((r) => {
      resolve = r;
    });
    this.concludePromise = promise;
    this.concludeResolve = resolve;
  }

  /**
   * Run the full orchestrated session.
   *
   * Emits `session_start`, gives the lead the (optionally amended) task, then
   * runs the lead event loop alongside the agent loops until both settle, and
   * finally emits a `summary` event.
   *
   * @param {string} task
   * @returns {Promise<{success: boolean, turns: number}>}
   */
  async run(task) {
    this.emitOrchestratorEvent({ type: "session_start" });

    const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;

    // Launch agent loops first — they wait for messages via messageBus.
    // This lets agents process Ask/Announce messages that arrive during
    // the lead's initial run, rather than after it completes.
    const agentPromises = this.agents.map((a) => this.#runAgent(a));

    // Turn 0: lead receives the task
    this.leadTurns++;
    await this.leadRunner.run(initialTask);

    // Handle redirect after turn 0
    await this.#processRedirect();

    if (this.ctx.concluded) {
      // Lead concluded during its initial run. Let agents finish any
      // in-progress work before returning — they may have received Ask/Answer
      // messages and started processing concurrently.
      this.concludeResolve();
      await Promise.allSettled(agentPromises);
      const success = this.ctx.verdict === "success";
      this.emitSummary({
        success,
        verdict: this.ctx.verdict,
        turns: this.leadTurns,
        summary: this.ctx.summary,
      });
      return { success, turns: this.leadTurns };
    }

    // Abort agents promptly when the session concludes during the event loop
    this.concludePromise.then(() => {
      for (const agent of this.agents) {
        agent.runner.currentAbortController?.abort();
      }
    });

    // Concurrent phase: lead event loop + already-running agent loops
    const leadPromise = this.#leadLoop();

    try {
      await Promise.all([...agentPromises, leadPromise]);
    } catch (err) {
      // Any loop failing aborts every in-flight runner before the error
      // propagates to the caller.
      for (const agent of this.agents) {
        agent.runner.currentAbortController?.abort();
      }
      this.leadRunner.currentAbortController?.abort();
      throw err;
    }

    const success = this.ctx.concluded && this.ctx.verdict === "success";
    const result = {
      success,
      turns: this.leadTurns,
    };
    this.emitSummary({
      success,
      verdict: this.ctx.verdict,
      turns: result.turns,
      summary: this.ctx.summary,
    });
    return result;
  }

  /**
   * Delegate to checkPendingAsk for the participant `name`, wiring in this
   * loop's context, bus, mode tag and violation emitter.
   * @param {string} name - Addressee to check.
   * @returns {*} Whatever checkPendingAsk returns; callers here only compare it to "recheck".
   */
  #checkAsk(name) {
    return checkPendingAsk({
      ctx: this.ctx,
      messageBus: this.messageBus,
      addresseeName: name,
      mode: this.mode,
      emitViolation: (e) => this.emitOrchestratorEvent(e),
    });
  }

  /**
   * Turn-boundary Ask enforcement for an agent. When the first check returns
   * "recheck", drain whatever is queued for the agent (presumably the
   * synthetic reminder — confirm in orchestration-toolkit), resume the agent
   * with it, then check once more unless the session concluded meanwhile.
   * @param {{name: string, runner: import("./agent-runner.js").AgentRunner}} agent
   */
  async #enforcePendingAsk(agent) {
    if (this.#checkAsk(agent.name) !== "recheck") return;
    if (this.ctx.concluded) return;
    const reminders = this.messageBus.drain(agent.name);
    if (reminders.length === 0) return;
    await agent.runner.resume(formatMessages(reminders));
    if (this.ctx.concluded) return;
    this.#checkAsk(agent.name);
  }

  /**
   * Agent outer loop — waits for messages, runs/resumes the agent.
   * Returns without ever starting the agent when the session concludes
   * before its first message arrives.
   * @param {{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}} agent
   */
  async #runAgent(agent) {
    // Wait for first message (lazy start)
    await Promise.race([
      this.messageBus.waitForMessages(agent.name),
      this.concludePromise,
    ]);
    if (this.ctx.concluded) return;

    let messages = this.messageBus.drain(agent.name);
    if (messages.length === 0) return;

    this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
    await agent.runner.run(formatMessages(messages));
    if (await this.#settleAgentTurn(agent)) return;

    // Loop: check for new messages, resume if any
    while (!this.ctx.concluded) {
      messages = await this.#awaitAgentMessages(agent.name);
      if (messages.length === 0) break;
      await agent.runner.resume(formatMessages(messages));
      if (await this.#settleAgentTurn(agent)) break;
    }
  }

  /**
   * Enforce pending-ask and emit turn_complete. Returns true when the
   * session has concluded and the caller should stop.
   * @param {{name: string, runner: import("./agent-runner.js").AgentRunner}} agent
   * @returns {Promise<boolean>}
   */
  async #settleAgentTurn(agent) {
    if (this.ctx.concluded) return true;
    await this.#enforcePendingAsk(agent);
    if (this.ctx.concluded) return true;
    // The lifecycle event wakes the lead loop, which then drains the lead's inbox.
    this.eventQueue.enqueue({
      type: "lifecycle",
      agent: agent.name,
      status: "turn_complete",
    });
    return false;
  }

  /**
   * Wait for messages addressed to `name`, returning an empty array when
   * the session concludes first. Already-queued messages are returned
   * immediately without waiting.
   * @param {string} name
   * @returns {Promise<Array>}
   */
  async #awaitAgentMessages(name) {
    const messages = this.messageBus.drain(name);
    if (messages.length > 0) return messages;
    await Promise.race([
      this.messageBus.waitForMessages(name),
      this.concludePromise,
    ]);
    if (this.ctx.concluded) return [];
    return this.messageBus.drain(name);
  }

  /**
   * Lead event loop — only runs when input arrives. Stops when the session
   * has concluded or the queue yields `null`.
   */
  async #leadLoop() {
    while (!this.ctx.concluded) {
      const event = await this.eventQueue.dequeue();
      if (this.ctx.concluded || event === null) break;
      await this.#handleEvent(event);
    }
  }

  /**
   * Handle one queued event. "messages" and "lifecycle" events both trigger a
   * drain of the lead's inbox and, when it is non-empty, one lead turn
   * followed by redirect processing and Ask enforcement. Other event types
   * are ignored. When the session has concluded afterwards, waiters are
   * released and the queue is closed.
   * @param {{type: string}} event
   */
  async #handleEvent(event) {
    switch (event.type) {
      case "messages":
      case "lifecycle": {
        const msgs = this.messageBus.drain(this.leadName);
        if (msgs.length === 0) break;
        this.leadTurns++;
        await this.leadRunner.resume(formatMessages(msgs));
        await this.#processRedirect();
        if (!this.ctx.concluded) await this.#enforceLeadPendingAsk();
        break;
      }
    }

    if (this.ctx.concluded) {
      this.concludeResolve();
      this.eventQueue.close();
    }
  }

  /**
   * Lead-side counterpart of the agent Ask enforcement: on "recheck", resume
   * the lead with whatever is queued for it (counted as a lead turn), process
   * any redirect it produced, then check once more unless concluded.
   */
  async #enforceLeadPendingAsk() {
    if (this.#checkAsk(this.leadName) !== "recheck") return;
    if (this.ctx.concluded) return;
    const reminders = this.messageBus.drain(this.leadName);
    if (reminders.length === 0) return;
    this.leadTurns++;
    await this.leadRunner.resume(formatMessages(reminders));
    await this.#processRedirect();
    if (this.ctx.concluded) return;
    this.#checkAsk(this.leadName);
  }

  /**
   * Process a pending redirect after a lead turn. The redirect is cleared
   * from the context before acting on it, so it is handled at most once.
   */
  async #processRedirect() {
    if (!this.ctx.redirect) return;
    const redirect = this.ctx.redirect;
    this.ctx.redirect = null;

    this.emitOrchestratorEvent({
      type: "redirect",
      to: redirect.to,
    });

    if (redirect.to === "all") {
      // Abort all agents and deliver redirect via broadcast
      for (const agent of this.agents) {
        agent.runner.currentAbortController?.abort();
      }
      this.messageBus.announce(this.leadName, redirect.message);
    } else if (redirect.to) {
      // Abort specific agent and deliver via direct message
      const target = this.agents.find((a) => a.name === redirect.to);
      if (target) {
        target.runner.currentAbortController?.abort();
      }
      // The message is sent even when no agent with that name is registered.
      this.messageBus.direct(this.leadName, redirect.to, redirect.message);
    }
  }

  /**
   * Return the last assistant text block from a runner's buffer, or the
   * fallback if none exists.
   *
   * The buffer is scanned newest-first. Lines that are not valid JSON, parse
   * to a non-object, or carry null content entries are skipped rather than
   * aborting the scan, so a single bad line cannot hide earlier valid text.
   *
   * @param {{buffer: string[]}} runner - Runner whose `buffer` holds raw NDJSON lines.
   * @param {*} fallback - Value returned when no non-empty assistant text is found.
   * @returns {*} The text of the most recent non-empty assistant text block, or `fallback`.
   */
  extractLastText(runner, fallback) {
    const lines = runner.buffer;
    for (let i = lines.length - 1; i >= 0; i--) {
      let event;
      try {
        event = JSON.parse(lines[i]);
      } catch (err) {
        // Only tolerate parse failures; anything else is a real bug.
        if (!(err instanceof SyntaxError)) throw err;
        continue;
      }
      // `null` is valid JSON, hence the optional chaining.
      if (event?.type !== "assistant") continue;
      const content = event.message?.content ?? event.content;
      if (!Array.isArray(content)) continue;
      for (let j = content.length - 1; j >= 0; j--) {
        const part = content[j];
        if (part?.type === "text" && part.text) {
          return part.text;
        }
      }
    }
    return fallback;
  }

  /**
   * Emit a single NDJSON line tagged with source and seq.
   * @param {string} source - Participant name
   * @param {string} line - Raw NDJSON line
   * @throws {SyntaxError} When `line` is not valid JSON (no sequence number is consumed in that case).
   */
  emitLine(source, line) {
    const event = JSON.parse(line);
    writeEnvelope(this, source, event);
  }

  /**
   * Emit an orchestrator-originated event (source "orchestrator").
   * @param {{type: string}} event
   */
  emitOrchestratorEvent(event) {
    writeEnvelope(this, "orchestrator", event);
  }

  /**
   * Emit the final `summary` event. `verdict` and `summary` are included
   * only when truthy.
   * @param {{success: boolean, verdict?: string|null, turns: number, summary?: string}} result
   */
  emitSummary(result) {
    writeEnvelope(this, "orchestrator", {
      type: "summary",
      success: result.success,
      ...(result.verdict && { verdict: result.verdict }),
      turns: result.turns,
      ...(result.summary && { summary: result.summary }),
    });
  }
}

/**
 * Redact and write one `{source, seq, event}` envelope as a single NDJSON
 * line to the loop's output, consuming the next sequence number.
 * @param {OrchestrationLoop} loop - Provides `output`, `redactor` and `counter`.
 * @param {string} source - Envelope source tag.
 * @param {object} event - Event payload.
 */
function writeEnvelope(loop, source, event) {
  const envelope = loop.redactor.redactValue({
    source,
    seq: loop.counter.next(),
    event,
  });
  loop.output.write(JSON.stringify(envelope) + "\n");
}
|
package/src/redaction.js
CHANGED
|
@@ -10,8 +10,18 @@
|
|
|
10
10
|
|
|
11
11
|
// Frozen (immutable) list of environment variable names used as the default
// allowlist by this module.
// NOTE(review): every entry names a credential-style variable (keys, tokens,
// passwords, secrets); presumably these are the variables whose values the
// redactor looks up and scrubs from output — confirm against the consumer.
// Entries are kept in alphabetical order.
export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
  "ANTHROPIC_API_KEY",
  "AWS_ACCESS_KEY_ID",
  "AWS_SECRET_ACCESS_KEY",
  "DATABASE_PASSWORD",
  "GH_TOKEN",
  "GITHUB_TOKEN",
  "MCP_TOKEN",
  "MICROSOFT_APP_PASSWORD",
  "PRODUCT_LANDMARK_TOKEN",
  "SERVICE_SECRET",
  "SUPABASE_ANON_KEY",
  "SUPABASE_JWT_SECRET",
  "SUPABASE_SERVICE_ROLE_KEY",
]);
|
|
16
26
|
|
|
17
27
|
// Anchored prefixes per
|
package/src/trace-collector.js
CHANGED
|
@@ -40,6 +40,7 @@ export class TraceCollector {
|
|
|
40
40
|
* Malformed lines are silently skipped.
|
|
41
41
|
* @param {string} line - A single JSON line from stream-json output
|
|
42
42
|
*/
|
|
43
|
+
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: NDJSON envelope unwrap + orchestrator/system/assistant/user dispatch
|
|
43
44
|
addLine(line) {
|
|
44
45
|
const trimmed = line.trim();
|
|
45
46
|
if (!trimmed) return;
|
|
@@ -74,6 +75,9 @@ export class TraceCollector {
|
|
|
74
75
|
...(typeof event.turns === "number" && { turns: event.turns }),
|
|
75
76
|
};
|
|
76
77
|
}
|
|
78
|
+
if (event.type === "meta" && typeof event.discussion_id === "string") {
|
|
79
|
+
this.discussionId = event.discussion_id;
|
|
80
|
+
}
|
|
77
81
|
return;
|
|
78
82
|
}
|
|
79
83
|
|