@forwardimpact/libeval 0.1.43 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-benchmark.js +2 -2
- package/bin/fit-eval.js +101 -21
- package/bin/fit-trace.js +14 -0
- package/package.json +1 -1
- package/src/commands/benchmark-run.js +1 -1
- package/src/commands/by-discussion.js +84 -0
- package/src/commands/callback.js +104 -0
- package/src/commands/discuss.js +116 -0
- package/src/commands/facilitate.js +2 -2
- package/src/commands/supervise.js +3 -3
- package/src/discuss-tools.js +203 -0
- package/src/discusser.js +332 -0
- package/src/facilitator.js +39 -333
- package/src/index.js +14 -0
- package/src/orchestration-loop.js +369 -0
- package/src/redaction.js +10 -0
- package/src/render/orchestrator-filter.js +1 -0
- package/src/trace-collector.js +4 -0
package/src/facilitator.js
CHANGED
|
@@ -1,33 +1,35 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Facilitator —
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
2
|
+
* Facilitator — facilitate-mode wrapper around `OrchestrationLoop`. The
|
|
3
|
+
* lead participant is named "facilitator" and uses the `Conclude` tool to
|
|
4
|
+
* end the session. The within-run turn loop itself lives in
|
|
5
|
+
* `orchestration-loop.js`; this file owns only the facilitate-mode
|
|
6
|
+
* specifics (lead role name, system prompts, tool wiring, factory).
|
|
6
7
|
*/
|
|
7
8
|
|
|
8
9
|
import { Writable } from "node:stream";
|
|
9
10
|
import { resolve } from "node:path";
|
|
10
11
|
import { createAgentRunner } from "./agent-runner.js";
|
|
11
12
|
import { composeProfilePrompt } from "./profile-prompt.js";
|
|
12
|
-
import { SequenceCounter } from "./sequence-counter.js";
|
|
13
13
|
import { createMessageBus } from "./message-bus.js";
|
|
14
14
|
import {
|
|
15
15
|
createOrchestrationContext,
|
|
16
16
|
createFacilitatorToolServer,
|
|
17
17
|
createFacilitatedAgentToolServer,
|
|
18
|
-
checkPendingAsk,
|
|
19
18
|
} from "./orchestration-toolkit.js";
|
|
20
|
-
import { createAsyncQueue
|
|
19
|
+
import { createAsyncQueue } from "./orchestrator-helpers.js";
|
|
20
|
+
import { OrchestrationLoop } from "./orchestration-loop.js";
|
|
21
21
|
|
|
22
22
|
/** System prompt appended for the facilitator runner. */
|
|
23
23
|
export const FACILITATOR_SYSTEM_PROMPT =
|
|
24
|
-
"You coordinate multiple participants
|
|
25
|
-
"Ask
|
|
26
|
-
"Announce
|
|
27
|
-
"Redirect interrupts
|
|
28
|
-
"RollCall
|
|
29
|
-
"Conclude ends the session with a verdict ('success' or 'failure') and a summary
|
|
30
|
-
"
|
|
24
|
+
"You coordinate multiple participants via these tools: " +
|
|
25
|
+
"Ask delivers a question to one named participant — or broadcasts when no addressee is named — and blocks until that participant answers. " +
|
|
26
|
+
"Announce delivers a message with no reply obligation. " +
|
|
27
|
+
"Redirect interrupts an in-progress participant with replacement instructions. " +
|
|
28
|
+
"RollCall returns the participant roster. " +
|
|
29
|
+
"Conclude ends the session with a verdict ('success' or 'failure') and a summary. " +
|
|
30
|
+
"Ask and Announce calls issued in the same turn dispatch in parallel. " +
|
|
31
|
+
"You MUST call Conclude to end every session — never end a turn with only text. " +
|
|
32
|
+
"If you can answer the task yourself, still call Conclude with verdict='success' and the answer as the summary.";
|
|
31
33
|
|
|
32
34
|
/** System prompt appended for facilitated agent runners. */
|
|
33
35
|
export const FACILITATED_AGENT_SYSTEM_PROMPT =
|
|
@@ -37,8 +39,13 @@ export const FACILITATED_AGENT_SYSTEM_PROMPT =
|
|
|
37
39
|
"Announce broadcasts a message. " +
|
|
38
40
|
"RollCall lists participants.";
|
|
39
41
|
|
|
40
|
-
/**
|
|
41
|
-
|
|
42
|
+
/**
|
|
43
|
+
* Facilitate-mode wrapper around `OrchestrationLoop`. The lead participant
|
|
44
|
+
* is `"facilitator"` and the protocol mode is `"facilitated"`. Preserves
|
|
45
|
+
* the public surface (`facilitatorRunner`, `facilitatorTurns`) that
|
|
46
|
+
* existing callers rely on.
|
|
47
|
+
*/
|
|
48
|
+
export class Facilitator extends OrchestrationLoop {
|
|
42
49
|
/**
|
|
43
50
|
* @param {object} deps
|
|
44
51
|
* @param {import("./agent-runner.js").AgentRunner} deps.facilitatorRunner
|
|
@@ -48,332 +55,31 @@ export class Facilitator {
|
|
|
48
55
|
* @param {number} [deps.maxTurns]
|
|
49
56
|
* @param {object} [deps.ctx]
|
|
50
57
|
* @param {object} [deps.eventQueue]
|
|
51
|
-
* @param {string} [deps.taskAmend]
|
|
52
|
-
|
|
53
|
-
constructor({
|
|
54
|
-
facilitatorRunner,
|
|
55
|
-
agents,
|
|
56
|
-
messageBus,
|
|
57
|
-
output,
|
|
58
|
-
maxTurns,
|
|
59
|
-
ctx,
|
|
60
|
-
eventQueue,
|
|
61
|
-
taskAmend,
|
|
62
|
-
redactor,
|
|
63
|
-
}) {
|
|
64
|
-
if (!redactor) throw new Error("redactor is required");
|
|
65
|
-
this.redactor = redactor;
|
|
66
|
-
this.facilitatorRunner = facilitatorRunner;
|
|
67
|
-
this.agents = agents;
|
|
68
|
-
this.messageBus = messageBus;
|
|
69
|
-
this.output = output;
|
|
70
|
-
this.maxTurns = maxTurns ?? 20;
|
|
71
|
-
this.ctx = ctx ?? createOrchestrationContext();
|
|
72
|
-
this.counter = new SequenceCounter();
|
|
73
|
-
this.eventQueue = eventQueue ?? createAsyncQueue();
|
|
74
|
-
this.facilitatorTurns = 0;
|
|
75
|
-
this.taskAmend = taskAmend ?? null;
|
|
76
|
-
|
|
77
|
-
let resolve;
|
|
78
|
-
const promise = new Promise((r) => {
|
|
79
|
-
resolve = r;
|
|
80
|
-
});
|
|
81
|
-
this.concludePromise = promise;
|
|
82
|
-
this.concludeResolve = resolve;
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
/**
|
|
86
|
-
* Run the full facilitated session.
|
|
87
|
-
* @param {string} task
|
|
88
|
-
* @returns {Promise<{success: boolean, turns: number}>}
|
|
58
|
+
* @param {string} [deps.taskAmend]
|
|
59
|
+
* @param {object} deps.redactor
|
|
89
60
|
*/
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
// Launch agent loops first — they wait for messages via messageBus.
|
|
96
|
-
// This lets agents process Ask/Announce messages that arrive during
|
|
97
|
-
// the facilitator's initial run, rather than after it completes.
|
|
98
|
-
const agentPromises = this.agents.map((a) => this.#runAgent(a));
|
|
99
|
-
|
|
100
|
-
// Turn 0: facilitator receives the task
|
|
101
|
-
this.facilitatorTurns++;
|
|
102
|
-
await this.facilitatorRunner.run(initialTask);
|
|
103
|
-
|
|
104
|
-
// Handle redirect after turn 0
|
|
105
|
-
await this.#processRedirect();
|
|
106
|
-
|
|
107
|
-
if (this.ctx.concluded) {
|
|
108
|
-
// Facilitator concluded during its initial run. Let agents finish any
|
|
109
|
-
// in-progress work before returning — they may have received Ask/Answer
|
|
110
|
-
// messages and started processing concurrently.
|
|
111
|
-
this.concludeResolve();
|
|
112
|
-
await Promise.allSettled(agentPromises);
|
|
113
|
-
const success = this.ctx.verdict === "success";
|
|
114
|
-
this.emitSummary({
|
|
115
|
-
success,
|
|
116
|
-
verdict: this.ctx.verdict,
|
|
117
|
-
turns: this.facilitatorTurns,
|
|
118
|
-
summary: this.ctx.summary,
|
|
119
|
-
});
|
|
120
|
-
return { success, turns: this.facilitatorTurns };
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
// Abort agents promptly when Conclude is called during the event loop
|
|
124
|
-
this.concludePromise.then(() => {
|
|
125
|
-
for (const agent of this.agents) {
|
|
126
|
-
agent.runner.currentAbortController?.abort();
|
|
127
|
-
}
|
|
128
|
-
});
|
|
129
|
-
|
|
130
|
-
// Concurrent phase: facilitator event loop + already-running agent loops
|
|
131
|
-
const facilitatorPromise = this.#facilitatorLoop();
|
|
132
|
-
|
|
133
|
-
try {
|
|
134
|
-
await Promise.all([...agentPromises, facilitatorPromise]);
|
|
135
|
-
} catch (err) {
|
|
136
|
-
for (const agent of this.agents) {
|
|
137
|
-
agent.runner.currentAbortController?.abort();
|
|
138
|
-
}
|
|
139
|
-
this.facilitatorRunner.currentAbortController?.abort();
|
|
140
|
-
throw err;
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
const success = this.ctx.concluded && this.ctx.verdict === "success";
|
|
144
|
-
const result = {
|
|
145
|
-
success,
|
|
146
|
-
turns: this.facilitatorTurns,
|
|
147
|
-
};
|
|
148
|
-
this.emitSummary({
|
|
149
|
-
success,
|
|
150
|
-
verdict: this.ctx.verdict,
|
|
151
|
-
turns: result.turns,
|
|
152
|
-
summary: this.ctx.summary,
|
|
153
|
-
});
|
|
154
|
-
return result;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
#checkAsk(name) {
|
|
158
|
-
return checkPendingAsk({
|
|
159
|
-
ctx: this.ctx,
|
|
160
|
-
messageBus: this.messageBus,
|
|
161
|
-
addresseeName: name,
|
|
61
|
+
constructor(deps) {
|
|
62
|
+
super({
|
|
63
|
+
...deps,
|
|
64
|
+
leadRunner: deps.facilitatorRunner,
|
|
65
|
+
leadName: "facilitator",
|
|
162
66
|
mode: "facilitated",
|
|
163
|
-
emitViolation: (e) => this.emitOrchestratorEvent(e),
|
|
164
|
-
});
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
async #enforcePendingAsk(agent) {
|
|
168
|
-
if (this.#checkAsk(agent.name) !== "recheck") return;
|
|
169
|
-
if (this.ctx.concluded) return;
|
|
170
|
-
const reminders = this.messageBus.drain(agent.name);
|
|
171
|
-
if (reminders.length === 0) return;
|
|
172
|
-
await agent.runner.resume(formatMessages(reminders));
|
|
173
|
-
if (this.ctx.concluded) return;
|
|
174
|
-
this.#checkAsk(agent.name);
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
/**
|
|
178
|
-
* Agent outer loop — waits for messages, runs/resumes the agent.
|
|
179
|
-
* @param {{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}} agent
|
|
180
|
-
*/
|
|
181
|
-
async #runAgent(agent) {
|
|
182
|
-
// Wait for first message (lazy start)
|
|
183
|
-
await Promise.race([
|
|
184
|
-
this.messageBus.waitForMessages(agent.name),
|
|
185
|
-
this.concludePromise,
|
|
186
|
-
]);
|
|
187
|
-
if (this.ctx.concluded) return;
|
|
188
|
-
|
|
189
|
-
let messages = this.messageBus.drain(agent.name);
|
|
190
|
-
if (messages.length === 0) return;
|
|
191
|
-
|
|
192
|
-
this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
|
|
193
|
-
await agent.runner.run(formatMessages(messages));
|
|
194
|
-
if (await this.#settleAgentTurn(agent)) return;
|
|
195
|
-
|
|
196
|
-
// Loop: check for new messages, resume if any
|
|
197
|
-
while (!this.ctx.concluded) {
|
|
198
|
-
messages = await this.#awaitAgentMessages(agent.name);
|
|
199
|
-
if (messages.length === 0) break;
|
|
200
|
-
await agent.runner.resume(formatMessages(messages));
|
|
201
|
-
if (await this.#settleAgentTurn(agent)) break;
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
/**
|
|
206
|
-
* Enforce pending-ask and emit turn_complete. Returns true when the
|
|
207
|
-
* session has concluded and the caller should stop.
|
|
208
|
-
*/
|
|
209
|
-
async #settleAgentTurn(agent) {
|
|
210
|
-
if (this.ctx.concluded) return true;
|
|
211
|
-
await this.#enforcePendingAsk(agent);
|
|
212
|
-
if (this.ctx.concluded) return true;
|
|
213
|
-
this.eventQueue.enqueue({
|
|
214
|
-
type: "lifecycle",
|
|
215
|
-
agent: agent.name,
|
|
216
|
-
status: "turn_complete",
|
|
217
|
-
});
|
|
218
|
-
return false;
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
/**
|
|
222
|
-
* Wait for messages addressed to `name`, returning an empty array when
|
|
223
|
-
* the session concludes first.
|
|
224
|
-
*/
|
|
225
|
-
async #awaitAgentMessages(name) {
|
|
226
|
-
const messages = this.messageBus.drain(name);
|
|
227
|
-
if (messages.length > 0) return messages;
|
|
228
|
-
await Promise.race([
|
|
229
|
-
this.messageBus.waitForMessages(name),
|
|
230
|
-
this.concludePromise,
|
|
231
|
-
]);
|
|
232
|
-
if (this.ctx.concluded) return [];
|
|
233
|
-
return this.messageBus.drain(name);
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
/**
|
|
237
|
-
* Facilitator event loop — only runs when input arrives.
|
|
238
|
-
*/
|
|
239
|
-
async #facilitatorLoop() {
|
|
240
|
-
while (!this.ctx.concluded) {
|
|
241
|
-
const event = await this.eventQueue.dequeue();
|
|
242
|
-
if (this.ctx.concluded || event === null) break;
|
|
243
|
-
await this.#handleEvent(event);
|
|
244
|
-
}
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
async #handleEvent(event) {
|
|
248
|
-
switch (event.type) {
|
|
249
|
-
case "messages":
|
|
250
|
-
case "lifecycle": {
|
|
251
|
-
const msgs = this.messageBus.drain("facilitator");
|
|
252
|
-
if (msgs.length === 0) break;
|
|
253
|
-
this.facilitatorTurns++;
|
|
254
|
-
await this.facilitatorRunner.resume(formatMessages(msgs));
|
|
255
|
-
await this.#processRedirect();
|
|
256
|
-
if (!this.ctx.concluded) await this.#enforceFacilitatorPendingAsk();
|
|
257
|
-
break;
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
if (this.ctx.concluded) {
|
|
262
|
-
this.concludeResolve();
|
|
263
|
-
this.eventQueue.close();
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
async #enforceFacilitatorPendingAsk() {
|
|
268
|
-
if (this.#checkAsk("facilitator") !== "recheck") return;
|
|
269
|
-
if (this.ctx.concluded) return;
|
|
270
|
-
const reminders = this.messageBus.drain("facilitator");
|
|
271
|
-
if (reminders.length === 0) return;
|
|
272
|
-
this.facilitatorTurns++;
|
|
273
|
-
await this.facilitatorRunner.resume(formatMessages(reminders));
|
|
274
|
-
await this.#processRedirect();
|
|
275
|
-
if (this.ctx.concluded) return;
|
|
276
|
-
this.#checkAsk("facilitator");
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
/**
|
|
280
|
-
* Process a pending redirect after a facilitator turn.
|
|
281
|
-
*/
|
|
282
|
-
async #processRedirect() {
|
|
283
|
-
if (!this.ctx.redirect) return;
|
|
284
|
-
const redirect = this.ctx.redirect;
|
|
285
|
-
this.ctx.redirect = null;
|
|
286
|
-
|
|
287
|
-
this.emitOrchestratorEvent({
|
|
288
|
-
type: "redirect",
|
|
289
|
-
to: redirect.to,
|
|
290
67
|
});
|
|
291
|
-
|
|
292
|
-
if (redirect.to === "all") {
|
|
293
|
-
// Abort all agents and deliver redirect via broadcast
|
|
294
|
-
for (const agent of this.agents) {
|
|
295
|
-
agent.runner.currentAbortController?.abort();
|
|
296
|
-
}
|
|
297
|
-
this.messageBus.announce("facilitator", redirect.message);
|
|
298
|
-
} else if (redirect.to) {
|
|
299
|
-
// Abort specific agent and deliver via direct message
|
|
300
|
-
const target = this.agents.find((a) => a.name === redirect.to);
|
|
301
|
-
if (target) {
|
|
302
|
-
target.runner.currentAbortController?.abort();
|
|
303
|
-
}
|
|
304
|
-
this.messageBus.direct("facilitator", redirect.to, redirect.message);
|
|
305
|
-
}
|
|
306
68
|
}
|
|
307
69
|
|
|
308
|
-
/**
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
for (let i = lines.length - 1; i >= 0; i--) {
|
|
312
|
-
const event = JSON.parse(lines[i]);
|
|
313
|
-
if (event.type !== "assistant") continue;
|
|
314
|
-
const content = event.message?.content ?? event.content;
|
|
315
|
-
if (!Array.isArray(content)) continue;
|
|
316
|
-
for (let j = content.length - 1; j >= 0; j--) {
|
|
317
|
-
if (content[j].type === "text" && content[j].text) {
|
|
318
|
-
return content[j].text;
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
}
|
|
322
|
-
return fallback;
|
|
70
|
+
/** @returns {import("./agent-runner.js").AgentRunner} */
|
|
71
|
+
get facilitatorRunner() {
|
|
72
|
+
return this.leadRunner;
|
|
323
73
|
}
|
|
324
74
|
|
|
325
|
-
/**
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
* @param {string} line - Raw NDJSON line
|
|
329
|
-
*/
|
|
330
|
-
emitLine(source, line) {
|
|
331
|
-
const event = JSON.parse(line);
|
|
332
|
-
this.output.write(
|
|
333
|
-
JSON.stringify(
|
|
334
|
-
this.redactor.redactValue({
|
|
335
|
-
source,
|
|
336
|
-
seq: this.counter.next(),
|
|
337
|
-
event,
|
|
338
|
-
}),
|
|
339
|
-
) + "\n",
|
|
340
|
-
);
|
|
75
|
+
/** @returns {number} */
|
|
76
|
+
get facilitatorTurns() {
|
|
77
|
+
return this.leadTurns;
|
|
341
78
|
}
|
|
342
79
|
|
|
343
|
-
/**
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
emitOrchestratorEvent(event) {
|
|
347
|
-
this.output.write(
|
|
348
|
-
JSON.stringify(
|
|
349
|
-
this.redactor.redactValue({
|
|
350
|
-
source: "orchestrator",
|
|
351
|
-
seq: this.counter.next(),
|
|
352
|
-
event,
|
|
353
|
-
}),
|
|
354
|
-
) + "\n",
|
|
355
|
-
);
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
/**
|
|
359
|
-
* @param {{success: boolean, verdict?: string|null, turns: number, summary?: string}} result
|
|
360
|
-
*/
|
|
361
|
-
emitSummary(result) {
|
|
362
|
-
this.output.write(
|
|
363
|
-
JSON.stringify(
|
|
364
|
-
this.redactor.redactValue({
|
|
365
|
-
source: "orchestrator",
|
|
366
|
-
seq: this.counter.next(),
|
|
367
|
-
event: {
|
|
368
|
-
type: "summary",
|
|
369
|
-
success: result.success,
|
|
370
|
-
...(result.verdict && { verdict: result.verdict }),
|
|
371
|
-
turns: result.turns,
|
|
372
|
-
...(result.summary && { summary: result.summary }),
|
|
373
|
-
},
|
|
374
|
-
}),
|
|
375
|
-
) + "\n",
|
|
376
|
-
);
|
|
80
|
+
/** @param {number} v */
|
|
81
|
+
set facilitatorTurns(v) {
|
|
82
|
+
this.leadTurns = v;
|
|
377
83
|
}
|
|
378
84
|
}
|
|
379
85
|
|
package/src/index.js
CHANGED
|
@@ -26,12 +26,26 @@ export {
|
|
|
26
26
|
createJudgeToolServer,
|
|
27
27
|
} from "./orchestration-toolkit.js";
|
|
28
28
|
export { MessageBus, createMessageBus } from "./message-bus.js";
|
|
29
|
+
export { OrchestrationLoop } from "./orchestration-loop.js";
|
|
29
30
|
export {
|
|
30
31
|
Facilitator,
|
|
31
32
|
createFacilitator,
|
|
32
33
|
FACILITATOR_SYSTEM_PROMPT,
|
|
33
34
|
FACILITATED_AGENT_SYSTEM_PROMPT,
|
|
34
35
|
} from "./facilitator.js";
|
|
36
|
+
export {
|
|
37
|
+
Discusser,
|
|
38
|
+
createDiscusser,
|
|
39
|
+
DISCUSS_SYSTEM_PROMPT,
|
|
40
|
+
augmentContextForDiscuss,
|
|
41
|
+
pendingAsksToPlain,
|
|
42
|
+
pendingAsksFromPlain,
|
|
43
|
+
} from "./discusser.js";
|
|
44
|
+
export {
|
|
45
|
+
createDiscussLeadToolServer,
|
|
46
|
+
createDiscussAgentToolServer,
|
|
47
|
+
DISCUSS_AGENT_SYSTEM_PROMPT,
|
|
48
|
+
} from "./discuss-tools.js";
|
|
35
49
|
export { Judge, createJudge, JUDGE_SYSTEM_PROMPT } from "./judge.js";
|
|
36
50
|
export {
|
|
37
51
|
Redactor,
|