@forwardimpact/libeval 0.1.43 → 0.1.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,316 @@
1
+ /**
2
+ * OrchestrationLoop — N agent sessions coordinated by one lead LLM session.
3
+ *
4
+ * Ask is **async**: the tool returns immediately, the actual reply arrives
5
+ * on a later turn as `[answer#N] participant: …` on the asker's bus queue.
6
+ * Pending state is keyed by `askId` (visible in the `[ask#N]` tag), so duplicate
7
+ * Asks to the same addressee coexist without overwriting each other, and
8
+ * the asker can map each reply unambiguously back to its question.
9
+ *
10
+ * Both lead and participants follow the same outer pattern: drain the bus
11
+ * queue, run / resume the LLM with the drained messages, then settle any
12
+ * unanswered Asks the participant owes. They differ only in how the first
13
+ * turn starts (the lead receives the task; participants wait for traffic).
14
+ *
15
+ * Termination signals:
16
+ * - `ctx.concluded` — explicit Conclude / Adjourn / Recess.
17
+ * - `stopped` — broader: also true on lead error, agent crash, or any
18
+ * other abort path. Loops exit on either signal (see `#exiting()`); only
19
+ * `stopped` wakes an idle inbox wait. `ctx.concluded` also drives the summary's success/verdict.
20
+ */
21
+ import { SequenceCounter } from "./sequence-counter.js";
22
+ import {
23
+ cancelPendingAsks,
24
+ pendingAsksOwedBy,
25
+ remindOwedAsks,
26
+ } from "./orchestration-toolkit.js";
27
+ import { formatMessages } from "./orchestrator-helpers.js";
28
+
29
+ /**
+  * Default per-session lead-turn budget, used when `deps.maxLeadTurns` is
+  * omitted. The lead's initial `run()` counts as turn 1 and each `resume()`
+  * issued by the lead loop for drained inbox traffic adds one; the extra
+  * reminder resume issued while settling owed Asks is not counted.
+  */
30
+ const DEFAULT_MAX_LEAD_TURNS = 40;
31
+
32
/**
 * Orchestrate N agent sessions coordinated by a single lead LLM session.
 *
 * All trace output goes to `output` as NDJSON: one `{source, seq, event}`
 * envelope per line, passed through `redactor.redactValue` before it is
 * serialised.
 */
export class OrchestrationLoop {
  /** Internal — resolves `donePromise`; invoked once, when `stopped` flips true. */
  #signalDone;

  /**
   * @param {object} deps
   * @param {import("./agent-runner.js").AgentRunner} deps.leadRunner
   * @param {Array<{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}>} deps.agents
   * @param {import("./message-bus.js").MessageBus} deps.messageBus
   * @param {import("stream").Writable} deps.output
   * @param {string} deps.leadName - Canonical name of the lead participant on the bus.
   * @param {"facilitated"|"discussion"|"supervised"} deps.mode - Carries through to `protocol_violation` events.
   * @param {object} deps.ctx - Orchestration context (from `createOrchestrationContext()`).
   * @param {object} deps.redactor
   * @param {number} [deps.maxLeadTurns] - Cap on lead resumes per session (default 40).
   * @param {string} [deps.taskAmend] - Appended to the task before delivery.
   * @throws {Error} When a required dependency is missing (falsy).
   * @throws {TypeError} When `agents` is provided but is not an array.
   */
  constructor({
    leadRunner,
    agents,
    messageBus,
    output,
    leadName,
    mode,
    maxLeadTurns,
    ctx,
    taskAmend,
    redactor,
  }) {
    // Checked in this order so the first missing dependency is the one reported.
    const required = {
      leadRunner,
      agents,
      messageBus,
      output,
      leadName,
      mode,
      ctx,
      redactor,
    };
    for (const [key, value] of Object.entries(required)) {
      if (!value) throw new Error(`${key} is required`);
    }
    // Fail fast: `run()` calls `agents.map(...)` only after it has already
    // written `session_start`, so a non-array would otherwise surface late.
    if (!Array.isArray(agents)) {
      throw new TypeError("agents must be an array");
    }

    this.leadRunner = leadRunner;
    this.agents = agents;
    this.messageBus = messageBus;
    this.output = output;
    this.leadName = leadName;
    this.mode = mode;
    this.ctx = ctx;
    this.redactor = redactor;
    this.taskAmend = taskAmend ?? null;
    this.maxLeadTurns = maxLeadTurns ?? DEFAULT_MAX_LEAD_TURNS;
    this.counter = new SequenceCounter();
    this.leadTurns = 0;
    this.stopped = false;
    // The executor runs synchronously, so `#signalDone` is set before the
    // constructor returns.
    this.donePromise = new Promise((resolve) => {
      this.#signalDone = resolve;
    });
  }

  /**
   * Run the full orchestrated session: start every agent loop, drive the
   * lead loop to completion, stop everything, then emit the summary.
   * @param {string} task
   * @returns {Promise<{success: boolean, turns: number}>}
   * @throws The first error raised by the lead loop or any agent loop; no
   *   summary line is emitted in that case.
   */
  async run(task) {
    this.emitOrchestratorEvent({ type: "session_start" });
    const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;

    let firstError = null;
    // NOTE(review): an agent rejection that arrives after `#stop()` (for
    // example one caused by the abort issued below) is also recorded here
    // when no earlier error exists — confirm AgentRunner settles without
    // rejecting when its abort controller fires.
    const abort = (err) => {
      if (err && !firstError) firstError = err;
      this.#stop();
    };

    // Agent loops run in parallel. Each is wrapped so a crash flips
    // `stopped` while the wrapper itself fulfils — nothing below ever sees
    // an unhandled rejection.
    const agentLoops = this.agents.map((agent) =>
      this.#runAgent(agent).catch(abort),
    );

    try {
      await this.#runLead(initialTask);
    } catch (err) {
      abort(err);
    } finally {
      // The lead finishing, for any reason, ends the session for everyone.
      this.#stop();
    }

    await Promise.allSettled(agentLoops);
    if (firstError) throw firstError;

    // Coerced so the documented boolean holds even when `ctx.concluded` is
    // undefined (JSON would otherwise drop the `success` key entirely).
    const success = Boolean(
      this.ctx.concluded && this.ctx.verdict === "success",
    );
    const turns = this.leadTurns;
    this.emitSummary({
      success,
      verdict: this.ctx.verdict,
      turns,
      summary: this.ctx.summary,
    });
    return { success, turns };
  }

  /**
   * Idempotently end the session: flip `stopped`, resolve `donePromise` so
   * parked waiters unblock, and abort any in-flight runner call that
   * exposes a `currentAbortController`.
   */
  #stop() {
    if (this.stopped) return;
    this.stopped = true;
    this.#signalDone();
    for (const { runner } of this.agents) {
      runner.currentAbortController?.abort();
    }
    this.leadRunner.currentAbortController?.abort();
  }

  /**
   * Lead loop. The lead's first turn carries the task; every subsequent
   * turn is a resume triggered by something landing on its inbox.
   *
   * An empty batch from `#drainOrWait` means the session was stopped before
   * any message arrived — that is the natural exit. A non-empty batch is
   * always delivered, even if the session concluded between wake-up and
   * delivery. The loop also exits, after emitting `lead_turn_limit`, once
   * `leadTurns` reaches `maxLeadTurns`.
   * @param {string} initialTask
   */
  async #runLead(initialTask) {
    this.leadTurns = 1;
    this.emitOrchestratorEvent({ type: "agent_start", agent: this.leadName });
    await this.leadRunner.run(initialTask);

    for (;;) {
      if (this.#exiting()) return;
      await this.#settleOwedAsks(this.leadName, this.leadRunner);
      if (this.#exiting()) return;

      if (this.leadTurns >= this.maxLeadTurns) {
        this.emitOrchestratorEvent({
          type: "lead_turn_limit",
          limit: this.maxLeadTurns,
        });
        return;
      }

      const inbox = await this.#drainOrWait(this.leadName);
      if (inbox.length === 0) return;

      this.leadTurns++;
      await this.leadRunner.resume(formatMessages(inbox));
    }
  }

  /**
   * Agent loop. The first batch off the inbox triggers `run()`; every
   * subsequent batch triggers `resume()`. No turn budget is applied here.
   * @param {{name: string, runner: import("./agent-runner.js").AgentRunner}} agent
   */
  async #runAgent({ name, runner }) {
    let started = false;
    while (!this.#exiting()) {
      const inbox = await this.#drainOrWait(name);
      if (inbox.length === 0) return;

      const prompt = formatMessages(inbox);
      if (started) {
        await runner.resume(prompt);
      } else {
        started = true;
        this.emitOrchestratorEvent({ type: "agent_start", agent: name });
        await runner.run(prompt);
      }

      // NOTE(review): leaving on `ctx.concluded` does not call `#stop()`; a
      // lead parked in `#drainOrWait` is woken only by a message or by
      // `#stop()` — confirm that whatever sets `ctx.concluded` outside the
      // lead's own turn also wakes the lead.
      if (this.#exiting()) return;
      await this.#settleOwedAsks(name, runner);
    }
  }

  /** Truthy on an explicit conclude (`ctx.concluded`) or any stop/abort path. */
  #exiting() {
    return this.stopped || this.ctx.concluded;
  }

  /**
   * Drain `name`'s queue, or wait until a message arrives or the session is
   * stopped. Returns an empty array when the session was stopped while
   * waiting.
   * @param {string} name
   * @returns {Promise<Array>} The drained messages, possibly empty.
   */
  async #drainOrWait(name) {
    const ready = this.messageBus.drain(name);
    if (ready.length > 0) return ready;

    // Draining and registering the wait happen in one synchronous stretch,
    // so no message can slip in between them unnoticed.
    await Promise.race([
      this.messageBus.waitForMessages(name),
      this.donePromise,
    ]);
    return this.stopped ? [] : this.messageBus.drain(name);
  }

  /**
   * If `name` left a pending Ask unanswered, inject one synthetic reminder
   * and resume once more. If Asks are still owed after that turn, emit one
   * `protocol_violation` event per outstanding Ask and cancel them with the
   * reason `"<name> did not answer after reminder"`.
   * @param {string} name
   * @param {import("./agent-runner.js").AgentRunner} runner
   */
  async #settleOwedAsks(name, runner) {
    if (pendingAsksOwedBy(this.ctx, name).length === 0) return;
    if (this.stopped) return;

    if (!remindOwedAsks(this.ctx, name)) return;
    const reminders = this.messageBus.drain(name);
    if (reminders.length === 0) return;

    // This extra resume is deliberately not counted against `leadTurns`.
    await runner.resume(formatMessages(reminders));
    if (this.stopped) return;

    const stillOwed = pendingAsksOwedBy(this.ctx, name);
    if (stillOwed.length === 0) return;

    for (const { askId } of stillOwed) {
      this.emitOrchestratorEvent({
        type: "protocol_violation",
        agent: name,
        askId,
        mode: this.mode,
      });
    }
    cancelPendingAsks(this.ctx, `${name} did not answer after reminder`, name);
  }

  /**
   * Emit one NDJSON line tagged with its source (participant name) and the
   * next sequence number, wrapped in the `{source, seq, event}` envelope.
   * Called from each runner's `onLine` callback.
   * @param {string} source
   * @param {string} line - Raw NDJSON line from the SDK iterator.
   * @throws {SyntaxError} When `line` is not valid JSON; no sequence number
   *   is consumed in that case.
   */
  emitLine(source, line) {
    // Parse first so a malformed line fails before `counter.next()` runs.
    const event = JSON.parse(line);
    this.#writeEnvelope(source, event);
  }

  /**
   * Emit one orchestrator-source event (`session_start`, `agent_start`,
   * `protocol_violation`, `lead_turn_limit`) wrapped in the envelope.
   * @param {object} event
   */
  emitOrchestratorEvent(event) {
    this.#writeEnvelope("orchestrator", event);
  }

  /**
   * Emit the terminal summary line. `Discusser` emits its own discuss-
   * augmented summary after this one; trace consumers keep the last
   * summary they see. `verdict` and `summary` are included only when truthy.
   * @param {{success: boolean, verdict?: string|null, turns: number, summary?: string|null}} result
   */
  emitSummary(result) {
    const { success, verdict, turns, summary } = result;
    this.#writeEnvelope("orchestrator", {
      type: "summary",
      success,
      ...(verdict && { verdict }),
      turns,
      ...(summary && { summary }),
    });
  }

  /**
   * Shared writer behind every `emit*` method: stamp the next sequence
   * number, redact the whole envelope, and write it as one NDJSON line.
   * @param {string} source
   * @param {object} event
   */
  #writeEnvelope(source, event) {
    const envelope = this.redactor.redactValue({
      source,
      seq: this.counter.next(),
      event,
    });
    this.output.write(`${JSON.stringify(envelope)}\n`);
  }
}