@forwardimpact/libeval 0.1.44 → 0.1.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,27 +1,35 @@
1
1
  /**
2
- * OrchestrationLoop — N agent sessions + one lead LLM session. The
3
- * Ask/Answer contract is enforced at turn boundaries via checkPendingAsk:
4
- * one synthetic reminder, then a `protocol_violation` event plus a
5
- * null-answer injection so the session advances instead of deadlocking.
2
+ * OrchestrationLoop — N agent sessions coordinated by one lead LLM session.
6
3
  *
7
- * Mode-specific concepts (Conclude vs. Adjourn/Recess, lead role name,
8
- * system prompts, tool sets) live in mode-specific wrappers
9
- * (`Facilitator` for facilitate mode, `Discusser` for discuss mode). This
10
- * file owns only the loop itself.
4
+ * Ask is **async**: the tool returns immediately, the actual reply arrives
5
+ * on a later turn as `[answer#N] participant: …` on the asker's bus queue.
6
+ * Pending state keys by `askId` (visible in the `[ask#N]` tag), so duplicate
7
+ * Asks to the same addressee coexist without overwriting each other, and
8
+ * the asker can map each reply unambiguously back to its question.
9
+ *
10
+ * Both lead and participants follow the same outer pattern: drain the bus
11
+ * queue, run / resume the LLM with the drained messages, then settle any
12
+ * unanswered Asks the participant owes. They differ only in how the first
13
+ * turn starts (the lead receives the task; participants wait for traffic).
14
+ *
15
+ * Termination signals:
16
+ * - `ctx.concluded` — explicit Conclude / Adjourn / Recess.
17
+ * - `stopped` — broader: also true on lead error, agent crash, or any
18
+ * other abort path. Loops watch `stopped`; `ctx.concluded` is only used
19
+ * for the summary's success/verdict.
11
20
  */
12
21
  import { SequenceCounter } from "./sequence-counter.js";
13
22
  import {
14
- createOrchestrationContext,
15
- checkPendingAsk,
23
+ cancelPendingAsks,
24
+ pendingAsksOwedBy,
25
+ remindOwedAsks,
16
26
  } from "./orchestration-toolkit.js";
17
- import { createAsyncQueue, formatMessages } from "./orchestrator-helpers.js";
27
+ import { formatMessages } from "./orchestrator-helpers.js";
18
28
 
19
- /**
20
- * Orchestrate N agent sessions coordinated by a single lead LLM session.
21
- * Mode-neutral. Callers parameterise the lead participant's name and the
22
- * `protocol_violation` mode tag so the same loop powers both facilitate
23
- * and discuss modes without either knowing about the other.
24
- */
29
+ /** Default per-session lead-turn budget (one resume per round of traffic). */
30
+ const DEFAULT_MAX_LEAD_TURNS = 40;
31
+
32
+ /** Orchestrate N agent sessions coordinated by a single lead LLM session. */
25
33
  export class OrchestrationLoop {
26
34
  /**
27
35
  * @param {object} deps
@@ -29,13 +37,12 @@ export class OrchestrationLoop {
29
37
  * @param {Array<{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}>} deps.agents
30
38
  * @param {import("./message-bus.js").MessageBus} deps.messageBus
31
39
  * @param {import("stream").Writable} deps.output
32
- * @param {string} [deps.leadName] - Canonical name of the lead participant on the messageBus (default "lead").
33
- * @param {"facilitated"|"discussion"|"supervised"} [deps.mode] - Mode tag emitted on `protocol_violation` events.
34
- * @param {number} [deps.maxTurns]
35
- * @param {object} [deps.ctx]
36
- * @param {object} [deps.eventQueue]
37
- * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
40
+ * @param {string} deps.leadName - Canonical name of the lead participant on the bus.
41
+ * @param {"facilitated"|"discussion"|"supervised"} deps.mode - Carries through to `protocol_violation` events.
42
+ * @param {object} deps.ctx - Orchestration context (from `createOrchestrationContext()`).
38
43
  * @param {object} deps.redactor
44
+ * @param {number} [deps.maxLeadTurns] - Cap on lead resumes per session (default 40).
45
+ * @param {string} [deps.taskAmend] - Appended to the task before delivery.
39
46
  */
40
47
  constructor({
41
48
  leadRunner,
@@ -44,35 +51,42 @@ export class OrchestrationLoop {
44
51
  output,
45
52
  leadName,
46
53
  mode,
47
- maxTurns,
54
+ maxLeadTurns,
48
55
  ctx,
49
- eventQueue,
50
56
  taskAmend,
51
57
  redactor,
52
58
  }) {
59
+ if (!leadRunner) throw new Error("leadRunner is required");
60
+ if (!agents) throw new Error("agents is required");
61
+ if (!messageBus) throw new Error("messageBus is required");
62
+ if (!output) throw new Error("output is required");
63
+ if (!leadName) throw new Error("leadName is required");
64
+ if (!mode) throw new Error("mode is required");
65
+ if (!ctx) throw new Error("ctx is required");
53
66
  if (!redactor) throw new Error("redactor is required");
54
- this.redactor = redactor;
55
67
  this.leadRunner = leadRunner;
56
- this.leadName = leadName ?? "lead";
57
- this.mode = mode ?? "facilitated";
58
68
  this.agents = agents;
59
69
  this.messageBus = messageBus;
60
70
  this.output = output;
61
- this.maxTurns = maxTurns ?? 20;
62
- this.ctx = ctx ?? createOrchestrationContext();
71
+ this.leadName = leadName;
72
+ this.mode = mode;
73
+ this.ctx = ctx;
74
+ this.redactor = redactor;
75
+ this.taskAmend = taskAmend ?? null;
76
+ this.maxLeadTurns = maxLeadTurns ?? DEFAULT_MAX_LEAD_TURNS;
63
77
  this.counter = new SequenceCounter();
64
- this.eventQueue = eventQueue ?? createAsyncQueue();
65
78
  this.leadTurns = 0;
66
- this.taskAmend = taskAmend ?? null;
67
-
68
- let resolve;
69
- const promise = new Promise((r) => {
70
- resolve = r;
79
+ this.stopped = false;
80
+ let resolveDone;
81
+ this.donePromise = new Promise((r) => {
82
+ resolveDone = r;
71
83
  });
72
- this.concludePromise = promise;
73
- this.concludeResolve = resolve;
84
+ this.#signalDone = resolveDone;
74
85
  }
75
86
 
87
+ /** Internal — resolver for `donePromise`; called when `stopped` flips true so waiters unblock. */
88
+ #signalDone;
89
+
76
90
  /**
77
91
  * Run the full orchestrated session.
78
92
  * @param {string} task
@@ -80,243 +94,170 @@ export class OrchestrationLoop {
80
94
  */
81
95
  async run(task) {
82
96
  this.emitOrchestratorEvent({ type: "session_start" });
83
-
84
97
  const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
85
98
 
86
- // Launch agent loops first — they wait for messages via messageBus.
87
- // This lets agents process Ask/Announce messages that arrive during
88
- // the lead's initial run, rather than after it completes.
89
- const agentPromises = this.agents.map((a) => this.#runAgent(a));
90
-
91
- // Turn 0: lead receives the task
92
- this.leadTurns++;
93
- await this.leadRunner.run(initialTask);
94
-
95
- // Handle redirect after turn 0
96
- await this.#processRedirect();
97
-
98
- if (this.ctx.concluded) {
99
- // Lead concluded during its initial run. Let agents finish any
100
- // in-progress work before returning — they may have received Ask/Answer
101
- // messages and started processing concurrently.
102
- this.concludeResolve();
103
- await Promise.allSettled(agentPromises);
104
- const success = this.ctx.verdict === "success";
105
- this.emitSummary({
106
- success,
107
- verdict: this.ctx.verdict,
108
- turns: this.leadTurns,
109
- summary: this.ctx.summary,
110
- });
111
- return { success, turns: this.leadTurns };
112
- }
113
-
114
- // Abort agents promptly when the session concludes during the event loop
115
- this.concludePromise.then(() => {
116
- for (const agent of this.agents) {
117
- agent.runner.currentAbortController?.abort();
118
- }
119
- });
99
+ let firstError = null;
100
+ const abort = (err) => {
101
+ if (err && !firstError) firstError = err;
102
+ this.#stop();
103
+ };
120
104
 
121
- // Concurrent phase: lead event loop + already-running agent loops
122
- const leadPromise = this.#leadLoop();
105
+ // Start agent loops in parallel. Wrapped so a crash flips `stopped`
106
+ // but the wrapper itself resolves — Promise.allSettled below never
107
+ // sees an unhandled rejection.
108
+ const agentPromises = this.agents.map((a) =>
109
+ this.#runAgent(a).catch(abort),
110
+ );
123
111
 
124
112
  try {
125
- await Promise.all([...agentPromises, leadPromise]);
113
+ await this.#runLead(initialTask);
126
114
  } catch (err) {
127
- for (const agent of this.agents) {
128
- agent.runner.currentAbortController?.abort();
129
- }
130
- this.leadRunner.currentAbortController?.abort();
131
- throw err;
115
+ abort(err);
116
+ } finally {
117
+ this.#stop();
132
118
  }
133
119
 
120
+ await Promise.allSettled(agentPromises);
121
+ if (firstError) throw firstError;
122
+
134
123
  const success = this.ctx.concluded && this.ctx.verdict === "success";
135
- const result = {
136
- success,
137
- turns: this.leadTurns,
138
- };
139
124
  this.emitSummary({
140
125
  success,
141
126
  verdict: this.ctx.verdict,
142
- turns: result.turns,
127
+ turns: this.leadTurns,
143
128
  summary: this.ctx.summary,
144
129
  });
145
- return result;
146
- }
147
-
148
- #checkAsk(name) {
149
- return checkPendingAsk({
150
- ctx: this.ctx,
151
- messageBus: this.messageBus,
152
- addresseeName: name,
153
- mode: this.mode,
154
- emitViolation: (e) => this.emitOrchestratorEvent(e),
155
- });
130
+ return { success, turns: this.leadTurns };
156
131
  }
157
132
 
158
- async #enforcePendingAsk(agent) {
159
- if (this.#checkAsk(agent.name) !== "recheck") return;
160
- if (this.ctx.concluded) return;
161
- const reminders = this.messageBus.drain(agent.name);
162
- if (reminders.length === 0) return;
163
- await agent.runner.resume(formatMessages(reminders));
164
- if (this.ctx.concluded) return;
165
- this.#checkAsk(agent.name);
133
+ #stop() {
134
+ if (this.stopped) return;
135
+ this.stopped = true;
136
+ this.#signalDone();
137
+ for (const agent of this.agents) {
138
+ agent.runner.currentAbortController?.abort();
139
+ }
140
+ this.leadRunner.currentAbortController?.abort();
166
141
  }
167
142
 
168
143
  /**
169
- * Agent outer loop waits for messages, runs/resumes the agent.
170
- * @param {{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}} agent
144
+ * Lead loop. The lead's first turn carries the task; every subsequent
145
+ * turn is a resume triggered by something landing on its inbox.
146
+ *
147
+ * `messages.length === 0` from `#drainOrWait` means the session ended
148
+ * before any message arrived — that's the natural exit. If
149
+ * `#drainOrWait` returned messages, deliver them even if the session
150
+ * concluded in the microtask window between wake-up and this check;
151
+ * the inbox already has them and they deserve to be seen.
171
152
  */
172
- async #runAgent(agent) {
173
- // Wait for first message (lazy start)
174
- await Promise.race([
175
- this.messageBus.waitForMessages(agent.name),
176
- this.concludePromise,
177
- ]);
178
- if (this.ctx.concluded) return;
179
-
180
- let messages = this.messageBus.drain(agent.name);
181
- if (messages.length === 0) return;
153
+ async #runLead(initialTask) {
154
+ this.leadTurns = 1;
155
+ this.emitOrchestratorEvent({ type: "agent_start", agent: this.leadName });
156
+ await this.leadRunner.run(initialTask);
157
+ if (this.#exiting()) return;
158
+ await this.#settleOwedAsks(this.leadName, this.leadRunner);
182
159
 
183
- this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
184
- await agent.runner.run(formatMessages(messages));
185
- if (await this.#settleAgentTurn(agent)) return;
160
+ while (!this.#exiting()) {
161
+ if (this.leadTurns >= this.maxLeadTurns) {
162
+ this.emitOrchestratorEvent({
163
+ type: "lead_turn_limit",
164
+ limit: this.maxLeadTurns,
165
+ });
166
+ return;
167
+ }
168
+ const messages = await this.#drainOrWait(this.leadName);
169
+ if (messages.length === 0) return;
186
170
 
187
- // Loop: check for new messages, resume if any
188
- while (!this.ctx.concluded) {
189
- messages = await this.#awaitAgentMessages(agent.name);
190
- if (messages.length === 0) break;
191
- await agent.runner.resume(formatMessages(messages));
192
- if (await this.#settleAgentTurn(agent)) break;
171
+ this.leadTurns++;
172
+ await this.leadRunner.resume(formatMessages(messages));
173
+ if (this.#exiting()) return;
174
+ await this.#settleOwedAsks(this.leadName, this.leadRunner);
193
175
  }
194
176
  }
195
177
 
196
178
  /**
197
- * Enforce pending-ask and emit turn_complete. Returns true when the
198
- * session has concluded and the caller should stop.
179
+ * Agent loop. The first message off the inbox triggers `run()`; every
180
+ * subsequent batch triggers `resume()`. No turn budget — the agent
181
+ * runner's own `maxTurns` caps each SDK call.
199
182
  */
200
- async #settleAgentTurn(agent) {
201
- if (this.ctx.concluded) return true;
202
- await this.#enforcePendingAsk(agent);
203
- if (this.ctx.concluded) return true;
204
- this.eventQueue.enqueue({
205
- type: "lifecycle",
206
- agent: agent.name,
207
- status: "turn_complete",
208
- });
209
- return false;
183
+ async #runAgent({ name, runner }) {
184
+ let started = false;
185
+ while (!this.#exiting()) {
186
+ const messages = await this.#drainOrWait(name);
187
+ if (messages.length === 0) return;
188
+
189
+ if (!started) {
190
+ started = true;
191
+ this.emitOrchestratorEvent({ type: "agent_start", agent: name });
192
+ await runner.run(formatMessages(messages));
193
+ } else {
194
+ await runner.resume(formatMessages(messages));
195
+ }
196
+ if (this.#exiting()) return;
197
+ await this.#settleOwedAsks(name, runner);
198
+ }
199
+ }
200
+
201
+ /** Either an explicit Conclude or any abort path. */
202
+ #exiting() {
203
+ return this.stopped || this.ctx.concluded;
210
204
  }
211
205
 
212
206
  /**
213
- * Wait for messages addressed to `name`, returning an empty array when
214
- * the session concludes first.
207
+ * Drain the queue, or wait for the first message to arrive. Returns an
208
+ * empty array when the session ended before any message landed.
215
209
  */
216
- async #awaitAgentMessages(name) {
217
- const messages = this.messageBus.drain(name);
210
+ async #drainOrWait(name) {
211
+ let messages = this.messageBus.drain(name);
218
212
  if (messages.length > 0) return messages;
219
213
  await Promise.race([
220
214
  this.messageBus.waitForMessages(name),
221
- this.concludePromise,
215
+ this.donePromise,
222
216
  ]);
223
- if (this.ctx.concluded) return [];
224
- return this.messageBus.drain(name);
217
+ if (this.stopped) return [];
218
+ messages = this.messageBus.drain(name);
219
+ return messages;
225
220
  }
226
221
 
227
222
  /**
228
- * Lead event loop only runs when input arrives.
223
+ * If `name` left a pending Ask unanswered, inject one synthetic reminder
224
+ * and resume once more. If still unanswered after the reminder, emit a
225
+ * `protocol_violation` event per outstanding ask and cancel them — the
226
+ * asker's queue gets a synthetic `[no answer: …]` so it doesn't deadlock
227
+ * on a participant that's silently ignoring its inbox.
229
228
  */
230
- async #leadLoop() {
231
- while (!this.ctx.concluded) {
232
- const event = await this.eventQueue.dequeue();
233
- if (this.ctx.concluded || event === null) break;
234
- await this.#handleEvent(event);
235
- }
236
- }
237
-
238
- async #handleEvent(event) {
239
- switch (event.type) {
240
- case "messages":
241
- case "lifecycle": {
242
- const msgs = this.messageBus.drain(this.leadName);
243
- if (msgs.length === 0) break;
244
- this.leadTurns++;
245
- await this.leadRunner.resume(formatMessages(msgs));
246
- await this.#processRedirect();
247
- if (!this.ctx.concluded) await this.#enforceLeadPendingAsk();
248
- break;
249
- }
250
- }
251
-
252
- if (this.ctx.concluded) {
253
- this.concludeResolve();
254
- this.eventQueue.close();
255
- }
256
- }
229
+ async #settleOwedAsks(name, runner) {
230
+ if (pendingAsksOwedBy(this.ctx, name).length === 0) return;
231
+ if (this.stopped) return;
257
232
 
258
- async #enforceLeadPendingAsk() {
259
- if (this.#checkAsk(this.leadName) !== "recheck") return;
260
- if (this.ctx.concluded) return;
261
- const reminders = this.messageBus.drain(this.leadName);
233
+ const reminded = remindOwedAsks(this.ctx, name);
234
+ if (!reminded) return;
235
+ const reminders = this.messageBus.drain(name);
262
236
  if (reminders.length === 0) return;
263
- this.leadTurns++;
264
- await this.leadRunner.resume(formatMessages(reminders));
265
- await this.#processRedirect();
266
- if (this.ctx.concluded) return;
267
- this.#checkAsk(this.leadName);
268
- }
269
-
270
- /**
271
- * Process a pending redirect after a lead turn.
272
- */
273
- async #processRedirect() {
274
- if (!this.ctx.redirect) return;
275
- const redirect = this.ctx.redirect;
276
- this.ctx.redirect = null;
277
237
 
278
- this.emitOrchestratorEvent({
279
- type: "redirect",
280
- to: redirect.to,
281
- });
238
+ await runner.resume(formatMessages(reminders));
239
+ if (this.stopped) return;
282
240
 
283
- if (redirect.to === "all") {
284
- // Abort all agents and deliver redirect via broadcast
285
- for (const agent of this.agents) {
286
- agent.runner.currentAbortController?.abort();
287
- }
288
- this.messageBus.announce(this.leadName, redirect.message);
289
- } else if (redirect.to) {
290
- // Abort specific agent and deliver via direct message
291
- const target = this.agents.find((a) => a.name === redirect.to);
292
- if (target) {
293
- target.runner.currentAbortController?.abort();
294
- }
295
- this.messageBus.direct(this.leadName, redirect.to, redirect.message);
296
- }
297
- }
241
+ const stillOwed = pendingAsksOwedBy(this.ctx, name);
242
+ if (stillOwed.length === 0) return;
298
243
 
299
- /** Return the last assistant text block from a runner's buffer, or the fallback if none exists. */
300
- extractLastText(runner, fallback) {
301
- const lines = runner.buffer;
302
- for (let i = lines.length - 1; i >= 0; i--) {
303
- const event = JSON.parse(lines[i]);
304
- if (event.type !== "assistant") continue;
305
- const content = event.message?.content ?? event.content;
306
- if (!Array.isArray(content)) continue;
307
- for (let j = content.length - 1; j >= 0; j--) {
308
- if (content[j].type === "text" && content[j].text) {
309
- return content[j].text;
310
- }
311
- }
244
+ for (const entry of stillOwed) {
245
+ this.emitOrchestratorEvent({
246
+ type: "protocol_violation",
247
+ agent: name,
248
+ askId: entry.askId,
249
+ mode: this.mode,
250
+ });
312
251
  }
313
- return fallback;
252
+ cancelPendingAsks(this.ctx, `${name} did not answer after reminder`, name);
314
253
  }
315
254
 
316
255
  /**
317
- * Emit a single NDJSON line tagged with source and seq.
318
- * @param {string} source - Participant name
319
- * @param {string} line - Raw NDJSON line
256
+ * Emit one NDJSON line tagged with its source (participant name) and a
257
+ * monotonic seq, wrapped in the universal `{source, seq, event}` envelope.
258
+ * Called from each runner's `onLine` callback.
259
+ * @param {string} source
260
+ * @param {string} line - Raw NDJSON line from the SDK iterator.
320
261
  */
321
262
  emitLine(source, line) {
322
263
  const event = JSON.parse(line);
@@ -332,7 +273,10 @@ export class OrchestrationLoop {
332
273
  }
333
274
 
334
275
  /**
335
- * @param {{type: string}} event
276
+ * Emit one orchestrator-source event (`session_start`, `agent_start`,
277
+ * `protocol_violation`, `lead_turn_limit`) wrapped in the universal
278
+ * envelope.
279
+ * @param {object} event
336
280
  */
337
281
  emitOrchestratorEvent(event) {
338
282
  this.output.write(
@@ -347,7 +291,10 @@ export class OrchestrationLoop {
347
291
  }
348
292
 
349
293
  /**
350
- * @param {{success: boolean, verdict?: string|null, turns: number, summary?: string}} result
294
+ * Emit the terminal summary line. `Discusser` emits its own discuss-
295
+ * augmented summary after this one; trace consumers keep the last
296
+ * summary they see.
297
+ * @param {{success: boolean, verdict?: string|null, turns: number, summary?: string|null}} result
351
298
  */
352
299
  emitSummary(result) {
353
300
  this.output.write(