@forwardimpact/libeval 0.1.42 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,369 @@
1
+ /**
2
+ * OrchestrationLoop — N agent sessions + one lead LLM session. The
3
+ * Ask/Answer contract is enforced at turn boundaries via checkPendingAsk:
4
+ * one synthetic reminder, then a `protocol_violation` event plus a
5
+ * null-answer injection so the session advances instead of deadlocking.
6
+ *
7
+ * Mode-specific concepts (Conclude vs. Adjourn/Recess, lead role name,
8
+ * system prompts, tool sets) live in mode-specific wrappers
9
+ * (`Facilitator` for facilitate mode, `Discusser` for discuss mode). This
10
+ * file owns only the loop itself.
11
+ */
12
+ import { SequenceCounter } from "./sequence-counter.js";
13
+ import {
14
+ createOrchestrationContext,
15
+ checkPendingAsk,
16
+ } from "./orchestration-toolkit.js";
17
+ import { createAsyncQueue, formatMessages } from "./orchestrator-helpers.js";
18
+
19
+ /**
20
+ * Orchestrate N agent sessions coordinated by a single lead LLM session.
21
+ * Mode-neutral. Callers parameterise the lead participant's name and the
22
+ * `protocol_violation` mode tag so the same loop powers both facilitate
23
+ * and discuss modes without either knowing about the other.
24
+ */
25
export class OrchestrationLoop {
  /**
   * @param {object} deps
   * @param {import("./agent-runner.js").AgentRunner} deps.leadRunner
   * @param {Array<{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}>} deps.agents
   * @param {import("./message-bus.js").MessageBus} deps.messageBus
   * @param {import("stream").Writable} deps.output
   * @param {string} [deps.leadName] - Canonical name of the lead participant on the messageBus (default "lead").
   * @param {"facilitated"|"discussion"|"supervised"} [deps.mode] - Mode tag emitted on `protocol_violation` events (default "facilitated").
   * @param {number} [deps.maxTurns] - Stored on the instance (default 20); this class does not read it back itself.
   * @param {object} [deps.ctx] - Shared orchestration context; a fresh one is created when omitted.
   * @param {object} [deps.eventQueue] - Async queue feeding the lead loop; a fresh one is created when omitted.
   * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
   * @param {object} deps.redactor - Must expose `redactValue`; applied to every emitted envelope.
   * @throws {Error} When `redactor` is missing.
   */
  constructor({
    leadRunner,
    agents,
    messageBus,
    output,
    leadName,
    mode,
    maxTurns,
    ctx,
    eventQueue,
    taskAmend,
    redactor,
  }) {
    if (!redactor) throw new Error("redactor is required");
    this.redactor = redactor;
    this.leadRunner = leadRunner;
    this.leadName = leadName ?? "lead";
    this.mode = mode ?? "facilitated";
    this.agents = agents;
    this.messageBus = messageBus;
    this.output = output;
    this.maxTurns = maxTurns ?? 20;
    this.ctx = ctx ?? createOrchestrationContext();
    this.counter = new SequenceCounter();
    this.eventQueue = eventQueue ?? createAsyncQueue();
    this.leadTurns = 0;
    this.taskAmend = taskAmend ?? null;

    // Deferred promise: resolved once the session concludes so that every
    // waiter racing against it (agent loops, abort hook) is released.
    let resolve;
    const promise = new Promise((r) => {
      resolve = r;
    });
    this.concludePromise = promise;
    this.concludeResolve = resolve;
  }

  /**
   * Run the full orchestrated session.
   * @param {string} task
   * @returns {Promise<{success: boolean, turns: number}>}
   * @throws Re-throws the first failure from the lead or any agent after
   *   aborting every runner that exposes an abort controller.
   */
  async run(task) {
    this.emitOrchestratorEvent({ type: "session_start" });

    const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;

    // Launch agent loops first — they wait for messages via messageBus.
    // This lets agents process Ask/Announce messages that arrive during
    // the lead's initial run, rather than after it completes.
    const agentPromises = this.agents.map((a) => this.#runAgent(a));

    // The agent promises are only awaited after the lead's first turn. Mark
    // them as handled now so a failure during turn 0 does not surface as a
    // process-level unhandled rejection; the rejection itself is preserved
    // and still observed by Promise.all / Promise.allSettled below.
    for (const pending of agentPromises) {
      pending.catch(() => {});
    }

    try {
      // Turn 0: lead receives the task
      this.leadTurns++;
      await this.leadRunner.run(initialTask);

      // Handle redirect after turn 0
      await this.#processRedirect();
    } catch (err) {
      // Same cleanup as the concurrent phase: do not leave the already
      // launched agent sessions running when the lead fails.
      this.#abortAll();
      throw err;
    }

    if (this.ctx.concluded) {
      // Lead concluded during its initial run. Let agents finish any
      // in-progress work before returning — they may have received Ask/Answer
      // messages and started processing concurrently.
      this.concludeResolve();
      await Promise.allSettled(agentPromises);
      const success = this.ctx.verdict === "success";
      this.emitSummary({
        success,
        verdict: this.ctx.verdict,
        turns: this.leadTurns,
        summary: this.ctx.summary,
      });
      return { success, turns: this.leadTurns };
    }

    // Abort agents promptly when the session concludes during the event loop
    this.concludePromise.then(() => {
      this.#abortAgents();
    });

    // Concurrent phase: lead event loop + already-running agent loops
    const leadPromise = this.#leadLoop();

    try {
      await Promise.all([...agentPromises, leadPromise]);
    } catch (err) {
      this.#abortAll();
      throw err;
    }

    const success = this.ctx.concluded && this.ctx.verdict === "success";
    const result = {
      success,
      turns: this.leadTurns,
    };
    this.emitSummary({
      success,
      verdict: this.ctx.verdict,
      turns: result.turns,
      summary: this.ctx.summary,
    });
    return result;
  }

  /** Abort every agent runner that currently exposes an abort controller. */
  #abortAgents() {
    for (const agent of this.agents) {
      agent.runner.currentAbortController?.abort();
    }
  }

  /** Abort all agent runners and then the lead runner. */
  #abortAll() {
    this.#abortAgents();
    this.leadRunner.currentAbortController?.abort();
  }

  /**
   * Run the shared pending-ask check for one participant.
   * @param {string} name - Addressee to check.
   * @returns Whatever `checkPendingAsk` returns; callers compare it to "recheck".
   */
  #checkAsk(name) {
    return checkPendingAsk({
      ctx: this.ctx,
      messageBus: this.messageBus,
      addresseeName: name,
      mode: this.mode,
      emitViolation: (e) => this.emitOrchestratorEvent(e),
    });
  }

  /**
   * When the check asks for a recheck, deliver any queued messages to the
   * agent in one extra resume and then run the check a second time.
   */
  async #enforcePendingAsk(agent) {
    if (this.#checkAsk(agent.name) !== "recheck") return;
    if (this.ctx.concluded) return;
    const reminders = this.messageBus.drain(agent.name);
    if (reminders.length === 0) return;
    await agent.runner.resume(formatMessages(reminders));
    if (this.ctx.concluded) return;
    this.#checkAsk(agent.name);
  }

  /**
   * Agent outer loop — waits for messages, runs/resumes the agent.
   * @param {{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}} agent
   */
  async #runAgent(agent) {
    // Wait for first message (lazy start)
    await Promise.race([
      this.messageBus.waitForMessages(agent.name),
      this.concludePromise,
    ]);
    if (this.ctx.concluded) return;

    let messages = this.messageBus.drain(agent.name);
    if (messages.length === 0) return;

    this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
    await agent.runner.run(formatMessages(messages));
    if (await this.#settleAgentTurn(agent)) return;

    // Loop: check for new messages, resume if any
    while (!this.ctx.concluded) {
      messages = await this.#awaitAgentMessages(agent.name);
      if (messages.length === 0) break;
      await agent.runner.resume(formatMessages(messages));
      if (await this.#settleAgentTurn(agent)) break;
    }
  }

  /**
   * Enforce pending-ask and emit turn_complete. Returns true when the
   * session has concluded and the caller should stop.
   */
  async #settleAgentTurn(agent) {
    if (this.ctx.concluded) return true;
    await this.#enforcePendingAsk(agent);
    if (this.ctx.concluded) return true;
    this.eventQueue.enqueue({
      type: "lifecycle",
      agent: agent.name,
      status: "turn_complete",
    });
    return false;
  }

  /**
   * Wait for messages addressed to `name`, returning an empty array when
   * the session concludes first.
   */
  async #awaitAgentMessages(name) {
    const messages = this.messageBus.drain(name);
    if (messages.length > 0) return messages;
    await Promise.race([
      this.messageBus.waitForMessages(name),
      this.concludePromise,
    ]);
    if (this.ctx.concluded) return [];
    return this.messageBus.drain(name);
  }

  /**
   * Lead event loop — only runs when input arrives. Stops when the session
   * concludes or the queue yields `null`.
   */
  async #leadLoop() {
    while (!this.ctx.concluded) {
      const event = await this.eventQueue.dequeue();
      if (this.ctx.concluded || event === null) break;
      await this.#handleEvent(event);
    }
  }

  /**
   * Handle one queued event: forward any messages waiting for the lead, then
   * release waiters and close the queue if the session concluded.
   */
  async #handleEvent(event) {
    switch (event.type) {
      case "messages":
      case "lifecycle": {
        const msgs = this.messageBus.drain(this.leadName);
        if (msgs.length === 0) break;
        this.leadTurns++;
        await this.leadRunner.resume(formatMessages(msgs));
        await this.#processRedirect();
        if (!this.ctx.concluded) await this.#enforceLeadPendingAsk();
        break;
      }
    }

    if (this.ctx.concluded) {
      this.concludeResolve();
      this.eventQueue.close();
    }
  }

  /**
   * Lead-side counterpart of #enforcePendingAsk; the extra resume counts as
   * a lead turn and may itself produce a redirect.
   */
  async #enforceLeadPendingAsk() {
    if (this.#checkAsk(this.leadName) !== "recheck") return;
    if (this.ctx.concluded) return;
    const reminders = this.messageBus.drain(this.leadName);
    if (reminders.length === 0) return;
    this.leadTurns++;
    await this.leadRunner.resume(formatMessages(reminders));
    await this.#processRedirect();
    if (this.ctx.concluded) return;
    this.#checkAsk(this.leadName);
  }

  /**
   * Process a pending redirect after a lead turn.
   */
  async #processRedirect() {
    if (!this.ctx.redirect) return;
    const redirect = this.ctx.redirect;
    // Clear first so the same redirect is never processed twice.
    this.ctx.redirect = null;

    this.emitOrchestratorEvent({
      type: "redirect",
      to: redirect.to,
    });

    if (redirect.to === "all") {
      // Abort all agents and deliver redirect via broadcast
      this.#abortAgents();
      this.messageBus.announce(this.leadName, redirect.message);
    } else if (redirect.to) {
      // Abort specific agent and deliver via direct message
      const target = this.agents.find((a) => a.name === redirect.to);
      if (target) {
        target.runner.currentAbortController?.abort();
      }
      this.messageBus.direct(this.leadName, redirect.to, redirect.message);
    }
  }

  /**
   * Return the last assistant text block from a runner's buffer, or the
   * fallback if none exists. Buffer entries that are not valid JSON, or that
   * do not decode to an assistant event with an array of content blocks, are
   * skipped instead of aborting the scan.
   * @param {{buffer?: string[]}} runner
   * @param {*} fallback - Returned when no non-empty text block is found.
   * @returns {*} The text of the most recent assistant text block, or `fallback`.
   */
  extractLastText(runner, fallback) {
    const lines = runner.buffer ?? [];
    for (let i = lines.length - 1; i >= 0; i--) {
      let event;
      try {
        event = JSON.parse(lines[i]);
      } catch {
        // Unparseable line: nothing to extract here, keep scanning backwards.
        continue;
      }
      if (event?.type !== "assistant") continue;
      const content = event.message?.content ?? event.content;
      if (!Array.isArray(content)) continue;
      for (let j = content.length - 1; j >= 0; j--) {
        const block = content[j];
        if (block?.type === "text" && block.text) {
          return block.text;
        }
      }
    }
    return fallback;
  }

  /**
   * Write one redacted NDJSON envelope `{source, seq, event}` to the output.
   * @param {string} source
   * @param {object} event
   */
  #write(source, event) {
    const envelope = this.redactor.redactValue({
      source,
      seq: this.counter.next(),
      event,
    });
    this.output.write(JSON.stringify(envelope) + "\n");
  }

  /**
   * Emit a single NDJSON line tagged with source and seq.
   * @param {string} source - Participant name
   * @param {string} line - Raw NDJSON line
   * @throws {SyntaxError} When `line` is not valid JSON.
   */
  emitLine(source, line) {
    this.#write(source, JSON.parse(line));
  }

  /**
   * Emit an event attributed to the orchestrator itself.
   * @param {{type: string}} event
   */
  emitOrchestratorEvent(event) {
    this.#write("orchestrator", event);
  }

  /**
   * Emit the final `summary` event. `verdict` and `summary` are only
   * included when they are truthy.
   * @param {{success: boolean, verdict?: string|null, turns: number, summary?: string}} result
   */
  emitSummary(result) {
    this.#write("orchestrator", {
      type: "summary",
      success: result.success,
      ...(result.verdict && { verdict: result.verdict }),
      turns: result.turns,
      ...(result.summary && { summary: result.summary }),
    });
  }
}
package/src/redaction.js CHANGED
@@ -10,8 +10,18 @@
10
10
 
11
11
  export const DEFAULT_ENV_ALLOWLIST = Object.freeze([
12
12
  "ANTHROPIC_API_KEY",
13
+ "AWS_ACCESS_KEY_ID",
14
+ "AWS_SECRET_ACCESS_KEY",
15
+ "DATABASE_PASSWORD",
13
16
  "GH_TOKEN",
14
17
  "GITHUB_TOKEN",
18
+ "MCP_TOKEN",
19
+ "MICROSOFT_APP_PASSWORD",
20
+ "PRODUCT_LANDMARK_TOKEN",
21
+ "SERVICE_SECRET",
22
+ "SUPABASE_ANON_KEY",
23
+ "SUPABASE_JWT_SECRET",
24
+ "SUPABASE_SERVICE_ROLE_KEY",
15
25
  ]);
16
26
 
17
27
  // Anchored prefixes per
@@ -13,6 +13,7 @@ const SUPPRESSED = new Set([
13
13
  "ask_answered",
14
14
  "redirect",
15
15
  "summary",
16
+ "meta",
16
17
  ]);
17
18
 
18
19
  /**
package/src/supervisor.js CHANGED
@@ -50,10 +50,17 @@ export const AGENT_SYSTEM_PROMPT =
50
50
  * Maximum number of mid-turn interventions allowed within a single agent turn.
51
51
  * Bounded so a looping supervisor exhausts its quota fast (observability) but
52
52
  * leaves headroom for legitimate "intervene, observe, intervene again" patterns.
53
- * The outer maxTurns budget still bounds overall runtime.
53
+ * The outer exchange budget still bounds overall runtime.
54
54
  */
55
55
  const MAX_INTERVENTIONS_PER_TURN = 5;
56
56
 
57
+ /**
58
+ * Default cap on supervisor↔agent exchanges in a single run. Not exposed via
59
+ * CLI — `--max-turns` governs the per-runner invocation budget instead. When
60
+ * a `--max-exchanges` flag is added this becomes the default for that flag.
61
+ */
62
+ const DEFAULT_MAX_EXCHANGES = 100;
63
+
57
64
  /** Orchestrate a relay loop between a supervisor LLM and an agent LLM with mid-turn review. */
58
65
  export class Supervisor {
59
66
  /**
@@ -485,7 +492,7 @@ const devNull = new Writable({
485
492
  * @param {string} [deps.model] - Default model for both runners.
486
493
  * @param {string} [deps.agentModel] - Agent model override (falls back to `model`).
487
494
  * @param {string} [deps.supervisorModel] - Supervisor model override (falls back to `model`).
488
- * @param {number} [deps.maxTurns]
495
+ * @param {number} [deps.maxTurns] - Per-runner invocation budget for both the supervisor and the agent (default 200; 0 = unlimited). Outer supervisor↔agent exchanges are bounded separately by `DEFAULT_MAX_EXCHANGES` (passes through to unlimited when `maxTurns === 0`).
489
496
  * @param {string[]} [deps.allowedTools]
490
497
  * @param {string[]} [deps.supervisorAllowedTools]
491
498
  * @param {string[]} [deps.supervisorDisallowedTools]
@@ -544,8 +551,13 @@ export function createSupervisor({
544
551
 
545
552
  const onLine = (line) => supervisor.emitLine(line);
546
553
 
547
- const perInvocationTurns =
548
- maxTurns === 0 ? 0 : Math.max(maxTurns ?? 100, 200);
554
+ // `maxTurns` is the per-runner invocation budget — matches `run` and
555
+ // `facilitate` semantics. The outer supervisor↔agent exchange loop is
556
+ // bounded separately by `DEFAULT_MAX_EXCHANGES`; when --max-exchanges is
557
+ // added it will become a parameter. `maxTurns === 0` propagates through
558
+ // to mean unlimited on both axes.
559
+ const perInvocationTurns = maxTurns ?? 200;
560
+ const exchangeBudget = maxTurns === 0 ? 0 : DEFAULT_MAX_EXCHANGES;
549
561
 
550
562
  const agentRunner = createAgentRunner({
551
563
  cwd: agentCwd,
@@ -595,7 +607,7 @@ export function createSupervisor({
595
607
  agentRunner,
596
608
  supervisorRunner,
597
609
  output,
598
- maxTurns,
610
+ maxTurns: exchangeBudget,
599
611
  ctx,
600
612
  messageBus,
601
613
  taskAmend,
@@ -40,6 +40,7 @@ export class TraceCollector {
40
40
  * Malformed lines are silently skipped.
41
41
  * @param {string} line - A single JSON line from stream-json output
42
42
  */
43
+ // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: NDJSON envelope unwrap + orchestrator/system/assistant/user dispatch
43
44
  addLine(line) {
44
45
  const trimmed = line.trim();
45
46
  if (!trimmed) return;
@@ -74,6 +75,9 @@ export class TraceCollector {
74
75
  ...(typeof event.turns === "number" && { turns: event.turns }),
75
76
  };
76
77
  }
78
+ if (event.type === "meta" && typeof event.discussion_id === "string") {
79
+ this.discussionId = event.discussion_id;
80
+ }
77
81
  return;
78
82
  }
79
83