@forwardimpact/libeval 0.1.44 → 0.1.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/discusser.js CHANGED
@@ -27,14 +27,15 @@ import { OrchestrationLoop } from "./orchestration-loop.js";
27
27
  /** System prompt appended for the lead (Chair) runner in discuss mode. */
28
28
  export const DISCUSS_SYSTEM_PROMPT =
29
29
  "You lead an asynchronous discussion across multiple participants and a human channel. " +
30
- "Ask delivers a question to one named participant — or broadcasts when no addressee is named and blocks until that participant answers. " +
30
+ "Ask sends a question and returns immediately with {askIds:[N,…]}. The reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox — between turns you can plan, reflect, or send more Asks while participants work in parallel. End your turn with text after you've asked everything you intend to; the orchestrator wakes you when the next message lands. " +
31
+ "Answer replies to an ask a participant addressed to you (you'll see it tagged `[ask#N] <participant>: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
31
32
  "Announce delivers a message with no reply obligation. " +
32
- "Redirect interrupts an in-progress participant with replacement instructions. " +
33
33
  "RollCall returns the participant roster. " +
34
34
  "RequestForComment posts a message to the human thread via the bridge. Every reply you want the human to see MUST go through RequestForComment — the bridge delivers only queued replies, not your text output. " +
35
- "Recess suspends the run with a resumption trigger (responses / elapsed / either). " +
35
+ "Recess suspends the run with a resumption trigger (responses / elapsed / either); any open Asks get a synthetic '[no answer: session concluded]' on the asker's queue so nothing dangles. " +
36
36
  "Adjourn ends the discussion with a verdict ('adjourned' / 'failed') and a summary. " +
37
- "You MUST call RequestForComment with your response before calling Adjourn. You MUST end every run by calling Adjourn or Recess — never end a turn with only text.";
37
+ "Multiple Ask / Announce calls in one assistant turn dispatch in parallel — issue them as parallel tool_use blocks rather than sending the same question both broadcast and individually. " +
38
+ "You MUST call RequestForComment with your response before calling Adjourn. You MUST end every run by calling Adjourn or Recess — never end a turn with only text *after* every Ask round has resolved.";
38
39
 
39
40
  /**
40
41
  * Augment a base orchestration context with discuss-mode fields.
@@ -44,34 +45,13 @@ export const DISCUSS_SYSTEM_PROMPT =
44
45
  */
45
46
  export function augmentContextForDiscuss(ctx, discussionId) {
46
47
  ctx.discussionId = discussionId;
47
- ctx.recessed = false;
48
48
  ctx.recessTrigger = null;
49
- ctx.recessReason = null;
50
49
  ctx.replies = [];
51
50
  ctx.rfcCounter = 0;
52
51
  ctx.outcome = null;
53
52
  return ctx;
54
53
  }
55
54
 
56
- /**
57
- * Round-trip-safe representation of `ctx.pendingAsks` (a `Map`).
58
- * @param {Map<string, object>} map
59
- * @returns {object}
60
- */
61
- export function pendingAsksToPlain(map) {
62
- return Object.fromEntries(map);
63
- }
64
-
65
- /**
66
- * Restore a plain object back into a `Map<string, …>`.
67
- * @param {object|null|undefined} plain
68
- * @returns {Map<string, object>}
69
- */
70
- export function pendingAsksFromPlain(plain) {
71
- if (!plain) return new Map();
72
- return new Map(Object.entries(plain));
73
- }
74
-
75
55
  const devNull = new Writable({
76
56
  write(_chunk, _enc, cb) {
77
57
  cb();
@@ -89,9 +69,9 @@ export class Discusser {
89
69
  * @param {OrchestrationLoop} deps.loop
90
70
  * @param {object} deps.ctx
91
71
  * @param {import("stream").Writable} deps.output
72
+ * @param {object} deps.redactor
92
73
  * @param {string|null} [deps.discussionId]
93
74
  * @param {SequenceCounter} [deps.counter]
94
- * @param {object} [deps.redactor]
95
75
  */
96
76
  constructor({ loop, ctx, output, discussionId, counter, redactor }) {
97
77
  if (!loop) throw new Error("loop is required");
@@ -123,7 +103,7 @@ export class Discusser {
123
103
  await this.loop.run(task);
124
104
 
125
105
  const verdict = this.ctx.verdict ?? "failed";
126
- const success = verdict === "adjourned" || verdict === "concluded";
106
+ const success = verdict === "adjourned";
127
107
  this.#emitDiscussSummary({
128
108
  success,
129
109
  verdict,
@@ -163,7 +143,6 @@ export class Discusser {
163
143
  replies: this.ctx.replies,
164
144
  ...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
165
145
  ...(this.discussionId && { discussion_id: this.discussionId }),
166
- pending_asks: pendingAsksToPlain(this.ctx.pendingAsks),
167
146
  };
168
147
  this.output.write(
169
148
  JSON.stringify(
@@ -182,6 +161,12 @@ export class Discusser {
182
161
  * the `OrchestrationLoop` (with `leadName: "lead"` and discuss-mode
183
162
  * protocol tagging) and the wrapping `Discusser`.
184
163
  *
164
+ * Resume semantics: Recess ends the run, cancels any open Asks via
165
+ * `cancelPendingAsks`, and emits a synthetic null answer per cancelled
166
+ * ask so nothing dangles in the trace. The bridge later re-dispatches
167
+ * the workflow against a fresh context; the human reads the trail of
168
+ * events to decide what to re-ask.
169
+ *
185
170
  * @param {object} deps
186
171
  * @param {string} [deps.leadProfile]
187
172
  * @param {string} [deps.leadModel]
@@ -225,12 +210,11 @@ export function createDiscusser({
225
210
  discussionId ?? null,
226
211
  );
227
212
 
228
- // Hydrate resume context — pendingAsks, participants, history, replies.
229
- // resumeContext is the entire suspend/resume contract; every mutation a
230
- // Recess needs to preserve must travel through it.
213
+ // Hydrate resume context — participants, replies, counters. `pendingAsks`
214
+ // is intentionally not restored: Recess cancelled every in-flight Ask
215
+ // with a synthetic null answer, so there's nothing meaningful to carry
216
+ // forward.
231
217
  if (resumeContext) {
232
- if (resumeContext.pendingAsks)
233
- ctx.pendingAsks = pendingAsksFromPlain(resumeContext.pendingAsks);
234
218
  if (Array.isArray(resumeContext.participants))
235
219
  ctx.participants = resumeContext.participants;
236
220
  if (Array.isArray(resumeContext.replies))
@@ -297,7 +281,7 @@ export function createDiscusser({
297
281
  query,
298
282
  output: devNull,
299
283
  model: leadModel ?? "claude-opus-4-7[1m]",
300
- maxTurns: maxTurns ?? 40,
284
+ maxTurns: maxTurns ?? 80,
301
285
  allowedTools: ["Bash", "Read", "Glob", "Grep", "Write", "Edit"],
302
286
  disallowedTools: defaultDisallowed,
303
287
  onLine: (line) => discusser.loop.emitLine("lead", line),
@@ -314,7 +298,6 @@ export function createDiscusser({
314
298
  output,
315
299
  leadName: "lead",
316
300
  mode: "discussion",
317
- maxTurns: maxTurns ?? 40,
318
301
  ctx,
319
302
  taskAmend,
320
303
  redactor,
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Facilitator — facilitate-mode wrapper around `OrchestrationLoop`. The
3
- * lead participant is named "facilitator" and uses the `Conclude` tool to
4
- * end the session. The within-run turn loop itself lives in
3
+ * lead participant is named "facilitator" and ends the session via the
4
+ * `Conclude` tool. The within-run turn loop lives in
5
5
  * `orchestration-loop.js`; this file owns only the facilitate-mode
6
6
  * specifics (lead role name, system prompts, tool wiring, factory).
7
7
  */
@@ -16,34 +16,33 @@ import {
16
16
  createFacilitatorToolServer,
17
17
  createFacilitatedAgentToolServer,
18
18
  } from "./orchestration-toolkit.js";
19
- import { createAsyncQueue } from "./orchestrator-helpers.js";
20
19
  import { OrchestrationLoop } from "./orchestration-loop.js";
21
20
 
22
21
  /** System prompt appended for the facilitator runner. */
23
22
  export const FACILITATOR_SYSTEM_PROMPT =
24
23
  "You coordinate multiple participants via these tools: " +
25
- "Ask delivers a question to one named participant — or broadcasts when no addressee is named and blocks until that participant answers. " +
24
+ "Ask sends a question and returns immediately with {askIds:[N,…]}. The reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox — between turns you can plan, reflect, or send more Asks while participants work in parallel. End your turn with text after you've asked everything you intend to; the orchestrator wakes you again as soon as a reply (or any message) lands. " +
25
+ "Answer replies to an ask a participant addressed to you (you'll see it tagged `[ask#N] <participant>: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
26
26
  "Announce delivers a message with no reply obligation. " +
27
- "Redirect interrupts an in-progress participant with replacement instructions. " +
28
27
  "RollCall returns the participant roster. " +
29
28
  "Conclude ends the session with a verdict ('success' or 'failure') and a summary. " +
30
- "Ask and Announce calls issued in the same turn dispatch in parallel. " +
31
- "You MUST call Conclude to end every session — never end a turn with only text. " +
29
+ "Multiple Ask / Announce calls in one assistant turn dispatch in parallel — issue them as parallel tool_use blocks rather than sending the same question both broadcast and individually. " +
30
+ "You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
32
31
  "If you can answer the task yourself, still call Conclude with verdict='success' and the answer as the summary.";
33
32
 
34
33
  /** System prompt appended for facilitated agent runners. */
35
34
  export const FACILITATED_AGENT_SYSTEM_PROMPT =
36
35
  "You participate in a coordinated session. " +
37
- "Answer replies to an ask addressed to you. " +
38
- "Ask sends a question to another participant. " +
39
- "Announce broadcasts a message. " +
36
+ "Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
37
+ "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
38
+ "Ask sends a question to another participant and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox. " +
39
+ "Announce broadcasts a message to every other participant — use this for unsolicited remarks or to reply to an Announce. " +
40
40
  "RollCall lists participants.";
41
41
 
42
42
  /**
43
- * Facilitate-mode wrapper around `OrchestrationLoop`. The lead participant
44
- * is `"facilitator"` and the protocol mode is `"facilitated"`. Preserves
45
- * the public surface (`facilitatorRunner`, `facilitatorTurns`) that
46
- * existing callers rely on.
43
+ * Facilitate-mode wrapper around `OrchestrationLoop`. The lead is named
44
+ * `"facilitator"`. `facilitatorRunner` getter is a readability shim for
45
+ * tests that read the runner directly.
47
46
  */
48
47
  export class Facilitator extends OrchestrationLoop {
49
48
  /**
@@ -52,11 +51,9 @@ export class Facilitator extends OrchestrationLoop {
52
51
  * @param {Array<{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}>} deps.agents
53
52
  * @param {import("./message-bus.js").MessageBus} deps.messageBus
54
53
  * @param {import("stream").Writable} deps.output
55
- * @param {number} [deps.maxTurns]
56
- * @param {object} [deps.ctx]
57
- * @param {object} [deps.eventQueue]
58
- * @param {string} [deps.taskAmend]
54
+ * @param {object} deps.ctx
59
55
  * @param {object} deps.redactor
56
+ * @param {string} [deps.taskAmend]
60
57
  */
61
58
  constructor(deps) {
62
59
  super({
@@ -67,20 +64,10 @@ export class Facilitator extends OrchestrationLoop {
67
64
  });
68
65
  }
69
66
 
70
- /** @returns {import("./agent-runner.js").AgentRunner} */
67
+ /** Readability shim — exposes the lead runner under its mode-specific name. */
71
68
  get facilitatorRunner() {
72
69
  return this.leadRunner;
73
70
  }
74
-
75
- /** @returns {number} */
76
- get facilitatorTurns() {
77
- return this.leadTurns;
78
- }
79
-
80
- /** @param {number} v */
81
- set facilitatorTurns(v) {
82
- this.leadTurns = v;
83
- }
84
71
  }
85
72
 
86
73
  const devNull = new Writable({
@@ -96,15 +83,15 @@ const devNull = new Writable({
96
83
  * @param {Array<{name: string, role: string, cwd?: string, maxTurns?: number, allowedTools?: string[], agentProfile?: string, systemPromptAmend?: string}>} deps.agentConfigs
97
84
  * @param {function} deps.query
98
85
  * @param {import("stream").Writable} deps.output
99
- * @param {string} [deps.model] - Default model for all participants.
100
- * @param {string} [deps.agentModel] - Agent model override (falls back to `model`).
101
- * @param {string} [deps.facilitatorModel] - Facilitator model override (falls back to `model`).
102
- * @param {number} [deps.maxTurns] - Facilitator's own per-invocation turn budget (default 20). Each participating agent's budget is taken from `config.maxTurns` on its entry in `agentConfigs` (default 50 when unset). The CLI command (`commands/facilitate.js`) threads `--max-turns` into both this parameter and every agent config so a single CLI value bounds all participants uniformly.
103
- * @param {string[]} [deps.facilitatorAllowedTools] - Tools the facilitator may use; defaults to a read/write file-edit set.
104
- * @param {string[]} [deps.facilitatorDisallowedTools] - Additional tools to block on the facilitator; merged with the sub-agent spawn defaults (Agent/Task/TaskOutput/TaskStop).
105
- * @param {string} [deps.facilitatorProfile] - Facilitator profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
106
- * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<facilitatorCwd>/.claude/agents`. Resolved once from the facilitator's cwd so profiles travel with the project, not with per-agent sandboxes.
107
- * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
86
+ * @param {string} [deps.model]
87
+ * @param {string} [deps.agentModel]
88
+ * @param {string} [deps.facilitatorModel]
89
+ * @param {number} [deps.maxTurns] - Per-SDK-call turn budget for the facilitator runner (default 80). Each agent's budget is taken from `config.maxTurns` (default 50). The lead is resumed once per inbox-drain round, so this caps the size of one such round, not the whole session — `OrchestrationLoop.maxLeadTurns` bounds session length.
90
+ * @param {string[]} [deps.facilitatorAllowedTools]
91
+ * @param {string[]} [deps.facilitatorDisallowedTools]
92
+ * @param {string} [deps.facilitatorProfile]
93
+ * @param {string} [deps.profilesDir]
94
+ * @param {string} [deps.taskAmend]
108
95
  * @returns {Facilitator}
109
96
  */
110
97
  export function createFacilitator({
@@ -147,8 +134,6 @@ export function createFacilitator({
147
134
 
148
135
  let facilitator;
149
136
 
150
- const eventQueue = createAsyncQueue();
151
-
152
137
  const facilitatorServer = createFacilitatorToolServer(ctx);
153
138
 
154
139
  const agents = agentConfigs.map((config) => {
@@ -190,7 +175,7 @@ export function createFacilitator({
190
175
  query,
191
176
  output: devNull,
192
177
  model: facilitatorModel ?? model,
193
- maxTurns: maxTurns ?? 20,
178
+ maxTurns: maxTurns ?? 80,
194
179
  allowedTools: facilitatorAllowedTools ?? [
195
180
  "Bash",
196
181
  "Read",
@@ -215,9 +200,7 @@ export function createFacilitator({
215
200
  agents,
216
201
  messageBus,
217
202
  output,
218
- maxTurns,
219
203
  ctx,
220
- eventQueue,
221
204
  taskAmend,
222
205
  redactor,
223
206
  });
package/src/index.js CHANGED
@@ -38,8 +38,6 @@ export {
38
38
  createDiscusser,
39
39
  DISCUSS_SYSTEM_PROMPT,
40
40
  augmentContextForDiscuss,
41
- pendingAsksToPlain,
42
- pendingAsksFromPlain,
43
41
  } from "./discusser.js";
44
42
  export {
45
43
  createDiscussLeadToolServer,
package/src/judge.js CHANGED
@@ -2,7 +2,7 @@
2
2
  * Judge — one agent session that inspects a completed agent's work and emits
3
3
  * a verdict via the orchestration `Conclude` tool. Parallel concept to
4
4
  * `Supervisor` and `Facilitator`, but post-hoc and solo: no peer agents,
5
- * no message bus, no relay loop. The judge reads the task, optionally
5
+ * no message bus, no orchestration loop. The judge reads the task, optionally
6
6
  * inspects the working directory and trace via read-only tools, and calls
7
7
  * Conclude exactly once.
8
8
  *
@@ -1,22 +1,26 @@
1
1
  /**
2
- * MessageBus — in-memory per-participant message queues for facilitated and
3
- * supervised modes. The message vocabulary mirrors the orchestration toolkit:
2
+ * MessageBus — in-memory per-participant message queues.
4
3
  *
5
- * - ask(from, to, text, askId) — direct question; registers nothing
6
- * itself (the handler's caller owns pending-ask state). `to === "@broadcast"`
7
- * sends an identical entry to every participant except the sender.
8
- * - answer(from, to, text, askId) — direct reply to the asker.
9
- * - announce(from, text) — broadcast, no reply expected.
10
- * - synthetic(to, text) — orchestrator-only reminder injection.
4
+ * Four message kinds, each pushed onto the addressee's queue:
5
+ *
6
+ * - `ask(from, to, text, askId)` — direct question; the toolkit owns the
7
+ * pending-ask state separately. Fan-out (broadcast Ask) happens at the
8
+ * handler level by calling `ask()` once per addressee.
9
+ * - `answer(from, to, text, askId)` — direct reply to the original asker.
10
+ * The orchestrator may inject synthetic answers (`from === "@orchestrator"`)
11
+ * when an Ask times out.
12
+ * - `announce(from, text)` — broadcast, no reply expected; lands on every
13
+ * participant's queue except the sender's.
14
+ * - `synthetic(to, text)` — orchestrator-only reminder injection.
11
15
  *
12
16
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
13
17
  */
14
18
 
15
- /** In-memory per-participant message queues for facilitated and supervised orchestration modes. */
19
+ /** In-memory per-participant message queues. */
16
20
  export class MessageBus {
17
21
  /**
18
22
  * @param {object} deps
19
- * @param {string[]} deps.participants - Participant names
23
+ * @param {string[]} deps.participants - Canonical participant names.
20
24
  */
21
25
  constructor({ participants }) {
22
26
  this.queues = new Map();
@@ -27,55 +31,30 @@ export class MessageBus {
27
31
  }
28
32
  }
29
33
 
30
- /**
31
- * Send a question to a participant (direct), or broadcast when
32
- * `to === "@broadcast"`.
33
- * @param {string} from
34
- * @param {string} to - Recipient name or "@broadcast"
35
- * @param {string} text
36
- * @param {number} askId
37
- */
34
+ /** Send a question to one participant. */
38
35
  ask(from, to, text, askId) {
39
36
  this.#assertParticipant(from);
40
- if (to === "@broadcast") {
41
- for (const [name, queue] of this.queues) {
42
- if (name === from) continue;
43
- queue.push({ from, text, kind: "ask", askId, direct: false });
44
- this.#resolveWaiter(name);
45
- }
46
- return;
47
- }
48
37
  this.#assertParticipant(to);
49
- this.queues.get(to).push({ from, text, kind: "ask", askId, direct: true });
38
+ this.queues.get(to).push({ from, text, kind: "ask", askId });
50
39
  this.#resolveWaiter(to);
51
40
  }
52
41
 
53
42
  /**
54
- * Reply to a pending ask.
55
- * @param {string} from - Answerer (or "@orchestrator" for a synthetic answer)
56
- * @param {string} to - Original asker
57
- * @param {string} text
58
- * @param {number} askId
43
+ * Reply to a pending ask. `from === "@orchestrator"` is allowed for
44
+ * synthetic null answers the orchestrator is not a real participant
45
+ * but it routes through the bus.
59
46
  */
60
47
  answer(from, to, text, askId) {
61
48
  this.#assertParticipant(to);
62
- // Synthetic answers from the orchestrator bypass the participant check
63
- // on `from` — the orchestrator is not a message-bus participant.
64
49
  if (from !== "@orchestrator") this.#assertParticipant(from);
65
- this.queues
66
- .get(to)
67
- .push({ from, text, kind: "answer", askId, direct: true });
50
+ this.queues.get(to).push({ from, text, kind: "answer", askId });
68
51
  this.#resolveWaiter(to);
69
52
  }
70
53
 
71
- /**
72
- * Broadcast a message to every participant except the sender.
73
- * @param {string} from
74
- * @param {string} text
75
- */
54
+ /** Broadcast a message to every participant except the sender. */
76
55
  announce(from, text) {
77
56
  this.#assertParticipant(from);
78
- const msg = { from, text, kind: "announce", direct: false };
57
+ const msg = { from, text, kind: "announce" };
79
58
  for (const [name, queue] of this.queues) {
80
59
  if (name === from) continue;
81
60
  queue.push(msg);
@@ -83,53 +62,24 @@ export class MessageBus {
83
62
  }
84
63
  }
85
64
 
86
- /**
87
- * Send a direct message with no reply expected. Used by the Redirect
88
- * runtime plumbing (facilitator / supervisor) to deliver replacement
89
- * instructions to a single participant without engaging the ask/answer
90
- * contract.
91
- * @param {string} from
92
- * @param {string} to
93
- * @param {string} text
94
- */
95
- direct(from, to, text) {
96
- this.#assertParticipant(from);
97
- this.#assertParticipant(to);
98
- this.queues.get(to).push({ from, text, kind: "direct", direct: true });
99
- this.#resolveWaiter(to);
100
- }
101
-
102
- /**
103
- * Inject an orchestrator-originated reminder onto a single participant's
104
- * queue. Used by the turn-complete guard.
105
- * @param {string} to
106
- * @param {string} text
107
- */
65
+ /** Inject an orchestrator-originated reminder onto one participant's queue. */
108
66
  synthetic(to, text) {
109
67
  this.#assertParticipant(to);
110
68
  this.queues
111
69
  .get(to)
112
- .push({ from: "@orchestrator", text, kind: "synthetic", direct: true });
70
+ .push({ from: "@orchestrator", text, kind: "synthetic" });
113
71
  this.#resolveWaiter(to);
114
72
  }
115
73
 
116
- /**
117
- * Return and clear pending messages for a participant.
118
- * @param {string} participant - Participant name
119
- * @returns {{from: string, text: string, kind: string, direct: boolean, askId?: number}[]}
120
- */
74
+ /** Return and clear pending messages for a participant. */
121
75
  drain(participant) {
122
76
  this.#assertParticipant(participant);
123
- const queue = this.queues.get(participant);
124
- const messages = queue.splice(0);
125
- return messages;
77
+ return this.queues.get(participant).splice(0);
126
78
  }
127
79
 
128
80
  /**
129
81
  * Return a Promise that resolves when at least one message is pending.
130
82
  * Resolves immediately if messages are already queued.
131
- * @param {string} participant - Participant name
132
- * @returns {Promise<void>}
133
83
  */
134
84
  waitForMessages(participant) {
135
85
  this.#assertParticipant(participant);
@@ -156,11 +106,7 @@ export class MessageBus {
156
106
  }
157
107
  }
158
108
 
159
- /**
160
- * Factory function.
161
- * @param {object} deps - Same as MessageBus constructor
162
- * @returns {MessageBus}
163
- */
109
+ /** Factory function. */
164
110
  export function createMessageBus(deps) {
165
111
  return new MessageBus(deps);
166
112
  }