@forwardimpact/libeval 0.1.43 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,203 @@
1
+ /**
2
+ * DiscussTools — tool servers and prompts for the `discuss` orchestration
3
+ * mode. The lead's set is sibling to (not derived from) the facilitator's:
4
+ * `Conclude` is absent; instead `Adjourn` (terminal verdict) and `Recess`
5
+ * (suspend with a ResumeTrigger) end a run, and `RequestForComment` queues
6
+ * structured replies onto the trace for the bridge to deliver after the
7
+ * workflow run completes.
8
+ *
9
+ * Discuss-mode prompts and tool wiring stay in this module; nothing here
10
+ * imports from `facilitator.js`.
11
+ */
12
+
13
+ import { createSdkMcpServer, tool } from "@anthropic-ai/claude-agent-sdk";
14
+ import { z } from "zod";
15
+
16
+ import {
17
+ createAskHandler,
18
+ createAnswerHandler,
19
+ createAnnounceHandler,
20
+ createRollCallHandler,
21
+ createRedirectHandler,
22
+ } from "./orchestration-toolkit.js";
23
+
24
/**
 * System prompt appended for discuss-mode agent runners. The sentences
 * below are joined with single spaces into one prompt string.
 */
export const DISCUSS_AGENT_SYSTEM_PROMPT = [
  "You participate in an asynchronous discussion.",
  "Answer replies to an ask addressed to you.",
  "Ask sends a question to the lead or another participant.",
  "Announce broadcasts a message.",
  "RollCall lists participants.",
].join(" ");
31
+
32
/**
 * Fields of the resume trigger accepted by the `Recess` tool: a required
 * `kind` ("responses", "elapsed" or "either") plus optional `responses`
 * (number) and `elapsed` (string) values.
 */
const resumeTriggerFields = {
  kind: z.enum(["responses", "elapsed", "either"]),
  responses: z.number().optional(),
  elapsed: z.string().optional(),
};

/** Strict object schema built from the trigger fields above. */
const RESUME_TRIGGER_SCHEMA = z.object(resumeTriggerFields).strict();
39
+
40
/**
 * Build the lead's tool server for discuss mode.
 *
 * The surface is RollCall / Ask / Answer / Announce / Redirect plus the
 * discuss-only RequestForComment, Recess and Adjourn. There is no
 * `Conclude` tool: a discuss run ends through Adjourn or Recess.
 * RequestForComment queues structured replies onto `ctx.replies[]`.
 *
 * @param {object} ctx - Orchestration context (must carry `replies` array)
 * @returns {object} MCP server config (type: "sdk")
 */
export function createDiscussLeadToolServer(ctx) {
  // Tools are created in registration order, one named local per tool.
  const rollCall = tool(
    "RollCall",
    "List all participants in the session.",
    {},
    createRollCallHandler(ctx),
  );

  const ask = tool(
    "Ask",
    "Send a question to a participant. Omit 'to' to broadcast. The reply arrives via Answer.",
    { question: z.string(), to: z.string().optional() },
    createAskHandler(ctx, { from: "lead", defaultTo: undefined }),
  );

  const answer = tool(
    "Answer",
    "Reply to an ask addressed to you.",
    { message: z.string() },
    createAnswerHandler(ctx, { from: "lead" }),
  );

  const announce = tool(
    "Announce",
    "Broadcast a message with no reply expected.",
    { message: z.string() },
    createAnnounceHandler(ctx, { from: "lead" }),
  );

  const redirect = tool(
    "Redirect",
    "Interrupt a participant with replacement instructions.",
    { message: z.string(), to: z.string().optional() },
    createRedirectHandler(ctx),
  );

  const requestForComment = tool(
    "RequestForComment",
    "Post a fire-and-forget message to a channel via the bridge. Returns a correlation id; the reply arrives on a later workflow run.",
    {
      channel: z.string(),
      body: z.string(),
      addressees: z.array(z.string()).optional(),
    },
    createRequestForCommentHandler(ctx),
  );

  const recess = tool(
    "Recess",
    "Suspend the run. The bridge re-dispatches the workflow when the trigger fires.",
    { reason: z.string(), trigger: RESUME_TRIGGER_SCHEMA },
    createRecessHandler(ctx),
  );

  const adjourn = tool(
    "Adjourn",
    "End the discussion with a verdict and a summary.",
    {
      verdict: z.enum(["adjourned", "failed"]),
      summary: z.string(),
      outcome: z.string().optional(),
    },
    createAdjournHandler(ctx),
  );

  return createSdkMcpServer({
    name: "orchestration",
    tools: [
      rollCall,
      ask,
      answer,
      announce,
      redirect,
      requestForComment,
      recess,
      adjourn,
    ],
  });
}
114
+
115
/**
 * Build a participant's tool server for discuss mode.
 *
 * Exposes Ask / Answer / Announce / RollCall. The surface is declared here
 * rather than reused from facilitate mode so the two modes stay
 * structurally independent.
 *
 * @param {object} ctx - Orchestration context
 * @param {{from: string}} opts - Agent name (canonical)
 * @returns {object} MCP server config (type: "sdk")
 */
export function createDiscussAgentToolServer(ctx, { from }) {
  // An Ask without an explicit `to` is routed to the lead.
  const ask = tool(
    "Ask",
    "Send a question to another participant. Omit 'to' to ask the lead.",
    { question: z.string(), to: z.string().optional() },
    createAskHandler(ctx, { from, defaultTo: "lead" }),
  );

  const answer = tool(
    "Answer",
    "Reply to an ask addressed to you.",
    { message: z.string() },
    createAnswerHandler(ctx, { from }),
  );

  const announce = tool(
    "Announce",
    "Broadcast a message with no reply expected.",
    { message: z.string() },
    createAnnounceHandler(ctx, { from }),
  );

  const rollCall = tool(
    "RollCall",
    "List all participants in the session.",
    {},
    createRollCallHandler(ctx),
  );

  return createSdkMcpServer({
    name: "orchestration",
    tools: [ask, answer, announce, rollCall],
  });
}
155
+
156
/**
 * Create a RequestForComment handler.
 *
 * Each call allocates the next `rfc_<n>` correlation id from
 * `ctx.rfcCounter` and pushes one reply per addressee onto `ctx.replies`
 * (a single reply without an addressee when none are given). Every reply
 * queued by the same call shares the correlation id, and carries
 * `thread_id` when `ctx.discussionId` is set.
 *
 * @param {object} ctx - Orchestration context; must carry a `replies` array
 * @returns {function} Async tool handler resolving to a text content block
 *   holding `{ correlation_id, channel }` as JSON
 */
export function createRequestForCommentHandler(ctx) {
  return async ({ channel, body, addressees }) => {
    // A context that never had its counter initialised would otherwise
    // yield "rfc_NaN" on every call, making all correlation ids collide.
    const issuedSoFar = Number.isFinite(ctx.rfcCounter) ? ctx.rfcCounter : 0;
    ctx.rfcCounter = issuedSoFar + 1;
    const correlationId = `rfc_${ctx.rfcCounter}`;

    // No addressees means one unaddressed reply.
    const addresseeList =
      Array.isArray(addressees) && addressees.length > 0 ? addressees : [null];
    for (const addressee of addresseeList) {
      ctx.replies.push({
        ...(addressee && { addressee }),
        body,
        ...(ctx.discussionId && { thread_id: ctx.discussionId }),
        correlation_id: correlationId,
      });
    }

    return {
      content: [
        {
          type: "text",
          text: JSON.stringify({ correlation_id: correlationId, channel }),
        },
      ],
    };
  };
}
180
+
181
/**
 * Create a Recess handler.
 *
 * The handler records the trigger and reason on the context, flags the
 * session as both recessed and concluded, sets the verdict to "recessed"
 * and reuses the reason as the summary.
 *
 * @param {object} ctx - Orchestration context to mutate
 * @returns {function} Async tool handler resolving to a text content block
 */
export function createRecessHandler(ctx) {
  const acknowledgement = "Recess queued.";
  return async ({ reason, trigger }) => {
    Object.assign(ctx, {
      recessed: true,
      recessTrigger: trigger,
      recessReason: reason,
      concluded: true,
      verdict: "recessed",
      summary: reason,
    });
    return { content: [{ type: "text", text: acknowledgement }] };
  };
}
193
+
194
/**
 * Create an Adjourn handler.
 *
 * The handler marks the session as concluded and stores the verdict and
 * summary on the context. `ctx.outcome` is only overwritten when an
 * outcome is actually supplied.
 *
 * @param {object} ctx - Orchestration context to mutate
 * @returns {function} Async tool handler resolving to a text content block
 */
export function createAdjournHandler(ctx) {
  return async ({ verdict, summary, outcome }) => {
    Object.assign(ctx, { concluded: true, verdict, summary });
    if (outcome !== undefined) {
      ctx.outcome = outcome;
    }
    const acknowledgement = { type: "text", text: "Session adjourned." };
    return { content: [acknowledgement] };
  };
}
@@ -0,0 +1,332 @@
1
+ /**
2
+ * Discusser — async, suspendable orchestration on top of a within-run
3
+ * `OrchestrationLoop`. The lead role uses `DiscussTools` (Adjourn / Recess
4
+ * / RequestForComment) instead of the facilitator's Conclude.
5
+ *
6
+ * Discuss mode is a sibling of facilitate mode, not a subset of it. The
7
+ * within-run turn loop is shared via `OrchestrationLoop`, but the lead
8
+ * role, tool set, system prompts, and participant naming all stay
9
+ * mode-local.
10
+ */
11
+
12
+ import { Writable } from "node:stream";
13
+ import { resolve } from "node:path";
14
+
15
+ import { createAgentRunner } from "./agent-runner.js";
16
+ import { composeProfilePrompt } from "./profile-prompt.js";
17
+ import { SequenceCounter } from "./sequence-counter.js";
18
+ import { createMessageBus } from "./message-bus.js";
19
+ import { createOrchestrationContext } from "./orchestration-toolkit.js";
20
+ import {
21
+ createDiscussLeadToolServer,
22
+ createDiscussAgentToolServer,
23
+ DISCUSS_AGENT_SYSTEM_PROMPT,
24
+ } from "./discuss-tools.js";
25
+ import { OrchestrationLoop } from "./orchestration-loop.js";
26
+
27
/**
 * System prompt appended for the lead (Chair) runner in discuss mode.
 * The sentences below are joined with single spaces into one prompt string.
 */
export const DISCUSS_SYSTEM_PROMPT = [
  "You lead an asynchronous discussion across multiple participants and a human channel.",
  "Ask delivers a question to one named participant — or broadcasts when no addressee is named — and blocks until that participant answers.",
  "Announce delivers a message with no reply obligation.",
  "Redirect interrupts an in-progress participant with replacement instructions.",
  "RollCall returns the participant roster.",
  "RequestForComment posts a message to the human thread via the bridge.",
  "Every reply you want the human to see MUST go through RequestForComment — the bridge delivers only queued replies, not your text output.",
  "Recess suspends the run with a resumption trigger (responses / elapsed / either).",
  "Adjourn ends the discussion with a verdict ('adjourned' / 'failed') and a summary.",
  "You MUST call RequestForComment with your response before calling Adjourn.",
  "You MUST end every run by calling Adjourn or Recess — never end a turn with only text.",
].join(" ");
38
+
39
/**
 * Add the discuss-mode fields to an orchestration context, in place.
 *
 * Sets the discussion id and resets the recess state, the queued replies,
 * the RequestForComment counter and the outcome to their initial values.
 *
 * @param {object} ctx - Base orchestration context (mutated)
 * @param {string|null} discussionId
 * @returns {object} The same `ctx` object
 */
export function augmentContextForDiscuss(ctx, discussionId) {
  return Object.assign(ctx, {
    discussionId,
    recessed: false,
    recessTrigger: null,
    recessReason: null,
    replies: [],
    rfcCounter: 0,
    outcome: null,
  });
}
55
+
56
/**
 * Round-trip-safe representation of `ctx.pendingAsks` (a `Map`).
 *
 * A missing map (null or undefined) is serialised as an empty object,
 * mirroring `pendingAsksFromPlain`, which turns a missing plain object
 * back into an empty `Map`. Without this, a context lacking `pendingAsks`
 * would make summary emission throw.
 *
 * @param {Map<string, object>|null|undefined} map
 * @returns {object} Plain object keyed by the map's keys
 */
export function pendingAsksToPlain(map) {
  if (map == null) return {};
  return Object.fromEntries(map);
}
64
+
65
/**
 * Rebuild the pending-asks `Map` from its plain-object form.
 *
 * Any falsy input (for example null or undefined) yields an empty `Map`.
 *
 * @param {object|null|undefined} plain
 * @returns {Map<string, object>}
 */
export function pendingAsksFromPlain(plain) {
  const entries = plain ? Object.entries(plain) : [];
  return new Map(entries);
}
74
+
75
/** Writable sink that acknowledges every chunk and discards it. */
const devNull = new Writable({
  write: (_chunk, _encoding, done) => {
    done();
  },
});
80
+
81
/**
 * Async orchestrator for the `discuss` mode.
 *
 * Within-run turns are delegated to an `OrchestrationLoop`; this class
 * adds the discussion id, the optional meta header, and the
 * discuss-augmented terminal summary (replies, resume trigger, pending
 * asks).
 */
export class Discusser {
  /**
   * @param {object} deps
   * @param {OrchestrationLoop} deps.loop - Required
   * @param {object} deps.ctx - Required orchestration context
   * @param {import("stream").Writable} deps.output - Required trace sink
   * @param {object} deps.redactor - Required; every emitted record is
   *   passed through `redactValue`
   * @param {string|null} [deps.discussionId] - Defaults to null
   * @param {SequenceCounter} [deps.counter] - Defaults to a new counter
   * @throws {Error} When loop, ctx, output or redactor is missing
   */
  constructor({ loop, ctx, output, discussionId, counter, redactor }) {
    // Validated in this order so the first missing dependency is reported.
    const required = { loop, ctx, output, redactor };
    for (const [label, dependency] of Object.entries(required)) {
      if (!dependency) throw new Error(`${label} is required`);
    }
    Object.assign(this, {
      loop,
      ctx,
      output,
      discussionId: discussionId ?? null,
      counter: counter ?? new SequenceCounter(),
      redactor,
    });
  }

  /**
   * Run the discussion: emit the meta header (only when a discussion id is
   * set), let the loop run the task, then emit the discuss summary. The
   * loop emits its own summary before returning, so this one is written
   * last.
   *
   * A missing verdict is reported as "failed"; only "adjourned" and
   * "concluded" count as success.
   *
   * @param {string} task
   * @returns {Promise<{success: boolean, verdict: string, turns: number, replies: object[], trigger: object|null}>}
   */
  async run(task) {
    this.#emitMeta();

    await this.loop.run(task);

    const verdict = this.ctx.verdict ?? "failed";
    const success = ["adjourned", "concluded"].includes(verdict);
    this.#emitDiscussSummary({ success, verdict, turns: this.loop.leadTurns });

    // Hand back a copy of the replies so callers cannot alter the context.
    const replies = this.ctx.replies.slice();
    const trigger = this.ctx.recessTrigger ?? null;
    return { success, verdict, turns: this.loop.leadTurns, replies, trigger };
  }

  /** Write the meta header; a no-op without a discussion id. */
  #emitMeta() {
    if (this.discussionId) {
      this.#writeRecord({ type: "meta", discussion_id: this.discussionId });
    }
  }

  /**
   * Write the terminal summary. Optional fields are added only when
   * truthy; keys are inserted in a fixed order because that order is what
   * ends up in the serialised line.
   */
  #emitDiscussSummary({ success, verdict, turns }) {
    const { ctx, discussionId } = this;
    const event = { type: "summary", success, verdict, turns };
    if (ctx.summary) event.summary = ctx.summary;
    if (ctx.outcome) event.outcome = ctx.outcome;
    event.replies = ctx.replies;
    if (ctx.recessTrigger) event.trigger = ctx.recessTrigger;
    if (discussionId) event.discussion_id = discussionId;
    event.pending_asks = pendingAsksToPlain(ctx.pendingAsks);
    this.#writeRecord(event);
  }

  /** Wrap an event in a sequenced orchestrator record, redact, write it as one JSON line. */
  #writeRecord(event) {
    const record = this.redactor.redactValue({
      source: "orchestrator",
      seq: this.counter.next(),
      event,
    });
    this.output.write(`${JSON.stringify(record)}\n`);
  }
}
179
+
180
/**
 * Factory — wires the lead and agent runners with `DiscussTools`, builds
 * the `OrchestrationLoop` (with `leadName: "lead"` and discuss-mode
 * protocol tagging) and the wrapping `Discusser`.
 *
 * When `resumeContext` is supplied, its pending asks, participants,
 * replies and id counters are restored onto the fresh context. Arrays are
 * copied rather than adopted, so replies queued during this run never
 * leak back into the caller's resume payload.
 *
 * @param {object} deps
 * @param {string} [deps.leadProfile]
 * @param {string} [deps.leadModel]
 * @param {string} [deps.agentModel]
 * @param {Array<object>} [deps.agentConfigs]
 * @param {string|null} [deps.discussionId]
 * @param {object|null} [deps.resumeContext]
 * @param {function} deps.query
 * @param {import("stream").Writable} deps.output
 * @param {number} [deps.maxTurns] - Lead/loop turn budget (default 40)
 * @param {string} [deps.leadCwd] - Defaults to the current directory
 * @param {string} [deps.profilesDir] - Defaults to `.claude/agents` under the lead cwd
 * @param {string} [deps.taskAmend]
 * @param {object} deps.redactor
 * @returns {Discusser}
 * @throws {Error} When `redactor` is missing
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: factory wires N runners + resume hydration paths
export function createDiscusser({
  leadProfile,
  leadModel,
  agentModel,
  agentConfigs,
  discussionId,
  resumeContext,
  query,
  output,
  maxTurns,
  leadCwd,
  profilesDir,
  taskAmend,
  redactor,
}) {
  if (!redactor) throw new Error("redactor is required");
  const resolvedLeadCwd = resolve(leadCwd ?? ".");
  const resolvedProfilesDir =
    profilesDir ?? resolve(resolvedLeadCwd, ".claude/agents");
  const resolvedConfigs = agentConfigs ?? [];

  const ctx = augmentContextForDiscuss(
    createOrchestrationContext(),
    discussionId ?? null,
  );

  // Hydrate resume context — pendingAsks, participants, replies and the
  // ask / RFC id counters. resumeContext is the entire suspend/resume
  // contract; every mutation a Recess needs to preserve must travel
  // through it.
  if (resumeContext) {
    if (resumeContext.pendingAsks) {
      ctx.pendingAsks = pendingAsksFromPlain(resumeContext.pendingAsks);
    }
    // Copy the arrays: the context is mutated during the run (for example
    // RequestForComment pushes onto ctx.replies) and the caller's resume
    // payload must stay untouched.
    if (Array.isArray(resumeContext.participants)) {
      ctx.participants = [...resumeContext.participants];
    }
    if (Array.isArray(resumeContext.replies)) {
      ctx.replies = [...resumeContext.replies];
    }
    if (typeof resumeContext.askIdCounter === "number") {
      ctx.askIdCounter = resumeContext.askIdCounter;
    }
    if (typeof resumeContext.rfcCounter === "number") {
      ctx.rfcCounter = resumeContext.rfcCounter;
    }
  }

  const messageBus = createMessageBus({
    participants: ["lead", ...resolvedConfigs.map((a) => a.name)],
  });
  ctx.messageBus = messageBus;
  // Only build the roster when resume hydration did not supply one.
  if (ctx.participants.length === 0) {
    ctx.participants = [
      { name: "lead", role: "lead" },
      ...resolvedConfigs.map((a) => ({ name: a.name, role: a.role })),
    ];
  }

  // With a profile, compose its prompt with the trailer; otherwise append
  // the trailer to the claude_code preset.
  const systemPromptFor = (profile, trailer) => {
    if (!trailer) throw new Error("trailer is required");
    return profile
      ? composeProfilePrompt(profile, {
          profilesDir: resolvedProfilesDir,
          trailer,
        })
      : { type: "preset", preset: "claude_code", append: trailer };
  };

  // Assigned at the end; the runners' onLine callbacks close over it and
  // are only invoked once the discusser exists.
  let discusser;
  const leadServer = createDiscussLeadToolServer(ctx);

  const agents = resolvedConfigs.map((config) => {
    const agentServer = createDiscussAgentToolServer(ctx, {
      from: config.name,
    });

    const agentTrailer = config.systemPromptAmend
      ? `${DISCUSS_AGENT_SYSTEM_PROMPT}\n\n${config.systemPromptAmend}`
      : DISCUSS_AGENT_SYSTEM_PROMPT;

    const runner = createAgentRunner({
      cwd: config.cwd ?? resolvedLeadCwd,
      query,
      output: devNull,
      model: agentModel ?? "claude-opus-4-7[1m]",
      maxTurns: config.maxTurns ?? 50,
      allowedTools: config.allowedTools,
      onLine: (line) => discusser.loop.emitLine(config.name, line),
      mcpServers: { orchestration: agentServer },
      settingSources: ["project"],
      systemPrompt: systemPromptFor(config.agentProfile, agentTrailer),
      redactor,
    });

    return { name: config.name, role: config.role, runner };
  });

  const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
  const leadRunner = createAgentRunner({
    cwd: resolvedLeadCwd,
    query,
    output: devNull,
    model: leadModel ?? "claude-opus-4-7[1m]",
    maxTurns: maxTurns ?? 40,
    allowedTools: ["Bash", "Read", "Glob", "Grep", "Write", "Edit"],
    disallowedTools: defaultDisallowed,
    onLine: (line) => discusser.loop.emitLine("lead", line),
    mcpServers: { orchestration: leadServer },
    settingSources: ["project"],
    systemPrompt: systemPromptFor(leadProfile, DISCUSS_SYSTEM_PROMPT),
    redactor,
  });

  const loop = new OrchestrationLoop({
    leadRunner,
    agents,
    messageBus,
    output,
    leadName: "lead",
    mode: "discussion",
    maxTurns: maxTurns ?? 40,
    ctx,
    taskAmend,
    redactor,
  });

  // The discusser shares the loop's counter so both number their records
  // from one sequence.
  discusser = new Discusser({
    loop,
    ctx,
    output,
    discussionId: discussionId ?? null,
    redactor,
    counter: loop.counter,
  });
  return discusser;
}