@forwardimpact/libeval 0.1.47 → 0.1.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -69,6 +69,20 @@ Inbox lines on resume:
69
69
  Async means the lead can issue Asks, end its turn, and plan in the gap
70
70
  while participants work in parallel — nothing blocks the LLM thread.
71
71
 
72
+ ### Discuss-mode replies
73
+
74
+ In discussion mode, Answer calls routed to the lead are captured as
75
+ thread replies delivered via the bridge callback. The lead delegates work
76
+ via Ask; each agent's Answer becomes a separate reply posted to the
77
+ discussion thread. No explicit reply tool is needed on the lead surface —
78
+ the message bus intercepts answers and appends them to `ctx.replies[]`.
79
+
80
+ `RequestForComment` is a separate coordination tool available on agent
81
+ roles (facilitated agents and discuss agents). It queues an intent to
82
+ open a new Discussion thread for long-horizon coordination on open
83
+ questions; these are accumulated in `ctx.rfcs[]`, separate from the
84
+ thread replies in `ctx.replies[]`.
85
+
72
86
  ## Orchestration loop
73
87
 
74
88
  Each participant drains the bus (or waits), runs/resumes the LLM with
@@ -84,15 +98,15 @@ only feeds the summary's `success`/`verdict`.
84
98
 
85
99
  ## Tool surface, by role
86
100
 
87
- | Role | Ask | Answer | Announce | RollCall | Conclude | Other |
88
- | ------------ | --- | ------ | -------- | -------- | -------- | ---------------------------------------- |
89
- | Facilitator | ✓ | ✓ | ✓ | ✓ | ✓ | |
90
- | Fac. agent | ✓ | ✓ | ✓ | ✓ | | |
91
- | Supervisor | ✓ | ✓ | ✓ | ✓ | ✓ | |
92
- | Sup. agent | ✓ | ✓ | ✓ | ✓ | | |
93
- | Discuss lead | ✓ | ✓ | ✓ | ✓ | | `RequestForComment`, `Recess`, `Adjourn` |
94
- | Discuss agt | ✓ | ✓ | ✓ | ✓ | | |
95
- | Judge | | | | | ✓ | |
101
+ | Role | Ask | Answer | Announce | RollCall | Conclude | Other |
102
+ | ------------ | --- | ------ | -------- | -------- | -------- | ------------------------------ |
103
+ | Facilitator | ✓ | ✓ | ✓ | ✓ | ✓ | |
104
+ | Fac. agent | ✓ | ✓ | ✓ | ✓ | | `RequestForComment` |
105
+ | Supervisor | ✓ | ✓ | ✓ | ✓ | ✓ | |
106
+ | Sup. agent | ✓ | ✓ | ✓ | ✓ | | |
107
+ | Discuss lead | ✓ | ✓ | ✓ | ✓ | | `Recess`, `Adjourn` |
108
+ | Discuss agt | ✓ | ✓ | ✓ | ✓ | | `RequestForComment` |
109
+ | Judge | | | | | ✓ | |
96
110
 
97
111
  Ask's `to` accepts a participant name on multi-participant roles
98
112
  (facilitator, discuss lead, all participants). The supervise pair has
@@ -152,10 +166,10 @@ downloadable through retention.
152
166
  | ----------------------------------------------------------- | -------------------------------------------------------------------- |
153
167
  | `agent-runner.js` | One Claude Agent SDK session; emits NDJSON via the redactor. |
154
168
  | `message-bus.js` | Per-participant queues + `waitForMessages` Promise wakeup. |
155
- | `orchestration-toolkit.js` | Shared Ask/Answer/Announce/Conclude/RollCall handlers + builders. |
169
+ | `orchestration-toolkit.js` | Shared Ask/Answer/Announce/Conclude/RollCall/RequestForComment handlers + builders. |
156
170
  | `orchestration-loop.js` | Unified lead+participant loop; reminder/violation handling. |
157
171
  | `facilitator.js` / `supervisor.js` / `discusser.js` / `judge.js` | Per-mode class + factory + system prompt. |
158
- | `discuss-tools.js` | Discuss-only `RequestForComment`/`Recess`/`Adjourn`. |
172
+ | `discuss-tools.js` | Discuss-only `Recess`/`Adjourn`. |
159
173
  | `trace-collector.js` / `trace-query.js` / `trace-github.js` | Trace ingestion / querying / GitHub-attachment helpers. |
160
174
  | `redaction.js` | Env-var allowlist + credential-shape pattern redaction. |
161
175
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.47",
3
+ "version": "0.1.48",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
@@ -52,7 +52,7 @@ export function parseDiscussOptions(values) {
52
52
  taskContent,
53
53
  taskAmend,
54
54
  agentConfigs,
55
- leadProfile: values["lead-profile"] ?? "release-engineer",
55
+ leadProfile: values["lead-profile"] ?? undefined,
56
56
  leadModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
57
57
  agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
58
58
  maxTurns,
@@ -1,15 +1,19 @@
1
1
  /**
2
2
  * DiscussTools — discuss-mode tool servers. The lead's surface extends the
3
- * base set with three discuss-only terminal tools:
3
+ * base set with two discuss-only terminal tools:
4
4
  *
5
- * - `RequestForComment` posts a fire-and-forget message to a human channel
6
- * via the bridge; the reply arrives on a later workflow run.
7
5
  * - `Recess` suspends the session with a resumption trigger.
8
6
  * - `Adjourn` ends the discussion with a verdict.
9
7
  *
10
- * `Conclude` is absent — discuss mode ends via Adjourn or Recess. The
11
- * agent surface is identical to the facilitated agent's: Ask / Answer /
12
- * Announce / RollCall, with Ask defaulting to the lead.
8
+ * `Conclude` is absent — discuss mode ends via Adjourn or Recess.
9
+ *
10
+ * `RequestForComment` is an agent-level coordination tool available on
11
+ * discuss agents and facilitated agents (not leads). It opens a new
12
+ * Discussion thread for long-horizon coordination on open questions.
13
+ *
14
+ * In discuss mode, each agent Answer routed to the lead is captured as a
15
+ * thread reply delivered via the bridge callback — no explicit reply tool
16
+ * is needed on the lead surface.
13
17
  */
14
18
 
15
19
  import { tool } from "@anthropic-ai/claude-agent-sdk";
@@ -19,16 +23,17 @@ import {
19
23
  baseTools,
20
24
  concludeSession,
21
25
  orchestrationServer,
26
+ requestForCommentTool,
22
27
  } from "./orchestration-toolkit.js";
23
28
 
24
- /** System prompt appended for discuss-mode agent runners. */
29
+ /** System prompt for discuss-mode agent participants. L0 mechanics only per COALIGNED. */
25
30
  export const DISCUSS_AGENT_SYSTEM_PROMPT =
26
- "You participate in an asynchronous discussion. " +
27
- "Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
28
- "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
29
- "Ask sends a question to the lead or another participant and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox. " +
30
- "Announce broadcasts a message to every other participant use this for unsolicited remarks or to reply to an Announce. " +
31
- "RollCall lists participants.";
31
+ "You are a participant in a discussion.\n" +
32
+ "Each question arrives as `[ask#N] <name>: <text>`.\n" +
33
+ "Quote N as askId on your `Answer` to route the reply correctly.\n" +
34
+ "Your `Answer` is posted to the discussion thread as a separate reply.\n" +
35
+ "If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
36
+ "Do not redo completed work.";
32
37
 
33
38
  const RESUME_TRIGGER_SCHEMA = z
34
39
  .object({
@@ -42,16 +47,6 @@ const RESUME_TRIGGER_SCHEMA = z
42
47
  export function createDiscussLeadToolServer(ctx) {
43
48
  return orchestrationServer([
44
49
  ...baseTools(ctx, { from: "lead", defaultTo: undefined, broadcast: true }),
45
- tool(
46
- "RequestForComment",
47
- "Post a fire-and-forget message to a channel via the bridge. Returns a correlation id; the reply arrives on a later workflow run.",
48
- {
49
- channel: z.string(),
50
- body: z.string(),
51
- addressees: z.array(z.string()).optional(),
52
- },
53
- createRequestForCommentHandler(ctx),
54
- ),
55
50
  tool(
56
51
  "Recess",
57
52
  "Suspend the run. The bridge re-dispatches the workflow when the trigger fires.",
@@ -73,33 +68,10 @@ export function createDiscussLeadToolServer(ctx) {
73
68
 
74
69
  /** Discuss-mode agent tool server. */
75
70
  export function createDiscussAgentToolServer(ctx, { from }) {
76
- return orchestrationServer(
77
- baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
78
- );
79
- }
80
-
81
- /** RequestForComment handler — queues structured replies on `ctx.replies[]`. */
82
- export function createRequestForCommentHandler(ctx) {
83
- return async ({ channel, body, addressees }) => {
84
- const correlationId = `rfc_${++ctx.rfcCounter}`;
85
- const addresseeList = addressees?.length ? addressees : [null];
86
- for (const addressee of addresseeList) {
87
- ctx.replies.push({
88
- ...(addressee && { addressee }),
89
- body,
90
- ...(ctx.discussionId && { thread_id: ctx.discussionId }),
91
- correlation_id: correlationId,
92
- });
93
- }
94
- return {
95
- content: [
96
- {
97
- type: "text",
98
- text: JSON.stringify({ correlation_id: correlationId, channel }),
99
- },
100
- ],
101
- };
102
- };
71
+ return orchestrationServer([
72
+ ...baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
73
+ requestForCommentTool(ctx),
74
+ ]);
103
75
  }
104
76
 
105
77
  /**
package/src/discusser.js CHANGED
@@ -1,19 +1,23 @@
1
1
  /**
2
2
  * Discusser — async, suspendable orchestration on top of a within-run
3
- * `OrchestrationLoop`. The lead role uses `DiscussTools` (Adjourn / Recess
4
- * / RequestForComment) instead of the facilitator's Conclude.
3
+ * `OrchestrationLoop`. The lead role uses `DiscussTools` (Adjourn / Recess)
4
+ * instead of the facilitator's Conclude.
5
5
  *
6
6
  * Discuss mode is a sibling of facilitate mode, not a subset of it. The
7
7
  * within-run turn loop is shared via `OrchestrationLoop`, but the lead
8
8
  * role, tool set, system prompts, and participant naming all stay
9
9
  * mode-local.
10
+ *
11
+ * Each agent Answer routed to the lead is captured as a thread reply
12
+ * delivered via the bridge callback — no explicit reply tool is needed
13
+ * on the lead surface.
10
14
  */
11
15
 
12
16
  import { Writable } from "node:stream";
13
17
  import { resolve } from "node:path";
14
18
 
15
19
  import { createAgentRunner } from "./agent-runner.js";
16
- import { composeProfilePrompt } from "./profile-prompt.js";
20
+ import { composeSystemPrompt } from "./profile-prompt.js";
17
21
  import { SequenceCounter } from "./sequence-counter.js";
18
22
  import { createMessageBus } from "./message-bus.js";
19
23
  import { createOrchestrationContext } from "./orchestration-toolkit.js";
@@ -24,18 +28,18 @@ import {
24
28
  } from "./discuss-tools.js";
25
29
  import { OrchestrationLoop } from "./orchestration-loop.js";
26
30
 
27
- /** System prompt appended for the lead (Chair) runner in discuss mode. */
31
+ /** System prompt for the discuss-mode lead. L0 mechanics only per COALIGNED. */
28
32
  export const DISCUSS_SYSTEM_PROMPT =
29
- "You lead an asynchronous discussion across multiple participants and a human channel. " +
30
- "Ask sends a question and returns immediately with {askIds:[N,…]}. The reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox — between turns you can plan, reflect, or send more Asks while participants work in parallel. End your turn with text after you've asked everything you intend to; the orchestrator wakes you when the next message lands. " +
31
- "Answer replies to an ask a participant addressed to you (you'll see it tagged `[ask#N] <participant>: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
32
- "Announce delivers a message with no reply obligation. " +
33
- "RollCall returns the participant roster. " +
34
- "RequestForComment posts a message to the human thread via the bridge. Every reply you want the human to see MUST go through RequestForComment — the bridge delivers only queued replies, not your text output. " +
35
- "Recess suspends the run with a resumption trigger (responses / elapsed / either); any open Asks get a synthetic '[no answer: session concluded]' on the asker's queue so nothing dangles. " +
36
- "Adjourn ends the discussion with a verdict ('adjourned' / 'failed') and a summary. " +
37
- "Multiple Ask / Announce calls in one assistant turn dispatch in parallel — issue them as parallel tool_use blocks rather than sending the same question both broadcast and individually. " +
38
- "You MUST call RequestForComment with your response before calling Adjourn. You MUST end every run by calling Adjourn or Recess — never end a turn with only text *after* every Ask round has resolved.";
33
+ "You lead a discussion.\n" +
34
+ "You have no tools to perform work yourself.\n" +
35
+ "Use `RollCall` to list participants.\n" +
36
+ "Use `Ask` to delegate work to the best-suited participant.\n" +
37
+ "Participants are domain experts; state the task, not how to do it.\n" +
38
+ "Each participant's `Answer` is posted to the discussion thread as a separate reply.\n" +
39
+ "`Ask` returns {askIds:[N,…]} immediately.\n" +
40
+ "Answers arrive on your next turn as `[answer#N] <participant>: <text>`.\n" +
41
+ "Multiple `Ask` calls in one turn run participants concurrently.\n" +
42
+ "Wait for all participants to `Answer` before calling `Adjourn` or `Recess`.";
39
43
 
40
44
  /**
41
45
  * Augment a base orchestration context with discuss-mode fields.
@@ -47,6 +51,7 @@ export function augmentContextForDiscuss(ctx, discussionId) {
47
51
  ctx.discussionId = discussionId;
48
52
  ctx.recessTrigger = null;
49
53
  ctx.replies = [];
54
+ ctx.rfcs = [];
50
55
  ctx.rfcCounter = 0;
51
56
  ctx.outcome = null;
52
57
  return ctx;
@@ -141,6 +146,7 @@ export class Discusser {
141
146
  ...(this.ctx.summary && { summary: this.ctx.summary }),
142
147
  ...(this.ctx.outcome && { outcome: this.ctx.outcome }),
143
148
  replies: this.ctx.replies,
149
+ ...(this.ctx.rfcs?.length && { rfcs: this.ctx.rfcs }),
144
150
  ...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
145
151
  ...(this.discussionId && { discussion_id: this.discussionId }),
146
152
  };
@@ -228,6 +234,20 @@ export function createDiscusser({
228
234
  const messageBus = createMessageBus({
229
235
  participants: ["lead", ...resolvedConfigs.map((a) => a.name)],
230
236
  });
237
+
238
+ // Intercept answers routed to the lead — each becomes a discussion reply.
239
+ const originalAnswer = messageBus.answer.bind(messageBus);
240
+ messageBus.answer = (from, to, text, askId) => {
241
+ if (to === "lead" && from !== "@orchestrator") {
242
+ ctx.replies.push({
243
+ body: text,
244
+ agent: from,
245
+ ...(ctx.discussionId && { thread_id: ctx.discussionId }),
246
+ });
247
+ }
248
+ originalAnswer(from, to, text, askId);
249
+ };
250
+
231
251
  ctx.messageBus = messageBus;
232
252
  if (ctx.participants.length === 0) {
233
253
  ctx.participants = [
@@ -236,16 +256,6 @@ export function createDiscusser({
236
256
  ];
237
257
  }
238
258
 
239
- const systemPromptFor = (profile, trailer) => {
240
- if (!trailer) throw new Error("trailer is required");
241
- return profile
242
- ? composeProfilePrompt(profile, {
243
- profilesDir: resolvedProfilesDir,
244
- trailer,
245
- })
246
- : { type: "preset", preset: "claude_code", append: trailer };
247
- };
248
-
249
259
  let discusser;
250
260
  const leadServer = createDiscussLeadToolServer(ctx);
251
261
 
@@ -268,26 +278,44 @@ export function createDiscusser({
268
278
  onLine: (line) => discusser.loop.emitLine(config.name, line),
269
279
  mcpServers: { orchestration: agentServer },
270
280
  settingSources: ["project"],
271
- systemPrompt: systemPromptFor(config.agentProfile, agentTrailer),
281
+ systemPrompt: composeSystemPrompt({
282
+ role: "agent",
283
+ profile: config.agentProfile,
284
+ profilesDir: resolvedProfilesDir,
285
+ trailer: agentTrailer,
286
+ }),
272
287
  redactor,
273
288
  });
274
289
 
275
290
  return { name: config.name, role: config.role, runner };
276
291
  });
277
292
 
278
- const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
293
+ const defaultDisallowed = [
294
+ "Agent",
295
+ "Task",
296
+ "TaskOutput",
297
+ "TaskStop",
298
+ "Bash",
299
+ "Write",
300
+ "Edit",
301
+ ];
279
302
  const leadRunner = createAgentRunner({
280
303
  cwd: resolvedLeadCwd,
281
304
  query,
282
305
  output: devNull,
283
306
  model: leadModel ?? "claude-opus-4-7[1m]",
284
307
  maxTurns: maxTurns ?? 80,
285
- allowedTools: ["Bash", "Read", "Glob", "Grep", "Write", "Edit"],
308
+ allowedTools: ["Read", "Glob", "Grep"],
286
309
  disallowedTools: defaultDisallowed,
287
310
  onLine: (line) => discusser.loop.emitLine("lead", line),
288
311
  mcpServers: { orchestration: leadServer },
289
312
  settingSources: ["project"],
290
- systemPrompt: systemPromptFor(leadProfile, DISCUSS_SYSTEM_PROMPT),
313
+ systemPrompt: composeSystemPrompt({
314
+ role: "lead",
315
+ profile: leadProfile,
316
+ profilesDir: resolvedProfilesDir,
317
+ trailer: DISCUSS_SYSTEM_PROMPT,
318
+ }),
291
319
  redactor,
292
320
  });
293
321
 
@@ -9,7 +9,7 @@
9
9
  import { Writable } from "node:stream";
10
10
  import { resolve } from "node:path";
11
11
  import { createAgentRunner } from "./agent-runner.js";
12
- import { composeProfilePrompt } from "./profile-prompt.js";
12
+ import { composeSystemPrompt } from "./profile-prompt.js";
13
13
  import { createMessageBus } from "./message-bus.js";
14
14
  import {
15
15
  createOrchestrationContext,
@@ -18,26 +18,25 @@ import {
18
18
  } from "./orchestration-toolkit.js";
19
19
  import { OrchestrationLoop } from "./orchestration-loop.js";
20
20
 
21
- /** System prompt appended for the facilitator runner. */
21
+ /** System prompt for the facilitator lead. L0 mechanics only per COALIGNED. */
22
22
  export const FACILITATOR_SYSTEM_PROMPT =
23
- "You coordinate multiple participants via these tools: " +
24
- "Ask sends a question and returns immediately with {askIds:[N,…]}. The reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox — between turns you can plan, reflect, or send more Asks while participants work in parallel. End your turn with text after you've asked everything you intend to; the orchestrator wakes you again as soon as a reply (or any message) lands. " +
25
- "Answer replies to an ask a participant addressed to you (you'll see it tagged `[ask#N] <participant>: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
26
- "Announce delivers a message with no reply obligation. " +
27
- "RollCall returns the participant roster. " +
28
- "Conclude ends the session with a verdict ('success' or 'failure') and a summary. " +
29
- "Multiple Ask / Announce calls in one assistant turn dispatch in parallel — issue them as parallel tool_use blocks rather than sending the same question both broadcast and individually. " +
30
- "You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
31
- "If you can answer the task yourself, still call Conclude with verdict='success' and the answer as the summary.";
32
-
33
- /** System prompt appended for facilitated agent runners. */
23
+ "You are the facilitator.\n" +
24
+ "You have no tools to perform work yourself.\n" +
25
+ "Use `RollCall` to list participants.\n" +
26
+ "Use `Ask` to delegate work to the best-suited participant.\n" +
27
+ "Participants are domain experts; state the task, not how to do it.\n" +
28
+ "`Ask` returns {askIds:[N,…]} immediately.\n" +
29
+ "Answers arrive on your next turn as `[answer#N] <participant>: <text>`.\n" +
30
+ "Multiple `Ask` calls in one turn run participants concurrently.\n" +
31
+ "Wait for all participants to `Answer` before calling `Conclude`.";
32
+
33
+ /** System prompt for facilitated agent participants. L0 mechanics only per COALIGNED. */
34
34
  export const FACILITATED_AGENT_SYSTEM_PROMPT =
35
- "You participate in a coordinated session. " +
36
- "Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
37
- "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
38
- "Ask sends a question to another participant and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox. " +
39
- "Announce broadcasts a message to every other participant — use this for unsolicited remarks or to reply to an Announce. " +
40
- "RollCall lists participants.";
35
+ "You are a participant in a facilitated session.\n" +
36
+ "Each question arrives as `[ask#N] <name>: <text>`.\n" +
37
+ "Quote N as askId on your `Answer` to route the reply correctly.\n" +
38
+ "If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
39
+ "Do not redo completed work.";
41
40
 
42
41
  /**
43
42
  * Facilitate-mode wrapper around `OrchestrationLoop`. The lead is named
@@ -113,15 +112,6 @@ export function createFacilitator({
113
112
  if (!redactor) throw new Error("redactor is required");
114
113
  const resolvedProfilesDir =
115
114
  profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
116
- const systemPromptFor = (profile, trailer) => {
117
- if (!trailer) throw new Error("trailer is required");
118
- return profile
119
- ? composeProfilePrompt(profile, {
120
- profilesDir: resolvedProfilesDir,
121
- trailer,
122
- })
123
- : { type: "preset", preset: "claude_code", append: trailer };
124
- };
125
115
  const ctx = createOrchestrationContext();
126
116
  const messageBus = createMessageBus({
127
117
  participants: ["facilitator", ...agentConfigs.map((a) => a.name)],
@@ -155,17 +145,27 @@ export function createFacilitator({
155
145
  onLine: (line) => facilitator.emitLine(config.name, line),
156
146
  mcpServers: { orchestration: agentServer },
157
147
  settingSources: ["project"],
158
- systemPrompt: systemPromptFor(config.agentProfile, agentTrailer),
148
+ systemPrompt: composeSystemPrompt({
149
+ role: "agent",
150
+ profile: config.agentProfile,
151
+ profilesDir: resolvedProfilesDir,
152
+ trailer: agentTrailer,
153
+ }),
159
154
  redactor,
160
155
  });
161
156
 
162
157
  return { name: config.name, role: config.role, runner };
163
158
  });
164
159
 
165
- // Block the SDK's sub-agent spawn tools on the facilitator: its job is to
166
- // coordinate participants through the libeval orchestration harness, not
167
- // to fan work out to ad-hoc Claude Code sub-agents. Mirrors the supervisor.
168
- const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
160
+ const defaultDisallowed = [
161
+ "Agent",
162
+ "Task",
163
+ "TaskOutput",
164
+ "TaskStop",
165
+ "Bash",
166
+ "Write",
167
+ "Edit",
168
+ ];
169
169
  const disallowedTools = facilitatorDisallowedTools
170
170
  ? [...new Set([...defaultDisallowed, ...facilitatorDisallowedTools])]
171
171
  : defaultDisallowed;
@@ -176,22 +176,17 @@ export function createFacilitator({
176
176
  output: devNull,
177
177
  model: facilitatorModel ?? model,
178
178
  maxTurns: maxTurns ?? 80,
179
- allowedTools: facilitatorAllowedTools ?? [
180
- "Bash",
181
- "Read",
182
- "Glob",
183
- "Grep",
184
- "Write",
185
- "Edit",
186
- ],
179
+ allowedTools: facilitatorAllowedTools ?? ["Read", "Glob", "Grep"],
187
180
  disallowedTools,
188
181
  onLine: (line) => facilitator.emitLine("facilitator", line),
189
182
  mcpServers: { orchestration: facilitatorServer },
190
183
  settingSources: ["project"],
191
- systemPrompt: systemPromptFor(
192
- facilitatorProfile,
193
- FACILITATOR_SYSTEM_PROMPT,
194
- ),
184
+ systemPrompt: composeSystemPrompt({
185
+ role: "lead",
186
+ profile: facilitatorProfile,
187
+ profilesDir: resolvedProfilesDir,
188
+ trailer: FACILITATOR_SYSTEM_PROMPT,
189
+ }),
195
190
  redactor,
196
191
  });
197
192
 
package/src/index.js CHANGED
@@ -8,7 +8,11 @@ export {
8
8
  parseGitRemote,
9
9
  } from "./trace-github.js";
10
10
  export { AgentRunner, createAgentRunner } from "./agent-runner.js";
11
- export { composeProfilePrompt } from "./profile-prompt.js";
11
+ export {
12
+ composeProfilePrompt,
13
+ composeLeadPrompt,
14
+ composeSystemPrompt,
15
+ } from "./profile-prompt.js";
12
16
  export {
13
17
  Supervisor,
14
18
  createSupervisor,
@@ -19,6 +23,7 @@ export { TeeWriter, createTeeWriter } from "./tee-writer.js";
19
23
  export { SequenceCounter, createSequenceCounter } from "./sequence-counter.js";
20
24
  export {
21
25
  createOrchestrationContext,
26
+ createRequestForCommentHandler,
22
27
  createSupervisorToolServer,
23
28
  createSupervisedAgentToolServer,
24
29
  createFacilitatorToolServer,
@@ -5,15 +5,15 @@
5
5
  *
6
6
  * **Tool surface, by role:**
7
7
  *
8
- * | | Ask | Answer | Announce | RollCall | Conclude | …extras |
9
- * |-------------|-----|--------|----------|----------|----------|---------|
10
- * | Facilitator | ✓ | ✓ | ✓ | ✓ | ✓ | |
11
- * | Fac. agent | ✓ | ✓ | ✓ | ✓ | | |
12
- * | Supervisor | ✓ | ✓ | ✓ | ✓ | ✓ | |
13
- * | Sup. agent | ✓ | ✓ | ✓ | ✓ | | |
14
- * | Discuss lead| ✓ | ✓ | ✓ | ✓ | | RFC / Recess / Adjourn |
15
- * | Discuss agt | ✓ | ✓ | ✓ | ✓ | | |
16
- * | Judge | | | | | ✓ | |
8
+ * | | Ask | Answer | Announce | RollCall | Conclude | …extras |
9
+ * |-------------|-----|--------|----------|----------|----------|-----------------------|
10
+ * | Facilitator | ✓ | ✓ | ✓ | ✓ | ✓ | |
11
+ * | Fac. agent | ✓ | ✓ | ✓ | ✓ | | RFC |
12
+ * | Supervisor | ✓ | ✓ | ✓ | ✓ | ✓ | |
13
+ * | Sup. agent | ✓ | ✓ | ✓ | ✓ | | |
14
+ * | Discuss lead| ✓ | ✓ | ✓ | ✓ | | Recess / Adjourn |
15
+ * | Discuss agt | ✓ | ✓ | ✓ | ✓ | | RFC |
16
+ * | Judge | | | | | ✓ | |
17
17
  *
18
18
  * **Ask is async.** Ask returns `{askIds:[…]}` immediately and posts the
19
19
  * question to the addressee's bus queue. The reply arrives on the asker's
@@ -337,11 +337,12 @@ export function createFacilitatorToolServer(ctx) {
337
337
  ]);
338
338
  }
339
339
 
340
- /** Facilitated agent tools: Ask + Answer + Announce + RollCall. */
340
+ /** Facilitated agent tools: Ask + Answer + Announce + RollCall + RequestForComment. */
341
341
  export function createFacilitatedAgentToolServer(ctx, { from }) {
342
- return orchestrationServer(
343
- baseTools(ctx, { from, defaultTo: "facilitator", broadcast: true }),
344
- );
342
+ return orchestrationServer([
343
+ ...baseTools(ctx, { from, defaultTo: "facilitator", broadcast: true }),
344
+ requestForCommentTool(ctx),
345
+ ]);
345
346
  }
346
347
 
347
348
  /**
@@ -352,6 +353,42 @@ export function createJudgeToolServer(ctx) {
352
353
  return orchestrationServer([concludeTool(ctx)]);
353
354
  }
354
355
 
356
+ // --- RequestForComment (agent-level coordination tool) ---
357
+
358
+ /** RequestForComment handler — queues RFC intent on `ctx.rfcs[]`. */
359
+ export function createRequestForCommentHandler(ctx) {
360
+ return async ({ channel, body, addressees }) => {
361
+ if (!ctx.rfcs) ctx.rfcs = [];
362
+ if (typeof ctx.rfcCounter !== "number") ctx.rfcCounter = 0;
363
+ const correlationId = `rfc_${++ctx.rfcCounter}`;
364
+ const addresseeList = addressees?.length ? addressees : [null];
365
+ for (const addressee of addresseeList) {
366
+ ctx.rfcs.push({
367
+ ...(addressee && { addressee }),
368
+ body,
369
+ channel,
370
+ ...(ctx.discussionId && { thread_id: ctx.discussionId }),
371
+ correlation_id: correlationId,
372
+ });
373
+ }
374
+ return jsonResult({ correlation_id: correlationId, channel });
375
+ };
376
+ }
377
+
378
+ /** Build the RequestForComment tool definition. */
379
+ function requestForCommentTool(ctx) {
380
+ return tool(
381
+ "RequestForComment",
382
+ "Open a new Discussion thread for long-horizon coordination on an open question. The bridge creates the thread; replies arrive asynchronously on future runs.",
383
+ {
384
+ channel: z.string(),
385
+ body: z.string(),
386
+ addressees: z.array(z.string()).optional(),
387
+ },
388
+ createRequestForCommentHandler(ctx),
389
+ );
390
+ }
391
+
355
392
  // Re-export the building blocks discuss-tools.js needs to assemble its
356
- // own lead tool surface (it has three extra terminal tools).
357
- export { baseTools, orchestrationServer };
393
+ // own lead tool surface (it has two extra terminal tools).
394
+ export { baseTools, orchestrationServer, requestForCommentTool };
@@ -1,22 +1,28 @@
1
1
  /**
2
- * Compose an SDK `systemPrompt` value from a `.claude/agents/<name>.md` file.
2
+ * System prompt composition for agent runners.
3
3
  *
4
- * Pure function. Reads the profile file, strips YAML frontmatter, and returns
5
- * the SDK-shaped `{ type: "preset", preset: "claude_code", append }` object
6
- * with the profile body plus an optional mode-specific trailer — in the
7
- * `append` slot. Callers in libeval pass the result straight into an
8
- * `AgentRunner`'s `systemPrompt` input so the profile reaches the main-thread
9
- * system prompt without going through the SDK's top-level `agent` option.
4
+ * Three helpers:
5
+ *
6
+ * - `composeProfilePrompt(name, opts)` — profile + `claude_code` preset.
7
+ * Used by agent participants that need the full Claude Code tool surface.
8
+ *
9
+ * - `composeLeadPrompt(opts)` — plain string, no preset. Used by lead
10
+ * roles (supervisor, facilitator, discuss lead) that should only see
11
+ * the orchestration instructions and optionally a profile body.
12
+ *
13
+ * - `composeSystemPrompt(opts)` — unified entry point. Delegates to one
14
+ * of the above based on `opts.role`.
10
15
  */
11
16
 
12
17
  import { readFileSync } from "node:fs";
13
18
  import { join } from "node:path";
14
19
 
15
20
  /**
21
+ * Compose a `claude_code`-preset system prompt from a profile file.
16
22
  * @param {string} name - Profile basename (no `.md` suffix)
17
23
  * @param {object} opts
18
24
  * @param {string} opts.profilesDir - Directory containing `<name>.md`
19
- * @param {string} [opts.trailer] - Optional mode-specific trailer appended after a blank line
25
+ * @param {string} [opts.trailer] - Mode-specific trailer appended after a blank line
20
26
  * @returns {{type: "preset", preset: "claude_code", append: string}}
21
27
  */
22
28
  export function composeProfilePrompt(name, { profilesDir, trailer }) {
@@ -27,6 +33,45 @@ export function composeProfilePrompt(name, { profilesDir, trailer }) {
27
33
  return { type: "preset", preset: "claude_code", append };
28
34
  }
29
35
 
36
+ /**
37
+ * Compose a plain-string system prompt for a lead role (no Claude Code preset).
38
+ * @param {object} opts
39
+ * @param {string} [opts.profile] - Profile basename (no `.md` suffix)
40
+ * @param {string} [opts.profilesDir] - Directory containing profile files
41
+ * @param {string} opts.trailer - Mode-specific orchestration instructions
42
+ * @returns {string}
43
+ */
44
+ export function composeLeadPrompt({ profile, profilesDir, trailer }) {
45
+ if (!trailer) throw new Error("trailer is required");
46
+ if (!profile) return trailer;
47
+ const path = join(profilesDir, `${profile}.md`);
48
+ const raw = readFileSync(path, "utf8");
49
+ const body = stripFrontmatter(raw).trim();
50
+ return `${body}\n\n${trailer}`;
51
+ }
52
+
53
+ /**
54
+ * Unified entry point for composing system prompts.
55
+ *
56
+ * @param {object} opts
57
+ * @param {"lead"|"agent"} opts.role - `"lead"` produces a plain string;
58
+ * `"agent"` produces a `claude_code` preset object.
59
+ * @param {string} [opts.profile] - Profile basename
60
+ * @param {string} [opts.profilesDir]
61
+ * @param {string} opts.trailer - Mode-specific instructions
62
+ * @returns {string | {type: "preset", preset: "claude_code", append: string}}
63
+ */
64
+ export function composeSystemPrompt({ role, profile, profilesDir, trailer }) {
65
+ if (!trailer) throw new Error("trailer is required");
66
+ if (role === "lead") {
67
+ return composeLeadPrompt({ profile, profilesDir, trailer });
68
+ }
69
+ if (profile) {
70
+ return composeProfilePrompt(profile, { profilesDir, trailer });
71
+ }
72
+ return { type: "preset", preset: "claude_code", append: trailer };
73
+ }
74
+
30
75
  /**
31
76
  * Strip a leading YAML frontmatter fence (`---\n…\n---\n`) from a markdown
32
77
  * string. Returns the input unchanged when no frontmatter is present.
package/src/supervisor.js CHANGED
@@ -18,7 +18,7 @@
18
18
  import { Writable } from "node:stream";
19
19
  import { resolve } from "node:path";
20
20
  import { createAgentRunner } from "./agent-runner.js";
21
- import { composeProfilePrompt } from "./profile-prompt.js";
21
+ import { composeSystemPrompt } from "./profile-prompt.js";
22
22
  import { createMessageBus } from "./message-bus.js";
23
23
  import {
24
24
  createOrchestrationContext,
@@ -27,23 +27,23 @@ import {
27
27
  } from "./orchestration-toolkit.js";
28
28
  import { OrchestrationLoop } from "./orchestration-loop.js";
29
29
 
30
- /** System prompt appended for the supervisor runner in supervise mode. */
30
+ /** System prompt for the supervisor lead. L0 mechanics only per COALIGNED. */
31
31
  export const SUPERVISOR_SYSTEM_PROMPT =
32
- "You supervise one agent named `agent`. " +
33
- "Ask sends a question and returns immediately with {askIds:[N]}. The reply arrives on a later turn as `[answer#N] agent: <text>` in your inbox — between turns you can plan and reflect while the agent works. End your turn with text after asking; the orchestrator wakes you when the agent replies. " +
34
- "Answer replies to an ask the agent addressed to you (you'll see it tagged `[ask#N] agent: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
35
- "Announce delivers a message with no reply obligation. " +
36
- "Conclude ends the session with a verdict ('success' or 'failure') and a summary; the verdict reflects whether the agent's work meets the criteria stated in the task. " +
37
- "You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
38
- "If the agent goes off-track, course-correct by issuing a new Ask with corrected instructions; each Ask carries a fresh askId, so a follow-up never collides with an earlier one.";
32
+ "You supervise one agent.\n" +
33
+ "You have no tools to perform work yourself.\n" +
34
+ "Use `Ask` to delegate work to the agent.\n" +
35
+ "`Ask` returns {askIds:[N]} immediately.\n" +
36
+ "The reply arrives on your next turn as `[answer#N] agent: <text>`.\n" +
37
+ "If the agent goes off-track, send a corrective `Ask`.\n" +
38
+ "End every session by calling `Conclude`.";
39
39
 
40
- /** System prompt appended for the agent runner in supervise mode. */
40
+ /** System prompt for the supervised agent. L0 mechanics only per COALIGNED. */
41
41
  export const AGENT_SYSTEM_PROMPT =
42
- "A supervisor watches your work. " +
43
- "Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
44
- "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
45
- "Ask sends a question to the supervisor and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] supervisor: <text>` in your inbox. " +
46
- "Announce sends a message with no reply expected — use this for unsolicited remarks or to reply to an Announce.";
42
+ "A supervisor directs your work.\n" +
43
+ "Each question arrives as `[ask#N] supervisor: <text>`.\n" +
44
+ "Quote N as askId on your `Answer` to route the reply correctly.\n" +
45
+ "If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
46
+ "Do not redo completed work.";
47
47
 
48
48
  /**
49
49
  * Supervise-mode wrapper around `OrchestrationLoop`. The lead is
@@ -148,15 +148,6 @@ export function createSupervisor({
148
148
  if (!redactor) throw new Error("redactor is required");
149
149
  const resolvedProfilesDir =
150
150
  profilesDir ?? resolve(supervisorCwd, ".claude/agents");
151
- const systemPromptFor = (profile, trailer) => {
152
- if (!trailer) throw new Error("trailer is required");
153
- return profile
154
- ? composeProfilePrompt(profile, {
155
- profilesDir: resolvedProfilesDir,
156
- trailer,
157
- })
158
- : { type: "preset", preset: "claude_code", append: trailer };
159
- };
160
151
 
161
152
  const ctx = createOrchestrationContext();
162
153
  const messageBus = createMessageBus({
@@ -183,15 +174,25 @@ export function createSupervisor({
183
174
  allowedTools,
184
175
  onLine: (line) => supervisor.emitLine("agent", line),
185
176
  settingSources: ["project"],
186
- systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
177
+ systemPrompt: composeSystemPrompt({
178
+ role: "agent",
179
+ profile: agentProfile,
180
+ profilesDir: resolvedProfilesDir,
181
+ trailer: AGENT_SYSTEM_PROMPT,
182
+ }),
187
183
  mcpServers: { orchestration: agentServer, ...agentMcpServers },
188
184
  redactor,
189
185
  });
190
186
 
191
- // Block the SDK's sub-agent spawn tools on the supervisor: it should
192
- // coordinate the agent through orchestration tools, not fan work out
193
- // to ad-hoc Claude Code sub-agents.
194
- const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
187
+ const defaultDisallowed = [
188
+ "Agent",
189
+ "Task",
190
+ "TaskOutput",
191
+ "TaskStop",
192
+ "Bash",
193
+ "Write",
194
+ "Edit",
195
+ ];
195
196
  const disallowedTools = supervisorDisallowedTools
196
197
  ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
197
198
  : defaultDisallowed;
@@ -202,18 +203,16 @@ export function createSupervisor({
202
203
  output: devNull,
203
204
  model: supervisorModel ?? model,
204
205
  maxTurns: perRunBudget,
205
- allowedTools: supervisorAllowedTools ?? [
206
- "Bash",
207
- "Read",
208
- "Glob",
209
- "Grep",
210
- "Write",
211
- "Edit",
212
- ],
206
+ allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep"],
213
207
  disallowedTools,
214
208
  onLine: (line) => supervisor.emitLine("supervisor", line),
215
209
  settingSources: ["project"],
216
- systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
210
+ systemPrompt: composeSystemPrompt({
211
+ role: "lead",
212
+ profile: supervisorProfile,
213
+ profilesDir: resolvedProfilesDir,
214
+ trailer: SUPERVISOR_SYSTEM_PROMPT,
215
+ }),
217
216
  mcpServers: { orchestration: supervisorServer },
218
217
  redactor,
219
218
  });