npm - @forwardimpact/libeval - Versions diffs - 0.1.47 → 0.1.48 - Mend

@forwardimpact/libeval 0.1.47 → 0.1.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +25 -11
package/package.json +1 -1
package/src/commands/discuss.js +1 -1
package/src/discuss-tools.js +22 -50
package/src/discusser.js +56 -28
package/src/facilitator.js +40 -45
package/src/index.js +6 -1
package/src/orchestration-toolkit.js +52 -15
package/src/profile-prompt.js +53 -8
package/src/supervisor.js +37 -38

package/README.md CHANGED Viewed

@@ -69,6 +69,20 @@ Inbox lines on resume:
 Async means the lead can issue Asks, end its turn, and plan in the gap
 while participants work in parallel — nothing blocks the LLM thread.
+### Discuss-mode replies
+In discussion mode, Answer calls routed to the lead are captured as
+thread replies delivered via the bridge callback. The lead delegates work
+via Ask; each agent's Answer becomes a separate reply posted to the
+discussion thread. No explicit reply tool is needed on the lead surface —
+the message bus intercepts answers and appends them to `ctx.replies[]`.
+`RequestForComment` is a separate coordination tool available on agent
+roles (facilitated agents and discuss agents). It queues an intent to
+open a new Discussion thread for long-horizon coordination on open
+questions; these are accumulated in `ctx.rfcs[]`, separate from the
+thread replies in `ctx.replies[]`.
 ## Orchestration loop
 Each participant drains the bus (or waits), runs/resumes the LLM with
@@ -84,15 +98,15 @@ only feeds the summary's `success`/`verdict`.
 ## Tool surface, by role
-| Role         | Ask | Answer | Announce | RollCall | Conclude | Other                                    |
-| ------------ | --- | ------ | -------- | -------- | -------- | ---------------------------------------- |
-| Facilitator  | ✓   | ✓      | ✓        | ✓        | ✓        |                                          |
-| Fac. agent   | ✓   | ✓      | ✓        | ✓        |          |                                          |
-| Supervisor   | ✓   | ✓      | ✓        | ✓        | ✓        |                                          |
-| Sup. agent   | ✓   | ✓      | ✓        | ✓        |          |                                          |
-| Discuss lead | ✓   | ✓      | ✓        | ✓        |          | `RequestForComment`, `Recess`, `Adjourn` |
-| Discuss agt  | ✓   | ✓      | ✓        | ✓        |          |                                          |
-| Judge        |     |        |          |          | ✓        |                                          |
+| Role         | Ask | Answer | Announce | RollCall | Conclude | Other                          |
+| ------------ | --- | ------ | -------- | -------- | -------- | ------------------------------ |
+| Facilitator  | ✓   | ✓      | ✓        | ✓        | ✓        |                                |
+| Fac. agent   | ✓   | ✓      | ✓        | ✓        |          | `RequestForComment`            |
+| Supervisor   | ✓   | ✓      | ✓        | ✓        | ✓        |                                |
+| Sup. agent   | ✓   | ✓      | ✓        | ✓        |          |                                |
+| Discuss lead | ✓   | ✓      | ✓        | ✓        |          | `Recess`, `Adjourn`            |
+| Discuss agt  | ✓   | ✓      | ✓        | ✓        |          | `RequestForComment`            |
+| Judge        |     |        |          |          | ✓        |                                |
 Ask's `to` accepts a participant name on multi-participant roles
 (facilitator, discuss lead, all participants). The supervise pair has
@@ -152,10 +166,10 @@ downloadable through retention.
 | ----------------------------------------------------------- | -------------------------------------------------------------------- |
 | `agent-runner.js`                                           | One Claude Agent SDK session; emits NDJSON via the redactor.         |
 | `message-bus.js`                                            | Per-participant queues + `waitForMessages` Promise wakeup.           |
-| `orchestration-toolkit.js`                                  | Shared Ask/Answer/Announce/Conclude/RollCall handlers + builders.    |
+| `orchestration-toolkit.js`                                  | Shared Ask/Answer/Announce/Conclude/RollCall/RequestForComment handlers + builders. |
 | `orchestration-loop.js`                                     | Unified lead+participant loop; reminder/violation handling.          |
 | `facilitator.js` / `supervisor.js` / `discusser.js` / `judge.js` | Per-mode class + factory + system prompt.                       |
-| `discuss-tools.js`                                          | Discuss-only `RequestForComment`/`Recess`/`Adjourn`.                 |
+| `discuss-tools.js`                                          | Discuss-only `Recess`/`Adjourn`.                                     |
 | `trace-collector.js` / `trace-query.js` / `trace-github.js` | Trace ingestion / querying / GitHub-attachment helpers.              |
 | `redaction.js`                                              | Env-var allowlist + credential-shape pattern redaction.              |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@forwardimpact/libeval",
-  "version": "0.1.47",
+  "version": "0.1.48",
   "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
   "keywords": [
     "eval",

package/src/commands/discuss.js CHANGED Viewed

@@ -52,7 +52,7 @@ export function parseDiscussOptions(values) {
     taskContent,
     taskAmend,
     agentConfigs,
-    leadProfile: values["lead-profile"] ?? "release-engineer",
+    leadProfile: values["lead-profile"] ?? undefined,
     leadModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
     agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
     maxTurns,

package/src/discuss-tools.js CHANGED Viewed

@@ -1,15 +1,19 @@
 /**
  * DiscussTools — discuss-mode tool servers. The lead's surface extends the
- * base set with three discuss-only terminal tools:
+ * base set with two discuss-only terminal tools:
  *
- * - `RequestForComment` posts a fire-and-forget message to a human channel
- *   via the bridge; the reply arrives on a later workflow run.
  * - `Recess` suspends the session with a resumption trigger.
  * - `Adjourn` ends the discussion with a verdict.
  *
- * `Conclude` is absent — discuss mode ends via Adjourn or Recess. The
- * agent surface is identical to the facilitated agent's: Ask / Answer /
- * Announce / RollCall, with Ask defaulting to the lead.
+ * `Conclude` is absent — discuss mode ends via Adjourn or Recess.
+ *
+ * `RequestForComment` is an agent-level coordination tool — available on
+ * discuss agents and facilitated agents (not leads). It opens a new
+ * Discussion thread for long-horizon coordination on open questions.
+ *
+ * In discuss mode, each agent Answer routed to the lead is captured as a
+ * thread reply delivered via the bridge callback — no explicit reply tool
+ * is needed on the lead surface.
  */
 import { tool } from "@anthropic-ai/claude-agent-sdk";
@@ -19,16 +23,17 @@ import {
   baseTools,
   concludeSession,
   orchestrationServer,
+  requestForCommentTool,
 } from "./orchestration-toolkit.js";
-/** System prompt appended for discuss-mode agent runners. */
+/** System prompt for discuss-mode agent participants. L0 mechanics only per COALIGNED. */
 export const DISCUSS_AGENT_SYSTEM_PROMPT =
-  "You participate in an asynchronous discussion. " +
-  "Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
-  "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
-  "Ask sends a question to the lead or another participant and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox. " +
-  "Announce broadcasts a message to every other participant — use this for unsolicited remarks or to reply to an Announce. " +
-  "RollCall lists participants.";
+  "You are a participant in a discussion.\n" +
+  "Each question arrives as `[ask#N] <name>: <text>`.\n" +
+  "Quote N as askId on your `Answer` to route the reply correctly.\n" +
+  "Your `Answer` is posted to the discussion thread as a separate reply.\n" +
+  "If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
+  "Do not redo completed work.";
 const RESUME_TRIGGER_SCHEMA = z
   .object({
@@ -42,16 +47,6 @@ const RESUME_TRIGGER_SCHEMA = z
 export function createDiscussLeadToolServer(ctx) {
   return orchestrationServer([
     ...baseTools(ctx, { from: "lead", defaultTo: undefined, broadcast: true }),
-    tool(
-      "RequestForComment",
-      "Post a fire-and-forget message to a channel via the bridge. Returns a correlation id; the reply arrives on a later workflow run.",
-      {
-        channel: z.string(),
-        body: z.string(),
-        addressees: z.array(z.string()).optional(),
-      },
-      createRequestForCommentHandler(ctx),
-    ),
     tool(
       "Recess",
       "Suspend the run. The bridge re-dispatches the workflow when the trigger fires.",
@@ -73,33 +68,10 @@ export function createDiscussLeadToolServer(ctx) {
 /** Discuss-mode agent tool server. */
 export function createDiscussAgentToolServer(ctx, { from }) {
-  return orchestrationServer(
-    baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
-  );
-}
-/** RequestForComment handler — queues structured replies on `ctx.replies[]`. */
-export function createRequestForCommentHandler(ctx) {
-  return async ({ channel, body, addressees }) => {
-    const correlationId = `rfc_${++ctx.rfcCounter}`;
-    const addresseeList = addressees?.length ? addressees : [null];
-    for (const addressee of addresseeList) {
-      ctx.replies.push({
-        ...(addressee && { addressee }),
-        body,
-        ...(ctx.discussionId && { thread_id: ctx.discussionId }),
-        correlation_id: correlationId,
-      });
-    }
-    return {
-      content: [
-        {
-          type: "text",
-          text: JSON.stringify({ correlation_id: correlationId, channel }),
-        },
-      ],
-    };
-  };
+  return orchestrationServer([
+    ...baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
+    requestForCommentTool(ctx),
+  ]);
 }
 /**

package/src/discusser.js CHANGED Viewed

@@ -1,19 +1,23 @@
 /**
  * Discusser — async, suspendable orchestration on top of a within-run
- * `OrchestrationLoop`. The lead role uses `DiscussTools` (Adjourn / Recess
- * / RequestForComment) instead of the facilitator's Conclude.
+ * `OrchestrationLoop`. The lead role uses `DiscussTools` (Adjourn / Recess)
+ * instead of the facilitator's Conclude.
  *
  * Discuss mode is a sibling of facilitate mode, not a subset of it. The
  * within-run turn loop is shared via `OrchestrationLoop`, but the lead
  * role, tool set, system prompts, and participant naming all stay
  * mode-local.
+ *
+ * Each agent Answer routed to the lead is captured as a thread reply
+ * delivered via the bridge callback — no explicit reply tool is needed
+ * on the lead surface.
  */
 import { Writable } from "node:stream";
 import { resolve } from "node:path";
 import { createAgentRunner } from "./agent-runner.js";
-import { composeProfilePrompt } from "./profile-prompt.js";
+import { composeSystemPrompt } from "./profile-prompt.js";
 import { SequenceCounter } from "./sequence-counter.js";
 import { createMessageBus } from "./message-bus.js";
 import { createOrchestrationContext } from "./orchestration-toolkit.js";
@@ -24,18 +28,18 @@ import {
 } from "./discuss-tools.js";
 import { OrchestrationLoop } from "./orchestration-loop.js";
-/** System prompt appended for the lead (Chair) runner in discuss mode. */
+/** System prompt for the discuss-mode lead. L0 mechanics only per COALIGNED. */
 export const DISCUSS_SYSTEM_PROMPT =
-  "You lead an asynchronous discussion across multiple participants and a human channel. " +
-  "Ask sends a question and returns immediately with {askIds:[N,…]}. The reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox — between turns you can plan, reflect, or send more Asks while participants work in parallel. End your turn with text after you've asked everything you intend to; the orchestrator wakes you when the next message lands. " +
-  "Answer replies to an ask a participant addressed to you (you'll see it tagged `[ask#N] <participant>: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
-  "Announce delivers a message with no reply obligation. " +
-  "RollCall returns the participant roster. " +
-  "RequestForComment posts a message to the human thread via the bridge. Every reply you want the human to see MUST go through RequestForComment — the bridge delivers only queued replies, not your text output. " +
-  "Recess suspends the run with a resumption trigger (responses / elapsed / either); any open Asks get a synthetic '[no answer: session concluded]' on the asker's queue so nothing dangles. " +
-  "Adjourn ends the discussion with a verdict ('adjourned' / 'failed') and a summary. " +
-  "Multiple Ask / Announce calls in one assistant turn dispatch in parallel — issue them as parallel tool_use blocks rather than sending the same question both broadcast and individually. " +
-  "You MUST call RequestForComment with your response before calling Adjourn. You MUST end every run by calling Adjourn or Recess — never end a turn with only text *after* every Ask round has resolved.";
+  "You lead a discussion.\n" +
+  "You have no tools to perform work yourself.\n" +
+  "Use `RollCall` to list participants.\n" +
+  "Use `Ask` to delegate work to the best-suited participant.\n" +
+  "Participants are domain experts; state the task, not how to do it.\n" +
+  "Each participant's `Answer` is posted to the discussion thread as a separate reply.\n" +
+  "`Ask` returns {askIds:[N,…]} immediately.\n" +
+  "Answers arrive on your next turn as `[answer#N] <participant>: <text>`.\n" +
+  "Multiple `Ask` calls in one turn run participants concurrently.\n" +
+  "Wait for all participants to `Answer` before calling `Adjourn` or `Recess`.";
 /**
  * Augment a base orchestration context with discuss-mode fields.
@@ -47,6 +51,7 @@ export function augmentContextForDiscuss(ctx, discussionId) {
   ctx.discussionId = discussionId;
   ctx.recessTrigger = null;
   ctx.replies = [];
+  ctx.rfcs = [];
   ctx.rfcCounter = 0;
   ctx.outcome = null;
   return ctx;
@@ -141,6 +146,7 @@ export class Discusser {
       ...(this.ctx.summary && { summary: this.ctx.summary }),
       ...(this.ctx.outcome && { outcome: this.ctx.outcome }),
       replies: this.ctx.replies,
+      ...(this.ctx.rfcs?.length && { rfcs: this.ctx.rfcs }),
       ...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
       ...(this.discussionId && { discussion_id: this.discussionId }),
     };
@@ -228,6 +234,20 @@ export function createDiscusser({
   const messageBus = createMessageBus({
     participants: ["lead", ...resolvedConfigs.map((a) => a.name)],
   });
+  // Intercept answers routed to the lead — each becomes a discussion reply.
+  const originalAnswer = messageBus.answer.bind(messageBus);
+  messageBus.answer = (from, to, text, askId) => {
+    if (to === "lead" && from !== "@orchestrator") {
+      ctx.replies.push({
+        body: text,
+        agent: from,
+        ...(ctx.discussionId && { thread_id: ctx.discussionId }),
+      });
+    }
+    originalAnswer(from, to, text, askId);
+  };
   ctx.messageBus = messageBus;
   if (ctx.participants.length === 0) {
     ctx.participants = [
@@ -236,16 +256,6 @@ export function createDiscusser({
     ];
   }
-  const systemPromptFor = (profile, trailer) => {
-    if (!trailer) throw new Error("trailer is required");
-    return profile
-      ? composeProfilePrompt(profile, {
-          profilesDir: resolvedProfilesDir,
-          trailer,
-        })
-      : { type: "preset", preset: "claude_code", append: trailer };
-  };
   let discusser;
   const leadServer = createDiscussLeadToolServer(ctx);
@@ -268,26 +278,44 @@ export function createDiscusser({
       onLine: (line) => discusser.loop.emitLine(config.name, line),
       mcpServers: { orchestration: agentServer },
       settingSources: ["project"],
-      systemPrompt: systemPromptFor(config.agentProfile, agentTrailer),
+      systemPrompt: composeSystemPrompt({
+        role: "agent",
+        profile: config.agentProfile,
+        profilesDir: resolvedProfilesDir,
+        trailer: agentTrailer,
+      }),
       redactor,
     });
     return { name: config.name, role: config.role, runner };
   });
-  const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
+  const defaultDisallowed = [
+    "Agent",
+    "Task",
+    "TaskOutput",
+    "TaskStop",
+    "Bash",
+    "Write",
+    "Edit",
+  ];
   const leadRunner = createAgentRunner({
     cwd: resolvedLeadCwd,
     query,
     output: devNull,
     model: leadModel ?? "claude-opus-4-7[1m]",
     maxTurns: maxTurns ?? 80,
-    allowedTools: ["Bash", "Read", "Glob", "Grep", "Write", "Edit"],
+    allowedTools: ["Read", "Glob", "Grep"],
     disallowedTools: defaultDisallowed,
     onLine: (line) => discusser.loop.emitLine("lead", line),
     mcpServers: { orchestration: leadServer },
     settingSources: ["project"],
-    systemPrompt: systemPromptFor(leadProfile, DISCUSS_SYSTEM_PROMPT),
+    systemPrompt: composeSystemPrompt({
+      role: "lead",
+      profile: leadProfile,
+      profilesDir: resolvedProfilesDir,
+      trailer: DISCUSS_SYSTEM_PROMPT,
+    }),
     redactor,
   });

package/src/facilitator.js CHANGED Viewed

@@ -9,7 +9,7 @@
 import { Writable } from "node:stream";
 import { resolve } from "node:path";
 import { createAgentRunner } from "./agent-runner.js";
-import { composeProfilePrompt } from "./profile-prompt.js";
+import { composeSystemPrompt } from "./profile-prompt.js";
 import { createMessageBus } from "./message-bus.js";
 import {
   createOrchestrationContext,
@@ -18,26 +18,25 @@ import {
 } from "./orchestration-toolkit.js";
 import { OrchestrationLoop } from "./orchestration-loop.js";
-/** System prompt appended for the facilitator runner. */
+/** System prompt for the facilitator lead. L0 mechanics only per COALIGNED. */
 export const FACILITATOR_SYSTEM_PROMPT =
-  "You coordinate multiple participants via these tools: " +
-  "Ask sends a question and returns immediately with {askIds:[N,…]}. The reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox — between turns you can plan, reflect, or send more Asks while participants work in parallel. End your turn with text after you've asked everything you intend to; the orchestrator wakes you again as soon as a reply (or any message) lands. " +
-  "Answer replies to an ask a participant addressed to you (you'll see it tagged `[ask#N] <participant>: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
-  "Announce delivers a message with no reply obligation. " +
-  "RollCall returns the participant roster. " +
-  "Conclude ends the session with a verdict ('success' or 'failure') and a summary. " +
-  "Multiple Ask / Announce calls in one assistant turn dispatch in parallel — issue them as parallel tool_use blocks rather than sending the same question both broadcast and individually. " +
-  "You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
-  "If you can answer the task yourself, still call Conclude with verdict='success' and the answer as the summary.";
-/** System prompt appended for facilitated agent runners. */
+  "You are the facilitator.\n" +
+  "You have no tools to perform work yourself.\n" +
+  "Use `RollCall` to list participants.\n" +
+  "Use `Ask` to delegate work to the best-suited participant.\n" +
+  "Participants are domain experts; state the task, not how to do it.\n" +
+  "`Ask` returns {askIds:[N,…]} immediately.\n" +
+  "Answers arrive on your next turn as `[answer#N] <participant>: <text>`.\n" +
+  "Multiple `Ask` calls in one turn run participants concurrently.\n" +
+  "Wait for all participants to `Answer` before calling `Conclude`.";
+/** System prompt for facilitated agent participants. L0 mechanics only per COALIGNED. */
 export const FACILITATED_AGENT_SYSTEM_PROMPT =
-  "You participate in a coordinated session. " +
-  "Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
-  "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
-  "Ask sends a question to another participant and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox. " +
-  "Announce broadcasts a message to every other participant — use this for unsolicited remarks or to reply to an Announce. " +
-  "RollCall lists participants.";
+  "You are a participant in a facilitated session.\n" +
+  "Each question arrives as `[ask#N] <name>: <text>`.\n" +
+  "Quote N as askId on your `Answer` to route the reply correctly.\n" +
+  "If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
+  "Do not redo completed work.";
 /**
  * Facilitate-mode wrapper around `OrchestrationLoop`. The lead is named
@@ -113,15 +112,6 @@ export function createFacilitator({
   if (!redactor) throw new Error("redactor is required");
   const resolvedProfilesDir =
     profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
-  const systemPromptFor = (profile, trailer) => {
-    if (!trailer) throw new Error("trailer is required");
-    return profile
-      ? composeProfilePrompt(profile, {
-          profilesDir: resolvedProfilesDir,
-          trailer,
-        })
-      : { type: "preset", preset: "claude_code", append: trailer };
-  };
   const ctx = createOrchestrationContext();
   const messageBus = createMessageBus({
     participants: ["facilitator", ...agentConfigs.map((a) => a.name)],
@@ -155,17 +145,27 @@ export function createFacilitator({
       onLine: (line) => facilitator.emitLine(config.name, line),
       mcpServers: { orchestration: agentServer },
       settingSources: ["project"],
-      systemPrompt: systemPromptFor(config.agentProfile, agentTrailer),
+      systemPrompt: composeSystemPrompt({
+        role: "agent",
+        profile: config.agentProfile,
+        profilesDir: resolvedProfilesDir,
+        trailer: agentTrailer,
+      }),
       redactor,
     });
     return { name: config.name, role: config.role, runner };
   });
-  // Block the SDK's sub-agent spawn tools on the facilitator: its job is to
-  // coordinate participants through the libeval orchestration harness, not
-  // to fan work out to ad-hoc Claude Code sub-agents. Mirrors the supervisor.
-  const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
+  const defaultDisallowed = [
+    "Agent",
+    "Task",
+    "TaskOutput",
+    "TaskStop",
+    "Bash",
+    "Write",
+    "Edit",
+  ];
   const disallowedTools = facilitatorDisallowedTools
     ? [...new Set([...defaultDisallowed, ...facilitatorDisallowedTools])]
     : defaultDisallowed;
@@ -176,22 +176,17 @@ export function createFacilitator({
     output: devNull,
     model: facilitatorModel ?? model,
     maxTurns: maxTurns ?? 80,
-    allowedTools: facilitatorAllowedTools ?? [
-      "Bash",
-      "Read",
-      "Glob",
-      "Grep",
-      "Write",
-      "Edit",
-    ],
+    allowedTools: facilitatorAllowedTools ?? ["Read", "Glob", "Grep"],
     disallowedTools,
     onLine: (line) => facilitator.emitLine("facilitator", line),
     mcpServers: { orchestration: facilitatorServer },
     settingSources: ["project"],
-    systemPrompt: systemPromptFor(
-      facilitatorProfile,
-      FACILITATOR_SYSTEM_PROMPT,
-    ),
+    systemPrompt: composeSystemPrompt({
+      role: "lead",
+      profile: facilitatorProfile,
+      profilesDir: resolvedProfilesDir,
+      trailer: FACILITATOR_SYSTEM_PROMPT,
+    }),
     redactor,
   });

package/src/index.js CHANGED Viewed

@@ -8,7 +8,11 @@ export {
   parseGitRemote,
 } from "./trace-github.js";
 export { AgentRunner, createAgentRunner } from "./agent-runner.js";
-export { composeProfilePrompt } from "./profile-prompt.js";
+export {
+  composeProfilePrompt,
+  composeLeadPrompt,
+  composeSystemPrompt,
+} from "./profile-prompt.js";
 export {
   Supervisor,
   createSupervisor,
@@ -19,6 +23,7 @@ export { TeeWriter, createTeeWriter } from "./tee-writer.js";
 export { SequenceCounter, createSequenceCounter } from "./sequence-counter.js";
 export {
   createOrchestrationContext,
+  createRequestForCommentHandler,
   createSupervisorToolServer,
   createSupervisedAgentToolServer,
   createFacilitatorToolServer,

package/src/orchestration-toolkit.js CHANGED Viewed

@@ -5,15 +5,15 @@
  *
  * **Tool surface, by role:**
  *
- *   |             | Ask | Answer | Announce | RollCall | Conclude | …extras |
- *   |-------------|-----|--------|----------|----------|----------|---------|
- *   | Facilitator |  ✓  |   ✓    |    ✓     |    ✓     |    ✓     |         |
- *   | Fac. agent  |  ✓  |   ✓    |    ✓     |    ✓     |          |         |
- *   | Supervisor  |  ✓  |   ✓    |    ✓     |    ✓     |    ✓     |         |
- *   | Sup. agent  |  ✓  |   ✓    |    ✓     |    ✓     |          |         |
- *   | Discuss lead|  ✓  |   ✓    |    ✓     |    ✓     |          | RFC / Recess / Adjourn |
- *   | Discuss agt |  ✓  |   ✓    |    ✓     |    ✓     |          |         |
- *   | Judge       |     |        |          |          |    ✓     |         |
+ *   |             | Ask | Answer | Announce | RollCall | Conclude | …extras              |
+ *   |-------------|-----|--------|----------|----------|----------|-----------------------|
+ *   | Facilitator |  ✓  |   ✓    |    ✓     |    ✓     |    ✓     |                       |
+ *   | Fac. agent  |  ✓  |   ✓    |    ✓     |    ✓     |          | RFC                   |
+ *   | Supervisor  |  ✓  |   ✓    |    ✓     |    ✓     |    ✓     |                       |
+ *   | Sup. agent  |  ✓  |   ✓    |    ✓     |    ✓     |          |                       |
+ *   | Discuss lead|  ✓  |   ✓    |    ✓     |    ✓     |          | Recess / Adjourn      |
+ *   | Discuss agt |  ✓  |   ✓    |    ✓     |    ✓     |          | RFC                   |
+ *   | Judge       |     |        |          |          |    ✓     |                       |
  *
  * **Ask is async.** Ask returns `{askIds:[…]}` immediately and posts the
  * question to the addressee's bus queue. The reply arrives on the asker's
@@ -337,11 +337,12 @@ export function createFacilitatorToolServer(ctx) {
   ]);
 }
-/** Facilitated agent tools: Ask + Answer + Announce + RollCall. */
+/** Facilitated agent tools: Ask + Answer + Announce + RollCall + RequestForComment. */
 export function createFacilitatedAgentToolServer(ctx, { from }) {
-  return orchestrationServer(
-    baseTools(ctx, { from, defaultTo: "facilitator", broadcast: true }),
-  );
+  return orchestrationServer([
+    ...baseTools(ctx, { from, defaultTo: "facilitator", broadcast: true }),
+    requestForCommentTool(ctx),
+  ]);
 }
 /**
@@ -352,6 +353,42 @@ export function createJudgeToolServer(ctx) {
   return orchestrationServer([concludeTool(ctx)]);
 }
+// --- RequestForComment (agent-level coordination tool) ---
+/** RequestForComment handler — queues RFC intent on `ctx.rfcs[]`. */
+export function createRequestForCommentHandler(ctx) {
+  return async ({ channel, body, addressees }) => {
+    if (!ctx.rfcs) ctx.rfcs = [];
+    if (typeof ctx.rfcCounter !== "number") ctx.rfcCounter = 0;
+    const correlationId = `rfc_${++ctx.rfcCounter}`;
+    const addresseeList = addressees?.length ? addressees : [null];
+    for (const addressee of addresseeList) {
+      ctx.rfcs.push({
+        ...(addressee && { addressee }),
+        body,
+        channel,
+        ...(ctx.discussionId && { thread_id: ctx.discussionId }),
+        correlation_id: correlationId,
+      });
+    }
+    return jsonResult({ correlation_id: correlationId, channel });
+  };
+}
+/** Build the RequestForComment tool definition. */
+function requestForCommentTool(ctx) {
+  return tool(
+    "RequestForComment",
+    "Open a new Discussion thread for long-horizon coordination on an open question. The bridge creates the thread; replies arrive asynchronously on future runs.",
+    {
+      channel: z.string(),
+      body: z.string(),
+      addressees: z.array(z.string()).optional(),
+    },
+    createRequestForCommentHandler(ctx),
+  );
+}
 // Re-export the building blocks discuss-tools.js needs to assemble its
-// own lead tool surface (it has three extra terminal tools).
-export { baseTools, orchestrationServer };
+// own lead tool surface (it has two extra terminal tools).
+export { baseTools, orchestrationServer, requestForCommentTool };

package/src/profile-prompt.js CHANGED Viewed

@@ -1,22 +1,28 @@
 /**
- * Compose an SDK `systemPrompt` value from a `.claude/agents/<name>.md` file.
+ * System prompt composition for agent runners.
  *
- * Pure function. Reads the profile file, strips YAML frontmatter, and returns
- * the SDK-shaped `{ type: "preset", preset: "claude_code", append }` object
- * with the profile body — plus an optional mode-specific trailer — in the
- * `append` slot. Callers in libeval pass the result straight into an
- * `AgentRunner`'s `systemPrompt` input so the profile reaches the main-thread
- * system prompt without going through the SDK's top-level `agent` option.
+ * Three helpers:
+ *
+ * - `composeProfilePrompt(name, opts)` — profile + `claude_code` preset.
+ *   Used by agent participants that need the full Claude Code tool surface.
+ *
+ * - `composeLeadPrompt(opts)` — plain string, no preset. Used by lead
+ *   roles (supervisor, facilitator, discuss lead) that should only see
+ *   the orchestration instructions and optionally a profile body.
+ *
+ * - `composeSystemPrompt(opts)` — unified entry point. Delegates to one
+ *   of the above based on `opts.role`.
  */
 import { readFileSync } from "node:fs";
 import { join } from "node:path";
 /**
+ * Compose a `claude_code`-preset system prompt from a profile file.
  * @param {string} name - Profile basename (no `.md` suffix)
  * @param {object} opts
  * @param {string} opts.profilesDir - Directory containing `<name>.md`
- * @param {string} [opts.trailer] - Optional mode-specific trailer appended after a blank line
+ * @param {string} [opts.trailer] - Mode-specific trailer appended after a blank line
  * @returns {{type: "preset", preset: "claude_code", append: string}}
  */
 export function composeProfilePrompt(name, { profilesDir, trailer }) {
@@ -27,6 +33,45 @@ export function composeProfilePrompt(name, { profilesDir, trailer }) {
   return { type: "preset", preset: "claude_code", append };
 }
+/**
+ * Compose a plain-string system prompt for a lead role (no Claude Code preset).
+ *
+ * Without a `profile`, the trailer is returned as-is. With a `profile`, the
+ * file `<profilesDir>/<profile>.md` is read synchronously as UTF-8, passed
+ * through `stripFrontmatter`, trimmed, and joined to the trailer with one
+ * blank line in between (profile body first, trailer last).
+ *
+ * @param {object} opts
+ * @param {string} [opts.profile] - Profile basename (no `.md` suffix)
+ * @param {string} [opts.profilesDir] - Directory containing profile files;
+ *   only used when `opts.profile` is set, where it is joined with the
+ *   profile filename
+ * @param {string} opts.trailer - Mode-specific orchestration instructions
+ * @returns {string}
+ * @throws {Error} "trailer is required" when `opts.trailer` is falsy; this is
+ *   checked before the profile is considered. Errors raised while reading
+ *   the profile file are not caught here.
+ */
+export function composeLeadPrompt({ profile, profilesDir, trailer }) {
+  if (!trailer) throw new Error("trailer is required");
+  if (!profile) return trailer;
+  const path = join(profilesDir, `${profile}.md`);
+  const raw = readFileSync(path, "utf8");
+  const body = stripFrontmatter(raw).trim();
+  return `${body}\n\n${trailer}`;
+}
+/**
+ * Unified entry point for composing system prompts.
+ *
+ * Dispatch, in order: `role === "lead"` delegates to `composeLeadPrompt`;
+ * otherwise a truthy `profile` delegates to `composeProfilePrompt`; otherwise
+ * the trailer alone is placed in the `append` slot of a `claude_code` preset
+ * object. Only `"lead"` is compared explicitly, so any other `role` value
+ * takes the agent path.
+ *
+ * @param {object} opts
+ * @param {"lead"|"agent"} opts.role - `"lead"` produces a plain string;
+ *   `"agent"` produces a `claude_code` preset object.
+ * @param {string} [opts.profile] - Profile basename
+ * @param {string} [opts.profilesDir]
+ * @param {string} opts.trailer - Mode-specific instructions
+ * @returns {string | {type: "preset", preset: "claude_code", append: string}}
+ * @throws {Error} "trailer is required" when `opts.trailer` is falsy,
+ *   regardless of `role`
+ */
+export function composeSystemPrompt({ role, profile, profilesDir, trailer }) {
+  if (!trailer) throw new Error("trailer is required");
+  if (role === "lead") {
+    return composeLeadPrompt({ profile, profilesDir, trailer });
+  }
+  if (profile) {
+    return composeProfilePrompt(profile, { profilesDir, trailer });
+  }
+  return { type: "preset", preset: "claude_code", append: trailer };
+}
 /**
  * Strip a leading YAML frontmatter fence (`---\n…\n---\n`) from a markdown
  * string. Returns the input unchanged when no frontmatter is present.

package/src/supervisor.js CHANGED Viewed

@@ -18,7 +18,7 @@
 import { Writable } from "node:stream";
 import { resolve } from "node:path";
 import { createAgentRunner } from "./agent-runner.js";
-import { composeProfilePrompt } from "./profile-prompt.js";
+import { composeSystemPrompt } from "./profile-prompt.js";
 import { createMessageBus } from "./message-bus.js";
 import {
   createOrchestrationContext,
@@ -27,23 +27,23 @@ import {
 } from "./orchestration-toolkit.js";
 import { OrchestrationLoop } from "./orchestration-loop.js";
-/** System prompt appended for the supervisor runner in supervise mode. */
+/** System prompt for the supervisor lead. L0 mechanics only per COALIGNED. */
 export const SUPERVISOR_SYSTEM_PROMPT =
-  "You supervise one agent named `agent`. " +
-  "Ask sends a question and returns immediately with {askIds:[N]}. The reply arrives on a later turn as `[answer#N] agent: <text>` in your inbox — between turns you can plan and reflect while the agent works. End your turn with text after asking; the orchestrator wakes you when the agent replies. " +
-  "Answer replies to an ask the agent addressed to you (you'll see it tagged `[ask#N] agent: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
-  "Announce delivers a message with no reply obligation. " +
-  "Conclude ends the session with a verdict ('success' or 'failure') and a summary; the verdict reflects whether the agent's work meets the criteria stated in the task. " +
-  "You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
-  "If the agent goes off-track, course-correct by issuing a new Ask with corrected instructions; each Ask carries a fresh askId, so a follow-up never collides with an earlier one.";
+  "You supervise one agent.\n" +
+  "You have no tools to perform work yourself.\n" +
+  "Use `Ask` to delegate work to the agent.\n" +
+  "`Ask` returns {askIds:[N]} immediately.\n" +
+  "The reply arrives on your next turn as `[answer#N] agent: <text>`.\n" +
+  "If the agent goes off-track, send a corrective `Ask`.\n" +
+  "End every session by calling `Conclude`.";
-/** System prompt appended for the agent runner in supervise mode. */
+/** System prompt for the supervised agent. L0 mechanics only per COALIGNED. */
 export const AGENT_SYSTEM_PROMPT =
-  "A supervisor watches your work. " +
-  "Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
-  "Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
-  "Ask sends a question to the supervisor and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] supervisor: <text>` in your inbox. " +
-  "Announce sends a message with no reply expected — use this for unsolicited remarks or to reply to an Announce.";
+  "A supervisor directs your work.\n" +
+  "Each question arrives as `[ask#N] supervisor: <text>`.\n" +
+  "Quote N as askId on your `Answer` to route the reply correctly.\n" +
+  "If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
+  "Do not redo completed work.";
 /**
  * Supervise-mode wrapper around `OrchestrationLoop`. The lead is
@@ -148,15 +148,6 @@ export function createSupervisor({
   if (!redactor) throw new Error("redactor is required");
   const resolvedProfilesDir =
     profilesDir ?? resolve(supervisorCwd, ".claude/agents");
-  const systemPromptFor = (profile, trailer) => {
-    if (!trailer) throw new Error("trailer is required");
-    return profile
-      ? composeProfilePrompt(profile, {
-          profilesDir: resolvedProfilesDir,
-          trailer,
-        })
-      : { type: "preset", preset: "claude_code", append: trailer };
-  };
   const ctx = createOrchestrationContext();
   const messageBus = createMessageBus({
@@ -183,15 +174,25 @@ export function createSupervisor({
     allowedTools,
     onLine: (line) => supervisor.emitLine("agent", line),
     settingSources: ["project"],
-    systemPrompt: systemPromptFor(agentProfile, AGENT_SYSTEM_PROMPT),
+    systemPrompt: composeSystemPrompt({
+      role: "agent",
+      profile: agentProfile,
+      profilesDir: resolvedProfilesDir,
+      trailer: AGENT_SYSTEM_PROMPT,
+    }),
     mcpServers: { orchestration: agentServer, ...agentMcpServers },
     redactor,
   });
-  // Block the SDK's sub-agent spawn tools on the supervisor: it should
-  // coordinate the agent through orchestration tools, not fan work out
-  // to ad-hoc Claude Code sub-agents.
-  const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
+  const defaultDisallowed = [
+    "Agent",
+    "Task",
+    "TaskOutput",
+    "TaskStop",
+    "Bash",
+    "Write",
+    "Edit",
+  ];
   const disallowedTools = supervisorDisallowedTools
     ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
     : defaultDisallowed;
@@ -202,18 +203,16 @@ export function createSupervisor({
     output: devNull,
     model: supervisorModel ?? model,
     maxTurns: perRunBudget,
-    allowedTools: supervisorAllowedTools ?? [
-      "Bash",
-      "Read",
-      "Glob",
-      "Grep",
-      "Write",
-      "Edit",
-    ],
+    allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep"],
     disallowedTools,
     onLine: (line) => supervisor.emitLine("supervisor", line),
     settingSources: ["project"],
-    systemPrompt: systemPromptFor(supervisorProfile, SUPERVISOR_SYSTEM_PROMPT),
+    systemPrompt: composeSystemPrompt({
+      role: "lead",
+      profile: supervisorProfile,
+      profilesDir: resolvedProfilesDir,
+      trailer: SUPERVISOR_SYSTEM_PROMPT,
+    }),
     mcpServers: { orchestration: supervisorServer },
     redactor,
   });