npm - @forwardimpact/libeval - Versions diffs - 0.1.20 → 0.1.22 - Mend

@forwardimpact/libeval 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/bin/fit-trace.js +49 -0
package/package.json +6 -3
package/src/agent-runner.js +5 -1
package/src/commands/facilitate.js +3 -2
package/src/commands/run.js +4 -2
package/src/commands/supervise.js +3 -2
package/src/commands/trace.js +46 -14
package/src/facilitator.js +78 -135
package/src/index.js +1 -0
package/src/message-bus.js +78 -13
package/src/orchestration-toolkit.js +211 -63
package/src/orchestrator-helpers.js +58 -0
package/src/render/tool-hints.js +3 -3
package/src/signature-filter.js +27 -0
package/src/supervisor.js +110 -38
package/src/tee-writer.js +21 -0
package/src/trace-collector.js +52 -3
package/src/trace-query.js +141 -28

package/src/supervisor.js CHANGED Viewed

@@ -4,8 +4,11 @@
  * introduces itself, and delegates work to the agent. The loop then alternates:
  * agent → supervisor → agent.
  *
- * Signaling uses orchestration tools (Conclude, Redirect, Ask) via in-process
- * MCP servers. No text-token detection.
+ * Signaling uses orchestration tools (Ask / Answer / Announce / Redirect /
+ * Conclude) via in-process MCP servers. The Ask/Answer contract is enforced
+ * at turn boundaries: an unanswered Ask triggers one synthetic reminder and
+ * then a `protocol_violation` trace event plus a null-answer injection so the
+ * session advances without silent deadlock.
  *
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
  */
@@ -16,27 +19,30 @@ import { createAgentRunner } from "./agent-runner.js";
 import { composeProfilePrompt } from "./profile-prompt.js";
 import { TraceCollector } from "./trace-collector.js";
 import { SequenceCounter } from "./sequence-counter.js";
+import { createMessageBus } from "./message-bus.js";
 import {
   createOrchestrationContext,
   createSupervisorToolServer,
   createSupervisedAgentToolServer,
+  checkPendingAsk,
 } from "./orchestration-toolkit.js";
+import { formatMessages } from "./orchestrator-helpers.js";
 /** System prompt appended for the supervisor runner in supervise mode. */
 export const SUPERVISOR_SYSTEM_PROMPT =
-  "You relay messages to one persistent agent session — your only output " +
-  "channel. Spawning sub-agents or restarting the agent is blocked. Do not " +
-  "do the work yourself. Reply briefly to let the agent continue. Use your " +
-  "Redirect tool to interrupt and correct the agent. Use your Conclude tool " +
-  "with a summary when the task is fully done. Only your final message each " +
-  "turn is relayed.";
+  "You supervise one agent. " +
+  "Ask sends a question to the agent; the reply arrives via Answer. " +
+  "Answer replies to an ask the agent addressed to you. " +
+  "Announce sends a message with no reply obligation. " +
+  "Redirect interrupts the agent with replacement instructions. " +
+  "Conclude ends the session with a summary.";
 /** System prompt appended for the agent runner in supervise mode. */
 export const AGENT_SYSTEM_PROMPT =
-  "A supervisor watches your work and may interrupt with new instructions " +
-  "mid-task. Treat any new prompt as authoritative and adjust course. " +
-  "When uncertain, use your Ask tool to ask the supervisor a clarifying " +
-  "question — you will receive a direct answer.";
+  "A supervisor watches your work. " +
+  "Answer replies to an ask addressed to you. " +
+  "Ask sends a question to the supervisor; the reply arrives via Answer. " +
+  "Announce sends a message with no reply expected.";
 /**
  * Maximum number of mid-turn interventions allowed within a single agent turn.
@@ -54,8 +60,18 @@ export class Supervisor {
    * @param {import("stream").Writable} deps.output - Stream to emit tagged NDJSON to
    * @param {number} [deps.maxTurns] - Maximum supervisor ↔ agent exchanges
    * @param {object} [deps.ctx] - Orchestration context (injected by factory)
+   * @param {import("./message-bus.js").MessageBus} [deps.messageBus] - Two-participant message bus ("supervisor" / "agent")
+   * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
    */
-  constructor({ agentRunner, supervisorRunner, output, maxTurns, ctx }) {
+  constructor({
+    agentRunner,
+    supervisorRunner,
+    output,
+    maxTurns,
+    ctx,
+    messageBus,
+    taskAmend,
+  }) {
     if (!agentRunner) throw new Error("agentRunner is required");
     if (!supervisorRunner) throw new Error("supervisorRunner is required");
     if (!output) throw new Error("output is required");
@@ -64,7 +80,11 @@ export class Supervisor {
     this.output = output;
     this.maxTurns = maxTurns ?? 100;
     this.ctx = ctx ?? createOrchestrationContext();
+    this.messageBus =
+      messageBus ?? createMessageBus({ participants: ["supervisor", "agent"] });
+    if (!this.ctx.messageBus) this.ctx.messageBus = this.messageBus;
     this.counter = new SequenceCounter();
+    this.taskAmend = taskAmend ?? null;
     /** @type {"agent"|"supervisor"} */
     this.currentSource = "agent";
     /** @type {number} */
@@ -77,9 +97,10 @@ export class Supervisor {
    * @returns {Promise<{success: boolean, turns: number}>}
    */
   async run(task) {
+    const initialTask = this.taskAmend ? `${task}\n\n${this.taskAmend}` : task;
     this.currentSource = "supervisor";
     this.currentTurn = 0;
-    let supervisorResult = await this.supervisorRunner.run(task);
+    let supervisorResult = await this.supervisorRunner.run(initialTask);
     if (supervisorResult.error) {
       this.emitSummary({ success: false, turns: 0 });
@@ -95,8 +116,7 @@ export class Supervisor {
     const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
     for (let turn = 1; turn <= turnLimit; turn++) {
       const relay =
-        pendingRelay ??
-        this.extractLastText(this.supervisorRunner, supervisorResult.text);
+        pendingRelay ?? this.#buildInitialRelay(supervisorResult.text);
       const turnOutcome = await this.#runAgentTurn(turn, relay);
       if (turnOutcome.exit) return turnOutcome.exit;
@@ -111,6 +131,22 @@ export class Supervisor {
     return { success: false, turns: this.maxTurns };
   }
+  #buildInitialRelay(fallbackText) { // Choose the text relayed to the agent when run() has no pending relay for this turn.
+    const queued = this.messageBus.drain("agent"); // messages the bus holds for the "agent" participant
+    if (queued.length > 0) return formatMessages(queued); // queued bus messages take precedence over the fallback
+    return this.extractLastText(this.supervisorRunner, fallbackText); // otherwise defer to extractLastText — presumably the supervisor's last text, with fallbackText as default; confirm against extractLastText
+  }
+  #checkAsk(name) { // Delegate to checkPendingAsk for participant `name`; call sites in this file pass "agent" or "supervisor" and compare the result to "recheck".
+    return checkPendingAsk({
+      ctx: this.ctx,
+      messageBus: this.messageBus,
+      addresseeName: name,
+      mode: "supervised",
+      emitViolation: (e) => this.emitOrchestratorEvent(e), // events handed to emitViolation are forwarded to emitOrchestratorEvent
+    });
+  }
   /**
    * Drive the agent through one turn, allowing the supervisor to interrupt
    * via the Redirect tool. Returns either an `exit` outcome (the loop should
@@ -122,6 +158,7 @@ export class Supervisor {
   async #runAgentTurn(turn, initialRelay) {
     let relay = initialRelay;
     let interventions = 0;
+    let agentCalled = this.agentRunner.sessionId !== null;
     this.agentRunner.onBatch = (batchLines, ctx) =>
       this.#midTurnReview(turn, batchLines, ctx);
@@ -130,10 +167,10 @@ export class Supervisor {
       while (true) {
         this.currentSource = "agent";
         this.currentTurn = turn;
-        const isFirstAgentCall = turn === 1 && interventions === 0;
-        const agentResult = isFirstAgentCall
-          ? await this.agentRunner.run(relay)
-          : await this.agentRunner.resume(relay);
+        const agentResult = agentCalled
+          ? await this.agentRunner.resume(relay)
+          : await this.agentRunner.run(relay);
+        agentCalled = true;
         if (agentResult.error && !agentResult.aborted) {
           this.emitSummary({ success: false, turns: turn });
@@ -162,6 +199,14 @@ export class Supervisor {
           continue;
         }
+        if (this.#checkAsk("agent") === "recheck" && !this.ctx.concluded) {
+          const reminders = this.messageBus.drain("agent");
+          if (reminders.length > 0) {
+            relay = formatMessages(reminders);
+            continue;
+          }
+        }
         return { exit: null };
       }
     } finally {
@@ -209,14 +254,20 @@ export class Supervisor {
    * @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object, relay?: string}>}
    */
   async #endOfTurnReview(turn) {
+    const queuedForSupervisor = this.messageBus.drain("supervisor");
     const agentTranscript = this.extractTranscript(this.agentRunner);
     this.currentSource = "supervisor";
     this.currentTurn = turn;
     this.ctx.redirect = null;
-    const supervisorResult = await this.supervisorRunner.resume(
-      `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`,
-    );
+    const reviewPrompt =
+      queuedForSupervisor.length > 0
+        ? `The agent reported:\n\n${agentTranscript}\n\n` +
+          `Agent messages:\n${formatMessages(queuedForSupervisor)}\n\n` +
+          `Review and decide how to proceed.`
+        : `The agent reported:\n\n${agentTranscript}\n\nReview the agent's work and decide how to proceed.`;
+    let supervisorResult = await this.supervisorRunner.resume(reviewPrompt);
     if (supervisorResult.error) {
       this.emitSummary({ success: false, turns: turn });
@@ -232,13 +283,34 @@ export class Supervisor {
       return { exit: { success: true, turns: turn } };
     }
+    if (this.#checkAsk("supervisor") === "recheck" && !this.ctx.concluded) {
+      const reminders = this.messageBus.drain("supervisor");
+      if (reminders.length > 0) {
+        supervisorResult = await this.supervisorRunner.resume(
+          formatMessages(reminders),
+        );
+        if (this.ctx.concluded) {
+          this.emitSummary({
+            success: true,
+            turns: turn,
+            summary: this.ctx.summary,
+          });
+          return { exit: { success: true, turns: turn } };
+        }
+        this.#checkAsk("supervisor");
+      }
+    }
     if (this.ctx.redirect) {
       const redirect = this.ctx.redirect;
       this.ctx.redirect = null;
       return { exit: null, supervisorResult, relay: redirect.message };
     }
-    return { exit: null, supervisorResult };
+    const queuedForAgent = this.messageBus.drain("agent");
+    const relay =
+      queuedForAgent.length > 0 ? formatMessages(queuedForAgent) : undefined;
+    return { exit: null, supervisorResult, relay };
   }
   /**
@@ -360,6 +432,7 @@ const devNull = new Writable({
  * @param {string} [deps.supervisorProfile] - Supervisor profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
  * @param {string} [deps.agentProfile] - Agent profile name; resolved into the main-thread system prompt via `composeProfilePrompt`.
  * @param {string} [deps.profilesDir] - Directory containing `<name>.md` profile files. Defaults to `<supervisorCwd>/.claude/agents`. Resolved once from the orchestrator's cwd so profiles travel with the project, not with a per-agent sandbox.
+ * @param {string} [deps.taskAmend] - Opaque addendum appended to the task before delivery.
  * @returns {Supervisor}
  */
 export function createSupervisor({
@@ -375,6 +448,7 @@ export function createSupervisor({
   supervisorProfile,
   agentProfile,
   profilesDir,
+  taskAmend,
 }) {
   const resolvedProfilesDir =
     profilesDir ?? resolve(supervisorCwd, ".claude/agents");
@@ -388,23 +462,19 @@ export function createSupervisor({
       : { type: "preset", preset: "claude_code", append: trailer };
   };
   let supervisor;
-  let supervisorRunner;
   const ctx = createOrchestrationContext();
+  const messageBus = createMessageBus({
+    participants: ["supervisor", "agent"],
+  });
+  ctx.messageBus = messageBus;
+  ctx.participants = [
+    { name: "supervisor", role: "supervisor" },
+    { name: "agent", role: "agent" },
+  ];
   const supervisorServer = createSupervisorToolServer(ctx);
-  const agentServer = createSupervisedAgentToolServer(ctx, {
-    onAsk: async (question) => {
-      supervisor.currentSource = "supervisor";
-      supervisor.emitOrchestratorEvent({ type: "ask_received" });
-      await supervisorRunner.resume(
-        `The agent asks: "${question}"\n\nAnswer the question directly.`,
-      );
-      supervisor.currentSource = "agent";
-      supervisor.emitOrchestratorEvent({ type: "ask_answered" });
-      return supervisor.extractLastText(supervisorRunner, "No answer.");
-    },
-  });
+  const agentServer = createSupervisedAgentToolServer(ctx);
   const onLine = (line) => supervisor.emitLine(line);
@@ -426,7 +496,7 @@ export function createSupervisor({
     ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
     : defaultDisallowed;
-  supervisorRunner = createAgentRunner({
+  const supervisorRunner = createAgentRunner({
     cwd: supervisorCwd,
     query,
     output: devNull,
@@ -453,6 +523,8 @@ export function createSupervisor({
     output,
     maxTurns,
     ctx,
+    messageBus,
+    taskAmend,
   });
   return supervisor;
 }

package/src/tee-writer.js CHANGED Viewed

@@ -163,6 +163,27 @@ export class TeeWriter extends Writable {
             withPrefix,
           }),
         );
+      } else if (turn.role === "system") {
+        const label = turn.subtype ?? "system";
+        this.textStream.write(
+          renderTextLine({
+            source: turn.source,
+            text: `[${label}]`,
+            withPrefix,
+          }),
+        );
+      } else if (turn.role === "user") {
+        for (const block of turn.content) {
+          if (block.type === "text") {
+            this.textStream.write(
+              renderTextLine({
+                source: turn.source,
+                text: `[user] ${block.text}`,
+                withPrefix,
+              }),
+            );
+          }
+        }
       }
     }
   }

package/src/trace-collector.js CHANGED Viewed

@@ -37,6 +37,8 @@ export class TraceCollector {
     this.result = null;
     /** @type {number} */
     this.turnIndex = 0;
+    /** @type {object|null} */
+    this.initEvent = null;
   }
   /**
@@ -73,7 +75,7 @@ export class TraceCollector {
     switch (event.type) {
       case "system":
-        this.handleSystem(event);
+        this.handleSystem(event, source);
         break;
       case "assistant":
         this.handleAssistant(event, source);
@@ -91,8 +93,11 @@ export class TraceCollector {
   /**
    * @param {object} event
+   * @param {string|null} source
    */
-  handleSystem(event) {
+  handleSystem(event, source) {
+    const { type: _type, ...payload } = event;
     if (event.subtype === "init") {
       this.metadata = {
         timestamp: event.timestamp ?? this.now(),
@@ -102,7 +107,16 @@ export class TraceCollector {
         tools: event.tools ?? [],
         permissionMode: event.permissionMode ?? null,
       };
+      this.initEvent = payload;
     }
+    this.turns.push({
+      index: this.turnIndex++,
+      role: "system",
+      source,
+      subtype: event.subtype ?? null,
+      data: payload,
+    });
   }
   /**
@@ -158,6 +172,19 @@ export class TraceCollector {
     const contentItems = message.content;
     if (!Array.isArray(contentItems)) return;
+    const textBlocks = contentItems
+      .filter((item) => item.type === "text")
+      .map((item) => ({ type: "text", text: item.text }));
+    if (textBlocks.length > 0) {
+      this.turns.push({
+        index: this.turnIndex++,
+        role: "user",
+        source,
+        content: textBlocks,
+      });
+    }
     for (const item of contentItems) {
       if (item.type === "tool_result") {
         this.turns.push({
@@ -204,7 +231,7 @@ export class TraceCollector {
    */
   toJSON() {
     return {
-      version: "1.0.0",
+      version: "1.1.0",
       metadata: this.metadata ?? {
         timestamp: this.now(),
         sessionId: null,
@@ -213,6 +240,7 @@ export class TraceCollector {
         tools: [],
         permissionMode: null,
       },
+      initEvent: this.initEvent ?? null,
       turns: this.turns,
       summary: this.result ?? {
         result: "unknown",
@@ -271,6 +299,27 @@ export class TraceCollector {
             withPrefix,
           }),
         );
+      } else if (turn.role === "system") {
+        const label = turn.subtype ?? "system";
+        out.push(
+          renderTextLine({
+            source: turn.source,
+            text: `[${label}]`,
+            withPrefix,
+          }),
+        );
+      } else if (turn.role === "user") {
+        for (const block of turn.content) {
+          if (block.type === "text") {
+            out.push(
+              renderTextLine({
+                source: turn.source,
+                text: `[user] ${block.text}`,
+                withPrefix,
+              }),
+            );
+          }
+        }
       }
     }

package/src/trace-query.js CHANGED Viewed

@@ -17,18 +17,90 @@ export class TraceQuery {
   }
   /**
-   * High-level overview: metadata, summary, turn count, and tool frequency.
+   * High-level overview: metadata, summary, turn count, tool frequency,
+   * and the first user message text (taskPrompt) when present.
+   *
+   * `taskPrompt` is built from the first turn whose role is "user": the
+   * text of each of its `text` blocks, joined with newlines. It is null
+   * when the trace contains no user turn.
    * @returns {object}
    */
   overview() {
+    const firstUser = this.turns.find((t) => t.role === "user"); // undefined when no user turn exists
+    const taskPrompt = firstUser
+      ? firstUser.content
+          .filter((b) => b.type === "text")
+          .map((b) => b.text)
+          .join("\n")
+      : null;
     return {
       metadata: this.metadata,
       summary: this.summary,
       turnCount: this.turns.length,
       tools: this.toolFrequency(),
+      taskPrompt,
     };
   }
+  /**
+   * Full system/init event — the single most diagnostic message for
+   * root-cause analysis. Returns null for traces collected before this
+   * field existed.
+   *
+   * Reads `initEvent` from the underlying trace object; a missing
+   * (undefined) or null value is normalised to null.
+   * @returns {object|null} The trace's `initEvent`, or null when absent.
+   */
+  init() {
+    return this.trace.initEvent ?? null;
+  }
+  /**
+   * Retrieve a single turn by its index.
+   *
+   * The lookup compares each turn's own `index` property with strict
+   * equality; it does not use the turn's position in the array.
+   * @param {number} index - Value to match against `turn.index`.
+   * @returns {object|null} The first matching turn, or null when none matches.
+   */
+  turn(index) {
+    return this.turns.find((t) => t.index === index) ?? null;
+  }
+  /**
+   * Filter turns by composable structural criteria. Every criterion that
+   * is supplied must hold (logical AND); a criterion left undefined is
+   * ignored, so calling `filter()` with no options keeps every turn.
+   * `tool()` and `errors()` remain as convenience shortcuts for
+   * pre-existing workflows.
+   *
+   * `toolName` keeps only assistant turns that contain a `tool_use` block
+   * with that name. Every non-assistant turn is dropped even when `role`
+   * is not given, because resolving tool_use → tool_result pairs requires
+   * the `tool()` method.
+   * `isError` keeps only tool_result turns whose `isError` strictly equals
+   * the given value. Combining `toolName` with `isError` therefore always
+   * returns `[]` (no turn is both assistant and tool_result) — use
+   * `tool(name)` for "errors from Bash"–shaped queries.
+   *
+   * @param {object} [opts]
+   * @param {string} [opts.role] - Exact role match (system | user |
+   *   assistant | tool_result).
+   * @param {string} [opts.toolName] - Matches assistant turns with a
+   *   tool_use block of this name. Drops all non-assistant turns.
+   * @param {boolean} [opts.isError] - Matches tool_result turns by
+   *   `isError` value. Drops all non-tool_result turns.
+   * @returns {object[]} The turns that satisfy every supplied criterion.
+   */
+  filter(opts = {}) {
+    const { role, toolName, isError } = opts;
+    return this.turns.filter((turn) => {
+      if (role !== undefined && turn.role !== role) return false;
+      if (isError !== undefined) {
+        if (turn.role !== "tool_result") return false; // isError only applies to tool_result turns
+        if (turn.isError !== isError) return false;
+      }
+      if (toolName !== undefined) {
+        if (turn.role === "assistant") {
+          const has = turn.content.some(
+            (b) => b.type === "tool_use" && b.name === toolName,
+          );
+          if (!has) return false;
+        } else {
+          return false; // toolName only applies to assistant turns
+        }
+      }
+      return true;
+    });
+  }
   /** @returns {number} */
   count() {
     return this.turns.length;
@@ -73,16 +145,18 @@ export class TraceQuery {
    * @param {object} [opts]
    * @param {number} [opts.context=0] - Number of surrounding turns to include
    * @param {number} [opts.limit=50] - Max results
+   * @param {boolean} [opts.full=false] - Emit full content block text in
+   *   match descriptions instead of the default narrow excerpt window.
    * @returns {object[]} Array of {turn, matches, context?}
    */
   search(pattern, opts = {}) {
-    const { context = 0, limit = 50 } = opts;
+    const { context = 0, limit = 50, full = false } = opts;
     // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
     const re = new RegExp(pattern, "gi");
     const hits = [];
     for (const turn of this.turns) {
-      const matches = matchTurn(turn, re);
+      const matches = matchTurn(turn, re, full);
       if (matches.length > 0) {
         const entry = { turn, matches };
         if (context > 0) {
@@ -273,40 +347,79 @@ export class TraceQuery {
  * Search a single turn for regex matches. Returns array of match descriptions.
  * @param {object} turn
  * @param {RegExp} re
+ * @param {boolean} [full=false] - Emit full block text instead of an excerpt.
  * @returns {string[]}
  */
-function matchTurn(turn, re) {
+function matchTurn(turn, re, full = false) { // Dispatch on turn.role to the role-specific matcher.
+  if (turn.role === "assistant") return matchAssistantTurn(turn, re, full);
+  if (turn.role === "tool_result") return matchToolResultTurn(turn, re, full);
+  if (turn.role === "user") return matchUserTurn(turn, re, full);
+  return []; // any other role (e.g. "system") yields no matches
+}
+function matchAssistantTurn(turn, re, full) { // Collect match descriptions from an assistant turn's text and tool_use blocks.
   const matches = [];
-  if (turn.role === "assistant") {
-    for (const block of turn.content) {
-      if (block.type === "text" && re.test(block.text)) {
-        re.lastIndex = 0;
-        matches.push(`text: ${excerptAround(block.text, re)}`);
-      }
-      if (block.type === "tool_use") {
-        if (re.test(block.name)) {
-          re.lastIndex = 0;
-          matches.push(`tool_name: ${block.name}`);
-        }
-        const inputStr = JSON.stringify(block.input);
-        if (re.test(inputStr)) {
-          re.lastIndex = 0;
-          matches.push(
-            `tool_input(${block.name}): ${excerptAround(inputStr, re)}`,
-          );
-        }
-      }
+  for (const block of turn.content) {
+    if (block.type === "text") {
+      const desc = describeText(block.text, re, "text", full); // null when the text does not match
+      if (desc) matches.push(desc);
+    } else if (block.type === "tool_use") {
+      matches.push(...matchToolUseBlock(block, re, full)); // may add a tool_name and/or a tool_input description
     }
-  } else if (turn.role === "tool_result") {
-    const content = turn.content ?? "";
-    if (re.test(content)) {
-      re.lastIndex = 0;
-      matches.push(`result: ${excerptAround(content, re)}`);
+  }
+  return matches;
+}
+function matchToolUseBlock(block, re, full) { // Match descriptions for one tool_use block: its name and its JSON-serialised input.
+  const matches = [];
+  if (re.test(block.name)) {
+    re.lastIndex = 0; // rewind after a successful test; a regex with the g flag would otherwise resume mid-string on its next use
+    matches.push(`tool_name: ${block.name}`);
+  }
+  const inputStr = JSON.stringify(block.input); // the input is searched in its serialised form
+  const inputDesc = describeText(
+    inputStr,
+    re,
+    `tool_input(${block.name})`,
+    full,
+  );
+  if (inputDesc) matches.push(inputDesc);
+  return matches;
+}
+function matchToolResultTurn(turn, re, full) { // Match a tool_result turn's content; returns zero or one "result: …" description.
+  const content = turn.content ?? ""; // null/undefined content is searched as the empty string
+  const desc = describeText(content, re, "result", full);
+  return desc ? [desc] : [];
+}
+function matchUserTurn(turn, re, full) { // Collect "user_text: …" descriptions from a user turn's text blocks.
+  const matches = [];
+  for (const block of turn.content ?? []) { // a turn without content contributes no matches
+    if (block.type === "text") {
+      const desc = describeText(block.text, re, "user_text", full);
+      if (desc) matches.push(desc);
     }
   }
   return matches;
 }
+/**
+ * Return a `<prefix>: <text-or-excerpt>` description when `text` matches
+ * the regex, or null when it does not. Centralises the full-vs-excerpt
+ * choice so each call site just supplies its prefix.
+ *
+ * After a successful test, `re.lastIndex` is reset to 0 before the
+ * description is built. `search()` constructs its regex with the "gi"
+ * flags, and a global regex advances `lastIndex` on every successful
+ * `test`, so without the rewind the excerpt lookup and the next call
+ * would start mid-string.
+ * @param {string} text - Text to test against the regex.
+ * @param {RegExp} re - Pattern to test; its `lastIndex` is rewound on a match.
+ * @param {string} prefix - Label placed before the colon in the description.
+ * @param {boolean} full - When true, emit `text` whole; otherwise emit
+ *   the result of `excerptAround(text, re)`.
+ * @returns {string|null} The description, or null when `text` does not match.
+ */
+function describeText(text, re, prefix, full) {
+  if (!re.test(text)) return null;
+  re.lastIndex = 0;
+  return `${prefix}: ${full ? text : excerptAround(text, re)}`;
+}
 /**
  * Extract a short excerpt around the first regex match in text.
  * @param {string} text