npm - @forwardimpact/libeval - Version diff - 0.1.26 → 0.1.28 - Mend

@forwardimpact/libeval 0.1.26 → 0.1.28

This diff compares the contents of two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (19)

package/README.md +6 -1
package/bin/fit-eval.js +7 -7
package/bin/fit-trace.js +7 -7
package/package.json +21 -13
package/src/agent-runner.js +18 -2
package/src/commands/facilitate.js +4 -0
package/src/commands/run.js +4 -0
package/src/commands/supervise.js +4 -0
package/src/facilitator.js +36 -28
package/src/message-bus.js +1 -0
package/src/orchestration-toolkit.js +3 -0
package/src/orchestrator-helpers.js +1 -0
package/src/render/turn-renderer.js +92 -0
package/src/sequence-counter.js +4 -0
package/src/supervisor.js +61 -28
package/src/tee-writer.js +4 -60
package/src/trace-collector.js +18 -70
package/src/trace-github.js +0 -1
package/src/trace-query.js +69 -43

package/README.md CHANGED Viewed

@@ -1,6 +1,11 @@
 # libeval
-Process Claude Code stream-json output into structured traces.
+<!-- BEGIN:description — Do not edit. Generated from package.json. -->
+Agent evaluation framework — prove whether agent changes improved outcomes with
+reproducible evidence.
+<!-- END:description -->
 ## Getting Started

package/bin/fit-eval.js CHANGED Viewed

@@ -177,20 +177,20 @@ const definition = {
   ],
   documentation: [
     {
-      title: "Agent Evaluations",
-      url: "https://www.forwardimpact.team/docs/libraries/agent-evaluations/index.md",
+      title: "Run an Eval",
+      url: "https://www.forwardimpact.team/docs/libraries/prove-changes/run-eval/index.md",
       description:
         "Author a judge profile, run an eval locally, wire it into CI, and inspect the resulting trace.",
     },
     {
-      title: "Agent Collaboration",
-      url: "https://www.forwardimpact.team/docs/libraries/agent-collaboration/index.md",
+      title: "Prove Agent Changes",
+      url: "https://www.forwardimpact.team/docs/libraries/prove-changes/index.md",
       description:
-        "Author a facilitator and participant profiles, run a multi-agent session, and read the message flow.",
+        "End-to-end workflow from dataset generation through evaluation to trace analysis, including multi-agent collaboration sessions.",
     },
     {
-      title: "Trace Analysis",
-      url: "https://www.forwardimpact.team/docs/libraries/trace-analysis/index.md",
+      title: "Analyze Traces",
+      url: "https://www.forwardimpact.team/docs/libraries/prove-changes/trace-analysis/index.md",
       description:
         "Read the NDJSON traces produced by `fit-eval` with `fit-trace` — grounded-theory method and worked examples.",
     },

package/bin/fit-trace.js CHANGED Viewed

@@ -214,22 +214,22 @@ const definition = {
   ],
   documentation: [
     {
-      title: "Trace Analysis",
-      url: "https://www.forwardimpact.team/docs/libraries/trace-analysis/index.md",
+      title: "Analyze Traces",
+      url: "https://www.forwardimpact.team/docs/libraries/prove-changes/trace-analysis/index.md",
       description:
         "The full method walkthrough with worked examples (an eval that failed, a multi-agent session that stalled).",
     },
     {
-      title: "Agent Evaluations",
-      url: "https://www.forwardimpact.team/docs/libraries/agent-evaluations/index.md",
+      title: "Run an Eval",
+      url: "https://www.forwardimpact.team/docs/libraries/prove-changes/run-eval/index.md",
       description:
         "How `fit-eval supervise` produces the traces this skill analyzes.",
     },
     {
-      title: "Agent Collaboration",
-      url: "https://www.forwardimpact.team/docs/libraries/agent-collaboration/index.md",
+      title: "Prove Agent Changes",
+      url: "https://www.forwardimpact.team/docs/libraries/prove-changes/index.md",
       description:
-        "How `fit-eval facilitate` produces multi-agent traces; `split` is the bridge into per-source files.",
+        "End-to-end workflow including multi-agent collaboration; `split` is the bridge into per-source trace files.",
     },
   ],
 };

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@forwardimpact/libeval",
-  "version": "0.1.26",
-  "description": "Agent evaluation: collect Claude Code traces, run agent loops, supervise multi-step workflows.",
+  "version": "0.1.28",
+  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
   "keywords": [
     "eval",
     "agent",
@@ -9,16 +9,24 @@
     "claude-code",
     "supervisor"
   ],
-  "forwardimpact": {
-    "capability": "agent-self-improvement",
-    "needs": [
-      "Parse and query Claude Code trace NDJSON files",
-      "Drive an LLM agent through a scripted run and capture its trace",
-      "Supervise a multi-step or multi-agent workflow"
-    ]
+  "homepage": "https://www.forwardimpact.team",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/forwardimpact/monorepo.git",
+    "directory": "libraries/libeval"
   },
   "license": "Apache-2.0",
   "author": "D. Olsson <hi@senzilla.io>",
+  "jobs": [
+    {
+      "user": "Platform Builders",
+      "goal": "Prove Agent Changes",
+      "trigger": "An eval passes locally but fails in CI and the only output is 'assertion failed.'",
+      "bigHire": "prove whether agent changes improved outcomes with reproducible evidence.",
+      "littleHire": "run an eval and get a trace that shows exactly what the agent did.",
+      "competesWith": "manual before/after comparison; trusting gut feeling over evidence; skipping evaluation entirely"
+    }
+  ],
   "type": "module",
   "main": "./src/index.js",
   "exports": {
@@ -35,10 +43,6 @@
     "bin/**/*.js",
     "README.md"
   ],
-  "engines": {
-    "bun": ">=1.2.0",
-    "node": ">=18.0.0"
-  },
   "scripts": {
     "test": "bun test test/*.test.js"
   },
@@ -52,6 +56,10 @@
   "devDependencies": {
     "@forwardimpact/libharness": "^0.1.14"
   },
+  "engines": {
+    "bun": ">=1.2.0",
+    "node": ">=18.0.0"
+  },
   "publishConfig": {
     "access": "public"
   }

package/src/agent-runner.js CHANGED Viewed

@@ -32,6 +32,7 @@ function applyDefaults(deps) {
   };
 }
+/** Run a single Claude Agent SDK session and emit raw NDJSON events to an output stream. */
 export class AgentRunner {
   /**
    * @param {object} deps
@@ -211,8 +212,9 @@ export class AgentRunner {
     if (message.type === "system" && message.subtype === "init") {
       this.sessionId = message.session_id;
     }
-    if (message.type === "assistant" && hasTextBlock(message)) {
-      state.assistantTextCount++;
+    if (message.type === "assistant") {
+      if (hasTextBlock(message)) state.assistantTextCount++;
+      trackSkillInvocation(message);
     }
   }
@@ -293,6 +295,20 @@ export function hasTextBlock(message) {
   return false;
 }
+function trackSkillInvocation(message) {
+  const content = message.message?.content ?? message.content;
+  if (!Array.isArray(content)) return;
+  for (const block of content) {
+    if (
+      block.type === "tool_use" &&
+      block.name === "Skill" &&
+      block.input?.skill
+    ) {
+      process.env.LIBEVAL_SKILL = block.input.skill;
+    }
+  }
+}
 /**
  * Factory function — wires real dependencies.
  * @param {object} deps - Same as AgentRunner constructor

package/src/commands/facilitate.js CHANGED Viewed

@@ -73,6 +73,10 @@ export async function runFacilitateCommand(values, _args) {
       })
     : process.stdout;
+  if (opts.facilitatorProfile) {
+    process.env.LIBEVAL_AGENT_PROFILE = opts.facilitatorProfile;
+  }
   const { query } = await import("@anthropic-ai/claude-agent-sdk");
   const facilitator = createFacilitator({
     facilitatorCwd: opts.facilitatorCwd,

package/src/commands/run.js CHANGED Viewed

@@ -78,6 +78,10 @@ export async function runRunCommand(values, _args) {
     );
   };
+  if (agentProfile) {
+    process.env.LIBEVAL_AGENT_PROFILE = agentProfile;
+  }
   const systemPrompt = agentProfile
     ? composeProfilePrompt(agentProfile, {
         profilesDir: resolve(cwd, ".claude/agents"),

package/src/commands/supervise.js CHANGED Viewed

@@ -71,6 +71,10 @@ export async function runSuperviseCommand(values, _args) {
       })
     : process.stdout;
+  if (opts.agentProfile) {
+    process.env.LIBEVAL_AGENT_PROFILE = opts.agentProfile;
+  }
   const { query } = await import("@anthropic-ai/claude-agent-sdk");
   const supervisor = createSupervisor({
     supervisorCwd: opts.supervisorCwd,

package/src/facilitator.js CHANGED Viewed

@@ -36,6 +36,7 @@ export const FACILITATED_AGENT_SYSTEM_PROMPT =
   "Announce broadcasts a message. " +
   "RollCall lists participants.";
+/** Orchestrate N agent sessions coordinated by a single facilitator LLM session. */
 export class Facilitator {
   /**
    * @param {object} deps
@@ -180,42 +181,48 @@ export class Facilitator {
     let messages = this.messageBus.drain(agent.name);
     if (messages.length === 0) return;
-    this.emitOrchestratorEvent({
-      type: "agent_start",
-      agent: agent.name,
-    });
+    this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
     await agent.runner.run(formatMessages(messages));
-    if (this.ctx.concluded) return;
+    if (await this.#settleAgentTurn(agent)) return;
+    // Loop: check for new messages, resume if any
+    while (!this.ctx.concluded) {
+      messages = await this.#awaitAgentMessages(agent.name);
+      if (messages.length === 0) break;
+      await agent.runner.resume(formatMessages(messages));
+      if (await this.#settleAgentTurn(agent)) break;
+    }
+  }
+  /**
+   * Enforce pending-ask and emit turn_complete. Returns true when the
+   * session has concluded and the caller should stop.
+   */
+  async #settleAgentTurn(agent) {
+    if (this.ctx.concluded) return true;
     await this.#enforcePendingAsk(agent);
-    if (this.ctx.concluded) return;
+    if (this.ctx.concluded) return true;
     this.eventQueue.enqueue({
       type: "lifecycle",
       agent: agent.name,
       status: "turn_complete",
     });
+    return false;
+  }
-    // Loop: check for new messages, resume if any
-    while (!this.ctx.concluded) {
-      messages = this.messageBus.drain(agent.name);
-      if (messages.length === 0) {
-        await Promise.race([
-          this.messageBus.waitForMessages(agent.name),
-          this.concludePromise,
-        ]);
-        if (this.ctx.concluded) break;
-        messages = this.messageBus.drain(agent.name);
-        if (messages.length === 0) break;
-      }
-      await agent.runner.resume(formatMessages(messages));
-      if (this.ctx.concluded) break;
-      await this.#enforcePendingAsk(agent);
-      if (this.ctx.concluded) break;
-      this.eventQueue.enqueue({
-        type: "lifecycle",
-        agent: agent.name,
-        status: "turn_complete",
-      });
-    }
+  /**
+   * Wait for messages addressed to `name`, returning an empty array when
+   * the session concludes first.
+   */
+  async #awaitAgentMessages(name) {
+    const messages = this.messageBus.drain(name);
+    if (messages.length > 0) return messages;
+    await Promise.race([
+      this.messageBus.waitForMessages(name),
+      this.concludePromise,
+    ]);
+    if (this.ctx.concluded) return [];
+    return this.messageBus.drain(name);
   }
   /**
@@ -290,6 +297,7 @@ export class Facilitator {
     }
   }
+  /** Return the last assistant text block from a runner's buffer, or the fallback if none exists. */
   extractLastText(runner, fallback) {
     const lines = runner.buffer;
     for (let i = lines.length - 1; i >= 0; i--) {

package/src/message-bus.js CHANGED Viewed

@@ -12,6 +12,7 @@
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
  */
+/** In-memory per-participant message queues for facilitated and supervised orchestration modes. */
 export class MessageBus {
   /**
    * @param {object} deps

package/src/orchestration-toolkit.js CHANGED Viewed

@@ -37,6 +37,7 @@ export function createOrchestrationContext() {
 // --- Handler factories ---
+/** Create a handler that marks the session as concluded and records the summary. */
 export function createConcludeHandler(ctx) {
   return async ({ summary }) => {
     ctx.concluded = true;
@@ -45,6 +46,7 @@ export function createConcludeHandler(ctx) {
   };
 }
+/** Create a handler that queues a redirect to interrupt a participant with replacement instructions. */
 export function createRedirectHandler(ctx) {
   return async ({ message, to }) => {
     ctx.redirect = { message, to: to ?? null };
@@ -52,6 +54,7 @@ export function createRedirectHandler(ctx) {
   };
 }
+/** Create a handler that returns the list of all session participants and their roles. */
 export function createRollCallHandler(ctx) {
   return async () => {
     return {

package/src/orchestrator-helpers.js CHANGED Viewed

@@ -5,6 +5,7 @@
  * - `formatMessages`    — render a drained message batch as tagged lines.
  */
+/** Create a promise-based async queue for serializing event delivery to the facilitator loop. */
 export function createAsyncQueue() {
   const items = [];
   let waiter = null;

package/src/render/turn-renderer.js ADDED Viewed

@@ -0,0 +1,92 @@
+/**
+ * Turn renderer — maps a structured turn into formatted text lines.
+ *
+ * Shared by `TeeWriter.flushTurns()` (live stream) and
+ * `TraceCollector.toText()` (offline replay) so both emit identical output
+ * (spec 540).
+ */
+import {
+  renderTextLine,
+  renderToolCallLine,
+  renderToolResultLine,
+} from "./line-renderer.js";
+import {
+  hintForCall,
+  previewForResult,
+  simplifyToolName,
+} from "./tool-hints.js";
+/**
+ * Render a single turn to formatted text lines.
+ *
+ * @param {object} turn - Structured turn object
+ * @param {boolean} withPrefix - Whether to include source labels
+ * @returns {string[]} Array of rendered line strings
+ */
+export function renderTurnLines(turn, withPrefix) {
+  if (turn.role === "assistant") return renderAssistantTurn(turn, withPrefix);
+  if (turn.role === "tool_result")
+    return renderToolResultTurn(turn, withPrefix);
+  if (turn.role === "system") return renderSystemTurn(turn, withPrefix);
+  if (turn.role === "user") return renderUserTurn(turn, withPrefix);
+  return [];
+}
+/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
+function renderAssistantTurn(turn, withPrefix) {
+  const lines = [];
+  for (const block of turn.content) {
+    if (block.type === "text") {
+      lines.push(
+        renderTextLine({ source: turn.source, text: block.text, withPrefix }),
+      );
+    } else if (block.type === "tool_use") {
+      lines.push(
+        renderToolCallLine({
+          source: turn.source,
+          toolName: simplifyToolName(block.name),
+          hint: hintForCall(block.name, block.input),
+          withPrefix,
+        }),
+      );
+    }
+  }
+  return lines;
+}
+/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
+function renderToolResultTurn(turn, withPrefix) {
+  return [
+    renderToolResultLine({
+      source: turn.source,
+      preview: previewForResult(turn.content, turn.isError),
+      withPrefix,
+    }),
+  ];
+}
+/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
+function renderSystemTurn(turn, withPrefix) {
+  const label = turn.subtype ?? "system";
+  return [
+    renderTextLine({ source: turn.source, text: `[${label}]`, withPrefix }),
+  ];
+}
+/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
+function renderUserTurn(turn, withPrefix) {
+  const lines = [];
+  for (const block of turn.content) {
+    if (block.type === "text") {
+      lines.push(
+        renderTextLine({
+          source: turn.source,
+          text: `[user] ${block.text}`,
+          withPrefix,
+        }),
+      );
+    }
+  }
+  return lines;
+}

package/src/sequence-counter.js CHANGED Viewed

@@ -2,16 +2,20 @@
  * SequenceCounter — global monotonic counter shared across all participants
  * in a session. Single-threaded JS means no synchronization needed.
  */
+/** Monotonic counter that assigns globally ordered sequence numbers within a session. */
 export class SequenceCounter {
+  /** Initialize the counter at zero. */
   constructor() {
     this.value = 0;
   }
+  /** Return the current value and advance the counter by one. */
   next() {
     return this.value++;
   }
 }
+/** Create a new SequenceCounter starting at zero. */
 export function createSequenceCounter() {
   return new SequenceCounter();
 }

package/src/supervisor.js CHANGED Viewed

@@ -4,8 +4,9 @@
  * introduces itself, and delegates work to the agent. The loop then alternates:
  * agent → supervisor → agent.
  *
- * Signaling uses orchestration tools (Ask / Answer / Announce / Redirect /
- * Conclude) via in-process MCP servers. The Ask/Answer contract is enforced
+ * Signaling uses orchestration tools (Ask / Announce / Redirect / Conclude)
+ * via in-process MCP servers; the supervisor has no Answer tool — agent replies
+ * are routed back through the relay loop. The Ask/Answer contract is enforced
  * at turn boundaries: an unanswered Ask triggers one synthetic reminder and
  * then a `protocol_violation` trace event plus a null-answer injection so the
  * session advances without silent deadlock.
@@ -52,6 +53,7 @@ export const AGENT_SYSTEM_PROMPT =
  */
 const MAX_INTERVENTIONS_PER_TURN = 5;
+/** Orchestrate a relay loop between a supervisor LLM and an agent LLM with mid-turn review. */
 export class Supervisor {
   /**
    * @param {object} deps
@@ -172,39 +174,26 @@ export class Supervisor {
           : await this.agentRunner.run(relay);
         agentCalled = true;
-        if (agentResult.error && !agentResult.aborted) {
-          this.emitSummary({ success: false, turns: turn });
-          return { exit: { success: false, turns: turn } };
-        }
+        const outcome = this.#classifyAgentOutcome(
+          agentResult,
+          turn,
+          interventions,
+        );
-        if (this.ctx.concluded) {
-          this.emitSummary({
-            success: true,
-            turns: turn,
-            summary: this.ctx.summary,
-          });
-          return { exit: { success: true, turns: turn } };
-        }
+        if (outcome.type === "exit") return { exit: outcome.exit };
+        if (outcome.type === "intervention_limit") return { exit: null };
-        if (agentResult.aborted && this.ctx.redirect) {
+        if (outcome.type === "redirect") {
           interventions++;
-          const redirect = this.ctx.redirect;
-          this.ctx.redirect = null;
-          if (interventions >= MAX_INTERVENTIONS_PER_TURN) {
-            this.emitOrchestratorEvent({ type: "intervention_limit", turn });
-            return { exit: null };
-          }
-          relay = redirect.message;
+          relay = outcome.relay;
           this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
           continue;
         }
-        if (this.#checkAsk("agent") === "recheck" && !this.ctx.concluded) {
-          const reminders = this.messageBus.drain("agent");
-          if (reminders.length > 0) {
-            relay = formatMessages(reminders);
-            continue;
-          }
+        const askRelay = this.#drainAgentAskRelay();
+        if (askRelay) {
+          relay = askRelay;
+          continue;
         }
         return { exit: null };
@@ -214,6 +203,50 @@ export class Supervisor {
     }
   }
+  /**
+   * Classify the outcome of a single agent execution within #runAgentTurn.
+   * @returns {{type: string, exit?: object|null, relay?: string}}
+   */
+  #classifyAgentOutcome(agentResult, turn, interventions) {
+    if (agentResult.error && !agentResult.aborted) {
+      this.emitSummary({ success: false, turns: turn });
+      return { type: "exit", exit: { success: false, turns: turn } };
+    }
+    if (this.ctx.concluded) {
+      this.emitSummary({
+        success: true,
+        turns: turn,
+        summary: this.ctx.summary,
+      });
+      return { type: "exit", exit: { success: true, turns: turn } };
+    }
+    if (agentResult.aborted && this.ctx.redirect) {
+      const redirect = this.ctx.redirect;
+      this.ctx.redirect = null;
+      if (interventions + 1 >= MAX_INTERVENTIONS_PER_TURN) {
+        this.emitOrchestratorEvent({ type: "intervention_limit", turn });
+        return { type: "intervention_limit" };
+      }
+      return { type: "redirect", relay: redirect.message };
+    }
+    return { type: "continue" };
+  }
+  /**
+   * If the agent has an unanswered ask, drain reminders and return a
+   * formatted relay string. Returns null when no relay is needed.
+   * @returns {string|null}
+   */
+  #drainAgentAskRelay() {
+    if (this.#checkAsk("agent") !== "recheck" || this.ctx.concluded)
+      return null;
+    const reminders = this.messageBus.drain("agent");
+    return reminders.length > 0 ? formatMessages(reminders) : null;
+  }
   /**
    * Mid-turn supervisor review fired from inside the agent's onBatch hook.
    * Runs the supervisor's LLM against the batch and aborts the agent if

package/src/tee-writer.js CHANGED Viewed

@@ -17,18 +17,10 @@
 import { Writable } from "node:stream";
 import { TraceCollector } from "./trace-collector.js";
-import {
-  renderTextLine,
-  renderToolCallLine,
-  renderToolResultLine,
-} from "./render/line-renderer.js";
-import {
-  hintForCall,
-  previewForResult,
-  simplifyToolName,
-} from "./render/tool-hints.js";
+import { renderTurnLines } from "./render/turn-renderer.js";
 import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
+/** Writable stream that saves raw NDJSON to a file while streaming human-readable text to a display stream. */
 export class TeeWriter extends Writable {
   /**
    * @param {object} deps
@@ -134,56 +126,8 @@ export class TeeWriter extends Writable {
     const withPrefix = this.mode !== "raw";
     while (this.turnsEmitted < turns.length) {
       const turn = turns[this.turnsEmitted++];
-      if (turn.role === "assistant") {
-        for (const block of turn.content) {
-          if (block.type === "text") {
-            this.textStream.write(
-              renderTextLine({
-                source: turn.source,
-                text: block.text,
-                withPrefix,
-              }),
-            );
-          } else if (block.type === "tool_use") {
-            this.textStream.write(
-              renderToolCallLine({
-                source: turn.source,
-                toolName: simplifyToolName(block.name),
-                hint: hintForCall(block.name, block.input),
-                withPrefix,
-              }),
-            );
-          }
-        }
-      } else if (turn.role === "tool_result") {
-        this.textStream.write(
-          renderToolResultLine({
-            source: turn.source,
-            preview: previewForResult(turn.content, turn.isError),
-            withPrefix,
-          }),
-        );
-      } else if (turn.role === "system") {
-        const label = turn.subtype ?? "system";
-        this.textStream.write(
-          renderTextLine({
-            source: turn.source,
-            text: `[${label}]`,
-            withPrefix,
-          }),
-        );
-      } else if (turn.role === "user") {
-        for (const block of turn.content) {
-          if (block.type === "text") {
-            this.textStream.write(
-              renderTextLine({
-                source: turn.source,
-                text: `[user] ${block.text}`,
-                withPrefix,
-              }),
-            );
-          }
-        }
+      for (const line of renderTurnLines(turn, withPrefix)) {
+        this.textStream.write(line);
       }
     }
   }

package/src/trace-collector.js CHANGED Viewed

@@ -9,18 +9,10 @@
  * one formatting path (spec 540).
  */
-import {
-  renderTextLine,
-  renderToolCallLine,
-  renderToolResultLine,
-} from "./render/line-renderer.js";
-import {
-  hintForCall,
-  previewForResult,
-  simplifyToolName,
-} from "./render/tool-hints.js";
+import { renderTurnLines } from "./render/turn-renderer.js";
 import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
+/** Accumulate Claude Code NDJSON stream events into structured traces for analysis or text replay. */
 export class TraceCollector {
   /**
    * @param {object} [deps]
@@ -270,68 +262,10 @@ export class TraceCollector {
     const out = [];
     for (const turn of this.turns) {
-      if (turn.role === "assistant") {
-        for (const block of turn.content) {
-          if (block.type === "text") {
-            out.push(
-              renderTextLine({
-                source: turn.source,
-                text: block.text,
-                withPrefix,
-              }),
-            );
-          } else if (block.type === "tool_use") {
-            out.push(
-              renderToolCallLine({
-                source: turn.source,
-                toolName: simplifyToolName(block.name),
-                hint: hintForCall(block.name, block.input),
-                withPrefix,
-              }),
-            );
-          }
-        }
-      } else if (turn.role === "tool_result") {
-        out.push(
-          renderToolResultLine({
-            source: turn.source,
-            preview: previewForResult(turn.content, turn.isError),
-            withPrefix,
-          }),
-        );
-      } else if (turn.role === "system") {
-        const label = turn.subtype ?? "system";
-        out.push(
-          renderTextLine({
-            source: turn.source,
-            text: `[${label}]`,
-            withPrefix,
-          }),
-        );
-      } else if (turn.role === "user") {
-        for (const block of turn.content) {
-          if (block.type === "text") {
-            out.push(
-              renderTextLine({
-                source: turn.source,
-                text: `[user] ${block.text}`,
-                withPrefix,
-              }),
-            );
-          }
-        }
-      }
+      out.push(...renderTurnLines(turn, withPrefix));
     }
-    // Trailing result block — the one summary line humans want (spec 540).
-    let tail = "";
-    if (this.result) {
-      const duration = formatDuration(this.result.durationMs);
-      const cost = Number(this.result.totalCostUsd).toFixed(4);
-      tail =
-        "\n" +
-        `--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`;
-    }
+    const tail = this.#formatResultTail();
     // Each rendered line already ends with `\n`; concatenate, drop the
     // trailing newline, then append the tail so the output shape stays
@@ -341,6 +275,20 @@ export class TraceCollector {
     const body = out.join("").replace(/\n$/, "");
     return body + tail;
   }
+  /**
+   * Format the trailing result summary line (spec 540).
+   * @returns {string}
+   */
+  #formatResultTail() {
+    if (!this.result) return "";
+    const duration = formatDuration(this.result.durationMs);
+    const cost = Number(this.result.totalCostUsd).toFixed(4);
+    return (
+      "\n" +
+      `--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`
+    );
+  }
 }
 /**

package/src/trace-github.js CHANGED Viewed

@@ -48,7 +48,6 @@ export class TraceGitHub {
     const data = await this.#get(url);
     const runs = data.workflow_runs ?? [];
-    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
     const re = new RegExp(pattern, "i");
     return runs
       .filter((r) => re.test(r.name))

package/src/trace-query.js CHANGED Viewed

@@ -81,24 +81,12 @@ export class TraceQuery {
    */
   filter(opts = {}) {
     const { role, toolName, isError } = opts;
-    return this.turns.filter((turn) => {
-      if (role !== undefined && turn.role !== role) return false;
-      if (isError !== undefined) {
-        if (turn.role !== "tool_result") return false;
-        if (turn.isError !== isError) return false;
-      }
-      if (toolName !== undefined) {
-        if (turn.role === "assistant") {
-          const has = turn.content.some(
-            (b) => b.type === "tool_use" && b.name === toolName,
-          );
-          if (!has) return false;
-        } else {
-          return false;
-        }
-      }
-      return true;
-    });
+    return this.turns.filter(
+      (turn) =>
+        matchesRole(turn, role) &&
+        matchesError(turn, isError) &&
+        matchesToolName(turn, toolName),
+    );
   }
   /** @returns {number} */
@@ -151,7 +139,6 @@ export class TraceQuery {
    */
   search(pattern, opts = {}) {
     const { context = 0, limit = 50, full = false } = opts;
-    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
     const re = new RegExp(pattern, "gi");
     const hits = [];
@@ -200,30 +187,18 @@ export class TraceQuery {
    * @returns {object[]}
    */
   tool(name) {
-    const toolUseIds = new Set();
-    const results = [];
-    for (const turn of this.turns) {
-      if (turn.role === "assistant") {
-        const hasTool = turn.content.some(
-          (b) => b.type === "tool_use" && b.name === name,
-        );
-        if (hasTool) {
-          results.push(turn);
-          for (const b of turn.content) {
-            if (b.type === "tool_use" && b.name === name && b.toolUseId) {
-              toolUseIds.add(b.toolUseId);
-            }
-          }
-        }
-      } else if (
-        turn.role === "tool_result" &&
-        toolUseIds.has(turn.toolUseId)
-      ) {
-        results.push(turn);
-      }
-    }
-    return results;
+    const toolUseIds = collectToolUseIds(this.turns, name);
+    const assistantTurns = this.turns.filter(
+      (t) =>
+        t.role === "assistant" &&
+        t.content.some((b) => b.type === "tool_use" && b.name === name),
+    );
+    const resultTurns = this.turns.filter(
+      (t) => t.role === "tool_result" && toolUseIds.has(t.toolUseId),
+    );
+    return [...assistantTurns, ...resultTurns].sort(
+      (a, b) => a.index - b.index,
+    );
   }
   /**
@@ -343,6 +318,57 @@ export class TraceQuery {
   }
 }
+/**
+ * @param {object} turn
+ * @param {string|undefined} role
+ * @returns {boolean}
+ */
+function matchesRole(turn, role) {
+  return role === undefined || turn.role === role;
+}
+/**
+ * @param {object} turn
+ * @param {boolean|undefined} isError
+ * @returns {boolean}
+ */
+function matchesError(turn, isError) {
+  if (isError === undefined) return true;
+  return turn.role === "tool_result" && turn.isError === isError;
+}
+/**
+ * @param {object} turn
+ * @param {string|undefined} toolName
+ * @returns {boolean}
+ */
+function matchesToolName(turn, toolName) {
+  if (toolName === undefined) return true;
+  return (
+    turn.role === "assistant" &&
+    turn.content.some((b) => b.type === "tool_use" && b.name === toolName)
+  );
+}
+/**
+ * Collect all toolUseIds for a given tool name from assistant turns.
+ * @param {object[]} turns
+ * @param {string} name
+ * @returns {Set<string>}
+ */
+function collectToolUseIds(turns, name) {
+  const ids = new Set();
+  for (const turn of turns) {
+    if (turn.role !== "assistant") continue;
+    for (const b of turn.content) {
+      if (b.type === "tool_use" && b.name === name && b.toolUseId) {
+        ids.add(b.toolUseId);
+      }
+    }
+  }
+  return ids;
+}
 /**
  * Search a single turn for regex matches. Returns array of match descriptions.
  * @param {object} turn