@forwardimpact/libeval 0.1.26 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,11 @@
1
1
  # libeval
2
2
 
3
- Process Claude Code stream-json output into structured traces.
3
+ <!-- BEGIN:description Do not edit. Generated from package.json. -->
4
+
5
+ Agent evaluation framework — prove whether agent changes improved outcomes with
6
+ reproducible evidence.
7
+
8
+ <!-- END:description -->
4
9
 
5
10
  ## Getting Started
6
11
 
package/bin/fit-eval.js CHANGED
@@ -177,20 +177,20 @@ const definition = {
177
177
  ],
178
178
  documentation: [
179
179
  {
180
- title: "Agent Evaluations",
181
- url: "https://www.forwardimpact.team/docs/libraries/agent-evaluations/index.md",
180
+ title: "Run an Eval",
181
+ url: "https://www.forwardimpact.team/docs/libraries/prove-changes/run-eval/index.md",
182
182
  description:
183
183
  "Author a judge profile, run an eval locally, wire it into CI, and inspect the resulting trace.",
184
184
  },
185
185
  {
186
- title: "Agent Collaboration",
187
- url: "https://www.forwardimpact.team/docs/libraries/agent-collaboration/index.md",
186
+ title: "Prove Agent Changes",
187
+ url: "https://www.forwardimpact.team/docs/libraries/prove-changes/index.md",
188
188
  description:
189
- "Author a facilitator and participant profiles, run a multi-agent session, and read the message flow.",
189
+ "End-to-end workflow from dataset generation through evaluation to trace analysis, including multi-agent collaboration sessions.",
190
190
  },
191
191
  {
192
- title: "Trace Analysis",
193
- url: "https://www.forwardimpact.team/docs/libraries/trace-analysis/index.md",
192
+ title: "Analyze Traces",
193
+ url: "https://www.forwardimpact.team/docs/libraries/prove-changes/trace-analysis/index.md",
194
194
  description:
195
195
  "Read the NDJSON traces produced by `fit-eval` with `fit-trace` — grounded-theory method and worked examples.",
196
196
  },
package/bin/fit-trace.js CHANGED
@@ -214,22 +214,22 @@ const definition = {
214
214
  ],
215
215
  documentation: [
216
216
  {
217
- title: "Trace Analysis",
218
- url: "https://www.forwardimpact.team/docs/libraries/trace-analysis/index.md",
217
+ title: "Analyze Traces",
218
+ url: "https://www.forwardimpact.team/docs/libraries/prove-changes/trace-analysis/index.md",
219
219
  description:
220
220
  "The full method walkthrough with worked examples (an eval that failed, a multi-agent session that stalled).",
221
221
  },
222
222
  {
223
- title: "Agent Evaluations",
224
- url: "https://www.forwardimpact.team/docs/libraries/agent-evaluations/index.md",
223
+ title: "Run an Eval",
224
+ url: "https://www.forwardimpact.team/docs/libraries/prove-changes/run-eval/index.md",
225
225
  description:
226
226
  "How `fit-eval supervise` produces the traces this skill analyzes.",
227
227
  },
228
228
  {
229
- title: "Agent Collaboration",
230
- url: "https://www.forwardimpact.team/docs/libraries/agent-collaboration/index.md",
229
+ title: "Prove Agent Changes",
230
+ url: "https://www.forwardimpact.team/docs/libraries/prove-changes/index.md",
231
231
  description:
232
- "How `fit-eval facilitate` produces multi-agent traces; `split` is the bridge into per-source files.",
232
+ "End-to-end workflow including multi-agent collaboration; `split` is the bridge into per-source trace files.",
233
233
  },
234
234
  ],
235
235
  };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.26",
4
- "description": "Agent evaluation: collect Claude Code traces, run agent loops, supervise multi-step workflows.",
3
+ "version": "0.1.28",
4
+ "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",
7
7
  "agent",
@@ -9,16 +9,24 @@
9
9
  "claude-code",
10
10
  "supervisor"
11
11
  ],
12
- "forwardimpact": {
13
- "capability": "agent-self-improvement",
14
- "needs": [
15
- "Parse and query Claude Code trace NDJSON files",
16
- "Drive an LLM agent through a scripted run and capture its trace",
17
- "Supervise a multi-step or multi-agent workflow"
18
- ]
12
+ "homepage": "https://www.forwardimpact.team",
13
+ "repository": {
14
+ "type": "git",
15
+ "url": "git+https://github.com/forwardimpact/monorepo.git",
16
+ "directory": "libraries/libeval"
19
17
  },
20
18
  "license": "Apache-2.0",
21
19
  "author": "D. Olsson <hi@senzilla.io>",
20
+ "jobs": [
21
+ {
22
+ "user": "Platform Builders",
23
+ "goal": "Prove Agent Changes",
24
+ "trigger": "An eval passes locally but fails in CI and the only output is 'assertion failed.'",
25
+ "bigHire": "prove whether agent changes improved outcomes with reproducible evidence.",
26
+ "littleHire": "run an eval and get a trace that shows exactly what the agent did.",
27
+ "competesWith": "manual before/after comparison; trusting gut feeling over evidence; skipping evaluation entirely"
28
+ }
29
+ ],
22
30
  "type": "module",
23
31
  "main": "./src/index.js",
24
32
  "exports": {
@@ -35,10 +43,6 @@
35
43
  "bin/**/*.js",
36
44
  "README.md"
37
45
  ],
38
- "engines": {
39
- "bun": ">=1.2.0",
40
- "node": ">=18.0.0"
41
- },
42
46
  "scripts": {
43
47
  "test": "bun test test/*.test.js"
44
48
  },
@@ -52,6 +56,10 @@
52
56
  "devDependencies": {
53
57
  "@forwardimpact/libharness": "^0.1.14"
54
58
  },
59
+ "engines": {
60
+ "bun": ">=1.2.0",
61
+ "node": ">=18.0.0"
62
+ },
55
63
  "publishConfig": {
56
64
  "access": "public"
57
65
  }
@@ -32,6 +32,7 @@ function applyDefaults(deps) {
32
32
  };
33
33
  }
34
34
 
35
+ /** Run a single Claude Agent SDK session and emit raw NDJSON events to an output stream. */
35
36
  export class AgentRunner {
36
37
  /**
37
38
  * @param {object} deps
@@ -211,8 +212,9 @@ export class AgentRunner {
211
212
  if (message.type === "system" && message.subtype === "init") {
212
213
  this.sessionId = message.session_id;
213
214
  }
214
- if (message.type === "assistant" && hasTextBlock(message)) {
215
- state.assistantTextCount++;
215
+ if (message.type === "assistant") {
216
+ if (hasTextBlock(message)) state.assistantTextCount++;
217
+ trackSkillInvocation(message);
216
218
  }
217
219
  }
218
220
 
@@ -293,6 +295,20 @@ export function hasTextBlock(message) {
293
295
  return false;
294
296
  }
295
297
 
298
+ function trackSkillInvocation(message) {
299
+ const content = message.message?.content ?? message.content;
300
+ if (!Array.isArray(content)) return;
301
+ for (const block of content) {
302
+ if (
303
+ block.type === "tool_use" &&
304
+ block.name === "Skill" &&
305
+ block.input?.skill
306
+ ) {
307
+ process.env.LIBEVAL_SKILL = block.input.skill;
308
+ }
309
+ }
310
+ }
311
+
296
312
  /**
297
313
  * Factory function — wires real dependencies.
298
314
  * @param {object} deps - Same as AgentRunner constructor
@@ -73,6 +73,10 @@ export async function runFacilitateCommand(values, _args) {
73
73
  })
74
74
  : process.stdout;
75
75
 
76
+ if (opts.facilitatorProfile) {
77
+ process.env.LIBEVAL_AGENT_PROFILE = opts.facilitatorProfile;
78
+ }
79
+
76
80
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
77
81
  const facilitator = createFacilitator({
78
82
  facilitatorCwd: opts.facilitatorCwd,
@@ -78,6 +78,10 @@ export async function runRunCommand(values, _args) {
78
78
  );
79
79
  };
80
80
 
81
+ if (agentProfile) {
82
+ process.env.LIBEVAL_AGENT_PROFILE = agentProfile;
83
+ }
84
+
81
85
  const systemPrompt = agentProfile
82
86
  ? composeProfilePrompt(agentProfile, {
83
87
  profilesDir: resolve(cwd, ".claude/agents"),
@@ -71,6 +71,10 @@ export async function runSuperviseCommand(values, _args) {
71
71
  })
72
72
  : process.stdout;
73
73
 
74
+ if (opts.agentProfile) {
75
+ process.env.LIBEVAL_AGENT_PROFILE = opts.agentProfile;
76
+ }
77
+
74
78
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
75
79
  const supervisor = createSupervisor({
76
80
  supervisorCwd: opts.supervisorCwd,
@@ -36,6 +36,7 @@ export const FACILITATED_AGENT_SYSTEM_PROMPT =
36
36
  "Announce broadcasts a message. " +
37
37
  "RollCall lists participants.";
38
38
 
39
+ /** Orchestrate N agent sessions coordinated by a single facilitator LLM session. */
39
40
  export class Facilitator {
40
41
  /**
41
42
  * @param {object} deps
@@ -180,42 +181,48 @@ export class Facilitator {
180
181
  let messages = this.messageBus.drain(agent.name);
181
182
  if (messages.length === 0) return;
182
183
 
183
- this.emitOrchestratorEvent({
184
- type: "agent_start",
185
- agent: agent.name,
186
- });
184
+ this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
187
185
  await agent.runner.run(formatMessages(messages));
188
- if (this.ctx.concluded) return;
186
+ if (await this.#settleAgentTurn(agent)) return;
187
+
188
+ // Loop: check for new messages, resume if any
189
+ while (!this.ctx.concluded) {
190
+ messages = await this.#awaitAgentMessages(agent.name);
191
+ if (messages.length === 0) break;
192
+ await agent.runner.resume(formatMessages(messages));
193
+ if (await this.#settleAgentTurn(agent)) break;
194
+ }
195
+ }
196
+
197
+ /**
198
+ * Enforce pending-ask and emit turn_complete. Returns true when the
199
+ * session has concluded and the caller should stop.
200
+ */
201
+ async #settleAgentTurn(agent) {
202
+ if (this.ctx.concluded) return true;
189
203
  await this.#enforcePendingAsk(agent);
190
- if (this.ctx.concluded) return;
204
+ if (this.ctx.concluded) return true;
191
205
  this.eventQueue.enqueue({
192
206
  type: "lifecycle",
193
207
  agent: agent.name,
194
208
  status: "turn_complete",
195
209
  });
210
+ return false;
211
+ }
196
212
 
197
- // Loop: check for new messages, resume if any
198
- while (!this.ctx.concluded) {
199
- messages = this.messageBus.drain(agent.name);
200
- if (messages.length === 0) {
201
- await Promise.race([
202
- this.messageBus.waitForMessages(agent.name),
203
- this.concludePromise,
204
- ]);
205
- if (this.ctx.concluded) break;
206
- messages = this.messageBus.drain(agent.name);
207
- if (messages.length === 0) break;
208
- }
209
- await agent.runner.resume(formatMessages(messages));
210
- if (this.ctx.concluded) break;
211
- await this.#enforcePendingAsk(agent);
212
- if (this.ctx.concluded) break;
213
- this.eventQueue.enqueue({
214
- type: "lifecycle",
215
- agent: agent.name,
216
- status: "turn_complete",
217
- });
218
- }
213
+ /**
214
+ * Wait for messages addressed to `name`, returning an empty array when
215
+ * the session concludes first.
216
+ */
217
+ async #awaitAgentMessages(name) {
218
+ const messages = this.messageBus.drain(name);
219
+ if (messages.length > 0) return messages;
220
+ await Promise.race([
221
+ this.messageBus.waitForMessages(name),
222
+ this.concludePromise,
223
+ ]);
224
+ if (this.ctx.concluded) return [];
225
+ return this.messageBus.drain(name);
219
226
  }
220
227
 
221
228
  /**
@@ -290,6 +297,7 @@ export class Facilitator {
290
297
  }
291
298
  }
292
299
 
300
+ /** Return the last assistant text block from a runner's buffer, or the fallback if none exists. */
293
301
  extractLastText(runner, fallback) {
294
302
  const lines = runner.buffer;
295
303
  for (let i = lines.length - 1; i >= 0; i--) {
@@ -12,6 +12,7 @@
12
12
  * Follows OO+DI: constructor injection, factory function, tests bypass factory.
13
13
  */
14
14
 
15
+ /** In-memory per-participant message queues for facilitated and supervised orchestration modes. */
15
16
  export class MessageBus {
16
17
  /**
17
18
  * @param {object} deps
@@ -37,6 +37,7 @@ export function createOrchestrationContext() {
37
37
 
38
38
  // --- Handler factories ---
39
39
 
40
+ /** Create a handler that marks the session as concluded and records the summary. */
40
41
  export function createConcludeHandler(ctx) {
41
42
  return async ({ summary }) => {
42
43
  ctx.concluded = true;
@@ -45,6 +46,7 @@ export function createConcludeHandler(ctx) {
45
46
  };
46
47
  }
47
48
 
49
+ /** Create a handler that queues a redirect to interrupt a participant with replacement instructions. */
48
50
  export function createRedirectHandler(ctx) {
49
51
  return async ({ message, to }) => {
50
52
  ctx.redirect = { message, to: to ?? null };
@@ -52,6 +54,7 @@ export function createRedirectHandler(ctx) {
52
54
  };
53
55
  }
54
56
 
57
+ /** Create a handler that returns the list of all session participants and their roles. */
55
58
  export function createRollCallHandler(ctx) {
56
59
  return async () => {
57
60
  return {
@@ -5,6 +5,7 @@
5
5
  * - `formatMessages` — render a drained message batch as tagged lines.
6
6
  */
7
7
 
8
+ /** Create a promise-based async queue for serializing event delivery to the facilitator loop. */
8
9
  export function createAsyncQueue() {
9
10
  const items = [];
10
11
  let waiter = null;
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Turn renderer — maps a structured turn into formatted text lines.
3
+ *
4
+ * Shared by `TeeWriter.flushTurns()` (live stream) and
5
+ * `TraceCollector.toText()` (offline replay) so both emit identical output
6
+ * (spec 540).
7
+ */
8
+
9
+ import {
10
+ renderTextLine,
11
+ renderToolCallLine,
12
+ renderToolResultLine,
13
+ } from "./line-renderer.js";
14
+ import {
15
+ hintForCall,
16
+ previewForResult,
17
+ simplifyToolName,
18
+ } from "./tool-hints.js";
19
+
20
+ /**
21
+ * Render a single turn to formatted text lines.
22
+ *
23
+ * @param {object} turn - Structured turn object
24
+ * @param {boolean} withPrefix - Whether to include source labels
25
+ * @returns {string[]} Array of rendered line strings
26
+ */
27
+ export function renderTurnLines(turn, withPrefix) {
28
+ if (turn.role === "assistant") return renderAssistantTurn(turn, withPrefix);
29
+ if (turn.role === "tool_result")
30
+ return renderToolResultTurn(turn, withPrefix);
31
+ if (turn.role === "system") return renderSystemTurn(turn, withPrefix);
32
+ if (turn.role === "user") return renderUserTurn(turn, withPrefix);
33
+ return [];
34
+ }
35
+
36
+ /** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
37
+ function renderAssistantTurn(turn, withPrefix) {
38
+ const lines = [];
39
+ for (const block of turn.content) {
40
+ if (block.type === "text") {
41
+ lines.push(
42
+ renderTextLine({ source: turn.source, text: block.text, withPrefix }),
43
+ );
44
+ } else if (block.type === "tool_use") {
45
+ lines.push(
46
+ renderToolCallLine({
47
+ source: turn.source,
48
+ toolName: simplifyToolName(block.name),
49
+ hint: hintForCall(block.name, block.input),
50
+ withPrefix,
51
+ }),
52
+ );
53
+ }
54
+ }
55
+ return lines;
56
+ }
57
+
58
+ /** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
59
+ function renderToolResultTurn(turn, withPrefix) {
60
+ return [
61
+ renderToolResultLine({
62
+ source: turn.source,
63
+ preview: previewForResult(turn.content, turn.isError),
64
+ withPrefix,
65
+ }),
66
+ ];
67
+ }
68
+
69
+ /** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
70
+ function renderSystemTurn(turn, withPrefix) {
71
+ const label = turn.subtype ?? "system";
72
+ return [
73
+ renderTextLine({ source: turn.source, text: `[${label}]`, withPrefix }),
74
+ ];
75
+ }
76
+
77
+ /** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
78
+ function renderUserTurn(turn, withPrefix) {
79
+ const lines = [];
80
+ for (const block of turn.content) {
81
+ if (block.type === "text") {
82
+ lines.push(
83
+ renderTextLine({
84
+ source: turn.source,
85
+ text: `[user] ${block.text}`,
86
+ withPrefix,
87
+ }),
88
+ );
89
+ }
90
+ }
91
+ return lines;
92
+ }
@@ -2,16 +2,20 @@
2
2
  * SequenceCounter — global monotonic counter shared across all participants
3
3
  * in a session. Single-threaded JS means no synchronization needed.
4
4
  */
5
+ /** Monotonic counter that assigns globally ordered sequence numbers within a session. */
5
6
  export class SequenceCounter {
7
+ /** Initialize the counter at zero. */
6
8
  constructor() {
7
9
  this.value = 0;
8
10
  }
9
11
 
12
+ /** Return the current value and advance the counter by one. */
10
13
  next() {
11
14
  return this.value++;
12
15
  }
13
16
  }
14
17
 
18
+ /** Create a new SequenceCounter starting at zero. */
15
19
  export function createSequenceCounter() {
16
20
  return new SequenceCounter();
17
21
  }
package/src/supervisor.js CHANGED
@@ -4,8 +4,9 @@
4
4
  * introduces itself, and delegates work to the agent. The loop then alternates:
5
5
  * agent → supervisor → agent.
6
6
  *
7
- * Signaling uses orchestration tools (Ask / Answer / Announce / Redirect /
8
- * Conclude) via in-process MCP servers. The Ask/Answer contract is enforced
7
+ * Signaling uses orchestration tools (Ask / Announce / Redirect / Conclude)
8
+ * via in-process MCP servers; the supervisor has no Answer tool — agent replies
9
+ * are routed back through the relay loop. The Ask/Answer contract is enforced
9
10
  * at turn boundaries: an unanswered Ask triggers one synthetic reminder and
10
11
  * then a `protocol_violation` trace event plus a null-answer injection so the
11
12
  * session advances without silent deadlock.
@@ -52,6 +53,7 @@ export const AGENT_SYSTEM_PROMPT =
52
53
  */
53
54
  const MAX_INTERVENTIONS_PER_TURN = 5;
54
55
 
56
+ /** Orchestrate a relay loop between a supervisor LLM and an agent LLM with mid-turn review. */
55
57
  export class Supervisor {
56
58
  /**
57
59
  * @param {object} deps
@@ -172,39 +174,26 @@ export class Supervisor {
172
174
  : await this.agentRunner.run(relay);
173
175
  agentCalled = true;
174
176
 
175
- if (agentResult.error && !agentResult.aborted) {
176
- this.emitSummary({ success: false, turns: turn });
177
- return { exit: { success: false, turns: turn } };
178
- }
177
+ const outcome = this.#classifyAgentOutcome(
178
+ agentResult,
179
+ turn,
180
+ interventions,
181
+ );
179
182
 
180
- if (this.ctx.concluded) {
181
- this.emitSummary({
182
- success: true,
183
- turns: turn,
184
- summary: this.ctx.summary,
185
- });
186
- return { exit: { success: true, turns: turn } };
187
- }
183
+ if (outcome.type === "exit") return { exit: outcome.exit };
184
+ if (outcome.type === "intervention_limit") return { exit: null };
188
185
 
189
- if (agentResult.aborted && this.ctx.redirect) {
186
+ if (outcome.type === "redirect") {
190
187
  interventions++;
191
- const redirect = this.ctx.redirect;
192
- this.ctx.redirect = null;
193
- if (interventions >= MAX_INTERVENTIONS_PER_TURN) {
194
- this.emitOrchestratorEvent({ type: "intervention_limit", turn });
195
- return { exit: null };
196
- }
197
- relay = redirect.message;
188
+ relay = outcome.relay;
198
189
  this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
199
190
  continue;
200
191
  }
201
192
 
202
- if (this.#checkAsk("agent") === "recheck" && !this.ctx.concluded) {
203
- const reminders = this.messageBus.drain("agent");
204
- if (reminders.length > 0) {
205
- relay = formatMessages(reminders);
206
- continue;
207
- }
193
+ const askRelay = this.#drainAgentAskRelay();
194
+ if (askRelay) {
195
+ relay = askRelay;
196
+ continue;
208
197
  }
209
198
 
210
199
  return { exit: null };
@@ -214,6 +203,50 @@ export class Supervisor {
214
203
  }
215
204
  }
216
205
 
206
+ /**
207
+ * Classify the outcome of a single agent execution within #runAgentTurn.
208
+ * @returns {{type: string, exit?: object|null, relay?: string}}
209
+ */
210
+ #classifyAgentOutcome(agentResult, turn, interventions) {
211
+ if (agentResult.error && !agentResult.aborted) {
212
+ this.emitSummary({ success: false, turns: turn });
213
+ return { type: "exit", exit: { success: false, turns: turn } };
214
+ }
215
+
216
+ if (this.ctx.concluded) {
217
+ this.emitSummary({
218
+ success: true,
219
+ turns: turn,
220
+ summary: this.ctx.summary,
221
+ });
222
+ return { type: "exit", exit: { success: true, turns: turn } };
223
+ }
224
+
225
+ if (agentResult.aborted && this.ctx.redirect) {
226
+ const redirect = this.ctx.redirect;
227
+ this.ctx.redirect = null;
228
+ if (interventions + 1 >= MAX_INTERVENTIONS_PER_TURN) {
229
+ this.emitOrchestratorEvent({ type: "intervention_limit", turn });
230
+ return { type: "intervention_limit" };
231
+ }
232
+ return { type: "redirect", relay: redirect.message };
233
+ }
234
+
235
+ return { type: "continue" };
236
+ }
237
+
238
+ /**
239
+ * If the agent has an unanswered ask, drain reminders and return a
240
+ * formatted relay string. Returns null when no relay is needed.
241
+ * @returns {string|null}
242
+ */
243
+ #drainAgentAskRelay() {
244
+ if (this.#checkAsk("agent") !== "recheck" || this.ctx.concluded)
245
+ return null;
246
+ const reminders = this.messageBus.drain("agent");
247
+ return reminders.length > 0 ? formatMessages(reminders) : null;
248
+ }
249
+
217
250
  /**
218
251
  * Mid-turn supervisor review fired from inside the agent's onBatch hook.
219
252
  * Runs the supervisor's LLM against the batch and aborts the agent if
package/src/tee-writer.js CHANGED
@@ -17,18 +17,10 @@
17
17
 
18
18
  import { Writable } from "node:stream";
19
19
  import { TraceCollector } from "./trace-collector.js";
20
- import {
21
- renderTextLine,
22
- renderToolCallLine,
23
- renderToolResultLine,
24
- } from "./render/line-renderer.js";
25
- import {
26
- hintForCall,
27
- previewForResult,
28
- simplifyToolName,
29
- } from "./render/tool-hints.js";
20
+ import { renderTurnLines } from "./render/turn-renderer.js";
30
21
  import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
31
22
 
23
+ /** Writable stream that saves raw NDJSON to a file while streaming human-readable text to a display stream. */
32
24
  export class TeeWriter extends Writable {
33
25
  /**
34
26
  * @param {object} deps
@@ -134,56 +126,8 @@ export class TeeWriter extends Writable {
134
126
  const withPrefix = this.mode !== "raw";
135
127
  while (this.turnsEmitted < turns.length) {
136
128
  const turn = turns[this.turnsEmitted++];
137
- if (turn.role === "assistant") {
138
- for (const block of turn.content) {
139
- if (block.type === "text") {
140
- this.textStream.write(
141
- renderTextLine({
142
- source: turn.source,
143
- text: block.text,
144
- withPrefix,
145
- }),
146
- );
147
- } else if (block.type === "tool_use") {
148
- this.textStream.write(
149
- renderToolCallLine({
150
- source: turn.source,
151
- toolName: simplifyToolName(block.name),
152
- hint: hintForCall(block.name, block.input),
153
- withPrefix,
154
- }),
155
- );
156
- }
157
- }
158
- } else if (turn.role === "tool_result") {
159
- this.textStream.write(
160
- renderToolResultLine({
161
- source: turn.source,
162
- preview: previewForResult(turn.content, turn.isError),
163
- withPrefix,
164
- }),
165
- );
166
- } else if (turn.role === "system") {
167
- const label = turn.subtype ?? "system";
168
- this.textStream.write(
169
- renderTextLine({
170
- source: turn.source,
171
- text: `[${label}]`,
172
- withPrefix,
173
- }),
174
- );
175
- } else if (turn.role === "user") {
176
- for (const block of turn.content) {
177
- if (block.type === "text") {
178
- this.textStream.write(
179
- renderTextLine({
180
- source: turn.source,
181
- text: `[user] ${block.text}`,
182
- withPrefix,
183
- }),
184
- );
185
- }
186
- }
129
+ for (const line of renderTurnLines(turn, withPrefix)) {
130
+ this.textStream.write(line);
187
131
  }
188
132
  }
189
133
  }
@@ -9,18 +9,10 @@
9
9
  * one formatting path (spec 540).
10
10
  */
11
11
 
12
- import {
13
- renderTextLine,
14
- renderToolCallLine,
15
- renderToolResultLine,
16
- } from "./render/line-renderer.js";
17
- import {
18
- hintForCall,
19
- previewForResult,
20
- simplifyToolName,
21
- } from "./render/tool-hints.js";
12
+ import { renderTurnLines } from "./render/turn-renderer.js";
22
13
  import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
23
14
 
15
+ /** Accumulate Claude Code NDJSON stream events into structured traces for analysis or text replay. */
24
16
  export class TraceCollector {
25
17
  /**
26
18
  * @param {object} [deps]
@@ -270,68 +262,10 @@ export class TraceCollector {
270
262
  const out = [];
271
263
 
272
264
  for (const turn of this.turns) {
273
- if (turn.role === "assistant") {
274
- for (const block of turn.content) {
275
- if (block.type === "text") {
276
- out.push(
277
- renderTextLine({
278
- source: turn.source,
279
- text: block.text,
280
- withPrefix,
281
- }),
282
- );
283
- } else if (block.type === "tool_use") {
284
- out.push(
285
- renderToolCallLine({
286
- source: turn.source,
287
- toolName: simplifyToolName(block.name),
288
- hint: hintForCall(block.name, block.input),
289
- withPrefix,
290
- }),
291
- );
292
- }
293
- }
294
- } else if (turn.role === "tool_result") {
295
- out.push(
296
- renderToolResultLine({
297
- source: turn.source,
298
- preview: previewForResult(turn.content, turn.isError),
299
- withPrefix,
300
- }),
301
- );
302
- } else if (turn.role === "system") {
303
- const label = turn.subtype ?? "system";
304
- out.push(
305
- renderTextLine({
306
- source: turn.source,
307
- text: `[${label}]`,
308
- withPrefix,
309
- }),
310
- );
311
- } else if (turn.role === "user") {
312
- for (const block of turn.content) {
313
- if (block.type === "text") {
314
- out.push(
315
- renderTextLine({
316
- source: turn.source,
317
- text: `[user] ${block.text}`,
318
- withPrefix,
319
- }),
320
- );
321
- }
322
- }
323
- }
265
+ out.push(...renderTurnLines(turn, withPrefix));
324
266
  }
325
267
 
326
- // Trailing result block — the one summary line humans want (spec 540).
327
- let tail = "";
328
- if (this.result) {
329
- const duration = formatDuration(this.result.durationMs);
330
- const cost = Number(this.result.totalCostUsd).toFixed(4);
331
- tail =
332
- "\n" +
333
- `--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`;
334
- }
268
+ const tail = this.#formatResultTail();
335
269
 
336
270
  // Each rendered line already ends with `\n`; concatenate, drop the
337
271
  // trailing newline, then append the tail so the output shape stays
@@ -341,6 +275,20 @@ export class TraceCollector {
341
275
  const body = out.join("").replace(/\n$/, "");
342
276
  return body + tail;
343
277
  }
278
+
279
+ /**
280
+ * Format the trailing result summary line (spec 540).
281
+ * @returns {string}
282
+ */
283
+ #formatResultTail() {
284
+ if (!this.result) return "";
285
+ const duration = formatDuration(this.result.durationMs);
286
+ const cost = Number(this.result.totalCostUsd).toFixed(4);
287
+ return (
288
+ "\n" +
289
+ `--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`
290
+ );
291
+ }
344
292
  }
345
293
 
346
294
  /**
@@ -48,7 +48,6 @@ export class TraceGitHub {
48
48
  const data = await this.#get(url);
49
49
  const runs = data.workflow_runs ?? [];
50
50
 
51
- // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
52
51
  const re = new RegExp(pattern, "i");
53
52
  return runs
54
53
  .filter((r) => re.test(r.name))
@@ -81,24 +81,12 @@ export class TraceQuery {
81
81
  */
82
82
  filter(opts = {}) {
83
83
  const { role, toolName, isError } = opts;
84
- return this.turns.filter((turn) => {
85
- if (role !== undefined && turn.role !== role) return false;
86
- if (isError !== undefined) {
87
- if (turn.role !== "tool_result") return false;
88
- if (turn.isError !== isError) return false;
89
- }
90
- if (toolName !== undefined) {
91
- if (turn.role === "assistant") {
92
- const has = turn.content.some(
93
- (b) => b.type === "tool_use" && b.name === toolName,
94
- );
95
- if (!has) return false;
96
- } else {
97
- return false;
98
- }
99
- }
100
- return true;
101
- });
84
+ return this.turns.filter(
85
+ (turn) =>
86
+ matchesRole(turn, role) &&
87
+ matchesError(turn, isError) &&
88
+ matchesToolName(turn, toolName),
89
+ );
102
90
  }
103
91
 
104
92
  /** @returns {number} */
@@ -151,7 +139,6 @@ export class TraceQuery {
151
139
  */
152
140
  search(pattern, opts = {}) {
153
141
  const { context = 0, limit = 50, full = false } = opts;
154
- // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
155
142
  const re = new RegExp(pattern, "gi");
156
143
  const hits = [];
157
144
 
@@ -200,30 +187,18 @@ export class TraceQuery {
200
187
  * @returns {object[]}
201
188
  */
202
189
  tool(name) {
203
- const toolUseIds = new Set();
204
- const results = [];
205
-
206
- for (const turn of this.turns) {
207
- if (turn.role === "assistant") {
208
- const hasTool = turn.content.some(
209
- (b) => b.type === "tool_use" && b.name === name,
210
- );
211
- if (hasTool) {
212
- results.push(turn);
213
- for (const b of turn.content) {
214
- if (b.type === "tool_use" && b.name === name && b.toolUseId) {
215
- toolUseIds.add(b.toolUseId);
216
- }
217
- }
218
- }
219
- } else if (
220
- turn.role === "tool_result" &&
221
- toolUseIds.has(turn.toolUseId)
222
- ) {
223
- results.push(turn);
224
- }
225
- }
226
- return results;
190
+ const toolUseIds = collectToolUseIds(this.turns, name);
191
+ const assistantTurns = this.turns.filter(
192
+ (t) =>
193
+ t.role === "assistant" &&
194
+ t.content.some((b) => b.type === "tool_use" && b.name === name),
195
+ );
196
+ const resultTurns = this.turns.filter(
197
+ (t) => t.role === "tool_result" && toolUseIds.has(t.toolUseId),
198
+ );
199
+ return [...assistantTurns, ...resultTurns].sort(
200
+ (a, b) => a.index - b.index,
201
+ );
227
202
  }
228
203
 
229
204
  /**
@@ -343,6 +318,57 @@ export class TraceQuery {
343
318
  }
344
319
  }
345
320
 
321
+ /**
322
+ * @param {object} turn
323
+ * @param {string|undefined} role
324
+ * @returns {boolean}
325
+ */
326
+ function matchesRole(turn, role) {
327
+ return role === undefined || turn.role === role;
328
+ }
329
+
330
+ /**
331
+ * @param {object} turn
332
+ * @param {boolean|undefined} isError
333
+ * @returns {boolean}
334
+ */
335
+ function matchesError(turn, isError) {
336
+ if (isError === undefined) return true;
337
+ return turn.role === "tool_result" && turn.isError === isError;
338
+ }
339
+
340
+ /**
341
+ * @param {object} turn
342
+ * @param {string|undefined} toolName
343
+ * @returns {boolean}
344
+ */
345
+ function matchesToolName(turn, toolName) {
346
+ if (toolName === undefined) return true;
347
+ return (
348
+ turn.role === "assistant" &&
349
+ turn.content.some((b) => b.type === "tool_use" && b.name === toolName)
350
+ );
351
+ }
352
+
353
+ /**
354
+ * Collect all toolUseIds for a given tool name from assistant turns.
355
+ * @param {object[]} turns
356
+ * @param {string} name
357
+ * @returns {Set<string>}
358
+ */
359
+ function collectToolUseIds(turns, name) {
360
+ const ids = new Set();
361
+ for (const turn of turns) {
362
+ if (turn.role !== "assistant") continue;
363
+ for (const b of turn.content) {
364
+ if (b.type === "tool_use" && b.name === name && b.toolUseId) {
365
+ ids.add(b.toolUseId);
366
+ }
367
+ }
368
+ }
369
+ return ids;
370
+ }
371
+
346
372
  /**
347
373
  * Search a single turn for regex matches. Returns array of match descriptions.
348
374
  * @param {object} turn