@forwardimpact/libeval 0.1.26 → 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.26",
3
+ "version": "0.1.27",
4
4
  "description": "Agent evaluation: collect Claude Code traces, run agent loops, supervise multi-step workflows.",
5
5
  "keywords": [
6
6
  "eval",
@@ -9,6 +9,14 @@
9
9
  "claude-code",
10
10
  "supervisor"
11
11
  ],
12
+ "homepage": "https://www.forwardimpact.team",
13
+ "repository": {
14
+ "type": "git",
15
+ "url": "git+https://github.com/forwardimpact/monorepo.git",
16
+ "directory": "libraries/libeval"
17
+ },
18
+ "license": "Apache-2.0",
19
+ "author": "D. Olsson <hi@senzilla.io>",
12
20
  "forwardimpact": {
13
21
  "capability": "agent-self-improvement",
14
22
  "needs": [
@@ -17,8 +25,6 @@
17
25
  "Supervise a multi-step or multi-agent workflow"
18
26
  ]
19
27
  },
20
- "license": "Apache-2.0",
21
- "author": "D. Olsson <hi@senzilla.io>",
22
28
  "type": "module",
23
29
  "main": "./src/index.js",
24
30
  "exports": {
@@ -35,10 +41,6 @@
35
41
  "bin/**/*.js",
36
42
  "README.md"
37
43
  ],
38
- "engines": {
39
- "bun": ">=1.2.0",
40
- "node": ">=18.0.0"
41
- },
42
44
  "scripts": {
43
45
  "test": "bun test test/*.test.js"
44
46
  },
@@ -52,6 +54,10 @@
52
54
  "devDependencies": {
53
55
  "@forwardimpact/libharness": "^0.1.14"
54
56
  },
57
+ "engines": {
58
+ "bun": ">=1.2.0",
59
+ "node": ">=18.0.0"
60
+ },
55
61
  "publishConfig": {
56
62
  "access": "public"
57
63
  }
@@ -211,8 +211,9 @@ export class AgentRunner {
211
211
  if (message.type === "system" && message.subtype === "init") {
212
212
  this.sessionId = message.session_id;
213
213
  }
214
- if (message.type === "assistant" && hasTextBlock(message)) {
215
- state.assistantTextCount++;
214
+ if (message.type === "assistant") {
215
+ if (hasTextBlock(message)) state.assistantTextCount++;
216
+ trackSkillInvocation(message);
216
217
  }
217
218
  }
218
219
 
@@ -293,6 +294,20 @@ export function hasTextBlock(message) {
293
294
  return false;
294
295
  }
295
296
 
297
+ function trackSkillInvocation(message) {
298
+ const content = message.message?.content ?? message.content;
299
+ if (!Array.isArray(content)) return;
300
+ for (const block of content) {
301
+ if (
302
+ block.type === "tool_use" &&
303
+ block.name === "Skill" &&
304
+ block.input?.skill
305
+ ) {
306
+ process.env.LIBEVAL_SKILL = block.input.skill;
307
+ }
308
+ }
309
+ }
310
+
296
311
  /**
297
312
  * Factory function — wires real dependencies.
298
313
  * @param {object} deps - Same as AgentRunner constructor
@@ -73,6 +73,10 @@ export async function runFacilitateCommand(values, _args) {
73
73
  })
74
74
  : process.stdout;
75
75
 
76
+ if (opts.facilitatorProfile) {
77
+ process.env.LIBEVAL_AGENT_PROFILE = opts.facilitatorProfile;
78
+ }
79
+
76
80
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
77
81
  const facilitator = createFacilitator({
78
82
  facilitatorCwd: opts.facilitatorCwd,
@@ -78,6 +78,10 @@ export async function runRunCommand(values, _args) {
78
78
  );
79
79
  };
80
80
 
81
+ if (agentProfile) {
82
+ process.env.LIBEVAL_AGENT_PROFILE = agentProfile;
83
+ }
84
+
81
85
  const systemPrompt = agentProfile
82
86
  ? composeProfilePrompt(agentProfile, {
83
87
  profilesDir: resolve(cwd, ".claude/agents"),
@@ -71,6 +71,10 @@ export async function runSuperviseCommand(values, _args) {
71
71
  })
72
72
  : process.stdout;
73
73
 
74
+ if (opts.agentProfile) {
75
+ process.env.LIBEVAL_AGENT_PROFILE = opts.agentProfile;
76
+ }
77
+
74
78
  const { query } = await import("@anthropic-ai/claude-agent-sdk");
75
79
  const supervisor = createSupervisor({
76
80
  supervisorCwd: opts.supervisorCwd,
@@ -180,42 +180,48 @@ export class Facilitator {
180
180
  let messages = this.messageBus.drain(agent.name);
181
181
  if (messages.length === 0) return;
182
182
 
183
- this.emitOrchestratorEvent({
184
- type: "agent_start",
185
- agent: agent.name,
186
- });
183
+ this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
187
184
  await agent.runner.run(formatMessages(messages));
188
- if (this.ctx.concluded) return;
185
+ if (await this.#settleAgentTurn(agent)) return;
186
+
187
+ // Loop: check for new messages, resume if any
188
+ while (!this.ctx.concluded) {
189
+ messages = await this.#awaitAgentMessages(agent.name);
190
+ if (messages.length === 0) break;
191
+ await agent.runner.resume(formatMessages(messages));
192
+ if (await this.#settleAgentTurn(agent)) break;
193
+ }
194
+ }
195
+
196
+ /**
197
+ * Enforce pending-ask and emit turn_complete. Returns true when the
198
+ * session has concluded and the caller should stop.
199
+ */
200
+ async #settleAgentTurn(agent) {
201
+ if (this.ctx.concluded) return true;
189
202
  await this.#enforcePendingAsk(agent);
190
- if (this.ctx.concluded) return;
203
+ if (this.ctx.concluded) return true;
191
204
  this.eventQueue.enqueue({
192
205
  type: "lifecycle",
193
206
  agent: agent.name,
194
207
  status: "turn_complete",
195
208
  });
209
+ return false;
210
+ }
196
211
 
197
- // Loop: check for new messages, resume if any
198
- while (!this.ctx.concluded) {
199
- messages = this.messageBus.drain(agent.name);
200
- if (messages.length === 0) {
201
- await Promise.race([
202
- this.messageBus.waitForMessages(agent.name),
203
- this.concludePromise,
204
- ]);
205
- if (this.ctx.concluded) break;
206
- messages = this.messageBus.drain(agent.name);
207
- if (messages.length === 0) break;
208
- }
209
- await agent.runner.resume(formatMessages(messages));
210
- if (this.ctx.concluded) break;
211
- await this.#enforcePendingAsk(agent);
212
- if (this.ctx.concluded) break;
213
- this.eventQueue.enqueue({
214
- type: "lifecycle",
215
- agent: agent.name,
216
- status: "turn_complete",
217
- });
218
- }
212
+ /**
213
+ * Wait for messages addressed to `name`, returning an empty array when
214
+ * the session concludes first.
215
+ */
216
+ async #awaitAgentMessages(name) {
217
+ const messages = this.messageBus.drain(name);
218
+ if (messages.length > 0) return messages;
219
+ await Promise.race([
220
+ this.messageBus.waitForMessages(name),
221
+ this.concludePromise,
222
+ ]);
223
+ if (this.ctx.concluded) return [];
224
+ return this.messageBus.drain(name);
219
225
  }
220
226
 
221
227
  /**
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Turn renderer — maps a structured turn into formatted text lines.
3
+ *
4
+ * Shared by `TeeWriter.flushTurns()` (live stream) and
5
+ * `TraceCollector.toText()` (offline replay) so both emit identical output
6
+ * (spec 540).
7
+ */
8
+
9
+ import {
10
+ renderTextLine,
11
+ renderToolCallLine,
12
+ renderToolResultLine,
13
+ } from "./line-renderer.js";
14
+ import {
15
+ hintForCall,
16
+ previewForResult,
17
+ simplifyToolName,
18
+ } from "./tool-hints.js";
19
+
20
+ /**
21
+ * Render a single turn to formatted text lines.
22
+ *
23
+ * @param {object} turn - Structured turn object
24
+ * @param {boolean} withPrefix - Whether to include source labels
25
+ * @returns {string[]} Array of rendered line strings
26
+ */
27
+ export function renderTurnLines(turn, withPrefix) {
28
+ if (turn.role === "assistant") return renderAssistantTurn(turn, withPrefix);
29
+ if (turn.role === "tool_result")
30
+ return renderToolResultTurn(turn, withPrefix);
31
+ if (turn.role === "system") return renderSystemTurn(turn, withPrefix);
32
+ if (turn.role === "user") return renderUserTurn(turn, withPrefix);
33
+ return [];
34
+ }
35
+
36
+ /** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
37
+ function renderAssistantTurn(turn, withPrefix) {
38
+ const lines = [];
39
+ for (const block of turn.content) {
40
+ if (block.type === "text") {
41
+ lines.push(
42
+ renderTextLine({ source: turn.source, text: block.text, withPrefix }),
43
+ );
44
+ } else if (block.type === "tool_use") {
45
+ lines.push(
46
+ renderToolCallLine({
47
+ source: turn.source,
48
+ toolName: simplifyToolName(block.name),
49
+ hint: hintForCall(block.name, block.input),
50
+ withPrefix,
51
+ }),
52
+ );
53
+ }
54
+ }
55
+ return lines;
56
+ }
57
+
58
+ /** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
59
+ function renderToolResultTurn(turn, withPrefix) {
60
+ return [
61
+ renderToolResultLine({
62
+ source: turn.source,
63
+ preview: previewForResult(turn.content, turn.isError),
64
+ withPrefix,
65
+ }),
66
+ ];
67
+ }
68
+
69
+ /** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
70
+ function renderSystemTurn(turn, withPrefix) {
71
+ const label = turn.subtype ?? "system";
72
+ return [
73
+ renderTextLine({ source: turn.source, text: `[${label}]`, withPrefix }),
74
+ ];
75
+ }
76
+
77
+ /** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
78
+ function renderUserTurn(turn, withPrefix) {
79
+ const lines = [];
80
+ for (const block of turn.content) {
81
+ if (block.type === "text") {
82
+ lines.push(
83
+ renderTextLine({
84
+ source: turn.source,
85
+ text: `[user] ${block.text}`,
86
+ withPrefix,
87
+ }),
88
+ );
89
+ }
90
+ }
91
+ return lines;
92
+ }
package/src/supervisor.js CHANGED
@@ -172,39 +172,26 @@ export class Supervisor {
172
172
  : await this.agentRunner.run(relay);
173
173
  agentCalled = true;
174
174
 
175
- if (agentResult.error && !agentResult.aborted) {
176
- this.emitSummary({ success: false, turns: turn });
177
- return { exit: { success: false, turns: turn } };
178
- }
175
+ const outcome = this.#classifyAgentOutcome(
176
+ agentResult,
177
+ turn,
178
+ interventions,
179
+ );
179
180
 
180
- if (this.ctx.concluded) {
181
- this.emitSummary({
182
- success: true,
183
- turns: turn,
184
- summary: this.ctx.summary,
185
- });
186
- return { exit: { success: true, turns: turn } };
187
- }
181
+ if (outcome.type === "exit") return { exit: outcome.exit };
182
+ if (outcome.type === "intervention_limit") return { exit: null };
188
183
 
189
- if (agentResult.aborted && this.ctx.redirect) {
184
+ if (outcome.type === "redirect") {
190
185
  interventions++;
191
- const redirect = this.ctx.redirect;
192
- this.ctx.redirect = null;
193
- if (interventions >= MAX_INTERVENTIONS_PER_TURN) {
194
- this.emitOrchestratorEvent({ type: "intervention_limit", turn });
195
- return { exit: null };
196
- }
197
- relay = redirect.message;
186
+ relay = outcome.relay;
198
187
  this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
199
188
  continue;
200
189
  }
201
190
 
202
- if (this.#checkAsk("agent") === "recheck" && !this.ctx.concluded) {
203
- const reminders = this.messageBus.drain("agent");
204
- if (reminders.length > 0) {
205
- relay = formatMessages(reminders);
206
- continue;
207
- }
191
+ const askRelay = this.#drainAgentAskRelay();
192
+ if (askRelay) {
193
+ relay = askRelay;
194
+ continue;
208
195
  }
209
196
 
210
197
  return { exit: null };
@@ -214,6 +201,50 @@ export class Supervisor {
214
201
  }
215
202
  }
216
203
 
204
+ /**
205
+ * Classify the outcome of a single agent execution within #runAgentTurn.
206
+ * @returns {{type: string, exit?: object|null, relay?: string}}
207
+ */
208
+ #classifyAgentOutcome(agentResult, turn, interventions) {
209
+ if (agentResult.error && !agentResult.aborted) {
210
+ this.emitSummary({ success: false, turns: turn });
211
+ return { type: "exit", exit: { success: false, turns: turn } };
212
+ }
213
+
214
+ if (this.ctx.concluded) {
215
+ this.emitSummary({
216
+ success: true,
217
+ turns: turn,
218
+ summary: this.ctx.summary,
219
+ });
220
+ return { type: "exit", exit: { success: true, turns: turn } };
221
+ }
222
+
223
+ if (agentResult.aborted && this.ctx.redirect) {
224
+ const redirect = this.ctx.redirect;
225
+ this.ctx.redirect = null;
226
+ if (interventions + 1 >= MAX_INTERVENTIONS_PER_TURN) {
227
+ this.emitOrchestratorEvent({ type: "intervention_limit", turn });
228
+ return { type: "intervention_limit" };
229
+ }
230
+ return { type: "redirect", relay: redirect.message };
231
+ }
232
+
233
+ return { type: "continue" };
234
+ }
235
+
236
+ /**
237
+ * If the agent has an unanswered ask, drain reminders and return a
238
+ * formatted relay string. Returns null when no relay is needed.
239
+ * @returns {string|null}
240
+ */
241
+ #drainAgentAskRelay() {
242
+ if (this.#checkAsk("agent") !== "recheck" || this.ctx.concluded)
243
+ return null;
244
+ const reminders = this.messageBus.drain("agent");
245
+ return reminders.length > 0 ? formatMessages(reminders) : null;
246
+ }
247
+
217
248
  /**
218
249
  * Mid-turn supervisor review fired from inside the agent's onBatch hook.
219
250
  * Runs the supervisor's LLM against the batch and aborts the agent if
package/src/tee-writer.js CHANGED
@@ -17,16 +17,7 @@
17
17
 
18
18
  import { Writable } from "node:stream";
19
19
  import { TraceCollector } from "./trace-collector.js";
20
- import {
21
- renderTextLine,
22
- renderToolCallLine,
23
- renderToolResultLine,
24
- } from "./render/line-renderer.js";
25
- import {
26
- hintForCall,
27
- previewForResult,
28
- simplifyToolName,
29
- } from "./render/tool-hints.js";
20
+ import { renderTurnLines } from "./render/turn-renderer.js";
30
21
  import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
31
22
 
32
23
  export class TeeWriter extends Writable {
@@ -134,56 +125,8 @@ export class TeeWriter extends Writable {
134
125
  const withPrefix = this.mode !== "raw";
135
126
  while (this.turnsEmitted < turns.length) {
136
127
  const turn = turns[this.turnsEmitted++];
137
- if (turn.role === "assistant") {
138
- for (const block of turn.content) {
139
- if (block.type === "text") {
140
- this.textStream.write(
141
- renderTextLine({
142
- source: turn.source,
143
- text: block.text,
144
- withPrefix,
145
- }),
146
- );
147
- } else if (block.type === "tool_use") {
148
- this.textStream.write(
149
- renderToolCallLine({
150
- source: turn.source,
151
- toolName: simplifyToolName(block.name),
152
- hint: hintForCall(block.name, block.input),
153
- withPrefix,
154
- }),
155
- );
156
- }
157
- }
158
- } else if (turn.role === "tool_result") {
159
- this.textStream.write(
160
- renderToolResultLine({
161
- source: turn.source,
162
- preview: previewForResult(turn.content, turn.isError),
163
- withPrefix,
164
- }),
165
- );
166
- } else if (turn.role === "system") {
167
- const label = turn.subtype ?? "system";
168
- this.textStream.write(
169
- renderTextLine({
170
- source: turn.source,
171
- text: `[${label}]`,
172
- withPrefix,
173
- }),
174
- );
175
- } else if (turn.role === "user") {
176
- for (const block of turn.content) {
177
- if (block.type === "text") {
178
- this.textStream.write(
179
- renderTextLine({
180
- source: turn.source,
181
- text: `[user] ${block.text}`,
182
- withPrefix,
183
- }),
184
- );
185
- }
186
- }
128
+ for (const line of renderTurnLines(turn, withPrefix)) {
129
+ this.textStream.write(line);
187
130
  }
188
131
  }
189
132
  }
@@ -9,16 +9,7 @@
9
9
  * one formatting path (spec 540).
10
10
  */
11
11
 
12
- import {
13
- renderTextLine,
14
- renderToolCallLine,
15
- renderToolResultLine,
16
- } from "./render/line-renderer.js";
17
- import {
18
- hintForCall,
19
- previewForResult,
20
- simplifyToolName,
21
- } from "./render/tool-hints.js";
12
+ import { renderTurnLines } from "./render/turn-renderer.js";
22
13
  import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
23
14
 
24
15
  export class TraceCollector {
@@ -270,68 +261,10 @@ export class TraceCollector {
270
261
  const out = [];
271
262
 
272
263
  for (const turn of this.turns) {
273
- if (turn.role === "assistant") {
274
- for (const block of turn.content) {
275
- if (block.type === "text") {
276
- out.push(
277
- renderTextLine({
278
- source: turn.source,
279
- text: block.text,
280
- withPrefix,
281
- }),
282
- );
283
- } else if (block.type === "tool_use") {
284
- out.push(
285
- renderToolCallLine({
286
- source: turn.source,
287
- toolName: simplifyToolName(block.name),
288
- hint: hintForCall(block.name, block.input),
289
- withPrefix,
290
- }),
291
- );
292
- }
293
- }
294
- } else if (turn.role === "tool_result") {
295
- out.push(
296
- renderToolResultLine({
297
- source: turn.source,
298
- preview: previewForResult(turn.content, turn.isError),
299
- withPrefix,
300
- }),
301
- );
302
- } else if (turn.role === "system") {
303
- const label = turn.subtype ?? "system";
304
- out.push(
305
- renderTextLine({
306
- source: turn.source,
307
- text: `[${label}]`,
308
- withPrefix,
309
- }),
310
- );
311
- } else if (turn.role === "user") {
312
- for (const block of turn.content) {
313
- if (block.type === "text") {
314
- out.push(
315
- renderTextLine({
316
- source: turn.source,
317
- text: `[user] ${block.text}`,
318
- withPrefix,
319
- }),
320
- );
321
- }
322
- }
323
- }
264
+ out.push(...renderTurnLines(turn, withPrefix));
324
265
  }
325
266
 
326
- // Trailing result block — the one summary line humans want (spec 540).
327
- let tail = "";
328
- if (this.result) {
329
- const duration = formatDuration(this.result.durationMs);
330
- const cost = Number(this.result.totalCostUsd).toFixed(4);
331
- tail =
332
- "\n" +
333
- `--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`;
334
- }
267
+ const tail = this.#formatResultTail();
335
268
 
336
269
  // Each rendered line already ends with `\n`; concatenate, drop the
337
270
  // trailing newline, then append the tail so the output shape stays
@@ -341,6 +274,20 @@ export class TraceCollector {
341
274
  const body = out.join("").replace(/\n$/, "");
342
275
  return body + tail;
343
276
  }
277
+
278
+ /**
279
+ * Format the trailing result summary line (spec 540).
280
+ * @returns {string}
281
+ */
282
+ #formatResultTail() {
283
+ if (!this.result) return "";
284
+ const duration = formatDuration(this.result.durationMs);
285
+ const cost = Number(this.result.totalCostUsd).toFixed(4);
286
+ return (
287
+ "\n" +
288
+ `--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`
289
+ );
290
+ }
344
291
  }
345
292
 
346
293
  /**
@@ -48,7 +48,6 @@ export class TraceGitHub {
48
48
  const data = await this.#get(url);
49
49
  const runs = data.workflow_runs ?? [];
50
50
 
51
- // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
52
51
  const re = new RegExp(pattern, "i");
53
52
  return runs
54
53
  .filter((r) => re.test(r.name))
@@ -81,24 +81,12 @@ export class TraceQuery {
81
81
  */
82
82
  filter(opts = {}) {
83
83
  const { role, toolName, isError } = opts;
84
- return this.turns.filter((turn) => {
85
- if (role !== undefined && turn.role !== role) return false;
86
- if (isError !== undefined) {
87
- if (turn.role !== "tool_result") return false;
88
- if (turn.isError !== isError) return false;
89
- }
90
- if (toolName !== undefined) {
91
- if (turn.role === "assistant") {
92
- const has = turn.content.some(
93
- (b) => b.type === "tool_use" && b.name === toolName,
94
- );
95
- if (!has) return false;
96
- } else {
97
- return false;
98
- }
99
- }
100
- return true;
101
- });
84
+ return this.turns.filter(
85
+ (turn) =>
86
+ matchesRole(turn, role) &&
87
+ matchesError(turn, isError) &&
88
+ matchesToolName(turn, toolName),
89
+ );
102
90
  }
103
91
 
104
92
  /** @returns {number} */
@@ -151,7 +139,6 @@ export class TraceQuery {
151
139
  */
152
140
  search(pattern, opts = {}) {
153
141
  const { context = 0, limit = 50, full = false } = opts;
154
- // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
155
142
  const re = new RegExp(pattern, "gi");
156
143
  const hits = [];
157
144
 
@@ -200,30 +187,18 @@ export class TraceQuery {
200
187
  * @returns {object[]}
201
188
  */
202
189
  tool(name) {
203
- const toolUseIds = new Set();
204
- const results = [];
205
-
206
- for (const turn of this.turns) {
207
- if (turn.role === "assistant") {
208
- const hasTool = turn.content.some(
209
- (b) => b.type === "tool_use" && b.name === name,
210
- );
211
- if (hasTool) {
212
- results.push(turn);
213
- for (const b of turn.content) {
214
- if (b.type === "tool_use" && b.name === name && b.toolUseId) {
215
- toolUseIds.add(b.toolUseId);
216
- }
217
- }
218
- }
219
- } else if (
220
- turn.role === "tool_result" &&
221
- toolUseIds.has(turn.toolUseId)
222
- ) {
223
- results.push(turn);
224
- }
225
- }
226
- return results;
190
+ const toolUseIds = collectToolUseIds(this.turns, name);
191
+ const assistantTurns = this.turns.filter(
192
+ (t) =>
193
+ t.role === "assistant" &&
194
+ t.content.some((b) => b.type === "tool_use" && b.name === name),
195
+ );
196
+ const resultTurns = this.turns.filter(
197
+ (t) => t.role === "tool_result" && toolUseIds.has(t.toolUseId),
198
+ );
199
+ return [...assistantTurns, ...resultTurns].sort(
200
+ (a, b) => a.index - b.index,
201
+ );
227
202
  }
228
203
 
229
204
  /**
@@ -343,6 +318,57 @@ export class TraceQuery {
343
318
  }
344
319
  }
345
320
 
321
+ /**
322
+ * @param {object} turn
323
+ * @param {string|undefined} role
324
+ * @returns {boolean}
325
+ */
326
+ function matchesRole(turn, role) {
327
+ return role === undefined || turn.role === role;
328
+ }
329
+
330
+ /**
331
+ * @param {object} turn
332
+ * @param {boolean|undefined} isError
333
+ * @returns {boolean}
334
+ */
335
+ function matchesError(turn, isError) {
336
+ if (isError === undefined) return true;
337
+ return turn.role === "tool_result" && turn.isError === isError;
338
+ }
339
+
340
+ /**
341
+ * @param {object} turn
342
+ * @param {string|undefined} toolName
343
+ * @returns {boolean}
344
+ */
345
+ function matchesToolName(turn, toolName) {
346
+ if (toolName === undefined) return true;
347
+ return (
348
+ turn.role === "assistant" &&
349
+ turn.content.some((b) => b.type === "tool_use" && b.name === toolName)
350
+ );
351
+ }
352
+
353
+ /**
354
+ * Collect all toolUseIds for a given tool name from assistant turns.
355
+ * @param {object[]} turns
356
+ * @param {string} name
357
+ * @returns {Set<string>}
358
+ */
359
+ function collectToolUseIds(turns, name) {
360
+ const ids = new Set();
361
+ for (const turn of turns) {
362
+ if (turn.role !== "assistant") continue;
363
+ for (const b of turn.content) {
364
+ if (b.type === "tool_use" && b.name === name && b.toolUseId) {
365
+ ids.add(b.toolUseId);
366
+ }
367
+ }
368
+ }
369
+ return ids;
370
+ }
371
+
346
372
  /**
347
373
  * Search a single turn for regex matches. Returns array of match descriptions.
348
374
  * @param {object} turn