@forwardimpact/libeval 0.1.25 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-eval.js +4 -4
- package/bin/fit-trace.js +3 -3
- package/package.json +27 -6
- package/src/agent-runner.js +17 -2
- package/src/commands/facilitate.js +4 -0
- package/src/commands/run.js +4 -0
- package/src/commands/supervise.js +4 -0
- package/src/facilitator.js +34 -28
- package/src/render/turn-renderer.js +92 -0
- package/src/supervisor.js +57 -26
- package/src/tee-writer.js +3 -60
- package/src/trace-collector.js +17 -70
- package/src/trace-github.js +0 -1
- package/src/trace-query.js +69 -43
package/bin/fit-eval.js
CHANGED
|
@@ -178,25 +178,25 @@ const definition = {
|
|
|
178
178
|
documentation: [
|
|
179
179
|
{
|
|
180
180
|
title: "Agent Evaluations",
|
|
181
|
-
url: "https://www.forwardimpact.team/docs/
|
|
181
|
+
url: "https://www.forwardimpact.team/docs/libraries/agent-evaluations/index.md",
|
|
182
182
|
description:
|
|
183
183
|
"Author a judge profile, run an eval locally, wire it into CI, and inspect the resulting trace.",
|
|
184
184
|
},
|
|
185
185
|
{
|
|
186
186
|
title: "Agent Collaboration",
|
|
187
|
-
url: "https://www.forwardimpact.team/docs/
|
|
187
|
+
url: "https://www.forwardimpact.team/docs/libraries/agent-collaboration/index.md",
|
|
188
188
|
description:
|
|
189
189
|
"Author a facilitator and participant profiles, run a multi-agent session, and read the message flow.",
|
|
190
190
|
},
|
|
191
191
|
{
|
|
192
192
|
title: "Trace Analysis",
|
|
193
|
-
url: "https://www.forwardimpact.team/docs/
|
|
193
|
+
url: "https://www.forwardimpact.team/docs/libraries/trace-analysis/index.md",
|
|
194
194
|
description:
|
|
195
195
|
"Read the NDJSON traces produced by `fit-eval` with `fit-trace` — grounded-theory method and worked examples.",
|
|
196
196
|
},
|
|
197
197
|
{
|
|
198
198
|
title: "Agent Teams",
|
|
199
|
-
url: "https://www.forwardimpact.team/docs/
|
|
199
|
+
url: "https://www.forwardimpact.team/docs/products/agent-teams/index.md",
|
|
200
200
|
description:
|
|
201
201
|
"How to author the agent, supervisor, and facilitator profiles consumed by --agent-profile, --supervisor-profile, --facilitator-profile, and --agent-profiles.",
|
|
202
202
|
},
|
package/bin/fit-trace.js
CHANGED
|
@@ -215,19 +215,19 @@ const definition = {
|
|
|
215
215
|
documentation: [
|
|
216
216
|
{
|
|
217
217
|
title: "Trace Analysis",
|
|
218
|
-
url: "https://www.forwardimpact.team/docs/
|
|
218
|
+
url: "https://www.forwardimpact.team/docs/libraries/trace-analysis/index.md",
|
|
219
219
|
description:
|
|
220
220
|
"The full method walkthrough with worked examples (an eval that failed, a multi-agent session that stalled).",
|
|
221
221
|
},
|
|
222
222
|
{
|
|
223
223
|
title: "Agent Evaluations",
|
|
224
|
-
url: "https://www.forwardimpact.team/docs/
|
|
224
|
+
url: "https://www.forwardimpact.team/docs/libraries/agent-evaluations/index.md",
|
|
225
225
|
description:
|
|
226
226
|
"How `fit-eval supervise` produces the traces this skill analyzes.",
|
|
227
227
|
},
|
|
228
228
|
{
|
|
229
229
|
title: "Agent Collaboration",
|
|
230
|
-
url: "https://www.forwardimpact.team/docs/
|
|
230
|
+
url: "https://www.forwardimpact.team/docs/libraries/agent-collaboration/index.md",
|
|
231
231
|
description:
|
|
232
232
|
"How `fit-eval facilitate` produces multi-agent traces; `split` is the bridge into per-source files.",
|
|
233
233
|
},
|
package/package.json
CHANGED
|
@@ -1,9 +1,30 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forwardimpact/libeval",
|
|
3
|
-
"version": "0.1.25",
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.1.27",
|
|
4
|
+
"description": "Agent evaluation: collect Claude Code traces, run agent loops, supervise multi-step workflows.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"eval",
|
|
7
|
+
"agent",
|
|
8
|
+
"trace",
|
|
9
|
+
"claude-code",
|
|
10
|
+
"supervisor"
|
|
11
|
+
],
|
|
12
|
+
"homepage": "https://www.forwardimpact.team",
|
|
13
|
+
"repository": {
|
|
14
|
+
"type": "git",
|
|
15
|
+
"url": "git+https://github.com/forwardimpact/monorepo.git",
|
|
16
|
+
"directory": "libraries/libeval"
|
|
17
|
+
},
|
|
5
18
|
"license": "Apache-2.0",
|
|
6
19
|
"author": "D. Olsson <hi@senzilla.io>",
|
|
20
|
+
"forwardimpact": {
|
|
21
|
+
"capability": "agent-self-improvement",
|
|
22
|
+
"needs": [
|
|
23
|
+
"Parse and query Claude Code trace NDJSON files",
|
|
24
|
+
"Drive an LLM agent through a scripted run and capture its trace",
|
|
25
|
+
"Supervise a multi-step or multi-agent workflow"
|
|
26
|
+
]
|
|
27
|
+
},
|
|
7
28
|
"type": "module",
|
|
8
29
|
"main": "./src/index.js",
|
|
9
30
|
"exports": {
|
|
@@ -20,10 +41,6 @@
|
|
|
20
41
|
"bin/**/*.js",
|
|
21
42
|
"README.md"
|
|
22
43
|
],
|
|
23
|
-
"engines": {
|
|
24
|
-
"bun": ">=1.2.0",
|
|
25
|
-
"node": ">=18.0.0"
|
|
26
|
-
},
|
|
27
44
|
"scripts": {
|
|
28
45
|
"test": "bun test test/*.test.js"
|
|
29
46
|
},
|
|
@@ -37,6 +54,10 @@
|
|
|
37
54
|
"devDependencies": {
|
|
38
55
|
"@forwardimpact/libharness": "^0.1.14"
|
|
39
56
|
},
|
|
57
|
+
"engines": {
|
|
58
|
+
"bun": ">=1.2.0",
|
|
59
|
+
"node": ">=18.0.0"
|
|
60
|
+
},
|
|
40
61
|
"publishConfig": {
|
|
41
62
|
"access": "public"
|
|
42
63
|
}
|
package/src/agent-runner.js
CHANGED
|
@@ -211,8 +211,9 @@ export class AgentRunner {
|
|
|
211
211
|
if (message.type === "system" && message.subtype === "init") {
|
|
212
212
|
this.sessionId = message.session_id;
|
|
213
213
|
}
|
|
214
|
-
if (message.type === "assistant"
|
|
215
|
-
state.assistantTextCount++;
|
|
214
|
+
if (message.type === "assistant") {
|
|
215
|
+
if (hasTextBlock(message)) state.assistantTextCount++;
|
|
216
|
+
trackSkillInvocation(message);
|
|
216
217
|
}
|
|
217
218
|
}
|
|
218
219
|
|
|
@@ -293,6 +294,20 @@ export function hasTextBlock(message) {
|
|
|
293
294
|
return false;
|
|
294
295
|
}
|
|
295
296
|
|
|
297
|
+
function trackSkillInvocation(message) {
|
|
298
|
+
const content = message.message?.content ?? message.content;
|
|
299
|
+
if (!Array.isArray(content)) return;
|
|
300
|
+
for (const block of content) {
|
|
301
|
+
if (
|
|
302
|
+
block.type === "tool_use" &&
|
|
303
|
+
block.name === "Skill" &&
|
|
304
|
+
block.input?.skill
|
|
305
|
+
) {
|
|
306
|
+
process.env.LIBEVAL_SKILL = block.input.skill;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
296
311
|
/**
|
|
297
312
|
* Factory function — wires real dependencies.
|
|
298
313
|
* @param {object} deps - Same as AgentRunner constructor
|
package/src/commands/facilitate.js
CHANGED
|
@@ -73,6 +73,10 @@ export async function runFacilitateCommand(values, _args) {
|
|
|
73
73
|
})
|
|
74
74
|
: process.stdout;
|
|
75
75
|
|
|
76
|
+
if (opts.facilitatorProfile) {
|
|
77
|
+
process.env.LIBEVAL_AGENT_PROFILE = opts.facilitatorProfile;
|
|
78
|
+
}
|
|
79
|
+
|
|
76
80
|
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
77
81
|
const facilitator = createFacilitator({
|
|
78
82
|
facilitatorCwd: opts.facilitatorCwd,
|
package/src/commands/run.js
CHANGED
|
@@ -78,6 +78,10 @@ export async function runRunCommand(values, _args) {
|
|
|
78
78
|
);
|
|
79
79
|
};
|
|
80
80
|
|
|
81
|
+
if (agentProfile) {
|
|
82
|
+
process.env.LIBEVAL_AGENT_PROFILE = agentProfile;
|
|
83
|
+
}
|
|
84
|
+
|
|
81
85
|
const systemPrompt = agentProfile
|
|
82
86
|
? composeProfilePrompt(agentProfile, {
|
|
83
87
|
profilesDir: resolve(cwd, ".claude/agents"),
|
package/src/commands/supervise.js
CHANGED
|
@@ -71,6 +71,10 @@ export async function runSuperviseCommand(values, _args) {
|
|
|
71
71
|
})
|
|
72
72
|
: process.stdout;
|
|
73
73
|
|
|
74
|
+
if (opts.agentProfile) {
|
|
75
|
+
process.env.LIBEVAL_AGENT_PROFILE = opts.agentProfile;
|
|
76
|
+
}
|
|
77
|
+
|
|
74
78
|
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
75
79
|
const supervisor = createSupervisor({
|
|
76
80
|
supervisorCwd: opts.supervisorCwd,
|
package/src/facilitator.js
CHANGED
|
@@ -180,42 +180,48 @@ export class Facilitator {
|
|
|
180
180
|
let messages = this.messageBus.drain(agent.name);
|
|
181
181
|
if (messages.length === 0) return;
|
|
182
182
|
|
|
183
|
-
this.emitOrchestratorEvent({
|
|
184
|
-
type: "agent_start",
|
|
185
|
-
agent: agent.name,
|
|
186
|
-
});
|
|
183
|
+
this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
|
|
187
184
|
await agent.runner.run(formatMessages(messages));
|
|
188
|
-
if (this
|
|
185
|
+
if (await this.#settleAgentTurn(agent)) return;
|
|
186
|
+
|
|
187
|
+
// Loop: check for new messages, resume if any
|
|
188
|
+
while (!this.ctx.concluded) {
|
|
189
|
+
messages = await this.#awaitAgentMessages(agent.name);
|
|
190
|
+
if (messages.length === 0) break;
|
|
191
|
+
await agent.runner.resume(formatMessages(messages));
|
|
192
|
+
if (await this.#settleAgentTurn(agent)) break;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Enforce pending-ask and emit turn_complete. Returns true when the
|
|
198
|
+
* session has concluded and the caller should stop.
|
|
199
|
+
*/
|
|
200
|
+
async #settleAgentTurn(agent) {
|
|
201
|
+
if (this.ctx.concluded) return true;
|
|
189
202
|
await this.#enforcePendingAsk(agent);
|
|
190
|
-
if (this.ctx.concluded) return;
|
|
203
|
+
if (this.ctx.concluded) return true;
|
|
191
204
|
this.eventQueue.enqueue({
|
|
192
205
|
type: "lifecycle",
|
|
193
206
|
agent: agent.name,
|
|
194
207
|
status: "turn_complete",
|
|
195
208
|
});
|
|
209
|
+
return false;
|
|
210
|
+
}
|
|
196
211
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
if (this.ctx.concluded) break;
|
|
211
|
-
await this.#enforcePendingAsk(agent);
|
|
212
|
-
if (this.ctx.concluded) break;
|
|
213
|
-
this.eventQueue.enqueue({
|
|
214
|
-
type: "lifecycle",
|
|
215
|
-
agent: agent.name,
|
|
216
|
-
status: "turn_complete",
|
|
217
|
-
});
|
|
218
|
-
}
|
|
212
|
+
/**
|
|
213
|
+
* Wait for messages addressed to `name`, returning an empty array when
|
|
214
|
+
* the session concludes first.
|
|
215
|
+
*/
|
|
216
|
+
async #awaitAgentMessages(name) {
|
|
217
|
+
const messages = this.messageBus.drain(name);
|
|
218
|
+
if (messages.length > 0) return messages;
|
|
219
|
+
await Promise.race([
|
|
220
|
+
this.messageBus.waitForMessages(name),
|
|
221
|
+
this.concludePromise,
|
|
222
|
+
]);
|
|
223
|
+
if (this.ctx.concluded) return [];
|
|
224
|
+
return this.messageBus.drain(name);
|
|
219
225
|
}
|
|
220
226
|
|
|
221
227
|
/**
|
package/src/render/turn-renderer.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Turn renderer — maps a structured turn into formatted text lines.
|
|
3
|
+
*
|
|
4
|
+
* Shared by `TeeWriter.flushTurns()` (live stream) and
|
|
5
|
+
* `TraceCollector.toText()` (offline replay) so both emit identical output
|
|
6
|
+
* (spec 540).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
renderTextLine,
|
|
11
|
+
renderToolCallLine,
|
|
12
|
+
renderToolResultLine,
|
|
13
|
+
} from "./line-renderer.js";
|
|
14
|
+
import {
|
|
15
|
+
hintForCall,
|
|
16
|
+
previewForResult,
|
|
17
|
+
simplifyToolName,
|
|
18
|
+
} from "./tool-hints.js";
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Render a single turn to formatted text lines.
|
|
22
|
+
*
|
|
23
|
+
* @param {object} turn - Structured turn object
|
|
24
|
+
* @param {boolean} withPrefix - Whether to include source labels
|
|
25
|
+
* @returns {string[]} Array of rendered line strings
|
|
26
|
+
*/
|
|
27
|
+
export function renderTurnLines(turn, withPrefix) {
|
|
28
|
+
if (turn.role === "assistant") return renderAssistantTurn(turn, withPrefix);
|
|
29
|
+
if (turn.role === "tool_result")
|
|
30
|
+
return renderToolResultTurn(turn, withPrefix);
|
|
31
|
+
if (turn.role === "system") return renderSystemTurn(turn, withPrefix);
|
|
32
|
+
if (turn.role === "user") return renderUserTurn(turn, withPrefix);
|
|
33
|
+
return [];
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
|
|
37
|
+
function renderAssistantTurn(turn, withPrefix) {
|
|
38
|
+
const lines = [];
|
|
39
|
+
for (const block of turn.content) {
|
|
40
|
+
if (block.type === "text") {
|
|
41
|
+
lines.push(
|
|
42
|
+
renderTextLine({ source: turn.source, text: block.text, withPrefix }),
|
|
43
|
+
);
|
|
44
|
+
} else if (block.type === "tool_use") {
|
|
45
|
+
lines.push(
|
|
46
|
+
renderToolCallLine({
|
|
47
|
+
source: turn.source,
|
|
48
|
+
toolName: simplifyToolName(block.name),
|
|
49
|
+
hint: hintForCall(block.name, block.input),
|
|
50
|
+
withPrefix,
|
|
51
|
+
}),
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
return lines;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
|
|
59
|
+
function renderToolResultTurn(turn, withPrefix) {
|
|
60
|
+
return [
|
|
61
|
+
renderToolResultLine({
|
|
62
|
+
source: turn.source,
|
|
63
|
+
preview: previewForResult(turn.content, turn.isError),
|
|
64
|
+
withPrefix,
|
|
65
|
+
}),
|
|
66
|
+
];
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
|
|
70
|
+
function renderSystemTurn(turn, withPrefix) {
|
|
71
|
+
const label = turn.subtype ?? "system";
|
|
72
|
+
return [
|
|
73
|
+
renderTextLine({ source: turn.source, text: `[${label}]`, withPrefix }),
|
|
74
|
+
];
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
|
|
78
|
+
function renderUserTurn(turn, withPrefix) {
|
|
79
|
+
const lines = [];
|
|
80
|
+
for (const block of turn.content) {
|
|
81
|
+
if (block.type === "text") {
|
|
82
|
+
lines.push(
|
|
83
|
+
renderTextLine({
|
|
84
|
+
source: turn.source,
|
|
85
|
+
text: `[user] ${block.text}`,
|
|
86
|
+
withPrefix,
|
|
87
|
+
}),
|
|
88
|
+
);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
return lines;
|
|
92
|
+
}
|
package/src/supervisor.js
CHANGED
|
@@ -172,39 +172,26 @@ export class Supervisor {
|
|
|
172
172
|
: await this.agentRunner.run(relay);
|
|
173
173
|
agentCalled = true;
|
|
174
174
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
175
|
+
const outcome = this.#classifyAgentOutcome(
|
|
176
|
+
agentResult,
|
|
177
|
+
turn,
|
|
178
|
+
interventions,
|
|
179
|
+
);
|
|
179
180
|
|
|
180
|
-
if (
|
|
181
|
-
|
|
182
|
-
success: true,
|
|
183
|
-
turns: turn,
|
|
184
|
-
summary: this.ctx.summary,
|
|
185
|
-
});
|
|
186
|
-
return { exit: { success: true, turns: turn } };
|
|
187
|
-
}
|
|
181
|
+
if (outcome.type === "exit") return { exit: outcome.exit };
|
|
182
|
+
if (outcome.type === "intervention_limit") return { exit: null };
|
|
188
183
|
|
|
189
|
-
if (
|
|
184
|
+
if (outcome.type === "redirect") {
|
|
190
185
|
interventions++;
|
|
191
|
-
|
|
192
|
-
this.ctx.redirect = null;
|
|
193
|
-
if (interventions >= MAX_INTERVENTIONS_PER_TURN) {
|
|
194
|
-
this.emitOrchestratorEvent({ type: "intervention_limit", turn });
|
|
195
|
-
return { exit: null };
|
|
196
|
-
}
|
|
197
|
-
relay = redirect.message;
|
|
186
|
+
relay = outcome.relay;
|
|
198
187
|
this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
|
|
199
188
|
continue;
|
|
200
189
|
}
|
|
201
190
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
continue;
|
|
207
|
-
}
|
|
191
|
+
const askRelay = this.#drainAgentAskRelay();
|
|
192
|
+
if (askRelay) {
|
|
193
|
+
relay = askRelay;
|
|
194
|
+
continue;
|
|
208
195
|
}
|
|
209
196
|
|
|
210
197
|
return { exit: null };
|
|
@@ -214,6 +201,50 @@ export class Supervisor {
|
|
|
214
201
|
}
|
|
215
202
|
}
|
|
216
203
|
|
|
204
|
+
/**
|
|
205
|
+
* Classify the outcome of a single agent execution within #runAgentTurn.
|
|
206
|
+
* @returns {{type: string, exit?: object|null, relay?: string}}
|
|
207
|
+
*/
|
|
208
|
+
#classifyAgentOutcome(agentResult, turn, interventions) {
|
|
209
|
+
if (agentResult.error && !agentResult.aborted) {
|
|
210
|
+
this.emitSummary({ success: false, turns: turn });
|
|
211
|
+
return { type: "exit", exit: { success: false, turns: turn } };
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if (this.ctx.concluded) {
|
|
215
|
+
this.emitSummary({
|
|
216
|
+
success: true,
|
|
217
|
+
turns: turn,
|
|
218
|
+
summary: this.ctx.summary,
|
|
219
|
+
});
|
|
220
|
+
return { type: "exit", exit: { success: true, turns: turn } };
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
if (agentResult.aborted && this.ctx.redirect) {
|
|
224
|
+
const redirect = this.ctx.redirect;
|
|
225
|
+
this.ctx.redirect = null;
|
|
226
|
+
if (interventions + 1 >= MAX_INTERVENTIONS_PER_TURN) {
|
|
227
|
+
this.emitOrchestratorEvent({ type: "intervention_limit", turn });
|
|
228
|
+
return { type: "intervention_limit" };
|
|
229
|
+
}
|
|
230
|
+
return { type: "redirect", relay: redirect.message };
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
return { type: "continue" };
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* If the agent has an unanswered ask, drain reminders and return a
|
|
238
|
+
* formatted relay string. Returns null when no relay is needed.
|
|
239
|
+
* @returns {string|null}
|
|
240
|
+
*/
|
|
241
|
+
#drainAgentAskRelay() {
|
|
242
|
+
if (this.#checkAsk("agent") !== "recheck" || this.ctx.concluded)
|
|
243
|
+
return null;
|
|
244
|
+
const reminders = this.messageBus.drain("agent");
|
|
245
|
+
return reminders.length > 0 ? formatMessages(reminders) : null;
|
|
246
|
+
}
|
|
247
|
+
|
|
217
248
|
/**
|
|
218
249
|
* Mid-turn supervisor review fired from inside the agent's onBatch hook.
|
|
219
250
|
* Runs the supervisor's LLM against the batch and aborts the agent if
|
package/src/tee-writer.js
CHANGED
|
@@ -17,16 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
import { Writable } from "node:stream";
|
|
19
19
|
import { TraceCollector } from "./trace-collector.js";
|
|
20
|
-
import {
|
|
21
|
-
renderTextLine,
|
|
22
|
-
renderToolCallLine,
|
|
23
|
-
renderToolResultLine,
|
|
24
|
-
} from "./render/line-renderer.js";
|
|
25
|
-
import {
|
|
26
|
-
hintForCall,
|
|
27
|
-
previewForResult,
|
|
28
|
-
simplifyToolName,
|
|
29
|
-
} from "./render/tool-hints.js";
|
|
20
|
+
import { renderTurnLines } from "./render/turn-renderer.js";
|
|
30
21
|
import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
|
|
31
22
|
|
|
32
23
|
export class TeeWriter extends Writable {
|
|
@@ -134,56 +125,8 @@ export class TeeWriter extends Writable {
|
|
|
134
125
|
const withPrefix = this.mode !== "raw";
|
|
135
126
|
while (this.turnsEmitted < turns.length) {
|
|
136
127
|
const turn = turns[this.turnsEmitted++];
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
if (block.type === "text") {
|
|
140
|
-
this.textStream.write(
|
|
141
|
-
renderTextLine({
|
|
142
|
-
source: turn.source,
|
|
143
|
-
text: block.text,
|
|
144
|
-
withPrefix,
|
|
145
|
-
}),
|
|
146
|
-
);
|
|
147
|
-
} else if (block.type === "tool_use") {
|
|
148
|
-
this.textStream.write(
|
|
149
|
-
renderToolCallLine({
|
|
150
|
-
source: turn.source,
|
|
151
|
-
toolName: simplifyToolName(block.name),
|
|
152
|
-
hint: hintForCall(block.name, block.input),
|
|
153
|
-
withPrefix,
|
|
154
|
-
}),
|
|
155
|
-
);
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
} else if (turn.role === "tool_result") {
|
|
159
|
-
this.textStream.write(
|
|
160
|
-
renderToolResultLine({
|
|
161
|
-
source: turn.source,
|
|
162
|
-
preview: previewForResult(turn.content, turn.isError),
|
|
163
|
-
withPrefix,
|
|
164
|
-
}),
|
|
165
|
-
);
|
|
166
|
-
} else if (turn.role === "system") {
|
|
167
|
-
const label = turn.subtype ?? "system";
|
|
168
|
-
this.textStream.write(
|
|
169
|
-
renderTextLine({
|
|
170
|
-
source: turn.source,
|
|
171
|
-
text: `[${label}]`,
|
|
172
|
-
withPrefix,
|
|
173
|
-
}),
|
|
174
|
-
);
|
|
175
|
-
} else if (turn.role === "user") {
|
|
176
|
-
for (const block of turn.content) {
|
|
177
|
-
if (block.type === "text") {
|
|
178
|
-
this.textStream.write(
|
|
179
|
-
renderTextLine({
|
|
180
|
-
source: turn.source,
|
|
181
|
-
text: `[user] ${block.text}`,
|
|
182
|
-
withPrefix,
|
|
183
|
-
}),
|
|
184
|
-
);
|
|
185
|
-
}
|
|
186
|
-
}
|
|
128
|
+
for (const line of renderTurnLines(turn, withPrefix)) {
|
|
129
|
+
this.textStream.write(line);
|
|
187
130
|
}
|
|
188
131
|
}
|
|
189
132
|
}
|
package/src/trace-collector.js
CHANGED
|
@@ -9,16 +9,7 @@
|
|
|
9
9
|
* one formatting path (spec 540).
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import {
|
|
13
|
-
renderTextLine,
|
|
14
|
-
renderToolCallLine,
|
|
15
|
-
renderToolResultLine,
|
|
16
|
-
} from "./render/line-renderer.js";
|
|
17
|
-
import {
|
|
18
|
-
hintForCall,
|
|
19
|
-
previewForResult,
|
|
20
|
-
simplifyToolName,
|
|
21
|
-
} from "./render/tool-hints.js";
|
|
12
|
+
import { renderTurnLines } from "./render/turn-renderer.js";
|
|
22
13
|
import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
|
|
23
14
|
|
|
24
15
|
export class TraceCollector {
|
|
@@ -270,68 +261,10 @@ export class TraceCollector {
|
|
|
270
261
|
const out = [];
|
|
271
262
|
|
|
272
263
|
for (const turn of this.turns) {
|
|
273
|
-
|
|
274
|
-
for (const block of turn.content) {
|
|
275
|
-
if (block.type === "text") {
|
|
276
|
-
out.push(
|
|
277
|
-
renderTextLine({
|
|
278
|
-
source: turn.source,
|
|
279
|
-
text: block.text,
|
|
280
|
-
withPrefix,
|
|
281
|
-
}),
|
|
282
|
-
);
|
|
283
|
-
} else if (block.type === "tool_use") {
|
|
284
|
-
out.push(
|
|
285
|
-
renderToolCallLine({
|
|
286
|
-
source: turn.source,
|
|
287
|
-
toolName: simplifyToolName(block.name),
|
|
288
|
-
hint: hintForCall(block.name, block.input),
|
|
289
|
-
withPrefix,
|
|
290
|
-
}),
|
|
291
|
-
);
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
} else if (turn.role === "tool_result") {
|
|
295
|
-
out.push(
|
|
296
|
-
renderToolResultLine({
|
|
297
|
-
source: turn.source,
|
|
298
|
-
preview: previewForResult(turn.content, turn.isError),
|
|
299
|
-
withPrefix,
|
|
300
|
-
}),
|
|
301
|
-
);
|
|
302
|
-
} else if (turn.role === "system") {
|
|
303
|
-
const label = turn.subtype ?? "system";
|
|
304
|
-
out.push(
|
|
305
|
-
renderTextLine({
|
|
306
|
-
source: turn.source,
|
|
307
|
-
text: `[${label}]`,
|
|
308
|
-
withPrefix,
|
|
309
|
-
}),
|
|
310
|
-
);
|
|
311
|
-
} else if (turn.role === "user") {
|
|
312
|
-
for (const block of turn.content) {
|
|
313
|
-
if (block.type === "text") {
|
|
314
|
-
out.push(
|
|
315
|
-
renderTextLine({
|
|
316
|
-
source: turn.source,
|
|
317
|
-
text: `[user] ${block.text}`,
|
|
318
|
-
withPrefix,
|
|
319
|
-
}),
|
|
320
|
-
);
|
|
321
|
-
}
|
|
322
|
-
}
|
|
323
|
-
}
|
|
264
|
+
out.push(...renderTurnLines(turn, withPrefix));
|
|
324
265
|
}
|
|
325
266
|
|
|
326
|
-
|
|
327
|
-
let tail = "";
|
|
328
|
-
if (this.result) {
|
|
329
|
-
const duration = formatDuration(this.result.durationMs);
|
|
330
|
-
const cost = Number(this.result.totalCostUsd).toFixed(4);
|
|
331
|
-
tail =
|
|
332
|
-
"\n" +
|
|
333
|
-
`--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`;
|
|
334
|
-
}
|
|
267
|
+
const tail = this.#formatResultTail();
|
|
335
268
|
|
|
336
269
|
// Each rendered line already ends with `\n`; concatenate, drop the
|
|
337
270
|
// trailing newline, then append the tail so the output shape stays
|
|
@@ -341,6 +274,20 @@ export class TraceCollector {
|
|
|
341
274
|
const body = out.join("").replace(/\n$/, "");
|
|
342
275
|
return body + tail;
|
|
343
276
|
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* Format the trailing result summary line (spec 540).
|
|
280
|
+
* @returns {string}
|
|
281
|
+
*/
|
|
282
|
+
#formatResultTail() {
|
|
283
|
+
if (!this.result) return "";
|
|
284
|
+
const duration = formatDuration(this.result.durationMs);
|
|
285
|
+
const cost = Number(this.result.totalCostUsd).toFixed(4);
|
|
286
|
+
return (
|
|
287
|
+
"\n" +
|
|
288
|
+
`--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`
|
|
289
|
+
);
|
|
290
|
+
}
|
|
344
291
|
}
|
|
345
292
|
|
|
346
293
|
/**
|
package/src/trace-github.js
CHANGED
|
@@ -48,7 +48,6 @@ export class TraceGitHub {
|
|
|
48
48
|
const data = await this.#get(url);
|
|
49
49
|
const runs = data.workflow_runs ?? [];
|
|
50
50
|
|
|
51
|
-
// eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
|
|
52
51
|
const re = new RegExp(pattern, "i");
|
|
53
52
|
return runs
|
|
54
53
|
.filter((r) => re.test(r.name))
|
package/src/trace-query.js
CHANGED
|
@@ -81,24 +81,12 @@ export class TraceQuery {
|
|
|
81
81
|
*/
|
|
82
82
|
filter(opts = {}) {
|
|
83
83
|
const { role, toolName, isError } = opts;
|
|
84
|
-
return this.turns.filter(
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
if (toolName !== undefined) {
|
|
91
|
-
if (turn.role === "assistant") {
|
|
92
|
-
const has = turn.content.some(
|
|
93
|
-
(b) => b.type === "tool_use" && b.name === toolName,
|
|
94
|
-
);
|
|
95
|
-
if (!has) return false;
|
|
96
|
-
} else {
|
|
97
|
-
return false;
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
return true;
|
|
101
|
-
});
|
|
84
|
+
return this.turns.filter(
|
|
85
|
+
(turn) =>
|
|
86
|
+
matchesRole(turn, role) &&
|
|
87
|
+
matchesError(turn, isError) &&
|
|
88
|
+
matchesToolName(turn, toolName),
|
|
89
|
+
);
|
|
102
90
|
}
|
|
103
91
|
|
|
104
92
|
/** @returns {number} */
|
|
@@ -151,7 +139,6 @@ export class TraceQuery {
|
|
|
151
139
|
*/
|
|
152
140
|
search(pattern, opts = {}) {
|
|
153
141
|
const { context = 0, limit = 50, full = false } = opts;
|
|
154
|
-
// eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
|
|
155
142
|
const re = new RegExp(pattern, "gi");
|
|
156
143
|
const hits = [];
|
|
157
144
|
|
|
@@ -200,30 +187,18 @@ export class TraceQuery {
|
|
|
200
187
|
* @returns {object[]}
|
|
201
188
|
*/
|
|
202
189
|
tool(name) {
|
|
203
|
-
const toolUseIds =
|
|
204
|
-
const
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
toolUseIds.add(b.toolUseId);
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
} else if (
|
|
220
|
-
turn.role === "tool_result" &&
|
|
221
|
-
toolUseIds.has(turn.toolUseId)
|
|
222
|
-
) {
|
|
223
|
-
results.push(turn);
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
return results;
|
|
190
|
+
const toolUseIds = collectToolUseIds(this.turns, name);
|
|
191
|
+
const assistantTurns = this.turns.filter(
|
|
192
|
+
(t) =>
|
|
193
|
+
t.role === "assistant" &&
|
|
194
|
+
t.content.some((b) => b.type === "tool_use" && b.name === name),
|
|
195
|
+
);
|
|
196
|
+
const resultTurns = this.turns.filter(
|
|
197
|
+
(t) => t.role === "tool_result" && toolUseIds.has(t.toolUseId),
|
|
198
|
+
);
|
|
199
|
+
return [...assistantTurns, ...resultTurns].sort(
|
|
200
|
+
(a, b) => a.index - b.index,
|
|
201
|
+
);
|
|
227
202
|
}
|
|
228
203
|
|
|
229
204
|
/**
|
|
@@ -343,6 +318,57 @@ export class TraceQuery {
|
|
|
343
318
|
}
|
|
344
319
|
}
|
|
345
320
|
|
|
321
|
+
/**
|
|
322
|
+
* @param {object} turn
|
|
323
|
+
* @param {string|undefined} role
|
|
324
|
+
* @returns {boolean}
|
|
325
|
+
*/
|
|
326
|
+
function matchesRole(turn, role) {
|
|
327
|
+
return role === undefined || turn.role === role;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/**
|
|
331
|
+
* @param {object} turn
|
|
332
|
+
* @param {boolean|undefined} isError
|
|
333
|
+
* @returns {boolean}
|
|
334
|
+
*/
|
|
335
|
+
function matchesError(turn, isError) {
|
|
336
|
+
if (isError === undefined) return true;
|
|
337
|
+
return turn.role === "tool_result" && turn.isError === isError;
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
/**
|
|
341
|
+
* @param {object} turn
|
|
342
|
+
* @param {string|undefined} toolName
|
|
343
|
+
* @returns {boolean}
|
|
344
|
+
*/
|
|
345
|
+
function matchesToolName(turn, toolName) {
|
|
346
|
+
if (toolName === undefined) return true;
|
|
347
|
+
return (
|
|
348
|
+
turn.role === "assistant" &&
|
|
349
|
+
turn.content.some((b) => b.type === "tool_use" && b.name === toolName)
|
|
350
|
+
);
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
/**
|
|
354
|
+
* Collect all toolUseIds for a given tool name from assistant turns.
|
|
355
|
+
* @param {object[]} turns
|
|
356
|
+
* @param {string} name
|
|
357
|
+
* @returns {Set<string>}
|
|
358
|
+
*/
|
|
359
|
+
function collectToolUseIds(turns, name) {
|
|
360
|
+
const ids = new Set();
|
|
361
|
+
for (const turn of turns) {
|
|
362
|
+
if (turn.role !== "assistant") continue;
|
|
363
|
+
for (const b of turn.content) {
|
|
364
|
+
if (b.type === "tool_use" && b.name === name && b.toolUseId) {
|
|
365
|
+
ids.add(b.toolUseId);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
return ids;
|
|
370
|
+
}
|
|
371
|
+
|
|
346
372
|
/**
|
|
347
373
|
* Search a single turn for regex matches. Returns array of match descriptions.
|
|
348
374
|
* @param {object} turn
|