@forwardimpact/libeval 0.1.26 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +13 -7
- package/src/agent-runner.js +17 -2
- package/src/commands/facilitate.js +4 -0
- package/src/commands/run.js +4 -0
- package/src/commands/supervise.js +4 -0
- package/src/facilitator.js +34 -28
- package/src/render/turn-renderer.js +92 -0
- package/src/supervisor.js +57 -26
- package/src/tee-writer.js +3 -60
- package/src/trace-collector.js +17 -70
- package/src/trace-github.js +0 -1
- package/src/trace-query.js +69 -43
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forwardimpact/libeval",
|
|
3
|
-
"version": "0.1.26",
|
|
3
|
+
"version": "0.1.27",
|
|
4
4
|
"description": "Agent evaluation: collect Claude Code traces, run agent loops, supervise multi-step workflows.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"eval",
|
|
@@ -9,6 +9,14 @@
|
|
|
9
9
|
"claude-code",
|
|
10
10
|
"supervisor"
|
|
11
11
|
],
|
|
12
|
+
"homepage": "https://www.forwardimpact.team",
|
|
13
|
+
"repository": {
|
|
14
|
+
"type": "git",
|
|
15
|
+
"url": "git+https://github.com/forwardimpact/monorepo.git",
|
|
16
|
+
"directory": "libraries/libeval"
|
|
17
|
+
},
|
|
18
|
+
"license": "Apache-2.0",
|
|
19
|
+
"author": "D. Olsson <hi@senzilla.io>",
|
|
12
20
|
"forwardimpact": {
|
|
13
21
|
"capability": "agent-self-improvement",
|
|
14
22
|
"needs": [
|
|
@@ -17,8 +25,6 @@
|
|
|
17
25
|
"Supervise a multi-step or multi-agent workflow"
|
|
18
26
|
]
|
|
19
27
|
},
|
|
20
|
-
"license": "Apache-2.0",
|
|
21
|
-
"author": "D. Olsson <hi@senzilla.io>",
|
|
22
28
|
"type": "module",
|
|
23
29
|
"main": "./src/index.js",
|
|
24
30
|
"exports": {
|
|
@@ -35,10 +41,6 @@
|
|
|
35
41
|
"bin/**/*.js",
|
|
36
42
|
"README.md"
|
|
37
43
|
],
|
|
38
|
-
"engines": {
|
|
39
|
-
"bun": ">=1.2.0",
|
|
40
|
-
"node": ">=18.0.0"
|
|
41
|
-
},
|
|
42
44
|
"scripts": {
|
|
43
45
|
"test": "bun test test/*.test.js"
|
|
44
46
|
},
|
|
@@ -52,6 +54,10 @@
|
|
|
52
54
|
"devDependencies": {
|
|
53
55
|
"@forwardimpact/libharness": "^0.1.14"
|
|
54
56
|
},
|
|
57
|
+
"engines": {
|
|
58
|
+
"bun": ">=1.2.0",
|
|
59
|
+
"node": ">=18.0.0"
|
|
60
|
+
},
|
|
55
61
|
"publishConfig": {
|
|
56
62
|
"access": "public"
|
|
57
63
|
}
|
package/src/agent-runner.js
CHANGED
|
@@ -211,8 +211,9 @@ export class AgentRunner {
|
|
|
211
211
|
if (message.type === "system" && message.subtype === "init") {
|
|
212
212
|
this.sessionId = message.session_id;
|
|
213
213
|
}
|
|
214
|
-
if (message.type === "assistant" && hasTextBlock(message)) {
|
|
215
|
-
state.assistantTextCount++;
|
|
214
|
+
if (message.type === "assistant") {
|
|
215
|
+
if (hasTextBlock(message)) state.assistantTextCount++;
|
|
216
|
+
trackSkillInvocation(message);
|
|
216
217
|
}
|
|
217
218
|
}
|
|
218
219
|
|
|
@@ -293,6 +294,20 @@ export function hasTextBlock(message) {
|
|
|
293
294
|
return false;
|
|
294
295
|
}
|
|
295
296
|
|
|
297
|
+
function trackSkillInvocation(message) {
|
|
298
|
+
const content = message.message?.content ?? message.content;
|
|
299
|
+
if (!Array.isArray(content)) return;
|
|
300
|
+
for (const block of content) {
|
|
301
|
+
if (
|
|
302
|
+
block.type === "tool_use" &&
|
|
303
|
+
block.name === "Skill" &&
|
|
304
|
+
block.input?.skill
|
|
305
|
+
) {
|
|
306
|
+
process.env.LIBEVAL_SKILL = block.input.skill;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
|
|
296
311
|
/**
|
|
297
312
|
* Factory function — wires real dependencies.
|
|
298
313
|
* @param {object} deps - Same as AgentRunner constructor
|
|
package/src/commands/facilitate.js
CHANGED
|
@@ -73,6 +73,10 @@ export async function runFacilitateCommand(values, _args) {
|
|
|
73
73
|
})
|
|
74
74
|
: process.stdout;
|
|
75
75
|
|
|
76
|
+
if (opts.facilitatorProfile) {
|
|
77
|
+
process.env.LIBEVAL_AGENT_PROFILE = opts.facilitatorProfile;
|
|
78
|
+
}
|
|
79
|
+
|
|
76
80
|
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
77
81
|
const facilitator = createFacilitator({
|
|
78
82
|
facilitatorCwd: opts.facilitatorCwd,
|
package/src/commands/run.js
CHANGED
|
@@ -78,6 +78,10 @@ export async function runRunCommand(values, _args) {
|
|
|
78
78
|
);
|
|
79
79
|
};
|
|
80
80
|
|
|
81
|
+
if (agentProfile) {
|
|
82
|
+
process.env.LIBEVAL_AGENT_PROFILE = agentProfile;
|
|
83
|
+
}
|
|
84
|
+
|
|
81
85
|
const systemPrompt = agentProfile
|
|
82
86
|
? composeProfilePrompt(agentProfile, {
|
|
83
87
|
profilesDir: resolve(cwd, ".claude/agents"),
|
|
package/src/commands/supervise.js
CHANGED
|
@@ -71,6 +71,10 @@ export async function runSuperviseCommand(values, _args) {
|
|
|
71
71
|
})
|
|
72
72
|
: process.stdout;
|
|
73
73
|
|
|
74
|
+
if (opts.agentProfile) {
|
|
75
|
+
process.env.LIBEVAL_AGENT_PROFILE = opts.agentProfile;
|
|
76
|
+
}
|
|
77
|
+
|
|
74
78
|
const { query } = await import("@anthropic-ai/claude-agent-sdk");
|
|
75
79
|
const supervisor = createSupervisor({
|
|
76
80
|
supervisorCwd: opts.supervisorCwd,
|
package/src/facilitator.js
CHANGED
|
@@ -180,42 +180,48 @@ export class Facilitator {
|
|
|
180
180
|
let messages = this.messageBus.drain(agent.name);
|
|
181
181
|
if (messages.length === 0) return;
|
|
182
182
|
|
|
183
|
-
this.emitOrchestratorEvent({
|
|
184
|
-
type: "agent_start",
|
|
185
|
-
agent: agent.name,
|
|
186
|
-
});
|
|
183
|
+
this.emitOrchestratorEvent({ type: "agent_start", agent: agent.name });
|
|
187
184
|
await agent.runner.run(formatMessages(messages));
|
|
188
|
-
if (this.ctx.concluded) return;
|
|
185
|
+
if (await this.#settleAgentTurn(agent)) return;
|
|
186
|
+
|
|
187
|
+
// Loop: check for new messages, resume if any
|
|
188
|
+
while (!this.ctx.concluded) {
|
|
189
|
+
messages = await this.#awaitAgentMessages(agent.name);
|
|
190
|
+
if (messages.length === 0) break;
|
|
191
|
+
await agent.runner.resume(formatMessages(messages));
|
|
192
|
+
if (await this.#settleAgentTurn(agent)) break;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Enforce pending-ask and emit turn_complete. Returns true when the
|
|
198
|
+
* session has concluded and the caller should stop.
|
|
199
|
+
*/
|
|
200
|
+
async #settleAgentTurn(agent) {
|
|
201
|
+
if (this.ctx.concluded) return true;
|
|
189
202
|
await this.#enforcePendingAsk(agent);
|
|
190
|
-
if (this.ctx.concluded) return;
|
|
203
|
+
if (this.ctx.concluded) return true;
|
|
191
204
|
this.eventQueue.enqueue({
|
|
192
205
|
type: "lifecycle",
|
|
193
206
|
agent: agent.name,
|
|
194
207
|
status: "turn_complete",
|
|
195
208
|
});
|
|
209
|
+
return false;
|
|
210
|
+
}
|
|
196
211
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
if (this.ctx.concluded) break;
|
|
211
|
-
await this.#enforcePendingAsk(agent);
|
|
212
|
-
if (this.ctx.concluded) break;
|
|
213
|
-
this.eventQueue.enqueue({
|
|
214
|
-
type: "lifecycle",
|
|
215
|
-
agent: agent.name,
|
|
216
|
-
status: "turn_complete",
|
|
217
|
-
});
|
|
218
|
-
}
|
|
212
|
+
/**
|
|
213
|
+
* Wait for messages addressed to `name`, returning an empty array when
|
|
214
|
+
* the session concludes first.
|
|
215
|
+
*/
|
|
216
|
+
async #awaitAgentMessages(name) {
|
|
217
|
+
const messages = this.messageBus.drain(name);
|
|
218
|
+
if (messages.length > 0) return messages;
|
|
219
|
+
await Promise.race([
|
|
220
|
+
this.messageBus.waitForMessages(name),
|
|
221
|
+
this.concludePromise,
|
|
222
|
+
]);
|
|
223
|
+
if (this.ctx.concluded) return [];
|
|
224
|
+
return this.messageBus.drain(name);
|
|
219
225
|
}
|
|
220
226
|
|
|
221
227
|
/**
|
|
package/src/render/turn-renderer.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Turn renderer — maps a structured turn into formatted text lines.
|
|
3
|
+
*
|
|
4
|
+
* Shared by `TeeWriter.flushTurns()` (live stream) and
|
|
5
|
+
* `TraceCollector.toText()` (offline replay) so both emit identical output
|
|
6
|
+
* (spec 540).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
renderTextLine,
|
|
11
|
+
renderToolCallLine,
|
|
12
|
+
renderToolResultLine,
|
|
13
|
+
} from "./line-renderer.js";
|
|
14
|
+
import {
|
|
15
|
+
hintForCall,
|
|
16
|
+
previewForResult,
|
|
17
|
+
simplifyToolName,
|
|
18
|
+
} from "./tool-hints.js";
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Render a single turn to formatted text lines.
|
|
22
|
+
*
|
|
23
|
+
* @param {object} turn - Structured turn object
|
|
24
|
+
* @param {boolean} withPrefix - Whether to include source labels
|
|
25
|
+
* @returns {string[]} Array of rendered line strings
|
|
26
|
+
*/
|
|
27
|
+
export function renderTurnLines(turn, withPrefix) {
|
|
28
|
+
if (turn.role === "assistant") return renderAssistantTurn(turn, withPrefix);
|
|
29
|
+
if (turn.role === "tool_result")
|
|
30
|
+
return renderToolResultTurn(turn, withPrefix);
|
|
31
|
+
if (turn.role === "system") return renderSystemTurn(turn, withPrefix);
|
|
32
|
+
if (turn.role === "user") return renderUserTurn(turn, withPrefix);
|
|
33
|
+
return [];
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
|
|
37
|
+
function renderAssistantTurn(turn, withPrefix) {
|
|
38
|
+
const lines = [];
|
|
39
|
+
for (const block of turn.content) {
|
|
40
|
+
if (block.type === "text") {
|
|
41
|
+
lines.push(
|
|
42
|
+
renderTextLine({ source: turn.source, text: block.text, withPrefix }),
|
|
43
|
+
);
|
|
44
|
+
} else if (block.type === "tool_use") {
|
|
45
|
+
lines.push(
|
|
46
|
+
renderToolCallLine({
|
|
47
|
+
source: turn.source,
|
|
48
|
+
toolName: simplifyToolName(block.name),
|
|
49
|
+
hint: hintForCall(block.name, block.input),
|
|
50
|
+
withPrefix,
|
|
51
|
+
}),
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
return lines;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
|
|
59
|
+
function renderToolResultTurn(turn, withPrefix) {
|
|
60
|
+
return [
|
|
61
|
+
renderToolResultLine({
|
|
62
|
+
source: turn.source,
|
|
63
|
+
preview: previewForResult(turn.content, turn.isError),
|
|
64
|
+
withPrefix,
|
|
65
|
+
}),
|
|
66
|
+
];
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
|
|
70
|
+
function renderSystemTurn(turn, withPrefix) {
|
|
71
|
+
const label = turn.subtype ?? "system";
|
|
72
|
+
return [
|
|
73
|
+
renderTextLine({ source: turn.source, text: `[${label}]`, withPrefix }),
|
|
74
|
+
];
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/** @param {object} turn @param {boolean} withPrefix @returns {string[]} */
|
|
78
|
+
function renderUserTurn(turn, withPrefix) {
|
|
79
|
+
const lines = [];
|
|
80
|
+
for (const block of turn.content) {
|
|
81
|
+
if (block.type === "text") {
|
|
82
|
+
lines.push(
|
|
83
|
+
renderTextLine({
|
|
84
|
+
source: turn.source,
|
|
85
|
+
text: `[user] ${block.text}`,
|
|
86
|
+
withPrefix,
|
|
87
|
+
}),
|
|
88
|
+
);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
return lines;
|
|
92
|
+
}
|
package/src/supervisor.js
CHANGED
|
@@ -172,39 +172,26 @@ export class Supervisor {
|
|
|
172
172
|
: await this.agentRunner.run(relay);
|
|
173
173
|
agentCalled = true;
|
|
174
174
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
175
|
+
const outcome = this.#classifyAgentOutcome(
|
|
176
|
+
agentResult,
|
|
177
|
+
turn,
|
|
178
|
+
interventions,
|
|
179
|
+
);
|
|
179
180
|
|
|
180
|
-
if (this.ctx.concluded) {
|
|
181
|
-
|
|
182
|
-
success: true,
|
|
183
|
-
turns: turn,
|
|
184
|
-
summary: this.ctx.summary,
|
|
185
|
-
});
|
|
186
|
-
return { exit: { success: true, turns: turn } };
|
|
187
|
-
}
|
|
181
|
+
if (outcome.type === "exit") return { exit: outcome.exit };
|
|
182
|
+
if (outcome.type === "intervention_limit") return { exit: null };
|
|
188
183
|
|
|
189
|
-
if (agentResult.aborted && this.ctx.redirect) {
|
|
184
|
+
if (outcome.type === "redirect") {
|
|
190
185
|
interventions++;
|
|
191
|
-
|
|
192
|
-
this.ctx.redirect = null;
|
|
193
|
-
if (interventions >= MAX_INTERVENTIONS_PER_TURN) {
|
|
194
|
-
this.emitOrchestratorEvent({ type: "intervention_limit", turn });
|
|
195
|
-
return { exit: null };
|
|
196
|
-
}
|
|
197
|
-
relay = redirect.message;
|
|
186
|
+
relay = outcome.relay;
|
|
198
187
|
this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
|
|
199
188
|
continue;
|
|
200
189
|
}
|
|
201
190
|
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
continue;
|
|
207
|
-
}
|
|
191
|
+
const askRelay = this.#drainAgentAskRelay();
|
|
192
|
+
if (askRelay) {
|
|
193
|
+
relay = askRelay;
|
|
194
|
+
continue;
|
|
208
195
|
}
|
|
209
196
|
|
|
210
197
|
return { exit: null };
|
|
@@ -214,6 +201,50 @@ export class Supervisor {
|
|
|
214
201
|
}
|
|
215
202
|
}
|
|
216
203
|
|
|
204
|
+
/**
|
|
205
|
+
* Classify the outcome of a single agent execution within #runAgentTurn.
|
|
206
|
+
* @returns {{type: string, exit?: object|null, relay?: string}}
|
|
207
|
+
*/
|
|
208
|
+
#classifyAgentOutcome(agentResult, turn, interventions) {
|
|
209
|
+
if (agentResult.error && !agentResult.aborted) {
|
|
210
|
+
this.emitSummary({ success: false, turns: turn });
|
|
211
|
+
return { type: "exit", exit: { success: false, turns: turn } };
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if (this.ctx.concluded) {
|
|
215
|
+
this.emitSummary({
|
|
216
|
+
success: true,
|
|
217
|
+
turns: turn,
|
|
218
|
+
summary: this.ctx.summary,
|
|
219
|
+
});
|
|
220
|
+
return { type: "exit", exit: { success: true, turns: turn } };
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
if (agentResult.aborted && this.ctx.redirect) {
|
|
224
|
+
const redirect = this.ctx.redirect;
|
|
225
|
+
this.ctx.redirect = null;
|
|
226
|
+
if (interventions + 1 >= MAX_INTERVENTIONS_PER_TURN) {
|
|
227
|
+
this.emitOrchestratorEvent({ type: "intervention_limit", turn });
|
|
228
|
+
return { type: "intervention_limit" };
|
|
229
|
+
}
|
|
230
|
+
return { type: "redirect", relay: redirect.message };
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
return { type: "continue" };
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* If the agent has an unanswered ask, drain reminders and return a
|
|
238
|
+
* formatted relay string. Returns null when no relay is needed.
|
|
239
|
+
* @returns {string|null}
|
|
240
|
+
*/
|
|
241
|
+
#drainAgentAskRelay() {
|
|
242
|
+
if (this.#checkAsk("agent") !== "recheck" || this.ctx.concluded)
|
|
243
|
+
return null;
|
|
244
|
+
const reminders = this.messageBus.drain("agent");
|
|
245
|
+
return reminders.length > 0 ? formatMessages(reminders) : null;
|
|
246
|
+
}
|
|
247
|
+
|
|
217
248
|
/**
|
|
218
249
|
* Mid-turn supervisor review fired from inside the agent's onBatch hook.
|
|
219
250
|
* Runs the supervisor's LLM against the batch and aborts the agent if
|
package/src/tee-writer.js
CHANGED
|
@@ -17,16 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
import { Writable } from "node:stream";
|
|
19
19
|
import { TraceCollector } from "./trace-collector.js";
|
|
20
|
-
import {
|
|
21
|
-
renderTextLine,
|
|
22
|
-
renderToolCallLine,
|
|
23
|
-
renderToolResultLine,
|
|
24
|
-
} from "./render/line-renderer.js";
|
|
25
|
-
import {
|
|
26
|
-
hintForCall,
|
|
27
|
-
previewForResult,
|
|
28
|
-
simplifyToolName,
|
|
29
|
-
} from "./render/tool-hints.js";
|
|
20
|
+
import { renderTurnLines } from "./render/turn-renderer.js";
|
|
30
21
|
import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
|
|
31
22
|
|
|
32
23
|
export class TeeWriter extends Writable {
|
|
@@ -134,56 +125,8 @@ export class TeeWriter extends Writable {
|
|
|
134
125
|
const withPrefix = this.mode !== "raw";
|
|
135
126
|
while (this.turnsEmitted < turns.length) {
|
|
136
127
|
const turn = turns[this.turnsEmitted++];
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
if (block.type === "text") {
|
|
140
|
-
this.textStream.write(
|
|
141
|
-
renderTextLine({
|
|
142
|
-
source: turn.source,
|
|
143
|
-
text: block.text,
|
|
144
|
-
withPrefix,
|
|
145
|
-
}),
|
|
146
|
-
);
|
|
147
|
-
} else if (block.type === "tool_use") {
|
|
148
|
-
this.textStream.write(
|
|
149
|
-
renderToolCallLine({
|
|
150
|
-
source: turn.source,
|
|
151
|
-
toolName: simplifyToolName(block.name),
|
|
152
|
-
hint: hintForCall(block.name, block.input),
|
|
153
|
-
withPrefix,
|
|
154
|
-
}),
|
|
155
|
-
);
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
} else if (turn.role === "tool_result") {
|
|
159
|
-
this.textStream.write(
|
|
160
|
-
renderToolResultLine({
|
|
161
|
-
source: turn.source,
|
|
162
|
-
preview: previewForResult(turn.content, turn.isError),
|
|
163
|
-
withPrefix,
|
|
164
|
-
}),
|
|
165
|
-
);
|
|
166
|
-
} else if (turn.role === "system") {
|
|
167
|
-
const label = turn.subtype ?? "system";
|
|
168
|
-
this.textStream.write(
|
|
169
|
-
renderTextLine({
|
|
170
|
-
source: turn.source,
|
|
171
|
-
text: `[${label}]`,
|
|
172
|
-
withPrefix,
|
|
173
|
-
}),
|
|
174
|
-
);
|
|
175
|
-
} else if (turn.role === "user") {
|
|
176
|
-
for (const block of turn.content) {
|
|
177
|
-
if (block.type === "text") {
|
|
178
|
-
this.textStream.write(
|
|
179
|
-
renderTextLine({
|
|
180
|
-
source: turn.source,
|
|
181
|
-
text: `[user] ${block.text}`,
|
|
182
|
-
withPrefix,
|
|
183
|
-
}),
|
|
184
|
-
);
|
|
185
|
-
}
|
|
186
|
-
}
|
|
128
|
+
for (const line of renderTurnLines(turn, withPrefix)) {
|
|
129
|
+
this.textStream.write(line);
|
|
187
130
|
}
|
|
188
131
|
}
|
|
189
132
|
}
|
package/src/trace-collector.js
CHANGED
|
@@ -9,16 +9,7 @@
|
|
|
9
9
|
* one formatting path (spec 540).
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import {
|
|
13
|
-
renderTextLine,
|
|
14
|
-
renderToolCallLine,
|
|
15
|
-
renderToolResultLine,
|
|
16
|
-
} from "./render/line-renderer.js";
|
|
17
|
-
import {
|
|
18
|
-
hintForCall,
|
|
19
|
-
previewForResult,
|
|
20
|
-
simplifyToolName,
|
|
21
|
-
} from "./render/tool-hints.js";
|
|
12
|
+
import { renderTurnLines } from "./render/turn-renderer.js";
|
|
22
13
|
import { isSuppressedOrchestratorEvent } from "./render/orchestrator-filter.js";
|
|
23
14
|
|
|
24
15
|
export class TraceCollector {
|
|
@@ -270,68 +261,10 @@ export class TraceCollector {
|
|
|
270
261
|
const out = [];
|
|
271
262
|
|
|
272
263
|
for (const turn of this.turns) {
|
|
273
|
-
|
|
274
|
-
for (const block of turn.content) {
|
|
275
|
-
if (block.type === "text") {
|
|
276
|
-
out.push(
|
|
277
|
-
renderTextLine({
|
|
278
|
-
source: turn.source,
|
|
279
|
-
text: block.text,
|
|
280
|
-
withPrefix,
|
|
281
|
-
}),
|
|
282
|
-
);
|
|
283
|
-
} else if (block.type === "tool_use") {
|
|
284
|
-
out.push(
|
|
285
|
-
renderToolCallLine({
|
|
286
|
-
source: turn.source,
|
|
287
|
-
toolName: simplifyToolName(block.name),
|
|
288
|
-
hint: hintForCall(block.name, block.input),
|
|
289
|
-
withPrefix,
|
|
290
|
-
}),
|
|
291
|
-
);
|
|
292
|
-
}
|
|
293
|
-
}
|
|
294
|
-
} else if (turn.role === "tool_result") {
|
|
295
|
-
out.push(
|
|
296
|
-
renderToolResultLine({
|
|
297
|
-
source: turn.source,
|
|
298
|
-
preview: previewForResult(turn.content, turn.isError),
|
|
299
|
-
withPrefix,
|
|
300
|
-
}),
|
|
301
|
-
);
|
|
302
|
-
} else if (turn.role === "system") {
|
|
303
|
-
const label = turn.subtype ?? "system";
|
|
304
|
-
out.push(
|
|
305
|
-
renderTextLine({
|
|
306
|
-
source: turn.source,
|
|
307
|
-
text: `[${label}]`,
|
|
308
|
-
withPrefix,
|
|
309
|
-
}),
|
|
310
|
-
);
|
|
311
|
-
} else if (turn.role === "user") {
|
|
312
|
-
for (const block of turn.content) {
|
|
313
|
-
if (block.type === "text") {
|
|
314
|
-
out.push(
|
|
315
|
-
renderTextLine({
|
|
316
|
-
source: turn.source,
|
|
317
|
-
text: `[user] ${block.text}`,
|
|
318
|
-
withPrefix,
|
|
319
|
-
}),
|
|
320
|
-
);
|
|
321
|
-
}
|
|
322
|
-
}
|
|
323
|
-
}
|
|
264
|
+
out.push(...renderTurnLines(turn, withPrefix));
|
|
324
265
|
}
|
|
325
266
|
|
|
326
|
-
|
|
327
|
-
let tail = "";
|
|
328
|
-
if (this.result) {
|
|
329
|
-
const duration = formatDuration(this.result.durationMs);
|
|
330
|
-
const cost = Number(this.result.totalCostUsd).toFixed(4);
|
|
331
|
-
tail =
|
|
332
|
-
"\n" +
|
|
333
|
-
`--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`;
|
|
334
|
-
}
|
|
267
|
+
const tail = this.#formatResultTail();
|
|
335
268
|
|
|
336
269
|
// Each rendered line already ends with `\n`; concatenate, drop the
|
|
337
270
|
// trailing newline, then append the tail so the output shape stays
|
|
@@ -341,6 +274,20 @@ export class TraceCollector {
|
|
|
341
274
|
const body = out.join("").replace(/\n$/, "");
|
|
342
275
|
return body + tail;
|
|
343
276
|
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* Format the trailing result summary line (spec 540).
|
|
280
|
+
* @returns {string}
|
|
281
|
+
*/
|
|
282
|
+
#formatResultTail() {
|
|
283
|
+
if (!this.result) return "";
|
|
284
|
+
const duration = formatDuration(this.result.durationMs);
|
|
285
|
+
const cost = Number(this.result.totalCostUsd).toFixed(4);
|
|
286
|
+
return (
|
|
287
|
+
"\n" +
|
|
288
|
+
`--- Result: ${this.result.result} | Turns: ${this.result.numTurns} | Cost: $${cost} | Duration: ${duration} ---`
|
|
289
|
+
);
|
|
290
|
+
}
|
|
344
291
|
}
|
|
345
292
|
|
|
346
293
|
/**
|
package/src/trace-github.js
CHANGED
|
@@ -48,7 +48,6 @@ export class TraceGitHub {
|
|
|
48
48
|
const data = await this.#get(url);
|
|
49
49
|
const runs = data.workflow_runs ?? [];
|
|
50
50
|
|
|
51
|
-
// eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
|
|
52
51
|
const re = new RegExp(pattern, "i");
|
|
53
52
|
return runs
|
|
54
53
|
.filter((r) => re.test(r.name))
|
package/src/trace-query.js
CHANGED
|
@@ -81,24 +81,12 @@ export class TraceQuery {
|
|
|
81
81
|
*/
|
|
82
82
|
filter(opts = {}) {
|
|
83
83
|
const { role, toolName, isError } = opts;
|
|
84
|
-
return this.turns.filter(
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
if (toolName !== undefined) {
|
|
91
|
-
if (turn.role === "assistant") {
|
|
92
|
-
const has = turn.content.some(
|
|
93
|
-
(b) => b.type === "tool_use" && b.name === toolName,
|
|
94
|
-
);
|
|
95
|
-
if (!has) return false;
|
|
96
|
-
} else {
|
|
97
|
-
return false;
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
return true;
|
|
101
|
-
});
|
|
84
|
+
return this.turns.filter(
|
|
85
|
+
(turn) =>
|
|
86
|
+
matchesRole(turn, role) &&
|
|
87
|
+
matchesError(turn, isError) &&
|
|
88
|
+
matchesToolName(turn, toolName),
|
|
89
|
+
);
|
|
102
90
|
}
|
|
103
91
|
|
|
104
92
|
/** @returns {number} */
|
|
@@ -151,7 +139,6 @@ export class TraceQuery {
|
|
|
151
139
|
*/
|
|
152
140
|
search(pattern, opts = {}) {
|
|
153
141
|
const { context = 0, limit = 50, full = false } = opts;
|
|
154
|
-
// eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
|
|
155
142
|
const re = new RegExp(pattern, "gi");
|
|
156
143
|
const hits = [];
|
|
157
144
|
|
|
@@ -200,30 +187,18 @@ export class TraceQuery {
|
|
|
200
187
|
* @returns {object[]}
|
|
201
188
|
*/
|
|
202
189
|
tool(name) {
|
|
203
|
-
const toolUseIds = new Set();
|
|
204
|
-
const results = [];
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
toolUseIds.add(b.toolUseId);
|
|
216
|
-
}
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
} else if (
|
|
220
|
-
turn.role === "tool_result" &&
|
|
221
|
-
toolUseIds.has(turn.toolUseId)
|
|
222
|
-
) {
|
|
223
|
-
results.push(turn);
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
return results;
|
|
190
|
+
const toolUseIds = collectToolUseIds(this.turns, name);
|
|
191
|
+
const assistantTurns = this.turns.filter(
|
|
192
|
+
(t) =>
|
|
193
|
+
t.role === "assistant" &&
|
|
194
|
+
t.content.some((b) => b.type === "tool_use" && b.name === name),
|
|
195
|
+
);
|
|
196
|
+
const resultTurns = this.turns.filter(
|
|
197
|
+
(t) => t.role === "tool_result" && toolUseIds.has(t.toolUseId),
|
|
198
|
+
);
|
|
199
|
+
return [...assistantTurns, ...resultTurns].sort(
|
|
200
|
+
(a, b) => a.index - b.index,
|
|
201
|
+
);
|
|
227
202
|
}
|
|
228
203
|
|
|
229
204
|
/**
|
|
@@ -343,6 +318,57 @@ export class TraceQuery {
|
|
|
343
318
|
}
|
|
344
319
|
}
|
|
345
320
|
|
|
321
|
+
/**
|
|
322
|
+
* @param {object} turn
|
|
323
|
+
* @param {string|undefined} role
|
|
324
|
+
* @returns {boolean}
|
|
325
|
+
*/
|
|
326
|
+
function matchesRole(turn, role) {
|
|
327
|
+
return role === undefined || turn.role === role;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/**
|
|
331
|
+
* @param {object} turn
|
|
332
|
+
* @param {boolean|undefined} isError
|
|
333
|
+
* @returns {boolean}
|
|
334
|
+
*/
|
|
335
|
+
function matchesError(turn, isError) {
|
|
336
|
+
if (isError === undefined) return true;
|
|
337
|
+
return turn.role === "tool_result" && turn.isError === isError;
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
/**
|
|
341
|
+
* @param {object} turn
|
|
342
|
+
* @param {string|undefined} toolName
|
|
343
|
+
* @returns {boolean}
|
|
344
|
+
*/
|
|
345
|
+
function matchesToolName(turn, toolName) {
|
|
346
|
+
if (toolName === undefined) return true;
|
|
347
|
+
return (
|
|
348
|
+
turn.role === "assistant" &&
|
|
349
|
+
turn.content.some((b) => b.type === "tool_use" && b.name === toolName)
|
|
350
|
+
);
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
/**
|
|
354
|
+
* Collect all toolUseIds for a given tool name from assistant turns.
|
|
355
|
+
* @param {object[]} turns
|
|
356
|
+
* @param {string} name
|
|
357
|
+
* @returns {Set<string>}
|
|
358
|
+
*/
|
|
359
|
+
function collectToolUseIds(turns, name) {
|
|
360
|
+
const ids = new Set();
|
|
361
|
+
for (const turn of turns) {
|
|
362
|
+
if (turn.role !== "assistant") continue;
|
|
363
|
+
for (const b of turn.content) {
|
|
364
|
+
if (b.type === "tool_use" && b.name === name && b.toolUseId) {
|
|
365
|
+
ids.add(b.toolUseId);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
return ids;
|
|
370
|
+
}
|
|
371
|
+
|
|
346
372
|
/**
|
|
347
373
|
* Search a single turn for regex matches. Returns array of match descriptions.
|
|
348
374
|
* @param {object} turn
|