@forwardimpact/libeval 0.1.15 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-eval.js +7 -4
- package/bin/fit-trace.js +198 -0
- package/package.json +7 -4
- package/src/agent-runner.js +1 -0
- package/src/commands/facilitate.js +14 -28
- package/src/commands/trace.js +149 -0
- package/src/facilitator.js +37 -17
- package/src/index.js +6 -0
- package/src/orchestration-toolkit.js +2 -2
- package/src/trace-github.js +213 -0
- package/src/trace-query.js +346 -0
package/bin/fit-eval.js
CHANGED
|
@@ -117,10 +117,13 @@ const definition = {
|
|
|
117
117
|
type: "string",
|
|
118
118
|
description: "Facilitator profile name",
|
|
119
119
|
},
|
|
120
|
-
|
|
120
|
+
"agent-profiles": {
|
|
121
121
|
type: "string",
|
|
122
|
-
description:
|
|
123
|
-
|
|
122
|
+
description: "Comma-separated agent profile names",
|
|
123
|
+
},
|
|
124
|
+
"agent-cwd": {
|
|
125
|
+
type: "string",
|
|
126
|
+
description: "Agent working directory (default: .)",
|
|
124
127
|
},
|
|
125
128
|
},
|
|
126
129
|
},
|
|
@@ -135,7 +138,7 @@ const definition = {
|
|
|
135
138
|
"fit-eval output --format=text < trace.ndjson",
|
|
136
139
|
"fit-eval run --task-file=task.md --model=opus",
|
|
137
140
|
"fit-eval supervise --task-file=task.md --supervisor-cwd=.",
|
|
138
|
-
'fit-eval facilitate --task-file=task.md --
|
|
141
|
+
'fit-eval facilitate --task-file=task.md --agent-profiles "security-engineer,technical-writer"',
|
|
139
142
|
],
|
|
140
143
|
};
|
|
141
144
|
|
package/bin/fit-trace.js
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
import { createCli } from "@forwardimpact/libcli";
|
|
5
|
+
import { createLogger } from "@forwardimpact/libtelemetry";
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
runRunsCommand,
|
|
9
|
+
runDownloadCommand,
|
|
10
|
+
runOverviewCommand,
|
|
11
|
+
runCountCommand,
|
|
12
|
+
runBatchCommand,
|
|
13
|
+
runHeadCommand,
|
|
14
|
+
runTailCommand,
|
|
15
|
+
runSearchCommand,
|
|
16
|
+
runToolsCommand,
|
|
17
|
+
runToolCommand,
|
|
18
|
+
runErrorsCommand,
|
|
19
|
+
runReasoningCommand,
|
|
20
|
+
runTimelineCommand,
|
|
21
|
+
runStatsCommand,
|
|
22
|
+
} from "../src/commands/trace.js";
|
|
23
|
+
|
|
24
|
+
const { version: VERSION } = JSON.parse(
|
|
25
|
+
readFileSync(new URL("../package.json", import.meta.url), "utf8"),
|
|
26
|
+
);
|
|
27
|
+
|
|
28
|
+
const definition = {
|
|
29
|
+
name: "fit-trace",
|
|
30
|
+
version: VERSION,
|
|
31
|
+
description: "Download, query, and search agent execution traces",
|
|
32
|
+
commands: [
|
|
33
|
+
{
|
|
34
|
+
name: "runs",
|
|
35
|
+
args: "[pattern]",
|
|
36
|
+
description: "List recent workflow runs (default pattern: agent)",
|
|
37
|
+
options: {
|
|
38
|
+
lookback: {
|
|
39
|
+
type: "string",
|
|
40
|
+
description: "How far back to search (default: 7d)",
|
|
41
|
+
},
|
|
42
|
+
repo: {
|
|
43
|
+
type: "string",
|
|
44
|
+
description: "GitHub repo override (default: git remote)",
|
|
45
|
+
},
|
|
46
|
+
},
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
name: "download",
|
|
50
|
+
args: "<run-id>",
|
|
51
|
+
description: "Download trace artifact and convert to structured JSON",
|
|
52
|
+
options: {
|
|
53
|
+
dir: { type: "string", description: "Output directory" },
|
|
54
|
+
artifact: { type: "string", description: "Artifact name override" },
|
|
55
|
+
repo: {
|
|
56
|
+
type: "string",
|
|
57
|
+
description: "GitHub repo override (default: git remote)",
|
|
58
|
+
},
|
|
59
|
+
},
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
name: "overview",
|
|
63
|
+
args: "<file>",
|
|
64
|
+
description: "Metadata, summary, turn count, tool frequency",
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
name: "count",
|
|
68
|
+
args: "<file>",
|
|
69
|
+
description: "Number of turns",
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
name: "batch",
|
|
73
|
+
args: "<file> <from> <to>",
|
|
74
|
+
description: "Turns in range [from, to) (zero-indexed)",
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
name: "head",
|
|
78
|
+
args: "<file> [N]",
|
|
79
|
+
description: "First N turns (default 10)",
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
name: "tail",
|
|
83
|
+
args: "<file> [N]",
|
|
84
|
+
description: "Last N turns (default 10)",
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
name: "search",
|
|
88
|
+
args: "<file> <pattern>",
|
|
89
|
+
description: "Search all content for regex pattern",
|
|
90
|
+
options: {
|
|
91
|
+
limit: {
|
|
92
|
+
type: "string",
|
|
93
|
+
description: "Max results (default: 50)",
|
|
94
|
+
},
|
|
95
|
+
context: {
|
|
96
|
+
type: "string",
|
|
97
|
+
description: "Surrounding turns per hit (default: 0)",
|
|
98
|
+
},
|
|
99
|
+
},
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
name: "tools",
|
|
103
|
+
args: "<file>",
|
|
104
|
+
description: "Tool usage frequency (descending)",
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
name: "tool",
|
|
108
|
+
args: "<file> <name>",
|
|
109
|
+
description: "All turns involving a specific tool",
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
name: "errors",
|
|
113
|
+
args: "<file>",
|
|
114
|
+
description: "Tool results with isError=true",
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
name: "reasoning",
|
|
118
|
+
args: "<file>",
|
|
119
|
+
description: "Agent reasoning text only",
|
|
120
|
+
options: {
|
|
121
|
+
from: { type: "string", description: "Start at turn index" },
|
|
122
|
+
to: { type: "string", description: "Stop before turn index" },
|
|
123
|
+
},
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
name: "timeline",
|
|
127
|
+
args: "<file>",
|
|
128
|
+
description: "Compact one-line-per-turn overview",
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
name: "stats",
|
|
132
|
+
args: "<file>",
|
|
133
|
+
description: "Token usage and cost breakdown",
|
|
134
|
+
},
|
|
135
|
+
],
|
|
136
|
+
globalOptions: {
|
|
137
|
+
help: { type: "boolean", short: "h", description: "Show this help" },
|
|
138
|
+
version: { type: "boolean", description: "Show version" },
|
|
139
|
+
json: { type: "boolean", description: "Output help as JSON" },
|
|
140
|
+
},
|
|
141
|
+
examples: [
|
|
142
|
+
"fit-trace runs --lookback 7d",
|
|
143
|
+
"fit-trace download 24497273755",
|
|
144
|
+
"fit-trace overview structured.json",
|
|
145
|
+
"fit-trace timeline structured.json",
|
|
146
|
+
"fit-trace search structured.json 'error|fail' --context 1",
|
|
147
|
+
"fit-trace tool structured.json Bash",
|
|
148
|
+
"fit-trace batch structured.json 0 20",
|
|
149
|
+
],
|
|
150
|
+
};
|
|
151
|
+
|
|
152
|
+
const cli = createCli(definition);
|
|
153
|
+
const logger = createLogger("trace");
|
|
154
|
+
|
|
155
|
+
const COMMANDS = {
|
|
156
|
+
runs: runRunsCommand,
|
|
157
|
+
download: runDownloadCommand,
|
|
158
|
+
overview: runOverviewCommand,
|
|
159
|
+
count: runCountCommand,
|
|
160
|
+
batch: runBatchCommand,
|
|
161
|
+
head: runHeadCommand,
|
|
162
|
+
tail: runTailCommand,
|
|
163
|
+
search: runSearchCommand,
|
|
164
|
+
tools: runToolsCommand,
|
|
165
|
+
tool: runToolCommand,
|
|
166
|
+
errors: runErrorsCommand,
|
|
167
|
+
reasoning: runReasoningCommand,
|
|
168
|
+
timeline: runTimelineCommand,
|
|
169
|
+
stats: runStatsCommand,
|
|
170
|
+
};
|
|
171
|
+
|
|
172
|
+
async function main() {
|
|
173
|
+
const parsed = cli.parse(process.argv.slice(2));
|
|
174
|
+
if (!parsed) process.exit(0);
|
|
175
|
+
|
|
176
|
+
const { values, positionals } = parsed;
|
|
177
|
+
|
|
178
|
+
if (positionals.length === 0) {
|
|
179
|
+
cli.usageError("no command specified");
|
|
180
|
+
process.exit(2);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
const [command, ...args] = positionals;
|
|
184
|
+
const handler = COMMANDS[command];
|
|
185
|
+
|
|
186
|
+
if (!handler) {
|
|
187
|
+
cli.usageError(`unknown command "${command}"`);
|
|
188
|
+
process.exit(2);
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
await handler(values, args);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
main().catch((error) => {
|
|
195
|
+
logger.exception("main", error);
|
|
196
|
+
cli.error(error.message);
|
|
197
|
+
process.exit(1);
|
|
198
|
+
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forwardimpact/libeval",
|
|
3
|
-
"version": "0.1.15",
|
|
3
|
+
"version": "0.1.16",
|
|
4
4
|
"description": "Process Claude Code stream-json output into structured traces",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": "D. Olsson <hi@senzilla.io>",
|
|
@@ -8,10 +8,12 @@
|
|
|
8
8
|
"main": "./src/index.js",
|
|
9
9
|
"exports": {
|
|
10
10
|
".": "./src/index.js",
|
|
11
|
-
"./bin/fit-eval.js": "./bin/fit-eval.js"
|
|
11
|
+
"./bin/fit-eval.js": "./bin/fit-eval.js",
|
|
12
|
+
"./bin/fit-trace.js": "./bin/fit-trace.js"
|
|
12
13
|
},
|
|
13
14
|
"bin": {
|
|
14
|
-
"fit-eval": "./bin/fit-eval.js"
|
|
15
|
+
"fit-eval": "./bin/fit-eval.js",
|
|
16
|
+
"fit-trace": "./bin/fit-trace.js"
|
|
15
17
|
},
|
|
16
18
|
"files": [
|
|
17
19
|
"src/**/*.js",
|
|
@@ -26,8 +28,9 @@
|
|
|
26
28
|
"test": "bun run node --test test/*.test.js"
|
|
27
29
|
},
|
|
28
30
|
"dependencies": {
|
|
29
|
-
"@anthropic-ai/claude-agent-sdk": "^0.2.
|
|
31
|
+
"@anthropic-ai/claude-agent-sdk": "^0.2.112",
|
|
30
32
|
"@forwardimpact/libcli": "^0.1.0",
|
|
33
|
+
"@forwardimpact/libconfig": "^0.1.0",
|
|
31
34
|
"@forwardimpact/libtelemetry": "^0.1.22",
|
|
32
35
|
"zod": "^3.23.0"
|
|
33
36
|
},
|
package/src/agent-runner.js
CHANGED
|
@@ -108,6 +108,7 @@ export class AgentRunner {
|
|
|
108
108
|
permissionMode: this.permissionMode,
|
|
109
109
|
allowDangerouslySkipPermissions: true,
|
|
110
110
|
abortController,
|
|
111
|
+
...(this.mcpServers && { mcpServers: this.mcpServers }),
|
|
111
112
|
},
|
|
112
113
|
});
|
|
113
114
|
return await this.#consumeQuery(iterator);
|
|
package/src/commands/facilitate.js
CHANGED
|
@@ -1,30 +1,18 @@
|
|
|
1
|
-
import { readFileSync, createWriteStream, mkdtempSync } from "node:fs";
|
|
2
|
-
import { resolve, join } from "node:path";
|
|
3
|
-
import { tmpdir } from "node:os";
|
|
1
|
+
import { readFileSync, createWriteStream } from "node:fs";
|
|
2
|
+
import { resolve } from "node:path";
|
|
4
3
|
import { createFacilitator } from "../facilitator.js";
|
|
5
4
|
import { createTeeWriter } from "../tee-writer.js";
|
|
6
5
|
|
|
7
6
|
/**
|
|
8
|
-
* Parse agent
|
|
9
|
-
*
|
|
10
|
-
* @param {string}
|
|
11
|
-
* @returns {Array<{name: string, role: string, cwd: string,
|
|
7
|
+
* Parse comma-separated agent profile names into structured configs.
|
|
8
|
+
* @param {string} raw - Comma-separated profile names
|
|
9
|
+
* @param {string} cwd - Shared working directory for all agents
|
|
10
|
+
* @returns {Array<{name: string, role: string, cwd: string, agentProfile: string}>}
|
|
12
11
|
*/
|
|
13
|
-
function
|
|
14
|
-
return raw.split(",").map((
|
|
15
|
-
const
|
|
16
|
-
|
|
17
|
-
const config = { name, role: name };
|
|
18
|
-
for (let i = 1; i < parts.length; i++) {
|
|
19
|
-
const [key, val] = parts[i].split("=");
|
|
20
|
-
if (key === "cwd") config.cwd = resolve(val);
|
|
21
|
-
else if (key === "role") config.role = val;
|
|
22
|
-
else if (key === "maxTurns") config.maxTurns = parseInt(val, 10);
|
|
23
|
-
}
|
|
24
|
-
if (!config.cwd) {
|
|
25
|
-
config.cwd = mkdtempSync(join(tmpdir(), `fit-eval-${name}-`));
|
|
26
|
-
}
|
|
27
|
-
return config;
|
|
12
|
+
function parseAgentProfiles(raw, cwd) {
|
|
13
|
+
return raw.split(",").map((entry) => {
|
|
14
|
+
const name = entry.trim();
|
|
15
|
+
return { name, role: name, cwd, agentProfile: name };
|
|
28
16
|
});
|
|
29
17
|
}
|
|
30
18
|
|
|
@@ -45,12 +33,10 @@ function parseFacilitateOptions(values) {
|
|
|
45
33
|
let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
|
|
46
34
|
if (taskAmend) taskContent += `\n\n${taskAmend}`;
|
|
47
35
|
|
|
48
|
-
const
|
|
49
|
-
if (!
|
|
50
|
-
|
|
51
|
-
const agentConfigs =
|
|
52
|
-
if (agentConfigs.length < 1)
|
|
53
|
-
throw new Error("--agents must specify at least one agent");
|
|
36
|
+
const profilesRaw = values["agent-profiles"];
|
|
37
|
+
if (!profilesRaw) throw new Error("--agent-profiles is required");
|
|
38
|
+
const agentCwd = resolve(values["agent-cwd"] ?? ".");
|
|
39
|
+
const agentConfigs = parseAgentProfiles(profilesRaw, agentCwd);
|
|
54
40
|
|
|
55
41
|
const maxTurnsRaw = values["max-turns"] ?? "20";
|
|
56
42
|
|
|
package/src/commands/trace.js
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
4
|
+
import { createTraceQuery } from "../trace-query.js";
|
|
5
|
+
import { createTraceGitHub } from "../trace-github.js";
|
|
6
|
+
|
|
7
|
+
// --- GitHub commands ---
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* List recent workflow runs matching a pattern.
|
|
11
|
+
* @param {object} values - Parsed option values
|
|
12
|
+
* @param {string[]} args - [pattern?]
|
|
13
|
+
*/
|
|
14
|
+
export async function runRunsCommand(values, args) {
|
|
15
|
+
const gh = await createTraceGitHub({ repo: values.repo });
|
|
16
|
+
const pattern = args[0] ?? "agent";
|
|
17
|
+
const lookback = values.lookback ?? "7d";
|
|
18
|
+
const runs = await gh.listRuns({ pattern, lookback });
|
|
19
|
+
writeJSON(runs);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Download a trace artifact and auto-convert to structured JSON.
|
|
24
|
+
* @param {object} values - Parsed option values
|
|
25
|
+
* @param {string[]} args - [run-id]
|
|
26
|
+
*/
|
|
27
|
+
export async function runDownloadCommand(values, args) {
|
|
28
|
+
const gh = await createTraceGitHub({ repo: values.repo });
|
|
29
|
+
const result = await gh.downloadTrace(args[0], {
|
|
30
|
+
dir: values.dir,
|
|
31
|
+
name: values.artifact,
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
const ndjsonFile = result.files.find((f) => f.endsWith(".ndjson"));
|
|
35
|
+
if (ndjsonFile) {
|
|
36
|
+
const ndjsonPath = join(result.dir, ndjsonFile);
|
|
37
|
+
const collector = createTraceCollector();
|
|
38
|
+
for (const line of readFileSync(ndjsonPath, "utf8").split("\n")) {
|
|
39
|
+
collector.addLine(line);
|
|
40
|
+
}
|
|
41
|
+
const structuredPath = join(result.dir, "structured.json");
|
|
42
|
+
writeFileSync(structuredPath, JSON.stringify(collector.toJSON()) + "\n");
|
|
43
|
+
result.files.push("structured.json");
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
writeJSON(result);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// --- Query commands ---
|
|
50
|
+
|
|
51
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
52
|
+
export async function runOverviewCommand(values, args) {
|
|
53
|
+
writeJSON(loadTrace(args[0]).overview());
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
57
|
+
export async function runCountCommand(values, args) {
|
|
58
|
+
process.stdout.write(String(loadTrace(args[0]).count()) + "\n");
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** @param {object} values @param {string[]} args - [file, from, to] */
|
|
62
|
+
export async function runBatchCommand(values, args) {
|
|
63
|
+
writeJSON(
|
|
64
|
+
loadTrace(args[0]).batch(parseInt(args[1], 10), parseInt(args[2], 10)),
|
|
65
|
+
);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/** @param {object} values @param {string[]} args - [file, N?] */
|
|
69
|
+
export async function runHeadCommand(values, args) {
|
|
70
|
+
const n = args[1] ? parseInt(args[1], 10) : 10;
|
|
71
|
+
writeJSON(loadTrace(args[0]).head(n));
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/** @param {object} values @param {string[]} args - [file, N?] */
|
|
75
|
+
export async function runTailCommand(values, args) {
|
|
76
|
+
const n = args[1] ? parseInt(args[1], 10) : 10;
|
|
77
|
+
writeJSON(loadTrace(args[0]).tail(n));
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/** @param {object} values @param {string[]} args - [file, pattern] */
|
|
81
|
+
export async function runSearchCommand(values, args) {
|
|
82
|
+
const limit = values.limit ? parseInt(values.limit, 10) : 50;
|
|
83
|
+
const context = values.context ? parseInt(values.context, 10) : 0;
|
|
84
|
+
writeJSON(loadTrace(args[0]).search(args[1], { limit, context }));
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
88
|
+
export async function runToolsCommand(values, args) {
|
|
89
|
+
writeJSON(loadTrace(args[0]).toolFrequency());
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/** @param {object} values @param {string[]} args - [file, name] */
|
|
93
|
+
export async function runToolCommand(values, args) {
|
|
94
|
+
writeJSON(loadTrace(args[0]).tool(args[1]));
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
98
|
+
export async function runErrorsCommand(values, args) {
|
|
99
|
+
writeJSON(loadTrace(args[0]).errors());
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
103
|
+
export async function runReasoningCommand(values, args) {
|
|
104
|
+
const from = values.from ? parseInt(values.from, 10) : undefined;
|
|
105
|
+
const to = values.to ? parseInt(values.to, 10) : undefined;
|
|
106
|
+
writeJSON(loadTrace(args[0]).reasoning({ from, to }));
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
110
|
+
export async function runTimelineCommand(values, args) {
|
|
111
|
+
const lines = loadTrace(args[0]).timeline();
|
|
112
|
+
process.stdout.write(lines.join("\n") + "\n");
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
116
|
+
export async function runStatsCommand(values, args) {
|
|
117
|
+
writeJSON(loadTrace(args[0]).stats());
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// --- Shared helpers ---
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Load a trace file. Supports structured JSON and raw NDJSON.
|
|
124
|
+
* @param {string} file
|
|
125
|
+
* @returns {import("../trace-query.js").TraceQuery}
|
|
126
|
+
*/
|
|
127
|
+
function loadTrace(file) {
|
|
128
|
+
const content = readFileSync(file, "utf8");
|
|
129
|
+
|
|
130
|
+
try {
|
|
131
|
+
const parsed = JSON.parse(content);
|
|
132
|
+
if (parsed.turns) {
|
|
133
|
+
return createTraceQuery(parsed);
|
|
134
|
+
}
|
|
135
|
+
} catch {
|
|
136
|
+
// Not valid JSON — fall through to NDJSON.
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
const collector = createTraceCollector();
|
|
140
|
+
for (const line of content.split("\n")) {
|
|
141
|
+
collector.addLine(line);
|
|
142
|
+
}
|
|
143
|
+
return createTraceQuery(collector.toJSON());
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/** @param {object} data */
|
|
147
|
+
function writeJSON(data) {
|
|
148
|
+
process.stdout.write(JSON.stringify(data, null, 2) + "\n");
|
|
149
|
+
}
|
package/src/facilitator.js
CHANGED
|
@@ -18,19 +18,27 @@ import {
|
|
|
18
18
|
|
|
19
19
|
/** System prompt appended for the facilitator runner. */
|
|
20
20
|
export const FACILITATOR_SYSTEM_PROMPT =
|
|
21
|
-
"You coordinate multiple agents working on a shared task.
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"
|
|
21
|
+
"You coordinate multiple agents working on a shared task. " +
|
|
22
|
+
"Tell sends a direct message to one participant. " +
|
|
23
|
+
"Share broadcasts a message to all participants. " +
|
|
24
|
+
"Redirect interrupts a participant and replaces their current instructions. " +
|
|
25
|
+
"RollCall lists available participants and their roles. " +
|
|
26
|
+
"Conclude ends the session with a summary. " +
|
|
27
|
+
"Participants communicate with you via Share and may Ask you questions. " +
|
|
28
|
+
"IMPORTANT: After sending messages via Tell or Share, stop making tool " +
|
|
29
|
+
"calls and produce a text response. The system will resume you with " +
|
|
30
|
+
"participant responses. Do not proceed to the next question or call " +
|
|
31
|
+
"Conclude until you have received responses from participants.";
|
|
26
32
|
|
|
27
33
|
/** System prompt appended for facilitated agent runners. */
|
|
28
34
|
export const FACILITATED_AGENT_SYSTEM_PROMPT =
|
|
29
35
|
"You are one of several agents working on a shared task under a " +
|
|
30
|
-
"facilitator's coordination.
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
36
|
+
"facilitator's coordination. " +
|
|
37
|
+
"Share broadcasts your message to all participants. " +
|
|
38
|
+
"Tell sends a direct message to one participant. " +
|
|
39
|
+
"Ask sends a question to the facilitator — you block until answered. " +
|
|
40
|
+
"RollCall lists available participants and their roles. " +
|
|
41
|
+
"The facilitator may Redirect you with new instructions " +
|
|
34
42
|
"— treat redirections as authoritative.";
|
|
35
43
|
|
|
36
44
|
function createAsyncQueue() {
|
|
@@ -109,28 +117,40 @@ export class Facilitator {
|
|
|
109
117
|
async run(task) {
|
|
110
118
|
this.emitOrchestratorEvent({ type: "session_start" });
|
|
111
119
|
|
|
120
|
+
// Launch agent loops first — they wait for messages via messageBus.
|
|
121
|
+
// This lets agents process Tell/Share messages that arrive during the
|
|
122
|
+
// facilitator's initial run, rather than after it completes.
|
|
123
|
+
const agentPromises = this.agents.map((a) => this.#runAgent(a));
|
|
124
|
+
|
|
112
125
|
// Turn 0: facilitator receives the task
|
|
113
126
|
this.facilitatorTurns++;
|
|
114
127
|
await this.facilitatorRunner.run(task);
|
|
115
128
|
|
|
129
|
+
// Handle redirect after turn 0
|
|
130
|
+
await this.#processRedirect();
|
|
131
|
+
|
|
116
132
|
if (this.ctx.concluded) {
|
|
133
|
+
// Facilitator concluded during its initial run. Let agents finish any
|
|
134
|
+
// in-progress work before returning — they may have received Tell/Share
|
|
135
|
+
// messages and started processing concurrently.
|
|
117
136
|
this.concludeResolve();
|
|
118
|
-
|
|
119
|
-
|
|
137
|
+
await Promise.allSettled(agentPromises);
|
|
138
|
+
this.emitSummary({
|
|
139
|
+
success: true,
|
|
140
|
+
turns: this.facilitatorTurns,
|
|
141
|
+
summary: this.ctx.summary,
|
|
142
|
+
});
|
|
143
|
+
return { success: true, turns: this.facilitatorTurns };
|
|
120
144
|
}
|
|
121
145
|
|
|
122
|
-
//
|
|
123
|
-
await this.#processRedirect();
|
|
124
|
-
|
|
125
|
-
// Abort agents promptly when Conclude is called
|
|
146
|
+
// Abort agents promptly when Conclude is called during the event loop
|
|
126
147
|
this.concludePromise.then(() => {
|
|
127
148
|
for (const agent of this.agents) {
|
|
128
149
|
agent.runner.currentAbortController?.abort();
|
|
129
150
|
}
|
|
130
151
|
});
|
|
131
152
|
|
|
132
|
-
//
|
|
133
|
-
const agentPromises = this.agents.map((a) => this.#runAgent(a));
|
|
153
|
+
// Concurrent phase: facilitator event loop + already-running agent loops
|
|
134
154
|
const facilitatorPromise = this.#facilitatorLoop();
|
|
135
155
|
|
|
136
156
|
try {
|
package/src/index.js
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
export { TraceCollector, createTraceCollector } from "./trace-collector.js";
|
|
2
|
+
export { TraceQuery, createTraceQuery } from "./trace-query.js";
|
|
3
|
+
export {
|
|
4
|
+
TraceGitHub,
|
|
5
|
+
createTraceGitHub,
|
|
6
|
+
parseGitRemote,
|
|
7
|
+
} from "./trace-github.js";
|
|
2
8
|
export { AgentRunner, createAgentRunner } from "./agent-runner.js";
|
|
3
9
|
export {
|
|
4
10
|
Supervisor,
|
|
package/src/orchestration-toolkit.js
CHANGED
|
@@ -154,13 +154,13 @@ export function createFacilitatorToolServer(ctx) {
|
|
|
154
154
|
),
|
|
155
155
|
tool(
|
|
156
156
|
"Share",
|
|
157
|
-
"Broadcast a message to all participants.",
|
|
157
|
+
"Broadcast a message to all participants. After sending, stop making tool calls to receive responses.",
|
|
158
158
|
{ message: z.string() },
|
|
159
159
|
createShareHandler(ctx, { from: "facilitator" }),
|
|
160
160
|
),
|
|
161
161
|
tool(
|
|
162
162
|
"Tell",
|
|
163
|
-
"Send a direct message to one participant.",
|
|
163
|
+
"Send a direct message to one participant. After sending, stop making tool calls to receive their response.",
|
|
164
164
|
{ message: z.string(), to: z.string() },
|
|
165
165
|
createTellHandler(ctx, { from: "facilitator" }),
|
|
166
166
|
),
|
|
package/src/trace-github.js
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import { createWriteStream } from "node:fs";
|
|
2
|
+
import { mkdir } from "node:fs/promises";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { pipeline } from "node:stream/promises";
|
|
5
|
+
import { Readable } from "node:stream";
|
|
6
|
+
|
|
7
|
+
const API = "https://api.github.com";
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* GitHub API client for trace-related operations: listing workflow runs
|
|
11
|
+
* and downloading trace artifacts.
|
|
12
|
+
*/
|
|
13
|
+
export class TraceGitHub {
|
|
14
|
+
/**
|
|
15
|
+
* @param {object} deps
|
|
16
|
+
* @param {string} deps.token - GitHub token
|
|
17
|
+
* @param {string} deps.owner - Repository owner
|
|
18
|
+
* @param {string} deps.repo - Repository name
|
|
19
|
+
*/
|
|
20
|
+
constructor({ token, owner, repo }) {
|
|
21
|
+
this.token = token;
|
|
22
|
+
this.owner = owner;
|
|
23
|
+
this.repo = repo;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* List recent workflow runs, optionally filtered by name pattern.
|
|
28
|
+
*
|
|
29
|
+
* @param {object} [opts]
|
|
30
|
+
* @param {string} [opts.pattern] - Case-insensitive substring to match workflow name (default: "agent")
|
|
31
|
+
* @param {number} [opts.limit=50] - Max runs to return from GitHub API
|
|
32
|
+
* @param {string} [opts.lookback="7d"] - How far back to search (e.g. "7d", "24h", "2w")
|
|
33
|
+
* @returns {Promise<object[]>} Array of {workflow, runId, status, conclusion, createdAt, branch, url}
|
|
34
|
+
*/
|
|
35
|
+
async listRuns(opts = {}) {
|
|
36
|
+
const { pattern = "agent", limit = 50, lookback = "7d" } = opts;
|
|
37
|
+
const cutoff = parseLookback(lookback);
|
|
38
|
+
|
|
39
|
+
const params = new URLSearchParams({
|
|
40
|
+
per_page: String(Math.min(limit, 100)),
|
|
41
|
+
});
|
|
42
|
+
if (cutoff) {
|
|
43
|
+
params.set("created", `>=${cutoff}`);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs?${params}`;
|
|
47
|
+
const data = await this.#get(url);
|
|
48
|
+
const runs = data.workflow_runs ?? [];
|
|
49
|
+
|
|
50
|
+
// eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
|
|
51
|
+
const re = new RegExp(pattern, "i");
|
|
52
|
+
return runs
|
|
53
|
+
.filter((r) => re.test(r.name))
|
|
54
|
+
.map((r) => ({
|
|
55
|
+
workflow: r.name,
|
|
56
|
+
runId: r.id,
|
|
57
|
+
status: r.status,
|
|
58
|
+
conclusion: r.conclusion,
|
|
59
|
+
createdAt: r.created_at,
|
|
60
|
+
branch: r.head_branch,
|
|
61
|
+
url: r.html_url,
|
|
62
|
+
}));
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Download a trace artifact from a workflow run and extract it.
|
|
67
|
+
*
|
|
68
|
+
* Tries artifact names in order: combined-trace, agent-trace.
|
|
69
|
+
* The artifact zip is downloaded and extracted to the output directory.
|
|
70
|
+
*
|
|
71
|
+
* @param {number|string} runId
|
|
72
|
+
* @param {object} [opts]
|
|
73
|
+
* @param {string} [opts.dir] - Output directory (default: /tmp/trace-<runId>)
|
|
74
|
+
* @param {string} [opts.name] - Specific artifact name to download
|
|
75
|
+
* @returns {Promise<{dir: string, artifact: string, files: string[]}>}
|
|
76
|
+
*/
|
|
77
|
+
async downloadTrace(runId, opts = {}) {
|
|
78
|
+
const dir = opts.dir ?? `/tmp/trace-${runId}`;
|
|
79
|
+
await mkdir(dir, { recursive: true });
|
|
80
|
+
|
|
81
|
+
// List artifacts for this run.
|
|
82
|
+
const url = `${API}/repos/${this.owner}/${this.repo}/actions/runs/${runId}/artifacts`;
|
|
83
|
+
const data = await this.#get(url);
|
|
84
|
+
const artifacts = data.artifacts ?? [];
|
|
85
|
+
|
|
86
|
+
// Find the trace artifact.
|
|
87
|
+
const preferredNames = opts.name
|
|
88
|
+
? [opts.name]
|
|
89
|
+
: ["combined-trace", "agent-trace"];
|
|
90
|
+
let artifact = null;
|
|
91
|
+
for (const name of preferredNames) {
|
|
92
|
+
artifact = artifacts.find((a) => a.name === name);
|
|
93
|
+
if (artifact) break;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
if (!artifact) {
|
|
97
|
+
const available = artifacts.map((a) => a.name).join(", ");
|
|
98
|
+
throw new Error(
|
|
99
|
+
`No trace artifact found for run ${runId}. Available: ${available || "none"}`,
|
|
100
|
+
);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Download the zip.
|
|
104
|
+
const zipPath = path.join(dir, `${artifact.name}.zip`);
|
|
105
|
+
const downloadUrl = `${API}/repos/${this.owner}/${this.repo}/actions/artifacts/${artifact.id}/zip`;
|
|
106
|
+
const response = await fetch(downloadUrl, {
|
|
107
|
+
headers: this.#headers(),
|
|
108
|
+
redirect: "follow",
|
|
109
|
+
});
|
|
110
|
+
if (!response.ok) {
|
|
111
|
+
throw new Error(
|
|
112
|
+
`Failed to download artifact: ${response.status} ${response.statusText}`,
|
|
113
|
+
);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// Stream to disk then extract.
|
|
117
|
+
await pipeline(Readable.fromWeb(response.body), createWriteStream(zipPath));
|
|
118
|
+
|
|
119
|
+
const { execSync } = await import("node:child_process");
|
|
120
|
+
execSync(
|
|
121
|
+
`unzip -o -q ${JSON.stringify(zipPath)} -d ${JSON.stringify(dir)}`,
|
|
122
|
+
);
|
|
123
|
+
|
|
124
|
+
// List extracted files.
|
|
125
|
+
const { readdirSync } = await import("node:fs");
|
|
126
|
+
const files = readdirSync(dir).filter((f) => !f.endsWith(".zip"));
|
|
127
|
+
|
|
128
|
+
return { dir, artifact: artifact.name, files };
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* @param {string} url
|
|
133
|
+
* @returns {Promise<object>}
|
|
134
|
+
*/
|
|
135
|
+
async #get(url) {
|
|
136
|
+
const response = await fetch(url, { headers: this.#headers() });
|
|
137
|
+
if (!response.ok) {
|
|
138
|
+
throw new Error(`GitHub API: ${response.status} ${response.statusText}`);
|
|
139
|
+
}
|
|
140
|
+
return response.json();
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/** @returns {Record<string, string>} */
|
|
144
|
+
#headers() {
|
|
145
|
+
return {
|
|
146
|
+
Authorization: `Bearer ${this.token}`,
|
|
147
|
+
Accept: "application/vnd.github+json",
|
|
148
|
+
"X-GitHub-Api-Version": "2022-11-28",
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* Parse a lookback duration string into an ISO date string.
|
|
155
|
+
* Supports: Nd (days), Nh (hours), Nw (weeks).
|
|
156
|
+
* @param {string} lookback
|
|
157
|
+
* @returns {string|null} ISO date string or null if unparseable
|
|
158
|
+
*/
|
|
159
|
+
function parseLookback(lookback) {
|
|
160
|
+
const match = lookback.match(/^(\d+)([dhw])$/);
|
|
161
|
+
if (!match) return null;
|
|
162
|
+
const [, val, unit] = match;
|
|
163
|
+
const ms = { d: 86400000, h: 3600000, w: 604800000 }[unit];
|
|
164
|
+
return new Date(Date.now() - parseInt(val, 10) * ms).toISOString();
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/**
 * Parse a GitHub repository URL or "owner/repo" string.
 *
 * Accepted forms (a trailing ".git" and a single trailing "/" are ignored):
 *   - SSH:   git@github.com:owner/repo.git
 *   - HTTPS: https://github.com/owner/repo
 *   - Plain: owner/repo
 *
 * @param {string} remote - Git remote URL or owner/repo string
 * @returns {{owner: string, repo: string}}
 * @throws {Error} When the value matches none of the accepted forms.
 */
export function parseGitRemote(remote) {
  const value = String(remote).trim();

  // One pattern covers both SSH ("github.com:") and HTTPS ("github.com/").
  // The repo segment excludes "/" so a trailing slash or deeper path cannot
  // leak into the repository name.
  const hosted = value.match(
    /github\.com[:/]([^/\s]+)\/([^/\s]+?)(?:\.git)?\/?$/,
  );
  if (hosted) return { owner: hosted[1], repo: hosted[2] };

  // Plain owner/repo format (no github.com prefix).
  const simple = value.match(/^([^/:@\s]+)\/([^/\s]+?)(?:\.git)?$/);
  if (simple) return { owner: simple[1], repo: simple[2] };

  throw new Error(`Cannot parse GitHub remote: ${remote}`);
}
|
|
187
|
+
|
|
188
|
+
/**
 * Build a TraceGitHub client. The token is read from the "eval" script
 * config (libconfig); the repository comes from `opts.repo` when given,
 * otherwise from the URL of the local git remote named "origin".
 *
 * @param {object} [opts]
 * @param {string} [opts.repo] - "owner/repo" override (default: detect from git remote)
 * @returns {Promise<TraceGitHub>}
 */
export async function createTraceGitHub(opts = {}) {
  const { createScriptConfig } = await import("@forwardimpact/libconfig");
  const config = await createScriptConfig("eval");
  const token = config.ghToken();

  let remote = opts.repo;
  if (!remote) {
    // No override supplied: ask git for the origin URL instead.
    const { execSync } = await import("node:child_process");
    const stdout = execSync("git remote get-url origin", {
      encoding: "utf8",
    });
    remote = stdout.trim();
  }

  const { owner, repo } = parseGitRemote(remote);
  return new TraceGitHub({ token, owner, repo });
}
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
/**
 * Query engine for structured trace documents produced by TraceCollector.
 *
 * Holds a structured JSON trace in memory and provides methods for
 * paging, searching, filtering, and summarizing turns — the operations
 * agents need to analyze large traces efficiently.
 */
export class TraceQuery {
  /**
   * @param {object} trace - Structured trace document (output of TraceCollector.toJSON())
   */
  constructor(trace) {
    this.trace = trace;
    this.metadata = trace.metadata ?? {};
    this.turns = trace.turns ?? [];
    this.summary = trace.summary ?? {};
  }

  /**
   * High-level overview: metadata, summary, turn count, and tool frequency.
   * @returns {object}
   */
  overview() {
    return {
      metadata: this.metadata,
      summary: this.summary,
      turnCount: this.turns.length,
      tools: this.toolFrequency(),
    };
  }

  /** @returns {number} Total number of turns in the trace. */
  count() {
    return this.turns.length;
  }

  /**
   * Return turns in range [from, to) (zero-indexed array positions).
   * @param {number} from
   * @param {number} to
   * @returns {object[]}
   */
  batch(from, to) {
    return this.turns.slice(from, to);
  }

  /**
   * First N turns.
   * @param {number} [n=10]
   * @returns {object[]}
   */
  head(n = 10) {
    return this.turns.slice(0, n);
  }

  /**
   * Last N turns. A non-positive N yields an empty array.
   * @param {number} [n=10]
   * @returns {object[]}
   */
  tail(n = 10) {
    // slice(-0) is slice(0) and would return every turn, so guard n <= 0.
    return n > 0 ? this.turns.slice(-n) : [];
  }

  /**
   * Search all turn content for a regex pattern. Returns the matching
   * turns, each with short excerpts describing where the pattern matched
   * and, optionally, the surrounding turns.
   *
   * Searches: assistant text blocks, tool_use names and stringified input,
   * and tool_result content.
   *
   * @param {string} pattern - Regex pattern (case-insensitive)
   * @param {object} [opts]
   * @param {number} [opts.context=0] - Number of surrounding turns to include
   * @param {number} [opts.limit=50] - Max results
   * @returns {object[]} Array of {turn, matches, context?}
   */
  search(pattern, opts = {}) {
    const { context = 0, limit = 50 } = opts;
    // eslint-disable-next-line security/detect-non-literal-regexp -- pattern is caller-controlled, not untrusted input
    const re = new RegExp(pattern, "gi");
    const hits = [];

    for (const turn of this.turns) {
      const matches = matchTurn(turn, re);
      if (matches.length > 0) {
        const entry = { turn, matches };
        if (context > 0) {
          // Neighbours are selected by each turn's own `index` field, not by
          // array position, and the matching turn itself is excluded.
          const idx = turn.index;
          entry.context = this.turns.filter(
            (t) =>
              t.index !== idx &&
              t.index >= idx - context &&
              t.index <= idx + context,
          );
        }
        hits.push(entry);
        if (hits.length >= limit) break;
      }
    }
    return hits;
  }

  /**
   * Tool usage frequency, sorted descending by count.
   * @returns {Array<{tool: string, count: number}>}
   */
  toolFrequency() {
    // A Map keeps tool names such as "constructor" or "toString" from
    // colliding with properties inherited from Object.prototype.
    const counts = new Map();
    for (const turn of this.turns) {
      if (turn.role !== "assistant") continue;
      for (const block of turn.content) {
        if (block.type === "tool_use") {
          counts.set(block.name, (counts.get(block.name) ?? 0) + 1);
        }
      }
    }
    return Array.from(counts, ([tool, count]) => ({ tool, count })).sort(
      (a, b) => b.count - a.count,
    );
  }

  /**
   * Filter turns involving a specific tool (both the tool_use and its result).
   * A tool_result turn is included only when its toolUseId was seen on an
   * earlier matching tool_use block.
   * @param {string} name - Tool name
   * @returns {object[]}
   */
  tool(name) {
    const toolUseIds = new Set();
    const results = [];

    for (const turn of this.turns) {
      if (turn.role === "assistant") {
        const hasTool = turn.content.some(
          (b) => b.type === "tool_use" && b.name === name,
        );
        if (hasTool) {
          results.push(turn);
          for (const b of turn.content) {
            if (b.type === "tool_use" && b.name === name && b.toolUseId) {
              toolUseIds.add(b.toolUseId);
            }
          }
        }
      } else if (
        turn.role === "tool_result" &&
        toolUseIds.has(turn.toolUseId)
      ) {
        results.push(turn);
      }
    }
    return results;
  }

  /**
   * All error turns (tool results with isError=true).
   * @returns {object[]}
   */
  errors() {
    return this.turns.filter(
      (t) => t.role === "tool_result" && t.isError === true,
    );
  }

  /**
   * Extract just the text blocks from assistant turns, joined per turn with
   * newlines. Turns without any text block are omitted.
   * @param {object} [opts]
   * @param {number} [opts.from] - Start turn index
   * @param {number} [opts.to] - End turn index (exclusive)
   * @returns {Array<{index: number, text: string}>}
   */
  reasoning(opts = {}) {
    const { from, to } = opts;
    const results = [];
    for (const turn of this.turns) {
      if (turn.role !== "assistant") continue;
      if (from !== undefined && turn.index < from) continue;
      if (to !== undefined && turn.index >= to) continue;
      const texts = turn.content
        .filter((b) => b.type === "text")
        .map((b) => b.text);
      if (texts.length > 0) {
        results.push({ index: turn.index, text: texts.join("\n") });
      }
    }
    return results;
  }

  /**
   * Compact one-line-per-assistant-turn timeline showing tool names,
   * token usage, and a reasoning snippet. Thinking-only turns (no tool_use
   * and no text block) are skipped entirely.
   * @returns {string[]}
   */
  timeline() {
    const lines = [];
    for (const turn of this.turns) {
      if (turn.role !== "assistant") continue;

      const tools = turn.content
        .filter((b) => b.type === "tool_use")
        .map((b) => b.name);

      const textBlocks = turn.content
        .filter((b) => b.type === "text")
        .map((b) => b.text);

      const hasThinking = turn.content.some((b) => b.type === "thinking");

      // Skip thinking-only turns (no user-visible content).
      if (hasThinking && tools.length === 0 && textBlocks.length === 0)
        continue;

      const snippet = textBlocks.join(" ").slice(0, 80).replace(/\n/g, " ");

      const input = turn.usage?.inputTokens ?? 0;
      const output = turn.usage?.outputTokens ?? 0;
      const cacheRead = turn.usage?.cacheReadInputTokens ?? 0;

      const toolStr = tools.length > 0 ? tools.join(", ") : "(text only)";
      // The "in" figure combines fresh input tokens with cache-read tokens.
      const tokenStr = `in:${fmtK(input + cacheRead)} out:${fmtK(output)}`;

      lines.push(
        `[${turn.index}] ${toolStr.padEnd(30)} ${tokenStr.padEnd(18)} ${snippet}`,
      );
    }
    return lines;
  }

  /**
   * Token usage breakdown per assistant turn, plus totals. Cost and
   * duration are copied from the trace summary (0 when absent). Assistant
   * turns without a `usage` record are not counted.
   * @returns {object}
   */
  stats() {
    let totalInput = 0;
    let totalOutput = 0;
    let totalCacheRead = 0;
    let totalCacheCreate = 0;
    const perTurn = [];

    for (const turn of this.turns) {
      if (turn.role !== "assistant" || !turn.usage) continue;
      const u = turn.usage;
      totalInput += u.inputTokens ?? 0;
      totalOutput += u.outputTokens ?? 0;
      totalCacheRead += u.cacheReadInputTokens ?? 0;
      totalCacheCreate += u.cacheCreationInputTokens ?? 0;

      perTurn.push({
        index: turn.index,
        inputTokens: u.inputTokens ?? 0,
        outputTokens: u.outputTokens ?? 0,
        cacheReadInputTokens: u.cacheReadInputTokens ?? 0,
        cacheCreationInputTokens: u.cacheCreationInputTokens ?? 0,
      });
    }

    return {
      totals: {
        inputTokens: totalInput,
        outputTokens: totalOutput,
        cacheReadInputTokens: totalCacheRead,
        cacheCreationInputTokens: totalCacheCreate,
        totalCostUsd: this.summary.totalCostUsd ?? 0,
        durationMs: this.summary.durationMs ?? 0,
      },
      perTurn,
    };
  }
}
|
|
271
|
+
|
|
272
|
+
/**
 * Search a single turn for regex matches. Returns array of match descriptions.
 *
 * Assistant turns are checked block by block: text blocks, tool_use names,
 * and the JSON-stringified tool_use input. tool_result turns are checked
 * against their content. Any other role yields no matches.
 *
 * @param {object} turn
 * @param {RegExp} re - May carry the "g" flag; lastIndex is reset around every test.
 * @returns {string[]}
 */
function matchTurn(turn, re) {
  // Test from the start of the string no matter what state a previous
  // test()/exec() left on a global or sticky regex.
  const hits = (value) => {
    re.lastIndex = 0;
    const found = re.test(value);
    re.lastIndex = 0;
    return found;
  };

  const matches = [];
  if (turn.role === "assistant") {
    for (const block of turn.content) {
      if (block.type === "text") {
        // A missing text would be coerced to "undefined" by test() and then
        // crash the excerpt helper; treat it as empty instead.
        const text = typeof block.text === "string" ? block.text : "";
        if (hits(text)) {
          matches.push(`text: ${excerptAround(text, re)}`);
        }
      }
      if (block.type === "tool_use") {
        if (hits(block.name)) {
          matches.push(`tool_name: ${block.name}`);
        }
        // JSON.stringify(undefined) is undefined, not a string.
        const inputStr = JSON.stringify(block.input) ?? "";
        if (hits(inputStr)) {
          matches.push(
            `tool_input(${block.name}): ${excerptAround(inputStr, re)}`,
          );
        }
      }
    }
  } else if (turn.role === "tool_result") {
    const raw = turn.content ?? "";
    // NOTE(review): content is presumably a string; structured content is
    // stringified so it can still be searched and excerpted — confirm
    // against the TraceCollector output schema.
    const content = typeof raw === "string" ? raw : (JSON.stringify(raw) ?? "");
    if (hits(content)) {
      matches.push(`result: ${excerptAround(content, re)}`);
    }
  }
  return matches;
}
|
|
309
|
+
|
|
310
|
+
/**
 * Build a short excerpt centred on the first match of `re` in `text`:
 * up to 40 characters on either side of the match, with "..." marking any
 * side that was cut. When nothing matches, the first 100 characters of
 * `text` are returned instead.
 * @param {string} text
 * @param {RegExp} re - lastIndex is reset before searching
 * @returns {string}
 */
function excerptAround(text, re) {
  re.lastIndex = 0;
  const found = re.exec(text);
  if (!found) {
    return text.slice(0, 100);
  }

  const matchEnd = found.index + found[0].length;
  const from = Math.max(0, found.index - 40);
  const until = Math.min(text.length, matchEnd + 40);

  let out = text.slice(from, until);
  if (from > 0) {
    out = "..." + out;
  }
  if (until < text.length) {
    out = out + "...";
  }
  return out;
}
|
|
327
|
+
|
|
328
|
+
/**
 * Render a token count compactly: values below 1000 are printed as-is,
 * larger values as thousands with one decimal and a "K" suffix.
 * @param {number} n
 * @returns {string}
 */
function fmtK(n) {
  return n < 1000 ? String(n) : (n / 1000).toFixed(1) + "K";
}
|
|
337
|
+
|
|
338
|
+
/**
 * Wrap a structured trace in a TraceQuery. A string argument is parsed as
 * JSON first; any other value is used as the trace document directly.
 * @param {string|object} json - JSON text or an already-parsed trace
 * @returns {TraceQuery}
 */
export function createTraceQuery(json) {
  if (typeof json === "string") {
    return new TraceQuery(JSON.parse(json));
  }
  return new TraceQuery(json);
}
|