@forwardimpact/libeval 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fit-trace.js +49 -0
- package/package.json +6 -3
- package/src/agent-runner.js +5 -1
- package/src/commands/facilitate.js +3 -2
- package/src/commands/run.js +4 -2
- package/src/commands/supervise.js +3 -2
- package/src/commands/trace.js +46 -14
- package/src/facilitator.js +78 -135
- package/src/index.js +1 -0
- package/src/message-bus.js +78 -13
- package/src/orchestration-toolkit.js +211 -63
- package/src/orchestrator-helpers.js +58 -0
- package/src/render/tool-hints.js +3 -3
- package/src/signature-filter.js +27 -0
- package/src/supervisor.js +110 -38
- package/src/tee-writer.js +21 -0
- package/src/trace-collector.js +52 -3
- package/src/trace-query.js +141 -28
package/bin/fit-trace.js
CHANGED
|
@@ -20,6 +20,9 @@ import {
|
|
|
20
20
|
runReasoningCommand,
|
|
21
21
|
runTimelineCommand,
|
|
22
22
|
runStatsCommand,
|
|
23
|
+
runInitCommand,
|
|
24
|
+
runTurnCommand,
|
|
25
|
+
runFilterCommand,
|
|
23
26
|
} from "../src/commands/trace.js";
|
|
24
27
|
|
|
25
28
|
const { version: VERSION } = JSON.parse(
|
|
@@ -99,6 +102,10 @@ const definition = {
|
|
|
99
102
|
type: "string",
|
|
100
103
|
description: "Surrounding turns per hit (default: 0)",
|
|
101
104
|
},
|
|
105
|
+
full: {
|
|
106
|
+
type: "boolean",
|
|
107
|
+
description: "Full content block in match descriptions",
|
|
108
|
+
},
|
|
102
109
|
},
|
|
103
110
|
},
|
|
104
111
|
{
|
|
@@ -135,11 +142,45 @@ const definition = {
|
|
|
135
142
|
args: "<file>",
|
|
136
143
|
description: "Token usage and cost breakdown",
|
|
137
144
|
},
|
|
145
|
+
{
|
|
146
|
+
name: "init",
|
|
147
|
+
args: "<file>",
|
|
148
|
+
description: "Full system/init event",
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
name: "turn",
|
|
152
|
+
args: "<file> <index>",
|
|
153
|
+
description: "Single turn by index",
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
name: "filter",
|
|
157
|
+
args: "<file>",
|
|
158
|
+
description: "Filter turns by structural properties",
|
|
159
|
+
options: {
|
|
160
|
+
role: {
|
|
161
|
+
type: "string",
|
|
162
|
+
description: "Turn role (system, user, assistant, tool_result)",
|
|
163
|
+
},
|
|
164
|
+
tool: {
|
|
165
|
+
type: "string",
|
|
166
|
+
description: "Tool name (matches assistant turns)",
|
|
167
|
+
},
|
|
168
|
+
error: {
|
|
169
|
+
type: "boolean",
|
|
170
|
+
description:
|
|
171
|
+
"Error tool_result turns only (flag-only; for non-errors use the API)",
|
|
172
|
+
},
|
|
173
|
+
},
|
|
174
|
+
},
|
|
138
175
|
],
|
|
139
176
|
globalOptions: {
|
|
140
177
|
help: { type: "boolean", short: "h", description: "Show this help" },
|
|
141
178
|
version: { type: "boolean", description: "Show version" },
|
|
142
179
|
json: { type: "boolean", description: "Output help as JSON" },
|
|
180
|
+
signatures: {
|
|
181
|
+
type: "boolean",
|
|
182
|
+
description: "Include thinking.signature blobs in output",
|
|
183
|
+
},
|
|
143
184
|
},
|
|
144
185
|
examples: [
|
|
145
186
|
"fit-trace runs --lookback 7d",
|
|
@@ -149,6 +190,11 @@ const definition = {
|
|
|
149
190
|
"fit-trace search structured.json 'error|fail' --context 1",
|
|
150
191
|
"fit-trace tool structured.json Bash",
|
|
151
192
|
"fit-trace batch structured.json 0 20",
|
|
193
|
+
"fit-trace init structured.json",
|
|
194
|
+
"fit-trace turn structured.json 3",
|
|
195
|
+
"fit-trace filter structured.json --role system",
|
|
196
|
+
"fit-trace filter structured.json --tool Bash --role assistant",
|
|
197
|
+
"fit-trace search structured.json 'error' --full",
|
|
152
198
|
],
|
|
153
199
|
};
|
|
154
200
|
|
|
@@ -170,6 +216,9 @@ const COMMANDS = {
|
|
|
170
216
|
reasoning: runReasoningCommand,
|
|
171
217
|
timeline: runTimelineCommand,
|
|
172
218
|
stats: runStatsCommand,
|
|
219
|
+
init: runInitCommand,
|
|
220
|
+
turn: runTurnCommand,
|
|
221
|
+
filter: runFilterCommand,
|
|
173
222
|
};
|
|
174
223
|
|
|
175
224
|
async function main() {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@forwardimpact/libeval",
|
|
3
|
-
"version": "0.1.20",
|
|
3
|
+
"version": "0.1.22",
|
|
4
4
|
"description": "Process Claude Code stream-json output into structured traces",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": "D. Olsson <hi@senzilla.io>",
|
|
@@ -25,15 +25,18 @@
|
|
|
25
25
|
"node": ">=18.0.0"
|
|
26
26
|
},
|
|
27
27
|
"scripts": {
|
|
28
|
-
"test": "bun
|
|
28
|
+
"test": "bun test test/*.test.js"
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
|
-
"@anthropic-ai/claude-agent-sdk": "
|
|
31
|
+
"@anthropic-ai/claude-agent-sdk": "0.2.112",
|
|
32
32
|
"@forwardimpact/libcli": "^0.1.0",
|
|
33
33
|
"@forwardimpact/libconfig": "^0.1.0",
|
|
34
34
|
"@forwardimpact/libtelemetry": "^0.1.22",
|
|
35
35
|
"zod": "^4.3.6"
|
|
36
36
|
},
|
|
37
|
+
"devDependencies": {
|
|
38
|
+
"@forwardimpact/libharness": "^0.1.14"
|
|
39
|
+
},
|
|
37
40
|
"publishConfig": {
|
|
38
41
|
"access": "public"
|
|
39
42
|
}
|
package/src/agent-runner.js
CHANGED
|
@@ -28,6 +28,7 @@ function applyDefaults(deps) {
|
|
|
28
28
|
systemPrompt: deps.systemPrompt ?? null,
|
|
29
29
|
disallowedTools: deps.disallowedTools ?? [],
|
|
30
30
|
mcpServers: deps.mcpServers ?? null,
|
|
31
|
+
taskAmend: deps.taskAmend ?? null,
|
|
31
32
|
};
|
|
32
33
|
}
|
|
33
34
|
|
|
@@ -67,9 +68,12 @@ export class AgentRunner {
|
|
|
67
68
|
async run(task) {
|
|
68
69
|
const abortController = new AbortController();
|
|
69
70
|
this.currentAbortController = abortController;
|
|
71
|
+
const effectiveTask = this.taskAmend
|
|
72
|
+
? `${task}\n\n${this.taskAmend}`
|
|
73
|
+
: task;
|
|
70
74
|
try {
|
|
71
75
|
const iterator = this.query({
|
|
72
|
-
prompt: task,
|
|
76
|
+
prompt: effectiveTask,
|
|
73
77
|
options: {
|
|
74
78
|
cwd: this.cwd,
|
|
75
79
|
allowedTools: this.allowedTools,
|
package/src/commands/facilitate.js
CHANGED
|
@@ -30,8 +30,7 @@ function parseFacilitateOptions(values) {
|
|
|
30
30
|
throw new Error("--task-file or --task-text is required");
|
|
31
31
|
|
|
32
32
|
const taskAmend = values["task-amend"] ?? undefined;
|
|
33
|
-
let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
|
34
|
-
if (taskAmend) taskContent += `\n\n${taskAmend}`;
|
|
33
|
+
const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
|
|
35
34
|
|
|
36
35
|
const profilesRaw = values["agent-profiles"];
|
|
37
36
|
if (!profilesRaw) throw new Error("--agent-profiles is required");
|
|
@@ -42,6 +41,7 @@ function parseFacilitateOptions(values) {
|
|
|
42
41
|
|
|
43
42
|
return {
|
|
44
43
|
taskContent,
|
|
44
|
+
taskAmend,
|
|
45
45
|
agentConfigs,
|
|
46
46
|
facilitatorCwd: resolve(values["facilitator-cwd"] ?? "."),
|
|
47
47
|
model: values.model ?? "opus",
|
|
@@ -82,6 +82,7 @@ export async function runFacilitateCommand(values, _args) {
|
|
|
82
82
|
model: opts.model,
|
|
83
83
|
maxTurns: opts.maxTurns,
|
|
84
84
|
facilitatorProfile: opts.facilitatorProfile,
|
|
85
|
+
taskAmend: opts.taskAmend,
|
|
85
86
|
});
|
|
86
87
|
|
|
87
88
|
const result = await facilitator.run(opts.taskContent);
|
package/src/commands/run.js
CHANGED
|
@@ -21,11 +21,11 @@ function parseRunOptions(values) {
|
|
|
21
21
|
|
|
22
22
|
const maxTurnsRaw = values["max-turns"] ?? "50";
|
|
23
23
|
const taskAmend = values["task-amend"] ?? undefined;
|
|
24
|
-
let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
|
25
|
-
if (taskAmend) taskContent += `\n\n${taskAmend}`;
|
|
24
|
+
const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
|
|
26
25
|
|
|
27
26
|
return {
|
|
28
27
|
taskContent,
|
|
28
|
+
taskAmend,
|
|
29
29
|
cwd: resolve(values.cwd ?? "."),
|
|
30
30
|
model: values.model ?? "opus",
|
|
31
31
|
maxTurns: maxTurnsRaw === "0" ? 0 : parseInt(maxTurnsRaw, 10),
|
|
@@ -49,6 +49,7 @@ function parseRunOptions(values) {
|
|
|
49
49
|
export async function runRunCommand(values, _args) {
|
|
50
50
|
const {
|
|
51
51
|
taskContent,
|
|
52
|
+
taskAmend,
|
|
52
53
|
cwd,
|
|
53
54
|
model,
|
|
54
55
|
maxTurns,
|
|
@@ -94,6 +95,7 @@ export async function runRunCommand(values, _args) {
|
|
|
94
95
|
onLine,
|
|
95
96
|
settingSources: ["project"],
|
|
96
97
|
systemPrompt,
|
|
98
|
+
taskAmend,
|
|
97
99
|
});
|
|
98
100
|
|
|
99
101
|
const result = await runner.run(taskContent);
|
package/src/commands/supervise.js
CHANGED
|
@@ -20,11 +20,11 @@ function parseSuperviseOptions(values) {
|
|
|
20
20
|
const supervisorAllowedToolsRaw = values["supervisor-allowed-tools"];
|
|
21
21
|
|
|
22
22
|
const taskAmend = values["task-amend"] ?? undefined;
|
|
23
|
-
let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
|
24
|
-
if (taskAmend) taskContent += `\n\n${taskAmend}`;
|
|
23
|
+
const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
|
|
25
24
|
|
|
26
25
|
return {
|
|
27
26
|
taskContent,
|
|
27
|
+
taskAmend,
|
|
28
28
|
supervisorCwd: resolve(values["supervisor-cwd"] ?? "."),
|
|
29
29
|
agentCwd: resolve(
|
|
30
30
|
values["agent-cwd"] ?? mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
|
|
@@ -83,6 +83,7 @@ export async function runSuperviseCommand(values, _args) {
|
|
|
83
83
|
supervisorAllowedTools: opts.supervisorAllowedTools,
|
|
84
84
|
supervisorProfile: opts.supervisorProfile,
|
|
85
85
|
agentProfile: opts.agentProfile,
|
|
86
|
+
taskAmend: opts.taskAmend,
|
|
86
87
|
});
|
|
87
88
|
|
|
88
89
|
const result = await supervisor.run(opts.taskContent);
|
package/src/commands/trace.js
CHANGED
|
@@ -3,6 +3,7 @@ import { join } from "node:path";
|
|
|
3
3
|
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
4
4
|
import { createTraceQuery } from "../trace-query.js";
|
|
5
5
|
import { createTraceGitHub } from "../trace-github.js";
|
|
6
|
+
import { stripSignatures } from "../signature-filter.js";
|
|
6
7
|
|
|
7
8
|
// --- GitHub commands ---
|
|
8
9
|
|
|
@@ -20,7 +21,7 @@ export async function runRunsCommand(values, args, ctx) {
|
|
|
20
21
|
const pattern = args[0] ?? "agent";
|
|
21
22
|
const lookback = values.lookback ?? "7d";
|
|
22
23
|
const runs = await gh.listRuns({ pattern, lookback });
|
|
23
|
-
writeJSON(runs);
|
|
24
|
+
writeJSON(runs, values);
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
/**
|
|
@@ -51,14 +52,14 @@ export async function runDownloadCommand(values, args, ctx) {
|
|
|
51
52
|
result.files.push("structured.json");
|
|
52
53
|
}
|
|
53
54
|
|
|
54
|
-
writeJSON(result);
|
|
55
|
+
writeJSON(result, values);
|
|
55
56
|
}
|
|
56
57
|
|
|
57
58
|
// --- Query commands ---
|
|
58
59
|
|
|
59
60
|
/** @param {object} values @param {string[]} args - [file] */
|
|
60
61
|
export async function runOverviewCommand(values, args) {
|
|
61
|
-
writeJSON(loadTrace(args[0]).overview());
|
|
62
|
+
writeJSON(loadTrace(args[0]).overview(), values);
|
|
62
63
|
}
|
|
63
64
|
|
|
64
65
|
/** @param {object} values @param {string[]} args - [file] */
|
|
@@ -70,48 +71,53 @@ export async function runCountCommand(values, args) {
|
|
|
70
71
|
export async function runBatchCommand(values, args) {
|
|
71
72
|
writeJSON(
|
|
72
73
|
loadTrace(args[0]).batch(parseInt(args[1], 10), parseInt(args[2], 10)),
|
|
74
|
+
values,
|
|
73
75
|
);
|
|
74
76
|
}
|
|
75
77
|
|
|
76
78
|
/** @param {object} values @param {string[]} args - [file, N?] */
|
|
77
79
|
export async function runHeadCommand(values, args) {
|
|
78
80
|
const n = args[1] ? parseInt(args[1], 10) : 10;
|
|
79
|
-
writeJSON(loadTrace(args[0]).head(n));
|
|
81
|
+
writeJSON(loadTrace(args[0]).head(n), values);
|
|
80
82
|
}
|
|
81
83
|
|
|
82
84
|
/** @param {object} values @param {string[]} args - [file, N?] */
|
|
83
85
|
export async function runTailCommand(values, args) {
|
|
84
86
|
const n = args[1] ? parseInt(args[1], 10) : 10;
|
|
85
|
-
writeJSON(loadTrace(args[0]).tail(n));
|
|
87
|
+
writeJSON(loadTrace(args[0]).tail(n), values);
|
|
86
88
|
}
|
|
87
89
|
|
|
88
90
|
/** @param {object} values @param {string[]} args - [file, pattern] */
|
|
89
91
|
export async function runSearchCommand(values, args) {
|
|
90
92
|
const limit = values.limit ? parseInt(values.limit, 10) : 50;
|
|
91
93
|
const context = values.context ? parseInt(values.context, 10) : 0;
|
|
92
|
-
|
|
94
|
+
const full = values.full ?? false;
|
|
95
|
+
writeJSON(
|
|
96
|
+
loadTrace(args[0]).search(args[1], { limit, context, full }),
|
|
97
|
+
values,
|
|
98
|
+
);
|
|
93
99
|
}
|
|
94
100
|
|
|
95
101
|
/** @param {object} values @param {string[]} args - [file] */
|
|
96
102
|
export async function runToolsCommand(values, args) {
|
|
97
|
-
writeJSON(loadTrace(args[0]).toolFrequency());
|
|
103
|
+
writeJSON(loadTrace(args[0]).toolFrequency(), values);
|
|
98
104
|
}
|
|
99
105
|
|
|
100
106
|
/** @param {object} values @param {string[]} args - [file, name] */
|
|
101
107
|
export async function runToolCommand(values, args) {
|
|
102
|
-
writeJSON(loadTrace(args[0]).tool(args[1]));
|
|
108
|
+
writeJSON(loadTrace(args[0]).tool(args[1]), values);
|
|
103
109
|
}
|
|
104
110
|
|
|
105
111
|
/** @param {object} values @param {string[]} args - [file] */
|
|
106
112
|
export async function runErrorsCommand(values, args) {
|
|
107
|
-
writeJSON(loadTrace(args[0]).errors());
|
|
113
|
+
writeJSON(loadTrace(args[0]).errors(), values);
|
|
108
114
|
}
|
|
109
115
|
|
|
110
116
|
/** @param {object} values @param {string[]} args - [file] */
|
|
111
117
|
export async function runReasoningCommand(values, args) {
|
|
112
118
|
const from = values.from ? parseInt(values.from, 10) : undefined;
|
|
113
119
|
const to = values.to ? parseInt(values.to, 10) : undefined;
|
|
114
|
-
writeJSON(loadTrace(args[0]).reasoning({ from, to }));
|
|
120
|
+
writeJSON(loadTrace(args[0]).reasoning({ from, to }), values);
|
|
115
121
|
}
|
|
116
122
|
|
|
117
123
|
/** @param {object} values @param {string[]} args - [file] */
|
|
@@ -122,7 +128,26 @@ export async function runTimelineCommand(values, args) {
|
|
|
122
128
|
|
|
123
129
|
/** @param {object} values @param {string[]} args - [file] */
|
|
124
130
|
export async function runStatsCommand(values, args) {
|
|
125
|
-
writeJSON(loadTrace(args[0]).stats());
|
|
131
|
+
writeJSON(loadTrace(args[0]).stats(), values);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
135
|
+
export async function runInitCommand(values, args) {
|
|
136
|
+
writeJSON(loadTrace(args[0]).init(), values);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/** @param {object} values @param {string[]} args - [file, index] */
|
|
140
|
+
export async function runTurnCommand(values, args) {
|
|
141
|
+
writeJSON(loadTrace(args[0]).turn(parseInt(args[1], 10)), values);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/** @param {object} values @param {string[]} args - [file] */
|
|
145
|
+
export async function runFilterCommand(values, args) {
|
|
146
|
+
const opts = {};
|
|
147
|
+
if (values.role) opts.role = values.role;
|
|
148
|
+
if (values.tool) opts.toolName = values.tool;
|
|
149
|
+
if (values.error) opts.isError = true;
|
|
150
|
+
writeJSON(loadTrace(args[0]).filter(opts), values);
|
|
126
151
|
}
|
|
127
152
|
|
|
128
153
|
// --- Shared helpers ---
|
|
@@ -151,7 +176,14 @@ function loadTrace(file) {
|
|
|
151
176
|
return createTraceQuery(collector.toJSON());
|
|
152
177
|
}
|
|
153
178
|
|
|
154
|
-
/**
|
|
155
|
-
|
|
156
|
-
|
|
179
|
+
/**
|
|
180
|
+
* Write JSON output to stdout. By default strips `thinking.signature`
|
|
181
|
+
* base64 blobs from the payload so they don't dominate terminal output;
|
|
182
|
+
* pass `--signatures` (surfaced as `values.signatures`) to keep them.
|
|
183
|
+
* @param {*} data
|
|
184
|
+
* @param {object} [values]
|
|
185
|
+
*/
|
|
186
|
+
function writeJSON(data, values = {}) {
|
|
187
|
+
const output = values.signatures ? data : stripSignatures(data);
|
|
188
|
+
process.stdout.write(JSON.stringify(output, null, 2) + "\n");
|
|
157
189
|
}
|