@forwardimpact/libeval 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/fit-trace.js CHANGED
@@ -20,6 +20,9 @@ import {
20
20
  runReasoningCommand,
21
21
  runTimelineCommand,
22
22
  runStatsCommand,
23
+ runInitCommand,
24
+ runTurnCommand,
25
+ runFilterCommand,
23
26
  } from "../src/commands/trace.js";
24
27
 
25
28
  const { version: VERSION } = JSON.parse(
@@ -99,6 +102,10 @@ const definition = {
99
102
  type: "string",
100
103
  description: "Surrounding turns per hit (default: 0)",
101
104
  },
105
+ full: {
106
+ type: "boolean",
107
+ description: "Full content block in match descriptions",
108
+ },
102
109
  },
103
110
  },
104
111
  {
@@ -135,11 +142,45 @@ const definition = {
135
142
  args: "<file>",
136
143
  description: "Token usage and cost breakdown",
137
144
  },
145
+ {
146
+ name: "init",
147
+ args: "<file>",
148
+ description: "Full system/init event",
149
+ },
150
+ {
151
+ name: "turn",
152
+ args: "<file> <index>",
153
+ description: "Single turn by index",
154
+ },
155
+ {
156
+ name: "filter",
157
+ args: "<file>",
158
+ description: "Filter turns by structural properties",
159
+ options: {
160
+ role: {
161
+ type: "string",
162
+ description: "Turn role (system, user, assistant, tool_result)",
163
+ },
164
+ tool: {
165
+ type: "string",
166
+ description: "Tool name (matches assistant turns)",
167
+ },
168
+ error: {
169
+ type: "boolean",
170
+ description:
171
+ "Error tool_result turns only (flag-only; for non-errors use the API)",
172
+ },
173
+ },
174
+ },
138
175
  ],
139
176
  globalOptions: {
140
177
  help: { type: "boolean", short: "h", description: "Show this help" },
141
178
  version: { type: "boolean", description: "Show version" },
142
179
  json: { type: "boolean", description: "Output help as JSON" },
180
+ signatures: {
181
+ type: "boolean",
182
+ description: "Include thinking.signature blobs in output",
183
+ },
143
184
  },
144
185
  examples: [
145
186
  "fit-trace runs --lookback 7d",
@@ -149,6 +190,11 @@ const definition = {
149
190
  "fit-trace search structured.json 'error|fail' --context 1",
150
191
  "fit-trace tool structured.json Bash",
151
192
  "fit-trace batch structured.json 0 20",
193
+ "fit-trace init structured.json",
194
+ "fit-trace turn structured.json 3",
195
+ "fit-trace filter structured.json --role system",
196
+ "fit-trace filter structured.json --tool Bash --role assistant",
197
+ "fit-trace search structured.json 'error' --full",
152
198
  ],
153
199
  };
154
200
 
@@ -170,6 +216,9 @@ const COMMANDS = {
170
216
  reasoning: runReasoningCommand,
171
217
  timeline: runTimelineCommand,
172
218
  stats: runStatsCommand,
219
+ init: runInitCommand,
220
+ turn: runTurnCommand,
221
+ filter: runFilterCommand,
173
222
  };
174
223
 
175
224
  async function main() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.20",
3
+ "version": "0.1.22",
4
4
  "description": "Process Claude Code stream-json output into structured traces",
5
5
  "license": "Apache-2.0",
6
6
  "author": "D. Olsson <hi@senzilla.io>",
@@ -25,15 +25,18 @@
25
25
  "node": ">=18.0.0"
26
26
  },
27
27
  "scripts": {
28
- "test": "bun run node --test test/*.test.js"
28
+ "test": "bun test test/*.test.js"
29
29
  },
30
30
  "dependencies": {
31
- "@anthropic-ai/claude-agent-sdk": "^0.2.112",
31
+ "@anthropic-ai/claude-agent-sdk": "0.2.112",
32
32
  "@forwardimpact/libcli": "^0.1.0",
33
33
  "@forwardimpact/libconfig": "^0.1.0",
34
34
  "@forwardimpact/libtelemetry": "^0.1.22",
35
35
  "zod": "^4.3.6"
36
36
  },
37
+ "devDependencies": {
38
+ "@forwardimpact/libharness": "^0.1.14"
39
+ },
37
40
  "publishConfig": {
38
41
  "access": "public"
39
42
  }
@@ -28,6 +28,7 @@ function applyDefaults(deps) {
28
28
  systemPrompt: deps.systemPrompt ?? null,
29
29
  disallowedTools: deps.disallowedTools ?? [],
30
30
  mcpServers: deps.mcpServers ?? null,
31
+ taskAmend: deps.taskAmend ?? null,
31
32
  };
32
33
  }
33
34
 
@@ -67,9 +68,12 @@ export class AgentRunner {
67
68
  async run(task) {
68
69
  const abortController = new AbortController();
69
70
  this.currentAbortController = abortController;
71
+ const effectiveTask = this.taskAmend
72
+ ? `${task}\n\n${this.taskAmend}`
73
+ : task;
70
74
  try {
71
75
  const iterator = this.query({
72
- prompt: task,
76
+ prompt: effectiveTask,
73
77
  options: {
74
78
  cwd: this.cwd,
75
79
  allowedTools: this.allowedTools,
@@ -30,8 +30,7 @@ function parseFacilitateOptions(values) {
30
30
  throw new Error("--task-file or --task-text is required");
31
31
 
32
32
  const taskAmend = values["task-amend"] ?? undefined;
33
- let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
34
- if (taskAmend) taskContent += `\n\n${taskAmend}`;
33
+ const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
35
34
 
36
35
  const profilesRaw = values["agent-profiles"];
37
36
  if (!profilesRaw) throw new Error("--agent-profiles is required");
@@ -42,6 +41,7 @@ function parseFacilitateOptions(values) {
42
41
 
43
42
  return {
44
43
  taskContent,
44
+ taskAmend,
45
45
  agentConfigs,
46
46
  facilitatorCwd: resolve(values["facilitator-cwd"] ?? "."),
47
47
  model: values.model ?? "opus",
@@ -82,6 +82,7 @@ export async function runFacilitateCommand(values, _args) {
82
82
  model: opts.model,
83
83
  maxTurns: opts.maxTurns,
84
84
  facilitatorProfile: opts.facilitatorProfile,
85
+ taskAmend: opts.taskAmend,
85
86
  });
86
87
 
87
88
  const result = await facilitator.run(opts.taskContent);
@@ -21,11 +21,11 @@ function parseRunOptions(values) {
21
21
 
22
22
  const maxTurnsRaw = values["max-turns"] ?? "50";
23
23
  const taskAmend = values["task-amend"] ?? undefined;
24
- let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
25
- if (taskAmend) taskContent += `\n\n${taskAmend}`;
24
+ const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
26
25
 
27
26
  return {
28
27
  taskContent,
28
+ taskAmend,
29
29
  cwd: resolve(values.cwd ?? "."),
30
30
  model: values.model ?? "opus",
31
31
  maxTurns: maxTurnsRaw === "0" ? 0 : parseInt(maxTurnsRaw, 10),
@@ -49,6 +49,7 @@ function parseRunOptions(values) {
49
49
  export async function runRunCommand(values, _args) {
50
50
  const {
51
51
  taskContent,
52
+ taskAmend,
52
53
  cwd,
53
54
  model,
54
55
  maxTurns,
@@ -94,6 +95,7 @@ export async function runRunCommand(values, _args) {
94
95
  onLine,
95
96
  settingSources: ["project"],
96
97
  systemPrompt,
98
+ taskAmend,
97
99
  });
98
100
 
99
101
  const result = await runner.run(taskContent);
@@ -20,11 +20,11 @@ function parseSuperviseOptions(values) {
20
20
  const supervisorAllowedToolsRaw = values["supervisor-allowed-tools"];
21
21
 
22
22
  const taskAmend = values["task-amend"] ?? undefined;
23
- let taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
24
- if (taskAmend) taskContent += `\n\n${taskAmend}`;
23
+ const taskContent = taskFile ? readFileSync(taskFile, "utf8") : taskText;
25
24
 
26
25
  return {
27
26
  taskContent,
27
+ taskAmend,
28
28
  supervisorCwd: resolve(values["supervisor-cwd"] ?? "."),
29
29
  agentCwd: resolve(
30
30
  values["agent-cwd"] ?? mkdtempSync(join(tmpdir(), "fit-eval-agent-")),
@@ -83,6 +83,7 @@ export async function runSuperviseCommand(values, _args) {
83
83
  supervisorAllowedTools: opts.supervisorAllowedTools,
84
84
  supervisorProfile: opts.supervisorProfile,
85
85
  agentProfile: opts.agentProfile,
86
+ taskAmend: opts.taskAmend,
86
87
  });
87
88
 
88
89
  const result = await supervisor.run(opts.taskContent);
@@ -3,6 +3,7 @@ import { join } from "node:path";
3
3
  import { createTraceCollector } from "@forwardimpact/libeval";
4
4
  import { createTraceQuery } from "../trace-query.js";
5
5
  import { createTraceGitHub } from "../trace-github.js";
6
+ import { stripSignatures } from "../signature-filter.js";
6
7
 
7
8
  // --- GitHub commands ---
8
9
 
@@ -20,7 +21,7 @@ export async function runRunsCommand(values, args, ctx) {
20
21
  const pattern = args[0] ?? "agent";
21
22
  const lookback = values.lookback ?? "7d";
22
23
  const runs = await gh.listRuns({ pattern, lookback });
23
- writeJSON(runs);
24
+ writeJSON(runs, values);
24
25
  }
25
26
 
26
27
  /**
@@ -51,14 +52,14 @@ export async function runDownloadCommand(values, args, ctx) {
51
52
  result.files.push("structured.json");
52
53
  }
53
54
 
54
- writeJSON(result);
55
+ writeJSON(result, values);
55
56
  }
56
57
 
57
58
  // --- Query commands ---
58
59
 
59
60
  /** @param {object} values @param {string[]} args - [file] */
60
61
  export async function runOverviewCommand(values, args) {
61
- writeJSON(loadTrace(args[0]).overview());
62
+ writeJSON(loadTrace(args[0]).overview(), values);
62
63
  }
63
64
 
64
65
  /** @param {object} values @param {string[]} args - [file] */
@@ -70,48 +71,53 @@ export async function runCountCommand(values, args) {
70
71
  export async function runBatchCommand(values, args) {
71
72
  writeJSON(
72
73
  loadTrace(args[0]).batch(parseInt(args[1], 10), parseInt(args[2], 10)),
74
+ values,
73
75
  );
74
76
  }
75
77
 
76
78
  /** @param {object} values @param {string[]} args - [file, N?] */
77
79
  export async function runHeadCommand(values, args) {
78
80
  const n = args[1] ? parseInt(args[1], 10) : 10;
79
- writeJSON(loadTrace(args[0]).head(n));
81
+ writeJSON(loadTrace(args[0]).head(n), values);
80
82
  }
81
83
 
82
84
  /** @param {object} values @param {string[]} args - [file, N?] */
83
85
  export async function runTailCommand(values, args) {
84
86
  const n = args[1] ? parseInt(args[1], 10) : 10;
85
- writeJSON(loadTrace(args[0]).tail(n));
87
+ writeJSON(loadTrace(args[0]).tail(n), values);
86
88
  }
87
89
 
88
90
  /** @param {object} values @param {string[]} args - [file, pattern] */
89
91
  export async function runSearchCommand(values, args) {
90
92
  const limit = values.limit ? parseInt(values.limit, 10) : 50;
91
93
  const context = values.context ? parseInt(values.context, 10) : 0;
92
- writeJSON(loadTrace(args[0]).search(args[1], { limit, context }));
94
+ const full = values.full ?? false;
95
+ writeJSON(
96
+ loadTrace(args[0]).search(args[1], { limit, context, full }),
97
+ values,
98
+ );
93
99
  }
94
100
 
95
101
  /** @param {object} values @param {string[]} args - [file] */
96
102
  export async function runToolsCommand(values, args) {
97
- writeJSON(loadTrace(args[0]).toolFrequency());
103
+ writeJSON(loadTrace(args[0]).toolFrequency(), values);
98
104
  }
99
105
 
100
106
  /** @param {object} values @param {string[]} args - [file, name] */
101
107
  export async function runToolCommand(values, args) {
102
- writeJSON(loadTrace(args[0]).tool(args[1]));
108
+ writeJSON(loadTrace(args[0]).tool(args[1]), values);
103
109
  }
104
110
 
105
111
  /** @param {object} values @param {string[]} args - [file] */
106
112
  export async function runErrorsCommand(values, args) {
107
- writeJSON(loadTrace(args[0]).errors());
113
+ writeJSON(loadTrace(args[0]).errors(), values);
108
114
  }
109
115
 
110
116
  /** @param {object} values @param {string[]} args - [file] */
111
117
  export async function runReasoningCommand(values, args) {
112
118
  const from = values.from ? parseInt(values.from, 10) : undefined;
113
119
  const to = values.to ? parseInt(values.to, 10) : undefined;
114
- writeJSON(loadTrace(args[0]).reasoning({ from, to }));
120
+ writeJSON(loadTrace(args[0]).reasoning({ from, to }), values);
115
121
  }
116
122
 
117
123
  /** @param {object} values @param {string[]} args - [file] */
@@ -122,7 +128,26 @@ export async function runTimelineCommand(values, args) {
122
128
 
123
129
  /** @param {object} values @param {string[]} args - [file] */
124
130
  export async function runStatsCommand(values, args) {
125
- writeJSON(loadTrace(args[0]).stats());
131
+ writeJSON(loadTrace(args[0]).stats(), values);
132
+ }
133
+
134
+ /** @param {object} values @param {string[]} args - [file] */
135
+ export async function runInitCommand(values, args) {
136
+ writeJSON(loadTrace(args[0]).init(), values);
137
+ }
138
+
139
+ /** @param {object} values @param {string[]} args - [file, index] */
140
+ export async function runTurnCommand(values, args) {
141
+ writeJSON(loadTrace(args[0]).turn(parseInt(args[1], 10)), values);
142
+ }
143
+
144
+ /** @param {object} values @param {string[]} args - [file] */
145
+ export async function runFilterCommand(values, args) {
146
+ const opts = {};
147
+ if (values.role) opts.role = values.role;
148
+ if (values.tool) opts.toolName = values.tool;
149
+ if (values.error) opts.isError = true;
150
+ writeJSON(loadTrace(args[0]).filter(opts), values);
126
151
  }
127
152
 
128
153
  // --- Shared helpers ---
@@ -151,7 +176,14 @@ function loadTrace(file) {
151
176
  return createTraceQuery(collector.toJSON());
152
177
  }
153
178
 
154
- /** @param {object} data */
155
- function writeJSON(data) {
156
- process.stdout.write(JSON.stringify(data, null, 2) + "\n");
179
+ /**
180
+ * Write JSON output to stdout. By default strips `thinking.signature`
181
+ * base64 blobs from the payload so they don't dominate terminal output;
182
+ * pass `--signatures` (surfaced as `values.signatures`) to keep them.
183
+ * @param {*} data
184
+ * @param {object} [values]
185
+ */
186
+ function writeJSON(data, values = {}) {
187
+ const output = values.signatures ? data : stripSignatures(data);
188
+ process.stdout.write(JSON.stringify(output, null, 2) + "\n");
157
189
  }