@forwardimpact/libeval 0.1.64 → 0.1.65

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/bin/fit-trace.js CHANGED
@@ -28,10 +28,25 @@ import {
28
28
  runTurnCommand,
29
29
  runFilterCommand,
30
30
  runSplitCommand,
31
+ runToolCallsCommand,
32
+ runCommandsCommand,
33
+ runPathsCommand,
34
+ runCompareCommand,
31
35
  } from "../src/commands/trace.js";
32
36
  import { runAssertCommand } from "../src/commands/assert.js";
33
37
  import { runByDiscussionCommand } from "../src/commands/by-discussion.js";
34
38
 
39
+ // Cross-trace verbs take one or more trace files via repeated `--file`
40
+ // (libcli's named-slot `dispatch()` has no variadic positional). A value with
41
+ // glob metacharacters is expanded by the handler via `runtime.fsSync.globSync`.
42
+ const fileOption = () => ({
43
+ file: {
44
+ type: "string",
45
+ multiple: true,
46
+ description: "Trace file (repeat or pass a quoted glob for several)",
47
+ },
48
+ });
49
+
35
50
  const definition = {
36
51
  name: "fit-trace",
37
52
  description:
@@ -99,17 +114,17 @@ const definition = {
99
114
  },
100
115
  {
101
116
  name: "overview",
102
- args: ["file"],
103
- argsUsage: "<file>",
117
+ args: [],
104
118
  handler: runOverviewCommand,
105
119
  description: "Metadata, summary, turn count, tool frequency",
120
+ options: fileOption(),
106
121
  },
107
122
  {
108
123
  name: "count",
109
- args: ["file"],
110
- argsUsage: "<file>",
124
+ args: [],
111
125
  handler: runCountCommand,
112
126
  description: "Number of turns",
127
+ options: fileOption(),
113
128
  },
114
129
  {
115
130
  name: "batch",
@@ -120,17 +135,23 @@ const definition = {
120
135
  },
121
136
  {
122
137
  name: "head",
123
- args: ["file", "n"],
124
- argsUsage: "<file> [N]",
138
+ args: [],
125
139
  handler: runHeadCommand,
126
- description: "First N turns (default 10)",
140
+ description: "First N turns (default 10; set with --lines)",
141
+ options: {
142
+ ...fileOption(),
143
+ lines: { type: "string", description: "Number of turns (default: 10)" },
144
+ },
127
145
  },
128
146
  {
129
147
  name: "tail",
130
- args: ["file", "n"],
131
- argsUsage: "<file> [N]",
148
+ args: [],
132
149
  handler: runTailCommand,
133
- description: "Last N turns (default 10)",
150
+ description: "Last N turns (default 10; set with --lines)",
151
+ options: {
152
+ ...fileOption(),
153
+ lines: { type: "string", description: "Number of turns (default: 10)" },
154
+ },
134
155
  },
135
156
  {
136
157
  name: "search",
@@ -155,49 +176,105 @@ const definition = {
155
176
  },
156
177
  {
157
178
  name: "tools",
158
- args: ["file"],
159
- argsUsage: "<file>",
179
+ args: [],
160
180
  handler: runToolsCommand,
161
- description: "Tool usage frequency (descending)",
181
+ description:
182
+ "Tool usage frequency (descending). See also `tool` (turns for one tool) and `tool-calls` (paired use+result records)",
183
+ options: fileOption(),
162
184
  },
163
185
  {
164
186
  name: "tool",
165
187
  args: ["file", "name"],
166
188
  argsUsage: "<file> <name>",
167
189
  handler: runToolCommand,
168
- description: "All turns involving a specific tool",
190
+ description:
191
+ "All turns involving a specific tool. See also `tools` (frequency) and `tool-calls` (paired use+result records)",
192
+ },
193
+ {
194
+ name: "tool-calls",
195
+ args: [],
196
+ handler: runToolCallsCommand,
197
+ description:
198
+ "One record per tool_use block, each paired with its tool_result by toolUseId (orphans emit result:null). See also `tool` and `tools`",
199
+ options: fileOption(),
200
+ },
201
+ {
202
+ name: "commands",
203
+ args: [],
204
+ handler: runCommandsCommand,
205
+ description:
206
+ "One record per Bash tool_use block, carrying the command text",
207
+ options: {
208
+ ...fileOption(),
209
+ match: {
210
+ type: "string",
211
+ description: "Filter to commands whose text matches this regex",
212
+ },
213
+ },
214
+ },
215
+ {
216
+ name: "paths",
217
+ args: [],
218
+ handler: runPathsCommand,
219
+ description:
220
+ "Distinct Read/Edit/Write file_path arguments, frequency-sorted",
221
+ options: {
222
+ ...fileOption(),
223
+ prefix: {
224
+ type: "string",
225
+ description: "Filter to paths beginning with this prefix",
226
+ },
227
+ },
228
+ },
229
+ {
230
+ name: "compare",
231
+ args: ["file-a", "file-b"],
232
+ argsUsage: "<file-a> <file-b>",
233
+ handler: runCompareCommand,
234
+ description:
235
+ "Side-by-side comparison of two traces: turns, tools, paths, cost, and per-tool delta",
169
236
  },
170
237
  {
171
238
  name: "errors",
172
- args: ["file"],
173
- argsUsage: "<file>",
239
+ args: [],
174
240
  handler: runErrorsCommand,
175
241
  description: "Tool results with isError=true",
242
+ options: fileOption(),
176
243
  },
177
244
  {
178
245
  name: "reasoning",
179
- args: ["file"],
180
- argsUsage: "<file>",
246
+ args: [],
181
247
  handler: runReasoningCommand,
182
248
  description: "Agent reasoning text only",
183
249
  options: {
250
+ ...fileOption(),
184
251
  from: { type: "string", description: "Start at turn index" },
185
252
  to: { type: "string", description: "Stop before turn index" },
186
253
  },
187
254
  },
188
255
  {
189
256
  name: "timeline",
190
- args: ["file"],
191
- argsUsage: "<file>",
257
+ args: [],
192
258
  handler: runTimelineCommand,
193
259
  description: "Compact one-line-per-turn overview",
260
+ options: fileOption(),
194
261
  },
195
262
  {
196
263
  name: "stats",
197
- args: ["file"],
198
- argsUsage: "<file>",
264
+ args: [],
199
265
  handler: runStatsCommand,
200
266
  description: "Token usage and cost breakdown",
267
+ options: {
268
+ ...fileOption(),
269
+ "by-tool": {
270
+ type: "boolean",
271
+ description: "Per-tool token attribution and cost share",
272
+ },
273
+ summary: {
274
+ type: "boolean",
275
+ description: "Totals only (suppress the per-turn array)",
276
+ },
277
+ },
201
278
  },
202
279
  {
203
280
  name: "cost",
@@ -216,10 +293,10 @@ const definition = {
216
293
  },
217
294
  {
218
295
  name: "init",
219
- args: ["file"],
220
- argsUsage: "<file>",
296
+ args: [],
221
297
  handler: runInitCommand,
222
298
  description: "Full system/init event",
299
+ options: fileOption(),
223
300
  },
224
301
  {
225
302
  name: "turn",
@@ -244,11 +321,11 @@ const definition = {
244
321
  },
245
322
  {
246
323
  name: "filter",
247
- args: ["file"],
248
- argsUsage: "<file>",
324
+ args: [],
249
325
  handler: runFilterCommand,
250
326
  description: "Filter turns by role, tool, or error status",
251
327
  options: {
328
+ ...fileOption(),
252
329
  role: {
253
330
  type: "string",
254
331
  description: "Turn role (system, user, assistant, tool_result)",
@@ -327,7 +404,15 @@ const definition = {
327
404
  globalOptions: {
328
405
  help: { type: "boolean", short: "h", description: "Show this help" },
329
406
  version: { type: "boolean", description: "Show version" },
330
- json: { type: "boolean", description: "Output help as JSON" },
407
+ json: {
408
+ type: "boolean",
409
+ description: "Output help as JSON (use --format json for command output)",
410
+ },
411
+ format: {
412
+ type: "string",
413
+ default: "text",
414
+ description: "Command output format: text (default) or json",
415
+ },
331
416
  signatures: {
332
417
  type: "boolean",
333
418
  description: "Include thinking.signature blobs in output",
@@ -339,14 +424,19 @@ const definition = {
339
424
  "fit-trace find 27401632821 release-engineer",
340
425
  "fit-trace download 24497273755",
341
426
  "fit-trace split structured.json --mode=facilitate",
342
- "fit-trace overview structured.json",
343
- "fit-trace timeline structured.json",
344
- "fit-trace stats structured.json",
427
+ "fit-trace overview --file structured.json",
428
+ "fit-trace overview --file structured.json --format json",
429
+ "fit-trace timeline --file structured.json",
430
+ "fit-trace stats --file structured.json --by-tool",
345
431
  "fit-trace cost trace.ndjson",
346
432
  "fit-trace cost trace.ndjson --markdown",
347
433
  "fit-trace tool structured.json Conclude",
434
+ "fit-trace tool-calls --file structured.json",
435
+ "fit-trace commands --file structured.json --match '^git'",
436
+ "fit-trace paths --file 'traces/*.ndjson' --prefix /app",
437
+ "fit-trace compare trace-a.ndjson trace-b.ndjson",
348
438
  "fit-trace search structured.json 'error|fail' --context 1",
349
- "fit-trace filter structured.json --tool Bash --error",
439
+ "fit-trace filter --file structured.json --tool Bash --error",
350
440
  "fit-trace turn structured.json 3",
351
441
  "fit-trace assert has-heading --grep '^## Problem' spec.md",
352
442
  "fit-trace assert no-leak --not --grep 'password' output.log",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@forwardimpact/libeval",
3
- "version": "0.1.64",
3
+ "version": "0.1.65",
4
4
  "description": "Agent evaluation framework — prove whether agent changes improved outcomes with reproducible evidence.",
5
5
  "keywords": [
6
6
  "eval",