@forwardimpact/libeval 0.1.64 → 0.1.65
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/bin/fit-trace.js +121 -31
- package/package.json +1 -1
- package/src/commands/trace.js +245 -51
- package/src/trace-multi.js +101 -0
- package/src/trace-query.js +206 -137
- package/src/trace-render.js +211 -0
- package/src/trace-usage.js +249 -0
package/bin/fit-trace.js
CHANGED
|
@@ -28,10 +28,25 @@ import {
|
|
|
28
28
|
runTurnCommand,
|
|
29
29
|
runFilterCommand,
|
|
30
30
|
runSplitCommand,
|
|
31
|
+
runToolCallsCommand,
|
|
32
|
+
runCommandsCommand,
|
|
33
|
+
runPathsCommand,
|
|
34
|
+
runCompareCommand,
|
|
31
35
|
} from "../src/commands/trace.js";
|
|
32
36
|
import { runAssertCommand } from "../src/commands/assert.js";
|
|
33
37
|
import { runByDiscussionCommand } from "../src/commands/by-discussion.js";
|
|
34
38
|
|
|
39
|
+
// Cross-trace verbs take one or more trace files via repeated `--file`
|
|
40
|
+
// (libcli's named-slot `dispatch()` has no variadic positional). A value with
|
|
41
|
+
// glob metacharacters is expanded by the handler via `runtime.fsSync.globSync`.
|
|
42
|
+
const fileOption = () => ({
|
|
43
|
+
file: {
|
|
44
|
+
type: "string",
|
|
45
|
+
multiple: true,
|
|
46
|
+
description: "Trace file (repeat or pass a quoted glob for several)",
|
|
47
|
+
},
|
|
48
|
+
});
|
|
49
|
+
|
|
35
50
|
const definition = {
|
|
36
51
|
name: "fit-trace",
|
|
37
52
|
description:
|
|
@@ -99,17 +114,17 @@ const definition = {
|
|
|
99
114
|
},
|
|
100
115
|
{
|
|
101
116
|
name: "overview",
|
|
102
|
-
args: [
|
|
103
|
-
argsUsage: "<file>",
|
|
117
|
+
args: [],
|
|
104
118
|
handler: runOverviewCommand,
|
|
105
119
|
description: "Metadata, summary, turn count, tool frequency",
|
|
120
|
+
options: fileOption(),
|
|
106
121
|
},
|
|
107
122
|
{
|
|
108
123
|
name: "count",
|
|
109
|
-
args: [
|
|
110
|
-
argsUsage: "<file>",
|
|
124
|
+
args: [],
|
|
111
125
|
handler: runCountCommand,
|
|
112
126
|
description: "Number of turns",
|
|
127
|
+
options: fileOption(),
|
|
113
128
|
},
|
|
114
129
|
{
|
|
115
130
|
name: "batch",
|
|
@@ -120,17 +135,23 @@ const definition = {
|
|
|
120
135
|
},
|
|
121
136
|
{
|
|
122
137
|
name: "head",
|
|
123
|
-
args: [
|
|
124
|
-
argsUsage: "<file> [N]",
|
|
138
|
+
args: [],
|
|
125
139
|
handler: runHeadCommand,
|
|
126
|
-
description: "First N turns (default 10)",
|
|
140
|
+
description: "First N turns (default 10; set with --lines)",
|
|
141
|
+
options: {
|
|
142
|
+
...fileOption(),
|
|
143
|
+
lines: { type: "string", description: "Number of turns (default: 10)" },
|
|
144
|
+
},
|
|
127
145
|
},
|
|
128
146
|
{
|
|
129
147
|
name: "tail",
|
|
130
|
-
args: [
|
|
131
|
-
argsUsage: "<file> [N]",
|
|
148
|
+
args: [],
|
|
132
149
|
handler: runTailCommand,
|
|
133
|
-
description: "Last N turns (default 10)",
|
|
150
|
+
description: "Last N turns (default 10; set with --lines)",
|
|
151
|
+
options: {
|
|
152
|
+
...fileOption(),
|
|
153
|
+
lines: { type: "string", description: "Number of turns (default: 10)" },
|
|
154
|
+
},
|
|
134
155
|
},
|
|
135
156
|
{
|
|
136
157
|
name: "search",
|
|
@@ -155,49 +176,105 @@ const definition = {
|
|
|
155
176
|
},
|
|
156
177
|
{
|
|
157
178
|
name: "tools",
|
|
158
|
-
args: [
|
|
159
|
-
argsUsage: "<file>",
|
|
179
|
+
args: [],
|
|
160
180
|
handler: runToolsCommand,
|
|
161
|
-
description:
|
|
181
|
+
description:
|
|
182
|
+
"Tool usage frequency (descending). See also `tool` (turns for one tool) and `tool-calls` (paired use+result records)",
|
|
183
|
+
options: fileOption(),
|
|
162
184
|
},
|
|
163
185
|
{
|
|
164
186
|
name: "tool",
|
|
165
187
|
args: ["file", "name"],
|
|
166
188
|
argsUsage: "<file> <name>",
|
|
167
189
|
handler: runToolCommand,
|
|
168
|
-
description:
|
|
190
|
+
description:
|
|
191
|
+
"All turns involving a specific tool. See also `tools` (frequency) and `tool-calls` (paired use+result records)",
|
|
192
|
+
},
|
|
193
|
+
{
|
|
194
|
+
name: "tool-calls",
|
|
195
|
+
args: [],
|
|
196
|
+
handler: runToolCallsCommand,
|
|
197
|
+
description:
|
|
198
|
+
"One record per tool_use block, each paired with its tool_result by toolUseId (orphans emit result:null). See also `tool` and `tools`",
|
|
199
|
+
options: fileOption(),
|
|
200
|
+
},
|
|
201
|
+
{
|
|
202
|
+
name: "commands",
|
|
203
|
+
args: [],
|
|
204
|
+
handler: runCommandsCommand,
|
|
205
|
+
description:
|
|
206
|
+
"One record per Bash tool_use block, carrying the command text",
|
|
207
|
+
options: {
|
|
208
|
+
...fileOption(),
|
|
209
|
+
match: {
|
|
210
|
+
type: "string",
|
|
211
|
+
description: "Filter to commands whose text matches this regex",
|
|
212
|
+
},
|
|
213
|
+
},
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
name: "paths",
|
|
217
|
+
args: [],
|
|
218
|
+
handler: runPathsCommand,
|
|
219
|
+
description:
|
|
220
|
+
"Distinct Read/Edit/Write file_path arguments, frequency-sorted",
|
|
221
|
+
options: {
|
|
222
|
+
...fileOption(),
|
|
223
|
+
prefix: {
|
|
224
|
+
type: "string",
|
|
225
|
+
description: "Filter to paths beginning with this prefix",
|
|
226
|
+
},
|
|
227
|
+
},
|
|
228
|
+
},
|
|
229
|
+
{
|
|
230
|
+
name: "compare",
|
|
231
|
+
args: ["file-a", "file-b"],
|
|
232
|
+
argsUsage: "<file-a> <file-b>",
|
|
233
|
+
handler: runCompareCommand,
|
|
234
|
+
description:
|
|
235
|
+
"Side-by-side comparison of two traces: turns, tools, paths, cost, and per-tool delta",
|
|
169
236
|
},
|
|
170
237
|
{
|
|
171
238
|
name: "errors",
|
|
172
|
-
args: [
|
|
173
|
-
argsUsage: "<file>",
|
|
239
|
+
args: [],
|
|
174
240
|
handler: runErrorsCommand,
|
|
175
241
|
description: "Tool results with isError=true",
|
|
242
|
+
options: fileOption(),
|
|
176
243
|
},
|
|
177
244
|
{
|
|
178
245
|
name: "reasoning",
|
|
179
|
-
args: [
|
|
180
|
-
argsUsage: "<file>",
|
|
246
|
+
args: [],
|
|
181
247
|
handler: runReasoningCommand,
|
|
182
248
|
description: "Agent reasoning text only",
|
|
183
249
|
options: {
|
|
250
|
+
...fileOption(),
|
|
184
251
|
from: { type: "string", description: "Start at turn index" },
|
|
185
252
|
to: { type: "string", description: "Stop before turn index" },
|
|
186
253
|
},
|
|
187
254
|
},
|
|
188
255
|
{
|
|
189
256
|
name: "timeline",
|
|
190
|
-
args: [
|
|
191
|
-
argsUsage: "<file>",
|
|
257
|
+
args: [],
|
|
192
258
|
handler: runTimelineCommand,
|
|
193
259
|
description: "Compact one-line-per-turn overview",
|
|
260
|
+
options: fileOption(),
|
|
194
261
|
},
|
|
195
262
|
{
|
|
196
263
|
name: "stats",
|
|
197
|
-
args: [
|
|
198
|
-
argsUsage: "<file>",
|
|
264
|
+
args: [],
|
|
199
265
|
handler: runStatsCommand,
|
|
200
266
|
description: "Token usage and cost breakdown",
|
|
267
|
+
options: {
|
|
268
|
+
...fileOption(),
|
|
269
|
+
"by-tool": {
|
|
270
|
+
type: "boolean",
|
|
271
|
+
description: "Per-tool token attribution and cost share",
|
|
272
|
+
},
|
|
273
|
+
summary: {
|
|
274
|
+
type: "boolean",
|
|
275
|
+
description: "Totals only (suppress the per-turn array)",
|
|
276
|
+
},
|
|
277
|
+
},
|
|
201
278
|
},
|
|
202
279
|
{
|
|
203
280
|
name: "cost",
|
|
@@ -216,10 +293,10 @@ const definition = {
|
|
|
216
293
|
},
|
|
217
294
|
{
|
|
218
295
|
name: "init",
|
|
219
|
-
args: [
|
|
220
|
-
argsUsage: "<file>",
|
|
296
|
+
args: [],
|
|
221
297
|
handler: runInitCommand,
|
|
222
298
|
description: "Full system/init event",
|
|
299
|
+
options: fileOption(),
|
|
223
300
|
},
|
|
224
301
|
{
|
|
225
302
|
name: "turn",
|
|
@@ -244,11 +321,11 @@ const definition = {
|
|
|
244
321
|
},
|
|
245
322
|
{
|
|
246
323
|
name: "filter",
|
|
247
|
-
args: [
|
|
248
|
-
argsUsage: "<file>",
|
|
324
|
+
args: [],
|
|
249
325
|
handler: runFilterCommand,
|
|
250
326
|
description: "Filter turns by role, tool, or error status",
|
|
251
327
|
options: {
|
|
328
|
+
...fileOption(),
|
|
252
329
|
role: {
|
|
253
330
|
type: "string",
|
|
254
331
|
description: "Turn role (system, user, assistant, tool_result)",
|
|
@@ -327,7 +404,15 @@ const definition = {
|
|
|
327
404
|
globalOptions: {
|
|
328
405
|
help: { type: "boolean", short: "h", description: "Show this help" },
|
|
329
406
|
version: { type: "boolean", description: "Show version" },
|
|
330
|
-
json: {
|
|
407
|
+
json: {
|
|
408
|
+
type: "boolean",
|
|
409
|
+
description: "Output help as JSON (use --format json for command output)",
|
|
410
|
+
},
|
|
411
|
+
format: {
|
|
412
|
+
type: "string",
|
|
413
|
+
default: "text",
|
|
414
|
+
description: "Command output format: text (default) or json",
|
|
415
|
+
},
|
|
331
416
|
signatures: {
|
|
332
417
|
type: "boolean",
|
|
333
418
|
description: "Include thinking.signature blobs in output",
|
|
@@ -339,14 +424,19 @@ const definition = {
|
|
|
339
424
|
"fit-trace find 27401632821 release-engineer",
|
|
340
425
|
"fit-trace download 24497273755",
|
|
341
426
|
"fit-trace split structured.json --mode=facilitate",
|
|
342
|
-
"fit-trace overview structured.json",
|
|
343
|
-
"fit-trace
|
|
344
|
-
"fit-trace
|
|
427
|
+
"fit-trace overview --file structured.json",
|
|
428
|
+
"fit-trace overview --file structured.json --format json",
|
|
429
|
+
"fit-trace timeline --file structured.json",
|
|
430
|
+
"fit-trace stats --file structured.json --by-tool",
|
|
345
431
|
"fit-trace cost trace.ndjson",
|
|
346
432
|
"fit-trace cost trace.ndjson --markdown",
|
|
347
433
|
"fit-trace tool structured.json Conclude",
|
|
434
|
+
"fit-trace tool-calls --file structured.json",
|
|
435
|
+
"fit-trace commands --file structured.json --match '^git'",
|
|
436
|
+
"fit-trace paths --file 'traces/*.ndjson' --prefix /app",
|
|
437
|
+
"fit-trace compare trace-a.ndjson trace-b.ndjson",
|
|
348
438
|
"fit-trace search structured.json 'error|fail' --context 1",
|
|
349
|
-
"fit-trace filter structured.json --tool Bash --error",
|
|
439
|
+
"fit-trace filter --file structured.json --tool Bash --error",
|
|
350
440
|
"fit-trace turn structured.json 3",
|
|
351
441
|
"fit-trace assert has-heading --grep '^## Problem' spec.md",
|
|
352
442
|
"fit-trace assert no-leak --not --grep 'password' output.log",
|