@forwardimpact/libeval 0.1.63 → 0.1.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -201
- package/bin/fit-trace.js +166 -31
- package/package.json +1 -1
- package/src/benchmark/judge.js +16 -1
- package/src/benchmark/result.js +12 -0
- package/src/benchmark/runner.js +44 -25
- package/src/commands/callback.js +11 -5
- package/src/commands/trace.js +333 -53
- package/src/cost.js +79 -0
- package/src/index.js +2 -0
- package/src/redaction.js +65 -6
- package/src/trace-collector.js +58 -2
- package/src/trace-github.js +175 -3
- package/src/trace-multi.js +101 -0
- package/src/trace-query.js +294 -45
- package/src/trace-render.js +211 -0
- package/src/trace-usage.js +249 -0
package/src/commands/trace.js
CHANGED
|
@@ -1,9 +1,20 @@
|
|
|
1
|
-
import { join, dirname } from "node:path";
|
|
1
|
+
import { join, dirname, basename } from "node:path";
|
|
2
2
|
import { isoTimestamp } from "@forwardimpact/libutil";
|
|
3
|
-
import { createTraceCollector } from "@forwardimpact/libeval";
|
|
3
|
+
import { createTraceCollector, sumTraceCost } from "@forwardimpact/libeval";
|
|
4
4
|
import { createTraceQuery } from "../trace-query.js";
|
|
5
5
|
import { createTraceGitHub } from "../trace-github.js";
|
|
6
6
|
import { stripSignatures } from "../signature-filter.js";
|
|
7
|
+
import { runOver, aggregate, compareTwo } from "../trace-multi.js";
|
|
8
|
+
import {
|
|
9
|
+
renderToolCalls,
|
|
10
|
+
renderCommands,
|
|
11
|
+
renderPaths,
|
|
12
|
+
renderCompare,
|
|
13
|
+
renderStatsByTool,
|
|
14
|
+
renderStatsSummary,
|
|
15
|
+
renderSearch,
|
|
16
|
+
renderDefault,
|
|
17
|
+
} from "../trace-render.js";
|
|
7
18
|
|
|
8
19
|
// Every handler receives a libcli `InvocationContext`:
|
|
9
20
|
// ctx.options — parsed flag values (`cli.parse().values`)
|
|
@@ -12,6 +23,58 @@ import { stripSignatures } from "../signature-filter.js";
|
|
|
12
23
|
// Handlers read/write the filesystem and stdout exclusively through
|
|
13
24
|
// `ctx.deps.runtime` and return `{ ok: true }` on success.
|
|
14
25
|
|
|
26
|
+
/** Characters whose presence in a `--file` value marks it as a glob. */
const GLOB_CHARS = /[*?[\]{}]/;

/**
 * Resolve the cross-trace `--file` option (`ctx.options.file`) into a sorted,
 * de-duplicated flat list of file paths. A literal path passes through; a
 * value carrying glob metacharacters expands via `runtime.fsSync.globSync`.
 * The literal-path fast path means the common single-file and
 * shell-pre-expanded cases never touch `globSync`.
 *
 * Paths are de-duplicated by exact string identity, so overlapping globs (or
 * the same `--file` given twice) do not make a trace count twice or make a
 * single file look like a multi-file run. Differently spelled paths to the
 * same file (`a.json` vs `./a.json`) are not merged.
 *
 * @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
 * @param {import("@forwardimpact/libcli").InvocationContext} ctx
 * @returns {string[]} Unique paths in default (code-unit) sort order; empty
 *   when `--file` was not supplied or every glob matched nothing.
 */
function resolveFiles(runtime, ctx) {
  const raw = ctx.options.file;
  const values =
    raw === undefined || raw === null ? [] : Array.isArray(raw) ? raw : [raw];
  const unique = new Set();
  for (const value of values) {
    if (GLOB_CHARS.test(value)) {
      // Iterate rather than spread: a glob can match arbitrarily many files
      // and spreading them into call arguments can overflow the stack.
      for (const match of runtime.fsSync.globSync(value)) {
        unique.add(match);
      }
    } else {
      unique.add(value);
    }
  }
  return [...unique].sort();
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Emit a query result for a cross-trace verb: under `--format json` write the
|
|
55
|
+
* JSON payload (single-object verbs unwrap when single-file so the envelope
|
|
56
|
+
* deep-equals today's output); otherwise render text to stdout. Source
|
|
57
|
+
* attribution is the renderer's job, gated by `multi`.
|
|
58
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
59
|
+
* @param {object|object[]} result
|
|
60
|
+
* @param {Function} renderer
|
|
61
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
62
|
+
* @param {boolean} multi
|
|
63
|
+
* @param {boolean} [unwrap=false] - Single-object verb wrapped in a one-element array.
|
|
64
|
+
*/
|
|
65
|
+
function emit(runtime, result, renderer, ctx, multi, unwrap = false) {
|
|
66
|
+
if (ctx.options.format === "json") {
|
|
67
|
+
const payload = unwrap && !multi ? result[0] : result;
|
|
68
|
+
writeJSON(runtime, payload, ctx.options);
|
|
69
|
+
return;
|
|
70
|
+
}
|
|
71
|
+
const text = renderer(result, {
|
|
72
|
+
multi,
|
|
73
|
+
signatures: !!ctx.options.signatures,
|
|
74
|
+
});
|
|
75
|
+
runtime.proc.stdout.write(text + "\n");
|
|
76
|
+
}
|
|
77
|
+
|
|
15
78
|
// --- GitHub commands ---
|
|
16
79
|
|
|
17
80
|
/**
|
|
@@ -26,11 +89,33 @@ export async function runRunsCommand(ctx) {
|
|
|
26
89
|
runtime,
|
|
27
90
|
});
|
|
28
91
|
const lookback = ctx.options.lookback ?? "7d";
|
|
29
|
-
const runs = await gh.listRuns({
|
|
92
|
+
const runs = await gh.listRuns({
|
|
93
|
+
pattern: ctx.args.pattern,
|
|
94
|
+
lookback,
|
|
95
|
+
participant: ctx.options.participant,
|
|
96
|
+
});
|
|
30
97
|
writeJSON(runtime, runs, ctx.options);
|
|
31
98
|
return { ok: true };
|
|
32
99
|
}
|
|
33
100
|
|
|
101
|
+
/**
|
|
102
|
+
* Resolve a participant's lane trace for a known run id in one keyed lookup.
|
|
103
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
104
|
+
*/
|
|
105
|
+
export async function runFindCommand(ctx) {
|
|
106
|
+
const { runtime, config } = ctx.deps;
|
|
107
|
+
const gh = await createTraceGitHub({
|
|
108
|
+
token: config.ghToken(),
|
|
109
|
+
repo: ctx.options.repo,
|
|
110
|
+
runtime,
|
|
111
|
+
});
|
|
112
|
+
const result = await gh.findByKey(ctx.args["run-id"], ctx.args.participant, {
|
|
113
|
+
dir: ctx.options.dir,
|
|
114
|
+
});
|
|
115
|
+
writeJSON(runtime, result, ctx.options);
|
|
116
|
+
return { ok: true };
|
|
117
|
+
}
|
|
118
|
+
|
|
34
119
|
/**
|
|
35
120
|
* Download a trace artifact and auto-convert to structured JSON.
|
|
36
121
|
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
@@ -72,49 +157,78 @@ export async function runDownloadCommand(ctx) {
|
|
|
72
157
|
|
|
73
158
|
// --- Query commands ---
|
|
74
159
|
|
|
160
|
+
/**
|
|
161
|
+
* Build the injected loader the orchestrator uses (wires the runtime IO seam).
|
|
162
|
+
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
163
|
+
* @returns {(file: string) => import("../trace-query.js").TraceQuery}
|
|
164
|
+
*/
|
|
165
|
+
function loader(runtime) {
|
|
166
|
+
return (file) => loadTrace(runtime, file);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/** No-files error envelope for a cross-trace verb. */
|
|
170
|
+
function noFiles(verb) {
|
|
171
|
+
return { ok: false, code: 1, error: `${verb}: no files (use --file)` };
|
|
172
|
+
}
|
|
173
|
+
|
|
75
174
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
76
175
|
export async function runOverviewCommand(ctx) {
|
|
77
176
|
const { runtime } = ctx.deps;
|
|
78
|
-
|
|
177
|
+
const files = resolveFiles(runtime, ctx);
|
|
178
|
+
if (files.length === 0) return noFiles("overview");
|
|
179
|
+
const result = runOver(files, (tq) => [tq.overview()], loader(runtime));
|
|
180
|
+
emit(runtime, result, renderDefault, ctx, files.length > 1, true);
|
|
79
181
|
return { ok: true };
|
|
80
182
|
}
|
|
81
183
|
|
|
82
184
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
83
185
|
export async function runCountCommand(ctx) {
|
|
84
186
|
const { runtime } = ctx.deps;
|
|
85
|
-
runtime
|
|
86
|
-
|
|
187
|
+
const files = resolveFiles(runtime, ctx);
|
|
188
|
+
if (files.length === 0) return noFiles("count");
|
|
189
|
+
const multi = files.length > 1;
|
|
190
|
+
const result = runOver(
|
|
191
|
+
files,
|
|
192
|
+
(tq) => [{ count: tq.count() }],
|
|
193
|
+
loader(runtime),
|
|
87
194
|
);
|
|
195
|
+
for (const r of result) {
|
|
196
|
+
const prefix = multi && r.source ? `${r.source}:` : "";
|
|
197
|
+
runtime.proc.stdout.write(`${prefix}${r.count}\n`);
|
|
198
|
+
}
|
|
88
199
|
return { ok: true };
|
|
89
200
|
}
|
|
90
201
|
|
|
91
202
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
92
203
|
export async function runBatchCommand(ctx) {
|
|
93
204
|
const { runtime } = ctx.deps;
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
parseInt(ctx.args.from, 10),
|
|
98
|
-
parseInt(ctx.args.to, 10),
|
|
99
|
-
),
|
|
100
|
-
ctx.options,
|
|
205
|
+
const result = loadTrace(runtime, ctx.args.file).batch(
|
|
206
|
+
parseInt(ctx.args.from, 10),
|
|
207
|
+
parseInt(ctx.args.to, 10),
|
|
101
208
|
);
|
|
209
|
+
emit(runtime, result, renderDefault, ctx, false);
|
|
102
210
|
return { ok: true };
|
|
103
211
|
}
|
|
104
212
|
|
|
105
213
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
106
214
|
export async function runHeadCommand(ctx) {
|
|
107
215
|
const { runtime } = ctx.deps;
|
|
108
|
-
const
|
|
109
|
-
|
|
216
|
+
const files = resolveFiles(runtime, ctx);
|
|
217
|
+
if (files.length === 0) return noFiles("head");
|
|
218
|
+
const n = ctx.options.lines ? parseInt(ctx.options.lines, 10) : 10;
|
|
219
|
+
const result = runOver(files, (tq) => tq.head(n), loader(runtime));
|
|
220
|
+
emit(runtime, result, renderDefault, ctx, files.length > 1);
|
|
110
221
|
return { ok: true };
|
|
111
222
|
}
|
|
112
223
|
|
|
113
224
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
114
225
|
export async function runTailCommand(ctx) {
|
|
115
226
|
const { runtime } = ctx.deps;
|
|
116
|
-
const
|
|
117
|
-
|
|
227
|
+
const files = resolveFiles(runtime, ctx);
|
|
228
|
+
if (files.length === 0) return noFiles("tail");
|
|
229
|
+
const n = ctx.options.lines ? parseInt(ctx.options.lines, 10) : 10;
|
|
230
|
+
const result = runOver(files, (tq) => tq.tail(n), loader(runtime));
|
|
231
|
+
emit(runtime, result, renderDefault, ctx, files.length > 1);
|
|
118
232
|
return { ok: true };
|
|
119
233
|
}
|
|
120
234
|
|
|
@@ -124,105 +238,252 @@ export async function runSearchCommand(ctx) {
|
|
|
124
238
|
const limit = ctx.options.limit ? parseInt(ctx.options.limit, 10) : 50;
|
|
125
239
|
const context = ctx.options.context ? parseInt(ctx.options.context, 10) : 0;
|
|
126
240
|
const full = ctx.options.full ?? false;
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
}),
|
|
134
|
-
ctx.options,
|
|
135
|
-
);
|
|
241
|
+
const result = loadTrace(runtime, ctx.args.file).search(ctx.args.pattern, {
|
|
242
|
+
limit,
|
|
243
|
+
context,
|
|
244
|
+
full,
|
|
245
|
+
});
|
|
246
|
+
emit(runtime, result, renderSearch, ctx, false);
|
|
136
247
|
return { ok: true };
|
|
137
248
|
}
|
|
138
249
|
|
|
139
250
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
140
251
|
export async function runToolsCommand(ctx) {
|
|
141
252
|
const { runtime } = ctx.deps;
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
253
|
+
const files = resolveFiles(runtime, ctx);
|
|
254
|
+
if (files.length === 0) return noFiles("tools");
|
|
255
|
+
const result = aggregate(
|
|
256
|
+
files,
|
|
257
|
+
(tq) => tq.toolFrequency(),
|
|
258
|
+
(r) => r.tool,
|
|
259
|
+
loader(runtime),
|
|
146
260
|
);
|
|
261
|
+
emit(runtime, result, renderDefault, ctx, files.length > 1);
|
|
147
262
|
return { ok: true };
|
|
148
263
|
}
|
|
149
264
|
|
|
150
265
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
151
266
|
export async function runToolCommand(ctx) {
|
|
152
267
|
const { runtime } = ctx.deps;
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
loadTrace(runtime, ctx.args.file).tool(ctx.args.name),
|
|
156
|
-
ctx.options,
|
|
157
|
-
);
|
|
268
|
+
const result = loadTrace(runtime, ctx.args.file).tool(ctx.args.name);
|
|
269
|
+
emit(runtime, result, renderDefault, ctx, false);
|
|
158
270
|
return { ok: true };
|
|
159
271
|
}
|
|
160
272
|
|
|
161
273
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
162
274
|
export async function runErrorsCommand(ctx) {
|
|
163
275
|
const { runtime } = ctx.deps;
|
|
164
|
-
|
|
276
|
+
const files = resolveFiles(runtime, ctx);
|
|
277
|
+
if (files.length === 0) return noFiles("errors");
|
|
278
|
+
const result = runOver(files, (tq) => tq.errors(), loader(runtime));
|
|
279
|
+
emit(runtime, result, renderDefault, ctx, files.length > 1);
|
|
165
280
|
return { ok: true };
|
|
166
281
|
}
|
|
167
282
|
|
|
168
283
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
169
284
|
export async function runReasoningCommand(ctx) {
|
|
170
285
|
const { runtime } = ctx.deps;
|
|
286
|
+
const files = resolveFiles(runtime, ctx);
|
|
287
|
+
if (files.length === 0) return noFiles("reasoning");
|
|
171
288
|
const from = ctx.options.from ? parseInt(ctx.options.from, 10) : undefined;
|
|
172
289
|
const to = ctx.options.to ? parseInt(ctx.options.to, 10) : undefined;
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
290
|
+
const result = runOver(
|
|
291
|
+
files,
|
|
292
|
+
(tq) => tq.reasoning({ from, to }),
|
|
293
|
+
loader(runtime),
|
|
177
294
|
);
|
|
295
|
+
emit(runtime, result, renderDefault, ctx, files.length > 1);
|
|
178
296
|
return { ok: true };
|
|
179
297
|
}
|
|
180
298
|
|
|
181
299
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
182
300
|
export async function runTimelineCommand(ctx) {
|
|
183
301
|
const { runtime } = ctx.deps;
|
|
184
|
-
const
|
|
185
|
-
|
|
302
|
+
const files = resolveFiles(runtime, ctx);
|
|
303
|
+
if (files.length === 0) return noFiles("timeline");
|
|
304
|
+
const multi = files.length > 1;
|
|
305
|
+
for (const file of files) {
|
|
306
|
+
if (multi) runtime.proc.stdout.write(`# ${basename(file)}\n`);
|
|
307
|
+
runtime.proc.stdout.write(
|
|
308
|
+
loadTrace(runtime, file).timeline().join("\n") + "\n",
|
|
309
|
+
);
|
|
310
|
+
}
|
|
186
311
|
return { ok: true };
|
|
187
312
|
}
|
|
188
313
|
|
|
314
|
+
/** Select the per-file `stats` query for the active flag combination. */
|
|
315
|
+
function statsQuery(ctx) {
|
|
316
|
+
if (ctx.options.summary) return (tq) => tq.statsSummary();
|
|
317
|
+
if (ctx.options["by-tool"]) return (tq) => tq.statsByTool();
|
|
318
|
+
return (tq) => tq.stats();
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
/** Select the `stats` text renderer for the active flag combination. */
|
|
322
|
+
function statsRenderer(ctx) {
|
|
323
|
+
if (ctx.options.summary) return renderStatsSummary;
|
|
324
|
+
if (ctx.options["by-tool"]) return renderStatsByTool;
|
|
325
|
+
return (result) => renderDefault(result);
|
|
326
|
+
}
|
|
327
|
+
|
|
189
328
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
190
329
|
export async function runStatsCommand(ctx) {
|
|
191
330
|
const { runtime } = ctx.deps;
|
|
192
|
-
|
|
331
|
+
const files = resolveFiles(runtime, ctx);
|
|
332
|
+
if (files.length === 0) return noFiles("stats");
|
|
333
|
+
const multi = files.length > 1;
|
|
334
|
+
const query = statsQuery(ctx);
|
|
335
|
+
// stats results are per-file objects; one block per file (no cross-file sum),
|
|
336
|
+
// tagged with source only when multi-file.
|
|
337
|
+
const results = files.map((file) => ({
|
|
338
|
+
result: query(loadTrace(runtime, file)),
|
|
339
|
+
source: multi ? basename(file) : undefined,
|
|
340
|
+
}));
|
|
341
|
+
|
|
342
|
+
if (ctx.options.format === "json") {
|
|
343
|
+
const payloads = results.map((r) =>
|
|
344
|
+
multi ? { ...r.result, source: r.source } : r.result,
|
|
345
|
+
);
|
|
346
|
+
writeJSON(runtime, multi ? payloads : payloads[0], ctx.options);
|
|
347
|
+
return { ok: true };
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
const render = statsRenderer(ctx);
|
|
351
|
+
const blocks = results.map((r) =>
|
|
352
|
+
multi ? `# ${r.source}\n${render(r.result)}` : render(r.result),
|
|
353
|
+
);
|
|
354
|
+
runtime.proc.stdout.write(blocks.join("\n") + "\n");
|
|
355
|
+
return { ok: true };
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/**
|
|
359
|
+
* Total run cost across every participant (agent, supervisor, judge, and any
|
|
360
|
+
* named profile), summed from each `result` event in the trace and attributed
|
|
361
|
+
* per source. The combined trace from a supervised, facilitated, or discuss
|
|
362
|
+
* session already interleaves all participants, so one file yields the whole
|
|
363
|
+
* run's spend. Default output is `{totalCostUsd, bySource}` JSON; `--markdown`
|
|
364
|
+
* emits a GitHub-flavored block to redirect into `$GITHUB_STEP_SUMMARY`.
|
|
365
|
+
*
|
|
366
|
+
* @param {import("@forwardimpact/libcli").InvocationContext} ctx
|
|
367
|
+
*/
|
|
368
|
+
export async function runCostCommand(ctx) {
|
|
369
|
+
const { runtime } = ctx.deps;
|
|
370
|
+
const cost = computeTraceCost(
|
|
371
|
+
runtime.fsSync.readFileSync(ctx.args.file, "utf8"),
|
|
372
|
+
);
|
|
373
|
+
if (ctx.options.markdown) {
|
|
374
|
+
runtime.proc.stdout.write(renderCostMarkdown(cost));
|
|
375
|
+
} else {
|
|
376
|
+
writeJSON(runtime, cost, ctx.options);
|
|
377
|
+
}
|
|
193
378
|
return { ok: true };
|
|
194
379
|
}
|
|
195
380
|
|
|
381
|
+
/**
 * Render a cost summary as a GitHub-flavored markdown block for a CI step
 * summary: a headline total plus a per-participant table (descending by cost).
 * The table is omitted when `bySource` is empty.
 *
 * Participant names are written into table cells, so a `|` in a name is
 * backslash-escaped and line breaks are collapsed to a space; otherwise such a
 * name would split the row into extra columns or end the table early.
 *
 * @param {{totalCostUsd: number, bySource: Record<string, number>}} cost
 * @returns {string} Markdown text terminated by a single newline.
 */
function renderCostMarkdown(cost) {
  const lines = [
    `### 💰 Run cost: $${cost.totalCostUsd.toFixed(4)}`,
    "",
    "Summed across every participant (agent, supervisor, judge, named profiles).",
  ];
  const sources = Object.entries(cost.bySource).sort((a, b) => b[1] - a[1]);
  if (sources.length > 0) {
    lines.push("", "| Participant | Cost (USD) |", "| --- | --- |");
    for (const [source, usd] of sources) {
      // Keep the name inside a single cell on a single row.
      const cell = source.replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");
      lines.push(`| ${cell} | ${usd.toFixed(4)} |`);
    }
  }
  return lines.join("\n") + "\n";
}
|
|
402
|
+
|
|
196
403
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
197
404
|
export async function runInitCommand(ctx) {
|
|
198
405
|
const { runtime } = ctx.deps;
|
|
199
|
-
|
|
406
|
+
const files = resolveFiles(runtime, ctx);
|
|
407
|
+
if (files.length === 0) return noFiles("init");
|
|
408
|
+
const result = runOver(files, (tq) => [tq.init()], loader(runtime));
|
|
409
|
+
emit(runtime, result, renderDefault, ctx, files.length > 1, true);
|
|
200
410
|
return { ok: true };
|
|
201
411
|
}
|
|
202
412
|
|
|
203
413
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
204
414
|
export async function runTurnCommand(ctx) {
|
|
205
415
|
const { runtime } = ctx.deps;
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
loadTrace(runtime, ctx.args.file).turn(parseInt(ctx.args.index, 10)),
|
|
209
|
-
ctx.options,
|
|
416
|
+
const result = loadTrace(runtime, ctx.args.file).turn(
|
|
417
|
+
parseInt(ctx.args.index, 10),
|
|
210
418
|
);
|
|
419
|
+
emit(runtime, result, renderDefault, ctx, false);
|
|
211
420
|
return { ok: true };
|
|
212
421
|
}
|
|
213
422
|
|
|
214
423
|
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
215
424
|
export async function runFilterCommand(ctx) {
|
|
216
425
|
const { runtime } = ctx.deps;
|
|
426
|
+
const files = resolveFiles(runtime, ctx);
|
|
427
|
+
if (files.length === 0) return noFiles("filter");
|
|
217
428
|
const opts = {};
|
|
218
429
|
if (ctx.options.role) opts.role = ctx.options.role;
|
|
219
430
|
if (ctx.options.tool) opts.toolName = ctx.options.tool;
|
|
220
431
|
if (ctx.options.error) opts.isError = true;
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
432
|
+
const result = runOver(files, (tq) => tq.filter(opts), loader(runtime));
|
|
433
|
+
emit(runtime, result, renderDefault, ctx, files.length > 1);
|
|
434
|
+
return { ok: true };
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
// --- Aggregator verbs (tool-calls, commands, paths, compare) ---
|
|
438
|
+
|
|
439
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
440
|
+
export async function runToolCallsCommand(ctx) {
|
|
441
|
+
const { runtime } = ctx.deps;
|
|
442
|
+
const files = resolveFiles(runtime, ctx);
|
|
443
|
+
if (files.length === 0) return noFiles("tool-calls");
|
|
444
|
+
const result = runOver(files, (tq) => tq.toolCalls(), loader(runtime));
|
|
445
|
+
emit(runtime, result, renderToolCalls, ctx, files.length > 1);
|
|
446
|
+
return { ok: true };
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
450
|
+
export async function runCommandsCommand(ctx) {
|
|
451
|
+
const { runtime } = ctx.deps;
|
|
452
|
+
const files = resolveFiles(runtime, ctx);
|
|
453
|
+
if (files.length === 0) return noFiles("commands");
|
|
454
|
+
const result = runOver(
|
|
455
|
+
files,
|
|
456
|
+
(tq) => tq.commands(ctx.options.match),
|
|
457
|
+
loader(runtime),
|
|
458
|
+
);
|
|
459
|
+
emit(runtime, result, renderCommands, ctx, files.length > 1);
|
|
460
|
+
return { ok: true };
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
464
|
+
export async function runPathsCommand(ctx) {
|
|
465
|
+
const { runtime } = ctx.deps;
|
|
466
|
+
const files = resolveFiles(runtime, ctx);
|
|
467
|
+
if (files.length === 0) return noFiles("paths");
|
|
468
|
+
const result = aggregate(
|
|
469
|
+
files,
|
|
470
|
+
(tq) => tq.paths(ctx.options.prefix),
|
|
471
|
+
(r) => r.path,
|
|
472
|
+
loader(runtime),
|
|
225
473
|
);
|
|
474
|
+
emit(runtime, result, renderPaths, ctx, files.length > 1);
|
|
475
|
+
return { ok: true };
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
/** @param {import("@forwardimpact/libcli").InvocationContext} ctx */
|
|
479
|
+
export async function runCompareCommand(ctx) {
|
|
480
|
+
const { runtime } = ctx.deps;
|
|
481
|
+
const result = compareTwo(
|
|
482
|
+
ctx.args["file-a"],
|
|
483
|
+
ctx.args["file-b"],
|
|
484
|
+
loader(runtime),
|
|
485
|
+
);
|
|
486
|
+
emit(runtime, result, renderCompare, ctx, false);
|
|
226
487
|
return { ok: true };
|
|
227
488
|
}
|
|
228
489
|
|
|
@@ -309,13 +570,32 @@ function parseBuckets(content) {
|
|
|
309
570
|
|
|
310
571
|
// --- Shared helpers ---
|
|
311
572
|
|
|
573
|
+
/**
|
|
574
|
+
* Compute total + per-source cost from raw file content. A structured JSON
|
|
575
|
+
* trace (from `fit-trace download`) carries its total in `summary.totalCostUsd`
|
|
576
|
+
* but no per-source split; raw NDJSON is summed via `sumTraceCost`.
|
|
577
|
+
* @param {string} content - Raw file content (structured JSON or NDJSON).
|
|
578
|
+
* @returns {{totalCostUsd: number, bySource: Record<string, number>}}
|
|
579
|
+
*/
|
|
580
|
+
function computeTraceCost(content) {
|
|
581
|
+
try {
|
|
582
|
+
const parsed = JSON.parse(content);
|
|
583
|
+
if (parsed && typeof parsed.summary?.totalCostUsd === "number") {
|
|
584
|
+
return { totalCostUsd: parsed.summary.totalCostUsd, bySource: {} };
|
|
585
|
+
}
|
|
586
|
+
} catch {
|
|
587
|
+
// Not a single JSON object — treat as NDJSON below.
|
|
588
|
+
}
|
|
589
|
+
return sumTraceCost(content.split("\n"));
|
|
590
|
+
}
|
|
591
|
+
|
|
312
592
|
/**
|
|
313
593
|
* Load a trace file. Supports structured JSON and raw NDJSON.
|
|
314
594
|
* @param {import("@forwardimpact/libutil/runtime").Runtime} runtime
|
|
315
595
|
* @param {string} file
|
|
316
596
|
* @returns {import("../trace-query.js").TraceQuery}
|
|
317
597
|
*/
|
|
318
|
-
function loadTrace(runtime, file) {
|
|
598
|
+
export function loadTrace(runtime, file) {
|
|
319
599
|
const content = runtime.fsSync.readFileSync(file, "utf8");
|
|
320
600
|
|
|
321
601
|
try {
|
package/src/cost.js
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost aggregation over Claude Code NDJSON traces — the single source of
|
|
3
|
+
* truth for "how much did this run cost, across every participant?".
|
|
4
|
+
*
|
|
5
|
+
* The SDK reports the cumulative session cost on each `result` event as
|
|
6
|
+
* `total_cost_usd`. Supervised, facilitated, and discuss sessions interleave
|
|
7
|
+
* one runner's events with another's in a single combined trace, wrapping
|
|
8
|
+
* each in a `{source, seq, event}` envelope; a plain `run` trace carries bare
|
|
9
|
+
* events with no envelope. A judge runs as its own session in a separate
|
|
10
|
+
* trace. In every case the rule is the same: sum the `total_cost_usd` of each
|
|
11
|
+
* `result` event, and keep a per-source breakdown so callers can attribute
|
|
12
|
+
* spend to the agent, supervisor, judge, or any named participant.
|
|
13
|
+
*
|
|
14
|
+
* This mirrors `TraceCollector.handleResult`, which accumulates the same
|
|
15
|
+
* figure for its summary footer — kept as a standalone pure helper so the
|
|
16
|
+
* benchmark runner, the callback command, and `fit-trace cost` share one
|
|
17
|
+
* implementation rather than each re-deriving it (and drifting).
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/** Bucket key for bare (un-enveloped) `run`-mode events: a lone agent session. */
export const UNSOURCED = "agent";

/**
 * Sum `total_cost_usd` across every `result` event in an NDJSON trace.
 *
 * @param {Iterable<string>} lines - NDJSON lines (e.g. `content.split("\n")`).
 *   Blank lines, malformed lines, and lines that parse to a non-object JSON
 *   value (`null`, a number, a string) are skipped.
 * @returns {{totalCostUsd: number, bySource: Record<string, number>}}
 *   `totalCostUsd` is the sum across all participants; `bySource` maps each
 *   envelope `source` (or {@link UNSOURCED} for bare events) to its subtotal.
 */
export function sumTraceCost(lines) {
  let totalCostUsd = 0;
  /** @type {Record<string, number>} */
  const bySource = {};

  for (const line of lines) {
    const parsed = parseCostLine(line);
    if (!parsed) continue;
    const { source, cost } = parsed;
    totalCostUsd += cost;
    bySource[source] = (bySource[source] ?? 0) + cost;
  }

  return { totalCostUsd, bySource };
}

/**
 * Parse a single NDJSON line and return its `result`-event cost contribution,
 * or null when the line is not a string, is blank, is malformed, does not
 * parse to a JSON object, is not a result event, or carries no numeric
 * `total_cost_usd`.
 *
 * @param {string} line
 * @returns {{source: string, cost: number} | null}
 */
function parseCostLine(line) {
  if (typeof line !== "string") return null;
  const trimmed = line.trim();
  if (!trimmed) return null;

  let event;
  try {
    event = JSON.parse(trimmed);
  } catch {
    return null;
  }

  // `JSON.parse` also accepts bare scalars; a line reading `null` would
  // otherwise throw on the property reads below instead of being skipped.
  if (event === null || typeof event !== "object") return null;

  // Unwrap the combined-trace envelope {source, seq, event}; bare events
  // (plain `run` traces) have a `type` and no `source`.
  let source = UNSOURCED;
  if (event.event && !event.type && typeof event.source === "string") {
    source = event.source;
    event = event.event;
  }

  if (event.type !== "result") return null;
  if (typeof event.total_cost_usd !== "number") return null;

  return { source, cost: event.total_cost_usd };
}
|
package/src/index.js
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
export { TraceCollector, createTraceCollector } from "./trace-collector.js";
|
|
2
2
|
export { TraceQuery, createTraceQuery } from "./trace-query.js";
|
|
3
|
+
export { sumTraceCost, UNSOURCED } from "./cost.js";
|
|
3
4
|
export { stripSignatures } from "./signature-filter.js";
|
|
4
5
|
export {
|
|
5
6
|
TraceGitHub,
|
|
6
7
|
createTraceGitHub,
|
|
7
8
|
detectRepoSlug,
|
|
8
9
|
parseGitRemote,
|
|
10
|
+
participantInNames,
|
|
9
11
|
pickTraceArtifact,
|
|
10
12
|
} from "./trace-github.js";
|
|
11
13
|
export { AgentRunner, createAgentRunner } from "./agent-runner.js";
|