@forwardimpact/libeval 0.1.63 → 0.1.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -201
- package/bin/fit-trace.js +166 -31
- package/package.json +1 -1
- package/src/benchmark/judge.js +16 -1
- package/src/benchmark/result.js +12 -0
- package/src/benchmark/runner.js +44 -25
- package/src/commands/callback.js +11 -5
- package/src/commands/trace.js +333 -53
- package/src/cost.js +79 -0
- package/src/index.js +2 -0
- package/src/redaction.js +65 -6
- package/src/trace-collector.js +58 -2
- package/src/trace-github.js +175 -3
- package/src/trace-multi.js +101 -0
- package/src/trace-query.js +294 -45
- package/src/trace-render.js +211 -0
- package/src/trace-usage.js +249 -0
package/src/trace-query.js
CHANGED
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ZERO_USAGE,
|
|
3
|
+
bucketUsageByTool,
|
|
4
|
+
carriedPerTurn,
|
|
5
|
+
computeDivergence,
|
|
6
|
+
isPreChangeDoc,
|
|
7
|
+
perMessageUsage,
|
|
8
|
+
reconcileBucketsToTotals,
|
|
9
|
+
} from "./trace-usage.js";
|
|
10
|
+
|
|
1
11
|
/**
|
|
2
12
|
* Query engine for structured trace documents produced by TraceCollector.
|
|
3
13
|
*
|
|
@@ -33,6 +43,11 @@ export class TraceQuery {
|
|
|
33
43
|
metadata: this.metadata,
|
|
34
44
|
summary: this.summary,
|
|
35
45
|
turnCount: this.turns.length,
|
|
46
|
+
resultEventTurns: this.summary.numTurns ?? null,
|
|
47
|
+
turnPopulations: {
|
|
48
|
+
turnCount: "rendered-trace-turns",
|
|
49
|
+
resultEventTurns: "result-event-turns",
|
|
50
|
+
},
|
|
36
51
|
tools: this.toolFrequency(),
|
|
37
52
|
taskPrompt,
|
|
38
53
|
};
|
|
@@ -277,59 +292,216 @@ export class TraceQuery {
|
|
|
277
292
|
}
|
|
278
293
|
|
|
279
294
|
/**
|
|
280
|
-
* Token usage and cost breakdown per
|
|
295
|
+
* Token usage and cost breakdown, accounted once per API message, plus
|
|
296
|
+
* totals that name their population.
|
|
297
|
+
*
|
|
298
|
+
* A structured document collected before this change (version < 1.2.0)
|
|
299
|
+
* carries no message identity, so it reports its carried last-wins summary
|
|
300
|
+
* labeled as such — corrected figures come from re-running the NDJSON source.
|
|
281
301
|
*
|
|
282
|
-
*
|
|
283
|
-
*
|
|
284
|
-
*
|
|
285
|
-
*
|
|
286
|
-
* in-flight)
|
|
302
|
+
* Otherwise: when the trace carries result events, totals are the SDK's
|
|
303
|
+
* accumulated result-event sums (authoritative); the per-message sums are
|
|
304
|
+
* compared against them and any divergence on input/cacheRead/cacheCreation
|
|
305
|
+
* is surfaced, never silently absorbed. A trace with no result event
|
|
306
|
+
* (truncated or in-flight) falls back to the per-message sums, with output
|
|
307
|
+
* flagged as a streaming-snapshot lower bound and cost/duration/turns
|
|
308
|
+
* reported as unavailable rather than a silent 0.
|
|
287
309
|
* @returns {object}
|
|
288
310
|
*/
|
|
289
311
|
stats() {
|
|
290
|
-
|
|
291
|
-
|
|
312
|
+
if (isPreChangeDoc(this.trace.version)) {
|
|
313
|
+
return this.#carriedDocumentStats();
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
const { perMessage, totals: perMessageTotals } = perMessageUsage(
|
|
317
|
+
this.turns,
|
|
318
|
+
);
|
|
319
|
+
const re = this.summary.tokenUsage;
|
|
320
|
+
|
|
321
|
+
if (re) {
|
|
322
|
+
return {
|
|
323
|
+
totals: {
|
|
324
|
+
inputTokens: re.inputTokens ?? 0,
|
|
325
|
+
outputTokens: re.outputTokens ?? 0,
|
|
326
|
+
cacheReadInputTokens: re.cacheReadInputTokens ?? 0,
|
|
327
|
+
cacheCreationInputTokens: re.cacheCreationInputTokens ?? 0,
|
|
328
|
+
totalCostUsd: this.summary.totalCostUsd ?? 0,
|
|
329
|
+
durationMs: this.summary.durationMs ?? 0,
|
|
330
|
+
durationLabel: "cumulative invocation time",
|
|
331
|
+
resultEventTurns: this.summary.numTurns ?? 0,
|
|
332
|
+
population: "result-event-sum",
|
|
333
|
+
resultEventsPresent: true,
|
|
334
|
+
},
|
|
335
|
+
perTurn: perMessage,
|
|
336
|
+
modelUsage: this.summary.modelUsage ?? null,
|
|
337
|
+
divergence: computeDivergence(perMessageTotals, re),
|
|
338
|
+
};
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
return {
|
|
342
|
+
totals: {
|
|
343
|
+
...perMessageTotals,
|
|
344
|
+
outputIsStreamingSnapshot: true,
|
|
345
|
+
totalCostUsd: null,
|
|
346
|
+
durationMs: null,
|
|
347
|
+
resultEventTurns: null,
|
|
348
|
+
population: "per-message-fallback",
|
|
349
|
+
resultEventsPresent: false,
|
|
350
|
+
},
|
|
351
|
+
perTurn: perMessage,
|
|
352
|
+
modelUsage: this.summary.modelUsage ?? null,
|
|
353
|
+
divergence: null,
|
|
354
|
+
};
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
/**
|
|
358
|
+
* Stats for a pre-change structured document: report the carried last-wins
|
|
359
|
+
* summary and per-stream-event breakdown, each labeled, without claiming
|
|
360
|
+
* result-event parity (the document lacks the message identity it needs).
|
|
361
|
+
* @returns {object}
|
|
362
|
+
*/
|
|
363
|
+
#carriedDocumentStats() {
|
|
364
|
+
const re = this.summary.tokenUsage ?? ZERO_USAGE;
|
|
292
365
|
return {
|
|
293
366
|
totals: {
|
|
294
|
-
|
|
367
|
+
inputTokens: re.inputTokens ?? 0,
|
|
368
|
+
outputTokens: re.outputTokens ?? 0,
|
|
369
|
+
cacheReadInputTokens: re.cacheReadInputTokens ?? 0,
|
|
370
|
+
cacheCreationInputTokens: re.cacheCreationInputTokens ?? 0,
|
|
295
371
|
totalCostUsd: this.summary.totalCostUsd ?? 0,
|
|
296
372
|
durationMs: this.summary.durationMs ?? 0,
|
|
373
|
+
population: "carried-document-summary",
|
|
297
374
|
},
|
|
298
|
-
perTurn,
|
|
375
|
+
perTurn: carriedPerTurn(this.turns),
|
|
376
|
+
modelUsage: this.summary.modelUsage ?? null,
|
|
377
|
+
divergence: null,
|
|
299
378
|
};
|
|
300
379
|
}
|
|
301
|
-
}
|
|
302
380
|
|
|
303
|
-
/**
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
381
|
+
/**
|
|
382
|
+
* One record per `tool_use` block, each paired with its `tool_result`
|
|
383
|
+
* (joined by `toolUseId`) or `result: null` for orphaned calls.
|
|
384
|
+
* @returns {Array<{turnIndex: number, name: string, toolUseId: string, input: object, result: {content: *, isError: boolean}|null}>}
|
|
385
|
+
*/
|
|
386
|
+
toolCalls() {
|
|
387
|
+
const blocks = collectToolUseBlocks(this.turns);
|
|
388
|
+
const results = new Map();
|
|
389
|
+
for (const turn of this.turns) {
|
|
390
|
+
if (turn.role === "tool_result" && turn.toolUseId) {
|
|
391
|
+
results.set(turn.toolUseId, {
|
|
392
|
+
content: turn.content ?? null,
|
|
393
|
+
isError: turn.isError ?? false,
|
|
394
|
+
});
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
return [...blocks.entries()].map(([toolUseId, b]) => ({
|
|
398
|
+
turnIndex: b.turnIndex,
|
|
399
|
+
name: b.name,
|
|
400
|
+
toolUseId,
|
|
401
|
+
input: b.input,
|
|
402
|
+
result: results.get(toolUseId) ?? null,
|
|
403
|
+
}));
|
|
404
|
+
}
|
|
316
405
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
406
|
+
/**
|
|
407
|
+
* One record per `Bash` `tool_use` block, carrying its command text.
|
|
408
|
+
* @param {string} [re] - Optional regex source tested against `input.command`.
|
|
409
|
+
* @returns {Array<{turnIndex: number, toolUseId: string, command: string}>}
|
|
410
|
+
*/
|
|
411
|
+
commands(re) {
|
|
412
|
+
const filter = re === undefined ? null : new RegExp(re);
|
|
413
|
+
const out = [];
|
|
414
|
+
for (const [toolUseId, b] of collectToolUseBlocks(this.turns, "Bash")) {
|
|
415
|
+
const command = b.input?.command ?? "";
|
|
416
|
+
if (filter && !filter.test(command)) continue;
|
|
417
|
+
out.push({ turnIndex: b.turnIndex, toolUseId, command });
|
|
418
|
+
}
|
|
419
|
+
return out;
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
/**
|
|
423
|
+
* Distinct `file_path` arguments across `Read`/`Edit`/`Write` tool calls,
|
|
424
|
+
* frequency-sorted (count desc, path asc tiebreak).
|
|
425
|
+
* @param {string} [prefix] - Optional `startsWith` filter.
|
|
426
|
+
* @returns {Array<{path: string, count: number}>}
|
|
427
|
+
*/
|
|
428
|
+
paths(prefix) {
|
|
429
|
+
return [...collectFilePaths(this.turns).entries()]
|
|
430
|
+
.filter(([path]) => prefix === undefined || path.startsWith(prefix))
|
|
431
|
+
.map(([path, count]) => ({ path, count }))
|
|
432
|
+
.sort((a, b) => b.count - a.count || a.path.localeCompare(b.path));
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
/**
|
|
436
|
+
* Side-by-side comparison of this trace against another peer `TraceQuery`.
|
|
437
|
+
* Identity (case name, participant) comes from the caller — the trace
|
|
438
|
+
* carries no filename.
|
|
439
|
+
* @param {TraceQuery} other
|
|
440
|
+
* @param {{aIdentity: {caseName: string, participant: string|null}, bIdentity: {caseName: string, participant: string|null}}} identities
|
|
441
|
+
* @returns {{a: object, b: object, toolDelta: Array, pathDelta: Array}}
|
|
442
|
+
*/
|
|
443
|
+
compare(other, { aIdentity, bIdentity } = {}) {
|
|
444
|
+
const a = sideSummary(this, aIdentity);
|
|
445
|
+
const b = sideSummary(other, bIdentity);
|
|
446
|
+
|
|
447
|
+
const toolNames = [
|
|
448
|
+
...new Set([...a.toolFreq.keys(), ...b.toolFreq.keys()]),
|
|
449
|
+
];
|
|
450
|
+
const toolDelta = toolNames
|
|
451
|
+
.map((tool) => {
|
|
452
|
+
const av = a.toolFreq.get(tool) ?? 0;
|
|
453
|
+
const bv = b.toolFreq.get(tool) ?? 0;
|
|
454
|
+
return { tool, a: av, b: bv, diff: bv - av };
|
|
455
|
+
})
|
|
456
|
+
.sort(
|
|
457
|
+
(x, y) =>
|
|
458
|
+
Math.abs(y.diff) - Math.abs(x.diff) || x.tool.localeCompare(y.tool),
|
|
459
|
+
);
|
|
460
|
+
|
|
461
|
+
const pathNames = [
|
|
462
|
+
...new Set([...a.pathFreq.keys(), ...b.pathFreq.keys()]),
|
|
463
|
+
];
|
|
464
|
+
const pathDelta = pathNames
|
|
465
|
+
.map((path) => {
|
|
466
|
+
const av = a.pathFreq.get(path) ?? 0;
|
|
467
|
+
const bv = b.pathFreq.get(path) ?? 0;
|
|
468
|
+
return { path, a: av, b: bv, diff: bv - av };
|
|
469
|
+
})
|
|
470
|
+
.sort(
|
|
471
|
+
(x, y) =>
|
|
472
|
+
Math.abs(y.diff) - Math.abs(x.diff) || x.path.localeCompare(y.path),
|
|
473
|
+
);
|
|
474
|
+
|
|
475
|
+
return { a: a.surface, b: b.surface, toolDelta, pathDelta };
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
/**
|
|
479
|
+
* Per-tool token attribution: each `tool_use` block gets an equal share of
|
|
480
|
+
* its host turn's usage; assistant turns with no `tool_use` block contribute
|
|
481
|
+
* full usage to the `(no-tool)` bucket. Per-bucket sums are scaled onto
|
|
482
|
+
* `stats().totals` — the authoritative population (result-event sums when the
|
|
483
|
+
* trace carries them, the per-message fallback otherwise) — so the buckets
|
|
484
|
+
* answer "of the reported total, what share did each tool drive" rather than
|
|
485
|
+
* a separate per-turn re-count that drifts from the headline figure. The
|
|
486
|
+
* largest bucket absorbs the rounding residual on each axis, so the input,
|
|
487
|
+
* output, and `costShare` columns each sum to the corresponding `totals`
|
|
488
|
+
* value (and `1.0`) exactly (criterion-6 invariant).
|
|
489
|
+
* @returns {{perTool: Array<{tool: string, turns: number, inputTokens: number, outputTokens: number, costShare: number}>, totals: object}}
|
|
490
|
+
*/
|
|
491
|
+
statsByTool() {
|
|
492
|
+
const { buckets, bucketTurns } = bucketUsageByTool(this.turns);
|
|
493
|
+
const totals = this.stats().totals;
|
|
494
|
+
const perTool = reconcileBucketsToTotals(buckets, bucketTurns, totals);
|
|
495
|
+
return { perTool, totals };
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
/**
|
|
499
|
+
* Totals-only view — `stats().totals` with no per-turn array.
|
|
500
|
+
* @returns {{totals: object}}
|
|
501
|
+
*/
|
|
502
|
+
statsSummary() {
|
|
503
|
+
return { totals: this.stats().totals };
|
|
331
504
|
}
|
|
332
|
-
return { perTurn, totals };
|
|
333
505
|
}
|
|
334
506
|
|
|
335
507
|
/**
|
|
@@ -364,6 +536,31 @@ function matchesToolName(turn, toolName) {
|
|
|
364
536
|
);
|
|
365
537
|
}
|
|
366
538
|
|
|
539
|
+
/**
 * Collect every assistant `tool_use` block keyed by `toolUseId`, optionally
 * filtered by tool name. The shared join-key source feeding `toolCalls()`,
 * `commands()`, and `collectToolUseIds()`. Insertion order follows turn order.
 *
 * Assistant turns whose `content` is not an array (missing, `null`, a plain
 * string) are skipped instead of raising a TypeError, so one malformed turn
 * cannot abort the whole query.
 * @param {object[]} turns
 * @param {string} [name] - Optional tool-name filter.
 * @returns {Map<string, {turnIndex: number, name: string, input: object}>}
 */
function collectToolUseBlocks(turns, name) {
  const blocks = new Map();
  for (const turn of turns) {
    if (turn.role !== "assistant" || !Array.isArray(turn.content)) continue;
    for (const block of turn.content) {
      // Blocks without a toolUseId cannot be joined to a result; skip them.
      if (block.type !== "tool_use" || !block.toolUseId) continue;
      if (name !== undefined && block.name !== name) continue;
      blocks.set(block.toolUseId, {
        turnIndex: turn.index,
        name: block.name,
        input: block.input,
      });
    }
  }
  return blocks;
}
|
|
563
|
+
|
|
367
564
|
/**
|
|
368
565
|
* Collect all toolUseIds for a given tool name from assistant turns.
|
|
369
566
|
* @param {object[]} turns
|
|
@@ -371,16 +568,68 @@ function matchesToolName(turn, toolName) {
|
|
|
371
568
|
* @returns {Set<string>}
|
|
372
569
|
*/
|
|
373
570
|
function collectToolUseIds(turns, name) {
  // The ids are exactly the keys of the shared tool_use block map.
  const ids = new Set();
  for (const toolUseId of collectToolUseBlocks(turns, name).keys()) {
    ids.add(toolUseId);
  }
  return ids;
}
|
|
573
|
+
|
|
574
|
+
/** Names of the tools (`Read`, `Edit`, `Write`) whose input carries a `file_path` argument. */
|
|
575
|
+
const PATH_TOOLS = new Set(["Read", "Edit", "Write"]);
|
|
576
|
+
|
|
577
|
+
/**
 * Frequency map of distinct `file_path` arguments across `Read`/`Edit`/`Write`
 * tool calls, in first-seen insertion order.
 *
 * Assistant turns whose `content` is not an array (missing, `null`, a plain
 * string) are skipped instead of raising a TypeError.
 * @param {object[]} turns
 * @returns {Map<string, number>}
 */
function collectFilePaths(turns) {
  const counts = new Map();
  for (const turn of turns) {
    if (turn.role !== "assistant" || !Array.isArray(turn.content)) continue;
    for (const block of turn.content) {
      if (block.type !== "tool_use" || !PATH_TOOLS.has(block.name)) continue;
      const filePath = block.input?.file_path;
      // Calls without a string file_path are not counted.
      if (typeof filePath !== "string") continue;
      counts.set(filePath, (counts.get(filePath) ?? 0) + 1);
    }
  }
  return counts;
}
|
|
596
|
+
|
|
597
|
+
/**
 * Summarise one side of a comparison: the printable surface plus the tool and
 * path frequency maps that the delta tables are computed from. A trace with
 * no turns gets a `(empty)` marker in its metadata.
 * @param {TraceQuery} query
 * @param {{caseName: string, participant: string|null}} [identity]
 * @returns {{surface: object, toolFreq: Map<string, number>, pathFreq: Map<string, number>}}
 */
function sideSummary(
  query,
  identity = { caseName: "(unknown)", participant: null },
) {
  const toolFreq = new Map();
  for (const { tool, count } of query.toolFrequency()) {
    toolFreq.set(tool, count);
  }
  const pathFreq = collectFilePaths(query.turns);

  const turnCount = query.turns.length;
  const metadata = {
    caseName: identity.caseName,
    participant: identity.participant ?? null,
    ...(turnCount === 0 ? { marker: "(empty)" } : {}),
  };

  const tools = [...toolFreq.keys()].sort();
  const paths = [...pathFreq.keys()].sort();

  const surface = {
    metadata,
    turnCount,
    tools,
    paths,
    pathCount: paths.length,
    cost: query.stats().totals.totalCostUsd,
  };
  return { surface, toolFreq, pathFreq };
}
|
|
385
634
|
|
|
386
635
|
/**
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text renderers for `fit-trace` query output.
|
|
3
|
+
*
|
|
4
|
+
* One named export per renderable verb. Each renderer accepts the query result
|
|
5
|
+
* plus `{multi, signatures}` and returns a string. `multi` controls
|
|
6
|
+
* source-attribution prefixing (`grep -H` convention); record-per-line
|
|
7
|
+
* renderers prepend `<basename>:`, block renderers emit `# <basename>` headers.
|
|
8
|
+
*
|
|
9
|
+
* Internal module — imported by `commands/trace.js` and tests by relative
|
|
10
|
+
* path, never re-exported from `src/index.js`.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Collapse newlines/tabs in a value to a single-line, grep-friendly string.
 *
 * Strings are used as-is; everything else is JSON-encoded (`null` and
 * `undefined` both become `"null"`). Values `JSON.stringify` cannot encode
 * and returns `undefined` for (functions, symbols) fall back to `String()`
 * so the renderer never calls `.replace` on `undefined`.
 * @param {*} value
 * @returns {string}
 */
function oneLine(value) {
  const str =
    typeof value === "string"
      ? value
      : (JSON.stringify(value ?? null) ?? String(value));
  return str.replace(/[\r\n\t]+/g, " ").trim();
}
|
|
18
|
+
|
|
19
|
+
/**
 * Bucket records by their `source` field, keeping first-seen source order and
 * record order within each bucket. Records with no `source` (null/undefined)
 * share the `""` bucket.
 * @param {object[]} records
 * @returns {Map<string, object[]>}
 */
function groupBySource(records) {
  const bySource = new Map();
  for (const entry of records) {
    const source = entry.source ?? "";
    const bucket = bySource.get(source);
    if (bucket === undefined) {
      bySource.set(source, [entry]);
    } else {
      bucket.push(entry);
    }
  }
  return bySource;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Render record-per-line output, prefixing each line with `<source>:` when
 * multi-file. `lineOf` maps one record to its text line. Records without a
 * `source` are never prefixed, even in multi-file mode.
 * @param {object[]} records
 * @param {(record: object) => string} lineOf
 * @param {{multi?: boolean}} [opts] - Omitting it is treated as single-file.
 * @returns {string}
 */
function renderLines(records, lineOf, { multi } = {}) {
  return records
    .map((record) => {
      const line = lineOf(record);
      return multi && record.source ? `${record.source}:${line}` : line;
    })
    .join("\n");
}
|
|
43
|
+
|
|
44
|
+
/**
 * Render a block per record. `blockOf` maps one record to a multi-line string;
 * multi-file output groups records by source and puts a `# <source>` header
 * in front of each group.
 * @param {object[]} records
 * @param {(record: object) => string} blockOf
 * @param {{multi?: boolean}} [opts] - Omitting it is treated as single-file.
 * @returns {string}
 */
function renderBlocks(records, blockOf, { multi } = {}) {
  if (!multi) return records.map(blockOf).join("\n");

  const out = [];
  for (const [source, group] of groupBySource(records)) {
    out.push(`# ${source}`, ...group.map(blockOf));
  }
  return out.join("\n");
}
|
|
61
|
+
|
|
62
|
+
/**
 * Three lines per tool call: `[turnIdx] <Tool> <toolUseId>`, then the input
 * and the result content, each collapsed to one line. Calls with no result
 * print `(no result)`.
 */
export function renderToolCalls(records, opts = {}) {
  const blockOf = ({ turnIndex, name, toolUseId, input, result }) => {
    const inText = oneLine(input);
    const outText = result ? oneLine(result.content) : "(no result)";
    return `[${turnIndex}] ${name} ${toolUseId}\n in: ${inText}\n out: ${outText}`;
  };
  return renderBlocks(records, blockOf, opts);
}
|
|
77
|
+
|
|
78
|
+
/** `[turnIdx] <command>` per line; newlines and tabs in the command are collapsed to spaces. */
export function renderCommands(records, opts = {}) {
  const lineOf = ({ turnIndex, command }) =>
    `[${turnIndex}] ${oneLine(command)}`;
  return renderLines(records, lineOf, opts);
}
|
|
86
|
+
|
|
87
|
+
/** One `<count>\t<path>` line per record, in the order the records arrive. */
export function renderPaths(records, opts = {}) {
  const lineOf = ({ count, path }) => `${count}\t${path}`;
  return renderLines(records, lineOf, opts);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Comparison report: an `A:`/`B:` identity header, the turns/tools/paths/cost
 * rows, then the Tool and Path delta tables, separated by blank lines.
 */
export function renderCompare(result) {
  const { a, b, toolDelta, pathDelta } = result;

  // Identity line for one side; a missing participant prints as "(none)" and
  // the optional marker is appended after a space.
  const header = (label, { caseName, participant, marker }) => {
    const who = participant == null ? "(none)" : participant;
    const suffix = marker ? ` ${marker}` : "";
    return `${label}: ${caseName} / ${who}${suffix}`;
  };

  return [
    header("A", a.metadata),
    header("B", b.metadata),
    "",
    `turns | ${a.turnCount} | ${b.turnCount}`,
    `tools | ${a.tools.length} | ${b.tools.length}`,
    `paths | ${a.pathCount} | ${b.pathCount}`,
    `cost | ${a.cost} | ${b.cost}`,
    "",
    "Tool | A | B | Δ",
    ...toolDelta.map((d) => `${d.tool} | ${d.a} | ${d.b} | ${d.diff}`),
    "",
    "Path | A | B | Δ",
    ...pathDelta.map((d) => `${d.path} | ${d.a} | ${d.b} | ${d.diff}`),
  ].join("\n");
}
|
|
124
|
+
|
|
125
|
+
/**
 * `Tool | Turns | In | Out | Share` table, rows sorted by Share descending.
 *
 * The ordering is enforced here on a copy of `result.perTool` rather than
 * assumed from the caller; the sort is stable, so rows with equal share keep
 * their incoming relative order and the input array is not mutated.
 * Token counts are rounded to integers and the share is printed with four
 * decimals.
 * @param {{perTool: Array<{tool: string, turns: number, inputTokens: number, outputTokens: number, costShare: number}>}} result
 * @returns {string}
 */
export function renderStatsByTool(result) {
  const rows = [...result.perTool].sort((x, y) => y.costShare - x.costShare);
  const lines = ["Tool | Turns | In | Out | Share"];
  for (const row of rows) {
    const inTokens = Math.round(row.inputTokens);
    const outTokens = Math.round(row.outputTokens);
    lines.push(
      `${row.tool} | ${row.turns} | ${inTokens} | ${outTokens} | ${row.costShare.toFixed(4)}`,
    );
  }
  return lines.join("\n");
}
|
|
137
|
+
|
|
138
|
+
/**
 * Totals block only: six `key: value` lines (token counts, cost, duration)
 * read from `result.totals`. Other keys on the totals object are not printed.
 */
export function renderStatsSummary(result) {
  const { totals } = result;
  const fields = [
    "inputTokens",
    "outputTokens",
    "cacheReadInputTokens",
    "cacheCreationInputTokens",
    "totalCostUsd",
    "durationMs",
  ];
  return fields.map((field) => `${field}: ${totals[field]}`).join("\n");
}
|
|
150
|
+
|
|
151
|
+
/**
 * One line per match: `[turnIdx] <excerpt>`, preceded by `<source>:` when a
 * multi-file prefix applies. Hits without `matches` contribute no lines.
 */
export function renderSearch(records, opts = {}) {
  const lines = [];
  for (const hit of records) {
    const turnIdx = hit.turn?.index;
    const prefix = multiPrefix(hit, opts);
    lines.push(
      ...Array.from(
        hit.matches ?? [],
        (match) => `${prefix}[${turnIdx}] ${oneLine(match)}`,
      ),
    );
  }
  return lines.join("\n");
}
|
|
163
|
+
|
|
164
|
+
/**
 * Source prefix (`<source>:`) for a multi-file record, or `""` when not in
 * multi-file mode or the record carries no `source`.
 * @param {object} record
 * @param {{multi?: boolean}} [opts] - Omitting it is treated as single-file.
 * @returns {string}
 */
function multiPrefix(record, { multi } = {}) {
  if (!multi || !record.source) return "";
  return `${record.source}:`;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Fallback renderer for verbs without a dedicated one. A single result is
 * treated as a one-element list; each record is stripped of its `source`
 * field and printed as `key: value` lines, with multi-file grouping handled
 * by `renderBlocks`.
 * @param {object[]|object} result
 * @param {{multi: boolean}} opts
 * @returns {string}
 */
export function renderDefault(result, opts = {}) {
  const blockOf = (record) => recordBlock(stripSource(record));
  return renderBlocks(Array.isArray(result) ? result : [result], blockOf, opts);
}
|
|
184
|
+
|
|
185
|
+
/**
 * Turn one record into `key: value` lines. Scalar values (including null and
 * undefined) are printed via `String()`; object and array values are
 * collapsed to a single line with `oneLine`. Anything that is not a plain
 * object record (null, scalar, array) is rendered as one `oneLine` line.
 * @param {*} record
 * @returns {string}
 */
function recordBlock(record) {
  const isKeyedRecord =
    record != null && typeof record === "object" && !Array.isArray(record);
  if (!isKeyedRecord) return oneLine(record);

  const lines = [];
  for (const [key, value] of Object.entries(record)) {
    const nested = value != null && typeof value === "object";
    lines.push(`${key}: ${nested ? oneLine(value) : String(value)}`);
  }
  return lines.join("\n");
}
|
|
203
|
+
|
|
204
|
+
/**
 * Return a shallow copy of the record without its `source` field. Values
 * that are not plain object records (null, scalars, arrays) are returned
 * unchanged; the input record is never modified.
 */
function stripSource(record) {
  const isKeyedRecord =
    record != null && typeof record === "object" && !Array.isArray(record);
  if (!isKeyedRecord) return record;

  const copy = { ...record };
  delete copy.source;
  return copy;
}
|