@sebastiantuyu/agest 0.3.3-next.5 → 0.3.3-next.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/tracing.js +71 -17
- package/dist/context.js +13 -0
- package/dist/preview.js +93 -0
- package/dist/reporter.js +3 -0
- package/dist/reports.d.ts +37 -0
- package/dist/reports.js +126 -0
- package/dist/types.d.ts +2 -0
- package/dist/waterfall.d.ts +11 -0
- package/dist/waterfall.js +46 -0
- package/package.json +1 -1
package/dist/adapters/tracing.js
CHANGED
|
@@ -48,6 +48,7 @@ export async function createTracingHandle(baselineMs) {
|
|
|
48
48
|
const endMs = now() - baselineMs;
|
|
49
49
|
const tokens = extractTokensFromLLMOutput(output);
|
|
50
50
|
const providerCost = extractProviderCost(output);
|
|
51
|
+
const cachedInputTokens = extractCachedTokens(output);
|
|
51
52
|
const name = open.name ?? extractModelNameFromOutput(output) ?? "model";
|
|
52
53
|
if (name && name !== "model")
|
|
53
54
|
lastModelName = name;
|
|
@@ -64,6 +65,7 @@ export async function createTracingHandle(baselineMs) {
|
|
|
64
65
|
endMs,
|
|
65
66
|
durationMs: Math.max(0, endMs - open.startMs),
|
|
66
67
|
tokens,
|
|
68
|
+
cachedInputTokens,
|
|
67
69
|
cost: stripCostIfEmpty(cost),
|
|
68
70
|
});
|
|
69
71
|
}
|
|
@@ -82,10 +84,10 @@ export async function createTracingHandle(baselineMs) {
|
|
|
82
84
|
error: err?.message ?? String(err),
|
|
83
85
|
});
|
|
84
86
|
}
|
|
85
|
-
handleToolStart(tool, _input, runId) {
|
|
87
|
+
handleToolStart(tool, _input, runId, _parentRunId, _tags, _metadata, runName) {
|
|
86
88
|
openTools.set(runId, {
|
|
87
89
|
startMs: now() - baselineMs,
|
|
88
|
-
name: extractToolName(tool) ?? "tool",
|
|
90
|
+
name: extractToolName(tool, runName) ?? "tool",
|
|
89
91
|
});
|
|
90
92
|
}
|
|
91
93
|
handleToolEnd(_output, runId) {
|
|
@@ -252,30 +254,82 @@ function extractTokensFromLLMOutput(output) {
|
|
|
252
254
|
return undefined;
|
|
253
255
|
return { input, output: out };
|
|
254
256
|
}
|
|
257
|
+
/**
 * Gather every place on an LLM result where LangChain/OpenRouter may have put
 * usage information, in priority order, keeping only real objects.
 *
 * @param {object} [output] LLM result as passed to the end-of-LLM callback.
 * @returns {object[]} Candidate usage objects (possibly empty).
 */
function usageObjects(output) {
    const llm = output?.llmOutput;
    const message = output?.generations?.[0]?.[0]?.message;
    const responseMeta = message?.response_metadata;
    // Order matters: callers take the first object that carries the field they want.
    const candidates = [
        llm?.usage,
        llm?.tokenUsage,
        llm?.estimatedTokenUsage,
        llm,
        message?.usage_metadata,
        responseMeta?.usage,
        responseMeta?.tokenUsage,
        responseMeta?.estimatedTokenUsage,
        responseMeta,
        message?.additional_kwargs?.usage,
    ];
    const found = [];
    for (const candidate of candidates) {
        if (candidate && typeof candidate === "object") {
            found.push(candidate);
        }
    }
    return found;
}
|
|
273
|
+
/**
 * OpenRouter (with `usage: { include: true }`) reports real USD cost. LangChain
 * surfaces it inconsistently across versions, so scan the known usage objects
 * for a numeric `cost` / `total_cost` / `cost_usd` /
 * `cost_details.upstream_inference_cost`.
 *
 * @param {object} [output] LLM result as passed to the end-of-LLM callback.
 * @returns {number|undefined} The first finite cost found, or `undefined`.
 */
function extractProviderCost(output) {
    const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
    for (const usage of usageObjects(output)) {
        // Check every known key on this object before moving on, so a NaN or
        // Infinity under one key cannot hide a valid figure under another.
        const cost = [
            usage.cost,
            usage.total_cost,
            usage.cost_usd,
            usage.cost_details?.upstream_inference_cost,
        ].find(isFiniteNumber);
        if (cost !== undefined) {
            return cost;
        }
    }
    return undefined;
}
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
291
|
+
/**
 * Look up prompt-cache-hit input tokens on the usage objects of an LLM result.
 * Cached tokens are billed at a fraction of the normal input rate, so exposing
 * them lets the report explain a provider cost below the flat-table estimate.
 *
 * @param {object} [output] LLM result as passed to the end-of-LLM callback.
 * @returns {number|undefined} First positive cached-token count, else `undefined`.
 */
function extractCachedTokens(output) {
    for (const usage of usageObjects(output)) {
        // Same precedence as the provider field names are tried in: the first
        // non-nullish value wins, even if it later fails the positivity check.
        let count = usage.input_token_details?.cache_read;
        count ??= usage.prompt_tokens_details?.cached_tokens;
        count ??= usage.cache_read_input_tokens;
        count ??= usage.cached_tokens;
        if (typeof count !== "number" || !(count > 0)) {
            continue;
        }
        return count;
    }
    return undefined;
}
|
|
307
|
+
/** Generic LangChain tool class names that say nothing about which tool ran. */
const TOOL_CLASS_NAMES = new Set([
    "DynamicStructuredTool",
    "DynamicTool",
    "StructuredTool",
    "Tool",
]);
/**
 * Choose the most specific display name for a tool run.
 *
 * Preference order: a non-generic `runName`, a non-generic `tool.name`,
 * `tool.kwargs.name`, the last non-generic segment of `tool.id`, then whatever
 * `tool.name` is, and finally `runName` as-is.
 *
 * @param {object} [tool] Serialized tool descriptor handed to the callback.
 * @param {string} [runName] Name LangChain assigned to the run (e.g. "search_recipes").
 * @returns {string|undefined} Best available name, or `undefined` if none.
 */
function extractToolName(tool, runName) {
    const isGeneric = (candidate) => TOOL_CLASS_NAMES.has(candidate);
    if (runName && !isGeneric(runName)) {
        return runName;
    }
    if (!tool) {
        return runName;
    }
    const ownName = typeof tool.name === "string" ? tool.name : undefined;
    if (ownName !== undefined && !isGeneric(ownName)) {
        return ownName;
    }
    const kwargName = tool.kwargs?.name;
    if (typeof kwargName === "string") {
        return kwargName;
    }
    const idPath = tool.id;
    if (Array.isArray(idPath) && idPath.length > 0) {
        const tail = String(idPath[idPath.length - 1]);
        if (!isGeneric(tail)) {
            return tail;
        }
    }
    // Nothing specific was found: a generic class name still beats nothing.
    return ownName !== undefined ? ownName : runName;
}
|
|
279
333
|
function stripCostIfEmpty(cost) {
|
|
280
334
|
if (cost.source === "unavailable" && cost.totalUsd == null)
|
|
281
335
|
return undefined;
|
package/dist/context.js
CHANGED
|
@@ -4,6 +4,7 @@ import { formatReport, writeReport, writeDiffEntry } from "./reporter";
|
|
|
4
4
|
import { logger, c } from "./logger";
|
|
5
5
|
import { loadConfig } from "./config";
|
|
6
6
|
import { setPricingOverrides } from "./pricing";
|
|
7
|
+
import { renderTerminalWaterfall } from "./waterfall";
|
|
7
8
|
import { PromisePool } from "@supercharge/promise-pool";
|
|
8
9
|
export class SceneBuilder {
|
|
9
10
|
_prompt;
|
|
@@ -129,6 +130,18 @@ export class AgentContext {
|
|
|
129
130
|
const sigColor = sig >= 0.95 ? c.green : sig >= 0.80 ? c.yellow : c.red;
|
|
130
131
|
logger.info(`${indent} ${c.dim("significance:")} ${sigColor(`${(sig * 100).toFixed(1)}%`)} ${c.dim(`(pass rate: ${((result.passRate ?? 0) * 100).toFixed(1)}%)`)}`);
|
|
131
132
|
}
|
|
133
|
+
if (result.events && result.events.length > 0) {
|
|
134
|
+
const costLabel = result.costUsd != null
|
|
135
|
+
? ` ${c.dim("·")} ${c.green(`$${Number(result.costUsd.toFixed(4))}`)}`
|
|
136
|
+
: "";
|
|
137
|
+
const tokLabel = result.tokens
|
|
138
|
+
? ` ${c.dim(`(${result.tokens.input}→${result.tokens.output} tok)`)}`
|
|
139
|
+
: "";
|
|
140
|
+
logger.info(`${indent} ${c.dim("waterfall:")}${tokLabel}${costLabel}`);
|
|
141
|
+
for (const line of renderTerminalWaterfall(result.events, { indent: `${indent} ` })) {
|
|
142
|
+
logger.info(line);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
132
145
|
logger.debug(`${indent} response: ${result.response.text?.slice(0, 120)}`);
|
|
133
146
|
};
|
|
134
147
|
if (hasSuites) {
|
package/dist/preview.js
CHANGED
|
@@ -189,6 +189,91 @@ function renderFailedCases(cases) {
|
|
|
189
189
|
</ul>
|
|
190
190
|
</details>`;
|
|
191
191
|
}
|
|
192
|
+
// Waterfall bar colours: model calls, tool calls, and any event that errored.
const WF_MODEL = "#38bdf8";
const WF_TOOL = "#facc15";
const WF_ERROR = "#f87171";
/**
 * Format a USD amount for the HTML preview, rounded to at most four decimals
 * with trailing zeros dropped.
 *
 * @param {number} n Amount in USD.
 * @returns {string} e.g. "$0", "$0.5", "$1.2346".
 */
function fmtUsdHtml(n) {
    return n === 0 ? "$0" : `$${Number(n.toFixed(4))}`;
}
|
|
200
|
+
/**
 * Chrome-DevTools-style waterfall for a report's per-scene timelines. Bars are
 * absolutely positioned within a track by start_ms / duration_ms. Returns "" if
 * the report carries no timeline data (older reports / non-traced executors).
 *
 * @param {object} report Parsed report; only `report.scenes` is read.
 * @returns {string} HTML fragment (a `<details>` element) or "".
 */
function renderWaterfallHtml(report) {
    const scenes = (report.scenes ?? []).filter((s) => s.timeline && s.timeline.length > 0);
    if (scenes.length === 0) {
        return "";
    }
    const sceneBlocks = scenes
        .map((scene) => {
            const events = scene.timeline;
            const t0 = Math.min(...events.map((e) => e.startMs));
            const tEnd = Math.max(...events.map((e) => e.startMs + e.durationMs));
            // Never divide by zero when every event is instantaneous.
            const span = Math.max(1, tEnd - t0);
            const rows = events
                .map((e) => {
                    // Bars get a minimum visible width; the left offset is then
                    // capped so that a short bar at the very end of the span
                    // stays inside the track instead of overflowing it.
                    const width = Math.min(100, Math.max(0.6, (e.durationMs / span) * 100));
                    const left = Math.max(0, Math.min(((e.startMs - t0) / span) * 100, 100 - width));
                    const bg = e.error ? WF_ERROR : e.kind === "model" ? WF_MODEL : WF_TOOL;
                    const icon = e.kind === "model" ? "◆" : "▸";
                    const cost = e.costUsd != null ? fmtUsdHtml(e.costUsd) : "";
                    // Hover tooltip: only the parts that are actually known.
                    const tip = [
                        `${e.kind}: ${e.name}`,
                        `start ${Math.round(e.startMs)}ms · ${Math.round(e.durationMs)}ms`,
                        e.tokens ? `${e.tokens.input}→${e.tokens.output} tok` : "",
                        e.cachedInputTokens ? `${e.cachedInputTokens} cached` : "",
                        cost,
                        e.error ? `error: ${e.error}` : "",
                    ]
                        .filter(Boolean)
                        .join(" · ");
                    return `
        <div class="flex items-center gap-2 text-[11px] leading-5">
          <span class="w-44 shrink-0 truncate ${e.error ? "text-red-400" : "text-zinc-400"}" title="${escHtml(e.name)}">
            <span style="color:${bg}">${icon}</span> ${escHtml(e.name)}
          </span>
          <div class="relative flex-1 h-3 bg-zinc-800/40 rounded">
            <div class="absolute top-0 h-3 rounded" style="left:${left.toFixed(2)}%;width:${width.toFixed(2)}%;background:${bg}" title="${escHtml(tip)}"></div>
          </div>
          <span class="w-16 shrink-0 text-right text-zinc-500">${Math.round(e.durationMs)}ms</span>
          <span class="w-16 shrink-0 text-right text-zinc-500">${cost}</span>
        </div>`;
                })
                .join("\n");
            // Scene header summary, escaped as a whole because costSource comes from the report file.
            const meta = [
                scene.tokens ? `${scene.tokens.input}→${scene.tokens.output} tok` : "",
                scene.costUsd != null ? fmtUsdHtml(scene.costUsd) : "",
                scene.costSource ? scene.costSource : "",
                scene.durationMs != null ? `${Math.round(scene.durationMs)}ms` : "",
            ]
                .filter(Boolean)
                .join(" · ");
            return `
      <div>
        <div class="flex items-center justify-between mb-1.5">
          <span class="text-xs text-zinc-300 truncate" title="${escHtml(scene.prompt)}">${escHtml(scene.prompt)}</span>
          <span class="text-[11px] text-zinc-500 shrink-0 ml-3">${escHtml(meta)}</span>
        </div>
        <div class="space-y-1">${rows}</div>
      </div>`;
        })
        .join("\n");
    return `
  <details class="mt-2" open>
    <summary class="text-xs text-sky-400 cursor-pointer hover:text-sky-300 select-none">
      waterfall · ${scenes.length} scene${scenes.length !== 1 ? "s" : ""}
    </summary>
    <div class="mt-3 mb-2 pl-3 border-l border-zinc-800 space-y-5">
      <div class="flex gap-4 text-[10px] text-zinc-500">
        <span><span style="color:${WF_MODEL}">◆</span> model</span>
        <span><span style="color:${WF_TOOL}">▸</span> tool</span>
      </div>
      ${sceneBlocks}
    </div>
  </details>`;
}
|
|
192
277
|
function renderRunRow(entry, idx) {
|
|
193
278
|
const { report, delta, diffLines } = entry;
|
|
194
279
|
const pct = report.successRate * 100;
|
|
@@ -233,6 +318,7 @@ function renderRunRow(entry, idx) {
|
|
|
233
318
|
</div>
|
|
234
319
|
<div class="ml-10 mt-0.5 flex gap-3 flex-wrap">${dimTags}</div>
|
|
235
320
|
${diffHtml}
|
|
321
|
+
<div class="ml-10">${renderWaterfallHtml(report)}</div>
|
|
236
322
|
</div>`;
|
|
237
323
|
}
|
|
238
324
|
// ---------------------------------------------------------------------------
|
|
@@ -990,6 +1076,12 @@ function renderSingleRun(report) {
|
|
|
990
1076
|
<p class="text-zinc-300">${Math.round(report.averageOutputTokensPerCase)}</p>
|
|
991
1077
|
</div>`
|
|
992
1078
|
: ""}
|
|
1079
|
+
${report.totalCostUsd != null
|
|
1080
|
+
? `<div>
|
|
1081
|
+
<span class="text-zinc-500">Total Cost</span>
|
|
1082
|
+
<p class="text-zinc-300">${fmtUsdHtml(report.totalCostUsd)}${report.totalInputTokens != null ? ` <span class="text-zinc-600">· ${report.totalInputTokens}→${report.totalOutputTokens} tok</span>` : ""}</p>
|
|
1083
|
+
</div>`
|
|
1084
|
+
: ""}
|
|
993
1085
|
${report.tools && report.tools.length > 0
|
|
994
1086
|
? `<div>
|
|
995
1087
|
<span class="text-zinc-500">Tools</span>
|
|
@@ -997,6 +1089,7 @@ function renderSingleRun(report) {
|
|
|
997
1089
|
</div>`
|
|
998
1090
|
: ""}
|
|
999
1091
|
</div>
|
|
1092
|
+
${renderWaterfallHtml(report)}
|
|
1000
1093
|
${failedSection}
|
|
1001
1094
|
</div>`;
|
|
1002
1095
|
}
|
package/dist/reporter.js
CHANGED
|
@@ -126,6 +126,9 @@ function renderTimelineEvent(e) {
|
|
|
126
126
|
if (e.tokens) {
|
|
127
127
|
out.push(` tokens: { input: ${e.tokens.input}, output: ${e.tokens.output} }`);
|
|
128
128
|
}
|
|
129
|
+
if (e.cachedInputTokens != null && e.cachedInputTokens > 0) {
|
|
130
|
+
out.push(` cached_input_tokens: ${e.cachedInputTokens}`);
|
|
131
|
+
}
|
|
129
132
|
if (e.cost?.totalUsd != null) {
|
|
130
133
|
out.push(` cost_usd: ${formatUsd(e.cost.totalUsd)}`);
|
|
131
134
|
out.push(` cost_source: ${e.cost.source}`);
|
package/dist/reports.d.ts
CHANGED
|
@@ -9,6 +9,32 @@ export interface ParsedSuiteResult {
|
|
|
9
9
|
response?: string;
|
|
10
10
|
}>;
|
|
11
11
|
}
|
|
12
|
+
/** One timeline (waterfall) entry of a scene, as read back from a report file. */
export interface ParsedTimelineEvent {
    /** Whether the entry is a model call or a tool call. */
    kind: "model" | "tool";
    /** Display name of the model or tool. */
    name: string;
    /** Start offset in milliseconds (report key `start_ms`). */
    startMs: number;
    /** Duration in milliseconds (report key `duration_ms`). */
    durationMs: number;
    /** Input/output token counts, when recorded. */
    tokens?: { input: number; output: number };
    /** Prompt-cache-hit input tokens (report key `cached_input_tokens`). */
    cachedInputTokens?: number;
    /** Cost in USD (report key `cost_usd`). */
    costUsd?: number;
    /** Where the cost figure came from (report key `cost_source`). */
    costSource?: string;
    /** Run index (report key `run_index`). */
    runIndex?: number;
    /** Error message, when the event failed. */
    error?: string;
}
|
|
27
|
+
/** Per-scene observability data read back from a report's `scenes:` block. */
export interface ParsedScene {
    /** The scene's prompt text. */
    prompt: string;
    /** Scene duration in milliseconds (report key `duration_ms`). */
    durationMs?: number;
    /** Input/output token counts for the scene, when recorded. */
    tokens?: { input: number; output: number };
    /** Scene cost in USD (report key `cost_usd`). */
    costUsd?: number;
    /** Where the cost figure came from (report key `cost_source`). */
    costSource?: string;
    /** Timeline events of the scene, in report order. */
    timeline?: ParsedTimelineEvent[];
}
|
|
12
38
|
export interface ParsedReport {
|
|
13
39
|
name?: string;
|
|
14
40
|
systemPromptHash?: string;
|
|
@@ -28,6 +54,10 @@ export interface ParsedReport {
|
|
|
28
54
|
timestamp: string;
|
|
29
55
|
averageInputTokensPerCase?: number;
|
|
30
56
|
averageOutputTokensPerCase?: number;
|
|
57
|
+
totalInputTokens?: number;
|
|
58
|
+
totalOutputTokens?: number;
|
|
59
|
+
totalCostUsd?: number;
|
|
60
|
+
scenes?: ParsedScene[];
|
|
31
61
|
suites?: ParsedSuiteResult[];
|
|
32
62
|
source: string;
|
|
33
63
|
}
|
|
@@ -44,6 +74,13 @@ export declare function parseFailedCases(content: string): Array<{
|
|
|
44
74
|
}>;
|
|
45
75
|
export declare function parseDimensions(content: string): Record<string, string> | undefined;
|
|
46
76
|
export declare function parseSuites(content: string): ParsedSuiteResult[] | undefined;
|
|
77
|
+
/**
|
|
78
|
+
* Parse the `scenes:` block (per-scene tokens/cost + timeline waterfall) from a
|
|
79
|
+
* report. The emitted format is fixed (see reporter.ts `renderSceneObservability`),
|
|
80
|
+
* so this hand-parses by indentation: scenes start at 8 spaces, scene fields at
|
|
81
|
+
* 10, timeline events at 14, event fields at 16.
|
|
82
|
+
*/
|
|
83
|
+
export declare function parseScenes(content: string): ParsedScene[] | undefined;
|
|
47
84
|
export declare function parseReport(content: string, source: string): ParsedReport;
|
|
48
85
|
export declare function findReports(dir: string, depth?: number): Promise<string[]>;
|
|
49
86
|
export declare function loadDiffEntry(hash: string): Promise<DiffEntry | null>;
|
package/dist/reports.js
CHANGED
|
@@ -124,6 +124,124 @@ export function parseSuites(content) {
|
|
|
124
124
|
suites.push(current);
|
|
125
125
|
return suites.length > 0 ? suites : undefined;
|
|
126
126
|
}
|
|
127
|
+
/**
 * Pull `{ input, output }` token counts out of a report value such as
 * `{ input: 10, output: 5 }`.
 *
 * @param {string} raw Text following the `tokens:` key.
 * @returns {{input: number, output: number}|undefined} Counts, or `undefined`
 *   when the text does not have the expected shape.
 */
function parseTokens(raw) {
    const found = raw.match(/input:\s*(\d+),\s*output:\s*(\d+)/);
    if (found) {
        const [, inputDigits, outputDigits] = found;
        return { input: parseInt(inputDigits, 10), output: parseInt(outputDigits, 10) };
    }
    return undefined;
}
|
|
133
|
+
/**
 * Parse the `scenes:` block (per-scene tokens/cost + timeline waterfall) from a
 * report. The emitted format is fixed (see reporter.ts `renderSceneObservability`):
 * scenes start at 8 spaces, scene fields at 10, timeline events at 14, event
 * fields at 16. Lines are recognised by their trimmed content and classified by
 * relative indentation, so the parser does not depend on exact column counts
 * and also accepts CRLF line endings.
 *
 * @param {string} content Full text of a report file.
 * @returns {Array<object>|undefined} Parsed scenes, or `undefined` when the
 *   report has no `scenes:` block or the block is empty.
 */
export function parseScenes(content) {
    const lines = content.split(/\r?\n/);
    const startIdx = lines.findIndex((l) => l.trim() === "scenes:");
    if (startIdx === -1) {
        return undefined;
    }
    const indentOf = (l) => l.length - l.trimStart().length;
    // The block ends at the next top-level agent field (<= 4 spaces) or at any
    // line that is no deeper than the `scenes:` header itself.
    const blockEndIndent = Math.max(4, indentOf(lines[startIdx]));
    const unquote = (v) => v.replace(/^"|"$/g, "").replace(/\\"/g, '"');
    // Only store numbers that parsed cleanly, so NaN never reaches the renderers.
    const setNumber = (target, key, n) => {
        if (Number.isFinite(n)) {
            target[key] = n;
        }
    };
    const scenes = [];
    let scene;
    let event;
    let eventIndent = 0;
    const pushEvent = () => {
        if (event && scene) {
            (scene.timeline ??= []).push(event);
        }
        event = undefined;
    };
    const pushScene = () => {
        pushEvent();
        if (scene) {
            scenes.push(scene);
        }
        scene = undefined;
    };
    for (let i = startIdx + 1; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        if (trimmed === "") {
            continue;
        }
        const indent = indentOf(line);
        if (indent <= blockEndIndent) {
            break;
        }
        const sceneStart = trimmed.match(/^- prompt: "(.*)"$/);
        if (sceneStart) {
            pushScene();
            scene = { prompt: sceneStart[1].replace(/\\"/g, '"').replace(/\\n/g, "\n") };
            continue;
        }
        if (!scene) {
            continue;
        }
        const eventStart = trimmed.match(/^- kind: (model|tool)$/);
        if (eventStart) {
            pushEvent();
            event = { kind: eventStart[1], name: "", startMs: 0, durationMs: 0 };
            eventIndent = indent;
            continue;
        }
        // A line no deeper than the event's own `- kind:` line has left that
        // event; what follows belongs to the scene again.
        if (event && indent <= eventIndent) {
            pushEvent();
        }
        if (trimmed === "timeline:") {
            continue;
        }
        const kv = trimmed.match(/^([a-z_]+):\s*(.*)$/);
        if (!kv) {
            continue;
        }
        const [, key, value] = kv;
        if (event) {
            switch (key) {
                case "name":
                    event.name = unquote(value);
                    break;
                case "start_ms":
                    setNumber(event, "startMs", parseFloat(value));
                    break;
                case "duration_ms":
                    setNumber(event, "durationMs", parseFloat(value));
                    break;
                case "tokens":
                    event.tokens = parseTokens(value);
                    break;
                case "cached_input_tokens":
                    setNumber(event, "cachedInputTokens", parseInt(value, 10));
                    break;
                case "cost_usd":
                    setNumber(event, "costUsd", parseFloat(value));
                    break;
                case "cost_source":
                    event.costSource = value;
                    break;
                case "run_index":
                    setNumber(event, "runIndex", parseInt(value, 10));
                    break;
                case "error":
                    event.error = unquote(value);
                    break;
            }
        }
        else {
            switch (key) {
                case "duration_ms":
                    setNumber(scene, "durationMs", parseFloat(value));
                    break;
                case "tokens":
                    scene.tokens = parseTokens(value);
                    break;
                case "cost_usd":
                    setNumber(scene, "costUsd", parseFloat(value));
                    break;
                case "cost_source":
                    scene.costSource = value;
                    break;
            }
        }
    }
    pushScene();
    return scenes.length > 0 ? scenes : undefined;
}
|
|
127
245
|
export function parseReport(content, source) {
|
|
128
246
|
const num = (key, fallback = 0) => parseFloat(extractField(content, key) ?? String(fallback));
|
|
129
247
|
const avgIn = extractField(content, "average_input_tokens_per_case");
|
|
@@ -158,9 +276,17 @@ export function parseReport(content, source) {
|
|
|
158
276
|
timestamp: extractField(content, "timestamp") ?? "",
|
|
159
277
|
averageInputTokensPerCase: avgIn != null ? parseFloat(avgIn) : undefined,
|
|
160
278
|
averageOutputTokensPerCase: avgOut != null ? parseFloat(avgOut) : undefined,
|
|
279
|
+
totalInputTokens: optNum("total_input_tokens"),
|
|
280
|
+
totalOutputTokens: optNum("total_output_tokens"),
|
|
281
|
+
totalCostUsd: optNum("total_cost_usd"),
|
|
282
|
+
scenes: parseScenes(content),
|
|
161
283
|
suites: parseSuites(content),
|
|
162
284
|
source,
|
|
163
285
|
};
|
|
286
|
+
function optNum(key) {
|
|
287
|
+
const v = extractField(content, key);
|
|
288
|
+
return v != null ? parseFloat(v) : undefined;
|
|
289
|
+
}
|
|
164
290
|
}
|
|
165
291
|
export async function findReports(dir, depth = 0) {
|
|
166
292
|
if (depth > 6)
|
package/dist/types.d.ts
CHANGED
|
@@ -21,6 +21,8 @@ export interface TimelineEvent {
|
|
|
21
21
|
input: number;
|
|
22
22
|
output: number;
|
|
23
23
|
};
|
|
24
|
+
/** Prompt-cache-hit input tokens (subset of tokens.input), when reported by the provider */
|
|
25
|
+
cachedInputTokens?: number;
|
|
24
26
|
cost?: CostBreakdown;
|
|
25
27
|
/** Index of the run this event belongs to (only set when aggregating across multi-run scenes) */
|
|
26
28
|
runIndex?: number;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { TimelineEvent } from "./types";
|
|
2
|
+
/**
|
|
3
|
+
* Render a Chrome-DevTools-style waterfall of timeline events as colored
|
|
4
|
+
* terminal lines. Bars are positioned by `startMs` and sized by `durationMs`
|
|
5
|
+
* relative to the full span of the scene. Returns one string per event row
|
|
6
|
+
* (already indented), or `[]` when there's nothing to draw.
|
|
7
|
+
*/
|
|
8
|
+
export declare function renderTerminalWaterfall(events: TimelineEvent[], opts?: {
|
|
9
|
+
width?: number;
|
|
10
|
+
indent?: string;
|
|
11
|
+
}): string[];
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { c } from "./logger";
|
|
2
|
+
// Glyphs used to draw waterfall bars: a full cell, and a thin marker for
// zero-duration events.
const BLOCK = "█";
const THIN = "▏";
/**
 * Shorten `s` to at most `n` characters, ending in an ellipsis when cut.
 *
 * @param {string} s Text to shorten.
 * @param {number} n Maximum length including the ellipsis.
 * @returns {string} `s` unchanged when it already fits.
 */
function truncate(s, n) {
    if (!(s.length > n)) {
        return s;
    }
    const head = s.slice(0, n - 1);
    return `${head}…`;
}
|
|
7
|
+
/**
 * Format a USD amount for terminal output, rounded to at most four decimals
 * with trailing zeros dropped.
 *
 * @param {number} n Amount in USD.
 * @returns {string} e.g. "$0", "$0.5", "$1.2346".
 */
function fmtUsd(n) {
    const rounded = n === 0 ? 0 : Number(n.toFixed(4));
    return "$" + rounded.toString();
}
|
|
12
|
+
/**
 * Render a Chrome-DevTools-style waterfall of timeline events as colored
 * terminal lines. Bars are positioned by `startMs` and sized by `durationMs`
 * relative to the full span of the scene. Returns one string per event row
 * (already indented), or `[]` when there's nothing to draw.
 *
 * @param {Array<object>} events Timeline events to draw.
 * @param {{width?: number, indent?: string}} [opts] Bar track width in cells
 *   (default 28) and a prefix added to every row (default "").
 * @returns {string[]} One formatted line per event.
 */
export function renderTerminalWaterfall(events, opts = {}) {
    if (!events || events.length === 0) {
        return [];
    }
    const width = opts.width ?? 28;
    const indent = opts.indent ?? "";
    const t0 = Math.min(...events.map((e) => e.startMs));
    // Derive the end from start + duration when an event has no `endMs`;
    // otherwise a single missing value would turn the whole span into NaN and
    // blank every bar.
    const tEnd = Math.max(...events.map((e) => e.endMs ?? e.startMs + e.durationMs));
    const span = Math.max(1, tEnd - t0);
    const nameWidth = 16;
    return events.map((e) => {
        // Keep the bar inside the track: start no later than the last cell and
        // never extend past the right edge.
        const lead = Math.min(width - 1, Math.round(((e.startMs - t0) / span) * width));
        const barLen = Math.max(1, Math.round((e.durationMs / span) * width));
        const fill = e.durationMs === 0 ? THIN : BLOCK.repeat(Math.min(barLen, width - lead));
        const cells = Array(width).fill(" ");
        for (let i = 0; i < fill.length && lead + i < width; i++) {
            cells[lead + i] = fill[i];
        }
        const color = e.error ? c.red : e.kind === "model" ? c.cyan : c.yellow;
        const bar = color(cells.join(""));
        const kindLabel = (e.kind === "model" ? "model" : "tool ").padEnd(5);
        const nameLabel = truncate(e.name, nameWidth).padEnd(nameWidth);
        const dur = `${Math.round(e.durationMs)}ms`.padStart(7);
        const cost = e.cost?.totalUsd != null ? ` ${fmtUsd(e.cost.totalUsd)}` : "";
        const cached = e.cachedInputTokens ? ` ${c.dim(`(${e.cachedInputTokens} cached)`)}` : "";
        const err = e.error ? ` ${c.red("✗ " + truncate(e.error, 40))}` : "";
        return `${indent}${c.dim(kindLabel)} ${nameLabel} ${bar} ${c.dim(dur)}${c.dim(cost)}${cached}${err}`;
    });
}
|