@sebastiantuyu/agest 0.3.3-next.4 → 0.3.3-next.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.d.ts +2 -0
- package/dist/adapters/index.js +1 -0
- package/dist/adapters/langchain.js +2 -46
- package/dist/adapters/tracing.d.ts +55 -1
- package/dist/adapters/tracing.js +80 -0
- package/dist/context.js +13 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +1 -0
- package/dist/preview.js +92 -0
- package/dist/reports.d.ts +36 -0
- package/dist/reports.js +123 -0
- package/dist/waterfall.d.ts +11 -0
- package/dist/waterfall.js +45 -0
- package/package.json +1 -1
package/dist/adapters/index.d.ts
CHANGED
package/dist/adapters/index.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { createTracingHandle } from "./tracing";
|
|
1
|
+
import { createTracingHandle, summarizeEvents } from "./tracing";
|
|
3
2
|
/**
|
|
4
3
|
* Adapter for LangChain runnables and agents.
|
|
5
4
|
*
|
|
@@ -221,48 +220,5 @@ function extractTokensFromMessage(msg) {
|
|
|
221
220
|
};
|
|
222
221
|
}
|
|
223
222
|
function summarizeRun(input) {
|
|
224
|
-
|
|
225
|
-
let inputTokens = 0;
|
|
226
|
-
let outputTokens = 0;
|
|
227
|
-
let providerCost = 0;
|
|
228
|
-
let hasProviderCost = false;
|
|
229
|
-
let hasTableCost = false;
|
|
230
|
-
let tableCost = 0;
|
|
231
|
-
let hasTokens = false;
|
|
232
|
-
for (const e of modelEvents) {
|
|
233
|
-
if (e.tokens) {
|
|
234
|
-
hasTokens = true;
|
|
235
|
-
inputTokens += e.tokens.input;
|
|
236
|
-
outputTokens += e.tokens.output;
|
|
237
|
-
}
|
|
238
|
-
if (e.cost?.source === "provider" && e.cost.totalUsd != null) {
|
|
239
|
-
hasProviderCost = true;
|
|
240
|
-
providerCost += e.cost.totalUsd;
|
|
241
|
-
}
|
|
242
|
-
else if (e.cost?.source === "table" && e.cost.totalUsd != null) {
|
|
243
|
-
hasTableCost = true;
|
|
244
|
-
tableCost += e.cost.totalUsd;
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
let tokens = hasTokens ? { input: inputTokens, output: outputTokens } : undefined;
|
|
248
|
-
if (!tokens && input.fallbackTokens)
|
|
249
|
-
tokens = input.fallbackTokens;
|
|
250
|
-
// Pick cost: provider > table > recompute from fallback tokens
|
|
251
|
-
let cost;
|
|
252
|
-
if (hasProviderCost) {
|
|
253
|
-
cost = { totalUsd: providerCost, source: "provider" };
|
|
254
|
-
}
|
|
255
|
-
else if (hasTableCost) {
|
|
256
|
-
cost = { totalUsd: tableCost, source: "table" };
|
|
257
|
-
}
|
|
258
|
-
else if (tokens && input.model) {
|
|
259
|
-
const computed = computeCost({
|
|
260
|
-
model: input.model,
|
|
261
|
-
inputTokens: tokens.input,
|
|
262
|
-
outputTokens: tokens.output,
|
|
263
|
-
});
|
|
264
|
-
if (computed.source !== "unavailable")
|
|
265
|
-
cost = computed;
|
|
266
|
-
}
|
|
267
|
-
return { tokens, cost };
|
|
223
|
+
return summarizeEvents(input.events, input.model, input.fallbackTokens);
|
|
268
224
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { TimelineEvent } from "../types";
|
|
1
|
+
import type { TimelineEvent, CostBreakdown } from "../types";
|
|
2
2
|
export interface TracingHandle {
|
|
3
3
|
/** Pass this into `runnable.invoke(..., { callbacks: [handler.callbacks] })` */
|
|
4
4
|
callbacks: any[];
|
|
@@ -17,3 +17,57 @@ export interface TracingHandle {
|
|
|
17
17
|
* than throwing — the underlying agent run must not be broken by tracing.
|
|
18
18
|
*/
|
|
19
19
|
export declare function createTracingHandle(baselineMs: number): Promise<TracingHandle>;
|
|
20
|
+
export interface Trace {
|
|
21
|
+
/**
|
|
22
|
+
* Attach to your top-level LangChain/LangGraph call:
|
|
23
|
+
* `await graph.invoke(input, { callbacks: trace.callbacks })`.
|
|
24
|
+
* Callbacks propagate to nested nodes automatically.
|
|
25
|
+
*/
|
|
26
|
+
callbacks: any[];
|
|
27
|
+
/**
|
|
28
|
+
* Collect the captured timeline plus aggregated tokens and cost. Call once
|
|
29
|
+
* after the run completes; the result is memoized so repeat calls are safe.
|
|
30
|
+
* Spread the result into your `AgentResponse.metadata` to surface the
|
|
31
|
+
* per-scene cost/timeline waterfall in the report.
|
|
32
|
+
*/
|
|
33
|
+
collect(): {
|
|
34
|
+
events: TimelineEvent[];
|
|
35
|
+
tokens?: {
|
|
36
|
+
input: number;
|
|
37
|
+
output: number;
|
|
38
|
+
};
|
|
39
|
+
cost?: CostBreakdown;
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Public tracing helper for custom executors (i.e. agents not wired through
|
|
44
|
+
* the `langchain()` adapter). Create one per scene run, hand its `callbacks`
|
|
45
|
+
* to your LangChain/LangGraph invocation, then spread `collect()` into the
|
|
46
|
+
* response metadata.
|
|
47
|
+
*
|
|
48
|
+
* @example
|
|
49
|
+
* ```ts
|
|
50
|
+
* const trace = await createTrace({ model: env.OPENROUTER_MODEL });
|
|
51
|
+
* const plan = await generatePlan(input, { callbacks: trace.callbacks });
|
|
52
|
+
* return { text: render(plan), metadata: { model, tools, ...trace.collect() } };
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
export declare function createTrace(opts?: {
|
|
56
|
+
model?: string;
|
|
57
|
+
}): Promise<Trace>;
|
|
58
|
+
/**
|
|
59
|
+
* Aggregate token counts and cost across a timeline's model events.
|
|
60
|
+
* Provider-reported cost wins; otherwise the table-derived cost; otherwise
|
|
61
|
+
* cost is recomputed from `model` and the summed tokens. `fallbackTokens` is
|
|
62
|
+
* used only when no model event carried usage.
|
|
63
|
+
*/
|
|
64
|
+
export declare function summarizeEvents(events: TimelineEvent[], model?: string, fallbackTokens?: {
|
|
65
|
+
input: number;
|
|
66
|
+
output: number;
|
|
67
|
+
}): {
|
|
68
|
+
tokens?: {
|
|
69
|
+
input: number;
|
|
70
|
+
output: number;
|
|
71
|
+
};
|
|
72
|
+
cost?: CostBreakdown;
|
|
73
|
+
};
|
package/dist/adapters/tracing.js
CHANGED
|
@@ -128,6 +128,86 @@ export async function createTracingHandle(baselineMs) {
|
|
|
128
128
|
},
|
|
129
129
|
};
|
|
130
130
|
}
|
|
131
|
+
/**
|
|
132
|
+
* Public tracing helper for custom executors (i.e. agents not wired through
|
|
133
|
+
* the `langchain()` adapter). Create one per scene run, hand its `callbacks`
|
|
134
|
+
* to your LangChain/LangGraph invocation, then spread `collect()` into the
|
|
135
|
+
* response metadata.
|
|
136
|
+
*
|
|
137
|
+
* @example
|
|
138
|
+
* ```ts
|
|
139
|
+
* const trace = await createTrace({ model: env.OPENROUTER_MODEL });
|
|
140
|
+
* const plan = await generatePlan(input, { callbacks: trace.callbacks });
|
|
141
|
+
* return { text: render(plan), metadata: { model, tools, ...trace.collect() } };
|
|
142
|
+
* ```
|
|
143
|
+
*/
|
|
144
|
+
export async function createTrace(opts) {
    // The baseline timestamp is taken before the handle is created so event
    // offsets are measured from the moment the trace was requested.
    const handle = await createTracingHandle(performance.now());
    let memo;
    /**
     * Drain the handle once and cache the result; later calls return the
     * same object so spreading it into metadata more than once is harmless.
     */
    const collect = () => {
        if (!memo) {
            const drained = handle.drain();
            // An explicitly supplied model name takes precedence over the one
            // the handle observed during the run.
            const summary = summarizeEvents(drained.events, opts?.model ?? drained.modelName);
            memo = { events: drained.events, tokens: summary.tokens, cost: summary.cost };
        }
        return memo;
    };
    return { callbacks: handle.callbacks, collect };
}
|
|
160
|
+
/**
|
|
161
|
+
* Aggregate token counts and cost across a timeline's model events.
|
|
162
|
+
* Provider-reported cost wins; otherwise the table-derived cost; otherwise
|
|
163
|
+
* cost is recomputed from `model` and the summed tokens. `fallbackTokens` is
|
|
164
|
+
* used only when no model event carried usage.
|
|
165
|
+
*/
|
|
166
|
+
export function summarizeEvents(events, model, fallbackTokens) {
    // Running totals, each with a flag recording whether any event contributed.
    const usage = { input: 0, output: 0, seen: false };
    const provider = { usd: 0, seen: false };
    const table = { usd: 0, seen: false };
    for (const ev of events) {
        // Only model events carry usage/cost; tool events are ignored here.
        if (ev.kind !== "model")
            continue;
        if (ev.tokens) {
            usage.seen = true;
            usage.input += ev.tokens.input;
            usage.output += ev.tokens.output;
        }
        const usd = ev.cost?.totalUsd;
        if (usd == null)
            continue;
        if (ev.cost.source === "provider") {
            provider.seen = true;
            provider.usd += usd;
        }
        else if (ev.cost.source === "table") {
            table.seen = true;
            table.usd += usd;
        }
    }
    // Summed usage wins; the caller-supplied fallback is used only when no
    // model event reported tokens at all.
    let tokens;
    if (usage.seen) {
        tokens = { input: usage.input, output: usage.output };
    }
    else if (fallbackTokens) {
        tokens = fallbackTokens;
    }
    // Cost precedence: provider-reported > table-derived > recomputed.
    if (provider.seen) {
        return { tokens, cost: { totalUsd: provider.usd, source: "provider" } };
    }
    if (table.seen) {
        return { tokens, cost: { totalUsd: table.usd, source: "table" } };
    }
    let cost;
    if (tokens && model) {
        const computed = computeCost({
            model,
            inputTokens: tokens.input,
            outputTokens: tokens.output,
        });
        // An "unavailable" result is reported as no cost at all.
        if (computed.source !== "unavailable") {
            cost = computed;
        }
    }
    return { tokens, cost };
}
|
|
131
211
|
function now() {
|
|
132
212
|
return performance.now();
|
|
133
213
|
}
|
package/dist/context.js
CHANGED
|
@@ -4,6 +4,7 @@ import { formatReport, writeReport, writeDiffEntry } from "./reporter";
|
|
|
4
4
|
import { logger, c } from "./logger";
|
|
5
5
|
import { loadConfig } from "./config";
|
|
6
6
|
import { setPricingOverrides } from "./pricing";
|
|
7
|
+
import { renderTerminalWaterfall } from "./waterfall";
|
|
7
8
|
import { PromisePool } from "@supercharge/promise-pool";
|
|
8
9
|
export class SceneBuilder {
|
|
9
10
|
_prompt;
|
|
@@ -129,6 +130,18 @@ export class AgentContext {
|
|
|
129
130
|
const sigColor = sig >= 0.95 ? c.green : sig >= 0.80 ? c.yellow : c.red;
|
|
130
131
|
logger.info(`${indent} ${c.dim("significance:")} ${sigColor(`${(sig * 100).toFixed(1)}%`)} ${c.dim(`(pass rate: ${((result.passRate ?? 0) * 100).toFixed(1)}%)`)}`);
|
|
131
132
|
}
|
|
133
|
+
if (result.events && result.events.length > 0) {
|
|
134
|
+
const costLabel = result.costUsd != null
|
|
135
|
+
? ` ${c.dim("·")} ${c.green(`$${Number(result.costUsd.toFixed(4))}`)}`
|
|
136
|
+
: "";
|
|
137
|
+
const tokLabel = result.tokens
|
|
138
|
+
? ` ${c.dim(`(${result.tokens.input}→${result.tokens.output} tok)`)}`
|
|
139
|
+
: "";
|
|
140
|
+
logger.info(`${indent} ${c.dim("waterfall:")}${tokLabel}${costLabel}`);
|
|
141
|
+
for (const line of renderTerminalWaterfall(result.events, { indent: `${indent} ` })) {
|
|
142
|
+
logger.info(line);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
132
145
|
logger.debug(`${indent} response: ${result.response.text?.slice(0, 120)}`);
|
|
133
146
|
};
|
|
134
147
|
if (hasSuites) {
|
package/dist/index.d.ts
CHANGED
|
@@ -3,11 +3,13 @@ import { SceneBuilder } from "./context";
|
|
|
3
3
|
export { expect } from "./assertions";
|
|
4
4
|
export { logger } from "./logger";
|
|
5
5
|
export { defineConfig } from "./config";
|
|
6
|
+
export { createTrace, summarizeEvents } from "./adapters/tracing";
|
|
7
|
+
export type { Trace } from "./adapters/tracing";
|
|
6
8
|
export type { AgestConfig, JudgeConfig, JudgeExecutor } from "./config";
|
|
7
9
|
export type { LogLevel } from "./logger";
|
|
8
10
|
export type { AgentExpectation, AgentMatchers } from "./assertions";
|
|
9
11
|
export type { JudgeCriteria } from "./judge";
|
|
10
|
-
export type { AgentExecutor, ExecutorOptions, AgentResponse, AgentReport, SceneResult, RunResult, JudgeVerdict, JudgeResult, HookFn, } from "./types";
|
|
12
|
+
export type { AgentExecutor, ExecutorOptions, AgentResponse, AgentReport, SceneResult, RunResult, JudgeVerdict, JudgeResult, HookFn, TimelineEvent, TimelineEventKind, CostBreakdown, CostSource, } from "./types";
|
|
11
13
|
export interface AgentOptions {
|
|
12
14
|
name?: string;
|
|
13
15
|
}
|
package/dist/index.js
CHANGED
|
@@ -2,6 +2,7 @@ import { AgentContext, setContext, getContext } from "./context";
|
|
|
2
2
|
export { expect } from "./assertions";
|
|
3
3
|
export { logger } from "./logger";
|
|
4
4
|
export { defineConfig } from "./config";
|
|
5
|
+
export { createTrace, summarizeEvents } from "./adapters/tracing";
|
|
5
6
|
export function scene(prompt) {
|
|
6
7
|
return getContext().registerScene(prompt);
|
|
7
8
|
}
|
package/dist/preview.js
CHANGED
|
@@ -189,6 +189,90 @@ function renderFailedCases(cases) {
|
|
|
189
189
|
</ul>
|
|
190
190
|
</details>`;
|
|
191
191
|
}
|
|
192
|
+
const WF_MODEL = "#38bdf8";
|
|
193
|
+
const WF_TOOL = "#facc15";
|
|
194
|
+
const WF_ERROR = "#f87171";
|
|
195
|
+
/**
 * Format a USD amount for the HTML report.
 *
 * Amounts are shown with at most four decimals and no trailing zeros
 * (`1.5` -> `"$1.5"`, `0.1234` -> `"$0.1234"`). A non-zero amount that would
 * round to zero at four decimals is shown with two significant digits
 * instead (`0.000015` -> `"$0.000015"`), so a very cheap call is not
 * rendered identically to a free one.
 *
 * @param n amount in US dollars
 * @returns the amount prefixed with `$`
 */
function fmtUsdHtml(n) {
    if (n === 0)
        return "$0";
    const rounded = Number(n.toFixed(4));
    // `rounded === 0` also covers -0; reaching here means n itself is non-zero.
    if (rounded === 0)
        return "$" + Number(n.toPrecision(2)).toString();
    return "$" + rounded.toString();
}
|
|
200
|
+
/**
|
|
201
|
+
* Chrome-DevTools-style waterfall for a report's per-scene timelines. Bars are
|
|
202
|
+
* absolutely positioned within a track by start_ms / duration_ms. Returns "" if
|
|
203
|
+
* the report carries no timeline data (older reports / non-traced executors).
|
|
204
|
+
*/
|
|
205
|
+
/**
 * Render the per-scene timeline waterfall for a parsed report as HTML.
 *
 * Each scene with a non-empty `timeline` becomes a block of rows; each row
 * has a label, a track with an absolutely positioned bar, the duration and
 * the cost. Bars are placed by `startMs` and sized by `durationMs` relative
 * to the scene's full span, and are clamped so they never extend past the
 * right edge of the track.
 *
 * @param report parsed report; only `report.scenes` is read
 * @returns the `<details>` markup, or `""` when no scene carries a timeline
 */
function renderWaterfallHtml(report) {
    const scenes = (report.scenes ?? []).filter((s) => s.timeline && s.timeline.length > 0);
    if (scenes.length === 0)
        return "";
    const sceneBlocks = scenes
        .map((scene) => {
        const events = scene.timeline;
        // Plain loop rather than Math.min(...array): spreading a very long
        // timeline into call arguments can exceed the engine's argument limit.
        let t0 = Infinity;
        let tEnd = -Infinity;
        for (const e of events) {
            t0 = Math.min(t0, e.startMs);
            tEnd = Math.max(tEnd, e.startMs + e.durationMs);
        }
        const span = Math.max(1, tEnd - t0);
        const rows = events
            .map((e) => {
            // Minimum width keeps instantaneous events visible; the cap and the
            // left clamp keep left + width within the 100% track.
            const width = Math.min(100, Math.max(0.6, (e.durationMs / span) * 100));
            const left = Math.min(Math.max(0, ((e.startMs - t0) / span) * 100), 100 - width);
            const bg = e.error ? WF_ERROR : e.kind === "model" ? WF_MODEL : WF_TOOL;
            const icon = e.kind === "model" ? "◆" : "▸";
            const tip = [
                `${e.kind}: ${e.name}`,
                `start ${Math.round(e.startMs)}ms · ${Math.round(e.durationMs)}ms`,
                e.tokens ? `${e.tokens.input}→${e.tokens.output} tok` : "",
                e.costUsd != null ? fmtUsdHtml(e.costUsd) : "",
                e.error ? `error: ${e.error}` : "",
            ]
                .filter(Boolean)
                .join(" · ");
            const cost = e.costUsd != null ? fmtUsdHtml(e.costUsd) : "";
            return `
 <div class="flex items-center gap-2 text-[11px] leading-5">
 <span class="w-44 shrink-0 truncate ${e.error ? "text-red-400" : "text-zinc-400"}" title="${escHtml(e.name)}">
 <span style="color:${bg}">${icon}</span> ${escHtml(e.name)}
 </span>
 <div class="relative flex-1 h-3 bg-zinc-800/40 rounded">
 <div class="absolute top-0 h-3 rounded" style="left:${left.toFixed(2)}%;width:${width.toFixed(2)}%;background:${bg}" title="${escHtml(tip)}"></div>
 </div>
 <span class="w-16 shrink-0 text-right text-zinc-500">${Math.round(e.durationMs)}ms</span>
 <span class="w-16 shrink-0 text-right text-zinc-500">${cost}</span>
 </div>`;
        })
            .join("\n");
        // Scene header summary: tokens · cost · cost source · duration.
        const meta = [
            scene.tokens ? `${scene.tokens.input}→${scene.tokens.output} tok` : "",
            scene.costUsd != null ? fmtUsdHtml(scene.costUsd) : "",
            scene.costSource ? scene.costSource : "",
            scene.durationMs != null ? `${Math.round(scene.durationMs)}ms` : "",
        ]
            .filter(Boolean)
            .join(" · ");
        return `
 <div>
 <div class="flex items-center justify-between mb-1.5">
 <span class="text-xs text-zinc-300 truncate" title="${escHtml(scene.prompt)}">${escHtml(scene.prompt)}</span>
 <span class="text-[11px] text-zinc-500 shrink-0 ml-3">${escHtml(meta)}</span>
 </div>
 <div class="space-y-1">${rows}</div>
 </div>`;
    })
        .join("\n");
    return `
 <details class="mt-2" open>
 <summary class="text-xs text-sky-400 cursor-pointer hover:text-sky-300 select-none">
 waterfall · ${scenes.length} scene${scenes.length !== 1 ? "s" : ""}
 </summary>
 <div class="mt-3 mb-2 pl-3 border-l border-zinc-800 space-y-5">
 <div class="flex gap-4 text-[10px] text-zinc-500">
 <span><span style="color:${WF_MODEL}">◆</span> model</span>
 <span><span style="color:${WF_TOOL}">▸</span> tool</span>
 </div>
 ${sceneBlocks}
 </div>
 </details>`;
}
|
|
192
276
|
function renderRunRow(entry, idx) {
|
|
193
277
|
const { report, delta, diffLines } = entry;
|
|
194
278
|
const pct = report.successRate * 100;
|
|
@@ -233,6 +317,7 @@ function renderRunRow(entry, idx) {
|
|
|
233
317
|
</div>
|
|
234
318
|
<div class="ml-10 mt-0.5 flex gap-3 flex-wrap">${dimTags}</div>
|
|
235
319
|
${diffHtml}
|
|
320
|
+
<div class="ml-10">${renderWaterfallHtml(report)}</div>
|
|
236
321
|
</div>`;
|
|
237
322
|
}
|
|
238
323
|
// ---------------------------------------------------------------------------
|
|
@@ -990,6 +1075,12 @@ function renderSingleRun(report) {
|
|
|
990
1075
|
<p class="text-zinc-300">${Math.round(report.averageOutputTokensPerCase)}</p>
|
|
991
1076
|
</div>`
|
|
992
1077
|
: ""}
|
|
1078
|
+
${report.totalCostUsd != null
|
|
1079
|
+
? `<div>
|
|
1080
|
+
<span class="text-zinc-500">Total Cost</span>
|
|
1081
|
+
<p class="text-zinc-300">${fmtUsdHtml(report.totalCostUsd)}${report.totalInputTokens != null ? ` <span class="text-zinc-600">· ${report.totalInputTokens}→${report.totalOutputTokens} tok</span>` : ""}</p>
|
|
1082
|
+
</div>`
|
|
1083
|
+
: ""}
|
|
993
1084
|
${report.tools && report.tools.length > 0
|
|
994
1085
|
? `<div>
|
|
995
1086
|
<span class="text-zinc-500">Tools</span>
|
|
@@ -997,6 +1088,7 @@ function renderSingleRun(report) {
|
|
|
997
1088
|
</div>`
|
|
998
1089
|
: ""}
|
|
999
1090
|
</div>
|
|
1091
|
+
${renderWaterfallHtml(report)}
|
|
1000
1092
|
${failedSection}
|
|
1001
1093
|
</div>`;
|
|
1002
1094
|
}
|
package/dist/reports.d.ts
CHANGED
|
@@ -9,6 +9,31 @@ export interface ParsedSuiteResult {
|
|
|
9
9
|
response?: string;
|
|
10
10
|
}>;
|
|
11
11
|
}
|
|
12
|
+
export interface ParsedTimelineEvent {
|
|
13
|
+
kind: "model" | "tool";
|
|
14
|
+
name: string;
|
|
15
|
+
startMs: number;
|
|
16
|
+
durationMs: number;
|
|
17
|
+
tokens?: {
|
|
18
|
+
input: number;
|
|
19
|
+
output: number;
|
|
20
|
+
};
|
|
21
|
+
costUsd?: number;
|
|
22
|
+
costSource?: string;
|
|
23
|
+
runIndex?: number;
|
|
24
|
+
error?: string;
|
|
25
|
+
}
|
|
26
|
+
export interface ParsedScene {
|
|
27
|
+
prompt: string;
|
|
28
|
+
durationMs?: number;
|
|
29
|
+
tokens?: {
|
|
30
|
+
input: number;
|
|
31
|
+
output: number;
|
|
32
|
+
};
|
|
33
|
+
costUsd?: number;
|
|
34
|
+
costSource?: string;
|
|
35
|
+
timeline?: ParsedTimelineEvent[];
|
|
36
|
+
}
|
|
12
37
|
export interface ParsedReport {
|
|
13
38
|
name?: string;
|
|
14
39
|
systemPromptHash?: string;
|
|
@@ -28,6 +53,10 @@ export interface ParsedReport {
|
|
|
28
53
|
timestamp: string;
|
|
29
54
|
averageInputTokensPerCase?: number;
|
|
30
55
|
averageOutputTokensPerCase?: number;
|
|
56
|
+
totalInputTokens?: number;
|
|
57
|
+
totalOutputTokens?: number;
|
|
58
|
+
totalCostUsd?: number;
|
|
59
|
+
scenes?: ParsedScene[];
|
|
31
60
|
suites?: ParsedSuiteResult[];
|
|
32
61
|
source: string;
|
|
33
62
|
}
|
|
@@ -44,6 +73,13 @@ export declare function parseFailedCases(content: string): Array<{
|
|
|
44
73
|
}>;
|
|
45
74
|
export declare function parseDimensions(content: string): Record<string, string> | undefined;
|
|
46
75
|
export declare function parseSuites(content: string): ParsedSuiteResult[] | undefined;
|
|
76
|
+
/**
|
|
77
|
+
* Parse the `scenes:` block (per-scene tokens/cost + timeline waterfall) from a
|
|
78
|
+
* report. The emitted format is fixed (see reporter.ts `renderSceneObservability`),
|
|
79
|
+
* so this hand-parses by indentation: scenes start at 8 spaces, scene fields at
|
|
80
|
+
* 10, timeline events at 14, event fields at 16.
|
|
81
|
+
*/
|
|
82
|
+
export declare function parseScenes(content: string): ParsedScene[] | undefined;
|
|
47
83
|
export declare function parseReport(content: string, source: string): ParsedReport;
|
|
48
84
|
export declare function findReports(dir: string, depth?: number): Promise<string[]>;
|
|
49
85
|
export declare function loadDiffEntry(hash: string): Promise<DiffEntry | null>;
|
package/dist/reports.js
CHANGED
|
@@ -124,6 +124,121 @@ export function parseSuites(content) {
|
|
|
124
124
|
suites.push(current);
|
|
125
125
|
return suites.length > 0 ? suites : undefined;
|
|
126
126
|
}
|
|
127
|
+
/**
 * Extract `{ input, output }` token counts from a serialized tokens value
 * containing `input: <digits>, output: <digits>`.
 * Returns `undefined` when the text does not contain that pattern.
 */
function parseTokens(raw) {
    const found = raw.match(/input:\s*(\d+),\s*output:\s*(\d+)/);
    if (found === null) {
        return undefined;
    }
    const [, inputDigits, outputDigits] = found;
    const input = parseInt(inputDigits, 10);
    const output = parseInt(outputDigits, 10);
    return { input, output };
}
|
|
133
|
+
/**
|
|
134
|
+
* Parse the `scenes:` block (per-scene tokens/cost + timeline waterfall) from a
|
|
135
|
+
* report. The emitted format is fixed (see reporter.ts `renderSceneObservability`),
|
|
136
|
+
* so this hand-parses by indentation: scenes start at 8 spaces, scene fields at
|
|
137
|
+
* 10, timeline events at 14, event fields at 16.
|
|
138
|
+
*/
|
|
139
|
+
/**
 * Parse the `scenes:` block of a report into per-scene records.
 *
 * The block is read line by line and structured by indentation:
 *  - the block ends at the first non-blank line indented no deeper than the
 *    `scenes:` key (or at 4 spaces or fewer);
 *  - `- prompt: "..."` starts a scene; the first such line fixes the scene
 *    indentation and later scenes must use the same one;
 *  - `- kind: model|tool`, indented deeper than the scene line, starts a
 *    timeline event;
 *  - a `key: value` line belongs to the open event only when it is indented
 *    deeper than that event's `- kind:` line; otherwise the event is closed
 *    and the field is applied to the scene.
 *
 * Both LF and CRLF line endings are accepted. Numeric fields that do not
 * parse to a finite number are ignored instead of being stored as NaN.
 *
 * @param content full text of the report
 * @returns the parsed scenes, or `undefined` when there is no `scenes:`
 *          block or it contains no scene
 */
export function parseScenes(content) {
    const lines = content.split(/\r?\n/);
    const indentOf = (text) => text.length - text.trimStart().length;
    const startIdx = lines.findIndex((l) => /^\s*scenes:\s*$/.test(l));
    if (startIdx === -1)
        return undefined;
    // Anything at or left of this column is a sibling/parent of `scenes:`.
    const blockEnd = Math.max(4, indentOf(lines[startIdx]));
    const unquote = (text) => text.replace(/^"|"$/g, "").replace(/\\"/g, '"');
    // Assign obj[prop] only when the raw text parses to a finite number.
    const setNum = (obj, prop, raw, parse = parseFloat) => {
        const v = parse(raw);
        if (Number.isFinite(v))
            obj[prop] = v;
    };
    const toInt = (raw) => parseInt(raw, 10);
    const scenes = [];
    let scene;
    let sceneIndent = -1;
    let event;
    let eventIndent = -1;
    const pushEvent = () => {
        if (event && scene) {
            (scene.timeline ??= []).push(event);
        }
        event = undefined;
        eventIndent = -1;
    };
    const pushScene = () => {
        pushEvent();
        if (scene)
            scenes.push(scene);
        scene = undefined;
    };
    for (let i = startIdx + 1; i < lines.length; i++) {
        const line = lines[i];
        const trimmed = line.trim();
        if (trimmed === "")
            continue;
        const indent = indentOf(line);
        if (indent <= blockEnd)
            break;
        const sceneStart = trimmed.match(/^- prompt: "(.*)"$/);
        if (sceneStart && (sceneIndent === -1 || indent === sceneIndent)) {
            pushScene();
            sceneIndent = indent;
            scene = { prompt: sceneStart[1].replace(/\\"/g, '"').replace(/\\n/g, "\n") };
            continue;
        }
        if (!scene)
            continue;
        const eventStart = trimmed.match(/^- kind: (model|tool)$/);
        if (eventStart && indent > sceneIndent) {
            pushEvent();
            event = { kind: eventStart[1], name: "", startMs: 0, durationMs: 0 };
            eventIndent = indent;
            continue;
        }
        const kv = trimmed.match(/^([a-z_]+):\s*(.*)$/);
        if (!kv)
            continue;
        const [, key, value] = kv;
        // A field at or left of the event's dash is no longer part of that
        // event (e.g. a scene-level field emitted after the timeline).
        if (event && indent <= eventIndent)
            pushEvent();
        if (event) {
            switch (key) {
                case "name":
                    event.name = unquote(value);
                    break;
                case "start_ms":
                    setNum(event, "startMs", value);
                    break;
                case "duration_ms":
                    setNum(event, "durationMs", value);
                    break;
                case "tokens":
                    event.tokens = parseTokens(value);
                    break;
                case "cost_usd":
                    setNum(event, "costUsd", value);
                    break;
                case "cost_source":
                    event.costSource = value;
                    break;
                case "run_index":
                    setNum(event, "runIndex", value, toInt);
                    break;
                case "error":
                    event.error = unquote(value);
                    break;
            }
        }
        else {
            switch (key) {
                case "duration_ms":
                    setNum(scene, "durationMs", value);
                    break;
                case "tokens":
                    scene.tokens = parseTokens(value);
                    break;
                case "cost_usd":
                    setNum(scene, "costUsd", value);
                    break;
                case "cost_source":
                    scene.costSource = value;
                    break;
            }
        }
    }
    pushScene();
    return scenes.length > 0 ? scenes : undefined;
}
|
|
127
242
|
export function parseReport(content, source) {
|
|
128
243
|
const num = (key, fallback = 0) => parseFloat(extractField(content, key) ?? String(fallback));
|
|
129
244
|
const avgIn = extractField(content, "average_input_tokens_per_case");
|
|
@@ -158,9 +273,17 @@ export function parseReport(content, source) {
|
|
|
158
273
|
timestamp: extractField(content, "timestamp") ?? "",
|
|
159
274
|
averageInputTokensPerCase: avgIn != null ? parseFloat(avgIn) : undefined,
|
|
160
275
|
averageOutputTokensPerCase: avgOut != null ? parseFloat(avgOut) : undefined,
|
|
276
|
+
totalInputTokens: optNum("total_input_tokens"),
|
|
277
|
+
totalOutputTokens: optNum("total_output_tokens"),
|
|
278
|
+
totalCostUsd: optNum("total_cost_usd"),
|
|
279
|
+
scenes: parseScenes(content),
|
|
161
280
|
suites: parseSuites(content),
|
|
162
281
|
source,
|
|
163
282
|
};
|
|
283
|
+
function optNum(key) {
|
|
284
|
+
const v = extractField(content, key);
|
|
285
|
+
return v != null ? parseFloat(v) : undefined;
|
|
286
|
+
}
|
|
164
287
|
}
|
|
165
288
|
export async function findReports(dir, depth = 0) {
|
|
166
289
|
if (depth > 6)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { TimelineEvent } from "./types";
|
|
2
|
+
/**
|
|
3
|
+
* Render a Chrome-DevTools-style waterfall of timeline events as colored
|
|
4
|
+
* terminal lines. Bars are positioned by `startMs` and sized by `durationMs`
|
|
5
|
+
* relative to the full span of the scene. Returns one string per event row
|
|
6
|
+
* (already indented), or `[]` when there's nothing to draw.
|
|
7
|
+
*/
|
|
8
|
+
export declare function renderTerminalWaterfall(events: TimelineEvent[], opts?: {
|
|
9
|
+
width?: number;
|
|
10
|
+
indent?: string;
|
|
11
|
+
}): string[];
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { c } from "./logger";
|
|
2
|
+
const BLOCK = "█";
|
|
3
|
+
const THIN = "▏";
|
|
4
|
+
/**
 * Shorten `s` to at most `n` characters, replacing the tail with "…" when
 * it had to be cut. Strings that already fit are returned unchanged.
 */
function truncate(s, n) {
    if (s.length <= n) {
        return s;
    }
    const head = s.slice(0, n - 1);
    return `${head}…`;
}
|
|
7
|
+
/**
 * Format a USD amount for terminal output.
 *
 * Amounts are shown with at most four decimals and no trailing zeros
 * (`1.5` -> `"$1.5"`). A non-zero amount that would round to zero at four
 * decimals is shown with two significant digits instead
 * (`0.000015` -> `"$0.000015"`), so a very cheap call is not rendered
 * identically to a free one.
 *
 * @param n amount in US dollars
 * @returns the amount prefixed with `$`
 */
function fmtUsd(n) {
    if (n === 0)
        return "$0";
    const rounded = Number(n.toFixed(4));
    // `rounded === 0` also covers -0; reaching here means n itself is non-zero.
    if (rounded === 0)
        return "$" + Number(n.toPrecision(2)).toString();
    return "$" + rounded.toString();
}
|
|
12
|
+
/**
|
|
13
|
+
* Render a Chrome-DevTools-style waterfall of timeline events as colored
|
|
14
|
+
* terminal lines. Bars are positioned by `startMs` and sized by `durationMs`
|
|
15
|
+
* relative to the full span of the scene. Returns one string per event row
|
|
16
|
+
* (already indented), or `[]` when there's nothing to draw.
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Render timeline events as one colored terminal line per event.
 *
 * Each line holds the event kind, a truncated name, a fixed-width bar
 * positioned by `startMs` and sized by `durationMs` relative to the span of
 * all events, the rounded duration, and, when present, the cost and a
 * truncated error message.
 *
 * @param events timeline events; an event's end is `endMs` when present,
 *               otherwise `startMs + durationMs`
 * @param opts.width  bar width in character cells (default 28); coerced to
 *                    an integer of at least 1
 * @param opts.indent prefix prepended to every line (default "")
 * @returns one string per event, or `[]` when there are no events
 */
export function renderTerminalWaterfall(events, opts = {}) {
    if (!events || events.length === 0)
        return [];
    // Array(width) throws on negative or fractional lengths, so normalize first.
    const requested = opts.width ?? 28;
    const width = Number.isFinite(requested) ? Math.max(1, Math.floor(requested)) : 28;
    const indent = opts.indent ?? "";
    // Fall back to start + duration so events without an explicit `endMs`
    // do not turn the whole span into NaN.
    const endOf = (e) => e.endMs ?? e.startMs + e.durationMs;
    let t0 = Infinity;
    let tEnd = -Infinity;
    for (const e of events) {
        t0 = Math.min(t0, e.startMs);
        tEnd = Math.max(tEnd, endOf(e));
    }
    const span = Math.max(1, tEnd - t0);
    const nameWidth = 16;
    return events.map((e) => {
        // Keep the bar's first cell inside the track even for events that
        // start at the very end of the span.
        const lead = Math.min(width - 1, Math.round(((e.startMs - t0) / span) * width));
        const barLen = Math.max(1, Math.round((e.durationMs / span) * width));
        // Zero-duration events get a thin marker instead of a full block.
        const fill = e.durationMs === 0 ? THIN : BLOCK.repeat(Math.min(barLen, width - lead));
        const cells = Array(width).fill(" ");
        for (let i = 0; i < fill.length && lead + i < width; i++) {
            cells[lead + i] = fill[i];
        }
        let bar = cells.join("");
        const color = e.error ? c.red : e.kind === "model" ? c.cyan : c.yellow;
        bar = color(bar);
        const kindLabel = (e.kind === "model" ? "model" : "tool ").padEnd(5);
        const nameLabel = truncate(e.name, nameWidth).padEnd(nameWidth);
        const dur = `${Math.round(e.durationMs)}ms`.padStart(7);
        const cost = e.cost?.totalUsd != null ? ` ${fmtUsd(e.cost.totalUsd)}` : "";
        const err = e.error ? ` ${c.red("✗ " + truncate(e.error, 40))}` : "";
        return `${indent}${c.dim(kindLabel)} ${nameLabel} ${bar} ${c.dim(dur)}${c.dim(cost)}${err}`;
    });
}
|