@sebastiantuyu/agest 0.3.3-next.4 → 0.3.3-next.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
1
  export { langchain } from "./langchain";
2
2
  export { remote } from "./remote";
3
3
  export type { RemoteAdapterOptions } from "./remote";
4
+ export { createTrace, summarizeEvents } from "./tracing";
5
+ export type { Trace } from "./tracing";
@@ -1,2 +1,3 @@
1
1
  export { langchain } from "./langchain";
2
2
  export { remote } from "./remote";
3
+ export { createTrace, summarizeEvents } from "./tracing";
@@ -1,5 +1,4 @@
1
- import { computeCost } from "../pricing";
2
- import { createTracingHandle } from "./tracing";
1
+ import { createTracingHandle, summarizeEvents } from "./tracing";
3
2
  /**
4
3
  * Adapter for LangChain runnables and agents.
5
4
  *
@@ -221,48 +220,5 @@ function extractTokensFromMessage(msg) {
221
220
  };
222
221
  }
223
222
  function summarizeRun(input) {
224
- const modelEvents = input.events.filter((e) => e.kind === "model");
225
- let inputTokens = 0;
226
- let outputTokens = 0;
227
- let providerCost = 0;
228
- let hasProviderCost = false;
229
- let hasTableCost = false;
230
- let tableCost = 0;
231
- let hasTokens = false;
232
- for (const e of modelEvents) {
233
- if (e.tokens) {
234
- hasTokens = true;
235
- inputTokens += e.tokens.input;
236
- outputTokens += e.tokens.output;
237
- }
238
- if (e.cost?.source === "provider" && e.cost.totalUsd != null) {
239
- hasProviderCost = true;
240
- providerCost += e.cost.totalUsd;
241
- }
242
- else if (e.cost?.source === "table" && e.cost.totalUsd != null) {
243
- hasTableCost = true;
244
- tableCost += e.cost.totalUsd;
245
- }
246
- }
247
- let tokens = hasTokens ? { input: inputTokens, output: outputTokens } : undefined;
248
- if (!tokens && input.fallbackTokens)
249
- tokens = input.fallbackTokens;
250
- // Pick cost: provider > table > recompute from fallback tokens
251
- let cost;
252
- if (hasProviderCost) {
253
- cost = { totalUsd: providerCost, source: "provider" };
254
- }
255
- else if (hasTableCost) {
256
- cost = { totalUsd: tableCost, source: "table" };
257
- }
258
- else if (tokens && input.model) {
259
- const computed = computeCost({
260
- model: input.model,
261
- inputTokens: tokens.input,
262
- outputTokens: tokens.output,
263
- });
264
- if (computed.source !== "unavailable")
265
- cost = computed;
266
- }
267
- return { tokens, cost };
223
+ return summarizeEvents(input.events, input.model, input.fallbackTokens);
268
224
  }
@@ -1,4 +1,4 @@
1
- import type { TimelineEvent } from "../types";
1
+ import type { TimelineEvent, CostBreakdown } from "../types";
2
2
  export interface TracingHandle {
3
3
  /** Pass this into `runnable.invoke(..., { callbacks: [handler.callbacks] })` */
4
4
  callbacks: any[];
@@ -17,3 +17,57 @@ export interface TracingHandle {
17
17
  * than throwing — the underlying agent run must not be broken by tracing.
18
18
  */
19
19
  export declare function createTracingHandle(baselineMs: number): Promise<TracingHandle>;
20
+ export interface Trace {
21
+ /**
22
+ * Attach to your top-level LangChain/LangGraph call:
23
+ * `await graph.invoke(input, { callbacks: trace.callbacks })`.
24
+ * Callbacks propagate to nested nodes automatically.
25
+ */
26
+ callbacks: any[];
27
+ /**
28
+ * Collect the captured timeline plus aggregated tokens and cost. Call once
29
+ * after the run completes; the result is memoized so repeat calls are safe.
30
+ * Spread the result into your `AgentResponse.metadata` to surface the
31
+ * per-scene cost/timeline waterfall in the report.
32
+ */
33
+ collect(): {
34
+ events: TimelineEvent[];
35
+ tokens?: {
36
+ input: number;
37
+ output: number;
38
+ };
39
+ cost?: CostBreakdown;
40
+ };
41
+ }
42
+ /**
43
+ * Public tracing helper for custom executors (i.e. agents not wired through
44
+ * the `langchain()` adapter). Create one per scene run, hand its `callbacks`
45
+ * to your LangChain/LangGraph invocation, then spread `collect()` into the
46
+ * response metadata.
47
+ *
48
+ * @example
49
+ * ```ts
50
+ * const trace = await createTrace({ model: env.OPENROUTER_MODEL });
51
+ * const plan = await generatePlan(input, { callbacks: trace.callbacks });
52
+ * return { text: render(plan), metadata: { model, tools, ...trace.collect() } };
53
+ * ```
54
+ */
55
+ export declare function createTrace(opts?: {
56
+ model?: string;
57
+ }): Promise<Trace>;
58
+ /**
59
+ * Aggregate token counts and cost across a timeline's model events.
60
+ * Provider-reported cost wins; otherwise the table-derived cost; otherwise
61
+ * cost is recomputed from `model` and the summed tokens. `fallbackTokens` is
62
+ * used only when no model event carried usage.
63
+ */
64
+ export declare function summarizeEvents(events: TimelineEvent[], model?: string, fallbackTokens?: {
65
+ input: number;
66
+ output: number;
67
+ }): {
68
+ tokens?: {
69
+ input: number;
70
+ output: number;
71
+ };
72
+ cost?: CostBreakdown;
73
+ };
@@ -128,6 +128,86 @@ export async function createTracingHandle(baselineMs) {
128
128
  },
129
129
  };
130
130
  }
131
/**
 * Public tracing helper for custom executors (i.e. agents not wired through
 * the `langchain()` adapter). Create one per scene run, pass its `callbacks`
 * to your LangChain/LangGraph invocation, then spread `collect()` into the
 * response metadata.
 *
 * The timing baseline handed to the tracing handle is `performance.now()` at
 * the moment this function is called.
 *
 * @param opts Optional settings. When `opts.model` is set it is passed to
 *   `summarizeEvents` instead of the model name reported by the drained handle.
 * @returns An object with the handle's `callbacks` and a `collect()` function
 *   that returns `{ events, tokens, cost }`. The first result is cached, so
 *   the handle is drained at most once.
 *
 * @example
 * ```ts
 * const trace = await createTrace({ model: env.OPENROUTER_MODEL });
 * const plan = await generatePlan(input, { callbacks: trace.callbacks });
 * return { text: render(plan), metadata: { model, tools, ...trace.collect() } };
 * ```
 */
export async function createTrace(opts) {
    const tracing = await createTracingHandle(performance.now());
    // Filled in by the first collect() call and reused afterwards.
    let snapshot;
    const collect = () => {
        if (!snapshot) {
            const drained = tracing.drain();
            const summary = summarizeEvents(drained.events, opts?.model ?? drained.modelName);
            snapshot = { events: drained.events, tokens: summary.tokens, cost: summary.cost };
        }
        return snapshot;
    };
    return { callbacks: tracing.callbacks, collect };
}
160
/**
 * Aggregate token counts and cost across a timeline's model events.
 *
 * Only events with `kind === "model"` are considered. Cost precedence:
 *   1. the sum of provider-reported costs, if any event carried one;
 *   2. otherwise the sum of table-derived costs;
 *   3. otherwise a cost recomputed via `computeCost` from `model` and the
 *      token totals (dropped when its source is "unavailable").
 *
 * @param events Timeline events to summarize.
 * @param model Model name used only for the recompute step.
 * @param fallbackTokens Token counts used only when no model event carried usage.
 * @returns `{ tokens, cost }`; either field may be `undefined`.
 */
export function summarizeEvents(events, model, fallbackTokens) {
    const usage = { input: 0, output: 0 };
    const usd = { provider: 0, table: 0 };
    const seen = { tokens: false, provider: false, table: false };
    for (const ev of events.filter((e) => e.kind === "model")) {
        if (ev.tokens) {
            seen.tokens = true;
            usage.input += ev.tokens.input;
            usage.output += ev.tokens.output;
        }
        const source = ev.cost?.source;
        if ((source === "provider" || source === "table") && ev.cost.totalUsd != null) {
            seen[source] = true;
            usd[source] += ev.cost.totalUsd;
        }
    }
    // Summed usage wins; the fallback applies only when no event reported tokens.
    const tokens = seen.tokens ? usage : fallbackTokens || undefined;
    const pickCost = () => {
        if (seen.provider)
            return { totalUsd: usd.provider, source: "provider" };
        if (seen.table)
            return { totalUsd: usd.table, source: "table" };
        if (!tokens || !model)
            return undefined;
        const computed = computeCost({
            model,
            inputTokens: tokens.input,
            outputTokens: tokens.output,
        });
        return computed.source !== "unavailable" ? computed : undefined;
    };
    return { tokens, cost: pickCost() };
}
131
211
  function now() {
132
212
  return performance.now();
133
213
  }
package/dist/context.js CHANGED
@@ -4,6 +4,7 @@ import { formatReport, writeReport, writeDiffEntry } from "./reporter";
4
4
  import { logger, c } from "./logger";
5
5
  import { loadConfig } from "./config";
6
6
  import { setPricingOverrides } from "./pricing";
7
+ import { renderTerminalWaterfall } from "./waterfall";
7
8
  import { PromisePool } from "@supercharge/promise-pool";
8
9
  export class SceneBuilder {
9
10
  _prompt;
@@ -129,6 +130,18 @@ export class AgentContext {
129
130
  const sigColor = sig >= 0.95 ? c.green : sig >= 0.80 ? c.yellow : c.red;
130
131
  logger.info(`${indent} ${c.dim("significance:")} ${sigColor(`${(sig * 100).toFixed(1)}%`)} ${c.dim(`(pass rate: ${((result.passRate ?? 0) * 100).toFixed(1)}%)`)}`);
131
132
  }
133
+ if (result.events && result.events.length > 0) {
134
+ const costLabel = result.costUsd != null
135
+ ? ` ${c.dim("·")} ${c.green(`$${Number(result.costUsd.toFixed(4))}`)}`
136
+ : "";
137
+ const tokLabel = result.tokens
138
+ ? ` ${c.dim(`(${result.tokens.input}→${result.tokens.output} tok)`)}`
139
+ : "";
140
+ logger.info(`${indent} ${c.dim("waterfall:")}${tokLabel}${costLabel}`);
141
+ for (const line of renderTerminalWaterfall(result.events, { indent: `${indent} ` })) {
142
+ logger.info(line);
143
+ }
144
+ }
132
145
  logger.debug(`${indent} response: ${result.response.text?.slice(0, 120)}`);
133
146
  };
134
147
  if (hasSuites) {
package/dist/index.d.ts CHANGED
@@ -3,11 +3,13 @@ import { SceneBuilder } from "./context";
3
3
  export { expect } from "./assertions";
4
4
  export { logger } from "./logger";
5
5
  export { defineConfig } from "./config";
6
+ export { createTrace, summarizeEvents } from "./adapters/tracing";
7
+ export type { Trace } from "./adapters/tracing";
6
8
  export type { AgestConfig, JudgeConfig, JudgeExecutor } from "./config";
7
9
  export type { LogLevel } from "./logger";
8
10
  export type { AgentExpectation, AgentMatchers } from "./assertions";
9
11
  export type { JudgeCriteria } from "./judge";
10
- export type { AgentExecutor, ExecutorOptions, AgentResponse, AgentReport, SceneResult, RunResult, JudgeVerdict, JudgeResult, HookFn, } from "./types";
12
+ export type { AgentExecutor, ExecutorOptions, AgentResponse, AgentReport, SceneResult, RunResult, JudgeVerdict, JudgeResult, HookFn, TimelineEvent, TimelineEventKind, CostBreakdown, CostSource, } from "./types";
11
13
  export interface AgentOptions {
12
14
  name?: string;
13
15
  }
package/dist/index.js CHANGED
@@ -2,6 +2,7 @@ import { AgentContext, setContext, getContext } from "./context";
2
2
  export { expect } from "./assertions";
3
3
  export { logger } from "./logger";
4
4
  export { defineConfig } from "./config";
5
+ export { createTrace, summarizeEvents } from "./adapters/tracing";
5
6
  export function scene(prompt) {
6
7
  return getContext().registerScene(prompt);
7
8
  }
package/dist/preview.js CHANGED
@@ -189,6 +189,90 @@ function renderFailedCases(cases) {
189
189
  </ul>
190
190
  </details>`;
191
191
  }
192
+ const WF_MODEL = "#38bdf8";
193
+ const WF_TOOL = "#facc15";
194
+ const WF_ERROR = "#f87171";
195
/**
 * Format a USD amount for the HTML report.
 *
 * Amounts are rounded to 4 decimals with trailing zeros dropped. A non-zero
 * amount that would round to zero (common for single calls to cheap models)
 * is instead printed with enough decimals to show about two significant
 * digits, so it is not mistaken for a zero cost.
 *
 * @param n Amount in USD.
 * @returns A string such as "$0", "$0.0123" or "$0.000012".
 */
function fmtUsdHtml(n) {
    if (n === 0)
        return "$0";
    const rounded = Number(n.toFixed(4));
    if (rounded === 0 && Number.isFinite(n)) {
        // Position of the first significant digit, plus one more digit.
        const decimals = Math.min(20, Math.ceil(-Math.log10(Math.abs(n))) + 1);
        return "$" + n.toFixed(decimals).replace(/\.?0+$/, "");
    }
    return "$" + rounded.toString();
}
200
/**
 * Chrome-DevTools-style waterfall for a report's per-scene timelines.
 *
 * Each event becomes a row whose bar is absolutely positioned inside a track:
 * `left` comes from the event's start relative to the earliest start in the
 * scene, `width` from its duration relative to the scene's full span. Bars
 * have a minimum width of 0.6% so very short events stay visible; `left` is
 * clamped so that the minimum never pushes a bar past the end of the track.
 *
 * @param report Parsed report; only `report.scenes` is read.
 * @returns The HTML fragment, or "" if the report carries no timeline data
 *   (older reports / non-traced executors).
 */
function renderWaterfallHtml(report) {
    const scenes = (report.scenes ?? []).filter((s) => s.timeline && s.timeline.length > 0);
    if (scenes.length === 0)
        return "";
    const sceneBlocks = scenes
        .map((scene) => {
        const events = scene.timeline;
        const t0 = Math.min(...events.map((e) => e.startMs));
        const tEnd = Math.max(...events.map((e) => e.startMs + e.durationMs));
        // At least 1ms so a scene of zero-length events does not divide by zero.
        const span = Math.max(1, tEnd - t0);
        const rows = events
            .map((e) => {
            const width = Math.min(100, Math.max(0.6, (e.durationMs / span) * 100));
            // Keep the whole bar inside the track even when the minimum width applies.
            const left = Math.min(((e.startMs - t0) / span) * 100, 100 - width);
            const bg = e.error ? WF_ERROR : e.kind === "model" ? WF_MODEL : WF_TOOL;
            const icon = e.kind === "model" ? "◆" : "▸";
            const tip = [
                `${e.kind}: ${e.name}`,
                `start ${Math.round(e.startMs)}ms · ${Math.round(e.durationMs)}ms`,
                e.tokens ? `${e.tokens.input}→${e.tokens.output} tok` : "",
                e.costUsd != null ? fmtUsdHtml(e.costUsd) : "",
                e.error ? `error: ${e.error}` : "",
            ]
                .filter(Boolean)
                .join(" · ");
            const cost = e.costUsd != null ? fmtUsdHtml(e.costUsd) : "";
            return `
          <div class="flex items-center gap-2 text-[11px] leading-5">
            <span class="w-44 shrink-0 truncate ${e.error ? "text-red-400" : "text-zinc-400"}" title="${escHtml(e.name)}">
              <span style="color:${bg}">${icon}</span> ${escHtml(e.name)}
            </span>
            <div class="relative flex-1 h-3 bg-zinc-800/40 rounded">
              <div class="absolute top-0 h-3 rounded" style="left:${left.toFixed(2)}%;width:${width.toFixed(2)}%;background:${bg}" title="${escHtml(tip)}"></div>
            </div>
            <span class="w-16 shrink-0 text-right text-zinc-500">${Math.round(e.durationMs)}ms</span>
            <span class="w-16 shrink-0 text-right text-zinc-500">${cost}</span>
          </div>`;
        })
            .join("\n");
        // Scene-level summary shown to the right of the prompt.
        const meta = [
            scene.tokens ? `${scene.tokens.input}→${scene.tokens.output} tok` : "",
            scene.costUsd != null ? fmtUsdHtml(scene.costUsd) : "",
            scene.costSource ? scene.costSource : "",
            scene.durationMs != null ? `${Math.round(scene.durationMs)}ms` : "",
        ]
            .filter(Boolean)
            .join(" · ");
        return `
        <div>
          <div class="flex items-center justify-between mb-1.5">
            <span class="text-xs text-zinc-300 truncate" title="${escHtml(scene.prompt)}">${escHtml(scene.prompt)}</span>
            <span class="text-[11px] text-zinc-500 shrink-0 ml-3">${escHtml(meta)}</span>
          </div>
          <div class="space-y-1">${rows}</div>
        </div>`;
    })
        .join("\n");
    return `
    <details class="mt-2" open>
      <summary class="text-xs text-sky-400 cursor-pointer hover:text-sky-300 select-none">
        waterfall &middot; ${scenes.length} scene${scenes.length !== 1 ? "s" : ""}
      </summary>
      <div class="mt-3 mb-2 pl-3 border-l border-zinc-800 space-y-5">
        <div class="flex gap-4 text-[10px] text-zinc-500">
          <span><span style="color:${WF_MODEL}">◆</span> model</span>
          <span><span style="color:${WF_TOOL}">▸</span> tool</span>
        </div>
        ${sceneBlocks}
      </div>
    </details>`;
}
192
276
  function renderRunRow(entry, idx) {
193
277
  const { report, delta, diffLines } = entry;
194
278
  const pct = report.successRate * 100;
@@ -233,6 +317,7 @@ function renderRunRow(entry, idx) {
233
317
  </div>
234
318
  <div class="ml-10 mt-0.5 flex gap-3 flex-wrap">${dimTags}</div>
235
319
  ${diffHtml}
320
+ <div class="ml-10">${renderWaterfallHtml(report)}</div>
236
321
  </div>`;
237
322
  }
238
323
  // ---------------------------------------------------------------------------
@@ -990,6 +1075,12 @@ function renderSingleRun(report) {
990
1075
  <p class="text-zinc-300">${Math.round(report.averageOutputTokensPerCase)}</p>
991
1076
  </div>`
992
1077
  : ""}
1078
+ ${report.totalCostUsd != null
1079
+ ? `<div>
1080
+ <span class="text-zinc-500">Total Cost</span>
1081
+ <p class="text-zinc-300">${fmtUsdHtml(report.totalCostUsd)}${report.totalInputTokens != null ? ` <span class="text-zinc-600">· ${report.totalInputTokens}→${report.totalOutputTokens} tok</span>` : ""}</p>
1082
+ </div>`
1083
+ : ""}
993
1084
  ${report.tools && report.tools.length > 0
994
1085
  ? `<div>
995
1086
  <span class="text-zinc-500">Tools</span>
@@ -997,6 +1088,7 @@ function renderSingleRun(report) {
997
1088
  </div>`
998
1089
  : ""}
999
1090
  </div>
1091
+ ${renderWaterfallHtml(report)}
1000
1092
  ${failedSection}
1001
1093
  </div>`;
1002
1094
  }
package/dist/reports.d.ts CHANGED
@@ -9,6 +9,31 @@ export interface ParsedSuiteResult {
9
9
  response?: string;
10
10
  }>;
11
11
  }
12
+ export interface ParsedTimelineEvent {
13
+ kind: "model" | "tool";
14
+ name: string;
15
+ startMs: number;
16
+ durationMs: number;
17
+ tokens?: {
18
+ input: number;
19
+ output: number;
20
+ };
21
+ costUsd?: number;
22
+ costSource?: string;
23
+ runIndex?: number;
24
+ error?: string;
25
+ }
26
+ export interface ParsedScene {
27
+ prompt: string;
28
+ durationMs?: number;
29
+ tokens?: {
30
+ input: number;
31
+ output: number;
32
+ };
33
+ costUsd?: number;
34
+ costSource?: string;
35
+ timeline?: ParsedTimelineEvent[];
36
+ }
12
37
  export interface ParsedReport {
13
38
  name?: string;
14
39
  systemPromptHash?: string;
@@ -28,6 +53,10 @@ export interface ParsedReport {
28
53
  timestamp: string;
29
54
  averageInputTokensPerCase?: number;
30
55
  averageOutputTokensPerCase?: number;
56
+ totalInputTokens?: number;
57
+ totalOutputTokens?: number;
58
+ totalCostUsd?: number;
59
+ scenes?: ParsedScene[];
31
60
  suites?: ParsedSuiteResult[];
32
61
  source: string;
33
62
  }
@@ -44,6 +73,13 @@ export declare function parseFailedCases(content: string): Array<{
44
73
  }>;
45
74
  export declare function parseDimensions(content: string): Record<string, string> | undefined;
46
75
  export declare function parseSuites(content: string): ParsedSuiteResult[] | undefined;
76
+ /**
77
+ * Parse the `scenes:` block (per-scene tokens/cost + timeline waterfall) from a
78
+ * report. The emitted format is fixed (see reporter.ts `renderSceneObservability`),
79
+ * so this hand-parses by indentation: scenes start at 8 spaces, scene fields at
80
+ * 10, timeline events at 14, event fields at 16.
81
+ */
82
+ export declare function parseScenes(content: string): ParsedScene[] | undefined;
47
83
  export declare function parseReport(content: string, source: string): ParsedReport;
48
84
  export declare function findReports(dir: string, depth?: number): Promise<string[]>;
49
85
  export declare function loadDiffEntry(hash: string): Promise<DiffEntry | null>;
package/dist/reports.js CHANGED
@@ -124,6 +124,121 @@ export function parseSuites(content) {
124
124
  suites.push(current);
125
125
  return suites.length > 0 ? suites : undefined;
126
126
  }
127
/**
 * Extract token counts from a value shaped like `input: <int>, output: <int>`
 * (surrounding text such as braces is ignored).
 *
 * @param raw Text to the right of a `tokens:` key.
 * @returns `{ input, output }` as integers, or `undefined` when the pattern is absent.
 */
function parseTokens(raw) {
    const hit = raw.match(/input:\s*(\d+),\s*output:\s*(\d+)/);
    if (hit) {
        const [, inputDigits, outputDigits] = hit;
        return { input: parseInt(inputDigits, 10), output: parseInt(outputDigits, 10) };
    }
    return undefined;
}
133
+ /**
134
+ * Parse the `scenes:` block (per-scene tokens/cost + timeline waterfall) from a
135
+ * report. The emitted format is fixed (see reporter.ts `renderSceneObservability`),
136
+ * so this hand-parses by indentation: scenes start at 8 spaces, scene fields at
137
+ * 10, timeline events at 14, event fields at 16.
138
+ */
139
+ export function parseScenes(content) {
140
+ const lines = content.split("\n");
141
+ const startIdx = lines.findIndex((l) => l === " scenes:");
142
+ if (startIdx === -1)
143
+ return undefined;
144
+ const scenes = [];
145
+ let scene;
146
+ let inTimeline = false;
147
+ let event;
148
+ const pushEvent = () => {
149
+ if (event && scene) {
150
+ (scene.timeline ??= []).push(event);
151
+ event = undefined;
152
+ }
153
+ };
154
+ const pushScene = () => {
155
+ pushEvent();
156
+ if (scene)
157
+ scenes.push(scene);
158
+ scene = undefined;
159
+ inTimeline = false;
160
+ };
161
+ for (let i = startIdx + 1; i < lines.length; i++) {
162
+ const line = lines[i];
163
+ if (line.trim() === "")
164
+ continue;
165
+ const indent = line.length - line.trimStart().length;
166
+ // A new top-level agent field (<= 4 spaces, not part of scenes) ends the block.
167
+ if (indent <= 4)
168
+ break;
169
+ const sceneStart = line.match(/^ - prompt: "(.*)"$/);
170
+ if (sceneStart) {
171
+ pushScene();
172
+ scene = { prompt: sceneStart[1].replace(/\\"/g, '"').replace(/\\n/g, "\n") };
173
+ continue;
174
+ }
175
+ if (!scene)
176
+ continue;
177
+ const eventStart = line.match(/^ - kind: (model|tool)$/);
178
+ if (eventStart) {
179
+ pushEvent();
180
+ event = { kind: eventStart[1], name: "", startMs: 0, durationMs: 0 };
181
+ inTimeline = true;
182
+ continue;
183
+ }
184
+ if (line.match(/^ timeline:$/)) {
185
+ inTimeline = true;
186
+ continue;
187
+ }
188
+ const trimmed = line.trim();
189
+ const target = inTimeline && event ? "event" : "scene";
190
+ const kv = trimmed.match(/^([a-z_]+):\s*(.*)$/);
191
+ if (!kv)
192
+ continue;
193
+ const [, key, value] = kv;
194
+ if (target === "event" && event) {
195
+ switch (key) {
196
+ case "name":
197
+ event.name = value.replace(/^"|"$/g, "").replace(/\\"/g, '"');
198
+ break;
199
+ case "start_ms":
200
+ event.startMs = parseFloat(value);
201
+ break;
202
+ case "duration_ms":
203
+ event.durationMs = parseFloat(value);
204
+ break;
205
+ case "tokens":
206
+ event.tokens = parseTokens(value);
207
+ break;
208
+ case "cost_usd":
209
+ event.costUsd = parseFloat(value);
210
+ break;
211
+ case "cost_source":
212
+ event.costSource = value;
213
+ break;
214
+ case "run_index":
215
+ event.runIndex = parseInt(value, 10);
216
+ break;
217
+ case "error":
218
+ event.error = value.replace(/^"|"$/g, "").replace(/\\"/g, '"');
219
+ break;
220
+ }
221
+ }
222
+ else if (scene) {
223
+ switch (key) {
224
+ case "duration_ms":
225
+ scene.durationMs = parseFloat(value);
226
+ break;
227
+ case "tokens":
228
+ scene.tokens = parseTokens(value);
229
+ break;
230
+ case "cost_usd":
231
+ scene.costUsd = parseFloat(value);
232
+ break;
233
+ case "cost_source":
234
+ scene.costSource = value;
235
+ break;
236
+ }
237
+ }
238
+ }
239
+ pushScene();
240
+ return scenes.length > 0 ? scenes : undefined;
241
+ }
127
242
  export function parseReport(content, source) {
128
243
  const num = (key, fallback = 0) => parseFloat(extractField(content, key) ?? String(fallback));
129
244
  const avgIn = extractField(content, "average_input_tokens_per_case");
@@ -158,9 +273,17 @@ export function parseReport(content, source) {
158
273
  timestamp: extractField(content, "timestamp") ?? "",
159
274
  averageInputTokensPerCase: avgIn != null ? parseFloat(avgIn) : undefined,
160
275
  averageOutputTokensPerCase: avgOut != null ? parseFloat(avgOut) : undefined,
276
+ totalInputTokens: optNum("total_input_tokens"),
277
+ totalOutputTokens: optNum("total_output_tokens"),
278
+ totalCostUsd: optNum("total_cost_usd"),
279
+ scenes: parseScenes(content),
161
280
  suites: parseSuites(content),
162
281
  source,
163
282
  };
283
+ function optNum(key) {
284
+ const v = extractField(content, key);
285
+ return v != null ? parseFloat(v) : undefined;
286
+ }
164
287
  }
165
288
  export async function findReports(dir, depth = 0) {
166
289
  if (depth > 6)
@@ -0,0 +1,11 @@
1
+ import type { TimelineEvent } from "./types";
2
+ /**
3
+ * Render a Chrome-DevTools-style waterfall of timeline events as colored
4
+ * terminal lines. Bars are positioned by `startMs` and sized by `durationMs`
5
+ * relative to the full span of the scene. Returns one string per event row
6
+ * (already indented), or `[]` when there's nothing to draw.
7
+ */
8
+ export declare function renderTerminalWaterfall(events: TimelineEvent[], opts?: {
9
+ width?: number;
10
+ indent?: string;
11
+ }): string[];
@@ -0,0 +1,45 @@
1
+ import { c } from "./logger";
2
+ const BLOCK = "█";
3
+ const THIN = "▏";
4
/**
 * Shorten `s` to at most `n` characters, ending with "…" when anything was cut.
 *
 * Length is counted in Unicode code points, so a character stored as a
 * surrogate pair (e.g. an emoji) is never split in half. A budget below 1
 * yields the empty string.
 *
 * @param s Text to shorten.
 * @param n Maximum number of characters in the result, ellipsis included.
 * @returns `s` unchanged when it fits, otherwise the first `n - 1` characters plus "…".
 */
function truncate(s, n) {
    if (n < 1)
        return "";
    // The UTF-16 length is an upper bound on the code point count.
    if (s.length <= n)
        return s;
    const points = Array.from(s);
    return points.length > n ? points.slice(0, n - 1).join("") + "…" : s;
}
7
/**
 * Format a USD amount for terminal output.
 *
 * Amounts are rounded to 4 decimals with trailing zeros dropped. A non-zero
 * amount that would round to zero is instead printed with enough decimals to
 * show about two significant digits, so a tiny per-call cost is not displayed
 * as "$0".
 *
 * @param n Amount in USD.
 * @returns A string such as "$0", "$0.0123" or "$0.000012".
 */
function fmtUsd(n) {
    if (n === 0)
        return "$0";
    const rounded = Number(n.toFixed(4));
    if (rounded === 0 && Number.isFinite(n)) {
        // Position of the first significant digit, plus one more digit.
        const decimals = Math.min(20, Math.ceil(-Math.log10(Math.abs(n))) + 1);
        return "$" + n.toFixed(decimals).replace(/\.?0+$/, "");
    }
    return "$" + rounded.toString();
}
12
/**
 * Render a Chrome-DevTools-style waterfall of timeline events as colored
 * terminal lines. Bars are positioned by `startMs` and sized by `durationMs`
 * relative to the full span of the scene.
 *
 * An event's end is taken from `endMs` when present and otherwise derived as
 * `startMs + durationMs`, so events without an explicit end still produce a
 * finite span.
 *
 * @param events Timeline events to draw.
 * @param opts `width` is the bar area in character cells (default 28; values
 *   that are not finite fall back to the default, others are floored and
 *   raised to at least 1). `indent` is prepended to every row (default "").
 * @returns One string per event row (already indented), or `[]` when there's
 *   nothing to draw.
 */
export function renderTerminalWaterfall(events, opts = {}) {
    if (!events || events.length === 0)
        return [];
    const requested = opts.width ?? 28;
    // Array(width) throws on fractional, negative or NaN lengths.
    const width = Number.isFinite(requested) ? Math.max(1, Math.floor(requested)) : 28;
    const indent = opts.indent ?? "";
    const endOf = (e) => e.endMs ?? (e.startMs + e.durationMs);
    const t0 = Math.min(...events.map((e) => e.startMs));
    const tEnd = Math.max(...events.map(endOf));
    // At least 1ms so zero-length timelines do not divide by zero.
    const span = Math.max(1, tEnd - t0);
    const nameWidth = 16;
    return events.map((e) => {
        const lead = Math.min(width - 1, Math.round(((e.startMs - t0) / span) * width));
        const barLen = Math.max(1, Math.round((e.durationMs / span) * width));
        // Instant events get a thin tick instead of a full block.
        const fill = e.durationMs === 0 ? THIN : BLOCK.repeat(Math.min(barLen, width - lead));
        const cells = Array(width).fill(" ");
        for (let i = 0; i < fill.length && lead + i < width; i++) {
            cells[lead + i] = fill[i];
        }
        let bar = cells.join("");
        const color = e.error ? c.red : e.kind === "model" ? c.cyan : c.yellow;
        bar = color(bar);
        const kindLabel = (e.kind === "model" ? "model" : "tool ").padEnd(5);
        const nameLabel = truncate(e.name, nameWidth).padEnd(nameWidth);
        const dur = `${Math.round(e.durationMs)}ms`.padStart(7);
        const cost = e.cost?.totalUsd != null ? ` ${fmtUsd(e.cost.totalUsd)}` : "";
        const err = e.error ? ` ${c.red("✗ " + truncate(e.error, 40))}` : "";
        return `${indent}${c.dim(kindLabel)} ${nameLabel} ${bar} ${c.dim(dur)}${c.dim(cost)}${err}`;
    });
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sebastiantuyu/agest",
3
- "version": "0.3.3-next.4",
3
+ "version": "0.3.3-next.6",
4
4
  "description": "A testing library for agents",
5
5
  "repository": {
6
6
  "type": "git",