@sebastiantuyu/agest 0.3.3-next.5 → 0.3.3-next.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/context.js CHANGED
@@ -4,6 +4,7 @@ import { formatReport, writeReport, writeDiffEntry } from "./reporter";
4
4
  import { logger, c } from "./logger";
5
5
  import { loadConfig } from "./config";
6
6
  import { setPricingOverrides } from "./pricing";
7
+ import { renderTerminalWaterfall } from "./waterfall";
7
8
  import { PromisePool } from "@supercharge/promise-pool";
8
9
  export class SceneBuilder {
9
10
  _prompt;
@@ -129,6 +130,18 @@ export class AgentContext {
129
130
  const sigColor = sig >= 0.95 ? c.green : sig >= 0.80 ? c.yellow : c.red;
130
131
  logger.info(`${indent} ${c.dim("significance:")} ${sigColor(`${(sig * 100).toFixed(1)}%`)} ${c.dim(`(pass rate: ${((result.passRate ?? 0) * 100).toFixed(1)}%)`)}`);
131
132
  }
133
+ if (result.events && result.events.length > 0) {
134
+ const costLabel = result.costUsd != null
135
+ ? ` ${c.dim("·")} ${c.green(`$${Number(result.costUsd.toFixed(4))}`)}`
136
+ : "";
137
+ const tokLabel = result.tokens
138
+ ? ` ${c.dim(`(${result.tokens.input}→${result.tokens.output} tok)`)}`
139
+ : "";
140
+ logger.info(`${indent} ${c.dim("waterfall:")}${tokLabel}${costLabel}`);
141
+ for (const line of renderTerminalWaterfall(result.events, { indent: `${indent} ` })) {
142
+ logger.info(line);
143
+ }
144
+ }
132
145
  logger.debug(`${indent} response: ${result.response.text?.slice(0, 120)}`);
133
146
  };
134
147
  if (hasSuites) {
package/dist/preview.js CHANGED
@@ -189,6 +189,90 @@ function renderFailedCases(cases) {
189
189
  </ul>
190
190
  </details>`;
191
191
  }
192
+ const WF_MODEL = "#38bdf8";
193
+ const WF_TOOL = "#facc15";
194
+ const WF_ERROR = "#f87171";
195
+ function fmtUsdHtml(n) {
196
+ if (n === 0)
197
+ return "$0";
198
+ return "$" + Number(n.toFixed(4)).toString();
199
+ }
200
+ /**
201
+ * Chrome-DevTools-style waterfall for a report's per-scene timelines. Bars are
202
+ * absolutely positioned within a track by start_ms / duration_ms. Returns "" if
203
+ * the report carries no timeline data (older reports / non-traced executors).
204
+ */
205
+ function renderWaterfallHtml(report) {
206
+ const scenes = (report.scenes ?? []).filter((s) => s.timeline && s.timeline.length > 0);
207
+ if (scenes.length === 0)
208
+ return "";
209
+ const sceneBlocks = scenes
210
+ .map((scene) => {
211
+ const events = scene.timeline;
212
+ const t0 = Math.min(...events.map((e) => e.startMs));
213
+ const tEnd = Math.max(...events.map((e) => e.startMs + e.durationMs));
214
+ const span = Math.max(1, tEnd - t0);
215
+ const rows = events
216
+ .map((e) => {
217
+ const left = ((e.startMs - t0) / span) * 100;
218
+ const width = Math.max(0.6, (e.durationMs / span) * 100);
219
+ const bg = e.error ? WF_ERROR : e.kind === "model" ? WF_MODEL : WF_TOOL;
220
+ const icon = e.kind === "model" ? "◆" : "▸";
221
+ const tip = [
222
+ `${e.kind}: ${e.name}`,
223
+ `start ${Math.round(e.startMs)}ms · ${Math.round(e.durationMs)}ms`,
224
+ e.tokens ? `${e.tokens.input}→${e.tokens.output} tok` : "",
225
+ e.costUsd != null ? fmtUsdHtml(e.costUsd) : "",
226
+ e.error ? `error: ${e.error}` : "",
227
+ ]
228
+ .filter(Boolean)
229
+ .join(" · ");
230
+ const cost = e.costUsd != null ? fmtUsdHtml(e.costUsd) : "";
231
+ return `
232
+ <div class="flex items-center gap-2 text-[11px] leading-5">
233
+ <span class="w-44 shrink-0 truncate ${e.error ? "text-red-400" : "text-zinc-400"}" title="${escHtml(e.name)}">
234
+ <span style="color:${bg}">${icon}</span> ${escHtml(e.name)}
235
+ </span>
236
+ <div class="relative flex-1 h-3 bg-zinc-800/40 rounded">
237
+ <div class="absolute top-0 h-3 rounded" style="left:${left.toFixed(2)}%;width:${width.toFixed(2)}%;background:${bg}" title="${escHtml(tip)}"></div>
238
+ </div>
239
+ <span class="w-16 shrink-0 text-right text-zinc-500">${Math.round(e.durationMs)}ms</span>
240
+ <span class="w-16 shrink-0 text-right text-zinc-500">${cost}</span>
241
+ </div>`;
242
+ })
243
+ .join("\n");
244
+ const meta = [
245
+ scene.tokens ? `${scene.tokens.input}→${scene.tokens.output} tok` : "",
246
+ scene.costUsd != null ? fmtUsdHtml(scene.costUsd) : "",
247
+ scene.costSource ? scene.costSource : "",
248
+ scene.durationMs != null ? `${Math.round(scene.durationMs)}ms` : "",
249
+ ]
250
+ .filter(Boolean)
251
+ .join(" · ");
252
+ return `
253
+ <div>
254
+ <div class="flex items-center justify-between mb-1.5">
255
+ <span class="text-xs text-zinc-300 truncate" title="${escHtml(scene.prompt)}">${escHtml(scene.prompt)}</span>
256
+ <span class="text-[11px] text-zinc-500 shrink-0 ml-3">${escHtml(meta)}</span>
257
+ </div>
258
+ <div class="space-y-1">${rows}</div>
259
+ </div>`;
260
+ })
261
+ .join("\n");
262
+ return `
263
+ <details class="mt-2" open>
264
+ <summary class="text-xs text-sky-400 cursor-pointer hover:text-sky-300 select-none">
265
+ waterfall &middot; ${scenes.length} scene${scenes.length !== 1 ? "s" : ""}
266
+ </summary>
267
+ <div class="mt-3 mb-2 pl-3 border-l border-zinc-800 space-y-5">
268
+ <div class="flex gap-4 text-[10px] text-zinc-500">
269
+ <span><span style="color:${WF_MODEL}">◆</span> model</span>
270
+ <span><span style="color:${WF_TOOL}">▸</span> tool</span>
271
+ </div>
272
+ ${sceneBlocks}
273
+ </div>
274
+ </details>`;
275
+ }
192
276
  function renderRunRow(entry, idx) {
193
277
  const { report, delta, diffLines } = entry;
194
278
  const pct = report.successRate * 100;
@@ -233,6 +317,7 @@ function renderRunRow(entry, idx) {
233
317
  </div>
234
318
  <div class="ml-10 mt-0.5 flex gap-3 flex-wrap">${dimTags}</div>
235
319
  ${diffHtml}
320
+ <div class="ml-10">${renderWaterfallHtml(report)}</div>
236
321
  </div>`;
237
322
  }
238
323
  // ---------------------------------------------------------------------------
@@ -990,6 +1075,12 @@ function renderSingleRun(report) {
990
1075
  <p class="text-zinc-300">${Math.round(report.averageOutputTokensPerCase)}</p>
991
1076
  </div>`
992
1077
  : ""}
1078
+ ${report.totalCostUsd != null
1079
+ ? `<div>
1080
+ <span class="text-zinc-500">Total Cost</span>
1081
+ <p class="text-zinc-300">${fmtUsdHtml(report.totalCostUsd)}${report.totalInputTokens != null ? ` <span class="text-zinc-600">· ${report.totalInputTokens}→${report.totalOutputTokens} tok</span>` : ""}</p>
1082
+ </div>`
1083
+ : ""}
993
1084
  ${report.tools && report.tools.length > 0
994
1085
  ? `<div>
995
1086
  <span class="text-zinc-500">Tools</span>
@@ -997,6 +1088,7 @@ function renderSingleRun(report) {
997
1088
  </div>`
998
1089
  : ""}
999
1090
  </div>
1091
+ ${renderWaterfallHtml(report)}
1000
1092
  ${failedSection}
1001
1093
  </div>`;
1002
1094
  }
package/dist/reports.d.ts CHANGED
@@ -9,6 +9,31 @@ export interface ParsedSuiteResult {
9
9
  response?: string;
10
10
  }>;
11
11
  }
12
+ export interface ParsedTimelineEvent {
13
+ kind: "model" | "tool";
14
+ name: string;
15
+ startMs: number;
16
+ durationMs: number;
17
+ tokens?: {
18
+ input: number;
19
+ output: number;
20
+ };
21
+ costUsd?: number;
22
+ costSource?: string;
23
+ runIndex?: number;
24
+ error?: string;
25
+ }
26
+ export interface ParsedScene {
27
+ prompt: string;
28
+ durationMs?: number;
29
+ tokens?: {
30
+ input: number;
31
+ output: number;
32
+ };
33
+ costUsd?: number;
34
+ costSource?: string;
35
+ timeline?: ParsedTimelineEvent[];
36
+ }
12
37
  export interface ParsedReport {
13
38
  name?: string;
14
39
  systemPromptHash?: string;
@@ -28,6 +53,10 @@ export interface ParsedReport {
28
53
  timestamp: string;
29
54
  averageInputTokensPerCase?: number;
30
55
  averageOutputTokensPerCase?: number;
56
+ totalInputTokens?: number;
57
+ totalOutputTokens?: number;
58
+ totalCostUsd?: number;
59
+ scenes?: ParsedScene[];
31
60
  suites?: ParsedSuiteResult[];
32
61
  source: string;
33
62
  }
@@ -44,6 +73,13 @@ export declare function parseFailedCases(content: string): Array<{
44
73
  }>;
45
74
  export declare function parseDimensions(content: string): Record<string, string> | undefined;
46
75
  export declare function parseSuites(content: string): ParsedSuiteResult[] | undefined;
76
+ /**
77
+ * Parse the `scenes:` block (per-scene tokens/cost + timeline waterfall) from a
78
+ * report. The emitted format is fixed (see reporter.ts `renderSceneObservability`),
79
+ * so this hand-parses by indentation: scenes start at 8 spaces, scene fields at
80
+ * 10, timeline events at 14, event fields at 16.
81
+ */
82
+ export declare function parseScenes(content: string): ParsedScene[] | undefined;
47
83
  export declare function parseReport(content: string, source: string): ParsedReport;
48
84
  export declare function findReports(dir: string, depth?: number): Promise<string[]>;
49
85
  export declare function loadDiffEntry(hash: string): Promise<DiffEntry | null>;
package/dist/reports.js CHANGED
@@ -124,6 +124,121 @@ export function parseSuites(content) {
124
124
  suites.push(current);
125
125
  return suites.length > 0 ? suites : undefined;
126
126
  }
127
+ function parseTokens(raw) {
128
+ const m = raw.match(/input:\s*(\d+),\s*output:\s*(\d+)/);
129
+ if (!m)
130
+ return undefined;
131
+ return { input: parseInt(m[1], 10), output: parseInt(m[2], 10) };
132
+ }
133
+ /**
134
+ * Parse the `scenes:` block (per-scene tokens/cost + timeline waterfall) from a
135
+ * report. The emitted format is fixed (see reporter.ts `renderSceneObservability`),
136
+ * so this hand-parses by indentation: scenes start at 8 spaces, scene fields at
137
+ * 10, timeline events at 14, event fields at 16.
138
+ */
139
+ export function parseScenes(content) {
140
+ const lines = content.split("\n");
141
+ const startIdx = lines.findIndex((l) => l === " scenes:");
142
+ if (startIdx === -1)
143
+ return undefined;
144
+ const scenes = [];
145
+ let scene;
146
+ let inTimeline = false;
147
+ let event;
148
+ const pushEvent = () => {
149
+ if (event && scene) {
150
+ (scene.timeline ??= []).push(event);
151
+ event = undefined;
152
+ }
153
+ };
154
+ const pushScene = () => {
155
+ pushEvent();
156
+ if (scene)
157
+ scenes.push(scene);
158
+ scene = undefined;
159
+ inTimeline = false;
160
+ };
161
+ for (let i = startIdx + 1; i < lines.length; i++) {
162
+ const line = lines[i];
163
+ if (line.trim() === "")
164
+ continue;
165
+ const indent = line.length - line.trimStart().length;
166
+ // A new top-level agent field (<= 4 spaces, not part of scenes) ends the block.
167
+ if (indent <= 4)
168
+ break;
169
+ const sceneStart = line.match(/^ - prompt: "(.*)"$/);
170
+ if (sceneStart) {
171
+ pushScene();
172
+ scene = { prompt: sceneStart[1].replace(/\\"/g, '"').replace(/\\n/g, "\n") };
173
+ continue;
174
+ }
175
+ if (!scene)
176
+ continue;
177
+ const eventStart = line.match(/^ - kind: (model|tool)$/);
178
+ if (eventStart) {
179
+ pushEvent();
180
+ event = { kind: eventStart[1], name: "", startMs: 0, durationMs: 0 };
181
+ inTimeline = true;
182
+ continue;
183
+ }
184
+ if (line.match(/^ timeline:$/)) {
185
+ inTimeline = true;
186
+ continue;
187
+ }
188
+ const trimmed = line.trim();
189
+ const target = inTimeline && event ? "event" : "scene";
190
+ const kv = trimmed.match(/^([a-z_]+):\s*(.*)$/);
191
+ if (!kv)
192
+ continue;
193
+ const [, key, value] = kv;
194
+ if (target === "event" && event) {
195
+ switch (key) {
196
+ case "name":
197
+ event.name = value.replace(/^"|"$/g, "").replace(/\\"/g, '"');
198
+ break;
199
+ case "start_ms":
200
+ event.startMs = parseFloat(value);
201
+ break;
202
+ case "duration_ms":
203
+ event.durationMs = parseFloat(value);
204
+ break;
205
+ case "tokens":
206
+ event.tokens = parseTokens(value);
207
+ break;
208
+ case "cost_usd":
209
+ event.costUsd = parseFloat(value);
210
+ break;
211
+ case "cost_source":
212
+ event.costSource = value;
213
+ break;
214
+ case "run_index":
215
+ event.runIndex = parseInt(value, 10);
216
+ break;
217
+ case "error":
218
+ event.error = value.replace(/^"|"$/g, "").replace(/\\"/g, '"');
219
+ break;
220
+ }
221
+ }
222
+ else if (scene) {
223
+ switch (key) {
224
+ case "duration_ms":
225
+ scene.durationMs = parseFloat(value);
226
+ break;
227
+ case "tokens":
228
+ scene.tokens = parseTokens(value);
229
+ break;
230
+ case "cost_usd":
231
+ scene.costUsd = parseFloat(value);
232
+ break;
233
+ case "cost_source":
234
+ scene.costSource = value;
235
+ break;
236
+ }
237
+ }
238
+ }
239
+ pushScene();
240
+ return scenes.length > 0 ? scenes : undefined;
241
+ }
127
242
  export function parseReport(content, source) {
128
243
  const num = (key, fallback = 0) => parseFloat(extractField(content, key) ?? String(fallback));
129
244
  const avgIn = extractField(content, "average_input_tokens_per_case");
@@ -158,9 +273,17 @@ export function parseReport(content, source) {
158
273
  timestamp: extractField(content, "timestamp") ?? "",
159
274
  averageInputTokensPerCase: avgIn != null ? parseFloat(avgIn) : undefined,
160
275
  averageOutputTokensPerCase: avgOut != null ? parseFloat(avgOut) : undefined,
276
+ totalInputTokens: optNum("total_input_tokens"),
277
+ totalOutputTokens: optNum("total_output_tokens"),
278
+ totalCostUsd: optNum("total_cost_usd"),
279
+ scenes: parseScenes(content),
161
280
  suites: parseSuites(content),
162
281
  source,
163
282
  };
283
+ function optNum(key) {
284
+ const v = extractField(content, key);
285
+ return v != null ? parseFloat(v) : undefined;
286
+ }
164
287
  }
165
288
  export async function findReports(dir, depth = 0) {
166
289
  if (depth > 6)
@@ -0,0 +1,11 @@
1
+ import type { TimelineEvent } from "./types";
2
+ /**
3
+ * Render a Chrome-DevTools-style waterfall of timeline events as colored
4
+ * terminal lines. Bars are positioned by `startMs` and sized by `durationMs`
5
+ * relative to the full span of the scene. Returns one string per event row
6
+ * (already indented), or `[]` when there's nothing to draw.
7
+ */
8
+ export declare function renderTerminalWaterfall(events: TimelineEvent[], opts?: {
9
+ width?: number;
10
+ indent?: string;
11
+ }): string[];
@@ -0,0 +1,45 @@
1
+ import { c } from "./logger";
2
+ const BLOCK = "█";
3
+ const THIN = "▏";
4
+ function truncate(s, n) {
5
+ return s.length > n ? s.slice(0, n - 1) + "…" : s;
6
+ }
7
+ function fmtUsd(n) {
8
+ if (n === 0)
9
+ return "$0";
10
+ return "$" + Number(n.toFixed(4)).toString();
11
+ }
12
+ /**
13
+ * Render a Chrome-DevTools-style waterfall of timeline events as colored
14
+ * terminal lines. Bars are positioned by `startMs` and sized by `durationMs`
15
+ * relative to the full span of the scene. Returns one string per event row
16
+ * (already indented), or `[]` when there's nothing to draw.
17
+ */
18
+ export function renderTerminalWaterfall(events, opts = {}) {
19
+ if (!events || events.length === 0)
20
+ return [];
21
+ const width = opts.width ?? 28;
22
+ const indent = opts.indent ?? "";
23
+ const t0 = Math.min(...events.map((e) => e.startMs));
24
+ const tEnd = Math.max(...events.map((e) => e.endMs));
25
+ const span = Math.max(1, tEnd - t0);
26
+ const nameWidth = 16;
27
+ return events.map((e) => {
28
+ const lead = Math.min(width - 1, Math.round(((e.startMs - t0) / span) * width));
29
+ const barLen = Math.max(1, Math.round((e.durationMs / span) * width));
30
+ const fill = e.durationMs === 0 ? THIN : BLOCK.repeat(Math.min(barLen, width - lead));
31
+ const cells = Array(width).fill(" ");
32
+ for (let i = 0; i < fill.length && lead + i < width; i++) {
33
+ cells[lead + i] = fill[i];
34
+ }
35
+ let bar = cells.join("");
36
+ const color = e.error ? c.red : e.kind === "model" ? c.cyan : c.yellow;
37
+ bar = color(bar);
38
+ const kindLabel = (e.kind === "model" ? "model" : "tool ").padEnd(5);
39
+ const nameLabel = truncate(e.name, nameWidth).padEnd(nameWidth);
40
+ const dur = `${Math.round(e.durationMs)}ms`.padStart(7);
41
+ const cost = e.cost?.totalUsd != null ? ` ${fmtUsd(e.cost.totalUsd)}` : "";
42
+ const err = e.error ? ` ${c.red("✗ " + truncate(e.error, 40))}` : "";
43
+ return `${indent}${c.dim(kindLabel)} ${nameLabel} ${bar} ${c.dim(dur)}${c.dim(cost)}${err}`;
44
+ });
45
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sebastiantuyu/agest",
3
- "version": "0.3.3-next.5",
3
+ "version": "0.3.3-next.6",
4
4
  "description": "A testing library for agents",
5
5
  "repository": {
6
6
  "type": "git",