@sebastiantuyu/agest 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/adapters/index.d.ts +2 -0
- package/dist/adapters/index.js +1 -0
- package/dist/adapters/remote.d.ts +58 -0
- package/dist/adapters/remote.js +127 -0
- package/dist/assertions.d.ts +7 -0
- package/dist/assertions.js +9 -0
- package/dist/config.d.ts +19 -0
- package/dist/config.js +19 -0
- package/dist/context.d.ts +7 -1
- package/dist/context.js +60 -18
- package/dist/index.d.ts +8 -2
- package/dist/index.js +3 -2
- package/dist/judge.d.ts +9 -0
- package/dist/judge.js +101 -0
- package/dist/preview.d.ts +1 -0
- package/dist/preview.js +777 -0
- package/dist/reporter.d.ts +2 -1
- package/dist/reporter.js +49 -14
- package/dist/reports.d.ts +78 -0
- package/dist/reports.js +278 -0
- package/dist/runner.d.ts +2 -1
- package/dist/runner.js +46 -4
- package/dist/stats.js +222 -65
- package/dist/types.d.ts +12 -0
- package/package.json +24 -11
package/dist/preview.js
ADDED
|
@@ -0,0 +1,777 @@
|
|
|
1
|
+
import { readFile, writeFile } from "fs/promises";
|
|
2
|
+
import { join, relative } from "path";
|
|
3
|
+
import os from "os";
|
|
4
|
+
import { exec } from "child_process";
|
|
5
|
+
import { parseReport, findReports, loadDiffEntry, computeDiff, ensureDimensions, findVaryingDimensions, groupByDimension, findControlledPairs, diffConfigs, } from "./reports.js";
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
// Helpers
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
/**
 * Opens `filepath` with the platform's default handler (`start` on Windows,
 * `open` on macOS, `xdg-open` elsewhere). Best-effort: a failure is logged to
 * stderr and never thrown.
 *
 * @param {string} filepath - Path of the file to open.
 */
function openBrowser(filepath) {
    const target = String(filepath);
    let cmd;
    if (process.platform === "win32") {
        // A double quote cannot occur in a Windows path; dropping it keeps the
        // value from closing the quoted argument early.
        cmd = `start "" "${target.replace(/"/g, "")}"`;
    }
    else {
        const opener = process.platform === "darwin" ? "open" : "xdg-open";
        // POSIX single quotes disable every expansion ($, backticks, \);
        // an embedded single quote is spelled '\''.
        cmd = `${opener} '${target.replace(/'/g, "'\\''")}'`;
    }
    exec(cmd, (err) => {
        if (err) {
            console.error("  Could not open browser:", err.message);
        }
    });
}
|
|
20
|
+
/**
 * Escapes text for safe inclusion in HTML element content and quoted
 * attribute values.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {unknown} str - Value to escape; non-strings are stringified, and
 *   `null`/`undefined` become the empty string.
 * @returns {string} The escaped text.
 */
function escHtml(str) {
    return String(str ?? "")
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}
|
|
27
|
+
/**
 * Picks the fill colour of a success-rate bar.
 *
 * @param {number} rate - Success rate as a fraction (0.8 means 80%).
 * @returns {string} Green hex at >= 0.8, yellow at >= 0.5, red otherwise.
 */
function barColor(rate) {
    const tiers = [
        [0.8, "#4ade80"],
        [0.5, "#facc15"],
    ];
    for (const [floor, hex] of tiers) {
        if (rate >= floor) {
            return hex;
        }
    }
    return "#f87171";
}
|
|
34
|
+
/**
 * Picks the text-colour class used next to a success rate.
 *
 * @param {number} rate - Success rate as a fraction (0.8 means 80%).
 * @returns {string} Green class at >= 0.8, yellow at >= 0.5, red otherwise.
 */
function rateClass(rate) {
    return rate >= 0.8
        ? "text-green-400"
        : rate >= 0.5
            ? "text-yellow-400"
            : "text-red-400";
}
|
|
41
|
+
/**
 * Picks the text-colour class for a run-to-run change.
 *
 * A change smaller than half a percentage point is treated as neutral. That
 * is the same cut-off below which `formatDelta` prints "=", so an "=" is
 * never rendered in green or red.
 *
 * @param {number} delta - Change in percentage points.
 * @returns {string} Green for a gain, red for a loss, grey for no visible change.
 */
function deltaClass(delta) {
    if (Math.abs(delta) < 0.5) {
        return "text-zinc-500";
    }
    if (delta > 0) {
        return "text-green-400";
    }
    if (delta < 0) {
        return "text-red-400";
    }
    return "text-zinc-500";
}
|
|
48
|
+
/**
 * Formats a change in percentage points for display.
 *
 * @param {number} d - Change in percentage points.
 * @returns {string} "=" when |d| < 0.5, otherwise the rounded value with an
 *   explicit "+" for gains and a trailing "%".
 */
function formatDelta(d) {
    const isNegligible = Math.abs(d) < 0.5;
    if (isNegligible) {
        return "=";
    }
    const sign = d > 0 ? "+" : "";
    return `${sign}${d.toFixed(0)}%`;
}
|
|
53
|
+
/**
 * Formats a timestamp in the current locale, falling back to the raw value
 * when it cannot be parsed.
 *
 * An unparseable input does not make `Date` throw: it yields an invalid date
 * whose `toLocaleString()` is "Invalid Date". The validity check is therefore
 * what triggers the fallback. The `catch` only covers inputs `Date` rejects
 * outright.
 *
 * @param {string | number} ts - Timestamp as stored in the report.
 * @returns {string | number} Locale-formatted date/time, or `ts` unchanged.
 */
function formatTimestamp(ts) {
    try {
        const parsed = new Date(ts);
        if (Number.isNaN(parsed.getTime())) {
            return ts;
        }
        return parsed.toLocaleString();
    }
    catch {
        return ts;
    }
}
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// Rendering
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
/**
 * Renders each tool name as a pill-shaped `<span>` and joins the pills with
 * single spaces.
 *
 * @param {string[]} tools - Tool names; each one is HTML-escaped.
 * @returns {string} HTML fragment (empty string for an empty list).
 */
function renderTools(tools) {
    const pills = [];
    for (const tool of tools) {
        pills.push(`<span class="text-xs bg-zinc-800 border border-zinc-700 px-2 py-0.5 rounded-full text-zinc-400">${escHtml(tool)}</span>`);
    }
    return pills.join(" ");
}
|
|
69
|
+
/**
 * Renders a collapsible `<details>` list of failed cases.
 *
 * @param {{ prompt: string, reason?: string }[]} cases - Failed cases; the
 *   reason line is omitted when `reason` is falsy.
 * @returns {string} HTML fragment, or "" when there are no failed cases.
 */
function renderFailedCases(cases) {
    const count = cases.length;
    if (count === 0) {
        return "";
    }
    const items = [];
    for (const failed of cases) {
        const reasonHtml = failed.reason
            ? `<div class="text-xs text-zinc-500 mt-1 break-words">${escHtml(failed.reason)}</div>`
            : "";
        items.push(`
        <li class="pl-3 border-l border-zinc-700">
          <div class="text-sm text-zinc-300">“${escHtml(failed.prompt)}”</div>
          ${reasonHtml}
        </li>`);
    }
    const plural = count !== 1 ? "s" : "";
    return `
    <details class="mt-4">
      <summary class="text-xs text-red-400 cursor-pointer hover:text-red-300 select-none">
        ${count} failed case${plural}
      </summary>
      <ul class="mt-3 space-y-3">
        ${items.join("\n")}
      </ul>
    </details>`;
}
|
|
89
|
+
/**
 * Renders one run as a timeline row: ordinal, success-rate bar, percentage,
 * change versus the previous run, dimension tags, and (optionally) the
 * configuration lines that changed.
 *
 * @param {{ report: object, delta?: number, diffLines: string[] }} entry -
 *   `delta` is in percentage points; `undefined` renders an em dash.
 * @param {number} idx - Zero-based position; shown as `#idx+1`.
 * @returns {string} HTML fragment.
 */
function renderRunRow(entry, idx) {
    const { report, delta, diffLines } = entry;
    const rate = report.successRate;
    const pct = rate * 100;
    const color = barColor(rate);
    const textColor = rateClass(rate);
    // Dimension values become small tags; long values are truncated in the
    // label and kept in full in the tooltip.
    const dimTags = Object.entries(report.dimensions ?? {})
        .map(([name, value]) => {
            const shown = value.length > 16 ? value.slice(0, 15) + "…" : value;
            return `<span class="text-xs text-zinc-600" title="${escHtml(value)}">${escHtml(name)}:${escHtml(shown)}</span>`;
        })
        .join(" ");
    let deltaHtml;
    if (delta === undefined) {
        deltaHtml = `<span class="w-14 text-right text-zinc-700 text-xs">—</span>`;
    }
    else {
        deltaHtml = `<span class="w-14 text-right text-xs ${deltaClass(delta)}">${formatDelta(delta)}</span>`;
    }
    // A line whose value starts with "+" reads as an addition, "-" as a removal.
    const diffLineClass = (line) => {
        if (line.includes(": +")) {
            return "text-green-600";
        }
        if (line.includes(": -")) {
            return "text-red-600";
        }
        return "text-zinc-600";
    };
    let diffHtml = "";
    if (diffLines.length !== 0) {
        const lines = diffLines
            .map((line) => `<div class="text-xs ${diffLineClass(line)}">${escHtml(line)}</div>`)
            .join("\n");
        diffHtml = `<div class="ml-10 mt-1 mb-2 pl-3 border-l border-zinc-800 space-y-0.5">
        ${lines}
      </div>`;
    }
    return `
    <div>
      <div class="flex items-center gap-3 py-1">
        <span class="text-xs text-zinc-600 w-6 text-right select-none">#${idx + 1}</span>
        <div class="flex-1 bg-zinc-800 rounded h-2.5 overflow-hidden">
          <div class="h-2.5 rounded" style="width:${pct.toFixed(1)}%;background:${color}"></div>
        </div>
        <span class="text-sm font-bold ${textColor} w-12 text-right">${pct.toFixed(0)}%</span>
        ${deltaHtml}
      </div>
      <div class="ml-10 mt-0.5 flex gap-3 flex-wrap">${dimTags}</div>
      ${diffHtml}
    </div>`;
}
|
|
135
|
+
// ---------------------------------------------------------------------------
|
|
136
|
+
// Grouped Bar Chart (benchmark-style)
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
// Palette cycled through by chart series: `bg` fills the bar/point, `text` is
// the lighter companion shade.
const SERIES_COLORS = [
    ["#f87171", "#fca5a5"], // red
    ["#fb923c", "#fdba74"], // orange
    ["#facc15", "#fde047"], // yellow
    ["#4ade80", "#86efac"], // green
    ["#38bdf8", "#7dd3fc"], // sky
    ["#a78bfa", "#c4b5fd"], // violet
    ["#f472b6", "#f9a8d4"], // pink
    ["#2dd4bf", "#5eead4"], // teal
].map(([bg, text]) => ({ bg, text }));
|
|
148
|
+
/**
 * Renders the table ("matrix") view for one grouping dimension: one row per
 * unique combination of the grouping dimension and the tag dimensions, one
 * column per value of the column dimension, each cell a success-rate bar.
 *
 * @param {object[]} sorted - Reports in chronological order.
 * @param {string} groupDim - Dimension the rows are grouped by.
 * @param {string[]} allDims - Every dimension name seen in the reports.
 * @param {Map<string, Map<string, string>>} versionMaps - Per dimension,
 *   value -> version tag; must hold an entry for every name in `allDims`.
 * @param {(dim: string, val: string) => string} dimLabel - Display label for a value.
 * @param {string} agentId - Already attribute-safe id of the agent group.
 * @param {boolean} isActive - Whether the view starts visible.
 * @returns {string} HTML fragment, or "" when no other dimension exists to
 *   serve as columns.
 */
function renderMatrixView(sorted, groupDim, allDims, versionMaps, dimLabel, agentId, isActive) {
    const otherDims = allDims.filter((d) => d !== groupDim);
    // Columns prefer "model"; otherwise the first remaining dimension.
    const colDim = otherDims.includes("model") ? "model" : otherDims[0];
    if (!colDim) {
        return "";
    }
    // Whatever is left is shown as small tags inside the row label.
    const tagDims = otherDims.filter((d) => d !== colDim);
    const dimOf = (report, dim) => report.dimensions?.[dim] ?? "?";
    // Column values in order of first appearance.
    const colVals = [...new Set(sorted.map((r) => dimOf(r, colDim)))];
    // Single pass: register each row once (first appearance fixes the order)
    // and let later reports overwrite earlier ones per cell, so the latest
    // run wins because `sorted` is chronological.
    const rowsByKey = new Map();
    const cellLookup = new Map();
    for (const report of sorted) {
        const groupVal = dimOf(report, groupDim);
        const tagVals = {};
        for (const td of tagDims) {
            tagVals[td] = dimOf(report, td);
        }
        const key = [groupVal, ...tagDims.map((td) => tagVals[td])].join("|");
        if (!rowsByKey.has(key)) {
            rowsByKey.set(key, { groupVal, tagVals, key });
        }
        cellLookup.set(`${key}||${dimOf(report, colDim)}`, report);
    }
    const colHeaders = colVals
        .map((cv) => `<th class="px-4 py-2 text-xs text-zinc-400 font-medium text-left" title="${escHtml(cv)}">${escHtml(dimLabel(colDim, cv))}</th>`)
        .join("\n");
    const renderCell = (report) => {
        if (!report) {
            return `<td class="px-4 py-2"><span class="text-xs text-zinc-700">—</span></td>`;
        }
        const pct = report.successRate * 100;
        const color = barColor(report.successRate);
        const tc = rateClass(report.successRate);
        return `<td class="px-4 py-2">
            <div class="flex items-center gap-3">
              <div class="flex-1 bg-zinc-800 rounded h-2 overflow-hidden" style="min-width:80px">
                <div class="h-2 rounded" style="width:${pct.toFixed(1)}%;background:${color}"></div>
              </div>
              <span class="text-sm font-medium ${tc} w-12 text-right">${pct.toFixed(0)}%</span>
            </div>
          </td>`;
    };
    const rows = [...rowsByKey.values()]
        .map((row) => {
            const groupLabel = dimLabel(groupDim, row.groupVal);
            const tagHtml = tagDims
                .map((td) => `<span class="text-[10px] text-zinc-600">${escHtml(td)}: ${escHtml(dimLabel(td, row.tagVals[td]))}</span>`)
                .join(" ");
            const cells = colVals
                .map((cv) => renderCell(cellLookup.get(`${row.key}||${cv}`)))
                .join("\n");
            return `<tr class="border-t border-zinc-800/50">
        <td class="px-4 py-2.5">
          <div class="text-xs text-zinc-300 font-medium">${escHtml(groupLabel)}</div>
          ${tagHtml ? `<div class="flex gap-2 mt-0.5">${tagHtml}</div>` : ""}
        </td>
        ${cells}
      </tr>`;
        })
        .join("\n");
    // Legend mapping version tags back to the (truncated) raw values; shown
    // only for dimensions with more than one value.
    const versionLines = [];
    for (const dim of allDims) {
        const vMap = versionMaps.get(dim);
        if (vMap.size <= 1) {
            continue;
        }
        const entries = [...vMap.entries()]
            .map(([val, version]) => {
                const short = val.length > 28 ? val.slice(0, 27) + "…" : val;
                return `<span class="text-zinc-600">${escHtml(version)}</span> <span class="text-zinc-700">${escHtml(short)}</span>`;
            })
            .join(" · ");
        versionLines.push(`<div class="text-[10px]"><span class="text-zinc-500">${escHtml(dim)}:</span> ${entries}</div>`);
    }
    const versionRef = versionLines.join("\n");
    return `<div class="chart-view" data-agent="${agentId}" data-dim="${escHtml(groupDim)}" style="display:${isActive ? "block" : "none"}">
      <div class="mb-4">
        <div class="text-xs text-zinc-600 uppercase tracking-wider mb-1">grouped by ${escHtml(groupDim)}</div>
      </div>
      <div class="overflow-x-auto">
        <table class="w-full">
          <thead>
            <tr class="border-b border-zinc-800">
              <th class="px-4 py-2 text-xs text-zinc-500 font-medium text-left">${escHtml(groupDim)}</th>
              ${colHeaders}
            </tr>
          </thead>
          <tbody>
            ${rows}
          </tbody>
        </table>
      </div>
      <div class="mt-4 pt-3 border-t border-zinc-800/50 space-y-1">
        ${versionRef}
      </div>
    </div>`;
}
|
|
264
|
+
/**
 * Renders the "success rate" card of an agent group: one tab per varying
 * dimension, a Chart.js grouped bar chart for the "model" dimension and a
 * matrix table (via `renderMatrixView`) for every other dimension.
 *
 * @param {{ label: string, runs: { report: object }[], varyingDims: string[] }} group
 * @returns {string} HTML fragment, or "" when no dimension varies.
 */
function renderGroupedBarChart(group) {
    const reports = group.runs.map((r) => r.report);
    const varying = [...group.varyingDims];
    // "model" becomes the default tab because its labels are human-readable.
    const modelIdx = varying.indexOf("model");
    if (modelIdx > 0) {
        varying.splice(modelIdx, 1);
        varying.unshift("model");
    }
    if (varying.length < 1) {
        return "";
    }
    const allDims = [...new Set(reports.flatMap((r) => Object.keys(r.dimensions ?? {})))];
    // The id ends up in HTML attributes, in a single-quoted JS string inside
    // `onclick`, and in a CSS attribute selector, so it is reduced to
    // letters, digits, "_" and "-" rather than merely HTML-escaped.
    const agentId = String(group.label).toLowerCase().replace(/[^\p{L}\p{N}_-]+/gu, "-");
    // JSON placed inside <script> must not contain "<": a literal "</script>"
    // in report data would otherwise end the script element.
    const scriptJson = (value) => JSON.stringify(value).replace(/</g, "\\u003c");
    const dimOf = (report, dim) => report.dimensions?.[dim] ?? "?";
    // Version tags per dimension: first unique value seen (oldest first) is v1, etc.
    const sorted = [...reports].sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime());
    const versionMaps = new Map();
    for (const dim of allDims) {
        const seen = new Map();
        for (const r of sorted) {
            const val = dimOf(r, dim);
            if (!seen.has(val)) {
                seen.set(val, `v${seen.size + 1}`);
            }
        }
        versionMaps.set(dim, seen);
    }
    const dimLabel = (dim, val) => {
        // Models show a short model name; everything else shows its version tag.
        if (dim === "model") {
            return val.length > 16 ? val.split("/").pop()?.slice(0, 16) ?? val.slice(0, 16) : val;
        }
        // "none" is clearer than a version tag for a tool-less run.
        if (dim === "tools" && val === "none") {
            return "none";
        }
        return versionMaps.get(dim)?.get(val) ?? "?";
    };
    const renderVersionRef = () => allDims
        .map((dim) => {
            const vMap = versionMaps.get(dim);
            if (vMap.size <= 1) {
                return "";
            }
            const entries = [...vMap.entries()]
                .map(([val, version]) => {
                    const short = val.length > 28 ? val.slice(0, 27) + "…" : val;
                    return `<span class="text-zinc-600">${escHtml(version)}</span> <span class="text-zinc-700">${escHtml(short)}</span>`;
                })
                .join(" · ");
            return `<div class="text-[10px]"><span class="text-zinc-500">${escHtml(dim)}:</span> ${entries}</div>`;
        })
        .filter(Boolean)
        .join("\n");
    const charts = varying.map((groupDim, dimIdx) => {
        const isActive = dimIdx === 0;
        // Non-model dimensions get the matrix view instead of a bar chart.
        if (groupDim !== "model") {
            return renderMatrixView(sorted, groupDim, allDims, versionMaps, dimLabel, agentId, isActive);
        }
        const otherDims = allDims.filter((d) => d !== groupDim);
        const groupVals = [...new Set(sorted.map((r) => dimOf(r, groupDim)))];
        // One series per unique combination of the non-grouping dimensions.
        // The raw values are kept next to the key so labels never have to be
        // recovered by splitting a joined string (values may contain "|").
        const configs = new Map();
        // One report per (group value, config) cell; later runs overwrite
        // earlier ones, so the latest run wins, as in the matrix view.
        const latestRun = new Map();
        const cellKey = (groupVal, cfgKey) => JSON.stringify([groupVal, cfgKey]);
        for (const r of sorted) {
            const values = otherDims.map((d) => dimOf(r, d));
            const cfgKey = JSON.stringify(values);
            if (!configs.has(cfgKey)) {
                configs.set(cfgKey, values);
            }
            latestRun.set(cellKey(dimOf(r, groupDim), cfgKey), r);
        }
        // X-axis labels: short model names.
        const labels = groupVals.map((gv) => dimLabel(groupDim, gv));
        const datasets = [...configs].map(([cfgKey, values], ci) => {
            const color = SERIES_COLORS[ci % SERIES_COLORS.length];
            const cfgLabel = otherDims.map((d, i) => `${d}: ${dimLabel(d, values[i])}`).join(", ");
            const data = groupVals.map((gv) => {
                const run = latestRun.get(cellKey(gv, cfgKey));
                return run ? +(run.successRate * 100).toFixed(1) : null;
            });
            return { label: cfgLabel, data, backgroundColor: color.bg, borderColor: color.bg, borderWidth: 0, borderRadius: 4 };
        });
        const canvasId = `bar-${agentId}-${groupDim}`;
        const versionRef = renderVersionRef();
        return `<div class="chart-view" data-agent="${agentId}" data-dim="${escHtml(groupDim)}" style="display:${isActive ? "block" : "none"}">
      <div class="mb-4">
        <div class="text-xs text-zinc-600 uppercase tracking-wider mb-1">grouped by ${escHtml(groupDim)}</div>
      </div>
      <div style="position:relative;height:280px">
        <canvas id="${canvasId}"></canvas>
      </div>
      <script>
        new Chart(document.getElementById(${scriptJson(canvasId)}), {
          type: 'bar',
          data: { labels: ${scriptJson(labels)}, datasets: ${scriptJson(datasets)} },
          options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
              legend: { labels: { color: '#a1a1aa', font: { family: 'ui-monospace, monospace', size: 10 }, boxWidth: 12, padding: 16 } },
              tooltip: { callbacks: { label: function(ctx) { return ctx.dataset.label + ': ' + ctx.parsed.y + '%'; } } }
            },
            scales: {
              x: { ticks: { color: '#71717a', font: { family: 'ui-monospace, monospace', size: 10 } }, grid: { color: '#27272a' } },
              y: { min: 0, max: 100, ticks: { color: '#71717a', font: { family: 'ui-monospace, monospace', size: 10 }, callback: function(v) { return v + '%'; } }, grid: { color: '#27272a' } }
            }
          }
        });
      </script>
      <div class="mt-4 pt-3 border-t border-zinc-800/50 space-y-1">
        ${versionRef}
      </div>
    </div>`;
    });
    // Dimension toggle tabs; the first one starts active.
    const tabs = varying
        .map((dim, i) => {
            const active = i === 0;
            return `<button
        class="dim-tab px-3 py-1.5 text-xs rounded-md transition-colors ${active ? "bg-zinc-700 text-zinc-200" : "bg-zinc-800/50 text-zinc-500 hover:text-zinc-300"}"
        data-agent="${agentId}"
        data-dim="${escHtml(dim)}"
        onclick="switchDim('${agentId}', '${escHtml(dim)}')"
      >${escHtml(dim)}</button>`;
        })
        .join("\n");
    return `
    <div class="rounded-xl border border-zinc-800 bg-zinc-900 p-5 mb-4">
      <div class="flex items-center justify-between mb-5">
        <span class="text-xs text-zinc-600 uppercase tracking-wider">success rate</span>
        <div class="flex gap-1.5">
          ${tabs}
        </div>
      </div>
      ${charts.join("\n")}
    </div>`;
}
|
|
410
|
+
// ---------------------------------------------------------------------------
|
|
411
|
+
// Attribution Cards
|
|
412
|
+
// ---------------------------------------------------------------------------
|
|
413
|
+
/**
 * Renders the "dimension impact" card grid: one card per varied dimension
 * with the average delta over its controlled (single-variable) comparisons
 * and up to three example comparisons. Cards are ordered by the largest
 * absolute delta, descending.
 *
 * @param {{ controlledPairs: { variedDimension: string, delta: number, variedFrom: string, variedTo: string }[] }} group
 *   `delta` is a fraction (0.25 means +25 points).
 * @returns {string} HTML fragment, or "" when there are no controlled pairs.
 */
function renderAttribution(group) {
    const pairs = group.controlledPairs;
    if (pairs.length === 0) {
        return "";
    }
    // Sign and colour are derived from the rounded value, so a delta that
    // rounds to zero reads "0%" in the neutral tone rather than "-0%"/"+0%"
    // in red/green.
    const describeDelta = (fraction, shade) => {
        const digits = (fraction * 100).toFixed(0);
        const rounded = Number(digits);
        if (rounded === 0) {
            return { text: "0%", tone: shade === 400 ? "text-zinc-500" : "text-zinc-600" };
        }
        return rounded > 0
            ? { text: `+${digits}%`, tone: `text-green-${shade}` }
            : { text: `${digits}%`, tone: `text-red-${shade}` };
    };
    const truncate = (text) => (text.length > 20 ? text.slice(0, 19) + "…" : text);
    const byDim = new Map();
    for (const p of pairs) {
        const entry = byDim.get(p.variedDimension) ?? { deltas: [], pairs: [] };
        entry.deltas.push(p.delta);
        entry.pairs.push(p);
        byDim.set(p.variedDimension, entry);
    }
    const peak = (deltas) => Math.max(...deltas.map((d) => Math.abs(d)));
    const sorted = [...byDim.entries()].sort((a, b) => peak(b[1].deltas) - peak(a[1].deltas));
    const cards = sorted
        .map(([dim, { deltas, pairs: dimPairs }]) => {
            const avgDelta = deltas.reduce((a, b) => a + b, 0) / deltas.length;
            const avg = describeDelta(avgDelta, 400);
            const examples = dimPairs
                .slice(0, 3)
                .map((p) => {
                    const ex = describeDelta(p.delta, 600);
                    return `<div class="text-xs ${ex.tone}">${escHtml(truncate(p.variedFrom))} → ${escHtml(truncate(p.variedTo))}: ${ex.text}</div>`;
                })
                .join("\n");
            return `
      <div class="rounded-lg border border-zinc-800 bg-zinc-900/50 p-4">
        <div class="flex items-center justify-between mb-2">
          <span class="text-sm text-zinc-300 font-medium">${escHtml(dim)}</span>
          <span class="text-lg font-bold ${avg.tone}">${avg.text} avg</span>
        </div>
        <div class="text-xs text-zinc-500 mb-2">${deltas.length} controlled comparison${deltas.length !== 1 ? "s" : ""}</div>
        <div class="space-y-1">${examples}</div>
      </div>`;
        })
        .join("\n");
    return `
    <div class="rounded-xl border border-zinc-800 bg-zinc-900 p-5 mb-4">
      <div class="mb-4">
        <span class="text-xs text-zinc-600 uppercase tracking-wider">dimension impact (single-variable comparisons)</span>
      </div>
      <div class="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-3">
        ${cards}
      </div>
    </div>`;
}
|
|
461
|
+
// ---------------------------------------------------------------------------
|
|
462
|
+
// Per-group evolution (grouped by primary dimension)
|
|
463
|
+
// ---------------------------------------------------------------------------
|
|
464
|
+
/**
 * Renders one timeline card per value of the group's first varying
 * dimension. Inside a card the runs are ordered oldest to newest, and each
 * run after the first carries its change versus the previous run plus up to
 * four "dimension: from → to" lines.
 *
 * With no varying dimension the group is rendered by `renderFlatEvolution`.
 *
 * @param {{ runs: { report: object }[], varyingDims: string[] }} group
 * @returns {string} HTML fragment with the cards joined by newlines.
 */
function renderGroupedEvolution(group) {
    const reports = group.runs.map((run) => run.report);
    const varying = group.varyingDims;
    if (varying.length === 0) {
        // Nothing varies: fall back to the flat timeline.
        return renderFlatEvolution(group);
    }
    const primaryDim = varying[0];
    const byTimestamp = (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime();
    // Human-readable list of the dimensions that changed between two runs.
    const changedLines = (prev, next) => {
        const { varied } = diffConfigs(prev.dimensions ?? {}, next.dimensions ?? {});
        return Object.entries(varied)
            .map(([name, change]) => `${name}: ${change.from} → ${change.to}`)
            .slice(0, 4);
    };
    const cards = [];
    for (const [dimVal, dimReports] of groupByDimension(reports, primaryDim).entries()) {
        const timeline = [...dimReports].sort(byTimestamp);
        const rowHtml = [];
        for (let i = 0; i < timeline.length; i++) {
            const report = timeline[i];
            let delta;
            let diffLines = [];
            if (i > 0) {
                const prev = timeline[i - 1];
                delta = (report.successRate - prev.successRate) * 100;
                diffLines = changedLines(prev, report);
            }
            rowHtml.push(renderRunRow({ report, delta, diffLines }, i));
        }
        const rows = rowHtml.join("\n");
        const heading = dimVal.length > 30 ? dimVal.slice(0, 29) + "…" : dimVal;
        cards.push(`
      <div class="rounded-xl border border-zinc-800 bg-zinc-900 p-5 mb-4">
        <div class="flex items-center justify-between mb-4">
          <span class="text-xs text-zinc-600 uppercase tracking-wider">${escHtml(primaryDim)}: ${escHtml(heading)}</span>
          <span class="text-xs text-zinc-600">${timeline.length} run${timeline.length !== 1 ? "s" : ""}</span>
        </div>
        <div class="space-y-0">
          ${rows}
        </div>
      </div>`);
    }
    return cards.join("\n");
}
|
|
506
|
+
/**
 * Renders all runs of a group as a single timeline card, in the order given
 * by `group.runs` (labelled "oldest → newest").
 *
 * @param {{ runs: object[] }} group - Each run is passed to `renderRunRow`.
 * @returns {string} HTML fragment.
 */
function renderFlatEvolution(group) {
    const runCount = group.runs.length;
    const rows = group.runs.map((e, i) => renderRunRow(e, i)).join("\n");
    // Pluralise like the other counters in this file ("1 run", "2 runs").
    const noun = runCount !== 1 ? "runs" : "run";
    return `
    <div class="rounded-xl border border-zinc-800 bg-zinc-900 p-5 mb-4">
      <div class="flex items-center justify-between mb-4">
        <span class="text-xs text-zinc-600 uppercase tracking-wider">success rate · ${runCount} ${noun}</span>
        <span class="text-xs text-zinc-600">oldest → newest</span>
      </div>
      <div class="space-y-0">
        ${rows}
      </div>
    </div>`;
}
|
|
519
|
+
// ---------------------------------------------------------------------------
|
|
520
|
+
// Scatter plot and agent section
|
|
521
|
+
// ---------------------------------------------------------------------------
|
|
522
|
+
/**
 * Renders the "accuracy vs speed" Chart.js scatter plot of an agent group:
 * one point per report (x = average seconds per case, y = success rate in
 * percent), one series per model, with dashed quadrant lines at the midpoint
 * of the observed x range and at 50% accuracy.
 *
 * @param {{ label: string, runs: { report: object }[] }} group
 * @returns {string} HTML fragment, or "" when the group has fewer than two reports.
 */
function renderScatterPlot(group) {
    const reports = group.runs.map((r) => r.report);
    if (reports.length < 2) {
        return "";
    }
    // JSON placed inside <script> must not contain "<": a literal "</script>"
    // in report data would otherwise end the script element.
    const scriptJson = (value) => JSON.stringify(value).replace(/</g, "\\u003c");
    const allDims = [...new Set(reports.flatMap((r) => Object.keys(r.dimensions ?? {})))];
    const nonModelDims = allDims.filter((d) => d !== "model");
    // Data points grouped by model, in order of first appearance.
    const byModel = new Map();
    for (const r of reports) {
        const model = r.dimensions?.["model"] ?? r.model ?? "?";
        // duration is divided by 1000 to get seconds; zero cases plot at x = 0.
        const avgDurSec = r.totalCases > 0 ? +(r.duration / r.totalCases / 1000).toFixed(2) : 0;
        const accuracy = +(r.successRate * 100).toFixed(1);
        const configLabel = nonModelDims
            .map((d) => `${d}: ${r.dimensions?.[d] ?? "?"}`)
            .join(", ");
        const points = byModel.get(model) ?? [];
        points.push({ x: avgDurSec, y: accuracy, label: configLabel });
        byModel.set(model, points);
    }
    const datasets = [...byModel].map(([model, points], i) => {
        const color = SERIES_COLORS[i % SERIES_COLORS.length];
        return {
            // Last path segment of the model id, capped at 24 characters.
            label: model.split("/").pop()?.slice(0, 24) ?? model.slice(0, 24),
            data: points,
            backgroundColor: color.bg,
            borderColor: color.text,
            pointRadius: 7,
            pointHoverRadius: 9,
        };
    });
    const allX = [...byModel.values()].flat().map((p) => p.x);
    const midX = allX.length > 0 ? +((Math.min(...allX) + Math.max(...allX)) / 2).toFixed(2) : 0;
    // The id is used both as an HTML attribute and inside the script, so it
    // is reduced to letters, digits, "_" and "-".
    const agentId = String(group.label).toLowerCase().replace(/[^\p{L}\p{N}_-]+/gu, "-");
    const canvasId = `scatter-${agentId}`;
    return `
    <div class="rounded-xl border border-zinc-800 bg-zinc-900 p-5 mb-4">
      <div class="mb-4">
        <span class="text-xs text-zinc-600 uppercase tracking-wider">accuracy vs speed</span>
      </div>
      <div style="position:relative;height:320px">
        <canvas id="${canvasId}"></canvas>
      </div>
      <script>
        new Chart(document.getElementById(${scriptJson(canvasId)}), {
          type: 'scatter',
          data: { datasets: ${scriptJson(datasets)} },
          options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: {
              legend: { labels: { color: '#a1a1aa', font: { family: 'ui-monospace, monospace', size: 10 }, boxWidth: 12, padding: 16 } },
              tooltip: {
                callbacks: {
                  label: function(ctx) {
                    var p = ctx.raw;
                    var lines = [ctx.dataset.label + ': ' + p.y + '% accuracy, ' + p.x.toFixed(1) + 's/case'];
                    if (p.label) lines.push(p.label);
                    return lines;
                  }
                }
              }
            },
            scales: {
              x: {
                title: { display: true, text: 'avg duration per case (s)', color: '#71717a', font: { family: 'ui-monospace, monospace', size: 11 } },
                ticks: { color: '#71717a', font: { family: 'ui-monospace, monospace', size: 10 } },
                grid: { color: '#27272a' }
              },
              y: {
                min: 0, max: 100,
                title: { display: true, text: 'accuracy (%)', color: '#71717a', font: { family: 'ui-monospace, monospace', size: 11 } },
                ticks: { color: '#71717a', font: { family: 'ui-monospace, monospace', size: 10 }, callback: function(v) { return v + '%'; } },
                grid: { color: '#27272a' }
              }
            }
          },
          plugins: [{
            id: 'quadrantLines',
            afterDraw: function(chart) {
              var ctx = chart.ctx;
              var area = chart.chartArea;
              var xScale = chart.scales.x;
              var yScale = chart.scales.y;
              var midXPx = xScale.getPixelForValue(${midX});
              var midYPx = yScale.getPixelForValue(50);

              ctx.save();
              ctx.setLineDash([6, 4]);
              ctx.lineWidth = 1;
              ctx.strokeStyle = 'rgba(113, 113, 122, 0.4)';

              ctx.beginPath();
              ctx.moveTo(midXPx, area.top);
              ctx.lineTo(midXPx, area.bottom);
              ctx.stroke();

              ctx.beginPath();
              ctx.moveTo(area.left, midYPx);
              ctx.lineTo(area.right, midYPx);
              ctx.stroke();

              ctx.setLineDash([]);
              ctx.font = '10px ui-monospace, monospace';
              ctx.fillStyle = 'rgba(113, 113, 122, 0.5)';

              ctx.textAlign = 'left';
              ctx.textBaseline = 'top';
              ctx.fillText('Ideal', area.left + 8, area.top + 8);

              ctx.textAlign = 'right';
              ctx.fillText('Smart but slow', area.right - 8, area.top + 8);

              ctx.textBaseline = 'bottom';
              ctx.fillText('Dumb and slow', area.right - 8, area.bottom - 8);

              ctx.textAlign = 'left';
              ctx.fillText('Dumb and fast', area.left + 8, area.bottom - 8);

              ctx.restore();
            }
          }]
        });
      </script>
    </div>`;
}
|
|
649
|
+
/**
 * Renders the page section for one agent group: a heading with the group
 * label, then the grouped bar chart card, then the scatter plot card.
 *
 * @param {{ label: string }} group - Also passed on to both chart renderers.
 * @returns {string} HTML `<section>` fragment.
 */
function renderAgentSection(group) {
    const parts = {
        chart: renderGroupedBarChart(group),
        scatter: renderScatterPlot(group),
    };
    const heading = `<h2 class="text-base font-semibold mb-4 text-zinc-400 uppercase tracking-widest">${escHtml(group.label)}</h2>`;
    return `
    <section class="mb-12">
      ${heading}

      ${parts.chart}
      ${parts.scatter}
    </section>`;
}
|
|
660
|
+
// ---------------------------------------------------------------------------
|
|
661
|
+
// Full HTML page
|
|
662
|
+
// ---------------------------------------------------------------------------
|
|
663
|
+
/**
 * Assembles the complete standalone preview page.
 *
 * The page loads Tailwind and Chart.js from their CDNs, defines the
 * client-side `switchDim(agent, dim)` helper (shows the `.chart-view` whose
 * `data-dim` matches and swaps the highlight classes on the `.dim-tab`
 * elements for that agent), and then renders one section per agent group.
 *
 * @param {object[]} groups - Agent groups; each is rendered with
 *   `renderAgentSection` and the results are joined with newlines.
 * @param {number} totalReports - Report count shown in the page header
 *   (pluralised as "report"/"reports").
 * @returns {string} The full HTML document.
 */
function generateHTML(groups, totalReports) {
    const sectionsMarkup = groups
        .map((group) => renderAgentSection(group))
        .join("\n");
    const generatedAt = new Date().toLocaleString();
    // Only a count of exactly 1 uses the singular form.
    const pluralSuffix = totalReports === 1 ? "" : "s";
    return `<!DOCTYPE html>
<html lang="en" class="bg-zinc-950 text-zinc-100">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>agest preview</title>
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
  <script>
    function switchDim(agent, dim) {
      document.querySelectorAll('.chart-view[data-agent="' + agent + '"]').forEach(el => {
        el.style.display = el.dataset.dim === dim ? 'block' : 'none';
      });
      document.querySelectorAll('.dim-tab[data-agent="' + agent + '"]').forEach(el => {
        if (el.dataset.dim === dim) {
          el.className = el.className.replace('bg-zinc-800/50 text-zinc-500', 'bg-zinc-700 text-zinc-200');
        } else {
          el.className = el.className.replace('bg-zinc-700 text-zinc-200', 'bg-zinc-800/50 text-zinc-500');
        }
      });
    }
  </script>
</head>
<body class="min-h-screen font-mono p-8">
  <div class="max-w-4xl mx-auto">

    <header class="mb-10">
      <h1 class="text-2xl font-bold tracking-tight">agest</h1>
      <p class="text-zinc-500 text-sm mt-1">${totalReports} report${pluralSuffix} · generated ${generatedAt}</p>
    </header>

    ${sectionsMarkup}

    <footer class="mt-16 border-t border-zinc-800 pt-6 text-xs text-zinc-600">
      agest by <a href="https://sebastiantuyu.com" target="_blank" class="text-zinc-500 hover:text-zinc-300 transition-colors">sebastiantuyu</a>
    </footer>

  </div>
</body>
</html>`;
}
|
|
707
|
+
// ---------------------------------------------------------------------------
|
|
708
|
+
// Main
|
|
709
|
+
// ---------------------------------------------------------------------------
|
|
710
|
+
/**
 * CLI entry point for the preview command.
 *
 * Steps:
 *  1. Locate report files under the current working directory; if there are
 *     none, print a hint and return.
 *  2. Read and parse every report (identified by its path relative to cwd),
 *     then run `ensureDimensions` on each one.
 *  3. Bucket reports by `name` (reports without a name share one bucket) and
 *     sort every bucket by ascending timestamp.
 *  4. For each bucket compute the varying dimensions, the controlled pairs and
 *     the per-run data: the change in `successRate` versus the previous run
 *     (times 100) and the diff between consecutive diff entries.
 *  5. Render the HTML page, write it to a file in the OS temp directory,
 *     print its path and open it in the browser.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const UNNAMED_KEY = "__unnamed__";
    const cwd = process.cwd();
    const reportFiles = await findReports(cwd);
    if (reportFiles.length === 0) {
        console.log("\n No reports found. Run some agent tests first.\n");
        return;
    }
    const reports = await Promise.all(reportFiles.map(async (file) => parseReport(await readFile(file, "utf-8"), relative(cwd, file))));
    // Ensure all reports have dimensions (backward compat)
    await Promise.all(reports.map((report) => ensureDimensions(report)));
    // Bucket reports per agent name; unnamed reports share a sentinel key.
    const byAgent = new Map();
    for (const report of reports) {
        const agentKey = report.name ?? UNNAMED_KEY;
        if (!byAgent.has(agentKey)) {
            byAgent.set(agentKey, []);
        }
        byAgent.get(agentKey).push(report);
    }
    // Within each bucket, order runs oldest -> newest.
    const toMillis = (report) => new Date(report.timestamp).getTime();
    for (const bucket of byAgent.values()) {
        bucket.sort((a, b) => toMillis(a) - toMillis(b));
    }
    // Named agents first (default string sort), the unnamed bucket last.
    const orderedKeys = [...byAgent.keys()]
        .filter((agentKey) => agentKey !== UNNAMED_KEY)
        .sort();
    if (byAgent.has(UNNAMED_KEY)) {
        orderedKeys.push(UNNAMED_KEY);
    }
    // Build AgentGroups with dimension analysis
    const groups = await Promise.all(orderedKeys.map(async (agentKey) => {
        const history = byAgent.get(agentKey);
        const varyingDims = findVaryingDimensions(history);
        const controlledPairs = findControlledPairs(history);
        // Load diff entries for consecutive run diffs; runs without a
        // system prompt hash contribute null.
        const diffEntries = await Promise.all(history.map((report) => {
            if (!report.systemPromptHash) {
                return Promise.resolve(null);
            }
            return loadDiffEntry(report.systemPromptHash);
        }));
        const runs = history.map((report, index) => {
            // The first run has nothing to be compared against.
            if (index === 0) {
                return { report, delta: undefined, diffLines: [] };
            }
            const delta = (report.successRate - history[index - 1].successRate) * 100;
            const before = diffEntries[index - 1];
            const after = diffEntries[index];
            // A diff is only computed when both neighbouring entries exist.
            const diffLines = before && after ? computeDiff(before, after) : [];
            return { report, delta, diffLines };
        });
        return {
            label: agentKey === UNNAMED_KEY ? "Unnamed" : agentKey,
            runs,
            varyingDims,
            controlledPairs,
        };
    }));
    const html = generateHTML(groups, reports.length);
    const tmpPath = join(os.tmpdir(), `agest-preview-${Date.now()}.html`);
    await writeFile(tmpPath, html, "utf-8");
    console.log(`\n Preview: ${tmpPath}\n`);
    openBrowser(tmpPath);
}
|
|
774
|
+
// Run the CLI. On any failure print a short message and exit with status 1.
main().catch((err) => {
    // A rejection reason is not guaranteed to be an Error: it can be a string,
    // null, undefined, etc. Reading `.message` unconditionally would print
    // "undefined" for a string and throw for null/undefined, which would skip
    // the exit below. Fall back to the stringified reason in those cases.
    const message = err?.message ?? String(err);
    console.error("Error:", message);
    process.exit(1);
});
|