@deepagents/evals 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +218 -0
  2. package/dist/comparison/index.d.ts +41 -0
  3. package/dist/comparison/index.d.ts.map +1 -0
  4. package/dist/comparison/index.js +106 -0
  5. package/dist/comparison/index.js.map +7 -0
  6. package/dist/dataset/hf.d.ts +16 -0
  7. package/dist/dataset/hf.d.ts.map +1 -0
  8. package/dist/dataset/index.d.ts +17 -0
  9. package/dist/dataset/index.d.ts.map +1 -0
  10. package/dist/dataset/index.js +256 -0
  11. package/dist/dataset/index.js.map +7 -0
  12. package/dist/engine/index.d.ts +67 -0
  13. package/dist/engine/index.d.ts.map +1 -0
  14. package/dist/engine/index.js +332 -0
  15. package/dist/engine/index.js.map +7 -0
  16. package/dist/evaluate/index.d.ts +47 -0
  17. package/dist/evaluate/index.d.ts.map +1 -0
  18. package/dist/evaluate/index.js +977 -0
  19. package/dist/evaluate/index.js.map +7 -0
  20. package/dist/index.d.ts +15 -0
  21. package/dist/index.d.ts.map +1 -0
  22. package/dist/index.js +1763 -0
  23. package/dist/index.js.map +7 -0
  24. package/dist/reporters/console.d.ts +6 -0
  25. package/dist/reporters/console.d.ts.map +1 -0
  26. package/dist/reporters/csv.d.ts +6 -0
  27. package/dist/reporters/csv.d.ts.map +1 -0
  28. package/dist/reporters/format.d.ts +12 -0
  29. package/dist/reporters/format.d.ts.map +1 -0
  30. package/dist/reporters/html.d.ts +6 -0
  31. package/dist/reporters/html.d.ts.map +1 -0
  32. package/dist/reporters/index.d.ts +12 -0
  33. package/dist/reporters/index.d.ts.map +1 -0
  34. package/dist/reporters/index.js +447 -0
  35. package/dist/reporters/index.js.map +7 -0
  36. package/dist/reporters/json.d.ts +7 -0
  37. package/dist/reporters/json.d.ts.map +1 -0
  38. package/dist/reporters/markdown.d.ts +6 -0
  39. package/dist/reporters/markdown.d.ts.map +1 -0
  40. package/dist/reporters/shared.d.ts +11 -0
  41. package/dist/reporters/shared.d.ts.map +1 -0
  42. package/dist/reporters/types.d.ts +35 -0
  43. package/dist/reporters/types.d.ts.map +1 -0
  44. package/dist/scorers/index.d.ts +30 -0
  45. package/dist/scorers/index.d.ts.map +1 -0
  46. package/dist/scorers/index.js +175 -0
  47. package/dist/scorers/index.js.map +7 -0
  48. package/dist/store/index.d.ts +103 -0
  49. package/dist/store/index.d.ts.map +1 -0
  50. package/dist/store/index.js +361 -0
  51. package/dist/store/index.js.map +7 -0
  52. package/package.json +99 -0
@@ -0,0 +1,447 @@
1
+ // packages/evals/src/reporters/console.ts
2
+ import chalk from "chalk";
3
+
4
+ // packages/evals/src/reporters/format.ts
5
/**
 * Renders a millisecond duration as a short label.
 *
 * Durations under one second are printed verbatim with an "ms" suffix;
 * everything else is printed in seconds with one decimal place.
 *
 * @param ms Duration in milliseconds.
 * @returns A label such as "250ms" or "1.2s".
 */
function formatDuration(ms) {
  const MS_PER_SECOND = 1e3;
  return ms < MS_PER_SECOND
    ? `${ms}ms`
    : `${(ms / MS_PER_SECOND).toFixed(1)}s`;
}
9
/**
 * Renders a token count as a compact label.
 *
 * - One million or more: millions with one decimal place ("2.5M").
 * - One thousand or more: whole thousands ("12k").
 * - Otherwise: the number itself.
 *
 * @param n Token count.
 * @returns The compact label.
 */
function formatTokens(n) {
  if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
  if (n >= 1e3) {
    const thousands = Number((n / 1e3).toFixed(0));
    // Values just under one million round up to 1000 thousands; show them
    // as "1.0M" rather than the out-of-scale "1000k".
    if (thousands >= 1e3) return `${(n / 1e6).toFixed(1)}M`;
    return `${thousands}k`;
  }
  return String(n);
}
14
/**
 * Builds a report filename of the form `<slug>-<run prefix>.<ext>`.
 *
 * The slug is `name` with every character outside `[a-zA-Z0-9-_]` replaced
 * by "-" and then lower-cased; the run prefix is the first eight characters
 * of `runId`.
 *
 * @param name Name used to derive the slug.
 * @param runId Run identifier; only its first eight characters are used.
 * @param ext File extension without the leading dot.
 * @returns The generated filename.
 */
function generateFilename(name, runId, ext) {
  const sanitized = name.replace(/[^a-zA-Z0-9-_]/g, "-");
  const slug = sanitized.toLowerCase();
  const shortRunId = runId.slice(0, 8);
  return slug.concat("-", shortRunId, ".", ext);
}
19
+ function stringifyUnknown(value, options) {
20
+ if (typeof value === "string") return value;
21
+ const space = options?.space ?? 0;
22
+ const fallback = options?.fallback ?? "null";
23
+ try {
24
+ return JSON.stringify(value, null, space) ?? fallback;
25
+ } catch {
26
+ return String(value);
27
+ }
28
+ }
29
/**
 * Formats a case input as a single-line string.
 *
 * Delegates to `stringifyUnknown` with no indentation and an empty-string
 * fallback.
 *
 * @param value The input value.
 * @returns The compact string form.
 */
function formatInputValue(value) {
  const compactOptions = { space: 0, fallback: "" };
  return stringifyUnknown(value, compactOptions);
}
32
/**
 * Formats an error value for display.
 *
 * `null`/`undefined` become an empty string, strings are returned as-is,
 * and any other value is passed to `stringifyUnknown` with two-space
 * indentation and an empty-string fallback.
 *
 * @param value The error value.
 * @returns The display string.
 */
function formatErrorValue(value) {
  if (value === null || value === undefined) {
    return "";
  }
  return typeof value === "string"
    ? value
    : stringifyUnknown(value, { space: 2, fallback: "" });
}
37
/**
 * Converts a value to a single CSV field.
 *
 * The value is stringified with `stringifyUnknown` (compact, "null"
 * fallback). If the result contains a comma, a double quote, or a line
 * break (LF or CR), it is wrapped in double quotes with embedded quotes
 * doubled; otherwise it is returned unchanged.
 *
 * @param value Value to place in the field.
 * @returns The CSV-safe field text.
 */
function escapeCsv(value) {
  const str = stringifyUnknown(value, { space: 0, fallback: "null" });
  // A bare "\r" is treated as a row break by many CSV readers, so it must
  // trigger quoting just like "\n".
  if (/[",\r\n]/.test(str)) {
    return `"${str.replace(/"/g, '""')}"`;
  }
  return str;
}
44
+
45
+ // packages/evals/src/reporters/shared.ts
46
+ import { mkdir, writeFile } from "node:fs/promises";
47
+ import { join } from "node:path";
48
// Directory used for report files when the caller does not supply one.
var DEFAULT_OUTPUT_DIR = ".evals/reports";

/**
 * Picks the directory reports are written to.
 *
 * @param outputDir Caller-supplied directory, or `null`/`undefined`.
 * @returns `outputDir` when given, otherwise the default directory.
 */
function resolveOutputDir(outputDir) {
  if (outputDir === null || outputDir === undefined) {
    return DEFAULT_OUTPUT_DIR;
  }
  return outputDir;
}
52
/**
 * Builds the path of a report file inside `outputDir`.
 *
 * The filename comes from `generateFilename(name, runId, ext)`.
 *
 * @param outputDir Directory the report lives in.
 * @param name Name passed to `generateFilename`.
 * @param runId Run identifier passed to `generateFilename`.
 * @param ext File extension without the leading dot.
 * @returns The joined path.
 */
function getReportPath(outputDir, name, runId, ext) {
  const filename = generateFilename(name, runId, ext);
  return join(outputDir, filename);
}
55
/**
 * Writes `content` to the report file for a run, creating `outputDir`
 * (recursively) first.
 *
 * @param outputDir Directory to write into.
 * @param name Name used to derive the filename.
 * @param runId Run identifier used to derive the filename.
 * @param ext File extension without the leading dot.
 * @param content Text to write, encoded as UTF-8.
 */
async function writeRunReportFile(outputDir, name, runId, ext, content) {
  await mkdir(outputDir, { recursive: true });
  const reportPath = getReportPath(outputDir, name, runId, ext);
  await writeFile(reportPath, content, "utf-8");
}
59
/**
 * Classifies a case result.
 *
 * @param result Case result with an optional `error` and a `scores` map
 *   whose values carry a numeric `score`.
 * @param threshold Minimum score for a scorer to count as passing.
 * @returns "error" when `result.error` is truthy; otherwise "pass" when
 *   every score is at least `threshold` (including when there are no
 *   scores), and "fail" otherwise.
 */
function getCaseStatus(result, threshold) {
  if (result.error) {
    return "error";
  }
  for (const entry of Object.values(result.scores)) {
    // Negated ">=" so that a NaN score counts as a failure.
    if (!(entry.score >= threshold)) {
      return "fail";
    }
  }
  return "pass";
}
66
/**
 * Creates a reporter that writes a single file when a run ends.
 *
 * On `onRunEnd`, the run data is passed to `options.render` (which may
 * return a string or a promise of one) and the result is written via
 * `writeRunReportFile` using the run's `name`, `runId` and `options.ext`.
 *
 * @param options `{ outputDir?, ext, render }`.
 * @returns A reporter object exposing `onRunEnd`.
 */
function createRunEndFileReporter(options) {
  const targetDir = resolveOutputDir(options.outputDir);
  const onRunEnd = async (data) => {
    const rendered = await options.render(data);
    await writeRunReportFile(
      targetDir,
      data.name,
      data.runId,
      options.ext,
      rendered
    );
  };
  return { onRunEnd };
}
81
+
82
+ // packages/evals/src/reporters/console.ts
83
/**
 * Creates a reporter that prints progress and results to the console.
 *
 * Verbosity (default "normal"):
 * - "quiet": only the summary table is printed.
 * - "normal": progress counter, summary table, and details for every case
 *   whose status is not "pass".
 * - "verbose": progress counter, summary table, and details for all cases.
 *
 * @param options Optional `{ verbosity }`.
 * @returns A reporter with `onRunStart`, `onCaseEnd` and `onRunEnd`.
 */
function consoleReporter(options) {
  const verbosity = options?.verbosity ?? "normal";
  const isQuiet = verbosity === "quiet";
  const progress = { total: 0, done: 0 };

  // Prints the detail view for each case with the given truncation limit.
  const printCases = (cases, threshold, maxStringLength) => {
    for (const c of cases) {
      renderCaseDetail(c, threshold, { includeIO: true, maxStringLength });
    }
  };

  return {
    onRunStart(data) {
      progress.total = data.totalCases;
      progress.done = 0;
    },
    onCaseEnd() {
      progress.done += 1;
      if (isQuiet) return;
      const counter = chalk.dim(`[${progress.done}/${progress.total}]`);
      // "\r" rewrites the same terminal line on every update.
      process.stdout.write(`\r ${counter}`);
    },
    onRunEnd(data) {
      if (!isQuiet) {
        // Blank out the progress counter before printing the summary.
        process.stdout.write("\r" + " ".repeat(30) + "\r");
      }
      renderSummaryTable(data);
      if (isQuiet) return;

      const ordered = [...data.cases].sort((a, b) => a.index - b.index);
      if (verbosity === "verbose") {
        printCases(ordered, data.threshold, 2e4);
        return;
      }

      const failing = ordered.filter(
        (c) => getCaseStatus(c, data.threshold) !== "pass"
      );
      if (failing.length === 0) return;
      console.log(chalk.dim(` Failing cases (${failing.length}):`));
      console.log("");
      printCases(failing, data.threshold, 4e3);
    }
  };
}
132
/**
 * Prefixes every line of `text` with `spaces` space characters.
 *
 * CRLF line endings are normalized to LF before splitting.
 *
 * @param text Text to indent.
 * @param spaces Number of spaces to prepend to each line.
 * @returns The indented text, joined with "\n".
 */
function indentBlock(text, spaces) {
  const indent = " ".repeat(spaces);
  const normalized = text.replace(/\r\n/g, "\n");
  const indented = [];
  for (const line of normalized.split("\n")) {
    indented.push(indent + line);
  }
  return indented.join("\n");
}
136
/**
 * Shortens `text` to at most `maxLength` UTF-16 code units and appends an
 * ellipsis ("\u2026") when anything was cut.
 *
 * If the cut would fall between the two halves of a surrogate pair, the
 * whole pair is dropped so the result never ends in a lone surrogate.
 *
 * @param text Text to shorten.
 * @param maxLength Maximum number of code units kept from `text`.
 * @returns `text` unchanged when it fits, otherwise the shortened text
 *   followed by an ellipsis.
 */
function truncateString(text, maxLength) {
  if (text.length <= maxLength) return text;
  let end = maxLength;
  if (end > 0) {
    const lastKept = text.charCodeAt(end - 1);
    const firstDropped = text.charCodeAt(end);
    const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
    const dropsLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (endsOnHighSurrogate && dropsLowSurrogate) {
      end -= 1;
    }
  }
  return text.slice(0, end) + "\u2026";
}
140
/**
 * Prints the run summary (eval name, model, case counts, mean scores,
 * total duration and total tokens) to the console, framed by divider
 * lines.
 *
 * @param data Run-end data; reads `name`, `model` and `summary`.
 */
function renderSummaryTable(data) {
  const { summary } = data;

  const scoreParts = [];
  for (const [name, score] of Object.entries(summary.meanScores)) {
    scoreParts.push(`${name}: ${score.toFixed(3)}`);
  }
  const scoreStr = scoreParts.join(", ");

  const divider = chalk.dim(" " + "\u2500".repeat(60));
  const passed = chalk.green(String(summary.passCount));
  const failed = chalk.red(String(summary.failCount));
  const totalTokens = summary.totalTokensIn + summary.totalTokensOut;

  const lines = [
    "",
    chalk.bold(" Summary"),
    divider,
    ` ${chalk.dim("Eval:")} ${data.name}`,
    ` ${chalk.dim("Model:")} ${data.model}`,
    ` ${chalk.dim("Cases:")} ${summary.totalCases}`,
    ` ${chalk.dim("Pass/Fail:")} ${passed} / ${failed}`,
    ` ${chalk.dim("Scores:")} ${scoreStr}`,
    ` ${chalk.dim("Duration:")} ${formatDuration(summary.totalLatencyMs)}`,
    ` ${chalk.dim("Tokens:")} ${formatTokens(totalTokens)}`,
    divider,
    ""
  ];
  for (const line of lines) {
    console.log(line);
  }
}
162
/**
 * Prints the detail view of a single case to the console: a status label,
 * the input, optionally the output and expected value, one line per
 * scorer, and the error (if any).
 *
 * The status label comes from `getCaseStatus`, so a case carrying an error
 * is labelled "ERROR" even when it has no failing (or no) scores. This
 * matches the classification used to select failing cases and by the
 * other reporters.
 *
 * @param c Case result to print.
 * @param threshold Minimum score for a scorer to count as passing.
 * @param options Optional `{ includeIO, maxStringLength }`. `includeIO`
 *   (default false) adds the output/expected blocks; `maxStringLength`
 *   (default 4000) caps the length of each of those blocks.
 */
function renderCaseDetail(c, threshold, options) {
  const entries = Object.entries(c.scores);
  const status = getCaseStatus(c, threshold);
  const prefix =
    status === "error"
      ? chalk.red("ERROR")
      : status === "pass"
        ? chalk.green("PASS")
        : chalk.red("FAIL");
  const includeIO = options?.includeIO ?? false;
  const maxStringLength = options?.maxStringLength ?? 4e3;

  console.log(` ${prefix} ${chalk.dim(`Case #${c.index}`)}`);
  const inputStr = stringifyUnknown(c.input, {
    space: 2,
    fallback: String(c.input)
  });
  console.log(` ${chalk.dim("Input:")} ${inputStr}`);

  if (includeIO) {
    console.log(` ${chalk.dim("Output:")}`);
    console.log(indentBlock(truncateString(c.output, maxStringLength), 6));
    console.log(` ${chalk.dim("Expected:")}`);
    const expectedStrRaw = stringifyUnknown(c.expected, {
      space: 2,
      fallback: String(c.expected)
    });
    console.log(
      indentBlock(truncateString(expectedStrRaw, maxStringLength), 6)
    );
  }

  for (const [name, s] of entries) {
    const scoreColor = s.score >= threshold ? chalk.green : chalk.red;
    const reasonStr = s.reason ? ` \u2014 ${s.reason}` : "";
    console.log(
      ` ${chalk.dim(name + ":")} ${scoreColor(s.score.toFixed(3))}${reasonStr}`
    );
  }

  if (c.error) {
    console.log(` ${chalk.dim("Error:")}`);
    const errorStr = formatErrorValue(c.error);
    console.log(` ${chalk.red(errorStr)}`);
  }

  console.log("");
}
200
+
201
+ // packages/evals/src/reporters/json.ts
202
+ import { appendFile, mkdir as mkdir2 } from "node:fs/promises";
203
/**
 * Creates a reporter that writes JSON output for a run.
 *
 * - `onRunStart` creates the output directory and fixes the path of a
 *   `.jsonl` stream file for the run.
 * - `onCaseEnd` appends the case data to that file as one compact line.
 * - `onRunEnd` writes the full run data to a `.json` file, indented with
 *   two spaces unless `options.pretty` is false.
 *
 * @param options Optional `{ outputDir, pretty }`; `pretty` defaults to true.
 * @returns A reporter with `onRunStart`, `onCaseEnd` and `onRunEnd`.
 */
function jsonReporter(options) {
  const outputDir = resolveOutputDir(options?.outputDir);
  const indent = (options?.pretty ?? true) ? 2 : 0;
  let streamPath = "";

  return {
    async onRunStart(data) {
      await mkdir2(outputDir, { recursive: true });
      streamPath = getReportPath(outputDir, data.name, data.runId, "jsonl");
    },
    async onCaseEnd(data) {
      const record = stringifyUnknown(data, { space: 0, fallback: "null" });
      await appendFile(streamPath, `${record}\n`, "utf-8");
    },
    async onRunEnd(data) {
      const content = stringifyUnknown(data, {
        space: indent,
        fallback: "null"
      });
      await writeRunReportFile(
        outputDir,
        data.name,
        data.runId,
        "json",
        content
      );
    }
  };
}
231
+
232
+ // packages/evals/src/reporters/csv.ts
233
/**
 * Creates a reporter that writes the run's cases to a CSV file when the
 * run ends.
 *
 * Columns: index, input, output, expected, error, latency_ms, tokens_in,
 * tokens_out, followed by `<scorer>_score` and `<scorer>_reason` for every
 * scorer listed in `data.summary.meanScores`. A case without a result for
 * a scorer gets empty cells for that scorer.
 *
 * @param options Optional `{ outputDir }`.
 * @returns A reporter exposing `onRunEnd`.
 */
function csvReporter(options) {
  return createRunEndFileReporter({
    outputDir: options?.outputDir,
    ext: "csv",
    render(data) {
      const scorerNames = Object.keys(data.summary.meanScores);
      const headerParts = [
        "index",
        "input",
        "output",
        "expected",
        "error",
        "latency_ms",
        "tokens_in",
        "tokens_out"
      ];
      for (const name of scorerNames) {
        // Scorer names are free-form keys; escape them so a comma, quote or
        // line break in a name cannot shift the header columns.
        headerParts.push(
          escapeCsv(`${name}_score`),
          escapeCsv(`${name}_reason`)
        );
      }
      const rows = [headerParts.join(",")];
      for (const c of data.cases) {
        const parts = [
          String(c.index),
          escapeCsv(c.input),
          escapeCsv(c.output),
          escapeCsv(c.expected),
          escapeCsv(c.error ?? ""),
          String(c.latencyMs),
          String(c.tokensIn),
          String(c.tokensOut)
        ];
        for (const name of scorerNames) {
          const s = c.scores[name];
          parts.push(String(s?.score ?? ""), escapeCsv(s?.reason ?? ""));
        }
        rows.push(parts.join(","));
      }
      return rows.join("\n") + "\n";
    }
  });
}
274
+
275
+ // packages/evals/src/reporters/markdown.ts
276
/**
 * Creates a reporter that writes a Markdown report when the run ends.
 *
 * The report contains a heading with the eval name, run metadata, a table
 * of mean scores, and a table with one row per case (status, truncated
 * input, per-scorer scores, latency, error).
 *
 * Free-form text placed in table cells (scorer names, inputs, errors) is
 * passed through `escapeMarkdownTableCell` so pipes and line breaks cannot
 * break the table layout.
 *
 * @param options Optional `{ outputDir }`.
 * @returns A reporter exposing `onRunEnd`.
 */
function markdownReporter(options) {
  return createRunEndFileReporter({
    outputDir: options?.outputDir,
    ext: "md",
    render(data) {
      const { summary } = data;
      const scorerNames = Object.keys(summary.meanScores);
      const lines = [];

      lines.push(`# ${data.name}`);
      lines.push("");
      lines.push(`**Model:** ${data.model}`);
      lines.push(
        `**Cases:** ${summary.totalCases} (${summary.passCount} pass, ${summary.failCount} fail)`
      );
      lines.push(`**Duration:** ${formatDuration(summary.totalLatencyMs)}`);
      lines.push(
        `**Tokens:** ${formatTokens(summary.totalTokensIn + summary.totalTokensOut)}`
      );
      lines.push("");

      lines.push("## Scores");
      lines.push("");
      lines.push("| Scorer | Mean |");
      lines.push("|--------|------|");
      for (const [name, score] of Object.entries(summary.meanScores)) {
        lines.push(`| ${escapeMarkdownTableCell(name)} | ${score.toFixed(3)} |`);
      }
      lines.push("");

      lines.push("## Cases");
      lines.push("");
      const caseHeader = [
        "#",
        "Status",
        "Input",
        ...scorerNames.map(escapeMarkdownTableCell),
        "Latency",
        "Error"
      ];
      lines.push(`| ${caseHeader.join(" | ")} |`);
      lines.push(`| ${caseHeader.map(() => "---").join(" | ")} |`);

      for (const c of data.cases) {
        const statusValue = getCaseStatus(c, data.threshold);
        const status =
          statusValue === "error"
            ? "\u{1F534} Error"
            : statusValue === "pass"
              ? "\u2705 Pass"
              : "\u274C Fail";
        // Truncate first, then escape, so the 60-character cap applies to
        // the raw input text.
        const input = escapeMarkdownTableCell(
          formatInputValue(c.input).slice(0, 60)
        );
        const scores = scorerNames.map(
          (name) => c.scores[name]?.score.toFixed(3) ?? "-"
        );
        const error = c.error
          ? escapeMarkdownTableCell(formatErrorValue(c.error))
          : "-";
        const row = [
          String(c.index),
          status,
          input,
          ...scores,
          `${c.latencyMs}ms`,
          error
        ];
        lines.push(`| ${row.join(" | ")} |`);
      }
      lines.push("");
      return lines.join("\n");
    }
  });
}

// Makes text safe for a Markdown table cell: line breaks become "<br>" and
// pipes are backslash-escaped.
function escapeMarkdownTableCell(text) {
  return text.replace(/\r?\n/g, "<br>").replace(/\|/g, "\\|");
}
338
+
339
+ // packages/evals/src/reporters/html.ts
340
/**
 * Creates a reporter that writes an HTML report (rendered by `renderHtml`)
 * when the run ends.
 *
 * @param options Optional `{ outputDir }`.
 * @returns A reporter exposing `onRunEnd`.
 */
function htmlReporter(options) {
  const reporterConfig = {
    outputDir: options?.outputDir,
    ext: "html",
    render: renderHtml
  };
  return createRunEndFileReporter(reporterConfig);
}
347
/**
 * Escapes text for inclusion in HTML element content or a quoted attribute
 * value.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their entity forms. The
 * ampersand is replaced first so the entities produced by the later
 * replacements are not escaped again.
 *
 * @param str Text to escape.
 * @returns The escaped text.
 */
function esc(str) {
  return str
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
350
/**
 * Renders the run as a standalone HTML document.
 *
 * The page shows the eval name, run metadata, a table of mean scores, and
 * a table with one row per case (status, truncated input and output,
 * per-scorer scores with the reason as a tooltip, latency, error).
 *
 * A case with no result for a scorer gets a "-" cell instead of a made-up
 * 0.000 score, matching the Markdown report. All free-form text is passed
 * through `esc` before being placed in the markup.
 *
 * @param data Run-end data; reads `name`, `model`, `threshold`, `summary`
 *   and `cases`.
 * @returns The HTML document as a string.
 */
function renderHtml(data) {
  const { summary } = data;
  const scorerNames = Object.keys(summary.meanScores);

  const caseRows = data.cases
    .map((c) => {
      const status = getCaseStatus(c, data.threshold);
      const statusLabel =
        status === "error" ? "ERROR" : status === "pass" ? "PASS" : "FAIL";
      const scoresCells = scorerNames
        .map((name) => {
          const s = c.scores[name];
          // No result from this scorer for this case: show a neutral
          // placeholder rather than a fabricated failing score.
          if (s?.score == null) return "<td>-</td>";
          const cls = s.score >= data.threshold ? "pass" : "fail";
          const reason = s.reason ? ` title="${esc(s.reason)}"` : "";
          return `<td class="${cls}"${reason}>${s.score.toFixed(3)}</td>`;
        })
        .join("");
      // formatInputValue returns strings unchanged and stringifies anything
      // else, so a non-string output cannot abort the whole report.
      const outputText = formatInputValue(c.output);
      return `<tr class="${status}">
      <td>${c.index}</td>
      <td class="${status}">${statusLabel}</td>
      <td class="text">${esc(formatInputValue(c.input).slice(0, 120))}</td>
      <td class="text">${esc(outputText.slice(0, 120))}</td>
      ${scoresCells}
      <td>${c.latencyMs}ms</td>
      <td class="error-text">${c.error ? esc(formatErrorValue(c.error)) : ""}</td>
    </tr>`;
    })
    .join("\n");

  const scorerHeaders = scorerNames.map((n) => `<th>${esc(n)}</th>`).join("");
  const meanScoreRows = Object.entries(summary.meanScores)
    .map(
      ([name, score]) =>
        `<tr><td>${esc(name)}</td><td>${score.toFixed(3)}</td></tr>`
    )
    .join("");

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>${esc(data.name)} \u2014 Eval Report</title>
<style>
  * { box-sizing: border-box; margin: 0; padding: 0; }
  body { font-family: system-ui, -apple-system, sans-serif; background: #f8f9fa; color: #1a1a1a; padding: 2rem; }
  h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }
  .meta { color: #666; margin-bottom: 1.5rem; font-size: 0.9rem; }
  .meta span { margin-right: 1.5rem; }
  .summary-table, .cases-table { width: 100%; border-collapse: collapse; margin-bottom: 2rem; }
  .summary-table th, .summary-table td,
  .cases-table th, .cases-table td { padding: 0.5rem 0.75rem; border: 1px solid #ddd; text-align: left; font-size: 0.85rem; }
  .summary-table th, .cases-table th { background: #f1f3f5; font-weight: 600; }
  .cases-table .text { max-width: 300px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
  .cases-table .error-text { max-width: 480px; white-space: pre-wrap; word-break: break-word; }
  .pass { color: #2b8a3e; }
  .fail { color: #c92a2a; }
  .error { color: #e67700; }
  tr.pass:hover, tr.fail:hover, tr.error:hover { background: #f1f3f5; }
  td.pass { background: #ebfbee; }
  td.fail { background: #fff5f5; }
  h2 { font-size: 1.2rem; margin: 1.5rem 0 0.75rem; }
</style>
</head>
<body>
  <h1>${esc(data.name)}</h1>
  <div class="meta">
    <span><strong>Model:</strong> ${esc(data.model)}</span>
    <span><strong>Cases:</strong> ${summary.totalCases}</span>
    <span><strong>Pass:</strong> ${summary.passCount}</span>
    <span><strong>Fail:</strong> ${summary.failCount}</span>
    <span><strong>Duration:</strong> ${formatDuration(summary.totalLatencyMs)}</span>
    <span><strong>Tokens:</strong> ${formatTokens(summary.totalTokensIn + summary.totalTokensOut)}</span>
  </div>

  <h2>Mean Scores</h2>
  <table class="summary-table">
    <thead><tr><th>Scorer</th><th>Mean</th></tr></thead>
    <tbody>${meanScoreRows}</tbody>
  </table>

  <h2>Cases</h2>
  <table class="cases-table">
    <thead>
      <tr>
        <th>#</th>
        <th>Status</th>
        <th>Input</th>
        <th>Output</th>
        ${scorerHeaders}
        <th>Latency</th>
        <th>Error</th>
      </tr>
    </thead>
    <tbody>
      ${caseRows}
    </tbody>
  </table>
</body>
</html>`;
}
440
+ export {
441
+ consoleReporter,
442
+ csvReporter,
443
+ htmlReporter,
444
+ jsonReporter,
445
+ markdownReporter
446
+ };
447
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,7 @@
1
+ {
2
+ "version": 3,
3
+ "sources": ["../../src/reporters/console.ts", "../../src/reporters/format.ts", "../../src/reporters/shared.ts", "../../src/reporters/json.ts", "../../src/reporters/csv.ts", "../../src/reporters/markdown.ts", "../../src/reporters/html.ts"],
4
+ "sourcesContent": ["import chalk from 'chalk';\n\nimport {\n formatDuration,\n formatErrorValue,\n formatTokens,\n stringifyUnknown,\n} from './format.ts';\nimport { getCaseStatus } from './shared.ts';\nimport type { CaseResult, Reporter, RunEndData, Verbosity } from './types.ts';\n\nexport interface ConsoleReporterOptions {\n verbosity?: Verbosity;\n}\n\nexport function consoleReporter(options?: ConsoleReporterOptions): Reporter {\n const verbosity = options?.verbosity ?? 'normal';\n\n let totalCases = 0;\n let completed = 0;\n\n return {\n onRunStart(data) {\n totalCases = data.totalCases;\n completed = 0;\n },\n\n onCaseEnd() {\n completed++;\n if (verbosity !== 'quiet') {\n process.stdout.write(\n `\\r ${chalk.dim(`[${completed}/${totalCases}]`)}`,\n );\n }\n },\n\n onRunEnd(data) {\n if (verbosity !== 'quiet') {\n process.stdout.write('\\r' + ' '.repeat(30) + '\\r');\n }\n\n renderSummaryTable(data);\n\n if (verbosity === 'quiet') return;\n\n const sorted = [...data.cases].sort((a, b) => a.index - b.index);\n\n if (verbosity === 'verbose') {\n for (const c of sorted) {\n renderCaseDetail(c, data.threshold, {\n includeIO: true,\n maxStringLength: 20_000,\n });\n }\n } else {\n const failing = sorted.filter(\n (c) => getCaseStatus(c, data.threshold) !== 'pass',\n );\n if (failing.length > 0) {\n console.log(chalk.dim(` Failing cases (${failing.length}):`));\n console.log('');\n for (const c of failing) {\n renderCaseDetail(c, data.threshold, {\n includeIO: true,\n maxStringLength: 4_000,\n });\n }\n }\n }\n },\n };\n}\n\nfunction indentBlock(text: string, spaces: number): string {\n const pad = ' '.repeat(spaces);\n return text\n .replace(/\\r\\n/g, '\\n')\n .split('\\n')\n .map((line) => pad + line)\n .join('\\n');\n}\n\nfunction truncateString(text: string, maxLength: number): string {\n if (text.length <= maxLength) return text;\n return text.slice(0, maxLength) + '\u2026';\n}\n\nfunction renderSummaryTable(data: RunEndData): void {\n const { summary } = 
data;\n const scoreStr = Object.entries(summary.meanScores)\n .map(([name, score]) => `${name}: ${score.toFixed(3)}`)\n .join(', ');\n\n console.log('');\n console.log(chalk.bold(' Summary'));\n console.log(chalk.dim(' ' + '\u2500'.repeat(60)));\n console.log(` ${chalk.dim('Eval:')} ${data.name}`);\n console.log(` ${chalk.dim('Model:')} ${data.model}`);\n console.log(` ${chalk.dim('Cases:')} ${summary.totalCases}`);\n console.log(\n ` ${chalk.dim('Pass/Fail:')} ${chalk.green(String(summary.passCount))} / ${chalk.red(String(summary.failCount))}`,\n );\n console.log(` ${chalk.dim('Scores:')} ${scoreStr}`);\n console.log(\n ` ${chalk.dim('Duration:')} ${formatDuration(summary.totalLatencyMs)}`,\n );\n console.log(\n ` ${chalk.dim('Tokens:')} ${formatTokens(summary.totalTokensIn + summary.totalTokensOut)}`,\n );\n console.log(chalk.dim(' ' + '\u2500'.repeat(60)));\n console.log('');\n}\n\nfunction renderCaseDetail(\n c: CaseResult,\n threshold: number,\n options?: {\n includeIO?: boolean;\n maxStringLength?: number;\n },\n): void {\n const entries = Object.entries(c.scores);\n const failed = entries.some(([, s]) => s.score < threshold);\n const prefix = failed ? chalk.red('FAIL') : chalk.green('PASS');\n const includeIO = options?.includeIO ?? false;\n const maxStringLength = options?.maxStringLength ?? 
4_000;\n\n console.log(` ${prefix} ${chalk.dim(`Case #${c.index}`)}`);\n const inputStr = stringifyUnknown(c.input, {\n space: 2,\n fallback: String(c.input),\n });\n console.log(` ${chalk.dim('Input:')} ${inputStr}`);\n\n if (includeIO) {\n console.log(` ${chalk.dim('Output:')}`);\n console.log(indentBlock(truncateString(c.output, maxStringLength), 6));\n console.log(` ${chalk.dim('Expected:')}`);\n const expectedStrRaw = stringifyUnknown(c.expected, {\n space: 2,\n fallback: String(c.expected),\n });\n console.log(\n indentBlock(truncateString(expectedStrRaw, maxStringLength), 6),\n );\n }\n\n for (const [name, s] of entries) {\n const scoreColor = s.score >= threshold ? chalk.green : chalk.red;\n const reasonStr = s.reason ? ` \u2014 ${s.reason}` : '';\n console.log(\n ` ${chalk.dim(name + ':')} ${scoreColor(s.score.toFixed(3))}${reasonStr}`,\n );\n }\n\n if (c.error) {\n console.log(` ${chalk.dim('Error:')}`);\n const errorStr = formatErrorValue(c.error);\n console.log(` ${chalk.red(errorStr)}`);\n }\n\n console.log('');\n}\n", "export function truncate(str: string, maxLen = 80): string {\n if (str.length <= maxLen) return str;\n return str.slice(0, maxLen) + '...';\n}\n\nexport function formatDuration(ms: number): string {\n if (ms < 1000) return `${ms}ms`;\n return `${(ms / 1000).toFixed(1)}s`;\n}\n\nexport function formatTokens(n: number): string {\n if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;\n if (n >= 1_000) return `${(n / 1_000).toFixed(0)}k`;\n return String(n);\n}\n\nexport function generateFilename(\n name: string,\n runId: string,\n ext: string,\n): string {\n const slug = name.replace(/[^a-zA-Z0-9-_]/g, '-').toLowerCase();\n const prefix = runId.slice(0, 8);\n return `${slug}-${prefix}.${ext}`;\n}\n\nexport function stringifyUnknown(\n value: unknown,\n options?: {\n space?: number;\n fallback?: string;\n },\n): string {\n if (typeof value === 'string') return value;\n\n const space = options?.space ?? 
0;\n const fallback = options?.fallback ?? 'null';\n try {\n return JSON.stringify(value, null, space) ?? fallback;\n } catch {\n return String(value);\n }\n}\n\nexport function formatInputValue(value: unknown): string {\n return stringifyUnknown(value, { space: 0, fallback: '' });\n}\n\nexport function formatErrorValue(value: unknown): string {\n if (value == null) return '';\n if (typeof value === 'string') return value;\n return stringifyUnknown(value, { space: 2, fallback: '' });\n}\n\nexport function escapeCsv(value: unknown): string {\n const str = stringifyUnknown(value, { space: 0, fallback: 'null' });\n if (str.includes(',') || str.includes('\"') || str.includes('\\n')) {\n return `\"${str.replace(/\"/g, '\"\"')}\"`;\n }\n return str;\n}\n", "import { mkdir, writeFile } from 'node:fs/promises';\nimport { join } from 'node:path';\n\nimport { generateFilename } from './format.ts';\nimport type { CaseResult, Reporter, RunEndData } from './types.ts';\n\nconst DEFAULT_OUTPUT_DIR = '.evals/reports';\n\nexport function resolveOutputDir(outputDir?: string): string {\n return outputDir ?? DEFAULT_OUTPUT_DIR;\n}\n\nexport function getReportPath(\n outputDir: string,\n name: string,\n runId: string,\n ext: string,\n): string {\n return join(outputDir, generateFilename(name, runId, ext));\n}\n\nexport async function writeRunReportFile(\n outputDir: string,\n name: string,\n runId: string,\n ext: string,\n content: string,\n): Promise<void> {\n await mkdir(outputDir, { recursive: true });\n await writeFile(getReportPath(outputDir, name, runId, ext), content, 'utf-8');\n}\n\nexport function getCaseStatus(\n result: CaseResult,\n threshold: number,\n): 'error' | 'pass' | 'fail' {\n if (result.error) return 'error';\n const passed = Object.values(result.scores).every(\n (s) => s.score >= threshold,\n );\n return passed ? 
'pass' : 'fail';\n}\n\nexport function createRunEndFileReporter(options: {\n outputDir?: string;\n ext: string;\n render: (data: RunEndData) => string | Promise<string>;\n}): Reporter {\n const outputDir = resolveOutputDir(options.outputDir);\n\n return {\n async onRunEnd(data) {\n const content = await options.render(data);\n await writeRunReportFile(\n outputDir,\n data.name,\n data.runId,\n options.ext,\n content,\n );\n },\n };\n}\n", "import { appendFile, mkdir } from 'node:fs/promises';\n\nimport { stringifyUnknown } from './format.ts';\nimport {\n getReportPath,\n resolveOutputDir,\n writeRunReportFile,\n} from './shared.ts';\nimport type { Reporter } from './types.ts';\n\nexport interface JsonReporterOptions {\n outputDir?: string;\n pretty?: boolean;\n}\n\nexport function jsonReporter(options?: JsonReporterOptions): Reporter {\n const outputDir = resolveOutputDir(options?.outputDir);\n const pretty = options?.pretty ?? true;\n let streamFilename = '';\n\n return {\n async onRunStart(data) {\n await mkdir(outputDir, { recursive: true });\n streamFilename = getReportPath(outputDir, data.name, data.runId, 'jsonl');\n },\n async onCaseEnd(data) {\n const line = stringifyUnknown(data, { space: 0, fallback: 'null' });\n await appendFile(streamFilename, line + '\\n', 'utf-8');\n },\n async onRunEnd(data) {\n const content = stringifyUnknown(data, {\n space: pretty ? 
2 : 0,\n fallback: 'null',\n });\n await writeRunReportFile(\n outputDir,\n data.name,\n data.runId,\n 'json',\n content,\n );\n },\n };\n}\n", "import { escapeCsv } from './format.ts';\nimport { createRunEndFileReporter } from './shared.ts';\nimport type { Reporter } from './types.ts';\n\nexport interface CsvReporterOptions {\n outputDir?: string;\n}\n\nexport function csvReporter(options?: CsvReporterOptions): Reporter {\n return createRunEndFileReporter({\n outputDir: options?.outputDir,\n ext: 'csv',\n render(data) {\n const scorerNames = Object.keys(data.summary.meanScores);\n\n const headerParts = [\n 'index',\n 'input',\n 'output',\n 'expected',\n 'error',\n 'latency_ms',\n 'tokens_in',\n 'tokens_out',\n ];\n for (const name of scorerNames) {\n headerParts.push(`${name}_score`, `${name}_reason`);\n }\n\n const rows = [headerParts.join(',')];\n\n for (const c of data.cases) {\n const parts = [\n String(c.index),\n escapeCsv(c.input),\n escapeCsv(c.output),\n escapeCsv(c.expected),\n escapeCsv(c.error ?? ''),\n String(c.latencyMs),\n String(c.tokensIn),\n String(c.tokensOut),\n ];\n for (const name of scorerNames) {\n const s = c.scores[name];\n parts.push(String(s?.score ?? ''), escapeCsv(s?.reason ?? 
''));\n }\n rows.push(parts.join(','));\n }\n\n return rows.join('\\n') + '\\n';\n },\n });\n}\n", "import {\n formatDuration,\n formatErrorValue,\n formatInputValue,\n formatTokens,\n} from './format.ts';\nimport { createRunEndFileReporter, getCaseStatus } from './shared.ts';\nimport type { Reporter } from './types.ts';\n\nexport interface MarkdownReporterOptions {\n outputDir?: string;\n}\n\nexport function markdownReporter(options?: MarkdownReporterOptions): Reporter {\n return createRunEndFileReporter({\n outputDir: options?.outputDir,\n ext: 'md',\n render(data) {\n const { summary } = data;\n const scorerNames = Object.keys(summary.meanScores);\n const lines: string[] = [];\n\n lines.push(`# ${data.name}`);\n lines.push('');\n lines.push(`**Model:** ${data.model}`);\n lines.push(\n `**Cases:** ${summary.totalCases} (${summary.passCount} pass, ${summary.failCount} fail)`,\n );\n lines.push(`**Duration:** ${formatDuration(summary.totalLatencyMs)}`);\n lines.push(\n `**Tokens:** ${formatTokens(summary.totalTokensIn + summary.totalTokensOut)}`,\n );\n lines.push('');\n\n lines.push('## Scores');\n lines.push('');\n lines.push('| Scorer | Mean |');\n lines.push('|--------|------|');\n for (const [name, score] of Object.entries(summary.meanScores)) {\n lines.push(`| ${name} | ${score.toFixed(3)} |`);\n }\n lines.push('');\n\n lines.push('## Cases');\n lines.push('');\n\n const caseHeader = [\n '#',\n 'Status',\n 'Input',\n ...scorerNames,\n 'Latency',\n 'Error',\n ];\n lines.push(`| ${caseHeader.join(' | ')} |`);\n lines.push(`| ${caseHeader.map(() => '---').join(' | ')} |`);\n\n for (const c of data.cases) {\n const statusValue = getCaseStatus(c, data.threshold);\n const status =\n statusValue === 'error'\n ? '\uD83D\uDD34 Error'\n : statusValue === 'pass'\n ? '\u2705 Pass'\n : '\u274C Fail';\n const input = formatInputValue(c.input).slice(0, 60);\n const scores = scorerNames.map(\n (name) => c.scores[name]?.score.toFixed(3) ?? 
'-',\n );\n const error = c.error\n ? formatErrorValue(c.error)\n .replace(/\\r?\\n/g, '<br>')\n .replace(/\\|/g, '\\\\|')\n : '-';\n const row = [\n String(c.index),\n status,\n input,\n ...scores,\n `${c.latencyMs}ms`,\n error,\n ];\n lines.push(`| ${row.join(' | ')} |`);\n }\n lines.push('');\n\n return lines.join('\\n');\n },\n });\n}\n", "import {\n formatDuration,\n formatErrorValue,\n formatInputValue,\n formatTokens,\n} from './format.ts';\nimport { createRunEndFileReporter, getCaseStatus } from './shared.ts';\nimport type { Reporter, RunEndData } from './types.ts';\n\nexport interface HtmlReporterOptions {\n outputDir?: string;\n}\n\nexport function htmlReporter(options?: HtmlReporterOptions): Reporter {\n return createRunEndFileReporter({\n outputDir: options?.outputDir,\n ext: 'html',\n render: renderHtml,\n });\n}\n\nfunction esc(str: string): string {\n return str\n .replace(/&/g, '&amp;')\n .replace(/</g, '&lt;')\n .replace(/>/g, '&gt;')\n .replace(/\"/g, '&quot;');\n}\n\nfunction renderHtml(data: RunEndData): string {\n const { summary } = data;\n const scorerNames = Object.keys(summary.meanScores);\n\n const caseRows = data.cases\n .map((c) => {\n const status = getCaseStatus(c, data.threshold);\n const statusLabel =\n status === 'error' ? 'ERROR' : status === 'pass' ? 'PASS' : 'FAIL';\n const scoresCells = scorerNames\n .map((name) => {\n const s = c.scores[name];\n const score = s?.score ?? 0;\n const cls = score >= data.threshold ? 'pass' : 'fail';\n const reason = s?.reason ? ` title=\"${esc(s.reason)}\"` : '';\n return `<td class=\"${cls}\"${reason}>${score.toFixed(3)}</td>`;\n })\n .join('');\n\n return `<tr class=\"${status}\">\n <td>${c.index}</td>\n <td class=\"${status}\">${statusLabel}</td>\n <td class=\"text\">${esc(formatInputValue(c.input).slice(0, 120))}</td>\n <td class=\"text\">${esc(c.output.slice(0, 120))}</td>\n ${scoresCells}\n <td>${c.latencyMs}ms</td>\n <td class=\"error-text\">${c.error ? 
esc(formatErrorValue(c.error)) : ''}</td>\n </tr>`;\n })\n .join('\\n');\n\n const scorerHeaders = scorerNames.map((n) => `<th>${esc(n)}</th>`).join('');\n const meanScoreRows = Object.entries(summary.meanScores)\n .map(\n ([name, score]) =>\n `<tr><td>${esc(name)}</td><td>${score.toFixed(3)}</td></tr>`,\n )\n .join('');\n\n return `<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\">\n<title>${esc(data.name)} \u2014 Eval Report</title>\n<style>\n * { box-sizing: border-box; margin: 0; padding: 0; }\n body { font-family: system-ui, -apple-system, sans-serif; background: #f8f9fa; color: #1a1a1a; padding: 2rem; }\n h1 { font-size: 1.5rem; margin-bottom: 0.5rem; }\n .meta { color: #666; margin-bottom: 1.5rem; font-size: 0.9rem; }\n .meta span { margin-right: 1.5rem; }\n .summary-table, .cases-table { width: 100%; border-collapse: collapse; margin-bottom: 2rem; }\n .summary-table th, .summary-table td,\n .cases-table th, .cases-table td { padding: 0.5rem 0.75rem; border: 1px solid #ddd; text-align: left; font-size: 0.85rem; }\n .summary-table th, .cases-table th { background: #f1f3f5; font-weight: 600; }\n .cases-table .text { max-width: 300px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }\n .cases-table .error-text { max-width: 480px; white-space: pre-wrap; word-break: break-word; }\n .pass { color: #2b8a3e; }\n .fail { color: #c92a2a; }\n .error { color: #e67700; }\n tr.pass:hover, tr.fail:hover, tr.error:hover { background: #f1f3f5; }\n td.pass { background: #ebfbee; }\n td.fail { background: #fff5f5; }\n h2 { font-size: 1.2rem; margin: 1.5rem 0 0.75rem; }\n</style>\n</head>\n<body>\n <h1>${esc(data.name)}</h1>\n <div class=\"meta\">\n <span><strong>Model:</strong> ${esc(data.model)}</span>\n <span><strong>Cases:</strong> ${summary.totalCases}</span>\n <span><strong>Pass:</strong> ${summary.passCount}</span>\n <span><strong>Fail:</strong> ${summary.failCount}</span>\n <span><strong>Duration:</strong> 
${formatDuration(summary.totalLatencyMs)}</span>\n <span><strong>Tokens:</strong> ${formatTokens(summary.totalTokensIn + summary.totalTokensOut)}</span>\n </div>\n\n <h2>Mean Scores</h2>\n <table class=\"summary-table\">\n <thead><tr><th>Scorer</th><th>Mean</th></tr></thead>\n <tbody>${meanScoreRows}</tbody>\n </table>\n\n <h2>Cases</h2>\n <table class=\"cases-table\">\n <thead>\n <tr>\n <th>#</th>\n <th>Status</th>\n <th>Input</th>\n <th>Output</th>\n ${scorerHeaders}\n <th>Latency</th>\n <th>Error</th>\n </tr>\n </thead>\n <tbody>\n ${caseRows}\n </tbody>\n </table>\n</body>\n</html>`;\n}\n"],
5
+ "mappings": ";AAAA,OAAO,WAAW;;;ACKX,SAAS,eAAe,IAAoB;AACjD,MAAI,KAAK,IAAM,QAAO,GAAG,EAAE;AAC3B,SAAO,IAAI,KAAK,KAAM,QAAQ,CAAC,CAAC;AAClC;AAEO,SAAS,aAAa,GAAmB;AAC9C,MAAI,KAAK,IAAW,QAAO,IAAI,IAAI,KAAW,QAAQ,CAAC,CAAC;AACxD,MAAI,KAAK,IAAO,QAAO,IAAI,IAAI,KAAO,QAAQ,CAAC,CAAC;AAChD,SAAO,OAAO,CAAC;AACjB;AAEO,SAAS,iBACd,MACA,OACA,KACQ;AACR,QAAM,OAAO,KAAK,QAAQ,mBAAmB,GAAG,EAAE,YAAY;AAC9D,QAAM,SAAS,MAAM,MAAM,GAAG,CAAC;AAC/B,SAAO,GAAG,IAAI,IAAI,MAAM,IAAI,GAAG;AACjC;AAEO,SAAS,iBACd,OACA,SAIQ;AACR,MAAI,OAAO,UAAU,SAAU,QAAO;AAEtC,QAAM,QAAQ,SAAS,SAAS;AAChC,QAAM,WAAW,SAAS,YAAY;AACtC,MAAI;AACF,WAAO,KAAK,UAAU,OAAO,MAAM,KAAK,KAAK;AAAA,EAC/C,QAAQ;AACN,WAAO,OAAO,KAAK;AAAA,EACrB;AACF;AAEO,SAAS,iBAAiB,OAAwB;AACvD,SAAO,iBAAiB,OAAO,EAAE,OAAO,GAAG,UAAU,GAAG,CAAC;AAC3D;AAEO,SAAS,iBAAiB,OAAwB;AACvD,MAAI,SAAS,KAAM,QAAO;AAC1B,MAAI,OAAO,UAAU,SAAU,QAAO;AACtC,SAAO,iBAAiB,OAAO,EAAE,OAAO,GAAG,UAAU,GAAG,CAAC;AAC3D;AAEO,SAAS,UAAU,OAAwB;AAChD,QAAM,MAAM,iBAAiB,OAAO,EAAE,OAAO,GAAG,UAAU,OAAO,CAAC;AAClE,MAAI,IAAI,SAAS,GAAG,KAAK,IAAI,SAAS,GAAG,KAAK,IAAI,SAAS,IAAI,GAAG;AAChE,WAAO,IAAI,IAAI,QAAQ,MAAM,IAAI,CAAC;AAAA,EACpC;AACA,SAAO;AACT;;;AC5DA,SAAS,OAAO,iBAAiB;AACjC,SAAS,YAAY;AAKrB,IAAM,qBAAqB;AAEpB,SAAS,iBAAiB,WAA4B;AAC3D,SAAO,aAAa;AACtB;AAEO,SAAS,cACd,WACA,MACA,OACA,KACQ;AACR,SAAO,KAAK,WAAW,iBAAiB,MAAM,OAAO,GAAG,CAAC;AAC3D;AAEA,eAAsB,mBACpB,WACA,MACA,OACA,KACA,SACe;AACf,QAAM,MAAM,WAAW,EAAE,WAAW,KAAK,CAAC;AAC1C,QAAM,UAAU,cAAc,WAAW,MAAM,OAAO,GAAG,GAAG,SAAS,OAAO;AAC9E;AAEO,SAAS,cACd,QACA,WAC2B;AAC3B,MAAI,OAAO,MAAO,QAAO;AACzB,QAAM,SAAS,OAAO,OAAO,OAAO,MAAM,EAAE;AAAA,IAC1C,CAAC,MAAM,EAAE,SAAS;AAAA,EACpB;AACA,SAAO,SAAS,SAAS;AAC3B;AAEO,SAAS,yBAAyB,SAI5B;AACX,QAAM,YAAY,iBAAiB,QAAQ,SAAS;AAEpD,SAAO;AAAA,IACL,MAAM,SAAS,MAAM;AACnB,YAAM,UAAU,MAAM,QAAQ,OAAO,IAAI;AACzC,YAAM;AAAA,QACJ;AAAA,QACA,KAAK;AAAA,QACL,KAAK;AAAA,QACL,QAAQ;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;;;AF/CO,SAAS,gBAAgB,SAA4C;AAC1E,QAAM,YAAY,SAAS,aAAa;AAExC,MAAI,aAAa;AACjB,MAAI,YAAY;AAEhB,SAAO;AAAA,IACL,WAAW,MAAM;AACf,mBAAa,KAAK;AAClB,kBAAY;AAAA,IACd;AAA
A,IAEA,YAAY;AACV;AACA,UAAI,cAAc,SAAS;AACzB,gBAAQ,OAAO;AAAA,UACb,OAAO,MAAM,IAAI,IAAI,SAAS,IAAI,UAAU,GAAG,CAAC;AAAA,QAClD;AAAA,MACF;AAAA,IACF;AAAA,IAEA,SAAS,MAAM;AACb,UAAI,cAAc,SAAS;AACzB,gBAAQ,OAAO,MAAM,OAAO,IAAI,OAAO,EAAE,IAAI,IAAI;AAAA,MACnD;AAEA,yBAAmB,IAAI;AAEvB,UAAI,cAAc,QAAS;AAE3B,YAAM,SAAS,CAAC,GAAG,KAAK,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAE/D,UAAI,cAAc,WAAW;AAC3B,mBAAW,KAAK,QAAQ;AACtB,2BAAiB,GAAG,KAAK,WAAW;AAAA,YAClC,WAAW;AAAA,YACX,iBAAiB;AAAA,UACnB,CAAC;AAAA,QACH;AAAA,MACF,OAAO;AACL,cAAM,UAAU,OAAO;AAAA,UACrB,CAAC,MAAM,cAAc,GAAG,KAAK,SAAS,MAAM;AAAA,QAC9C;AACA,YAAI,QAAQ,SAAS,GAAG;AACtB,kBAAQ,IAAI,MAAM,IAAI,oBAAoB,QAAQ,MAAM,IAAI,CAAC;AAC7D,kBAAQ,IAAI,EAAE;AACd,qBAAW,KAAK,SAAS;AACvB,6BAAiB,GAAG,KAAK,WAAW;AAAA,cAClC,WAAW;AAAA,cACX,iBAAiB;AAAA,YACnB,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,YAAY,MAAc,QAAwB;AACzD,QAAM,MAAM,IAAI,OAAO,MAAM;AAC7B,SAAO,KACJ,QAAQ,SAAS,IAAI,EACrB,MAAM,IAAI,EACV,IAAI,CAAC,SAAS,MAAM,IAAI,EACxB,KAAK,IAAI;AACd;AAEA,SAAS,eAAe,MAAc,WAA2B;AAC/D,MAAI,KAAK,UAAU,UAAW,QAAO;AACrC,SAAO,KAAK,MAAM,GAAG,SAAS,IAAI;AACpC;AAEA,SAAS,mBAAmB,MAAwB;AAClD,QAAM,EAAE,QAAQ,IAAI;AACpB,QAAM,WAAW,OAAO,QAAQ,QAAQ,UAAU,EAC/C,IAAI,CAAC,CAAC,MAAM,KAAK,MAAM,GAAG,IAAI,KAAK,MAAM,QAAQ,CAAC,CAAC,EAAE,EACrD,KAAK,IAAI;AAEZ,UAAQ,IAAI,EAAE;AACd,UAAQ,IAAI,MAAM,KAAK,WAAW,CAAC;AACnC,UAAQ,IAAI,MAAM,IAAI,OAAO,SAAI,OAAO,EAAE,CAAC,CAAC;AAC5C,UAAQ,IAAI,KAAK,MAAM,IAAI,OAAO,CAAC,QAAQ,KAAK,IAAI,EAAE;AACtD,UAAQ,IAAI,KAAK,MAAM,IAAI,QAAQ,CAAC,OAAO,KAAK,KAAK,EAAE;AACvD,UAAQ,IAAI,KAAK,MAAM,IAAI,QAAQ,CAAC,OAAO,QAAQ,UAAU,EAAE;AAC/D,UAAQ;AAAA,IACN,KAAK,MAAM,IAAI,YAAY,CAAC,IAAI,MAAM,MAAM,OAAO,QAAQ,SAAS,CAAC,CAAC,MAAM,MAAM,IAAI,OAAO,QAAQ,SAAS,CAAC,CAAC;AAAA,EAClH;AACA,UAAQ,IAAI,KAAK,MAAM,IAAI,SAAS,CAAC,MAAM,QAAQ,EAAE;AACrD,UAAQ;AAAA,IACN,KAAK,MAAM,IAAI,WAAW,CAAC,IAAI,eAAe,QAAQ,cAAc,CAAC;AAAA,EACvE;AACA,UAAQ;AAAA,IACN,KAAK,MAAM,IAAI,SAAS,CAAC,MAAM,aAAa,QAAQ,gBAAgB,QAAQ,cAAc,CAAC;AAAA,EAC7F;AACA,UAAQ,IAAI,MAAM,IAAI,OAAO,SAAI,OAAO,EAAE,CAAC,CAAC;AAC5C,UAAQ,IAAI
,EAAE;AAChB;AAEA,SAAS,iBACP,GACA,WACA,SAIM;AACN,QAAM,UAAU,OAAO,QAAQ,EAAE,MAAM;AACvC,QAAM,SAAS,QAAQ,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,EAAE,QAAQ,SAAS;AAC1D,QAAM,SAAS,SAAS,MAAM,IAAI,MAAM,IAAI,MAAM,MAAM,MAAM;AAC9D,QAAM,YAAY,SAAS,aAAa;AACxC,QAAM,kBAAkB,SAAS,mBAAmB;AAEpD,UAAQ,IAAI,KAAK,MAAM,IAAI,MAAM,IAAI,SAAS,EAAE,KAAK,EAAE,CAAC,EAAE;AAC1D,QAAM,WAAW,iBAAiB,EAAE,OAAO;AAAA,IACzC,OAAO;AAAA,IACP,UAAU,OAAO,EAAE,KAAK;AAAA,EAC1B,CAAC;AACD,UAAQ,IAAI,OAAO,MAAM,IAAI,QAAQ,CAAC,KAAK,QAAQ,EAAE;AAErD,MAAI,WAAW;AACb,YAAQ,IAAI,OAAO,MAAM,IAAI,SAAS,CAAC,EAAE;AACzC,YAAQ,IAAI,YAAY,eAAe,EAAE,QAAQ,eAAe,GAAG,CAAC,CAAC;AACrE,YAAQ,IAAI,OAAO,MAAM,IAAI,WAAW,CAAC,EAAE;AAC3C,UAAM,iBAAiB,iBAAiB,EAAE,UAAU;AAAA,MAClD,OAAO;AAAA,MACP,UAAU,OAAO,EAAE,QAAQ;AAAA,IAC7B,CAAC;AACD,YAAQ;AAAA,MACN,YAAY,eAAe,gBAAgB,eAAe,GAAG,CAAC;AAAA,IAChE;AAAA,EACF;AAEA,aAAW,CAAC,MAAM,CAAC,KAAK,SAAS;AAC/B,UAAM,aAAa,EAAE,SAAS,YAAY,MAAM,QAAQ,MAAM;AAC9D,UAAM,YAAY,EAAE,SAAS,WAAM,EAAE,MAAM,KAAK;AAChD,YAAQ;AAAA,MACN,OAAO,MAAM,IAAI,OAAO,GAAG,CAAC,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC,CAAC,CAAC,GAAG,SAAS;AAAA,IAC5E;AAAA,EACF;AAEA,MAAI,EAAE,OAAO;AACX,YAAQ,IAAI,OAAO,MAAM,IAAI,QAAQ,CAAC,EAAE;AACxC,UAAM,WAAW,iBAAiB,EAAE,KAAK;AACzC,YAAQ,IAAI,SAAS,MAAM,IAAI,QAAQ,CAAC,EAAE;AAAA,EAC5C;AAEA,UAAQ,IAAI,EAAE;AAChB;;;AGlKA,SAAS,YAAY,SAAAA,cAAa;AAe3B,SAAS,aAAa,SAAyC;AACpE,QAAM,YAAY,iBAAiB,SAAS,SAAS;AACrD,QAAM,SAAS,SAAS,UAAU;AAClC,MAAI,iBAAiB;AAErB,SAAO;AAAA,IACL,MAAM,WAAW,MAAM;AACrB,YAAMC,OAAM,WAAW,EAAE,WAAW,KAAK,CAAC;AAC1C,uBAAiB,cAAc,WAAW,KAAK,MAAM,KAAK,OAAO,OAAO;AAAA,IAC1E;AAAA,IACA,MAAM,UAAU,MAAM;AACpB,YAAM,OAAO,iBAAiB,MAAM,EAAE,OAAO,GAAG,UAAU,OAAO,CAAC;AAClE,YAAM,WAAW,gBAAgB,OAAO,MAAM,OAAO;AAAA,IACvD;AAAA,IACA,MAAM,SAAS,MAAM;AACnB,YAAM,UAAU,iBAAiB,MAAM;AAAA,QACrC,OAAO,SAAS,IAAI;AAAA,QACpB,UAAU;AAAA,MACZ,CAAC;AACD,YAAM;AAAA,QACJ;AAAA,QACA,KAAK;AAAA,QACL,KAAK;AAAA,QACL;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;;;ACnCO,SAAS,YAAY,SAAwC;AAClE,SAAO,yBAAyB;AAAA,IAC9B,WAAW,SAAS;AAAA,IACpB,KAAK;AAAA,IACL,OAAO,MAAM;AACX,YAAM,cAAc,OAAO,KAAK,KAAK,QAAQ,UAAU;AAEvD,YAA
M,cAAc;AAAA,QAClB;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA,iBAAW,QAAQ,aAAa;AAC9B,oBAAY,KAAK,GAAG,IAAI,UAAU,GAAG,IAAI,SAAS;AAAA,MACpD;AAEA,YAAM,OAAO,CAAC,YAAY,KAAK,GAAG,CAAC;AAEnC,iBAAW,KAAK,KAAK,OAAO;AAC1B,cAAM,QAAQ;AAAA,UACZ,OAAO,EAAE,KAAK;AAAA,UACd,UAAU,EAAE,KAAK;AAAA,UACjB,UAAU,EAAE,MAAM;AAAA,UAClB,UAAU,EAAE,QAAQ;AAAA,UACpB,UAAU,EAAE,SAAS,EAAE;AAAA,UACvB,OAAO,EAAE,SAAS;AAAA,UAClB,OAAO,EAAE,QAAQ;AAAA,UACjB,OAAO,EAAE,SAAS;AAAA,QACpB;AACA,mBAAW,QAAQ,aAAa;AAC9B,gBAAM,IAAI,EAAE,OAAO,IAAI;AACvB,gBAAM,KAAK,OAAO,GAAG,SAAS,EAAE,GAAG,UAAU,GAAG,UAAU,EAAE,CAAC;AAAA,QAC/D;AACA,aAAK,KAAK,MAAM,KAAK,GAAG,CAAC;AAAA,MAC3B;AAEA,aAAO,KAAK,KAAK,IAAI,IAAI;AAAA,IAC3B;AAAA,EACF,CAAC;AACH;;;ACvCO,SAAS,iBAAiB,SAA6C;AAC5E,SAAO,yBAAyB;AAAA,IAC9B,WAAW,SAAS;AAAA,IACpB,KAAK;AAAA,IACL,OAAO,MAAM;AACX,YAAM,EAAE,QAAQ,IAAI;AACpB,YAAM,cAAc,OAAO,KAAK,QAAQ,UAAU;AAClD,YAAM,QAAkB,CAAC;AAEzB,YAAM,KAAK,KAAK,KAAK,IAAI,EAAE;AAC3B,YAAM,KAAK,EAAE;AACb,YAAM,KAAK,cAAc,KAAK,KAAK,EAAE;AACrC,YAAM;AAAA,QACJ,cAAc,QAAQ,UAAU,KAAK,QAAQ,SAAS,UAAU,QAAQ,SAAS;AAAA,MACnF;AACA,YAAM,KAAK,iBAAiB,eAAe,QAAQ,cAAc,CAAC,EAAE;AACpE,YAAM;AAAA,QACJ,eAAe,aAAa,QAAQ,gBAAgB,QAAQ,cAAc,CAAC;AAAA,MAC7E;AACA,YAAM,KAAK,EAAE;AAEb,YAAM,KAAK,WAAW;AACtB,YAAM,KAAK,EAAE;AACb,YAAM,KAAK,mBAAmB;AAC9B,YAAM,KAAK,mBAAmB;AAC9B,iBAAW,CAAC,MAAM,KAAK,KAAK,OAAO,QAAQ,QAAQ,UAAU,GAAG;AAC9D,cAAM,KAAK,KAAK,IAAI,MAAM,MAAM,QAAQ,CAAC,CAAC,IAAI;AAAA,MAChD;AACA,YAAM,KAAK,EAAE;AAEb,YAAM,KAAK,UAAU;AACrB,YAAM,KAAK,EAAE;AAEb,YAAM,aAAa;AAAA,QACjB;AAAA,QACA;AAAA,QACA;AAAA,QACA,GAAG;AAAA,QACH;AAAA,QACA;AAAA,MACF;AACA,YAAM,KAAK,KAAK,WAAW,KAAK,KAAK,CAAC,IAAI;AAC1C,YAAM,KAAK,KAAK,WAAW,IAAI,MAAM,KAAK,EAAE,KAAK,KAAK,CAAC,IAAI;AAE3D,iBAAW,KAAK,KAAK,OAAO;AAC1B,cAAM,cAAc,cAAc,GAAG,KAAK,SAAS;AACnD,cAAM,SACJ,gBAAgB,UACZ,oBACA,gBAAgB,SACd,gBACA;AACR,cAAM,QAAQ,iBAAiB,EAAE,KAAK,EAAE,MAAM,GAAG,EAAE;AACnD,cAAM,SAAS,YAAY;AAAA,UACzB,CAAC,SAAS,EAAE,OAAO,IAAI,GAAG,MAAM,QAAQ,CAAC,KAAK;AAAA,QAChD;AACA,cAAM,QAAQ,EAAE,QACZ,iBAAiB,EAAE,KAAK,EACrB,QAAQ,UAAU,
MAAM,EACxB,QAAQ,OAAO,KAAK,IACvB;AACJ,cAAM,MAAM;AAAA,UACV,OAAO,EAAE,KAAK;AAAA,UACd;AAAA,UACA;AAAA,UACA,GAAG;AAAA,UACH,GAAG,EAAE,SAAS;AAAA,UACd;AAAA,QACF;AACA,cAAM,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC,IAAI;AAAA,MACrC;AACA,YAAM,KAAK,EAAE;AAEb,aAAO,MAAM,KAAK,IAAI;AAAA,IACxB;AAAA,EACF,CAAC;AACH;;;AC5EO,SAAS,aAAa,SAAyC;AACpE,SAAO,yBAAyB;AAAA,IAC9B,WAAW,SAAS;AAAA,IACpB,KAAK;AAAA,IACL,QAAQ;AAAA,EACV,CAAC;AACH;AAEA,SAAS,IAAI,KAAqB;AAChC,SAAO,IACJ,QAAQ,MAAM,OAAO,EACrB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,QAAQ;AAC3B;AAEA,SAAS,WAAW,MAA0B;AAC5C,QAAM,EAAE,QAAQ,IAAI;AACpB,QAAM,cAAc,OAAO,KAAK,QAAQ,UAAU;AAElD,QAAM,WAAW,KAAK,MACnB,IAAI,CAAC,MAAM;AACV,UAAM,SAAS,cAAc,GAAG,KAAK,SAAS;AAC9C,UAAM,cACJ,WAAW,UAAU,UAAU,WAAW,SAAS,SAAS;AAC9D,UAAM,cAAc,YACjB,IAAI,CAAC,SAAS;AACb,YAAM,IAAI,EAAE,OAAO,IAAI;AACvB,YAAM,QAAQ,GAAG,SAAS;AAC1B,YAAM,MAAM,SAAS,KAAK,YAAY,SAAS;AAC/C,YAAM,SAAS,GAAG,SAAS,WAAW,IAAI,EAAE,MAAM,CAAC,MAAM;AACzD,aAAO,cAAc,GAAG,IAAI,MAAM,IAAI,MAAM,QAAQ,CAAC,CAAC;AAAA,IACxD,CAAC,EACA,KAAK,EAAE;AAEV,WAAO,cAAc,MAAM;AAAA,cACnB,EAAE,KAAK;AAAA,qBACA,MAAM,KAAK,WAAW;AAAA,2BAChB,IAAI,iBAAiB,EAAE,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC,CAAC;AAAA,2BAC5C,IAAI,EAAE,OAAO,MAAM,GAAG,GAAG,CAAC,CAAC;AAAA,UAC5C,WAAW;AAAA,cACP,EAAE,SAAS;AAAA,iCACQ,EAAE,QAAQ,IAAI,iBAAiB,EAAE,KAAK,CAAC,IAAI,EAAE;AAAA;AAAA,EAE1E,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,gBAAgB,YAAY,IAAI,CAAC,MAAM,OAAO,IAAI,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE;AAC1E,QAAM,gBAAgB,OAAO,QAAQ,QAAQ,UAAU,EACpD;AAAA,IACC,CAAC,CAAC,MAAM,KAAK,MACX,WAAW,IAAI,IAAI,CAAC,YAAY,MAAM,QAAQ,CAAC,CAAC;AAAA,EACpD,EACC,KAAK,EAAE;AAEV,SAAO;AAAA;AAAA;AAAA;AAAA,SAIA,IAAI,KAAK,IAAI,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAuBf,IAAI,KAAK,IAAI,CAAC;AAAA;AAAA,oCAEc,IAAI,KAAK,KAAK,CAAC;AAAA,oCACf,QAAQ,UAAU;AAAA,mCACnB,QAAQ,SAAS;AAAA,mCACjB,QAAQ,SAAS;AAAA,uCACb,eAAe,QAAQ,cAAc,CAAC;AAAA,qCACxC,aAAa,QAAQ,gBAAgB,QAAQ,cAAc,CAAC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aAMpF,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;
AAAA,UAWhB,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,QAMf,QAAQ;AAAA;AAAA;AAAA;AAAA;AAKhB;",
6
+ "names": ["mkdir", "mkdir"]
7
+ }
@@ -0,0 +1,7 @@
1
+ import type { Reporter } from './types.ts';
2
+ export interface JsonReporterOptions {
3
+ outputDir?: string;
4
+ pretty?: boolean;
5
+ }
6
+ export declare function jsonReporter(options?: JsonReporterOptions): Reporter;
7
+ //# sourceMappingURL=json.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/reporters/json.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE3C,MAAM,WAAW,mBAAmB;IAClC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,wBAAgB,YAAY,CAAC,OAAO,CAAC,EAAE,mBAAmB,GAAG,QAAQ,CA4BpE"}
@@ -0,0 +1,6 @@
1
+ import type { Reporter } from './types.ts';
2
+ export interface MarkdownReporterOptions {
3
+ outputDir?: string;
4
+ }
5
+ export declare function markdownReporter(options?: MarkdownReporterOptions): Reporter;
6
+ //# sourceMappingURL=markdown.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/reporters/markdown.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE3C,MAAM,WAAW,uBAAuB;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,gBAAgB,CAAC,OAAO,CAAC,EAAE,uBAAuB,GAAG,QAAQ,CA4E5E"}
@@ -0,0 +1,11 @@
1
+ import type { CaseResult, Reporter, RunEndData } from './types.ts';
2
+ export declare function resolveOutputDir(outputDir?: string): string;
3
+ export declare function getReportPath(outputDir: string, name: string, runId: string, ext: string): string;
4
+ export declare function writeRunReportFile(outputDir: string, name: string, runId: string, ext: string, content: string): Promise<void>;
5
+ export declare function getCaseStatus(result: CaseResult, threshold: number): 'error' | 'pass' | 'fail';
6
+ export declare function createRunEndFileReporter(options: {
7
+ outputDir?: string;
8
+ ext: string;
9
+ render: (data: RunEndData) => string | Promise<string>;
10
+ }): Reporter;
11
+ //# sourceMappingURL=shared.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../src/reporters/shared.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAInE,wBAAgB,gBAAgB,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,CAE3D;AAED,wBAAgB,aAAa,CAC3B,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,GAAG,EAAE,MAAM,GACV,MAAM,CAER;AAED,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,IAAI,CAAC,CAGf;AAED,wBAAgB,aAAa,CAC3B,MAAM,EAAE,UAAU,EAClB,SAAS,EAAE,MAAM,GAChB,OAAO,GAAG,MAAM,GAAG,MAAM,CAM3B;AAED,wBAAgB,wBAAwB,CAAC,OAAO,EAAE;IAChD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,CAAC,IAAI,EAAE,UAAU,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACxD,GAAG,QAAQ,CAeX"}
@@ -0,0 +1,35 @@
1
+ import type { ScorerResult } from '../scorers/index.ts';
2
+ import type { RunSummary } from '../store/index.ts';
3
+ export type Verbosity = 'quiet' | 'normal' | 'verbose';
4
+ export interface RunStartData {
5
+ runId: string;
6
+ name: string;
7
+ model: string;
8
+ totalCases: number;
9
+ }
10
+ export interface CaseResult {
11
+ runId: string;
12
+ index: number;
13
+ input: unknown;
14
+ output: string;
15
+ expected: unknown;
16
+ scores: Record<string, ScorerResult>;
17
+ error: unknown;
18
+ latencyMs: number;
19
+ tokensIn: number;
20
+ tokensOut: number;
21
+ }
22
+ export interface RunEndData {
23
+ runId: string;
24
+ name: string;
25
+ model: string;
26
+ summary: RunSummary;
27
+ cases: CaseResult[];
28
+ threshold: number;
29
+ }
30
+ export interface Reporter {
31
+ onRunStart?(data: RunStartData): void;
32
+ onCaseEnd?(data: CaseResult): void;
33
+ onRunEnd?(data: RunEndData): void | Promise<void>;
34
+ }
35
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/reporters/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACxD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,QAAQ,GAAG,SAAS,CAAC;AAEvD,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,OAAO,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IACrC,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,UAAU,CAAC;IACpB,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,QAAQ;IACvB,UAAU,CAAC,CAAC,IAAI,EAAE,YAAY,GAAG,IAAI,CAAC;IACtC,SAAS,CAAC,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,CAAC;IACnC,QAAQ,CAAC,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACnD"}
@@ -0,0 +1,30 @@
1
+ import type { LanguageModelV3 } from '@ai-sdk/provider';
2
+ export interface ScorerArgs {
3
+ input: unknown;
4
+ output: string;
5
+ expected?: unknown;
6
+ }
7
+ export interface ScorerResult {
8
+ score: number;
9
+ reason?: string;
10
+ }
11
+ export type Scorer = (args: ScorerArgs) => Promise<ScorerResult>;
12
+ export declare const exactMatch: Scorer;
13
+ export declare const includes: Scorer;
14
+ export declare function regex(pattern: RegExp): Scorer;
15
+ export declare const levenshtein: Scorer;
16
+ export declare const jsonMatch: Scorer;
17
+ export declare function llmJudge(config: {
18
+ model: LanguageModelV3;
19
+ criteria: string;
20
+ }): Scorer;
21
+ export declare function factuality(config: {
22
+ model: LanguageModelV3;
23
+ }): Scorer;
24
+ export declare function all(...scorers: Scorer[]): Scorer;
25
+ export declare function any(...scorers: Scorer[]): Scorer;
26
+ export declare function weighted(config: Record<string, {
27
+ scorer: Scorer;
28
+ weight: number;
29
+ }>): Scorer;
30
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/scorers/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAIxD,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,MAAM,MAAM,GAAG,CAAC,IAAI,EAAE,UAAU,KAAK,OAAO,CAAC,YAAY,CAAC,CAAC;AAEjE,eAAO,MAAM,UAAU,EAAE,MAOxB,CAAC;AAEF,eAAO,MAAM,QAAQ,EAAE,MAOtB,CAAC;AAEF,wBAAgB,KAAK,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAI7C;AAuBD,eAAO,MAAM,WAAW,EAAE,MAWzB,CAAC;AA6BF,eAAO,MAAM,SAAS,EAAE,MAUvB,CAAC;AAOF,wBAAgB,QAAQ,CAAC,MAAM,EAAE;IAC/B,KAAK,EAAE,eAAe,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;CAClB,GAAG,MAAM,CAgBT;AAED,wBAAgB,UAAU,CAAC,MAAM,EAAE;IAAE,KAAK,EAAE,eAAe,CAAA;CAAE,GAAG,MAAM,CAgBrE;AAED,wBAAgB,GAAG,CAAC,GAAG,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,CAahD;AAED,wBAAgB,GAAG,CAAC,GAAG,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,CAShD;AAED,wBAAgB,QAAQ,CACtB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,GACzD,MAAM,CAqBR"}