@lobu/cli 3.0.7 → 3.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/credentials.d.ts +2 -1
- package/dist/api/credentials.d.ts.map +1 -1
- package/dist/api/credentials.js +30 -4
- package/dist/api/credentials.js.map +1 -1
- package/dist/commands/chat.d.ts +1 -0
- package/dist/commands/chat.d.ts.map +1 -1
- package/dist/commands/chat.js +16 -3
- package/dist/commands/chat.js.map +1 -1
- package/dist/commands/eval.d.ts +10 -0
- package/dist/commands/eval.d.ts.map +1 -0
- package/dist/commands/eval.js +194 -0
- package/dist/commands/eval.js.map +1 -0
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +24 -2
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/login.d.ts +1 -0
- package/dist/commands/login.d.ts.map +1 -1
- package/dist/commands/login.js +51 -50
- package/dist/commands/login.js.map +1 -1
- package/dist/commands/logout.d.ts.map +1 -1
- package/dist/commands/logout.js +15 -1
- package/dist/commands/logout.js.map +1 -1
- package/dist/eval/client.d.ts +42 -0
- package/dist/eval/client.d.ts.map +1 -0
- package/dist/eval/client.js +166 -0
- package/dist/eval/client.js.map +1 -0
- package/dist/eval/grader.d.ts +14 -0
- package/dist/eval/grader.d.ts.map +1 -0
- package/dist/eval/grader.js +177 -0
- package/dist/eval/grader.js.map +1 -0
- package/dist/eval/reporter.d.ts +8 -0
- package/dist/eval/reporter.d.ts.map +1 -0
- package/dist/eval/reporter.js +242 -0
- package/dist/eval/reporter.js.map +1 -0
- package/dist/eval/runner.d.ts +11 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +172 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/types.d.ts +243 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/eval/types.js +31 -0
- package/dist/eval/types.js.map +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +17 -0
- package/dist/index.js.map +1 -1
- package/dist/templates/.gitignore.tmpl +4 -0
- package/package.json +2 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grader.js","sourceRoot":"","sources":["../../src/eval/grader.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,aAAa,EACb,aAAa,EACb,cAAc,GAEf,MAAM,aAAa,CAAC;AAGrB,MAAM,mBAAmB,GAAG;;;;;;;;;;;;;;;aAef,CAAC;AAEd,MAAM,mBAAmB,GAAG;;;;;;;;;;;;;;eAcb,CAAC;AAEhB,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,UAAkB,EAClB,SAAiB,EACjB,aAAqB,EACrB,KAAmB,EACnB,SAAiB;IAEjB,MAAM,UAAU,GAAG,KAAK;SACrB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,YAAY,CAAC,CAAC,KAAK,EAAE,CAAC;SAChD,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,CACxC,YAAY,EACZ,aAAa,CACd,CAAC,OAAO,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAC;IAExC,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,UAAU,EAAE,SAAS,EAAE;QACzD,QAAQ,EAAE,IAAI;QACd,MAAM,EAAE,IAAI;KACb,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QAClE,OAAO,mBAAmB,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;YAAS,CAAC;QACT,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,UAAkB,EAClB,SAAiB,EACjB,QAAgB,EAChB,aAAqB,EACrB,SAAiB;IAEjB,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC,OAAO,CAC1E,cAAc,EACd,aAAa,CACd,CAAC;IAEF,MAAM,OAAO,GAAG,MAAM,aAAa,CAAC,UAAU,EAAE,SAAS,EAAE;QACzD,QAAQ,EAAE,IAAI;QACd,MAAM,EAAE,IAAI;KACb,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QAClE,OAAO,mBAAmB,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC;YAAS,CAAC;QACT,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,QAA2B;IACtD,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO;YACL,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC;SAC1E,CAAC;IACJ,CAAC;IAED,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAG7B,CAAC;QACF,OAAO;YACL,KAAK,EAAE,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1D,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC;gBACtC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,E
AAE,EAAE,CAAC,CAAC;oBAC1B,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;oBAC1B,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;oBACzB,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC;iBACzC,CAAC,CAAC;gBACL,CAAC,CAAC,EAAE;SACP,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,6CAA6C;QAC7C,OAAO,mBAAmB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,QAA2B;IAKtD,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC;IAC7D,CAAC;IAED,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAI7B,CAAC;QACF,OAAO;YACL,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC;YAC9B,KAAK,EACH,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACzE,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC;SACpC,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,uCAAuC;QACvC,OAAO,mBAAmB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC;AAED,oFAAoF;AACpF,SAAS,WAAW,CAAC,IAAY;IAC/B,0CAA0C;IAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;IAChE,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC;QAAE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAEzC,8BAA8B;IAC9B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAC7C,IAAI,UAAU;QAAE,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC;IAErC,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACrB,CAAC;AAED,oEAAoE;AACpE,SAAS,mBAAmB,CAAC,IAAY;IAKvC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACjC,MAAM,eAAe,GAAG;QACtB,KAAK;QACL,MAAM;QACN,OAAO;QACP,WAAW;QACX,SAAS;QACT,aAAa;QACb,MAAM;QACN,MAAM;KACP,CAAC;IACF,MAAM,eAAe,GAAG;QACtB,IAAI;QACJ,MAAM;QACN,UAAU;QACV,SAAS;QACT,WAAW;QACX,SAAS;QACT,OAAO;QACP,MAAM;KACP,CAAC;IAEF,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACzE,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAEzE,MAAM,MAAM,GAAG,QAAQ,GAAG,QAAQ,CAAC;IACnC,OAAO;QACL,MAAM;QACN,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,
GAAG,CAAC,CAAC,CAAC,GAAG;QACzB,MAAM,EAAE,4BAA4B,QAAQ,SAAS,QAAQ,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;KACxF,CAAC;AACJ,CAAC;AAED,wEAAwE;AACxE,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACjC,MAAM,eAAe,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAC1E,MAAM,eAAe,GAAG,CAAC,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IAE3E,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACzE,MAAM,QAAQ,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACzE,MAAM,MAAM,GAAG,QAAQ,GAAG,QAAQ,CAAC;IAEnC,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;QACzB,QAAQ,EAAE;YACR;gBACE,IAAI,EAAE,SAAS;gBACf,MAAM;gBACN,WAAW,EAAE,wBAAwB,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;aAC1D;SACF;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { EvalReport } from "./types.js";
|
|
2
|
+
/** Print an eval report to the console: agent and model header, each eval's trials, and a passed/total summary. */
export declare function printReport(report: EvalReport): void;
|
|
3
|
+
/** Write the report as JSON into the `.results` directory under `evalsDir` (created if missing) and resolve with the written file path. */
export declare function saveResult(evalsDir: string, report: EvalReport): Promise<string>;
|
|
4
|
+
/** Write the report as 2-space-indented JSON to `outputPath`. */
export declare function writeJsonReport(report: EvalReport, outputPath: string): Promise<void>;
|
|
5
|
+
/** Load the `.json` reports stored in the `.results` directory under `evalsDir`; resolves with an empty array when that directory cannot be read. */
export declare function loadSavedResults(evalsDir: string): Promise<EvalReport[]>;
|
|
6
|
+
/** Build a Markdown comparison of the latest run per provider/model from the saved results plus the optional `currentReport`; resolves with the Markdown text. */
export declare function generateComparisonReport(evalsDir: string, currentReport?: EvalReport): Promise<string>;
|
|
7
|
+
/** Generate the comparison report, write it to `evals-report.md` inside `evalsDir`, and resolve with that file's path. */
export declare function writeMarkdownReport(evalsDir: string, currentReport?: EvalReport): Promise<string>;
|
|
8
|
+
//# sourceMappingURL=reporter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reporter.d.ts","sourceRoot":"","sources":["../../src/eval/reporter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAA2B,MAAM,YAAY,CAAC;AAItE,wBAAgB,WAAW,CAAC,MAAM,EAAE,UAAU,GAAG,IAAI,CAuBpD;AA+DD,wBAAsB,UAAU,CAC9B,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,UAAU,GACjB,OAAO,CAAC,MAAM,CAAC,CAejB;AAID,wBAAsB,eAAe,CACnC,MAAM,EAAE,UAAU,EAClB,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,IAAI,CAAC,CAGf;AAID,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,UAAU,EAAE,CAAC,CAkBvB;AAiBD,wBAAsB,wBAAwB,CAC5C,QAAQ,EAAE,MAAM,EAChB,aAAa,CAAC,EAAE,UAAU,GACzB,OAAO,CAAC,MAAM,CAAC,CA0IjB;AAED,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,MAAM,EAChB,aAAa,CAAC,EAAE,UAAU,GACzB,OAAO,CAAC,MAAM,CAAC,CAMjB"}
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import { readFile, readdir, writeFile, mkdir } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
// ─── Console reporter ──────────────────────────────────────────────────
|
|
5
|
+
/**
 * Print a complete eval report to the console.
 *
 * Output order: agent/model/eval-count header, one section per eval,
 * a coloured "passed/total" summary line, and (when the summary reports
 * failures) the names of the evals whose pass rate is below 1.
 *
 * @param {object} report - Eval report with `agent`, `provider`, `model`,
 *   `evals` and `summary` ({ passed, failed, total }).
 * @returns {void}
 */
export function printReport(report) {
    const { agent, provider, model } = report;
    console.log(chalk.bold(`\nAgent: ${agent}`));
    console.log(chalk.dim(`Model: ${provider}/${model}`));
    console.log(chalk.dim(`Evals: ${report.evals.length} total\n`));

    for (const entry of report.evals) {
        printEval(entry);
    }

    const summary = report.summary;
    const hasFailures = summary.failed > 0;
    // Green only when nothing failed; red otherwise.
    const paint = summary.failed === 0 ? chalk.green : chalk.red;
    console.log(paint(`\nSummary: ${summary.passed}/${summary.total} evals passed`));

    if (hasFailures) {
        // 0.001 tolerance keeps a floating-point "almost 1" pass rate off the list.
        const failedNames = report.evals
            .filter((e) => e.passRate < 1 - 0.001)
            .map((e) => e.name)
            .join(", ");
        console.log(chalk.red(` Failed: ${failedNames}`));
    }
    console.log();
}
|
|
23
|
+
/**
 * Print one eval's section: its name, every trial, and a PASS/FAIL status
 * line with pass count, average score, p50 latency and token usage.
 *
 * An eval counts as PASS when its pass rate is at least 0.8.
 *
 * @param {object} result - Eval result with `name`, `trials`, `passRate`,
 *   `avgScore`, `p50LatencyMs` and (optionally) `totalTokens`.
 * @returns {void}
 */
function printEval(result) {
    const trialCount = result.trials.length;
    const passedCount = result.trials.filter((t) => t.passed).length;
    console.log(chalk.bold(`${result.name} (${trialCount} trials)`));
    for (const trial of result.trials) {
        printTrial(trial);
    }
    // Evaluate the threshold once so colour and label can never disagree.
    const isPass = result.passRate >= 0.8;
    const statusColor = isPass ? chalk.green : chalk.red;
    const status = isPass ? "PASS" : "FAIL";
    // Token usage may be absent; the Markdown report reads this field with
    // optional chaining too, so a missing value must not throw here.
    const tokenCount = result.totalTokens?.totalTokens;
    const tokenInfo = tokenCount ? ` tokens=${tokenCount}` : "";
    console.log(statusColor(` ${status} ${passedCount}/${trialCount} avg=${result.avgScore.toFixed(2)} p50=${result.p50LatencyMs}ms${tokenInfo}`));
    console.log();
}
|
|
38
|
+
/**
 * Print a single trial: a pass/fail line with score and duration, every
 * failed assertion, every rubric criterion (with the explanation for
 * failed ones), and the trace IDs when the trial failed.
 *
 * @param {object} trial - Trial result with `trial`, `passed`, `score`,
 *   `durationMs`, `turns` and an optional `rubric`.
 * @returns {void}
 */
function printTrial(trial) {
    const mark = trial.passed ? chalk.green("✓") : chalk.red("✗");
    const seconds = (trial.durationMs / 1000).toFixed(1);
    const elapsed = chalk.dim(`(${seconds}s)`);
    console.log(` ${mark} Trial ${trial.trial}: ${trial.score.toFixed(2)} ${elapsed}`);

    // Only failed assertions are worth a line of output.
    for (const { assertions } of trial.turns) {
        for (const check of assertions) {
            if (check.passed) {
                continue;
            }
            console.log(chalk.red(` └ ${check.type}: ${check.reason ?? "FAIL"}`));
        }
    }

    const criteria = trial.rubric ? trial.rubric.criteria : [];
    for (const item of criteria) {
        const itemMark = item.passed ? chalk.green("✓") : chalk.red("✗");
        console.log(` ${itemMark} ${item.name}`);
        if (!item.passed) {
            console.log(chalk.red(` └ ${item.explanation}`));
        }
    }

    // Trace IDs are only shown for failed trials (for debugging in Grafana/Tempo).
    if (trial.passed) {
        return;
    }
    const traceIds = trial.turns.map((t) => t.traceId).filter(Boolean);
    if (traceIds.length > 0) {
        console.log(chalk.dim(` traces: ${traceIds.join(", ")}`));
    }
}
|
|
66
|
+
// ─── Auto-save results ─────────────────────────────────────────────────
|
|
67
|
+
/**
 * Persist a report as JSON under `<evalsDir>/.results`.
 *
 * The file is named `<provider>-<model>_<timestamp>.json`, where every
 * character of the provider/model part outside `[a-z0-9-]` (case-insensitive)
 * becomes `-`, and the timestamp has `:` and `.` replaced by `-` and is cut
 * to its first 19 characters.
 *
 * @param {string} evalsDir - Directory that holds the `.results` folder.
 * @param {object} report - Report to save; reads `provider`, `model`, `timestamp`.
 * @returns {Promise<string>} Path of the file that was written.
 */
export async function saveResult(evalsDir, report) {
    const resultsDir = join(evalsDir, ".results");
    await mkdir(resultsDir, { recursive: true });

    const modelSlug = `${report.provider}-${report.model}`.replace(/[^a-z0-9-]/gi, "-");
    const stamp = report.timestamp.replace(/[:.]/g, "-").slice(0, 19);
    const filepath = join(resultsDir, `${modelSlug}_${stamp}.json`);

    const payload = JSON.stringify(report, null, 2);
    await writeFile(filepath, payload);
    console.log(chalk.dim(`Results saved to ${filepath}`));
    return filepath;
}
|
|
78
|
+
// ─── JSON file output ──────────────────────────────────────────────────
|
|
79
|
+
/**
 * Write a report to an explicit path as 2-space-indented JSON and log
 * where it went.
 *
 * @param {object} report - Report to serialize.
 * @param {string} outputPath - Destination file path.
 * @returns {Promise<void>}
 */
export async function writeJsonReport(report, outputPath) {
    const serialized = JSON.stringify(report, null, 2);
    await writeFile(outputPath, serialized);
    const notice = `Results written to ${outputPath}`;
    console.log(chalk.dim(notice));
}
|
|
83
|
+
// ─── Markdown comparison report ────────────────────────────────────────
|
|
84
|
+
/**
 * Load every saved report from `<evalsDir>/.results`.
 *
 * Files are read in reverse lexicographic filename order. A missing results
 * directory yields an empty list silently (nothing has been saved yet); any
 * other directory error is reported with a warning and also yields an empty
 * list. A file that cannot be read, is not valid JSON, or does not contain a
 * JSON object is skipped with a warning, so one corrupt file no longer
 * discards every other saved result.
 *
 * @param {string} evalsDir - Directory that holds the `.results` folder.
 * @returns {Promise<object[]>} Parsed reports; never rejects for I/O or parse errors.
 */
export async function loadSavedResults(evalsDir) {
    const resultsDir = join(evalsDir, ".results");

    let files;
    try {
        files = await readdir(resultsDir);
    }
    catch (err) {
        if (err?.code !== "ENOENT") {
            console.warn(`Could not read eval results from ${resultsDir}: ${err?.message ?? err}`);
        }
        return [];
    }

    // Filenames are "<model-slug>_<timestamp>.json", so this order is newest
    // first only within one model slug; consumers that need the latest run
    // must compare report timestamps.
    const jsonFiles = files
        .filter((f) => f.endsWith(".json"))
        .sort()
        .reverse();

    const reports = [];
    for (const file of jsonFiles) {
        const filePath = join(resultsDir, file);
        try {
            const parsed = JSON.parse(await readFile(filePath, "utf-8"));
            if (parsed === null || typeof parsed !== "object") {
                throw new Error("file does not contain a JSON object");
            }
            reports.push(parsed);
        }
        catch (err) {
            console.warn(`Skipping unreadable eval result ${filePath}: ${err?.message ?? err}`);
        }
    }
    return reports;
}
|
|
103
|
+
/**
 * Keep only the most recent report for each `provider/model` pair.
 *
 * "Most recent" is decided by comparing the `timestamp` values with `>`;
 * on a tie the report seen first is kept. The result follows the order in
 * which each pair was first encountered.
 *
 * @param {object[]} reports - Reports with `provider`, `model`, `timestamp`.
 * @returns {object[]} One report per provider/model pair.
 */
function latestPerModel(reports) {
    const newest = new Map();
    reports.forEach((candidate) => {
        const modelKey = `${candidate.provider}/${candidate.model}`;
        const current = newest.get(modelKey);
        const isNewer = !current || candidate.timestamp > current.timestamp;
        if (isNewer) {
            newest.set(modelKey, candidate);
        }
    });
    return [...newest.values()];
}
|
|
117
|
+
/**
 * Build a Markdown report comparing the latest run of every provider/model.
 *
 * Saved results are loaded from `evalsDir`; `currentReport`, when given, is
 * considered alongside them. The report contains, in order: a header, a
 * per-eval comparison table, an overall-scores table, rubric details per
 * model, and the failed trials per model.
 *
 * @param {string} evalsDir - Directory whose saved results are compared.
 * @param {object} [currentReport] - Report of the run that just finished.
 * @returns {Promise<string>} Markdown text, or "No eval results found." when
 *   there is nothing to report.
 */
export async function generateComparisonReport(evalsDir, currentReport) {
    const savedReports = await loadSavedResults(evalsDir);
    // Build a new list instead of unshifting into the loader's array.
    const allReports = currentReport
        ? [currentReport, ...savedReports]
        : savedReports;
    const models = latestPerModel(allReports);
    if (models.length === 0) {
        return "No eval results found.";
    }
    // Union of eval names across all models, sorted so rows are stable.
    const evalNames = [
        ...new Set(models.flatMap((m) => m.evals.map((e) => e.name))),
    ].sort();
    return [
        renderReportHeader(models),
        renderModelComparison(models, evalNames),
        renderOverallScores(models),
        renderRubricDetails(models),
        renderFailedTrials(models),
    ].join("");
}

/** Render the title, generation time and agent name (taken from the first model). */
function renderReportHeader(models) {
    let md = "# Eval Report\n\n";
    md += `Generated: ${new Date().toISOString()}\n`;
    md += `Agent: ${models[0]?.agent ?? "unknown"}\n\n`;
    return md;
}

/** Render the table with one row per eval and one column per provider/model. */
function renderModelComparison(models, evalNames) {
    let md = "## Model Comparison\n\n";
    md += `| Eval | ${models.map((m) => `${m.provider}/${m.model}`).join(" | ")} |\n`;
    md += `| --- | ${models.map(() => "---").join(" | ")} |\n`;
    for (const evalName of evalNames) {
        const cells = models.map((m) => {
            const evalResult = m.evals.find((e) => e.name === evalName);
            // A model that never ran this eval gets a placeholder cell.
            if (!evalResult) {
                return "-";
            }
            const status = evalResult.passRate >= 0.8 ? "PASS" : "FAIL";
            return `${status} ${evalResult.avgScore.toFixed(2)} (${Math.round(evalResult.passRate * 100)}%)`;
        });
        md += `| ${evalName} | ${cells.join(" | ")} |\n`;
    }
    return md;
}

/** Arithmetic mean of `pick(item)` over `items`; 0 for an empty list. */
function meanOf(items, pick) {
    if (items.length === 0) {
        return 0;
    }
    return items.reduce((sum, item) => sum + pick(item), 0) / items.length;
}

/** Render the per-model totals table (pass rate, average score, latency, tokens). */
function renderOverallScores(models) {
    let md = "\n## Overall Scores\n\n";
    md += "| Model | Pass Rate | Avg Score | p50 Latency | Total Tokens |\n";
    md += "| --- | --- | --- | --- | --- |\n";
    for (const report of models) {
        // Share of evals whose own pass rate reaches the 0.8 PASS threshold.
        const overallPassRate = meanOf(report.evals, (e) => (e.passRate >= 0.8 ? 1 : 0));
        const overallAvgScore = meanOf(report.evals, (e) => e.avgScore);
        // The "p50 Latency" column is the mean of the per-eval p50 values.
        const overallP50 = meanOf(report.evals, (e) => e.p50LatencyMs);
        const totalTokens = report.evals.reduce((sum, e) => sum + (e.totalTokens?.totalTokens ?? 0), 0);
        md += `| ${report.provider}/${report.model} | ${Math.round(overallPassRate * 100)}% | ${overallAvgScore.toFixed(2)} | ${Math.round(overallP50)}ms | ${totalTokens.toLocaleString()} |\n`;
    }
    return md;
}

/** Render, per model, the rubric criteria of the first rubric-graded trial of each eval. */
function renderRubricDetails(models) {
    let md = "";
    for (const report of models) {
        const rubricEvals = report.evals.filter((e) => e.trials.some((t) => t.rubric));
        if (rubricEvals.length === 0) {
            continue;
        }
        md += `\n## Rubric Details: ${report.provider}/${report.model}\n\n`;
        for (const evalResult of rubricEvals) {
            md += `### ${evalResult.name}\n\n`;
            // Show criteria from the first trial that has a rubric.
            const trial = evalResult.trials.find((t) => t.rubric);
            if (!trial?.rubric) {
                continue;
            }
            for (const criterion of trial.rubric.criteria) {
                md += `- **${criterion.name}**: ${criterion.passed ? "PASS" : "FAIL"}`;
                if (!criterion.passed) {
                    md += ` -- ${criterion.explanation}`;
                }
                md += "\n";
            }
            md += "\n";
        }
    }
    return md;
}

/** Render, per model, every failed trial with trace IDs, failed assertions and rubric failures. */
function renderFailedTrials(models) {
    let md = "";
    for (const report of models) {
        const failedEvals = report.evals.filter((e) => e.trials.some((t) => !t.passed));
        if (failedEvals.length === 0) {
            continue;
        }
        md += `\n## Failed Trials: ${report.provider}/${report.model}\n\n`;
        for (const evalResult of failedEvals) {
            for (const trial of evalResult.trials.filter((t) => !t.passed)) {
                md += renderFailedTrial(evalResult.name, trial);
            }
        }
    }
    return md;
}

/** Render one failed trial: heading, trace IDs, failing turns and rubric failures. */
function renderFailedTrial(evalName, trial) {
    let md = `### ${evalName} -- Trial ${trial.trial} (score: ${trial.score.toFixed(2)})\n\n`;

    // Trace IDs for Grafana/Tempo lookup.
    const traceIds = trial.turns.map((t) => t.traceId).filter(Boolean);
    if (traceIds.length > 0) {
        md += `**Trace IDs:** ${traceIds.map((id) => `\`${id}\``).join(", ")}\n\n`;
    }

    // Only turns with at least one failed assertion are shown.
    for (const turn of trial.turns) {
        const failed = turn.assertions.filter((a) => !a.passed);
        if (failed.length === 0) {
            continue;
        }
        md += `**User:** ${turn.user}\n\n`;
        // Agent replies are cut to 500 characters to keep the report readable.
        md += `**Agent:** ${turn.agent.slice(0, 500)}${turn.agent.length > 500 ? "..." : ""}\n\n`;
        for (const assertion of failed) {
            md += `- **${assertion.type}**: FAIL`;
            if (assertion.reason) {
                md += ` -- ${assertion.reason}`;
            }
            md += "\n";
        }
        md += "\n";
    }

    if (trial.rubric) {
        const failedCriteria = trial.rubric.criteria.filter((c) => !c.passed);
        if (failedCriteria.length > 0) {
            md += "**Rubric failures:**\n";
            for (const c of failedCriteria) {
                md += `- **${c.name}**: ${c.explanation}\n`;
            }
            md += "\n";
        }
    }
    return md;
}
|
|
235
|
+
/**
 * Generate the comparison report and store it as `evals-report.md` inside
 * `evalsDir`, logging the destination.
 *
 * @param {string} evalsDir - Directory with saved results; also the output directory.
 * @param {object} [currentReport] - Report of the run that just finished.
 * @returns {Promise<string>} Path of the Markdown file that was written.
 */
export async function writeMarkdownReport(evalsDir, currentReport) {
    const markdown = await generateComparisonReport(evalsDir, currentReport);
    const destination = join(evalsDir, "evals-report.md");
    await writeFile(destination, markdown);
    const notice = `Report written to ${destination}`;
    console.log(chalk.dim(notice));
    return destination;
}
|
|
242
|
+
//# sourceMappingURL=reporter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reporter.js","sourceRoot":"","sources":["../../src/eval/reporter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACvE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,0EAA0E;AAE1E,MAAM,UAAU,WAAW,CAAC,MAAkB;IAC5C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACpE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,KAAK,CAAC,MAAM,UAAU,CAAC,CAAC,CAAC;IAEhE,KAAK,MAAM,UAAU,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACtC,SAAS,CAAC,UAAU,CAAC,CAAC;IACxB,CAAC;IAED,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC,OAAO,CAAC;IACjD,MAAM,YAAY,GAAG,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;IAC5D,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,cAAc,MAAM,IAAI,KAAK,eAAe,CAAC,CAAC,CAAC;IACxE,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACf,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,GAAG,CACP,aAAa,MAAM,CAAC,KAAK;aACtB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,GAAG,KAAK,CAAC;aACrC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;aAClB,IAAI,CAAC,IAAI,CAAC,EAAE,CAChB,CACF,CAAC;IACJ,CAAC;IACD,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC;AAED,SAAS,SAAS,CAAC,MAAkB;IACnC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC;IACxC,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAEjE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,IAAI,KAAK,UAAU,UAAU,CAAC,CAAC,CAAC;IAEjE,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClC,UAAU,CAAC,KAAK,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;IACrE,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;IACxD,MAAM,SAAS,GAAG,MAAM,CAAC,WAAW,CAAC,WAAW;QAC9C,CAAC,CAAC,WAAW,MAAM,CAAC,WAAW,CAAC,WAAW,EAAE;QAC7C,CAAC,CAAC,EAAE,CAAC;IACP,OAAO,CAAC,GAAG,CACT,WA
AW,CACT,KAAK,MAAM,IAAI,WAAW,IAAI,UAAU,QAAQ,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,MAAM,CAAC,YAAY,KAAK,SAAS,EAAE,CACtH,CACF,CAAC;IACF,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC;AAED,SAAS,UAAU,CAAC,KAAkB;IACpC,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC9D,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACxE,OAAO,CAAC,GAAG,CACT,KAAK,IAAI,UAAU,KAAK,CAAC,KAAK,KAAK,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,OAAO,EAAE,CACvE,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QAC/B,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACxC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;gBACtB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,GAAG,CAAC,SAAS,SAAS,CAAC,IAAI,KAAK,SAAS,CAAC,MAAM,IAAI,MAAM,EAAE,CAAC,CACpE,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;YAC9C,MAAM,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACnE,OAAO,CAAC,GAAG,CAAC,OAAO,KAAK,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9C,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;gBACtB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,WAAW,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;IACH,CAAC;IAED,oEAAoE;IACpE,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QAClB,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACnE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,eAAe,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;AACH,CAAC;AAED,0EAA0E;AAE1E,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,QAAgB,EAChB,MAAkB;IAElB,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC9C,MAAM,KAAK,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE7C,MAAM,IAAI,GAAG,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC,OAAO,CACvD,cAAc,EACd,GAAG,CACJ,CAAC;IACF,MAAM,EAAE,GAAG,
MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,GAAG,IAAI,IAAI,EAAE,OAAO,CAAC;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;IAE5C,MAAM,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC3D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,oBAAoB,QAAQ,EAAE,CAAC,CAAC,CAAC;IACvD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,0EAA0E;AAE1E,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,MAAkB,EAClB,UAAkB;IAElB,MAAM,SAAS,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC7D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,sBAAsB,UAAU,EAAE,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,0EAA0E;AAE1E,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,QAAgB;IAEhB,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC9C,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,CAAC;QACxC,MAAM,SAAS,GAAG,KAAK;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;aAClC,IAAI,EAAE;aACN,OAAO,EAAE,CAAC,CAAC,eAAe;QAE7B,MAAM,OAAO,GAAiB,EAAE,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;YAChE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAe,CAAC,CAAC;QAClD,CAAC;QACD,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,OAAqB;IAC3C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAsB,CAAC;IAC9C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,GAAG,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACjD,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,QAAQ,IAAI,MAAM,CAAC,SAAS,GAAG,QAAQ,CAAC,SAAS,EAAE,CAAC;YACvD,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;QAC3B,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;AACtC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,QAAgB,EAChB,aAA0B;IAE1B,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IACpD,IAAI,aAAa;QAAE,UAAU,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAErD,MAAM,MAAM,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;IAE1C,IAAI,MAAM,CAAC,MAAM,KAAK,CAA
C,EAAE,CAAC;QACxB,OAAO,wBAAwB,CAAC;IAClC,CAAC;IAED,2CAA2C;IAC3C,MAAM,SAAS,GAAG;QAChB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;KAC9D,CAAC,IAAI,EAAE,CAAC;IAET,IAAI,EAAE,GAAG,mBAAmB,CAAC;IAC7B,EAAE,IAAI,cAAc,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,CAAC;IACjD,EAAE,IAAI,UAAU,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,SAAS,MAAM,CAAC;IAEpD,mEAAmE;IACnE,EAAE,IAAI,yBAAyB,CAAC;IAChC,EAAE,IAAI,YAAY,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IAClF,EAAE,IAAI,WAAW,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IAE3D,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YAC7B,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;YAC5D,IAAI,CAAC,UAAU;gBAAE,OAAO,GAAG,CAAC;YAC5B,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YAC1D,OAAO,GAAG,IAAI,IAAI,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,IAAI,CAAC;QACjG,CAAC,CAAC,CAAC;QACH,EAAE,IAAI,KAAK,QAAQ,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IACnD,CAAC;IAED,oEAAoE;IACpE,EAAE,IAAI,yBAAyB,CAAC;IAChC,EAAE,IAAI,kEAAkE,CAAC;IACzE,EAAE,IAAI,mCAAmC,CAAC;IAE1C,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,eAAe,GACnB,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YACrB,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,MAAM;gBACpD,MAAM,CAAC,KAAK,CAAC,MAAM;YACrB,CAAC,CAAC,CAAC,CAAC;QACR,MAAM,eAAe,GACnB,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YACrB,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC;gBACpD,MAAM,CAAC,KAAK,CAAC,MAAM;YACrB,CAAC,CAAC,CAAC,CAAC;QACR,MAAM,UAAU,GACd,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC;YACrB,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC
,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC;gBACxD,MAAM,CAAC,KAAK,CAAC,MAAM;YACrB,CAAC,CAAC,CAAC,CAAC;QACR,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CACrC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,WAAW,IAAI,CAAC,CAAC,EACnD,CAAC,CACF,CAAC;QAEF,EAAE,IAAI,KAAK,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,GAAG,CAAC,OAAO,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,QAAQ,WAAW,CAAC,cAAc,EAAE,MAAM,CAAC;IAC3L,CAAC;IAED,oEAAoE;IACpE,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC5C,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAC/B,CAAC;QACF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEvC,EAAE,IAAI,wBAAwB,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,KAAK,MAAM,CAAC;QACpE,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;YACrC,EAAE,IAAI,OAAO,UAAU,CAAC,IAAI,MAAM,CAAC;YACnC,iDAAiD;YACjD,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YACtD,IAAI,CAAC,KAAK,EAAE,MAAM;gBAAE,SAAS;YAE7B,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC9C,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;gBAChD,EAAE,IAAI,OAAO,SAAS,CAAC,IAAI,OAAO,IAAI,EAAE,CAAC;gBACzC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;oBACtB,EAAE,IAAI,OAAO,SAAS,CAAC,WAAW,EAAE,CAAC;gBACvC,CAAC;gBACD,EAAE,IAAI,IAAI,CAAC;YACb,CAAC;YACD,EAAE,IAAI,IAAI,CAAC;QACb,CAAC;IACH,CAAC;IAED,qEAAqE;IACrE,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC5C,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAChC,CAAC;QACF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAEvC,EAAE,IAAI,uBAAuB,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,KAAK,MAAM,CAAC;QACnE,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;YACrC,MAAM,YAAY,GAAG,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAChE,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;gBACjC,EAAE,IAAI,
OAAO,UAAU,CAAC,IAAI,aAAa,KAAK,CAAC,KAAK,YAAY,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;gBAE9F,qCAAqC;gBACrC,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;gBACnE,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACxB,EAAE,IAAI,kBAAkB,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;gBAC7E,CAAC;gBAED,oBAAoB;gBACpB,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;oBAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;oBACxD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;wBAAE,SAAS;oBAClC,EAAE,IAAI,aAAa,IAAI,CAAC,IAAI,MAAM,CAAC;oBACnC,EAAE,IAAI,cAAc,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC;oBAC1F,KAAK,MAAM,SAAS,IAAI,MAAM,EAAE,CAAC;wBAC/B,EAAE,IAAI,OAAO,SAAS,CAAC,IAAI,UAAU,CAAC;wBACtC,IAAI,SAAS,CAAC,MAAM;4BAAE,EAAE,IAAI,OAAO,SAAS,CAAC,MAAM,EAAE,CAAC;wBACtD,EAAE,IAAI,IAAI,CAAC;oBACb,CAAC;oBACD,EAAE,IAAI,IAAI,CAAC;gBACb,CAAC;gBAED,kBAAkB;gBAClB,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;oBACjB,MAAM,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;oBACtE,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC9B,EAAE,IAAI,wBAAwB,CAAC;wBAC/B,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;4BAC/B,EAAE,IAAI,OAAO,CAAC,CAAC,IAAI,OAAO,CAAC,CAAC,WAAW,IAAI,CAAC;wBAC9C,CAAC;wBACD,EAAE,IAAI,IAAI,CAAC;oBACb,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,QAAgB,EAChB,aAA0B;IAE1B,MAAM,EAAE,GAAG,MAAM,wBAAwB,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IACnE,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;IACrD,MAAM,SAAS,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAChC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,qBAAqB,UAAU,EAAE,CAAC,CAAC,CAAC;IAC1D,OAAO,UAAU,CAAC;AACpB,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { EvalDefinition, EvalResult } from "./types.js";
|
|
2
|
+
/**
 * Options accepted by `runEval`.
 *
 * - `gatewayUrl` / `authToken`: forwarded by the runner to the session and
 *   grading calls it makes.
 * - `agentId`: forwarded to session creation.
 * - `provider` / `model`: NOTE(review): not read by the compiled runner in
 *   this release; confirm where (or whether) they are consumed.
 * - `trialsOverride`: when not null/undefined, used instead of the eval
 *   definition's own `trials` value.
 */
export interface RunOptions {
|
|
3
|
+
gatewayUrl: string;
|
|
4
|
+
authToken: string;
|
|
5
|
+
agentId?: string;
|
|
6
|
+
provider?: string;
|
|
7
|
+
model?: string;
|
|
8
|
+
trialsOverride?: number;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Runs the trials of an eval definition and resolves with the aggregated
 * result. `evalFilePath` is the location of the eval file; the definition's
 * `rubric` path, when present, is resolved relative to its directory.
 */
export declare function runEval(definition: EvalDefinition, evalFilePath: string, options: RunOptions): Promise<EvalResult>;
|
|
11
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAGV,cAAc,EACd,UAAU,EAKX,MAAM,YAAY,CAAC;AAEpB,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,wBAAsB,OAAO,CAC3B,UAAU,EAAE,cAAc,EAC1B,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,UAAU,CAAC,CAqCrB"}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { dirname, join } from "node:path";
|
|
3
|
+
import { createSession, deleteSession, sendAndCollect } from "./client.js";
|
|
4
|
+
import { gradeInline, gradeWithRubric } from "./grader.js";
|
|
5
|
+
/**
 * Runs every trial of an eval definition and aggregates the outcomes.
 *
 * Trials are executed one after another (never in parallel). If the
 * definition names a rubric file, it is read once, relative to the directory
 * of the eval file, and the same content is handed to every trial.
 *
 * @param {object} definition - Parsed eval definition (`name`, `trials`, optional `rubric`, ...).
 * @param {string} evalFilePath - Path of the eval file; used to resolve `definition.rubric`.
 * @param {object} options - Run options; `trialsOverride` replaces `definition.trials` when not nullish.
 * @returns {Promise<object>} `{ name, passRate, avgScore, p50LatencyMs, totalTokens, trials }`.
 * @throws Propagates any rejection from reading the rubric file or from a trial.
 */
export async function runEval(definition, evalFilePath, options) {
    const requestedTrials = options.trialsOverride ?? definition.trials;

    // Load the rubric once up front so every trial is graded against the same text.
    let rubricContent;
    if (definition.rubric) {
        const rubricPath = join(dirname(evalFilePath), definition.rubric);
        rubricContent = await readFile(rubricPath, "utf-8");
    }

    const results = [];
    for (let i = 0; i < requestedTrials; i++) {
        results.push(await runTrial(i + 1, definition, rubricContent, options));
    }

    // Rates are computed over the trials that actually ran. Dividing by the
    // requested count would produce rates above 1 for a fractional count
    // (e.g. 2.5 requested => 3 trials executed).
    const completedTrials = results.length;
    const passedTrials = results.filter((trial) => trial.passed).length;
    const scoreSum = results.reduce((sum, trial) => sum + trial.score, 0);

    const latencies = results.flatMap((trial) => trial.turns.map((turn) => turn.latencyMs));
    latencies.sort((a, b) => a - b);

    // Aggregate token usage across all trials.
    const totalTokens = aggregateTokens(results);

    return {
        name: definition.name,
        passRate: completedTrials > 0 ? passedTrials / completedTrials : 0,
        avgScore: completedTrials > 0 ? scoreSum / completedTrials : 0,
        // Element at floor(length / 2) of the ascending list (the upper of the
        // two middle values for an even count); 0 when there are no latencies.
        p50LatencyMs: latencies[Math.floor(latencies.length / 2)] ?? 0,
        totalTokens,
        trials: results,
    };
}
|
|
34
|
+
/**
 * Executes a single trial: opens a fresh session, plays every turn of the
 * definition through it, grades the turns, and always deletes the session
 * afterwards (even when a turn or a grader throws).
 *
 * @param {number} trialNum - Trial number reported back in the result.
 * @param {object} definition - Eval definition (`timeout` in seconds, `turns`, `scoring.pass_threshold`).
 * @param {string|undefined} rubricContent - Rubric text; when truthy the full transcript is rubric-graded.
 * @param {object} options - Run options (`gatewayUrl`, `authToken`, `agentId`).
 * @returns {Promise<object>} `{ trial, passed, score, turns, rubric, durationMs }`.
 */
async function runTrial(trialNum, definition, rubricContent, options) {
    const startedAt = Date.now();
    const timeoutMs = definition.timeout * 1000;
    const { gatewayUrl, authToken, agentId } = options;
    const session = await createSession(gatewayUrl, authToken, {
        agentId,
        forceNew: true,
        dryRun: true,
    });
    const turnResults = [];
    try {
        for (const turn of definition.turns) {
            const response = await sendAndCollect(session, turn.content, timeoutMs);
            const failure = response.error;

            let agent = response.text;
            let assertions = [];
            if (failure) {
                // A failed turn is recorded as one failing "error" assertion;
                // the remaining turns are still sent.
                agent = response.text || `[Error: ${failure}]`;
                assertions = [
                    { type: "error", passed: false, score: 0, reason: failure },
                ];
            }
            else if (turn.assert) {
                // Run assertions for this turn
                assertions = await runAssertions(turn.assert, response.text, gatewayUrl, authToken, timeoutMs);
            }

            turnResults.push({
                user: turn.content,
                agent,
                latencyMs: response.latencyMs,
                assertions,
                tokens: response.tokens,
                traceId: response.traceId,
            });
        }

        // Grade the whole transcript against the rubric, if one was supplied.
        const rubric = rubricContent
            ? await gradeWithRubric(gatewayUrl, authToken, rubricContent, turnResults, timeoutMs)
            : undefined;

        const score = calculateTrialScore(turnResults, rubric);
        return {
            trial: trialNum,
            passed: score >= definition.scoring.pass_threshold,
            score,
            turns: turnResults,
            rubric,
            durationMs: Date.now() - startedAt,
        };
    }
    finally {
        await deleteSession(session);
    }
}
|
|
93
|
+
/**
 * Evaluates a turn's assertions against the agent's reply, in order.
 *
 * Supported assertion types:
 * - "contains": substring check, case-insensitive only when
 *   `options.case_insensitive` is truthy.
 * - "regex": pattern test; the pattern is always compiled with the `i` flag.
 * - "llm-rubric": delegated to `gradeInline`, whose `passed`, `score` and
 *   `reason` are copied into the result.
 * Assertions of any other type contribute no result entry.
 *
 * @param {Array<object>} assertions - Assertions declared on the turn.
 * @param {string} agentResponse - Text returned by the agent for the turn.
 * @param {string} gatewayUrl - Passed through to `gradeInline`.
 * @param {string} authToken - Passed through to `gradeInline`.
 * @param {number} timeoutMs - Passed through to `gradeInline`.
 * @returns {Promise<Array<object>>} One `{ type, passed, score[, reason] }` per recognised assertion.
 */
async function runAssertions(assertions, agentResponse, gatewayUrl, authToken, timeoutMs) {
    const outcomes = [];
    for (const assertion of assertions) {
        const kind = assertion.type;
        if (kind === "contains") {
            const ignoreCase = assertion.options?.case_insensitive;
            const haystack = ignoreCase ? agentResponse.toLowerCase() : agentResponse;
            const needle = ignoreCase ? assertion.value.toLowerCase() : assertion.value;
            const passed = haystack.includes(needle);
            outcomes.push({ type: "contains", passed, score: passed ? 1 : 0 });
        }
        else if (kind === "regex") {
            const passed = new RegExp(assertion.value, "i").test(agentResponse);
            outcomes.push({ type: "regex", passed, score: passed ? 1 : 0 });
        }
        else if (kind === "llm-rubric") {
            const graded = await gradeInline(gatewayUrl, authToken, assertion.value, agentResponse, timeoutMs);
            outcomes.push({
                type: "llm-rubric",
                passed: graded.passed,
                score: graded.score,
                reason: graded.reason,
            });
        }
    }
    return outcomes;
}
|
|
129
|
+
/**
 * Combines a trial's assertion results and optional rubric grade into one score.
 *
 * Turns without assertions are ignored. For every remaining turn the
 * assertion scores are averaged (each assertion weighs the same), and those
 * per-turn averages are then averaged across turns, so the assertion part
 * stays within the range of the individual assertion scores no matter how
 * many turns the eval has. Summing the per-turn averages without dividing by
 * the number of turns would let a two-turn eval score up to 2 and pass with
 * one fully failing turn.
 *
 * @param {Array<object>} turns - Turn results, each with an `assertions` array of `{ score }`.
 * @param {object} [rubric] - Rubric grade with a `score`; optional.
 * @returns {number} The rubric score alone when no turn has assertions; an even
 *   50/50 blend of assertion average and rubric score when both exist; the
 *   assertion average when there is no rubric; 1 when there is nothing to grade.
 */
function calculateTrialScore(turns, rubric) {
    const turnAverages = turns
        .filter((turn) => turn.assertions.length > 0)
        .map((turn) => turn.assertions.reduce((sum, assertion) => sum + assertion.score, 0) /
            turn.assertions.length);

    const hasAssertions = turnAverages.length > 0;
    const assertionAvg = hasAssertions
        ? turnAverages.reduce((sum, average) => sum + average, 0) / turnAverages.length
        : 0;

    if (rubric) {
        // The rubric counts as much as all assertion scores combined.
        return hasAssertions
            ? assertionAvg * 0.5 + rubric.score * 0.5
            : rubric.score;
    }
    if (!hasAssertions)
        return 1; // No assertions = pass
    return assertionAvg;
}
|
|
155
|
+
/**
 * Sums token usage over every turn of every trial.
 *
 * Turns without a `tokens` object are skipped, and a missing (nullish)
 * `inputTokens` / `outputTokens` field counts as 0.
 *
 * @param {Array<object>} trials - Trial results, each with a `turns` array.
 * @returns {{inputTokens: number, outputTokens: number, totalTokens: number}}
 */
function aggregateTokens(trials) {
    const usage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
    for (const { turns } of trials) {
        for (const { tokens } of turns) {
            if (!tokens)
                continue;
            usage.inputTokens += tokens.inputTokens ?? 0;
            usage.outputTokens += tokens.outputTokens ?? 0;
        }
    }
    usage.totalTokens = usage.inputTokens + usage.outputTokens;
    return usage;
}
|
|
172
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAqB3D,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,UAA0B,EAC1B,YAAoB,EACpB,OAAmB;IAEnB,MAAM,MAAM,GAAG,OAAO,CAAC,cAAc,IAAI,UAAU,CAAC,MAAM,CAAC;IAC3D,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,gCAAgC;IAChC,IAAI,aAAiC,CAAC;IACtC,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;QACtB,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;QAClE,aAAa,GAAG,MAAM,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,CAAC,GAAG,CAAC,EAAE,UAAU,EAAE,aAAa,EAAE,OAAO,CAAC,CAAC;QACzE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC5D,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CACtC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CACtC,CAAC;IACF,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAEhC,0CAA0C;IAC1C,MAAM,WAAW,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAE7C,OAAO;QACL,IAAI,EAAE,UAAU,CAAC,IAAI;QACrB,QAAQ,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAChD,QAAQ,EACN,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxE,YAAY,EACV,SAAS,CAAC,MAAM,GAAG,CAAC;YAClB,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACpD,CAAC,CAAC,CAAC;QACP,WAAW;QACX,MAAM,EAAE,OAAO;KAChB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,QAAQ,CACrB,QAAgB,EAChB,UAA0B,EAC1B,aAAiC,EACjC,OAAmB;IAEnB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,SAAS,GAAG,UAAU,CAAC,OAAO,GAAG,IAAI,CAAC;IAE5C,MAAM,OAAO,GA
AG,MAAM,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,SAAS,EAAE;QACzE,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,QAAQ,EAAE,IAAI;QACd,MAAM,EAAE,IAAI;KACb,CAAC,CAAC;IAEH,MAAM,WAAW,GAAiB,EAAE,CAAC;IAErC,IAAI,CAAC;QACH,KAAK,MAAM,IAAI,IAAI,UAAU,CAAC,KAAK,EAAE,CAAC;YACpC,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YAExE,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACnB,WAAW,CAAC,IAAI,CAAC;oBACf,IAAI,EAAE,IAAI,CAAC,OAAO;oBAClB,KAAK,EAAE,QAAQ,CAAC,IAAI,IAAI,WAAW,QAAQ,CAAC,KAAK,GAAG;oBACpD,SAAS,EAAE,QAAQ,CAAC,SAAS;oBAC7B,UAAU,EAAE;wBACV,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,QAAQ,CAAC,KAAK,EAAE;qBACnE;oBACD,MAAM,EAAE,QAAQ,CAAC,MAAM;oBACvB,OAAO,EAAE,QAAQ,CAAC,OAAO;iBAC1B,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,+BAA+B;YAC/B,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM;gBAC5B,CAAC,CAAC,MAAM,aAAa,CACjB,IAAI,CAAC,MAAM,EACX,QAAQ,CAAC,IAAI,EACb,OAAO,CAAC,UAAU,EAClB,OAAO,CAAC,SAAS,EACjB,SAAS,CACV;gBACH,CAAC,CAAC,EAAE,CAAC;YAEP,WAAW,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,IAAI,CAAC,OAAO;gBAClB,KAAK,EAAE,QAAQ,CAAC,IAAI;gBACpB,SAAS,EAAE,QAAQ,CAAC,SAAS;gBAC7B,UAAU;gBACV,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,OAAO,EAAE,QAAQ,CAAC,OAAO;aAC1B,CAAC,CAAC;QACL,CAAC;QAED,+DAA+D;QAC/D,IAAI,MAAgC,CAAC;QACrC,IAAI,aAAa,EAAE,CAAC;YAClB,MAAM,GAAG,MAAM,eAAe,CAC5B,OAAO,CAAC,UAAU,EAClB,OAAO,CAAC,SAAS,EACjB,aAAa,EACb,WAAW,EACX,SAAS,CACV,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,MAAM,KAAK,GAAG,mBAAmB,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;QACvD,MAAM,MAAM,GAAG,KAAK,IAAI,UAAU,CAAC,OAAO,CAAC,cAAc,CAAC;QAE1D,OAAO;YACL,KAAK,EAAE,QAAQ;YACf,MAAM;YACN,KAAK;YACL,KAAK,EAAE,WAAW;YAClB,MAAM;YACN,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC/B,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;AACH,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,UAAuB,EACvB,aAAqB,EACrB,UAAkB,EAClB,SAAiB,EACjB,SAAiB;IAEjB,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;YACvB,KAAK,UAAU,CAAC,CAAC,CAAC;gBAChB,MAAM,MAAM,GAAG,SAAS,CAAC,KAAK,CAAC;gBAC/B,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,EAAE,gBAAgB;oBAClD,C
AAC,CAAC,aAAa,CAAC,WAAW,EAAE;oBAC7B,CAAC,CAAC,aAAa,CAAC;gBAClB,MAAM,MAAM,GAAG,SAAS,CAAC,OAAO,EAAE,gBAAgB;oBAChD,CAAC,CAAC,MAAM,CAAC,WAAW,EAAE;oBACtB,CAAC,CAAC,MAAM,CAAC;gBACX,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACzC,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBAClE,MAAM;YACR,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;gBAC/C,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;gBACzC,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBAC/D,MAAM;YACR,CAAC;YAED,KAAK,YAAY,CAAC,CAAC,CAAC;gBAClB,MAAM,MAAM,GAAG,MAAM,WAAW,CAC9B,UAAU,EACV,SAAS,EACT,SAAS,CAAC,KAAK,EACf,aAAa,EACb,SAAS,CACV,CAAC;gBACF,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,YAAY;oBAClB,MAAM,EAAE,MAAM,CAAC,MAAM;oBACrB,KAAK,EAAE,MAAM,CAAC,KAAK;oBACnB,MAAM,EAAE,MAAM,CAAC,MAAM;iBACtB,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,mBAAmB,CAC1B,KAAmB,EACnB,MAAqB;IAErB,8BAA8B;IAC9B,MAAM,MAAM,GAA6C,EAAE,CAAC;IAE5D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAE3C,8DAA8D;QAC9D,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CACxC,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,iCAAiC;QAC3D,CAAC,CACF,CAAC;QAEF,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IAED,kFAAkF;IAClF,IAAI,MAAM,EAAE,CAAC;QACX,MAAM,eAAe,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACpD,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAEjD,MAAM,YAAY,GAChB,MAAM,CAAC,MAAM,GAAG,CAAC;YACf,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;YACxD,CAAC,CAAC,CAAC,CAAC;QAER,OAAO
,YAAY,GAAG,eAAe,GAAG,MAAM,CAAC,KAAK,GAAG,YAAY,CAAC;IACtE,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC,CAAC,uBAAuB;IAE1D,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;AAChE,CAAC;AAED,SAAS,eAAe,CAAC,MAAqB;IAC5C,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YAC/B,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAChB,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,IAAI,CAAC,CAAC;gBAC5C,YAAY,IAAI,IAAI,CAAC,MAAM,CAAC,YAAY,IAAI,CAAC,CAAC;YAChD,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO;QACL,WAAW;QACX,YAAY;QACZ,WAAW,EAAE,WAAW,GAAG,YAAY;KACxC,CAAC;AACJ,CAAC"}
|