pcl-mcp 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +137 -28
- package/dist/benchmarks/evaluators/context-retrieval-quality.d.ts +30 -0
- package/dist/benchmarks/evaluators/context-retrieval-quality.d.ts.map +1 -0
- package/dist/benchmarks/evaluators/context-retrieval-quality.js +50 -0
- package/dist/benchmarks/evaluators/context-retrieval-quality.js.map +1 -0
- package/dist/benchmarks/evaluators/ir-metrics.d.ts +32 -0
- package/dist/benchmarks/evaluators/ir-metrics.d.ts.map +1 -0
- package/dist/benchmarks/evaluators/ir-metrics.js +98 -0
- package/dist/benchmarks/evaluators/ir-metrics.js.map +1 -0
- package/dist/benchmarks/evaluators/structured-judge.d.ts +34 -0
- package/dist/benchmarks/evaluators/structured-judge.d.ts.map +1 -0
- package/dist/benchmarks/evaluators/structured-judge.js +153 -0
- package/dist/benchmarks/evaluators/structured-judge.js.map +1 -0
- package/dist/benchmarks/evaluators/token-counter.d.ts +9 -0
- package/dist/benchmarks/evaluators/token-counter.d.ts.map +1 -0
- package/dist/benchmarks/evaluators/token-counter.js +24 -0
- package/dist/benchmarks/evaluators/token-counter.js.map +1 -0
- package/dist/benchmarks/generators/generate-corpus.d.ts +2 -0
- package/dist/benchmarks/generators/generate-corpus.d.ts.map +1 -0
- package/dist/benchmarks/generators/generate-corpus.js +243 -0
- package/dist/benchmarks/generators/generate-corpus.js.map +1 -0
- package/dist/benchmarks/lib/harness.d.ts +23 -0
- package/dist/benchmarks/lib/harness.d.ts.map +1 -0
- package/dist/benchmarks/lib/harness.js +44 -0
- package/dist/benchmarks/lib/harness.js.map +1 -0
- package/dist/benchmarks/lib/types.d.ts +79 -0
- package/dist/benchmarks/lib/types.d.ts.map +1 -0
- package/dist/benchmarks/lib/types.js +2 -0
- package/dist/benchmarks/lib/types.js.map +1 -0
- package/dist/benchmarks/reporters/markdown-reporter.d.ts +2 -0
- package/dist/benchmarks/reporters/markdown-reporter.d.ts.map +1 -0
- package/dist/benchmarks/reporters/markdown-reporter.js +80 -0
- package/dist/benchmarks/reporters/markdown-reporter.js.map +1 -0
- package/dist/benchmarks/runners/bench-ablation.d.ts +2 -0
- package/dist/benchmarks/runners/bench-ablation.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-ablation.js +49 -0
- package/dist/benchmarks/runners/bench-ablation.js.map +1 -0
- package/dist/benchmarks/runners/bench-ai-quality.d.ts +2 -0
- package/dist/benchmarks/runners/bench-ai-quality.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-ai-quality.js +297 -0
- package/dist/benchmarks/runners/bench-ai-quality.js.map +1 -0
- package/dist/benchmarks/runners/bench-interactive-eval.d.ts +2 -0
- package/dist/benchmarks/runners/bench-interactive-eval.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-interactive-eval.js +119 -0
- package/dist/benchmarks/runners/bench-interactive-eval.js.map +1 -0
- package/dist/benchmarks/runners/bench-performance.bench.d.ts +2 -0
- package/dist/benchmarks/runners/bench-performance.bench.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-performance.bench.js +50 -0
- package/dist/benchmarks/runners/bench-performance.bench.js.map +1 -0
- package/dist/benchmarks/runners/bench-search-quality.d.ts +2 -0
- package/dist/benchmarks/runners/bench-search-quality.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-search-quality.js +70 -0
- package/dist/benchmarks/runners/bench-search-quality.js.map +1 -0
- package/dist/benchmarks/runners/bench-token-efficiency.d.ts +2 -0
- package/dist/benchmarks/runners/bench-token-efficiency.d.ts.map +1 -0
- package/dist/benchmarks/runners/bench-token-efficiency.js +89 -0
- package/dist/benchmarks/runners/bench-token-efficiency.js.map +1 -0
- package/dist/benchmarks/runners/diag.d.ts +2 -0
- package/dist/benchmarks/runners/diag.d.ts.map +1 -0
- package/dist/benchmarks/runners/diag.js +30 -0
- package/dist/benchmarks/runners/diag.js.map +1 -0
- package/dist/benchmarks/vitest.config.bench.d.ts +3 -0
- package/dist/benchmarks/vitest.config.bench.d.ts.map +1 -0
- package/dist/benchmarks/vitest.config.bench.js +14 -0
- package/dist/benchmarks/vitest.config.bench.js.map +1 -0
- package/dist/bin/pcl.js +36 -23
- package/dist/bin/pcl.js.map +1 -1
- package/dist/src/db.d.ts +2 -1
- package/dist/src/db.d.ts.map +1 -1
- package/dist/src/db.js +25 -21
- package/dist/src/db.js.map +1 -1
- package/dist/src/embeddings.d.ts +1 -1
- package/dist/src/embeddings.js +2 -2
- package/dist/src/embeddings.js.map +1 -1
- package/dist/src/indexer.d.ts +1 -1
- package/dist/src/indexer.d.ts.map +1 -1
- package/dist/src/indexer.js +6 -2
- package/dist/src/indexer.js.map +1 -1
- package/dist/src/search.d.ts.map +1 -1
- package/dist/src/search.js +138 -26
- package/dist/src/search.js.map +1 -1
- package/dist/src/server.js +6 -0
- package/dist/src/server.js.map +1 -1
- package/dist/src/types.d.ts +1 -0
- package/dist/src/types.d.ts.map +1 -1
- package/dist/tests/db.test.d.ts +2 -0
- package/dist/tests/db.test.d.ts.map +1 -0
- package/dist/tests/db.test.js +459 -0
- package/dist/tests/db.test.js.map +1 -0
- package/dist/tests/embeddings.test.d.ts +2 -0
- package/dist/tests/embeddings.test.d.ts.map +1 -0
- package/dist/tests/embeddings.test.js +165 -0
- package/dist/tests/embeddings.test.js.map +1 -0
- package/dist/tests/helpers/test-harness.d.ts +26 -0
- package/dist/tests/helpers/test-harness.d.ts.map +1 -0
- package/dist/tests/helpers/test-harness.js +80 -0
- package/dist/tests/helpers/test-harness.js.map +1 -0
- package/dist/tests/indexer.test.d.ts +2 -0
- package/dist/tests/indexer.test.d.ts.map +1 -0
- package/dist/tests/indexer.test.js +299 -0
- package/dist/tests/indexer.test.js.map +1 -0
- package/dist/tests/schemas.test.d.ts +2 -0
- package/dist/tests/schemas.test.d.ts.map +1 -0
- package/dist/tests/schemas.test.js +378 -0
- package/dist/tests/schemas.test.js.map +1 -0
- package/dist/tests/search.test.d.ts +2 -0
- package/dist/tests/search.test.d.ts.map +1 -0
- package/dist/tests/search.test.js +129 -0
- package/dist/tests/search.test.js.map +1 -0
- package/dist/tests/tools.test.d.ts +2 -0
- package/dist/tests/tools.test.d.ts.map +1 -0
- package/dist/tests/tools.test.js +232 -0
- package/dist/tests/tools.test.js.map +1 -0
- package/package.json +14 -2
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown reporter — reads JSON results from benchmarks/results/ and generates REPORT.md
|
|
3
|
+
*/
|
|
4
|
+
import { readFile, writeFile } from "node:fs/promises";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
// Directory this reporter reads benchmark JSON results from and writes REPORT.md to:
// the "results" folder one level above this module's own directory.
const RESULTS_DIR = join(import.meta.dirname, "..", "results");
|
|
7
|
+
/**
 * Read and parse one JSON result file from RESULTS_DIR.
 *
 * A missing file is the normal "this benchmark was not run" case and yields
 * null silently. Any other failure (unreadable file, malformed JSON) also
 * yields null so the remaining report sections are still produced, but it is
 * logged so a corrupt result file does not silently disappear from the report.
 *
 * @param {string} filename - File name relative to RESULTS_DIR.
 * @returns {Promise<any|null>} The parsed JSON value, or null when unavailable.
 */
async function loadJSON(filename) {
    try {
        const raw = await readFile(join(RESULTS_DIR, filename), "utf8");
        return JSON.parse(raw);
    }
    catch (err) {
        // ENOENT simply means the corresponding benchmark has not produced output.
        if (err?.code !== "ENOENT") {
            console.warn(`Skipping ${filename}: ${err?.message ?? err}`);
        }
        return null;
    }
}
|
|
15
|
+
/**
 * Build the Markdown benchmark report from whichever result files can be
 * loaded, write it to REPORT.md inside RESULTS_DIR, and echo it to stdout.
 * A section is omitted when loadJSON returns null for its result file.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const lines = [];
    const push = (s) => lines.push(s);
    // The search-quality and ablation tables share the same six IR metric columns.
    const metricCells = (m) => [
        m.precisionAt1,
        m.precisionAt3,
        m.precisionAt5,
        m.recallAt5,
        m.mrr,
        m.ndcgAt5,
    ].map((v) => v.toFixed(3)).join(" | ");
    // The AI-quality runner in this package builds per-task scores as
    // { mean, stddev, runs } objects, while this reporter was written against
    // plain numbers. Accept both so one shape mismatch cannot abort the report.
    // NOTE(review): confirm which shape ai-quality.json is actually saved with.
    const scoreOf = (v) => (typeof v === "number" ? v : v.mean);
    const averageOf = (values) => values.length > 0 ? values.reduce((a, b) => a + b, 0) / values.length : 0;
    push("# PCL MCP Benchmark Report");
    push("");
    push(`Generated: ${new Date().toISOString()}`);
    push("");
    // --- Token Efficiency ---
    const tokenData = await loadJSON("token-efficiency.json");
    if (tokenData) {
        push("## Layer 3: Token Efficiency");
        push("");
        push("| Corpus Size | PCL Start | Paste All | Savings | Ratio |");
        push("|-------------|-----------|-----------|---------|-------|");
        for (const r of tokenData.results) {
            push(`| ${r.corpusSize} files | ${r.pclSessionStartTokens} tok | ${r.pasteAllTokens} tok | ${r.savingsPercent.toFixed(1)}% | ${r.ratio.toFixed(1)}x |`);
        }
        push("");
    }
    // --- Search Quality ---
    const searchData = await loadJSON("search-quality.json");
    if (searchData) {
        push("## Layer 2: Search Quality");
        push("");
        push("| Mode | P@1 | P@3 | P@5 | R@5 | MRR | NDCG@5 |");
        push("|------|-----|-----|-----|-----|-----|--------|");
        for (const [mode, m] of Object.entries(searchData.aggregate)) {
            push(`| ${mode} | ${metricCells(m)} |`);
        }
        push("");
    }
    // --- Ablation ---
    const ablationData = await loadJSON("ablation.json");
    if (ablationData) {
        push("## Layer 5: Ablation Study");
        push("");
        push("| Configuration | P@1 | P@3 | P@5 | R@5 | MRR | NDCG@5 |");
        push("|---------------|-----|-----|-----|-----|-----|--------|");
        for (const r of ablationData.results) {
            push(`| ${r.config} | ${metricCells(r.metrics)} |`);
        }
        push("");
    }
    // --- AI Quality ---
    const aiData = await loadJSON("ai-quality.json");
    if (aiData) {
        push("## Layer 4: AI Coding Quality");
        push("");
        push(`Model: \`${aiData.model}\` | Judge: \`${aiData.judgeModel}\``);
        push("");
        push("| Task ID | Category | No Context | Paste All | PCL |");
        push("|---------|----------|------------|-----------|-----|");
        for (const r of aiData.results) {
            push(`| ${r.taskId} | ${r.category} | ${scoreOf(r.noContext).toFixed(1)} | ${scoreOf(r.pasteAll).toFixed(1)} | ${scoreOf(r.pcl).toFixed(1)} |`);
        }
        // Use the stored averages when present; otherwise derive them from the rows.
        const averages = aiData.averages ?? {
            noContext: averageOf(aiData.results.map((r) => scoreOf(r.noContext))),
            pasteAll: averageOf(aiData.results.map((r) => scoreOf(r.pasteAll))),
            pcl: averageOf(aiData.results.map((r) => scoreOf(r.pcl))),
        };
        push(`| **AVERAGE** | | **${scoreOf(averages.noContext).toFixed(1)}** | **${scoreOf(averages.pasteAll).toFixed(1)}** | **${scoreOf(averages.pcl).toFixed(1)}** |`);
        push("");
    }
    const report = lines.join("\n");
    await writeFile(join(RESULTS_DIR, "REPORT.md"), report);
    console.log("Report generated: benchmarks/results/REPORT.md");
    console.log(report);
}
|
|
79
|
+
// Report the failure and make the process exit non-zero so scripts and CI can detect it.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|
|
80
|
+
//# sourceMappingURL=markdown-reporter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-reporter.js","sourceRoot":"","sources":["../../../benchmarks/reporters/markdown-reporter.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC;AAE/D,KAAK,UAAU,QAAQ,CAAI,QAAgB;IACzC,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,EAAE,MAAM,CAAC,CAAM,CAAC;IAC9E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE1C,IAAI,CAAC,4BAA4B,CAAC,CAAC;IACnC,IAAI,CAAC,EAAE,CAAC,CAAC;IACT,IAAI,CAAC,cAAc,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAC/C,IAAI,CAAC,EAAE,CAAC,CAAC;IAET,2BAA2B;IAC3B,MAAM,SAAS,GAAG,MAAM,QAAQ,CAQ7B,uBAAuB,CAAC,CAAC;IAE5B,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC,8BAA8B,CAAC,CAAC;QACrC,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,2DAA2D,CAAC,CAAC;QAClE,IAAI,CAAC,2DAA2D,CAAC,CAAC;QAClE,KAAK,MAAM,CAAC,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC;YAClC,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,YAAY,CAAC,CAAC,qBAAqB,UAAU,CAAC,CAAC,cAAc,UAAU,CAAC,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC1J,CAAC;QACD,IAAI,CAAC,EAAE,CAAC,CAAC;IACX,CAAC;IAED,yBAAyB;IACzB,MAAM,UAAU,GAAG,MAAM,QAAQ,CAS9B,qBAAqB,CAAC,CAAC;IAE1B,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACnC,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACxD,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACxD,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7D,IAAI,CAAC,KAAK,IAAI,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/L,CAAC;QACD,IAAI,CAAC
,EAAE,CAAC,CAAC;IACX,CAAC;IAED,mBAAmB;IACnB,MAAM,YAAY,GAAG,MAAM,QAAQ,CAYhC,eAAe,CAAC,CAAC;IAEpB,IAAI,YAAY,EAAE,CAAC;QACjB,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACnC,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,0DAA0D,CAAC,CAAC;QACjE,IAAI,CAAC,0DAA0D,CAAC,CAAC;QACjE,KAAK,MAAM,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,CAAC;YACrC,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC;YACpB,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACnM,CAAC;QACD,IAAI,CAAC,EAAE,CAAC,CAAC;IACX,CAAC;IAED,qBAAqB;IACrB,MAAM,MAAM,GAAG,MAAM,QAAQ,CAW1B,iBAAiB,CAAC,CAAC;IAEtB,IAAI,MAAM,EAAE,CAAC;QACX,IAAI,CAAC,+BAA+B,CAAC,CAAC;QACtC,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,YAAY,MAAM,CAAC,KAAK,iBAAiB,MAAM,CAAC,UAAU,IAAI,CAAC,CAAC;QACrE,IAAI,CAAC,EAAE,CAAC,CAAC;QACT,IAAI,CAAC,uDAAuD,CAAC,CAAC;QAC9D,IAAI,CAAC,uDAAuD,CAAC,CAAC;QAC9D,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC/B,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,QAAQ,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACvH,CAAC;QACD,IAAI,CAAC,uBAAuB,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC7J,IAAI,CAAC,EAAE,CAAC,CAAC;IACX,CAAC;IAED,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,MAAM,SAAS,CAAC,IAAI,CAAC,WAAW,EAAE,WAAW,CAAC,EAAE,MAAM,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;IAC9D,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;AACtB,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-ablation.d.ts","sourceRoot":"","sources":["../../../benchmarks/runners/bench-ablation.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { setup } from "../lib/harness.js";
|
|
4
|
+
import { search } from "../../src/search.js";
|
|
5
|
+
import { computeMetrics, averageMetrics } from "../evaluators/ir-metrics.js";
|
|
6
|
+
/**
 * Ablation benchmark: run every ground-truth query through each search mode
 * (hybrid, keyword, semantic), average the IR metrics per mode, print them as
 * a table and save them to results/ablation.json. The harness is always
 * cleaned up, even when a step fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
    console.log("=== Layer 5: Ablation Study ===\n");
    const harness = await setup("corpus-small");
    try {
        const benchRoot = join(import.meta.dirname, "..");
        const judgmentsFile = join(benchRoot, "fixtures", "ground-truth", "relevance-judgments.json");
        const groundTruth = JSON.parse(await readFile(judgmentsFile, "utf8"));
        const variants = [
            { name: "Full PCL (hybrid)", mode: "hybrid" },
            { name: "Keyword only (no embeddings)", mode: "keyword" },
            { name: "Semantic only (no BM25)", mode: "semantic" },
        ];
        console.log("Ablation Results:");
        console.log("\u2500".repeat(85));
        console.log("| Configuration | P@1 | P@3 | P@5 | R@5 | MRR | NDCG@5 |");
        console.log("|------------------------------|-------|-------|-------|-------|-------|--------|");
        const jsonResults = [];
        for (const { name, mode } of variants) {
            const perQuery = [];
            // Queries run one at a time, in ground-truth order.
            for (const { query, judgments } of groundTruth.queries) {
                const hits = await search(harness.db, query, { mode, topK: 10 });
                const retrievedIds = hits.map((hit) => hit.id);
                const relevanceByDoc = new Map(judgments.map(({ docId, relevance }) => [docId, relevance]));
                perQuery.push(computeMetrics(retrievedIds, relevanceByDoc));
            }
            const avg = averageMetrics(perQuery);
            jsonResults.push({ config: name, metrics: avg });
            const cells = [
                avg.precisionAt1,
                avg.precisionAt3,
                avg.precisionAt5,
                avg.recallAt5,
                avg.mrr,
                avg.ndcgAt5,
            ].map((value) => value.toFixed(3));
            console.log(`| ${name.padEnd(28)} | ${cells.join(" | ")} |`);
        }
        // Save results
        const resultsDir = join(benchRoot, "results");
        await mkdir(resultsDir, { recursive: true });
        const payload = JSON.stringify({ timestamp: new Date().toISOString(), results: jsonResults }, null, 2);
        await writeFile(join(resultsDir, "ablation.json"), payload);
        console.log("\nResults saved to benchmarks/results/ablation.json");
    }
    finally {
        await harness.cleanup();
    }
}
|
|
48
|
+
// Report the failure and make the process exit non-zero so scripts and CI can detect it.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|
|
49
|
+
//# sourceMappingURL=bench-ablation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-ablation.js","sourceRoot":"","sources":["../../../benchmarks/runners/bench-ablation.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,MAAM,EAAmB,MAAM,qBAAqB,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAQ7E,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;IAEjD,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CACjB,MAAM,CAAC,IAAI,CAAC,OAAO,EACnB,IAAI,EACJ,UAAU,EACV,cAAc,EACd,0BAA0B,CAC3B,CAAC;QACF,MAAM,EAAE,GAAyB,IAAI,CAAC,KAAK,CACzC,MAAM,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC,CAC/B,CAAC;QAEF,MAAM,OAAO,GAAqB;YAChC,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC7C,EAAE,IAAI,EAAE,8BAA8B,EAAE,IAAI,EAAE,SAAS,EAAE;YACzD,EAAE,IAAI,EAAE,yBAAyB,EAAE,IAAI,EAAE,UAAU,EAAE;SACtD,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;QACjC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QACjC,OAAO,CAAC,GAAG,CACT,mFAAmF,CACpF,CAAC;QACF,OAAO,CAAC,GAAG,CACT,mFAAmF,CACpF,CAAC;QAEF,MAAM,WAAW,GAGZ,EAAE,CAAC;QAER,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAwC,EAAE,CAAC;YAExD,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;gBAC3B,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE;oBAChD,IAAI,EAAE,MAAM,CAAC,IAAI;oBACjB,IAAI,EAAE,EAAE;iBACT,CAAC,CAAC;gBACH,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBAC3C,MAAM,eAAe,GAAG,IAAI,GAAG,CAC7B,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAC/C,CAAC;gBACF,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC,CAAC;YAC3D,CAAC;YAED,MAAM,GAAG,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;YACpC,WAAW,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,CAAC;YAExD,OAAO,CAAC,GAAG,CACT,KAAK,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,YAAY,CAAC,OAAO
,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CACtN,CAAC;QACJ,CAAC;QAED,eAAe;QACf,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,CAAC,EAAE;YACtD,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,MAAM,SAAS,CACb,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,eAAe,CAAC,EAC3D,IAAI,CAAC,SAAS,CACZ,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,OAAO,EAAE,WAAW,EAAE,EAC7D,IAAI,EACJ,CAAC,CACF,CACF,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;IACrE,CAAC;YAAS,CAAC;QACT,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-ai-quality.d.ts","sourceRoot":"","sources":["../../../benchmarks/runners/bench-ai-quality.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Layer 4: AI Coding Quality Benchmark
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* 1. Structured yes/no judge
|
|
6
|
+
* 2. Context retrieval quality metrics (recall, precision, F1)
|
|
7
|
+
* 3. Multiple runs per task for statistical stability (mean + stddev)
|
|
8
|
+
* 4. TypeScript syntax checking on generated code
|
|
9
|
+
* 5. Separate context quality from code quality metrics
|
|
10
|
+
*
|
|
11
|
+
* Cost: ~$5-15 per full run (3x repetitions).
|
|
12
|
+
* Requires: ANTHROPIC_API_KEY + BENCH_AI_QUALITY=1
|
|
13
|
+
*/
|
|
14
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
// Load .env file
|
|
17
|
+
const envPath = join(import.meta.dirname, "..", "..", ".env");
|
|
18
|
+
try {
|
|
19
|
+
const envContent = await readFile(envPath, "utf8");
|
|
20
|
+
for (const line of envContent.split("\n")) {
|
|
21
|
+
const trimmed = line.trim();
|
|
22
|
+
if (!trimmed || trimmed.startsWith("#"))
|
|
23
|
+
continue;
|
|
24
|
+
const eqIdx = trimmed.indexOf("=");
|
|
25
|
+
if (eqIdx > 0) {
|
|
26
|
+
const key = trimmed.slice(0, eqIdx).trim();
|
|
27
|
+
const val = trimmed.slice(eqIdx + 1).trim();
|
|
28
|
+
if (!process.env[key])
|
|
29
|
+
process.env[key] = val;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
catch { /* no .env file, rely on env vars */ }
|
|
34
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
35
|
+
import { setup } from "../lib/harness.js";
|
|
36
|
+
import { search } from "../../src/search.js";
|
|
37
|
+
import { renderFile } from "../../src/tools.js";
|
|
38
|
+
import { getProductFile, getCritical, listByType, getFileById, } from "../../src/db.js";
|
|
39
|
+
import { countTokens, disposeEncoder } from "../evaluators/token-counter.js";
|
|
40
|
+
import { structuredJudge, checkTypeScriptSyntax, } from "../evaluators/structured-judge.js";
|
|
41
|
+
import { measureContextRetrieval, averageContextMetrics, } from "../evaluators/context-retrieval-quality.js";
|
|
42
|
+
// Preflight: each required environment variable is checked in order; the first
// one that is missing or empty prints its messages to stderr and exits with code 1.
for (const [envVar, messages] of [
    ["ANTHROPIC_API_KEY", [
        "Error: ANTHROPIC_API_KEY environment variable is required.",
        "Usage: ANTHROPIC_API_KEY=sk-... BENCH_AI_QUALITY=1 npm run bench:ai",
    ]],
    ["BENCH_AI_QUALITY", [
        "Error: Set BENCH_AI_QUALITY=1 to confirm running this benchmark (costs ~$5-15).",
    ]],
]) {
    if (process.env[envVar])
        continue;
    for (const message of messages) {
        console.error(message);
    }
    process.exit(1);
}
|
|
51
|
+
/**
 * Turn the BENCH_RUNS setting into a usable repetition count.
 *
 * An unset variable means 1. A value that does not parse to an integer >= 1
 * (e.g. "abc", "", "0", "-2") also falls back to 1 with a warning: with NaN or
 * a non-positive count the per-task run loop would execute zero times and
 * every score would be reported as the mean of an empty list, i.e. 0.
 *
 * @param {string|undefined} raw - Raw BENCH_RUNS value.
 * @returns {number} Number of runs per task, always an integer >= 1.
 */
function parseRunCount(raw) {
    if (raw === undefined)
        return 1;
    const parsed = Number.parseInt(raw, 10);
    if (Number.isInteger(parsed) && parsed >= 1)
        return parsed;
    console.warn(`Ignoring invalid BENCH_RUNS=${JSON.stringify(raw)}; using 1 run per task.`);
    return 1;
}
const RUNS = parseRunCount(process.env.BENCH_RUNS); // default 1, set BENCH_RUNS=3 for stability
|
|
52
|
+
// Shared Anthropic SDK client: used by callLLM for generation and passed to structuredJudge.
// NOTE(review): constructed without options — presumably it reads ANTHROPIC_API_KEY
// from the environment (checked above); confirm against the SDK.
const client = new Anthropic();
|
|
53
|
+
// Model id sent with every generation request in callLLM and printed in the run header.
const MODEL = "claude-sonnet-4-20250514";
|
|
54
|
+
/**
 * Send one system prompt plus one user message to the Messages API and return
 * the reply text.
 *
 * Errors whose `status` is 429, 500 or 529 are retried with a linearly growing
 * delay (15s, 30s, ...), for at most six attempts in total. Any other error is
 * rethrown immediately. If the last attempt also fails with a retryable
 * status, the failure is logged and "" is returned so the benchmark can carry
 * on with the remaining tasks.
 *
 * @param {string} system - System prompt.
 * @param {string} userMessage - Content of the single user message.
 * @returns {Promise<string>} Text of the first content block when it is a text
 *   block; "" when it is not, or when all attempts were exhausted.
 */
async function callLLM(system, userMessage) {
    const maxRetries = 6;
    const retryableStatuses = new Set([429, 500, 529]);
    for (let attempt = 0; attempt < maxRetries; attempt++) {
        try {
            const response = await client.messages.create({
                model: MODEL,
                max_tokens: 4096,
                temperature: 0,
                system,
                messages: [{ role: "user", content: userMessage }],
            });
            const block = response.content[0];
            return block?.type === "text" ? block.text : "";
        }
        catch (err) {
            // Optional chaining: a nullish rejection must be rethrown, not turned into a TypeError.
            const status = err?.status;
            if (!retryableStatuses.has(status)) {
                throw err;
            }
            if (attempt === maxRetries - 1) {
                // No further attempt follows, so waiting here would only add dead time.
                console.error(` [retry ${attempt + 1}/${maxRetries}] ${status} — giving up, returning empty output`);
                break;
            }
            const delay = (attempt + 1) * 15_000; // 15s, 30s, 45s, 60s, 75s
            console.log(` [retry ${attempt + 1}/${maxRetries}] ${status} — waiting ${delay / 1000}s...`);
            await new Promise((r) => setTimeout(r, delay));
        }
    }
    return ""; // all retries exhausted
}
|
|
81
|
+
// Throttle between tasks to avoid hammering the API
|
|
82
|
+
/**
 * Pause before the next API call.
 *
 * @param {number} [ms=3000] - How long to wait, in milliseconds. Defaults to
 *   the previous fixed pause of three seconds, so existing no-argument calls
 *   behave as before.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
async function throttle(ms = 3000) {
    await new Promise((resolve) => setTimeout(resolve, ms));
}
|
|
85
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} arr - Values to average.
 * @returns {number} Sum divided by count, or 0 for an empty list.
 */
function mean(arr) {
    const count = arr.length;
    if (!(count > 0)) {
        return 0;
    }
    const total = arr.reduce((sum, value) => sum + value, 0);
    return total / count;
}
|
|
88
|
+
/**
 * Sample standard deviation (divides by n - 1) of a list of numbers.
 *
 * @param {number[]} arr - Values to measure.
 * @returns {number} Standard deviation, or 0 when fewer than two values are given.
 */
function stddev(arr) {
    const count = arr.length;
    if (count < 2) {
        return 0;
    }
    // Average computed in place: sum from 0 in array order, divided by the count.
    const center = arr.reduce((sum, value) => sum + value, 0) / count;
    const squaredDeviations = arr.reduce((acc, value) => acc + (value - center) ** 2, 0);
    const variance = squaredDeviations / (count - 1);
    return Math.sqrt(variance);
}
|
|
94
|
+
async function main() {
|
|
95
|
+
console.log("=== Layer 4: AI Coding Quality ===\n");
|
|
96
|
+
console.log(`Model: ${MODEL} | Runs per task: ${RUNS}\n`);
|
|
97
|
+
const harness = await setup("corpus-small");
|
|
98
|
+
try {
|
|
99
|
+
const tasksPath = join(import.meta.dirname, "..", "fixtures", "ground-truth", "tasks.json");
|
|
100
|
+
const gt = JSON.parse(await readFile(tasksPath, "utf8"));
|
|
101
|
+
// Build paste-all context
|
|
102
|
+
const allTypes = [
|
|
103
|
+
"product", "persona", "journey", "spec", "decision", "domain",
|
|
104
|
+
];
|
|
105
|
+
let pasteAllContext = "";
|
|
106
|
+
for (const type of allTypes) {
|
|
107
|
+
for (const f of listByType(harness.db, type)) {
|
|
108
|
+
pasteAllContext += renderFile(f) + "\n\n---\n\n";
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
const pasteAllTokens = countTokens(pasteAllContext);
|
|
112
|
+
console.log(`Paste-all context: ${pasteAllTokens} tokens\n`);
|
|
113
|
+
const systemPrompt = "You are an expert software engineer. Write production-quality TypeScript/React code. Follow the product specs and constraints. Output code in fenced code blocks.";
|
|
114
|
+
const results = [];
|
|
115
|
+
const allContextMetrics = [];
|
|
116
|
+
for (let i = 0; i < gt.tasks.length; i++) {
|
|
117
|
+
const task = gt.tasks[i];
|
|
118
|
+
console.log(`[${i + 1}/${gt.tasks.length}] ${task.id} (${task.category})`);
|
|
119
|
+
// --- Step 1: Measure context retrieval quality ---
|
|
120
|
+
const searchResults = await search(harness.db, task.description, {
|
|
121
|
+
mode: "hybrid",
|
|
122
|
+
topK: 5,
|
|
123
|
+
});
|
|
124
|
+
const retrievedIds = searchResults.map((r) => r.id);
|
|
125
|
+
// Include product file only if it appears in search results
|
|
126
|
+
const product = getProductFile(harness.db);
|
|
127
|
+
const criticalFiles = getCritical(harness.db);
|
|
128
|
+
// Only include critical files that are relevant (appear in search results
|
|
129
|
+
// or match the task's required context) to avoid adding noise
|
|
130
|
+
const searchIdSet = new Set(retrievedIds);
|
|
131
|
+
const relevantCritical = criticalFiles.filter((f) => searchIdSet.has(f.id) || task.requiredContext.includes(f.id));
|
|
132
|
+
const includeProduct = product && (searchIdSet.has(product.id) || task.requiredContext.includes(product.id));
|
|
133
|
+
const allRetrievedIds = [
|
|
134
|
+
...new Set([
|
|
135
|
+
...(includeProduct ? [product.id] : []),
|
|
136
|
+
...relevantCritical.map((f) => f.id),
|
|
137
|
+
...retrievedIds,
|
|
138
|
+
]),
|
|
139
|
+
];
|
|
140
|
+
const contextMetrics = measureContextRetrieval(allRetrievedIds, task.requiredContext);
|
|
141
|
+
allContextMetrics.push(contextMetrics);
|
|
142
|
+
// Build PCL context — de-duplicate so critical files appearing in
|
|
143
|
+
// search results are not rendered twice
|
|
144
|
+
const renderedIds = new Set();
|
|
145
|
+
let pclContext = "";
|
|
146
|
+
if (includeProduct) {
|
|
147
|
+
pclContext += renderFile(product) + "\n\n---\n\n";
|
|
148
|
+
renderedIds.add(product.id);
|
|
149
|
+
}
|
|
150
|
+
for (const c of relevantCritical) {
|
|
151
|
+
if (renderedIds.has(c.id))
|
|
152
|
+
continue;
|
|
153
|
+
pclContext += renderFile(c) + "\n\n---\n\n";
|
|
154
|
+
renderedIds.add(c.id);
|
|
155
|
+
}
|
|
156
|
+
for (const sr of searchResults) {
|
|
157
|
+
if (renderedIds.has(sr.id))
|
|
158
|
+
continue;
|
|
159
|
+
const file = getFileById(harness.db, sr.type, sr.id);
|
|
160
|
+
if (file)
|
|
161
|
+
pclContext += renderFile(file) + "\n\n---\n\n";
|
|
162
|
+
renderedIds.add(sr.id);
|
|
163
|
+
}
|
|
164
|
+
const pclTokens = countTokens(pclContext);
|
|
165
|
+
// Build context docs for judge (the actual required docs)
|
|
166
|
+
let judgeDocs = "";
|
|
167
|
+
for (const docId of task.requiredContext) {
|
|
168
|
+
for (const type of allTypes) {
|
|
169
|
+
const file = getFileById(harness.db, type, docId);
|
|
170
|
+
if (file) {
|
|
171
|
+
judgeDocs += renderFile(file) + "\n\n---\n\n";
|
|
172
|
+
break;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
// --- Step 2: Generate and evaluate (3 runs) ---
|
|
177
|
+
const scoresA = [];
|
|
178
|
+
const scoresB = [];
|
|
179
|
+
const scoresC = [];
|
|
180
|
+
const syntaxA = { valid: 0, total: 0 };
|
|
181
|
+
const syntaxB = { valid: 0, total: 0 };
|
|
182
|
+
const syntaxC = { valid: 0, total: 0 };
|
|
183
|
+
for (let run = 0; run < RUNS; run++) {
|
|
184
|
+
// Generate outputs (sequential with throttle to avoid overloading)
|
|
185
|
+
const outputA = await callLLM(systemPrompt, task.description);
|
|
186
|
+
await throttle();
|
|
187
|
+
const outputB = await callLLM(systemPrompt + "\n\n## Product Context\n\n" + pasteAllContext, task.description);
|
|
188
|
+
await throttle();
|
|
189
|
+
const outputC = await callLLM(systemPrompt + "\n\n## Product Context (PCL)\n\n" + pclContext, task.description);
|
|
190
|
+
await throttle();
|
|
191
|
+
// Structured judge evaluation (sequential to be gentle on API)
|
|
192
|
+
const judgeA = await structuredJudge(client, task, outputA, "");
|
|
193
|
+
await throttle();
|
|
194
|
+
const judgeB = await structuredJudge(client, task, outputB, judgeDocs);
|
|
195
|
+
await throttle();
|
|
196
|
+
const judgeC = await structuredJudge(client, task, outputC, judgeDocs);
|
|
197
|
+
await throttle();
|
|
198
|
+
scoresA.push(judgeA.score);
|
|
199
|
+
scoresB.push(judgeB.score);
|
|
200
|
+
scoresC.push(judgeC.score);
|
|
201
|
+
// Syntax check
|
|
202
|
+
const synA = checkTypeScriptSyntax(outputA);
|
|
203
|
+
const synB = checkTypeScriptSyntax(outputB);
|
|
204
|
+
const synC = checkTypeScriptSyntax(outputC);
|
|
205
|
+
if (synA.codeBlockCount > 0) {
|
|
206
|
+
syntaxA.total++;
|
|
207
|
+
if (synA.valid)
|
|
208
|
+
syntaxA.valid++;
|
|
209
|
+
}
|
|
210
|
+
if (synB.codeBlockCount > 0) {
|
|
211
|
+
syntaxB.total++;
|
|
212
|
+
if (synB.valid)
|
|
213
|
+
syntaxB.valid++;
|
|
214
|
+
}
|
|
215
|
+
if (synC.codeBlockCount > 0) {
|
|
216
|
+
syntaxC.total++;
|
|
217
|
+
if (synC.valid)
|
|
218
|
+
syntaxC.valid++;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
const result = {
|
|
222
|
+
taskId: task.id,
|
|
223
|
+
category: task.category,
|
|
224
|
+
noContext: { mean: mean(scoresA), stddev: stddev(scoresA), runs: scoresA },
|
|
225
|
+
pasteAll: { mean: mean(scoresB), stddev: stddev(scoresB), runs: scoresB },
|
|
226
|
+
pcl: { mean: mean(scoresC), stddev: stddev(scoresC), runs: scoresC },
|
|
227
|
+
contextMetrics,
|
|
228
|
+
syntax: { noContext: syntaxA, pasteAll: syntaxB, pcl: syntaxC },
|
|
229
|
+
pclTokens,
|
|
230
|
+
pasteAllTokens,
|
|
231
|
+
};
|
|
232
|
+
results.push(result);
|
|
233
|
+
const flag = result.pcl.stddev > 1.5 ? " [UNSTABLE]" : "";
|
|
234
|
+
console.log(` Scores: No=${result.noContext.mean.toFixed(1)}±${result.noContext.stddev.toFixed(1)} | All=${result.pasteAll.mean.toFixed(1)}±${result.pasteAll.stddev.toFixed(1)} | PCL=${result.pcl.mean.toFixed(1)}±${result.pcl.stddev.toFixed(1)}${flag}`);
|
|
235
|
+
console.log(` Context: recall=${contextMetrics.recall.toFixed(2)} precision=${contextMetrics.precision.toFixed(2)} F1=${contextMetrics.f1.toFixed(2)} | misses: [${contextMetrics.misses.join(", ")}]`);
|
|
236
|
+
}
|
|
237
|
+
// --- Summary ---
|
|
238
|
+
console.log("\n=== Results Summary ===\n");
|
|
239
|
+
// Score comparison table
|
|
240
|
+
console.log("| Task | Category | No Context | Paste All | PCL |");
|
|
241
|
+
console.log("|----------|--------------------|----------------|----------------|----------------|");
|
|
242
|
+
for (const r of results) {
|
|
243
|
+
console.log(`| ${r.taskId.padEnd(8)} | ${r.category.padEnd(18)} | ${r.noContext.mean.toFixed(1)}±${r.noContext.stddev.toFixed(1).padEnd(4)} | ${r.pasteAll.mean.toFixed(1)}±${r.pasteAll.stddev.toFixed(1).padEnd(4)} | ${r.pcl.mean.toFixed(1)}±${r.pcl.stddev.toFixed(1).padEnd(4)} |`);
|
|
244
|
+
}
|
|
245
|
+
const avgA = mean(results.map((r) => r.noContext.mean));
|
|
246
|
+
const avgB = mean(results.map((r) => r.pasteAll.mean));
|
|
247
|
+
const avgC = mean(results.map((r) => r.pcl.mean));
|
|
248
|
+
console.log(`| AVERAGE | | ${avgA.toFixed(1).padEnd(14)} | ${avgB.toFixed(1).padEnd(14)} | ${avgC.toFixed(1).padEnd(14)} |`);
|
|
249
|
+
// Context retrieval summary
|
|
250
|
+
const avgCtx = averageContextMetrics(allContextMetrics);
|
|
251
|
+
console.log(`\nContext Retrieval Quality:`);
|
|
252
|
+
console.log(` Recall: ${avgCtx.recall.toFixed(3)} | Precision: ${avgCtx.precision.toFixed(3)} | F1: ${avgCtx.f1.toFixed(3)}`);
|
|
253
|
+
// Syntax validity
|
|
254
|
+
const synTotalA = results.reduce((s, r) => s + r.syntax.noContext.total, 0);
|
|
255
|
+
const synValidA = results.reduce((s, r) => s + r.syntax.noContext.valid, 0);
|
|
256
|
+
const synTotalB = results.reduce((s, r) => s + r.syntax.pasteAll.total, 0);
|
|
257
|
+
const synValidB = results.reduce((s, r) => s + r.syntax.pasteAll.valid, 0);
|
|
258
|
+
const synTotalC = results.reduce((s, r) => s + r.syntax.pcl.total, 0);
|
|
259
|
+
const synValidC = results.reduce((s, r) => s + r.syntax.pcl.valid, 0);
|
|
260
|
+
console.log(`\nSyntax Validity:`);
|
|
261
|
+
console.log(` No Context: ${synTotalA > 0 ? ((synValidA / synTotalA) * 100).toFixed(0) : "N/A"}% | Paste All: ${synTotalB > 0 ? ((synValidB / synTotalB) * 100).toFixed(0) : "N/A"}% | PCL: ${synTotalC > 0 ? ((synValidC / synTotalC) * 100).toFixed(0) : "N/A"}%`);
|
|
262
|
+
// Token efficiency
|
|
263
|
+
const avgPclTokens = mean(results.map((r) => r.pclTokens));
|
|
264
|
+
console.log(`\nToken Efficiency: PCL avg ${Math.round(avgPclTokens)} tok vs paste-all ${pasteAllTokens} tok (${((1 - avgPclTokens / pasteAllTokens) * 100).toFixed(1)}% savings)`);
|
|
265
|
+
// Statistical significance hint
|
|
266
|
+
const delta = avgC - avgA;
|
|
267
|
+
console.log(`\nPCL vs No-Context delta: +${delta.toFixed(1)} points`);
|
|
268
|
+
if (delta > 1.5) {
|
|
269
|
+
console.log(" → Meaningful improvement detected");
|
|
270
|
+
}
|
|
271
|
+
else {
|
|
272
|
+
console.log(" → Delta below 1.5 threshold — more tasks may be needed");
|
|
273
|
+
}
|
|
274
|
+
// Save results
|
|
275
|
+
await mkdir(join(import.meta.dirname, "..", "results"), { recursive: true });
|
|
276
|
+
await writeFile(join(import.meta.dirname, "..", "results", "ai-quality.json"), JSON.stringify({
|
|
277
|
+
timestamp: new Date().toISOString(),
|
|
278
|
+
model: MODEL,
|
|
279
|
+
runs: RUNS,
|
|
280
|
+
results,
|
|
281
|
+
averages: { noContext: avgA, pasteAll: avgB, pcl: avgC },
|
|
282
|
+
contextRetrieval: avgCtx,
|
|
283
|
+
syntaxValidity: {
|
|
284
|
+
noContext: synTotalA > 0 ? synValidA / synTotalA : null,
|
|
285
|
+
pasteAll: synTotalB > 0 ? synValidB / synTotalB : null,
|
|
286
|
+
pcl: synTotalC > 0 ? synValidC / synTotalC : null,
|
|
287
|
+
},
|
|
288
|
+
}, null, 2));
|
|
289
|
+
console.log("\nResults saved to benchmarks/results/ai-quality.json");
|
|
290
|
+
}
|
|
291
|
+
finally {
|
|
292
|
+
disposeEncoder();
|
|
293
|
+
await harness.cleanup();
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
// Entry point: run the benchmark. On failure, log the error and mark the
// process as failed so shell scripts and CI do not treat it as a success.
// process.exitCode (rather than process.exit) lets pending output flush.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|
|
297
|
+
//# sourceMappingURL=bench-ai-quality.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-ai-quality.js","sourceRoot":"","sources":["../../../benchmarks/runners/bench-ai-quality.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,iBAAiB;AACjB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;AAC9D,IAAI,CAAC;IACH,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IACnD,KAAK,MAAM,IAAI,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QAClD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;YAC3C,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;QAChD,CAAC;IACH,CAAC;AACH,CAAC;AAAC,MAAM,CAAC,CAAC,oCAAoC,CAAC,CAAC;AAEhD,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EACL,cAAc,EACd,WAAW,EACX,UAAU,EACV,WAAW,GACZ,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAC7E,OAAO,EACL,eAAe,EACf,qBAAqB,GACtB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EACL,uBAAuB,EACvB,qBAAqB,GAEtB,MAAM,4CAA4C,CAAC;AAIpD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;IACnC,OAAO,CAAC,KAAK,CAAC,4DAA4D,CAAC,CAAC;IAC5E,OAAO,CAAC,KAAK,CACX,qEAAqE,CACtE,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;IAClC,OAAO,CAAC,KAAK,CACX,iFAAiF,CAClF,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,4CAA4C;AACtG,MAAM,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;AAC/B,MAAM,KAAK,GAAG,0BAA0B,CAAC;AAEzC,KAAK,UAAU,OAAO,CAAC,MAAc,EAAE,WAAm
B;IACxD,MAAM,UAAU,GAAG,CAAC,CAAC;IACrB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,UAAU,EAAE,OAAO,EAAE,EAAE,CAAC;QACtD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC5C,KAAK,EAAE,KAAK;gBACZ,UAAU,EAAE,IAAI;gBAChB,WAAW,EAAE,CAAC;gBACd,MAAM;gBACN,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;aACnD,CAAC,CAAC;YACH,MAAM,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAClC,OAAO,KAAK,EAAE,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QAClD,CAAC;QAAC,OAAO,GAAY,EAAE,CAAC;YACtB,MAAM,MAAM,GAAI,GAA2B,CAAC,MAAM,CAAC;YACnD,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;gBACvD,MAAM,KAAK,GAAG,CAAC,OAAO,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,+BAA+B;gBACrE,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,GAAG,CAAC,IAAI,UAAU,KAAK,MAAM,cAAc,KAAK,GAAG,IAAI,MAAM,CAAC,CAAC;gBAChG,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;gBAC/C,SAAS;YACX,CAAC;YACD,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IACD,OAAO,EAAE,CAAC,CAAC,wBAAwB;AACrC,CAAC;AAED,oDAAoD;AACpD,KAAK,UAAU,QAAQ;IACrB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,IAAI,CAAC,GAAa;IACzB,OAAO,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AAC1E,CAAC;AAED,SAAS,MAAM,CAAC,GAAa;IAC3B,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAC7B,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IACpB,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;AACjF,CAAC;AAsBD,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,UAAU,KAAK,qBAAqB,IAAI,IAAI,CAAC,CAAC;IAE1D,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CACpB,MAAM,CAAC,IAAI,CAAC,OAAO,EACnB,IAAI,EACJ,UAAU,EACV,cAAc,EACd,YAAY,CACb,CA
AC;QACF,MAAM,EAAE,GAAoB,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;QAE1E,0BAA0B;QAC1B,MAAM,QAAQ,GAAe;YAC3B,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,QAAQ;SAC9D,CAAC;QACF,IAAI,eAAe,GAAG,EAAE,CAAC;QACzB,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,OAAO,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC;gBAC7C,eAAe,IAAI,UAAU,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC;YACnD,CAAC;QACH,CAAC;QACD,MAAM,cAAc,GAAG,WAAW,CAAC,eAAe,CAAC,CAAC;QACpD,OAAO,CAAC,GAAG,CAAC,sBAAsB,cAAc,WAAW,CAAC,CAAC;QAE7D,MAAM,YAAY,GAChB,mKAAmK,CAAC;QAEtK,MAAM,OAAO,GAAiB,EAAE,CAAC;QACjC,MAAM,iBAAiB,GAAqB,EAAE,CAAC;QAE/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC;YAC1B,OAAO,CAAC,GAAG,CACT,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,MAAM,KAAK,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,QAAQ,GAAG,CAC9D,CAAC;YAEF,oDAAoD;YACpD,MAAM,aAAa,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,IAAI,CAAC,WAAW,EAAE;gBAC/D,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,CAAC;aACR,CAAC,CAAC;YACH,MAAM,YAAY,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAEpD,4DAA4D;YAC5D,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YAC3C,MAAM,aAAa,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YAE9C,0EAA0E;YAC1E,8DAA8D;YAC9D,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;YAC1C,MAAM,gBAAgB,GAAG,aAAa,CAAC,MAAM,CAC3C,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CACpE,CAAC;YACF,MAAM,cAAc,GAAG,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC;YAE7G,MAAM,eAAe,GAAG;gBACtB,GAAG,IAAI,GAAG,CAAC;oBACT,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,OAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBACxC,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACpC,GAAG,YAAY;iBAChB,CAAC;aACH,CAAC;YAEF,MAAM,cAAc,GAAG,uBAAuB,CAC5C,eAAe,EACf,IAAI,CAAC,eAAe,CACrB,CAAC;YACF,iBAAiB,CAAC,IAAI,CAAC,cA
Ac,CAAC,CAAC;YAEvC,kEAAkE;YAClE,wCAAwC;YACxC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;YACtC,IAAI,UAAU,GAAG,EAAE,CAAC;YACpB,IAAI,cAAc,EAAE,CAAC;gBACnB,UAAU,IAAI,UAAU,CAAC,OAAQ,CAAC,GAAG,aAAa,CAAC;gBACnD,WAAW,CAAC,GAAG,CAAC,OAAQ,CAAC,EAAE,CAAC,CAAC;YAC/B,CAAC;YACD,KAAK,MAAM,CAAC,IAAI,gBAAgB,EAAE,CAAC;gBACjC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;oBAAE,SAAS;gBACpC,UAAU,IAAI,UAAU,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC;gBAC5C,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACxB,CAAC;YACD,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;gBAC/B,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;oBAAE,SAAS;gBACrC,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;gBACrD,IAAI,IAAI;oBAAE,UAAU,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC;gBACzD,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;YACzB,CAAC;YACD,MAAM,SAAS,GAAG,WAAW,CAAC,UAAU,CAAC,CAAC;YAE1C,0DAA0D;YAC1D,IAAI,SAAS,GAAG,EAAE,CAAC;YACnB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;gBACzC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;oBAC5B,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;oBAClD,IAAI,IAAI,EAAE,CAAC;wBACT,SAAS,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC;wBAC9C,MAAM;oBACR,CAAC;gBACH,CAAC;YACH,CAAC;YAED,iDAAiD;YACjD,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,MAAM,OAAO,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;YACvC,MAAM,OAAO,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;YACvC,MAAM,OAAO,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;YAEvC,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC;gBACpC,mEAAmE;gBACnE,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;gBAC9D,MAAM,QAAQ,EAAE,CAAC;gBACjB,MAAM,OAAO,GAAG,MAAM,OAAO,CAC3B,YAAY,GAAG,4BAA4B,GAAG,eAAe,EAC7D,IAAI,CAAC,WAAW,CACjB,CAAC;gBACF,MAAM,QAAQ,EAAE,CAAC;gBACjB,MAAM,OAAO,GAAG,MAAM,OAAO,CAC3B,YAAY,GAAG,kCAAkC,GAAG,UAAU,EAC9D,IAAI,CAAC,WAAW,CACjB,CAAC;gBACF,MAAM,QAAQ,EAAE,CAAC;gBAEjB,+DAA+D;gBAC/D,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,MAAM,EAAE,IA
AI,EAAE,OAAO,EAAE,EAAE,CAAC,CAAC;gBAChE,MAAM,QAAQ,EAAE,CAAC;gBACjB,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;gBACvE,MAAM,QAAQ,EAAE,CAAC;gBACjB,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;gBACvE,MAAM,QAAQ,EAAE,CAAC;gBAEjB,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC3B,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC3B,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE3B,eAAe;gBACf,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;gBAC5C,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;gBAC5C,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;gBAE5C,IAAI,IAAI,CAAC,cAAc,GAAG,CAAC,EAAE,CAAC;oBAC5B,OAAO,CAAC,KAAK,EAAE,CAAC;oBAChB,IAAI,IAAI,CAAC,KAAK;wBAAE,OAAO,CAAC,KAAK,EAAE,CAAC;gBAClC,CAAC;gBACD,IAAI,IAAI,CAAC,cAAc,GAAG,CAAC,EAAE,CAAC;oBAC5B,OAAO,CAAC,KAAK,EAAE,CAAC;oBAChB,IAAI,IAAI,CAAC,KAAK;wBAAE,OAAO,CAAC,KAAK,EAAE,CAAC;gBAClC,CAAC;gBACD,IAAI,IAAI,CAAC,cAAc,GAAG,CAAC,EAAE,CAAC;oBAC5B,OAAO,CAAC,KAAK,EAAE,CAAC;oBAChB,IAAI,IAAI,CAAC,KAAK;wBAAE,OAAO,CAAC,KAAK,EAAE,CAAC;gBAClC,CAAC;YACH,CAAC;YAED,MAAM,MAAM,GAAe;gBACzB,MAAM,EAAE,IAAI,CAAC,EAAE;gBACf,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,SAAS,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE;gBAC1E,QAAQ,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE;gBACzE,GAAG,EAAE,EAAE,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE;gBACpE,cAAc;gBACd,MAAM,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE;gBAC/D,SAAS;gBACT,cAAc;aACf,CAAC;YAEF,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAErB,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;YAC1D,OAAO,CAAC,GAAG,CACT,gBAAgB,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,MAAM,CAAC,GAAG
,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,EAAE,CAClP,CAAC;YACF,OAAO,CAAC,GAAG,CACT,qBAAqB,cAAc,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,cAAc,cAAc,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,cAAc,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,cAAc,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAC5L,CAAC;QACJ,CAAC;QAED,kBAAkB;QAClB,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAE3C,yBAAyB;QACzB,OAAO,CAAC,GAAG,CACT,sFAAsF,CACvF,CAAC;QACF,OAAO,CAAC,GAAG,CACT,sFAAsF,CACvF,CAAC;QACF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAC7Q,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;QACxD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;QACvD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;QAElD,OAAO,CAAC,GAAG,CACT,qCAAqC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CACpI,CAAC;QAEF,4BAA4B;QAC5B,MAAM,MAAM,GAAG,qBAAqB,CAAC,iBAAiB,CAAC,CAAC;QACxD,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;QAC5C,OAAO,CAAC,GAAG,CACT,aAAa,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,MAAM,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAClH,CAAC;QAEF,kBAAkB;QAClB,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,
CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC5E,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC5E,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACtE,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAEtE,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,CACT,iBAAiB,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,kBAAkB,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,YAAY,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CACzP,CAAC;QAEF,mBAAmB;QACnB,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;QAC3D,OAAO,CAAC,GAAG,CACT,+BAA+B,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,qBAAqB,cAAc,SAAS,CAAC,CAAC,CAAC,GAAG,YAAY,GAAG,cAAc,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CACtK,CAAC;QAEF,gCAAgC;QAChC,MAAM,KAAK,GAAG,IAAI,GAAG,IAAI,CAAC;QAC1B,OAAO,CAAC,GAAG,CACT,+BAA+B,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CACzD,CAAC;QACF,IAAI,KAAK,GAAG,GAAG,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,0DAA0D,CAAC,CAAC;QAC1E,CAAC;QAED,eAAe;QACf,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7E,MAAM,SAAS,CACb,IA
AI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,iBAAiB,CAAC,EAC7D,IAAI,CAAC,SAAS,CACZ;YACE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,KAAK,EAAE,KAAK;YACZ,IAAI,EAAE,IAAI;YACV,OAAO;YACP,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE;YACxD,gBAAgB,EAAE,MAAM;YACxB,cAAc,EAAE;gBACd,SAAS,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI;gBACvD,QAAQ,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI;gBACtD,GAAG,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI;aAClD;SACF,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,uDAAuD,CAAC,CAAC;IACvE,CAAC;YAAS,CAAC;QACT,cAAc,EAAE,CAAC;QACjB,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-interactive-eval.d.ts","sourceRoot":"","sources":["../../../benchmarks/runners/bench-interactive-eval.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Interactive evaluation — measures context retrieval quality for all tasks
|
|
3
|
+
* and outputs task prompts for manual evaluation. No API key needed.
|
|
4
|
+
*/
|
|
5
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
import { setup } from "../lib/harness.js";
|
|
8
|
+
import { search } from "../../src/search.js";
|
|
9
|
+
import { renderFile } from "../../src/tools.js";
|
|
10
|
+
import { getProductFile, getCritical, listByType, getFileById, } from "../../src/db.js";
|
|
11
|
+
import { countTokens, disposeEncoder } from "../evaluators/token-counter.js";
|
|
12
|
+
import { measureContextRetrieval, averageContextMetrics, } from "../evaluators/context-retrieval-quality.js";
|
|
13
|
+
/**
 * Interactive evaluation runner.
 *
 * Loads the ground-truth tasks, measures how well the retrieval pipeline
 * (product file + critical files + hybrid search) covers each task's
 * `requiredContext`, prints a per-task table plus averages, prints details
 * for a fixed selection of tasks intended for manual evaluation, and writes
 * everything to `../results/interactive-eval.json`.
 *
 * The harness and the token encoder are always released in `finally`,
 * even when an earlier step throws.
 *
 * @returns {Promise<void>} Resolves once the results file has been written
 *   and cleanup has finished; rejects if any step fails.
 */
async function main() {
    console.log("=== Interactive AI Quality Evaluation ===\n");
    const harness = await setup("corpus-small");
    try {
        const tasksPath = join(import.meta.dirname, "..", "fixtures", "ground-truth", "tasks.json");
        const gt = JSON.parse(await readFile(tasksPath, "utf8"));
        const allTypes = ["product", "persona", "journey", "spec", "decision", "domain"];
        // Separator placed after every rendered file in a context string.
        const separator = "\n\n---\n\n";
        // Build paste-all context: every file of every type, rendered back to back.
        let pasteAllContext = "";
        for (const type of allTypes) {
            for (const f of listByType(harness.db, type)) {
                pasteAllContext += renderFile(f) + separator;
            }
        }
        const pasteAllTokens = countTokens(pasteAllContext);
        // Measure context retrieval for every task
        const allMetrics = [];
        const taskDetails = [];
        for (const task of gt.tasks) {
            // PCL retrieval: product + critical + search
            const product = getProductFile(harness.db);
            const criticalFiles = getCritical(harness.db);
            const searchResults = await search(harness.db, task.description, {
                mode: "hybrid", topK: 5,
            });
            // De-duplicate ids: a search hit may also be the product or a critical file.
            const allRetrievedIds = [
                ...new Set([
                    ...(product ? [product.id] : []),
                    ...criticalFiles.map((f) => f.id),
                    ...searchResults.map((r) => r.id),
                ]),
            ];
            const metrics = measureContextRetrieval(allRetrievedIds, task.requiredContext);
            allMetrics.push(metrics);
            // Build PCL context in the same order as the retrieval above.
            let pclContext = "";
            if (product) {
                pclContext += renderFile(product) + separator;
            }
            for (const c of criticalFiles) {
                pclContext += renderFile(c) + separator;
            }
            for (const sr of searchResults) {
                const file = getFileById(harness.db, sr.type, sr.id);
                // Search hits that cannot be resolved to a file are skipped.
                if (file) {
                    pclContext += renderFile(file) + separator;
                }
            }
            const pclTokens = countTokens(pclContext);
            taskDetails.push({
                taskId: task.id,
                category: task.category,
                description: task.description,
                requiredContext: task.requiredContext,
                retrievedIds: allRetrievedIds,
                recall: metrics.recall,
                precision: metrics.precision,
                f1: metrics.f1,
                hits: metrics.hits,
                misses: metrics.misses,
                noise: metrics.noise,
                pclTokens,
                criteriaCount: task.evaluationCriteria.length,
                requiredPatterns: task.requiredPatterns,
                forbiddenPatterns: task.forbiddenPatterns,
            });
        }
        // Print context retrieval results. The task count comes from the loaded
        // ground truth instead of a hard-coded number, so the heading stays
        // correct when tasks.json grows or shrinks.
        console.log(`=== Context Retrieval Quality (all ${gt.tasks.length} tasks) ===\n`);
        console.log("| Task | Category | Recall | Prec. | F1 | Hits | Misses | PCL tok |");
        console.log("|----------|--------------------|--------|--------|--------|------|---------------------|---------|");
        for (const t of taskDetails) {
            const missStr = t.misses.length > 0 ? t.misses.join(", ") : "none";
            console.log(`| ${t.taskId.padEnd(8)} | ${t.category.padEnd(18)} | ${t.recall.toFixed(2).padStart(6)} | ${t.precision.toFixed(2).padStart(6)} | ${t.f1.toFixed(2).padStart(6)} | ${String(t.hits.length).padStart(4)} | ${missStr.padEnd(19)} | ${String(t.pclTokens).padStart(7)} |`);
        }
        const avgCtx = averageContextMetrics(allMetrics);
        console.log(`\nAVERAGE: Recall=${avgCtx.recall.toFixed(3)} | Precision=${avgCtx.precision.toFixed(3)} | F1=${avgCtx.f1.toFixed(3)}`);
        console.log(`\nPaste-all tokens: ${pasteAllTokens}`);
        // Guard both divisions: an empty task list or an empty corpus would
        // otherwise print NaN/Infinity in the summary line.
        const totalPclTokens = taskDetails.reduce((s, t) => s + t.pclTokens, 0);
        const avgPclTokens = taskDetails.length > 0 ? totalPclTokens / taskDetails.length : 0;
        const savingsPct = pasteAllTokens > 0
            ? ((1 - avgPclTokens / pasteAllTokens) * 100).toFixed(1)
            : "N/A";
        console.log(`Avg PCL tokens per task: ${Math.round(avgPclTokens)} (${savingsPct}% savings)`);
        // Pick 6 representative tasks for manual evaluation
        const selectedIds = ["task-01", "task-05", "task-06", "task-07", "task-10", "task-12"];
        console.log(`\n=== Selected Tasks for Manual Evaluation ===\n`);
        for (const id of selectedIds) {
            const detail = taskDetails.find((t) => t.taskId === id);
            // Selected ids that are absent from the ground truth are skipped.
            if (!detail) {
                continue;
            }
            // taskDetails is built from gt.tasks, so a matching task exists here.
            const task = gt.tasks.find((t) => t.id === id);
            console.log(`--- ${detail.taskId} (${detail.category}) ---`);
            console.log(`Description: ${detail.description.slice(0, 120)}...`);
            console.log(`Required context: [${detail.requiredContext.join(", ")}]`);
            console.log(`PCL retrieved: [${detail.retrievedIds.join(", ")}]`);
            console.log(`Context recall: ${detail.recall.toFixed(2)} | Misses: [${detail.misses.join(", ") || "none"}]`);
            console.log(`Criteria: ${task.evaluationCriteria.map((c) => c.criterion).join(" | ")}`);
            console.log(`Required patterns: [${detail.requiredPatterns.join(", ")}]`);
            console.log(`Forbidden patterns: [${detail.forbiddenPatterns.join(", ")}]`);
            console.log("");
        }
        // Save results
        const resultsDir = join(import.meta.dirname, "..", "results");
        await mkdir(resultsDir, { recursive: true });
        await writeFile(join(resultsDir, "interactive-eval.json"), JSON.stringify({ timestamp: new Date().toISOString(), taskDetails, averageContext: avgCtx, pasteAllTokens }, null, 2));
    }
    finally {
        // Always release the token encoder and the harness, even on failure.
        disposeEncoder();
        await harness.cleanup();
    }
}
|
|
118
|
+
// Entry point: run the evaluation. On failure, log the error and mark the
// process as failed so shell scripts and CI do not treat it as a success.
// process.exitCode (rather than process.exit) lets pending output flush.
main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|
|
119
|
+
//# sourceMappingURL=bench-interactive-eval.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-interactive-eval.js","sourceRoot":"","sources":["../../../benchmarks/runners/bench-interactive-eval.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC1C,OAAO,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EACL,cAAc,EACd,WAAW,EACX,UAAU,EACV,WAAW,GACZ,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,gCAAgC,CAAC;AAC7E,OAAO,EACL,uBAAuB,EACvB,qBAAqB,GACtB,MAAM,4CAA4C,CAAC;AAIpD,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;IAE3D,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,cAAc,CAAC,CAAC;IAE5C,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,CACpB,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,cAAc,EAAE,YAAY,CACpE,CAAC;QACF,MAAM,EAAE,GAAoB,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;QAE1E,MAAM,QAAQ,GAAe,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;QAE7F,0BAA0B;QAC1B,IAAI,eAAe,GAAG,EAAE,CAAC;QACzB,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,OAAO,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC;gBAC7C,eAAe,IAAI,UAAU,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC;YACnD,CAAC;QACH,CAAC;QACD,MAAM,cAAc,GAAG,WAAW,CAAC,eAAe,CAAC,CAAC;QAEpD,2CAA2C;QAC3C,MAAM,UAAU,GAAG,EAAE,CAAC;QACtB,MAAM,WAAW,GAAG,EAAE,CAAC;QAEvB,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC,KAAK,EAAE,CAAC;YAC5B,6CAA6C;YAC7C,MAAM,OAAO,GAAG,cAAc,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YAC3C,MAAM,aAAa,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YAC9C,MAAM,aAAa,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,IAAI,CAAC,WAAW,EAAE;gBAC/D,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;aACxB,CAAC,CAAC;YAEH,MAAM,eAAe,GAAG;gBACtB,GAAG,IAAI,GAAG,CAAC;oBACT,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;oBAChC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACjC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBAClC,CAAC;aACH,CAAC;YAEF,MAAM,OAAO,GAAG,uBAAuB,C
AAC,eAAe,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC;YAC/E,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAEzB,oBAAoB;YACpB,IAAI,UAAU,GAAG,EAAE,CAAC;YACpB,IAAI,OAAO;gBAAE,UAAU,IAAI,UAAU,CAAC,OAAO,CAAC,GAAG,aAAa,CAAC;YAC/D,KAAK,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;gBAC9B,UAAU,IAAI,UAAU,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC;YAC9C,CAAC;YACD,KAAK,MAAM,EAAE,IAAI,aAAa,EAAE,CAAC;gBAC/B,MAAM,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC;gBACrD,IAAI,IAAI;oBAAE,UAAU,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC;YAC3D,CAAC;YAED,MAAM,SAAS,GAAG,WAAW,CAAC,UAAU,CAAC,CAAC;YAE1C,WAAW,CAAC,IAAI,CAAC;gBACf,MAAM,EAAE,IAAI,CAAC,EAAE;gBACf,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,eAAe,EAAE,IAAI,CAAC,eAAe;gBACrC,YAAY,EAAE,eAAe;gBAC7B,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,SAAS,EAAE,OAAO,CAAC,SAAS;gBAC5B,EAAE,EAAE,OAAO,CAAC,EAAE;gBACd,IAAI,EAAE,OAAO,CAAC,IAAI;gBAClB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,SAAS;gBACT,aAAa,EAAE,IAAI,CAAC,kBAAkB,CAAC,MAAM;gBAC7C,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,iBAAiB,EAAE,IAAI,CAAC,iBAAiB;aAC1C,CAAC,CAAC;QACL,CAAC;QAED,kCAAkC;QAClC,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;QAClE,OAAO,CAAC,GAAG,CAAC,qGAAqG,CAAC,CAAC;QACnH,OAAO,CAAC,GAAG,CAAC,qGAAqG,CAAC,CAAC;QAEnH,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACnE,OAAO,CAAC,GAAG,CACT,KAAK,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CACzQ,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,OAAO,CAAC,GAAG,CAAC,qBAAqB,MAAM,CAAC,M
AAM,CAAC,OAAO,CAAC,CAAC,CAAC,gBAAgB,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,MAAM,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACrI,OAAO,CAAC,GAAG,CAAC,uBAAuB,cAAc,EAAE,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;QAC3F,OAAO,CAAC,GAAG,CAAC,4BAA4B,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,GAAG,cAAc,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;QAEzI,oDAAoD;QACpD,MAAM,WAAW,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;QACvF,OAAO,CAAC,GAAG,CAAC,kDAAkD,CAAC,CAAC;QAEhE,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;YAC7B,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,EAAE,CAAC,CAAC;YACxD,IAAI,CAAC,MAAM;gBAAE,SAAS;YACtB,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAE,CAAC;YAEhD,OAAO,CAAC,GAAG,CAAC,OAAO,MAAM,CAAC,MAAM,KAAK,MAAM,CAAC,QAAQ,OAAO,CAAC,CAAC;YAC7D,OAAO,CAAC,GAAG,CAAC,gBAAgB,MAAM,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;YACnE,OAAO,CAAC,GAAG,CAAC,sBAAsB,MAAM,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxE,OAAO,CAAC,GAAG,CAAC,mBAAmB,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAClE,OAAO,CAAC,GAAG,CAAC,mBAAmB,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC;YAC7G,OAAO,CAAC,GAAG,CAAC,aAAa,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACxF,OAAO,CAAC,GAAG,CAAC,uBAAuB,MAAM,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC1E,OAAO,CAAC,GAAG,CAAC,wBAAwB,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC5E,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;QAED,eAAe;QACf,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7E,MAAM,SAAS,CACb,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,E
AAE,SAAS,EAAE,uBAAuB,CAAC,EACnE,IAAI,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CACtH,CAAC;IAEJ,CAAC;YAAS,CAAC;QACT,cAAc,EAAE,CAAC;QACjB,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bench-performance.bench.d.ts","sourceRoot":"","sources":["../../../benchmarks/runners/bench-performance.bench.ts"],"names":[],"mappings":""}
|