autodocs-engine 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/dist/analysis-builder.d.ts +13 -0
- package/dist/analysis-builder.js +268 -0
- package/dist/analysis-builder.js.map +1 -0
- package/dist/anti-pattern-detector.d.ts +9 -0
- package/dist/anti-pattern-detector.js +58 -0
- package/dist/anti-pattern-detector.js.map +1 -0
- package/dist/architecture-detector.d.ts +7 -0
- package/dist/architecture-detector.js +212 -0
- package/dist/architecture-detector.js.map +1 -0
- package/dist/ast-parser.d.ts +5 -0
- package/dist/ast-parser.js +635 -0
- package/dist/ast-parser.js.map +1 -0
- package/dist/benchmark/code-generator.d.ts +20 -0
- package/dist/benchmark/code-generator.js +206 -0
- package/dist/benchmark/code-generator.js.map +1 -0
- package/dist/benchmark/pr-miner.d.ts +61 -0
- package/dist/benchmark/pr-miner.js +304 -0
- package/dist/benchmark/pr-miner.js.map +1 -0
- package/dist/benchmark/pr-runner.d.ts +58 -0
- package/dist/benchmark/pr-runner.js +346 -0
- package/dist/benchmark/pr-runner.js.map +1 -0
- package/dist/benchmark/pr-scorer.d.ts +48 -0
- package/dist/benchmark/pr-scorer.js +222 -0
- package/dist/benchmark/pr-scorer.js.map +1 -0
- package/dist/benchmark/pr-task-gen.d.ts +16 -0
- package/dist/benchmark/pr-task-gen.js +129 -0
- package/dist/benchmark/pr-task-gen.js.map +1 -0
- package/dist/benchmark/report.d.ts +9 -0
- package/dist/benchmark/report.js +131 -0
- package/dist/benchmark/report.js.map +1 -0
- package/dist/benchmark/runner.d.ts +6 -0
- package/dist/benchmark/runner.js +183 -0
- package/dist/benchmark/runner.js.map +1 -0
- package/dist/benchmark/scorer.d.ts +6 -0
- package/dist/benchmark/scorer.js +549 -0
- package/dist/benchmark/scorer.js.map +1 -0
- package/dist/benchmark/shuffler.d.ts +5 -0
- package/dist/benchmark/shuffler.js +70 -0
- package/dist/benchmark/shuffler.js.map +1 -0
- package/dist/benchmark/statistics.d.ts +36 -0
- package/dist/benchmark/statistics.js +159 -0
- package/dist/benchmark/statistics.js.map +1 -0
- package/dist/benchmark/task-generator.d.ts +20 -0
- package/dist/benchmark/task-generator.js +388 -0
- package/dist/benchmark/task-generator.js.map +1 -0
- package/dist/benchmark/types.d.ts +111 -0
- package/dist/benchmark/types.js +3 -0
- package/dist/benchmark/types.js.map +1 -0
- package/dist/bin/autodocs-engine.d.ts +2 -0
- package/dist/bin/autodocs-engine.js +296 -0
- package/dist/bin/autodocs-engine.js.map +1 -0
- package/dist/bin/benchmark.d.ts +14 -0
- package/dist/bin/benchmark.js +172 -0
- package/dist/bin/benchmark.js.map +1 -0
- package/dist/bin/check.d.ts +13 -0
- package/dist/bin/check.js +79 -0
- package/dist/bin/check.js.map +1 -0
- package/dist/bin/init.d.ts +11 -0
- package/dist/bin/init.js +268 -0
- package/dist/bin/init.js.map +1 -0
- package/dist/bin/serve.d.ts +4 -0
- package/dist/bin/serve.js +29 -0
- package/dist/bin/serve.js.map +1 -0
- package/dist/budget-validator.d.ts +22 -0
- package/dist/budget-validator.js +119 -0
- package/dist/budget-validator.js.map +1 -0
- package/dist/command-extractor.d.ts +10 -0
- package/dist/command-extractor.js +276 -0
- package/dist/command-extractor.js.map +1 -0
- package/dist/config-analyzer.d.ts +5 -0
- package/dist/config-analyzer.js +364 -0
- package/dist/config-analyzer.js.map +1 -0
- package/dist/config.d.ts +33 -0
- package/dist/config.js +172 -0
- package/dist/config.js.map +1 -0
- package/dist/contribution-patterns.d.ts +6 -0
- package/dist/contribution-patterns.js +263 -0
- package/dist/contribution-patterns.js.map +1 -0
- package/dist/convention-extractor.d.ts +17 -0
- package/dist/convention-extractor.js +90 -0
- package/dist/convention-extractor.js.map +1 -0
- package/dist/cross-package.d.ts +5 -0
- package/dist/cross-package.js +71 -0
- package/dist/cross-package.js.map +1 -0
- package/dist/dependency-analyzer.d.ts +5 -0
- package/dist/dependency-analyzer.js +233 -0
- package/dist/dependency-analyzer.js.map +1 -0
- package/dist/detectors/build-tool.d.ts +2 -0
- package/dist/detectors/build-tool.js +67 -0
- package/dist/detectors/build-tool.js.map +1 -0
- package/dist/detectors/component-patterns.d.ts +2 -0
- package/dist/detectors/component-patterns.js +49 -0
- package/dist/detectors/component-patterns.js.map +1 -0
- package/dist/detectors/data-fetching.d.ts +2 -0
- package/dist/detectors/data-fetching.js +127 -0
- package/dist/detectors/data-fetching.js.map +1 -0
- package/dist/detectors/database.d.ts +2 -0
- package/dist/detectors/database.js +54 -0
- package/dist/detectors/database.js.map +1 -0
- package/dist/detectors/error-handling.d.ts +2 -0
- package/dist/detectors/error-handling.js +47 -0
- package/dist/detectors/error-handling.js.map +1 -0
- package/dist/detectors/export-patterns.d.ts +2 -0
- package/dist/detectors/export-patterns.js +64 -0
- package/dist/detectors/export-patterns.js.map +1 -0
- package/dist/detectors/file-naming.d.ts +2 -0
- package/dist/detectors/file-naming.js +74 -0
- package/dist/detectors/file-naming.js.map +1 -0
- package/dist/detectors/graphql-patterns.d.ts +2 -0
- package/dist/detectors/graphql-patterns.js +47 -0
- package/dist/detectors/graphql-patterns.js.map +1 -0
- package/dist/detectors/hook-patterns.d.ts +2 -0
- package/dist/detectors/hook-patterns.js +105 -0
- package/dist/detectors/hook-patterns.js.map +1 -0
- package/dist/detectors/import-patterns.d.ts +2 -0
- package/dist/detectors/import-patterns.js +88 -0
- package/dist/detectors/import-patterns.js.map +1 -0
- package/dist/detectors/telemetry-patterns.d.ts +2 -0
- package/dist/detectors/telemetry-patterns.js +42 -0
- package/dist/detectors/telemetry-patterns.js.map +1 -0
- package/dist/detectors/test-framework-ecosystem.d.ts +2 -0
- package/dist/detectors/test-framework-ecosystem.js +95 -0
- package/dist/detectors/test-framework-ecosystem.js.map +1 -0
- package/dist/detectors/test-patterns.d.ts +2 -0
- package/dist/detectors/test-patterns.js +60 -0
- package/dist/detectors/test-patterns.js.map +1 -0
- package/dist/detectors/web-framework.d.ts +2 -0
- package/dist/detectors/web-framework.js +51 -0
- package/dist/detectors/web-framework.js.map +1 -0
- package/dist/deterministic-formatter.d.ts +54 -0
- package/dist/deterministic-formatter.js +922 -0
- package/dist/deterministic-formatter.js.map +1 -0
- package/dist/diff-analyzer.d.ts +7 -0
- package/dist/diff-analyzer.js +126 -0
- package/dist/diff-analyzer.js.map +1 -0
- package/dist/example-extractor.d.ts +6 -0
- package/dist/example-extractor.js +115 -0
- package/dist/example-extractor.js.map +1 -0
- package/dist/existing-docs.d.ts +36 -0
- package/dist/existing-docs.js +257 -0
- package/dist/existing-docs.js.map +1 -0
- package/dist/file-discovery.d.ts +6 -0
- package/dist/file-discovery.js +154 -0
- package/dist/file-discovery.js.map +1 -0
- package/dist/git-history.d.ts +41 -0
- package/dist/git-history.js +401 -0
- package/dist/git-history.js.map +1 -0
- package/dist/impact-classifier.d.ts +22 -0
- package/dist/impact-classifier.js +87 -0
- package/dist/impact-classifier.js.map +1 -0
- package/dist/impact-radius.d.ts +23 -0
- package/dist/impact-radius.js +130 -0
- package/dist/impact-radius.js.map +1 -0
- package/dist/import-chain.d.ts +12 -0
- package/dist/import-chain.js +93 -0
- package/dist/import-chain.js.map +1 -0
- package/dist/index.d.ts +40 -0
- package/dist/index.js +72 -0
- package/dist/index.js.map +1 -0
- package/dist/inferability.d.ts +16 -0
- package/dist/inferability.js +142 -0
- package/dist/inferability.js.map +1 -0
- package/dist/llm/adapter.d.ts +33 -0
- package/dist/llm/adapter.js +202 -0
- package/dist/llm/adapter.js.map +1 -0
- package/dist/llm/client.d.ts +5 -0
- package/dist/llm/client.js +68 -0
- package/dist/llm/client.js.map +1 -0
- package/dist/llm/hierarchical.d.ts +23 -0
- package/dist/llm/hierarchical.js +126 -0
- package/dist/llm/hierarchical.js.map +1 -0
- package/dist/llm/serializer.d.ts +19 -0
- package/dist/llm/serializer.js +363 -0
- package/dist/llm/serializer.js.map +1 -0
- package/dist/llm/template-selector.d.ts +7 -0
- package/dist/llm/template-selector.js +21 -0
- package/dist/llm/template-selector.js.map +1 -0
- package/dist/llm-adapter.d.ts +2 -0
- package/dist/llm-adapter.js +5 -0
- package/dist/llm-adapter.js.map +1 -0
- package/dist/mcp/cache.d.ts +30 -0
- package/dist/mcp/cache.js +112 -0
- package/dist/mcp/cache.js.map +1 -0
- package/dist/mcp/errors.d.ts +21 -0
- package/dist/mcp/errors.js +27 -0
- package/dist/mcp/errors.js.map +1 -0
- package/dist/mcp/queries.d.ts +27 -0
- package/dist/mcp/queries.js +121 -0
- package/dist/mcp/queries.js.map +1 -0
- package/dist/mcp/server.d.ts +14 -0
- package/dist/mcp/server.js +131 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools.d.ts +39 -0
- package/dist/mcp/tools.js +249 -0
- package/dist/mcp/tools.js.map +1 -0
- package/dist/mermaid-generator.d.ts +6 -0
- package/dist/mermaid-generator.js +59 -0
- package/dist/mermaid-generator.js.map +1 -0
- package/dist/meta-tool-detector.d.ts +23 -0
- package/dist/meta-tool-detector.js +177 -0
- package/dist/meta-tool-detector.js.map +1 -0
- package/dist/output-validator.d.ts +6 -0
- package/dist/output-validator.js +471 -0
- package/dist/output-validator.js.map +1 -0
- package/dist/pattern-fingerprinter.d.ts +7 -0
- package/dist/pattern-fingerprinter.js +241 -0
- package/dist/pattern-fingerprinter.js.map +1 -0
- package/dist/pipeline.d.ts +5 -0
- package/dist/pipeline.js +374 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/plugin-loader.d.ts +19 -0
- package/dist/plugin-loader.js +124 -0
- package/dist/plugin-loader.js.map +1 -0
- package/dist/role-inferrer.d.ts +5 -0
- package/dist/role-inferrer.js +159 -0
- package/dist/role-inferrer.js.map +1 -0
- package/dist/symbol-graph.d.ts +11 -0
- package/dist/symbol-graph.js +613 -0
- package/dist/symbol-graph.js.map +1 -0
- package/dist/templates/agents-md.d.ts +20 -0
- package/dist/templates/agents-md.js +346 -0
- package/dist/templates/agents-md.js.map +1 -0
- package/dist/templates/claude-md.d.ts +4 -0
- package/dist/templates/claude-md.js +23 -0
- package/dist/templates/claude-md.js.map +1 -0
- package/dist/templates/cursorrules.d.ts +4 -0
- package/dist/templates/cursorrules.js +18 -0
- package/dist/templates/cursorrules.js.map +1 -0
- package/dist/tier-classifier.d.ts +7 -0
- package/dist/tier-classifier.js +32 -0
- package/dist/tier-classifier.js.map +1 -0
- package/dist/types.d.ts +428 -0
- package/dist/types.js +42 -0
- package/dist/types.js.map +1 -0
- package/dist/workflow-rules.d.ts +18 -0
- package/dist/workflow-rules.js +131 -0
- package/dist/workflow-rules.js.map +1 -0
- package/package.json +62 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
// src/benchmark/pr-scorer.ts — Score AI output against real commit ground truth
|
|
2
|
+
// Phase 0: File placement accuracy as the primary metric.
|
|
3
|
+
// Designed to be extended with naming, imports, exports in later phases.
|
|
4
|
+
import { dirname, basename, extname } from "node:path";
|
|
5
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
6
|
+
/**
 * Score the files an AI produced for a task against the commit's ground truth.
 *
 * The headline `score` is the file-placement score; naming-convention and
 * barrel-update results are reported alongside it as secondary dimensions.
 *
 * @param files - Generated files; each entry is read for its `path`.
 * @param task - Task forwarded unchanged to the individual dimension scorers.
 * @param tokensUsed - Echoed back on the result.
 * @param latencyMs - Echoed back on the result.
 * @param error - When truthy (or when `files` is empty) every dimension is
 *   reported as failed and no scoring is attempted.
 * @returns Result object with `score`, `dimensions`, `filesCreated`,
 *   `tokensUsed`, `latencyMs` and, on the failure path only, `error`.
 */
export function scorePROutput(files, task, tokensUsed, latencyMs, error) {
    const failedDimension = (detail) => ({ score: 0, detail, passed: false });
    const nothingToScore = Boolean(error) || files.length === 0;
    if (nothingToScore) {
        return {
            score: 0,
            dimensions: {
                // Only the placement dimension carries the upstream error text.
                filePlacement: failedDimension(error ?? "No files generated"),
                namingConvention: failedDimension("No files generated"),
                barrelUpdate: failedDimension("No files generated"),
            },
            filesCreated: [],
            tokensUsed,
            latencyMs,
            error,
        };
    }
    // Tests, specs, response wrappers and barrels are not implementation files.
    const nonImplMarkers = [".test.", ".spec.", "__response__"];
    const implFiles = files.filter(({ path }) => {
        const hasMarker = nonImplMarkers.some((marker) => path.includes(marker));
        return !(hasMarker || isBarrelFile(path));
    });
    const dimensions = {
        filePlacement: scoreFilePlacement(implFiles, task),
        namingConvention: scoreNamingConvention(implFiles, task),
        // The barrel check needs the unfiltered list because barrels were removed above.
        barrelUpdate: scoreBarrelUpdate(files, task),
    };
    return {
        // Phase 0: placement alone determines the headline score.
        score: dimensions.filePlacement.score,
        dimensions,
        filesCreated: files.map((file) => file.path),
        tokensUsed,
        latencyMs,
    };
}
|
|
44
|
+
// ─── File Placement Scoring ─────────────────────────────────────────────────
|
|
45
|
+
/**
 * Score whether any generated implementation file sits in the expected directory.
 *
 * Every file's directory is compared with `task.groundTruth.directory` via
 * `pathSimilarity`; the best similarity (0..1) is reported as a rounded
 * percentage. The first file wins ties.
 *
 * @param implFiles - Implementation files; each entry is read for its `path`.
 * @param task - Task whose `groundTruth.directory` is the expected location.
 * @returns `{ score, detail, passed }` where `score` is 0-100 and `passed`
 *   requires a best similarity of at least 0.9.
 */
export function scoreFilePlacement(implFiles, task) {
    if (implFiles.length === 0) {
        return { score: 0, detail: "No implementation files generated", passed: false };
    }
    const targetDir = normalizeDir(task.groundTruth.directory);
    // Keep the first file with the strictly highest similarity.
    const best = implFiles.reduce((leader, candidate) => {
        const similarity = pathSimilarity(normalizeDir(dirname(candidate.path)), targetDir);
        return similarity > leader.similarity
            ? { similarity, path: candidate.path }
            : leader;
    }, { similarity: 0, path: "" });
    const score = Math.round(best.similarity * 100);
    const describe = () => {
        if (best.similarity >= 1.0) {
            return `Correct: ${dirname(best.path)} matches ${task.groundTruth.directory}`;
        }
        if (best.similarity >= 0.5) {
            return `Close: ${dirname(best.path)} near ${task.groundTruth.directory} (${score}%)`;
        }
        if (best.similarity > 0) {
            return `Wrong: ${dirname(best.path)}, expected ${task.groundTruth.directory} (${score}%)`;
        }
        // No file scored above zero, so there may be no path to report.
        return `Wrong: ${best.path ? dirname(best.path) : "unknown"}, expected ${task.groundTruth.directory}`;
    };
    return { score, detail: describe(), passed: best.similarity >= 0.9 };
}
|
|
82
|
+
/**
 * Similarity between two "/"-separated directory paths, in the range 0..1.
 *
 * The distance is the number of segments that must be removed from `a` plus
 * the number that must be added to reach `b` after their common prefix.
 * Identical paths score 1.0 and the score halves with every unit of distance
 * (distance 1 = 0.5, distance 2 = 0.25, ...). When both paths resolve to a
 * package (see `getPackage`) and the packages differ, the score is halved again.
 *
 * @param a - First directory path.
 * @param b - Second directory path.
 * @returns Similarity score clamped to [0, 1].
 */
export function pathSimilarity(a, b) {
    if (a === b) {
        return 1.0;
    }
    const left = a.split("/").filter(Boolean);
    const right = b.split("/").filter(Boolean);
    // Index of the first differing segment equals the common prefix length.
    const firstDiff = left.findIndex((segment, i) => segment !== right[i]);
    const shared = firstDiff === -1 ? left.length : firstDiff;
    const distance = left.length + right.length - 2 * shared;
    if (distance === 0) {
        // Paths differed only by empty segments (e.g. a trailing slash).
        return 1.0;
    }
    // Exponential decay: each step of distance halves the score.
    const decayRate = Math.log(2);
    let similarity = Math.exp(-decayRate * distance);
    // Monorepo penalty for landing in a different package.
    const leftPkg = getPackage(left);
    const rightPkg = getPackage(right);
    const crossPackage = Boolean(leftPkg && rightPkg && leftPkg !== rightPkg);
    if (crossPackage) {
        similarity *= 0.5;
    }
    return Math.max(0, Math.min(1, similarity));
}
|
|
110
|
+
/**
 * Identify the monorepo package a path belongs to.
 *
 * @param parts - Path segments.
 * @returns The first two segments joined with "/" when the path starts with
 *   `packages` or `apps` and has at least two segments; otherwise `null`.
 */
function getPackage(parts) {
    const workspaceRoots = ["packages", "apps"];
    const insideWorkspace = workspaceRoots.includes(parts[0]) && parts.length >= 2;
    if (!insideWorkspace) {
        return null;
    }
    return [parts[0], parts[1]].join("/");
}
|
|
116
|
+
// ─── Naming Convention Scoring ──────────────────────────────────────────────
|
|
117
|
+
/**
 * Score whether a generated file name uses the same naming convention as the
 * ground-truth file name. File extensions are ignored.
 *
 * The check is binary: 100 when at least one implementation file's convention
 * equals the ground truth's, otherwise 0. No partial credit is given.
 *
 * @param implFiles - Implementation files; each entry is read for its `path`.
 * @param task - Task whose `groundTruth.filename` defines the expected convention.
 * @returns `{ score, detail, passed }`.
 */
export function scoreNamingConvention(implFiles, task) {
    if (implFiles.length === 0) {
        return { score: 0, detail: "No files", passed: false };
    }
    const conventionOf = (filePath) => detectConvention(basename(filePath, extname(filePath)));
    const expected = conventionOf(task.groundTruth.filename);
    // The first file that follows the expected convention is enough to pass.
    const matching = implFiles.find((file) => conventionOf(file.path) === expected);
    if (matching !== undefined) {
        return {
            score: 100,
            detail: `Convention match: ${expected} (${matching.path})`,
            passed: true,
        };
    }
    // Nothing matched: report the convention of the first generated file.
    const observed = conventionOf(implFiles[0].path);
    return {
        score: 0,
        detail: `Convention mismatch: AI used ${observed}, expected ${expected}`,
        passed: false,
    };
}
|
|
156
|
+
/**
 * Classify a file name (without extension) by naming convention.
 *
 * Rules are tried in order and the first match wins. A single lowercase word
 * is reported as "kebab-case".
 *
 * @param name - File name without its extension.
 * @returns "kebab-case", "camelCase", "PascalCase", "snake_case" or "unknown".
 */
function detectConvention(name) {
    const rules = [
        { label: "kebab-case", matches: (n) => /^[a-z][a-z0-9]*(-[a-z0-9]+)+$/.test(n) },
        // Single lowercase word: treated as kebab-case.
        { label: "kebab-case", matches: (n) => /^[a-z][a-z0-9]*$/.test(n) },
        { label: "camelCase", matches: (n) => /^[a-z][a-zA-Z0-9]*$/.test(n) && /[A-Z]/.test(n) },
        { label: "PascalCase", matches: (n) => /^[A-Z][a-zA-Z0-9]*$/.test(n) },
        { label: "snake_case", matches: (n) => /^[a-z][a-z0-9]*(_[a-z0-9]+)+$/.test(n) },
    ];
    const hit = rules.find((rule) => rule.matches(name));
    return hit ? hit.label : "unknown";
}
|
|
169
|
+
// ─── Barrel Update Scoring ──────────────────────────────────────────────────
|
|
170
|
+
/**
 * Score whether the AI updated the barrel/index file when the task has one.
 *
 * Binary result: 100 when the AI's barrel contains at least one re-export
 * source that the original barrel did not, otherwise 0. Tasks without a
 * barrel file are scored 100 (not applicable).
 *
 * The AI's barrel is located by exact path first. Only when no file has that
 * exact path does the lookup fall back to the first file with the same base
 * name, so an unrelated `index.ts` elsewhere in the output cannot shadow the
 * real barrel.
 *
 * @param allFiles - All generated files; each entry is read for `path` and `content`.
 * @param task - Task whose `context.barrelFile` (`path`, `content`) is the
 *   original barrel, if any.
 * @returns `{ score, detail, passed }`.
 */
export function scoreBarrelUpdate(allFiles, task) {
    const originalBarrel = task.context.barrelFile;
    // Only score if the directory had a barrel file.
    if (!originalBarrel) {
        return { score: 100, detail: "No barrel file — N/A", passed: true };
    }
    const barrelName = basename(originalBarrel.path);
    const aiBarrel = allFiles.find((f) => f.path === originalBarrel.path) ??
        allFiles.find((f) => basename(f.path) === barrelName);
    if (!aiBarrel) {
        return { score: 0, detail: "Barrel file exists but AI did not update it", passed: false };
    }
    // A re-export source absent from the original barrel counts as an update.
    const knownSources = new Set(extractReExports(originalBarrel.content));
    const newExports = extractReExports(aiBarrel.content).filter((source) => !knownSources.has(source));
    if (newExports.length > 0) {
        return {
            score: 100,
            detail: `Barrel updated: added ${newExports.join(", ")}`,
            passed: true,
        };
    }
    return {
        score: 0,
        detail: "Barrel file present but no new exports added",
        passed: false,
    };
}
|
|
205
|
+
/**
 * Collect the module specifiers of every re-export statement in a source text.
 *
 * Recognized forms:
 *   export * from "x"
 *   export * as ns from "x"
 *   export { a, b } from "x"
 *   export type { T } from "x"
 *   export type * from "x"
 *
 * @param content - Source text of a barrel file. Non-string values are
 *   treated as having no re-exports.
 * @returns Module specifiers in order of appearance (duplicates preserved).
 */
function extractReExports(content) {
    const reExportPattern = /export\s+(?:type\s+)?(?:\*(?:\s+as\s+[\w$]+)?|\{[^}]+\})\s*from\s*["']([^"']+)["']/g;
    const text = typeof content === "string" ? content : "";
    return Array.from(text.matchAll(reExportPattern), (match) => match[1]);
}
|
|
214
|
+
// ─── Helpers ────────────────────────────────────────────────────────────────
|
|
215
|
+
/**
 * Normalize a directory path so equivalent spellings compare equal.
 *
 * Leading "./" prefixes and trailing slashes are removed, and the
 * current-directory marker "." becomes the empty string. Without that last
 * step "./" and "." (what `dirname` returns for a root-level file) would
 * normalize differently, and "." would be counted as a real path segment.
 *
 * @param dir - Directory path using "/" separators.
 * @returns The normalized path; "" denotes the root directory.
 */
function normalizeDir(dir) {
    const trimmed = dir.replace(/^(?:\.\/)+/, "").replace(/\/+$/, "");
    return trimmed === "." ? "" : trimmed;
}
|
|
218
|
+
/**
 * Tell whether a path points at a barrel file.
 *
 * @param path - File path; only its base name is inspected.
 * @returns `true` when the base name is `index.ts`, `index.tsx` or `mod.ts`.
 */
function isBarrelFile(path) {
    const barrelNames = ["index.ts", "index.tsx", "mod.ts"];
    return barrelNames.includes(basename(path));
}
|
|
222
|
+
//# sourceMappingURL=pr-scorer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pr-scorer.js","sourceRoot":"","sources":["../../src/benchmark/pr-scorer.ts"],"names":[],"mappings":"AAAA,gFAAgF;AAChF,0DAA0D;AAC1D,yEAAyE;AAGzE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA6BvD,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,aAAa,CAC3B,KAAsB,EACtB,IAAe,EACf,UAAkB,EAClB,SAAiB,EACjB,KAAc;IAEd,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO;YACL,KAAK,EAAE,CAAC;YACR,UAAU,EAAE;gBACV,aAAa,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,IAAI,oBAAoB,EAAE,MAAM,EAAE,KAAK,EAAE;gBACjF,gBAAgB,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,oBAAoB,EAAE,MAAM,EAAE,KAAK,EAAE;gBAC3E,YAAY,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,oBAAoB,EAAE,MAAM,EAAE,KAAK,EAAE;aACxE;YACD,YAAY,EAAE,EAAE;YAChB,UAAU;YACV,SAAS;YACT,KAAK;SACN,CAAC;IACJ,CAAC;IAED,8EAA8E;IAC9E,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CACjC,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAC1B,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAC1B,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC;QAChC,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CACtB,CAAC;IAEF,MAAM,aAAa,GAAG,kBAAkB,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAC1D,MAAM,gBAAgB,GAAG,qBAAqB,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;IAChE,MAAM,YAAY,GAAG,iBAAiB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAEpD,iDAAiD;IACjD,sCAAsC;IACtC,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,CAAC;IAElC,OAAO;QACL,KAAK;QACL,UAAU,EAAE,EAAE,aAAa,EAAE,gBAAgB,EAAE,YAAY,EAAE;QAC7D,YAAY,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACpC,UAAU;QACV,SAAS;KACV,CAAC;AACJ,CAAC;AAED,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAChC,SAA0B,EAC1B,IAAe;IAEf,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,mCAAmC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAClF,CAAC;IAED,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;IAE7D,mDAAmD;IACnD,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,QAAQ,GAAG,EAAE,CAAC;IAElB,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;QACjD,MAAM,CAAC,GAAG,cAAc,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;QAC/C,IAAI,CA
AC,GAAG,SAAS,EAAE,CAAC;YAClB,SAAS,GAAG,CAAC,CAAC;YACd,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,SAAS,IAAI,GAAG,CAAC,CAAC,4BAA4B;IAE7D,IAAI,MAAc,CAAC;IACnB,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;QACrB,MAAM,GAAG,YAAY,OAAO,CAAC,QAAQ,CAAC,YAAY,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE,CAAC;IACjF,CAAC;SAAM,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;QAC5B,MAAM,GAAG,UAAU,OAAO,CAAC,QAAQ,CAAC,SAAS,IAAI,CAAC,WAAW,CAAC,SAAS,KAAK,KAAK,IAAI,CAAC;IACxF,CAAC;SAAM,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QACzB,MAAM,GAAG,UAAU,OAAO,CAAC,QAAQ,CAAC,cAAc,IAAI,CAAC,WAAW,CAAC,SAAS,KAAK,KAAK,IAAI,CAAC;IAC7F,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,UAAU,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,cAAc,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE,CAAC;IACxG,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;AACnC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,CAAS,EAAE,CAAS;IACjD,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAExB,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC5C,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAE5C,uBAAuB;IACvB,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,OAAO,GAAG,GAAG,MAAM,CAAC,MAAM,IAAI,GAAG,GAAG,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;QACjF,GAAG,EAAE,CAAC;IACR,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;IAC/D,IAAI,QAAQ,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAE/B,kCAAkC;IAClC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB;IAC/C,IAAI,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;IAEzC,uCAAuC;IACvC,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IAChC,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IAChC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClC,KAAK,IAAI,GAAG,CAAC;IACf,CAAC;IAED,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;AACzC,CAAC;AAED,SAAS,UAAU,CAAC,KAAe;IACjC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,UAAU,IAAI,KAAK,CAAC,CAAC,CAAC,
KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC1E,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACrC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CACnC,SAA0B,EAC1B,IAAe;IAEf,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IACzD,CAAC;IAED,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;IACvF,MAAM,YAAY,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAE9C,qDAAqD;IACrD,IAAI,SAAS,GAAG,KAAK,CAAC;IACtB,IAAI,QAAQ,GAAG,EAAE,CAAC;IAElB,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;QACvD,MAAM,YAAY,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;QAC9C,IAAI,YAAY,KAAK,YAAY,EAAE,CAAC;YAClC,SAAS,GAAG,IAAI,CAAC;YACjB,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC;YACrB,MAAM;QACR,CAAC;IACH,CAAC;IAED,IAAI,SAAS,EAAE,CAAC;QACd,OAAO;YACL,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,qBAAqB,YAAY,KAAK,QAAQ,GAAG;YACzD,MAAM,EAAE,IAAI;SACb,CAAC;IACJ,CAAC;IAED,8EAA8E;IAC9E,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;IAC5B,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAC3D,MAAM,YAAY,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAE9C,OAAO;QACL,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,gCAAgC,YAAY,cAAc,YAAY,EAAE;QAChF,MAAM,EAAE,KAAK;KACd,CAAC;AACJ,CAAC;AAID,SAAS,gBAAgB,CAAC,IAAY;IACpC,IAAI,+BAA+B,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,YAAY,CAAC;IACpE,IAAI,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,YAAY,CAAC,CAAC,8BAA8B;IACtF,IAAI,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,WAAW,CAAC;IAC/E,IAAI,qBAAqB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,YAAY,CAAC;IAC1D,IAAI,+BAA+B,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,YAAY,CAAC;IACpE,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,+EAA+E;AAE/E;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAC/B,QAAyB,EACzB,IAAe;IAEf,gDAAgD;IAChD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;QAC7B,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,sBAAsB,EAAE,MAAM,EAAE,IAA
I,EAAE,CAAC;IACtE,CAAC;IAED,4BAA4B;IAC5B,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC;IAChD,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACxC,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CACjC,CAAC,CAAC,IAAI,KAAK,UAAU;QACrB,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,GAAG,UAAU,CAAC;QACjC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,UAAU,CAChC,CAAC;IAEF,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,6CAA6C,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC5F,CAAC;IAED,qCAAqC;IACrC,MAAM,eAAe,GAAG,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;IAC1E,MAAM,SAAS,GAAG,gBAAgB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IAEvE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO;YACL,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,yBAAyB,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YACxD,MAAM,EAAE,IAAI;SACb,CAAC;IACJ,CAAC;IAED,OAAO;QACL,KAAK,EAAE,CAAC;QACR,MAAM,EAAE,8CAA8C;QACtD,MAAM,EAAE,KAAK;KACd,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAe;IACvC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,KAAK,GAAG,sDAAsD,CAAC;IACrE,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,+EAA+E;AAE/E,SAAS,YAAY,CAAC,GAAW;IAC/B,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AACrD,CAAC;AAED,SAAS,YAAY,CAAC,IAAY;IAChC,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,OAAO,IAAI,KAAK,UAAU,IAAI,IAAI,KAAK,WAAW,IAAI,IAAI,KAAK,QAAQ,CAAC;AAC1E,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { MinedTask } from "./pr-miner.js";
|
|
2
|
+
/**
 * Generate a task prompt from a mined commit.
 * Strips directory/location hints and implementation-specific details
 * to keep the task fair across conditions.
 *
 * @param task - Mined commit; its commit message and ground-truth file
 *   (content and filename) are the sources for the prompt text.
 * @param packageName - Project name mentioned in the generated prompt.
 * @returns The prompt text.
 */
|
|
7
|
+
export declare function generateTaskPrompt(task: MinedTask, packageName: string): string;
|
|
8
|
+
/**
 * Clean a commit message for use as a task prompt.
 * Strips:
 * - Conventional commit prefixes (feat:, fix:, etc.)
 * - Directory/path references (to src/utils, in packages/auth)
 * - Issue/PR references (#123)
 * - File name references (auth-service.ts)
 *
 * A leading "add", "create", "implement" or "introduce" is normalized to "Add".
 *
 * @param message - Raw commit message.
 * @returns The cleaned message with surrounding whitespace removed.
 */
|
|
16
|
+
export declare function cleanCommitMessage(message: string): string;
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
// src/benchmark/pr-task-gen.ts — Generate task prompts from mined commits
|
|
2
|
+
// Derives fair, deliberately vague prompts that test whether AGENTS.md
|
|
3
|
+
// helps with file placement and convention adherence.
|
|
4
|
+
import ts from "typescript";
|
|
5
|
+
import { basename, extname } from "node:path";
|
|
6
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
7
|
+
/**
 * Build the task prompt for a mined commit.
 *
 * Sources are tried in order until one yields a description:
 *   1. the cleaned commit message, when it is at least 30 characters long;
 *   2. a description derived from the ground-truth file's content;
 *   3. a generic sentence built from the ground-truth file name.
 *
 * @param task - Mined commit; `commitMessage` and `groundTruth`
 *   (`content`, `filename`) are read.
 * @param packageName - Project name mentioned in the prompt.
 * @returns The prompt text.
 */
export function generateTaskPrompt(task, packageName) {
    const { commitMessage, groundTruth } = task;
    // Prefer the commit message when it is descriptive enough.
    const fromCommit = cleanCommitMessage(commitMessage);
    if (fromCommit.length >= 30) {
        return buildPrompt(fromCommit, packageName);
    }
    // Otherwise describe the file by what it exports.
    const fromExports = deriveFromFile(groundTruth.content, groundTruth.filename);
    if (fromExports) {
        return buildPrompt(fromExports, packageName);
    }
    // Last resort: turn the file name into words.
    const stem = basename(groundTruth.filename, extname(groundTruth.filename));
    const words = stem.replace(/[-_]/g, " ");
    return buildPrompt(`Add a new module for ${words} functionality`, packageName);
}
|
|
30
|
+
// ─── Prompt Construction ────────────────────────────────────────────────────
|
|
31
|
+
/**
 * Wrap a task description in the standard prompt text.
 *
 * The description is capitalized and terminated with a period if it lacks
 * one, then followed by a blank line and fixed instructions naming the project.
 *
 * @param description - One-sentence task description.
 * @param packageName - Project name mentioned in the instructions.
 * @returns The complete prompt.
 */
function buildPrompt(description, packageName) {
    const capitalized = description.charAt(0).toUpperCase() + description.slice(1);
    const headline = capitalized.endsWith(".") ? capitalized : `${capitalized}.`;
    const instructions = [
        `This is for the ${packageName} project. `,
        "Follow the project's conventions for file naming, imports, exports, and code style. ",
        "Include the implementation file and any necessary updates to barrel/index files.",
    ].join("");
    return `${headline}\n\n${instructions}`;
}
|
|
39
|
+
// ─── Commit Message Cleaning ────────────────────────────────────────────────
|
|
40
|
+
/**
 * Clean a commit message for use as a task prompt.
 *
 * Removes, in order:
 * - the Conventional Commit prefix, including an optional scope and the
 *   breaking-change marker (`feat:`, `fix(auth):`, `feat!:`, `feat(api)!:`);
 * - issue/PR references, parenthesized or bare (`(#123)`, `#123`);
 * - directory hints introduced by to/in/at/from/under (`in packages/auth`);
 * - file name references with a ts/tsx/js/jsx extension (`auth-service.ts`).
 *
 * Runs of spaces left behind by the removals are collapsed, and a leading
 * "add", "create", "implement" or "introduce" is normalized to "Add".
 *
 * @param message - Raw commit message.
 * @returns The cleaned message with surrounding whitespace removed.
 */
export function cleanCommitMessage(message) {
    let cleaned = message;
    // Conventional commit prefix; "!" marks a breaking change and may follow the type or scope.
    cleaned = cleaned.replace(/^(feat|fix|chore|docs|style|refactor|perf|test|build|ci|revert)(\(.+?\))?!?:\s*/i, "");
    // Issue/PR references: parenthesized ones first so no empty "()" is left behind.
    cleaned = cleaned.replace(/\s*\(#\d+\)/g, "");
    cleaned = cleaned.replace(/#\d+/g, "");
    // Directory/path hints (e.g., "to src/utils", "in packages/auth").
    cleaned = cleaned.replace(/\s+(to|in|at|from|under)\s+[a-z]+\/[a-zA-Z0-9_\-\/]+/g, "");
    // File name references (e.g., "add auth-service.ts").
    cleaned = cleaned.replace(/\s+[a-z][a-z0-9\-_]*\.(ts|tsx|js|jsx)\b/gi, "");
    // Collapse gaps left by the removals; line breaks are kept as they are.
    cleaned = cleaned.replace(/[ \t]{2,}/g, " ");
    // Normalize the leading verb.
    cleaned = cleaned.replace(/^\s*(add|create|implement|introduce)\s+/i, "Add ");
    return cleaned.trim();
}
|
|
63
|
+
// ─── File Analysis ──────────────────────────────────────────────────────────
|
|
64
|
+
/**
 * Build a one-line task description from the first exported declaration
 * found in a file.
 *
 * @param content - Source text of the file.
 * @param filename - File name handed to the TypeScript parser.
 * @returns A description sentence, or null when the file has no exports
 *   (or the first export has no usable name).
 */
function deriveFromFile(content, filename) {
    const parsed = ts.createSourceFile(filename, content, ts.ScriptTarget.Latest, true);
    const found = extractExports(parsed);
    if (found.length === 0) {
        return null;
    }
    // Only the first export is used to phrase the description.
    const { kind, name } = found[0];
    if (!name) {
        return null;
    }
    const phrase = camelToSpaces(name);
    if (kind === "function") {
        return `Add a new utility function for ${phrase}`;
    }
    if (kind === "class") {
        return `Add a new ${phrase} class`;
    }
    if (kind === "interface" || kind === "type") {
        return `Add type definitions for ${phrase}`;
    }
    if (kind === "const" || kind === "variable") {
        return `Add a ${phrase} module`;
    }
    return `Add a new module for ${phrase}`;
}
|
|
93
|
+
/**
 * Collect the named top-level declarations of a source file that carry an
 * `export` modifier, in source order.
 *
 * @param sourceFile - Parsed TypeScript source file.
 * @returns Array of `{ name, kind }` records, where kind is one of
 *   "function", "class", "interface", "type" or "const".
 */
function extractExports(sourceFile) {
    const found = [];
    const record = (identifier, kind) => {
        found.push({ name: identifier.text, kind });
    };
    // The visitor must never return a truthy value: ts.forEachChild stops
    // iterating as soon as its callback does.
    ts.forEachChild(sourceFile, (node) => {
        const modifiers = ts.canHaveModifiers(node) ? ts.getModifiers(node) : undefined;
        const exported = modifiers?.some((mod) => mod.kind === ts.SyntaxKind.ExportKeyword);
        if (!exported) {
            return;
        }
        if (ts.isFunctionDeclaration(node)) {
            // Anonymous `export default function () {}` has no name to record.
            if (node.name) {
                record(node.name, "function");
            }
            return;
        }
        if (ts.isClassDeclaration(node)) {
            if (node.name) {
                record(node.name, "class");
            }
            return;
        }
        if (ts.isInterfaceDeclaration(node)) {
            record(node.name, "interface");
            return;
        }
        if (ts.isTypeAliasDeclaration(node)) {
            record(node.name, "type");
            return;
        }
        if (ts.isVariableStatement(node)) {
            // One statement may declare several bindings; destructuring
            // patterns (non-identifier names) are skipped.
            node.declarationList.declarations
                .filter((declaration) => ts.isIdentifier(declaration.name))
                .forEach((declaration) => {
                    record(declaration.name, "const");
                });
        }
    });
    return found;
}
|
|
122
|
+
/**
 * Turn an identifier into lowercase, space-separated words.
 *
 * Handles camelCase/PascalCase ("parseUserInput" -> "parse user input"),
 * acronym runs ("HTTPServer" -> "http server"), digit boundaries
 * ("oauth2Client" -> "oauth2 client") and kebab/snake separators.
 *
 * @param name - Identifier to humanize.
 * @returns The lowercase, space-separated form.
 */
function camelToSpaces(name) {
    return name
        // Split an acronym from the capitalized word that follows it: "HTTPServer" -> "HTTP Server"
        .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
        // Split a lowercase letter or digit from a following capital: "userId" -> "user Id"
        .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
        .replace(/[-_]/g, " ")
        .toLowerCase();
}
|
|
129
|
+
//# sourceMappingURL=pr-task-gen.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pr-task-gen.js","sourceRoot":"","sources":["../../src/benchmark/pr-task-gen.ts"],"names":[],"mappings":"AAAA,0EAA0E;AAC1E,uEAAuE;AACvE,sDAAsD;AAEtD,OAAO,EAAE,MAAM,YAAY,CAAC;AAC5B,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAG9C,+EAA+E;AAE/E;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAChC,IAAe,EACf,WAAmB;IAEnB,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC;IACnC,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC;IAE9B,2DAA2D;IAC3D,MAAM,OAAO,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,IAAI,OAAO,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;QACzB,OAAO,WAAW,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IAC3C,CAAC;IAED,gDAAgD;IAChD,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC5D,IAAI,OAAO,EAAE,CAAC;QACZ,OAAO,WAAW,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IAC3C,CAAC;IAED,4CAA4C;IAC5C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAC7C,OAAO,WAAW,CAChB,wBAAwB,SAAS,gBAAgB,EACjD,WAAW,CACZ,CAAC;AACJ,CAAC;AAED,+EAA+E;AAE/E,SAAS,WAAW,CAAC,WAAmB,EAAE,WAAmB;IAC3D,0BAA0B;IAC1B,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAExE,OAAO,CACL,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,GAAG,MAAM;QAC/C,mBAAmB,WAAW,YAAY;QAC1C,sFAAsF;QACtF,kFAAkF,CACnF,CAAC;AACJ,CAAC;AAED,+EAA+E;AAE/E;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAe;IAChD,IAAI,OAAO,GAAG,OAAO,CAAC;IAEtB,qCAAqC;IACrC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,gFAAgF,EAAE,EAAE,CAAC,CAAC;IAEhH,4BAA4B;IAC5B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC,CAAC;IACjD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAEvC,wEAAwE;IACxE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,uDAAuD,EAAE,EAAE,CAAC,CAAC;IAEvF,0DAA0D;IAC1D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,2CAA2C,EAAE,EAAE,CAAC,CAAC;IAE3E,2DAA2D;IAC3D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,0CAA0C,EAAE,MAAM,CAAC,CAAC;IAE9E,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED,+EAA+E;AAE/E;;GAEG;AACH,SAAS,cAAc,CAAC,OAAe,EAAE,QAAgB;IACvD,MAAM,UAAU,GAAG,EAAE,CAAC,gBAAgB,CACp
C,QAAQ,EACR,OAAO,EACP,EAAE,CAAC,YAAY,CAAC,MAAM,EACtB,IAAI,CACL,CAAC;IAEF,MAAM,OAAO,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC;IAC3C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAC1B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAE1B,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,SAAS,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IAEtC,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,UAAU;YACb,OAAO,kCAAkC,SAAS,EAAE,CAAC;QACvD,KAAK,OAAO;YACV,OAAO,aAAa,SAAS,QAAQ,CAAC;QACxC,KAAK,WAAW,CAAC;QACjB,KAAK,MAAM;YACT,OAAO,4BAA4B,SAAS,EAAE,CAAC;QACjD,KAAK,OAAO,CAAC;QACb,KAAK,UAAU;YACb,OAAO,SAAS,SAAS,SAAS,CAAC;QACrC;YACE,OAAO,wBAAwB,SAAS,EAAE,CAAC;IAC/C,CAAC;AACH,CAAC;AAOD,SAAS,cAAc,CAAC,UAAyB;IAC/C,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,CAAC,IAAI,EAAE,EAAE;QACnC,MAAM,IAAI,GAAG,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAC3E,MAAM,UAAU,GAAG,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QAC3E,IAAI,CAAC,UAAU;YAAE,OAAO;QAExB,IAAI,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YAChD,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;QAC3D,CAAC;aAAM,IAAI,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;YACpD,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;QACxD,CAAC;aAAM,IAAI,EAAE,CAAC,sBAAsB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC;QAC5D,CAAC;aAAM,IAAI,EAAE,CAAC,sBAAsB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QACvD,CAAC;aAAM,IAAI,EAAE,CAAC,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;YACxC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,eAAe,CAAC,YAAY,EAAE,CAAC;gBACrD,IAAI,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC/B,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,
CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;gBACxD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CAAC,IAAY;IACjC,sDAAsD;IACtD,OAAO,IAAI;SACR,OAAO,CAAC,iBAAiB,EAAE,OAAO,CAAC;SACnC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,WAAW,EAAE,CAAC;AACnB,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { BenchmarkResults } from "./types.js";
|
|
2
|
+
/**
 * Generate a human-readable markdown benchmark report.
 *
 * @param results - Benchmark results to render.
 * @returns The report as a single markdown string.
 */
|
|
5
|
+
export declare function generateMarkdownReport(results: BenchmarkResults): string;
|
|
6
|
+
/**
 * Generate JSON benchmark results (for programmatic consumption).
 *
 * @param results - Benchmark results to serialize.
 * @returns The results serialized as a JSON string.
 */
|
|
9
|
+
export declare function generateJsonReport(results: BenchmarkResults): string;
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
// src/benchmark/report.ts — Generate benchmark reports (Markdown + JSON)
|
|
2
|
+
import { effectSizeLabel } from "./statistics.js";
|
|
3
|
+
// ─── Markdown Report ─────────────────────────────────────────────────────────
|
|
4
|
+
/**
|
|
5
|
+
* Generate a human-readable markdown benchmark report.
|
|
6
|
+
*/
|
|
7
|
+
export function generateMarkdownReport(results) {
|
|
8
|
+
const lines = [];
|
|
9
|
+
const { meta, summary, tasks } = results;
|
|
10
|
+
lines.push(`# Benchmark Report: ${meta.repoPath.split("/").pop()}`);
|
|
11
|
+
lines.push(`Generated: ${meta.timestamp} | Model: ${meta.model} | Mode: ${meta.mode}`);
|
|
12
|
+
lines.push("");
|
|
13
|
+
// Claim scoping
|
|
14
|
+
lines.push("> **Claim scope:** This benchmark measures whether AGENTS.md improves AI adherence");
|
|
15
|
+
lines.push("> to contribution patterns (file placement, naming, imports, exports, registration)");
|
|
16
|
+
lines.push("> beyond what an AI can infer from reading source code alone.");
|
|
17
|
+
lines.push("");
|
|
18
|
+
// Summary table
|
|
19
|
+
lines.push("## Summary");
|
|
20
|
+
lines.push("");
|
|
21
|
+
lines.push("| Condition | Mean Score | Pass Rate | Avg Tokens |");
|
|
22
|
+
lines.push("|-----------|-----------|-----------|------------|");
|
|
23
|
+
const conditionLabels = {
|
|
24
|
+
"treatment": "A: AGENTS.md + source",
|
|
25
|
+
"realistic-control": "B: Source only",
|
|
26
|
+
"impoverished-control": "C: Dir listing only",
|
|
27
|
+
"negative-control": "N: Shuffled AGENTS.md",
|
|
28
|
+
};
|
|
29
|
+
for (const cond of meta.conditions) {
|
|
30
|
+
const data = summary.conditions[cond];
|
|
31
|
+
if (!data)
|
|
32
|
+
continue;
|
|
33
|
+
const label = conditionLabels[cond];
|
|
34
|
+
const score = data.meanScore.toFixed(1) + "%";
|
|
35
|
+
const pass = `${Math.round(data.passRate * summary.tasksRun)}/${summary.tasksRun}`;
|
|
36
|
+
const tokens = Math.round(data.meanTokens).toLocaleString();
|
|
37
|
+
lines.push(`| ${label} | ${score} | ${pass} | ${tokens} |`);
|
|
38
|
+
}
|
|
39
|
+
lines.push("");
|
|
40
|
+
lines.push(`**Headline (A - B):** ${summary.headlineDelta >= 0 ? "+" : ""}${summary.headlineDelta.toFixed(1)}%`);
|
|
41
|
+
lines.push(`**Upper bound (A - C):** ${summary.upperBoundDelta >= 0 ? "+" : ""}${summary.upperBoundDelta.toFixed(1)}%`);
|
|
42
|
+
// Per-task-type breakdown
|
|
43
|
+
const taskTypes = [...new Set(tasks.map(t => t.taskType))];
|
|
44
|
+
if (taskTypes.length > 1) {
|
|
45
|
+
lines.push("");
|
|
46
|
+
lines.push("## Results by Task Type");
|
|
47
|
+
lines.push("");
|
|
48
|
+
lines.push("| Type | Tasks | A Mean | B Mean | A-B Delta |");
|
|
49
|
+
lines.push("|------|-------|--------|--------|-----------|");
|
|
50
|
+
for (const type of taskTypes) {
|
|
51
|
+
const typeTasks = tasks.filter(t => t.taskType === type);
|
|
52
|
+
const aScores = typeTasks.map(t => t.results["treatment"]?.score ?? 0);
|
|
53
|
+
const bScores = typeTasks.map(t => t.results["realistic-control"]?.score ?? 0);
|
|
54
|
+
const aMean = aScores.reduce((s, v) => s + v, 0) / aScores.length;
|
|
55
|
+
const bMean = bScores.reduce((s, v) => s + v, 0) / bScores.length;
|
|
56
|
+
const delta = aMean - bMean;
|
|
57
|
+
lines.push(`| ${type} | ${typeTasks.length} | ${aMean.toFixed(1)}% | ${bMean.toFixed(1)}% | ${delta >= 0 ? "+" : ""}${delta.toFixed(1)}% |`);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
// Statistical analysis (full mode only)
|
|
61
|
+
if (meta.mode === "full" && summary.pValue !== undefined) {
|
|
62
|
+
lines.push("");
|
|
63
|
+
lines.push("## Statistical Analysis");
|
|
64
|
+
lines.push("");
|
|
65
|
+
lines.push(`- **Paired t-test:** p = ${summary.pValue.toFixed(4)}`);
|
|
66
|
+
if (summary.effectSize !== undefined) {
|
|
67
|
+
lines.push(`- **Cohen's d:** ${summary.effectSize.toFixed(2)} (${effectSizeLabel(summary.effectSize)})`);
|
|
68
|
+
}
|
|
69
|
+
if (summary.ci95) {
|
|
70
|
+
lines.push(`- **95% Bootstrap CI:** [${summary.ci95[0].toFixed(1)}%, ${summary.ci95[1].toFixed(1)}%]`);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
lines.push("");
|
|
75
|
+
lines.push("*Quick mode — directional results only, not statistically powered.*");
|
|
76
|
+
}
|
|
77
|
+
// Per-task breakdown
|
|
78
|
+
lines.push("");
|
|
79
|
+
lines.push("## Per-Task Results");
|
|
80
|
+
for (const task of tasks) {
|
|
81
|
+
lines.push("");
|
|
82
|
+
lines.push(`### ${task.taskId} (Tier ${task.tier})`);
|
|
83
|
+
lines.push("");
|
|
84
|
+
lines.push(`> ${task.prompt}`);
|
|
85
|
+
lines.push("");
|
|
86
|
+
for (const cond of meta.conditions) {
|
|
87
|
+
const run = task.results[cond];
|
|
88
|
+
if (!run)
|
|
89
|
+
continue;
|
|
90
|
+
const label = conditionLabels[cond];
|
|
91
|
+
const status = run.error ? "ERROR" : run.passed ? "PASS" : "FAIL";
|
|
92
|
+
lines.push(`**${label}:** ${run.score}% (${run.rawScore}/${run.maxPoints} pts) — ${status}`);
|
|
93
|
+
if (run.error) {
|
|
94
|
+
lines.push(` Error: ${run.error}`);
|
|
95
|
+
}
|
|
96
|
+
else {
|
|
97
|
+
for (const check of run.checks) {
|
|
98
|
+
const icon = check.passed ? "[x]" : "[ ]";
|
|
99
|
+
lines.push(` - ${icon} ${check.name} (${check.score}/${check.weight}) — ${check.detail}`);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
lines.push("");
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
// Token counts (confound disclosure)
|
|
106
|
+
lines.push("## Token Analysis");
|
|
107
|
+
lines.push("");
|
|
108
|
+
lines.push("| Condition | Mean Input Tokens | Notes |");
|
|
109
|
+
lines.push("|-----------|------------------|-------|");
|
|
110
|
+
for (const cond of meta.conditions) {
|
|
111
|
+
const data = summary.conditions[cond];
|
|
112
|
+
if (!data)
|
|
113
|
+
continue;
|
|
114
|
+
const note = cond === "treatment" ? "Includes AGENTS.md context" :
|
|
115
|
+
cond === "realistic-control" ? "Sibling files only" :
|
|
116
|
+
cond === "impoverished-control" ? "Minimal context" :
|
|
117
|
+
"Shuffled context";
|
|
118
|
+
lines.push(`| ${conditionLabels[cond]} | ${Math.round(data.meanTokens).toLocaleString()} | ${note} |`);
|
|
119
|
+
}
|
|
120
|
+
lines.push("");
|
|
121
|
+
lines.push("*Note: Token count asymmetry between conditions may contribute to score differences.*");
|
|
122
|
+
return lines.join("\n");
|
|
123
|
+
}
|
|
124
|
+
// ─── JSON Report ─────────────────────────────────────────────────────────────
|
|
125
|
+
/**
|
|
126
|
+
* Generate JSON benchmark results (for programmatic consumption).
|
|
127
|
+
*/
|
|
128
|
+
export function generateJsonReport(results) {
|
|
129
|
+
return JSON.stringify(results, null, 2);
|
|
130
|
+
}
|
|
131
|
+
//# sourceMappingURL=report.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"report.js","sourceRoot":"","sources":["../../src/benchmark/report.ts"],"names":[],"mappings":"AAAA,yEAAyE;AAGzE,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAElD,gFAAgF;AAEhF;;GAEG;AACH,MAAM,UAAU,sBAAsB,CAAC,OAAyB;IAC9D,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC;IAEzC,KAAK,CAAC,IAAI,CAAC,uBAAuB,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;IACpE,KAAK,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,SAAS,aAAa,IAAI,CAAC,KAAK,YAAY,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;IACvF,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,gBAAgB;IAChB,KAAK,CAAC,IAAI,CAAC,oFAAoF,CAAC,CAAC;IACjG,KAAK,CAAC,IAAI,CAAC,qFAAqF,CAAC,CAAC;IAClG,KAAK,CAAC,IAAI,CAAC,+DAA+D,CAAC,CAAC;IAC5E,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,gBAAgB;IAChB,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACzB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,qDAAqD,CAAC,CAAC;IAClE,KAAK,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;IAEjE,MAAM,eAAe,GAAuC;QAC1D,WAAW,EAAE,uBAAuB;QACpC,mBAAmB,EAAE,gBAAgB;QACrC,sBAAsB,EAAE,qBAAqB;QAC7C,kBAAkB,EAAE,uBAAuB;KAC5C,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACnC,MAAM,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACtC,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACpC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QAC9C,MAAM,IAAI,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACnF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,cAAc,EAAE,CAAC;QAC5D,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,MAAM,KAAK,MAAM,IAAI,MAAM,MAAM,IAAI,CAAC,CAAC;IAC9D,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,yBAAyB,OAAO,CAAC,aAAa,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACjH,KAAK,CAAC,IAAI,CAAC,4BAA4B,OAAO,CAAC,eAAe,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAExH,0BAA0B;IAC1B,MAAM,SAAS,GAAG,CAAC,G
AAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAC3D,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;QACtC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;QAC7D,KAAK,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;QAE7D,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,IAAI,CAAC,CAAC;YACzD,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC;YACvE,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,mBAAmB,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC;YAC/E,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;YAClE,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;YAClE,MAAM,KAAK,GAAG,KAAK,GAAG,KAAK,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,KAAK,IAAI,MAAM,SAAS,CAAC,MAAM,MAAM,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC/I,CAAC;IACH,CAAC;IAED,wCAAwC;IACxC,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;QACzD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;QACtC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,4BAA4B,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpE,IAAI,OAAO,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YACrC,KAAK,CAAC,IAAI,CAAC,oBAAoB,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,eAAe,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;QAC3G,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,4BAA4B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACzG,CAAC;IACH,CAAC;SAAM,CAAC;QA
CN,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,qEAAqE,CAAC,CAAC;IACpF,CAAC;IAED,qBAAqB;IACrB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,MAAM,UAAU,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAC/B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACnC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC/B,IAAI,CAAC,GAAG;gBAAE,SAAS;YACnB,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;YACpC,MAAM,MAAM,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;YAClE,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,OAAO,GAAG,CAAC,KAAK,MAAM,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,SAAS,WAAW,MAAM,EAAE,CAAC,CAAC;YAE7F,IAAI,GAAG,CAAC,KAAK,EAAE,CAAC;gBACd,KAAK,CAAC,IAAI,CAAC,YAAY,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC;YACtC,CAAC;iBAAM,CAAC;gBACN,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;oBAC/B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;oBAC1C,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,OAAO,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;gBAC7F,CAAC;YACH,CAAC;YACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAChC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;IACxD,KAAK,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;IACvD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACnC,MAAM,IAAI,GAAG,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACtC,IAAI,CAAC,IAAI;YAAE,SAAS;QACpB,MAAM,IAAI,GAAG,IAAI,KAAK,WAAW,CAAC,CAAC,CAAC,4BAA4B,CAAC,CAAC;YACrD,IAAI,KAAK,mBAAmB,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC;gBACrD,IAAI,KAAK,sBAAsB,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC;oBACrD,kBAAkB,CAAC;QAChC,KAAK,CAAC,IAAI,CAAC,KAAK,eAAe,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU
,CAAC,CAAC,cAAc,EAAE,MAAM,IAAI,IAAI,CAAC,CAAC;IACzG,CAAC;IACD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,uFAAuF,CAAC,CAAC;IAEpG,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,gFAAgF;AAEhF;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAyB;IAC1D,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AAC1C,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { ResolvedConfig } from "../types.js";
|
|
2
|
+
import type { BenchmarkOptions, BenchmarkResults } from "./types.js";
|
|
3
|
+
/**
 * Run the full benchmark pipeline.
 *
 * @param options - Benchmark options.
 * @param llmConfig - The `llm` section of the resolved configuration.
 * @returns A promise that resolves to the benchmark results.
 */
|
|
6
|
+
export declare function orchestrateBenchmark(options: BenchmarkOptions, llmConfig: ResolvedConfig["llm"]): Promise<BenchmarkResults>;
|