autodocs-engine 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/dist/analysis-builder.d.ts +13 -0
- package/dist/analysis-builder.js +268 -0
- package/dist/analysis-builder.js.map +1 -0
- package/dist/anti-pattern-detector.d.ts +9 -0
- package/dist/anti-pattern-detector.js +58 -0
- package/dist/anti-pattern-detector.js.map +1 -0
- package/dist/architecture-detector.d.ts +7 -0
- package/dist/architecture-detector.js +212 -0
- package/dist/architecture-detector.js.map +1 -0
- package/dist/ast-parser.d.ts +5 -0
- package/dist/ast-parser.js +635 -0
- package/dist/ast-parser.js.map +1 -0
- package/dist/benchmark/code-generator.d.ts +20 -0
- package/dist/benchmark/code-generator.js +206 -0
- package/dist/benchmark/code-generator.js.map +1 -0
- package/dist/benchmark/pr-miner.d.ts +61 -0
- package/dist/benchmark/pr-miner.js +304 -0
- package/dist/benchmark/pr-miner.js.map +1 -0
- package/dist/benchmark/pr-runner.d.ts +58 -0
- package/dist/benchmark/pr-runner.js +346 -0
- package/dist/benchmark/pr-runner.js.map +1 -0
- package/dist/benchmark/pr-scorer.d.ts +48 -0
- package/dist/benchmark/pr-scorer.js +222 -0
- package/dist/benchmark/pr-scorer.js.map +1 -0
- package/dist/benchmark/pr-task-gen.d.ts +16 -0
- package/dist/benchmark/pr-task-gen.js +129 -0
- package/dist/benchmark/pr-task-gen.js.map +1 -0
- package/dist/benchmark/report.d.ts +9 -0
- package/dist/benchmark/report.js +131 -0
- package/dist/benchmark/report.js.map +1 -0
- package/dist/benchmark/runner.d.ts +6 -0
- package/dist/benchmark/runner.js +183 -0
- package/dist/benchmark/runner.js.map +1 -0
- package/dist/benchmark/scorer.d.ts +6 -0
- package/dist/benchmark/scorer.js +549 -0
- package/dist/benchmark/scorer.js.map +1 -0
- package/dist/benchmark/shuffler.d.ts +5 -0
- package/dist/benchmark/shuffler.js +70 -0
- package/dist/benchmark/shuffler.js.map +1 -0
- package/dist/benchmark/statistics.d.ts +36 -0
- package/dist/benchmark/statistics.js +159 -0
- package/dist/benchmark/statistics.js.map +1 -0
- package/dist/benchmark/task-generator.d.ts +20 -0
- package/dist/benchmark/task-generator.js +388 -0
- package/dist/benchmark/task-generator.js.map +1 -0
- package/dist/benchmark/types.d.ts +111 -0
- package/dist/benchmark/types.js +3 -0
- package/dist/benchmark/types.js.map +1 -0
- package/dist/bin/autodocs-engine.d.ts +2 -0
- package/dist/bin/autodocs-engine.js +296 -0
- package/dist/bin/autodocs-engine.js.map +1 -0
- package/dist/bin/benchmark.d.ts +14 -0
- package/dist/bin/benchmark.js +172 -0
- package/dist/bin/benchmark.js.map +1 -0
- package/dist/bin/check.d.ts +13 -0
- package/dist/bin/check.js +79 -0
- package/dist/bin/check.js.map +1 -0
- package/dist/bin/init.d.ts +11 -0
- package/dist/bin/init.js +268 -0
- package/dist/bin/init.js.map +1 -0
- package/dist/bin/serve.d.ts +4 -0
- package/dist/bin/serve.js +29 -0
- package/dist/bin/serve.js.map +1 -0
- package/dist/budget-validator.d.ts +22 -0
- package/dist/budget-validator.js +119 -0
- package/dist/budget-validator.js.map +1 -0
- package/dist/command-extractor.d.ts +10 -0
- package/dist/command-extractor.js +276 -0
- package/dist/command-extractor.js.map +1 -0
- package/dist/config-analyzer.d.ts +5 -0
- package/dist/config-analyzer.js +364 -0
- package/dist/config-analyzer.js.map +1 -0
- package/dist/config.d.ts +33 -0
- package/dist/config.js +172 -0
- package/dist/config.js.map +1 -0
- package/dist/contribution-patterns.d.ts +6 -0
- package/dist/contribution-patterns.js +263 -0
- package/dist/contribution-patterns.js.map +1 -0
- package/dist/convention-extractor.d.ts +17 -0
- package/dist/convention-extractor.js +90 -0
- package/dist/convention-extractor.js.map +1 -0
- package/dist/cross-package.d.ts +5 -0
- package/dist/cross-package.js +71 -0
- package/dist/cross-package.js.map +1 -0
- package/dist/dependency-analyzer.d.ts +5 -0
- package/dist/dependency-analyzer.js +233 -0
- package/dist/dependency-analyzer.js.map +1 -0
- package/dist/detectors/build-tool.d.ts +2 -0
- package/dist/detectors/build-tool.js +67 -0
- package/dist/detectors/build-tool.js.map +1 -0
- package/dist/detectors/component-patterns.d.ts +2 -0
- package/dist/detectors/component-patterns.js +49 -0
- package/dist/detectors/component-patterns.js.map +1 -0
- package/dist/detectors/data-fetching.d.ts +2 -0
- package/dist/detectors/data-fetching.js +127 -0
- package/dist/detectors/data-fetching.js.map +1 -0
- package/dist/detectors/database.d.ts +2 -0
- package/dist/detectors/database.js +54 -0
- package/dist/detectors/database.js.map +1 -0
- package/dist/detectors/error-handling.d.ts +2 -0
- package/dist/detectors/error-handling.js +47 -0
- package/dist/detectors/error-handling.js.map +1 -0
- package/dist/detectors/export-patterns.d.ts +2 -0
- package/dist/detectors/export-patterns.js +64 -0
- package/dist/detectors/export-patterns.js.map +1 -0
- package/dist/detectors/file-naming.d.ts +2 -0
- package/dist/detectors/file-naming.js +74 -0
- package/dist/detectors/file-naming.js.map +1 -0
- package/dist/detectors/graphql-patterns.d.ts +2 -0
- package/dist/detectors/graphql-patterns.js +47 -0
- package/dist/detectors/graphql-patterns.js.map +1 -0
- package/dist/detectors/hook-patterns.d.ts +2 -0
- package/dist/detectors/hook-patterns.js +105 -0
- package/dist/detectors/hook-patterns.js.map +1 -0
- package/dist/detectors/import-patterns.d.ts +2 -0
- package/dist/detectors/import-patterns.js +88 -0
- package/dist/detectors/import-patterns.js.map +1 -0
- package/dist/detectors/telemetry-patterns.d.ts +2 -0
- package/dist/detectors/telemetry-patterns.js +42 -0
- package/dist/detectors/telemetry-patterns.js.map +1 -0
- package/dist/detectors/test-framework-ecosystem.d.ts +2 -0
- package/dist/detectors/test-framework-ecosystem.js +95 -0
- package/dist/detectors/test-framework-ecosystem.js.map +1 -0
- package/dist/detectors/test-patterns.d.ts +2 -0
- package/dist/detectors/test-patterns.js +60 -0
- package/dist/detectors/test-patterns.js.map +1 -0
- package/dist/detectors/web-framework.d.ts +2 -0
- package/dist/detectors/web-framework.js +51 -0
- package/dist/detectors/web-framework.js.map +1 -0
- package/dist/deterministic-formatter.d.ts +54 -0
- package/dist/deterministic-formatter.js +922 -0
- package/dist/deterministic-formatter.js.map +1 -0
- package/dist/diff-analyzer.d.ts +7 -0
- package/dist/diff-analyzer.js +126 -0
- package/dist/diff-analyzer.js.map +1 -0
- package/dist/example-extractor.d.ts +6 -0
- package/dist/example-extractor.js +115 -0
- package/dist/example-extractor.js.map +1 -0
- package/dist/existing-docs.d.ts +36 -0
- package/dist/existing-docs.js +257 -0
- package/dist/existing-docs.js.map +1 -0
- package/dist/file-discovery.d.ts +6 -0
- package/dist/file-discovery.js +154 -0
- package/dist/file-discovery.js.map +1 -0
- package/dist/git-history.d.ts +41 -0
- package/dist/git-history.js +401 -0
- package/dist/git-history.js.map +1 -0
- package/dist/impact-classifier.d.ts +22 -0
- package/dist/impact-classifier.js +87 -0
- package/dist/impact-classifier.js.map +1 -0
- package/dist/impact-radius.d.ts +23 -0
- package/dist/impact-radius.js +130 -0
- package/dist/impact-radius.js.map +1 -0
- package/dist/import-chain.d.ts +12 -0
- package/dist/import-chain.js +93 -0
- package/dist/import-chain.js.map +1 -0
- package/dist/index.d.ts +40 -0
- package/dist/index.js +72 -0
- package/dist/index.js.map +1 -0
- package/dist/inferability.d.ts +16 -0
- package/dist/inferability.js +142 -0
- package/dist/inferability.js.map +1 -0
- package/dist/llm/adapter.d.ts +33 -0
- package/dist/llm/adapter.js +202 -0
- package/dist/llm/adapter.js.map +1 -0
- package/dist/llm/client.d.ts +5 -0
- package/dist/llm/client.js +68 -0
- package/dist/llm/client.js.map +1 -0
- package/dist/llm/hierarchical.d.ts +23 -0
- package/dist/llm/hierarchical.js +126 -0
- package/dist/llm/hierarchical.js.map +1 -0
- package/dist/llm/serializer.d.ts +19 -0
- package/dist/llm/serializer.js +363 -0
- package/dist/llm/serializer.js.map +1 -0
- package/dist/llm/template-selector.d.ts +7 -0
- package/dist/llm/template-selector.js +21 -0
- package/dist/llm/template-selector.js.map +1 -0
- package/dist/llm-adapter.d.ts +2 -0
- package/dist/llm-adapter.js +5 -0
- package/dist/llm-adapter.js.map +1 -0
- package/dist/mcp/cache.d.ts +30 -0
- package/dist/mcp/cache.js +112 -0
- package/dist/mcp/cache.js.map +1 -0
- package/dist/mcp/errors.d.ts +21 -0
- package/dist/mcp/errors.js +27 -0
- package/dist/mcp/errors.js.map +1 -0
- package/dist/mcp/queries.d.ts +27 -0
- package/dist/mcp/queries.js +121 -0
- package/dist/mcp/queries.js.map +1 -0
- package/dist/mcp/server.d.ts +14 -0
- package/dist/mcp/server.js +131 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools.d.ts +39 -0
- package/dist/mcp/tools.js +249 -0
- package/dist/mcp/tools.js.map +1 -0
- package/dist/mermaid-generator.d.ts +6 -0
- package/dist/mermaid-generator.js +59 -0
- package/dist/mermaid-generator.js.map +1 -0
- package/dist/meta-tool-detector.d.ts +23 -0
- package/dist/meta-tool-detector.js +177 -0
- package/dist/meta-tool-detector.js.map +1 -0
- package/dist/output-validator.d.ts +6 -0
- package/dist/output-validator.js +471 -0
- package/dist/output-validator.js.map +1 -0
- package/dist/pattern-fingerprinter.d.ts +7 -0
- package/dist/pattern-fingerprinter.js +241 -0
- package/dist/pattern-fingerprinter.js.map +1 -0
- package/dist/pipeline.d.ts +5 -0
- package/dist/pipeline.js +374 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/plugin-loader.d.ts +19 -0
- package/dist/plugin-loader.js +124 -0
- package/dist/plugin-loader.js.map +1 -0
- package/dist/role-inferrer.d.ts +5 -0
- package/dist/role-inferrer.js +159 -0
- package/dist/role-inferrer.js.map +1 -0
- package/dist/symbol-graph.d.ts +11 -0
- package/dist/symbol-graph.js +613 -0
- package/dist/symbol-graph.js.map +1 -0
- package/dist/templates/agents-md.d.ts +20 -0
- package/dist/templates/agents-md.js +346 -0
- package/dist/templates/agents-md.js.map +1 -0
- package/dist/templates/claude-md.d.ts +4 -0
- package/dist/templates/claude-md.js +23 -0
- package/dist/templates/claude-md.js.map +1 -0
- package/dist/templates/cursorrules.d.ts +4 -0
- package/dist/templates/cursorrules.js +18 -0
- package/dist/templates/cursorrules.js.map +1 -0
- package/dist/tier-classifier.d.ts +7 -0
- package/dist/tier-classifier.js +32 -0
- package/dist/tier-classifier.js.map +1 -0
- package/dist/types.d.ts +428 -0
- package/dist/types.js +42 -0
- package/dist/types.js.map +1 -0
- package/dist/workflow-rules.d.ts +18 -0
- package/dist/workflow-rules.js +131 -0
- package/dist/workflow-rules.js.map +1 -0
- package/package.json +62 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
// src/benchmark/runner.ts — Benchmark orchestrator
|
|
2
|
+
// Coordinates: analyze → generate tasks → AGENTS.md → A/B/C/N → score → report
|
|
3
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { analyze, formatDeterministic } from "../index.js";
|
|
6
|
+
import { ENGINE_VERSION } from "../types.js";
|
|
7
|
+
import { generateTasksFromAnalysis } from "./task-generator.js";
|
|
8
|
+
import { generateCode } from "./code-generator.js";
|
|
9
|
+
import { scoreGeneratedOutput } from "./scorer.js";
|
|
10
|
+
import { shuffleAgentsMd } from "./shuffler.js";
|
|
11
|
+
import { pairedTTest, cohensD, bootstrapCI } from "./statistics.js";
|
|
12
|
+
import { generateMarkdownReport, generateJsonReport } from "./report.js";
|
|
13
|
+
// ─── Public API ──────────────────────────────────────────────────────────────
|
|
14
|
+
/**
 * Run the full benchmark pipeline.
 *
 * Phases, in order: analyze the repository, generate benchmark tasks, build
 * AGENTS.md (plus a shuffled variant for the negative control), run every
 * task under each condition, score the output, compute summary statistics,
 * and write `results.json` and `REPORT.md` to the output directory.
 *
 * @param options - Run settings. Fields read here: `repoPath`, `rootDir`,
 *   `mode`, `maxTasks`, `dryRun`, `verbose`, `outputDir`.
 * @param llmConfig - LLM settings. `model` is recorded in the result metadata;
 *   the whole object is forwarded to the formatter and the code generator.
 * @returns An object with `meta`, `summary` and `tasks`. For a dry run,
 *   `tasks` is empty and `summary` is all zeros.
 * @throws {Error} When no benchmark tasks can be generated for the repo.
 */
export async function orchestrateBenchmark(options, llmConfig) {
    // The four experimental arms (A/B/C/N) every task is run under.
    const conditions = [
        "treatment",
        "realistic-control",
        "impoverished-control",
        "negative-control",
    ];
    // Progress output goes to stderr and only when verbose is requested.
    const log = (msg) => {
        if (options.verbose)
            process.stderr.write(`[BENCH] ${msg}\n`);
    };
    // Metadata block shared by the dry-run result and the real result.
    const buildMeta = () => ({
        engineVersion: ENGINE_VERSION,
        model: llmConfig.model,
        repoPath: options.repoPath,
        timestamp: new Date().toISOString(),
        mode: options.mode,
        conditions,
    });
    const countWhere = (predicate) => tasks.filter(predicate).length;

    // Phase 1: Analyze the repo
    log("Phase 1: Analyzing repository...");
    const analysis = await analyze({
        packages: [options.repoPath],
        rootDir: options.rootDir,
    });

    // Phase 2: Generate tasks
    log("Phase 2: Generating benchmark tasks...");
    // Quick mode never runs more than 5 tasks; other modes default to 20.
    const maxTasks = options.mode === "quick"
        ? Math.min(options.maxTasks ?? 5, 5)
        : options.maxTasks ?? 20;
    const tasks = generateTasksFromAnalysis(analysis, options.repoPath, maxTasks);
    if (tasks.length === 0) {
        throw new Error("No benchmark tasks could be generated. This repo may lack contribution patterns.\n"
            + "Try a repo with structured patterns (e.g., directories with 3+ similar files).");
    }
    const patternCount = countWhere(t => t.taskType === "pattern");
    const commandCount = countWhere(t => t.taskType === "command");
    const archCount = countWhere(t => t.taskType === "architecture");
    const tierACount = countWhere(t => t.tier === "A");
    const tierBCount = countWhere(t => t.tier === "B");
    const tierCCount = countWhere(t => t.tier === "C");
    log(` ${tasks.length} tasks generated: ${patternCount} pattern, ${commandCount} command, ${archCount} architecture (${tierACount} Tier A, ${tierBCount} Tier B, ${tierCCount} Tier C)`);

    // Dry run: show tasks and exit (printed regardless of the verbose flag)
    if (options.dryRun) {
        for (const task of tasks) {
            process.stderr.write(`\n--- Task: ${task.id} [${task.taskType}] (Tier ${task.tier}) ---\n`);
            process.stderr.write(`Prompt: ${task.prompt}\n`);
            process.stderr.write(`Directory: ${task.expectedDirectory}\n`);
            process.stderr.write(`Pattern: ${task.expectedFilePattern}\n`);
            process.stderr.write(`Siblings: ${task.context.siblingFiles.map(s => s.path).join(", ")}\n`);
            process.stderr.write(`Registration: ${task.context.registrationFile?.path ?? "none"}\n`);
            process.stderr.write(`Barrel: ${task.context.barrelFile?.path ?? "none"}\n`);
            process.stderr.write(`Max points: ${task.maxScoringPoints}\n`);
        }
        // Return empty results for dry run
        return {
            meta: buildMeta(),
            summary: emptySummary(conditions),
            tasks: [],
        };
    }

    // Phase 3: Generate AGENTS.md
    log("Phase 3: Generating AGENTS.md...");
    let agentsMd = null;
    try {
        const formatConfig = {
            output: { format: "agents.md", dir: "." },
            llm: llmConfig,
        };
        agentsMd = await formatDeterministic(analysis, formatConfig);
    }
    catch (err) {
        // Best-effort: the benchmark continues with agentsMd === null, but the
        // failure reason is surfaced instead of being silently discarded.
        const reason = err instanceof Error ? err.message : String(err);
        log(" Warning: AGENTS.md generation failed (LLM unavailable?). Using JSON analysis as fallback.");
        log(` Reason: ${reason}`);
    }
    // Generate shuffled AGENTS.md for negative control
    const shuffledAgentsMd = agentsMd ? shuffleAgentsMd(agentsMd) : null;

    // Phase 4: Run A/B/C/N for each task.
    // Tasks and conditions are awaited one at a time, in order.
    log(`Phase 4: Running ${tasks.length} tasks × ${conditions.length} conditions...`);
    const taskResults = [];
    for (let i = 0; i < tasks.length; i++) {
        const task = tasks[i];
        log(` Task ${i + 1}/${tasks.length}: ${task.id} (Tier ${task.tier})`);
        const results = {};
        for (const condition of conditions) {
            log(` Condition: ${condition}...`);
            const codeResult = await generateCode(task, condition, agentsMd, shuffledAgentsMd, llmConfig);
            const runResult = scoreGeneratedOutput(codeResult.files, task, codeResult.tokensUsed, codeResult.latencyMs, codeResult.error);
            results[condition] = runResult;
            log(` → ${runResult.score}% (${runResult.rawScore}/${runResult.maxPoints}) ${runResult.passed ? "PASS" : "FAIL"}`);
        }
        taskResults.push({
            taskId: task.id,
            tier: task.tier,
            taskType: task.taskType,
            prompt: task.prompt,
            results,
        });
    }

    // Phase 5: Compute statistics and generate report
    log("Phase 5: Computing statistics and generating report...");
    const summary = computeSummary(taskResults, conditions, options.mode);
    const benchmarkResults = {
        meta: buildMeta(),
        summary,
        tasks: taskResults,
    };

    // Write reports (directory is created if missing)
    const outputDir = options.outputDir ?? "./benchmark-results";
    mkdirSync(outputDir, { recursive: true });
    writeFileSync(join(outputDir, "results.json"), generateJsonReport(benchmarkResults));
    writeFileSync(join(outputDir, "REPORT.md"), generateMarkdownReport(benchmarkResults));
    log(`Reports written to ${outputDir}/`);
    return benchmarkResults;
}
|
|
134
|
+
// ─── Summary Computation ─────────────────────────────────────────────────────
|
|
135
|
+
/**
 * Aggregate per-task run results into a benchmark summary.
 *
 * For each condition this collects every task's score (0 when the task has no
 * result for that condition), the mean score, the pass rate and the mean
 * token usage. It also reports the treatment-vs-realistic-control delta
 * (`headlineDelta`) and the treatment-vs-impoverished-control delta
 * (`upperBoundDelta`).
 *
 * @param tasks - Task results; each entry's `results` maps a condition name
 *   to an object with `score`, `tokensUsed` and `passed`.
 * @param conditions - Condition names to summarize.
 * @param mode - Benchmark mode; only "full" enables the statistical tests.
 * @returns The summary object. `pValue`, `effectSize` and `ci95` are present
 *   only in full mode with at least 10 tasks.
 */
function computeSummary(tasks, conditions, mode) {
    const n = tasks.length;
    // Divide by the task count, but yield 0 rather than NaN (0 / 0) when there
    // are no tasks, matching the zeroed values emptySummary() produces.
    const perTask = (total) => (n === 0 ? 0 : total / n);
    const sum = (values) => values.reduce((s, v) => s + v, 0);
    const conditionData = {};
    for (const cond of conditions) {
        const scores = tasks.map(t => t.results[cond]?.score ?? 0);
        const tokens = tasks.map(t => t.results[cond]?.tokensUsed ?? 0);
        const passCount = tasks.filter(t => t.results[cond]?.passed).length;
        conditionData[cond] = {
            meanScore: perTask(sum(scores)),
            passRate: perTask(passCount),
            scores,
            meanTokens: perTask(sum(tokens)),
        };
    }
    const aScores = conditionData["treatment"]?.scores ?? [];
    const bScores = conditionData["realistic-control"]?.scores ?? [];
    const treatmentMean = conditionData["treatment"]?.meanScore ?? 0;
    const headlineDelta = treatmentMean
        - (conditionData["realistic-control"]?.meanScore ?? 0);
    const upperBoundDelta = treatmentMean
        - (conditionData["impoverished-control"]?.meanScore ?? 0);
    const summary = {
        tasksRun: n,
        conditions: conditionData,
        headlineDelta,
        upperBoundDelta,
    };
    // Inferential statistics are only computed in full mode with at least
    // 10 tasks; smaller samples return the descriptive summary alone.
    if (mode === "full" && n >= 10) {
        const tTest = pairedTTest(aScores, bScores);
        return {
            ...summary,
            pValue: tTest.p,
            effectSize: cohensD(aScores, bScores),
            ci95: bootstrapCI(aScores, bScores),
        };
    }
    return summary;
}
|
|
171
|
+
/**
 * Build the all-zero summary used when no tasks were actually run
 * (the dry-run path).
 *
 * @param conditions - Condition names to include in the summary.
 * @returns A summary with `tasksRun` 0, zero deltas, and one zeroed stats
 *   entry per condition.
 */
function emptySummary(conditions) {
    // Each condition gets its own object and its own scores array, so the
    // entries never share mutable state.
    const blankStats = () => ({ meanScore: 0, passRate: 0, scores: [], meanTokens: 0 });
    const perCondition = {};
    for (const name of conditions) {
        perCondition[name] = blankStats();
    }
    const result = {
        tasksRun: 0,
        conditions: perCondition,
        headlineDelta: 0,
        upperBoundDelta: 0,
    };
    return result;
}
|
|
183
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/benchmark/runner.ts"],"names":[],"mappings":"AAAA,mDAAmD;AACnD,+EAA+E;AAE/E,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAE3D,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,EAAE,yBAAyB,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,WAAW,EAAsB,MAAM,iBAAiB,CAAC;AACxF,OAAO,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAUzE,gFAAgF;AAEhF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAyB,EACzB,SAAgC;IAEhC,MAAM,UAAU,GAAyB;QACvC,WAAW;QACX,mBAAmB;QACnB,sBAAsB;QACtB,kBAAkB;KACnB,CAAC;IAEF,MAAM,GAAG,GAAG,CAAC,GAAW,EAAE,EAAE;QAC1B,IAAI,OAAO,CAAC,OAAO;YAAE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC;IAChE,CAAC,CAAC;IAEF,4BAA4B;IAC5B,GAAG,CAAC,kCAAkC,CAAC,CAAC;IACxC,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC;QAC7B,QAAQ,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC;QAC5B,OAAO,EAAE,OAAO,CAAC,OAAO;KACzB,CAAC,CAAC;IAEH,0BAA0B;IAC1B,GAAG,CAAC,wCAAwC,CAAC,CAAC;IAC9C,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,KAAK,OAAO;QACvC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,IAAI,CAAC,EAAE,CAAC,CAAC;QACpC,CAAC,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IAE3B,MAAM,KAAK,GAAG,yBAAyB,CAAC,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAE9E,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,KAAK,CACb,oFAAoF;cAClF,gFAAgF,CACnF,CAAC;IACJ,CAAC;IAED,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,cAAc,CAAC,CAAC,MAAM,CAAC;IAC1E,GAAG,CAAC,KAAK,KAAK,CAAC,MAAM,qBAAqB,YAAY,aAAa,YAAY,aAAa,SAAS,kBAAkB,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,GAAG,CAAC,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,CAAC,CAAC,CAA
C,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,GAAG,CAAC,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,GAAG,CAAC,CAAC,MAAM,UAAU,CAAC,CAAC;IAEnR,+BAA+B;IAC/B,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,eAAe,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,QAAQ,WAAW,IAAI,CAAC,IAAI,SAAS,CAAC,CAAC;YAC5F,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;YACjD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,iBAAiB,IAAI,CAAC,CAAC;YAC/D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,mBAAmB,IAAI,CAAC,CAAC;YAC/D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,aAAa,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7F,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,iBAAiB,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,IAAI,IAAI,MAAM,IAAI,CAAC,CAAC;YACzF,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,IAAI,MAAM,IAAI,CAAC,CAAC;YAC7E,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,eAAe,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAC;QACjE,CAAC;QACD,mCAAmC;QACnC,OAAO;YACL,IAAI,EAAE;gBACJ,aAAa,EAAE,cAAc;gBAC7B,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,IAAI,EAAE,OAAO,CAAC,IAAI;gBAClB,UAAU;aACX;YACD,OAAO,EAAE,YAAY,CAAC,UAAU,CAAC;YACjC,KAAK,EAAE,EAAE;SACV,CAAC;IACJ,CAAC;IAED,8BAA8B;IAC9B,GAAG,CAAC,kCAAkC,CAAC,CAAC;IACxC,IAAI,QAAQ,GAAkB,IAAI,CAAC;IACnC,IAAI,CAAC;QACH,MAAM,YAAY,GAAG;YACnB,MAAM,EAAE,EAAE,MAAM,EAAE,WAAoB,EAAE,GAAG,EAAE,GAAG,EAAE;YAClD,GAAG,EAAE,SAAS;SACf,CAAC;QACF,QAAQ,GAAG,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAC/D,CAAC;IAAC,MAAM,CAAC;QACP,GAAG,CAAC,6FAA6F,CAAC,CAAC;IACrG,CAAC;IAED,mDAAmD;IACnD,MAAM,gBAAgB,GAAG,QAAQ,CAAC,CAAC,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAErE,qCAAqC;IACrC,GAAG,CAAC,oBAAoB,KAAK,CAAC,MAAM,YAAY,UAAU,CAAC,MAAM,gBAAgB,CAAC,CAAC;IACnF,MAAM,WAAW,GAAiB,EAAE,CAAC;IAErC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;
QACtB,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,IAAI,CAAC,EAAE,UAAU,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;QAEvE,MAAM,OAAO,GAA0C,EAA2C,CAAC;QAEnG,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,GAAG,CAAC,kBAAkB,SAAS,KAAK,CAAC,CAAC;YACtC,MAAM,UAAU,GAAG,MAAM,YAAY,CACnC,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,CACvD,CAAC;YAEF,MAAM,SAAS,GAAG,oBAAoB,CACpC,UAAU,CAAC,KAAK,EAChB,IAAI,EACJ,UAAU,CAAC,UAAU,EACrB,UAAU,CAAC,SAAS,EACpB,UAAU,CAAC,KAAK,CACjB,CAAC;YAEF,OAAO,CAAC,SAAS,CAAC,GAAG,SAAS,CAAC;YAC/B,GAAG,CAAC,SAAS,SAAS,CAAC,KAAK,MAAM,SAAS,CAAC,QAAQ,IAAI,SAAS,CAAC,SAAS,KAAK,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;QACxH,CAAC;QAED,WAAW,CAAC,IAAI,CAAC;YACf,MAAM,EAAE,IAAI,CAAC,EAAE;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,OAAO;SACR,CAAC,CAAC;IACL,CAAC;IAED,kDAAkD;IAClD,GAAG,CAAC,wDAAwD,CAAC,CAAC;IAC9D,MAAM,OAAO,GAAG,cAAc,CAAC,WAAW,EAAE,UAAU,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAEtE,MAAM,gBAAgB,GAAqB;QACzC,IAAI,EAAE;YACJ,aAAa,EAAE,cAAc;YAC7B,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,UAAU;SACX;QACD,OAAO;QACP,KAAK,EAAE,WAAW;KACnB,CAAC;IAEF,gBAAgB;IAChB,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,qBAAqB,CAAC;IAC7D,SAAS,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1C,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,cAAc,CAAC,EAAE,kBAAkB,CAAC,gBAAgB,CAAC,CAAC,CAAC;IACrF,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,CAAC,EAAE,sBAAsB,CAAC,gBAAgB,CAAC,CAAC,CAAC;IAEtF,GAAG,CAAC,sBAAsB,SAAS,GAAG,CAAC,CAAC;IAExC,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,gFAAgF;AAEhF,SAAS,cAAc,CACrB,KAAmB,EACnB,UAAgC,EAChC,IAAsB;IAEtB,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,aAAa,GAAmC,EAAoC,CAAC;IAE3F,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC;QAC3D,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,UAAU,IAAI,CAAC,
CAAC,CAAC;QAChE,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC;QAEpE,aAAa,CAAC,IAAI,CAAC,GAAG;YACpB,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC;YAChD,QAAQ,EAAE,SAAS,GAAG,CAAC;YACvB,MAAM;YACN,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC;SAClD,CAAC;IACJ,CAAC;IAED,MAAM,OAAO,GAAG,aAAa,CAAC,WAAW,CAAC,EAAE,MAAM,IAAI,EAAE,CAAC;IACzD,MAAM,OAAO,GAAG,aAAa,CAAC,mBAAmB,CAAC,EAAE,MAAM,IAAI,EAAE,CAAC;IACjE,MAAM,OAAO,GAAG,aAAa,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,EAAE,CAAC;IAEpE,MAAM,aAAa,GAAG,CAAC,aAAa,CAAC,WAAW,CAAC,EAAE,SAAS,IAAI,CAAC,CAAC;UAC9D,CAAC,aAAa,CAAC,mBAAmB,CAAC,EAAE,SAAS,IAAI,CAAC,CAAC,CAAC;IACzD,MAAM,eAAe,GAAG,CAAC,aAAa,CAAC,WAAW,CAAC,EAAE,SAAS,IAAI,CAAC,CAAC;UAChE,CAAC,aAAa,CAAC,sBAAsB,CAAC,EAAE,SAAS,IAAI,CAAC,CAAC,CAAC;IAE5D,MAAM,OAAO,GAAqB;QAChC,QAAQ,EAAE,CAAC;QACX,UAAU,EAAE,aAAa;QACzB,aAAa;QACb,eAAe;KAChB,CAAC;IAEF,oDAAoD;IACpD,IAAI,IAAI,KAAK,MAAM,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAC5C,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC;QACzB,OAAO,CAAC,UAAU,GAAG,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAC/C,OAAO,CAAC,IAAI,GAAG,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,YAAY,CAAC,UAAgC;IACpD,MAAM,aAAa,GAAG,EAAoC,CAAC;IAC3D,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IACjF,CAAC;IACD,OAAO;QACL,QAAQ,EAAE,CAAC;QACX,UAAU,EAAE,aAAa;QACzB,aAAa,EAAE,CAAC;QAChB,eAAe,EAAE,CAAC;KACnB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { BenchmarkTask, GeneratedFile, RunResult } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Score generated files against a benchmark task's expected patterns.
|
|
4
|
+
* Returns a RunResult with per-check scoring breakdown.
|
|
5
|
+
*/
|
|
6
|
+
// Parameters (per this signature):
//   files      - the generated files to score
//   task       - the benchmark task whose expected patterns are checked
//   tokensUsed - numeric token count for the generation run
//   latencyMs  - numeric latency for the generation run, in milliseconds
//   error      - optional error string from the generation step
// NOTE(review): the benchmark runner passes tokensUsed, latencyMs and error
// straight from the code generator's result; presumably they are carried into
// the returned RunResult — confirm against scorer.js.
export declare function scoreGeneratedOutput(files: GeneratedFile[], task: BenchmarkTask, tokensUsed: number, latencyMs: number, error?: string): RunResult;
|