coding-agent-benchmarks 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -0
- package/dist/evaluator.d.ts +20 -2
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +50 -51
- package/dist/evaluator.js.map +1 -1
- package/dist/reporter.d.ts +35 -0
- package/dist/reporter.d.ts.map +1 -0
- package/dist/reporter.js +232 -0
- package/dist/reporter.js.map +1 -0
- package/dist/runner.js +75 -63
- package/dist/runner.js.map +1 -1
- package/package.json +21 -4
package/README.md
CHANGED
|
@@ -298,6 +298,43 @@ Test LLM judge with a custom prompt (for debugging).
|
|
|
298
298
|
**Options:**
|
|
299
299
|
- `--model <model>`: LLM model to use
|
|
300
300
|
|
|
301
|
+
## Understanding Output
|
|
302
|
+
|
|
303
|
+
When running evaluations, each scenario displays a live status that updates as it progresses:
|
|
304
|
+
|
|
305
|
+
```
|
|
306
|
+
Evaluating 3 scenario(s)...
|
|
307
|
+
|
|
308
|
+
✓ [1/3] typescript-no-any PASS (score: 1.00) 14.8s
|
|
309
|
+
✗ [2/3] react-inline-styles FAIL (score: 0.60) 22.1s
|
|
310
|
+
○ [3/3] async-error-handling SKIP (error) 2m 5s
|
|
311
|
+
|
|
312
|
+
============================================================
|
|
313
|
+
EVALUATION SUMMARY
|
|
314
|
+
============================================================
|
|
315
|
+
Total scenarios: 3
|
|
316
|
+
Passed: 1
|
|
317
|
+
Failed: 1
|
|
318
|
+
Skipped: 1
|
|
319
|
+
...
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### Status Indicators
|
|
323
|
+
|
|
324
|
+
| Status | Symbol | Meaning |
|
|
325
|
+
|--------|--------|---------|
|
|
326
|
+
| **PASS** | ✓ | Scenario passed validation (score ≥ 0.8, no violations) |
|
|
327
|
+
| **FAIL** | ✗ | Scenario was evaluated but didn't pass (low score or violations) |
|
|
328
|
+
| **SKIP** | ○ | Scenario couldn't be evaluated due to an error |
|
|
329
|
+
|
|
330
|
+
### Common Causes for SKIP
|
|
331
|
+
|
|
332
|
+
- **Timeout**: Code generation exceeded the configured timeout
|
|
333
|
+
- **Adapter failure**: The CLI (Copilot or Claude Code) crashed or returned an error
|
|
334
|
+
- **File system errors**: Couldn't read context files or write generated code
|
|
335
|
+
|
|
336
|
+
> **Note**: In interactive terminals, you'll see a spinner animation (⠋) while scenarios are running. In CI/non-TTY environments, output falls back to simple line-by-line logging.
|
|
337
|
+
|
|
301
338
|
## Programmatic Usage
|
|
302
339
|
|
|
303
340
|
You can also use the framework programmatically:
|
package/dist/evaluator.d.ts
CHANGED
|
@@ -1,7 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Main evaluation engine
|
|
3
3
|
*/
|
|
4
|
-
import {
|
|
4
|
+
import { EventEmitter } from "events";
|
|
5
|
+
import { AdapterType, TestScenario, EvaluationResult, EvaluationReport } from "./types";
|
|
6
|
+
/**
|
|
7
|
+
* Events emitted by the Evaluator during execution
|
|
8
|
+
*/
|
|
9
|
+
export interface EvaluatorEvents {
|
|
10
|
+
"evaluation:start": (scenarios: TestScenario[]) => void;
|
|
11
|
+
"scenario:start": (scenarioId: string, scenario: TestScenario) => void;
|
|
12
|
+
"scenario:generating": (scenarioId: string) => void;
|
|
13
|
+
"scenario:validating": (scenarioId: string) => void;
|
|
14
|
+
"scenario:complete": (scenarioId: string, result: EvaluationResult) => void;
|
|
15
|
+
"evaluation:complete": (report: EvaluationReport) => void;
|
|
16
|
+
log: (message: string) => void;
|
|
17
|
+
}
|
|
5
18
|
export interface EvaluatorOptions {
|
|
6
19
|
adapter: AdapterType;
|
|
7
20
|
model?: string;
|
|
@@ -11,12 +24,17 @@ export interface EvaluatorOptions {
|
|
|
11
24
|
saveBaseline?: boolean;
|
|
12
25
|
compareBaseline?: boolean;
|
|
13
26
|
}
|
|
14
|
-
export declare class Evaluator {
|
|
27
|
+
export declare class Evaluator extends EventEmitter {
|
|
15
28
|
private adapter;
|
|
16
29
|
private workspaceRoot;
|
|
17
30
|
private baselineManager;
|
|
18
31
|
private options;
|
|
19
32
|
constructor(options: EvaluatorOptions);
|
|
33
|
+
/**
|
|
34
|
+
* Type-safe event emitter methods
|
|
35
|
+
*/
|
|
36
|
+
on<K extends keyof EvaluatorEvents>(event: K, listener: EvaluatorEvents[K]): this;
|
|
37
|
+
emit<K extends keyof EvaluatorEvents>(event: K, ...args: Parameters<EvaluatorEvents[K]>): boolean;
|
|
20
38
|
/**
|
|
21
39
|
* Create adapter instance based on type
|
|
22
40
|
*/
|
package/dist/evaluator.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,WAAW,EAEX,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAEjB,MAAM,SAAS,CAAC;AASjB,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,qBAAa,
|
|
1
|
+
{"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EACL,WAAW,EAEX,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAEjB,MAAM,SAAS,CAAC;AASjB;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,kBAAkB,EAAE,CAAC,SAAS,EAAE,YAAY,EAAE,KAAK,IAAI,CAAC;IACxD,gBAAgB,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,KAAK,IAAI,CAAC;IACvE,qBAAqB,EAAE,CAAC,UAAU,EAAE,MAAM,KAAK,IAAI,CAAC;IACpD,qBAAqB,EAAE,CAAC,UAAU,EAAE,MAAM,KAAK,IAAI,CAAC;IACpD,mBAAmB,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAC5E,qBAAqB,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAC1D,GAAG,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;CAChC;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,qBAAa,SAAU,SAAQ,YAAY;IACzC,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,eAAe,CAAkB;IACzC,OAAO,CAAC,OAAO,CAAmB;gBAEtB,OAAO,EAAE,gBAAgB;IAUrC;;OAEG;IACM,EAAE,CAAC,CAAC,SAAS,MAAM,eAAe,EACzC,KAAK,EAAE,CAAC,EACR,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC,GAC3B,IAAI;IAIE,IAAI,CAAC,CAAC,SAAS,MAAM,eAAe,EAC3C,KAAK,EAAE,CAAC,EACR,GAAG,IAAI,EAAE,UAAU,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,GACtC,OAAO;IAIV;;OAEG;IACH,OAAO,CAAC,aAAa;IAiBrB;;OAEG;IACG,wBAAwB,IAAI,OAAO,CAAC,OAAO,CAAC;IAIlD;;OAEG;IACH,eAAe,CACb,SAAS,EAAE,YAAY,EAAE,EACzB,OAAO,EAAE;QACP,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;KACjB,GACA,YAAY,EAAE;IA0BjB;;OAEG;IACG,gBAAgB,CAAC,QAAQ,EAAE,YAAY,GAAG,OAAO,CAAC,gBAAgB,CAAC;IA6JzE;;OAEG;IACG,QAAQ,CAAC,SAAS,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,gBAAgB,CAAC;CAsDrE"}
|
package/dist/evaluator.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.Evaluator = void 0;
|
|
7
|
+
const events_1 = require("events");
|
|
7
8
|
const copilotCLI_1 = require("./adapters/copilotCLI");
|
|
8
9
|
const claudeCodeCLI_1 = require("./adapters/claudeCodeCLI");
|
|
9
10
|
const patternValidator_1 = require("./validators/patternValidator");
|
|
@@ -11,25 +12,35 @@ const llmJudge_1 = require("./validators/llmJudge");
|
|
|
11
12
|
const eslintValidator_1 = require("./validators/eslintValidator");
|
|
12
13
|
const workspaceUtils_1 = require("./utils/workspaceUtils");
|
|
13
14
|
const baselineManager_1 = require("./utils/baselineManager");
|
|
14
|
-
class Evaluator {
|
|
15
|
+
class Evaluator extends events_1.EventEmitter {
|
|
15
16
|
constructor(options) {
|
|
17
|
+
super();
|
|
16
18
|
this.options = options;
|
|
17
19
|
this.workspaceRoot = (0, workspaceUtils_1.resolveWorkspaceRoot)(options.workspaceRoot);
|
|
18
20
|
this.baselineManager = new baselineManager_1.BaselineManager(this.workspaceRoot);
|
|
19
21
|
// Create adapter based on type
|
|
20
22
|
this.adapter = this.createAdapter(options.adapter);
|
|
21
23
|
}
|
|
24
|
+
/**
|
|
25
|
+
* Type-safe event emitter methods
|
|
26
|
+
*/
|
|
27
|
+
on(event, listener) {
|
|
28
|
+
return super.on(event, listener);
|
|
29
|
+
}
|
|
30
|
+
emit(event, ...args) {
|
|
31
|
+
return super.emit(event, ...args);
|
|
32
|
+
}
|
|
22
33
|
/**
|
|
23
34
|
* Create adapter instance based on type
|
|
24
35
|
*/
|
|
25
36
|
createAdapter(type) {
|
|
26
37
|
switch (type) {
|
|
27
|
-
case
|
|
38
|
+
case "copilot":
|
|
28
39
|
return new copilotCLI_1.CopilotCLIAdapter({
|
|
29
40
|
workspaceRoot: this.workspaceRoot,
|
|
30
41
|
model: this.options.model,
|
|
31
42
|
});
|
|
32
|
-
case
|
|
43
|
+
case "claude-code":
|
|
33
44
|
return new claudeCodeCLI_1.ClaudeCodeCLIAdapter({
|
|
34
45
|
workspaceRoot: this.workspaceRoot,
|
|
35
46
|
model: this.options.model,
|
|
@@ -51,18 +62,18 @@ class Evaluator {
|
|
|
51
62
|
let filtered = scenarios;
|
|
52
63
|
// Filter by scenario ID pattern
|
|
53
64
|
if (filters.scenarioPattern) {
|
|
54
|
-
const pattern = filters.scenarioPattern.replace(/\*/g,
|
|
65
|
+
const pattern = filters.scenarioPattern.replace(/\*/g, ".*");
|
|
55
66
|
const regex = new RegExp(pattern);
|
|
56
|
-
filtered = filtered.filter(s => regex.test(s.id));
|
|
67
|
+
filtered = filtered.filter((s) => regex.test(s.id));
|
|
57
68
|
}
|
|
58
69
|
// Filter by category
|
|
59
70
|
if (filters.category) {
|
|
60
|
-
const categories = filters.category.split(
|
|
61
|
-
filtered = filtered.filter(s => categories.includes(s.category));
|
|
71
|
+
const categories = filters.category.split(",").map((c) => c.trim());
|
|
72
|
+
filtered = filtered.filter((s) => categories.includes(s.category));
|
|
62
73
|
}
|
|
63
74
|
// Filter by tags
|
|
64
75
|
if (filters.tags && filters.tags.length > 0) {
|
|
65
|
-
filtered = filtered.filter(s => filters.tags.some(tag => s.tags.includes(tag)));
|
|
76
|
+
filtered = filtered.filter((s) => filters.tags.some((tag) => s.tags.includes(tag)));
|
|
66
77
|
}
|
|
67
78
|
return filtered;
|
|
68
79
|
}
|
|
@@ -73,12 +84,13 @@ class Evaluator {
|
|
|
73
84
|
const startTime = Date.now();
|
|
74
85
|
try {
|
|
75
86
|
if (this.options.verbose) {
|
|
76
|
-
|
|
77
|
-
|
|
87
|
+
this.emit("log", `\nEvaluating scenario: ${scenario.id}`);
|
|
88
|
+
this.emit("log", ` Description: ${scenario.description}`);
|
|
78
89
|
}
|
|
79
|
-
//
|
|
90
|
+
// Emit generating phase
|
|
91
|
+
this.emit("scenario:generating", scenario.id);
|
|
80
92
|
if (this.options.verbose) {
|
|
81
|
-
|
|
93
|
+
this.emit("log", " Generating code...");
|
|
82
94
|
}
|
|
83
95
|
// Resolve timeout (null = no timeout, undefined = use defaults)
|
|
84
96
|
let timeout;
|
|
@@ -96,8 +108,10 @@ class Evaluator {
|
|
|
96
108
|
}
|
|
97
109
|
const generatedFiles = await this.adapter.generate(scenario.prompt, scenario.contextFiles, timeout);
|
|
98
110
|
if (this.options.verbose) {
|
|
99
|
-
|
|
111
|
+
this.emit("log", ` Generated ${generatedFiles.length} file(s)`);
|
|
100
112
|
}
|
|
113
|
+
// Emit validating phase
|
|
114
|
+
this.emit("scenario:validating", scenario.id);
|
|
101
115
|
// Run validators
|
|
102
116
|
const validationResults = [];
|
|
103
117
|
// Pattern validator
|
|
@@ -105,29 +119,30 @@ class Evaluator {
|
|
|
105
119
|
const patternResult = await patternValidator.validate(generatedFiles, scenario);
|
|
106
120
|
validationResults.push(patternResult);
|
|
107
121
|
if (this.options.verbose && patternResult.score >= 0) {
|
|
108
|
-
|
|
122
|
+
this.emit("log", ` Pattern validation: ${patternResult.score.toFixed(2)}`);
|
|
109
123
|
}
|
|
110
124
|
// LLM judge validator
|
|
111
125
|
const llmValidator = new llmJudge_1.LLMJudgeValidator(this.workspaceRoot, this.options.model);
|
|
112
126
|
const llmResult = await llmValidator.validate(generatedFiles, scenario);
|
|
113
127
|
validationResults.push(llmResult);
|
|
114
128
|
if (this.options.verbose && llmResult.score >= 0) {
|
|
115
|
-
|
|
129
|
+
this.emit("log", ` LLM judge: ${llmResult.score.toFixed(2)}`);
|
|
116
130
|
}
|
|
117
131
|
// ESLint validator
|
|
118
132
|
const eslintValidator = new eslintValidator_1.ESLintValidator(this.workspaceRoot);
|
|
119
133
|
const eslintResult = await eslintValidator.validate(generatedFiles, scenario);
|
|
120
134
|
validationResults.push(eslintResult);
|
|
121
135
|
if (this.options.verbose && eslintResult.score >= 0) {
|
|
122
|
-
|
|
136
|
+
this.emit("log", ` ESLint: ${eslintResult.score.toFixed(2)}`);
|
|
123
137
|
}
|
|
124
138
|
// Calculate overall score (average of non-skipped validators)
|
|
125
|
-
const activeResults = validationResults.filter(r => r.score >= 0);
|
|
139
|
+
const activeResults = validationResults.filter((r) => r.score >= 0);
|
|
126
140
|
const overallScore = activeResults.length > 0
|
|
127
|
-
? activeResults.reduce((sum, r) => sum + r.score, 0) /
|
|
141
|
+
? activeResults.reduce((sum, r) => sum + r.score, 0) /
|
|
142
|
+
activeResults.length
|
|
128
143
|
: 0;
|
|
129
144
|
// Collect all violations
|
|
130
|
-
const allViolations = validationResults.flatMap(r => r.violations);
|
|
145
|
+
const allViolations = validationResults.flatMap((r) => r.violations);
|
|
131
146
|
// Check if passed (score above threshold and no violations)
|
|
132
147
|
const passed = overallScore >= 0.8 && allViolations.length === 0;
|
|
133
148
|
const result = {
|
|
@@ -140,26 +155,26 @@ class Evaluator {
|
|
|
140
155
|
};
|
|
141
156
|
// Compare with baseline if requested
|
|
142
157
|
if (this.options.compareBaseline) {
|
|
143
|
-
const comparison = this.baselineManager.compareWithBaseline(result, this.options.adapter, this.options.model ||
|
|
158
|
+
const comparison = this.baselineManager.compareWithBaseline(result, this.options.adapter, this.options.model || "default");
|
|
144
159
|
if (comparison) {
|
|
145
160
|
result.baselineComparison = comparison;
|
|
146
161
|
}
|
|
147
162
|
}
|
|
148
163
|
// Save baseline if requested
|
|
149
164
|
if (this.options.saveBaseline) {
|
|
150
|
-
this.baselineManager.saveBaseline(result, this.options.adapter, this.options.model ||
|
|
165
|
+
this.baselineManager.saveBaseline(result, this.options.adapter, this.options.model || "default");
|
|
151
166
|
}
|
|
152
167
|
return result;
|
|
153
168
|
}
|
|
154
169
|
catch (error) {
|
|
155
170
|
const errorMessage = String(error);
|
|
156
|
-
const isTimeout = errorMessage.includes(
|
|
171
|
+
const isTimeout = errorMessage.includes("timed out");
|
|
157
172
|
// Create a violation for timeout errors
|
|
158
173
|
const violations = isTimeout
|
|
159
174
|
? [
|
|
160
175
|
{
|
|
161
|
-
type:
|
|
162
|
-
message:
|
|
176
|
+
type: "pattern",
|
|
177
|
+
message: "Code generation timed out",
|
|
163
178
|
severity: scenario.severity,
|
|
164
179
|
details: errorMessage,
|
|
165
180
|
},
|
|
@@ -182,39 +197,21 @@ class Evaluator {
|
|
|
182
197
|
async evaluate(scenarios) {
|
|
183
198
|
const startTime = Date.now();
|
|
184
199
|
const results = [];
|
|
185
|
-
|
|
200
|
+
// Emit evaluation start event
|
|
201
|
+
this.emit("evaluation:start", scenarios);
|
|
186
202
|
for (let i = 0; i < scenarios.length; i++) {
|
|
187
203
|
const scenario = scenarios[i];
|
|
188
|
-
|
|
204
|
+
// Emit scenario start event
|
|
205
|
+
this.emit("scenario:start", scenario.id, scenario);
|
|
189
206
|
const result = await this.evaluateScenario(scenario);
|
|
190
207
|
results.push(result);
|
|
191
|
-
//
|
|
192
|
-
|
|
193
|
-
console.log(` ✓ PASSED (score: ${result.score.toFixed(2)})`);
|
|
194
|
-
}
|
|
195
|
-
else {
|
|
196
|
-
console.log(` ✗ FAILED (score: ${result.score.toFixed(2)})`);
|
|
197
|
-
if (result.violations.length > 0) {
|
|
198
|
-
console.log(` ${result.violations.length} violation(s):\n`);
|
|
199
|
-
result.violations.forEach((v, idx) => {
|
|
200
|
-
console.log(` ${idx + 1}. [${v.type}] ${v.message}`);
|
|
201
|
-
if (v.file) {
|
|
202
|
-
console.log(` File: ${v.file}${v.line ? `:${v.line}` : ''}`);
|
|
203
|
-
}
|
|
204
|
-
if (v.details) {
|
|
205
|
-
console.log(` Details: ${v.details}`);
|
|
206
|
-
}
|
|
207
|
-
});
|
|
208
|
-
}
|
|
209
|
-
if (result.error) {
|
|
210
|
-
console.log(` Error: ${result.error}`);
|
|
211
|
-
}
|
|
212
|
-
}
|
|
208
|
+
// Emit scenario complete event
|
|
209
|
+
this.emit("scenario:complete", scenario.id, result);
|
|
213
210
|
}
|
|
214
211
|
// Calculate summary statistics
|
|
215
|
-
const passed = results.filter(r => r.passed).length;
|
|
216
|
-
const failed = results.filter(r => !r.passed && !r.error).length;
|
|
217
|
-
const skipped = results.filter(r => r.error).length;
|
|
212
|
+
const passed = results.filter((r) => r.passed).length;
|
|
213
|
+
const failed = results.filter((r) => !r.passed && !r.error).length;
|
|
214
|
+
const skipped = results.filter((r) => r.error).length;
|
|
218
215
|
const totalViolations = results.reduce((sum, r) => sum + r.violations.length, 0);
|
|
219
216
|
const averageScore = results.length > 0
|
|
220
217
|
? results.reduce((sum, r) => sum + r.score, 0) / results.length
|
|
@@ -234,6 +231,8 @@ class Evaluator {
|
|
|
234
231
|
},
|
|
235
232
|
totalDuration: Date.now() - startTime,
|
|
236
233
|
};
|
|
234
|
+
// Emit evaluation complete event
|
|
235
|
+
this.emit("evaluation:complete", report);
|
|
237
236
|
return report;
|
|
238
237
|
}
|
|
239
238
|
}
|
package/dist/evaluator.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":";AAAA;;GAEG;;;
|
|
1
|
+
{"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../src/evaluator.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,mCAAsC;AAStC,sDAA0D;AAC1D,4DAAgE;AAChE,oEAAiE;AACjE,oDAA0D;AAC1D,kEAA+D;AAC/D,2DAA8D;AAC9D,6DAA0D;AAyB1D,MAAa,SAAU,SAAQ,qBAAY;IAMzC,YAAY,OAAyB;QACnC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACjE,IAAI,CAAC,eAAe,GAAG,IAAI,iCAAe,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE/D,+BAA+B;QAC/B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACrD,CAAC;IAED;;OAEG;IACM,EAAE,CACT,KAAQ,EACR,QAA4B;QAE5B,OAAO,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IACnC,CAAC;IAEQ,IAAI,CACX,KAAQ,EACR,GAAG,IAAoC;QAEvC,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,IAAI,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAiB;QACrC,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,SAAS;gBACZ,OAAO,IAAI,8BAAiB,CAAC;oBAC3B,aAAa,EAAE,IAAI,CAAC,aAAa;oBACjC,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK;iBAC1B,CAAC,CAAC;YACL,KAAK,aAAa;gBAChB,OAAO,IAAI,oCAAoB,CAAC;oBAC9B,aAAa,EAAE,IAAI,CAAC,aAAa;oBACjC,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK;iBAC1B,CAAC,CAAC;YACL;gBACE,MAAM,IAAI,KAAK,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,wBAAwB;QAC5B,OAAO,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,eAAe,CACb,SAAyB,EACzB,OAIC;QAED,IAAI,QAAQ,GAAG,SAAS,CAAC;QAEzB,gCAAgC;QAChC,IAAI,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,OAAO,CAAC,eAAe,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;YAC7D,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC;YAClC,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACtD,CAAC;QAED,qBAAqB;QACrB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACrB,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACpE,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;QACrE,CAAC;QAED,iBAAiB;QACjB,IAAI,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI
,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC/B,OAAO,CAAC,IAAK,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAClD,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,gBAAgB,CAAC,QAAsB;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,IAAI,CAAC;YACH,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;gBACzB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,0BAA0B,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC1D,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,kBAAkB,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;YAC7D,CAAC;YAED,wBAAwB;YACxB,IAAI,CAAC,IAAI,CAAC,qBAAqB,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YAE9C,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;gBACzB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,sBAAsB,CAAC,CAAC;YAC3C,CAAC;YAED,gEAAgE;YAChE,IAAI,OAAsB,CAAC;YAC3B,IAAI,QAAQ,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;gBACnC,6DAA6D;gBAC7D,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;YAC7B,CAAC;iBAAM,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;gBACrD,wDAAwD;gBACxD,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC;YACxC,CAAC;iBAAM,CAAC;gBACN,8BAA8B;gBAC9B,OAAO,GAAG,MAAM,CAAC;YACnB,CAAC;YAED,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,QAAQ,CAChD,QAAQ,CAAC,MAAM,EACf,QAAQ,CAAC,YAAY,EACrB,OAAO,CACR,CAAC;YAEF,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;gBACzB,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,eAAe,cAAc,CAAC,MAAM,UAAU,CAAC,CAAC;YACnE,CAAC;YAED,wBAAwB;YACxB,IAAI,CAAC,IAAI,CAAC,qBAAqB,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YAE9C,iBAAiB;YACjB,MAAM,iBAAiB,GAAuB,EAAE,CAAC;YAEjD,oBAAoB;YACpB,MAAM,gBAAgB,GAAG,IAAI,mCAAgB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YAClE,MAAM,aAAa,GAAG,MAAM,gBAAgB,CAAC,QAAQ,CACnD,cAAc,EACd,QAAQ,CACT,CAAC;YACF,iBAAiB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YAEtC,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,aAAa,CAAC,KAAK,IAAI,CAAC,EAAE,CAAC;gBACrD,IAAI,CAAC,IAAI,CACP,KAAK,EACL,yBAAyB,aAAa,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAC1D,CAAC;YACJ,CAAC;YAED,sBAAsB;YACtB,MAAM,YAAY,GAAG,IAAI,4BAAiB,CACxC,IAAI,CAAC,aAAa,EAClB,IAAI,CAAC,OAAO,CAAC,KAAK,CACnB,CAAC;YACF,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;YACxE,i
BAAiB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAElC,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,SAAS,CAAC,KAAK,IAAI,CAAC,EAAE,CAAC;gBACjD,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,gBAAgB,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACjE,CAAC;YAED,mBAAmB;YACnB,MAAM,eAAe,GAAG,IAAI,iCAAe,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YAChE,MAAM,YAAY,GAAG,MAAM,eAAe,CAAC,QAAQ,CACjD,cAAc,EACd,QAAQ,CACT,CAAC;YACF,iBAAiB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAErC,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,YAAY,CAAC,KAAK,IAAI,CAAC,EAAE,CAAC;gBACpD,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,aAAa,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACjE,CAAC;YAED,8DAA8D;YAC9D,MAAM,aAAa,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC;YACpE,MAAM,YAAY,GAChB,aAAa,CAAC,MAAM,GAAG,CAAC;gBACtB,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;oBAClD,aAAa,CAAC,MAAM;gBACtB,CAAC,CAAC,CAAC,CAAC;YAER,yBAAyB;YACzB,MAAM,aAAa,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;YAErE,4DAA4D;YAC5D,MAAM,MAAM,GAAG,YAAY,IAAI,GAAG,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,CAAC;YAEjE,MAAM,MAAM,GAAqB;gBAC/B,QAAQ;gBACR,MAAM;gBACN,KAAK,EAAE,YAAY;gBACnB,iBAAiB;gBACjB,UAAU,EAAE,aAAa;gBACzB,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;aACjC,CAAC;YAEF,qCAAqC;YACrC,IAAI,IAAI,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;gBACjC,MAAM,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,mBAAmB,CACzD,MAAM,EACN,IAAI,CAAC,OAAO,CAAC,OAAO,EACpB,IAAI,CAAC,OAAO,CAAC,KAAK,IAAI,SAAS,CAChC,CAAC;gBACF,IAAI,UAAU,EAAE,CAAC;oBACf,MAAM,CAAC,kBAAkB,GAAG,UAAU,CAAC;gBACzC,CAAC;YACH,CAAC;YAED,6BAA6B;YAC7B,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC;gBAC9B,IAAI,CAAC,eAAe,CAAC,YAAY,CAC/B,MAAM,EACN,IAAI,CAAC,OAAO,CAAC,OAAO,EACpB,IAAI,CAAC,OAAO,CAAC,KAAK,IAAI,SAAS,CAChC,CAAC;YACJ,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAErD,wCAAwC;YACxC,MAAM,UAAU,GAAG,SAAS;gBAC1B,CAAC,CAAC;oBACE;wBACE,IAAI,EAAE,SAAkB;wBACxB,OAAO,EA
AE,2BAA2B;wBACpC,QAAQ,EAAE,QAAQ,CAAC,QAAQ;wBAC3B,OAAO,EAAE,YAAY;qBACtB;iBACF;gBACH,CAAC,CAAC,EAAE,CAAC;YAEP,OAAO;gBACL,QAAQ;gBACR,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,iBAAiB,EAAE,EAAE;gBACrB,UAAU;gBACV,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBAChC,KAAK,EAAE,sBAAsB,KAAK,EAAE;aACrC,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CAAC,SAAyB;QACtC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAuB,EAAE,CAAC;QAEvC,8BAA8B;QAC9B,IAAI,CAAC,IAAI,CAAC,kBAAkB,EAAE,SAAS,CAAC,CAAC;QAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YAE9B,4BAA4B;YAC5B,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,QAAQ,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAC;YAEnD,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;YACrD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAErB,+BAA+B;YAC/B,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACtD,CAAC;QAED,+BAA+B;QAC/B,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;QACtD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QACnE,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QACtD,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,CACpC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EACrC,CAAC,CACF,CAAC;QACF,MAAM,YAAY,GAChB,OAAO,CAAC,MAAM,GAAG,CAAC;YAChB,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM;YAC/D,CAAC,CAAC,CAAC,CAAC;QAER,MAAM,MAAM,GAAqB;YAC/B,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO;YAC7B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK;YACzB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,OAAO;YACP,OAAO,EAAE;gBACP,KAAK,EAAE,SAAS,CAAC,MAAM;gBACvB,MAAM;gBACN,MAAM;gBACN,OAAO;gBACP,YAAY;gBACZ,eAAe;aAChB;YACD,aAAa,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;SACtC,CAAC;QAEF,iCAAiC;QACjC,IAAI,CAAC,IAAI,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;QAEzC,OAAO,MA
AM,CAAC;IAChB,CAAC;CACF;AAzTD,8BAyTC"}
|
package/dist/reporter.d.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { TestScenario, EvaluationResult, EvaluationReport } from "./types";
|
|
2
|
+
export type ExecutionPhase = "pending" | "generating" | "validating" | "complete";
|
|
3
|
+
export type ScenarioStatus = "runs" | "pass" | "fail" | "skip";
|
|
4
|
+
export declare class ProgressReporter {
|
|
5
|
+
private isInteractive;
|
|
6
|
+
private verbose;
|
|
7
|
+
private scenarios;
|
|
8
|
+
private scenarioOrder;
|
|
9
|
+
private currentIndex;
|
|
10
|
+
private totalScenarios;
|
|
11
|
+
private spinnerFrame;
|
|
12
|
+
private spinnerInterval?;
|
|
13
|
+
private startTime;
|
|
14
|
+
private verboseBuffer;
|
|
15
|
+
constructor(options?: {
|
|
16
|
+
verbose?: boolean;
|
|
17
|
+
});
|
|
18
|
+
start(scenarios: TestScenario[]): void;
|
|
19
|
+
onScenarioStart(scenarioId: string): void;
|
|
20
|
+
onScenarioValidating(scenarioId: string): void;
|
|
21
|
+
onScenarioComplete(scenarioId: string, result: EvaluationResult): void;
|
|
22
|
+
log(message: string): void;
|
|
23
|
+
finish(report: EvaluationReport): void;
|
|
24
|
+
private getScenarioIndex;
|
|
25
|
+
private startSpinner;
|
|
26
|
+
private stopSpinner;
|
|
27
|
+
private render;
|
|
28
|
+
private formatScenarioLine;
|
|
29
|
+
private printScenarioResult;
|
|
30
|
+
private printViolations;
|
|
31
|
+
private flushVerboseBuffer;
|
|
32
|
+
private printSummary;
|
|
33
|
+
private formatDuration;
|
|
34
|
+
}
|
|
35
|
+
//# sourceMappingURL=reporter.d.ts.map
|
package/dist/reporter.d.ts.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reporter.d.ts","sourceRoot":"","sources":["../src/reporter.ts"],"names":[],"mappings":"AAEA,OAAO,EACL,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAEjB,MAAM,SAAS,CAAC;AAEjB,MAAM,MAAM,cAAc,GACtB,SAAS,GACT,YAAY,GACZ,YAAY,GACZ,UAAU,CAAC;AAEf,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,GAAG,MAAM,CAAC;AAY/D,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,aAAa,CAAU;IAC/B,OAAO,CAAC,OAAO,CAAU;IACzB,OAAO,CAAC,SAAS,CAAyC;IAC1D,OAAO,CAAC,aAAa,CAAgB;IACrC,OAAO,CAAC,YAAY,CAAa;IACjC,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,YAAY,CAAa;IACjC,OAAO,CAAC,eAAe,CAAC,CAAiC;IACzD,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,aAAa,CAAgB;gBAEzB,OAAO,GAAE;QAAE,OAAO,CAAC,EAAE,OAAO,CAAA;KAAO;IAK/C,KAAK,CAAC,SAAS,EAAE,YAAY,EAAE,GAAG,IAAI;IAsBtC,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAoBzC,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAa9C,kBAAkB,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAiBtE,GAAG,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IAQ1B,MAAM,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI;IAUtC,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,YAAY;IASpB,OAAO,CAAC,WAAW;IAOnB,OAAO,CAAC,MAAM;IAgBd,OAAO,CAAC,kBAAkB;IAkC1B,OAAO,CAAC,mBAAmB;IAe3B,OAAO,CAAC,eAAe;IAevB,OAAO,CAAC,kBAAkB;IAW1B,OAAO,CAAC,YAAY;IAkCpB,OAAO,CAAC,cAAc;CAOvB"}
|
package/dist/reporter.js
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.ProgressReporter = void 0;
|
|
7
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
8
|
+
const log_update_1 = __importDefault(require("log-update"));
|
|
9
|
+
const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
|
|
10
|
+
class ProgressReporter {
|
|
11
|
+
constructor(options = {}) {
|
|
12
|
+
this.scenarios = new Map();
|
|
13
|
+
this.scenarioOrder = [];
|
|
14
|
+
this.currentIndex = 0;
|
|
15
|
+
this.totalScenarios = 0;
|
|
16
|
+
this.spinnerFrame = 0;
|
|
17
|
+
this.startTime = 0;
|
|
18
|
+
this.verboseBuffer = [];
|
|
19
|
+
this.isInteractive = process.stdout.isTTY === true;
|
|
20
|
+
this.verbose = options.verbose ?? false;
|
|
21
|
+
}
|
|
22
|
+
start(scenarios) {
|
|
23
|
+
this.startTime = Date.now();
|
|
24
|
+
this.totalScenarios = scenarios.length;
|
|
25
|
+
this.scenarioOrder = scenarios.map((s) => s.id);
|
|
26
|
+
this.currentIndex = 0;
|
|
27
|
+
for (const scenario of scenarios) {
|
|
28
|
+
this.scenarios.set(scenario.id, {
|
|
29
|
+
scenario,
|
|
30
|
+
phase: "pending",
|
|
31
|
+
status: "runs",
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
if (this.isInteractive) {
|
|
35
|
+
console.log(`\nEvaluating ${scenarios.length} scenario(s)...\n`);
|
|
36
|
+
this.startSpinner();
|
|
37
|
+
}
|
|
38
|
+
else {
|
|
39
|
+
console.log(`Evaluating ${scenarios.length} scenario(s)...`);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
onScenarioStart(scenarioId) {
|
|
43
|
+
const state = this.scenarios.get(scenarioId);
|
|
44
|
+
if (!state)
|
|
45
|
+
return;
|
|
46
|
+
state.phase = "generating";
|
|
47
|
+
state.status = "runs";
|
|
48
|
+
state.startTime = Date.now();
|
|
49
|
+
if (this.isInteractive) {
|
|
50
|
+
this.render();
|
|
51
|
+
}
|
|
52
|
+
else {
|
|
53
|
+
console.log(`\n[${this.getScenarioIndex(scenarioId)}/${this.totalScenarios}] ${scenarioId}`);
|
|
54
|
+
if (this.verbose) {
|
|
55
|
+
console.log(` Generating code...`);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
onScenarioValidating(scenarioId) {
|
|
60
|
+
const state = this.scenarios.get(scenarioId);
|
|
61
|
+
if (!state)
|
|
62
|
+
return;
|
|
63
|
+
state.phase = "validating";
|
|
64
|
+
if (this.isInteractive) {
|
|
65
|
+
this.render();
|
|
66
|
+
}
|
|
67
|
+
else if (this.verbose) {
|
|
68
|
+
console.log(` Validating...`);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
onScenarioComplete(scenarioId, result) {
|
|
72
|
+
const state = this.scenarios.get(scenarioId);
|
|
73
|
+
if (!state)
|
|
74
|
+
return;
|
|
75
|
+
state.phase = "complete";
|
|
76
|
+
state.result = result;
|
|
77
|
+
state.status = result.error ? "skip" : result.passed ? "pass" : "fail";
|
|
78
|
+
if (this.isInteractive) {
|
|
79
|
+
this.render();
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
this.printScenarioResult(scenarioId, result);
|
|
83
|
+
}
|
|
84
|
+
this.flushVerboseBuffer();
|
|
85
|
+
}
|
|
86
|
+
log(message) {
|
|
87
|
+
if (this.isInteractive) {
|
|
88
|
+
this.verboseBuffer.push(message);
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
console.log(message);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
finish(report) {
|
|
95
|
+
this.stopSpinner();
|
|
96
|
+
if (this.isInteractive) {
|
|
97
|
+
log_update_1.default.done();
|
|
98
|
+
}
|
|
99
|
+
this.printSummary(report);
|
|
100
|
+
}
|
|
101
|
+
getScenarioIndex(scenarioId) {
|
|
102
|
+
return this.scenarioOrder.indexOf(scenarioId) + 1;
|
|
103
|
+
}
|
|
104
|
+
startSpinner() {
|
|
105
|
+
if (!this.isInteractive)
|
|
106
|
+
return;
|
|
107
|
+
this.spinnerInterval = setInterval(() => {
|
|
108
|
+
this.spinnerFrame = (this.spinnerFrame + 1) % SPINNER_FRAMES.length;
|
|
109
|
+
this.render();
|
|
110
|
+
}, 80);
|
|
111
|
+
}
|
|
112
|
+
stopSpinner() {
|
|
113
|
+
if (this.spinnerInterval) {
|
|
114
|
+
clearInterval(this.spinnerInterval);
|
|
115
|
+
this.spinnerInterval = undefined;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
render() {
|
|
119
|
+
if (!this.isInteractive)
|
|
120
|
+
return;
|
|
121
|
+
const lines = [];
|
|
122
|
+
for (const scenarioId of this.scenarioOrder) {
|
|
123
|
+
const state = this.scenarios.get(scenarioId);
|
|
124
|
+
if (!state)
|
|
125
|
+
continue;
|
|
126
|
+
const line = this.formatScenarioLine(state);
|
|
127
|
+
lines.push(line);
|
|
128
|
+
}
|
|
129
|
+
(0, log_update_1.default)(lines.join("\n"));
|
|
130
|
+
}
|
|
131
|
+
formatScenarioLine(state) {
|
|
132
|
+
const index = this.getScenarioIndex(state.scenario.id);
|
|
133
|
+
const prefix = `[${index}/${this.totalScenarios}]`;
|
|
134
|
+
switch (state.status) {
|
|
135
|
+
case "runs": {
|
|
136
|
+
const spinner = SPINNER_FRAMES[this.spinnerFrame];
|
|
137
|
+
const phaseText = state.phase === "generating" ? "generating..." : "validating...";
|
|
138
|
+
const elapsed = state.startTime
|
|
139
|
+
? this.formatDuration(Date.now() - state.startTime)
|
|
140
|
+
: "";
|
|
141
|
+
return `${chalk_1.default.yellow(spinner)} ${prefix} ${state.scenario.id} ${chalk_1.default.dim(phaseText)} ${chalk_1.default.dim(elapsed)}`;
|
|
142
|
+
}
|
|
143
|
+
case "pass": {
|
|
144
|
+
const score = state.result?.score.toFixed(2) ?? "0.00";
|
|
145
|
+
const duration = state.result
|
|
146
|
+
? this.formatDuration(state.result.duration)
|
|
147
|
+
: "";
|
|
148
|
+
return `${chalk_1.default.green("✓")} ${prefix} ${state.scenario.id} ${chalk_1.default.green("PASS")} ${chalk_1.default.dim(`(score: ${score})`)} ${chalk_1.default.dim(duration)}`;
|
|
149
|
+
}
|
|
150
|
+
case "fail": {
|
|
151
|
+
const score = state.result?.score.toFixed(2) ?? "0.00";
|
|
152
|
+
const duration = state.result
|
|
153
|
+
? this.formatDuration(state.result.duration)
|
|
154
|
+
: "";
|
|
155
|
+
return `${chalk_1.default.red("✗")} ${prefix} ${state.scenario.id} ${chalk_1.default.red("FAIL")} ${chalk_1.default.dim(`(score: ${score})`)} ${chalk_1.default.dim(duration)}`;
|
|
156
|
+
}
|
|
157
|
+
case "skip": {
|
|
158
|
+
return `${chalk_1.default.yellow("○")} ${prefix} ${state.scenario.id} ${chalk_1.default.yellow("SKIP")} ${chalk_1.default.dim("(error)")}`;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
printScenarioResult(scenarioId, result) {
|
|
163
|
+
if (result.passed) {
|
|
164
|
+
console.log(` ✓ PASSED (score: ${result.score.toFixed(2)})`);
|
|
165
|
+
}
|
|
166
|
+
else {
|
|
167
|
+
console.log(` ✗ FAILED (score: ${result.score.toFixed(2)})`);
|
|
168
|
+
this.printViolations(result.violations);
|
|
169
|
+
if (result.error) {
|
|
170
|
+
console.log(` Error: ${result.error}`);
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
printViolations(violations) {
|
|
175
|
+
if (violations.length === 0)
|
|
176
|
+
return;
|
|
177
|
+
console.log(` ${violations.length} violation(s):\n`);
|
|
178
|
+
violations.forEach((v, idx) => {
|
|
179
|
+
console.log(` ${idx + 1}. [${v.type}] ${v.message}`);
|
|
180
|
+
if (v.file) {
|
|
181
|
+
console.log(` File: ${v.file}${v.line ? `:${v.line}` : ""}`);
|
|
182
|
+
}
|
|
183
|
+
if (v.details) {
|
|
184
|
+
console.log(` Details: ${v.details}`);
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
flushVerboseBuffer() {
|
|
189
|
+
if (!this.isInteractive || this.verboseBuffer.length === 0)
|
|
190
|
+
return;
|
|
191
|
+
log_update_1.default.clear();
|
|
192
|
+
this.verboseBuffer.forEach((message) => console.log(message));
|
|
193
|
+
this.verboseBuffer = [];
|
|
194
|
+
this.render();
|
|
195
|
+
}
|
|
196
|
+
printSummary(report) {
|
|
197
|
+
const { summary, totalDuration } = report;
|
|
198
|
+
if (this.isInteractive) {
|
|
199
|
+
report.results.forEach((result) => {
|
|
200
|
+
if (!result.passed) {
|
|
201
|
+
console.log(`\n${chalk_1.default.red("✗")} ${result.scenario.id}`);
|
|
202
|
+
this.printViolations(result.violations);
|
|
203
|
+
if (result.error) {
|
|
204
|
+
console.log(` Error: ${result.error}`);
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
});
|
|
208
|
+
}
|
|
209
|
+
console.log("\n" + "=".repeat(60));
|
|
210
|
+
console.log("EVALUATION SUMMARY");
|
|
211
|
+
console.log("=".repeat(60));
|
|
212
|
+
console.log(`Total scenarios: ${summary.total}`);
|
|
213
|
+
console.log(`Passed: ${this.isInteractive ? chalk_1.default.green(summary.passed.toString()) : summary.passed}`);
|
|
214
|
+
console.log(`Failed: ${this.isInteractive ? chalk_1.default.red(summary.failed.toString()) : summary.failed}`);
|
|
215
|
+
console.log(`Skipped: ${this.isInteractive ? chalk_1.default.yellow(summary.skipped.toString()) : summary.skipped}`);
|
|
216
|
+
console.log(`Average score: ${summary.averageScore.toFixed(2)}`);
|
|
217
|
+
console.log(`Total violations: ${summary.totalViolations}`);
|
|
218
|
+
console.log(`Total duration: ${(totalDuration / 1000).toFixed(1)}s`);
|
|
219
|
+
console.log("=".repeat(60));
|
|
220
|
+
}
|
|
221
|
+
formatDuration(ms) {
|
|
222
|
+
if (ms < 1000)
|
|
223
|
+
return `${ms}ms`;
|
|
224
|
+
if (ms < 60000)
|
|
225
|
+
return `${(ms / 1000).toFixed(1)}s`;
|
|
226
|
+
const minutes = Math.floor(ms / 60000);
|
|
227
|
+
const seconds = ((ms % 60000) / 1000).toFixed(0);
|
|
228
|
+
return `${minutes}m ${seconds}s`;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
exports.ProgressReporter = ProgressReporter;
|
|
232
|
+
//# sourceMappingURL=reporter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reporter.js","sourceRoot":"","sources":["../src/reporter.ts"],"names":[],"mappings":";;;;;;AAAA,kDAA0B;AAC1B,4DAAmC;AAwBnC,MAAM,cAAc,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;AAE1E,MAAa,gBAAgB;IAY3B,YAAY,UAAiC,EAAE;QATvC,cAAS,GAA+B,IAAI,GAAG,EAAE,CAAC;QAClD,kBAAa,GAAa,EAAE,CAAC;QAC7B,iBAAY,GAAW,CAAC,CAAC;QACzB,mBAAc,GAAW,CAAC,CAAC;QAC3B,iBAAY,GAAW,CAAC,CAAC;QAEzB,cAAS,GAAW,CAAC,CAAC;QACtB,kBAAa,GAAa,EAAE,CAAC;QAGnC,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,KAAK,IAAI,CAAC;QACnD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,KAAK,CAAC;IAC1C,CAAC;IAED,KAAK,CAAC,SAAyB;QAC7B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC5B,IAAI,CAAC,cAAc,GAAG,SAAS,CAAC,MAAM,CAAC;QACvC,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;QAEtB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,EAAE;gBAC9B,QAAQ;gBACR,KAAK,EAAE,SAAS;gBAChB,MAAM,EAAE,MAAM;aACf,CAAC,CAAC;QACL,CAAC;QAED,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,OAAO,CAAC,GAAG,CAAC,gBAAgB,SAAS,CAAC,MAAM,mBAAmB,CAAC,CAAC;YACjE,IAAI,CAAC,YAAY,EAAE,CAAC;QACtB,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,CAAC,MAAM,iBAAiB,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,eAAe,CAAC,UAAkB;QAChC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC7C,IAAI,CAAC,KAAK;YAAE,OAAO;QAEnB,KAAK,CAAC,KAAK,GAAG,YAAY,CAAC;QAC3B,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC;QACtB,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CACT,MAAM,IAAI,CAAC,gBAAgB,CAAC,UAAU,CAAC,IAAI,IAAI,CAAC,cAAc,KAAK,UAAU,EAAE,CAChF,CAAC;YACF,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;YACtC,CAAC;QACH,CAAC;IACH,CAAC;IAED,oBAAoB,CAAC,UAAkB;QACrC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC7C,IAAI,CAAC,KAAK;YAAE,OAAO;QAEnB,KAAK,CAAC,KAAK,GAAG,YAAY,
CAAC;QAE3B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC;aAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED,kBAAkB,CAAC,UAAkB,EAAE,MAAwB;QAC7D,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAC7C,IAAI,CAAC,KAAK;YAAE,OAAO;QAEnB,KAAK,CAAC,KAAK,GAAG,UAAU,CAAC;QACzB,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC;QACtB,KAAK,CAAC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAEvE,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,mBAAmB,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QAC/C,CAAC;QAED,IAAI,CAAC,kBAAkB,EAAE,CAAC;IAC5B,CAAC;IAED,GAAG,CAAC,OAAe;QACjB,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED,MAAM,CAAC,MAAwB;QAC7B,IAAI,CAAC,WAAW,EAAE,CAAC;QAEnB,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,oBAAS,CAAC,IAAI,EAAE,CAAC;QACnB,CAAC;QAED,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAEO,gBAAgB,CAAC,UAAkB;QACzC,OAAO,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IACpD,CAAC;IAEO,YAAY;QAClB,IAAI,CAAC,IAAI,CAAC,aAAa;YAAE,OAAO;QAEhC,IAAI,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;YACtC,IAAI,CAAC,YAAY,GAAG,CAAC,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC;YACpE,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC,EAAE,EAAE,CAAC,CAAC;IACT,CAAC;IAEO,WAAW;QACjB,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YACzB,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACpC,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;QACnC,CAAC;IACH,CAAC;IAEO,MAAM;QACZ,IAAI,CAAC,IAAI,CAAC,aAAa;YAAE,OAAO;QAEhC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,KAAK,MAAM,UAAU,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YAC5C,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,KAAK;gBAAE,SAAS;YAErB,MAAM,IAAI,GAAG,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;YAC5C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;QAED,IAAA,oBAAS,EAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC9B,CAAC;IAE
O,kBAAkB,CAAC,KAAoB;QAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACvD,MAAM,MAAM,GAAG,IAAI,KAAK,IAAI,IAAI,CAAC,cAAc,GAAG,CAAC;QAEnD,QAAQ,KAAK,CAAC,MAAM,EAAE,CAAC;YACrB,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBAClD,MAAM,SAAS,GACb,KAAK,CAAC,KAAK,KAAK,YAAY,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,eAAe,CAAC;gBACnE,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS;oBAC7B,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,CAAC;oBACnD,CAAC,CAAC,EAAE,CAAC;gBACP,OAAO,GAAG,eAAK,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,MAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,EAAE,IAAI,eAAK,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,eAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACjH,CAAC;YACD,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC;gBACvD,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM;oBAC3B,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC;oBAC5C,CAAC,CAAC,EAAE,CAAC;gBACP,OAAO,GAAG,eAAK,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,EAAE,IAAI,eAAK,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,eAAK,CAAC,GAAG,CAAC,WAAW,KAAK,GAAG,CAAC,IAAI,eAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9I,CAAC;YACD,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC;gBACvD,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM;oBAC3B,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC;oBAC5C,CAAC,CAAC,EAAE,CAAC;gBACP,OAAO,GAAG,eAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,MAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,EAAE,IAAI,eAAK,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,eAAK,CAAC,GAAG,CAAC,WAAW,KAAK,GAAG,CAAC,IAAI,eAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1I,CAAC;YACD,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,OAAO,GAAG,eAAK,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,MAAM,IAAI,KAAK,CAAC,QAAQ,CAAC,EAAE,IAAI,eAAK,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,eAAK,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAC/G,CAAC;QACH,CAAC;IACH,CAAC;IAEO,mBAAmB,CACzB,UAAkB,EAClB,MAAwB;QAExB,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,OAAO,CAAC,GAAG,CAAC,sBAAsB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAA
C,CAAC,GAAG,CAAC,CAAC;QAChE,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,sBAAsB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC9D,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YACxC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;gBACjB,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;YAC5C,CAAC;QACH,CAAC;IACH,CAAC;IAEO,eAAe,CAAC,UAAuB;QAC7C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEpC,OAAO,CAAC,GAAG,CAAC,OAAO,UAAU,CAAC,MAAM,kBAAkB,CAAC,CAAC;QACxD,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE;YAC5B,OAAO,CAAC,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YACxD,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;gBACX,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACrE,CAAC;YACD,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;gBACd,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YAC9C,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,kBAAkB;QACxB,IAAI,CAAC,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,aAAa,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEnE,oBAAS,CAAC,KAAK,EAAE,CAAC;QAElB,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;QAC9D,IAAI,CAAC,aAAa,GAAG,EAAE,CAAC;QAExB,IAAI,CAAC,MAAM,EAAE,CAAC;IAChB,CAAC;IAEO,YAAY,CAAC,MAAwB;QAC3C,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,MAAM,CAAC;QAE1C,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE;gBAChC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;oBACnB,OAAO,CAAC,GAAG,CAAC,KAAK,eAAK,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;oBACzD,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;oBACxC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;wBACjB,OAAO,CAAC,GAAG,CAAC,YAAY,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;oBAC1C,CAAC;gBACH,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,CAAC,GAAG,CAAC,oBAAoB,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC;QACjD,
OAAO,CAAC,GAAG,CACT,WAAW,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,eAAK,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,CAC1F,CAAC;QACF,OAAO,CAAC,GAAG,CACT,WAAW,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,eAAK,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,CACxF,CAAC;QACF,OAAO,CAAC,GAAG,CACT,YAAY,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,eAAK,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,CAC9F,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,kBAAkB,OAAO,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACjE,OAAO,CAAC,GAAG,CAAC,qBAAqB,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,aAAa,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACrE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC9B,CAAC;IAEO,cAAc,CAAC,EAAU;QAC/B,IAAI,EAAE,GAAG,IAAI;YAAE,OAAO,GAAG,EAAE,IAAI,CAAC;QAChC,IAAI,EAAE,GAAG,KAAK;YAAE,OAAO,GAAG,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QACpD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,KAAK,CAAC,CAAC;QACvC,MAAM,OAAO,GAAG,CAAC,CAAC,EAAE,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACjD,OAAO,GAAG,OAAO,KAAK,OAAO,GAAG,CAAC;IACnC,CAAC;CACF;AAnQD,4CAmQC"}
|
package/dist/runner.js
CHANGED
|
@@ -45,33 +45,32 @@ const copilotCLI_1 = require("./adapters/copilotCLI");
|
|
|
45
45
|
const claudeCodeCLI_1 = require("./adapters/claudeCodeCLI");
|
|
46
46
|
const llmJudge_1 = require("./validators/llmJudge");
|
|
47
47
|
const githubAuth_1 = require("./utils/githubAuth");
|
|
48
|
+
const reporter_1 = require("./reporter");
|
|
48
49
|
const program = new commander_1.Command();
|
|
49
50
|
program
|
|
50
|
-
.name(
|
|
51
|
-
.description(
|
|
52
|
-
.version(
|
|
51
|
+
.name("coding-agent-benchmarks")
|
|
52
|
+
.description("Evaluate coding agents against coding standards and best practices")
|
|
53
|
+
.version("0.1.0");
|
|
53
54
|
/**
|
|
54
55
|
* Evaluate command
|
|
55
56
|
*/
|
|
56
57
|
program
|
|
57
|
-
.command(
|
|
58
|
-
.description(
|
|
59
|
-
.option(
|
|
60
|
-
.option(
|
|
61
|
-
.option(
|
|
62
|
-
.option(
|
|
63
|
-
.option(
|
|
64
|
-
.option(
|
|
65
|
-
.option(
|
|
66
|
-
.option(
|
|
67
|
-
.option(
|
|
68
|
-
.option(
|
|
69
|
-
.option(
|
|
58
|
+
.command("evaluate")
|
|
59
|
+
.description("Run benchmark evaluations")
|
|
60
|
+
.option("--scenario <pattern>", "Filter scenarios by ID pattern (supports wildcards)")
|
|
61
|
+
.option("--category <categories>", "Filter by category (comma-separated)")
|
|
62
|
+
.option("--tag <tags>", "Filter by tags (comma-separated)")
|
|
63
|
+
.option("--adapter <type>", "Code generation adapter (copilot or claude-code)", "copilot")
|
|
64
|
+
.option("--model <model>", "LLM model for judge (default: openai/gpt-4.1)")
|
|
65
|
+
.option("--threshold <number>", "Minimum passing score", "0.8")
|
|
66
|
+
.option("--verbose", "Show detailed output")
|
|
67
|
+
.option("--output <file>", "Export JSON report to file")
|
|
68
|
+
.option("--save-baseline", "Save results as baseline")
|
|
69
|
+
.option("--compare-baseline", "Compare results with baseline")
|
|
70
|
+
.option("--workspace-root <path>", "Workspace root directory")
|
|
70
71
|
.action(async (options) => {
|
|
71
72
|
try {
|
|
72
|
-
// Load configuration
|
|
73
73
|
const { config, scenarios } = await (0, loader_1.loadConfig)(options.workspaceRoot || process.cwd());
|
|
74
|
-
// Create evaluator
|
|
75
74
|
const evaluator = new evaluator_1.Evaluator({
|
|
76
75
|
adapter: options.adapter,
|
|
77
76
|
model: options.model,
|
|
@@ -81,6 +80,25 @@ program
|
|
|
81
80
|
saveBaseline: options.saveBaseline,
|
|
82
81
|
compareBaseline: options.compareBaseline,
|
|
83
82
|
});
|
|
83
|
+
const reporter = new reporter_1.ProgressReporter({ verbose: options.verbose });
|
|
84
|
+
evaluator.on("evaluation:start", (scenarioList) => {
|
|
85
|
+
reporter.start(scenarioList);
|
|
86
|
+
});
|
|
87
|
+
evaluator.on("scenario:start", (scenarioId) => {
|
|
88
|
+
reporter.onScenarioStart(scenarioId);
|
|
89
|
+
});
|
|
90
|
+
evaluator.on("scenario:generating", (scenarioId) => {
|
|
91
|
+
// Phase already set in onScenarioStart, but could be used for more granular updates
|
|
92
|
+
});
|
|
93
|
+
evaluator.on("scenario:validating", (scenarioId) => {
|
|
94
|
+
reporter.onScenarioValidating(scenarioId);
|
|
95
|
+
});
|
|
96
|
+
evaluator.on("scenario:complete", (scenarioId, result) => {
|
|
97
|
+
reporter.onScenarioComplete(scenarioId, result);
|
|
98
|
+
});
|
|
99
|
+
evaluator.on("log", (message) => {
|
|
100
|
+
reporter.log(message);
|
|
101
|
+
});
|
|
84
102
|
// Check adapter availability
|
|
85
103
|
const isAvailable = await evaluator.checkAdapterAvailability();
|
|
86
104
|
if (!isAvailable) {
|
|
@@ -92,29 +110,21 @@ program
|
|
|
92
110
|
const filteredScenarios = evaluator.filterScenarios(scenarios, {
|
|
93
111
|
scenarioPattern: options.scenario,
|
|
94
112
|
category: options.category,
|
|
95
|
-
tags: options.tag
|
|
113
|
+
tags: options.tag
|
|
114
|
+
? options.tag.split(",").map((t) => t.trim())
|
|
115
|
+
: undefined,
|
|
96
116
|
});
|
|
97
117
|
if (filteredScenarios.length === 0) {
|
|
98
|
-
console.log(
|
|
118
|
+
console.log("No scenarios match the specified filters");
|
|
99
119
|
return;
|
|
100
120
|
}
|
|
101
121
|
// Run evaluation
|
|
102
122
|
const report = await evaluator.evaluate(filteredScenarios);
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
console.log('EVALUATION SUMMARY');
|
|
106
|
-
console.log('='.repeat(60));
|
|
107
|
-
console.log(`Total scenarios: ${report.summary.total}`);
|
|
108
|
-
console.log(`Passed: ${report.summary.passed}`);
|
|
109
|
-
console.log(`Failed: ${report.summary.failed}`);
|
|
110
|
-
console.log(`Skipped: ${report.summary.skipped}`);
|
|
111
|
-
console.log(`Average score: ${report.summary.averageScore.toFixed(2)}`);
|
|
112
|
-
console.log(`Total violations: ${report.summary.totalViolations}`);
|
|
113
|
-
console.log(`Total duration: ${(report.totalDuration / 1000).toFixed(1)}s`);
|
|
114
|
-
console.log('='.repeat(60));
|
|
123
|
+
// Finish reporter and display summary
|
|
124
|
+
reporter.finish(report);
|
|
115
125
|
// Export JSON report if requested
|
|
116
126
|
if (options.output) {
|
|
117
|
-
fs.writeFileSync(options.output, JSON.stringify(report, null, 2),
|
|
127
|
+
fs.writeFileSync(options.output, JSON.stringify(report, null, 2), "utf-8");
|
|
118
128
|
console.log(`\nReport exported to: ${options.output}`);
|
|
119
129
|
}
|
|
120
130
|
// Exit with error code if any scenarios failed
|
|
@@ -131,30 +141,32 @@ program
|
|
|
131
141
|
* List command
|
|
132
142
|
*/
|
|
133
143
|
program
|
|
134
|
-
.command(
|
|
135
|
-
.description(
|
|
136
|
-
.option(
|
|
137
|
-
.option(
|
|
144
|
+
.command("list")
|
|
145
|
+
.description("List available test scenarios")
|
|
146
|
+
.option("--category <categories>", "Filter by category")
|
|
147
|
+
.option("--tag <tags>", "Filter by tags (comma-separated)")
|
|
138
148
|
.action(async (options) => {
|
|
139
149
|
try {
|
|
140
150
|
const { scenarios } = await (0, loader_1.loadConfig)();
|
|
141
151
|
let filtered = scenarios;
|
|
142
152
|
// Filter by category
|
|
143
153
|
if (options.category) {
|
|
144
|
-
const categories = options.category
|
|
145
|
-
|
|
154
|
+
const categories = options.category
|
|
155
|
+
.split(",")
|
|
156
|
+
.map((c) => c.trim());
|
|
157
|
+
filtered = filtered.filter((s) => categories.includes(s.category));
|
|
146
158
|
}
|
|
147
159
|
// Filter by tags
|
|
148
160
|
if (options.tag) {
|
|
149
|
-
const tags = options.tag.split(
|
|
150
|
-
filtered = filtered.filter(s => tags.some((tag) => s.tags.includes(tag)));
|
|
161
|
+
const tags = options.tag.split(",").map((t) => t.trim());
|
|
162
|
+
filtered = filtered.filter((s) => tags.some((tag) => s.tags.includes(tag)));
|
|
151
163
|
}
|
|
152
164
|
console.log(`\nAvailable scenarios (${filtered.length}):\n`);
|
|
153
165
|
for (const scenario of filtered) {
|
|
154
166
|
console.log(` ${scenario.id}`);
|
|
155
167
|
console.log(` Category: ${scenario.category}`);
|
|
156
168
|
console.log(` Severity: ${scenario.severity}`);
|
|
157
|
-
console.log(` Tags: ${scenario.tags.join(
|
|
169
|
+
console.log(` Tags: ${scenario.tags.join(", ")}`);
|
|
158
170
|
console.log(` Description: ${scenario.description}`);
|
|
159
171
|
console.log();
|
|
160
172
|
}
|
|
@@ -168,34 +180,34 @@ program
|
|
|
168
180
|
* Check command
|
|
169
181
|
*/
|
|
170
182
|
program
|
|
171
|
-
.command(
|
|
172
|
-
.description(
|
|
183
|
+
.command("check")
|
|
184
|
+
.description("Check if coding agent CLIs and GitHub auth are available")
|
|
173
185
|
.action(async () => {
|
|
174
|
-
console.log(
|
|
186
|
+
console.log("Checking adapter availability...\n");
|
|
175
187
|
const adapters = [
|
|
176
|
-
{ name:
|
|
177
|
-
{ name:
|
|
188
|
+
{ name: "GitHub Copilot CLI", type: "copilot" },
|
|
189
|
+
{ name: "Claude Code CLI", type: "claude-code" },
|
|
178
190
|
];
|
|
179
191
|
for (const { name, type } of adapters) {
|
|
180
192
|
let adapter;
|
|
181
|
-
if (type ===
|
|
193
|
+
if (type === "copilot") {
|
|
182
194
|
adapter = new copilotCLI_1.CopilotCLIAdapter();
|
|
183
195
|
}
|
|
184
196
|
else {
|
|
185
197
|
adapter = new claudeCodeCLI_1.ClaudeCodeCLIAdapter();
|
|
186
198
|
}
|
|
187
199
|
const available = await adapter.checkAvailability();
|
|
188
|
-
const status = available ?
|
|
200
|
+
const status = available ? "✓ Available" : "✗ Not found";
|
|
189
201
|
console.log(` ${name}: ${status}`);
|
|
190
202
|
}
|
|
191
|
-
console.log(
|
|
203
|
+
console.log("\nChecking GitHub authentication...\n");
|
|
192
204
|
const authStatus = (0, githubAuth_1.checkGitHubAuth)();
|
|
193
|
-
const authIcon = authStatus.available ?
|
|
205
|
+
const authIcon = authStatus.available ? "✓" : "✗";
|
|
194
206
|
console.log(` ${authIcon} ${authStatus.message}`);
|
|
195
207
|
if (!authStatus.available) {
|
|
196
|
-
console.log(
|
|
197
|
-
console.log(
|
|
198
|
-
console.log(
|
|
208
|
+
console.log("\n 💡 GitHub token is required for LLM-as-judge validation");
|
|
209
|
+
console.log(" Setup: https://github.com/settings/tokens (scope: models:read)");
|
|
210
|
+
console.log(" Or install GitHub CLI: brew install gh && gh auth login");
|
|
199
211
|
}
|
|
200
212
|
console.log();
|
|
201
213
|
});
|
|
@@ -203,23 +215,23 @@ program
|
|
|
203
215
|
* Test LLM command
|
|
204
216
|
*/
|
|
205
217
|
program
|
|
206
|
-
.command(
|
|
207
|
-
.description(
|
|
208
|
-
.option(
|
|
218
|
+
.command("test-llm")
|
|
219
|
+
.description("Test LLM judge with a custom prompt")
|
|
220
|
+
.option("--model <model>", "LLM model to use (default: openai/gpt-4.1)")
|
|
209
221
|
.action(async (options) => {
|
|
210
222
|
try {
|
|
211
|
-
console.log(
|
|
212
|
-
console.log(
|
|
223
|
+
console.log("Testing LLM judge...\n");
|
|
224
|
+
console.log("Enter your prompt (Ctrl+D when done):\n");
|
|
213
225
|
// Read prompt from stdin
|
|
214
226
|
const chunks = [];
|
|
215
|
-
process.stdin.on(
|
|
227
|
+
process.stdin.on("data", (chunk) => {
|
|
216
228
|
chunks.push(chunk.toString());
|
|
217
229
|
});
|
|
218
|
-
process.stdin.on(
|
|
219
|
-
const prompt = chunks.join(
|
|
230
|
+
process.stdin.on("end", async () => {
|
|
231
|
+
const prompt = chunks.join("");
|
|
220
232
|
const validator = new llmJudge_1.LLMJudgeValidator(undefined, options.model);
|
|
221
233
|
const result = await validator.testJudge(prompt, options.model);
|
|
222
|
-
console.log(
|
|
234
|
+
console.log("\nLLM Response:\n");
|
|
223
235
|
console.log(result);
|
|
224
236
|
});
|
|
225
237
|
}
|
package/dist/runner.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":";;AAEA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,yCAAoC;AACpC,uCAAyB;AACzB,4CAA6C;AAC7C,2CAAwC;AAExC,sDAA0D;AAC1D,4DAAgE;AAChE,oDAA0D;AAC1D,mDAAqD;
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../src/runner.ts"],"names":[],"mappings":";;AAEA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,yCAAoC;AACpC,uCAAyB;AACzB,4CAA6C;AAC7C,2CAAwC;AAExC,sDAA0D;AAC1D,4DAAgE;AAChE,oDAA0D;AAC1D,mDAAqD;AACrD,yCAA8C;AAE9C,MAAM,OAAO,GAAG,IAAI,mBAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,yBAAyB,CAAC;KAC/B,WAAW,CACV,oEAAoE,CACrE;KACA,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB;;GAEG;AACH,OAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,2BAA2B,CAAC;KACxC,MAAM,CACL,sBAAsB,EACtB,qDAAqD,CACtD;KACA,MAAM,CAAC,yBAAyB,EAAE,sCAAsC,CAAC;KACzE,MAAM,CAAC,cAAc,EAAE,kCAAkC,CAAC;KAC1D,MAAM,CACL,kBAAkB,EAClB,kDAAkD,EAClD,SAAS,CACV;KACA,MAAM,CAAC,iBAAiB,EAAE,+CAA+C,CAAC;KAC1E,MAAM,CAAC,sBAAsB,EAAE,uBAAuB,EAAE,KAAK,CAAC;KAC9D,MAAM,CAAC,WAAW,EAAE,sBAAsB,CAAC;KAC3C,MAAM,CAAC,iBAAiB,EAAE,4BAA4B,CAAC;KACvD,MAAM,CAAC,iBAAiB,EAAE,0BAA0B,CAAC;KACrD,MAAM,CAAC,oBAAoB,EAAE,+BAA+B,CAAC;KAC7D,MAAM,CAAC,yBAAyB,EAAE,0BAA0B,CAAC;KAC7D,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,IAAA,mBAAU,EAC5C,OAAO,CAAC,aAAa,IAAI,OAAO,CAAC,GAAG,EAAE,CACvC,CAAC;QAEF,MAAM,SAAS,GAAG,IAAI,qBAAS,CAAC;YAC9B,OAAO,EAAE,OAAO,CAAC,OAAsB;YACvC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,aAAa,EAAE,OAAO,CAAC,aAAa;YACpC,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,eAAe,EAAE,OAAO,CAAC,eAAe;SACzC,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,IAAI,2BAAgB,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEpE,SAAS,CAAC,EAAE,CAAC,kBAAkB,EAAE,CAAC,YAAY,EAAE,EAAE;YAChD,QAAQ,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,SAAS,CAAC,EAAE,CAAC,gBAAgB,EAAE,CAAC,UAAU,EAAE,EAAE;YAC5C,QAAQ,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;QAEH,SAAS,CAAC,EAAE,CAAC,qBAAqB,EAAE,CAAC,UAAU,EAAE,EAAE;YACjD,oFAAoF;QACtF,CAAC,CAAC,CAAC;QAEH,SAAS,CAAC,EAAE,CAAC,qBAAqB,EAAE,CAAC,UAAU,EAAE,EAAE;YACjD,QAAQ,CAAC,oBAAoB,CAAC,UAAU,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,SAAS,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,EAAE;YACvD,QAAQ,CAAC,kBAAkB,
CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,SAAS,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,EAAE;YAC9B,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QACxB,CAAC,CAAC,CAAC;QAEH,6BAA6B;QAC7B,MAAM,WAAW,GAAG,MAAM,SAAS,CAAC,wBAAwB,EAAE,CAAC;QAC/D,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO,CAAC,KAAK,CAAC,UAAU,OAAO,CAAC,OAAO,gBAAgB,CAAC,CAAC;YACzD,OAAO,CAAC,KAAK,CACX,kBAAkB,OAAO,CAAC,OAAO,0BAA0B,CAC5D,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,mBAAmB;QACnB,MAAM,iBAAiB,GAAG,SAAS,CAAC,eAAe,CAAC,SAAS,EAAE;YAC7D,eAAe,EAAE,OAAO,CAAC,QAAQ;YACjC,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,IAAI,EAAE,OAAO,CAAC,GAAG;gBACf,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBACrD,CAAC,CAAC,SAAS;SACd,CAAC,CAAC;QAEH,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC;YACxD,OAAO;QACT,CAAC;QAED,iBAAiB;QACjB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAE3D,sCAAsC;QACtC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAExB,kCAAkC;QAClC,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YACnB,EAAE,CAAC,aAAa,CACd,OAAO,CAAC,MAAM,EACd,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAC/B,OAAO,CACR,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,yBAAyB,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;QACzD,CAAC;QAED,+CAA+C;QAC/C,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YAC5D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,UAAU,KAAK,EAAE,CAAC,CAAC;QACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL;;GAEG;AACH,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,+BAA+B,CAAC;KAC5C,MAAM,CAAC,yBAAyB,EAAE,oBAAoB,CAAC;KACvD,MAAM,CAAC,cAAc,EAAE,kCAAkC,CAAC;KAC1D,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,IAAI,CAAC;QACH,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,IAAA,mBAAU,GAAE,CAAC;QAEzC,IAAI,QAAQ,GAAG,SAAS,CAAC;QAEzB,qBAAqB;QACrB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACrB,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ;iBAChC,KAAK,CAAC,GAAG,CAAC;iBACV,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,C
AAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAChC,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;QACrE,CAAC;QAED,iBAAiB;QACjB,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;YAChB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YACjE,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC/B,IAAI,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CACjD,CAAC;QACJ,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,0BAA0B,QAAQ,CAAC,MAAM,MAAM,CAAC,CAAC;QAE7D,KAAK,MAAM,QAAQ,IAAI,QAAQ,EAAE,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,KAAK,QAAQ,CAAC,EAAE,EAAE,CAAC,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,iBAAiB,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,iBAAiB,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;YAClD,OAAO,CAAC,GAAG,CAAC,aAAa,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;YACxD,OAAO,CAAC,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,UAAU,KAAK,EAAE,CAAC,CAAC;QACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL;;GAEG;AACH,OAAO;KACJ,OAAO,CAAC,OAAO,CAAC;KAChB,WAAW,CAAC,0DAA0D,CAAC;KACvE,MAAM,CAAC,KAAK,IAAI,EAAE;IACjB,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;IAElD,MAAM,QAAQ,GAA+C;QAC3D,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,SAAS,EAAE;QAC/C,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,aAAa,EAAE;KACjD,CAAC;IAEF,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,QAAQ,EAAE,CAAC;QACtC,IAAI,OAAO,CAAC;QACZ,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;YACvB,OAAO,GAAG,IAAI,8BAAiB,EAAE,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,IAAI,oCAAoB,EAAE,CAAC;QACvC,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,iBAAiB,EAAE,CAAC;QACpD,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC;QACzD,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,KAAK,MAAM,EAAE,CAAC,CAAC;IACtC,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,uCAAuC,CAAC,CAAC;IACrD,MAAM,UAAU,GAAG,IAAA,4BAAe,GAAE,CAAC;IACrC,MAAM,QAAQ,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC
,CAAC,GAAG,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,KAAK,QAAQ,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC;IAEnD,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,CAAC;QAC1B,OAAO,CAAC,GAAG,CACT,6DAA6D,CAC9D,CAAC;QACF,OAAO,CAAC,GAAG,CACT,kEAAkE,CACnE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,2DAA2D,CAAC,CAAC;IAC3E,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC,CAAC,CAAC;AAEL;;GAEG;AACH,OAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,qCAAqC,CAAC;KAClD,MAAM,CAAC,iBAAiB,EAAE,4CAA4C,CAAC;KACvE,MAAM,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;IACxB,IAAI,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;QAEvD,yBAAyB;QACzB,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE;YACjC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,IAAI,EAAE;YACjC,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAE/B,MAAM,SAAS,GAAG,IAAI,4BAAiB,CAAC,SAAS,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;YAClE,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;YAEhE,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;YACjC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,UAAU,KAAK,EAAE,CAAC,CAAC;QACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CAAC,CAAC;AAEL,kBAAkB;AAClB,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "coding-agent-benchmarks",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.1.4",
|
|
4
|
+
"description": "Testing coding agents (GitHub Copilot CLI, Claude Code, etc.) with your repo's context to evaluate their code generation quality.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
7
|
"bin": {
|
|
@@ -18,10 +18,24 @@
|
|
|
18
18
|
"benchmark",
|
|
19
19
|
"evaluation",
|
|
20
20
|
"copilot",
|
|
21
|
+
"github-copilot",
|
|
22
|
+
"claude",
|
|
21
23
|
"claude-code",
|
|
22
24
|
"ai",
|
|
25
|
+
"llm",
|
|
26
|
+
"openai",
|
|
27
|
+
"gpt",
|
|
23
28
|
"testing",
|
|
24
|
-
"code-quality"
|
|
29
|
+
"code-quality",
|
|
30
|
+
"code-generation",
|
|
31
|
+
"code-standards",
|
|
32
|
+
"lint",
|
|
33
|
+
"eslint",
|
|
34
|
+
"ci-cd",
|
|
35
|
+
"automation",
|
|
36
|
+
"developer-tools",
|
|
37
|
+
"prompt-testing",
|
|
38
|
+
"ai-testing"
|
|
25
39
|
],
|
|
26
40
|
"author": "chiItepin",
|
|
27
41
|
"license": "MIT",
|
|
@@ -38,12 +52,15 @@
|
|
|
38
52
|
"LICENSE"
|
|
39
53
|
],
|
|
40
54
|
"dependencies": {
|
|
41
|
-
"
|
|
55
|
+
"chalk": "^4.1.2",
|
|
56
|
+
"commander": "^13.1.0",
|
|
57
|
+
"log-update": "^4.0.0"
|
|
42
58
|
},
|
|
43
59
|
"devDependencies": {
|
|
44
60
|
"@types/node": "^25.0.9",
|
|
45
61
|
"eslint": "^9.39.2",
|
|
46
62
|
"prettier": "^3.8.0",
|
|
63
|
+
"tsup": "^8.5.1",
|
|
47
64
|
"tsx": "^4.21.0",
|
|
48
65
|
"typescript": "^5.9.3"
|
|
49
66
|
}
|