coding-agent-benchmarks 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +474 -0
- package/dist/adapters/claudeCodeCLI.d.ts +19 -0
- package/dist/adapters/claudeCodeCLI.d.ts.map +1 -0
- package/dist/adapters/claudeCodeCLI.js +106 -0
- package/dist/adapters/claudeCodeCLI.js.map +1 -0
- package/dist/adapters/copilotCLI.d.ts +19 -0
- package/dist/adapters/copilotCLI.d.ts.map +1 -0
- package/dist/adapters/copilotCLI.js +104 -0
- package/dist/adapters/copilotCLI.js.map +1 -0
- package/dist/config/defaultScenarios.d.ts +6 -0
- package/dist/config/defaultScenarios.d.ts.map +1 -0
- package/dist/config/defaultScenarios.js +209 -0
- package/dist/config/defaultScenarios.js.map +1 -0
- package/dist/config/loader.d.ts +13 -0
- package/dist/config/loader.d.ts.map +1 -0
- package/dist/config/loader.js +153 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/evaluator.d.ts +45 -0
- package/dist/evaluator.d.ts.map +1 -0
- package/dist/evaluator.js +226 -0
- package/dist/evaluator.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38 -0
- package/dist/index.js.map +1 -0
- package/dist/runner.d.ts +6 -0
- package/dist/runner.d.ts.map +1 -0
- package/dist/runner.js +233 -0
- package/dist/runner.js.map +1 -0
- package/dist/types.d.ts +354 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/baselineManager.d.ts +53 -0
- package/dist/utils/baselineManager.d.ts.map +1 -0
- package/dist/utils/baselineManager.js +220 -0
- package/dist/utils/baselineManager.js.map +1 -0
- package/dist/utils/gitUtils.d.ts +39 -0
- package/dist/utils/gitUtils.d.ts.map +1 -0
- package/dist/utils/gitUtils.js +121 -0
- package/dist/utils/gitUtils.js.map +1 -0
- package/dist/utils/githubAuth.d.ts +22 -0
- package/dist/utils/githubAuth.d.ts.map +1 -0
- package/dist/utils/githubAuth.js +79 -0
- package/dist/utils/githubAuth.js.map +1 -0
- package/dist/utils/workspaceUtils.d.ts +32 -0
- package/dist/utils/workspaceUtils.d.ts.map +1 -0
- package/dist/utils/workspaceUtils.js +121 -0
- package/dist/utils/workspaceUtils.js.map +1 -0
- package/dist/validators/eslintValidator.d.ts +22 -0
- package/dist/validators/eslintValidator.d.ts.map +1 -0
- package/dist/validators/eslintValidator.js +217 -0
- package/dist/validators/eslintValidator.js.map +1 -0
- package/dist/validators/llmJudge.d.ts +28 -0
- package/dist/validators/llmJudge.d.ts.map +1 -0
- package/dist/validators/llmJudge.js +241 -0
- package/dist/validators/llmJudge.js.map +1 -0
- package/dist/validators/patternValidator.d.ts +27 -0
- package/dist/validators/patternValidator.d.ts.map +1 -0
- package/dist/validators/patternValidator.js +233 -0
- package/dist/validators/patternValidator.js.map +1 -0
- package/package.json +50 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ESLint validator for generated code
|
|
4
|
+
*/
|
|
5
|
+
// TypeScript-emitted CommonJS interop helper; an existing `this.__createBinding` is reused when present.
// Exposes property `k` of module `m` on object `o` under the name `k2` (which defaults to `k`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
// Substitute a live getter unless `m[k]` is already an accessor on an `__esModule` module
// or a data property that is neither writable nor configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
// Branch used when Object.create is unavailable: copies the current value instead of defining a property.
}) : (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
// TypeScript-emitted CommonJS interop helper; an existing `this.__setModuleDefault` is reused when present.
// Stores `v` as the "default" property of `o` (enumerable when defined through Object.defineProperty).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
18
|
+
// Branch used when Object.create is unavailable: plain assignment.
}) : function(o, v) {
|
|
19
|
+
o["default"] = v;
|
|
20
|
+
});
|
|
21
|
+
// TypeScript-emitted CommonJS interop helper; an existing `this.__importStar` is reused when present.
// The returned function wraps a required module so it can be used like `import * as ns`:
// a module flagged `__esModule` is returned unchanged; otherwise a new object receives every own
// property except "default" via __createBinding, and its "default" is set to the module itself.
var __importStar = (this && this.__importStar) || (function () {
|
|
22
|
+
// Self-replacing function: the first call selects Object.getOwnPropertyNames or the
// for-in/hasOwnProperty fallback, stores it in `ownKeys`, then delegates to it.
var ownKeys = function(o) {
|
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
24
|
+
var ar = [];
|
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
26
|
+
return ar;
|
|
27
|
+
};
|
|
28
|
+
return ownKeys(o);
|
|
29
|
+
};
|
|
30
|
+
return function (mod) {
|
|
31
|
+
if (mod && mod.__esModule) return mod;
|
|
32
|
+
var result = {};
|
|
33
|
+
// null/undefined modules contribute no named bindings; only "default" is set below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
34
|
+
__setModuleDefault(result, mod);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
})();
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.ESLintValidator = void 0;
|
|
40
|
+
const fs = __importStar(require("fs"));
|
|
41
|
+
const path = __importStar(require("path"));
|
|
42
|
+
const workspaceUtils_1 = require("../utils/workspaceUtils");
|
|
43
|
+
const child_process_1 = require("child_process");
|
|
44
|
+
/**
 * Validator that runs the workspace's ESLint (through `npx`) on generated
 * JS/TS files and converts the JSON report into benchmark violations.
 */
class ESLintValidator {
    /**
     * @param workspaceRoot Optional workspace directory; it is passed through
     *   `resolveWorkspaceRoot` and used as the cwd of every ESLint invocation.
     */
    constructor(workspaceRoot) {
        this.type = 'eslint';
        this.workspaceRoot = (0, workspaceUtils_1.resolveWorkspaceRoot)(workspaceRoot);
    }
    /**
     * Check if ESLint is available in the project by running
     * `npx eslint --version` in the workspace root.
     *
     * @returns `true` when the command exits successfully, `false` otherwise.
     */
    async checkESLintAvailability() {
        try {
            (0, child_process_1.execSync)('npx eslint --version', {
                cwd: this.workspaceRoot,
                stdio: 'pipe',
            });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Quote one argument for the shell that `execSync` spawns.
     *
     * POSIX shells get single quotes (with embedded `'` rewritten as `'\''`)
     * so that `$`, backticks and spaces in a path stay literal. On Windows the
     * argument is wrapped in double quotes because cmd.exe has no single-quote
     * syntax.
     *
     * @param arg Raw argument (file or config path).
     * @param platform Platform identifier; defaults to `process.platform`.
     * @returns The quoted argument, ready to interpolate into a command line.
     */
    quoteShellArg(arg, platform = process.platform) {
        const text = String(arg);
        if (platform === 'win32') {
            return `"${text}"`;
        }
        return `'${text.replace(/'/g, "'\\''")}'`;
    }
    /**
     * Convert the text of an ESLint `--format json` report into violations.
     *
     * @param report Raw stdout of the ESLint process.
     * @param relativePath Workspace-relative path stored on every violation.
     * @returns One violation per ESLint message (severity 2 -> 'major',
     *   anything else -> 'minor').
     * @throws Error when the report is empty, is not valid JSON, or is not an array.
     */
    parseReport(report, relativePath) {
        if (typeof report !== 'string' || report.trim() === '') {
            throw new Error('ESLint produced no JSON report');
        }
        const results = JSON.parse(report);
        if (!Array.isArray(results)) {
            throw new Error('ESLint JSON report is not an array');
        }
        const found = [];
        for (const result of results) {
            for (const message of result.messages || []) {
                found.push({
                    type: 'eslint',
                    // ruleId is null for fatal/parse errors; avoid printing "null: ...".
                    message: `${message.ruleId ?? 'eslint'}: ${message.message}`,
                    file: relativePath,
                    line: message.line,
                    severity: message.severity === 2 ? 'major' : 'minor',
                    details: `Column ${message.column}`,
                });
            }
        }
        return found;
    }
    /**
     * Validate generated code using ESLint.
     *
     * @param files Generated file paths, resolved against the workspace root.
     * @param scenario Scenario whose `validationStrategy.eslint` controls the
     *   run (`enabled`, optional `configPath`); `scenario.severity` is used for
     *   violations that describe a failed ESLint run.
     * @returns A validation result. `score` is -1 (with `passed: true`) when
     *   ESLint is disabled or not installed; otherwise `passed` is true only
     *   when no violation was recorded. Unexpected errors produce
     *   `passed: false`, `score: 0` and an `error` message.
     */
    async validate(files, scenario) {
        const eslintConfig = scenario.validationStrategy.eslint;
        // If ESLint not enabled, skip
        if (!eslintConfig?.enabled) {
            return {
                passed: true,
                score: -1,
                violations: [],
                validatorType: 'eslint',
            };
        }
        // Check if ESLint is available
        const isAvailable = await this.checkESLintAvailability();
        if (!isAvailable) {
            console.warn('ESLint not found in project, skipping ESLint validation');
            return {
                passed: true,
                score: -1,
                violations: [],
                validatorType: 'eslint',
                error: 'ESLint not found',
            };
        }
        try {
            const absolutePaths = (0, workspaceUtils_1.resolveFilePaths)(this.workspaceRoot, files);
            const configArg = eslintConfig.configPath
                ? `--config ${this.quoteShellArg(eslintConfig.configPath)}`
                : '';
            const lintableExtensions = ['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'];
            const violations = [];
            // Run ESLint on each file
            for (const filePath of absolutePaths) {
                if (!fs.existsSync(filePath)) {
                    continue;
                }
                // Only lint JS/TS files
                const ext = path.extname(filePath).toLowerCase();
                if (!lintableExtensions.includes(ext)) {
                    continue;
                }
                const relativePath = path.relative(this.workspaceRoot, filePath);
                let report;
                let runError = null;
                try {
                    report = (0, child_process_1.execSync)(`npx eslint ${configArg} --format json ${this.quoteShellArg(filePath)}`, {
                        cwd: this.workspaceRoot,
                        encoding: 'utf-8',
                        stdio: 'pipe',
                    });
                }
                catch (error) {
                    // ESLint exits non-zero when it reports errors; the JSON
                    // report, if any, is still available on stdout.
                    runError = error;
                    report = error?.stdout;
                }
                try {
                    violations.push(...this.parseReport(report, relativePath));
                }
                catch (parseError) {
                    // No usable report (crash, bad config, truncated output):
                    // record the failure instead of letting the file pass silently.
                    violations.push({
                        type: 'eslint',
                        message: `ESLint failed for ${path.basename(filePath)}`,
                        file: relativePath,
                        severity: scenario.severity,
                        details: (runError ?? parseError).message,
                    });
                }
            }
            // Calculate score based on violations
            return {
                passed: violations.length === 0,
                score: this.calculateScore(violations),
                violations,
                validatorType: 'eslint',
            };
        }
        catch (error) {
            return {
                passed: false,
                score: 0,
                violations: [],
                validatorType: 'eslint',
                error: `ESLint validation failed: ${error}`,
            };
        }
    }
    /**
     * Calculate score based on number and severity of violations.
     *
     * @param violations Violations carrying a `severity` of 'critical',
     *   'major' or 'minor'; a missing or unknown severity is weighted as
     *   'minor' so the score can never become NaN.
     * @returns 1.0 for no violations, otherwise `e^(-totalWeight / 2)`
     *   clamped to the range [0, 1].
     */
    calculateScore(violations) {
        if (violations.length === 0) {
            return 1.0;
        }
        // Weight violations by severity
        const weights = {
            critical: 1.0,
            major: 0.7,
            minor: 0.3,
        };
        const totalWeight = violations.reduce((sum, v) => {
            return sum + (weights[v.severity] ?? weights.minor);
        }, 0);
        // Score decreases with more weighted violations
        // Using exponential decay: score = e^(-totalWeight/2)
        const score = Math.exp(-totalWeight / 2);
        return Math.max(0, Math.min(1, score));
    }
}
|
|
216
|
+
exports.ESLintValidator = ESLintValidator;
|
|
217
|
+
//# sourceMappingURL=eslintValidator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eslintValidator.js","sourceRoot":"","sources":["../../src/validators/eslintValidator.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAE7B,4DAAiF;AACjF,iDAAyC;AAEzC,MAAa,eAAe;IAI1B,YAAY,aAAsB;QAHlB,SAAI,GAAG,QAAiB,CAAC;QAIvC,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;IAC3D,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,uBAAuB;QACnC,IAAI,CAAC;YACH,+BAA+B;YAC/B,IAAA,wBAAQ,EAAC,sBAAsB,EAAE;gBAC/B,GAAG,EAAE,IAAI,CAAC,aAAa;gBACvB,KAAK,EAAE,MAAM;aACd,CAAC,CAAC;YACH,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,YAAY,GAAG,QAAQ,CAAC,kBAAkB,CAAC,MAAM,CAAC;QAExD,8BAA8B;QAC9B,IAAI,CAAC,YAAY,EAAE,OAAO,EAAE,CAAC;YAC3B,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,QAAQ;aACxB,CAAC;QACJ,CAAC;QAED,+BAA+B;QAC/B,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,uBAAuB,EAAE,CAAC;QACzD,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,OAAO,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC;YACxE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,QAAQ;gBACvB,KAAK,EAAE,kBAAkB;aAC1B,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,UAAU,GAAgB,EAAE,CAAC;YAEnC,0BAA0B;YAC1B,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,wBAAwB;gBACxB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;gBACjD,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBAClE,SAAS;gBACX,CAAC;gBAED,IAAI,CAAC;oBACH,8BAA8B;oBAC9B,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU;wBACvC,CAAC,CAAC,YAAY,YAAY,CAAC,UAAU,EAAE;wBACvC,CAAC,CAAC,EAAE,CAAC;oBAEP,MAAM,MAAM,GAAG,IAAA,wBAAQ,EACrB,cAAc,SAAS,mBAAmB,QAAQ,GAAG,EACrD;wBACE,GAAG,EAAE,IAAI,CAAC,aAAa;wBACvB,QAAQ,EAAE,OAAO;wBACjB,KAAK,EAAE,MAAM;qBACd,CACF,CAAC;oBAEF,sBAAsB;oBACtB,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK
,CAAC,MAAM,CAAC,CAAC;oBACnC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;oBAEjE,yCAAyC;oBACzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;wBAC7B,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC;4BAC5C,MAAM,QAAQ,GACZ,OAAO,CAAC,QAAQ,KAAK,CAAC;gCACpB,CAAC,CAAC,OAAO;gCACT,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,CAAC;oCACtB,CAAC,CAAC,OAAO;oCACT,CAAC,CAAC,OAAO,CAAC;4BAEhB,UAAU,CAAC,IAAI,CAAC;gCACd,IAAI,EAAE,QAAQ;gCACd,OAAO,EAAE,GAAG,OAAO,CAAC,MAAM,KAAK,OAAO,CAAC,OAAO,EAAE;gCAChD,IAAI,EAAE,YAAY;gCAClB,IAAI,EAAE,OAAO,CAAC,IAAI;gCAClB,QAAQ,EAAE,QAA6B;gCACvC,OAAO,EAAE,UAAU,OAAO,CAAC,MAAM,EAAE;6BACpC,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;gBAAC,OAAO,KAAU,EAAE,CAAC;oBACpB,sDAAsD;oBACtD,gCAAgC;oBAChC,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;wBACjB,IAAI,CAAC;4BACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;4BACzC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;4BAEjE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gCAC7B,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC;oCAC5C,MAAM,QAAQ,GACZ,OAAO,CAAC,QAAQ,KAAK,CAAC;wCACpB,CAAC,CAAC,OAAO;wCACT,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,CAAC;4CACtB,CAAC,CAAC,OAAO;4CACT,CAAC,CAAC,OAAO,CAAC;oCAEhB,UAAU,CAAC,IAAI,CAAC;wCACd,IAAI,EAAE,QAAQ;wCACd,OAAO,EAAE,GAAG,OAAO,CAAC,MAAM,KAAK,OAAO,CAAC,OAAO,EAAE;wCAChD,IAAI,EAAE,YAAY;wCAClB,IAAI,EAAE,OAAO,CAAC,IAAI;wCAClB,QAAQ,EAAE,QAA6B;wCACvC,OAAO,EAAE,UAAU,OAAO,CAAC,MAAM,EAAE;qCACpC,CAAC,CAAC;gCACL,CAAC;4BACH,CAAC;wBACH,CAAC;wBAAC,MAAM,CAAC;4BACP,4DAA4D;4BAC5D,UAAU,CAAC,IAAI,CAAC;gCACd,IAAI,EAAE,QAAQ;gCACd,OAAO,EAAE,qBAAqB,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;gCACvD,IAAI,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC;gCACjD,QAAQ,EAAE,QAAQ,CAAC,QAAQ;gCAC3B,OAAO,EAAE,KAAK,CAAC,OAAO;6BACvB,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,sCAAsC;YACtC,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC;YAE9C,OAAO;gBACL,MAAM;gBACN,KAAK;gBACL,UAAU;gBACV,aAAa,EAAE,QAAQ;aACxB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;Y
ACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,QAAQ;gBACvB,KAAK,EAAE,6BAA6B,KAAK,EAAE;aAC5C,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,UAAuB;QAC5C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,GAAG,CAAC;QACb,CAAC;QAED,gCAAgC;QAChC,MAAM,OAAO,GAAG;YACd,QAAQ,EAAE,GAAG;YACb,KAAK,EAAE,GAAG;YACV,KAAK,EAAE,GAAG;SACX,CAAC;QAEF,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE;YAC/C,OAAO,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACnC,CAAC,EAAE,CAAC,CAAC,CAAC;QAEN,gDAAgD;QAChD,sDAAsD;QACtD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC;QAEzC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;IACzC,CAAC;CACF;AAtMD,0CAsMC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-as-Judge validator using GitHub Models API
|
|
3
|
+
*/
|
|
4
|
+
import { CodeValidator, TestScenario, ValidationResult } from '../types';
|
|
5
|
+
/**
* Validator that sends the generated files to an LLM judge (GitHub Models
* chat-completions endpoint) and turns its verdict into a ValidationResult.
*/
export declare class LLMJudgeValidator implements CodeValidator {
|
|
6
|
+
readonly type: "llm-judge";
|
|
7
|
+
private workspaceRoot;
|
|
8
|
+
private apiToken;
|
|
9
|
+
private defaultModel;
|
|
10
|
+
/**
* @param workspaceRoot Optional workspace directory used to resolve the generated files.
* @param model Model used when the scenario does not name one; the implementation defaults it to 'openai/gpt-4.1'.
*/
constructor(workspaceRoot?: string, model?: string);
|
|
11
|
+
/**
|
|
12
|
+
* Validate generated code using LLM judgment.
*
* @param files Paths of the generated files; files that do not exist are skipped.
* @param scenario Scenario whose `validationStrategy.llmJudge` settings drive the run.
* @returns A result with `score` -1 and `passed` true when the judge is disabled or no
*   token is available; a result with `passed` false, `score` 0 and `error` set when
*   the request or response handling fails; otherwise the judge's verdict.
|
|
13
|
+
*/
|
|
14
|
+
validate(files: readonly string[], scenario: TestScenario): Promise<ValidationResult>;
|
|
15
|
+
/**
|
|
16
|
+
* Build the judgment prompt for the LLM (a custom prompt from the scenario is returned as-is)
|
|
17
|
+
*/
|
|
18
|
+
private buildJudgmentPrompt;
|
|
19
|
+
/**
|
|
20
|
+
* Call the GitHub Models API (or other LLM API) and parse the JSON judgment it returns
|
|
21
|
+
*/
|
|
22
|
+
private callLLMAPI;
|
|
23
|
+
/**
|
|
24
|
+
* Test the LLM judge with a sample prompt (for debugging).
*
* @param prompt User prompt sent to the judge.
* @param model Optional model override; the constructor's model is used otherwise.
* @returns The judgment as pretty-printed JSON, or a string starting with "Error:"
*   when no token is available or the call fails (this method does not reject).
|
|
25
|
+
*/
|
|
26
|
+
testJudge(prompt: string, model?: string): Promise<string>;
|
|
27
|
+
}
|
|
28
|
+
//# sourceMappingURL=llmJudge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llmJudge.d.ts","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,gBAAgB,EAAa,MAAM,UAAU,CAAC;AAkDpF,qBAAa,iBAAkB,YAAW,aAAa;IACrD,SAAgB,IAAI,EAAG,WAAW,CAAU;IAC5C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,YAAY,CAAS;gBAEjB,aAAa,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAyB;IAMpE;;OAEG;IACG,QAAQ,CACZ,KAAK,EAAE,SAAS,MAAM,EAAE,EACxB,QAAQ,EAAE,YAAY,GACrB,OAAO,CAAC,gBAAgB,CAAC;IA8E5B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA+B3B;;OAEG;YACW,UAAU;IAqExB;;OAEG;IACG,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAYjE"}
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LLM-as-Judge validator using GitHub Models API
|
|
4
|
+
*/
|
|
5
|
+
// TypeScript-emitted CommonJS interop helper; an existing `this.__createBinding` is reused when present.
// Exposes property `k` of module `m` on object `o` under the name `k2` (which defaults to `k`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
// Substitute a live getter unless `m[k]` is already an accessor on an `__esModule` module
// or a data property that is neither writable nor configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
// Branch used when Object.create is unavailable: copies the current value instead of defining a property.
}) : (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
// TypeScript-emitted CommonJS interop helper; an existing `this.__setModuleDefault` is reused when present.
// Stores `v` as the "default" property of `o` (enumerable when defined through Object.defineProperty).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
18
|
+
// Branch used when Object.create is unavailable: plain assignment.
}) : function(o, v) {
|
|
19
|
+
o["default"] = v;
|
|
20
|
+
});
|
|
21
|
+
// TypeScript-emitted CommonJS interop helper; an existing `this.__importStar` is reused when present.
// The returned function wraps a required module so it can be used like `import * as ns`:
// a module flagged `__esModule` is returned unchanged; otherwise a new object receives every own
// property except "default" via __createBinding, and its "default" is set to the module itself.
var __importStar = (this && this.__importStar) || (function () {
|
|
22
|
+
// Self-replacing function: the first call selects Object.getOwnPropertyNames or the
// for-in/hasOwnProperty fallback, stores it in `ownKeys`, then delegates to it.
var ownKeys = function(o) {
|
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
24
|
+
var ar = [];
|
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
26
|
+
return ar;
|
|
27
|
+
};
|
|
28
|
+
return ownKeys(o);
|
|
29
|
+
};
|
|
30
|
+
return function (mod) {
|
|
31
|
+
if (mod && mod.__esModule) return mod;
|
|
32
|
+
var result = {};
|
|
33
|
+
// null/undefined modules contribute no named bindings; only "default" is set below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
34
|
+
__setModuleDefault(result, mod);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
})();
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.LLMJudgeValidator = void 0;
|
|
40
|
+
const fs = __importStar(require("fs"));
|
|
41
|
+
const path = __importStar(require("path"));
|
|
42
|
+
const workspaceUtils_1 = require("../utils/workspaceUtils");
|
|
43
|
+
const githubAuth_1 = require("../utils/githubAuth");
|
|
44
|
+
// System message sent with every judge request (used as the 'system' role content in callLLMAPI).
// It fixes the JSON shape of the reply -- `evaluations`, `overallScore`, `summary` -- which is the
// same shape callLLMAPI checks before building a judgment; keep the prompt and that check in sync.
const judgeSystemPrompt = `You are a code review judge evaluating whether generated code follows specific coding guidelines.
|
|
45
|
+
|
|
46
|
+
Your task is to evaluate the provided code against a set of criteria and return a JSON assessment.
|
|
47
|
+
|
|
48
|
+
Be strict but fair. Only mark criteria as FAIL if there is a clear violation.
|
|
49
|
+
|
|
50
|
+
Respond ONLY with valid JSON in this exact format:
|
|
51
|
+
{
|
|
52
|
+
"evaluations": [
|
|
53
|
+
{
|
|
54
|
+
"criterion": "criterion text",
|
|
55
|
+
"result": "PASS" | "FAIL" | "N/A",
|
|
56
|
+
"explanation": "brief explanation"
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"overallScore": 0.0 to 1.0,
|
|
60
|
+
"summary": "one sentence summary"
|
|
61
|
+
}`;
|
|
62
|
+
/**
 * Validator that sends the generated files to an LLM judge (GitHub Models
 * chat-completions endpoint) and converts its JSON verdict into a
 * validation result.
 */
class LLMJudgeValidator {
    /**
     * @param workspaceRoot Optional workspace directory; passed through
     *   `resolveWorkspaceRoot` and used to resolve the generated files.
     * @param model Model used when the scenario does not name one.
     */
    constructor(workspaceRoot, model = 'openai/gpt-4.1') {
        this.type = 'llm-judge';
        this.workspaceRoot = (0, workspaceUtils_1.resolveWorkspaceRoot)(workspaceRoot);
        this.apiToken = (0, githubAuth_1.getGitHubToken)(); // Auto-detect from env or GitHub CLI
        this.defaultModel = model;
    }
    /**
     * Validate generated code using LLM judgment.
     *
     * @param files Generated file paths; files that do not exist are skipped.
     * @param scenario Scenario whose `validationStrategy.llmJudge` supplies
     *   `enabled`, and optionally `model` and `judgmentPrompt`.
     * @returns `score: -1` with `passed: true` when the judge is disabled or no
     *   token is available; `passed: false`, `score: 0` and an `error` message
     *   when the request or response handling fails; otherwise the verdict.
     */
    async validate(files, scenario) {
        const llmConfig = scenario.validationStrategy.llmJudge;
        // If LLM judge not enabled, skip
        if (!llmConfig?.enabled) {
            return {
                passed: true,
                score: -1,
                violations: [],
                validatorType: 'llm-judge',
            };
        }
        // If no API token, skip
        if (!this.apiToken) {
            console.warn('GITHUB_TOKEN not found, skipping LLM judge validation');
            return {
                passed: true,
                score: -1,
                violations: [],
                validatorType: 'llm-judge',
                error: 'GITHUB_TOKEN not found',
            };
        }
        try {
            // Read all generated files
            const absolutePaths = (0, workspaceUtils_1.resolveFilePaths)(this.workspaceRoot, files);
            const fileContents = [];
            for (const filePath of absolutePaths) {
                if (!fs.existsSync(filePath)) {
                    continue;
                }
                const content = fs.readFileSync(filePath, 'utf-8');
                const relativePath = path.relative(this.workspaceRoot, filePath);
                fileContents.push({ path: relativePath, content });
            }
            // Build judgment prompt
            const judgmentPrompt = this.buildJudgmentPrompt(scenario, fileContents, llmConfig.judgmentPrompt);
            // Call LLM API
            const model = llmConfig.model || this.defaultModel;
            const judgment = await this.callLLMAPI(judgmentPrompt, model);
            // Convert judgment to violations
            const violations = (judgment.violations ?? []).map(v => ({
                type: 'llm-judge',
                message: v.message,
                file: v.file,
                line: v.line,
                severity: scenario.severity,
                details: judgment.reasoning,
            }));
            return {
                passed: judgment.passed,
                score: judgment.score,
                violations,
                validatorType: 'llm-judge',
            };
        }
        catch (error) {
            return {
                passed: false,
                score: 0,
                violations: [],
                validatorType: 'llm-judge',
                error: `LLM judge failed: ${error}`,
            };
        }
    }
    /**
     * Build the judgment prompt for the LLM.
     *
     * @param scenario Scenario providing description, prompt, category and severity.
     * @param fileContents `{ path, content }` entries for the generated files.
     * @param customPrompt Optional prompt that replaces the generated one.
     * @returns The user prompt sent to the judge.
     */
    buildJudgmentPrompt(scenario, fileContents, customPrompt) {
        // NOTE(review): a custom prompt is returned verbatim, so the generated
        // file contents are NOT included unless the custom prompt embeds them
        // itself -- confirm this is intended.
        if (customPrompt) {
            return customPrompt;
        }
        const filesSection = fileContents
            .map(f => `### ${f.path}\n\`\`\`\n${f.content}\n\`\`\``)
            .join('\n\n');
        return `# Task Description
${scenario.description}

# Original Prompt Given to AI
${scenario.prompt}

# Generated Code
${filesSection}

# Evaluation Criteria
Evaluate whether the generated code:
1. Correctly implements the requirements from the prompt
2. Follows best practices for ${scenario.category}
3. Meets the quality standards for a ${scenario.severity} severity scenario

Be strict but fair in your evaluation.`;
    }
    /**
     * Call the GitHub Models API (or other LLM API).
     *
     * @param prompt User prompt for the judge.
     * @param model Model identifier sent in the request body.
     * @returns The parsed judgment (`passed`, `score`, `reasoning`, `violations`).
     * @throws Error on a non-OK HTTP status, a response without message
     *   content, or content that cannot be parsed into a judgment.
     */
    async callLLMAPI(prompt, model) {
        const apiUrl = 'https://models.github.ai/inference/chat/completions';
        const response = await fetch(apiUrl, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiToken}`,
            },
            body: JSON.stringify({
                model,
                messages: [
                    { role: 'system', content: judgeSystemPrompt },
                    { role: 'user', content: prompt },
                ],
                temperature: 0,
                response_format: { type: 'json_object' },
            }),
        });
        if (!response.ok) {
            const errorText = await response.text();
            throw new Error(`GitHub Models API error: ${response.status} ${errorText}`);
        }
        const data = (await response.json());
        // Optional chaining all the way down: a body without `choices` must
        // surface as "No content", not as an opaque TypeError.
        const content = data?.choices?.[0]?.message?.content;
        if (!content) {
            throw new Error('No content in LLM response');
        }
        return this.parseJudgment(content);
    }
    /**
     * Parse the JSON text returned by the judge into the internal judgment.
     *
     * @param content Message content of the first completion choice.
     * @returns `{ passed, score, reasoning, violations }`: `violations` holds
     *   one entry per evaluation marked 'FAIL'; `passed` requires no
     *   violations and an overall score of at least 0.7; `score` is clamped to
     *   the range [0, 1].
     * @throws Error ("Failed to parse LLM response: ...") when the content is
     *   not valid JSON, lacks `evaluations`/`overallScore`/`summary`, or
     *   carries a non-numeric `overallScore`.
     */
    parseJudgment(content) {
        try {
            const apiResponse = JSON.parse(content);
            // Number() mirrors the coercion the comparison and clamp below
            // would apply anyway; it lets non-numeric scores be rejected here
            // instead of leaking NaN into the result.
            const overallScore = Number(apiResponse.overallScore);
            // Validate API response structure
            if (!Array.isArray(apiResponse.evaluations) ||
                apiResponse.overallScore == null ||
                apiResponse.summary == null ||
                !Number.isFinite(overallScore)) {
                throw new Error('Invalid judgment structure');
            }
            // Extract violations from FAIL evaluations
            const violations = apiResponse.evaluations
                .filter(e => e.result === 'FAIL')
                .map(e => ({
                message: `${e.criterion}: ${e.explanation}`,
            }));
            // Determine if passed based on violations and score threshold
            const passed = violations.length === 0 && overallScore >= 0.7;
            return {
                passed,
                score: Math.max(0, Math.min(1, overallScore)), // Ensure score is in valid range
                reasoning: apiResponse.summary,
                violations,
            };
        }
        catch (error) {
            throw new Error(`Failed to parse LLM response: ${error}\nContent: ${content}`);
        }
    }
    /**
     * Test the LLM judge with a sample prompt (for debugging).
     *
     * @param prompt User prompt sent to the judge.
     * @param model Optional model override; the constructor's model is used otherwise.
     * @returns The judgment as pretty-printed JSON, or a string starting with
     *   "Error:" when no token is available or the call fails.
     */
    async testJudge(prompt, model) {
        if (!this.apiToken) {
            return 'Error: GITHUB_TOKEN not found';
        }
        try {
            const result = await this.callLLMAPI(prompt, model || this.defaultModel);
            return JSON.stringify(result, null, 2);
        }
        catch (error) {
            return `Error: ${error}`;
        }
    }
}
|
|
240
|
+
exports.LLMJudgeValidator = LLMJudgeValidator;
|
|
241
|
+
//# sourceMappingURL=llmJudge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAE7B,4DAAiF;AACjF,oDAAqD;AA6BrD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;EAiBxB,CAAC;AAEH,MAAa,iBAAiB;IAM5B,YAAY,aAAsB,EAAE,QAAgB,gBAAgB;QALpD,SAAI,GAAG,WAAoB,CAAC;QAM1C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,QAAQ,GAAG,IAAA,2BAAc,GAAE,CAAC,CAAC,qCAAqC;QACvE,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,SAAS,GAAG,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC;QAEvD,iCAAiC;QACjC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;YACtE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,wBAAwB;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,2BAA2B;YAC3B,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,YAAY,GAA6C,EAAE,CAAC;YAElE,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;gBACjE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,wBAAwB;YACxB,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAC7C,QAAQ,EACR,YAAY,EACZ,SAAS,CAAC,cAAc,CACzB,CAAC;YAEF,eAAe;YACf,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;YAE9D,iCAAiC;YACjC,MAAM,UAAU,GAAgB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACpE,IAAI,EAAE,WAAoB;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,QAAQ,EAA
E,QAAQ,CAAC,QAAQ;gBAC3B,OAAO,EAAE,QAAQ,CAAC,SAAS;aAC5B,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,UAAU;gBACV,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,qBAAqB,KAAK,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CACzB,QAAsB,EACtB,YAAsD,EACtD,YAAqB;QAErB,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO,YAAY,CAAC;QACtB,CAAC;QAED,MAAM,YAAY,GAAG,YAAY;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,OAAO,UAAU,CAAC;aACvD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,OAAO;EACT,QAAQ,CAAC,WAAW;;;EAGpB,QAAQ,CAAC,MAAM;;;EAGf,YAAY;;;;;gCAKkB,QAAQ,CAAC,QAAQ;uCACV,QAAQ,CAAC,QAAQ;;uCAEjB,CAAC;IACtC,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,MAAc,EAAE,KAAa;QACpD,MAAM,MAAM,GAAG,qDAAqD,CAAC;QAErE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,QAAQ,EAAE;aACzC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,iBAAiB,EAAE;oBAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;iBAClC;gBACD,WAAW,EAAE,CAAC;gBACd,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAQ,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QAElD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAmB,CAAC;YAE1D,kCAAkC;YAClC,IACE,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,WAAW,CAAC;gBACvC,WAAW,CAAC,YAAY,IAAI,IAAI;gBAChC,WAAW,CAAC,OAAO,IAAI,IAAI,EAC3B,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAChD,CAAC;YAED,qDAAqD;YACrD,2CAA2C;YAC3C,MAAM,UAAU,GAAG,
WAAW,CAAC,WAAW;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC;iBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACT,OAAO,EAAE,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,WAAW,EAAE;aAC5C,CAAC,CAAC,CAAC;YAEN,8DAA8D;YAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,YAAY,IAAI,GAAG,CAAC;YAE1E,MAAM,QAAQ,GAAgB;gBAC5B,MAAM;gBACN,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAC,EAAE,iCAAiC;gBAC5F,SAAS,EAAE,WAAW,CAAC,OAAO;gBAC9B,UAAU;aACX,CAAC;YAEF,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,iCAAiC,KAAK,cAAc,OAAO,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,KAAc;QAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC,CAAC;YACzE,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,UAAU,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;CACF;AAzND,8CAyNC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pattern-based code validator
|
|
3
|
+
*/
|
|
4
|
+
import { CodeValidator, TestScenario, ValidationResult } from '../types';
|
|
5
|
+
/**
* Declaration of the pattern-based validator (`type` is "pattern").
* NOTE(review): the implementation is not part of this declaration file; the
* notes below restate the declared signatures and hedge everything else.
*/
export declare class PatternValidator implements CodeValidator {
|
|
6
|
+
readonly type: "pattern";
|
|
7
|
+
private workspaceRoot;
|
|
8
|
+
/**
* @param workspaceRoot Optional workspace directory -- presumably the base for resolving `files`; confirm against the implementation.
*/
constructor(workspaceRoot?: string);
|
|
9
|
+
/**
|
|
10
|
+
* Validate generated code against pattern rules.
*
* @param files Paths of the generated files to check (not mutated; declared readonly).
* @param scenario Scenario supplying the pattern rules -- TODO confirm which fields are read.
* @returns Promise resolving to the ValidationResult for this validator.
|
|
11
|
+
*/
|
|
12
|
+
validate(files: readonly string[], scenario: TestScenario): Promise<ValidationResult>;
|
|
13
|
+
/**
|
|
14
|
+
* Find all matches of a pattern in text with line numbers
|
|
15
|
+
*/
|
|
16
|
+
private findPatternMatches;
|
|
17
|
+
/**
|
|
18
|
+
* Find the line number containing specific text
|
|
19
|
+
*/
|
|
20
|
+
private findLineWithText;
|
|
21
|
+
/**
|
|
22
|
+
* Calculate score based on number and severity of violations.
|
|
23
|
+
* Critical violations have more weight than minor ones.
|
|
24
|
+
*/
|
|
25
|
+
private calculateScore;
|
|
26
|
+
}
|
|
27
|
+
//# sourceMappingURL=patternValidator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"patternValidator.d.ts","sourceRoot":"","sources":["../../src/validators/patternValidator.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,gBAAgB,EAAa,MAAM,UAAU,CAAC;AAGpF,qBAAa,gBAAiB,YAAW,aAAa;IACpD,SAAgB,IAAI,EAAG,SAAS,CAAU;IAC1C,OAAO,CAAC,aAAa,CAAS;gBAElB,aAAa,CAAC,EAAE,MAAM;IAIlC;;OAEG;IACG,QAAQ,CACZ,KAAK,EAAE,SAAS,MAAM,EAAE,EACxB,QAAQ,EAAE,YAAY,GACrB,OAAO,CAAC,gBAAgB,CAAC;IA8I5B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAoB1B;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAgBxB;;;OAGG;IACH,OAAO,CAAC,cAAc;CAsBvB"}
|