coding-agent-benchmarks 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +171 -10
- package/dist/adapters/claudeCodeCLI.d.ts +1 -1
- package/dist/adapters/claudeCodeCLI.d.ts.map +1 -1
- package/dist/adapters/claudeCodeCLI.js +44 -32
- package/dist/adapters/claudeCodeCLI.js.map +1 -1
- package/dist/adapters/copilotCLI.d.ts.map +1 -1
- package/dist/adapters/copilotCLI.js +28 -16
- package/dist/adapters/copilotCLI.js.map +1 -1
- package/dist/evaluator.d.ts +4 -9
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +4 -12
- package/dist/evaluator.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/reporter.d.ts.map +1 -1
- package/dist/reporter.js +59 -34
- package/dist/reporter.js.map +1 -1
- package/dist/runner.js +2 -1
- package/dist/runner.js.map +1 -1
- package/dist/types.d.ts +6 -6
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/TypedEventEmitter.d.ts +18 -0
- package/dist/utils/TypedEventEmitter.d.ts.map +1 -0
- package/dist/utils/TypedEventEmitter.js +62 -0
- package/dist/utils/TypedEventEmitter.js.map +1 -0
- package/dist/utils/baselineManager.d.ts +2 -1
- package/dist/utils/baselineManager.d.ts.map +1 -1
- package/dist/utils/baselineManager.js +1 -0
- package/dist/utils/baselineManager.js.map +1 -1
- package/dist/utils/errorHandler.d.ts +10 -0
- package/dist/utils/errorHandler.d.ts.map +1 -0
- package/dist/utils/errorHandler.js +58 -0
- package/dist/utils/errorHandler.js.map +1 -0
- package/dist/utils/gitUtils.d.ts +2 -1
- package/dist/utils/gitUtils.d.ts.map +1 -1
- package/dist/utils/gitUtils.js +85 -18
- package/dist/utils/gitUtils.js.map +1 -1
- package/dist/validators/llmJudge.d.ts +1 -1
- package/dist/validators/llmJudge.d.ts.map +1 -1
- package/dist/validators/llmJudge.js +40 -41
- package/dist/validators/llmJudge.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errorHandler.js","sourceRoot":"","sources":["../../src/utils/errorHandler.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;AAEH,kDAA0B;AAG1B;;GAEG;AACI,MAAM,eAAe,GAAG,CAC7B,KAAc,EACd,OAA8B,EACxB,EAAE;IACR,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IACnC,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,KAAK,IAAI,CAAC;IAEpD,6BAA6B;IAC7B,IAAI,YAAY,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;QAClD,0BAA0B,CAAC,OAAO,EAAE,OAAO,EAAE,aAAa,CAAC,CAAC;QAC5D,OAAO;IACT,CAAC;IAED,gBAAgB;IAChB,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,eAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,YAAY,CAAC,CAAC;IACnD,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACxC,CAAC;AACH,CAAC,CAAC;AAnBW,QAAA,eAAe,mBAmB1B;AAEF;;GAEG;AACH,MAAM,0BAA0B,GAAG,CACjC,eAAwB,EACxB,aAAa,GAAG,IAAI,EACd,EAAE;IACR,MAAM,aAAa,GAAkB,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IAEhE,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,eAAK,CAAC,GAAG,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAElB,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CACX,KAAK,eAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,eAAK,CAAC,MAAM,CAAC,eAAe,CAAC,EAAE,CACrE,CAAC;QACJ,CAAC;QAED,OAAO,CAAC,KAAK,CACX,KAAK,eAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC3F,CAAC;QACF,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAClB,OAAO,CAAC,KAAK,CACX,eAAK,CAAC,IAAI,CACR,uEAAuE,CACxE,CACF,CAAC;QACF,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACpB,CAAC;SAAM,CAAC;QACN,iDAAiD;QACjD,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC7C,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,oBAAoB,eAAe,EAAE,CAAC,CAAC;QACvD,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,oBAAoB,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9D,OAAO,CAAC,KAAK,CACX,mEAAmE,CACpE,CAAC;IACJ,CAAC;AACH,CAAC,CAAC"}
|
package/dist/utils/gitUtils.d.ts
CHANGED
|
@@ -23,9 +23,10 @@ export declare const getChangedFiles: (workspaceRoot: string) => string[];
|
|
|
23
23
|
* Get files that changed between two git status snapshots
|
|
24
24
|
* @param before Git status output before operation
|
|
25
25
|
* @param after Git status output after operation
|
|
26
|
+
* @param workspaceRoot The workspace root to resolve directory contents
|
|
26
27
|
* @returns Array of file paths that were added or modified
|
|
27
28
|
*/
|
|
28
|
-
export declare const getChangedFilesDiff: (before: string, after: string) => string[];
|
|
29
|
+
export declare const getChangedFilesDiff: (before: string, after: string, workspaceRoot?: string) => string[];
|
|
29
30
|
/**
|
|
30
31
|
* Get the git root directory
|
|
31
32
|
* @returns Absolute path to git root
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gitUtils.d.ts","sourceRoot":"","sources":["../../src/utils/gitUtils.ts"],"names":[],"mappings":"AAAA;;GAEG;
|
|
1
|
+
{"version":3,"file":"gitUtils.d.ts","sourceRoot":"","sources":["../../src/utils/gitUtils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,GAAI,eAAe,MAAM,KAAG,MAU7D,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,cAAc,GAAI,cAAc,MAAM,KAAG,MAAM,EAwB3D,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,eAAe,GAAI,eAAe,MAAM,KAAG,MAAM,EAG7D,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,mBAAmB,GAC9B,QAAQ,MAAM,EACd,OAAO,MAAM,EACb,gBAAgB,MAAM,KACrB,MAAM,EAyBR,CAAC;AA+BF;;;GAGG;AACH,eAAO,MAAM,UAAU,QAAO,MAS7B,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,eAAe,GAAI,WAAW,MAAM,KAAG,OAWnD,CAAC"}
|
package/dist/utils/gitUtils.js
CHANGED
|
@@ -2,9 +2,44 @@
|
|
|
2
2
|
/**
|
|
3
3
|
* Git utilities for tracking file changes
|
|
4
4
|
*/
|
|
5
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
6
|
+
if (k2 === undefined) k2 = k;
|
|
7
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
8
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
9
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
10
|
+
}
|
|
11
|
+
Object.defineProperty(o, k2, desc);
|
|
12
|
+
}) : (function(o, m, k, k2) {
|
|
13
|
+
if (k2 === undefined) k2 = k;
|
|
14
|
+
o[k2] = m[k];
|
|
15
|
+
}));
|
|
16
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
17
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
18
|
+
}) : function(o, v) {
|
|
19
|
+
o["default"] = v;
|
|
20
|
+
});
|
|
21
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
22
|
+
var ownKeys = function(o) {
|
|
23
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
24
|
+
var ar = [];
|
|
25
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
26
|
+
return ar;
|
|
27
|
+
};
|
|
28
|
+
return ownKeys(o);
|
|
29
|
+
};
|
|
30
|
+
return function (mod) {
|
|
31
|
+
if (mod && mod.__esModule) return mod;
|
|
32
|
+
var result = {};
|
|
33
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
34
|
+
__setModuleDefault(result, mod);
|
|
35
|
+
return result;
|
|
36
|
+
};
|
|
37
|
+
})();
|
|
5
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
39
|
exports.isGitRepository = exports.getGitRoot = exports.getChangedFilesDiff = exports.getChangedFiles = exports.parseGitStatus = exports.getGitStatusPorcelain = void 0;
|
|
7
40
|
const child_process_1 = require("child_process");
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const path = __importStar(require("path"));
|
|
8
43
|
/**
|
|
9
44
|
* Get git status in porcelain format
|
|
10
45
|
* @param workspaceRoot The workspace root directory
|
|
@@ -12,10 +47,10 @@ const child_process_1 = require("child_process");
|
|
|
12
47
|
*/
|
|
13
48
|
const getGitStatusPorcelain = (workspaceRoot) => {
|
|
14
49
|
try {
|
|
15
|
-
return (0, child_process_1.execSync)(
|
|
50
|
+
return (0, child_process_1.execSync)("git status --porcelain", {
|
|
16
51
|
cwd: workspaceRoot,
|
|
17
|
-
encoding:
|
|
18
|
-
stdio: [
|
|
52
|
+
encoding: "utf-8",
|
|
53
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
19
54
|
});
|
|
20
55
|
}
|
|
21
56
|
catch (error) {
|
|
@@ -29,7 +64,10 @@ exports.getGitStatusPorcelain = getGitStatusPorcelain;
|
|
|
29
64
|
* @returns Array of file paths that were changed
|
|
30
65
|
*/
|
|
31
66
|
const parseGitStatus = (statusOutput) => {
|
|
32
|
-
const lines = statusOutput
|
|
67
|
+
const lines = statusOutput
|
|
68
|
+
.trim()
|
|
69
|
+
.split("\n")
|
|
70
|
+
.filter((line) => line.length > 0);
|
|
33
71
|
const files = [];
|
|
34
72
|
for (const line of lines) {
|
|
35
73
|
// Git porcelain format: XY filename
|
|
@@ -38,8 +76,8 @@ const parseGitStatus = (statusOutput) => {
|
|
|
38
76
|
if (line.length > 3) {
|
|
39
77
|
const filename = line.substring(3).trim();
|
|
40
78
|
// Handle renamed files (format: "old -> new")
|
|
41
|
-
if (filename.includes(
|
|
42
|
-
const newFilename = filename.split(
|
|
79
|
+
if (filename.includes(" -> ")) {
|
|
80
|
+
const newFilename = filename.split(" -> ")[1];
|
|
43
81
|
files.push(newFilename);
|
|
44
82
|
}
|
|
45
83
|
else {
|
|
@@ -64,11 +102,12 @@ exports.getChangedFiles = getChangedFiles;
|
|
|
64
102
|
* Get files that changed between two git status snapshots
|
|
65
103
|
* @param before Git status output before operation
|
|
66
104
|
* @param after Git status output after operation
|
|
105
|
+
* @param workspaceRoot The workspace root to resolve directory contents
|
|
67
106
|
* @returns Array of file paths that were added or modified
|
|
68
107
|
*/
|
|
69
|
-
const getChangedFilesDiff = (before, after) => {
|
|
70
|
-
const beforeLines = new Set(before.split(
|
|
71
|
-
const afterLines = after.split(
|
|
108
|
+
const getChangedFilesDiff = (before, after, workspaceRoot) => {
|
|
109
|
+
const beforeLines = new Set(before.split("\n").filter(Boolean));
|
|
110
|
+
const afterLines = after.split("\n").filter(Boolean);
|
|
72
111
|
const newOrModified = [];
|
|
73
112
|
for (const line of afterLines) {
|
|
74
113
|
if (!beforeLines.has(line)) {
|
|
@@ -76,8 +115,13 @@ const getChangedFilesDiff = (before, after) => {
|
|
|
76
115
|
const match = /^.{3}(.+)$/.exec(line);
|
|
77
116
|
if (match) {
|
|
78
117
|
const filePath = match[1];
|
|
79
|
-
//
|
|
80
|
-
if (
|
|
118
|
+
// If it's a directory, list files inside it
|
|
119
|
+
if (filePath.endsWith("/") && workspaceRoot) {
|
|
120
|
+
const dirPath = path.join(workspaceRoot, filePath);
|
|
121
|
+
const filesInDir = listFilesRecursively(dirPath, workspaceRoot);
|
|
122
|
+
newOrModified.push(...filesInDir);
|
|
123
|
+
}
|
|
124
|
+
else if (!filePath.endsWith("/")) {
|
|
81
125
|
newOrModified.push(filePath);
|
|
82
126
|
}
|
|
83
127
|
}
|
|
@@ -86,19 +130,42 @@ const getChangedFilesDiff = (before, after) => {
|
|
|
86
130
|
return newOrModified;
|
|
87
131
|
};
|
|
88
132
|
exports.getChangedFilesDiff = getChangedFilesDiff;
|
|
133
|
+
/**
|
|
134
|
+
* Recursively list all files in a directory
|
|
135
|
+
* @param dirPath Absolute path to directory
|
|
136
|
+
* @param workspaceRoot Workspace root for relative path calculation
|
|
137
|
+
* @returns Array of relative file paths
|
|
138
|
+
*/
|
|
139
|
+
const listFilesRecursively = (dirPath, workspaceRoot) => {
|
|
140
|
+
const files = [];
|
|
141
|
+
if (!fs.existsSync(dirPath)) {
|
|
142
|
+
return files;
|
|
143
|
+
}
|
|
144
|
+
const entries = fs.readdirSync(dirPath, { withFileTypes: true });
|
|
145
|
+
for (const entry of entries) {
|
|
146
|
+
const fullPath = path.join(dirPath, entry.name);
|
|
147
|
+
if (entry.isDirectory()) {
|
|
148
|
+
files.push(...listFilesRecursively(fullPath, workspaceRoot));
|
|
149
|
+
}
|
|
150
|
+
else {
|
|
151
|
+
files.push(path.relative(workspaceRoot, fullPath));
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
return files;
|
|
155
|
+
};
|
|
89
156
|
/**
|
|
90
157
|
* Get the git root directory
|
|
91
158
|
* @returns Absolute path to git root
|
|
92
159
|
*/
|
|
93
160
|
const getGitRoot = () => {
|
|
94
161
|
try {
|
|
95
|
-
return (0, child_process_1.execSync)(
|
|
96
|
-
encoding:
|
|
97
|
-
stdio: [
|
|
162
|
+
return (0, child_process_1.execSync)("git rev-parse --show-toplevel", {
|
|
163
|
+
encoding: "utf-8",
|
|
164
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
98
165
|
}).trim();
|
|
99
166
|
}
|
|
100
167
|
catch (error) {
|
|
101
|
-
throw new Error(
|
|
168
|
+
throw new Error("Not inside a git repository");
|
|
102
169
|
}
|
|
103
170
|
};
|
|
104
171
|
exports.getGitRoot = getGitRoot;
|
|
@@ -109,10 +176,10 @@ exports.getGitRoot = getGitRoot;
|
|
|
109
176
|
*/
|
|
110
177
|
const isGitRepository = (directory) => {
|
|
111
178
|
try {
|
|
112
|
-
(0, child_process_1.execSync)(
|
|
179
|
+
(0, child_process_1.execSync)("git rev-parse --git-dir", {
|
|
113
180
|
cwd: directory,
|
|
114
|
-
encoding:
|
|
115
|
-
stdio: [
|
|
181
|
+
encoding: "utf-8",
|
|
182
|
+
stdio: ["pipe", "pipe", "ignore"],
|
|
116
183
|
});
|
|
117
184
|
return true;
|
|
118
185
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gitUtils.js","sourceRoot":"","sources":["../../src/utils/gitUtils.ts"],"names":[],"mappings":";AAAA;;GAEG
|
|
1
|
+
{"version":3,"file":"gitUtils.js","sourceRoot":"","sources":["../../src/utils/gitUtils.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,iDAAyC;AACzC,uCAAyB;AACzB,2CAA6B;AAE7B;;;;GAIG;AACI,MAAM,qBAAqB,GAAG,CAAC,aAAqB,EAAU,EAAE;IACrE,IAAI,CAAC;QACH,OAAO,IAAA,wBAAQ,EAAC,wBAAwB,EAAE;YACxC,GAAG,EAAE,aAAa;YAClB,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;IACxD,CAAC;AACH,CAAC,CAAC;AAVW,QAAA,qBAAqB,yBAUhC;AAEF;;;;GAIG;AACI,MAAM,cAAc,GAAG,CAAC,YAAoB,EAAY,EAAE;IAC/D,MAAM,KAAK,GAAG,YAAY;SACvB,IAAI,EAAE;SACN,KAAK,CAAC,IAAI,CAAC;SACX,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,oCAAoC;QACpC,4CAA4C;QAC5C,gEAAgE;QAChE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1C,8CAA8C;YAC9C,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC9B,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9C,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC1B,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC,CAAC;AAxBW,QAAA,cAAc,kBAwBzB;AAEF;;;;GAIG;AACI,MAAM,eAAe,GAAG,CAAC,aAAqB,EAAY,EAAE;IACjE,MAAM,YAAY,GAAG,IAAA,6BAAqB,EAAC,aAAa,CAAC,CAAC;IAC1D,OAAO,IAAA,sBAAc,EAAC,YAAY,CAAC,CAAC;AACtC,CAAC,CAAC;AAHW,QAAA,eAAe,mBAG1B;AAEF;;;;;;GAMG;AACI,MAAM,mBAAmB,GAAG,CACjC,MAAc,EACd,KAAa,EACb,aAAsB,EACZ,EAAE;IACZ,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;IAChE,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAErD,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3B,oEAAoE;YACpE,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtC,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC1B,4CAA4C;gBAC5C,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,aAAa,EAAE,CAAC;oBAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;oBACnD,MAAM,UAAU,GAAG,oBAAoB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;oBAChE,aAAa,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;gBACpC,CAAC;qBAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC/B,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC;AACvB,CAAC,CAAC;AA7BW,QAAA,mBAAmB,uBA6B9B;AAEF;;;;;GAKG;AACH,MAAM,oBAAoB,GAAG,CAC3B,OAAe,EACf,aAAqB,EACX,EAAE;IACZ,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IACjE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QAChD,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,GAAG,oBAAoB,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC;QAC/D,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC,CAAC;AAEF;;;GAGG;AACI,MAAM,UAAU,GAAG,GAAW,EAAE;IACrC,IAAI,CAAC;QACH,OAAO,IAAA,wBAAQ,EAAC,+BAA+B,EAAE;YAC/C,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC,IAAI,EAAE,CAAC;IACZ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;IACjD,CAAC;AACH,CAAC,CAAC;AATW,QAAA,UAAU,cASrB;AAEF;;;;GAIG;AACI,MAAM,eAAe,GAAG,CAAC,SAAiB,EAAW,EAAE;IAC5D,IAAI,CAAC;QACH,IAAA,wBAAQ,EAAC,yBAAyB,EAAE;YAClC,GAAG,EAAE,SAAS;YACd,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;SAClC,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC,CAAC;AAXW,QAAA,eAAe,mBAW1B"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* LLM-as-Judge validator using GitHub Models API
|
|
3
3
|
*/
|
|
4
|
-
import { CodeValidator, TestScenario, ValidationResult } from
|
|
4
|
+
import { CodeValidator, TestScenario, ValidationResult } from "../types";
|
|
5
5
|
export declare class LLMJudgeValidator implements CodeValidator {
|
|
6
6
|
readonly type: "llm-judge";
|
|
7
7
|
private workspaceRoot;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llmJudge.d.ts","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,
|
|
1
|
+
{"version":3,"file":"llmJudge.d.ts","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EACL,aAAa,EACb,YAAY,EACZ,gBAAgB,EAEjB,MAAM,UAAU,CAAC;AA6DlB,qBAAa,iBAAkB,YAAW,aAAa;IACrD,SAAgB,IAAI,EAAG,WAAW,CAAU;IAC5C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,YAAY,CAAS;gBAEjB,aAAa,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAyB;IAMpE;;OAEG;IACG,QAAQ,CACZ,KAAK,EAAE,SAAS,MAAM,EAAE,EACxB,QAAQ,EAAE,YAAY,GACrB,OAAO,CAAC,gBAAgB,CAAC;IA8E5B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA+B3B;;OAEG;YACW,UAAU;IA4ExB;;OAEG;IACG,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAYjE"}
|
|
@@ -60,8 +60,8 @@ Respond ONLY with valid JSON in this exact format:
|
|
|
60
60
|
"summary": "one sentence summary"
|
|
61
61
|
}`;
|
|
62
62
|
class LLMJudgeValidator {
|
|
63
|
-
constructor(workspaceRoot, model =
|
|
64
|
-
this.type =
|
|
63
|
+
constructor(workspaceRoot, model = "openai/gpt-4.1") {
|
|
64
|
+
this.type = "llm-judge";
|
|
65
65
|
this.workspaceRoot = (0, workspaceUtils_1.resolveWorkspaceRoot)(workspaceRoot);
|
|
66
66
|
this.apiToken = (0, githubAuth_1.getGitHubToken)(); // Auto-detect from env or GitHub CLI
|
|
67
67
|
this.defaultModel = model;
|
|
@@ -75,20 +75,20 @@ class LLMJudgeValidator {
|
|
|
75
75
|
if (!llmConfig?.enabled) {
|
|
76
76
|
return {
|
|
77
77
|
passed: true,
|
|
78
|
-
score: -1,
|
|
78
|
+
score: -1, // Indicate skipped
|
|
79
79
|
violations: [],
|
|
80
|
-
validatorType:
|
|
80
|
+
validatorType: "llm-judge",
|
|
81
81
|
};
|
|
82
82
|
}
|
|
83
83
|
// If no API token, skip
|
|
84
84
|
if (!this.apiToken) {
|
|
85
|
-
console.warn(
|
|
85
|
+
console.warn("GITHUB_TOKEN not found, skipping LLM judge validation");
|
|
86
86
|
return {
|
|
87
87
|
passed: true,
|
|
88
|
-
score: -1,
|
|
88
|
+
score: -1, // Indicate skipped
|
|
89
89
|
violations: [],
|
|
90
|
-
validatorType:
|
|
91
|
-
error:
|
|
90
|
+
validatorType: "llm-judge",
|
|
91
|
+
error: "GITHUB_TOKEN not found",
|
|
92
92
|
};
|
|
93
93
|
}
|
|
94
94
|
try {
|
|
@@ -99,18 +99,18 @@ class LLMJudgeValidator {
|
|
|
99
99
|
if (!fs.existsSync(filePath)) {
|
|
100
100
|
continue;
|
|
101
101
|
}
|
|
102
|
-
const content = fs.readFileSync(filePath,
|
|
102
|
+
const content = fs.readFileSync(filePath, "utf-8");
|
|
103
103
|
const relativePath = path.relative(this.workspaceRoot, filePath);
|
|
104
104
|
fileContents.push({ path: relativePath, content });
|
|
105
105
|
}
|
|
106
106
|
// Build judgment prompt
|
|
107
107
|
const judgmentPrompt = this.buildJudgmentPrompt(scenario, fileContents, llmConfig.judgmentPrompt);
|
|
108
|
-
//
|
|
108
|
+
// Calling LLM API
|
|
109
109
|
const model = llmConfig.model || this.defaultModel;
|
|
110
110
|
const judgment = await this.callLLMAPI(judgmentPrompt, model);
|
|
111
111
|
// Convert judgment to violations
|
|
112
|
-
const violations = (judgment.violations ?? []).map(v => ({
|
|
113
|
-
type:
|
|
112
|
+
const violations = (judgment.violations ?? []).map((v) => ({
|
|
113
|
+
type: "llm-judge",
|
|
114
114
|
message: v.message,
|
|
115
115
|
file: v.file,
|
|
116
116
|
line: v.line,
|
|
@@ -121,7 +121,7 @@ class LLMJudgeValidator {
|
|
|
121
121
|
passed: judgment.passed,
|
|
122
122
|
score: judgment.score,
|
|
123
123
|
violations,
|
|
124
|
-
validatorType:
|
|
124
|
+
validatorType: "llm-judge",
|
|
125
125
|
};
|
|
126
126
|
}
|
|
127
127
|
catch (error) {
|
|
@@ -129,7 +129,7 @@ class LLMJudgeValidator {
|
|
|
129
129
|
passed: false,
|
|
130
130
|
score: 0,
|
|
131
131
|
violations: [],
|
|
132
|
-
validatorType:
|
|
132
|
+
validatorType: "llm-judge",
|
|
133
133
|
error: `LLM judge failed: ${error}`,
|
|
134
134
|
};
|
|
135
135
|
}
|
|
@@ -138,48 +138,47 @@ class LLMJudgeValidator {
|
|
|
138
138
|
* Build the judgment prompt for the LLM
|
|
139
139
|
*/
|
|
140
140
|
buildJudgmentPrompt(scenario, fileContents, customPrompt) {
|
|
141
|
-
if (customPrompt) {
|
|
142
|
-
return customPrompt;
|
|
143
|
-
}
|
|
144
141
|
const filesSection = fileContents
|
|
145
|
-
.map(f => `### ${f.path}\n\`\`\`\n${f.content}\n\`\`\``)
|
|
146
|
-
.join(
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
# Original Prompt Given to AI
|
|
151
|
-
${scenario.prompt}
|
|
152
|
-
|
|
153
|
-
# Generated Code
|
|
154
|
-
${filesSection}
|
|
155
|
-
|
|
156
|
-
# Evaluation Criteria
|
|
157
|
-
Evaluate whether the generated code:
|
|
142
|
+
.map((f) => `### ${f.path}\n\`\`\`\n${f.content}\n\`\`\``)
|
|
143
|
+
.join("\n\n");
|
|
144
|
+
const evaluationCriteria = customPrompt ||
|
|
145
|
+
`Evaluate whether the generated code:
|
|
158
146
|
1. Correctly implements the requirements from the prompt
|
|
159
147
|
2. Follows best practices for ${scenario.category}
|
|
160
148
|
3. Meets the quality standards for a ${scenario.severity} severity scenario
|
|
161
149
|
|
|
162
150
|
Be strict but fair in your evaluation.`;
|
|
151
|
+
return `# Task Description
|
|
152
|
+
${scenario.description}
|
|
153
|
+
|
|
154
|
+
# Original Prompt Given to AI
|
|
155
|
+
${scenario.prompt}
|
|
156
|
+
|
|
157
|
+
# Generated Code
|
|
158
|
+
${filesSection}
|
|
159
|
+
|
|
160
|
+
# Evaluation Criteria
|
|
161
|
+
${evaluationCriteria}`;
|
|
163
162
|
}
|
|
164
163
|
/**
|
|
165
164
|
* Call the GitHub Models API (or other LLM API)
|
|
166
165
|
*/
|
|
167
166
|
async callLLMAPI(prompt, model) {
|
|
168
|
-
const apiUrl =
|
|
167
|
+
const apiUrl = "https://models.github.ai/inference/chat/completions";
|
|
169
168
|
const response = await fetch(apiUrl, {
|
|
170
|
-
method:
|
|
169
|
+
method: "POST",
|
|
171
170
|
headers: {
|
|
172
|
-
|
|
171
|
+
"Content-Type": "application/json",
|
|
173
172
|
Authorization: `Bearer ${this.apiToken}`,
|
|
174
173
|
},
|
|
175
174
|
body: JSON.stringify({
|
|
176
175
|
model,
|
|
177
176
|
messages: [
|
|
178
|
-
{ role:
|
|
179
|
-
{ role:
|
|
177
|
+
{ role: "system", content: judgeSystemPrompt },
|
|
178
|
+
{ role: "user", content: prompt },
|
|
180
179
|
],
|
|
181
180
|
temperature: 0,
|
|
182
|
-
response_format: { type:
|
|
181
|
+
response_format: { type: "json_object" },
|
|
183
182
|
}),
|
|
184
183
|
});
|
|
185
184
|
if (!response.ok) {
|
|
@@ -189,7 +188,7 @@ Be strict but fair in your evaluation.`;
|
|
|
189
188
|
const data = (await response.json());
|
|
190
189
|
const content = data.choices[0]?.message?.content;
|
|
191
190
|
if (!content) {
|
|
192
|
-
throw new Error(
|
|
191
|
+
throw new Error("No content in LLM response");
|
|
193
192
|
}
|
|
194
193
|
// Parse JSON response
|
|
195
194
|
try {
|
|
@@ -198,13 +197,13 @@ Be strict but fair in your evaluation.`;
|
|
|
198
197
|
if (!Array.isArray(apiResponse.evaluations) ||
|
|
199
198
|
apiResponse.overallScore == null ||
|
|
200
199
|
apiResponse.summary == null) {
|
|
201
|
-
throw new Error(
|
|
200
|
+
throw new Error("Invalid judgment structure");
|
|
202
201
|
}
|
|
203
202
|
// Transform API response to internal judgment format
|
|
204
203
|
// Extract violations from FAIL evaluations
|
|
205
204
|
const violations = apiResponse.evaluations
|
|
206
|
-
.filter(e => e.result ===
|
|
207
|
-
.map(e => ({
|
|
205
|
+
.filter((e) => e.result === "FAIL")
|
|
206
|
+
.map((e) => ({
|
|
208
207
|
message: `${e.criterion}: ${e.explanation}`,
|
|
209
208
|
}));
|
|
210
209
|
// Determine if passed based on violations and score threshold
|
|
@@ -226,7 +225,7 @@ Be strict but fair in your evaluation.`;
|
|
|
226
225
|
*/
|
|
227
226
|
async testJudge(prompt, model) {
|
|
228
227
|
if (!this.apiToken) {
|
|
229
|
-
return
|
|
228
|
+
return "Error: GITHUB_TOKEN not found";
|
|
230
229
|
}
|
|
231
230
|
try {
|
|
232
231
|
const result = await this.callLLMAPI(prompt, model || this.defaultModel);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;
|
|
1
|
+
{"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAO7B,4DAGiC;AACjC,oDAAqD;AAqCrD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;EAiBxB,CAAC;AAEH,MAAa,iBAAiB;IAM5B,YAAY,aAAsB,EAAE,QAAgB,gBAAgB;QALpD,SAAI,GAAG,WAAoB,CAAC;QAM1C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,QAAQ,GAAG,IAAA,2BAAc,GAAE,CAAC,CAAC,qCAAqC;QACvE,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,SAAS,GAAG,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC;QAEvD,iCAAiC;QACjC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;YACtE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,wBAAwB;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,2BAA2B;YAC3B,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,YAAY,GAAsB,EAAE,CAAC;YAE3C,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;gBACjE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,wBAAwB;YACxB,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAC7C,QAAQ,EACR,YAAY,EACZ,SAAS,CAAC,cAAc,CACzB,CAAC;YAEF,kBAAkB;YAClB,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;YAE9D,iCAAiC;YACjC,MAAM,UAAU,GAAgB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACtE,IAAI,EAAE,WAAoB;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,QAAQ,EAAE,QAAQ,CAAC,QAAQ;gBAC3B,OAAO,EAAE,QAAQ,CAAC,SAAS;aAC5B,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,UAAU;gBACV,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,qBAAqB,KAAK,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CACzB,QAAsB,EACtB,YAA+B,EAC/B,YAAqB;QAErB,MAAM,YAAY,GAAG,YAAY;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,OAAO,UAAU,CAAC;aACzD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,kBAAkB,GACtB,YAAY;YACZ;;gCAE0B,QAAQ,CAAC,QAAQ;uCACV,QAAQ,CAAC,QAAQ;;uCAEjB,CAAC;QAEpC,OAAO;UACD,QAAQ,CAAC,WAAW;;;UAGpB,QAAQ,CAAC,MAAM;;;UAGf,YAAY;;;QAGd,kBAAkB,EAAE,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CACtB,MAAc,EACd,KAAa;QAEb,MAAM,MAAM,GAAG,qDAAqD,CAAC;QAErE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,QAAQ,EAAE;aACzC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,iBAAiB,EAAE;oBAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;iBAClC;gBACD,WAAW,EAAE,CAAC;gBACd,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,4BAA4B,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAC3D,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAQ,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QAElD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAmB,CAAC;YAE1D,kCAAkC;YAClC,IACE,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,WAAW,CAAC;gBACvC,WAAW,CAAC,YAAY,IAAI,IAAI;gBAChC,WAAW,CAAC,OAAO,IAAI,IAAI,EAC3B,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAChD,CAAC;YAED,qDAAqD;YACrD,2CAA2C;YAC3C,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC;iBAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACX,OAAO,EAAE,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,WAAW,EAAE;aAC5C,CAAC,CAAC,CAAC;YAEN,8DAA8D;YAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,YAAY,IAAI,GAAG,CAAC;YAE1E,MAAM,QAAQ,GAAgB;gBAC5B,MAAM;gBACN,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAC,EAAE,iCAAiC;gBAC5F,SAAS,EAAE,WAAW,CAAC,OAAO;gBAC9B,UAAU;aACX,CAAC;YAEF,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CACb,iCAAiC,KAAK,cAAc,OAAO,EAAE,CAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,KAAc;QAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC,CAAC;YACzE,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,UAAU,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;CACF;AAhOD,8CAgOC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "coding-agent-benchmarks",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Testing coding agents (GitHub Copilot CLI, Claude Code, etc.) with your repo's context to evaluate their code generation quality.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|