coding-agent-benchmarks 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/README.md +171 -10
  2. package/dist/adapters/claudeCodeCLI.d.ts +1 -1
  3. package/dist/adapters/claudeCodeCLI.d.ts.map +1 -1
  4. package/dist/adapters/claudeCodeCLI.js +44 -32
  5. package/dist/adapters/claudeCodeCLI.js.map +1 -1
  6. package/dist/adapters/copilotCLI.d.ts.map +1 -1
  7. package/dist/adapters/copilotCLI.js +28 -16
  8. package/dist/adapters/copilotCLI.js.map +1 -1
  9. package/dist/evaluator.d.ts +4 -9
  10. package/dist/evaluator.d.ts.map +1 -1
  11. package/dist/evaluator.js +4 -12
  12. package/dist/evaluator.js.map +1 -1
  13. package/dist/index.d.ts +2 -1
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +3 -1
  16. package/dist/index.js.map +1 -1
  17. package/dist/reporter.d.ts.map +1 -1
  18. package/dist/reporter.js +59 -34
  19. package/dist/reporter.js.map +1 -1
  20. package/dist/runner.js +2 -1
  21. package/dist/runner.js.map +1 -1
  22. package/dist/types.d.ts +6 -6
  23. package/dist/types.d.ts.map +1 -1
  24. package/dist/utils/TypedEventEmitter.d.ts +18 -0
  25. package/dist/utils/TypedEventEmitter.d.ts.map +1 -0
  26. package/dist/utils/TypedEventEmitter.js +62 -0
  27. package/dist/utils/TypedEventEmitter.js.map +1 -0
  28. package/dist/utils/baselineManager.d.ts +2 -1
  29. package/dist/utils/baselineManager.d.ts.map +1 -1
  30. package/dist/utils/baselineManager.js +1 -0
  31. package/dist/utils/baselineManager.js.map +1 -1
  32. package/dist/utils/errorHandler.d.ts +10 -0
  33. package/dist/utils/errorHandler.d.ts.map +1 -0
  34. package/dist/utils/errorHandler.js +58 -0
  35. package/dist/utils/errorHandler.js.map +1 -0
  36. package/dist/utils/gitUtils.d.ts +2 -1
  37. package/dist/utils/gitUtils.d.ts.map +1 -1
  38. package/dist/utils/gitUtils.js +85 -18
  39. package/dist/utils/gitUtils.js.map +1 -1
  40. package/dist/validators/llmJudge.d.ts +1 -1
  41. package/dist/validators/llmJudge.d.ts.map +1 -1
  42. package/dist/validators/llmJudge.js +40 -41
  43. package/dist/validators/llmJudge.js.map +1 -1
  44. package/package.json +1 -1
@@ -0,0 +1 @@
1
+ {"version":3,"file":"errorHandler.js","sourceRoot":"","sources":["../../src/utils/errorHandler.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;AAEH,kDAA0B;AAG1B;;GAEG;AACI,MAAM,eAAe,GAAG,CAC7B,KAAc,EACd,OAA8B,EACxB,EAAE;IACR,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IACnC,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,KAAK,IAAI,CAAC;IAEpD,6BAA6B;IAC7B,IAAI,YAAY,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;QAClD,0BAA0B,CAAC,OAAO,EAAE,OAAO,EAAE,aAAa,CAAC,CAAC;QAC5D,OAAO;IACT,CAAC;IAED,gBAAgB;IAChB,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,eAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,YAAY,CAAC,CAAC;IACnD,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACxC,CAAC;AACH,CAAC,CAAC;AAnBW,QAAA,eAAe,mBAmB1B;AAEF;;GAEG;AACH,MAAM,0BAA0B,GAAG,CACjC,eAAwB,EACxB,aAAa,GAAG,IAAI,EACd,EAAE;IACR,MAAM,aAAa,GAAkB,CAAC,SAAS,EAAE,aAAa,CAAC,CAAC;IAEhE,IAAI,aAAa,EAAE,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,eAAK,CAAC,GAAG,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAElB,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CACX,KAAK,eAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,eAAK,CAAC,MAAM,CAAC,eAAe,CAAC,EAAE,CACrE,CAAC;QACJ,CAAC;QAED,OAAO,CAAC,KAAK,CACX,KAAK,eAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAC3F,CAAC;QACF,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAClB,OAAO,CAAC,KAAK,CACX,eAAK,CAAC,IAAI,CACR,uEAAuE,CACxE,CACF,CAAC;QACF,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACpB,CAAC;SAAM,CAAC;QACN,iDAAiD;QACjD,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QAC7C,IAAI,eAAe,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,oBAAoB,eAAe,EAAE,CAAC,CAAC;QACvD,CAAC;QACD,OAAO,CAAC,KAAK,CAAC,oBAAoB,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9D,OAAO,CAAC,KAAK,CACX,mEAAmE,CACpE,CAAC;IACJ,CAAC;AACH,CAAC,CAAC"}
@@ -23,9 +23,10 @@ export declare const getChangedFiles: (workspaceRoot: string) => string[];
23
23
  * Get files that changed between two git status snapshots
24
24
  * @param before Git status output before operation
25
25
  * @param after Git status output after operation
26
+ * @param workspaceRoot The workspace root to resolve directory contents
26
27
  * @returns Array of file paths that were added or modified
27
28
  */
28
- export declare const getChangedFilesDiff: (before: string, after: string) => string[];
29
+ export declare const getChangedFilesDiff: (before: string, after: string, workspaceRoot?: string) => string[];
29
30
  /**
30
31
  * Get the git root directory
31
32
  * @returns Absolute path to git root
@@ -1 +1 @@
1
- {"version":3,"file":"gitUtils.d.ts","sourceRoot":"","sources":["../../src/utils/gitUtils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,GAAI,eAAe,MAAM,KAAG,MAU7D,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,cAAc,GAAI,cAAc,MAAM,KAAG,MAAM,EAqB3D,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,eAAe,GAAI,eAAe,MAAM,KAAG,MAAM,EAG7D,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,mBAAmB,GAAI,QAAQ,MAAM,EAAE,OAAO,MAAM,KAAG,MAAM,EAqBzE,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,UAAU,QAAO,MAS7B,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,eAAe,GAAI,WAAW,MAAM,KAAG,OAWnD,CAAC"}
1
+ {"version":3,"file":"gitUtils.d.ts","sourceRoot":"","sources":["../../src/utils/gitUtils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,GAAI,eAAe,MAAM,KAAG,MAU7D,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,cAAc,GAAI,cAAc,MAAM,KAAG,MAAM,EAwB3D,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,eAAe,GAAI,eAAe,MAAM,KAAG,MAAM,EAG7D,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,mBAAmB,GAC9B,QAAQ,MAAM,EACd,OAAO,MAAM,EACb,gBAAgB,MAAM,KACrB,MAAM,EAyBR,CAAC;AA+BF;;;GAGG;AACH,eAAO,MAAM,UAAU,QAAO,MAS7B,CAAC;AAEF;;;;GAIG;AACH,eAAO,MAAM,eAAe,GAAI,WAAW,MAAM,KAAG,OAWnD,CAAC"}
@@ -2,9 +2,44 @@
2
2
  /**
3
3
  * Git utilities for tracking file changes
4
4
  */
5
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
6
+ if (k2 === undefined) k2 = k;
7
+ var desc = Object.getOwnPropertyDescriptor(m, k);
8
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
9
+ desc = { enumerable: true, get: function() { return m[k]; } };
10
+ }
11
+ Object.defineProperty(o, k2, desc);
12
+ }) : (function(o, m, k, k2) {
13
+ if (k2 === undefined) k2 = k;
14
+ o[k2] = m[k];
15
+ }));
16
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
17
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
18
+ }) : function(o, v) {
19
+ o["default"] = v;
20
+ });
21
+ var __importStar = (this && this.__importStar) || (function () {
22
+ var ownKeys = function(o) {
23
+ ownKeys = Object.getOwnPropertyNames || function (o) {
24
+ var ar = [];
25
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
26
+ return ar;
27
+ };
28
+ return ownKeys(o);
29
+ };
30
+ return function (mod) {
31
+ if (mod && mod.__esModule) return mod;
32
+ var result = {};
33
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
34
+ __setModuleDefault(result, mod);
35
+ return result;
36
+ };
37
+ })();
5
38
  Object.defineProperty(exports, "__esModule", { value: true });
6
39
  exports.isGitRepository = exports.getGitRoot = exports.getChangedFilesDiff = exports.getChangedFiles = exports.parseGitStatus = exports.getGitStatusPorcelain = void 0;
7
40
  const child_process_1 = require("child_process");
41
+ const fs = __importStar(require("fs"));
42
+ const path = __importStar(require("path"));
8
43
  /**
9
44
  * Get git status in porcelain format
10
45
  * @param workspaceRoot The workspace root directory
@@ -12,10 +47,10 @@ const child_process_1 = require("child_process");
12
47
  */
13
48
  const getGitStatusPorcelain = (workspaceRoot) => {
14
49
  try {
15
- return (0, child_process_1.execSync)('git status --porcelain', {
50
+ return (0, child_process_1.execSync)("git status --porcelain", {
16
51
  cwd: workspaceRoot,
17
- encoding: 'utf-8',
18
- stdio: ['pipe', 'pipe', 'pipe'],
52
+ encoding: "utf-8",
53
+ stdio: ["pipe", "pipe", "pipe"],
19
54
  });
20
55
  }
21
56
  catch (error) {
@@ -29,7 +64,10 @@ exports.getGitStatusPorcelain = getGitStatusPorcelain;
29
64
  * @returns Array of file paths that were changed
30
65
  */
31
66
  const parseGitStatus = (statusOutput) => {
32
- const lines = statusOutput.trim().split('\n').filter(line => line.length > 0);
67
+ const lines = statusOutput
68
+ .trim()
69
+ .split("\n")
70
+ .filter((line) => line.length > 0);
33
71
  const files = [];
34
72
  for (const line of lines) {
35
73
  // Git porcelain format: XY filename
@@ -38,8 +76,8 @@ const parseGitStatus = (statusOutput) => {
38
76
  if (line.length > 3) {
39
77
  const filename = line.substring(3).trim();
40
78
  // Handle renamed files (format: "old -> new")
41
- if (filename.includes(' -> ')) {
42
- const newFilename = filename.split(' -> ')[1];
79
+ if (filename.includes(" -> ")) {
80
+ const newFilename = filename.split(" -> ")[1];
43
81
  files.push(newFilename);
44
82
  }
45
83
  else {
@@ -64,11 +102,12 @@ exports.getChangedFiles = getChangedFiles;
64
102
  * Get files that changed between two git status snapshots
65
103
  * @param before Git status output before operation
66
104
  * @param after Git status output after operation
105
+ * @param workspaceRoot The workspace root to resolve directory contents
67
106
  * @returns Array of file paths that were added or modified
68
107
  */
69
- const getChangedFilesDiff = (before, after) => {
70
- const beforeLines = new Set(before.split('\n').filter(Boolean));
71
- const afterLines = after.split('\n').filter(Boolean);
108
+ const getChangedFilesDiff = (before, after, workspaceRoot) => {
109
+ const beforeLines = new Set(before.split("\n").filter(Boolean));
110
+ const afterLines = after.split("\n").filter(Boolean);
72
111
  const newOrModified = [];
73
112
  for (const line of afterLines) {
74
113
  if (!beforeLines.has(line)) {
@@ -76,8 +115,13 @@ const getChangedFilesDiff = (before, after) => {
76
115
  const match = /^.{3}(.+)$/.exec(line);
77
116
  if (match) {
78
117
  const filePath = match[1];
79
- // Skip directories (end with /)
80
- if (!filePath.endsWith('/')) {
118
+ // If it's a directory, list files inside it
119
+ if (filePath.endsWith("/") && workspaceRoot) {
120
+ const dirPath = path.join(workspaceRoot, filePath);
121
+ const filesInDir = listFilesRecursively(dirPath, workspaceRoot);
122
+ newOrModified.push(...filesInDir);
123
+ }
124
+ else if (!filePath.endsWith("/")) {
81
125
  newOrModified.push(filePath);
82
126
  }
83
127
  }
@@ -86,19 +130,42 @@ const getChangedFilesDiff = (before, after) => {
86
130
  return newOrModified;
87
131
  };
88
132
  exports.getChangedFilesDiff = getChangedFilesDiff;
133
+ /**
134
+ * Recursively list all files in a directory
135
+ * @param dirPath Absolute path to directory
136
+ * @param workspaceRoot Workspace root for relative path calculation
137
+ * @returns Array of relative file paths
138
+ */
139
+ const listFilesRecursively = (dirPath, workspaceRoot) => {
140
+ const files = [];
141
+ if (!fs.existsSync(dirPath)) {
142
+ return files;
143
+ }
144
+ const entries = fs.readdirSync(dirPath, { withFileTypes: true });
145
+ for (const entry of entries) {
146
+ const fullPath = path.join(dirPath, entry.name);
147
+ if (entry.isDirectory()) {
148
+ files.push(...listFilesRecursively(fullPath, workspaceRoot));
149
+ }
150
+ else {
151
+ files.push(path.relative(workspaceRoot, fullPath));
152
+ }
153
+ }
154
+ return files;
155
+ };
89
156
  /**
90
157
  * Get the git root directory
91
158
  * @returns Absolute path to git root
92
159
  */
93
160
  const getGitRoot = () => {
94
161
  try {
95
- return (0, child_process_1.execSync)('git rev-parse --show-toplevel', {
96
- encoding: 'utf-8',
97
- stdio: ['pipe', 'pipe', 'pipe'],
162
+ return (0, child_process_1.execSync)("git rev-parse --show-toplevel", {
163
+ encoding: "utf-8",
164
+ stdio: ["pipe", "pipe", "pipe"],
98
165
  }).trim();
99
166
  }
100
167
  catch (error) {
101
- throw new Error('Not inside a git repository');
168
+ throw new Error("Not inside a git repository");
102
169
  }
103
170
  };
104
171
  exports.getGitRoot = getGitRoot;
@@ -109,10 +176,10 @@ exports.getGitRoot = getGitRoot;
109
176
  */
110
177
  const isGitRepository = (directory) => {
111
178
  try {
112
- (0, child_process_1.execSync)('git rev-parse --git-dir', {
179
+ (0, child_process_1.execSync)("git rev-parse --git-dir", {
113
180
  cwd: directory,
114
- encoding: 'utf-8',
115
- stdio: ['pipe', 'pipe', 'ignore'],
181
+ encoding: "utf-8",
182
+ stdio: ["pipe", "pipe", "ignore"],
116
183
  });
117
184
  return true;
118
185
  }
@@ -1 +1 @@
1
- {"version":3,"file":"gitUtils.js","sourceRoot":"","sources":["../../src/utils/gitUtils.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,iDAAyC;AAEzC;;;;GAIG;AACI,MAAM,qBAAqB,GAAG,CAAC,aAAqB,EAAU,EAAE;IACrE,IAAI,CAAC;QACH,OAAO,IAAA,wBAAQ,EAAC,wBAAwB,EAAE;YACxC,GAAG,EAAE,aAAa;YAClB,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;IACxD,CAAC;AACH,CAAC,CAAC;AAVW,QAAA,qBAAqB,yBAUhC;AAEF;;;;GAIG;AACI,MAAM,cAAc,GAAG,CAAC,YAAoB,EAAY,EAAE;IAC/D,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9E,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,oCAAoC;QACpC,4CAA4C;QAC5C,gEAAgE;QAChE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1C,8CAA8C;YAC9C,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC9B,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9C,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC1B,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC,CAAC;AArBW,QAAA,cAAc,kBAqBzB;AAEF;;;;GAIG;AACI,MAAM,eAAe,GAAG,CAAC,aAAqB,EAAY,EAAE;IACjE,MAAM,YAAY,GAAG,IAAA,6BAAqB,EAAC,aAAa,CAAC,CAAC;IAC1D,OAAO,IAAA,sBAAc,EAAC,YAAY,CAAC,CAAC;AACtC,CAAC,CAAC;AAHW,QAAA,eAAe,mBAG1B;AAEF;;;;;GAKG;AACI,MAAM,mBAAmB,GAAG,CAAC,MAAc,EAAE,KAAa,EAAY,EAAE;IAC7E,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;IAChE,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAErD,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3B,oEAAoE;YACpE,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtC,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC1B,gCAAgC;gBAChC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBAC5B,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC/B,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC;AACvB,CAAC,CAAC;AArBW,QAAA,mBAAmB,uBAqB9B;AAEF;;;GAGG;AACI,MAAM,UAAU,GAAG,GAAW,EAAE;IACrC,IAAI,CAAC;QACH,OAAO,IAAA,wBAAQ,EAAC,+BAA+B,EAAE;YAC/C,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC,IAAI,EAAE,CAAC;IACZ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;IACjD,CAAC;AACH,CAAC,CAAC;AATW,QAAA,UAAU,cASrB;AAEF;;;;GAIG;AACI,MAAM,eAAe,GAAG,CAAC,SAAiB,EAAW,EAAE;IAC5D,IAAI,CAAC;QACH,IAAA,wBAAQ,EAAC,yBAAyB,EAAE;YAClC,GAAG,EAAE,SAAS;YACd,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;SAClC,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC,CAAC;AAXW,QAAA,eAAe,mBAW1B"}
1
+ {"version":3,"file":"gitUtils.js","sourceRoot":"","sources":["../../src/utils/gitUtils.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,iDAAyC;AACzC,uCAAyB;AACzB,2CAA6B;AAE7B;;;;GAIG;AACI,MAAM,qBAAqB,GAAG,CAAC,aAAqB,EAAU,EAAE;IACrE,IAAI,CAAC;QACH,OAAO,IAAA,wBAAQ,EAAC,wBAAwB,EAAE;YACxC,GAAG,EAAE,aAAa;YAClB,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;IACxD,CAAC;AACH,CAAC,CAAC;AAVW,QAAA,qBAAqB,yBAUhC;AAEF;;;;GAIG;AACI,MAAM,cAAc,GAAG,CAAC,YAAoB,EAAY,EAAE;IAC/D,MAAM,KAAK,GAAG,YAAY;SACvB,IAAI,EAAE;SACN,KAAK,CAAC,IAAI,CAAC;SACX,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,oCAAoC;QACpC,4CAA4C;QAC5C,gEAAgE;QAChE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1C,8CAA8C;YAC9C,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC9B,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9C,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC1B,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC,CAAC;AAxBW,QAAA,cAAc,kBAwBzB;AAEF;;;;GAIG;AACI,MAAM,eAAe,GAAG,CAAC,aAAqB,EAAY,EAAE;IACjE,MAAM,YAAY,GAAG,IAAA,6BAAqB,EAAC,aAAa,CAAC,CAAC;IAC1D,OAAO,IAAA,sBAAc,EAAC,YAAY,CAAC,CAAC;AACtC,CAAC,CAAC;AAHW,QAAA,eAAe,mBAG1B;AAEF;;;;;;GAMG;AACI,MAAM,mBAAmB,GAAG,CACjC,MAAc,EACd,KAAa,EACb,aAAsB,EACZ,EAAE;IACZ,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;IAChE,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAErD,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3B,oEAAoE;YACpE,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtC,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC1B,4CAA4C;gBAC5C,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,aAAa,EAAE,CAAC;oBAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;oBACnD,MAAM,UAAU,GAAG,oBAAoB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;oBAChE,aAAa,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;gBACpC,CAAC;qBAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC/B,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC;AACvB,CAAC,CAAC;AA7BW,QAAA,mBAAmB,uBA6B9B;AAEF;;;;;GAKG;AACH,MAAM,oBAAoB,GAAG,CAC3B,OAAe,EACf,aAAqB,EACX,EAAE;IACZ,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IACjE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QAChD,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,GAAG,oBAAoB,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC;QAC/D,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC;QACrD,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC,CAAC;AAEF;;;GAGG;AACI,MAAM,UAAU,GAAG,GAAW,EAAE;IACrC,IAAI,CAAC;QACH,OAAO,IAAA,wBAAQ,EAAC,+BAA+B,EAAE;YAC/C,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC,IAAI,EAAE,CAAC;IACZ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;IACjD,CAAC;AACH,CAAC,CAAC;AATW,QAAA,UAAU,cASrB;AAEF;;;;GAIG;AACI,MAAM,eAAe,GAAG,CAAC,SAAiB,EAAW,EAAE;IAC5D,IAAI,CAAC;QACH,IAAA,wBAAQ,EAAC,yBAAyB,EAAE;YAClC,GAAG,EAAE,SAAS;YACd,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC;SAClC,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC,CAAC;AAXW,QAAA,eAAe,mBAW1B"}
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * LLM-as-Judge validator using GitHub Models API
3
3
  */
4
- import { CodeValidator, TestScenario, ValidationResult } from '../types';
4
+ import { CodeValidator, TestScenario, ValidationResult } from "../types";
5
5
  export declare class LLMJudgeValidator implements CodeValidator {
6
6
  readonly type: "llm-judge";
7
7
  private workspaceRoot;
@@ -1 +1 @@
1
- {"version":3,"file":"llmJudge.d.ts","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,gBAAgB,EAAa,MAAM,UAAU,CAAC;AAkDpF,qBAAa,iBAAkB,YAAW,aAAa;IACrD,SAAgB,IAAI,EAAG,WAAW,CAAU;IAC5C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,YAAY,CAAS;gBAEjB,aAAa,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAyB;IAMpE;;OAEG;IACG,QAAQ,CACZ,KAAK,EAAE,SAAS,MAAM,EAAE,EACxB,QAAQ,EAAE,YAAY,GACrB,OAAO,CAAC,gBAAgB,CAAC;IA8E5B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA+B3B;;OAEG;YACW,UAAU;IAqExB;;OAEG;IACG,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAYjE"}
1
+ {"version":3,"file":"llmJudge.d.ts","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EACL,aAAa,EACb,YAAY,EACZ,gBAAgB,EAEjB,MAAM,UAAU,CAAC;AA6DlB,qBAAa,iBAAkB,YAAW,aAAa;IACrD,SAAgB,IAAI,EAAG,WAAW,CAAU;IAC5C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,YAAY,CAAS;gBAEjB,aAAa,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAyB;IAMpE;;OAEG;IACG,QAAQ,CACZ,KAAK,EAAE,SAAS,MAAM,EAAE,EACxB,QAAQ,EAAE,YAAY,GACrB,OAAO,CAAC,gBAAgB,CAAC;IA8E5B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA+B3B;;OAEG;YACW,UAAU;IA4ExB;;OAEG;IACG,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAYjE"}
@@ -60,8 +60,8 @@ Respond ONLY with valid JSON in this exact format:
60
60
  "summary": "one sentence summary"
61
61
  }`;
62
62
  class LLMJudgeValidator {
63
- constructor(workspaceRoot, model = 'openai/gpt-4.1') {
64
- this.type = 'llm-judge';
63
+ constructor(workspaceRoot, model = "openai/gpt-4.1") {
64
+ this.type = "llm-judge";
65
65
  this.workspaceRoot = (0, workspaceUtils_1.resolveWorkspaceRoot)(workspaceRoot);
66
66
  this.apiToken = (0, githubAuth_1.getGitHubToken)(); // Auto-detect from env or GitHub CLI
67
67
  this.defaultModel = model;
@@ -75,20 +75,20 @@ class LLMJudgeValidator {
75
75
  if (!llmConfig?.enabled) {
76
76
  return {
77
77
  passed: true,
78
- score: -1,
78
+ score: -1, // Indicate skipped
79
79
  violations: [],
80
- validatorType: 'llm-judge',
80
+ validatorType: "llm-judge",
81
81
  };
82
82
  }
83
83
  // If no API token, skip
84
84
  if (!this.apiToken) {
85
- console.warn('GITHUB_TOKEN not found, skipping LLM judge validation');
85
+ console.warn("GITHUB_TOKEN not found, skipping LLM judge validation");
86
86
  return {
87
87
  passed: true,
88
- score: -1,
88
+ score: -1, // Indicate skipped
89
89
  violations: [],
90
- validatorType: 'llm-judge',
91
- error: 'GITHUB_TOKEN not found',
90
+ validatorType: "llm-judge",
91
+ error: "GITHUB_TOKEN not found",
92
92
  };
93
93
  }
94
94
  try {
@@ -99,18 +99,18 @@ class LLMJudgeValidator {
99
99
  if (!fs.existsSync(filePath)) {
100
100
  continue;
101
101
  }
102
- const content = fs.readFileSync(filePath, 'utf-8');
102
+ const content = fs.readFileSync(filePath, "utf-8");
103
103
  const relativePath = path.relative(this.workspaceRoot, filePath);
104
104
  fileContents.push({ path: relativePath, content });
105
105
  }
106
106
  // Build judgment prompt
107
107
  const judgmentPrompt = this.buildJudgmentPrompt(scenario, fileContents, llmConfig.judgmentPrompt);
108
- // Call LLM API
108
+ // Calling LLM API
109
109
  const model = llmConfig.model || this.defaultModel;
110
110
  const judgment = await this.callLLMAPI(judgmentPrompt, model);
111
111
  // Convert judgment to violations
112
- const violations = (judgment.violations ?? []).map(v => ({
113
- type: 'llm-judge',
112
+ const violations = (judgment.violations ?? []).map((v) => ({
113
+ type: "llm-judge",
114
114
  message: v.message,
115
115
  file: v.file,
116
116
  line: v.line,
@@ -121,7 +121,7 @@ class LLMJudgeValidator {
121
121
  passed: judgment.passed,
122
122
  score: judgment.score,
123
123
  violations,
124
- validatorType: 'llm-judge',
124
+ validatorType: "llm-judge",
125
125
  };
126
126
  }
127
127
  catch (error) {
@@ -129,7 +129,7 @@ class LLMJudgeValidator {
129
129
  passed: false,
130
130
  score: 0,
131
131
  violations: [],
132
- validatorType: 'llm-judge',
132
+ validatorType: "llm-judge",
133
133
  error: `LLM judge failed: ${error}`,
134
134
  };
135
135
  }
@@ -138,48 +138,47 @@ class LLMJudgeValidator {
138
138
  * Build the judgment prompt for the LLM
139
139
  */
140
140
  buildJudgmentPrompt(scenario, fileContents, customPrompt) {
141
- if (customPrompt) {
142
- return customPrompt;
143
- }
144
141
  const filesSection = fileContents
145
- .map(f => `### ${f.path}\n\`\`\`\n${f.content}\n\`\`\``)
146
- .join('\n\n');
147
- return `# Task Description
148
- ${scenario.description}
149
-
150
- # Original Prompt Given to AI
151
- ${scenario.prompt}
152
-
153
- # Generated Code
154
- ${filesSection}
155
-
156
- # Evaluation Criteria
157
- Evaluate whether the generated code:
142
+ .map((f) => `### ${f.path}\n\`\`\`\n${f.content}\n\`\`\``)
143
+ .join("\n\n");
144
+ const evaluationCriteria = customPrompt ||
145
+ `Evaluate whether the generated code:
158
146
  1. Correctly implements the requirements from the prompt
159
147
  2. Follows best practices for ${scenario.category}
160
148
  3. Meets the quality standards for a ${scenario.severity} severity scenario
161
149
 
162
150
  Be strict but fair in your evaluation.`;
151
+ return `# Task Description
152
+ ${scenario.description}
153
+
154
+ # Original Prompt Given to AI
155
+ ${scenario.prompt}
156
+
157
+ # Generated Code
158
+ ${filesSection}
159
+
160
+ # Evaluation Criteria
161
+ ${evaluationCriteria}`;
163
162
  }
164
163
  /**
165
164
  * Call the GitHub Models API (or other LLM API)
166
165
  */
167
166
  async callLLMAPI(prompt, model) {
168
- const apiUrl = 'https://models.github.ai/inference/chat/completions';
167
+ const apiUrl = "https://models.github.ai/inference/chat/completions";
169
168
  const response = await fetch(apiUrl, {
170
- method: 'POST',
169
+ method: "POST",
171
170
  headers: {
172
- 'Content-Type': 'application/json',
171
+ "Content-Type": "application/json",
173
172
  Authorization: `Bearer ${this.apiToken}`,
174
173
  },
175
174
  body: JSON.stringify({
176
175
  model,
177
176
  messages: [
178
- { role: 'system', content: judgeSystemPrompt },
179
- { role: 'user', content: prompt },
177
+ { role: "system", content: judgeSystemPrompt },
178
+ { role: "user", content: prompt },
180
179
  ],
181
180
  temperature: 0,
182
- response_format: { type: 'json_object' },
181
+ response_format: { type: "json_object" },
183
182
  }),
184
183
  });
185
184
  if (!response.ok) {
@@ -189,7 +188,7 @@ Be strict but fair in your evaluation.`;
189
188
  const data = (await response.json());
190
189
  const content = data.choices[0]?.message?.content;
191
190
  if (!content) {
192
- throw new Error('No content in LLM response');
191
+ throw new Error("No content in LLM response");
193
192
  }
194
193
  // Parse JSON response
195
194
  try {
@@ -198,13 +197,13 @@ Be strict but fair in your evaluation.`;
198
197
  if (!Array.isArray(apiResponse.evaluations) ||
199
198
  apiResponse.overallScore == null ||
200
199
  apiResponse.summary == null) {
201
- throw new Error('Invalid judgment structure');
200
+ throw new Error("Invalid judgment structure");
202
201
  }
203
202
  // Transform API response to internal judgment format
204
203
  // Extract violations from FAIL evaluations
205
204
  const violations = apiResponse.evaluations
206
- .filter(e => e.result === 'FAIL')
207
- .map(e => ({
205
+ .filter((e) => e.result === "FAIL")
206
+ .map((e) => ({
208
207
  message: `${e.criterion}: ${e.explanation}`,
209
208
  }));
210
209
  // Determine if passed based on violations and score threshold
@@ -226,7 +225,7 @@ Be strict but fair in your evaluation.`;
226
225
  */
227
226
  async testJudge(prompt, model) {
228
227
  if (!this.apiToken) {
229
- return 'Error: GITHUB_TOKEN not found';
228
+ return "Error: GITHUB_TOKEN not found";
230
229
  }
231
230
  try {
232
231
  const result = await this.callLLMAPI(prompt, model || this.defaultModel);
@@ -1 +1 @@
1
- {"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAE7B,4DAAiF;AACjF,oDAAqD;AA6BrD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;EAiBxB,CAAC;AAEH,MAAa,iBAAiB;IAM5B,YAAY,aAAsB,EAAE,QAAgB,gBAAgB;QALpD,SAAI,GAAG,WAAoB,CAAC;QAM1C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,QAAQ,GAAG,IAAA,2BAAc,GAAE,CAAC,CAAC,qCAAqC;QACvE,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,SAAS,GAAG,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC;QAEvD,iCAAiC;QACjC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;YACtE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,wBAAwB;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,2BAA2B;YAC3B,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,YAAY,GAA6C,EAAE,CAAC;YAElE,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;gBACjE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,wBAAwB;YACxB,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAC7C,QAAQ,EACR,YAAY,EACZ,SAAS,CAAC,cAAc,CACzB,CAAC;YAEF,eAAe;YACf,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;YAE9D,iCAAiC;YACjC,MAAM,UAAU,GAAgB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACpE,IAAI,EAAE,WAAoB;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,QAAQ,EAAE,QAAQ,CAAC,QAAQ;gBAC3B,OAAO,EAAE,QAAQ,CAAC,SAAS;aAC5B,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,UAAU;gBACV,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,qBAAqB,KAAK,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CACzB,QAAsB,EACtB,YAAsD,EACtD,YAAqB;QAErB,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO,YAAY,CAAC;QACtB,CAAC;QAED,MAAM,YAAY,GAAG,YAAY;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,OAAO,UAAU,CAAC;aACvD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,OAAO;EACT,QAAQ,CAAC,WAAW;;;EAGpB,QAAQ,CAAC,MAAM;;;EAGf,YAAY;;;;;gCAKkB,QAAQ,CAAC,QAAQ;uCACV,QAAQ,CAAC,QAAQ;;uCAEjB,CAAC;IACtC,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,MAAc,EAAE,KAAa;QACpD,MAAM,MAAM,GAAG,qDAAqD,CAAC;QAErE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,QAAQ,EAAE;aACzC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,iBAAiB,EAAE;oBAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;iBAClC;gBACD,WAAW,EAAE,CAAC;gBACd,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAQ,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QAElD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAmB,CAAC;YAE1D,kCAAkC;YAClC,IACE,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,WAAW,CAAC;gBACvC,WAAW,CAAC,YAAY,IAAI,IAAI;gBAChC,WAAW,CAAC,OAAO,IAAI,IAAI,EAC3B,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAChD,CAAC;YAED,qDAAqD;YACrD,2CAA2C;YAC3C,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC;iBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACT,OAAO,EAAE,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,WAAW,EAAE;aAC5C,CAAC,CAAC,CAAC;YAEN,8DAA8D;YAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,YAAY,IAAI,GAAG,CAAC;YAE1E,MAAM,QAAQ,GAAgB;gBAC5B,MAAM;gBACN,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAC,EAAE,iCAAiC;gBAC5F,SAAS,EAAE,WAAW,CAAC,OAAO;gBAC9B,UAAU;aACX,CAAC;YAEF,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,iCAAiC,KAAK,cAAc,OAAO,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,KAAc;QAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC,CAAC;YACzE,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,UAAU,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;CACF;AAzND,8CAyNC"}
1
+ {"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAO7B,4DAGiC;AACjC,oDAAqD;AAqCrD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;EAiBxB,CAAC;AAEH,MAAa,iBAAiB;IAM5B,YAAY,aAAsB,EAAE,QAAgB,gBAAgB;QALpD,SAAI,GAAG,WAAoB,CAAC;QAM1C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,QAAQ,GAAG,IAAA,2BAAc,GAAE,CAAC,CAAC,qCAAqC;QACvE,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,SAAS,GAAG,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC;QAEvD,iCAAiC;QACjC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;YACtE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,wBAAwB;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,2BAA2B;YAC3B,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,YAAY,GAAsB,EAAE,CAAC;YAE3C,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;gBACjE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,wBAAwB;YACxB,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAC7C,QAAQ,EACR,YAAY,EACZ,SAAS,CAAC,cAAc,CACzB,CAAC;YAEF,kBAAkB;YAClB,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;YAE9D,iCAAiC;YACjC,MAAM,UAAU,GAAgB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACtE,IAAI,EAAE,WAAoB;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,QAAQ,EAAE,QAAQ,CAAC,QAAQ;gBAC3B,OAAO,EAAE,QAAQ,CAAC,SAAS;aAC5B,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,UAAU;gBACV,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,qBAAqB,KAAK,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CACzB,QAAsB,EACtB,YAA+B,EAC/B,YAAqB;QAErB,MAAM,YAAY,GAAG,YAAY;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,OAAO,UAAU,CAAC;aACzD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,kBAAkB,GACtB,YAAY;YACZ;;gCAE0B,QAAQ,CAAC,QAAQ;uCACV,QAAQ,CAAC,QAAQ;;uCAEjB,CAAC;QAEpC,OAAO;UACD,QAAQ,CAAC,WAAW;;;UAGpB,QAAQ,CAAC,MAAM;;;UAGf,YAAY;;;QAGd,kBAAkB,EAAE,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CACtB,MAAc,EACd,KAAa;QAEb,MAAM,MAAM,GAAG,qDAAqD,CAAC;QAErE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,QAAQ,EAAE;aACzC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,iBAAiB,EAAE;oBAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;iBAClC;gBACD,WAAW,EAAE,CAAC;gBACd,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,4BAA4B,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAC3D,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAQ,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QAElD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAmB,CAAC;YAE1D,kCAAkC;YAClC,IACE,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,WAAW,CAAC;gBACvC,WAAW,CAAC,YAAY,IAAI,IAAI;gBAChC,WAAW,CAAC,OAAO,IAAI,IAAI,EAC3B,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAChD,CAAC;YAED,qDAAqD;YACrD,2CAA2C;YAC3C,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC;iBAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACX,OAAO,EAAE,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,WAAW,EAAE;aAC5C,CAAC,CAAC,CAAC;YAEN,8DAA8D;YAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,YAAY,IAAI,GAAG,CAAC;YAE1E,MAAM,QAAQ,GAAgB;gBAC5B,MAAM;gBACN,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAC,EAAE,iCAAiC;gBAC5F,SAAS,EAAE,WAAW,CAAC,OAAO;gBAC9B,UAAU;aACX,CAAC;YAEF,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CACb,iCAAiC,KAAK,cAAc,OAAO,EAAE,CAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,KAAc;QAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC,CAAC;YACzE,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,UAAU,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;CACF;AAhOD,8CAgOC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "coding-agent-benchmarks",
3
- "version": "0.2.1",
3
+ "version": "0.3.1",
4
4
  "description": "Testing coding agents (GitHub Copilot CLI, Claude Code, etc.) with your repo's context to evaluate their code generation quality.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",