coding-agent-benchmarks 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/README.md +171 -10
  2. package/dist/User.d.ts +10 -0
  3. package/dist/User.d.ts.map +1 -0
  4. package/dist/User.js +3 -0
  5. package/dist/User.js.map +1 -0
  6. package/dist/adapters/claudeCodeCLI.d.ts +1 -1
  7. package/dist/adapters/claudeCodeCLI.d.ts.map +1 -1
  8. package/dist/adapters/claudeCodeCLI.js +44 -32
  9. package/dist/adapters/claudeCodeCLI.js.map +1 -1
  10. package/dist/adapters/copilotCLI.d.ts.map +1 -1
  11. package/dist/adapters/copilotCLI.js +28 -16
  12. package/dist/adapters/copilotCLI.js.map +1 -1
  13. package/dist/config/defaultScenarios.d.ts +6 -0
  14. package/dist/config/defaultScenarios.d.ts.map +1 -0
  15. package/dist/config/defaultScenarios.js +209 -0
  16. package/dist/config/defaultScenarios.js.map +1 -0
  17. package/dist/config/environment.d.ts +51 -0
  18. package/dist/config/environment.d.ts.map +1 -0
  19. package/dist/config/environment.js +57 -0
  20. package/dist/config/environment.js.map +1 -0
  21. package/dist/config/parser.d.ts +6 -0
  22. package/dist/config/parser.d.ts.map +1 -0
  23. package/dist/config/parser.js +87 -0
  24. package/dist/config/parser.js.map +1 -0
  25. package/dist/evaluator.d.ts +4 -9
  26. package/dist/evaluator.d.ts.map +1 -1
  27. package/dist/evaluator.js +4 -12
  28. package/dist/evaluator.js.map +1 -1
  29. package/dist/examples/file-upload-example.d.ts +2 -0
  30. package/dist/examples/file-upload-example.d.ts.map +1 -0
  31. package/dist/examples/file-upload-example.js +37 -0
  32. package/dist/examples/file-upload-example.js.map +1 -0
  33. package/dist/examples/typedEventEmitterExample.d.ts +5 -0
  34. package/dist/examples/typedEventEmitterExample.d.ts.map +1 -0
  35. package/dist/examples/typedEventEmitterExample.js +44 -0
  36. package/dist/examples/typedEventEmitterExample.js.map +1 -0
  37. package/dist/index.d.ts +2 -1
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +3 -1
  40. package/dist/index.js.map +1 -1
  41. package/dist/registration/database.d.ts +13 -0
  42. package/dist/registration/database.d.ts.map +1 -0
  43. package/dist/registration/database.js +35 -0
  44. package/dist/registration/database.js.map +1 -0
  45. package/dist/registration/email.d.ts +17 -0
  46. package/dist/registration/email.d.ts.map +1 -0
  47. package/dist/registration/email.js +40 -0
  48. package/dist/registration/email.js.map +1 -0
  49. package/dist/registration/emailService.d.ts +13 -0
  50. package/dist/registration/emailService.d.ts.map +1 -0
  51. package/dist/registration/emailService.js +26 -0
  52. package/dist/registration/emailService.js.map +1 -0
  53. package/dist/registration/example.d.ts +2 -0
  54. package/dist/registration/example.d.ts.map +1 -0
  55. package/dist/registration/example.js +37 -0
  56. package/dist/registration/example.js.map +1 -0
  57. package/dist/registration/index.d.ts +7 -0
  58. package/dist/registration/index.d.ts.map +1 -0
  59. package/dist/registration/index.js +15 -0
  60. package/dist/registration/index.js.map +1 -0
  61. package/dist/registration/password.d.ts +3 -0
  62. package/dist/registration/password.d.ts.map +1 -0
  63. package/dist/registration/password.js +27 -0
  64. package/dist/registration/password.js.map +1 -0
  65. package/dist/registration/passwordHasher.d.ts +8 -0
  66. package/dist/registration/passwordHasher.d.ts.map +1 -0
  67. package/dist/registration/passwordHasher.js +39 -0
  68. package/dist/registration/passwordHasher.js.map +1 -0
  69. package/dist/registration/registrationService.d.ts +14 -0
  70. package/dist/registration/registrationService.d.ts.map +1 -0
  71. package/dist/registration/registrationService.js +49 -0
  72. package/dist/registration/registrationService.js.map +1 -0
  73. package/dist/registration/service.d.ts +14 -0
  74. package/dist/registration/service.d.ts.map +1 -0
  75. package/dist/registration/service.js +48 -0
  76. package/dist/registration/service.js.map +1 -0
  77. package/dist/registration/types.d.ts +22 -0
  78. package/dist/registration/types.d.ts.map +1 -0
  79. package/dist/registration/types.js +3 -0
  80. package/dist/registration/types.js.map +1 -0
  81. package/dist/registration/userRepository.d.ts +12 -0
  82. package/dist/registration/userRepository.d.ts.map +1 -0
  83. package/dist/registration/userRepository.js +28 -0
  84. package/dist/registration/userRepository.js.map +1 -0
  85. package/dist/registration/validator.d.ts +7 -0
  86. package/dist/registration/validator.d.ts.map +1 -0
  87. package/dist/registration/validator.js +44 -0
  88. package/dist/registration/validator.js.map +1 -0
  89. package/dist/registration/validators.d.ts +5 -0
  90. package/dist/registration/validators.d.ts.map +1 -0
  91. package/dist/registration/validators.js +60 -0
  92. package/dist/registration/validators.js.map +1 -0
  93. package/dist/reporter.d.ts.map +1 -1
  94. package/dist/reporter.js +59 -34
  95. package/dist/reporter.js.map +1 -1
  96. package/dist/runner.js +2 -1
  97. package/dist/runner.js.map +1 -1
  98. package/dist/sync/cache.d.ts +14 -0
  99. package/dist/sync/cache.d.ts.map +1 -0
  100. package/dist/sync/cache.js +73 -0
  101. package/dist/sync/cache.js.map +1 -0
  102. package/dist/sync/index.d.ts +7 -0
  103. package/dist/sync/index.d.ts.map +1 -0
  104. package/dist/sync/index.js +20 -0
  105. package/dist/sync/index.js.map +1 -0
  106. package/dist/sync/resolver.d.ts +12 -0
  107. package/dist/sync/resolver.d.ts.map +1 -0
  108. package/dist/sync/resolver.js +43 -0
  109. package/dist/sync/resolver.js.map +1 -0
  110. package/dist/sync/service.d.ts +20 -0
  111. package/dist/sync/service.d.ts.map +1 -0
  112. package/dist/sync/service.js +162 -0
  113. package/dist/sync/service.js.map +1 -0
  114. package/dist/sync/source.d.ts +21 -0
  115. package/dist/sync/source.d.ts.map +1 -0
  116. package/dist/sync/source.js +58 -0
  117. package/dist/sync/source.js.map +1 -0
  118. package/dist/sync/types.d.ts +56 -0
  119. package/dist/sync/types.d.ts.map +1 -0
  120. package/dist/sync/types.js +21 -0
  121. package/dist/sync/types.js.map +1 -0
  122. package/dist/types.d.ts +6 -6
  123. package/dist/types.d.ts.map +1 -1
  124. package/dist/utils/baselineManager.d.ts +2 -1
  125. package/dist/utils/baselineManager.d.ts.map +1 -1
  126. package/dist/utils/baselineManager.js +1 -0
  127. package/dist/utils/baselineManager.js.map +1 -1
  128. package/dist/utils/cache.d.ts +11 -0
  129. package/dist/utils/cache.d.ts.map +1 -0
  130. package/dist/utils/cache.js +28 -0
  131. package/dist/utils/cache.js.map +1 -0
  132. package/dist/utils/config-merger.d.ts +11 -0
  133. package/dist/utils/config-merger.d.ts.map +1 -0
  134. package/dist/utils/config-merger.js +36 -0
  135. package/dist/utils/config-merger.js.map +1 -0
  136. package/dist/utils/configParser.d.ts +9 -0
  137. package/dist/utils/configParser.d.ts.map +1 -0
  138. package/dist/utils/configParser.js +51 -0
  139. package/dist/utils/configParser.js.map +1 -0
  140. package/dist/utils/database-providers.d.ts +23 -0
  141. package/dist/utils/database-providers.d.ts.map +1 -0
  142. package/dist/utils/database-providers.js +48 -0
  143. package/dist/utils/database-providers.js.map +1 -0
  144. package/dist/utils/errorHandler.d.ts +10 -0
  145. package/dist/utils/errorHandler.d.ts.map +1 -0
  146. package/dist/utils/errorHandler.js +58 -0
  147. package/dist/utils/errorHandler.js.map +1 -0
  148. package/dist/utils/fetchUserData.d.ts +11 -0
  149. package/dist/utils/fetchUserData.d.ts.map +1 -0
  150. package/dist/utils/fetchUserData.js +31 -0
  151. package/dist/utils/fetchUserData.js.map +1 -0
  152. package/dist/utils/file-upload-handler.d.ts +46 -0
  153. package/dist/utils/file-upload-handler.d.ts.map +1 -0
  154. package/dist/utils/file-upload-handler.js +110 -0
  155. package/dist/utils/file-upload-handler.js.map +1 -0
  156. package/dist/utils/gitUtils.d.ts +2 -1
  157. package/dist/utils/gitUtils.d.ts.map +1 -1
  158. package/dist/utils/gitUtils.js +85 -18
  159. package/dist/utils/gitUtils.js.map +1 -1
  160. package/dist/utils/statistics.d.ts +9 -0
  161. package/dist/utils/statistics.d.ts.map +1 -0
  162. package/dist/utils/statistics.js +26 -0
  163. package/dist/utils/statistics.js.map +1 -0
  164. package/dist/utils/storage-providers.d.ts +24 -0
  165. package/dist/utils/storage-providers.d.ts.map +1 -0
  166. package/dist/utils/storage-providers.js +38 -0
  167. package/dist/utils/storage-providers.js.map +1 -0
  168. package/dist/utils/transformArray.d.ts +18 -0
  169. package/dist/utils/transformArray.d.ts.map +1 -0
  170. package/dist/utils/transformArray.js +21 -0
  171. package/dist/utils/transformArray.js.map +1 -0
  172. package/dist/utils/typedEventEmitter.d.ts +18 -0
  173. package/dist/utils/typedEventEmitter.d.ts.map +1 -0
  174. package/dist/utils/typedEventEmitter.js +62 -0
  175. package/dist/utils/typedEventEmitter.js.map +1 -0
  176. package/dist/utils/userUtils.d.ts +9 -0
  177. package/dist/utils/userUtils.d.ts.map +1 -0
  178. package/dist/utils/userUtils.js +14 -0
  179. package/dist/utils/userUtils.js.map +1 -0
  180. package/dist/validators/llmJudge.d.ts +1 -1
  181. package/dist/validators/llmJudge.d.ts.map +1 -1
  182. package/dist/validators/llmJudge.js +40 -41
  183. package/dist/validators/llmJudge.js.map +1 -1
  184. package/package.json +1 -1
@@ -60,8 +60,8 @@ Respond ONLY with valid JSON in this exact format:
60
60
  "summary": "one sentence summary"
61
61
  }`;
62
62
  class LLMJudgeValidator {
63
- constructor(workspaceRoot, model = 'openai/gpt-4.1') {
64
- this.type = 'llm-judge';
63
+ constructor(workspaceRoot, model = "openai/gpt-4.1") {
64
+ this.type = "llm-judge";
65
65
  this.workspaceRoot = (0, workspaceUtils_1.resolveWorkspaceRoot)(workspaceRoot);
66
66
  this.apiToken = (0, githubAuth_1.getGitHubToken)(); // Auto-detect from env or GitHub CLI
67
67
  this.defaultModel = model;
@@ -75,20 +75,20 @@ class LLMJudgeValidator {
75
75
  if (!llmConfig?.enabled) {
76
76
  return {
77
77
  passed: true,
78
- score: -1,
78
+ score: -1, // Indicate skipped
79
79
  violations: [],
80
- validatorType: 'llm-judge',
80
+ validatorType: "llm-judge",
81
81
  };
82
82
  }
83
83
  // If no API token, skip
84
84
  if (!this.apiToken) {
85
- console.warn('GITHUB_TOKEN not found, skipping LLM judge validation');
85
+ console.warn("GITHUB_TOKEN not found, skipping LLM judge validation");
86
86
  return {
87
87
  passed: true,
88
- score: -1,
88
+ score: -1, // Indicate skipped
89
89
  violations: [],
90
- validatorType: 'llm-judge',
91
- error: 'GITHUB_TOKEN not found',
90
+ validatorType: "llm-judge",
91
+ error: "GITHUB_TOKEN not found",
92
92
  };
93
93
  }
94
94
  try {
@@ -99,18 +99,18 @@ class LLMJudgeValidator {
99
99
  if (!fs.existsSync(filePath)) {
100
100
  continue;
101
101
  }
102
- const content = fs.readFileSync(filePath, 'utf-8');
102
+ const content = fs.readFileSync(filePath, "utf-8");
103
103
  const relativePath = path.relative(this.workspaceRoot, filePath);
104
104
  fileContents.push({ path: relativePath, content });
105
105
  }
106
106
  // Build judgment prompt
107
107
  const judgmentPrompt = this.buildJudgmentPrompt(scenario, fileContents, llmConfig.judgmentPrompt);
108
- // Call LLM API
108
+ // Calling LLM API
109
109
  const model = llmConfig.model || this.defaultModel;
110
110
  const judgment = await this.callLLMAPI(judgmentPrompt, model);
111
111
  // Convert judgment to violations
112
- const violations = (judgment.violations ?? []).map(v => ({
113
- type: 'llm-judge',
112
+ const violations = (judgment.violations ?? []).map((v) => ({
113
+ type: "llm-judge",
114
114
  message: v.message,
115
115
  file: v.file,
116
116
  line: v.line,
@@ -121,7 +121,7 @@ class LLMJudgeValidator {
121
121
  passed: judgment.passed,
122
122
  score: judgment.score,
123
123
  violations,
124
- validatorType: 'llm-judge',
124
+ validatorType: "llm-judge",
125
125
  };
126
126
  }
127
127
  catch (error) {
@@ -129,7 +129,7 @@ class LLMJudgeValidator {
129
129
  passed: false,
130
130
  score: 0,
131
131
  violations: [],
132
- validatorType: 'llm-judge',
132
+ validatorType: "llm-judge",
133
133
  error: `LLM judge failed: ${error}`,
134
134
  };
135
135
  }
@@ -138,48 +138,47 @@ class LLMJudgeValidator {
138
138
  * Build the judgment prompt for the LLM
139
139
  */
140
140
  buildJudgmentPrompt(scenario, fileContents, customPrompt) {
141
- if (customPrompt) {
142
- return customPrompt;
143
- }
144
141
  const filesSection = fileContents
145
- .map(f => `### ${f.path}\n\`\`\`\n${f.content}\n\`\`\``)
146
- .join('\n\n');
147
- return `# Task Description
148
- ${scenario.description}
149
-
150
- # Original Prompt Given to AI
151
- ${scenario.prompt}
152
-
153
- # Generated Code
154
- ${filesSection}
155
-
156
- # Evaluation Criteria
157
- Evaluate whether the generated code:
142
+ .map((f) => `### ${f.path}\n\`\`\`\n${f.content}\n\`\`\``)
143
+ .join("\n\n");
144
+ const evaluationCriteria = customPrompt ||
145
+ `Evaluate whether the generated code:
158
146
  1. Correctly implements the requirements from the prompt
159
147
  2. Follows best practices for ${scenario.category}
160
148
  3. Meets the quality standards for a ${scenario.severity} severity scenario
161
149
 
162
150
  Be strict but fair in your evaluation.`;
151
+ return `# Task Description
152
+ ${scenario.description}
153
+
154
+ # Original Prompt Given to AI
155
+ ${scenario.prompt}
156
+
157
+ # Generated Code
158
+ ${filesSection}
159
+
160
+ # Evaluation Criteria
161
+ ${evaluationCriteria}`;
163
162
  }
164
163
  /**
165
164
  * Call the GitHub Models API (or other LLM API)
166
165
  */
167
166
  async callLLMAPI(prompt, model) {
168
- const apiUrl = 'https://models.github.ai/inference/chat/completions';
167
+ const apiUrl = "https://models.github.ai/inference/chat/completions";
169
168
  const response = await fetch(apiUrl, {
170
- method: 'POST',
169
+ method: "POST",
171
170
  headers: {
172
- 'Content-Type': 'application/json',
171
+ "Content-Type": "application/json",
173
172
  Authorization: `Bearer ${this.apiToken}`,
174
173
  },
175
174
  body: JSON.stringify({
176
175
  model,
177
176
  messages: [
178
- { role: 'system', content: judgeSystemPrompt },
179
- { role: 'user', content: prompt },
177
+ { role: "system", content: judgeSystemPrompt },
178
+ { role: "user", content: prompt },
180
179
  ],
181
180
  temperature: 0,
182
- response_format: { type: 'json_object' },
181
+ response_format: { type: "json_object" },
183
182
  }),
184
183
  });
185
184
  if (!response.ok) {
@@ -189,7 +188,7 @@ Be strict but fair in your evaluation.`;
189
188
  const data = (await response.json());
190
189
  const content = data.choices[0]?.message?.content;
191
190
  if (!content) {
192
- throw new Error('No content in LLM response');
191
+ throw new Error("No content in LLM response");
193
192
  }
194
193
  // Parse JSON response
195
194
  try {
@@ -198,13 +197,13 @@ Be strict but fair in your evaluation.`;
198
197
  if (!Array.isArray(apiResponse.evaluations) ||
199
198
  apiResponse.overallScore == null ||
200
199
  apiResponse.summary == null) {
201
- throw new Error('Invalid judgment structure');
200
+ throw new Error("Invalid judgment structure");
202
201
  }
203
202
  // Transform API response to internal judgment format
204
203
  // Extract violations from FAIL evaluations
205
204
  const violations = apiResponse.evaluations
206
- .filter(e => e.result === 'FAIL')
207
- .map(e => ({
205
+ .filter((e) => e.result === "FAIL")
206
+ .map((e) => ({
208
207
  message: `${e.criterion}: ${e.explanation}`,
209
208
  }));
210
209
  // Determine if passed based on violations and score threshold
@@ -226,7 +225,7 @@ Be strict but fair in your evaluation.`;
226
225
  */
227
226
  async testJudge(prompt, model) {
228
227
  if (!this.apiToken) {
229
- return 'Error: GITHUB_TOKEN not found';
228
+ return "Error: GITHUB_TOKEN not found";
230
229
  }
231
230
  try {
232
231
  const result = await this.callLLMAPI(prompt, model || this.defaultModel);
@@ -1 +1 @@
1
- {"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAE7B,4DAAiF;AACjF,oDAAqD;AA6BrD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;EAiBxB,CAAC;AAEH,MAAa,iBAAiB;IAM5B,YAAY,aAAsB,EAAE,QAAgB,gBAAgB;QALpD,SAAI,GAAG,WAAoB,CAAC;QAM1C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,QAAQ,GAAG,IAAA,2BAAc,GAAE,CAAC,CAAC,qCAAqC;QACvE,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,SAAS,GAAG,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC;QAEvD,iCAAiC;QACjC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;YACtE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,wBAAwB;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,2BAA2B;YAC3B,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,YAAY,GAA6C,EAAE,CAAC;YAElE,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;gBACjE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,wBAAwB;YACxB,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAC7C,QAAQ,EACR,YAAY,EACZ,SAAS,CAAC,cAAc,CACzB,CAAC;YAEF,eAAe;YACf,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;YAE9D,iCAAiC;YACjC,MAAM,UAAU,GAAgB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACpE,IAAI,EAAE,WAAoB;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,QAAQ,E
AAE,QAAQ,CAAC,QAAQ;gBAC3B,OAAO,EAAE,QAAQ,CAAC,SAAS;aAC5B,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,UAAU;gBACV,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,qBAAqB,KAAK,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CACzB,QAAsB,EACtB,YAAsD,EACtD,YAAqB;QAErB,IAAI,YAAY,EAAE,CAAC;YACjB,OAAO,YAAY,CAAC;QACtB,CAAC;QAED,MAAM,YAAY,GAAG,YAAY;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,OAAO,UAAU,CAAC;aACvD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,OAAO;EACT,QAAQ,CAAC,WAAW;;;EAGpB,QAAQ,CAAC,MAAM;;;EAGf,YAAY;;;;;gCAKkB,QAAQ,CAAC,QAAQ;uCACV,QAAQ,CAAC,QAAQ;;uCAEjB,CAAC;IACtC,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,MAAc,EAAE,KAAa;QACpD,MAAM,MAAM,GAAG,qDAAqD,CAAC;QAErE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,QAAQ,EAAE;aACzC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,iBAAiB,EAAE;oBAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;iBAClC;gBACD,WAAW,EAAE,CAAC;gBACd,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAQ,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QAElD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAmB,CAAC;YAE1D,kCAAkC;YAClC,IACE,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,WAAW,CAAC;gBACvC,WAAW,CAAC,YAAY,IAAI,IAAI;gBAChC,WAAW,CAAC,OAAO,IAAI,IAAI,EAC3B,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAChD,CAAC;YAED,qDAAqD;YACrD,2CAA2C;YAC3C,MAAM,UAAU,GAA
G,WAAW,CAAC,WAAW;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC;iBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACT,OAAO,EAAE,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,WAAW,EAAE;aAC5C,CAAC,CAAC,CAAC;YAEN,8DAA8D;YAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,YAAY,IAAI,GAAG,CAAC;YAE1E,MAAM,QAAQ,GAAgB;gBAC5B,MAAM;gBACN,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAC,EAAE,iCAAiC;gBAC5F,SAAS,EAAE,WAAW,CAAC,OAAO;gBAC9B,UAAU;aACX,CAAC;YAEF,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,iCAAiC,KAAK,cAAc,OAAO,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,KAAc;QAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC,CAAC;YACzE,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,UAAU,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;CACF;AAzND,8CAyNC"}
1
+ {"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAO7B,4DAGiC;AACjC,oDAAqD;AAqCrD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;EAiBxB,CAAC;AAEH,MAAa,iBAAiB;IAM5B,YAAY,aAAsB,EAAE,QAAgB,gBAAgB;QALpD,SAAI,GAAG,WAAoB,CAAC;QAM1C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,QAAQ,GAAG,IAAA,2BAAc,GAAE,CAAC,CAAC,qCAAqC;QACvE,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,SAAS,GAAG,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC;QAEvD,iCAAiC;QACjC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;YACtE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,wBAAwB;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,2BAA2B;YAC3B,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,YAAY,GAAsB,EAAE,CAAC;YAE3C,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;gBACjE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,wBAAwB;YACxB,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAC7C,QAAQ,EACR,YAAY,EACZ,SAAS,CAAC,cAAc,CACzB,CAAC;YAEF,kBAAkB;YAClB,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;YAE9D,iCAAiC;YACjC,MAAM,UAAU,GAAgB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACtE,IAAI,EAAE,WAAoB;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAA
I,EAAE,CAAC,CAAC,IAAI;gBACZ,QAAQ,EAAE,QAAQ,CAAC,QAAQ;gBAC3B,OAAO,EAAE,QAAQ,CAAC,SAAS;aAC5B,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,UAAU;gBACV,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,qBAAqB,KAAK,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CACzB,QAAsB,EACtB,YAA+B,EAC/B,YAAqB;QAErB,MAAM,YAAY,GAAG,YAAY;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,OAAO,UAAU,CAAC;aACzD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,kBAAkB,GACtB,YAAY;YACZ;;gCAE0B,QAAQ,CAAC,QAAQ;uCACV,QAAQ,CAAC,QAAQ;;uCAEjB,CAAC;QAEpC,OAAO;UACD,QAAQ,CAAC,WAAW;;;UAGpB,QAAQ,CAAC,MAAM;;;UAGf,YAAY;;;QAGd,kBAAkB,EAAE,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CACtB,MAAc,EACd,KAAa;QAEb,MAAM,MAAM,GAAG,qDAAqD,CAAC;QAErE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,QAAQ,EAAE;aACzC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,iBAAiB,EAAE;oBAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;iBAClC;gBACD,WAAW,EAAE,CAAC;gBACd,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,4BAA4B,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAC3D,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAQ,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QAElD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAmB,CAAC;YAE1D,kCAAkC;YAClC,IACE,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,WAAW,CAAC;gBACvC,WAAW,CAAC,YAAY,IAAI,IAAI;gBAChC,WAAW,CAAC,OAAO,IAAI,IAAI,EAC3B,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAChD,CAAC;YAED,qD
AAqD;YACrD,2CAA2C;YAC3C,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC;iBAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACX,OAAO,EAAE,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,WAAW,EAAE;aAC5C,CAAC,CAAC,CAAC;YAEN,8DAA8D;YAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,YAAY,IAAI,GAAG,CAAC;YAE1E,MAAM,QAAQ,GAAgB;gBAC5B,MAAM;gBACN,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAC,EAAE,iCAAiC;gBAC5F,SAAS,EAAE,WAAW,CAAC,OAAO;gBAC9B,UAAU;aACX,CAAC;YAEF,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CACb,iCAAiC,KAAK,cAAc,OAAO,EAAE,CAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,KAAc;QAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC,CAAC;YACzE,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,UAAU,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;CACF;AAhOD,8CAgOC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "coding-agent-benchmarks",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "Testing coding agents (GitHub Copilot CLI, Claude Code, etc.) with your repo's context to evaluate their code generation quality.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",