coding-agent-benchmarks 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/evaluator.d.ts +1 -0
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +1 -1
- package/dist/evaluator.js.map +1 -1
- package/dist/lcs.d.ts +33 -0
- package/dist/lcs.d.ts.map +1 -0
- package/dist/lcs.js +147 -0
- package/dist/lcs.js.map +1 -0
- package/dist/longest-common-subsequence.d.ts +45 -0
- package/dist/longest-common-subsequence.d.ts.map +1 -0
- package/dist/longest-common-subsequence.js +162 -0
- package/dist/longest-common-subsequence.js.map +1 -0
- package/dist/longestCommonSubsequence.d.ts +39 -0
- package/dist/longestCommonSubsequence.d.ts.map +1 -0
- package/dist/longestCommonSubsequence.js +166 -0
- package/dist/longestCommonSubsequence.js.map +1 -0
- package/dist/runner.js +8 -4
- package/dist/runner.js.map +1 -1
- package/dist/types.d.ts +5 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/user-registration/example.d.ts +2 -0
- package/dist/user-registration/example.d.ts.map +1 -0
- package/dist/user-registration/example.js +41 -0
- package/dist/user-registration/example.js.map +1 -0
- package/dist/user-registration/index.d.ts +9 -0
- package/dist/user-registration/index.d.ts.map +1 -0
- package/dist/user-registration/index.js +22 -0
- package/dist/user-registration/index.js.map +1 -0
- package/dist/user-registration/services/email-service.d.ts +13 -0
- package/dist/user-registration/services/email-service.d.ts.map +1 -0
- package/dist/user-registration/services/email-service.js +34 -0
- package/dist/user-registration/services/email-service.js.map +1 -0
- package/dist/user-registration/services/password-service.d.ts +5 -0
- package/dist/user-registration/services/password-service.d.ts.map +1 -0
- package/dist/user-registration/services/password-service.js +23 -0
- package/dist/user-registration/services/password-service.js.map +1 -0
- package/dist/user-registration/services/registration-service.d.ts +19 -0
- package/dist/user-registration/services/registration-service.d.ts.map +1 -0
- package/dist/user-registration/services/registration-service.js +60 -0
- package/dist/user-registration/services/registration-service.js.map +1 -0
- package/dist/user-registration/services/user-repository.d.ts +12 -0
- package/dist/user-registration/services/user-repository.d.ts.map +1 -0
- package/dist/user-registration/services/user-repository.js +24 -0
- package/dist/user-registration/services/user-repository.js.map +1 -0
- package/dist/user-registration/types.d.ts +26 -0
- package/dist/user-registration/types.d.ts.map +1 -0
- package/dist/user-registration/types.js +18 -0
- package/dist/user-registration/types.js.map +1 -0
- package/dist/user-registration/validators/email-validator.d.ts +3 -0
- package/dist/user-registration/validators/email-validator.d.ts.map +1 -0
- package/dist/user-registration/validators/email-validator.js +22 -0
- package/dist/user-registration/validators/email-validator.js.map +1 -0
- package/dist/user-registration/validators/password-validator.d.ts +3 -0
- package/dist/user-registration/validators/password-validator.d.ts.map +1 -0
- package/dist/user-registration/validators/password-validator.js +36 -0
- package/dist/user-registration/validators/password-validator.js.map +1 -0
- package/dist/user-registration/validators/required-fields-validator.d.ts +3 -0
- package/dist/user-registration/validators/required-fields-validator.d.ts.map +1 -0
- package/dist/user-registration/validators/required-fields-validator.js +17 -0
- package/dist/user-registration/validators/required-fields-validator.js.map +1 -0
- package/dist/utils/dataFetcher.d.ts +21 -0
- package/dist/utils/dataFetcher.d.ts.map +1 -0
- package/dist/utils/dataFetcher.js +71 -0
- package/dist/utils/dataFetcher.js.map +1 -0
- package/dist/utils/imageProcessor.d.ts +16 -0
- package/dist/utils/imageProcessor.d.ts.map +1 -0
- package/dist/utils/imageProcessor.js +121 -0
- package/dist/utils/imageProcessor.js.map +1 -0
- package/dist/utils/lcs.d.ts +42 -0
- package/dist/utils/lcs.d.ts.map +1 -0
- package/dist/utils/lcs.js +135 -0
- package/dist/utils/lcs.js.map +1 -0
- package/dist/utils/notifications.d.ts +2 -0
- package/dist/utils/notifications.d.ts.map +1 -0
- package/dist/utils/notifications.js +8 -0
- package/dist/utils/notifications.js.map +1 -0
- package/dist/utils/timeUtils.d.ts +16 -1
- package/dist/utils/timeUtils.d.ts.map +1 -1
- package/dist/utils/timeUtils.js +54 -17
- package/dist/utils/timeUtils.js.map +1 -1
- package/dist/utils/userReport.d.ts +10 -0
- package/dist/utils/userReport.d.ts.map +1 -0
- package/dist/utils/userReport.js +19 -0
- package/dist/utils/userReport.js.map +1 -0
- package/dist/validators/llmJudge.d.ts.map +1 -1
- package/dist/validators/llmJudge.js +10 -6
- package/dist/validators/llmJudge.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Time difference units
|
|
3
|
+
*/
|
|
1
4
|
export interface TimeDifference {
|
|
2
5
|
milliseconds: number;
|
|
3
6
|
seconds: number;
|
|
@@ -5,5 +8,17 @@ export interface TimeDifference {
|
|
|
5
8
|
hours: number;
|
|
6
9
|
days: number;
|
|
7
10
|
}
|
|
8
|
-
|
|
11
|
+
/**
|
|
12
|
+
* Calculate the absolute time difference between two dates
|
|
13
|
+
*/
|
|
14
|
+
export declare const getTimeDifference: (date1: Date, date2: Date) => TimeDifference;
|
|
15
|
+
/**
|
|
16
|
+
* Calculate the signed time difference between two dates (date2 - date1)
|
|
17
|
+
* Positive if date2 is after date1, negative if before
|
|
18
|
+
*/
|
|
19
|
+
export declare const getSignedTimeDifference: (date1: Date, date2: Date) => TimeDifference;
|
|
20
|
+
/**
|
|
21
|
+
* Format time difference as human-readable string
|
|
22
|
+
*/
|
|
23
|
+
export declare const formatTimeDifference: (diff: TimeDifference) => string;
|
|
9
24
|
//# sourceMappingURL=timeUtils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"timeUtils.d.ts","sourceRoot":"","sources":["../../src/utils/timeUtils.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,cAAc;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;
|
|
1
|
+
{"version":3,"file":"timeUtils.d.ts","sourceRoot":"","sources":["../../src/utils/timeUtils.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAoBD;;GAEG;AACH,eAAO,MAAM,iBAAiB,GAC5B,OAAO,IAAI,EACX,OAAO,IAAI,KACV,cAGF,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,uBAAuB,GAClC,OAAO,IAAI,EACX,OAAO,IAAI,KACV,cAGF,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,oBAAoB,GAAI,MAAM,cAAc,KAAG,MAmB3D,CAAC"}
|
package/dist/utils/timeUtils.js
CHANGED
|
@@ -1,23 +1,60 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
const
|
|
10
|
-
const
|
|
11
|
-
const
|
|
12
|
-
const calculateTimeDifference = (startDate, endDate) => {
|
|
13
|
-
const milliseconds = Math.abs(endDate.getTime() - startDate.getTime());
|
|
3
|
+
exports.formatTimeDifference = exports.getSignedTimeDifference = exports.getTimeDifference = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Convert milliseconds to other time units
|
|
6
|
+
*/
|
|
7
|
+
const convertMilliseconds = (ms) => {
|
|
8
|
+
const seconds = ms / 1000;
|
|
9
|
+
const minutes = seconds / 60;
|
|
10
|
+
const hours = minutes / 60;
|
|
11
|
+
const days = hours / 24;
|
|
14
12
|
return {
|
|
15
|
-
milliseconds,
|
|
16
|
-
seconds
|
|
17
|
-
minutes
|
|
18
|
-
hours
|
|
19
|
-
days
|
|
13
|
+
milliseconds: ms,
|
|
14
|
+
seconds,
|
|
15
|
+
minutes,
|
|
16
|
+
hours,
|
|
17
|
+
days,
|
|
20
18
|
};
|
|
21
19
|
};
|
|
22
|
-
|
|
20
|
+
/**
|
|
21
|
+
* Calculate the absolute time difference between two dates
|
|
22
|
+
*/
|
|
23
|
+
const getTimeDifference = (date1, date2) => {
|
|
24
|
+
const ms = Math.abs(date1.getTime() - date2.getTime());
|
|
25
|
+
return convertMilliseconds(ms);
|
|
26
|
+
};
|
|
27
|
+
exports.getTimeDifference = getTimeDifference;
|
|
28
|
+
/**
|
|
29
|
+
* Calculate the signed time difference between two dates (date2 - date1)
|
|
30
|
+
* Positive if date2 is after date1, negative if before
|
|
31
|
+
*/
|
|
32
|
+
const getSignedTimeDifference = (date1, date2) => {
|
|
33
|
+
const ms = date2.getTime() - date1.getTime();
|
|
34
|
+
return convertMilliseconds(ms);
|
|
35
|
+
};
|
|
36
|
+
exports.getSignedTimeDifference = getSignedTimeDifference;
|
|
37
|
+
/**
|
|
38
|
+
* Format time difference as human-readable string
|
|
39
|
+
*/
|
|
40
|
+
const formatTimeDifference = (diff) => {
|
|
41
|
+
const absDays = Math.abs(diff.days);
|
|
42
|
+
const absHours = Math.abs(diff.hours);
|
|
43
|
+
const absMinutes = Math.abs(diff.minutes);
|
|
44
|
+
const absSeconds = Math.abs(diff.seconds);
|
|
45
|
+
if (absDays >= 1) {
|
|
46
|
+
return `${diff.days.toFixed(2)} days`;
|
|
47
|
+
}
|
|
48
|
+
if (absHours >= 1) {
|
|
49
|
+
return `${diff.hours.toFixed(2)} hours`;
|
|
50
|
+
}
|
|
51
|
+
if (absMinutes >= 1) {
|
|
52
|
+
return `${diff.minutes.toFixed(2)} minutes`;
|
|
53
|
+
}
|
|
54
|
+
if (absSeconds >= 1) {
|
|
55
|
+
return `${diff.seconds.toFixed(2)} seconds`;
|
|
56
|
+
}
|
|
57
|
+
return `${diff.milliseconds} milliseconds`;
|
|
58
|
+
};
|
|
59
|
+
exports.formatTimeDifference = formatTimeDifference;
|
|
23
60
|
//# sourceMappingURL=timeUtils.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"timeUtils.js","sourceRoot":"","sources":["../../src/utils/timeUtils.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"timeUtils.js","sourceRoot":"","sources":["../../src/utils/timeUtils.ts"],"names":[],"mappings":";;;AAWA;;GAEG;AACH,MAAM,mBAAmB,GAAG,CAAC,EAAU,EAAkB,EAAE;IACzD,MAAM,OAAO,GAAG,EAAE,GAAG,IAAI,CAAC;IAC1B,MAAM,OAAO,GAAG,OAAO,GAAG,EAAE,CAAC;IAC7B,MAAM,KAAK,GAAG,OAAO,GAAG,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;IAExB,OAAO;QACL,YAAY,EAAE,EAAE;QAChB,OAAO;QACP,OAAO;QACP,KAAK;QACL,IAAI;KACL,CAAC;AACJ,CAAC,CAAC;AAEF;;GAEG;AACI,MAAM,iBAAiB,GAAG,CAC/B,KAAW,EACX,KAAW,EACK,EAAE;IAClB,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;IACvD,OAAO,mBAAmB,CAAC,EAAE,CAAC,CAAC;AACjC,CAAC,CAAC;AANW,QAAA,iBAAiB,qBAM5B;AAEF;;;GAGG;AACI,MAAM,uBAAuB,GAAG,CACrC,KAAW,EACX,KAAW,EACK,EAAE;IAClB,MAAM,EAAE,GAAG,KAAK,CAAC,OAAO,EAAE,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC;IAC7C,OAAO,mBAAmB,CAAC,EAAE,CAAC,CAAC;AACjC,CAAC,CAAC;AANW,QAAA,uBAAuB,2BAMlC;AAEF;;GAEG;AACI,MAAM,oBAAoB,GAAG,CAAC,IAAoB,EAAU,EAAE;IACnE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAE1C,IAAI,OAAO,IAAI,CAAC,EAAE,CAAC;QACjB,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;IACxC,CAAC;IACD,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;QAClB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC;IAC1C,CAAC;IACD,IAAI,UAAU,IAAI,CAAC,EAAE,CAAC;QACpB,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC;IAC9C,CAAC;IACD,IAAI,UAAU,IAAI,CAAC,EAAE,CAAC;QACpB,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC;IAC9C,CAAC;IACD,OAAO,GAAG,IAAI,CAAC,YAAY,eAAe,CAAC;AAC7C,CAAC,CAAC;AAnBW,QAAA,oBAAoB,wBAmB/B"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* User report printing utilities
|
|
3
|
+
*/
|
|
4
|
+
export interface UserWithStatus {
|
|
5
|
+
name: string;
|
|
6
|
+
email: string;
|
|
7
|
+
accountStatus: string;
|
|
8
|
+
}
|
|
9
|
+
export declare const printUserReport: (users: readonly UserWithStatus[]) => void;
|
|
10
|
+
//# sourceMappingURL=userReport.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"userReport.d.ts","sourceRoot":"","sources":["../../src/utils/userReport.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,eAAO,MAAM,eAAe,GAAI,OAAO,SAAS,cAAc,EAAE,KAAG,IAYlE,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* User report printing utilities
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.printUserReport = void 0;
|
|
7
|
+
const printUserReport = (users) => {
|
|
8
|
+
console.log('\n=== User Report ===\n');
|
|
9
|
+
users.forEach((user, index) => {
|
|
10
|
+
console.log(`User #${index + 1}:`);
|
|
11
|
+
console.log(` Name: ${user.name}`);
|
|
12
|
+
console.log(` Email: ${user.email}`);
|
|
13
|
+
console.log(` Account Status: ${user.accountStatus}`);
|
|
14
|
+
console.log('');
|
|
15
|
+
});
|
|
16
|
+
console.log(`Total Users: ${users.length}\n`);
|
|
17
|
+
};
|
|
18
|
+
exports.printUserReport = printUserReport;
|
|
19
|
+
//# sourceMappingURL=userReport.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"userReport.js","sourceRoot":"","sources":["../../src/utils/userReport.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAQI,MAAM,eAAe,GAAG,CAAC,KAAgC,EAAQ,EAAE;IACxE,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;IAEvC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5B,OAAO,CAAC,GAAG,CAAC,SAAS,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;QAC/C,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;QACvD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,gBAAgB,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC;AAChD,CAAC,CAAC;AAZW,QAAA,eAAe,mBAY1B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llmJudge.d.ts","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EACL,aAAa,EACb,YAAY,EACZ,gBAAgB,EAEjB,MAAM,UAAU,CAAC;AA6DlB,qBAAa,iBAAkB,YAAW,aAAa;IACrD,SAAgB,IAAI,EAAG,WAAW,CAAU;IAC5C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,YAAY,CAAS;gBAEjB,aAAa,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAuB;IAMlE;;OAEG;IACG,QAAQ,CACZ,KAAK,EAAE,SAAS,MAAM,EAAE,EACxB,QAAQ,EAAE,YAAY,GACrB,OAAO,CAAC,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"llmJudge.d.ts","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EACL,aAAa,EACb,YAAY,EACZ,gBAAgB,EAEjB,MAAM,UAAU,CAAC;AA6DlB,qBAAa,iBAAkB,YAAW,aAAa;IACrD,SAAgB,IAAI,EAAG,WAAW,CAAU;IAC5C,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,YAAY,CAAS;gBAEjB,aAAa,CAAC,EAAE,MAAM,EAAE,KAAK,GAAE,MAAuB;IAMlE;;OAEG;IACG,QAAQ,CACZ,KAAK,EAAE,SAAS,MAAM,EAAE,EACxB,QAAQ,EAAE,YAAY,GACrB,OAAO,CAAC,gBAAgB,CAAC;IA4E5B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA+B3B;;OAEG;YACW,UAAU;IAsFxB;;OAEG;IACG,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAYjE"}
|
|
@@ -103,9 +103,7 @@ class LLMJudgeValidator {
|
|
|
103
103
|
const relativePath = path.relative(this.workspaceRoot, filePath);
|
|
104
104
|
fileContents.push({ path: relativePath, content });
|
|
105
105
|
}
|
|
106
|
-
// Build judgment prompt
|
|
107
106
|
const judgmentPrompt = this.buildJudgmentPrompt(scenario, fileContents, llmConfig.judgmentPrompt);
|
|
108
|
-
// Calling LLM API
|
|
109
107
|
const model = llmConfig.model || this.defaultModel;
|
|
110
108
|
const judgment = await this.callLLMAPI(judgmentPrompt, model);
|
|
111
109
|
// Convert judgment to violations
|
|
@@ -126,8 +124,8 @@ class LLMJudgeValidator {
|
|
|
126
124
|
}
|
|
127
125
|
catch (error) {
|
|
128
126
|
return {
|
|
129
|
-
passed:
|
|
130
|
-
score:
|
|
127
|
+
passed: true,
|
|
128
|
+
score: -1,
|
|
131
129
|
violations: [],
|
|
132
130
|
validatorType: "llm-judge",
|
|
133
131
|
error: `LLM judge failed: ${error}`,
|
|
@@ -177,10 +175,15 @@ Be strict but fair in your evaluation.`;
|
|
|
177
175
|
{ role: "system", content: judgeSystemPrompt },
|
|
178
176
|
{ role: "user", content: prompt },
|
|
179
177
|
],
|
|
180
|
-
max_completion_tokens: 1000,
|
|
181
178
|
response_format: { type: "json_object" },
|
|
182
179
|
}),
|
|
183
180
|
});
|
|
181
|
+
if (response.status === 401) {
|
|
182
|
+
throw new Error("Unauthorized: Invalid GITHUB_TOKEN");
|
|
183
|
+
}
|
|
184
|
+
if (response.status === 429) {
|
|
185
|
+
throw new Error("Rate limit exceeded: Too many requests to LLM API");
|
|
186
|
+
}
|
|
184
187
|
if (!response.ok) {
|
|
185
188
|
const errorText = await response.text();
|
|
186
189
|
throw new Error(`GitHub Models API error: ${response.status} ${errorText}`);
|
|
@@ -188,7 +191,8 @@ Be strict but fair in your evaluation.`;
|
|
|
188
191
|
const data = (await response.json());
|
|
189
192
|
const content = data.choices[0]?.message?.content;
|
|
190
193
|
if (!content) {
|
|
191
|
-
|
|
194
|
+
const finishReason = data.choices[0]?.finish_reason ?? "unknown";
|
|
195
|
+
throw new Error(`No content in LLM response (finish_reason: ${finishReason})`);
|
|
192
196
|
}
|
|
193
197
|
// Parse JSON response
|
|
194
198
|
try {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAO7B,4DAGiC;AACjC,oDAAqD;AAqCrD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;EAiBxB,CAAC;AAEH,MAAa,iBAAiB;IAM5B,YAAY,aAAsB,EAAE,QAAgB,cAAc;QALlD,SAAI,GAAG,WAAoB,CAAC;QAM1C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,QAAQ,GAAG,IAAA,2BAAc,GAAE,CAAC;QACjC,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,SAAS,GAAG,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC;QAEvD,iCAAiC;QACjC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;YACtE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,wBAAwB;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,2BAA2B;YAC3B,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,YAAY,GAAsB,EAAE,CAAC;YAE3C,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;gBACjE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,
|
|
1
|
+
{"version":3,"file":"llmJudge.js","sourceRoot":"","sources":["../../src/validators/llmJudge.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAO7B,4DAGiC;AACjC,oDAAqD;AAqCrD,MAAM,iBAAiB,GAAG;;;;;;;;;;;;;;;;;EAiBxB,CAAC;AAEH,MAAa,iBAAiB;IAM5B,YAAY,aAAsB,EAAE,QAAgB,cAAc;QALlD,SAAI,GAAG,WAAoB,CAAC;QAM1C,IAAI,CAAC,aAAa,GAAG,IAAA,qCAAoB,EAAC,aAAa,CAAC,CAAC;QACzD,IAAI,CAAC,QAAQ,GAAG,IAAA,2BAAc,GAAE,CAAC;QACjC,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,KAAwB,EACxB,QAAsB;QAEtB,MAAM,SAAS,GAAG,QAAQ,CAAC,kBAAkB,CAAC,QAAQ,CAAC;QAEvD,iCAAiC;QACjC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,CAAC;YACxB,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAED,wBAAwB;QACxB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;YACtE,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,EAAE,mBAAmB;gBAC9B,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,wBAAwB;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,2BAA2B;YAC3B,MAAM,aAAa,GAAG,IAAA,iCAAgB,EAAC,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAClE,MAAM,YAAY,GAAsB,EAAE,CAAC;YAE3C,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC7B,SAAS;gBACX,CAAC;gBAED,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;gBACjE,YAAY,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,MAAM,cAAc,GAAG,IAAI,CAAC,mBAAmB,CAC7C,QAAQ,EACR,YAAY,EACZ,SAAS,CAAC,cAAc,CACzB,CAAC;YAEF,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;YACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;YAE9D,iCAAiC;YACjC,MAAM,UAAU,GAAgB,CAAC,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACtE,IAAI,EAAE,WAAoB;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,QAAQ,EAAE,QAAQ
,CAAC,QAAQ;gBAC3B,OAAO,EAAE,QAAQ,CAAC,SAAS;aAC5B,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,MAAM,EAAE,QAAQ,CAAC,MAAM;gBACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,UAAU;gBACV,aAAa,EAAE,WAAW;aAC3B,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC;gBACT,UAAU,EAAE,EAAE;gBACd,aAAa,EAAE,WAAW;gBAC1B,KAAK,EAAE,qBAAqB,KAAK,EAAE;aACpC,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CACzB,QAAsB,EACtB,YAA+B,EAC/B,YAAqB;QAErB,MAAM,YAAY,GAAG,YAAY;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,aAAa,CAAC,CAAC,OAAO,UAAU,CAAC;aACzD,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,kBAAkB,GACtB,YAAY;YACZ;;gCAE0B,QAAQ,CAAC,QAAQ;uCACV,QAAQ,CAAC,QAAQ;;uCAEjB,CAAC;QAEpC,OAAO;UACD,QAAQ,CAAC,WAAW;;;UAGpB,QAAQ,CAAC,MAAM;;;UAGf,YAAY;;;QAGd,kBAAkB,EAAE,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CACtB,MAAc,EACd,KAAa;QAEb,MAAM,MAAM,GAAG,qDAAqD,CAAC;QAErE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,QAAQ,EAAE;aACzC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,iBAAiB,EAAE;oBAC9C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE;iBAClC;gBACD,eAAe,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE;aACzC,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxD,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;QACvE,CAAC;QAED,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CACb,4BAA4B,QAAQ,CAAC,MAAM,IAAI,SAAS,EAAE,CAC3D,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAQ,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;QAElD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,aAAa,IAAI,SAAS,CAAC;YACjE,MAAM,IAAI,KAAK,CACb,8CAA8C,YAAY,GAAG,CAC9D,CAAC;QACJ,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,
CAAC,OAAO,CAAmB,CAAC;YAE1D,kCAAkC;YAClC,IACE,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,WAAW,CAAC;gBACvC,WAAW,CAAC,YAAY,IAAI,IAAI;gBAChC,WAAW,CAAC,OAAO,IAAI,IAAI,EAC3B,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;YAChD,CAAC;YAED,qDAAqD;YACrD,2CAA2C;YAC3C,MAAM,UAAU,GAAG,WAAW,CAAC,WAAW;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC;iBAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACX,OAAO,EAAE,GAAG,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,WAAW,EAAE;aAC5C,CAAC,CAAC,CAAC;YAEN,8DAA8D;YAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,WAAW,CAAC,YAAY,IAAI,GAAG,CAAC;YAE1E,MAAM,QAAQ,GAAgB;gBAC5B,MAAM;gBACN,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAC,EAAE,iCAAiC;gBAC5F,SAAS,EAAE,WAAW,CAAC,OAAO;gBAC9B,UAAU;aACX,CAAC;YAEF,OAAO,QAAQ,CAAC;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CACb,iCAAiC,KAAK,cAAc,OAAO,EAAE,CAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,MAAc,EAAE,KAAc;QAC5C,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,OAAO,+BAA+B,CAAC;QACzC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC,CAAC;YACzE,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,UAAU,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;CACF;AAxOD,8CAwOC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "coding-agent-benchmarks",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.5.2",
|
|
4
4
|
"description": "Testing coding agents (GitHub Copilot CLI, Claude Code, etc.) with your repo's context to evaluate their code generation quality.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|