peerbench 0.0.2-alpha.0 → 0.0.2-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +123 -99
  2. package/dist/aggregators/index.d.ts +67 -0
  3. package/dist/aggregators/index.js +46 -0
  4. package/dist/aggregators/index.js.map +1 -0
  5. package/dist/benchmarks/index.d.ts +615 -1271
  6. package/dist/benchmarks/index.js +358 -805
  7. package/dist/benchmarks/index.js.map +1 -1
  8. package/dist/{chunk-DUBKY73H.js → chunk-4UBK6452.js} +13 -13
  9. package/dist/chunk-4UBK6452.js.map +1 -0
  10. package/dist/chunk-ERALDEZY.js +112 -0
  11. package/dist/chunk-ERALDEZY.js.map +1 -0
  12. package/dist/{chunk-ZJWSK4VO.js → chunk-HMQYGCKI.js} +1 -1
  13. package/dist/chunk-HMQYGCKI.js.map +1 -0
  14. package/dist/chunk-NUEOE3K5.js +8 -0
  15. package/dist/chunk-NUEOE3K5.js.map +1 -0
  16. package/dist/chunk-OQE6TQXZ.js +42 -0
  17. package/dist/chunk-OQE6TQXZ.js.map +1 -0
  18. package/dist/chunk-QY5MPNNB.js +28 -0
  19. package/dist/chunk-QY5MPNNB.js.map +1 -0
  20. package/dist/chunk-R76XA2K6.js +229 -0
  21. package/dist/chunk-R76XA2K6.js.map +1 -0
  22. package/dist/chunk-TRNCF2BG.js +35 -0
  23. package/dist/chunk-TRNCF2BG.js.map +1 -0
  24. package/dist/chunk-UHHHSYVE.js +11 -0
  25. package/dist/chunk-UHHHSYVE.js.map +1 -0
  26. package/dist/{chunk-232PY7K3.js → chunk-YY33MNMV.js} +29 -14
  27. package/dist/chunk-YY33MNMV.js.map +1 -0
  28. package/dist/chunk-ZEWI24CV.js +365 -0
  29. package/dist/chunk-ZEWI24CV.js.map +1 -0
  30. package/dist/chunk-ZXTQJFGL.js +44 -0
  31. package/dist/chunk-ZXTQJFGL.js.map +1 -0
  32. package/dist/index-BAioQhp2.d.ts +27 -0
  33. package/dist/index.d.ts +51 -26
  34. package/dist/index.js +28 -25
  35. package/dist/index.js.map +1 -1
  36. package/dist/json-file-ZwzLUbje.d.ts +73 -0
  37. package/dist/llm-judge-QThCZ9TQ.d.ts +67 -0
  38. package/dist/providers/index.d.ts +16 -19
  39. package/dist/providers/index.js +8 -253
  40. package/dist/providers/index.js.map +1 -1
  41. package/dist/schemas/extensions/index.d.ts +16 -2
  42. package/dist/schemas/extensions/index.js +9 -3
  43. package/dist/schemas/extensions/index.js.map +1 -1
  44. package/dist/schemas/index.d.ts +108 -141
  45. package/dist/schemas/index.js +7 -10
  46. package/dist/schemas/llm/index.d.ts +100 -82
  47. package/dist/schemas/llm/index.js +7 -29
  48. package/dist/schemas/llm/index.js.map +1 -1
  49. package/dist/scorers/index.d.ts +3 -2
  50. package/dist/scorers/index.js +8 -486
  51. package/dist/scorers/index.js.map +1 -1
  52. package/dist/storages/index.d.ts +69 -0
  53. package/dist/storages/index.js +98 -0
  54. package/dist/storages/index.js.map +1 -0
  55. package/package.json +12 -6
  56. package/dist/catalogs/index.d.ts +0 -75
  57. package/dist/catalogs/index.js +0 -88
  58. package/dist/catalogs/index.js.map +0 -1
  59. package/dist/chunk-22HU24QF.js +0 -8
  60. package/dist/chunk-22HU24QF.js.map +0 -1
  61. package/dist/chunk-232PY7K3.js.map +0 -1
  62. package/dist/chunk-7TREBPSJ.js +0 -26
  63. package/dist/chunk-7TREBPSJ.js.map +0 -1
  64. package/dist/chunk-DUBKY73H.js.map +0 -1
  65. package/dist/chunk-GVF4YZF3.js +0 -15
  66. package/dist/chunk-GVF4YZF3.js.map +0 -1
  67. package/dist/chunk-HJH3SW3L.js +0 -103
  68. package/dist/chunk-HJH3SW3L.js.map +0 -1
  69. package/dist/chunk-IUN2IUCS.js +0 -58
  70. package/dist/chunk-IUN2IUCS.js.map +0 -1
  71. package/dist/chunk-VBOM2YEG.js +0 -47
  72. package/dist/chunk-VBOM2YEG.js.map +0 -1
  73. package/dist/chunk-ZJWSK4VO.js.map +0 -1
  74. package/dist/data-BmN5WjZ4.d.ts +0 -57
  75. package/dist/generic-array-DLHWSvf1.d.ts +0 -22
  76. package/dist/index-WiPjF2AL.d.ts +0 -15
  77. package/dist/llm-judge-DIG1f1Az.d.ts +0 -67
  78. package/dist/simple-system-prompt-CzPYuvo0.d.ts +0 -49
  79. package/dist/system-prompt--0FdPWqK.d.ts +0 -58
  80. package/dist/utilities-BrRH32rD.d.ts +0 -30
@@ -0,0 +1,365 @@
1
+ import {
2
+ PEERBENCH_NAMESPACE
3
+ } from "./chunk-UHHHSYVE.js";
4
+ import {
5
+ parseResponseAsJSON
6
+ } from "./chunk-4UBK6452.js";
7
+
8
+ // src/scorers/abstract.ts
9
/**
 * Common base class for the scorers defined in this chunk
 * (RegexScorer, MCQScorer and LLMAsAJudgeScorer all extend it).
 * It declares no members of its own at runtime.
 */
var AbstractScorer = class {};
11
+
12
+ // src/scorers/regex.ts
13
/**
 * Generic regex scorer: extracts named capture groups from an input string
 * using an ordered list of patterns and scores the extracted values.
 */
var RegexScorer = class extends AbstractScorer {
  kind = `${PEERBENCH_NAMESPACE}/regex`;

  /**
   * Scores `params.input` against `params.patterns`.
   *
   * @param params.input The text to search.
   * @param params.patterns Patterns tried in order; each has a `regex` and
   *   optionally `captureGroupIndex` (defaults to 1) and `transform`.
   *   Regexes without the `g` flag are accepted and searched globally.
   * @param params.expectedValue Either a record of expected values keyed by
   *   group name, or a `(groupName, value) => boolean` validator.
   * @param params.matchPreference "first" or "last" match of a pattern
   *   (defaults to "last").
   * @param params.allowPartialScoring When true the score is the fraction of
   *   matching groups instead of all-or-nothing (defaults to false).
   * @returns `{ value, extractedAnswers }` where `extractedAnswers` holds only
   *   the named groups that were actually extracted.
   */
  async score(params) {
    // Seed every named group declared by any pattern with null ("not found").
    const extractedValues = {};
    for (const pattern of params.patterns) {
      for (const declared of pattern.regex.source.matchAll(/\(\?<(\w+)>/g)) {
        extractedValues[declared[1]] = null;
      }
    }

    const matchPreference = params.matchPreference ?? "last";
    for (const pattern of params.patterns) {
      // String.prototype.matchAll throws a TypeError for non-global regexes,
      // so search with a global copy when the caller did not set the "g" flag.
      const regex = pattern.regex.global
        ? pattern.regex
        : new RegExp(pattern.regex.source, `${pattern.regex.flags}g`);
      const matches = Array.from(params.input.matchAll(regex));
      const match = matchPreference === "first" ? matches[0] : matches.at(-1);
      if (!match) {
        continue;
      }

      if (match.groups) {
        let hasExtractedValue = false;
        for (const [groupName, groupValue] of Object.entries(match.groups)) {
          if (groupValue === void 0) {
            continue;
          }
          let value = groupValue;
          if (pattern.transform) {
            const transformed = pattern.transform(value);
            // A transform returning undefined rejects this group's value.
            if (transformed === void 0) {
              continue;
            }
            value = transformed;
          }
          extractedValues[groupName] = value;
          hasExtractedValue = true;
        }
        // The first pattern that yields at least one value wins.
        if (hasExtractedValue) {
          break;
        }
        continue;
      }

      // Pattern without named groups: the captured value cannot be stored by
      // name, but an accepted capture still ends the pattern search.
      const extractedValue = match[pattern.captureGroupIndex ?? 1];
      if (extractedValue === void 0) {
        continue;
      }
      if (pattern.transform && pattern.transform(extractedValue) === void 0) {
        continue;
      }
      break;
    }

    const allowPartial = params.allowPartialScoring ?? false;
    const foundEntries = Object.entries(extractedValues).filter(
      ([, value]) => value !== null
    );
    let score = 0;

    if (typeof params.expectedValue === "function") {
      const validator = params.expectedValue;
      // With nothing extracted the score stays 0.
      if (foundEntries.length > 0) {
        if (allowPartial) {
          const passingCount = foundEntries.filter(
            ([groupName, value]) => validator(groupName, value)
          ).length;
          score = passingCount / foundEntries.length;
        } else {
          score = foundEntries.every(
            ([groupName, value]) => validator(groupName, value)
          ) ? 1 : 0;
        }
      }
    } else {
      const expectedEntries = Object.entries(params.expectedValue);
      const matchesExpected = ([key, expectedValue]) => {
        const extractedValue = extractedValues[key];
        return extractedValue !== null && extractedValue === expectedValue;
      };
      if (allowPartial) {
        const totalExpected = expectedEntries.length;
        const matchingCount = expectedEntries.filter(matchesExpected).length;
        score = totalExpected > 0 ? matchingCount / totalExpected : 0;
      } else {
        score = expectedEntries.every(matchesExpected) ? 1 : 0;
      }
    }

    return {
      value: score,
      extractedAnswers: Object.fromEntries(foundEntries)
    };
  }
};
120
+
121
+ // src/scorers/mcq.ts
122
/**
 * Multiple-choice scorer. Tries, in order: an exact match of the whole
 * response against the correct answer keys, a JSON response with an `answer`
 * field, and finally regex extraction delegated to RegexScorer.
 */
var MCQScorer = class extends AbstractScorer {
  kind = `${PEERBENCH_NAMESPACE}/mcq`;
  regexScorer = new RegexScorer();

  /**
   * @param params.response Raw model response.
   * @param params.choices Record mapping answer keys (e.g. "A") to answer text.
   * @param params.correctAnswers Keys of the correct choices.
   * @returns `{ value, extractedAnswers }` with `value` being 1 or 0.
   */
  async score(params) {
    const { response, choices, correctAnswers } = params;
    const normalizedCorrectAnswers = correctAnswers.map(
      (ca) => ca.toUpperCase()
    );

    // 1) The whole response is exactly one of the correct keys.
    const normalizedResponse = response.trim().toUpperCase();
    if (normalizedCorrectAnswers.includes(normalizedResponse)) {
      return {
        value: 1,
        extractedAnswers: [normalizedResponse]
      };
    }

    // 2) JSON response carrying an `answer` field. `typeof null` is "object",
    // so null is excluded explicitly; a non-string answer is ignored here and
    // the response falls through to regex extraction instead of throwing.
    const json = parseResponseAsJSON(response);
    if (json !== void 0 && json !== null && typeof json === "object") {
      const extractedAnswer = typeof json.answer === "string" ? getFirstLetter(json.answer) : void 0;
      if (extractedAnswer !== void 0) {
        // getFirstLetter yields a single upper-cased letter, so it can be
        // compared against the normalized keys directly.
        return {
          value: normalizedCorrectAnswers.includes(extractedAnswer) ? 1 : 0,
          extractedAnswers: [extractedAnswer]
        };
      }
    }

    // 3) Regex extraction. Answer-text patterns of ALL choices come before the
    // generic single-letter patterns; otherwise a generic pattern would grab
    // the first letter of a later choice's text (e.g. "P" from "Paris").
    // Blank choice texts are skipped because their pattern would capture an
    // empty string for any "Answer is ..." response.
    const patterns = [
      this.buildNoAnswerPattern(),
      ...Object.values(choices).filter((answerText) => answerText.trim() !== "").flatMap((answerText) => this.buildAnswerTextPatterns(answerText)),
      ...this.buildGenericPatterns()
    ];

    const validateAnswer = (_groupName, extracted) => {
      const normalizedExtracted = extracted.trim().toUpperCase();
      // Extracted value is itself a correct key.
      if (normalizedCorrectAnswers.includes(normalizedExtracted)) {
        return true;
      }
      // Extracted value is the text of a choice whose key is correct
      // (compared case-insensitively because transforms upper-case it).
      const answerOption = Object.entries(choices).find(
        ([, value]) => value.trim().toUpperCase() === normalizedExtracted
      );
      return answerOption !== void 0 && normalizedCorrectAnswers.includes(answerOption[0].toUpperCase());
    };

    const result = await this.regexScorer.score({
      input: response,
      patterns,
      expectedValue: validateAnswer,
      matchPreference: "last"
    });
    return {
      value: result.value,
      extractedAnswers: Object.values(result.extractedAnswers)
    };
  }

  /**
   * Full pattern list for a single answer text, in the original order:
   * no-answer marker, answer-text patterns, generic patterns.
   */
  buildPatternsForAnswer(answerText) {
    return [
      this.buildNoAnswerPattern(),
      ...this.buildAnswerTextPatterns(answerText),
      ...this.buildGenericPatterns()
    ];
  }

  /**
   * "<!NO ANSWER!>" marker. It has no capture group, so it extracts nothing
   * and RegexScorer moves on to the next pattern.
   */
  buildNoAnswerPattern() {
    return {
      regex: /<!NO ANSWER!>/g
    };
  }

  /** Patterns that capture the full text of one choice, upper-cased. */
  buildAnswerTextPatterns(answerText) {
    const escapedAnswer = escapeRegex(answerText);
    const transform = (value) => value.toUpperCase();
    return [
      {
        // "Answer is $\boxed{answer text}$"
        regex: new RegExp(
          `[Aa]nswer is \\$\\\\boxed\\{(?<answer>${escapedAnswer})\\}\\$`,
          "g"
        ),
        transform
      },
      {
        // "Answer is answer text"
        regex: new RegExp(`[Aa]nswer is\\s+(?<answer>${escapedAnswer})`, "g"),
        transform
      },
      {
        // "Answer is **answer text**"
        regex: new RegExp(
          `[Aa]nswer is\\s+\\**(?<answer>${escapedAnswer})\\**`,
          "g"
        ),
        transform
      }
    ];
  }

  /** Choice-independent patterns that capture a single answer letter. */
  buildGenericPatterns() {
    return [
      {
        // "Answer is $\boxed{A}$."
        regex: /[Aa]nswer is \$\\boxed\{(?<answer>[A-Z])\}\$\.?/g
      },
      {
        // "Answer is A" - single letter followed by end of input or punctuation
        regex: /[Aa]nswer is\s+(?<answer>[A-Z])(?=\s*$|[.,;:!?])/g
      },
      {
        // "Answer is **A**"
        regex: /[Aa]nswer is\s+\**(?<answer>[A-Z])\**/g
      },
      {
        // "A: answer text"
        regex: /(?<answer>[A-Z]):.+/g
      },
      {
        // "A) answer text"
        regex: /(?<answer>[A-Z])\)\s*.+/g
      },
      {
        // "A)"
        regex: /(?<answer>[A-Z])\)/g
      }
    ];
  }
};
244
/**
 * Returns the first ASCII letter found in `text`, upper-cased.
 *
 * @param text Candidate answer text. Values that are not strings (possible
 *   when the value comes from parsed JSON) yield `undefined` instead of
 *   throwing.
 * @returns A single upper-case letter, or `undefined` when there is none.
 */
function getFirstLetter(text) {
  if (typeof text !== "string") {
    return void 0;
  }
  const match = text.match(/[A-Za-z]/);
  return match ? match[0].toUpperCase() : void 0;
}
248
/**
 * Escapes regular-expression metacharacters in `str` by prefixing each with a
 * backslash, so the result can be embedded in a RegExp source as literal text.
 */
function escapeRegex(str) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaCharacters, (character) => "\\" + character);
}
251
+
252
+ // src/scorers/llm-judge.ts
253
+ import z from "zod";
254
/**
 * Scorer that asks an LLM provider to grade a response against a rubric and a
 * list of weighted criteria, then folds the per-criterion scores into one
 * overall value.
 */
var LLMAsAJudgeScorer = class extends AbstractScorer {
  kind = `${PEERBENCH_NAMESPACE}/llm-as-a-judge`;
  provider;

  /**
   * @param config.provider Provider whose `forward` method is used to query
   *   the judge model.
   */
  constructor(config) {
    super();
    this.provider = config.provider;
  }

  /**
   * Sends the rubric, criteria and response to the judge model and parses its
   * JSON reply.
   *
   * @param params Reads `criteria`, `rubric`, `response`, `model`, and the
   *   optional `systemPrompt`, `maxExplanationLength` (defaults to 200) and
   *   `fieldsToExtract` (extra zod fields merged into the reply schema).
   * @returns Explanation, per-criterion results, the overall `value`, any
   *   extracted extra fields, and the provider's token/cost figures.
   */
  async score(params) {
    const criteria = normalizeWeights(params.criteria);

    // Shape the judge must reply with; caller-supplied fields are merged last.
    const criterionResultSchema = z.object({
      id: z.string().describe("The id of the criterion"),
      score: z.number().describe("The score of the criterion"),
      explanation: z.string().describe("The explanation of the criterion")
    });
    const responseSchema = z.object({
      results: z.array(criterionResultSchema).describe("The results of the evaluation per criterion"),
      explanation: z.string().describe(
        `The overall explanation for the score (less than ${params.maxExplanationLength ?? 200} characters)`
      ),
      ...params.fieldsToExtract ?? {}
    });
    const responseJSONSchema = responseSchema.toJSONSchema();

    const criteriaLines = criteria.map(
      (criterion) => `- ${criterion.id}: ${criterion.description} (weight: ${criterion.weight}, scale: ${criterion.scale?.min ?? 0}..${criterion.scale?.max ?? 5})`
    );
    const systemPrompt = [
      "You are a strict, fair evaluation judge.",
      // A caller-provided system prompt replaces the default instruction.
      params.systemPrompt || "Only use information from the rubric",
      "For each criterion return an integer score within the provided scale and a very brief justification (less than 2 sentences).",
      [`Rubric: ${params.rubric}`, `Criteria:`, ...criteriaLines].join("\n"),
      `Reply back with the following JSON schema (strict):\n${JSON.stringify(responseJSONSchema, null, 2)}\n`
    ].join("\n");
    const userPrompt = `Answer: ${params.response}`;

    const providerResponse = await this.provider.forward({
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: userPrompt }
      ],
      model: params.model,
      responseFormat: {
        type: "json_schema",
        json_schema: {
          name: "judgeResult",
          schema: responseJSONSchema
        }
      }
    });

    // Validate the reply against the same schema that was sent to the model.
    const { explanation, results, ...extractedFields } = responseSchema.parse(
      parseResponseAsJSON(providerResponse.data)
    );
    const { inputTokensUsed, outputTokensUsed, inputCost, outputCost } = providerResponse;
    return {
      explanation,
      results,
      value: computeOverallScore(results, criteria),
      extractedFields,
      provider: this.provider.kind,
      inputTokensUsed,
      outputTokensUsed,
      inputCost,
      outputCost
    };
  }
};
339
/**
 * Returns copies of the criteria whose weights are divided by the sum of all
 * weights. A missing weight counts as 1; when the sum is falsy (for example 0)
 * the weights are divided by 1, i.e. left as they are.
 */
function normalizeWeights(criteria) {
  let weightTotal = 0;
  for (const criterion of criteria) {
    weightTotal += criterion.weight ?? 1;
  }
  const divisor = weightTotal || 1;
  return criteria.map((criterion) => {
    const weight = (criterion.weight ?? 1) / divisor;
    return { ...criterion, weight };
  });
}
343
/**
 * Combines per-criterion results into one score in [0, 1].
 *
 * Each result is matched to its criterion by id, clamped to the criterion's
 * scale (0..5 when no scale is given), rescaled to 0..1 and multiplied by the
 * criterion's weight. Results with a non-finite score are skipped, and results
 * whose id matches no criterion get weight 0.
 */
function computeOverallScore(results, criteria) {
  const weightedSum = results.reduce((runningTotal, result) => {
    const rawScore = Number(result.score);
    if (!Number.isFinite(rawScore)) {
      return runningTotal;
    }
    const criterion = criteria.find((candidate) => candidate.id === result.id);
    const low = criterion?.scale?.min ?? 0;
    const high = criterion?.scale?.max ?? 5;
    const weight = criterion?.weight ?? 0;
    const clamped = Math.max(low, Math.min(high, rawScore));
    // A zero-width scale would divide by zero, so it contributes nothing.
    const fraction = high === low ? 0 : (clamped - low) / (high - low);
    return runningTotal + fraction * weight;
  }, 0);
  return Math.max(0, Math.min(1, weightedSum));
}
358
+
359
+ export {
360
+ AbstractScorer,
361
+ RegexScorer,
362
+ MCQScorer,
363
+ LLMAsAJudgeScorer
364
+ };
365
+ //# sourceMappingURL=chunk-ZEWI24CV.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/scorers/abstract.ts","../src/scorers/regex.ts","../src/scorers/mcq.ts","../src/scorers/llm-judge.ts"],"sourcesContent":["export abstract class AbstractScorer {\n abstract readonly kind: string;\n\n abstract score(params: any): Promise<BaseScorerResult | null>;\n}\n\nexport type BaseScorerResult = {\n value: number;\n explanation?: string;\n metadata?: Record<string, unknown>;\n [key: string]: unknown;\n};\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport { AbstractScorer, BaseScorerResult } from \"./abstract\";\n\nexport type RegexPattern = {\n /**\n * The regex pattern to match against the response\n */\n regex: RegExp;\n\n /**\n * The index of the capture group to extract (1-based, like match[1])\n * If not provided, defaults to 1 (first capture group)\n */\n captureGroupIndex?: number;\n\n /**\n * Optional function to transform the extracted value before validation\n */\n transform?: (value: string) => string | undefined;\n};\n\nexport type RegexScorerParams = {\n /**\n * The input text to score\n */\n input: string;\n\n /**\n * Array of regex patterns to try (in order, first match wins)\n */\n patterns: RegexPattern[];\n\n /**\n * Expected value(s) to match against. Can be a record of expected values for named groups, or a validation function\n */\n expectedValue:\n | Record<string, string>\n | ((groupName: string, match: string) => boolean);\n\n /**\n * Optional: Which match to use when multiple matches are found\n * Defaults to \"last\" (uses the last match found)\n */\n matchPreference?: \"first\" | \"last\";\n\n /**\n * Optional: If true, allows partial scoring based on how many groups match\n * For example, if 2 groups are expected and only 1 matches, score would be 0.5\n * Defaults to false (all-or-nothing scoring)\n */\n allowPartialScoring?: boolean;\n};\n\n/**\n * Generic Regex scorer. 
It scores the given input against a set of regex patterns.\n */\nexport class RegexScorer extends AbstractScorer {\n override readonly kind = `${PEERBENCH_NAMESPACE}/regex` as const;\n\n override async score(params: RegexScorerParams) {\n // Collect all named group names from all patterns\n const allGroupNames = new Set<string>();\n for (const pattern of params.patterns) {\n const regexSource = pattern.regex.source;\n const namedGroupRegex = /\\(\\?<(\\w+)>/g;\n\n while (true) {\n const match = namedGroupRegex.exec(regexSource);\n if (match === null) {\n break;\n }\n\n if (match[1]) {\n allGroupNames.add(match[1]);\n }\n }\n }\n\n // Initialize result object with all group names set to null (aka not found yet)\n const extractedValues: Record<string, string | null> = {};\n for (const groupName of allGroupNames) {\n extractedValues[groupName] = null;\n }\n\n // Try regex patterns in order, stop at first successful match\n const matchPreference = params.matchPreference ?? \"last\";\n for (const pattern of params.patterns) {\n const matches = Array.from(params.input.matchAll(pattern.regex));\n const match = matchPreference === \"first\" ? 
matches[0] : matches.at(-1);\n\n if (match && match.groups) {\n // Extract all named groups from this match\n let hasExtractedValue = false;\n for (const [groupName, groupValue] of Object.entries(match.groups)) {\n if (groupValue !== undefined) {\n let value = groupValue;\n\n // Apply transformation if provided\n if (pattern.transform) {\n const transformed = pattern.transform(value);\n if (transformed === undefined) {\n continue;\n }\n value = transformed;\n }\n\n extractedValues[groupName] = value;\n hasExtractedValue = true;\n }\n }\n // If we extracted at least one value, stop processing further patterns\n if (hasExtractedValue) {\n break;\n }\n } else if (match) {\n // Fallback to captureGroupIndex if no named groups\n // For unnamed groups, we can't add to extractedValues by name\n // They are only used for scoring if no named groups are found\n const captureGroupIndex = pattern.captureGroupIndex ?? 1;\n const extractedValue = match[captureGroupIndex];\n\n if (extractedValue !== undefined) {\n // Apply transformation if provided\n if (pattern.transform) {\n const transformed = pattern.transform(extractedValue);\n if (transformed === undefined) {\n continue;\n }\n // For unnamed groups, we can't store in extractedValues\n // but we can use it for scoring if no named groups were found\n }\n // If we have an extracted value (even if unnamed), stop processing\n // Note: This is a fallback case, so we break here too\n break;\n }\n }\n }\n\n // Calculate score based on matched value\n const allowPartial = params.allowPartialScoring ?? 
false;\n let score = 0;\n\n if (typeof params.expectedValue === \"function\") {\n // To get proper type inference, cast it using \"as\"\n const validator = params.expectedValue as typeof params.expectedValue;\n const extractedEntries = Object.entries(extractedValues).filter(\n ([, value]) => value !== null\n );\n\n // If no values were extracted, score is 0\n if (extractedEntries.length === 0) {\n score = 0;\n } else {\n if (allowPartial) {\n // Count how many extracted values pass validation\n const passingCount = extractedEntries.filter(\n ([groupName, extractedValue]) =>\n validator(groupName, extractedValue as string)\n ).length;\n score = passingCount / extractedEntries.length;\n } else {\n // All extracted values must pass validation\n const allMatch = extractedEntries.every(\n ([groupName, extractedValue]) =>\n validator(groupName, extractedValue as string)\n );\n score = allMatch ? 1 : 0;\n }\n }\n } else {\n const expectedEntries = Object.entries(params.expectedValue);\n const totalExpected = expectedEntries.length;\n\n if (allowPartial) {\n // Count how many expected values match their extracted values\n const matchingCount = expectedEntries.filter(([key, expectedValue]) => {\n const extractedValue = extractedValues[key];\n return extractedValue !== null && extractedValue === expectedValue;\n }).length;\n score = totalExpected > 0 ? matchingCount / totalExpected : 0;\n } else {\n // All expected values must match\n const allMatch = expectedEntries.every(([key, expectedValue]) => {\n const extractedValue = extractedValues[key];\n return extractedValue !== null && extractedValue === expectedValue;\n });\n score = allMatch ? 
1 : 0;\n }\n }\n\n return {\n value: score,\n extractedAnswers: Object.fromEntries(\n Object.entries(extractedValues).filter(\n (entry): entry is [string, string] => entry[1] !== null\n )\n ),\n } satisfies BaseScorerResult;\n }\n}\n","import { parseResponseAsJSON } from \"@/utils\";\nimport { AbstractScorer, BaseScorerResult } from \"./abstract\";\nimport { RegexScorer, RegexPattern, RegexScorerParams } from \"./regex\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport type MCQScorerParams = {\n response: string;\n choices: Record<string, string>;\n correctAnswers: string[];\n};\n\nexport class MCQScorer extends AbstractScorer {\n override readonly kind = `${PEERBENCH_NAMESPACE}/mcq` as const;\n private regexScorer = new RegexScorer();\n\n async score(params: MCQScorerParams): Promise<\n BaseScorerResult & {\n extractedAnswers: string[];\n }\n > {\n const { response, choices, correctAnswers } = params;\n const normalizedCorrectAnswers = correctAnswers.map((ca) =>\n ca.toUpperCase()\n );\n\n // Direct answer comparison\n const normalizedResponse = response.trim().toUpperCase();\n if (normalizedCorrectAnswers.includes(normalizedResponse)) {\n return {\n value: 1,\n extractedAnswers: [normalizedResponse],\n };\n }\n\n // Try to parse the response as JSON (original behavior: returns early if parsed)\n const json = parseResponseAsJSON<{ answer: string }>(response);\n if (json !== undefined && typeof json === \"object\") {\n const extractedAnswer =\n json.answer !== undefined ? getFirstLetter(json.answer) : undefined;\n\n if (extractedAnswer !== undefined) {\n const normalizedExtracted = extractedAnswer.trim().toUpperCase();\n if (normalizedCorrectAnswers.includes(normalizedExtracted)) {\n return {\n value: 1,\n extractedAnswers: [extractedAnswer],\n };\n }\n\n // Response parsed as JSON but does not represent the correct answer\n return {\n value: 0,\n extractedAnswers:\n json.answer === undefined\n ? []\n : [extractedAnswer ?? 
String(json.answer)],\n };\n }\n }\n\n // Build patterns for all correctAnswers\n const patterns: RegexPattern[] = [];\n for (const answer of Object.values(params.choices)) {\n const answerPatterns = this.buildPatternsForAnswer(answer);\n patterns.push(...answerPatterns);\n }\n\n // Create validation function that handles choices matching\n // New RegexScorer API expects (groupName: string, match: string) => boolean\n const validateAnswer = (groupName: string, extracted: string): boolean => {\n const normalizedExtracted = extracted.trim().toUpperCase();\n\n // Check if extracted value is in correctAnswers\n if (normalizedCorrectAnswers.includes(normalizedExtracted)) {\n return true;\n }\n\n // Check if extracted text matches any choice value, and that choice key is correct\n // Note: When transform uppercases the value, we need to compare case-insensitively\n const answerOption = Object.entries(choices).find(\n ([, value]) =>\n value.trim().toUpperCase() === extracted.trim().toUpperCase()\n );\n\n if (\n answerOption &&\n normalizedCorrectAnswers.includes(answerOption[0].toUpperCase())\n ) {\n return true;\n }\n\n return false;\n };\n\n // Build regex scorer params\n const regexParams: RegexScorerParams = {\n input: response,\n patterns,\n expectedValue: validateAnswer,\n matchPreference: \"last\",\n };\n\n // Call regex scorer\n const result = await this.regexScorer.score(regexParams);\n\n return {\n value: result.value,\n extractedAnswers: Object.entries(result.extractedAnswers).map(\n ([, value]) => value\n ),\n };\n }\n\n private buildPatternsForAnswer(answerText: string): RegexPattern[] {\n const escapedAnswer = escapeRegex(answerText);\n\n return [\n {\n // \"<!NO ANSWER!>\" - This pattern matches but has no capture group, so it won't extract anything\n regex: /<!NO ANSWER!>/g,\n },\n // Specific patterns for the full answer text (checked first)\n {\n // \"Answer is $\\boxed{answer text}$\"\n regex: new RegExp(\n `[Aa]nswer is 
\\\\$\\\\\\\\boxed\\\\{(?<answer>${escapedAnswer})\\\\}\\\\$`,\n \"g\"\n ),\n transform: (value: string) => value.toUpperCase(),\n },\n {\n // \"Answer is answer text\"\n regex: new RegExp(`[Aa]nswer is\\\\s+(?<answer>${escapedAnswer})`, \"g\"),\n transform: (value: string) => value.toUpperCase(),\n },\n {\n // \"Answer is **answer text**\"\n regex: new RegExp(\n `[Aa]nswer is\\\\s+\\\\**(?<answer>${escapedAnswer})\\\\**`,\n \"g\"\n ),\n transform: (value: string) => value.toUpperCase(),\n },\n // Generic patterns (checked after specific patterns)\n {\n // \"Answer is $\\boxed{A}$.\"\n regex: /[Aa]nswer is \\$\\\\boxed\\{(?<answer>[A-Z])\\}\\$\\.?/g,\n },\n {\n // \"Answer is A\" - match single letter only when it's a complete standalone answer.\n // Pattern matches: \"Answer is\" + whitespace + single letter + end or punctuation\n regex: /[Aa]nswer is\\s+(?<answer>[A-Z])(?=\\s*$|[.,;:!?])/g,\n },\n {\n // \"Answer is **A**\"\n regex: /[Aa]nswer is\\s+\\**(?<answer>[A-Z])\\**/g,\n },\n {\n // \"A: answer text\"\n regex: /(?<answer>[A-Z]):.+/g,\n },\n {\n // \"A) answer text\"\n regex: /(?<answer>[A-Z])\\)\\s*.+/g,\n },\n {\n // \"A)\"\n regex: /(?<answer>[A-Z])\\)/g,\n },\n ];\n }\n}\n\nfunction getFirstLetter(text: string): string | undefined {\n const match = text.match(/[A-Za-z]/);\n return match ? 
match[0].toUpperCase() : undefined;\n}\n\nfunction escapeRegex(str: string): string {\n return str.replace(/[.*+?^${}()|[\\]\\\\]/g, \"\\\\$&\");\n}\n","import { AbstractLLMProvider } from \"@/providers/abstract/llm\";\nimport { parseResponseAsJSON } from \"@/utils\";\nimport { RateLimiter } from \"@/utils/rate-limiter\";\nimport { AbstractScorer, BaseScorerResult } from \"./abstract\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport z from \"zod\";\n\nexport class LLMAsAJudgeScorer extends AbstractScorer {\n override readonly kind = `${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const;\n\n private provider: AbstractLLMProvider;\n\n constructor(config: {\n provider: AbstractLLMProvider;\n rateLimiter?: RateLimiter;\n }) {\n super();\n this.provider = config.provider;\n }\n\n override async score<T extends z.ZodRawShape>(\n params: LLMAsAJudgeScoreParams & { fieldsToExtract: T }\n ): Promise<ScorerResultWithExtractedFields<T> | null>;\n override async score(\n params: LLMAsAJudgeScoreParams & { fieldsToExtract?: never }\n ): Promise<ScorerResultWithoutExtractedFields | null>;\n override async score<T extends z.ZodRawShape>(\n params: LLMAsAJudgeScoreParams & { fieldsToExtract?: T }\n ): Promise<\n | ScorerResultWithoutExtractedFields\n | ScorerResultWithExtractedFields<T>\n | null\n > {\n const criteria = normalizeWeights(params.criteria);\n const systemPrompt = [];\n const responseSchema = z.object({\n results: z\n .array(\n z.object({\n id: z.string().describe(\"The id of the criterion\"),\n score: z.number().describe(\"The score of the criterion\"),\n explanation: z\n .string()\n .describe(\"The explanation of the criterion\"),\n })\n )\n .describe(\"The results of the evaluation per criterion\"),\n\n explanation: z\n .string()\n .describe(\n `The overall explanation for the score (less than ${params.maxExplanationLength ?? 200} characters)`\n ),\n\n ...(params.fieldsToExtract ?? 
{}),\n });\n\n systemPrompt.push(\"You are a strict, fair evaluation judge.\");\n\n if (params.systemPrompt) {\n systemPrompt.push(params.systemPrompt);\n } else {\n systemPrompt.push(\"Only use information from the rubric\");\n }\n\n systemPrompt.push(\n \"For each criterion return an integer score within the provided scale and a very brief justification (less than 2 sentences).\"\n );\n systemPrompt.push(\n [\n `Rubric: ${params.rubric}`,\n `Criteria:`,\n ...criteria.map(\n (criterion) =>\n `- ${criterion.id}: ${criterion.description} (weight: ${criterion.weight}, scale: ${criterion.scale?.min ?? 0}..${criterion.scale?.max ?? 5})`\n ),\n ].join(\"\\n\")\n );\n\n const responseJSONSchema = responseSchema.toJSONSchema();\n systemPrompt.push(\n `Reply back with the following JSON schema (strict):\\n${JSON.stringify(responseJSONSchema, null, 2)}\\n`\n );\n\n const userPrompt = [`Answer: ${params.response}`];\n const providerResponse = await this.provider.forward({\n messages: [\n {\n role: \"system\",\n content: systemPrompt.join(\"\\n\"),\n },\n {\n role: \"user\",\n content: userPrompt.join(\"\\n\"),\n },\n ],\n model: params.model,\n responseFormat: {\n type: \"json_schema\",\n json_schema: {\n name: \"judgeResult\",\n schema: responseJSONSchema,\n },\n },\n });\n\n const parsed = responseSchema.parse(\n parseResponseAsJSON(providerResponse.data)\n );\n\n const { explanation, results, ...extractedFields } = parsed;\n\n return {\n explanation,\n results,\n value: computeOverallScore(results, criteria),\n extractedFields: extractedFields as z.infer<z.ZodObject<T>>,\n\n provider: this.provider.kind,\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n };\n }\n}\n\nexport type LLMAsAJudgeCriterion = {\n id: string;\n description: string;\n weight: number;\n scale?: {\n min: number;\n max: number;\n };\n};\n\nexport type 
LLMAsAJudgeScoreParams = {\n model: string;\n response: string;\n rubric: string;\n criteria: LLMAsAJudgeCriterion[];\n\n systemPrompt?: string;\n maxExplanationLength?: number;\n};\n\ntype ScorerResultWithoutExtractedFields = BaseScorerResult & {\n results: {\n id: string;\n score: number;\n explanation: string;\n }[];\n\n provider: string;\n inputTokensUsed?: number;\n outputTokensUsed?: number;\n inputCost?: string;\n outputCost?: string;\n};\n\ntype ScorerResultWithExtractedFields<T extends z.ZodRawShape> =\n ScorerResultWithoutExtractedFields & {\n extractedFields: z.infer<z.ZodObject<T>>;\n };\n\nfunction normalizeWeights(\n criteria: LLMAsAJudgeCriterion[]\n): LLMAsAJudgeCriterion[] {\n const sum = criteria.reduce((a, c) => a + (c.weight ?? 1), 0) || 1;\n return criteria.map((c) => ({ ...c, weight: (c.weight ?? 1) / sum }));\n}\n\nfunction computeOverallScore(\n results: ScorerResultWithoutExtractedFields[\"results\"],\n criteria: LLMAsAJudgeCriterion[]\n): number {\n let total = 0;\n for (const pc of results) {\n const criterion = criteria.find((c) => c.id === pc.id);\n const min = criterion?.scale?.min ?? 0;\n const max = criterion?.scale?.max ?? 5;\n const weight = criterion?.weight ?? 0;\n\n const score = Number(pc.score);\n if (!Number.isFinite(score)) continue;\n\n const clamped = Math.max(min, Math.min(max, score));\n const normalized01 = max === min ? 
0 : (clamped - min) / (max - min);\n total += normalized01 * weight;\n }\n return Math.max(0, Math.min(1, total));\n}\n"],"mappings":";;;;;;;;AAAO,IAAe,iBAAf,MAA8B;AAIrC;;;ACoDO,IAAM,cAAN,cAA0B,eAAe;AAAA,EAC5B,OAAO,GAAG,mBAAmB;AAAA,EAE/C,MAAe,MAAM,QAA2B;AAE9C,UAAM,gBAAgB,oBAAI,IAAY;AACtC,eAAW,WAAW,OAAO,UAAU;AACrC,YAAM,cAAc,QAAQ,MAAM;AAClC,YAAM,kBAAkB;AAExB,aAAO,MAAM;AACX,cAAM,QAAQ,gBAAgB,KAAK,WAAW;AAC9C,YAAI,UAAU,MAAM;AAClB;AAAA,QACF;AAEA,YAAI,MAAM,CAAC,GAAG;AACZ,wBAAc,IAAI,MAAM,CAAC,CAAC;AAAA,QAC5B;AAAA,MACF;AAAA,IACF;AAGA,UAAM,kBAAiD,CAAC;AACxD,eAAW,aAAa,eAAe;AACrC,sBAAgB,SAAS,IAAI;AAAA,IAC/B;AAGA,UAAM,kBAAkB,OAAO,mBAAmB;AAClD,eAAW,WAAW,OAAO,UAAU;AACrC,YAAM,UAAU,MAAM,KAAK,OAAO,MAAM,SAAS,QAAQ,KAAK,CAAC;AAC/D,YAAM,QAAQ,oBAAoB,UAAU,QAAQ,CAAC,IAAI,QAAQ,GAAG,EAAE;AAEtE,UAAI,SAAS,MAAM,QAAQ;AAEzB,YAAI,oBAAoB;AACxB,mBAAW,CAAC,WAAW,UAAU,KAAK,OAAO,QAAQ,MAAM,MAAM,GAAG;AAClE,cAAI,eAAe,QAAW;AAC5B,gBAAI,QAAQ;AAGZ,gBAAI,QAAQ,WAAW;AACrB,oBAAM,cAAc,QAAQ,UAAU,KAAK;AAC3C,kBAAI,gBAAgB,QAAW;AAC7B;AAAA,cACF;AACA,sBAAQ;AAAA,YACV;AAEA,4BAAgB,SAAS,IAAI;AAC7B,gCAAoB;AAAA,UACtB;AAAA,QACF;AAEA,YAAI,mBAAmB;AACrB;AAAA,QACF;AAAA,MACF,WAAW,OAAO;AAIhB,cAAM,oBAAoB,QAAQ,qBAAqB;AACvD,cAAM,iBAAiB,MAAM,iBAAiB;AAE9C,YAAI,mBAAmB,QAAW;AAEhC,cAAI,QAAQ,WAAW;AACrB,kBAAM,cAAc,QAAQ,UAAU,cAAc;AACpD,gBAAI,gBAAgB,QAAW;AAC7B;AAAA,YACF;AAAA,UAGF;AAGA;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAGA,UAAM,eAAe,OAAO,uBAAuB;AACnD,QAAI,QAAQ;AAEZ,QAAI,OAAO,OAAO,kBAAkB,YAAY;AAE9C,YAAM,YAAY,OAAO;AACzB,YAAM,mBAAmB,OAAO,QAAQ,eAAe,EAAE;AAAA,QACvD,CAAC,CAAC,EAAE,KAAK,MAAM,UAAU;AAAA,MAC3B;AAGA,UAAI,iBAAiB,WAAW,GAAG;AACjC,gBAAQ;AAAA,MACV,OAAO;AACL,YAAI,cAAc;AAEhB,gBAAM,eAAe,iBAAiB;AAAA,YACpC,CAAC,CAAC,WAAW,cAAc,MACzB,UAAU,WAAW,cAAwB;AAAA,UACjD,EAAE;AACF,kBAAQ,eAAe,iBAAiB;AAAA,QAC1C,OAAO;AAEL,gBAAM,WAAW,iBAAiB;AAAA,YAChC,CAAC,CAAC,WAAW,cAAc,MACzB,UAAU,WAAW,cAAwB;AAAA,UACjD;AACA,kBAAQ,WAAW,IAAI;AAAA,QACzB;AAAA,MACF;AAAA,IACF,OAAO;AACL,YAAM,kBAAkB,OAAO,QAAQ,OAAO,aAAa;AAC3D,YAAM,gBAAgB,gBAAgB;AAEtC,UAAI,cAAc;AAEhB,cAAM,gBAAgB,gBAAgB,OAAO,CAAC,CA
AC,KAAK,aAAa,MAAM;AACrE,gBAAM,iBAAiB,gBAAgB,GAAG;AAC1C,iBAAO,mBAAmB,QAAQ,mBAAmB;AAAA,QACvD,CAAC,EAAE;AACH,gBAAQ,gBAAgB,IAAI,gBAAgB,gBAAgB;AAAA,MAC9D,OAAO;AAEL,cAAM,WAAW,gBAAgB,MAAM,CAAC,CAAC,KAAK,aAAa,MAAM;AAC/D,gBAAM,iBAAiB,gBAAgB,GAAG;AAC1C,iBAAO,mBAAmB,QAAQ,mBAAmB;AAAA,QACvD,CAAC;AACD,gBAAQ,WAAW,IAAI;AAAA,MACzB;AAAA,IACF;AAEA,WAAO;AAAA,MACL,OAAO;AAAA,MACP,kBAAkB,OAAO;AAAA,QACvB,OAAO,QAAQ,eAAe,EAAE;AAAA,UAC9B,CAAC,UAAqC,MAAM,CAAC,MAAM;AAAA,QACrD;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACF;;;AC5LO,IAAM,YAAN,cAAwB,eAAe;AAAA,EAC1B,OAAO,GAAG,mBAAmB;AAAA,EACvC,cAAc,IAAI,YAAY;AAAA,EAEtC,MAAM,MAAM,QAIV;AACA,UAAM,EAAE,UAAU,SAAS,eAAe,IAAI;AAC9C,UAAM,2BAA2B,eAAe;AAAA,MAAI,CAAC,OACnD,GAAG,YAAY;AAAA,IACjB;AAGA,UAAM,qBAAqB,SAAS,KAAK,EAAE,YAAY;AACvD,QAAI,yBAAyB,SAAS,kBAAkB,GAAG;AACzD,aAAO;AAAA,QACL,OAAO;AAAA,QACP,kBAAkB,CAAC,kBAAkB;AAAA,MACvC;AAAA,IACF;AAGA,UAAM,OAAO,oBAAwC,QAAQ;AAC7D,QAAI,SAAS,UAAa,OAAO,SAAS,UAAU;AAClD,YAAM,kBACJ,KAAK,WAAW,SAAY,eAAe,KAAK,MAAM,IAAI;AAE5D,UAAI,oBAAoB,QAAW;AACjC,cAAM,sBAAsB,gBAAgB,KAAK,EAAE,YAAY;AAC/D,YAAI,yBAAyB,SAAS,mBAAmB,GAAG;AAC1D,iBAAO;AAAA,YACL,OAAO;AAAA,YACP,kBAAkB,CAAC,eAAe;AAAA,UACpC;AAAA,QACF;AAGA,eAAO;AAAA,UACL,OAAO;AAAA,UACP,kBACE,KAAK,WAAW,SACZ,CAAC,IACD,CAAC,mBAAmB,OAAO,KAAK,MAAM,CAAC;AAAA,QAC/C;AAAA,MACF;AAAA,IACF;AAGA,UAAM,WAA2B,CAAC;AAClC,eAAW,UAAU,OAAO,OAAO,OAAO,OAAO,GAAG;AAClD,YAAM,iBAAiB,KAAK,uBAAuB,MAAM;AACzD,eAAS,KAAK,GAAG,cAAc;AAAA,IACjC;AAIA,UAAM,iBAAiB,CAAC,WAAmB,cAA+B;AACxE,YAAM,sBAAsB,UAAU,KAAK,EAAE,YAAY;AAGzD,UAAI,yBAAyB,SAAS,mBAAmB,GAAG;AAC1D,eAAO;AAAA,MACT;AAIA,YAAM,eAAe,OAAO,QAAQ,OAAO,EAAE;AAAA,QAC3C,CAAC,CAAC,EAAE,KAAK,MACP,MAAM,KAAK,EAAE,YAAY,MAAM,UAAU,KAAK,EAAE,YAAY;AAAA,MAChE;AAEA,UACE,gBACA,yBAAyB,SAAS,aAAa,CAAC,EAAE,YAAY,CAAC,GAC/D;AACA,eAAO;AAAA,MACT;AAEA,aAAO;AAAA,IACT;AAGA,UAAM,cAAiC;AAAA,MACrC,OAAO;AAAA,MACP;AAAA,MACA,eAAe;AAAA,MACf,iBAAiB;AAAA,IACnB;AAGA,UAAM,SAAS,MAAM,KAAK,YAAY,MAAM,WAAW;AAEvD,WAAO;AAAA,MACL,OAAO,OAAO;AAAA,MACd,kBAAkB,OAAO,QAAQ,OAAO,gBAAgB,EAAE;AAAA,QACxD,CAAC,CAAC,EAAE,KAAK,MAAM;AAAA,MACjB;AAAA
,IACF;AAAA,EACF;AAAA,EAEQ,uBAAuB,YAAoC;AACjE,UAAM,gBAAgB,YAAY,UAAU;AAE5C,WAAO;AAAA,MACL;AAAA;AAAA,QAEE,OAAO;AAAA,MACT;AAAA;AAAA,MAEA;AAAA;AAAA,QAEE,OAAO,IAAI;AAAA,UACT,yCAAyC,aAAa;AAAA,UACtD;AAAA,QACF;AAAA,QACA,WAAW,CAAC,UAAkB,MAAM,YAAY;AAAA,MAClD;AAAA,MACA;AAAA;AAAA,QAEE,OAAO,IAAI,OAAO,6BAA6B,aAAa,KAAK,GAAG;AAAA,QACpE,WAAW,CAAC,UAAkB,MAAM,YAAY;AAAA,MAClD;AAAA,MACA;AAAA;AAAA,QAEE,OAAO,IAAI;AAAA,UACT,iCAAiC,aAAa;AAAA,UAC9C;AAAA,QACF;AAAA,QACA,WAAW,CAAC,UAAkB,MAAM,YAAY;AAAA,MAClD;AAAA;AAAA,MAEA;AAAA;AAAA,QAEE,OAAO;AAAA,MACT;AAAA,MACA;AAAA;AAAA;AAAA,QAGE,OAAO;AAAA,MACT;AAAA,MACA;AAAA;AAAA,QAEE,OAAO;AAAA,MACT;AAAA,MACA;AAAA;AAAA,QAEE,OAAO;AAAA,MACT;AAAA,MACA;AAAA;AAAA,QAEE,OAAO;AAAA,MACT;AAAA,MACA;AAAA;AAAA,QAEE,OAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AACF;AAEA,SAAS,eAAe,MAAkC;AACxD,QAAM,QAAQ,KAAK,MAAM,UAAU;AACnC,SAAO,QAAQ,MAAM,CAAC,EAAE,YAAY,IAAI;AAC1C;AAEA,SAAS,YAAY,KAAqB;AACxC,SAAO,IAAI,QAAQ,uBAAuB,MAAM;AAClD;;;AC/KA,OAAO,OAAO;AAEP,IAAM,oBAAN,cAAgC,eAAe;AAAA,EAClC,OAAO,GAAG,mBAAmB;AAAA,EAEvC;AAAA,EAER,YAAY,QAGT;AACD,UAAM;AACN,SAAK,WAAW,OAAO;AAAA,EACzB;AAAA,EAQA,MAAe,MACb,QAKA;AACA,UAAM,WAAW,iBAAiB,OAAO,QAAQ;AACjD,UAAM,eAAe,CAAC;AACtB,UAAM,iBAAiB,EAAE,OAAO;AAAA,MAC9B,SAAS,EACN;AAAA,QACC,EAAE,OAAO;AAAA,UACP,IAAI,EAAE,OAAO,EAAE,SAAS,yBAAyB;AAAA,UACjD,OAAO,EAAE,OAAO,EAAE,SAAS,4BAA4B;AAAA,UACvD,aAAa,EACV,OAAO,EACP,SAAS,kCAAkC;AAAA,QAChD,CAAC;AAAA,MACH,EACC,SAAS,6CAA6C;AAAA,MAEzD,aAAa,EACV,OAAO,EACP;AAAA,QACC,oDAAoD,OAAO,wBAAwB,GAAG;AAAA,MACxF;AAAA,MAEF,GAAI,OAAO,mBAAmB,CAAC;AAAA,IACjC,CAAC;AAED,iBAAa,KAAK,0CAA0C;AAE5D,QAAI,OAAO,cAAc;AACvB,mBAAa,KAAK,OAAO,YAAY;AAAA,IACvC,OAAO;AACL,mBAAa,KAAK,sCAAsC;AAAA,IAC1D;AAEA,iBAAa;AAAA,MACX;AAAA,IACF;AACA,iBAAa;AAAA,MACX;AAAA,QACE,WAAW,OAAO,MAAM;AAAA,QACxB;AAAA,QACA,GAAG,SAAS;AAAA,UACV,CAAC,cACC,KAAK,UAAU,EAAE,KAAK,UAAU,WAAW,aAAa,UAAU,MAAM,YAAY,UAAU,OAAO,OAAO,CAAC,KAAK,UAAU,OAAO,OAAO,CAAC;AAAA,QAC/I;AAAA,MACF,EAAE,KAAK,IAAI;AAAA,IACb;AAEA,UAAM,qBAAqB,eAAe,aAAa;AACvD,iBAAa;AAAA,MACX;AAAA,EAAwD,KAAK,UAAU,oBAAoB,MAAM,CAAC,CAAC;AAAA;AAAA,IACr
G;AAEA,UAAM,aAAa,CAAC,WAAW,OAAO,QAAQ,EAAE;AAChD,UAAM,mBAAmB,MAAM,KAAK,SAAS,QAAQ;AAAA,MACnD,UAAU;AAAA,QACR;AAAA,UACE,MAAM;AAAA,UACN,SAAS,aAAa,KAAK,IAAI;AAAA,QACjC;AAAA,QACA;AAAA,UACE,MAAM;AAAA,UACN,SAAS,WAAW,KAAK,IAAI;AAAA,QAC/B;AAAA,MACF;AAAA,MACA,OAAO,OAAO;AAAA,MACd,gBAAgB;AAAA,QACd,MAAM;AAAA,QACN,aAAa;AAAA,UACX,MAAM;AAAA,UACN,QAAQ;AAAA,QACV;AAAA,MACF;AAAA,IACF,CAAC;AAED,UAAM,SAAS,eAAe;AAAA,MAC5B,oBAAoB,iBAAiB,IAAI;AAAA,IAC3C;AAEA,UAAM,EAAE,aAAa,SAAS,GAAG,gBAAgB,IAAI;AAErD,WAAO;AAAA,MACL;AAAA,MACA;AAAA,MACA,OAAO,oBAAoB,SAAS,QAAQ;AAAA,MAC5C;AAAA,MAEA,UAAU,KAAK,SAAS;AAAA,MACxB,iBAAiB,iBAAiB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,WAAW,iBAAiB;AAAA,MAC5B,YAAY,iBAAiB;AAAA,IAC/B;AAAA,EACF;AACF;AAyCA,SAAS,iBACP,UACwB;AACxB,QAAM,MAAM,SAAS,OAAO,CAAC,GAAG,MAAM,KAAK,EAAE,UAAU,IAAI,CAAC,KAAK;AACjE,SAAO,SAAS,IAAI,CAAC,OAAO,EAAE,GAAG,GAAG,SAAS,EAAE,UAAU,KAAK,IAAI,EAAE;AACtE;AAEA,SAAS,oBACP,SACA,UACQ;AACR,MAAI,QAAQ;AACZ,aAAW,MAAM,SAAS;AACxB,UAAM,YAAY,SAAS,KAAK,CAAC,MAAM,EAAE,OAAO,GAAG,EAAE;AACrD,UAAM,MAAM,WAAW,OAAO,OAAO;AACrC,UAAM,MAAM,WAAW,OAAO,OAAO;AACrC,UAAM,SAAS,WAAW,UAAU;AAEpC,UAAM,QAAQ,OAAO,GAAG,KAAK;AAC7B,QAAI,CAAC,OAAO,SAAS,KAAK,EAAG;AAE7B,UAAM,UAAU,KAAK,IAAI,KAAK,KAAK,IAAI,KAAK,KAAK,CAAC;AAClD,UAAM,eAAe,QAAQ,MAAM,KAAK,UAAU,QAAQ,MAAM;AAChE,aAAS,eAAe;AAAA,EAC1B;AACA,SAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AACvC;","names":[]}
@@ -0,0 +1,44 @@
1
+ import {
2
+ CATEGORIES,
3
+ PEERBENCH_NAMESPACE
4
+ } from "./chunk-UHHHSYVE.js";
5
+ import {
6
+ buildSchemaDefiner
7
+ } from "./chunk-OQE6TQXZ.js";
8
+ import {
9
+ IdSchema
10
+ } from "./chunk-NUEOE3K5.js";
11
+
12
+ // src/schemas/llm/system-prompt.ts
13
+ import { z } from "zod";
14
+ var BaseSystemPromptSchemaV1 = z.object({
15
+ id: IdSchema,
16
+ namespace: z.string(),
17
+ kind: z.string(),
18
+ schemaVersion: z.number(),
19
+ version: z.number(),
20
+ metadata: z.record(z.string(), z.unknown()).optional()
21
+ });
22
+ var defineSystemPromptSchema = buildSchemaDefiner(
23
+ BaseSystemPromptSchemaV1,
24
+ "sys-prompt"
25
+ );
26
+
27
+ // src/schemas/llm/simple-system-prompt.ts
28
+ import { z as z2 } from "zod";
29
+ var SimpleSystemPromptSchemaV1 = defineSystemPromptSchema({
30
+ baseSchema: BaseSystemPromptSchemaV1,
31
+ namespace: PEERBENCH_NAMESPACE,
32
+ kind: `${CATEGORIES.LLM}/simple`,
33
+ schemaVersion: 1,
34
+ fields: {
35
+ content: z2.string()
36
+ }
37
+ });
38
+
39
+ export {
40
+ BaseSystemPromptSchemaV1,
41
+ defineSystemPromptSchema,
42
+ SimpleSystemPromptSchemaV1
43
+ };
44
+ //# sourceMappingURL=chunk-ZXTQJFGL.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/schemas/llm/system-prompt.ts","../src/schemas/llm/simple-system-prompt.ts"],"sourcesContent":["import { IdSchema } from \"../id\";\nimport { z } from \"zod\";\nimport { buildSchemaDefiner } from \"../schema-definer\";\n\nexport const BaseSystemPromptSchemaV1 = z.object({\n id: IdSchema,\n namespace: z.string(),\n kind: z.string(),\n schemaVersion: z.number(),\n version: z.number(),\n metadata: z.record(z.string(), z.unknown()).optional(),\n});\nexport type BaseSystemPromptV1 = z.infer<typeof BaseSystemPromptSchemaV1>;\n\nexport const defineSystemPromptSchema = buildSchemaDefiner(\n BaseSystemPromptSchemaV1,\n \"sys-prompt\"\n);\n","import { z } from \"zod\";\nimport { defineSystemPromptSchema } from \"./system-prompt\";\nimport { BaseSystemPromptSchemaV1 } from \"./system-prompt\";\nimport { CATEGORIES, PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport const SimpleSystemPromptSchemaV1 = defineSystemPromptSchema({\n baseSchema: BaseSystemPromptSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: `${CATEGORIES.LLM}/simple`,\n schemaVersion: 1,\n fields: {\n content: z.string(),\n },\n});\nexport type SimpleSystemPromptV1 = z.infer<typeof SimpleSystemPromptSchemaV1>;\n"],"mappings":";;;;;;;;;;;;AACA,SAAS,SAAS;AAGX,IAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,IAAI;AAAA,EACJ,WAAW,EAAE,OAAO;AAAA,EACpB,MAAM,EAAE,OAAO;AAAA,EACf,eAAe,EAAE,OAAO;AAAA,EACxB,SAAS,EAAE,OAAO;AAAA,EAClB,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC;AAGM,IAAM,2BAA2B;AAAA,EACtC;AAAA,EACA;AACF;;;ACjBA,SAAS,KAAAA,UAAS;AAKX,IAAM,6BAA6B,yBAAyB;AAAA,EACjE,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM,GAAG,WAAW,GAAG;AAAA,EACvB,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,SAASC,GAAE,OAAO;AAAA,EACpB;AACF,CAAC;","names":["z","z"]}
@@ -0,0 +1,27 @@
1
+ import { A as AbstractProvider } from './provider-BDjGp2y-.js';
2
+ import z__default, { z } from 'zod';
3
+ import { A as AbstractScorer } from './abstract-Dec9Sc5O.js';
4
+
5
+ declare const IdSchema: z__default.ZodString;
6
+
7
+ type ClassConstructor<T> = new (...args: any[]) => T;
8
+ type AbstractClassConstructor<T> = abstract new (...args: any[]) => T;
9
+ type InferExtension<TExtension extends z.ZodRawShape, TBase = unknown> = z.infer<z.ZodObject<TExtension>> & TBase;
10
+ type WidenLiteralSchema<S> = S extends z.ZodString ? z.ZodString | z.ZodLiteral<string> : S extends z.ZodNumber ? z.ZodNumber | z.ZodLiteral<number> : S extends z.ZodBoolean ? z.ZodBoolean | z.ZodLiteral<boolean> : S extends z.ZodLiteral<infer L> ? L extends string ? z.ZodString | z.ZodLiteral<L> : L extends number ? z.ZodNumber | z.ZodLiteral<L> : L extends boolean ? z.ZodBoolean | z.ZodLiteral<L> : S : S extends z.ZodOptional<infer O> ? z.ZodOptional<WidenLiteralSchema<O>> : S extends z.ZodType ? S : never;
11
+ type WidenZodObject<T extends z.ZodObject> = z.ZodObject<{
12
+ [K in keyof T["shape"]]: WidenLiteralSchema<T["shape"][K]>;
13
+ }>;
14
+
15
+ type Id = z__default.infer<typeof IdSchema>;
16
+ type IdGenerator<TInput = unknown> = (input: TInput) => MaybePromise<Id>;
17
+ type MaybePromise<T> = T | Promise<T>;
18
+ declare const ScoringMethod: {
19
+ readonly ai: "ai";
20
+ readonly human: "human";
21
+ readonly algo: "algo";
22
+ };
23
+ type ScoringMethod = (typeof ScoringMethod)[keyof typeof ScoringMethod];
24
+ type ScorerCtor = ClassConstructor<AbstractScorer> | AbstractClassConstructor<AbstractScorer>;
25
+ type ProviderCtor = ClassConstructor<AbstractProvider> | AbstractClassConstructor<AbstractProvider>;
26
+
27
+ export { type AbstractClassConstructor as A, type ClassConstructor as C, type IdGenerator as I, type MaybePromise as M, type ProviderCtor as P, type ScorerCtor as S, type WidenZodObject as W, type InferExtension as a, type Id as b, ScoringMethod as c, IdSchema as d };
package/dist/index.d.ts CHANGED
@@ -1,15 +1,9 @@
1
- import { I as IdGenerator } from './index-WiPjF2AL.js';
2
- export { a as Id, M as MaybePromise, S as ScoringMethod } from './index-WiPjF2AL.js';
3
- export { A as AbstractDataLoader, a as AbstractLoader, e as LLMChatRunner, d as LLMChatRunnerParams, L as LoaderResult, R as Runner, b as RunnerParams, c as RunnerResult } from './data-BmN5WjZ4.js';
4
- export { A as AbstractFileLoader, a as AbstractHttpLoader, C as ClassConstructor } from './utilities-BrRH32rD.js';
5
- export { a as GenericJSONArrayDataLoader, G as GenericJSONArrayLoaderResult } from './generic-array-DLHWSvf1.js';
1
+ import { I as IdGenerator, P as ProviderCtor, S as ScorerCtor } from './index-BAioQhp2.js';
2
+ export { A as AbstractClassConstructor, C as ClassConstructor, b as Id, a as InferExtension, M as MaybePromise, c as ScoringMethod, W as WidenZodObject } from './index-BAioQhp2.js';
6
3
  export { R as RateLimiter, b as RateLimiterCallOptions, a as RateLimiterOptions } from './rate-limiter-CSmVIRsM.js';
7
- import 'zod';
8
- import './schemas/index.js';
9
- import './provider-BDjGp2y-.js';
10
- import './abstract-Dec9Sc5O.js';
11
- import './system-prompt--0FdPWqK.js';
12
- import 'node:fs';
4
+ import z__default from 'zod';
5
+ import { A as AbstractProvider } from './provider-BDjGp2y-.js';
6
+ import { A as AbstractScorer } from './abstract-Dec9Sc5O.js';
13
7
 
14
8
  declare function sleep(ms: number, signal?: AbortSignal): Promise<void>;
15
9
 
@@ -24,7 +18,7 @@ declare function parseResponseAsJSON<T>(response: string): T | undefined;
24
18
  /**
25
19
  * Converts the given byte array to a string
26
20
  */
27
- declare function bufferToString(buffer: Uint8Array): string;
21
+ declare function bufferToString(buffer: Uint8Array, encoding?: BufferEncoding): string;
28
22
  /**
29
23
  * Converts the given string to a byte array
30
24
  */
@@ -32,6 +26,21 @@ declare function stringToBuffer(str: string): Uint8Array;
32
26
 
33
27
  declare const idGeneratorUUIDv7: IdGenerator;
34
28
 
29
+ type Runner<TTestCase extends z__default.ZodObject, TResponse extends z__default.ZodObject, TScore extends z__default.ZodObject, TProvider extends AbstractProvider, TScorer extends AbstractScorer, TRunConfig extends Record<string, unknown>> = (params: {
30
+ testCase: z__default.infer<TTestCase>;
31
+ provider: TProvider;
32
+ scorer?: TScorer;
33
+ runConfig: TRunConfig;
34
+ idGenerators?: {
35
+ response?: IdGenerator;
36
+ score?: IdGenerator;
37
+ };
38
+ }) => Promise<{
39
+ response: z__default.infer<TResponse>;
40
+ score?: z__default.infer<TScore>;
41
+ }>;
42
+ type InferRunConfig<TRunConfigSchema extends z__default.ZodRawShape> = z__default.infer<z__default.ZodObject<TRunConfigSchema>>;
43
+
35
44
  declare class PeerbenchError extends Error {
36
45
  code: number;
37
46
  constructor(message?: string, options?: ErrorOptions & {
@@ -39,19 +48,35 @@ declare class PeerbenchError extends Error {
39
48
  });
40
49
  }
41
50
 
42
- declare class CatalogItemNotFoundError extends PeerbenchError {
43
- readonly itemName: string | {
44
- message: string;
45
- };
46
- constructor(itemName: string | {
47
- message: string;
48
- });
49
- }
50
- declare class CatalogItemHasNoInstantiateMethodError extends PeerbenchError {
51
- readonly itemName: string;
52
- constructor(itemName: string);
53
- }
54
-
55
51
  declare function captureStackTrace(error: Error, constructor: Function): void;
56
52
 
57
- export { CatalogItemHasNoInstantiateMethodError, CatalogItemNotFoundError, IdGenerator, PeerbenchError, bufferToString, captureStackTrace, idGeneratorUUIDv7, parseResponseAsJSON, sleep, stringToBuffer };
53
+ declare const PEERBENCH_NAMESPACE: "peerbench.ai";
54
+ declare const CATEGORIES: {
55
+ LLM: string;
56
+ };
57
+
58
+ declare function defineRunner<const TProviders extends ProviderCtor[], const TScorers extends ScorerCtor[], const TSchemaSets extends SchemaSetDefinition[], const TRunConfigSchema extends z__default.ZodRawShape = {}>(config: {
59
+ schemaSets: TSchemaSets;
60
+ providers: TProviders;
61
+ scorers: TScorers;
62
+ runConfigSchema?: TRunConfigSchema;
63
+ /**
64
+ * @default true
65
+ */
66
+ parseRunConfig?: boolean;
67
+ defaults?: {
68
+ scorer?: InstanceType<TScorers[number]>;
69
+ responseIdGenerator?: IdGenerator;
70
+ scoreIdGenerator?: IdGenerator;
71
+ };
72
+ }, fn: Runner<TSchemaSets[number]["testCase"], TSchemaSets[number]["response"], TSchemaSets[number]["score"], InstanceType<TProviders[number]>, InstanceType<TScorers[number]>, InferRunConfig<TRunConfigSchema>>): (params: Parameters<typeof fn>[0]) => Promise<{
73
+ response: z__default.core.output<TSchemaSets[number]["response"]>;
74
+ score?: z__default.core.output<TSchemaSets[number]["score"]> | undefined;
75
+ }>;
76
+ type SchemaSetDefinition<TTestCase extends z__default.ZodObject = z__default.ZodObject, TResponse extends z__default.ZodObject = z__default.ZodObject, TScore extends z__default.ZodObject = z__default.ZodObject> = {
77
+ testCase: TTestCase;
78
+ response: TResponse;
79
+ score: TScore;
80
+ };
81
+
82
+ export { CATEGORIES, IdGenerator, type InferRunConfig, PEERBENCH_NAMESPACE, PeerbenchError, ProviderCtor, type Runner, ScorerCtor, bufferToString, captureStackTrace, defineRunner, idGeneratorUUIDv7, parseResponseAsJSON, sleep, stringToBuffer };