peerbench 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -58
- package/dist/benchmarks/examples/echo-basic/runner.d.ts +11 -254
- package/dist/benchmarks/examples/echo-basic/schema-sets/echo.v1.d.ts +25 -25
- package/dist/benchmarks/examples/exact-match-scorer/runner.d.ts +38 -386
- package/dist/benchmarks/examples/exact-match-scorer/schema-sets/exact-match.v1.d.ts +25 -25
- package/dist/benchmarks/examples/text-transform/runner.d.ts +32 -480
- package/dist/benchmarks/examples/text-transform/schema-sets/echo.v1.d.ts +25 -25
- package/dist/benchmarks/examples/text-transform/schema-sets/reverse.v1.d.ts +25 -25
- package/dist/benchmarks/index.js +180 -248
- package/dist/benchmarks/index.js.map +1 -1
- package/dist/benchmarks/peerbench/index.d.ts +2 -1
- package/dist/benchmarks/peerbench/mcq-runner.d.ts +78 -0
- package/dist/benchmarks/peerbench/qa-runner.d.ts +77 -0
- package/dist/benchmarks/peerbench/schema-sets/mcq.v1.d.ts +25 -25
- package/dist/benchmarks/peerbench/schema-sets/multi-turn.v1.d.ts +25 -25
- package/dist/benchmarks/peerbench/schema-sets/qa.v1.d.ts +25 -25
- package/dist/chunk-6WDCU5BP.js +9 -0
- package/dist/chunk-6WDCU5BP.js.map +1 -0
- package/dist/{chunk-YY33MNMV.js → chunk-7KMGLEYP.js} +2 -2
- package/dist/{chunk-TRNCF2BG.js → chunk-HBGC6BDW.js} +1 -1
- package/dist/chunk-HBGC6BDW.js.map +1 -0
- package/dist/{chunk-HMQYGCKI.js → chunk-ZJWSK4VO.js} +1 -1
- package/dist/chunk-ZJWSK4VO.js.map +1 -0
- package/dist/dev.d.ts +22 -0
- package/dist/helpers/define-runner.d.ts +2 -45
- package/dist/index.js +2 -2
- package/dist/providers/ai-sdk.d.ts +24 -0
- package/dist/providers/callables/callable.d.ts +4 -0
- package/dist/providers/callables/llm.d.ts +41 -0
- package/dist/providers/example/echo.d.ts +12 -11
- package/dist/providers/example/restapi.d.ts +11 -18
- package/dist/providers/index.d.ts +4 -2
- package/dist/providers/index.js +380 -9
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/mastra.d.ts +16 -21
- package/dist/providers/openai.d.ts +25 -10
- package/dist/providers/openrouter.d.ts +6 -8
- package/dist/schemas/extensions/index.js +1 -1
- package/dist/schemas/extensions/response/llm.d.ts +17 -0
- package/dist/schemas/index.js +2 -2
- package/dist/schemas/llm/index.js +36 -7
- package/dist/schemas/llm/index.js.map +1 -1
- package/dist/schemas/llm/simple-system-prompt.d.ts +3 -3
- package/dist/schemas/llm/system-prompt.d.ts +7 -7
- package/dist/schemas/response.d.ts +7 -7
- package/dist/schemas/schema-definer.d.ts +5 -5
- package/dist/schemas/score.d.ts +7 -7
- package/dist/schemas/test-case.d.ts +7 -7
- package/dist/scorers/abstract.d.ts +1 -1
- package/dist/scorers/index.js +377 -7
- package/dist/scorers/index.js.map +1 -1
- package/dist/scorers/llm-judge.d.ts +6 -6
- package/dist/types/index.d.ts +0 -5
- package/dist/types/runner.d.ts +13 -17
- package/package.json +8 -7
- package/dist/benchmarks/peerbench/runner.d.ts +0 -754
- package/dist/chunk-3JHDJEY3.js +0 -374
- package/dist/chunk-3JHDJEY3.js.map +0 -1
- package/dist/chunk-HMQYGCKI.js.map +0 -1
- package/dist/chunk-Q6GSOHOP.js +0 -44
- package/dist/chunk-Q6GSOHOP.js.map +0 -1
- package/dist/chunk-RTEAK4II.js +0 -37
- package/dist/chunk-RTEAK4II.js.map +0 -1
- package/dist/chunk-SMLNDQFX.js +0 -244
- package/dist/chunk-SMLNDQFX.js.map +0 -1
- package/dist/chunk-TRNCF2BG.js.map +0 -1
- package/dist/providers/abstract/llm.d.ts +0 -20
- /package/dist/{chunk-YY33MNMV.js.map → chunk-7KMGLEYP.js.map} +0 -0
- /package/dist/providers/{abstract/provider.d.ts → abstract.d.ts} +0 -0
package/dist/benchmarks/index.js
CHANGED
|
@@ -1,12 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
SimpleSystemPromptSchemaV1
|
|
3
|
-
} from "../chunk-Q6GSOHOP.js";
|
|
4
1
|
import {
|
|
5
2
|
defineRunner
|
|
6
|
-
} from "../chunk-
|
|
7
|
-
import {
|
|
8
|
-
AbstractLLMProvider
|
|
9
|
-
} from "../chunk-SMLNDQFX.js";
|
|
3
|
+
} from "../chunk-6WDCU5BP.js";
|
|
10
4
|
import {
|
|
11
5
|
BaseResponseSchemaV1,
|
|
12
6
|
BaseScoreSchemaV1,
|
|
@@ -14,28 +8,24 @@ import {
|
|
|
14
8
|
defineResponseSchema,
|
|
15
9
|
defineScoreSchema,
|
|
16
10
|
defineTestCaseSchema
|
|
17
|
-
} from "../chunk-
|
|
11
|
+
} from "../chunk-7KMGLEYP.js";
|
|
18
12
|
import "../chunk-OQE6TQXZ.js";
|
|
19
13
|
import {
|
|
20
14
|
ScoringMethod
|
|
21
|
-
} from "../chunk-
|
|
22
|
-
import {
|
|
23
|
-
JSONFileStorage
|
|
24
|
-
} from "../chunk-WBCMV445.js";
|
|
25
|
-
import {
|
|
26
|
-
LLMAsAJudgeScorer,
|
|
27
|
-
MCQScorer
|
|
28
|
-
} from "../chunk-3JHDJEY3.js";
|
|
15
|
+
} from "../chunk-ZJWSK4VO.js";
|
|
29
16
|
import {
|
|
30
17
|
PEERBENCH_NAMESPACE
|
|
31
18
|
} from "../chunk-UHHHSYVE.js";
|
|
19
|
+
import {
|
|
20
|
+
JSONFileStorage
|
|
21
|
+
} from "../chunk-WBCMV445.js";
|
|
32
22
|
import {
|
|
33
23
|
idGeneratorUUIDv7
|
|
34
24
|
} from "../chunk-4UBK6452.js";
|
|
35
25
|
import {
|
|
36
26
|
ExtensionLLMAsAJudgeScoreFieldsV1,
|
|
37
27
|
ExtensionLLMResponseFieldsV1
|
|
38
|
-
} from "../chunk-
|
|
28
|
+
} from "../chunk-HBGC6BDW.js";
|
|
39
29
|
import "../chunk-NUEOE3K5.js";
|
|
40
30
|
import {
|
|
41
31
|
__export
|
|
@@ -57,7 +47,8 @@ __export(peerbench_exports, {
|
|
|
57
47
|
QAResponseSchemaV1: () => QAResponseSchemaV1,
|
|
58
48
|
QAScoreSchemaV1: () => QAScoreSchemaV1,
|
|
59
49
|
QATestCaseSchemaV1: () => QATestCaseSchemaV1,
|
|
60
|
-
|
|
50
|
+
mcqRunner: () => mcqRunner,
|
|
51
|
+
qaRunner: () => qaRunner
|
|
61
52
|
});
|
|
62
53
|
|
|
63
54
|
// src/benchmarks/peerbench/schema-sets/mcq.v1.ts
|
|
@@ -179,254 +170,112 @@ var QAScoreSchemaV1 = defineScoreSchema({
|
|
|
179
170
|
}
|
|
180
171
|
});
|
|
181
172
|
|
|
182
|
-
// src/benchmarks/peerbench/runner.ts
|
|
173
|
+
// src/benchmarks/peerbench/mcq-runner.ts
|
|
183
174
|
import Handlebars from "handlebars";
|
|
184
175
|
import z4 from "zod";
|
|
185
|
-
var
|
|
186
|
-
{
|
|
187
|
-
schemaSets: [
|
|
188
|
-
{
|
|
189
|
-
testCase: MCQTestCaseSchemaV1,
|
|
190
|
-
response: MCQResponseSchemaV1,
|
|
191
|
-
score: MCQScoreSchemaV1
|
|
192
|
-
},
|
|
193
|
-
{
|
|
194
|
-
testCase: QATestCaseSchemaV1,
|
|
195
|
-
response: QAResponseSchemaV1,
|
|
196
|
-
score: QAScoreSchemaV1
|
|
197
|
-
}
|
|
198
|
-
],
|
|
199
|
-
providers: [AbstractLLMProvider],
|
|
200
|
-
scorers: [LLMAsAJudgeScorer, MCQScorer],
|
|
201
|
-
runConfigSchema: {
|
|
202
|
-
model: z4.string(),
|
|
203
|
-
llmJudgeModel: z4.string().optional(),
|
|
204
|
-
llmJudgeSystemPrompt: SimpleSystemPromptSchemaV1.optional(),
|
|
205
|
-
llmJudgeFieldsToExtract: z4.record(z4.string(), z4.custom()).optional(),
|
|
206
|
-
systemPrompt: SimpleSystemPromptSchemaV1.optional(),
|
|
207
|
-
templateVariables: z4.record(z4.string(), z4.string()).optional()
|
|
208
|
-
}
|
|
209
|
-
},
|
|
176
|
+
var mcqRunner = defineRunner(
|
|
210
177
|
async (params) => {
|
|
211
|
-
const { testCase,
|
|
178
|
+
const { testCase, target, scorer } = params;
|
|
212
179
|
const messages = [];
|
|
213
|
-
if (
|
|
180
|
+
if (params.systemPrompt) {
|
|
214
181
|
messages.push({
|
|
215
182
|
role: "system",
|
|
216
|
-
content:
|
|
183
|
+
content: params.systemPrompt.content
|
|
217
184
|
});
|
|
218
185
|
}
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
186
|
+
messages.push({
|
|
187
|
+
role: "user",
|
|
188
|
+
content: formatMCQ(testCase)
|
|
189
|
+
});
|
|
190
|
+
templateMessages(messages, params.templateVariables ?? {});
|
|
191
|
+
const providerResponse = await target.forward({ messages });
|
|
192
|
+
const response = await MCQResponseSchemaV1.newWithId(
|
|
193
|
+
{
|
|
194
|
+
data: providerResponse.data,
|
|
195
|
+
startedAt: providerResponse.startedAt,
|
|
196
|
+
completedAt: providerResponse.completedAt,
|
|
197
|
+
testCaseId: testCase.id,
|
|
198
|
+
modelSlug: target.slug,
|
|
199
|
+
provider: target.provider.kind,
|
|
200
|
+
systemPromptId: params.systemPrompt?.id,
|
|
201
|
+
inputTokensUsed: providerResponse.inputTokensUsed,
|
|
202
|
+
outputTokensUsed: providerResponse.outputTokensUsed,
|
|
203
|
+
inputCost: providerResponse.inputCost,
|
|
204
|
+
outputCost: providerResponse.outputCost
|
|
205
|
+
},
|
|
206
|
+
params.idGenerators?.response ?? idGeneratorUUIDv7
|
|
207
|
+
);
|
|
208
|
+
if (scorer?.kind === `${PEERBENCH_NAMESPACE}/mcq`) {
|
|
209
|
+
const scorerResult = await scorer.score({
|
|
210
|
+
response: response.data,
|
|
211
|
+
choices: testCase.options,
|
|
212
|
+
correctAnswers: testCase.correctAnswerKeys
|
|
235
213
|
});
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
214
|
+
if (scorerResult !== null) {
|
|
215
|
+
const score = await MCQScoreSchemaV1.newWithId(
|
|
216
|
+
{
|
|
217
|
+
scoringMethod: ScoringMethod.algo,
|
|
218
|
+
value: scorerResult.value,
|
|
219
|
+
responseId: response.id,
|
|
220
|
+
extractedAnswers: scorerResult.extractedAnswers,
|
|
221
|
+
explanation: scorerResult.explanation,
|
|
222
|
+
metadata: scorerResult.metadata
|
|
223
|
+
},
|
|
224
|
+
params.idGenerators?.score ?? idGeneratorUUIDv7
|
|
241
225
|
);
|
|
226
|
+
return { response, score };
|
|
242
227
|
}
|
|
243
|
-
messages.push({
|
|
244
|
-
role: "user",
|
|
245
|
-
content: testCase.question
|
|
246
|
-
});
|
|
247
|
-
templateMessages(messages, runConfig.templateVariables ?? {});
|
|
248
|
-
return runQA({
|
|
249
|
-
testCase,
|
|
250
|
-
messages,
|
|
251
|
-
provider,
|
|
252
|
-
scorer,
|
|
253
|
-
runConfig,
|
|
254
|
-
idGenerators: {
|
|
255
|
-
response: params.idGenerators?.response ?? idGeneratorUUIDv7,
|
|
256
|
-
score: params.idGenerators?.score ?? idGeneratorUUIDv7
|
|
257
|
-
}
|
|
258
|
-
});
|
|
259
|
-
}
|
|
260
|
-
throw new Error("Unsupported test case kind");
|
|
261
|
-
}
|
|
262
|
-
);
|
|
263
|
-
async function runQA(params) {
|
|
264
|
-
const { messages, testCase, provider, scorer, runConfig } = params;
|
|
265
|
-
const providerResponse = await provider.forward({
|
|
266
|
-
model: runConfig.model,
|
|
267
|
-
messages
|
|
268
|
-
});
|
|
269
|
-
const response = await QAResponseSchemaV1.newWithId(
|
|
270
|
-
{
|
|
271
|
-
data: providerResponse.data,
|
|
272
|
-
startedAt: providerResponse.startedAt,
|
|
273
|
-
completedAt: providerResponse.completedAt,
|
|
274
|
-
testCaseId: testCase.id,
|
|
275
|
-
modelSlug: runConfig.model,
|
|
276
|
-
provider: provider.kind,
|
|
277
|
-
systemPromptId: runConfig.systemPrompt?.id,
|
|
278
|
-
inputTokensUsed: providerResponse.inputTokensUsed,
|
|
279
|
-
outputTokensUsed: providerResponse.outputTokensUsed,
|
|
280
|
-
inputCost: providerResponse.inputCost,
|
|
281
|
-
outputCost: providerResponse.outputCost
|
|
282
|
-
},
|
|
283
|
-
params.idGenerators?.response ?? idGeneratorUUIDv7
|
|
284
|
-
);
|
|
285
|
-
if (scorer?.kind === `${PEERBENCH_NAMESPACE}/llm-as-a-judge`) {
|
|
286
|
-
if (!runConfig.llmJudgeModel) {
|
|
287
|
-
throw new Error(
|
|
288
|
-
"LLM judge model is required when using LLM as a judge scorer"
|
|
289
|
-
);
|
|
290
228
|
}
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
{
|
|
299
|
-
id: "correctness",
|
|
300
|
-
description: "Is the response matches with the expected/valid answers in terms of meaning?",
|
|
301
|
-
weight: 1
|
|
302
|
-
}
|
|
303
|
-
],
|
|
304
|
-
fieldsToExtract: runConfig.llmJudgeFieldsToExtract ?? {}
|
|
305
|
-
});
|
|
306
|
-
if (scorerResult !== null) {
|
|
307
|
-
const score = await QAScoreSchemaV1.newWithId(
|
|
308
|
-
{
|
|
309
|
-
scoringMethod: ScoringMethod.ai,
|
|
310
|
-
value: scorerResult.value,
|
|
311
|
-
responseId: response.id,
|
|
312
|
-
explanation: scorerResult.explanation,
|
|
313
|
-
scorerAIInputCost: scorerResult.inputCost,
|
|
314
|
-
scorerAIOutputCost: scorerResult.outputCost,
|
|
315
|
-
scorerAIInputTokensUsed: scorerResult.inputTokensUsed,
|
|
316
|
-
scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,
|
|
317
|
-
scorerAIProvider: scorerResult.provider,
|
|
318
|
-
scorerAIModelSlug: runConfig.llmJudgeModel,
|
|
319
|
-
scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,
|
|
320
|
-
metadata: {
|
|
321
|
-
...scorerResult.metadata,
|
|
322
|
-
extractedFields: scorerResult.extractedFields
|
|
229
|
+
if (scorer?.kind === `${PEERBENCH_NAMESPACE}/llm-as-a-judge`) {
|
|
230
|
+
const scorerResult = await scorer.score({
|
|
231
|
+
criteria: [
|
|
232
|
+
{
|
|
233
|
+
id: "correctness",
|
|
234
|
+
description: "Is the given answer key matches with one of the correct answer keys?",
|
|
235
|
+
weight: 1
|
|
323
236
|
}
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
);
|
|
327
|
-
return { response, score };
|
|
328
|
-
}
|
|
329
|
-
}
|
|
330
|
-
return { response };
|
|
331
|
-
}
|
|
332
|
-
async function runMCQ(params) {
|
|
333
|
-
const { messages, testCase, provider, scorer, runConfig } = params;
|
|
334
|
-
const providerResponse = await provider.forward({
|
|
335
|
-
model: runConfig.model,
|
|
336
|
-
messages
|
|
337
|
-
});
|
|
338
|
-
const response = await MCQResponseSchemaV1.newWithId(
|
|
339
|
-
{
|
|
340
|
-
data: providerResponse.data,
|
|
341
|
-
startedAt: providerResponse.startedAt,
|
|
342
|
-
completedAt: providerResponse.completedAt,
|
|
343
|
-
testCaseId: testCase.id,
|
|
344
|
-
modelSlug: runConfig.model,
|
|
345
|
-
provider: provider.kind,
|
|
346
|
-
systemPromptId: runConfig.systemPrompt?.id,
|
|
347
|
-
inputTokensUsed: providerResponse.inputTokensUsed,
|
|
348
|
-
outputTokensUsed: providerResponse.outputTokensUsed,
|
|
349
|
-
inputCost: providerResponse.inputCost,
|
|
350
|
-
outputCost: providerResponse.outputCost
|
|
351
|
-
},
|
|
352
|
-
params.idGenerators?.response ?? idGeneratorUUIDv7
|
|
353
|
-
);
|
|
354
|
-
if (scorer?.kind === `${PEERBENCH_NAMESPACE}/mcq`) {
|
|
355
|
-
const scorerResult = await scorer.score({
|
|
356
|
-
response: response.data,
|
|
357
|
-
choices: testCase.options,
|
|
358
|
-
correctAnswers: testCase.correctAnswerKeys
|
|
359
|
-
});
|
|
360
|
-
if (scorerResult !== null) {
|
|
361
|
-
const score = await MCQScoreSchemaV1.newWithId(
|
|
362
|
-
{
|
|
363
|
-
scoringMethod: ScoringMethod.algo,
|
|
364
|
-
value: scorerResult.value,
|
|
365
|
-
responseId: response.id,
|
|
366
|
-
extractedAnswers: scorerResult.extractedAnswers,
|
|
367
|
-
explanation: scorerResult.explanation,
|
|
368
|
-
metadata: scorerResult.metadata
|
|
369
|
-
},
|
|
370
|
-
params.idGenerators?.score ?? idGeneratorUUIDv7
|
|
371
|
-
);
|
|
372
|
-
return { response, score };
|
|
373
|
-
}
|
|
374
|
-
}
|
|
375
|
-
if (scorer?.kind === `${PEERBENCH_NAMESPACE}/llm-as-a-judge`) {
|
|
376
|
-
if (!runConfig.llmJudgeModel) {
|
|
377
|
-
throw new Error(
|
|
378
|
-
"LLM judge model is required when using LLM as a judge scorer"
|
|
379
|
-
);
|
|
380
|
-
}
|
|
381
|
-
const scorerResult = await scorer.score({
|
|
382
|
-
model: runConfig.llmJudgeModel,
|
|
383
|
-
criteria: [
|
|
384
|
-
{
|
|
385
|
-
id: "correctness",
|
|
386
|
-
description: "Is the given answer key matches with one of the correct answer keys?",
|
|
387
|
-
weight: 1
|
|
388
|
-
}
|
|
389
|
-
],
|
|
390
|
-
rubric: `Answer text itself or the key (A, B, C) is accepted
|
|
237
|
+
],
|
|
238
|
+
rubric: `Answer text itself or the key (A, B, C) is accepted
|
|
391
239
|
Valid answer keys: ${testCase.correctAnswerKeys.map((key) => `- ${key}`).join("\n")}
|
|
392
240
|
Valid Answer texts: ${testCase.correctAnswerKeys.map((key) => `- ${testCase.options?.[key] ?? ""}`).join("\n")}`,
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
},
|
|
399
|
-
response: response.data,
|
|
400
|
-
systemPrompt: runConfig.llmJudgeSystemPrompt?.content
|
|
401
|
-
});
|
|
402
|
-
if (scorerResult !== null) {
|
|
403
|
-
const { extractedAnswers, ...extractedFields } = scorerResult.extractedFields;
|
|
404
|
-
const score = await MCQScoreSchemaV1.newWithId(
|
|
405
|
-
{
|
|
406
|
-
scoringMethod: ScoringMethod.ai,
|
|
407
|
-
value: scorerResult.value,
|
|
408
|
-
extractedAnswers,
|
|
409
|
-
responseId: response.id,
|
|
410
|
-
explanation: scorerResult.explanation,
|
|
411
|
-
scorerAIInputCost: scorerResult.inputCost,
|
|
412
|
-
scorerAIOutputCost: scorerResult.outputCost,
|
|
413
|
-
scorerAIInputTokensUsed: scorerResult.inputTokensUsed,
|
|
414
|
-
scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,
|
|
415
|
-
scorerAIProvider: scorerResult.provider,
|
|
416
|
-
scorerAIModelSlug: runConfig.llmJudgeModel,
|
|
417
|
-
scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,
|
|
418
|
-
metadata: {
|
|
419
|
-
...scorerResult.metadata,
|
|
420
|
-
extractedFields
|
|
421
|
-
}
|
|
241
|
+
fieldsToExtract: {
|
|
242
|
+
extractedAnswers: z4.string().array().describe(
|
|
243
|
+
"The extracted answer keys, valid or invalid (even if the answer text is provided rather than the key)"
|
|
244
|
+
),
|
|
245
|
+
...params.llmJudgeFieldsToExtract ?? {}
|
|
422
246
|
},
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
247
|
+
response: response.data,
|
|
248
|
+
systemPrompt: params.llmJudgeSystemPrompt?.content
|
|
249
|
+
});
|
|
250
|
+
if (scorerResult !== null) {
|
|
251
|
+
const { extractedAnswers, ...extractedFields } = scorerResult.extractedFields;
|
|
252
|
+
const score = await MCQScoreSchemaV1.newWithId(
|
|
253
|
+
{
|
|
254
|
+
scoringMethod: ScoringMethod.ai,
|
|
255
|
+
value: scorerResult.value,
|
|
256
|
+
extractedAnswers,
|
|
257
|
+
responseId: response.id,
|
|
258
|
+
explanation: scorerResult.explanation,
|
|
259
|
+
scorerAIInputCost: scorerResult.inputCost,
|
|
260
|
+
scorerAIOutputCost: scorerResult.outputCost,
|
|
261
|
+
scorerAIInputTokensUsed: scorerResult.inputTokensUsed,
|
|
262
|
+
scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,
|
|
263
|
+
scorerAIProvider: scorerResult.provider,
|
|
264
|
+
scorerAIModelSlug: scorerResult.modelSlug,
|
|
265
|
+
scorerAISystemPromptId: params.llmJudgeSystemPrompt?.id,
|
|
266
|
+
metadata: {
|
|
267
|
+
...scorerResult.metadata,
|
|
268
|
+
extractedFields
|
|
269
|
+
}
|
|
270
|
+
},
|
|
271
|
+
params.idGenerators?.score ?? idGeneratorUUIDv7
|
|
272
|
+
);
|
|
273
|
+
return { response, score };
|
|
274
|
+
}
|
|
426
275
|
}
|
|
276
|
+
return { response };
|
|
427
277
|
}
|
|
428
|
-
|
|
429
|
-
}
|
|
278
|
+
);
|
|
430
279
|
function formatMCQ(testCase) {
|
|
431
280
|
return `Question: ${testCase.question}
|
|
432
281
|
Options:
|
|
@@ -441,6 +290,89 @@ function templateMessages(messages, templateVariables) {
|
|
|
441
290
|
}
|
|
442
291
|
}
|
|
443
292
|
|
|
293
|
+
// src/benchmarks/peerbench/qa-runner.ts
|
|
294
|
+
import Handlebars2 from "handlebars";
|
|
295
|
+
var qaRunner = defineRunner(
|
|
296
|
+
async (params) => {
|
|
297
|
+
const { testCase, target, scorer } = params;
|
|
298
|
+
const messages = [];
|
|
299
|
+
if (params.systemPrompt) {
|
|
300
|
+
messages.push({
|
|
301
|
+
role: "system",
|
|
302
|
+
content: params.systemPrompt.content
|
|
303
|
+
});
|
|
304
|
+
}
|
|
305
|
+
messages.push({
|
|
306
|
+
role: "user",
|
|
307
|
+
content: testCase.question
|
|
308
|
+
});
|
|
309
|
+
templateMessages2(messages, params.templateVariables ?? {});
|
|
310
|
+
const providerResponse = await target.forward({ messages });
|
|
311
|
+
const response = await QAResponseSchemaV1.newWithId(
|
|
312
|
+
{
|
|
313
|
+
data: providerResponse.data,
|
|
314
|
+
startedAt: providerResponse.startedAt,
|
|
315
|
+
completedAt: providerResponse.completedAt,
|
|
316
|
+
testCaseId: testCase.id,
|
|
317
|
+
modelSlug: target.slug,
|
|
318
|
+
provider: target.provider.kind,
|
|
319
|
+
systemPromptId: params.systemPrompt?.id,
|
|
320
|
+
inputTokensUsed: providerResponse.inputTokensUsed,
|
|
321
|
+
outputTokensUsed: providerResponse.outputTokensUsed,
|
|
322
|
+
inputCost: providerResponse.inputCost,
|
|
323
|
+
outputCost: providerResponse.outputCost
|
|
324
|
+
},
|
|
325
|
+
params.idGenerators?.response ?? idGeneratorUUIDv7
|
|
326
|
+
);
|
|
327
|
+
if (scorer?.kind === `${PEERBENCH_NAMESPACE}/llm-as-a-judge`) {
|
|
328
|
+
const scorerResult = await scorer.score({
|
|
329
|
+
response: response.data,
|
|
330
|
+
rubric: `Expected/Valid answers: ${testCase.goodAnswers.join("\n")}
|
|
331
|
+
Invalid answers: ${testCase.badAnswers.join("\n")}`,
|
|
332
|
+
systemPrompt: params.llmJudgeSystemPrompt?.content,
|
|
333
|
+
criteria: [
|
|
334
|
+
{
|
|
335
|
+
id: "correctness",
|
|
336
|
+
description: "Is the response matches with the expected/valid answers in terms of meaning?",
|
|
337
|
+
weight: 1
|
|
338
|
+
}
|
|
339
|
+
],
|
|
340
|
+
fieldsToExtract: params.llmJudgeFieldsToExtract ?? {}
|
|
341
|
+
});
|
|
342
|
+
if (scorerResult !== null) {
|
|
343
|
+
const score = await QAScoreSchemaV1.newWithId(
|
|
344
|
+
{
|
|
345
|
+
scoringMethod: ScoringMethod.ai,
|
|
346
|
+
value: scorerResult.value,
|
|
347
|
+
responseId: response.id,
|
|
348
|
+
explanation: scorerResult.explanation,
|
|
349
|
+
scorerAIInputCost: scorerResult.inputCost,
|
|
350
|
+
scorerAIOutputCost: scorerResult.outputCost,
|
|
351
|
+
scorerAIInputTokensUsed: scorerResult.inputTokensUsed,
|
|
352
|
+
scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,
|
|
353
|
+
scorerAIProvider: scorerResult.provider,
|
|
354
|
+
scorerAIModelSlug: scorerResult.modelSlug,
|
|
355
|
+
scorerAISystemPromptId: params.llmJudgeSystemPrompt?.id,
|
|
356
|
+
metadata: {
|
|
357
|
+
...scorerResult.metadata,
|
|
358
|
+
extractedFields: scorerResult.extractedFields
|
|
359
|
+
}
|
|
360
|
+
},
|
|
361
|
+
params.idGenerators?.score ?? idGeneratorUUIDv7
|
|
362
|
+
);
|
|
363
|
+
return { response, score };
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
return { response };
|
|
367
|
+
}
|
|
368
|
+
);
|
|
369
|
+
function templateMessages2(messages, templateVariables) {
|
|
370
|
+
for (let i = 0; i < messages.length; i++) {
|
|
371
|
+
const template = Handlebars2.compile(messages[i].content);
|
|
372
|
+
messages[i].content = template(templateVariables);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
|
|
444
376
|
// src/benchmarks/peerbench/storages/json.ts
|
|
445
377
|
import z5 from "zod";
|
|
446
378
|
var PeerbenchJSONStorage = class extends JSONFileStorage {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/benchmarks/peerbench/index.ts","../../src/benchmarks/peerbench/schema-sets/mcq.v1.ts","../../src/benchmarks/peerbench/schema-sets/multi-turn.v1.ts","../../src/benchmarks/peerbench/schema-sets/qa.v1.ts","../../src/benchmarks/peerbench/runner.ts","../../src/benchmarks/peerbench/storages/json.ts"],"sourcesContent":["export * from \"./schema-sets/mcq.v1\";\nexport * from \"./schema-sets/multi-turn.v1\";\nexport * from \"./schema-sets/qa.v1\";\n\nexport * from \"./runner\";\n\nexport * from \"./storages/json\";\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const MCQKind = `llm/mcq` as const;\n\nexport const MCQTestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n question: z.string(),\n options: z.record(z.string(), z.string()),\n correctAnswerKeys: z.string().array(),\n },\n});\nexport type MCQTestCaseV1 = z.infer<typeof MCQTestCaseSchemaV1>;\n\nexport const MCQResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n },\n});\nexport type MCQResponseV1 = z.infer<typeof MCQResponseSchemaV1>;\n\nexport const MCQScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n extractedAnswers: z.array(z.string()),\n },\n});\nexport type MCQScoreV1 = z.infer<typeof MCQScoreSchemaV1>;\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const MultiTurnKind = `llm/multi-turn` as const;\n\nexport const MultiTurnTestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n messages: z\n .object({\n role: z.string(),\n content: z.string(),\n goodAnswers: z.string().array().optional(),\n badAnswers: z.string().array().optional(),\n })\n .array(),\n\n maxTurns: z.number().optional(),\n expectedOutcome: z.string().optional(),\n },\n});\nexport type MultiTurnTestCaseV1 = z.infer<typeof MultiTurnTestCaseSchemaV1>;\n\nexport const MultiTurnResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n replies: z\n .object({\n messageIndex: z.number(),\n startedAt: z.number(),\n completedAt: z.number(),\n data: z.string(),\n\n inputTokensUsed: z.number().optional(),\n outputTokensUsed: z.number().optional(),\n inputCost: z.string().optional(),\n outputCost: z.string().optional(),\n })\n .array(),\n },\n});\nexport type MultiTurnResponseV1 = z.infer<typeof MultiTurnResponseSchemaV1>;\n\nexport const MultiTurnScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n individualScores: z\n .object({\n replyIndex: z.number(),\n value: z.number(),\n })\n .array(),\n },\n});\nexport type MultiTurnScoreV1 = z.infer<typeof MultiTurnScoreSchemaV1>;\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const QAKind = `llm/qa` as const;\n\nexport const QATestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n question: z.string(),\n goodAnswers: z.string().array(),\n badAnswers: z.string().array(),\n },\n});\nexport type QATestCaseV1 = z.infer<typeof QATestCaseSchemaV1>;\n\nexport const QAResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n },\n});\nexport type QAResponseV1 = z.infer<typeof QAResponseSchemaV1>;\n\nexport const QAScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n },\n});\nexport type QAScoreV1 = z.infer<typeof QAScoreSchemaV1>;\n","import { defineRunner } from \"@/helpers/define-runner\";\nimport { AbstractLLMProvider } from \"@/providers\";\nimport {\n SimpleSystemPromptSchemaV1,\n SimpleSystemPromptV1,\n} from \"@/schemas/llm\";\nimport { LLMAsAJudgeScorer, MCQScorer } from \"@/scorers\";\nimport { IdGenerator, ScoringMethod } from \"@/types\";\nimport { idGeneratorUUIDv7 } from \"@/utils\";\nimport { ChatCompletionMessageParam } from \"openai/resources/index\";\nimport Handlebars from \"handlebars\";\nimport z from \"zod\";\nimport {\n MCQResponseSchemaV1,\n MCQScoreSchemaV1,\n MCQTestCaseSchemaV1,\n MCQTestCaseV1,\n} from \"./schema-sets/mcq.v1\";\nimport {\n QAResponseSchemaV1,\n QAScoreSchemaV1,\n QATestCaseSchemaV1,\n QATestCaseV1,\n} from \"./schema-sets/qa.v1\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport const peerbenchRunner = defineRunner(\n {\n schemaSets: [\n {\n testCase: MCQTestCaseSchemaV1,\n response: MCQResponseSchemaV1,\n score: MCQScoreSchemaV1,\n },\n {\n testCase: QATestCaseSchemaV1,\n response: QAResponseSchemaV1,\n score: QAScoreSchemaV1,\n },\n ],\n providers: [AbstractLLMProvider],\n scorers: [LLMAsAJudgeScorer, MCQScorer],\n\n runConfigSchema: {\n model: z.string(),\n llmJudgeModel: z.string().optional(),\n llmJudgeSystemPrompt: SimpleSystemPromptSchemaV1.optional(),\n llmJudgeFieldsToExtract: z\n .record(z.string(), z.custom<z.ZodType>())\n .optional(),\n systemPrompt: SimpleSystemPromptSchemaV1.optional(),\n templateVariables: z.record(z.string(), z.string()).optional(),\n },\n },\n async (params) => {\n const { testCase, provider, scorer, runConfig } = params;\n const messages: ChatCompletionMessageParam[] = [];\n\n if (runConfig.systemPrompt) {\n messages.push({\n role: \"system\",\n content: runConfig.systemPrompt.content,\n });\n }\n\n if (testCase.kind === \"llm/mcq.tc\") {\n messages.push({\n role: \"user\",\n content: formatMCQ(testCase),\n });\n templateMessages(messages, runConfig.templateVariables ?? {});\n\n return runMCQ({\n testCase,\n messages,\n provider,\n scorer,\n runConfig,\n idGenerators: {\n response: params.idGenerators?.response ?? idGeneratorUUIDv7,\n score: params.idGenerators?.score ?? idGeneratorUUIDv7,\n },\n });\n }\n\n if (testCase.kind === \"llm/qa.tc\") {\n if (\n scorer &&\n scorer?.kind !== (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)\n ) {\n throw new Error(\n `QA test cases can only be scored with an LLM as a judge scorer, but ${scorer?.kind} was provided`\n );\n }\n\n messages.push({\n role: \"user\",\n content: testCase.question,\n });\n templateMessages(messages, runConfig.templateVariables ?? {});\n\n return runQA({\n testCase,\n messages,\n provider,\n scorer,\n runConfig,\n idGenerators: {\n response: params.idGenerators?.response ?? idGeneratorUUIDv7,\n score: params.idGenerators?.score ?? idGeneratorUUIDv7,\n },\n });\n }\n\n throw new Error(\"Unsupported test case kind\");\n }\n);\n\nasync function runQA(params: {\n messages: ChatCompletionMessageParam[];\n testCase: QATestCaseV1;\n provider: AbstractLLMProvider;\n scorer?: LLMAsAJudgeScorer;\n runConfig: {\n model: string;\n llmJudgeModel?: string;\n llmJudgeSystemPrompt?: SimpleSystemPromptV1;\n llmJudgeFieldsToExtract?: Record<string, z.ZodType>;\n systemPrompt?: SimpleSystemPromptV1;\n };\n idGenerators: {\n response: IdGenerator;\n score: IdGenerator;\n };\n}) {\n const { messages, testCase, provider, scorer, runConfig } = params;\n\n const providerResponse = await provider.forward({\n model: runConfig.model,\n messages,\n });\n\n const response = await QAResponseSchemaV1.newWithId(\n {\n data: providerResponse.data,\n startedAt: providerResponse.startedAt,\n completedAt: providerResponse.completedAt,\n testCaseId: testCase.id,\n modelSlug: runConfig.model,\n provider: provider.kind,\n systemPromptId: runConfig.systemPrompt?.id,\n\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n },\n params.idGenerators?.response ?? idGeneratorUUIDv7\n );\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)) {\n if (!runConfig.llmJudgeModel) {\n throw new Error(\n \"LLM judge model is required when using LLM as a judge scorer\"\n );\n }\n\n const scorerResult = await scorer.score({\n model: runConfig.llmJudgeModel,\n response: response.data,\n rubric: `Expected/Valid answers: ${testCase.goodAnswers.join(\"\\n\")}\\nInvalid answers: ${testCase.badAnswers.join(\"\\n\")}`,\n systemPrompt: runConfig.llmJudgeSystemPrompt?.content,\n criteria: [\n {\n id: \"correctness\",\n description:\n \"Is the response matches with the expected/valid answers in terms of meaning?\",\n weight: 1,\n },\n ],\n fieldsToExtract: runConfig.llmJudgeFieldsToExtract ?? {},\n });\n\n if (scorerResult !== null) {\n const score = await QAScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.ai,\n value: scorerResult.value,\n responseId: response.id,\n explanation: scorerResult.explanation,\n scorerAIInputCost: scorerResult.inputCost,\n scorerAIOutputCost: scorerResult.outputCost,\n scorerAIInputTokensUsed: scorerResult.inputTokensUsed,\n scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,\n scorerAIProvider: scorerResult.provider,\n scorerAIModelSlug: runConfig.llmJudgeModel,\n scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,\n metadata: {\n ...scorerResult.metadata,\n extractedFields: scorerResult.extractedFields,\n },\n },\n params.idGenerators?.score ?? idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n return { response };\n}\n\nasync function runMCQ(params: {\n messages: ChatCompletionMessageParam[];\n testCase: MCQTestCaseV1;\n provider: AbstractLLMProvider;\n scorer?: MCQScorer | LLMAsAJudgeScorer;\n runConfig: {\n model: string;\n llmJudgeModel?: string;\n llmJudgeSystemPrompt?: SimpleSystemPromptV1;\n llmJudgeFieldsToExtract?: Record<string, z.ZodType>;\n systemPrompt?: SimpleSystemPromptV1;\n };\n idGenerators: {\n response: IdGenerator;\n score: IdGenerator;\n };\n}) {\n const { messages, testCase, provider, scorer, runConfig } = params;\n\n const providerResponse = await provider.forward({\n model: runConfig.model,\n messages,\n });\n\n const response = await MCQResponseSchemaV1.newWithId(\n {\n data: providerResponse.data,\n startedAt: providerResponse.startedAt,\n completedAt: providerResponse.completedAt,\n testCaseId: testCase.id,\n modelSlug: runConfig.model,\n provider: provider.kind,\n systemPromptId: runConfig.systemPrompt?.id,\n\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n },\n params.idGenerators?.response ?? idGeneratorUUIDv7\n );\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/mcq` as const)) {\n const scorerResult = await scorer.score({\n response: response.data,\n choices: testCase.options,\n correctAnswers: testCase.correctAnswerKeys,\n });\n\n if (scorerResult !== null) {\n const score = await MCQScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.algo,\n value: scorerResult.value,\n responseId: response.id,\n extractedAnswers: scorerResult.extractedAnswers,\n explanation: scorerResult.explanation,\n metadata: scorerResult.metadata,\n },\n params.idGenerators?.score ?? idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)) {\n if (!runConfig.llmJudgeModel) {\n throw new Error(\n \"LLM judge model is required when using LLM as a judge scorer\"\n );\n }\n\n const scorerResult = await scorer.score({\n model: runConfig.llmJudgeModel,\n criteria: [\n {\n id: \"correctness\",\n description:\n \"Is the given answer key matches with one of the correct answer keys?\",\n weight: 1,\n },\n ],\n rubric: `Answer text itself or the key (A, B, C) is accepted\nValid answer keys: ${testCase.correctAnswerKeys.map((key) => `- ${key}`).join(\"\\n\")}\nValid Answer texts: ${testCase.correctAnswerKeys.map((key) => `- ${testCase.options?.[key] ?? \"\"}`).join(\"\\n\")}`,\n fieldsToExtract: {\n extractedAnswers: z\n .string()\n .array()\n .describe(\n \"The extracted answer keys, valid or invalid (even if the answer text is provided rather than the key)\"\n ),\n ...(runConfig.llmJudgeFieldsToExtract ?? {}),\n },\n response: response.data,\n systemPrompt: runConfig.llmJudgeSystemPrompt?.content,\n });\n\n if (scorerResult !== null) {\n const { extractedAnswers, ...extractedFields } =\n scorerResult.extractedFields;\n const score = await MCQScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.ai,\n value: scorerResult.value,\n extractedAnswers,\n responseId: response.id,\n explanation: scorerResult.explanation,\n scorerAIInputCost: scorerResult.inputCost,\n scorerAIOutputCost: scorerResult.outputCost,\n scorerAIInputTokensUsed: scorerResult.inputTokensUsed,\n scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,\n scorerAIProvider: scorerResult.provider,\n scorerAIModelSlug: runConfig.llmJudgeModel,\n scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,\n metadata: {\n ...scorerResult.metadata,\n extractedFields,\n },\n },\n params.idGenerators?.score ?? idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n return { response };\n}\n\nfunction formatMCQ(testCase: MCQTestCaseV1) {\n return `Question: ${testCase.question}\\nOptions:\\n${Object.entries(\n testCase.options ?? {}\n )\n .map(([key, value]) => `${key}: ${value}`)\n .join(\"\\n\")}`;\n}\n\nfunction templateMessages(\n messages: ChatCompletionMessageParam[],\n templateVariables: Record<string, string>\n) {\n for (let i = 0; i < messages.length; i++) {\n const template = Handlebars.compile(messages[i]!.content);\n messages[i]!.content = template(templateVariables);\n }\n}\n","import { JSONFileStorage } from \"@/storages/json-file\";\nimport {\n MCQResponseSchemaV1,\n MCQResponseV1,\n MCQScoreSchemaV1,\n MCQScoreV1,\n MCQTestCaseSchemaV1,\n MCQTestCaseV1,\n} from \"../schema-sets/mcq.v1\";\nimport {\n QAResponseSchemaV1,\n QAResponseV1,\n QAScoreSchemaV1,\n QAScoreV1,\n QATestCaseSchemaV1,\n QATestCaseV1,\n} from \"../schema-sets/qa.v1\";\nimport {\n MultiTurnResponseSchemaV1,\n MultiTurnResponseV1,\n MultiTurnScoreSchemaV1,\n MultiTurnScoreV1,\n MultiTurnTestCaseSchemaV1,\n MultiTurnTestCaseV1,\n} from \"../schema-sets/multi-turn.v1\";\nimport z from \"zod\";\n\nexport class PeerbenchJSONStorage extends JSONFileStorage<\n | MCQTestCaseV1\n | MCQResponseV1\n | MCQScoreV1\n | QATestCaseV1\n | QAResponseV1\n | QAScoreV1\n | MultiTurnTestCaseV1\n | MultiTurnResponseV1\n | MultiTurnScoreV1\n> {\n constructor(config: { path: string; chunkSize?: number }) {\n super({\n path: config.path,\n chunkSize: config.chunkSize,\n\n schema: z.union([\n MCQTestCaseSchemaV1,\n MCQResponseSchemaV1,\n MCQScoreSchemaV1,\n QATestCaseSchemaV1,\n QAResponseSchemaV1,\n QAScoreSchemaV1,\n MultiTurnTestCaseSchemaV1,\n MultiTurnResponseSchemaV1,\n MultiTurnScoreSchemaV1,\n ]),\n });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACWA,SAAS,SAAS;AAEX,IAAM,UAAU;AAEhB,IAAM,sBAAsB,qBAAqB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAU,EAAE,OAAO;AAAA,IACnB,SAAS,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC;AAAA,IACxC,mBAAmB,EAAE,OAAO,EAAE,MAAM;AAAA,EACtC;AACF,CAAC;AAGM,IAAM,sBAAsB,qBAAqB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;AAGM,IAAM,mBAAmB,kBAAkB;AAAA,EAChD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,kBAAkB,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EACtC;AACF,CAAC;;;ACrCD,SAAS,KAAAA,UAAS;AAEX,IAAM,gBAAgB;AAEtB,IAAM,4BAA4B,qBAAqB;AAAA,EAC5D,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAUA,GACP,OAAO;AAAA,MACN,MAAMA,GAAE,OAAO;AAAA,MACf,SAASA,GAAE,OAAO;AAAA,MAClB,aAAaA,GAAE,OAAO,EAAE,MAAM,EAAE,SAAS;AAAA,MACzC,YAAYA,GAAE,OAAO,EAAE,MAAM,EAAE,SAAS;AAAA,IAC1C,CAAC,EACA,MAAM;AAAA,IAET,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,IAC9B,iBAAiBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACvC;AACF,CAAC;AAGM,IAAM,4BAA4B,qBAAqB;AAAA,EAC5D,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,SAASA,GACN,OAAO;AAAA,MACN,cAAcA,GAAE,OAAO;AAAA,MACvB,WAAWA,GAAE,OAAO;AAAA,MACpB,aAAaA,GAAE,OAAO;AAAA,MACtB,MAAMA,GAAE,OAAO;AAAA,MAEf,iBAAiBA,GAAE,OAAO,EAAE,SAAS;AAAA,MACrC,kBAAkBA,GAAE,OAAO,EAAE,SAAS;AAAA,MACtC,WAAWA,GAAE,OAAO,EAAE,SAAS;AAAA,MAC/B,YAAYA,GAAE,OAAO,EAAE,SAAS;AAAA,IAClC,CAAC,EACA,MAAM;AAAA,EACX;AACF,CAAC;AAGM,IAAM,yBAAyB,kBAAkB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,kBAAkBA,GACf,OAAO;AAAA,MACN,YAAYA,GAAE,OAAO;AAAA,MACrB,OAAOA,GAAE,OAAO;AAAA,IAClB,CAAC,EACA,MAAM;AAAA,EACX;AACF,CAAC;;;AC/DD,SAAS,KAAAC,UAAS;AAEX,IAAM,SAAS;AAEf,IAAM,qBAAqB,qBAAqB;AAAA,EACrD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAUA,GAAE,OAAO;AAAA,IACnB,aAAaA,GAAE,OAAO,EAAE,MAAM;AAAA,IAC9B,YAAYA,GAAE,OAAO,EAAE,MAAM;AAAA,EAC/B;AACF,CAAC;AAGM,IAAM,qBAAqB,qBAAqB;AAAA,EACrD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;AAGM,IAAM,kBAAkB,kBAAkB;AAAA,EAC/C,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;;;ACrCD,OAAO,gBAAgB;AACvB,OAAOC,QAAO;AAeP,IAAM,kBAAkB;AAAA,EAC7B;AAAA,IACE,YAAY;AAAA,MACV;AAAA,QACE,UAAU;AAAA,QACV,UAAU;AAAA,QACV,OAAO;AAAA,MACT;AAAA,MACA;AAAA,QACE,UAAU;AAAA,QACV,UAAU;AAAA,QACV,OAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,WAAW,CAAC,mBAAmB;AAAA,IAC/B,SAAS,CAAC,mBAAmB,SAAS;AAAA,IAEtC,iBAAiB;AAAA,MACf,OAAOC,GAAE,OAAO;AAAA,MAChB,eAAeA,GAAE,OAAO,EAAE,SAAS;AAAA,MACnC,sBAAsB,2BAA2B,SAAS;AAAA,MAC1D,yBAAyBA,GACtB,OAAOA,GAAE,OAAO,GAAGA,GAAE,OAAkB,CAAC,EACxC,SAAS;AAAA,MACZ,cAAc,2BAA2B,SAAS;AAAA,MAClD,mBAAmBA,GAAE,OAAOA,GAAE,OAAO,GAAGA,GAAE,OAAO,CAAC,EAAE,SAAS;AAAA,IAC/D;AAAA,EACF;AAAA,EACA,OAAO,WAAW;AAChB,UAAM,EAAE,UAAU,UAAU,QAAQ,UAAU,IAAI;AAClD,UAAM,WAAyC,CAAC;AAEhD,QAAI,UAAU,cAAc;AAC1B,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,UAAU,aAAa;AAAA,MAClC,CAAC;AAAA,IACH;AAEA,QAAI,SAAS,SAAS,cAAc;AAClC,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,UAAU,QAAQ;AAAA,MAC7B,CAAC;AACD,uBAAiB,UAAU,UAAU,qBAAqB,CAAC,CAAC;AAE5D,aAAO,OAAO;AAAA,QACZ;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,cAAc;AAAA,UACZ,UAAU,OAAO,cAAc,YAAY;AAAA,UAC3C,OAAO,OAAO,cAAc,SAAS;AAAA,QACvC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,QAAI,SAAS,SAAS,aAAa;AACjC,UACE,UACA,QAAQ,SAAU,GAAG,mBAAmB,mBACxC;AACA,cAAM,IAAI;AAAA,UACR,uEAAuE,QAAQ,IAAI;AAAA,QACrF;AAAA,MACF;AAEA,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,SAAS;AAAA,MACpB,CAAC;AACD,uBAAiB,UAAU,UAAU,qBAAqB,CAAC,CAAC;AAE5D,aAAO,MAAM;AAAA,QACX;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,cAAc;AAAA,UACZ,UAAU,OAAO,cAAc,YAAY;AAAA,UAC3C,OAAO,OAAO,cAAc,SAAS;AAAA,QACvC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,IAAI,MAAM,4BAA4B;AAAA,EAC9C;AACF;AAEA,eAAe,MAAM,QAgBlB;AACD,QAAM,EAAE,UAAU,UAAU,UAAU,QAAQ,UAAU,IAAI;AAE5D,QAAM,mBAAmB,MAAM,SAAS,QAAQ;AAAA,IAC9C,OAAO,UAAU;AAAA,IACjB;AAAA,EACF,CAAC;AAED,QAAM,WAAW,MAAM,mBAAmB;AAAA,IACxC;AAAA,MACE,MAAM,iBAAiB;AAAA,MACvB,WAAW,iBAAiB;AAAA,MAC5B,aAAa,iBAAiB;AAAA,MAC9B,YAAY,SAAS;AAAA,MACrB,WAAW,UAAU;AAAA,MACrB,UAAU,SAAS;AAAA,MACnB,gBAAgB,UAAU,cAAc;AAAA,MAExC,iBAAiB,iBAAiB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,WAAW,iBAAiB;AAAA,MAC5B,YAAY,iBAAiB;AAAA,IAC/B;AAAA,IACA,OAAO,cAAc,YAAY;AAAA,EACnC;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,mBAA6B;AACvE,QAAI,CAAC,UAAU,eAAe;AAC5B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,OAAO,UAAU;AAAA,MACjB,UAAU,SAAS;AAAA,MACnB,QAAQ,2BAA2B,SAAS,YAAY,KAAK,IAAI,CAAC;AAAA,mBAAsB,SAAS,WAAW,KAAK,IAAI,CAAC;AAAA,MACtH,cAAc,UAAU,sBAAsB;AAAA,MAC9C,UAAU;AAAA,QACR;AAAA,UACE,IAAI;AAAA,UACJ,aACE;AAAA,UACF,QAAQ;AAAA,QACV;AAAA,MACF;AAAA,MACA,iBAAiB,UAAU,2BAA2B,CAAC;AAAA,IACzD,CAAC;AAED,QAAI,iBAAiB,MAAM;AACzB,YAAM,QAAQ,MAAM,gBAAgB;AAAA,QAClC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB,YAAY,SAAS;AAAA,UACrB,aAAa,aAAa;AAAA,UAC1B,mBAAmB,aAAa;AAAA,UAChC,oBAAoB,aAAa;AAAA,UACjC,yBAAyB,aAAa;AAAA,UACtC,0BAA0B,aAAa;AAAA,UACvC,kBAAkB,aAAa;AAAA,UAC/B,mBAAmB,UAAU;AAAA,UAC7B,wBAAwB,UAAU,sBAAsB;AAAA,UACxD,UAAU;AAAA,YACR,GAAG,aAAa;AAAA,YAChB,iBAAiB,aAAa;AAAA,UAChC;AAAA,QACF;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS;AACpB;AAEA,eAAe,OAAO,QAgBnB;AACD,QAAM,EAAE,UAAU,UAAU,UAAU,QAAQ,UAAU,IAAI;AAE5D,QAAM,mBAAmB,MAAM,SAAS,QAAQ;AAAA,IAC9C,OAAO,UAAU;AAAA,IACjB;AAAA,EACF,CAAC;AAED,QAAM,WAAW,MAAM,oBAAoB;AAAA,IACzC;AAAA,MACE,MAAM,iBAAiB;AAAA,MACvB,WAAW,iBAAiB;AAAA,MAC5B,aAAa,iBAAiB;AAAA,MAC9B,YAAY,SAAS;AAAA,MACrB,WAAW,UAAU;AAAA,MACrB,UAAU,SAAS;AAAA,MACnB,gBAAgB,UAAU,cAAc;AAAA,MAExC,iBAAiB,iBAAiB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,WAAW,iBAAiB;AAAA,MAC5B,YAAY,iBAAiB;AAAA,IAC/B;AAAA,IACA,OAAO,cAAc,YAAY;AAAA,EACnC;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,QAAkB;AAC5D,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,UAAU,SAAS;AAAA,MACnB,SAAS,SAAS;AAAA,MAClB,gBAAgB,SAAS;AAAA,IAC3B,CAAC;AAED,QAAI,iBAAiB,MAAM;AACzB,YAAM,QAAQ,MAAM,iBAAiB;AAAA,QACnC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB,YAAY,SAAS;AAAA,UACrB,kBAAkB,aAAa;AAAA,UAC/B,aAAa,aAAa;AAAA,UAC1B,UAAU,aAAa;AAAA,QACzB;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,mBAA6B;AACvE,QAAI,CAAC,UAAU,eAAe;AAC5B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,OAAO,UAAU;AAAA,MACjB,UAAU;AAAA,QACR;AAAA,UACE,IAAI;AAAA,UACJ,aACE;AAAA,UACF,QAAQ;AAAA,QACV;AAAA,MACF;AAAA,MACA,QAAQ;AAAA,qBACO,SAAS,kBAAkB,IAAI,CAAC,QAAQ,KAAK,GAAG,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,sBAC7D,SAAS,kBAAkB,IAAI,CAAC,QAAQ,KAAK,SAAS,UAAU,GAAG,KAAK,EAAE,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,MACxG,iBAAiB;AAAA,QACf,kBAAkBA,GACf,OAAO,EACP,MAAM,EACN;AAAA,UACC;AAAA,QACF;AAAA,QACF,GAAI,UAAU,2BAA2B,CAAC;AAAA,MAC5C;AAAA,MACA,UAAU,SAAS;AAAA,MACnB,cAAc,UAAU,sBAAsB;AAAA,IAChD,CAAC;AAED,QAAI,iBAAiB,MAAM;AACzB,YAAM,EAAE,kBAAkB,GAAG,gBAAgB,IAC3C,aAAa;AACf,YAAM,QAAQ,MAAM,iBAAiB;AAAA,QACnC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB;AAAA,UACA,YAAY,SAAS;AAAA,UACrB,aAAa,aAAa;AAAA,UAC1B,mBAAmB,aAAa;AAAA,UAChC,oBAAoB,aAAa;AAAA,UACjC,yBAAyB,aAAa;AAAA,UACtC,0BAA0B,aAAa;AAAA,UACvC,kBAAkB,aAAa;AAAA,UAC/B,mBAAmB,UAAU;AAAA,UAC7B,wBAAwB,UAAU,sBAAsB;AAAA,UACxD,UAAU;AAAA,YACR,GAAG,aAAa;AAAA,YAChB;AAAA,UACF;AAAA,QACF;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS;AACpB;AAEA,SAAS,UAAU,UAAyB;AAC1C,SAAO,aAAa,SAAS,QAAQ;AAAA;AAAA,EAAe,OAAO;AAAA,IACzD,SAAS,WAAW,CAAC;AAAA,EACvB,EACG,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM,GAAG,GAAG,KAAK,KAAK,EAAE,EACxC,KAAK,IAAI,CAAC;AACf;AAEA,SAAS,iBACP,UACA,mBACA;AACA,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,WAAW,WAAW,QAAQ,SAAS,CAAC,EAAG,OAAO;AACxD,aAAS,CAAC,EAAG,UAAU,SAAS,iBAAiB;AAAA,EACnD;AACF;;;AC9UA,OAAOC,QAAO;AAEP,IAAM,uBAAN,cAAmC,gBAUxC;AAAA,EACA,YAAY,QAA8C;AACxD,UAAM;AAAA,MACJ,MAAM,OAAO;AAAA,MACb,WAAW,OAAO;AAAA,MAElB,QAAQA,GAAE,MAAM;AAAA,QACd;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;","names":["z","z","z","z","z"]}
|
|
1
|
+
{"version":3,"sources":["../../src/benchmarks/peerbench/index.ts","../../src/benchmarks/peerbench/schema-sets/mcq.v1.ts","../../src/benchmarks/peerbench/schema-sets/multi-turn.v1.ts","../../src/benchmarks/peerbench/schema-sets/qa.v1.ts","../../src/benchmarks/peerbench/mcq-runner.ts","../../src/benchmarks/peerbench/qa-runner.ts","../../src/benchmarks/peerbench/storages/json.ts"],"sourcesContent":["export * from \"./schema-sets/mcq.v1\";\nexport * from \"./schema-sets/multi-turn.v1\";\nexport * from \"./schema-sets/qa.v1\";\n\nexport * from \"./mcq-runner\";\nexport * from \"./qa-runner\";\n\nexport * from \"./storages/json\";\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const MCQKind = `llm/mcq` as const;\n\nexport const MCQTestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n question: z.string(),\n options: z.record(z.string(), z.string()),\n correctAnswerKeys: z.string().array(),\n },\n});\nexport type MCQTestCaseV1 = z.infer<typeof MCQTestCaseSchemaV1>;\n\nexport const MCQResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n },\n});\nexport type MCQResponseV1 = z.infer<typeof MCQResponseSchemaV1>;\n\nexport const MCQScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n extractedAnswers: z.array(z.string()),\n },\n});\nexport type MCQScoreV1 = z.infer<typeof MCQScoreSchemaV1>;\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const MultiTurnKind = `llm/multi-turn` as const;\n\nexport const MultiTurnTestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n messages: z\n .object({\n role: z.string(),\n content: z.string(),\n goodAnswers: z.string().array().optional(),\n badAnswers: z.string().array().optional(),\n })\n .array(),\n\n maxTurns: z.number().optional(),\n expectedOutcome: z.string().optional(),\n },\n});\nexport type MultiTurnTestCaseV1 = z.infer<typeof MultiTurnTestCaseSchemaV1>;\n\nexport const MultiTurnResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n replies: z\n .object({\n messageIndex: z.number(),\n startedAt: z.number(),\n completedAt: z.number(),\n data: z.string(),\n\n inputTokensUsed: z.number().optional(),\n outputTokensUsed: z.number().optional(),\n inputCost: z.string().optional(),\n outputCost: z.string().optional(),\n })\n .array(),\n },\n});\nexport type MultiTurnResponseV1 = z.infer<typeof MultiTurnResponseSchemaV1>;\n\nexport const MultiTurnScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n individualScores: z\n .object({\n replyIndex: z.number(),\n value: z.number(),\n })\n .array(),\n },\n});\nexport type MultiTurnScoreV1 = z.infer<typeof MultiTurnScoreSchemaV1>;\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const QAKind = `llm/qa` as const;\n\nexport const QATestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n question: z.string(),\n goodAnswers: z.string().array(),\n badAnswers: z.string().array(),\n },\n});\nexport type QATestCaseV1 = z.infer<typeof QATestCaseSchemaV1>;\n\nexport const QAResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n },\n});\nexport type QAResponseV1 = z.infer<typeof QAResponseSchemaV1>;\n\nexport const QAScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n },\n});\nexport type QAScoreV1 = z.infer<typeof QAScoreSchemaV1>;\n","import { defineRunner } from \"@/helpers/define-runner\";\nimport { CallableLLM } from \"@/providers\";\nimport { SimpleSystemPromptV1 } from \"@/schemas/llm\";\nimport { LLMAsAJudgeScorer, MCQScorer } from \"@/scorers\";\nimport { IdGenerator, ScoringMethod } from \"@/types\";\nimport { idGeneratorUUIDv7 } from \"@/utils\";\nimport { ChatCompletionMessageParam } from \"openai/resources/index\";\nimport Handlebars from \"handlebars\";\nimport z from \"zod\";\nimport {\n MCQResponseSchemaV1,\n MCQScoreSchemaV1,\n MCQTestCaseV1,\n} from \"./schema-sets/mcq.v1\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport const mcqRunner = defineRunner(\n async (params: {\n testCase: MCQTestCaseV1;\n target: CallableLLM;\n scorer?: MCQScorer | LLMAsAJudgeScorer;\n systemPrompt?: SimpleSystemPromptV1;\n llmJudgeSystemPrompt?: SimpleSystemPromptV1;\n llmJudgeFieldsToExtract?: Record<string, z.ZodType>;\n templateVariables?: Record<string, string>;\n idGenerators?: {\n response?: IdGenerator;\n score?: IdGenerator;\n };\n }) => {\n const { testCase, target, scorer } = params;\n const messages: ChatCompletionMessageParam[] = [];\n\n if (params.systemPrompt) {\n messages.push({\n role: \"system\",\n content: params.systemPrompt.content,\n });\n }\n\n messages.push({\n role: \"user\",\n content: formatMCQ(testCase),\n });\n templateMessages(messages, params.templateVariables ?? {});\n\n const providerResponse = await target.forward({ messages });\n\n const response = await MCQResponseSchemaV1.newWithId(\n {\n data: providerResponse.data,\n startedAt: providerResponse.startedAt,\n completedAt: providerResponse.completedAt,\n testCaseId: testCase.id,\n modelSlug: target.slug,\n provider: target.provider.kind,\n systemPromptId: params.systemPrompt?.id,\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n },\n params.idGenerators?.response ?? idGeneratorUUIDv7\n );\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/mcq` as const)) {\n const scorerResult = await scorer.score({\n response: response.data,\n choices: testCase.options,\n correctAnswers: testCase.correctAnswerKeys,\n });\n\n if (scorerResult !== null) {\n const score = await MCQScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.algo,\n value: scorerResult.value,\n responseId: response.id,\n extractedAnswers: scorerResult.extractedAnswers,\n explanation: scorerResult.explanation,\n metadata: scorerResult.metadata,\n },\n params.idGenerators?.score ?? idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)) {\n const scorerResult = await scorer.score({\n criteria: [\n {\n id: \"correctness\",\n description:\n \"Is the given answer key matches with one of the correct answer keys?\",\n weight: 1,\n },\n ],\n rubric: `Answer text itself or the key (A, B, C) is accepted\nValid answer keys: ${testCase.correctAnswerKeys.map((key) => `- ${key}`).join(\"\\n\")}\nValid Answer texts: ${testCase.correctAnswerKeys.map((key) => `- ${testCase.options?.[key] ?? \"\"}`).join(\"\\n\")}`,\n fieldsToExtract: {\n extractedAnswers: z\n .string()\n .array()\n .describe(\n \"The extracted answer keys, valid or invalid (even if the answer text is provided rather than the key)\"\n ),\n ...(params.llmJudgeFieldsToExtract ?? {}),\n },\n response: response.data,\n systemPrompt: params.llmJudgeSystemPrompt?.content,\n });\n\n if (scorerResult !== null) {\n const { extractedAnswers, ...extractedFields } =\n scorerResult.extractedFields;\n const score = await MCQScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.ai,\n value: scorerResult.value,\n extractedAnswers,\n responseId: response.id,\n explanation: scorerResult.explanation,\n scorerAIInputCost: scorerResult.inputCost,\n scorerAIOutputCost: scorerResult.outputCost,\n scorerAIInputTokensUsed: scorerResult.inputTokensUsed,\n scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,\n scorerAIProvider: scorerResult.provider,\n scorerAIModelSlug: scorerResult.modelSlug,\n scorerAISystemPromptId: params.llmJudgeSystemPrompt?.id,\n metadata: {\n ...scorerResult.metadata,\n extractedFields,\n },\n },\n params.idGenerators?.score ?? idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n return { response };\n }\n);\n\nfunction formatMCQ(testCase: MCQTestCaseV1) {\n return `Question: ${testCase.question}\\nOptions:\\n${Object.entries(\n testCase.options ?? {}\n )\n .map(([key, value]) => `${key}: ${value}`)\n .join(\"\\n\")}`;\n}\n\nfunction templateMessages(\n messages: ChatCompletionMessageParam[],\n templateVariables: Record<string, string>\n) {\n for (let i = 0; i < messages.length; i++) {\n const template = Handlebars.compile(messages[i]!.content);\n messages[i]!.content = template(templateVariables);\n }\n}\n","import { defineRunner } from \"@/helpers/define-runner\";\nimport { CallableLLM } from \"@/providers\";\nimport { SimpleSystemPromptV1 } from \"@/schemas/llm\";\nimport { LLMAsAJudgeScorer } from \"@/scorers\";\nimport { IdGenerator, ScoringMethod } from \"@/types\";\nimport { idGeneratorUUIDv7 } from \"@/utils\";\nimport { ChatCompletionMessageParam } from \"openai/resources/index\";\nimport Handlebars from \"handlebars\";\nimport z from \"zod\";\nimport {\n QAResponseSchemaV1,\n QAScoreSchemaV1,\n QATestCaseV1,\n} from \"./schema-sets/qa.v1\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport const qaRunner = defineRunner(\n async (params: {\n testCase: QATestCaseV1;\n target: CallableLLM;\n scorer?: LLMAsAJudgeScorer;\n systemPrompt?: SimpleSystemPromptV1;\n llmJudgeSystemPrompt?: SimpleSystemPromptV1;\n llmJudgeFieldsToExtract?: Record<string, z.ZodType>;\n templateVariables?: Record<string, string>;\n idGenerators?: {\n response?: IdGenerator;\n score?: IdGenerator;\n };\n }) => {\n const { testCase, target, scorer } = params;\n const messages: ChatCompletionMessageParam[] = [];\n\n if (params.systemPrompt) {\n messages.push({\n role: \"system\",\n content: params.systemPrompt.content,\n });\n }\n\n messages.push({\n role: \"user\",\n content: testCase.question,\n });\n templateMessages(messages, params.templateVariables ?? {});\n\n const providerResponse = await target.forward({ messages });\n\n const response = await QAResponseSchemaV1.newWithId(\n {\n data: providerResponse.data,\n startedAt: providerResponse.startedAt,\n completedAt: providerResponse.completedAt,\n testCaseId: testCase.id,\n modelSlug: target.slug,\n provider: target.provider.kind,\n systemPromptId: params.systemPrompt?.id,\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n },\n params.idGenerators?.response ?? idGeneratorUUIDv7\n );\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)) {\n const scorerResult = await scorer.score({\n response: response.data,\n rubric: `Expected/Valid answers: ${testCase.goodAnswers.join(\"\\n\")}\\nInvalid answers: ${testCase.badAnswers.join(\"\\n\")}`,\n systemPrompt: params.llmJudgeSystemPrompt?.content,\n criteria: [\n {\n id: \"correctness\",\n description:\n \"Is the response matches with the expected/valid answers in terms of meaning?\",\n weight: 1,\n },\n ],\n fieldsToExtract: params.llmJudgeFieldsToExtract ?? {},\n });\n\n if (scorerResult !== null) {\n const score = await QAScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.ai,\n value: scorerResult.value,\n responseId: response.id,\n explanation: scorerResult.explanation,\n scorerAIInputCost: scorerResult.inputCost,\n scorerAIOutputCost: scorerResult.outputCost,\n scorerAIInputTokensUsed: scorerResult.inputTokensUsed,\n scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,\n scorerAIProvider: scorerResult.provider,\n scorerAIModelSlug: scorerResult.modelSlug,\n scorerAISystemPromptId: params.llmJudgeSystemPrompt?.id,\n metadata: {\n ...scorerResult.metadata,\n extractedFields: scorerResult.extractedFields,\n },\n },\n params.idGenerators?.score ?? idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n return { response };\n }\n);\n\nfunction templateMessages(\n messages: ChatCompletionMessageParam[],\n templateVariables: Record<string, string>\n) {\n for (let i = 0; i < messages.length; i++) {\n const template = Handlebars.compile(messages[i]!.content);\n messages[i]!.content = template(templateVariables);\n }\n}\n","import { JSONFileStorage } from \"@/storages/json-file\";\nimport {\n MCQResponseSchemaV1,\n MCQResponseV1,\n MCQScoreSchemaV1,\n MCQScoreV1,\n MCQTestCaseSchemaV1,\n MCQTestCaseV1,\n} from \"../schema-sets/mcq.v1\";\nimport {\n QAResponseSchemaV1,\n QAResponseV1,\n QAScoreSchemaV1,\n QAScoreV1,\n QATestCaseSchemaV1,\n QATestCaseV1,\n} from \"../schema-sets/qa.v1\";\nimport {\n MultiTurnResponseSchemaV1,\n MultiTurnResponseV1,\n MultiTurnScoreSchemaV1,\n MultiTurnScoreV1,\n MultiTurnTestCaseSchemaV1,\n MultiTurnTestCaseV1,\n} from \"../schema-sets/multi-turn.v1\";\nimport z from \"zod\";\n\nexport class PeerbenchJSONStorage extends JSONFileStorage<\n | MCQTestCaseV1\n | MCQResponseV1\n | MCQScoreV1\n | QATestCaseV1\n | QAResponseV1\n | QAScoreV1\n | MultiTurnTestCaseV1\n | MultiTurnResponseV1\n | MultiTurnScoreV1\n> {\n constructor(config: { path: string; chunkSize?: number }) {\n super({\n path: config.path,\n chunkSize: config.chunkSize,\n\n schema: z.union([\n MCQTestCaseSchemaV1,\n MCQResponseSchemaV1,\n MCQScoreSchemaV1,\n QATestCaseSchemaV1,\n QAResponseSchemaV1,\n QAScoreSchemaV1,\n MultiTurnTestCaseSchemaV1,\n MultiTurnResponseSchemaV1,\n MultiTurnScoreSchemaV1,\n ]),\n });\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACWA,SAAS,SAAS;AAEX,IAAM,UAAU;AAEhB,IAAM,sBAAsB,qBAAqB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAU,EAAE,OAAO;AAAA,IACnB,SAAS,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC;AAAA,IACxC,mBAAmB,EAAE,OAAO,EAAE,MAAM;AAAA,EACtC;AACF,CAAC;AAGM,IAAM,sBAAsB,qBAAqB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;AAGM,IAAM,mBAAmB,kBAAkB;AAAA,EAChD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,kBAAkB,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EACtC;AACF,CAAC;;;ACrCD,SAAS,KAAAA,UAAS;AAEX,IAAM,gBAAgB;AAEtB,IAAM,4BAA4B,qBAAqB;AAAA,EAC5D,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAUA,GACP,OAAO;AAAA,MACN,MAAMA,GAAE,OAAO;AAAA,MACf,SAASA,GAAE,OAAO;AAAA,MAClB,aAAaA,GAAE,OAAO,EAAE,MAAM,EAAE,SAAS;AAAA,MACzC,YAAYA,GAAE,OAAO,EAAE,MAAM,EAAE,SAAS;AAAA,IAC1C,CAAC,EACA,MAAM;AAAA,IAET,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,IAC9B,iBAAiBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACvC;AACF,CAAC;AAGM,IAAM,4BAA4B,qBAAqB;AAAA,EAC5D,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,SAASA,GACN,OAAO;AAAA,MACN,cAAcA,GAAE,OAAO;AAAA,MACvB,WAAWA,GAAE,OAAO;AAAA,MACpB,aAAaA,GAAE,OAAO;AAAA,MACtB,MAAMA,GAAE,OAAO;AAAA,MAEf,iBAAiBA,GAAE,OAAO,EAAE,SAAS;AAAA,MACrC,kBAAkBA,GAAE,OAAO,EAAE,SAAS;AAAA,MACtC,WAAWA,GAAE,OAAO,EAAE,SAAS;AAAA,MAC/B,YAAYA,GAAE,OAAO,EAAE,SAAS;AAAA,IAClC,CAAC,EACA,MAAM;AAAA,EACX;AACF,CAAC;AAGM,IAAM,yBAAyB,kBAAkB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,kBAAkBA,GACf,OAAO;AAAA,MACN,YAAYA,GAAE,OAAO;AAAA,MACrB,OAAOA,GAAE,OAAO;AAAA,IAClB,CAAC,EACA,MAAM;AAAA,EACX;AACF,CAAC;;;AC/DD,SAAS,KAAAC,UAAS;AAEX,IAAM,SAAS;AAEf,IAAM,qBAAqB,qBAAqB;AAAA,EACrD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAUA,GAAE,OAAO;AAAA,IACnB,aAAaA,GAAE,OAAO,EAAE,MAAM;AAAA,IAC9B,YAAYA,GAAE,OAAO,EAAE,MAAM;AAAA,EAC/B;AACF,CAAC;AAGM,IAAM,qBAAqB,qBAAqB;AAAA,EACrD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;AAGM,IAAM,kBAAkB,kBAAkB;AAAA,EAC/C,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;;;ACxCD,OAAO,gBAAgB;AACvB,OAAOC,QAAO;AAQP,IAAM,YAAY;AAAA,EACvB,OAAO,WAYD;AACJ,UAAM,EAAE,UAAU,QAAQ,OAAO,IAAI;AACrC,UAAM,WAAyC,CAAC;AAEhD,QAAI,OAAO,cAAc;AACvB,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,OAAO,aAAa;AAAA,MAC/B,CAAC;AAAA,IACH;AAEA,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,SAAS,UAAU,QAAQ;AAAA,IAC7B,CAAC;AACD,qBAAiB,UAAU,OAAO,qBAAqB,CAAC,CAAC;AAEzD,UAAM,mBAAmB,MAAM,OAAO,QAAQ,EAAE,SAAS,CAAC;AAE1D,UAAM,WAAW,MAAM,oBAAoB;AAAA,MACzC;AAAA,QACE,MAAM,iBAAiB;AAAA,QACvB,WAAW,iBAAiB;AAAA,QAC5B,aAAa,iBAAiB;AAAA,QAC9B,YAAY,SAAS;AAAA,QACrB,WAAW,OAAO;AAAA,QAClB,UAAU,OAAO,SAAS;AAAA,QAC1B,gBAAgB,OAAO,cAAc;AAAA,QACrC,iBAAiB,iBAAiB;AAAA,QAClC,kBAAkB,iBAAiB;AAAA,QACnC,WAAW,iBAAiB;AAAA,QAC5B,YAAY,iBAAiB;AAAA,MAC/B;AAAA,MACA,OAAO,cAAc,YAAY;AAAA,IACnC;AAEA,QAAI,QAAQ,SAAU,GAAG,mBAAmB,QAAkB;AAC5D,YAAM,eAAe,MAAM,OAAO,MAAM;AAAA,QACtC,UAAU,SAAS;AAAA,QACnB,SAAS,SAAS;AAAA,QAClB,gBAAgB,SAAS;AAAA,MAC3B,CAAC;AAED,UAAI,iBAAiB,MAAM;AACzB,cAAM,QAAQ,MAAM,iBAAiB;AAAA,UACnC;AAAA,YACE,eAAe,cAAc;AAAA,YAC7B,OAAO,aAAa;AAAA,YACpB,YAAY,SAAS;AAAA,YACrB,kBAAkB,aAAa;AAAA,YAC/B,aAAa,aAAa;AAAA,YAC1B,UAAU,aAAa;AAAA,UACzB;AAAA,UACA,OAAO,cAAc,SAAS;AAAA,QAChC;AAEA,eAAO,EAAE,UAAU,MAAM;AAAA,MAC3B;AAAA,IACF;AAEA,QAAI,QAAQ,SAAU,GAAG,mBAAmB,mBAA6B;AACvE,YAAM,eAAe,MAAM,OAAO,MAAM;AAAA,QACtC,UAAU;AAAA,UACR;AAAA,YACE,IAAI;AAAA,YACJ,aACE;AAAA,YACF,QAAQ;AAAA,UACV;AAAA,QACF;AAAA,QACA,QAAQ;AAAA,qBACK,SAAS,kBAAkB,IAAI,CAAC,QAAQ,KAAK,GAAG,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,sBAC7D,SAAS,kBAAkB,IAAI,CAAC,QAAQ,KAAK,SAAS,UAAU,GAAG,KAAK,EAAE,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,QACtG,iBAAiB;AAAA,UACf,kBAAkBC,GACf,OAAO,EACP,MAAM,EACN;AAAA,YACC;AAAA,UACF;AAAA,UACF,GAAI,OAAO,2BAA2B,CAAC;AAAA,QACzC;AAAA,QACA,UAAU,SAAS;AAAA,QACnB,cAAc,OAAO,sBAAsB;AAAA,MAC7C,CAAC;AAED,UAAI,iBAAiB,MAAM;AACzB,cAAM,EAAE,kBAAkB,GAAG,gBAAgB,IAC3C,aAAa;AACf,cAAM,QAAQ,MAAM,iBAAiB;AAAA,UACnC;AAAA,YACE,eAAe,cAAc;AAAA,YAC7B,OAAO,aAAa;AAAA,YACpB;AAAA,YACA,YAAY,SAAS;AAAA,YACrB,aAAa,aAAa;AAAA,YAC1B,mBAAmB,aAAa;AAAA,YAChC,oBAAoB,aAAa;AAAA,YACjC,yBAAyB,aAAa;AAAA,YACtC,0BAA0B,aAAa;AAAA,YACvC,kBAAkB,aAAa;AAAA,YAC/B,mBAAmB,aAAa;AAAA,YAChC,wBAAwB,OAAO,sBAAsB;AAAA,YACrD,UAAU;AAAA,cACR,GAAG,aAAa;AAAA,cAChB;AAAA,YACF;AAAA,UACF;AAAA,UACA,OAAO,cAAc,SAAS;AAAA,QAChC;AAEA,eAAO,EAAE,UAAU,MAAM;AAAA,MAC3B;AAAA,IACF;AAEA,WAAO,EAAE,SAAS;AAAA,EACpB;AACF;AAEA,SAAS,UAAU,UAAyB;AAC1C,SAAO,aAAa,SAAS,QAAQ;AAAA;AAAA,EAAe,OAAO;AAAA,IACzD,SAAS,WAAW,CAAC;AAAA,EACvB,EACG,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM,GAAG,GAAG,KAAK,KAAK,EAAE,EACxC,KAAK,IAAI,CAAC;AACf;AAEA,SAAS,iBACP,UACA,mBACA;AACA,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,WAAW,WAAW,QAAQ,SAAS,CAAC,EAAG,OAAO;AACxD,aAAS,CAAC,EAAG,UAAU,SAAS,iBAAiB;AAAA,EACnD;AACF;;;AC7JA,OAAOC,iBAAgB;AAShB,IAAM,WAAW;AAAA,EACtB,OAAO,WAYD;AACJ,UAAM,EAAE,UAAU,QAAQ,OAAO,IAAI;AACrC,UAAM,WAAyC,CAAC;AAEhD,QAAI,OAAO,cAAc;AACvB,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,OAAO,aAAa;AAAA,MAC/B,CAAC;AAAA,IACH;AAEA,aAAS,KAAK;AAAA,MACZ,MAAM;AAAA,MACN,SAAS,SAAS;AAAA,IACpB,CAAC;AACD,IAAAC,kBAAiB,UAAU,OAAO,qBAAqB,CAAC,CAAC;AAEzD,UAAM,mBAAmB,MAAM,OAAO,QAAQ,EAAE,SAAS,CAAC;AAE1D,UAAM,WAAW,MAAM,mBAAmB;AAAA,MACxC;AAAA,QACE,MAAM,iBAAiB;AAAA,QACvB,WAAW,iBAAiB;AAAA,QAC5B,aAAa,iBAAiB;AAAA,QAC9B,YAAY,SAAS;AAAA,QACrB,WAAW,OAAO;AAAA,QAClB,UAAU,OAAO,SAAS;AAAA,QAC1B,gBAAgB,OAAO,cAAc;AAAA,QACrC,iBAAiB,iBAAiB;AAAA,QAClC,kBAAkB,iBAAiB;AAAA,QACnC,WAAW,iBAAiB;AAAA,QAC5B,YAAY,iBAAiB;AAAA,MAC/B;AAAA,MACA,OAAO,cAAc,YAAY;AAAA,IACnC;AAEA,QAAI,QAAQ,SAAU,GAAG,mBAAmB,mBAA6B;AACvE,YAAM,eAAe,MAAM,OAAO,MAAM;AAAA,QACtC,UAAU,SAAS;AAAA,QACnB,QAAQ,2BAA2B,SAAS,YAAY,KAAK,IAAI,CAAC;AAAA,mBAAsB,SAAS,WAAW,KAAK,IAAI,CAAC;AAAA,QACtH,cAAc,OAAO,sBAAsB;AAAA,QAC3C,UAAU;AAAA,UACR;AAAA,YACE,IAAI;AAAA,YACJ,aACE;AAAA,YACF,QAAQ;AAAA,UACV;AAAA,QACF;AAAA,QACA,iBAAiB,OAAO,2BAA2B,CAAC;AAAA,MACtD,CAAC;AAED,UAAI,iBAAiB,MAAM;AACzB,cAAM,QAAQ,MAAM,gBAAgB;AAAA,UAClC;AAAA,YACE,eAAe,cAAc;AAAA,YAC7B,OAAO,aAAa;AAAA,YACpB,YAAY,SAAS;AAAA,YACrB,aAAa,aAAa;AAAA,YAC1B,mBAAmB,aAAa;AAAA,YAChC,oBAAoB,aAAa;AAAA,YACjC,yBAAyB,aAAa;AAAA,YACtC,0BAA0B,aAAa;AAAA,YACvC,kBAAkB,aAAa;AAAA,YAC/B,mBAAmB,aAAa;AAAA,YAChC,wBAAwB,OAAO,sBAAsB;AAAA,YACrD,UAAU;AAAA,cACR,GAAG,aAAa;AAAA,cAChB,iBAAiB,aAAa;AAAA,YAChC;AAAA,UACF;AAAA,UACA,OAAO,cAAc,SAAS;AAAA,QAChC;AAEA,eAAO,EAAE,UAAU,MAAM;AAAA,MAC3B;AAAA,IACF;AAEA,WAAO,EAAE,SAAS;AAAA,EACpB;AACF;AAEA,SAASA,kBACP,UACA,mBACA;AACA,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,WAAWC,YAAW,QAAQ,SAAS,CAAC,EAAG,OAAO;AACxD,aAAS,CAAC,EAAG,UAAU,SAAS,iBAAiB;AAAA,EACnD;AACF;;;AC9FA,OAAOC,QAAO;AAEP,IAAM,uBAAN,cAAmC,gBAUxC;AAAA,EACA,YAAY,QAA8C;AACxD,UAAM;AAAA,MACJ,MAAM,OAAO;AAAA,MACb,WAAW,OAAO;AAAA,MAElB,QAAQA,GAAE,MAAM;AAAA,QACd;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;","names":["z","z","z","z","Handlebars","templateMessages","Handlebars","z"]}
|