peerbench 0.0.2-alpha.1 → 0.0.2-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmarks/index.d.ts +2 -1
- package/dist/benchmarks/index.js +28 -18
- package/dist/benchmarks/index.js.map +1 -1
- package/dist/{chunk-Q6GSOHOP.js → chunk-ZXTQJFGL.js} +4 -4
- package/dist/index.js +3 -3
- package/dist/schemas/llm/index.js +2 -2
- package/package.json +1 -1
- /package/dist/{chunk-Q6GSOHOP.js.map → chunk-ZXTQJFGL.js.map} +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { I as IdGenerator } from '../index-BAioQhp2.js';
|
|
2
|
-
import { z } from 'zod';
|
|
2
|
+
import z__default, { z } from 'zod';
|
|
3
3
|
import { A as AbstractLLMProvider } from '../llm-DNj_tp2T.js';
|
|
4
4
|
import { a as MCQScorer, L as LLMAsAJudgeScorer } from '../llm-judge-QThCZ9TQ.js';
|
|
5
5
|
import { J as JSONFileStorage } from '../json-file-ZwzLUbje.js';
|
|
@@ -912,6 +912,7 @@ declare const peerbenchRunner: (params: {
|
|
|
912
912
|
schemaVersion: 1;
|
|
913
913
|
metadata?: Record<string, unknown> | undefined;
|
|
914
914
|
} | undefined;
|
|
915
|
+
llmJudgeFieldsToExtract?: Record<string, z__default.ZodType<unknown, unknown, z__default.core.$ZodTypeInternals<unknown, unknown>>> | undefined;
|
|
915
916
|
systemPrompt?: {
|
|
916
917
|
id: string;
|
|
917
918
|
version: number;
|
package/dist/benchmarks/index.js
CHANGED
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
import {
|
|
2
2
|
SimpleSystemPromptSchemaV1
|
|
3
|
-
} from "../chunk-Q6GSOHOP.js";
|
|
3
|
+
} from "../chunk-ZXTQJFGL.js";
|
|
4
4
|
import {
|
|
5
5
|
defineRunner
|
|
6
6
|
} from "../chunk-QY5MPNNB.js";
|
|
7
|
+
import {
|
|
8
|
+
AbstractLLMProvider
|
|
9
|
+
} from "../chunk-R76XA2K6.js";
|
|
10
|
+
import {
|
|
11
|
+
LLMAsAJudgeScorer,
|
|
12
|
+
MCQScorer
|
|
13
|
+
} from "../chunk-ZEWI24CV.js";
|
|
14
|
+
import {
|
|
15
|
+
PEERBENCH_NAMESPACE
|
|
16
|
+
} from "../chunk-UHHHSYVE.js";
|
|
7
17
|
import {
|
|
8
18
|
BaseResponseSchemaV1,
|
|
9
19
|
BaseScoreSchemaV1,
|
|
@@ -19,16 +29,6 @@ import {
|
|
|
19
29
|
import {
|
|
20
30
|
JSONFileStorage
|
|
21
31
|
} from "../chunk-ERALDEZY.js";
|
|
22
|
-
import {
|
|
23
|
-
LLMAsAJudgeScorer,
|
|
24
|
-
MCQScorer
|
|
25
|
-
} from "../chunk-ZEWI24CV.js";
|
|
26
|
-
import {
|
|
27
|
-
AbstractLLMProvider
|
|
28
|
-
} from "../chunk-R76XA2K6.js";
|
|
29
|
-
import {
|
|
30
|
-
PEERBENCH_NAMESPACE
|
|
31
|
-
} from "../chunk-UHHHSYVE.js";
|
|
32
32
|
import {
|
|
33
33
|
idGeneratorUUIDv7
|
|
34
34
|
} from "../chunk-4UBK6452.js";
|
|
@@ -202,6 +202,7 @@ var peerbenchRunner = defineRunner(
|
|
|
202
202
|
model: z4.string(),
|
|
203
203
|
llmJudgeModel: z4.string().optional(),
|
|
204
204
|
llmJudgeSystemPrompt: SimpleSystemPromptSchemaV1.optional(),
|
|
205
|
+
llmJudgeFieldsToExtract: z4.record(z4.string(), z4.custom()).optional(),
|
|
205
206
|
systemPrompt: SimpleSystemPromptSchemaV1.optional(),
|
|
206
207
|
templateVariables: z4.record(z4.string(), z4.string()).optional()
|
|
207
208
|
}
|
|
@@ -299,7 +300,8 @@ Invalid answers: ${testCase.badAnswers.join("\n")}`,
|
|
|
299
300
|
description: "Is the response matches with the expected/valid answers in terms of meaning?",
|
|
300
301
|
weight: 1
|
|
301
302
|
}
|
|
302
|
-
]
|
|
303
|
+
],
|
|
304
|
+
fieldsToExtract: runConfig.llmJudgeFieldsToExtract ?? {}
|
|
303
305
|
});
|
|
304
306
|
if (scorerResult !== null) {
|
|
305
307
|
const score = await QAScoreSchemaV1.newWithId(
|
|
@@ -308,14 +310,17 @@ Invalid answers: ${testCase.badAnswers.join("\n")}`,
|
|
|
308
310
|
value: scorerResult.value,
|
|
309
311
|
responseId: response.id,
|
|
310
312
|
explanation: scorerResult.explanation,
|
|
311
|
-
metadata: scorerResult.metadata,
|
|
312
313
|
scorerAIInputCost: scorerResult.inputCost,
|
|
313
314
|
scorerAIOutputCost: scorerResult.outputCost,
|
|
314
315
|
scorerAIInputTokensUsed: scorerResult.inputTokensUsed,
|
|
315
316
|
scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,
|
|
316
317
|
scorerAIProvider: scorerResult.provider,
|
|
317
318
|
scorerAIModelSlug: runConfig.llmJudgeModel,
|
|
318
|
-
scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id
|
|
319
|
+
scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,
|
|
320
|
+
metadata: {
|
|
321
|
+
...scorerResult.metadata,
|
|
322
|
+
extractedFields: scorerResult.extractedFields
|
|
323
|
+
}
|
|
319
324
|
},
|
|
320
325
|
params.idGenerators?.score ?? idGeneratorUUIDv7
|
|
321
326
|
);
|
|
@@ -388,27 +393,32 @@ Valid Answer texts: ${testCase.correctAnswerKeys.map((key) => `- ${testCase.opti
|
|
|
388
393
|
fieldsToExtract: {
|
|
389
394
|
extractedAnswers: z4.string().array().describe(
|
|
390
395
|
"The extracted answer keys, valid or invalid (even if the answer text is provided rather than the key)"
|
|
391
|
-
)
|
|
396
|
+
),
|
|
397
|
+
...runConfig.llmJudgeFieldsToExtract ?? {}
|
|
392
398
|
},
|
|
393
399
|
response: response.data,
|
|
394
400
|
systemPrompt: runConfig.llmJudgeSystemPrompt?.content
|
|
395
401
|
});
|
|
396
402
|
if (scorerResult !== null) {
|
|
403
|
+
const { extractedAnswers, ...extractedFields } = scorerResult.extractedFields;
|
|
397
404
|
const score = await MCQScoreSchemaV1.newWithId(
|
|
398
405
|
{
|
|
399
406
|
scoringMethod: ScoringMethod.ai,
|
|
400
407
|
value: scorerResult.value,
|
|
401
|
-
extractedAnswers: scorerResult.extractedFields.extractedAnswers,
|
|
408
|
+
extractedAnswers,
|
|
402
409
|
responseId: response.id,
|
|
403
410
|
explanation: scorerResult.explanation,
|
|
404
|
-
metadata: scorerResult.metadata,
|
|
405
411
|
scorerAIInputCost: scorerResult.inputCost,
|
|
406
412
|
scorerAIOutputCost: scorerResult.outputCost,
|
|
407
413
|
scorerAIInputTokensUsed: scorerResult.inputTokensUsed,
|
|
408
414
|
scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,
|
|
409
415
|
scorerAIProvider: scorerResult.provider,
|
|
410
416
|
scorerAIModelSlug: runConfig.llmJudgeModel,
|
|
411
|
-
scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id
|
|
417
|
+
scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,
|
|
418
|
+
metadata: {
|
|
419
|
+
...scorerResult.metadata,
|
|
420
|
+
extractedFields
|
|
421
|
+
}
|
|
412
422
|
},
|
|
413
423
|
params.idGenerators?.score ?? idGeneratorUUIDv7
|
|
414
424
|
);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/benchmarks/peerbench/index.ts","../../src/benchmarks/peerbench/schema-sets/mcq.v1.ts","../../src/benchmarks/peerbench/schema-sets/multi-turn.v1.ts","../../src/benchmarks/peerbench/schema-sets/qa.v1.ts","../../src/benchmarks/peerbench/runner.ts","../../src/benchmarks/peerbench/storages/json.ts"],"sourcesContent":["export * from \"./schema-sets/mcq.v1\";\nexport * from \"./schema-sets/multi-turn.v1\";\nexport * from \"./schema-sets/qa.v1\";\n\nexport * from \"./runner\";\n\nexport * from \"./storages/json\";\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const MCQKind = `llm/mcq` as const;\n\nexport const MCQTestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n question: z.string(),\n options: z.record(z.string(), z.string()),\n correctAnswerKeys: z.string().array(),\n },\n});\nexport type MCQTestCaseV1 = z.infer<typeof MCQTestCaseSchemaV1>;\n\nexport const MCQResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n },\n});\nexport type MCQResponseV1 = z.infer<typeof MCQResponseSchemaV1>;\n\nexport const MCQScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n extractedAnswers: z.array(z.string()),\n },\n});\nexport type MCQScoreV1 = z.infer<typeof 
MCQScoreSchemaV1>;\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const MultiTurnKind = `llm/multi-turn` as const;\n\nexport const MultiTurnTestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n messages: z\n .object({\n role: z.string(),\n content: z.string(),\n goodAnswers: z.string().array().optional(),\n badAnswers: z.string().array().optional(),\n })\n .array(),\n\n maxTurns: z.number().optional(),\n expectedOutcome: z.string().optional(),\n },\n});\nexport type MultiTurnTestCaseV1 = z.infer<typeof MultiTurnTestCaseSchemaV1>;\n\nexport const MultiTurnResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n replies: z\n .object({\n messageIndex: z.number(),\n startedAt: z.number(),\n completedAt: z.number(),\n data: z.string(),\n\n inputTokensUsed: z.number().optional(),\n outputTokensUsed: z.number().optional(),\n inputCost: z.string().optional(),\n outputCost: z.string().optional(),\n })\n .array(),\n },\n});\nexport type MultiTurnResponseV1 = z.infer<typeof MultiTurnResponseSchemaV1>;\n\nexport const MultiTurnScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n individualScores: z\n .object({\n replyIndex: z.number(),\n value: z.number(),\n })\n .array(),\n 
},\n});\nexport type MultiTurnScoreV1 = z.infer<typeof MultiTurnScoreSchemaV1>;\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const QAKind = `llm/qa` as const;\n\nexport const QATestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n question: z.string(),\n goodAnswers: z.string().array(),\n badAnswers: z.string().array(),\n },\n});\nexport type QATestCaseV1 = z.infer<typeof QATestCaseSchemaV1>;\n\nexport const QAResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n },\n});\nexport type QAResponseV1 = z.infer<typeof QAResponseSchemaV1>;\n\nexport const QAScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n },\n});\nexport type QAScoreV1 = z.infer<typeof QAScoreSchemaV1>;\n","import { defineRunner } from \"@/helpers/define-runner\";\nimport { AbstractLLMProvider } from \"@/providers\";\nimport {\n SimpleSystemPromptSchemaV1,\n SimpleSystemPromptV1,\n} from \"@/schemas/llm\";\nimport { LLMAsAJudgeScorer, MCQScorer } from \"@/scorers\";\nimport { IdGenerator, ScoringMethod } from \"@/types\";\nimport { idGeneratorUUIDv7 } from \"@/utils\";\nimport { ChatCompletionMessageParam } from \"openai/resources/index\";\nimport Handlebars from \"handlebars\";\nimport z from \"zod\";\nimport {\n 
MCQResponseSchemaV1,\n MCQScoreSchemaV1,\n MCQTestCaseSchemaV1,\n MCQTestCaseV1,\n} from \"./schema-sets/mcq.v1\";\nimport {\n QAResponseSchemaV1,\n QAScoreSchemaV1,\n QATestCaseSchemaV1,\n QATestCaseV1,\n} from \"./schema-sets/qa.v1\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport const peerbenchRunner = defineRunner(\n {\n schemaSets: [\n {\n testCase: MCQTestCaseSchemaV1,\n response: MCQResponseSchemaV1,\n score: MCQScoreSchemaV1,\n },\n {\n testCase: QATestCaseSchemaV1,\n response: QAResponseSchemaV1,\n score: QAScoreSchemaV1,\n },\n ],\n providers: [AbstractLLMProvider],\n scorers: [LLMAsAJudgeScorer, MCQScorer],\n\n runConfigSchema: {\n model: z.string(),\n llmJudgeModel: z.string().optional(),\n llmJudgeSystemPrompt: SimpleSystemPromptSchemaV1.optional(),\n systemPrompt: SimpleSystemPromptSchemaV1.optional(),\n templateVariables: z.record(z.string(), z.string()).optional(),\n },\n },\n async (params) => {\n const { testCase, provider, scorer, runConfig } = params;\n const messages: ChatCompletionMessageParam[] = [];\n\n if (runConfig.systemPrompt) {\n messages.push({\n role: \"system\",\n content: runConfig.systemPrompt.content,\n });\n }\n\n if (testCase.kind === \"llm/mcq.tc\") {\n messages.push({\n role: \"user\",\n content: formatMCQ(testCase),\n });\n templateMessages(messages, runConfig.templateVariables ?? {});\n\n return runMCQ({\n testCase,\n messages,\n provider,\n scorer,\n runConfig,\n idGenerators: {\n response: params.idGenerators?.response ?? idGeneratorUUIDv7,\n score: params.idGenerators?.score ?? idGeneratorUUIDv7,\n },\n });\n }\n\n if (testCase.kind === \"llm/qa.tc\") {\n if (\n scorer &&\n scorer?.kind !== (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)\n ) {\n throw new Error(\n `QA test cases can only be scored with an LLM as a judge scorer, but ${scorer?.kind} was provided`\n );\n }\n\n messages.push({\n role: \"user\",\n content: testCase.question,\n });\n templateMessages(messages, runConfig.templateVariables ?? 
{});\n\n return runQA({\n testCase,\n messages,\n provider,\n scorer,\n runConfig,\n idGenerators: {\n response: params.idGenerators?.response ?? idGeneratorUUIDv7,\n score: params.idGenerators?.score ?? idGeneratorUUIDv7,\n },\n });\n }\n\n throw new Error(\"Unsupported test case kind\");\n }\n);\n\nasync function runQA(params: {\n messages: ChatCompletionMessageParam[];\n testCase: QATestCaseV1;\n provider: AbstractLLMProvider;\n scorer?: LLMAsAJudgeScorer;\n runConfig: {\n model: string;\n llmJudgeModel?: string;\n llmJudgeSystemPrompt?: SimpleSystemPromptV1;\n systemPrompt?: SimpleSystemPromptV1;\n };\n idGenerators: {\n response: IdGenerator;\n score: IdGenerator;\n };\n}) {\n const { messages, testCase, provider, scorer, runConfig } = params;\n\n const providerResponse = await provider.forward({\n model: runConfig.model,\n messages,\n });\n\n const response = await QAResponseSchemaV1.newWithId(\n {\n data: providerResponse.data,\n startedAt: providerResponse.startedAt,\n completedAt: providerResponse.completedAt,\n testCaseId: testCase.id,\n modelSlug: runConfig.model,\n provider: provider.kind,\n systemPromptId: runConfig.systemPrompt?.id,\n\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n },\n params.idGenerators?.response ?? 
idGeneratorUUIDv7\n );\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)) {\n if (!runConfig.llmJudgeModel) {\n throw new Error(\n \"LLM judge model is required when using LLM as a judge scorer\"\n );\n }\n\n const scorerResult = await scorer.score({\n model: runConfig.llmJudgeModel,\n response: response.data,\n rubric: `Expected/Valid answers: ${testCase.goodAnswers.join(\"\\n\")}\\nInvalid answers: ${testCase.badAnswers.join(\"\\n\")}`,\n systemPrompt: runConfig.llmJudgeSystemPrompt?.content,\n criteria: [\n {\n id: \"correctness\",\n description:\n \"Is the response matches with the expected/valid answers in terms of meaning?\",\n weight: 1,\n },\n ],\n });\n\n if (scorerResult !== null) {\n const score = await QAScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.ai,\n value: scorerResult.value,\n responseId: response.id,\n explanation: scorerResult.explanation,\n metadata: scorerResult.metadata,\n scorerAIInputCost: scorerResult.inputCost,\n scorerAIOutputCost: scorerResult.outputCost,\n scorerAIInputTokensUsed: scorerResult.inputTokensUsed,\n scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,\n scorerAIProvider: scorerResult.provider,\n scorerAIModelSlug: runConfig.llmJudgeModel,\n scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,\n },\n params.idGenerators?.score ?? 
idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n return { response };\n}\n\nasync function runMCQ(params: {\n messages: ChatCompletionMessageParam[];\n testCase: MCQTestCaseV1;\n provider: AbstractLLMProvider;\n scorer?: MCQScorer | LLMAsAJudgeScorer;\n runConfig: {\n model: string;\n llmJudgeModel?: string;\n llmJudgeSystemPrompt?: SimpleSystemPromptV1;\n systemPrompt?: SimpleSystemPromptV1;\n };\n idGenerators: {\n response: IdGenerator;\n score: IdGenerator;\n };\n}) {\n const { messages, testCase, provider, scorer, runConfig } = params;\n\n const providerResponse = await provider.forward({\n model: runConfig.model,\n messages,\n });\n\n const response = await MCQResponseSchemaV1.newWithId(\n {\n data: providerResponse.data,\n startedAt: providerResponse.startedAt,\n completedAt: providerResponse.completedAt,\n testCaseId: testCase.id,\n modelSlug: runConfig.model,\n provider: provider.kind,\n systemPromptId: runConfig.systemPrompt?.id,\n\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n },\n params.idGenerators?.response ?? idGeneratorUUIDv7\n );\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/mcq` as const)) {\n const scorerResult = await scorer.score({\n response: response.data,\n choices: testCase.options,\n correctAnswers: testCase.correctAnswerKeys,\n });\n\n if (scorerResult !== null) {\n const score = await MCQScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.algo,\n value: scorerResult.value,\n responseId: response.id,\n extractedAnswers: scorerResult.extractedAnswers,\n explanation: scorerResult.explanation,\n metadata: scorerResult.metadata,\n },\n params.idGenerators?.score ?? 
idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)) {\n if (!runConfig.llmJudgeModel) {\n throw new Error(\n \"LLM judge model is required when using LLM as a judge scorer\"\n );\n }\n\n const scorerResult = await scorer.score({\n model: runConfig.llmJudgeModel,\n criteria: [\n {\n id: \"correctness\",\n description:\n \"Is the given answer key matches with one of the correct answer keys?\",\n weight: 1,\n },\n ],\n rubric: `Answer text itself or the key (A, B, C) is accepted\nValid answer keys: ${testCase.correctAnswerKeys.map((key) => `- ${key}`).join(\"\\n\")}\nValid Answer texts: ${testCase.correctAnswerKeys.map((key) => `- ${testCase.options?.[key] ?? \"\"}`).join(\"\\n\")}`,\n fieldsToExtract: {\n extractedAnswers: z\n .string()\n .array()\n .describe(\n \"The extracted answer keys, valid or invalid (even if the answer text is provided rather than the key)\"\n ),\n },\n response: response.data,\n systemPrompt: runConfig.llmJudgeSystemPrompt?.content,\n });\n\n if (scorerResult !== null) {\n const score = await MCQScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.ai,\n value: scorerResult.value,\n extractedAnswers: scorerResult.extractedFields.extractedAnswers,\n responseId: response.id,\n explanation: scorerResult.explanation,\n metadata: scorerResult.metadata,\n scorerAIInputCost: scorerResult.inputCost,\n scorerAIOutputCost: scorerResult.outputCost,\n scorerAIInputTokensUsed: scorerResult.inputTokensUsed,\n scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,\n scorerAIProvider: scorerResult.provider,\n scorerAIModelSlug: runConfig.llmJudgeModel,\n scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,\n },\n params.idGenerators?.score ?? 
idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n return { response };\n}\n\nfunction formatMCQ(testCase: MCQTestCaseV1) {\n return `Question: ${testCase.question}\\nOptions:\\n${Object.entries(\n testCase.options ?? {}\n )\n .map(([key, value]) => `${key}: ${value}`)\n .join(\"\\n\")}`;\n}\n\nfunction templateMessages(\n messages: ChatCompletionMessageParam[],\n templateVariables: Record<string, string>\n) {\n for (let i = 0; i < messages.length; i++) {\n const template = Handlebars.compile(messages[i]!.content);\n messages[i]!.content = template(templateVariables);\n }\n}\n","import { JSONFileStorage } from \"@/storages/json-file\";\nimport {\n MCQResponseSchemaV1,\n MCQResponseV1,\n MCQScoreSchemaV1,\n MCQScoreV1,\n MCQTestCaseSchemaV1,\n MCQTestCaseV1,\n} from \"../schema-sets/mcq.v1\";\nimport {\n QAResponseSchemaV1,\n QAResponseV1,\n QAScoreSchemaV1,\n QAScoreV1,\n QATestCaseSchemaV1,\n QATestCaseV1,\n} from \"../schema-sets/qa.v1\";\nimport {\n MultiTurnResponseSchemaV1,\n MultiTurnResponseV1,\n MultiTurnScoreSchemaV1,\n MultiTurnScoreV1,\n MultiTurnTestCaseSchemaV1,\n MultiTurnTestCaseV1,\n} from \"../schema-sets/multi-turn.v1\";\nimport z from \"zod\";\n\nexport class PeerbenchJSONStorage extends JSONFileStorage<\n | MCQTestCaseV1\n | MCQResponseV1\n | MCQScoreV1\n | QATestCaseV1\n | QAResponseV1\n | QAScoreV1\n | MultiTurnTestCaseV1\n | MultiTurnResponseV1\n | MultiTurnScoreV1\n> {\n constructor(config: { path: string; chunkSize?: number }) {\n super({\n path: config.path,\n chunkSize: config.chunkSize,\n\n schema: z.union([\n MCQTestCaseSchemaV1,\n MCQResponseSchemaV1,\n MCQScoreSchemaV1,\n QATestCaseSchemaV1,\n QAResponseSchemaV1,\n QAScoreSchemaV1,\n MultiTurnTestCaseSchemaV1,\n MultiTurnResponseSchemaV1,\n MultiTurnScoreSchemaV1,\n ]),\n });\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACWA,SAAS,SAAS;AAEX,IAAM,UAAU;AAEhB,IAAM,sBAAsB,qBAAqB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAU,EAAE,OAAO;AAAA,IACnB,SAAS,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC;AAAA,IACxC,mBAAmB,EAAE,OAAO,EAAE,MAAM;AAAA,EACtC;AACF,CAAC;AAGM,IAAM,sBAAsB,qBAAqB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;AAGM,IAAM,mBAAmB,kBAAkB;AAAA,EAChD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,kBAAkB,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EACtC;AACF,CAAC;;;ACrCD,SAAS,KAAAA,UAAS;AAEX,IAAM,gBAAgB;AAEtB,IAAM,4BAA4B,qBAAqB;AAAA,EAC5D,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAUA,GACP,OAAO;AAAA,MACN,MAAMA,GAAE,OAAO;AAAA,MACf,SAASA,GAAE,OAAO;AAAA,MAClB,aAAaA,GAAE,OAAO,EAAE,MAAM,EAAE,SAAS;AAAA,MACzC,YAAYA,GAAE,OAAO,EAAE,MAAM,EAAE,SAAS;AAAA,IAC1C,CAAC,EACA,MAAM;AAAA,IAET,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,IAC9B,iBAAiBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACvC;AACF,CAAC;AAGM,IAAM,4BAA4B,qBAAqB;AAAA,EAC5D,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,SAASA,GACN,OAAO;AAAA,MACN,cAAcA,GAAE,OAAO;AAAA,MACvB,WAAWA,GAAE,OAAO;AAAA,MACpB,aAAaA,GAAE,OAAO;AAAA,MACtB,MAAMA,GAAE,OAAO;AAAA,MAEf,iBAAiBA,GAAE,OAAO,EAAE,SAAS;AAAA,MACrC,kBAAkBA,GAAE,OAAO,EAAE,SAAS;AAAA,MACtC,WAAWA,GAAE,OAAO,EAAE,SAAS;AAAA,MAC/B,YAAYA,GAAE,OAAO,EAAE,SAAS;AAAA,IAClC,CAAC,EACA,MAAM;AAAA,EACX;AACF,CAAC;AAGM,IAAM,yBAAyB,kBAAkB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,kBAAkBA,GACf,OAAO;AAAA,MACN,YAAYA,GAAE,OAAO;AAAA,MACrB,OAAOA,GAAE,OAAO;AAAA,IAClB,CAAC,EACA,MAAM;AAAA,EACX;AACF,CAAC;;;AC/DD,SAAS,KAAAC,UAAS;AAEX,IAAM,SAAS;AAEf,IAAM,qBAAqB,qBAAqB;AAAA,EACrD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AA
AA,IACN,UAAUA,GAAE,OAAO;AAAA,IACnB,aAAaA,GAAE,OAAO,EAAE,MAAM;AAAA,IAC9B,YAAYA,GAAE,OAAO,EAAE,MAAM;AAAA,EAC/B;AACF,CAAC;AAGM,IAAM,qBAAqB,qBAAqB;AAAA,EACrD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;AAGM,IAAM,kBAAkB,kBAAkB;AAAA,EAC/C,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;;;ACrCD,OAAO,gBAAgB;AACvB,OAAOC,QAAO;AAeP,IAAM,kBAAkB;AAAA,EAC7B;AAAA,IACE,YAAY;AAAA,MACV;AAAA,QACE,UAAU;AAAA,QACV,UAAU;AAAA,QACV,OAAO;AAAA,MACT;AAAA,MACA;AAAA,QACE,UAAU;AAAA,QACV,UAAU;AAAA,QACV,OAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,WAAW,CAAC,mBAAmB;AAAA,IAC/B,SAAS,CAAC,mBAAmB,SAAS;AAAA,IAEtC,iBAAiB;AAAA,MACf,OAAOC,GAAE,OAAO;AAAA,MAChB,eAAeA,GAAE,OAAO,EAAE,SAAS;AAAA,MACnC,sBAAsB,2BAA2B,SAAS;AAAA,MAC1D,cAAc,2BAA2B,SAAS;AAAA,MAClD,mBAAmBA,GAAE,OAAOA,GAAE,OAAO,GAAGA,GAAE,OAAO,CAAC,EAAE,SAAS;AAAA,IAC/D;AAAA,EACF;AAAA,EACA,OAAO,WAAW;AAChB,UAAM,EAAE,UAAU,UAAU,QAAQ,UAAU,IAAI;AAClD,UAAM,WAAyC,CAAC;AAEhD,QAAI,UAAU,cAAc;AAC1B,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,UAAU,aAAa;AAAA,MAClC,CAAC;AAAA,IACH;AAEA,QAAI,SAAS,SAAS,cAAc;AAClC,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,UAAU,QAAQ;AAAA,MAC7B,CAAC;AACD,uBAAiB,UAAU,UAAU,qBAAqB,CAAC,CAAC;AAE5D,aAAO,OAAO;AAAA,QACZ;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,cAAc;AAAA,UACZ,UAAU,OAAO,cAAc,YAAY;AAAA,UAC3C,OAAO,OAAO,cAAc,SAAS;AAAA,QACvC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,QAAI,SAAS,SAAS,aAAa;AACjC,UACE,UACA,QAAQ,SAAU,GAAG,mBAAmB,mBACxC;AACA,cAAM,IAAI;AAAA,UACR,uEAAuE,QAAQ,IAAI;AAAA,QACrF;AAAA,MACF;AAEA,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,SAAS;AAAA,MACpB,CAAC;AACD,uBAAiB,UAAU,UAAU,qBAAqB,CAAC,CAAC;AAE5D,aAAO,MAAM;AAAA,QACX;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,cAAc;AAAA,UACZ,UAAU,OAAO,cAAc,YAAY;AAAA,UAC3C,OAAO,OAAO,cAAc,SAAS;AAAA,QACvC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,IAAI,MAAM,4BAA4B;AAAA,EAC9C;AACF;AAEA,eAAe,MAAM,QAelB;AACD,QAAM,EAAE,UAAU,UAAU,UAAU,QAAQ,UAAU,IAAI;AAE5D,QAAM,mBAAmB,MAAM,SAAS,QAAQ;AAAA,IAC9C,OAAO,UAAU;AAAA,IACjB;AAAA,EACF,
CAAC;AAED,QAAM,WAAW,MAAM,mBAAmB;AAAA,IACxC;AAAA,MACE,MAAM,iBAAiB;AAAA,MACvB,WAAW,iBAAiB;AAAA,MAC5B,aAAa,iBAAiB;AAAA,MAC9B,YAAY,SAAS;AAAA,MACrB,WAAW,UAAU;AAAA,MACrB,UAAU,SAAS;AAAA,MACnB,gBAAgB,UAAU,cAAc;AAAA,MAExC,iBAAiB,iBAAiB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,WAAW,iBAAiB;AAAA,MAC5B,YAAY,iBAAiB;AAAA,IAC/B;AAAA,IACA,OAAO,cAAc,YAAY;AAAA,EACnC;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,mBAA6B;AACvE,QAAI,CAAC,UAAU,eAAe;AAC5B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,OAAO,UAAU;AAAA,MACjB,UAAU,SAAS;AAAA,MACnB,QAAQ,2BAA2B,SAAS,YAAY,KAAK,IAAI,CAAC;AAAA,mBAAsB,SAAS,WAAW,KAAK,IAAI,CAAC;AAAA,MACtH,cAAc,UAAU,sBAAsB;AAAA,MAC9C,UAAU;AAAA,QACR;AAAA,UACE,IAAI;AAAA,UACJ,aACE;AAAA,UACF,QAAQ;AAAA,QACV;AAAA,MACF;AAAA,IACF,CAAC;AAED,QAAI,iBAAiB,MAAM;AACzB,YAAM,QAAQ,MAAM,gBAAgB;AAAA,QAClC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB,YAAY,SAAS;AAAA,UACrB,aAAa,aAAa;AAAA,UAC1B,UAAU,aAAa;AAAA,UACvB,mBAAmB,aAAa;AAAA,UAChC,oBAAoB,aAAa;AAAA,UACjC,yBAAyB,aAAa;AAAA,UACtC,0BAA0B,aAAa;AAAA,UACvC,kBAAkB,aAAa;AAAA,UAC/B,mBAAmB,UAAU;AAAA,UAC7B,wBAAwB,UAAU,sBAAsB;AAAA,QAC1D;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS;AACpB;AAEA,eAAe,OAAO,QAenB;AACD,QAAM,EAAE,UAAU,UAAU,UAAU,QAAQ,UAAU,IAAI;AAE5D,QAAM,mBAAmB,MAAM,SAAS,QAAQ;AAAA,IAC9C,OAAO,UAAU;AAAA,IACjB;AAAA,EACF,CAAC;AAED,QAAM,WAAW,MAAM,oBAAoB;AAAA,IACzC;AAAA,MACE,MAAM,iBAAiB;AAAA,MACvB,WAAW,iBAAiB;AAAA,MAC5B,aAAa,iBAAiB;AAAA,MAC9B,YAAY,SAAS;AAAA,MACrB,WAAW,UAAU;AAAA,MACrB,UAAU,SAAS;AAAA,MACnB,gBAAgB,UAAU,cAAc;AAAA,MAExC,iBAAiB,iBAAiB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,WAAW,iBAAiB;AAAA,MAC5B,YAAY,iBAAiB;AAAA,IAC/B;AAAA,IACA,OAAO,cAAc,YAAY;AAAA,EACnC;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,QAAkB;AAC5D,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,UAAU,SAAS;AAAA,MACnB,SAAS,SAAS;AAAA,MAClB,gBAAgB,SAAS;AAAA,IAC3B,CAAC;AAED,QAAI,iBAAiB,MAAM;AACzB,YAAM,QAAQ,MAAM,iBAAiB;AAAA,QACnC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB,YAAY,SAAS;AAAA,UACrB,kBAAkB,aAAa;AAAA,UAC/B,aAAa,aAA
a;AAAA,UAC1B,UAAU,aAAa;AAAA,QACzB;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,mBAA6B;AACvE,QAAI,CAAC,UAAU,eAAe;AAC5B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,OAAO,UAAU;AAAA,MACjB,UAAU;AAAA,QACR;AAAA,UACE,IAAI;AAAA,UACJ,aACE;AAAA,UACF,QAAQ;AAAA,QACV;AAAA,MACF;AAAA,MACA,QAAQ;AAAA,qBACO,SAAS,kBAAkB,IAAI,CAAC,QAAQ,KAAK,GAAG,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,sBAC7D,SAAS,kBAAkB,IAAI,CAAC,QAAQ,KAAK,SAAS,UAAU,GAAG,KAAK,EAAE,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,MACxG,iBAAiB;AAAA,QACf,kBAAkBA,GACf,OAAO,EACP,MAAM,EACN;AAAA,UACC;AAAA,QACF;AAAA,MACJ;AAAA,MACA,UAAU,SAAS;AAAA,MACnB,cAAc,UAAU,sBAAsB;AAAA,IAChD,CAAC;AAED,QAAI,iBAAiB,MAAM;AACzB,YAAM,QAAQ,MAAM,iBAAiB;AAAA,QACnC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB,kBAAkB,aAAa,gBAAgB;AAAA,UAC/C,YAAY,SAAS;AAAA,UACrB,aAAa,aAAa;AAAA,UAC1B,UAAU,aAAa;AAAA,UACvB,mBAAmB,aAAa;AAAA,UAChC,oBAAoB,aAAa;AAAA,UACjC,yBAAyB,aAAa;AAAA,UACtC,0BAA0B,aAAa;AAAA,UACvC,kBAAkB,aAAa;AAAA,UAC/B,mBAAmB,UAAU;AAAA,UAC7B,wBAAwB,UAAU,sBAAsB;AAAA,QAC1D;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS;AACpB;AAEA,SAAS,UAAU,UAAyB;AAC1C,SAAO,aAAa,SAAS,QAAQ;AAAA;AAAA,EAAe,OAAO;AAAA,IACzD,SAAS,WAAW,CAAC;AAAA,EACvB,EACG,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM,GAAG,GAAG,KAAK,KAAK,EAAE,EACxC,KAAK,IAAI,CAAC;AACf;AAEA,SAAS,iBACP,UACA,mBACA;AACA,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,WAAW,WAAW,QAAQ,SAAS,CAAC,EAAG,OAAO;AACxD,aAAS,CAAC,EAAG,UAAU,SAAS,iBAAiB;AAAA,EACnD;AACF;;;AC/TA,OAAOC,QAAO;AAEP,IAAM,uBAAN,cAAmC,gBAUxC;AAAA,EACA,YAAY,QAA8C;AACxD,UAAM;AAAA,MACJ,MAAM,OAAO;AAAA,MACb,WAAW,OAAO;AAAA,MAElB,QAAQA,GAAE,MAAM;AAAA,QACd;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;","names":["z","z","z","z","z"]}
|
|
1
|
+
{"version":3,"sources":["../../src/benchmarks/peerbench/index.ts","../../src/benchmarks/peerbench/schema-sets/mcq.v1.ts","../../src/benchmarks/peerbench/schema-sets/multi-turn.v1.ts","../../src/benchmarks/peerbench/schema-sets/qa.v1.ts","../../src/benchmarks/peerbench/runner.ts","../../src/benchmarks/peerbench/storages/json.ts"],"sourcesContent":["export * from \"./schema-sets/mcq.v1\";\nexport * from \"./schema-sets/multi-turn.v1\";\nexport * from \"./schema-sets/qa.v1\";\n\nexport * from \"./runner\";\n\nexport * from \"./storages/json\";\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const MCQKind = `llm/mcq` as const;\n\nexport const MCQTestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n question: z.string(),\n options: z.record(z.string(), z.string()),\n correctAnswerKeys: z.string().array(),\n },\n});\nexport type MCQTestCaseV1 = z.infer<typeof MCQTestCaseSchemaV1>;\n\nexport const MCQResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n },\n});\nexport type MCQResponseV1 = z.infer<typeof MCQResponseSchemaV1>;\n\nexport const MCQScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MCQKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n extractedAnswers: z.array(z.string()),\n },\n});\nexport type MCQScoreV1 = z.infer<typeof 
MCQScoreSchemaV1>;\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const MultiTurnKind = `llm/multi-turn` as const;\n\nexport const MultiTurnTestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n messages: z\n .object({\n role: z.string(),\n content: z.string(),\n goodAnswers: z.string().array().optional(),\n badAnswers: z.string().array().optional(),\n })\n .array(),\n\n maxTurns: z.number().optional(),\n expectedOutcome: z.string().optional(),\n },\n});\nexport type MultiTurnTestCaseV1 = z.infer<typeof MultiTurnTestCaseSchemaV1>;\n\nexport const MultiTurnResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n replies: z\n .object({\n messageIndex: z.number(),\n startedAt: z.number(),\n completedAt: z.number(),\n data: z.string(),\n\n inputTokensUsed: z.number().optional(),\n outputTokensUsed: z.number().optional(),\n inputCost: z.string().optional(),\n outputCost: z.string().optional(),\n })\n .array(),\n },\n});\nexport type MultiTurnResponseV1 = z.infer<typeof MultiTurnResponseSchemaV1>;\n\nexport const MultiTurnScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: MultiTurnKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n individualScores: z\n .object({\n replyIndex: z.number(),\n value: z.number(),\n })\n .array(),\n 
},\n});\nexport type MultiTurnScoreV1 = z.infer<typeof MultiTurnScoreSchemaV1>;\n","import { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport {\n BaseResponseSchemaV1,\n BaseScoreSchemaV1,\n BaseTestCaseSchemaV1,\n defineResponseSchema,\n defineScoreSchema,\n defineTestCaseSchema,\n} from \"@/schemas\";\nimport { ExtensionLLMResponseFieldsV1 } from \"@/schemas/extensions/response/llm\";\nimport { ExtensionLLMAsAJudgeScoreFieldsV1 } from \"@/schemas/extensions/score/llm-as-a-judge-scorer\";\nimport { z } from \"zod\";\n\nexport const QAKind = `llm/qa` as const;\n\nexport const QATestCaseSchemaV1 = defineTestCaseSchema({\n baseSchema: BaseTestCaseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n question: z.string(),\n goodAnswers: z.string().array(),\n badAnswers: z.string().array(),\n },\n});\nexport type QATestCaseV1 = z.infer<typeof QATestCaseSchemaV1>;\n\nexport const QAResponseSchemaV1 = defineResponseSchema({\n baseSchema: BaseResponseSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMResponseFieldsV1,\n },\n});\nexport type QAResponseV1 = z.infer<typeof QAResponseSchemaV1>;\n\nexport const QAScoreSchemaV1 = defineScoreSchema({\n baseSchema: BaseScoreSchemaV1,\n namespace: PEERBENCH_NAMESPACE,\n kind: QAKind,\n schemaVersion: 1,\n fields: {\n ...ExtensionLLMAsAJudgeScoreFieldsV1,\n },\n});\nexport type QAScoreV1 = z.infer<typeof QAScoreSchemaV1>;\n","import { defineRunner } from \"@/helpers/define-runner\";\nimport { AbstractLLMProvider } from \"@/providers\";\nimport {\n SimpleSystemPromptSchemaV1,\n SimpleSystemPromptV1,\n} from \"@/schemas/llm\";\nimport { LLMAsAJudgeScorer, MCQScorer } from \"@/scorers\";\nimport { IdGenerator, ScoringMethod } from \"@/types\";\nimport { idGeneratorUUIDv7 } from \"@/utils\";\nimport { ChatCompletionMessageParam } from \"openai/resources/index\";\nimport Handlebars from \"handlebars\";\nimport z from \"zod\";\nimport {\n 
MCQResponseSchemaV1,\n MCQScoreSchemaV1,\n MCQTestCaseSchemaV1,\n MCQTestCaseV1,\n} from \"./schema-sets/mcq.v1\";\nimport {\n QAResponseSchemaV1,\n QAScoreSchemaV1,\n QATestCaseSchemaV1,\n QATestCaseV1,\n} from \"./schema-sets/qa.v1\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport const peerbenchRunner = defineRunner(\n {\n schemaSets: [\n {\n testCase: MCQTestCaseSchemaV1,\n response: MCQResponseSchemaV1,\n score: MCQScoreSchemaV1,\n },\n {\n testCase: QATestCaseSchemaV1,\n response: QAResponseSchemaV1,\n score: QAScoreSchemaV1,\n },\n ],\n providers: [AbstractLLMProvider],\n scorers: [LLMAsAJudgeScorer, MCQScorer],\n\n runConfigSchema: {\n model: z.string(),\n llmJudgeModel: z.string().optional(),\n llmJudgeSystemPrompt: SimpleSystemPromptSchemaV1.optional(),\n llmJudgeFieldsToExtract: z\n .record(z.string(), z.custom<z.ZodType>())\n .optional(),\n systemPrompt: SimpleSystemPromptSchemaV1.optional(),\n templateVariables: z.record(z.string(), z.string()).optional(),\n },\n },\n async (params) => {\n const { testCase, provider, scorer, runConfig } = params;\n const messages: ChatCompletionMessageParam[] = [];\n\n if (runConfig.systemPrompt) {\n messages.push({\n role: \"system\",\n content: runConfig.systemPrompt.content,\n });\n }\n\n if (testCase.kind === \"llm/mcq.tc\") {\n messages.push({\n role: \"user\",\n content: formatMCQ(testCase),\n });\n templateMessages(messages, runConfig.templateVariables ?? {});\n\n return runMCQ({\n testCase,\n messages,\n provider,\n scorer,\n runConfig,\n idGenerators: {\n response: params.idGenerators?.response ?? idGeneratorUUIDv7,\n score: params.idGenerators?.score ?? 
idGeneratorUUIDv7,\n },\n });\n }\n\n if (testCase.kind === \"llm/qa.tc\") {\n if (\n scorer &&\n scorer?.kind !== (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)\n ) {\n throw new Error(\n `QA test cases can only be scored with an LLM as a judge scorer, but ${scorer?.kind} was provided`\n );\n }\n\n messages.push({\n role: \"user\",\n content: testCase.question,\n });\n templateMessages(messages, runConfig.templateVariables ?? {});\n\n return runQA({\n testCase,\n messages,\n provider,\n scorer,\n runConfig,\n idGenerators: {\n response: params.idGenerators?.response ?? idGeneratorUUIDv7,\n score: params.idGenerators?.score ?? idGeneratorUUIDv7,\n },\n });\n }\n\n throw new Error(\"Unsupported test case kind\");\n }\n);\n\nasync function runQA(params: {\n messages: ChatCompletionMessageParam[];\n testCase: QATestCaseV1;\n provider: AbstractLLMProvider;\n scorer?: LLMAsAJudgeScorer;\n runConfig: {\n model: string;\n llmJudgeModel?: string;\n llmJudgeSystemPrompt?: SimpleSystemPromptV1;\n llmJudgeFieldsToExtract?: Record<string, z.ZodType>;\n systemPrompt?: SimpleSystemPromptV1;\n };\n idGenerators: {\n response: IdGenerator;\n score: IdGenerator;\n };\n}) {\n const { messages, testCase, provider, scorer, runConfig } = params;\n\n const providerResponse = await provider.forward({\n model: runConfig.model,\n messages,\n });\n\n const response = await QAResponseSchemaV1.newWithId(\n {\n data: providerResponse.data,\n startedAt: providerResponse.startedAt,\n completedAt: providerResponse.completedAt,\n testCaseId: testCase.id,\n modelSlug: runConfig.model,\n provider: provider.kind,\n systemPromptId: runConfig.systemPrompt?.id,\n\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n },\n params.idGenerators?.response ?? 
idGeneratorUUIDv7\n );\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)) {\n if (!runConfig.llmJudgeModel) {\n throw new Error(\n \"LLM judge model is required when using LLM as a judge scorer\"\n );\n }\n\n const scorerResult = await scorer.score({\n model: runConfig.llmJudgeModel,\n response: response.data,\n rubric: `Expected/Valid answers: ${testCase.goodAnswers.join(\"\\n\")}\\nInvalid answers: ${testCase.badAnswers.join(\"\\n\")}`,\n systemPrompt: runConfig.llmJudgeSystemPrompt?.content,\n criteria: [\n {\n id: \"correctness\",\n description:\n \"Is the response matches with the expected/valid answers in terms of meaning?\",\n weight: 1,\n },\n ],\n fieldsToExtract: runConfig.llmJudgeFieldsToExtract ?? {},\n });\n\n if (scorerResult !== null) {\n const score = await QAScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.ai,\n value: scorerResult.value,\n responseId: response.id,\n explanation: scorerResult.explanation,\n scorerAIInputCost: scorerResult.inputCost,\n scorerAIOutputCost: scorerResult.outputCost,\n scorerAIInputTokensUsed: scorerResult.inputTokensUsed,\n scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,\n scorerAIProvider: scorerResult.provider,\n scorerAIModelSlug: runConfig.llmJudgeModel,\n scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,\n metadata: {\n ...scorerResult.metadata,\n extractedFields: scorerResult.extractedFields,\n },\n },\n params.idGenerators?.score ?? 
idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n return { response };\n}\n\nasync function runMCQ(params: {\n messages: ChatCompletionMessageParam[];\n testCase: MCQTestCaseV1;\n provider: AbstractLLMProvider;\n scorer?: MCQScorer | LLMAsAJudgeScorer;\n runConfig: {\n model: string;\n llmJudgeModel?: string;\n llmJudgeSystemPrompt?: SimpleSystemPromptV1;\n llmJudgeFieldsToExtract?: Record<string, z.ZodType>;\n systemPrompt?: SimpleSystemPromptV1;\n };\n idGenerators: {\n response: IdGenerator;\n score: IdGenerator;\n };\n}) {\n const { messages, testCase, provider, scorer, runConfig } = params;\n\n const providerResponse = await provider.forward({\n model: runConfig.model,\n messages,\n });\n\n const response = await MCQResponseSchemaV1.newWithId(\n {\n data: providerResponse.data,\n startedAt: providerResponse.startedAt,\n completedAt: providerResponse.completedAt,\n testCaseId: testCase.id,\n modelSlug: runConfig.model,\n provider: provider.kind,\n systemPromptId: runConfig.systemPrompt?.id,\n\n inputTokensUsed: providerResponse.inputTokensUsed,\n outputTokensUsed: providerResponse.outputTokensUsed,\n inputCost: providerResponse.inputCost,\n outputCost: providerResponse.outputCost,\n },\n params.idGenerators?.response ?? idGeneratorUUIDv7\n );\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/mcq` as const)) {\n const scorerResult = await scorer.score({\n response: response.data,\n choices: testCase.options,\n correctAnswers: testCase.correctAnswerKeys,\n });\n\n if (scorerResult !== null) {\n const score = await MCQScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.algo,\n value: scorerResult.value,\n responseId: response.id,\n extractedAnswers: scorerResult.extractedAnswers,\n explanation: scorerResult.explanation,\n metadata: scorerResult.metadata,\n },\n params.idGenerators?.score ?? 
idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n if (scorer?.kind === (`${PEERBENCH_NAMESPACE}/llm-as-a-judge` as const)) {\n if (!runConfig.llmJudgeModel) {\n throw new Error(\n \"LLM judge model is required when using LLM as a judge scorer\"\n );\n }\n\n const scorerResult = await scorer.score({\n model: runConfig.llmJudgeModel,\n criteria: [\n {\n id: \"correctness\",\n description:\n \"Is the given answer key matches with one of the correct answer keys?\",\n weight: 1,\n },\n ],\n rubric: `Answer text itself or the key (A, B, C) is accepted\nValid answer keys: ${testCase.correctAnswerKeys.map((key) => `- ${key}`).join(\"\\n\")}\nValid Answer texts: ${testCase.correctAnswerKeys.map((key) => `- ${testCase.options?.[key] ?? \"\"}`).join(\"\\n\")}`,\n fieldsToExtract: {\n extractedAnswers: z\n .string()\n .array()\n .describe(\n \"The extracted answer keys, valid or invalid (even if the answer text is provided rather than the key)\"\n ),\n ...(runConfig.llmJudgeFieldsToExtract ?? {}),\n },\n response: response.data,\n systemPrompt: runConfig.llmJudgeSystemPrompt?.content,\n });\n\n if (scorerResult !== null) {\n const { extractedAnswers, ...extractedFields } =\n scorerResult.extractedFields;\n const score = await MCQScoreSchemaV1.newWithId(\n {\n scoringMethod: ScoringMethod.ai,\n value: scorerResult.value,\n extractedAnswers,\n responseId: response.id,\n explanation: scorerResult.explanation,\n scorerAIInputCost: scorerResult.inputCost,\n scorerAIOutputCost: scorerResult.outputCost,\n scorerAIInputTokensUsed: scorerResult.inputTokensUsed,\n scorerAIOutputTokensUsed: scorerResult.outputTokensUsed,\n scorerAIProvider: scorerResult.provider,\n scorerAIModelSlug: runConfig.llmJudgeModel,\n scorerAISystemPromptId: runConfig.llmJudgeSystemPrompt?.id,\n metadata: {\n ...scorerResult.metadata,\n extractedFields,\n },\n },\n params.idGenerators?.score ?? 
idGeneratorUUIDv7\n );\n\n return { response, score };\n }\n }\n\n return { response };\n}\n\nfunction formatMCQ(testCase: MCQTestCaseV1) {\n return `Question: ${testCase.question}\\nOptions:\\n${Object.entries(\n testCase.options ?? {}\n )\n .map(([key, value]) => `${key}: ${value}`)\n .join(\"\\n\")}`;\n}\n\nfunction templateMessages(\n messages: ChatCompletionMessageParam[],\n templateVariables: Record<string, string>\n) {\n for (let i = 0; i < messages.length; i++) {\n const template = Handlebars.compile(messages[i]!.content);\n messages[i]!.content = template(templateVariables);\n }\n}\n","import { JSONFileStorage } from \"@/storages/json-file\";\nimport {\n MCQResponseSchemaV1,\n MCQResponseV1,\n MCQScoreSchemaV1,\n MCQScoreV1,\n MCQTestCaseSchemaV1,\n MCQTestCaseV1,\n} from \"../schema-sets/mcq.v1\";\nimport {\n QAResponseSchemaV1,\n QAResponseV1,\n QAScoreSchemaV1,\n QAScoreV1,\n QATestCaseSchemaV1,\n QATestCaseV1,\n} from \"../schema-sets/qa.v1\";\nimport {\n MultiTurnResponseSchemaV1,\n MultiTurnResponseV1,\n MultiTurnScoreSchemaV1,\n MultiTurnScoreV1,\n MultiTurnTestCaseSchemaV1,\n MultiTurnTestCaseV1,\n} from \"../schema-sets/multi-turn.v1\";\nimport z from \"zod\";\n\nexport class PeerbenchJSONStorage extends JSONFileStorage<\n | MCQTestCaseV1\n | MCQResponseV1\n | MCQScoreV1\n | QATestCaseV1\n | QAResponseV1\n | QAScoreV1\n | MultiTurnTestCaseV1\n | MultiTurnResponseV1\n | MultiTurnScoreV1\n> {\n constructor(config: { path: string; chunkSize?: number }) {\n super({\n path: config.path,\n chunkSize: config.chunkSize,\n\n schema: z.union([\n MCQTestCaseSchemaV1,\n MCQResponseSchemaV1,\n MCQScoreSchemaV1,\n QATestCaseSchemaV1,\n QAResponseSchemaV1,\n QAScoreSchemaV1,\n MultiTurnTestCaseSchemaV1,\n MultiTurnResponseSchemaV1,\n MultiTurnScoreSchemaV1,\n ]),\n });\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACWA,SAAS,SAAS;AAEX,IAAM,UAAU;AAEhB,IAAM,sBAAsB,qBAAqB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAU,EAAE,OAAO;AAAA,IACnB,SAAS,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,CAAC;AAAA,IACxC,mBAAmB,EAAE,OAAO,EAAE,MAAM;AAAA,EACtC;AACF,CAAC;AAGM,IAAM,sBAAsB,qBAAqB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;AAGM,IAAM,mBAAmB,kBAAkB;AAAA,EAChD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,kBAAkB,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EACtC;AACF,CAAC;;;ACrCD,SAAS,KAAAA,UAAS;AAEX,IAAM,gBAAgB;AAEtB,IAAM,4BAA4B,qBAAqB;AAAA,EAC5D,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,UAAUA,GACP,OAAO;AAAA,MACN,MAAMA,GAAE,OAAO;AAAA,MACf,SAASA,GAAE,OAAO;AAAA,MAClB,aAAaA,GAAE,OAAO,EAAE,MAAM,EAAE,SAAS;AAAA,MACzC,YAAYA,GAAE,OAAO,EAAE,MAAM,EAAE,SAAS;AAAA,IAC1C,CAAC,EACA,MAAM;AAAA,IAET,UAAUA,GAAE,OAAO,EAAE,SAAS;AAAA,IAC9B,iBAAiBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACvC;AACF,CAAC;AAGM,IAAM,4BAA4B,qBAAqB;AAAA,EAC5D,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,SAASA,GACN,OAAO;AAAA,MACN,cAAcA,GAAE,OAAO;AAAA,MACvB,WAAWA,GAAE,OAAO;AAAA,MACpB,aAAaA,GAAE,OAAO;AAAA,MACtB,MAAMA,GAAE,OAAO;AAAA,MAEf,iBAAiBA,GAAE,OAAO,EAAE,SAAS;AAAA,MACrC,kBAAkBA,GAAE,OAAO,EAAE,SAAS;AAAA,MACtC,WAAWA,GAAE,OAAO,EAAE,SAAS;AAAA,MAC/B,YAAYA,GAAE,OAAO,EAAE,SAAS;AAAA,IAClC,CAAC,EACA,MAAM;AAAA,EACX;AACF,CAAC;AAGM,IAAM,yBAAyB,kBAAkB;AAAA,EACtD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,IACH,kBAAkBA,GACf,OAAO;AAAA,MACN,YAAYA,GAAE,OAAO;AAAA,MACrB,OAAOA,GAAE,OAAO;AAAA,IAClB,CAAC,EACA,MAAM;AAAA,EACX;AACF,CAAC;;;AC/DD,SAAS,KAAAC,UAAS;AAEX,IAAM,SAAS;AAEf,IAAM,qBAAqB,qBAAqB;AAAA,EACrD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AA
AA,IACN,UAAUA,GAAE,OAAO;AAAA,IACnB,aAAaA,GAAE,OAAO,EAAE,MAAM;AAAA,IAC9B,YAAYA,GAAE,OAAO,EAAE,MAAM;AAAA,EAC/B;AACF,CAAC;AAGM,IAAM,qBAAqB,qBAAqB;AAAA,EACrD,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;AAGM,IAAM,kBAAkB,kBAAkB;AAAA,EAC/C,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,MAAM;AAAA,EACN,eAAe;AAAA,EACf,QAAQ;AAAA,IACN,GAAG;AAAA,EACL;AACF,CAAC;;;ACrCD,OAAO,gBAAgB;AACvB,OAAOC,QAAO;AAeP,IAAM,kBAAkB;AAAA,EAC7B;AAAA,IACE,YAAY;AAAA,MACV;AAAA,QACE,UAAU;AAAA,QACV,UAAU;AAAA,QACV,OAAO;AAAA,MACT;AAAA,MACA;AAAA,QACE,UAAU;AAAA,QACV,UAAU;AAAA,QACV,OAAO;AAAA,MACT;AAAA,IACF;AAAA,IACA,WAAW,CAAC,mBAAmB;AAAA,IAC/B,SAAS,CAAC,mBAAmB,SAAS;AAAA,IAEtC,iBAAiB;AAAA,MACf,OAAOC,GAAE,OAAO;AAAA,MAChB,eAAeA,GAAE,OAAO,EAAE,SAAS;AAAA,MACnC,sBAAsB,2BAA2B,SAAS;AAAA,MAC1D,yBAAyBA,GACtB,OAAOA,GAAE,OAAO,GAAGA,GAAE,OAAkB,CAAC,EACxC,SAAS;AAAA,MACZ,cAAc,2BAA2B,SAAS;AAAA,MAClD,mBAAmBA,GAAE,OAAOA,GAAE,OAAO,GAAGA,GAAE,OAAO,CAAC,EAAE,SAAS;AAAA,IAC/D;AAAA,EACF;AAAA,EACA,OAAO,WAAW;AAChB,UAAM,EAAE,UAAU,UAAU,QAAQ,UAAU,IAAI;AAClD,UAAM,WAAyC,CAAC;AAEhD,QAAI,UAAU,cAAc;AAC1B,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,UAAU,aAAa;AAAA,MAClC,CAAC;AAAA,IACH;AAEA,QAAI,SAAS,SAAS,cAAc;AAClC,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,UAAU,QAAQ;AAAA,MAC7B,CAAC;AACD,uBAAiB,UAAU,UAAU,qBAAqB,CAAC,CAAC;AAE5D,aAAO,OAAO;AAAA,QACZ;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,cAAc;AAAA,UACZ,UAAU,OAAO,cAAc,YAAY;AAAA,UAC3C,OAAO,OAAO,cAAc,SAAS;AAAA,QACvC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,QAAI,SAAS,SAAS,aAAa;AACjC,UACE,UACA,QAAQ,SAAU,GAAG,mBAAmB,mBACxC;AACA,cAAM,IAAI;AAAA,UACR,uEAAuE,QAAQ,IAAI;AAAA,QACrF;AAAA,MACF;AAEA,eAAS,KAAK;AAAA,QACZ,MAAM;AAAA,QACN,SAAS,SAAS;AAAA,MACpB,CAAC;AACD,uBAAiB,UAAU,UAAU,qBAAqB,CAAC,CAAC;AAE5D,aAAO,MAAM;AAAA,QACX;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA,cAAc;AAAA,UACZ,UAAU,OAAO,cAAc,YAAY;AAAA,UAC3C,OAAO,OAAO,cAAc,SAAS;AAAA,QACvC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,IAAI,MAAM,4BAA4B;AAAA,EAC9C;AACF;AAEA,eAAe,MAAM,QAgBlB;AACD,QAAM,EAAE,UAAU,UAAU,UAAU,QAAQ,UAAU,IAAI;A
AE5D,QAAM,mBAAmB,MAAM,SAAS,QAAQ;AAAA,IAC9C,OAAO,UAAU;AAAA,IACjB;AAAA,EACF,CAAC;AAED,QAAM,WAAW,MAAM,mBAAmB;AAAA,IACxC;AAAA,MACE,MAAM,iBAAiB;AAAA,MACvB,WAAW,iBAAiB;AAAA,MAC5B,aAAa,iBAAiB;AAAA,MAC9B,YAAY,SAAS;AAAA,MACrB,WAAW,UAAU;AAAA,MACrB,UAAU,SAAS;AAAA,MACnB,gBAAgB,UAAU,cAAc;AAAA,MAExC,iBAAiB,iBAAiB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,WAAW,iBAAiB;AAAA,MAC5B,YAAY,iBAAiB;AAAA,IAC/B;AAAA,IACA,OAAO,cAAc,YAAY;AAAA,EACnC;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,mBAA6B;AACvE,QAAI,CAAC,UAAU,eAAe;AAC5B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,OAAO,UAAU;AAAA,MACjB,UAAU,SAAS;AAAA,MACnB,QAAQ,2BAA2B,SAAS,YAAY,KAAK,IAAI,CAAC;AAAA,mBAAsB,SAAS,WAAW,KAAK,IAAI,CAAC;AAAA,MACtH,cAAc,UAAU,sBAAsB;AAAA,MAC9C,UAAU;AAAA,QACR;AAAA,UACE,IAAI;AAAA,UACJ,aACE;AAAA,UACF,QAAQ;AAAA,QACV;AAAA,MACF;AAAA,MACA,iBAAiB,UAAU,2BAA2B,CAAC;AAAA,IACzD,CAAC;AAED,QAAI,iBAAiB,MAAM;AACzB,YAAM,QAAQ,MAAM,gBAAgB;AAAA,QAClC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB,YAAY,SAAS;AAAA,UACrB,aAAa,aAAa;AAAA,UAC1B,mBAAmB,aAAa;AAAA,UAChC,oBAAoB,aAAa;AAAA,UACjC,yBAAyB,aAAa;AAAA,UACtC,0BAA0B,aAAa;AAAA,UACvC,kBAAkB,aAAa;AAAA,UAC/B,mBAAmB,UAAU;AAAA,UAC7B,wBAAwB,UAAU,sBAAsB;AAAA,UACxD,UAAU;AAAA,YACR,GAAG,aAAa;AAAA,YAChB,iBAAiB,aAAa;AAAA,UAChC;AAAA,QACF;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS;AACpB;AAEA,eAAe,OAAO,QAgBnB;AACD,QAAM,EAAE,UAAU,UAAU,UAAU,QAAQ,UAAU,IAAI;AAE5D,QAAM,mBAAmB,MAAM,SAAS,QAAQ;AAAA,IAC9C,OAAO,UAAU;AAAA,IACjB;AAAA,EACF,CAAC;AAED,QAAM,WAAW,MAAM,oBAAoB;AAAA,IACzC;AAAA,MACE,MAAM,iBAAiB;AAAA,MACvB,WAAW,iBAAiB;AAAA,MAC5B,aAAa,iBAAiB;AAAA,MAC9B,YAAY,SAAS;AAAA,MACrB,WAAW,UAAU;AAAA,MACrB,UAAU,SAAS;AAAA,MACnB,gBAAgB,UAAU,cAAc;AAAA,MAExC,iBAAiB,iBAAiB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,WAAW,iBAAiB;AAAA,MAC5B,YAAY,iBAAiB;AAAA,IAC/B;AAAA,IACA,OAAO,cAAc,YAAY;AAAA,EACnC;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,QAAkB;AAC5D,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,UAAU,SAAS;AAAA,MACnB,SAAS,SAAS;AAAA,MAClB,gBAAgB,SAAS;AAAA,IAC3B,CAAC;AAED,QA
AI,iBAAiB,MAAM;AACzB,YAAM,QAAQ,MAAM,iBAAiB;AAAA,QACnC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB,YAAY,SAAS;AAAA,UACrB,kBAAkB,aAAa;AAAA,UAC/B,aAAa,aAAa;AAAA,UAC1B,UAAU,aAAa;AAAA,QACzB;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,MAAI,QAAQ,SAAU,GAAG,mBAAmB,mBAA6B;AACvE,QAAI,CAAC,UAAU,eAAe;AAC5B,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,UAAM,eAAe,MAAM,OAAO,MAAM;AAAA,MACtC,OAAO,UAAU;AAAA,MACjB,UAAU;AAAA,QACR;AAAA,UACE,IAAI;AAAA,UACJ,aACE;AAAA,UACF,QAAQ;AAAA,QACV;AAAA,MACF;AAAA,MACA,QAAQ;AAAA,qBACO,SAAS,kBAAkB,IAAI,CAAC,QAAQ,KAAK,GAAG,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,sBAC7D,SAAS,kBAAkB,IAAI,CAAC,QAAQ,KAAK,SAAS,UAAU,GAAG,KAAK,EAAE,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,MACxG,iBAAiB;AAAA,QACf,kBAAkBA,GACf,OAAO,EACP,MAAM,EACN;AAAA,UACC;AAAA,QACF;AAAA,QACF,GAAI,UAAU,2BAA2B,CAAC;AAAA,MAC5C;AAAA,MACA,UAAU,SAAS;AAAA,MACnB,cAAc,UAAU,sBAAsB;AAAA,IAChD,CAAC;AAED,QAAI,iBAAiB,MAAM;AACzB,YAAM,EAAE,kBAAkB,GAAG,gBAAgB,IAC3C,aAAa;AACf,YAAM,QAAQ,MAAM,iBAAiB;AAAA,QACnC;AAAA,UACE,eAAe,cAAc;AAAA,UAC7B,OAAO,aAAa;AAAA,UACpB;AAAA,UACA,YAAY,SAAS;AAAA,UACrB,aAAa,aAAa;AAAA,UAC1B,mBAAmB,aAAa;AAAA,UAChC,oBAAoB,aAAa;AAAA,UACjC,yBAAyB,aAAa;AAAA,UACtC,0BAA0B,aAAa;AAAA,UACvC,kBAAkB,aAAa;AAAA,UAC/B,mBAAmB,UAAU;AAAA,UAC7B,wBAAwB,UAAU,sBAAsB;AAAA,UACxD,UAAU;AAAA,YACR,GAAG,aAAa;AAAA,YAChB;AAAA,UACF;AAAA,QACF;AAAA,QACA,OAAO,cAAc,SAAS;AAAA,MAChC;AAEA,aAAO,EAAE,UAAU,MAAM;AAAA,IAC3B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS;AACpB;AAEA,SAAS,UAAU,UAAyB;AAC1C,SAAO,aAAa,SAAS,QAAQ;AAAA;AAAA,EAAe,OAAO;AAAA,IACzD,SAAS,WAAW,CAAC;AAAA,EACvB,EACG,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM,GAAG,GAAG,KAAK,KAAK,EAAE,EACxC,KAAK,IAAI,CAAC;AACf;AAEA,SAAS,iBACP,UACA,mBACA;AACA,WAAS,IAAI,GAAG,IAAI,SAAS,QAAQ,KAAK;AACxC,UAAM,WAAW,WAAW,QAAQ,SAAS,CAAC,EAAG,OAAO;AACxD,aAAS,CAAC,EAAG,UAAU,SAAS,iBAAiB;AAAA,EACnD;AACF;;;AC9UA,OAAOC,QAAO;AAEP,IAAM,uBAAN,cAAmC,gBAUxC;AAAA,EACA,YAAY,QAA8C;AACxD,UAAM;AAAA,MACJ,MAAM,OAAO;AAAA,MACb,WAAW,OAAO;AAAA,MAElB,QAAQA,GAAE,MAAM;AAAA,QACd;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QA
CA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;","names":["z","z","z","z","z"]}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import {
|
|
2
|
-
buildSchemaDefiner
|
|
3
|
-
} from "./chunk-OQE6TQXZ.js";
|
|
4
1
|
import {
|
|
5
2
|
CATEGORIES,
|
|
6
3
|
PEERBENCH_NAMESPACE
|
|
7
4
|
} from "./chunk-UHHHSYVE.js";
|
|
5
|
+
import {
|
|
6
|
+
buildSchemaDefiner
|
|
7
|
+
} from "./chunk-OQE6TQXZ.js";
|
|
8
8
|
import {
|
|
9
9
|
IdSchema
|
|
10
10
|
} from "./chunk-NUEOE3K5.js";
|
|
@@ -41,4 +41,4 @@ export {
|
|
|
41
41
|
defineSystemPromptSchema,
|
|
42
42
|
SimpleSystemPromptSchemaV1
|
|
43
43
|
};
|
|
44
|
-
//# sourceMappingURL=chunk-
|
|
44
|
+
//# sourceMappingURL=chunk-ZXTQJFGL.js.map
|
package/dist/index.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import {
|
|
2
2
|
defineRunner
|
|
3
3
|
} from "./chunk-QY5MPNNB.js";
|
|
4
|
-
import {
|
|
5
|
-
ScoringMethod
|
|
6
|
-
} from "./chunk-HMQYGCKI.js";
|
|
7
4
|
import {
|
|
8
5
|
CATEGORIES,
|
|
9
6
|
PEERBENCH_NAMESPACE
|
|
10
7
|
} from "./chunk-UHHHSYVE.js";
|
|
8
|
+
import {
|
|
9
|
+
ScoringMethod
|
|
10
|
+
} from "./chunk-HMQYGCKI.js";
|
|
11
11
|
import {
|
|
12
12
|
RateLimiter,
|
|
13
13
|
bufferToString,
|
|
@@ -2,9 +2,9 @@ import {
|
|
|
2
2
|
BaseSystemPromptSchemaV1,
|
|
3
3
|
SimpleSystemPromptSchemaV1,
|
|
4
4
|
defineSystemPromptSchema
|
|
5
|
-
} from "../../chunk-
|
|
6
|
-
import "../../chunk-OQE6TQXZ.js";
|
|
5
|
+
} from "../../chunk-ZXTQJFGL.js";
|
|
7
6
|
import "../../chunk-UHHHSYVE.js";
|
|
7
|
+
import "../../chunk-OQE6TQXZ.js";
|
|
8
8
|
import "../../chunk-NUEOE3K5.js";
|
|
9
9
|
import "../../chunk-PZ5AY32C.js";
|
|
10
10
|
export {
|
package/package.json
CHANGED
|
File without changes
|