peerbench 0.0.2-alpha.0 → 0.0.2-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +123 -99
  2. package/dist/aggregators/index.d.ts +67 -0
  3. package/dist/aggregators/index.js +46 -0
  4. package/dist/aggregators/index.js.map +1 -0
  5. package/dist/benchmarks/index.d.ts +614 -1271
  6. package/dist/benchmarks/index.js +346 -803
  7. package/dist/benchmarks/index.js.map +1 -1
  8. package/dist/{chunk-DUBKY73H.js → chunk-4UBK6452.js} +13 -13
  9. package/dist/chunk-4UBK6452.js.map +1 -0
  10. package/dist/chunk-ERALDEZY.js +112 -0
  11. package/dist/chunk-ERALDEZY.js.map +1 -0
  12. package/dist/{chunk-ZJWSK4VO.js → chunk-HMQYGCKI.js} +1 -1
  13. package/dist/chunk-HMQYGCKI.js.map +1 -0
  14. package/dist/chunk-NUEOE3K5.js +8 -0
  15. package/dist/chunk-NUEOE3K5.js.map +1 -0
  16. package/dist/chunk-OQE6TQXZ.js +42 -0
  17. package/dist/chunk-OQE6TQXZ.js.map +1 -0
  18. package/dist/chunk-Q6GSOHOP.js +44 -0
  19. package/dist/chunk-Q6GSOHOP.js.map +1 -0
  20. package/dist/chunk-QY5MPNNB.js +28 -0
  21. package/dist/chunk-QY5MPNNB.js.map +1 -0
  22. package/dist/chunk-R76XA2K6.js +229 -0
  23. package/dist/chunk-R76XA2K6.js.map +1 -0
  24. package/dist/chunk-TRNCF2BG.js +35 -0
  25. package/dist/chunk-TRNCF2BG.js.map +1 -0
  26. package/dist/chunk-UHHHSYVE.js +11 -0
  27. package/dist/chunk-UHHHSYVE.js.map +1 -0
  28. package/dist/{chunk-232PY7K3.js → chunk-YY33MNMV.js} +29 -14
  29. package/dist/chunk-YY33MNMV.js.map +1 -0
  30. package/dist/chunk-ZEWI24CV.js +365 -0
  31. package/dist/chunk-ZEWI24CV.js.map +1 -0
  32. package/dist/index-BAioQhp2.d.ts +27 -0
  33. package/dist/index.d.ts +51 -26
  34. package/dist/index.js +28 -25
  35. package/dist/index.js.map +1 -1
  36. package/dist/json-file-ZwzLUbje.d.ts +73 -0
  37. package/dist/llm-judge-QThCZ9TQ.d.ts +67 -0
  38. package/dist/providers/index.d.ts +16 -19
  39. package/dist/providers/index.js +8 -253
  40. package/dist/providers/index.js.map +1 -1
  41. package/dist/schemas/extensions/index.d.ts +16 -2
  42. package/dist/schemas/extensions/index.js +9 -3
  43. package/dist/schemas/extensions/index.js.map +1 -1
  44. package/dist/schemas/index.d.ts +108 -141
  45. package/dist/schemas/index.js +7 -10
  46. package/dist/schemas/llm/index.d.ts +100 -82
  47. package/dist/schemas/llm/index.js +7 -29
  48. package/dist/schemas/llm/index.js.map +1 -1
  49. package/dist/scorers/index.d.ts +3 -2
  50. package/dist/scorers/index.js +8 -486
  51. package/dist/scorers/index.js.map +1 -1
  52. package/dist/storages/index.d.ts +69 -0
  53. package/dist/storages/index.js +98 -0
  54. package/dist/storages/index.js.map +1 -0
  55. package/package.json +12 -6
  56. package/dist/catalogs/index.d.ts +0 -75
  57. package/dist/catalogs/index.js +0 -88
  58. package/dist/catalogs/index.js.map +0 -1
  59. package/dist/chunk-22HU24QF.js +0 -8
  60. package/dist/chunk-22HU24QF.js.map +0 -1
  61. package/dist/chunk-232PY7K3.js.map +0 -1
  62. package/dist/chunk-7TREBPSJ.js +0 -26
  63. package/dist/chunk-7TREBPSJ.js.map +0 -1
  64. package/dist/chunk-DUBKY73H.js.map +0 -1
  65. package/dist/chunk-GVF4YZF3.js +0 -15
  66. package/dist/chunk-GVF4YZF3.js.map +0 -1
  67. package/dist/chunk-HJH3SW3L.js +0 -103
  68. package/dist/chunk-HJH3SW3L.js.map +0 -1
  69. package/dist/chunk-IUN2IUCS.js +0 -58
  70. package/dist/chunk-IUN2IUCS.js.map +0 -1
  71. package/dist/chunk-VBOM2YEG.js +0 -47
  72. package/dist/chunk-VBOM2YEG.js.map +0 -1
  73. package/dist/chunk-ZJWSK4VO.js.map +0 -1
  74. package/dist/data-BmN5WjZ4.d.ts +0 -57
  75. package/dist/generic-array-DLHWSvf1.d.ts +0 -22
  76. package/dist/index-WiPjF2AL.d.ts +0 -15
  77. package/dist/llm-judge-DIG1f1Az.d.ts +0 -67
  78. package/dist/simple-system-prompt-CzPYuvo0.d.ts +0 -49
  79. package/dist/system-prompt--0FdPWqK.d.ts +0 -58
  80. package/dist/utilities-BrRH32rD.d.ts +0 -30
@@ -1,50 +1,47 @@
1
- import { a as GenericJSONArrayDataLoader } from '../generic-array-DLHWSvf1.js';
2
- import { I as IdGenerator } from '../index-WiPjF2AL.js';
3
- import * as z from 'zod';
4
- import z__default, { z as z$1 } from 'zod';
1
+ import { I as IdGenerator } from '../index-BAioQhp2.js';
2
+ import { z } from 'zod';
5
3
  import { A as AbstractLLMProvider } from '../llm-DNj_tp2T.js';
6
- import { a as MCQScorer, L as LLMJudgeScorer } from '../llm-judge-DIG1f1Az.js';
7
- import { a as SimpleSystemPromptV1 } from '../simple-system-prompt-CzPYuvo0.js';
8
- import { c as RunnerResult, A as AbstractDataLoader, L as LoaderResult } from '../data-BmN5WjZ4.js';
9
- import * as zod_v4_core from 'zod/v4/core';
10
- import { A as AbstractScorer, B as BaseScorerResult } from '../abstract-Dec9Sc5O.js';
11
- import '../schemas/index.js';
4
+ import { a as MCQScorer, L as LLMAsAJudgeScorer } from '../llm-judge-QThCZ9TQ.js';
5
+ import { J as JSONFileStorage } from '../json-file-ZwzLUbje.js';
12
6
  import '../provider-BDjGp2y-.js';
7
+ import '../abstract-Dec9Sc5O.js';
13
8
  import 'openai/resources/shared';
14
9
  import 'openai/resources/chat/completions';
15
10
  import '../rate-limiter-CSmVIRsM.js';
16
- import '../system-prompt--0FdPWqK.js';
11
+ import 'node:fs/promises';
17
12
 
18
- declare const PeerbenchMultipleChoiceTestCaseSchemaV1: z$1.ZodObject<Omit<{
19
- id: z$1.ZodString;
20
- kind: z$1.ZodString;
21
- schemaVersion: z$1.ZodNumber;
22
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
23
- }, "kind" | "schemaVersion"> & {
24
- question: z$1.ZodString;
25
- options: z$1.ZodRecord<z$1.ZodString, z$1.ZodString>;
26
- answer: z$1.ZodString;
27
- answerKey: z$1.ZodString;
13
+ declare const MCQKind: "llm/mcq";
14
+ declare const MCQTestCaseSchemaV1: z.ZodObject<Omit<{
15
+ id: z.ZodString;
16
+ namespace: z.ZodString;
17
+ schemaVersion: z.ZodNumber;
18
+ kind: z.ZodString;
19
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
20
+ }, "kind" | "namespace" | "schemaVersion"> & {
21
+ question: z.ZodString;
22
+ options: z.ZodRecord<z.ZodString, z.ZodString>;
23
+ correctAnswerKeys: z.ZodArray<z.ZodString>;
28
24
  } & {
29
- kind: z$1.ZodLiteral<"pb.ts.mcq">;
30
- schemaVersion: z$1.ZodLiteral<1>;
31
- }, z$1.core.$strip> & {
25
+ namespace: z.ZodLiteral<"peerbench.ai">;
26
+ kind: z.ZodLiteral<"llm/mcq.tc">;
27
+ schemaVersion: z.ZodLiteral<1>;
28
+ }, z.core.$strip> & {
32
29
  new: (input: Omit<{
33
30
  id: string;
34
31
  question: string;
35
32
  options: Record<string, string>;
36
- answer: string;
37
- answerKey: string;
38
- kind: "pb.ts.mcq";
33
+ correctAnswerKeys: string[];
34
+ namespace: "peerbench.ai";
35
+ kind: "llm/mcq.tc";
39
36
  schemaVersion: 1;
40
37
  metadata?: Record<string, unknown> | undefined;
41
- }, "kind" | "schemaVersion">) => {
38
+ }, "kind" | "namespace" | "schemaVersion">) => {
42
39
  id: string;
43
40
  question: string;
44
41
  options: Record<string, string>;
45
- answer: string;
46
- answerKey: string;
47
- kind: "pb.ts.mcq";
42
+ correctAnswerKeys: string[];
43
+ namespace: "peerbench.ai";
44
+ kind: "llm/mcq.tc";
48
45
  schemaVersion: 1;
49
46
  metadata?: Record<string, unknown> | undefined;
50
47
  };
@@ -52,170 +49,179 @@ declare const PeerbenchMultipleChoiceTestCaseSchemaV1: z$1.ZodObject<Omit<{
52
49
  id: string;
53
50
  question: string;
54
51
  options: Record<string, string>;
55
- answer: string;
56
- answerKey: string;
57
- kind: "pb.ts.mcq";
52
+ correctAnswerKeys: string[];
53
+ namespace: "peerbench.ai";
54
+ kind: "llm/mcq.tc";
58
55
  schemaVersion: 1;
59
56
  metadata?: Record<string, unknown> | undefined;
60
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
57
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
61
58
  id: string;
62
59
  question: string;
63
60
  options: Record<string, string>;
64
- answer: string;
65
- answerKey: string;
66
- kind: "pb.ts.mcq";
61
+ correctAnswerKeys: string[];
62
+ namespace: "peerbench.ai";
63
+ kind: "llm/mcq.tc";
67
64
  schemaVersion: 1;
68
65
  metadata?: Record<string, unknown> | undefined;
69
66
  }>;
70
67
  };
71
- type PeerbenchMultipleChoiceTestCaseV1 = z$1.infer<typeof PeerbenchMultipleChoiceTestCaseSchemaV1>;
72
- declare const PeerbenchMultipleChoiceResponseSchemaV1: z$1.ZodObject<Omit<Omit<{
73
- id: z$1.ZodString;
74
- kind: z$1.ZodString;
75
- schemaVersion: z$1.ZodNumber;
76
- startedAt: z$1.ZodNumber;
77
- completedAt: z$1.ZodNumber;
78
- testCaseId: z$1.ZodString;
79
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
80
- }, "kind" | "schemaVersion"> & {
81
- data: z$1.ZodString;
82
- modelSlug: z$1.ZodString;
83
- provider: z$1.ZodString;
84
- systemPromptId: z$1.ZodOptional<z$1.ZodString>;
85
- inputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
86
- outputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
87
- inputCost: z$1.ZodOptional<z$1.ZodString>;
88
- outputCost: z$1.ZodOptional<z$1.ZodString>;
68
+ type MCQTestCaseV1 = z.infer<typeof MCQTestCaseSchemaV1>;
69
+ declare const MCQResponseSchemaV1: z.ZodObject<Omit<{
70
+ id: z.ZodString;
71
+ namespace: z.ZodString;
72
+ schemaVersion: z.ZodNumber;
73
+ kind: z.ZodString;
74
+ startedAt: z.ZodNumber;
75
+ completedAt: z.ZodNumber;
76
+ testCaseId: z.ZodString;
77
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
78
+ }, "kind" | "namespace" | "schemaVersion"> & {
79
+ data: z.ZodString;
80
+ modelSlug: z.ZodString;
81
+ provider: z.ZodString;
82
+ systemPromptId: z.ZodOptional<z.ZodString>;
83
+ inputTokensUsed: z.ZodOptional<z.ZodNumber>;
84
+ outputTokensUsed: z.ZodOptional<z.ZodNumber>;
85
+ inputCost: z.ZodOptional<z.ZodString>;
86
+ outputCost: z.ZodOptional<z.ZodString>;
89
87
  } & {
90
- kind: z$1.ZodString;
91
- schemaVersion: z$1.ZodNumber;
92
- }, "kind" | "schemaVersion"> & {
93
- kind: z$1.ZodLiteral<"pb.rs.mcq">;
94
- schemaVersion: z$1.ZodLiteral<1>;
95
- }, z$1.core.$strip> & {
88
+ namespace: z.ZodLiteral<"peerbench.ai">;
89
+ kind: z.ZodLiteral<"llm/mcq.rs">;
90
+ schemaVersion: z.ZodLiteral<1>;
91
+ }, z.core.$strip> & {
96
92
  new: (input: Omit<{
97
- id: string;
98
- testCaseId: string;
99
93
  startedAt: number;
100
94
  completedAt: number;
95
+ id: string;
96
+ testCaseId: string;
101
97
  data: string;
102
- provider: string;
103
98
  modelSlug: string;
104
- kind: "pb.rs.mcq";
99
+ provider: string;
100
+ namespace: "peerbench.ai";
101
+ kind: "llm/mcq.rs";
105
102
  schemaVersion: 1;
106
103
  metadata?: Record<string, unknown> | undefined;
104
+ systemPromptId?: string | undefined;
107
105
  inputTokensUsed?: number | undefined;
108
106
  outputTokensUsed?: number | undefined;
109
107
  inputCost?: string | undefined;
110
108
  outputCost?: string | undefined;
111
- systemPromptId?: string | undefined;
112
- }, "kind" | "schemaVersion">) => {
113
- id: string;
114
- testCaseId: string;
109
+ }, "kind" | "namespace" | "schemaVersion">) => {
115
110
  startedAt: number;
116
111
  completedAt: number;
112
+ id: string;
113
+ testCaseId: string;
117
114
  data: string;
118
- provider: string;
119
115
  modelSlug: string;
120
- kind: "pb.rs.mcq";
116
+ provider: string;
117
+ namespace: "peerbench.ai";
118
+ kind: "llm/mcq.rs";
121
119
  schemaVersion: 1;
122
120
  metadata?: Record<string, unknown> | undefined;
121
+ systemPromptId?: string | undefined;
123
122
  inputTokensUsed?: number | undefined;
124
123
  outputTokensUsed?: number | undefined;
125
124
  inputCost?: string | undefined;
126
125
  outputCost?: string | undefined;
127
- systemPromptId?: string | undefined;
128
126
  };
129
127
  newWithId(input: Omit<{
130
- id: string;
131
- testCaseId: string;
132
128
  startedAt: number;
133
129
  completedAt: number;
130
+ id: string;
131
+ testCaseId: string;
134
132
  data: string;
135
- provider: string;
136
133
  modelSlug: string;
137
- kind: "pb.rs.mcq";
134
+ provider: string;
135
+ namespace: "peerbench.ai";
136
+ kind: "llm/mcq.rs";
138
137
  schemaVersion: 1;
139
138
  metadata?: Record<string, unknown> | undefined;
139
+ systemPromptId?: string | undefined;
140
140
  inputTokensUsed?: number | undefined;
141
141
  outputTokensUsed?: number | undefined;
142
142
  inputCost?: string | undefined;
143
143
  outputCost?: string | undefined;
144
- systemPromptId?: string | undefined;
145
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
146
- id: string;
147
- testCaseId: string;
144
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
148
145
  startedAt: number;
149
146
  completedAt: number;
147
+ id: string;
148
+ testCaseId: string;
150
149
  data: string;
151
- provider: string;
152
150
  modelSlug: string;
153
- kind: "pb.rs.mcq";
151
+ provider: string;
152
+ namespace: "peerbench.ai";
153
+ kind: "llm/mcq.rs";
154
154
  schemaVersion: 1;
155
155
  metadata?: Record<string, unknown> | undefined;
156
+ systemPromptId?: string | undefined;
156
157
  inputTokensUsed?: number | undefined;
157
158
  outputTokensUsed?: number | undefined;
158
159
  inputCost?: string | undefined;
159
160
  outputCost?: string | undefined;
160
- systemPromptId?: string | undefined;
161
161
  }>;
162
162
  };
163
- type PeerbenchMultipleChoiceResponseV1 = z$1.infer<typeof PeerbenchMultipleChoiceResponseSchemaV1>;
164
- declare const PeerbenchMultipleChoiceScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
165
- id: z$1.ZodString;
166
- kind: z$1.ZodString;
167
- schemaVersion: z$1.ZodNumber;
168
- value: z$1.ZodNumber;
169
- responseId: z$1.ZodString;
170
- explanation: z$1.ZodOptional<z$1.ZodString>;
171
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
172
- scoringMethod: z$1.ZodEnum<{
163
+ type MCQResponseV1 = z.infer<typeof MCQResponseSchemaV1>;
164
+ declare const MCQScoreSchemaV1: z.ZodObject<Omit<{
165
+ id: z.ZodString;
166
+ namespace: z.ZodString;
167
+ kind: z.ZodString;
168
+ schemaVersion: z.ZodNumber;
169
+ value: z.ZodNumber;
170
+ responseId: z.ZodString;
171
+ explanation: z.ZodOptional<z.ZodString>;
172
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
173
+ scoringMethod: z.ZodEnum<{
173
174
  readonly ai: "ai";
174
175
  readonly human: "human";
175
176
  readonly algo: "algo";
176
177
  }>;
177
- }, "kind" | "schemaVersion"> & {
178
- scorerAIProvider: z$1.ZodOptional<z$1.ZodString>;
179
- scorerAIModelSlug: z$1.ZodOptional<z$1.ZodString>;
180
- scorerAIInputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
181
- scorerAIOutputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
182
- scorerAIInputCost: z$1.ZodOptional<z$1.ZodString>;
183
- scorerAIOutputCost: z$1.ZodOptional<z$1.ZodString>;
184
- } & {
185
- kind: z$1.ZodString;
186
- schemaVersion: z$1.ZodNumber;
187
- }, "kind" | "schemaVersion"> & {
188
- extractedAnswers: z$1.ZodArray<z$1.ZodString>;
178
+ }, "kind" | "namespace" | "schemaVersion"> & {
179
+ extractedAnswers: z.ZodArray<z.ZodString>;
180
+ scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
181
+ scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
182
+ scorerAIProvider: z.ZodOptional<z.ZodString>;
183
+ scorerAIModelSlug: z.ZodOptional<z.ZodString>;
184
+ scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
185
+ scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
186
+ scorerAIInputCost: z.ZodOptional<z.ZodString>;
187
+ scorerAIOutputCost: z.ZodOptional<z.ZodString>;
189
188
  } & {
190
- kind: z$1.ZodLiteral<"pb.sc.mcq">;
191
- schemaVersion: z$1.ZodLiteral<1>;
192
- }, z$1.core.$strip> & {
189
+ namespace: z.ZodLiteral<"peerbench.ai">;
190
+ kind: z.ZodLiteral<"llm/mcq.sc">;
191
+ schemaVersion: z.ZodLiteral<1>;
192
+ }, z.core.$strip> & {
193
193
  new: (input: Omit<{
194
194
  id: string;
195
195
  value: number;
196
196
  responseId: string;
197
197
  scoringMethod: "ai" | "human" | "algo";
198
198
  extractedAnswers: string[];
199
- kind: "pb.sc.mcq";
199
+ namespace: "peerbench.ai";
200
+ kind: "llm/mcq.sc";
200
201
  schemaVersion: 1;
201
- metadata?: Record<string, unknown> | undefined;
202
202
  explanation?: string | undefined;
203
+ metadata?: Record<string, unknown> | undefined;
204
+ scorerAISystemPrompt?: string | undefined;
205
+ scorerAISystemPromptId?: string | undefined;
203
206
  scorerAIProvider?: string | undefined;
204
207
  scorerAIModelSlug?: string | undefined;
205
208
  scorerAIInputTokensUsed?: number | undefined;
206
209
  scorerAIOutputTokensUsed?: number | undefined;
207
210
  scorerAIInputCost?: string | undefined;
208
211
  scorerAIOutputCost?: string | undefined;
209
- }, "kind" | "schemaVersion">) => {
212
+ }, "kind" | "namespace" | "schemaVersion">) => {
210
213
  id: string;
211
214
  value: number;
212
215
  responseId: string;
213
216
  scoringMethod: "ai" | "human" | "algo";
214
217
  extractedAnswers: string[];
215
- kind: "pb.sc.mcq";
218
+ namespace: "peerbench.ai";
219
+ kind: "llm/mcq.sc";
216
220
  schemaVersion: 1;
217
- metadata?: Record<string, unknown> | undefined;
218
221
  explanation?: string | undefined;
222
+ metadata?: Record<string, unknown> | undefined;
223
+ scorerAISystemPrompt?: string | undefined;
224
+ scorerAISystemPromptId?: string | undefined;
219
225
  scorerAIProvider?: string | undefined;
220
226
  scorerAIModelSlug?: string | undefined;
221
227
  scorerAIInputTokensUsed?: number | undefined;
@@ -229,26 +235,32 @@ declare const PeerbenchMultipleChoiceScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
229
235
  responseId: string;
230
236
  scoringMethod: "ai" | "human" | "algo";
231
237
  extractedAnswers: string[];
232
- kind: "pb.sc.mcq";
238
+ namespace: "peerbench.ai";
239
+ kind: "llm/mcq.sc";
233
240
  schemaVersion: 1;
234
- metadata?: Record<string, unknown> | undefined;
235
241
  explanation?: string | undefined;
242
+ metadata?: Record<string, unknown> | undefined;
243
+ scorerAISystemPrompt?: string | undefined;
244
+ scorerAISystemPromptId?: string | undefined;
236
245
  scorerAIProvider?: string | undefined;
237
246
  scorerAIModelSlug?: string | undefined;
238
247
  scorerAIInputTokensUsed?: number | undefined;
239
248
  scorerAIOutputTokensUsed?: number | undefined;
240
249
  scorerAIInputCost?: string | undefined;
241
250
  scorerAIOutputCost?: string | undefined;
242
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
251
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
243
252
  id: string;
244
253
  value: number;
245
254
  responseId: string;
246
255
  scoringMethod: "ai" | "human" | "algo";
247
256
  extractedAnswers: string[];
248
- kind: "pb.sc.mcq";
257
+ namespace: "peerbench.ai";
258
+ kind: "llm/mcq.sc";
249
259
  schemaVersion: 1;
250
- metadata?: Record<string, unknown> | undefined;
251
260
  explanation?: string | undefined;
261
+ metadata?: Record<string, unknown> | undefined;
262
+ scorerAISystemPrompt?: string | undefined;
263
+ scorerAISystemPromptId?: string | undefined;
252
264
  scorerAIProvider?: string | undefined;
253
265
  scorerAIModelSlug?: string | undefined;
254
266
  scorerAIInputTokensUsed?: number | undefined;
@@ -257,195 +269,304 @@ declare const PeerbenchMultipleChoiceScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
257
269
  scorerAIOutputCost?: string | undefined;
258
270
  }>;
259
271
  };
260
- type PeerbenchMultipleChoiceScoreV1 = z$1.infer<typeof PeerbenchMultipleChoiceScoreSchemaV1>;
272
+ type MCQScoreV1 = z.infer<typeof MCQScoreSchemaV1>;
261
273
 
262
- declare const PeerbenchOpenEndedTestCaseSchemaV1: z$1.ZodObject<Omit<{
263
- id: z$1.ZodString;
264
- kind: z$1.ZodString;
265
- schemaVersion: z$1.ZodNumber;
266
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
267
- }, "kind" | "schemaVersion"> & {
268
- question: z$1.ZodString;
269
- answer: z$1.ZodOptional<z$1.ZodString>;
274
+ declare const MultiTurnKind: "llm/multi-turn";
275
+ declare const MultiTurnTestCaseSchemaV1: z.ZodObject<Omit<{
276
+ id: z.ZodString;
277
+ namespace: z.ZodString;
278
+ schemaVersion: z.ZodNumber;
279
+ kind: z.ZodString;
280
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
281
+ }, "kind" | "namespace" | "schemaVersion"> & {
282
+ messages: z.ZodArray<z.ZodObject<{
283
+ role: z.ZodString;
284
+ content: z.ZodString;
285
+ goodAnswers: z.ZodOptional<z.ZodArray<z.ZodString>>;
286
+ badAnswers: z.ZodOptional<z.ZodArray<z.ZodString>>;
287
+ }, z.core.$strip>>;
288
+ maxTurns: z.ZodOptional<z.ZodNumber>;
289
+ expectedOutcome: z.ZodOptional<z.ZodString>;
270
290
  } & {
271
- kind: z$1.ZodLiteral<"pb.ts.open-ended">;
272
- schemaVersion: z$1.ZodLiteral<1>;
273
- }, z$1.core.$strip> & {
291
+ namespace: z.ZodLiteral<"peerbench.ai">;
292
+ kind: z.ZodLiteral<"llm/multi-turn.tc">;
293
+ schemaVersion: z.ZodLiteral<1>;
294
+ }, z.core.$strip> & {
274
295
  new: (input: Omit<{
275
296
  id: string;
276
- question: string;
277
- kind: "pb.ts.open-ended";
297
+ messages: {
298
+ role: string;
299
+ content: string;
300
+ goodAnswers?: string[] | undefined;
301
+ badAnswers?: string[] | undefined;
302
+ }[];
303
+ namespace: "peerbench.ai";
304
+ kind: "llm/multi-turn.tc";
278
305
  schemaVersion: 1;
279
306
  metadata?: Record<string, unknown> | undefined;
280
- answer?: string | undefined;
281
- }, "kind" | "schemaVersion">) => {
307
+ maxTurns?: number | undefined;
308
+ expectedOutcome?: string | undefined;
309
+ }, "kind" | "namespace" | "schemaVersion">) => {
282
310
  id: string;
283
- question: string;
284
- kind: "pb.ts.open-ended";
311
+ messages: {
312
+ role: string;
313
+ content: string;
314
+ goodAnswers?: string[] | undefined;
315
+ badAnswers?: string[] | undefined;
316
+ }[];
317
+ namespace: "peerbench.ai";
318
+ kind: "llm/multi-turn.tc";
285
319
  schemaVersion: 1;
286
320
  metadata?: Record<string, unknown> | undefined;
287
- answer?: string | undefined;
321
+ maxTurns?: number | undefined;
322
+ expectedOutcome?: string | undefined;
288
323
  };
289
324
  newWithId(input: Omit<{
290
325
  id: string;
291
- question: string;
292
- kind: "pb.ts.open-ended";
326
+ messages: {
327
+ role: string;
328
+ content: string;
329
+ goodAnswers?: string[] | undefined;
330
+ badAnswers?: string[] | undefined;
331
+ }[];
332
+ namespace: "peerbench.ai";
333
+ kind: "llm/multi-turn.tc";
293
334
  schemaVersion: 1;
294
335
  metadata?: Record<string, unknown> | undefined;
295
- answer?: string | undefined;
296
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
336
+ maxTurns?: number | undefined;
337
+ expectedOutcome?: string | undefined;
338
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
297
339
  id: string;
298
- question: string;
299
- kind: "pb.ts.open-ended";
340
+ messages: {
341
+ role: string;
342
+ content: string;
343
+ goodAnswers?: string[] | undefined;
344
+ badAnswers?: string[] | undefined;
345
+ }[];
346
+ namespace: "peerbench.ai";
347
+ kind: "llm/multi-turn.tc";
300
348
  schemaVersion: 1;
301
349
  metadata?: Record<string, unknown> | undefined;
302
- answer?: string | undefined;
350
+ maxTurns?: number | undefined;
351
+ expectedOutcome?: string | undefined;
303
352
  }>;
304
353
  };
305
- type PeerbenchOpenEndedTestCaseV1 = z$1.infer<typeof PeerbenchOpenEndedTestCaseSchemaV1>;
306
- declare const PeerbenchOpenEndedResponseSchemaV1: z$1.ZodObject<Omit<Omit<{
307
- id: z$1.ZodString;
308
- kind: z$1.ZodString;
309
- schemaVersion: z$1.ZodNumber;
310
- startedAt: z$1.ZodNumber;
311
- completedAt: z$1.ZodNumber;
312
- testCaseId: z$1.ZodString;
313
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
314
- }, "kind" | "schemaVersion"> & {
315
- data: z$1.ZodString;
316
- modelSlug: z$1.ZodString;
317
- provider: z$1.ZodString;
318
- systemPromptId: z$1.ZodOptional<z$1.ZodString>;
319
- inputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
320
- outputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
321
- inputCost: z$1.ZodOptional<z$1.ZodString>;
322
- outputCost: z$1.ZodOptional<z$1.ZodString>;
354
+ type MultiTurnTestCaseV1 = z.infer<typeof MultiTurnTestCaseSchemaV1>;
355
+ declare const MultiTurnResponseSchemaV1: z.ZodObject<Omit<{
356
+ id: z.ZodString;
357
+ namespace: z.ZodString;
358
+ schemaVersion: z.ZodNumber;
359
+ kind: z.ZodString;
360
+ startedAt: z.ZodNumber;
361
+ completedAt: z.ZodNumber;
362
+ testCaseId: z.ZodString;
363
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
364
+ }, "kind" | "namespace" | "schemaVersion"> & {
365
+ replies: z.ZodArray<z.ZodObject<{
366
+ messageIndex: z.ZodNumber;
367
+ startedAt: z.ZodNumber;
368
+ completedAt: z.ZodNumber;
369
+ data: z.ZodString;
370
+ inputTokensUsed: z.ZodOptional<z.ZodNumber>;
371
+ outputTokensUsed: z.ZodOptional<z.ZodNumber>;
372
+ inputCost: z.ZodOptional<z.ZodString>;
373
+ outputCost: z.ZodOptional<z.ZodString>;
374
+ }, z.core.$strip>>;
375
+ data: z.ZodString;
376
+ modelSlug: z.ZodString;
377
+ provider: z.ZodString;
378
+ systemPromptId: z.ZodOptional<z.ZodString>;
379
+ inputTokensUsed: z.ZodOptional<z.ZodNumber>;
380
+ outputTokensUsed: z.ZodOptional<z.ZodNumber>;
381
+ inputCost: z.ZodOptional<z.ZodString>;
382
+ outputCost: z.ZodOptional<z.ZodString>;
323
383
  } & {
324
- kind: z$1.ZodString;
325
- schemaVersion: z$1.ZodNumber;
326
- }, "kind" | "schemaVersion"> & {
327
- kind: z$1.ZodLiteral<"pb.rs.open-ended">;
328
- schemaVersion: z$1.ZodLiteral<1>;
329
- }, z$1.core.$strip> & {
384
+ namespace: z.ZodLiteral<"peerbench.ai">;
385
+ kind: z.ZodLiteral<"llm/multi-turn.rs">;
386
+ schemaVersion: z.ZodLiteral<1>;
387
+ }, z.core.$strip> & {
330
388
  new: (input: Omit<{
331
- id: string;
332
- testCaseId: string;
333
389
  startedAt: number;
334
390
  completedAt: number;
391
+ id: string;
392
+ testCaseId: string;
393
+ replies: {
394
+ messageIndex: number;
395
+ startedAt: number;
396
+ completedAt: number;
397
+ data: string;
398
+ inputTokensUsed?: number | undefined;
399
+ outputTokensUsed?: number | undefined;
400
+ inputCost?: string | undefined;
401
+ outputCost?: string | undefined;
402
+ }[];
335
403
  data: string;
336
- provider: string;
337
404
  modelSlug: string;
338
- kind: "pb.rs.open-ended";
405
+ provider: string;
406
+ namespace: "peerbench.ai";
407
+ kind: "llm/multi-turn.rs";
339
408
  schemaVersion: 1;
340
409
  metadata?: Record<string, unknown> | undefined;
410
+ systemPromptId?: string | undefined;
341
411
  inputTokensUsed?: number | undefined;
342
412
  outputTokensUsed?: number | undefined;
343
413
  inputCost?: string | undefined;
344
414
  outputCost?: string | undefined;
345
- systemPromptId?: string | undefined;
346
- }, "kind" | "schemaVersion">) => {
347
- id: string;
348
- testCaseId: string;
415
+ }, "kind" | "namespace" | "schemaVersion">) => {
349
416
  startedAt: number;
350
417
  completedAt: number;
418
+ id: string;
419
+ testCaseId: string;
420
+ replies: {
421
+ messageIndex: number;
422
+ startedAt: number;
423
+ completedAt: number;
424
+ data: string;
425
+ inputTokensUsed?: number | undefined;
426
+ outputTokensUsed?: number | undefined;
427
+ inputCost?: string | undefined;
428
+ outputCost?: string | undefined;
429
+ }[];
351
430
  data: string;
352
- provider: string;
353
431
  modelSlug: string;
354
- kind: "pb.rs.open-ended";
432
+ provider: string;
433
+ namespace: "peerbench.ai";
434
+ kind: "llm/multi-turn.rs";
355
435
  schemaVersion: 1;
356
436
  metadata?: Record<string, unknown> | undefined;
437
+ systemPromptId?: string | undefined;
357
438
  inputTokensUsed?: number | undefined;
358
439
  outputTokensUsed?: number | undefined;
359
440
  inputCost?: string | undefined;
360
441
  outputCost?: string | undefined;
361
- systemPromptId?: string | undefined;
362
442
  };
363
443
  newWithId(input: Omit<{
364
- id: string;
365
- testCaseId: string;
366
444
  startedAt: number;
367
445
  completedAt: number;
446
+ id: string;
447
+ testCaseId: string;
448
+ replies: {
449
+ messageIndex: number;
450
+ startedAt: number;
451
+ completedAt: number;
452
+ data: string;
453
+ inputTokensUsed?: number | undefined;
454
+ outputTokensUsed?: number | undefined;
455
+ inputCost?: string | undefined;
456
+ outputCost?: string | undefined;
457
+ }[];
368
458
  data: string;
369
- provider: string;
370
459
  modelSlug: string;
371
- kind: "pb.rs.open-ended";
460
+ provider: string;
461
+ namespace: "peerbench.ai";
462
+ kind: "llm/multi-turn.rs";
372
463
  schemaVersion: 1;
373
464
  metadata?: Record<string, unknown> | undefined;
465
+ systemPromptId?: string | undefined;
374
466
  inputTokensUsed?: number | undefined;
375
467
  outputTokensUsed?: number | undefined;
376
468
  inputCost?: string | undefined;
377
469
  outputCost?: string | undefined;
378
- systemPromptId?: string | undefined;
379
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
380
- id: string;
381
- testCaseId: string;
470
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
382
471
  startedAt: number;
383
472
  completedAt: number;
473
+ id: string;
474
+ testCaseId: string;
475
+ replies: {
476
+ messageIndex: number;
477
+ startedAt: number;
478
+ completedAt: number;
479
+ data: string;
480
+ inputTokensUsed?: number | undefined;
481
+ outputTokensUsed?: number | undefined;
482
+ inputCost?: string | undefined;
483
+ outputCost?: string | undefined;
484
+ }[];
384
485
  data: string;
385
- provider: string;
386
486
  modelSlug: string;
387
- kind: "pb.rs.open-ended";
487
+ provider: string;
488
+ namespace: "peerbench.ai";
489
+ kind: "llm/multi-turn.rs";
388
490
  schemaVersion: 1;
389
491
  metadata?: Record<string, unknown> | undefined;
492
+ systemPromptId?: string | undefined;
390
493
  inputTokensUsed?: number | undefined;
391
494
  outputTokensUsed?: number | undefined;
392
495
  inputCost?: string | undefined;
393
496
  outputCost?: string | undefined;
394
- systemPromptId?: string | undefined;
395
497
  }>;
396
498
  };
397
- type PeerbenchOpenEndedResponseV1 = z$1.infer<typeof PeerbenchOpenEndedResponseSchemaV1>;
398
- declare const PeerbenchOpenEndedScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
399
- id: z$1.ZodString;
400
- kind: z$1.ZodString;
401
- schemaVersion: z$1.ZodNumber;
402
- value: z$1.ZodNumber;
403
- responseId: z$1.ZodString;
404
- explanation: z$1.ZodOptional<z$1.ZodString>;
405
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
406
- scoringMethod: z$1.ZodEnum<{
499
+ type MultiTurnResponseV1 = z.infer<typeof MultiTurnResponseSchemaV1>;
500
+ declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
501
+ id: z.ZodString;
502
+ namespace: z.ZodString;
503
+ kind: z.ZodString;
504
+ schemaVersion: z.ZodNumber;
505
+ value: z.ZodNumber;
506
+ responseId: z.ZodString;
507
+ explanation: z.ZodOptional<z.ZodString>;
508
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
509
+ scoringMethod: z.ZodEnum<{
407
510
  readonly ai: "ai";
408
511
  readonly human: "human";
409
512
  readonly algo: "algo";
410
513
  }>;
411
- }, "kind" | "schemaVersion"> & {
412
- scorerAIProvider: z$1.ZodOptional<z$1.ZodString>;
413
- scorerAIModelSlug: z$1.ZodOptional<z$1.ZodString>;
414
- scorerAIInputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
415
- scorerAIOutputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
416
- scorerAIInputCost: z$1.ZodOptional<z$1.ZodString>;
417
- scorerAIOutputCost: z$1.ZodOptional<z$1.ZodString>;
514
+ }, "kind" | "namespace" | "schemaVersion"> & {
515
+ individualScores: z.ZodArray<z.ZodObject<{
516
+ replyIndex: z.ZodNumber;
517
+ value: z.ZodNumber;
518
+ }, z.core.$strip>>;
519
+ scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
520
+ scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
521
+ scorerAIProvider: z.ZodOptional<z.ZodString>;
522
+ scorerAIModelSlug: z.ZodOptional<z.ZodString>;
523
+ scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
524
+ scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
525
+ scorerAIInputCost: z.ZodOptional<z.ZodString>;
526
+ scorerAIOutputCost: z.ZodOptional<z.ZodString>;
418
527
  } & {
419
- kind: z$1.ZodString;
420
- schemaVersion: z$1.ZodNumber;
421
- }, "kind" | "schemaVersion"> & {
422
- kind: z$1.ZodLiteral<"pb.sc.open-ended">;
423
- schemaVersion: z$1.ZodLiteral<1>;
424
- }, z$1.core.$strip> & {
528
+ namespace: z.ZodLiteral<"peerbench.ai">;
529
+ kind: z.ZodLiteral<"llm/multi-turn.sc">;
530
+ schemaVersion: z.ZodLiteral<1>;
531
+ }, z.core.$strip> & {
425
532
  new: (input: Omit<{
426
533
  id: string;
427
534
  value: number;
428
535
  responseId: string;
429
536
  scoringMethod: "ai" | "human" | "algo";
430
- kind: "pb.sc.open-ended";
537
+ individualScores: {
538
+ replyIndex: number;
539
+ value: number;
540
+ }[];
541
+ namespace: "peerbench.ai";
542
+ kind: "llm/multi-turn.sc";
431
543
  schemaVersion: 1;
432
- metadata?: Record<string, unknown> | undefined;
433
544
  explanation?: string | undefined;
545
+ metadata?: Record<string, unknown> | undefined;
546
+ scorerAISystemPrompt?: string | undefined;
547
+ scorerAISystemPromptId?: string | undefined;
434
548
  scorerAIProvider?: string | undefined;
435
549
  scorerAIModelSlug?: string | undefined;
436
550
  scorerAIInputTokensUsed?: number | undefined;
437
551
  scorerAIOutputTokensUsed?: number | undefined;
438
552
  scorerAIInputCost?: string | undefined;
439
553
  scorerAIOutputCost?: string | undefined;
440
- }, "kind" | "schemaVersion">) => {
554
+ }, "kind" | "namespace" | "schemaVersion">) => {
441
555
  id: string;
442
556
  value: number;
443
557
  responseId: string;
444
558
  scoringMethod: "ai" | "human" | "algo";
445
- kind: "pb.sc.open-ended";
559
+ individualScores: {
560
+ replyIndex: number;
561
+ value: number;
562
+ }[];
563
+ namespace: "peerbench.ai";
564
+ kind: "llm/multi-turn.sc";
446
565
  schemaVersion: 1;
447
- metadata?: Record<string, unknown> | undefined;
448
566
  explanation?: string | undefined;
567
+ metadata?: Record<string, unknown> | undefined;
568
+ scorerAISystemPrompt?: string | undefined;
569
+ scorerAISystemPromptId?: string | undefined;
449
570
  scorerAIProvider?: string | undefined;
450
571
  scorerAIModelSlug?: string | undefined;
451
572
  scorerAIInputTokensUsed?: number | undefined;
@@ -458,25 +579,39 @@ declare const PeerbenchOpenEndedScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
458
579
  value: number;
459
580
  responseId: string;
460
581
  scoringMethod: "ai" | "human" | "algo";
461
- kind: "pb.sc.open-ended";
582
+ individualScores: {
583
+ replyIndex: number;
584
+ value: number;
585
+ }[];
586
+ namespace: "peerbench.ai";
587
+ kind: "llm/multi-turn.sc";
462
588
  schemaVersion: 1;
463
- metadata?: Record<string, unknown> | undefined;
464
589
  explanation?: string | undefined;
590
+ metadata?: Record<string, unknown> | undefined;
591
+ scorerAISystemPrompt?: string | undefined;
592
+ scorerAISystemPromptId?: string | undefined;
465
593
  scorerAIProvider?: string | undefined;
466
594
  scorerAIModelSlug?: string | undefined;
467
595
  scorerAIInputTokensUsed?: number | undefined;
468
596
  scorerAIOutputTokensUsed?: number | undefined;
469
597
  scorerAIInputCost?: string | undefined;
470
598
  scorerAIOutputCost?: string | undefined;
471
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
599
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
472
600
  id: string;
473
601
  value: number;
474
602
  responseId: string;
475
603
  scoringMethod: "ai" | "human" | "algo";
476
- kind: "pb.sc.open-ended";
604
+ individualScores: {
605
+ replyIndex: number;
606
+ value: number;
607
+ }[];
608
+ namespace: "peerbench.ai";
609
+ kind: "llm/multi-turn.sc";
477
610
  schemaVersion: 1;
478
- metadata?: Record<string, unknown> | undefined;
479
611
  explanation?: string | undefined;
612
+ metadata?: Record<string, unknown> | undefined;
613
+ scorerAISystemPrompt?: string | undefined;
614
+ scorerAISystemPromptId?: string | undefined;
480
615
  scorerAIProvider?: string | undefined;
481
616
  scorerAIModelSlug?: string | undefined;
482
617
  scorerAIInputTokensUsed?: number | undefined;
@@ -485,406 +620,254 @@ declare const PeerbenchOpenEndedScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
485
620
  scorerAIOutputCost?: string | undefined;
486
621
  }>;
487
622
  };
488
- type PeerbenchOpenEndedScoreV1 = z$1.infer<typeof PeerbenchOpenEndedScoreSchemaV1>;
623
+ type MultiTurnScoreV1 = z.infer<typeof MultiTurnScoreSchemaV1>;
489
624
 
490
- declare const PeerbenchBenchmarkSpecSchemaV1: z__default.ZodObject<Omit<{
491
- kind: z__default.ZodString;
492
- schemaVersion: z__default.ZodNumber;
493
- metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
494
- }, "kind" | "schemaVersion"> & {
495
- /**
496
- * Big text contents that can be referred as <text>{key}</text> in a prompt or system prompt.
497
- */
498
- blobTexts: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodString>>;
625
+ declare const QAKind: "llm/qa";
626
+ declare const QATestCaseSchemaV1: z.ZodObject<Omit<{
627
+ id: z.ZodString;
628
+ namespace: z.ZodString;
629
+ schemaVersion: z.ZodNumber;
630
+ kind: z.ZodString;
631
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
632
+ }, "kind" | "namespace" | "schemaVersion"> & {
633
+ question: z.ZodString;
634
+ goodAnswers: z.ZodArray<z.ZodString>;
635
+ badAnswers: z.ZodArray<z.ZodString>;
499
636
  } & {
500
- kind: z__default.ZodLiteral<"pb.benchmark.spec">;
501
- schemaVersion: z__default.ZodLiteral<1>;
502
- }, z__default.core.$strip> & {
637
+ namespace: z.ZodLiteral<"peerbench.ai">;
638
+ kind: z.ZodLiteral<"llm/qa.tc">;
639
+ schemaVersion: z.ZodLiteral<1>;
640
+ }, z.core.$strip> & {
503
641
  new: (input: Omit<{
504
- kind: "pb.benchmark.spec";
642
+ id: string;
643
+ question: string;
644
+ goodAnswers: string[];
645
+ badAnswers: string[];
646
+ namespace: "peerbench.ai";
647
+ kind: "llm/qa.tc";
505
648
  schemaVersion: 1;
506
649
  metadata?: Record<string, unknown> | undefined;
507
- blobTexts?: Record<string, string> | undefined;
508
- }, "kind" | "schemaVersion">) => {
509
- kind: "pb.benchmark.spec";
650
+ }, "kind" | "namespace" | "schemaVersion">) => {
651
+ id: string;
652
+ question: string;
653
+ goodAnswers: string[];
654
+ badAnswers: string[];
655
+ namespace: "peerbench.ai";
656
+ kind: "llm/qa.tc";
510
657
  schemaVersion: 1;
511
658
  metadata?: Record<string, unknown> | undefined;
512
- blobTexts?: Record<string, string> | undefined;
513
659
  };
514
660
  newWithId(input: Omit<{
515
- kind: "pb.benchmark.spec";
516
- schemaVersion: 1;
517
- metadata?: Record<string, unknown> | undefined;
518
- blobTexts?: Record<string, string> | undefined;
519
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
520
- kind: "pb.benchmark.spec";
521
- schemaVersion: 1;
522
- metadata?: Record<string, unknown> | undefined;
523
- blobTexts?: Record<string, string> | undefined;
524
- }>;
525
- };
526
- type PeerbenchBenchmarkSpecV1 = z__default.infer<typeof PeerbenchBenchmarkSpecSchemaV1>;
527
-
528
- declare class PeerbenchJSONDataLoader extends GenericJSONArrayDataLoader<PeerbenchMultipleChoiceTestCaseV1 | PeerbenchOpenEndedTestCaseV1, PeerbenchMultipleChoiceResponseV1 | PeerbenchOpenEndedResponseV1, PeerbenchMultipleChoiceScoreV1 | PeerbenchOpenEndedScoreV1> {
529
- readonly kind = "pb.load.json.data";
530
- loadBenchmarkSpec(params: {
531
- content: Uint8Array;
532
- }): Promise<PeerbenchBenchmarkSpecV1>;
533
- protected testCaseBuilder(data: any): {
534
661
  id: string;
535
662
  question: string;
536
- options: Record<string, string>;
537
- answer: string;
538
- answerKey: string;
539
- kind: "pb.ts.mcq";
663
+ goodAnswers: string[];
664
+ badAnswers: string[];
665
+ namespace: "peerbench.ai";
666
+ kind: "llm/qa.tc";
540
667
  schemaVersion: 1;
541
668
  metadata?: Record<string, unknown> | undefined;
542
- } | {
669
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
543
670
  id: string;
544
671
  question: string;
545
- kind: "pb.ts.open-ended";
672
+ goodAnswers: string[];
673
+ badAnswers: string[];
674
+ namespace: "peerbench.ai";
675
+ kind: "llm/qa.tc";
546
676
  schemaVersion: 1;
547
677
  metadata?: Record<string, unknown> | undefined;
548
- answer?: string | undefined;
549
- } | undefined;
550
- protected responseBuilder(data: any): Promise<{
678
+ }>;
679
+ };
680
+ type QATestCaseV1 = z.infer<typeof QATestCaseSchemaV1>;
681
+ declare const QAResponseSchemaV1: z.ZodObject<Omit<{
682
+ id: z.ZodString;
683
+ namespace: z.ZodString;
684
+ schemaVersion: z.ZodNumber;
685
+ kind: z.ZodString;
686
+ startedAt: z.ZodNumber;
687
+ completedAt: z.ZodNumber;
688
+ testCaseId: z.ZodString;
689
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
690
+ }, "kind" | "namespace" | "schemaVersion"> & {
691
+ data: z.ZodString;
692
+ modelSlug: z.ZodString;
693
+ provider: z.ZodString;
694
+ systemPromptId: z.ZodOptional<z.ZodString>;
695
+ inputTokensUsed: z.ZodOptional<z.ZodNumber>;
696
+ outputTokensUsed: z.ZodOptional<z.ZodNumber>;
697
+ inputCost: z.ZodOptional<z.ZodString>;
698
+ outputCost: z.ZodOptional<z.ZodString>;
699
+ } & {
700
+ namespace: z.ZodLiteral<"peerbench.ai">;
701
+ kind: z.ZodLiteral<"llm/qa.rs">;
702
+ schemaVersion: z.ZodLiteral<1>;
703
+ }, z.core.$strip> & {
704
+ new: (input: Omit<{
705
+ startedAt: number;
706
+ completedAt: number;
551
707
  id: string;
552
708
  testCaseId: string;
709
+ data: string;
710
+ modelSlug: string;
711
+ provider: string;
712
+ namespace: "peerbench.ai";
713
+ kind: "llm/qa.rs";
714
+ schemaVersion: 1;
715
+ metadata?: Record<string, unknown> | undefined;
716
+ systemPromptId?: string | undefined;
717
+ inputTokensUsed?: number | undefined;
718
+ outputTokensUsed?: number | undefined;
719
+ inputCost?: string | undefined;
720
+ outputCost?: string | undefined;
721
+ }, "kind" | "namespace" | "schemaVersion">) => {
553
722
  startedAt: number;
554
723
  completedAt: number;
724
+ id: string;
725
+ testCaseId: string;
555
726
  data: string;
556
- provider: string;
557
727
  modelSlug: string;
558
- kind: "pb.rs.mcq";
728
+ provider: string;
729
+ namespace: "peerbench.ai";
730
+ kind: "llm/qa.rs";
559
731
  schemaVersion: 1;
560
732
  metadata?: Record<string, unknown> | undefined;
733
+ systemPromptId?: string | undefined;
561
734
  inputTokensUsed?: number | undefined;
562
735
  outputTokensUsed?: number | undefined;
563
736
  inputCost?: string | undefined;
564
737
  outputCost?: string | undefined;
565
- systemPromptId?: string | undefined;
566
- } | {
738
+ };
739
+ newWithId(input: Omit<{
740
+ startedAt: number;
741
+ completedAt: number;
567
742
  id: string;
568
743
  testCaseId: string;
744
+ data: string;
745
+ modelSlug: string;
746
+ provider: string;
747
+ namespace: "peerbench.ai";
748
+ kind: "llm/qa.rs";
749
+ schemaVersion: 1;
750
+ metadata?: Record<string, unknown> | undefined;
751
+ systemPromptId?: string | undefined;
752
+ inputTokensUsed?: number | undefined;
753
+ outputTokensUsed?: number | undefined;
754
+ inputCost?: string | undefined;
755
+ outputCost?: string | undefined;
756
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
569
757
  startedAt: number;
570
758
  completedAt: number;
759
+ id: string;
760
+ testCaseId: string;
571
761
  data: string;
572
- provider: string;
573
762
  modelSlug: string;
574
- kind: "pb.rs.open-ended";
763
+ provider: string;
764
+ namespace: "peerbench.ai";
765
+ kind: "llm/qa.rs";
575
766
  schemaVersion: 1;
576
767
  metadata?: Record<string, unknown> | undefined;
768
+ systemPromptId?: string | undefined;
577
769
  inputTokensUsed?: number | undefined;
578
770
  outputTokensUsed?: number | undefined;
579
771
  inputCost?: string | undefined;
580
772
  outputCost?: string | undefined;
581
- systemPromptId?: string | undefined;
582
- } | undefined>;
583
- protected scoreBuilder(data: any): Promise<{
773
+ }>;
774
+ };
775
+ type QAResponseV1 = z.infer<typeof QAResponseSchemaV1>;
776
+ declare const QAScoreSchemaV1: z.ZodObject<Omit<{
777
+ id: z.ZodString;
778
+ namespace: z.ZodString;
779
+ kind: z.ZodString;
780
+ schemaVersion: z.ZodNumber;
781
+ value: z.ZodNumber;
782
+ responseId: z.ZodString;
783
+ explanation: z.ZodOptional<z.ZodString>;
784
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
785
+ scoringMethod: z.ZodEnum<{
786
+ readonly ai: "ai";
787
+ readonly human: "human";
788
+ readonly algo: "algo";
789
+ }>;
790
+ }, "kind" | "namespace" | "schemaVersion"> & {
791
+ scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
792
+ scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
793
+ scorerAIProvider: z.ZodOptional<z.ZodString>;
794
+ scorerAIModelSlug: z.ZodOptional<z.ZodString>;
795
+ scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
796
+ scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
797
+ scorerAIInputCost: z.ZodOptional<z.ZodString>;
798
+ scorerAIOutputCost: z.ZodOptional<z.ZodString>;
799
+ } & {
800
+ namespace: z.ZodLiteral<"peerbench.ai">;
801
+ kind: z.ZodLiteral<"llm/qa.sc">;
802
+ schemaVersion: z.ZodLiteral<1>;
803
+ }, z.core.$strip> & {
804
+ new: (input: Omit<{
584
805
  id: string;
585
806
  value: number;
586
807
  responseId: string;
587
808
  scoringMethod: "ai" | "human" | "algo";
588
- extractedAnswers: string[];
589
- kind: "pb.sc.mcq";
809
+ namespace: "peerbench.ai";
810
+ kind: "llm/qa.sc";
590
811
  schemaVersion: 1;
591
- metadata?: Record<string, unknown> | undefined;
592
812
  explanation?: string | undefined;
813
+ metadata?: Record<string, unknown> | undefined;
814
+ scorerAISystemPrompt?: string | undefined;
815
+ scorerAISystemPromptId?: string | undefined;
593
816
  scorerAIProvider?: string | undefined;
594
817
  scorerAIModelSlug?: string | undefined;
595
818
  scorerAIInputTokensUsed?: number | undefined;
596
819
  scorerAIOutputTokensUsed?: number | undefined;
597
820
  scorerAIInputCost?: string | undefined;
598
821
  scorerAIOutputCost?: string | undefined;
599
- } | {
822
+ }, "kind" | "namespace" | "schemaVersion">) => {
600
823
  id: string;
601
824
  value: number;
602
825
  responseId: string;
603
826
  scoringMethod: "ai" | "human" | "algo";
604
- kind: "pb.sc.open-ended";
827
+ namespace: "peerbench.ai";
828
+ kind: "llm/qa.sc";
605
829
  schemaVersion: 1;
606
- metadata?: Record<string, unknown> | undefined;
607
830
  explanation?: string | undefined;
831
+ metadata?: Record<string, unknown> | undefined;
832
+ scorerAISystemPrompt?: string | undefined;
833
+ scorerAISystemPromptId?: string | undefined;
608
834
  scorerAIProvider?: string | undefined;
609
835
  scorerAIModelSlug?: string | undefined;
610
836
  scorerAIInputTokensUsed?: number | undefined;
611
837
  scorerAIOutputTokensUsed?: number | undefined;
612
838
  scorerAIInputCost?: string | undefined;
613
839
  scorerAIOutputCost?: string | undefined;
614
- } | undefined>;
615
- }
616
-
617
- type ResponseTypes = PeerbenchMultipleChoiceResponseV1 | PeerbenchOpenEndedResponseV1;
618
- type ScoreTypes = PeerbenchMultipleChoiceScoreV1 | PeerbenchOpenEndedScoreV1;
619
- type TestCaseTypes = PeerbenchMultipleChoiceTestCaseV1 | PeerbenchOpenEndedTestCaseV1;
620
- declare function runTestCase$2(params: {
621
- testCase: TestCaseTypes;
622
- provider: AbstractLLMProvider;
623
- scorer?: MCQScorer | LLMJudgeScorer;
624
- spec?: PeerbenchBenchmarkSpecV1;
625
- runConfig: {
626
- model: string;
627
- llmJudgeModel?: string;
628
- };
629
- systemPrompt?: SimpleSystemPromptV1;
630
- idGenerators?: {
631
- response?: IdGenerator;
632
- score?: IdGenerator;
633
840
  };
634
- }): Promise<RunnerResult<ResponseTypes, ScoreTypes>>;
635
-
636
- type index$2_PeerbenchJSONDataLoader = PeerbenchJSONDataLoader;
637
- declare const index$2_PeerbenchJSONDataLoader: typeof PeerbenchJSONDataLoader;
638
- declare const index$2_PeerbenchMultipleChoiceResponseSchemaV1: typeof PeerbenchMultipleChoiceResponseSchemaV1;
639
- type index$2_PeerbenchMultipleChoiceResponseV1 = PeerbenchMultipleChoiceResponseV1;
640
- declare const index$2_PeerbenchMultipleChoiceScoreSchemaV1: typeof PeerbenchMultipleChoiceScoreSchemaV1;
641
- type index$2_PeerbenchMultipleChoiceScoreV1 = PeerbenchMultipleChoiceScoreV1;
642
- declare const index$2_PeerbenchMultipleChoiceTestCaseSchemaV1: typeof PeerbenchMultipleChoiceTestCaseSchemaV1;
643
- type index$2_PeerbenchMultipleChoiceTestCaseV1 = PeerbenchMultipleChoiceTestCaseV1;
644
- declare const index$2_PeerbenchOpenEndedResponseSchemaV1: typeof PeerbenchOpenEndedResponseSchemaV1;
645
- type index$2_PeerbenchOpenEndedResponseV1 = PeerbenchOpenEndedResponseV1;
646
- declare const index$2_PeerbenchOpenEndedScoreSchemaV1: typeof PeerbenchOpenEndedScoreSchemaV1;
647
- type index$2_PeerbenchOpenEndedScoreV1 = PeerbenchOpenEndedScoreV1;
648
- declare const index$2_PeerbenchOpenEndedTestCaseSchemaV1: typeof PeerbenchOpenEndedTestCaseSchemaV1;
649
- type index$2_PeerbenchOpenEndedTestCaseV1 = PeerbenchOpenEndedTestCaseV1;
650
- declare namespace index$2 {
651
- export { index$2_PeerbenchJSONDataLoader as PeerbenchJSONDataLoader, index$2_PeerbenchMultipleChoiceResponseSchemaV1 as PeerbenchMultipleChoiceResponseSchemaV1, type index$2_PeerbenchMultipleChoiceResponseV1 as PeerbenchMultipleChoiceResponseV1, index$2_PeerbenchMultipleChoiceScoreSchemaV1 as PeerbenchMultipleChoiceScoreSchemaV1, type index$2_PeerbenchMultipleChoiceScoreV1 as PeerbenchMultipleChoiceScoreV1, index$2_PeerbenchMultipleChoiceTestCaseSchemaV1 as PeerbenchMultipleChoiceTestCaseSchemaV1, type index$2_PeerbenchMultipleChoiceTestCaseV1 as PeerbenchMultipleChoiceTestCaseV1, index$2_PeerbenchOpenEndedResponseSchemaV1 as PeerbenchOpenEndedResponseSchemaV1, type index$2_PeerbenchOpenEndedResponseV1 as PeerbenchOpenEndedResponseV1, index$2_PeerbenchOpenEndedScoreSchemaV1 as PeerbenchOpenEndedScoreSchemaV1, type index$2_PeerbenchOpenEndedScoreV1 as PeerbenchOpenEndedScoreV1, index$2_PeerbenchOpenEndedTestCaseSchemaV1 as PeerbenchOpenEndedTestCaseSchemaV1, type index$2_PeerbenchOpenEndedTestCaseV1 as PeerbenchOpenEndedTestCaseV1, runTestCase$2 as runTestCase };
652
- }
653
-
654
- declare const MMLUProMainTestCaseSchemaV1: z$1.ZodObject<Omit<{
655
- id: z$1.ZodString;
656
- kind: z$1.ZodString;
657
- schemaVersion: z$1.ZodNumber;
658
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
659
- }, "kind" | "schemaVersion"> & {
660
- question: z$1.ZodString;
661
- options: z$1.ZodRecord<z$1.ZodString, z$1.ZodString>;
662
- answer: z$1.ZodString;
663
- answerKey: z$1.ZodString;
664
- } & {
665
- kind: z$1.ZodLiteral<"mmlu-pro.ts.main">;
666
- schemaVersion: z$1.ZodLiteral<1>;
667
- }, z$1.core.$strip> & {
668
- new: (input: Omit<{
669
- id: string;
670
- question: string;
671
- options: Record<string, string>;
672
- answer: string;
673
- answerKey: string;
674
- kind: "mmlu-pro.ts.main";
675
- schemaVersion: 1;
676
- metadata?: Record<string, unknown> | undefined;
677
- }, "kind" | "schemaVersion">) => {
678
- id: string;
679
- question: string;
680
- options: Record<string, string>;
681
- answer: string;
682
- answerKey: string;
683
- kind: "mmlu-pro.ts.main";
684
- schemaVersion: 1;
685
- metadata?: Record<string, unknown> | undefined;
686
- };
687
- newWithId(input: Omit<{
688
- id: string;
689
- question: string;
690
- options: Record<string, string>;
691
- answer: string;
692
- answerKey: string;
693
- kind: "mmlu-pro.ts.main";
694
- schemaVersion: 1;
695
- metadata?: Record<string, unknown> | undefined;
696
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
697
- id: string;
698
- question: string;
699
- options: Record<string, string>;
700
- answer: string;
701
- answerKey: string;
702
- kind: "mmlu-pro.ts.main";
703
- schemaVersion: 1;
704
- metadata?: Record<string, unknown> | undefined;
705
- }>;
706
- };
707
- type MMLUProMainTestCaseV1 = z$1.infer<typeof MMLUProMainTestCaseSchemaV1>;
708
- declare const MMLUProMainResponseSchemaV1: z$1.ZodObject<Omit<Omit<{
709
- id: z$1.ZodString;
710
- kind: z$1.ZodString;
711
- schemaVersion: z$1.ZodNumber;
712
- startedAt: z$1.ZodNumber;
713
- completedAt: z$1.ZodNumber;
714
- testCaseId: z$1.ZodString;
715
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
716
- }, "kind" | "schemaVersion"> & {
717
- data: z$1.ZodString;
718
- modelSlug: z$1.ZodString;
719
- provider: z$1.ZodString;
720
- systemPromptId: z$1.ZodOptional<z$1.ZodString>;
721
- inputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
722
- outputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
723
- inputCost: z$1.ZodOptional<z$1.ZodString>;
724
- outputCost: z$1.ZodOptional<z$1.ZodString>;
725
- } & {
726
- kind: z$1.ZodString;
727
- schemaVersion: z$1.ZodNumber;
728
- }, "kind" | "schemaVersion"> & {
729
- kind: z$1.ZodLiteral<"mmlu-pro.rs.main">;
730
- schemaVersion: z$1.ZodLiteral<1>;
731
- }, z$1.core.$strip> & {
732
- new: (input: Omit<{
733
- id: string;
734
- testCaseId: string;
735
- startedAt: number;
736
- completedAt: number;
737
- data: string;
738
- provider: string;
739
- modelSlug: string;
740
- kind: "mmlu-pro.rs.main";
741
- schemaVersion: 1;
742
- metadata?: Record<string, unknown> | undefined;
743
- inputTokensUsed?: number | undefined;
744
- outputTokensUsed?: number | undefined;
745
- inputCost?: string | undefined;
746
- outputCost?: string | undefined;
747
- systemPromptId?: string | undefined;
748
- }, "kind" | "schemaVersion">) => {
749
- id: string;
750
- testCaseId: string;
751
- startedAt: number;
752
- completedAt: number;
753
- data: string;
754
- provider: string;
755
- modelSlug: string;
756
- kind: "mmlu-pro.rs.main";
757
- schemaVersion: 1;
758
- metadata?: Record<string, unknown> | undefined;
759
- inputTokensUsed?: number | undefined;
760
- outputTokensUsed?: number | undefined;
761
- inputCost?: string | undefined;
762
- outputCost?: string | undefined;
763
- systemPromptId?: string | undefined;
764
- };
765
- newWithId(input: Omit<{
766
- id: string;
767
- testCaseId: string;
768
- startedAt: number;
769
- completedAt: number;
770
- data: string;
771
- provider: string;
772
- modelSlug: string;
773
- kind: "mmlu-pro.rs.main";
774
- schemaVersion: 1;
775
- metadata?: Record<string, unknown> | undefined;
776
- inputTokensUsed?: number | undefined;
777
- outputTokensUsed?: number | undefined;
778
- inputCost?: string | undefined;
779
- outputCost?: string | undefined;
780
- systemPromptId?: string | undefined;
781
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
782
- id: string;
783
- testCaseId: string;
784
- startedAt: number;
785
- completedAt: number;
786
- data: string;
787
- provider: string;
788
- modelSlug: string;
789
- kind: "mmlu-pro.rs.main";
790
- schemaVersion: 1;
791
- metadata?: Record<string, unknown> | undefined;
792
- inputTokensUsed?: number | undefined;
793
- outputTokensUsed?: number | undefined;
794
- inputCost?: string | undefined;
795
- outputCost?: string | undefined;
796
- systemPromptId?: string | undefined;
797
- }>;
798
- };
799
- type MMLUProMainResponseV1 = z$1.infer<typeof MMLUProMainResponseSchemaV1>;
800
- declare const MMLUProMainScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
801
- id: z$1.ZodString;
802
- kind: z$1.ZodString;
803
- schemaVersion: z$1.ZodNumber;
804
- value: z$1.ZodNumber;
805
- responseId: z$1.ZodString;
806
- explanation: z$1.ZodOptional<z$1.ZodString>;
807
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
808
- scoringMethod: z$1.ZodEnum<{
809
- readonly ai: "ai";
810
- readonly human: "human";
811
- readonly algo: "algo";
812
- }>;
813
- }, "kind" | "schemaVersion"> & {
814
- scorerAIProvider: z$1.ZodOptional<z$1.ZodString>;
815
- scorerAIModelSlug: z$1.ZodOptional<z$1.ZodString>;
816
- scorerAIInputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
817
- scorerAIOutputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
818
- scorerAIInputCost: z$1.ZodOptional<z$1.ZodString>;
819
- scorerAIOutputCost: z$1.ZodOptional<z$1.ZodString>;
820
- } & {
821
- kind: z$1.ZodString;
822
- schemaVersion: z$1.ZodNumber;
823
- }, "kind" | "schemaVersion"> & {
824
- extractedAnswers: z$1.ZodArray<z$1.ZodString>;
825
- } & {
826
- kind: z$1.ZodLiteral<"mmlu-pro.sc.main">;
827
- schemaVersion: z$1.ZodLiteral<1>;
828
- }, z$1.core.$strip> & {
829
- new: (input: Omit<{
841
+ newWithId(input: Omit<{
830
842
  id: string;
831
843
  value: number;
832
844
  responseId: string;
833
845
  scoringMethod: "ai" | "human" | "algo";
834
- extractedAnswers: string[];
835
- kind: "mmlu-pro.sc.main";
846
+ namespace: "peerbench.ai";
847
+ kind: "llm/qa.sc";
836
848
  schemaVersion: 1;
837
- metadata?: Record<string, unknown> | undefined;
838
849
  explanation?: string | undefined;
839
- scorerAIProvider?: string | undefined;
840
- scorerAIModelSlug?: string | undefined;
841
- scorerAIInputTokensUsed?: number | undefined;
842
- scorerAIOutputTokensUsed?: number | undefined;
843
- scorerAIInputCost?: string | undefined;
844
- scorerAIOutputCost?: string | undefined;
845
- }, "kind" | "schemaVersion">) => {
846
- id: string;
847
- value: number;
848
- responseId: string;
849
- scoringMethod: "ai" | "human" | "algo";
850
- extractedAnswers: string[];
851
- kind: "mmlu-pro.sc.main";
852
- schemaVersion: 1;
853
850
  metadata?: Record<string, unknown> | undefined;
854
- explanation?: string | undefined;
851
+ scorerAISystemPrompt?: string | undefined;
852
+ scorerAISystemPromptId?: string | undefined;
855
853
  scorerAIProvider?: string | undefined;
856
854
  scorerAIModelSlug?: string | undefined;
857
855
  scorerAIInputTokensUsed?: number | undefined;
858
856
  scorerAIOutputTokensUsed?: number | undefined;
859
857
  scorerAIInputCost?: string | undefined;
860
858
  scorerAIOutputCost?: string | undefined;
861
- };
862
- newWithId(input: Omit<{
859
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
863
860
  id: string;
864
861
  value: number;
865
862
  responseId: string;
866
863
  scoringMethod: "ai" | "human" | "algo";
867
- extractedAnswers: string[];
868
- kind: "mmlu-pro.sc.main";
864
+ namespace: "peerbench.ai";
865
+ kind: "llm/qa.sc";
869
866
  schemaVersion: 1;
870
- metadata?: Record<string, unknown> | undefined;
871
867
  explanation?: string | undefined;
872
- scorerAIProvider?: string | undefined;
873
- scorerAIModelSlug?: string | undefined;
874
- scorerAIInputTokensUsed?: number | undefined;
875
- scorerAIOutputTokensUsed?: number | undefined;
876
- scorerAIInputCost?: string | undefined;
877
- scorerAIOutputCost?: string | undefined;
878
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
879
- id: string;
880
- value: number;
881
- responseId: string;
882
- scoringMethod: "ai" | "human" | "algo";
883
- extractedAnswers: string[];
884
- kind: "mmlu-pro.sc.main";
885
- schemaVersion: 1;
886
868
  metadata?: Record<string, unknown> | undefined;
887
- explanation?: string | undefined;
869
+ scorerAISystemPrompt?: string | undefined;
870
+ scorerAISystemPromptId?: string | undefined;
888
871
  scorerAIProvider?: string | undefined;
889
872
  scorerAIModelSlug?: string | undefined;
890
873
  scorerAIInputTokensUsed?: number | undefined;
@@ -893,806 +876,166 @@ declare const MMLUProMainScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
893
876
  scorerAIOutputCost?: string | undefined;
894
877
  }>;
895
878
  };
896
- type MMLUProMainScoreV1 = z$1.infer<typeof MMLUProMainScoreSchemaV1>;
879
+ type QAScoreV1 = z.infer<typeof QAScoreSchemaV1>;
897
880
 
898
- declare const MMLUProBenchmarkSpecSchemaV1: z__default.ZodObject<Omit<{
899
- kind: z__default.ZodString;
900
- schemaVersion: z__default.ZodNumber;
901
- metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
902
- }, "kind" | "schemaVersion"> & {
903
- kind: z__default.ZodLiteral<"mmlu-pro.benchmark.spec">;
904
- schemaVersion: z__default.ZodLiteral<1>;
905
- }, z__default.core.$strip> & {
906
- new: (input: Omit<{
907
- kind: "mmlu-pro.benchmark.spec";
881
+ declare const peerbenchRunner: (params: {
882
+ testCase: {
883
+ id: string;
884
+ question: string;
885
+ options: Record<string, string>;
886
+ correctAnswerKeys: string[];
887
+ namespace: "peerbench.ai";
888
+ kind: "llm/mcq.tc";
908
889
  schemaVersion: 1;
909
890
  metadata?: Record<string, unknown> | undefined;
910
- }, "kind" | "schemaVersion">) => {
911
- kind: "mmlu-pro.benchmark.spec";
891
+ } | {
892
+ id: string;
893
+ question: string;
894
+ goodAnswers: string[];
895
+ badAnswers: string[];
896
+ namespace: "peerbench.ai";
897
+ kind: "llm/qa.tc";
912
898
  schemaVersion: 1;
913
899
  metadata?: Record<string, unknown> | undefined;
914
900
  };
915
- newWithId(input: Omit<{
916
- kind: "mmlu-pro.benchmark.spec";
917
- schemaVersion: 1;
918
- metadata?: Record<string, unknown> | undefined;
919
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
920
- kind: "mmlu-pro.benchmark.spec";
921
- schemaVersion: 1;
922
- metadata?: Record<string, unknown> | undefined;
923
- }>;
924
- };
925
- type MMLUProBenchmarkSpecV1 = z__default.infer<typeof MMLUProBenchmarkSpecSchemaV1>;
926
-
927
- declare class MMLUProJSONDataLoader extends AbstractDataLoader {
928
- readonly kind = "mmlu-pro.load.json.data";
929
- loadData(params: {
930
- content: Uint8Array;
931
- }): LoaderResult<MMLUProMainTestCaseV1, MMLUProMainResponseV1, MMLUProMainScoreV1>;
932
- loadBenchmarkSpec(params: {
933
- content: Uint8Array;
934
- }): Promise<MMLUProBenchmarkSpecV1>;
935
- }
936
- declare class MMLUProParquetDataLoader extends AbstractDataLoader {
937
- readonly kind = "mmlu-pro.load.parquet.data";
938
- loadData(params: {
939
- content: Uint8Array;
940
- }): Promise<LoaderResult<MMLUProMainTestCaseV1, MMLUProMainResponseV1, MMLUProMainScoreV1>>;
941
- loadBenchmarkSpec(params: {
942
- content: Uint8Array;
943
- }): Promise<MMLUProBenchmarkSpecV1>;
944
- }
945
-
946
- declare function runTestCase$1(params: {
947
- testCase: MMLUProMainTestCaseV1;
948
901
  provider: AbstractLLMProvider;
949
- scorer?: MCQScorer | LLMJudgeScorer;
950
- spec?: MMLUProBenchmarkSpecV1;
902
+ scorer?: MCQScorer | LLMAsAJudgeScorer | undefined;
951
903
  runConfig: {
952
904
  model: string;
953
- llmJudgeModel?: string;
905
+ llmJudgeModel?: string | undefined;
906
+ llmJudgeSystemPrompt?: {
907
+ id: string;
908
+ version: number;
909
+ content: string;
910
+ namespace: "peerbench.ai";
911
+ kind: `${string}/simple.sys-prompt`;
912
+ schemaVersion: 1;
913
+ metadata?: Record<string, unknown> | undefined;
914
+ } | undefined;
915
+ systemPrompt?: {
916
+ id: string;
917
+ version: number;
918
+ content: string;
919
+ namespace: "peerbench.ai";
920
+ kind: `${string}/simple.sys-prompt`;
921
+ schemaVersion: 1;
922
+ metadata?: Record<string, unknown> | undefined;
923
+ } | undefined;
924
+ templateVariables?: Record<string, string> | undefined;
954
925
  };
955
- systemPrompt?: SimpleSystemPromptV1;
956
926
  idGenerators?: {
957
927
  response?: IdGenerator;
958
928
  score?: IdGenerator;
959
929
  };
960
- }): Promise<RunnerResult<MMLUProMainResponseV1, MMLUProMainScoreV1>>;
961
-
962
- declare const BaseMMLUProScoreSchemaV1: z.ZodObject<Omit<{
963
- id: z.ZodString;
964
- kind: z.ZodString;
965
- schemaVersion: z.ZodNumber;
966
- value: z.ZodNumber;
967
- responseId: z.ZodString;
968
- explanation: z.ZodOptional<z.ZodString>;
969
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
970
- scoringMethod: z.ZodEnum<{
971
- readonly ai: "ai";
972
- readonly human: "human";
973
- readonly algo: "algo";
974
- }>;
975
- }, "kind" | "schemaVersion"> & {
976
- scorerAIProvider: z.ZodOptional<z.ZodString>;
977
- scorerAIModelSlug: z.ZodOptional<z.ZodString>;
978
- scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
979
- scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
980
- scorerAIInputCost: z.ZodOptional<z.ZodString>;
981
- scorerAIOutputCost: z.ZodOptional<z.ZodString>;
982
- } & {
983
- kind: z.ZodString;
984
- schemaVersion: z.ZodNumber;
985
- }, zod_v4_core.$strip> & {
986
- new: (input: Omit<{
987
- id: string;
988
- value: number;
989
- responseId: string;
990
- scoringMethod: "ai" | "human" | "algo";
991
- kind: string;
992
- schemaVersion: number;
993
- metadata?: Record<string, unknown> | undefined;
994
- explanation?: string | undefined;
995
- scorerAIProvider?: string | undefined;
996
- scorerAIModelSlug?: string | undefined;
997
- scorerAIInputTokensUsed?: number | undefined;
998
- scorerAIOutputTokensUsed?: number | undefined;
999
- scorerAIInputCost?: string | undefined;
1000
- scorerAIOutputCost?: string | undefined;
1001
- }, "kind" | "schemaVersion">) => {
1002
- id: string;
1003
- value: number;
1004
- responseId: string;
1005
- scoringMethod: "ai" | "human" | "algo";
1006
- kind: string;
1007
- schemaVersion: number;
1008
- metadata?: Record<string, unknown> | undefined;
1009
- explanation?: string | undefined;
1010
- scorerAIProvider?: string | undefined;
1011
- scorerAIModelSlug?: string | undefined;
1012
- scorerAIInputTokensUsed?: number | undefined;
1013
- scorerAIOutputTokensUsed?: number | undefined;
1014
- scorerAIInputCost?: string | undefined;
1015
- scorerAIOutputCost?: string | undefined;
1016
- };
1017
- newWithId(input: Omit<{
1018
- id: string;
1019
- value: number;
1020
- responseId: string;
1021
- scoringMethod: "ai" | "human" | "algo";
1022
- kind: string;
1023
- schemaVersion: number;
1024
- metadata?: Record<string, unknown> | undefined;
1025
- explanation?: string | undefined;
1026
- scorerAIProvider?: string | undefined;
1027
- scorerAIModelSlug?: string | undefined;
1028
- scorerAIInputTokensUsed?: number | undefined;
1029
- scorerAIOutputTokensUsed?: number | undefined;
1030
- scorerAIInputCost?: string | undefined;
1031
- scorerAIOutputCost?: string | undefined;
1032
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
1033
- id: string;
1034
- value: number;
1035
- responseId: string;
1036
- scoringMethod: "ai" | "human" | "algo";
1037
- kind: string;
1038
- schemaVersion: number;
1039
- metadata?: Record<string, unknown> | undefined;
1040
- explanation?: string | undefined;
1041
- scorerAIProvider?: string | undefined;
1042
- scorerAIModelSlug?: string | undefined;
1043
- scorerAIInputTokensUsed?: number | undefined;
1044
- scorerAIOutputTokensUsed?: number | undefined;
1045
- scorerAIInputCost?: string | undefined;
1046
- scorerAIOutputCost?: string | undefined;
1047
- }>;
1048
- };
1049
-
1050
- declare const index$1_BaseMMLUProScoreSchemaV1: typeof BaseMMLUProScoreSchemaV1;
1051
- declare const index$1_MMLUProBenchmarkSpecSchemaV1: typeof MMLUProBenchmarkSpecSchemaV1;
1052
- type index$1_MMLUProBenchmarkSpecV1 = MMLUProBenchmarkSpecV1;
1053
- type index$1_MMLUProJSONDataLoader = MMLUProJSONDataLoader;
1054
- declare const index$1_MMLUProJSONDataLoader: typeof MMLUProJSONDataLoader;
1055
- declare const index$1_MMLUProMainResponseSchemaV1: typeof MMLUProMainResponseSchemaV1;
1056
- type index$1_MMLUProMainResponseV1 = MMLUProMainResponseV1;
1057
- declare const index$1_MMLUProMainScoreSchemaV1: typeof MMLUProMainScoreSchemaV1;
1058
- type index$1_MMLUProMainScoreV1 = MMLUProMainScoreV1;
1059
- declare const index$1_MMLUProMainTestCaseSchemaV1: typeof MMLUProMainTestCaseSchemaV1;
1060
- type index$1_MMLUProMainTestCaseV1 = MMLUProMainTestCaseV1;
1061
- type index$1_MMLUProParquetDataLoader = MMLUProParquetDataLoader;
1062
- declare const index$1_MMLUProParquetDataLoader: typeof MMLUProParquetDataLoader;
1063
- declare namespace index$1 {
1064
- export { index$1_BaseMMLUProScoreSchemaV1 as BaseMMLUProScoreSchemaV1, index$1_MMLUProBenchmarkSpecSchemaV1 as MMLUProBenchmarkSpecSchemaV1, type index$1_MMLUProBenchmarkSpecV1 as MMLUProBenchmarkSpecV1, index$1_MMLUProJSONDataLoader as MMLUProJSONDataLoader, index$1_MMLUProMainResponseSchemaV1 as MMLUProMainResponseSchemaV1, type index$1_MMLUProMainResponseV1 as MMLUProMainResponseV1, index$1_MMLUProMainScoreSchemaV1 as MMLUProMainScoreSchemaV1, type index$1_MMLUProMainScoreV1 as MMLUProMainScoreV1, index$1_MMLUProMainTestCaseSchemaV1 as MMLUProMainTestCaseSchemaV1, type index$1_MMLUProMainTestCaseV1 as MMLUProMainTestCaseV1, index$1_MMLUProParquetDataLoader as MMLUProParquetDataLoader, runTestCase$1 as runTestCase };
1065
- }
1066
-
1067
- declare class FNOLFieldsScorer extends AbstractScorer {
1068
- readonly kind = "fnol.fields";
1069
- score(params: {
1070
- fieldsToCollect: Record<string, {
1071
- required?: boolean;
1072
- expected?: unknown;
1073
- description?: string;
1074
- }>;
1075
- extracted?: Record<string, unknown>;
1076
- }): Promise<BaseScorerResult & {
1077
- requiredKeys: string[];
1078
- presentKeys: string[];
1079
- missingKeys: string[];
1080
- mismatchedKeys: string[];
1081
- }>;
1082
- }
1083
-
1084
- declare const FNOLFieldSchemaV1: z$1.ZodObject<{
1085
- description: z$1.ZodString;
1086
- required: z$1.ZodOptional<z$1.ZodBoolean>;
1087
- expected: z$1.ZodOptional<z$1.ZodUnknown>;
1088
- valueType: z$1.ZodOptional<z$1.ZodEnum<{
1089
- readonly string: "string";
1090
- readonly number: "number";
1091
- readonly boolean: "boolean";
1092
- readonly object: "object";
1093
- }>>;
1094
- }, z$1.core.$strip>;
1095
- declare const FNOLTestCaseSchemaV1: z$1.ZodObject<Omit<{
1096
- id: z$1.ZodString;
1097
- kind: z$1.ZodString;
1098
- schemaVersion: z$1.ZodNumber;
1099
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
1100
- }, "kind" | "schemaVersion"> & {
1101
- /**
1102
- * Scenario starter message. This is what the "user" would say initially.
1103
- */
1104
- initialUserMessage: z$1.ZodString;
1105
- /**
1106
- * Private/structured information about the user and the incident.
1107
- * This is used by the user simulator LLM to answer the target model questions.
1108
- */
1109
- userProfile: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
1110
- /**
1111
- * The fields the target model must collect.
1112
- * Keys are canonical identifiers (e.g. "policyNumber", "dateOfLoss").
1113
- */
1114
- fieldsToCollect: z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
1115
- description: z$1.ZodString;
1116
- required: z$1.ZodOptional<z$1.ZodBoolean>;
1117
- expected: z$1.ZodOptional<z$1.ZodUnknown>;
1118
- valueType: z$1.ZodOptional<z$1.ZodEnum<{
1119
- readonly string: "string";
1120
- readonly number: "number";
1121
- readonly boolean: "boolean";
1122
- readonly object: "object";
1123
- }>>;
1124
- }, z$1.core.$strip>>;
1125
- /**
1126
- * Maximum number of back-and-forth turns (target question + user answer).
1127
- */
1128
- maxTurns: z$1.ZodDefault<z$1.ZodNumber>;
1129
- } & {
1130
- kind: z$1.ZodLiteral<"fnol.ts.v1">;
1131
- schemaVersion: z$1.ZodLiteral<1>;
1132
- }, z$1.core.$strip> & {
1133
- new: (input: Omit<{
1134
- id: string;
1135
- initialUserMessage: string;
1136
- userProfile: Record<string, unknown>;
1137
- fieldsToCollect: Record<string, {
1138
- description: string;
1139
- required?: boolean | undefined;
1140
- expected?: unknown;
1141
- valueType?: "string" | "number" | "boolean" | "object" | undefined;
1142
- }>;
1143
- maxTurns: number;
1144
- kind: "fnol.ts.v1";
1145
- schemaVersion: 1;
1146
- metadata?: Record<string, unknown> | undefined;
1147
- }, "kind" | "schemaVersion">) => {
1148
- id: string;
1149
- initialUserMessage: string;
1150
- userProfile: Record<string, unknown>;
1151
- fieldsToCollect: Record<string, {
1152
- description: string;
1153
- required?: boolean | undefined;
1154
- expected?: unknown;
1155
- valueType?: "string" | "number" | "boolean" | "object" | undefined;
1156
- }>;
1157
- maxTurns: number;
1158
- kind: "fnol.ts.v1";
1159
- schemaVersion: 1;
1160
- metadata?: Record<string, unknown> | undefined;
1161
- };
1162
- newWithId(input: Omit<{
1163
- id: string;
1164
- initialUserMessage: string;
1165
- userProfile: Record<string, unknown>;
1166
- fieldsToCollect: Record<string, {
1167
- description: string;
1168
- required?: boolean | undefined;
1169
- expected?: unknown;
1170
- valueType?: "string" | "number" | "boolean" | "object" | undefined;
1171
- }>;
1172
- maxTurns: number;
1173
- kind: "fnol.ts.v1";
1174
- schemaVersion: 1;
1175
- metadata?: Record<string, unknown> | undefined;
1176
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
1177
- id: string;
1178
- initialUserMessage: string;
1179
- userProfile: Record<string, unknown>;
1180
- fieldsToCollect: Record<string, {
1181
- description: string;
1182
- required?: boolean | undefined;
1183
- expected?: unknown;
1184
- valueType?: "string" | "number" | "boolean" | "object" | undefined;
1185
- }>;
1186
- maxTurns: number;
1187
- kind: "fnol.ts.v1";
1188
- schemaVersion: 1;
1189
- metadata?: Record<string, unknown> | undefined;
1190
- }>;
1191
- };
1192
- type FNOLTestCaseV1 = z$1.infer<typeof FNOLTestCaseSchemaV1>;
1193
- declare const FNOLConversationMessageSchemaV1: z$1.ZodObject<{
1194
- role: z$1.ZodEnum<{
1195
- system: "system";
1196
- user: "user";
1197
- assistant: "assistant";
1198
- }>;
1199
- content: z$1.ZodString;
1200
- }, z$1.core.$strip>;
1201
- declare const FNOLResponseSchemaV1: z$1.ZodObject<Omit<Omit<{
1202
- id: z$1.ZodString;
1203
- kind: z$1.ZodString;
1204
- schemaVersion: z$1.ZodNumber;
1205
- startedAt: z$1.ZodNumber;
1206
- completedAt: z$1.ZodNumber;
1207
- testCaseId: z$1.ZodString;
1208
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
1209
- }, "kind" | "schemaVersion"> & {
1210
- data: z$1.ZodString;
1211
- modelSlug: z$1.ZodString;
1212
- provider: z$1.ZodString;
1213
- systemPromptId: z$1.ZodOptional<z$1.ZodString>;
1214
- inputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
1215
- outputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
1216
- inputCost: z$1.ZodOptional<z$1.ZodString>;
1217
- outputCost: z$1.ZodOptional<z$1.ZodString>;
1218
- } & {
1219
- kind: z$1.ZodString;
1220
- schemaVersion: z$1.ZodNumber;
1221
- }, "kind" | "schemaVersion"> & {
1222
- /**
1223
- * Full conversation between the target model and simulated user.
1224
- */
1225
- conversation: z$1.ZodArray<z$1.ZodObject<{
1226
- role: z$1.ZodEnum<{
1227
- system: "system";
1228
- user: "user";
1229
- assistant: "assistant";
1230
- }>;
1231
- content: z$1.ZodString;
1232
- }, z$1.core.$strip>>;
1233
- turnsUsed: z$1.ZodNumber;
1234
- doneReason: z$1.ZodEnum<{
1235
- readonly modelProvidedJson: "modelProvidedJson";
1236
- readonly reachedMaxTurns: "reachedMaxTurns";
1237
- readonly forcedFinalJson: "forcedFinalJson";
1238
- }>;
1239
- /**
1240
- * Parsed JSON object from the target model's final answer, if available.
1241
- */
1242
- extracted: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
1243
- } & {
1244
- kind: z$1.ZodLiteral<"fnol.rs.v1">;
1245
- schemaVersion: z$1.ZodLiteral<1>;
1246
- }, z$1.core.$strip> & {
1247
- new: (input: Omit<{
1248
- id: string;
1249
- testCaseId: string;
930
+ }) => Promise<{
931
+ response: {
1250
932
  startedAt: number;
1251
933
  completedAt: number;
1252
- data: string;
1253
- provider: string;
1254
- modelSlug: string;
1255
- conversation: {
1256
- role: "system" | "user" | "assistant";
1257
- content: string;
1258
- }[];
1259
- turnsUsed: number;
1260
- doneReason: "modelProvidedJson" | "reachedMaxTurns" | "forcedFinalJson";
1261
- kind: "fnol.rs.v1";
1262
- schemaVersion: 1;
1263
- metadata?: Record<string, unknown> | undefined;
1264
- inputTokensUsed?: number | undefined;
1265
- outputTokensUsed?: number | undefined;
1266
- inputCost?: string | undefined;
1267
- outputCost?: string | undefined;
1268
- systemPromptId?: string | undefined;
1269
- extracted?: Record<string, unknown> | undefined;
1270
- }, "kind" | "schemaVersion">) => {
1271
934
  id: string;
1272
935
  testCaseId: string;
1273
- startedAt: number;
1274
- completedAt: number;
1275
936
  data: string;
1276
- provider: string;
1277
937
  modelSlug: string;
1278
- conversation: {
1279
- role: "system" | "user" | "assistant";
1280
- content: string;
1281
- }[];
1282
- turnsUsed: number;
1283
- doneReason: "modelProvidedJson" | "reachedMaxTurns" | "forcedFinalJson";
1284
- kind: "fnol.rs.v1";
938
+ provider: string;
939
+ namespace: "peerbench.ai";
940
+ kind: "llm/mcq.rs";
1285
941
  schemaVersion: 1;
1286
942
  metadata?: Record<string, unknown> | undefined;
943
+ systemPromptId?: string | undefined;
1287
944
  inputTokensUsed?: number | undefined;
1288
945
  outputTokensUsed?: number | undefined;
1289
946
  inputCost?: string | undefined;
1290
947
  outputCost?: string | undefined;
1291
- systemPromptId?: string | undefined;
1292
- extracted?: Record<string, unknown> | undefined;
1293
- };
1294
- newWithId(input: Omit<{
1295
- id: string;
1296
- testCaseId: string;
948
+ } | {
1297
949
  startedAt: number;
1298
950
  completedAt: number;
1299
- data: string;
1300
- provider: string;
1301
- modelSlug: string;
1302
- conversation: {
1303
- role: "system" | "user" | "assistant";
1304
- content: string;
1305
- }[];
1306
- turnsUsed: number;
1307
- doneReason: "modelProvidedJson" | "reachedMaxTurns" | "forcedFinalJson";
1308
- kind: "fnol.rs.v1";
1309
- schemaVersion: 1;
1310
- metadata?: Record<string, unknown> | undefined;
1311
- inputTokensUsed?: number | undefined;
1312
- outputTokensUsed?: number | undefined;
1313
- inputCost?: string | undefined;
1314
- outputCost?: string | undefined;
1315
- systemPromptId?: string | undefined;
1316
- extracted?: Record<string, unknown> | undefined;
1317
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
1318
951
  id: string;
1319
952
  testCaseId: string;
1320
- startedAt: number;
1321
- completedAt: number;
1322
953
  data: string;
1323
- provider: string;
1324
954
  modelSlug: string;
1325
- conversation: {
1326
- role: "system" | "user" | "assistant";
1327
- content: string;
1328
- }[];
1329
- turnsUsed: number;
1330
- doneReason: "modelProvidedJson" | "reachedMaxTurns" | "forcedFinalJson";
1331
- kind: "fnol.rs.v1";
955
+ provider: string;
956
+ namespace: "peerbench.ai";
957
+ kind: "llm/qa.rs";
1332
958
  schemaVersion: 1;
1333
959
  metadata?: Record<string, unknown> | undefined;
960
+ systemPromptId?: string | undefined;
1334
961
  inputTokensUsed?: number | undefined;
1335
962
  outputTokensUsed?: number | undefined;
1336
963
  inputCost?: string | undefined;
1337
964
  outputCost?: string | undefined;
1338
- systemPromptId?: string | undefined;
1339
- extracted?: Record<string, unknown> | undefined;
1340
- }>;
1341
- };
1342
- type FNOLResponseV1 = z$1.infer<typeof FNOLResponseSchemaV1>;
1343
- declare const FNOLFieldsScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
1344
- id: z$1.ZodString;
1345
- kind: z$1.ZodString;
1346
- schemaVersion: z$1.ZodNumber;
1347
- value: z$1.ZodNumber;
1348
- responseId: z$1.ZodString;
1349
- explanation: z$1.ZodOptional<z$1.ZodString>;
1350
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
1351
- scoringMethod: z$1.ZodEnum<{
1352
- readonly ai: "ai";
1353
- readonly human: "human";
1354
- readonly algo: "algo";
1355
- }>;
1356
- }, "kind" | "schemaVersion"> & {
1357
- scorerAIProvider: z$1.ZodOptional<z$1.ZodString>;
1358
- scorerAIModelSlug: z$1.ZodOptional<z$1.ZodString>;
1359
- scorerAIInputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
1360
- scorerAIOutputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
1361
- scorerAIInputCost: z$1.ZodOptional<z$1.ZodString>;
1362
- scorerAIOutputCost: z$1.ZodOptional<z$1.ZodString>;
1363
- } & {
1364
- kind: z$1.ZodString;
1365
- schemaVersion: z$1.ZodNumber;
1366
- }, "kind" | "schemaVersion"> & {
1367
- requiredKeys: z$1.ZodArray<z$1.ZodString>;
1368
- presentKeys: z$1.ZodArray<z$1.ZodString>;
1369
- missingKeys: z$1.ZodArray<z$1.ZodString>;
1370
- mismatchedKeys: z$1.ZodArray<z$1.ZodString>;
1371
- } & {
1372
- kind: z$1.ZodLiteral<"fnol.sc.fields.v1">;
1373
- schemaVersion: z$1.ZodLiteral<1>;
1374
- }, z$1.core.$strip> & {
1375
- new: (input: Omit<{
1376
- id: string;
1377
- value: number;
1378
- responseId: string;
1379
- scoringMethod: "ai" | "human" | "algo";
1380
- requiredKeys: string[];
1381
- presentKeys: string[];
1382
- missingKeys: string[];
1383
- mismatchedKeys: string[];
1384
- kind: "fnol.sc.fields.v1";
1385
- schemaVersion: 1;
1386
- metadata?: Record<string, unknown> | undefined;
1387
- explanation?: string | undefined;
1388
- scorerAIProvider?: string | undefined;
1389
- scorerAIModelSlug?: string | undefined;
1390
- scorerAIInputTokensUsed?: number | undefined;
1391
- scorerAIOutputTokensUsed?: number | undefined;
1392
- scorerAIInputCost?: string | undefined;
1393
- scorerAIOutputCost?: string | undefined;
1394
- }, "kind" | "schemaVersion">) => {
1395
- id: string;
1396
- value: number;
1397
- responseId: string;
1398
- scoringMethod: "ai" | "human" | "algo";
1399
- requiredKeys: string[];
1400
- presentKeys: string[];
1401
- missingKeys: string[];
1402
- mismatchedKeys: string[];
1403
- kind: "fnol.sc.fields.v1";
1404
- schemaVersion: 1;
1405
- metadata?: Record<string, unknown> | undefined;
1406
- explanation?: string | undefined;
1407
- scorerAIProvider?: string | undefined;
1408
- scorerAIModelSlug?: string | undefined;
1409
- scorerAIInputTokensUsed?: number | undefined;
1410
- scorerAIOutputTokensUsed?: number | undefined;
1411
- scorerAIInputCost?: string | undefined;
1412
- scorerAIOutputCost?: string | undefined;
1413
965
  };
1414
- newWithId(input: Omit<{
1415
- id: string;
1416
- value: number;
1417
- responseId: string;
1418
- scoringMethod: "ai" | "human" | "algo";
1419
- requiredKeys: string[];
1420
- presentKeys: string[];
1421
- missingKeys: string[];
1422
- mismatchedKeys: string[];
1423
- kind: "fnol.sc.fields.v1";
1424
- schemaVersion: 1;
1425
- metadata?: Record<string, unknown> | undefined;
1426
- explanation?: string | undefined;
1427
- scorerAIProvider?: string | undefined;
1428
- scorerAIModelSlug?: string | undefined;
1429
- scorerAIInputTokensUsed?: number | undefined;
1430
- scorerAIOutputTokensUsed?: number | undefined;
1431
- scorerAIInputCost?: string | undefined;
1432
- scorerAIOutputCost?: string | undefined;
1433
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
1434
- id: string;
1435
- value: number;
1436
- responseId: string;
1437
- scoringMethod: "ai" | "human" | "algo";
1438
- requiredKeys: string[];
1439
- presentKeys: string[];
1440
- missingKeys: string[];
1441
- mismatchedKeys: string[];
1442
- kind: "fnol.sc.fields.v1";
1443
- schemaVersion: 1;
1444
- metadata?: Record<string, unknown> | undefined;
1445
- explanation?: string | undefined;
1446
- scorerAIProvider?: string | undefined;
1447
- scorerAIModelSlug?: string | undefined;
1448
- scorerAIInputTokensUsed?: number | undefined;
1449
- scorerAIOutputTokensUsed?: number | undefined;
1450
- scorerAIInputCost?: string | undefined;
1451
- scorerAIOutputCost?: string | undefined;
1452
- }>;
1453
- };
1454
- type FNOLFieldsScoreV1 = z$1.infer<typeof FNOLFieldsScoreSchemaV1>;
1455
- declare const FNOLLLMJudgeScoreSchemaV1: z$1.ZodObject<Omit<Omit<{
1456
- id: z$1.ZodString;
1457
- kind: z$1.ZodString;
1458
- schemaVersion: z$1.ZodNumber;
1459
- value: z$1.ZodNumber;
1460
- responseId: z$1.ZodString;
1461
- explanation: z$1.ZodOptional<z$1.ZodString>;
1462
- metadata: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
1463
- scoringMethod: z$1.ZodEnum<{
1464
- readonly ai: "ai";
1465
- readonly human: "human";
1466
- readonly algo: "algo";
1467
- }>;
1468
- }, "kind" | "schemaVersion"> & {
1469
- scorerAIProvider: z$1.ZodOptional<z$1.ZodString>;
1470
- scorerAIModelSlug: z$1.ZodOptional<z$1.ZodString>;
1471
- scorerAIInputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
1472
- scorerAIOutputTokensUsed: z$1.ZodOptional<z$1.ZodNumber>;
1473
- scorerAIInputCost: z$1.ZodOptional<z$1.ZodString>;
1474
- scorerAIOutputCost: z$1.ZodOptional<z$1.ZodString>;
1475
- } & {
1476
- kind: z$1.ZodString;
1477
- schemaVersion: z$1.ZodNumber;
1478
- }, "kind" | "schemaVersion"> & {
1479
- verdict: z$1.ZodOptional<z$1.ZodEnum<{
1480
- pass: "pass";
1481
- borderline: "borderline";
1482
- fail: "fail";
1483
- }>>;
1484
- } & {
1485
- kind: z$1.ZodLiteral<"fnol.sc.llm-judge.v1">;
1486
- schemaVersion: z$1.ZodLiteral<1>;
1487
- }, z$1.core.$strip> & {
1488
- new: (input: Omit<{
966
+ score?: {
1489
967
  id: string;
1490
968
  value: number;
1491
969
  responseId: string;
1492
970
  scoringMethod: "ai" | "human" | "algo";
1493
- kind: "fnol.sc.llm-judge.v1";
971
+ extractedAnswers: string[];
972
+ namespace: "peerbench.ai";
973
+ kind: "llm/mcq.sc";
1494
974
  schemaVersion: 1;
1495
- metadata?: Record<string, unknown> | undefined;
1496
975
  explanation?: string | undefined;
1497
- scorerAIProvider?: string | undefined;
1498
- scorerAIModelSlug?: string | undefined;
1499
- scorerAIInputTokensUsed?: number | undefined;
1500
- scorerAIOutputTokensUsed?: number | undefined;
1501
- scorerAIInputCost?: string | undefined;
1502
- scorerAIOutputCost?: string | undefined;
1503
- verdict?: "pass" | "borderline" | "fail" | undefined;
1504
- }, "kind" | "schemaVersion">) => {
1505
- id: string;
1506
- value: number;
1507
- responseId: string;
1508
- scoringMethod: "ai" | "human" | "algo";
1509
- kind: "fnol.sc.llm-judge.v1";
1510
- schemaVersion: 1;
1511
976
  metadata?: Record<string, unknown> | undefined;
1512
- explanation?: string | undefined;
977
+ scorerAISystemPrompt?: string | undefined;
978
+ scorerAISystemPromptId?: string | undefined;
1513
979
  scorerAIProvider?: string | undefined;
1514
980
  scorerAIModelSlug?: string | undefined;
1515
981
  scorerAIInputTokensUsed?: number | undefined;
1516
982
  scorerAIOutputTokensUsed?: number | undefined;
1517
983
  scorerAIInputCost?: string | undefined;
1518
984
  scorerAIOutputCost?: string | undefined;
1519
- verdict?: "pass" | "borderline" | "fail" | undefined;
1520
- };
1521
- newWithId(input: Omit<{
985
+ } | {
1522
986
  id: string;
1523
987
  value: number;
1524
988
  responseId: string;
1525
989
  scoringMethod: "ai" | "human" | "algo";
1526
- kind: "fnol.sc.llm-judge.v1";
990
+ namespace: "peerbench.ai";
991
+ kind: "llm/qa.sc";
1527
992
  schemaVersion: 1;
1528
- metadata?: Record<string, unknown> | undefined;
1529
993
  explanation?: string | undefined;
1530
- scorerAIProvider?: string | undefined;
1531
- scorerAIModelSlug?: string | undefined;
1532
- scorerAIInputTokensUsed?: number | undefined;
1533
- scorerAIOutputTokensUsed?: number | undefined;
1534
- scorerAIInputCost?: string | undefined;
1535
- scorerAIOutputCost?: string | undefined;
1536
- verdict?: "pass" | "borderline" | "fail" | undefined;
1537
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
1538
- id: string;
1539
- value: number;
1540
- responseId: string;
1541
- scoringMethod: "ai" | "human" | "algo";
1542
- kind: "fnol.sc.llm-judge.v1";
1543
- schemaVersion: 1;
1544
994
  metadata?: Record<string, unknown> | undefined;
1545
- explanation?: string | undefined;
995
+ scorerAISystemPrompt?: string | undefined;
996
+ scorerAISystemPromptId?: string | undefined;
1546
997
  scorerAIProvider?: string | undefined;
1547
998
  scorerAIModelSlug?: string | undefined;
1548
999
  scorerAIInputTokensUsed?: number | undefined;
1549
1000
  scorerAIOutputTokensUsed?: number | undefined;
1550
1001
  scorerAIInputCost?: string | undefined;
1551
1002
  scorerAIOutputCost?: string | undefined;
1552
- verdict?: "pass" | "borderline" | "fail" | undefined;
1553
- }>;
1554
- };
1555
- type FNOLLLMJudgeScoreV1 = z$1.infer<typeof FNOLLLMJudgeScoreSchemaV1>;
1556
-
1557
- declare function runTestCase(params: {
1558
- testCase: FNOLTestCaseV1;
1559
- provider: AbstractLLMProvider;
1560
- userSimulatorProvider?: AbstractLLMProvider;
1561
- scorer?: FNOLFieldsScorer | LLMJudgeScorer;
1562
- runConfig: {
1563
- model: string;
1564
- userSimulatorModel?: string;
1565
- llmJudgeModel?: string;
1566
- temperature?: number;
1567
- userSimulatorTemperature?: number;
1568
- };
1569
- systemPrompt?: SimpleSystemPromptV1;
1570
- idGenerators?: {
1571
- response?: IdGenerator;
1572
- score?: IdGenerator;
1573
- };
1574
- }): Promise<RunnerResult<FNOLResponseV1, FNOLFieldsScoreV1 | FNOLLLMJudgeScoreV1>>;
1575
-
1576
- declare const FNOLBaseScoreSchemaV1: z.ZodObject<Omit<{
1577
- id: z.ZodString;
1578
- kind: z.ZodString;
1579
- schemaVersion: z.ZodNumber;
1580
- value: z.ZodNumber;
1581
- responseId: z.ZodString;
1582
- explanation: z.ZodOptional<z.ZodString>;
1583
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1584
- scoringMethod: z.ZodEnum<{
1585
- readonly ai: "ai";
1586
- readonly human: "human";
1587
- readonly algo: "algo";
1588
- }>;
1589
- }, "kind" | "schemaVersion"> & {
1590
- scorerAIProvider: z.ZodOptional<z.ZodString>;
1591
- scorerAIModelSlug: z.ZodOptional<z.ZodString>;
1592
- scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
1593
- scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
1594
- scorerAIInputCost: z.ZodOptional<z.ZodString>;
1595
- scorerAIOutputCost: z.ZodOptional<z.ZodString>;
1596
- } & {
1597
- kind: z.ZodString;
1598
- schemaVersion: z.ZodNumber;
1599
- }, zod_v4_core.$strip> & {
1600
- new: (input: Omit<{
1601
- id: string;
1602
- value: number;
1603
- responseId: string;
1604
- scoringMethod: "ai" | "human" | "algo";
1605
- kind: string;
1606
- schemaVersion: number;
1607
- metadata?: Record<string, unknown> | undefined;
1608
- explanation?: string | undefined;
1609
- scorerAIProvider?: string | undefined;
1610
- scorerAIModelSlug?: string | undefined;
1611
- scorerAIInputTokensUsed?: number | undefined;
1612
- scorerAIOutputTokensUsed?: number | undefined;
1613
- scorerAIInputCost?: string | undefined;
1614
- scorerAIOutputCost?: string | undefined;
1615
- }, "kind" | "schemaVersion">) => {
1616
- id: string;
1617
- value: number;
1618
- responseId: string;
1619
- scoringMethod: "ai" | "human" | "algo";
1620
- kind: string;
1621
- schemaVersion: number;
1622
- metadata?: Record<string, unknown> | undefined;
1623
- explanation?: string | undefined;
1624
- scorerAIProvider?: string | undefined;
1625
- scorerAIModelSlug?: string | undefined;
1626
- scorerAIInputTokensUsed?: number | undefined;
1627
- scorerAIOutputTokensUsed?: number | undefined;
1628
- scorerAIInputCost?: string | undefined;
1629
- scorerAIOutputCost?: string | undefined;
1630
- };
1631
- newWithId(input: Omit<{
1632
- id: string;
1633
- value: number;
1634
- responseId: string;
1635
- scoringMethod: "ai" | "human" | "algo";
1636
- kind: string;
1637
- schemaVersion: number;
1638
- metadata?: Record<string, unknown> | undefined;
1639
- explanation?: string | undefined;
1640
- scorerAIProvider?: string | undefined;
1641
- scorerAIModelSlug?: string | undefined;
1642
- scorerAIInputTokensUsed?: number | undefined;
1643
- scorerAIOutputTokensUsed?: number | undefined;
1644
- scorerAIInputCost?: string | undefined;
1645
- scorerAIOutputCost?: string | undefined;
1646
- }, "id" | "kind" | "schemaVersion">, generator: IdGenerator): Promise<{
1647
- id: string;
1648
- value: number;
1649
- responseId: string;
1650
- scoringMethod: "ai" | "human" | "algo";
1651
- kind: string;
1652
- schemaVersion: number;
1653
- metadata?: Record<string, unknown> | undefined;
1654
- explanation?: string | undefined;
1655
- scorerAIProvider?: string | undefined;
1656
- scorerAIModelSlug?: string | undefined;
1657
- scorerAIInputTokensUsed?: number | undefined;
1658
- scorerAIOutputTokensUsed?: number | undefined;
1659
- scorerAIInputCost?: string | undefined;
1660
- scorerAIOutputCost?: string | undefined;
1661
- }>;
1662
- };
1003
+ } | undefined;
1004
+ }>;
1663
1005
 
1664
- declare const FNOLFieldValueType: {
1665
- readonly string: "string";
1666
- readonly number: "number";
1667
- readonly boolean: "boolean";
1668
- readonly object: "object";
1669
- };
1670
- type FNOLFieldValueType = (typeof FNOLFieldValueType)[keyof typeof FNOLFieldValueType];
1671
- declare const FNOLDoneReason: {
1672
- readonly modelProvidedJson: "modelProvidedJson";
1673
- readonly reachedMaxTurns: "reachedMaxTurns";
1674
- readonly forcedFinalJson: "forcedFinalJson";
1675
- };
1676
- type FNOLDoneReason = (typeof FNOLDoneReason)[keyof typeof FNOLDoneReason];
1006
+ declare class PeerbenchJSONStorage extends JSONFileStorage<MCQTestCaseV1 | MCQResponseV1 | MCQScoreV1 | QATestCaseV1 | QAResponseV1 | QAScoreV1 | MultiTurnTestCaseV1 | MultiTurnResponseV1 | MultiTurnScoreV1> {
1007
+ constructor(config: {
1008
+ path: string;
1009
+ chunkSize?: number;
1010
+ });
1011
+ }
1677
1012
 
1678
- declare const index_FNOLBaseScoreSchemaV1: typeof FNOLBaseScoreSchemaV1;
1679
- declare const index_FNOLConversationMessageSchemaV1: typeof FNOLConversationMessageSchemaV1;
1680
- type index_FNOLDoneReason = FNOLDoneReason;
1681
- declare const index_FNOLFieldSchemaV1: typeof FNOLFieldSchemaV1;
1682
- type index_FNOLFieldValueType = FNOLFieldValueType;
1683
- declare const index_FNOLFieldsScoreSchemaV1: typeof FNOLFieldsScoreSchemaV1;
1684
- type index_FNOLFieldsScoreV1 = FNOLFieldsScoreV1;
1685
- type index_FNOLFieldsScorer = FNOLFieldsScorer;
1686
- declare const index_FNOLFieldsScorer: typeof FNOLFieldsScorer;
1687
- declare const index_FNOLLLMJudgeScoreSchemaV1: typeof FNOLLLMJudgeScoreSchemaV1;
1688
- type index_FNOLLLMJudgeScoreV1 = FNOLLLMJudgeScoreV1;
1689
- declare const index_FNOLResponseSchemaV1: typeof FNOLResponseSchemaV1;
1690
- type index_FNOLResponseV1 = FNOLResponseV1;
1691
- declare const index_FNOLTestCaseSchemaV1: typeof FNOLTestCaseSchemaV1;
1692
- type index_FNOLTestCaseV1 = FNOLTestCaseV1;
1693
- declare const index_runTestCase: typeof runTestCase;
1013
+ declare const index_MCQKind: typeof MCQKind;
1014
+ declare const index_MCQResponseSchemaV1: typeof MCQResponseSchemaV1;
1015
+ type index_MCQResponseV1 = MCQResponseV1;
1016
+ declare const index_MCQScoreSchemaV1: typeof MCQScoreSchemaV1;
1017
+ type index_MCQScoreV1 = MCQScoreV1;
1018
+ declare const index_MCQTestCaseSchemaV1: typeof MCQTestCaseSchemaV1;
1019
+ type index_MCQTestCaseV1 = MCQTestCaseV1;
1020
+ declare const index_MultiTurnKind: typeof MultiTurnKind;
1021
+ declare const index_MultiTurnResponseSchemaV1: typeof MultiTurnResponseSchemaV1;
1022
+ type index_MultiTurnResponseV1 = MultiTurnResponseV1;
1023
+ declare const index_MultiTurnScoreSchemaV1: typeof MultiTurnScoreSchemaV1;
1024
+ type index_MultiTurnScoreV1 = MultiTurnScoreV1;
1025
+ declare const index_MultiTurnTestCaseSchemaV1: typeof MultiTurnTestCaseSchemaV1;
1026
+ type index_MultiTurnTestCaseV1 = MultiTurnTestCaseV1;
1027
+ type index_PeerbenchJSONStorage = PeerbenchJSONStorage;
1028
+ declare const index_PeerbenchJSONStorage: typeof PeerbenchJSONStorage;
1029
+ declare const index_QAKind: typeof QAKind;
1030
+ declare const index_QAResponseSchemaV1: typeof QAResponseSchemaV1;
1031
+ type index_QAResponseV1 = QAResponseV1;
1032
+ declare const index_QAScoreSchemaV1: typeof QAScoreSchemaV1;
1033
+ type index_QAScoreV1 = QAScoreV1;
1034
+ declare const index_QATestCaseSchemaV1: typeof QATestCaseSchemaV1;
1035
+ type index_QATestCaseV1 = QATestCaseV1;
1036
+ declare const index_peerbenchRunner: typeof peerbenchRunner;
1694
1037
  declare namespace index {
1695
- export { index_FNOLBaseScoreSchemaV1 as FNOLBaseScoreSchemaV1, index_FNOLConversationMessageSchemaV1 as FNOLConversationMessageSchemaV1, type index_FNOLDoneReason as FNOLDoneReason, index_FNOLFieldSchemaV1 as FNOLFieldSchemaV1, type index_FNOLFieldValueType as FNOLFieldValueType, index_FNOLFieldsScoreSchemaV1 as FNOLFieldsScoreSchemaV1, type index_FNOLFieldsScoreV1 as FNOLFieldsScoreV1, index_FNOLFieldsScorer as FNOLFieldsScorer, index_FNOLLLMJudgeScoreSchemaV1 as FNOLLLMJudgeScoreSchemaV1, type index_FNOLLLMJudgeScoreV1 as FNOLLLMJudgeScoreV1, index_FNOLResponseSchemaV1 as FNOLResponseSchemaV1, type index_FNOLResponseV1 as FNOLResponseV1, index_FNOLTestCaseSchemaV1 as FNOLTestCaseSchemaV1, type index_FNOLTestCaseV1 as FNOLTestCaseV1, index_runTestCase as runTestCase };
1038
+ export { index_MCQKind as MCQKind, index_MCQResponseSchemaV1 as MCQResponseSchemaV1, type index_MCQResponseV1 as MCQResponseV1, index_MCQScoreSchemaV1 as MCQScoreSchemaV1, type index_MCQScoreV1 as MCQScoreV1, index_MCQTestCaseSchemaV1 as MCQTestCaseSchemaV1, type index_MCQTestCaseV1 as MCQTestCaseV1, index_MultiTurnKind as MultiTurnKind, index_MultiTurnResponseSchemaV1 as MultiTurnResponseSchemaV1, type index_MultiTurnResponseV1 as MultiTurnResponseV1, index_MultiTurnScoreSchemaV1 as MultiTurnScoreSchemaV1, type index_MultiTurnScoreV1 as MultiTurnScoreV1, index_MultiTurnTestCaseSchemaV1 as MultiTurnTestCaseSchemaV1, type index_MultiTurnTestCaseV1 as MultiTurnTestCaseV1, index_PeerbenchJSONStorage as PeerbenchJSONStorage, index_QAKind as QAKind, index_QAResponseSchemaV1 as QAResponseSchemaV1, type index_QAResponseV1 as QAResponseV1, index_QAScoreSchemaV1 as QAScoreSchemaV1, type index_QAScoreV1 as QAScoreV1, index_QATestCaseSchemaV1 as QATestCaseSchemaV1, type index_QATestCaseV1 as QATestCaseV1, index_peerbenchRunner as peerbenchRunner };
1696
1039
  }
1697
1040
 
1698
- export { index as fnol, index$1 as mmluPro, index$2 as peerbench };
1041
+ export { index as peerbench };