peerbench 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/README.md +71 -58
  2. package/dist/benchmarks/examples/echo-basic/runner.d.ts +11 -254
  3. package/dist/benchmarks/examples/echo-basic/schema-sets/echo.v1.d.ts +25 -25
  4. package/dist/benchmarks/examples/exact-match-scorer/runner.d.ts +38 -386
  5. package/dist/benchmarks/examples/exact-match-scorer/schema-sets/exact-match.v1.d.ts +25 -25
  6. package/dist/benchmarks/examples/text-transform/runner.d.ts +32 -480
  7. package/dist/benchmarks/examples/text-transform/schema-sets/echo.v1.d.ts +25 -25
  8. package/dist/benchmarks/examples/text-transform/schema-sets/reverse.v1.d.ts +25 -25
  9. package/dist/benchmarks/index.js +180 -248
  10. package/dist/benchmarks/index.js.map +1 -1
  11. package/dist/benchmarks/peerbench/index.d.ts +2 -1
  12. package/dist/benchmarks/peerbench/mcq-runner.d.ts +78 -0
  13. package/dist/benchmarks/peerbench/qa-runner.d.ts +77 -0
  14. package/dist/benchmarks/peerbench/schema-sets/mcq.v1.d.ts +25 -25
  15. package/dist/benchmarks/peerbench/schema-sets/multi-turn.v1.d.ts +25 -25
  16. package/dist/benchmarks/peerbench/schema-sets/qa.v1.d.ts +25 -25
  17. package/dist/chunk-6WDCU5BP.js +9 -0
  18. package/dist/chunk-6WDCU5BP.js.map +1 -0
  19. package/dist/{chunk-YY33MNMV.js → chunk-7KMGLEYP.js} +2 -2
  20. package/dist/{chunk-TRNCF2BG.js → chunk-HBGC6BDW.js} +1 -1
  21. package/dist/chunk-HBGC6BDW.js.map +1 -0
  22. package/dist/{chunk-HMQYGCKI.js → chunk-ZJWSK4VO.js} +1 -1
  23. package/dist/chunk-ZJWSK4VO.js.map +1 -0
  24. package/dist/dev.d.ts +22 -0
  25. package/dist/helpers/define-runner.d.ts +2 -45
  26. package/dist/index.js +2 -2
  27. package/dist/providers/ai-sdk.d.ts +24 -0
  28. package/dist/providers/callables/callable.d.ts +4 -0
  29. package/dist/providers/callables/llm.d.ts +41 -0
  30. package/dist/providers/example/echo.d.ts +12 -11
  31. package/dist/providers/example/restapi.d.ts +11 -18
  32. package/dist/providers/index.d.ts +4 -2
  33. package/dist/providers/index.js +380 -9
  34. package/dist/providers/index.js.map +1 -1
  35. package/dist/providers/mastra.d.ts +16 -21
  36. package/dist/providers/openai.d.ts +25 -10
  37. package/dist/providers/openrouter.d.ts +6 -8
  38. package/dist/schemas/extensions/index.js +1 -1
  39. package/dist/schemas/extensions/response/llm.d.ts +17 -0
  40. package/dist/schemas/index.js +2 -2
  41. package/dist/schemas/llm/index.js +36 -7
  42. package/dist/schemas/llm/index.js.map +1 -1
  43. package/dist/schemas/llm/simple-system-prompt.d.ts +3 -3
  44. package/dist/schemas/llm/system-prompt.d.ts +7 -7
  45. package/dist/schemas/response.d.ts +7 -7
  46. package/dist/schemas/schema-definer.d.ts +5 -5
  47. package/dist/schemas/score.d.ts +7 -7
  48. package/dist/schemas/test-case.d.ts +7 -7
  49. package/dist/scorers/abstract.d.ts +1 -1
  50. package/dist/scorers/index.js +377 -7
  51. package/dist/scorers/index.js.map +1 -1
  52. package/dist/scorers/llm-judge.d.ts +6 -6
  53. package/dist/types/index.d.ts +0 -5
  54. package/dist/types/runner.d.ts +13 -17
  55. package/package.json +8 -7
  56. package/dist/benchmarks/peerbench/runner.d.ts +0 -754
  57. package/dist/chunk-3JHDJEY3.js +0 -374
  58. package/dist/chunk-3JHDJEY3.js.map +0 -1
  59. package/dist/chunk-HMQYGCKI.js.map +0 -1
  60. package/dist/chunk-Q6GSOHOP.js +0 -44
  61. package/dist/chunk-Q6GSOHOP.js.map +0 -1
  62. package/dist/chunk-RTEAK4II.js +0 -37
  63. package/dist/chunk-RTEAK4II.js.map +0 -1
  64. package/dist/chunk-SMLNDQFX.js +0 -244
  65. package/dist/chunk-SMLNDQFX.js.map +0 -1
  66. package/dist/chunk-TRNCF2BG.js.map +0 -1
  67. package/dist/providers/abstract/llm.d.ts +0 -20
  68. package/dist/{chunk-YY33MNMV.js.map → chunk-7KMGLEYP.js.map} +0 -0
  69. package/dist/providers/{abstract/provider.d.ts → abstract.d.ts} +0 -0
package/dist/benchmarks/peerbench/runner.d.ts (removed in 0.0.12)
@@ -1,754 +0,0 @@
1
- import { AbstractLLMProvider } from "../../providers/index.js";
2
- import { LLMAsAJudgeScorer, MCQScorer } from "../../scorers/index.js";
3
- import { IdGenerator } from "../../types";
4
- import z from "zod";
5
- export declare const peerbenchRunner: ((params: {
6
- testCase: {
7
- id: string;
8
- question: string;
9
- options: Record<string, string>;
10
- correctAnswerKeys: string[];
11
- namespace: "peerbench.ai";
12
- kind: "llm/mcq.tc";
13
- schemaVersion: 1;
14
- metadata?: Record<string, unknown> | undefined;
15
- } | {
16
- id: string;
17
- question: string;
18
- goodAnswers: string[];
19
- badAnswers: string[];
20
- namespace: "peerbench.ai";
21
- kind: "llm/qa.tc";
22
- schemaVersion: 1;
23
- metadata?: Record<string, unknown> | undefined;
24
- };
25
- provider: AbstractLLMProvider;
26
- scorer?: MCQScorer | LLMAsAJudgeScorer | undefined;
27
- runConfig: {
28
- model: string;
29
- llmJudgeModel?: string | undefined;
30
- llmJudgeSystemPrompt?: {
31
- id: string;
32
- version: number;
33
- content: string;
34
- namespace: "peerbench.ai";
35
- kind: `${string}/simple.sys-prompt`;
36
- schemaVersion: 1;
37
- metadata?: Record<string, unknown> | undefined;
38
- } | undefined;
39
- llmJudgeFieldsToExtract?: Record<string, z.ZodType<unknown, unknown, z.core.$ZodTypeInternals<unknown, unknown>>> | undefined;
40
- systemPrompt?: {
41
- id: string;
42
- version: number;
43
- content: string;
44
- namespace: "peerbench.ai";
45
- kind: `${string}/simple.sys-prompt`;
46
- schemaVersion: 1;
47
- metadata?: Record<string, unknown> | undefined;
48
- } | undefined;
49
- templateVariables?: Record<string, string> | undefined;
50
- };
51
- idGenerators?: {
52
- response?: IdGenerator;
53
- score?: IdGenerator;
54
- };
55
- }) => Promise<{
56
- response: {
57
- startedAt: number;
58
- completedAt: number;
59
- id: string;
60
- testCaseId: string;
61
- data: string;
62
- modelSlug: string;
63
- provider: string;
64
- namespace: "peerbench.ai";
65
- kind: "llm/mcq.rs";
66
- schemaVersion: 1;
67
- metadata?: Record<string, unknown> | undefined;
68
- systemPromptId?: string | undefined;
69
- inputTokensUsed?: number | undefined;
70
- outputTokensUsed?: number | undefined;
71
- inputCost?: string | undefined;
72
- outputCost?: string | undefined;
73
- } | {
74
- startedAt: number;
75
- completedAt: number;
76
- id: string;
77
- testCaseId: string;
78
- data: string;
79
- modelSlug: string;
80
- provider: string;
81
- namespace: "peerbench.ai";
82
- kind: "llm/qa.rs";
83
- schemaVersion: 1;
84
- metadata?: Record<string, unknown> | undefined;
85
- systemPromptId?: string | undefined;
86
- inputTokensUsed?: number | undefined;
87
- outputTokensUsed?: number | undefined;
88
- inputCost?: string | undefined;
89
- outputCost?: string | undefined;
90
- };
91
- score?: {
92
- id: string;
93
- value: number;
94
- responseId: string;
95
- scoringMethod: "ai" | "human" | "algo";
96
- extractedAnswers: string[];
97
- namespace: "peerbench.ai";
98
- kind: "llm/mcq.sc";
99
- schemaVersion: 1;
100
- metadata?: Record<string, unknown> | undefined;
101
- explanation?: string | undefined;
102
- scorerAISystemPrompt?: string | undefined;
103
- scorerAISystemPromptId?: string | undefined;
104
- scorerAIProvider?: string | undefined;
105
- scorerAIModelSlug?: string | undefined;
106
- scorerAIInputTokensUsed?: number | undefined;
107
- scorerAIOutputTokensUsed?: number | undefined;
108
- scorerAIInputCost?: string | undefined;
109
- scorerAIOutputCost?: string | undefined;
110
- } | {
111
- id: string;
112
- value: number;
113
- responseId: string;
114
- scoringMethod: "ai" | "human" | "algo";
115
- namespace: "peerbench.ai";
116
- kind: "llm/qa.sc";
117
- schemaVersion: 1;
118
- metadata?: Record<string, unknown> | undefined;
119
- explanation?: string | undefined;
120
- scorerAISystemPrompt?: string | undefined;
121
- scorerAISystemPromptId?: string | undefined;
122
- scorerAIProvider?: string | undefined;
123
- scorerAIModelSlug?: string | undefined;
124
- scorerAIInputTokensUsed?: number | undefined;
125
- scorerAIOutputTokensUsed?: number | undefined;
126
- scorerAIInputCost?: string | undefined;
127
- scorerAIOutputCost?: string | undefined;
128
- } | undefined;
129
- }>) & {
130
- config: {
131
- runConfigSchema: z.ZodObject<{
132
- model: z.ZodString;
133
- llmJudgeModel: z.ZodOptional<z.ZodString>;
134
- llmJudgeSystemPrompt: z.ZodOptional<z.ZodObject<Omit<{
135
- id: z.ZodString;
136
- namespace: z.ZodString;
137
- kind: z.ZodString;
138
- schemaVersion: z.ZodNumber;
139
- version: z.ZodNumber;
140
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
141
- }, "kind" | "namespace" | "schemaVersion"> & {
142
- content: z.ZodString;
143
- } & {
144
- namespace: z.ZodLiteral<"peerbench.ai">;
145
- kind: z.ZodLiteral<`${string}/simple.sys-prompt`>;
146
- schemaVersion: z.ZodLiteral<1>;
147
- }, z.core.$strip> & {
148
- new: (input: Omit<{
149
- id: string;
150
- version: number;
151
- content: string;
152
- namespace: "peerbench.ai";
153
- kind: `${string}/simple.sys-prompt`;
154
- schemaVersion: 1;
155
- metadata?: Record<string, unknown> | undefined;
156
- }, "kind" | "namespace" | "schemaVersion">) => {
157
- id: string;
158
- version: number;
159
- content: string;
160
- namespace: "peerbench.ai";
161
- kind: `${string}/simple.sys-prompt`;
162
- schemaVersion: 1;
163
- metadata?: Record<string, unknown> | undefined;
164
- };
165
- newWithId(input: Omit<{
166
- id: string;
167
- version: number;
168
- content: string;
169
- namespace: "peerbench.ai";
170
- kind: `${string}/simple.sys-prompt`;
171
- schemaVersion: 1;
172
- metadata?: Record<string, unknown> | undefined;
173
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
174
- id: string;
175
- version: number;
176
- content: string;
177
- namespace: "peerbench.ai";
178
- kind: `${string}/simple.sys-prompt`;
179
- schemaVersion: 1;
180
- metadata?: Record<string, unknown> | undefined;
181
- }>;
182
- }>;
183
- llmJudgeFieldsToExtract: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodCustom<z.ZodType<unknown, unknown, z.core.$ZodTypeInternals<unknown, unknown>>, z.ZodType<unknown, unknown, z.core.$ZodTypeInternals<unknown, unknown>>>>>;
184
- systemPrompt: z.ZodOptional<z.ZodObject<Omit<{
185
- id: z.ZodString;
186
- namespace: z.ZodString;
187
- kind: z.ZodString;
188
- schemaVersion: z.ZodNumber;
189
- version: z.ZodNumber;
190
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
191
- }, "kind" | "namespace" | "schemaVersion"> & {
192
- content: z.ZodString;
193
- } & {
194
- namespace: z.ZodLiteral<"peerbench.ai">;
195
- kind: z.ZodLiteral<`${string}/simple.sys-prompt`>;
196
- schemaVersion: z.ZodLiteral<1>;
197
- }, z.core.$strip> & {
198
- new: (input: Omit<{
199
- id: string;
200
- version: number;
201
- content: string;
202
- namespace: "peerbench.ai";
203
- kind: `${string}/simple.sys-prompt`;
204
- schemaVersion: 1;
205
- metadata?: Record<string, unknown> | undefined;
206
- }, "kind" | "namespace" | "schemaVersion">) => {
207
- id: string;
208
- version: number;
209
- content: string;
210
- namespace: "peerbench.ai";
211
- kind: `${string}/simple.sys-prompt`;
212
- schemaVersion: 1;
213
- metadata?: Record<string, unknown> | undefined;
214
- };
215
- newWithId(input: Omit<{
216
- id: string;
217
- version: number;
218
- content: string;
219
- namespace: "peerbench.ai";
220
- kind: `${string}/simple.sys-prompt`;
221
- schemaVersion: 1;
222
- metadata?: Record<string, unknown> | undefined;
223
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
224
- id: string;
225
- version: number;
226
- content: string;
227
- namespace: "peerbench.ai";
228
- kind: `${string}/simple.sys-prompt`;
229
- schemaVersion: 1;
230
- metadata?: Record<string, unknown> | undefined;
231
- }>;
232
- }>;
233
- templateVariables: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
234
- }, z.core.$strip>;
235
- schemaSets: [{
236
- readonly testCase: z.ZodObject<Omit<{
237
- id: z.ZodString;
238
- namespace: z.ZodString;
239
- schemaVersion: z.ZodNumber;
240
- kind: z.ZodString;
241
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
242
- }, "kind" | "namespace" | "schemaVersion"> & {
243
- question: z.ZodString;
244
- options: z.ZodRecord<z.ZodString, z.ZodString>;
245
- correctAnswerKeys: z.ZodArray<z.ZodString>;
246
- } & {
247
- namespace: z.ZodLiteral<"peerbench.ai">;
248
- kind: z.ZodLiteral<"llm/mcq.tc">;
249
- schemaVersion: z.ZodLiteral<1>;
250
- }, z.core.$strip> & {
251
- new: (input: Omit<{
252
- id: string;
253
- question: string;
254
- options: Record<string, string>;
255
- correctAnswerKeys: string[];
256
- namespace: "peerbench.ai";
257
- kind: "llm/mcq.tc";
258
- schemaVersion: 1;
259
- metadata?: Record<string, unknown> | undefined;
260
- }, "kind" | "namespace" | "schemaVersion">) => {
261
- id: string;
262
- question: string;
263
- options: Record<string, string>;
264
- correctAnswerKeys: string[];
265
- namespace: "peerbench.ai";
266
- kind: "llm/mcq.tc";
267
- schemaVersion: 1;
268
- metadata?: Record<string, unknown> | undefined;
269
- };
270
- newWithId(input: Omit<{
271
- id: string;
272
- question: string;
273
- options: Record<string, string>;
274
- correctAnswerKeys: string[];
275
- namespace: "peerbench.ai";
276
- kind: "llm/mcq.tc";
277
- schemaVersion: 1;
278
- metadata?: Record<string, unknown> | undefined;
279
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
280
- id: string;
281
- question: string;
282
- options: Record<string, string>;
283
- correctAnswerKeys: string[];
284
- namespace: "peerbench.ai";
285
- kind: "llm/mcq.tc";
286
- schemaVersion: 1;
287
- metadata?: Record<string, unknown> | undefined;
288
- }>;
289
- };
290
- readonly response: z.ZodObject<Omit<{
291
- id: z.ZodString;
292
- namespace: z.ZodString;
293
- schemaVersion: z.ZodNumber;
294
- kind: z.ZodString;
295
- startedAt: z.ZodNumber;
296
- completedAt: z.ZodNumber;
297
- testCaseId: z.ZodString;
298
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
299
- }, "kind" | "namespace" | "schemaVersion"> & {
300
- data: z.ZodString;
301
- modelSlug: z.ZodString;
302
- provider: z.ZodString;
303
- systemPromptId: z.ZodOptional<z.ZodString>;
304
- inputTokensUsed: z.ZodOptional<z.ZodNumber>;
305
- outputTokensUsed: z.ZodOptional<z.ZodNumber>;
306
- inputCost: z.ZodOptional<z.ZodString>;
307
- outputCost: z.ZodOptional<z.ZodString>;
308
- } & {
309
- namespace: z.ZodLiteral<"peerbench.ai">;
310
- kind: z.ZodLiteral<"llm/mcq.rs">;
311
- schemaVersion: z.ZodLiteral<1>;
312
- }, z.core.$strip> & {
313
- new: (input: Omit<{
314
- startedAt: number;
315
- completedAt: number;
316
- id: string;
317
- testCaseId: string;
318
- data: string;
319
- modelSlug: string;
320
- provider: string;
321
- namespace: "peerbench.ai";
322
- kind: "llm/mcq.rs";
323
- schemaVersion: 1;
324
- metadata?: Record<string, unknown> | undefined;
325
- systemPromptId?: string | undefined;
326
- inputTokensUsed?: number | undefined;
327
- outputTokensUsed?: number | undefined;
328
- inputCost?: string | undefined;
329
- outputCost?: string | undefined;
330
- }, "kind" | "namespace" | "schemaVersion">) => {
331
- startedAt: number;
332
- completedAt: number;
333
- id: string;
334
- testCaseId: string;
335
- data: string;
336
- modelSlug: string;
337
- provider: string;
338
- namespace: "peerbench.ai";
339
- kind: "llm/mcq.rs";
340
- schemaVersion: 1;
341
- metadata?: Record<string, unknown> | undefined;
342
- systemPromptId?: string | undefined;
343
- inputTokensUsed?: number | undefined;
344
- outputTokensUsed?: number | undefined;
345
- inputCost?: string | undefined;
346
- outputCost?: string | undefined;
347
- };
348
- newWithId(input: Omit<{
349
- startedAt: number;
350
- completedAt: number;
351
- id: string;
352
- testCaseId: string;
353
- data: string;
354
- modelSlug: string;
355
- provider: string;
356
- namespace: "peerbench.ai";
357
- kind: "llm/mcq.rs";
358
- schemaVersion: 1;
359
- metadata?: Record<string, unknown> | undefined;
360
- systemPromptId?: string | undefined;
361
- inputTokensUsed?: number | undefined;
362
- outputTokensUsed?: number | undefined;
363
- inputCost?: string | undefined;
364
- outputCost?: string | undefined;
365
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
366
- startedAt: number;
367
- completedAt: number;
368
- id: string;
369
- testCaseId: string;
370
- data: string;
371
- modelSlug: string;
372
- provider: string;
373
- namespace: "peerbench.ai";
374
- kind: "llm/mcq.rs";
375
- schemaVersion: 1;
376
- metadata?: Record<string, unknown> | undefined;
377
- systemPromptId?: string | undefined;
378
- inputTokensUsed?: number | undefined;
379
- outputTokensUsed?: number | undefined;
380
- inputCost?: string | undefined;
381
- outputCost?: string | undefined;
382
- }>;
383
- };
384
- readonly score: z.ZodObject<Omit<{
385
- id: z.ZodString;
386
- namespace: z.ZodString;
387
- kind: z.ZodString;
388
- schemaVersion: z.ZodNumber;
389
- value: z.ZodNumber;
390
- responseId: z.ZodString;
391
- explanation: z.ZodOptional<z.ZodString>;
392
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
393
- scoringMethod: z.ZodEnum<{
394
- readonly ai: "ai";
395
- readonly human: "human";
396
- readonly algo: "algo";
397
- }>;
398
- }, "kind" | "namespace" | "schemaVersion"> & {
399
- extractedAnswers: z.ZodArray<z.ZodString>;
400
- scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
401
- scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
402
- scorerAIProvider: z.ZodOptional<z.ZodString>;
403
- scorerAIModelSlug: z.ZodOptional<z.ZodString>;
404
- scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
405
- scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
406
- scorerAIInputCost: z.ZodOptional<z.ZodString>;
407
- scorerAIOutputCost: z.ZodOptional<z.ZodString>;
408
- } & {
409
- namespace: z.ZodLiteral<"peerbench.ai">;
410
- kind: z.ZodLiteral<"llm/mcq.sc">;
411
- schemaVersion: z.ZodLiteral<1>;
412
- }, z.core.$strip> & {
413
- new: (input: Omit<{
414
- id: string;
415
- value: number;
416
- responseId: string;
417
- scoringMethod: "ai" | "human" | "algo";
418
- extractedAnswers: string[];
419
- namespace: "peerbench.ai";
420
- kind: "llm/mcq.sc";
421
- schemaVersion: 1;
422
- metadata?: Record<string, unknown> | undefined;
423
- explanation?: string | undefined;
424
- scorerAISystemPrompt?: string | undefined;
425
- scorerAISystemPromptId?: string | undefined;
426
- scorerAIProvider?: string | undefined;
427
- scorerAIModelSlug?: string | undefined;
428
- scorerAIInputTokensUsed?: number | undefined;
429
- scorerAIOutputTokensUsed?: number | undefined;
430
- scorerAIInputCost?: string | undefined;
431
- scorerAIOutputCost?: string | undefined;
432
- }, "kind" | "namespace" | "schemaVersion">) => {
433
- id: string;
434
- value: number;
435
- responseId: string;
436
- scoringMethod: "ai" | "human" | "algo";
437
- extractedAnswers: string[];
438
- namespace: "peerbench.ai";
439
- kind: "llm/mcq.sc";
440
- schemaVersion: 1;
441
- metadata?: Record<string, unknown> | undefined;
442
- explanation?: string | undefined;
443
- scorerAISystemPrompt?: string | undefined;
444
- scorerAISystemPromptId?: string | undefined;
445
- scorerAIProvider?: string | undefined;
446
- scorerAIModelSlug?: string | undefined;
447
- scorerAIInputTokensUsed?: number | undefined;
448
- scorerAIOutputTokensUsed?: number | undefined;
449
- scorerAIInputCost?: string | undefined;
450
- scorerAIOutputCost?: string | undefined;
451
- };
452
- newWithId(input: Omit<{
453
- id: string;
454
- value: number;
455
- responseId: string;
456
- scoringMethod: "ai" | "human" | "algo";
457
- extractedAnswers: string[];
458
- namespace: "peerbench.ai";
459
- kind: "llm/mcq.sc";
460
- schemaVersion: 1;
461
- metadata?: Record<string, unknown> | undefined;
462
- explanation?: string | undefined;
463
- scorerAISystemPrompt?: string | undefined;
464
- scorerAISystemPromptId?: string | undefined;
465
- scorerAIProvider?: string | undefined;
466
- scorerAIModelSlug?: string | undefined;
467
- scorerAIInputTokensUsed?: number | undefined;
468
- scorerAIOutputTokensUsed?: number | undefined;
469
- scorerAIInputCost?: string | undefined;
470
- scorerAIOutputCost?: string | undefined;
471
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
472
- id: string;
473
- value: number;
474
- responseId: string;
475
- scoringMethod: "ai" | "human" | "algo";
476
- extractedAnswers: string[];
477
- namespace: "peerbench.ai";
478
- kind: "llm/mcq.sc";
479
- schemaVersion: 1;
480
- metadata?: Record<string, unknown> | undefined;
481
- explanation?: string | undefined;
482
- scorerAISystemPrompt?: string | undefined;
483
- scorerAISystemPromptId?: string | undefined;
484
- scorerAIProvider?: string | undefined;
485
- scorerAIModelSlug?: string | undefined;
486
- scorerAIInputTokensUsed?: number | undefined;
487
- scorerAIOutputTokensUsed?: number | undefined;
488
- scorerAIInputCost?: string | undefined;
489
- scorerAIOutputCost?: string | undefined;
490
- }>;
491
- };
492
- }, {
493
- readonly testCase: z.ZodObject<Omit<{
494
- id: z.ZodString;
495
- namespace: z.ZodString;
496
- schemaVersion: z.ZodNumber;
497
- kind: z.ZodString;
498
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
499
- }, "kind" | "namespace" | "schemaVersion"> & {
500
- question: z.ZodString;
501
- goodAnswers: z.ZodArray<z.ZodString>;
502
- badAnswers: z.ZodArray<z.ZodString>;
503
- } & {
504
- namespace: z.ZodLiteral<"peerbench.ai">;
505
- kind: z.ZodLiteral<"llm/qa.tc">;
506
- schemaVersion: z.ZodLiteral<1>;
507
- }, z.core.$strip> & {
508
- new: (input: Omit<{
509
- id: string;
510
- question: string;
511
- goodAnswers: string[];
512
- badAnswers: string[];
513
- namespace: "peerbench.ai";
514
- kind: "llm/qa.tc";
515
- schemaVersion: 1;
516
- metadata?: Record<string, unknown> | undefined;
517
- }, "kind" | "namespace" | "schemaVersion">) => {
518
- id: string;
519
- question: string;
520
- goodAnswers: string[];
521
- badAnswers: string[];
522
- namespace: "peerbench.ai";
523
- kind: "llm/qa.tc";
524
- schemaVersion: 1;
525
- metadata?: Record<string, unknown> | undefined;
526
- };
527
- newWithId(input: Omit<{
528
- id: string;
529
- question: string;
530
- goodAnswers: string[];
531
- badAnswers: string[];
532
- namespace: "peerbench.ai";
533
- kind: "llm/qa.tc";
534
- schemaVersion: 1;
535
- metadata?: Record<string, unknown> | undefined;
536
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
537
- id: string;
538
- question: string;
539
- goodAnswers: string[];
540
- badAnswers: string[];
541
- namespace: "peerbench.ai";
542
- kind: "llm/qa.tc";
543
- schemaVersion: 1;
544
- metadata?: Record<string, unknown> | undefined;
545
- }>;
546
- };
547
- readonly response: z.ZodObject<Omit<{
548
- id: z.ZodString;
549
- namespace: z.ZodString;
550
- schemaVersion: z.ZodNumber;
551
- kind: z.ZodString;
552
- startedAt: z.ZodNumber;
553
- completedAt: z.ZodNumber;
554
- testCaseId: z.ZodString;
555
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
556
- }, "kind" | "namespace" | "schemaVersion"> & {
557
- data: z.ZodString;
558
- modelSlug: z.ZodString;
559
- provider: z.ZodString;
560
- systemPromptId: z.ZodOptional<z.ZodString>;
561
- inputTokensUsed: z.ZodOptional<z.ZodNumber>;
562
- outputTokensUsed: z.ZodOptional<z.ZodNumber>;
563
- inputCost: z.ZodOptional<z.ZodString>;
564
- outputCost: z.ZodOptional<z.ZodString>;
565
- } & {
566
- namespace: z.ZodLiteral<"peerbench.ai">;
567
- kind: z.ZodLiteral<"llm/qa.rs">;
568
- schemaVersion: z.ZodLiteral<1>;
569
- }, z.core.$strip> & {
570
- new: (input: Omit<{
571
- startedAt: number;
572
- completedAt: number;
573
- id: string;
574
- testCaseId: string;
575
- data: string;
576
- modelSlug: string;
577
- provider: string;
578
- namespace: "peerbench.ai";
579
- kind: "llm/qa.rs";
580
- schemaVersion: 1;
581
- metadata?: Record<string, unknown> | undefined;
582
- systemPromptId?: string | undefined;
583
- inputTokensUsed?: number | undefined;
584
- outputTokensUsed?: number | undefined;
585
- inputCost?: string | undefined;
586
- outputCost?: string | undefined;
587
- }, "kind" | "namespace" | "schemaVersion">) => {
588
- startedAt: number;
589
- completedAt: number;
590
- id: string;
591
- testCaseId: string;
592
- data: string;
593
- modelSlug: string;
594
- provider: string;
595
- namespace: "peerbench.ai";
596
- kind: "llm/qa.rs";
597
- schemaVersion: 1;
598
- metadata?: Record<string, unknown> | undefined;
599
- systemPromptId?: string | undefined;
600
- inputTokensUsed?: number | undefined;
601
- outputTokensUsed?: number | undefined;
602
- inputCost?: string | undefined;
603
- outputCost?: string | undefined;
604
- };
605
- newWithId(input: Omit<{
606
- startedAt: number;
607
- completedAt: number;
608
- id: string;
609
- testCaseId: string;
610
- data: string;
611
- modelSlug: string;
612
- provider: string;
613
- namespace: "peerbench.ai";
614
- kind: "llm/qa.rs";
615
- schemaVersion: 1;
616
- metadata?: Record<string, unknown> | undefined;
617
- systemPromptId?: string | undefined;
618
- inputTokensUsed?: number | undefined;
619
- outputTokensUsed?: number | undefined;
620
- inputCost?: string | undefined;
621
- outputCost?: string | undefined;
622
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
623
- startedAt: number;
624
- completedAt: number;
625
- id: string;
626
- testCaseId: string;
627
- data: string;
628
- modelSlug: string;
629
- provider: string;
630
- namespace: "peerbench.ai";
631
- kind: "llm/qa.rs";
632
- schemaVersion: 1;
633
- metadata?: Record<string, unknown> | undefined;
634
- systemPromptId?: string | undefined;
635
- inputTokensUsed?: number | undefined;
636
- outputTokensUsed?: number | undefined;
637
- inputCost?: string | undefined;
638
- outputCost?: string | undefined;
639
- }>;
640
- };
641
- readonly score: z.ZodObject<Omit<{
642
- id: z.ZodString;
643
- namespace: z.ZodString;
644
- kind: z.ZodString;
645
- schemaVersion: z.ZodNumber;
646
- value: z.ZodNumber;
647
- responseId: z.ZodString;
648
- explanation: z.ZodOptional<z.ZodString>;
649
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
650
- scoringMethod: z.ZodEnum<{
651
- readonly ai: "ai";
652
- readonly human: "human";
653
- readonly algo: "algo";
654
- }>;
655
- }, "kind" | "namespace" | "schemaVersion"> & {
656
- scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
657
- scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
658
- scorerAIProvider: z.ZodOptional<z.ZodString>;
659
- scorerAIModelSlug: z.ZodOptional<z.ZodString>;
660
- scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
661
- scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
662
- scorerAIInputCost: z.ZodOptional<z.ZodString>;
663
- scorerAIOutputCost: z.ZodOptional<z.ZodString>;
664
- } & {
665
- namespace: z.ZodLiteral<"peerbench.ai">;
666
- kind: z.ZodLiteral<"llm/qa.sc">;
667
- schemaVersion: z.ZodLiteral<1>;
668
- }, z.core.$strip> & {
669
- new: (input: Omit<{
670
- id: string;
671
- value: number;
672
- responseId: string;
673
- scoringMethod: "ai" | "human" | "algo";
674
- namespace: "peerbench.ai";
675
- kind: "llm/qa.sc";
676
- schemaVersion: 1;
677
- metadata?: Record<string, unknown> | undefined;
678
- explanation?: string | undefined;
679
- scorerAISystemPrompt?: string | undefined;
680
- scorerAISystemPromptId?: string | undefined;
681
- scorerAIProvider?: string | undefined;
682
- scorerAIModelSlug?: string | undefined;
683
- scorerAIInputTokensUsed?: number | undefined;
684
- scorerAIOutputTokensUsed?: number | undefined;
685
- scorerAIInputCost?: string | undefined;
686
- scorerAIOutputCost?: string | undefined;
687
- }, "kind" | "namespace" | "schemaVersion">) => {
688
- id: string;
689
- value: number;
690
- responseId: string;
691
- scoringMethod: "ai" | "human" | "algo";
692
- namespace: "peerbench.ai";
693
- kind: "llm/qa.sc";
694
- schemaVersion: 1;
695
- metadata?: Record<string, unknown> | undefined;
696
- explanation?: string | undefined;
697
- scorerAISystemPrompt?: string | undefined;
698
- scorerAISystemPromptId?: string | undefined;
699
- scorerAIProvider?: string | undefined;
700
- scorerAIModelSlug?: string | undefined;
701
- scorerAIInputTokensUsed?: number | undefined;
702
- scorerAIOutputTokensUsed?: number | undefined;
703
- scorerAIInputCost?: string | undefined;
704
- scorerAIOutputCost?: string | undefined;
705
- };
706
- newWithId(input: Omit<{
707
- id: string;
708
- value: number;
709
- responseId: string;
710
- scoringMethod: "ai" | "human" | "algo";
711
- namespace: "peerbench.ai";
712
- kind: "llm/qa.sc";
713
- schemaVersion: 1;
714
- metadata?: Record<string, unknown> | undefined;
715
- explanation?: string | undefined;
716
- scorerAISystemPrompt?: string | undefined;
717
- scorerAISystemPromptId?: string | undefined;
718
- scorerAIProvider?: string | undefined;
719
- scorerAIModelSlug?: string | undefined;
720
- scorerAIInputTokensUsed?: number | undefined;
721
- scorerAIOutputTokensUsed?: number | undefined;
722
- scorerAIInputCost?: string | undefined;
723
- scorerAIOutputCost?: string | undefined;
724
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
725
- id: string;
726
- value: number;
727
- responseId: string;
728
- scoringMethod: "ai" | "human" | "algo";
729
- namespace: "peerbench.ai";
730
- kind: "llm/qa.sc";
731
- schemaVersion: 1;
732
- metadata?: Record<string, unknown> | undefined;
733
- explanation?: string | undefined;
734
- scorerAISystemPrompt?: string | undefined;
735
- scorerAISystemPromptId?: string | undefined;
736
- scorerAIProvider?: string | undefined;
737
- scorerAIModelSlug?: string | undefined;
738
- scorerAIInputTokensUsed?: number | undefined;
739
- scorerAIOutputTokensUsed?: number | undefined;
740
- scorerAIInputCost?: string | undefined;
741
- scorerAIOutputCost?: string | undefined;
742
- }>;
743
- };
744
- }];
745
- providers: [typeof AbstractLLMProvider];
746
- scorers: [typeof LLMAsAJudgeScorer, typeof MCQScorer];
747
- parseRunConfig?: boolean;
748
- defaults?: {
749
- scorer?: MCQScorer | LLMAsAJudgeScorer | undefined;
750
- responseIdGenerator?: IdGenerator;
751
- scoreIdGenerator?: IdGenerator;
752
- } | undefined;
753
- };
754
- };