peerbench 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +71 -58
  2. package/dist/benchmarks/examples/echo-basic/runner.d.ts +11 -254
  3. package/dist/benchmarks/examples/echo-basic/schema-sets/echo.v1.d.ts +25 -25
  4. package/dist/benchmarks/examples/exact-match-scorer/runner.d.ts +38 -386
  5. package/dist/benchmarks/examples/exact-match-scorer/schema-sets/exact-match.v1.d.ts +25 -25
  6. package/dist/benchmarks/examples/text-transform/runner.d.ts +32 -480
  7. package/dist/benchmarks/examples/text-transform/schema-sets/echo.v1.d.ts +25 -25
  8. package/dist/benchmarks/examples/text-transform/schema-sets/reverse.v1.d.ts +25 -25
  9. package/dist/benchmarks/index.js +180 -248
  10. package/dist/benchmarks/index.js.map +1 -1
  11. package/dist/benchmarks/peerbench/index.d.ts +2 -1
  12. package/dist/benchmarks/peerbench/mcq-runner.d.ts +78 -0
  13. package/dist/benchmarks/peerbench/qa-runner.d.ts +77 -0
  14. package/dist/benchmarks/peerbench/schema-sets/mcq.v1.d.ts +25 -25
  15. package/dist/benchmarks/peerbench/schema-sets/multi-turn.v1.d.ts +25 -25
  16. package/dist/benchmarks/peerbench/schema-sets/qa.v1.d.ts +25 -25
  17. package/dist/chunk-6WDCU5BP.js +9 -0
  18. package/dist/chunk-6WDCU5BP.js.map +1 -0
  19. package/dist/{chunk-YY33MNMV.js → chunk-7KMGLEYP.js} +2 -2
  20. package/dist/{chunk-TRNCF2BG.js → chunk-HBGC6BDW.js} +1 -1
  21. package/dist/chunk-HBGC6BDW.js.map +1 -0
  22. package/dist/{chunk-HMQYGCKI.js → chunk-ZJWSK4VO.js} +1 -1
  23. package/dist/chunk-ZJWSK4VO.js.map +1 -0
  24. package/dist/dev.d.ts +22 -0
  25. package/dist/helpers/define-runner.d.ts +2 -45
  26. package/dist/index.js +2 -2
  27. package/dist/providers/ai-sdk.d.ts +24 -0
  28. package/dist/providers/callables/callable.d.ts +4 -0
  29. package/dist/providers/callables/llm.d.ts +41 -0
  30. package/dist/providers/example/echo.d.ts +12 -11
  31. package/dist/providers/example/restapi.d.ts +11 -18
  32. package/dist/providers/index.d.ts +4 -2
  33. package/dist/providers/index.js +380 -9
  34. package/dist/providers/index.js.map +1 -1
  35. package/dist/providers/mastra.d.ts +16 -21
  36. package/dist/providers/openai.d.ts +25 -10
  37. package/dist/providers/openrouter.d.ts +6 -8
  38. package/dist/schemas/extensions/index.js +1 -1
  39. package/dist/schemas/extensions/response/llm.d.ts +17 -0
  40. package/dist/schemas/index.js +2 -2
  41. package/dist/schemas/llm/index.js +36 -7
  42. package/dist/schemas/llm/index.js.map +1 -1
  43. package/dist/schemas/llm/simple-system-prompt.d.ts +3 -3
  44. package/dist/schemas/llm/system-prompt.d.ts +7 -7
  45. package/dist/schemas/response.d.ts +7 -7
  46. package/dist/schemas/schema-definer.d.ts +5 -5
  47. package/dist/schemas/score.d.ts +7 -7
  48. package/dist/schemas/test-case.d.ts +7 -7
  49. package/dist/scorers/abstract.d.ts +1 -1
  50. package/dist/scorers/index.js +377 -7
  51. package/dist/scorers/index.js.map +1 -1
  52. package/dist/scorers/llm-judge.d.ts +6 -6
  53. package/dist/types/index.d.ts +0 -5
  54. package/dist/types/runner.d.ts +13 -17
  55. package/package.json +8 -7
  56. package/dist/benchmarks/peerbench/runner.d.ts +0 -754
  57. package/dist/chunk-3JHDJEY3.js +0 -374
  58. package/dist/chunk-3JHDJEY3.js.map +0 -1
  59. package/dist/chunk-HMQYGCKI.js.map +0 -1
  60. package/dist/chunk-Q6GSOHOP.js +0 -44
  61. package/dist/chunk-Q6GSOHOP.js.map +0 -1
  62. package/dist/chunk-RTEAK4II.js +0 -37
  63. package/dist/chunk-RTEAK4II.js.map +0 -1
  64. package/dist/chunk-SMLNDQFX.js +0 -244
  65. package/dist/chunk-SMLNDQFX.js.map +0 -1
  66. package/dist/chunk-TRNCF2BG.js.map +0 -1
  67. package/dist/providers/abstract/llm.d.ts +0 -20
  68. /package/dist/{chunk-YY33MNMV.js.map → chunk-7KMGLEYP.js.map} +0 -0
  69. /package/dist/providers/{abstract/provider.d.ts → abstract.d.ts} +0 -0
@@ -42,7 +42,7 @@ export declare const EchoBasicTestCaseSchemaV1: z.ZodObject<Omit<{
42
42
  schemaVersion: z.ZodNumber;
43
43
  kind: z.ZodString;
44
44
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
45
- }, "kind" | "namespace" | "schemaVersion"> & {
45
+ }, "kind" | "schemaVersion" | "namespace"> & {
46
46
  input: z.ZodString;
47
47
  } & {
48
48
  namespace: z.ZodLiteral<"example.peerbench.ai">;
@@ -56,7 +56,7 @@ export declare const EchoBasicTestCaseSchemaV1: z.ZodObject<Omit<{
56
56
  kind: "llm/echo-basic.tc";
57
57
  schemaVersion: 1;
58
58
  metadata?: Record<string, unknown> | undefined;
59
- }, "kind" | "namespace" | "schemaVersion">) => {
59
+ }, "kind" | "schemaVersion" | "namespace">) => {
60
60
  id: string;
61
61
  input: string;
62
62
  namespace: "example.peerbench.ai";
@@ -71,7 +71,7 @@ export declare const EchoBasicTestCaseSchemaV1: z.ZodObject<Omit<{
71
71
  kind: "llm/echo-basic.tc";
72
72
  schemaVersion: 1;
73
73
  metadata?: Record<string, unknown> | undefined;
74
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../../index.js").IdGenerator): Promise<{
74
+ }, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../../index.js").IdGenerator): Promise<{
75
75
  id: string;
76
76
  input: string;
77
77
  namespace: "example.peerbench.ai";
@@ -90,7 +90,7 @@ export declare const EchoBasicResponseSchemaV1: z.ZodObject<Omit<{
90
90
  completedAt: z.ZodNumber;
91
91
  testCaseId: z.ZodString;
92
92
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
93
- }, "kind" | "namespace" | "schemaVersion"> & {
93
+ }, "kind" | "schemaVersion" | "namespace"> & {
94
94
  data: z.ZodString;
95
95
  modelSlug: z.ZodString;
96
96
  provider: z.ZodString;
@@ -105,10 +105,10 @@ export declare const EchoBasicResponseSchemaV1: z.ZodObject<Omit<{
105
105
  schemaVersion: z.ZodLiteral<1>;
106
106
  }, z.core.$strip> & {
107
107
  new: (input: Omit<{
108
- startedAt: number;
109
- completedAt: number;
110
108
  id: string;
111
109
  testCaseId: string;
110
+ startedAt: number;
111
+ completedAt: number;
112
112
  data: string;
113
113
  modelSlug: string;
114
114
  provider: string;
@@ -121,11 +121,11 @@ export declare const EchoBasicResponseSchemaV1: z.ZodObject<Omit<{
121
121
  outputTokensUsed?: number | undefined;
122
122
  inputCost?: string | undefined;
123
123
  outputCost?: string | undefined;
124
- }, "kind" | "namespace" | "schemaVersion">) => {
125
- startedAt: number;
126
- completedAt: number;
124
+ }, "kind" | "schemaVersion" | "namespace">) => {
127
125
  id: string;
128
126
  testCaseId: string;
127
+ startedAt: number;
128
+ completedAt: number;
129
129
  data: string;
130
130
  modelSlug: string;
131
131
  provider: string;
@@ -140,10 +140,10 @@ export declare const EchoBasicResponseSchemaV1: z.ZodObject<Omit<{
140
140
  outputCost?: string | undefined;
141
141
  };
142
142
  newWithId(input: Omit<{
143
- startedAt: number;
144
- completedAt: number;
145
143
  id: string;
146
144
  testCaseId: string;
145
+ startedAt: number;
146
+ completedAt: number;
147
147
  data: string;
148
148
  modelSlug: string;
149
149
  provider: string;
@@ -156,11 +156,11 @@ export declare const EchoBasicResponseSchemaV1: z.ZodObject<Omit<{
156
156
  outputTokensUsed?: number | undefined;
157
157
  inputCost?: string | undefined;
158
158
  outputCost?: string | undefined;
159
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../../index.js").IdGenerator): Promise<{
160
- startedAt: number;
161
- completedAt: number;
159
+ }, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../../index.js").IdGenerator): Promise<{
162
160
  id: string;
163
161
  testCaseId: string;
162
+ startedAt: number;
163
+ completedAt: number;
164
164
  data: string;
165
165
  modelSlug: string;
166
166
  provider: string;
@@ -190,52 +190,52 @@ export declare const EchoBasicScoreSchemaV1: z.ZodObject<Omit<{
190
190
  readonly human: "human";
191
191
  readonly algo: "algo";
192
192
  }>;
193
- }, "kind" | "namespace" | "schemaVersion"> & {
193
+ }, "kind" | "schemaVersion" | "namespace"> & {
194
194
  namespace: z.ZodLiteral<"example.peerbench.ai">;
195
195
  kind: z.ZodLiteral<"llm/echo-basic.sc">;
196
196
  schemaVersion: z.ZodLiteral<1>;
197
197
  }, z.core.$strip> & {
198
198
  new: (input: Omit<{
199
199
  id: string;
200
- value: number;
201
200
  responseId: string;
201
+ value: number;
202
202
  scoringMethod: "ai" | "human" | "algo";
203
203
  namespace: "example.peerbench.ai";
204
204
  kind: "llm/echo-basic.sc";
205
205
  schemaVersion: 1;
206
- metadata?: Record<string, unknown> | undefined;
207
206
  explanation?: string | undefined;
208
- }, "kind" | "namespace" | "schemaVersion">) => {
207
+ metadata?: Record<string, unknown> | undefined;
208
+ }, "kind" | "schemaVersion" | "namespace">) => {
209
209
  id: string;
210
- value: number;
211
210
  responseId: string;
211
+ value: number;
212
212
  scoringMethod: "ai" | "human" | "algo";
213
213
  namespace: "example.peerbench.ai";
214
214
  kind: "llm/echo-basic.sc";
215
215
  schemaVersion: 1;
216
- metadata?: Record<string, unknown> | undefined;
217
216
  explanation?: string | undefined;
217
+ metadata?: Record<string, unknown> | undefined;
218
218
  };
219
219
  newWithId(input: Omit<{
220
220
  id: string;
221
- value: number;
222
221
  responseId: string;
222
+ value: number;
223
223
  scoringMethod: "ai" | "human" | "algo";
224
224
  namespace: "example.peerbench.ai";
225
225
  kind: "llm/echo-basic.sc";
226
226
  schemaVersion: 1;
227
- metadata?: Record<string, unknown> | undefined;
228
227
  explanation?: string | undefined;
229
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../../index.js").IdGenerator): Promise<{
228
+ metadata?: Record<string, unknown> | undefined;
229
+ }, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../../index.js").IdGenerator): Promise<{
230
230
  id: string;
231
- value: number;
232
231
  responseId: string;
232
+ value: number;
233
233
  scoringMethod: "ai" | "human" | "algo";
234
234
  namespace: "example.peerbench.ai";
235
235
  kind: "llm/echo-basic.sc";
236
236
  schemaVersion: 1;
237
- metadata?: Record<string, unknown> | undefined;
238
237
  explanation?: string | undefined;
238
+ metadata?: Record<string, unknown> | undefined;
239
239
  }>;
240
240
  };
241
241
  export type EchoBasicScoreV1 = z.infer<typeof EchoBasicScoreSchemaV1>;
@@ -1,45 +1,25 @@
1
- import { AbstractLLMProvider } from "../../../providers/index.js";
1
+ import { CallableLLM } from "../../../providers/index.js";
2
+ import { SimpleSystemPromptV1 } from "../../../schemas/llm/index.js";
2
3
  import { LLMAsAJudgeScorer } from "../../../scorers/index.js";
3
- import z from "zod";
4
+ import { IdGenerator } from "../../../types";
4
5
  import { ExactMatchScorer } from "./scorer";
5
- export declare const exactMatchScorerRunner: ((params: {
6
- testCase: {
7
- id: string;
8
- instruction: string;
9
- input: string;
10
- expectedOutput: string;
11
- namespace: "example.peerbench.ai";
12
- kind: "llm/exact-match.tc";
13
- schemaVersion: 1;
14
- metadata?: Record<string, unknown> | undefined;
15
- normalize?: boolean | undefined;
16
- };
17
- provider: AbstractLLMProvider;
18
- scorer?: LLMAsAJudgeScorer | ExactMatchScorer | undefined;
19
- runConfig: {
20
- model: string;
21
- temperature?: number | undefined;
22
- systemPrompt?: {
23
- id: string;
24
- version: number;
25
- content: string;
26
- namespace: "peerbench.ai";
27
- kind: `${string}/simple.sys-prompt`;
28
- schemaVersion: 1;
29
- metadata?: Record<string, unknown> | undefined;
30
- } | undefined;
31
- llmJudgeModel?: string | undefined;
32
- };
6
+ import { ExactMatchTestCaseV1 } from "./schema-sets/exact-match.v1";
7
+ export declare const exactMatchScorerRunner: (params: {
8
+ testCase: ExactMatchTestCaseV1;
9
+ target: CallableLLM;
10
+ scorer?: ExactMatchScorer | LLMAsAJudgeScorer;
11
+ temperature?: number;
12
+ systemPrompt?: SimpleSystemPromptV1;
33
13
  idGenerators?: {
34
- response?: import("../../../types").IdGenerator;
35
- score?: import("../../../types").IdGenerator;
14
+ response?: IdGenerator;
15
+ score?: IdGenerator;
36
16
  };
37
17
  }) => Promise<{
38
18
  response: {
39
- startedAt: number;
40
- completedAt: number;
41
19
  id: string;
42
20
  testCaseId: string;
21
+ startedAt: number;
22
+ completedAt: number;
43
23
  data: string;
44
24
  modelSlug: string;
45
25
  provider: string;
@@ -53,17 +33,17 @@ export declare const exactMatchScorerRunner: ((params: {
53
33
  inputCost?: string | undefined;
54
34
  outputCost?: string | undefined;
55
35
  };
56
- score?: {
36
+ score: {
57
37
  id: string;
58
- value: number;
59
38
  responseId: string;
39
+ value: number;
60
40
  scoringMethod: "ai" | "human" | "algo";
61
41
  match: boolean;
62
42
  namespace: "example.peerbench.ai";
63
43
  kind: "llm/exact-match.sc";
64
44
  schemaVersion: 1;
65
- metadata?: Record<string, unknown> | undefined;
66
45
  explanation?: string | undefined;
46
+ metadata?: Record<string, unknown> | undefined;
67
47
  normalized?: {
68
48
  expected: string;
69
49
  actual: string;
@@ -76,353 +56,25 @@ export declare const exactMatchScorerRunner: ((params: {
76
56
  scorerAIOutputTokensUsed?: number | undefined;
77
57
  scorerAIInputCost?: string | undefined;
78
58
  scorerAIOutputCost?: string | undefined;
79
- } | undefined;
80
- }>) & {
81
- config: {
82
- runConfigSchema: z.ZodObject<{
83
- model: z.ZodString;
84
- temperature: z.ZodOptional<z.ZodNumber>;
85
- systemPrompt: z.ZodOptional<z.ZodObject<Omit<{
86
- id: z.ZodString;
87
- namespace: z.ZodString;
88
- kind: z.ZodString;
89
- schemaVersion: z.ZodNumber;
90
- version: z.ZodNumber;
91
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
92
- }, "kind" | "namespace" | "schemaVersion"> & {
93
- content: z.ZodString;
94
- } & {
95
- namespace: z.ZodLiteral<"peerbench.ai">;
96
- kind: z.ZodLiteral<`${string}/simple.sys-prompt`>;
97
- schemaVersion: z.ZodLiteral<1>;
98
- }, z.core.$strip> & {
99
- new: (input: Omit<{
100
- id: string;
101
- version: number;
102
- content: string;
103
- namespace: "peerbench.ai";
104
- kind: `${string}/simple.sys-prompt`;
105
- schemaVersion: 1;
106
- metadata?: Record<string, unknown> | undefined;
107
- }, "kind" | "namespace" | "schemaVersion">) => {
108
- id: string;
109
- version: number;
110
- content: string;
111
- namespace: "peerbench.ai";
112
- kind: `${string}/simple.sys-prompt`;
113
- schemaVersion: 1;
114
- metadata?: Record<string, unknown> | undefined;
115
- };
116
- newWithId(input: Omit<{
117
- id: string;
118
- version: number;
119
- content: string;
120
- namespace: "peerbench.ai";
121
- kind: `${string}/simple.sys-prompt`;
122
- schemaVersion: 1;
123
- metadata?: Record<string, unknown> | undefined;
124
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../types").IdGenerator): Promise<{
125
- id: string;
126
- version: number;
127
- content: string;
128
- namespace: "peerbench.ai";
129
- kind: `${string}/simple.sys-prompt`;
130
- schemaVersion: 1;
131
- metadata?: Record<string, unknown> | undefined;
132
- }>;
133
- }>;
134
- llmJudgeModel: z.ZodOptional<z.ZodString>;
135
- }, z.core.$strip>;
136
- schemaSets: [{
137
- readonly testCase: z.ZodObject<Omit<{
138
- id: z.ZodString;
139
- namespace: z.ZodString;
140
- schemaVersion: z.ZodNumber;
141
- kind: z.ZodString;
142
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
143
- }, "kind" | "namespace" | "schemaVersion"> & {
144
- instruction: z.ZodString;
145
- input: z.ZodString;
146
- expectedOutput: z.ZodString;
147
- normalize: z.ZodOptional<z.ZodBoolean>;
148
- } & {
149
- namespace: z.ZodLiteral<"example.peerbench.ai">;
150
- kind: z.ZodLiteral<"llm/exact-match.tc">;
151
- schemaVersion: z.ZodLiteral<1>;
152
- }, z.core.$strip> & {
153
- new: (input: Omit<{
154
- id: string;
155
- instruction: string;
156
- input: string;
157
- expectedOutput: string;
158
- namespace: "example.peerbench.ai";
159
- kind: "llm/exact-match.tc";
160
- schemaVersion: 1;
161
- metadata?: Record<string, unknown> | undefined;
162
- normalize?: boolean | undefined;
163
- }, "kind" | "namespace" | "schemaVersion">) => {
164
- id: string;
165
- instruction: string;
166
- input: string;
167
- expectedOutput: string;
168
- namespace: "example.peerbench.ai";
169
- kind: "llm/exact-match.tc";
170
- schemaVersion: 1;
171
- metadata?: Record<string, unknown> | undefined;
172
- normalize?: boolean | undefined;
173
- };
174
- newWithId(input: Omit<{
175
- id: string;
176
- instruction: string;
177
- input: string;
178
- expectedOutput: string;
179
- namespace: "example.peerbench.ai";
180
- kind: "llm/exact-match.tc";
181
- schemaVersion: 1;
182
- metadata?: Record<string, unknown> | undefined;
183
- normalize?: boolean | undefined;
184
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../types").IdGenerator): Promise<{
185
- id: string;
186
- instruction: string;
187
- input: string;
188
- expectedOutput: string;
189
- namespace: "example.peerbench.ai";
190
- kind: "llm/exact-match.tc";
191
- schemaVersion: 1;
192
- metadata?: Record<string, unknown> | undefined;
193
- normalize?: boolean | undefined;
194
- }>;
195
- };
196
- readonly response: z.ZodObject<Omit<{
197
- id: z.ZodString;
198
- namespace: z.ZodString;
199
- schemaVersion: z.ZodNumber;
200
- kind: z.ZodString;
201
- startedAt: z.ZodNumber;
202
- completedAt: z.ZodNumber;
203
- testCaseId: z.ZodString;
204
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
205
- }, "kind" | "namespace" | "schemaVersion"> & {
206
- data: z.ZodString;
207
- modelSlug: z.ZodString;
208
- provider: z.ZodString;
209
- systemPromptId: z.ZodOptional<z.ZodString>;
210
- inputTokensUsed: z.ZodOptional<z.ZodNumber>;
211
- outputTokensUsed: z.ZodOptional<z.ZodNumber>;
212
- inputCost: z.ZodOptional<z.ZodString>;
213
- outputCost: z.ZodOptional<z.ZodString>;
214
- } & {
215
- namespace: z.ZodLiteral<"example.peerbench.ai">;
216
- kind: z.ZodLiteral<"llm/exact-match.rs">;
217
- schemaVersion: z.ZodLiteral<1>;
218
- }, z.core.$strip> & {
219
- new: (input: Omit<{
220
- startedAt: number;
221
- completedAt: number;
222
- id: string;
223
- testCaseId: string;
224
- data: string;
225
- modelSlug: string;
226
- provider: string;
227
- namespace: "example.peerbench.ai";
228
- kind: "llm/exact-match.rs";
229
- schemaVersion: 1;
230
- metadata?: Record<string, unknown> | undefined;
231
- systemPromptId?: string | undefined;
232
- inputTokensUsed?: number | undefined;
233
- outputTokensUsed?: number | undefined;
234
- inputCost?: string | undefined;
235
- outputCost?: string | undefined;
236
- }, "kind" | "namespace" | "schemaVersion">) => {
237
- startedAt: number;
238
- completedAt: number;
239
- id: string;
240
- testCaseId: string;
241
- data: string;
242
- modelSlug: string;
243
- provider: string;
244
- namespace: "example.peerbench.ai";
245
- kind: "llm/exact-match.rs";
246
- schemaVersion: 1;
247
- metadata?: Record<string, unknown> | undefined;
248
- systemPromptId?: string | undefined;
249
- inputTokensUsed?: number | undefined;
250
- outputTokensUsed?: number | undefined;
251
- inputCost?: string | undefined;
252
- outputCost?: string | undefined;
253
- };
254
- newWithId(input: Omit<{
255
- startedAt: number;
256
- completedAt: number;
257
- id: string;
258
- testCaseId: string;
259
- data: string;
260
- modelSlug: string;
261
- provider: string;
262
- namespace: "example.peerbench.ai";
263
- kind: "llm/exact-match.rs";
264
- schemaVersion: 1;
265
- metadata?: Record<string, unknown> | undefined;
266
- systemPromptId?: string | undefined;
267
- inputTokensUsed?: number | undefined;
268
- outputTokensUsed?: number | undefined;
269
- inputCost?: string | undefined;
270
- outputCost?: string | undefined;
271
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../types").IdGenerator): Promise<{
272
- startedAt: number;
273
- completedAt: number;
274
- id: string;
275
- testCaseId: string;
276
- data: string;
277
- modelSlug: string;
278
- provider: string;
279
- namespace: "example.peerbench.ai";
280
- kind: "llm/exact-match.rs";
281
- schemaVersion: 1;
282
- metadata?: Record<string, unknown> | undefined;
283
- systemPromptId?: string | undefined;
284
- inputTokensUsed?: number | undefined;
285
- outputTokensUsed?: number | undefined;
286
- inputCost?: string | undefined;
287
- outputCost?: string | undefined;
288
- }>;
289
- };
290
- readonly score: z.ZodObject<Omit<{
291
- id: z.ZodString;
292
- namespace: z.ZodString;
293
- kind: z.ZodString;
294
- schemaVersion: z.ZodNumber;
295
- value: z.ZodNumber;
296
- responseId: z.ZodString;
297
- explanation: z.ZodOptional<z.ZodString>;
298
- metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
299
- scoringMethod: z.ZodEnum<{
300
- readonly ai: "ai";
301
- readonly human: "human";
302
- readonly algo: "algo";
303
- }>;
304
- }, "kind" | "namespace" | "schemaVersion"> & {
305
- match: z.ZodBoolean;
306
- normalized: z.ZodOptional<z.ZodObject<{
307
- expected: z.ZodString;
308
- actual: z.ZodString;
309
- }, z.core.$strip>>;
310
- scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
311
- scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
312
- scorerAIProvider: z.ZodOptional<z.ZodString>;
313
- scorerAIModelSlug: z.ZodOptional<z.ZodString>;
314
- scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
315
- scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
316
- scorerAIInputCost: z.ZodOptional<z.ZodString>;
317
- scorerAIOutputCost: z.ZodOptional<z.ZodString>;
318
- } & {
319
- namespace: z.ZodLiteral<"example.peerbench.ai">;
320
- kind: z.ZodLiteral<"llm/exact-match.sc">;
321
- schemaVersion: z.ZodLiteral<1>;
322
- }, z.core.$strip> & {
323
- new: (input: Omit<{
324
- id: string;
325
- value: number;
326
- responseId: string;
327
- scoringMethod: "ai" | "human" | "algo";
328
- match: boolean;
329
- namespace: "example.peerbench.ai";
330
- kind: "llm/exact-match.sc";
331
- schemaVersion: 1;
332
- metadata?: Record<string, unknown> | undefined;
333
- explanation?: string | undefined;
334
- normalized?: {
335
- expected: string;
336
- actual: string;
337
- } | undefined;
338
- scorerAISystemPrompt?: string | undefined;
339
- scorerAISystemPromptId?: string | undefined;
340
- scorerAIProvider?: string | undefined;
341
- scorerAIModelSlug?: string | undefined;
342
- scorerAIInputTokensUsed?: number | undefined;
343
- scorerAIOutputTokensUsed?: number | undefined;
344
- scorerAIInputCost?: string | undefined;
345
- scorerAIOutputCost?: string | undefined;
346
- }, "kind" | "namespace" | "schemaVersion">) => {
347
- id: string;
348
- value: number;
349
- responseId: string;
350
- scoringMethod: "ai" | "human" | "algo";
351
- match: boolean;
352
- namespace: "example.peerbench.ai";
353
- kind: "llm/exact-match.sc";
354
- schemaVersion: 1;
355
- metadata?: Record<string, unknown> | undefined;
356
- explanation?: string | undefined;
357
- normalized?: {
358
- expected: string;
359
- actual: string;
360
- } | undefined;
361
- scorerAISystemPrompt?: string | undefined;
362
- scorerAISystemPromptId?: string | undefined;
363
- scorerAIProvider?: string | undefined;
364
- scorerAIModelSlug?: string | undefined;
365
- scorerAIInputTokensUsed?: number | undefined;
366
- scorerAIOutputTokensUsed?: number | undefined;
367
- scorerAIInputCost?: string | undefined;
368
- scorerAIOutputCost?: string | undefined;
369
- };
370
- newWithId(input: Omit<{
371
- id: string;
372
- value: number;
373
- responseId: string;
374
- scoringMethod: "ai" | "human" | "algo";
375
- match: boolean;
376
- namespace: "example.peerbench.ai";
377
- kind: "llm/exact-match.sc";
378
- schemaVersion: 1;
379
- metadata?: Record<string, unknown> | undefined;
380
- explanation?: string | undefined;
381
- normalized?: {
382
- expected: string;
383
- actual: string;
384
- } | undefined;
385
- scorerAISystemPrompt?: string | undefined;
386
- scorerAISystemPromptId?: string | undefined;
387
- scorerAIProvider?: string | undefined;
388
- scorerAIModelSlug?: string | undefined;
389
- scorerAIInputTokensUsed?: number | undefined;
390
- scorerAIOutputTokensUsed?: number | undefined;
391
- scorerAIInputCost?: string | undefined;
392
- scorerAIOutputCost?: string | undefined;
393
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../types").IdGenerator): Promise<{
394
- id: string;
395
- value: number;
396
- responseId: string;
397
- scoringMethod: "ai" | "human" | "algo";
398
- match: boolean;
399
- namespace: "example.peerbench.ai";
400
- kind: "llm/exact-match.sc";
401
- schemaVersion: 1;
402
- metadata?: Record<string, unknown> | undefined;
403
- explanation?: string | undefined;
404
- normalized?: {
405
- expected: string;
406
- actual: string;
407
- } | undefined;
408
- scorerAISystemPrompt?: string | undefined;
409
- scorerAISystemPromptId?: string | undefined;
410
- scorerAIProvider?: string | undefined;
411
- scorerAIModelSlug?: string | undefined;
412
- scorerAIInputTokensUsed?: number | undefined;
413
- scorerAIOutputTokensUsed?: number | undefined;
414
- scorerAIInputCost?: string | undefined;
415
- scorerAIOutputCost?: string | undefined;
416
- }>;
417
- };
418
- }];
419
- providers: [typeof AbstractLLMProvider];
420
- scorers: [typeof ExactMatchScorer, typeof LLMAsAJudgeScorer];
421
- parseRunConfig?: boolean;
422
- defaults?: {
423
- scorer?: LLMAsAJudgeScorer | ExactMatchScorer | undefined;
424
- responseIdGenerator?: import("../../../types").IdGenerator;
425
- scoreIdGenerator?: import("../../../types").IdGenerator;
426
- } | undefined;
427
59
  };
428
- };
60
+ } | {
61
+ response: {
62
+ id: string;
63
+ testCaseId: string;
64
+ startedAt: number;
65
+ completedAt: number;
66
+ data: string;
67
+ modelSlug: string;
68
+ provider: string;
69
+ namespace: "example.peerbench.ai";
70
+ kind: "llm/exact-match.rs";
71
+ schemaVersion: 1;
72
+ metadata?: Record<string, unknown> | undefined;
73
+ systemPromptId?: string | undefined;
74
+ inputTokensUsed?: number | undefined;
75
+ outputTokensUsed?: number | undefined;
76
+ inputCost?: string | undefined;
77
+ outputCost?: string | undefined;
78
+ };
79
+ score?: undefined;
80
+ }>;