peerbench 0.0.7 → 0.0.8

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (89)
  1. package/dist/aggregators/abstract.d.ts +10 -0
  2. package/dist/aggregators/index.d.ts +2 -67
  3. package/dist/aggregators/llm/avg.d.ts +26 -0
  4. package/dist/benchmarks/examples/echo-basic/index.d.ts +4 -0
  5. package/dist/benchmarks/examples/echo-basic/runner.d.ts +273 -0
  6. package/dist/benchmarks/examples/echo-basic/schema-sets/echo.v1.d.ts +241 -0
  7. package/dist/benchmarks/examples/echo-basic/storages/json.d.ts +14 -0
  8. package/dist/benchmarks/examples/echo-basic/storages/text.d.ts +24 -0
  9. package/dist/benchmarks/examples/exact-match-scorer/index.d.ts +4 -0
  10. package/dist/benchmarks/examples/exact-match-scorer/runner.d.ts +428 -0
  11. package/dist/benchmarks/examples/exact-match-scorer/schema-sets/exact-match.v1.d.ts +287 -0
  12. package/dist/benchmarks/examples/exact-match-scorer/scorer.d.ts +30 -0
  13. package/dist/benchmarks/examples/exact-match-scorer/storages/json.d.ts +8 -0
  14. package/dist/benchmarks/examples/text-transform/index.d.ts +4 -0
  15. package/dist/benchmarks/examples/text-transform/runner.d.ts +524 -0
  16. package/dist/benchmarks/examples/text-transform/schema-sets/echo.v1.d.ts +211 -0
  17. package/dist/benchmarks/examples/text-transform/schema-sets/namespace.d.ts +1 -0
  18. package/dist/benchmarks/examples/text-transform/schema-sets/reverse.v1.d.ts +216 -0
  19. package/dist/benchmarks/examples/text-transform/storages/json.d.ts +9 -0
  20. package/dist/benchmarks/index.d.ts +1 -1667
  21. package/dist/benchmarks/index.js +16 -16
  22. package/dist/benchmarks/peerbench/index.d.ts +5 -0
  23. package/dist/benchmarks/peerbench/runner.d.ts +754 -0
  24. package/dist/benchmarks/peerbench/schema-sets/mcq.v1.d.ts +261 -0
  25. package/dist/benchmarks/peerbench/schema-sets/multi-turn.v1.d.ts +351 -0
  26. package/dist/benchmarks/peerbench/schema-sets/qa.v1.d.ts +256 -0
  27. package/dist/benchmarks/peerbench/storages/json.d.ts +10 -0
  28. package/dist/{chunk-ZXTQJFGL.js → chunk-Q6GSOHOP.js} +4 -4
  29. package/dist/constants.d.ts +4 -0
  30. package/dist/errors/index.d.ts +2 -0
  31. package/dist/errors/peerbench.d.ts +6 -0
  32. package/dist/errors/polyfill.d.ts +1 -0
  33. package/dist/examples/basic.d.ts +1 -0
  34. package/dist/helpers/define-runner.d.ts +45 -0
  35. package/dist/helpers/index.d.ts +1 -0
  36. package/dist/index.d.ts +6 -101
  37. package/dist/index.js +3 -3
  38. package/dist/providers/abstract/llm.d.ts +20 -0
  39. package/dist/{provider-DnEBdl1n.d.ts → providers/abstract/provider.d.ts} +2 -4
  40. package/dist/providers/example/echo.d.ts +12 -0
  41. package/dist/providers/example/restapi.d.ts +37 -0
  42. package/dist/providers/index.d.ts +5 -96
  43. package/dist/providers/mastra.d.ts +40 -0
  44. package/dist/providers/openai.d.ts +29 -0
  45. package/dist/providers/openrouter.d.ts +27 -0
  46. package/dist/schemas/extensions/index.d.ts +18 -22
  47. package/dist/schemas/extensions/response/llm.d.ts +14 -0
  48. package/dist/schemas/extensions/score/llm-as-a-judge-scorer.d.ts +15 -0
  49. package/dist/schemas/id.d.ts +2 -0
  50. package/dist/schemas/index.d.ts +4 -200
  51. package/dist/schemas/llm/index.d.ts +2 -116
  52. package/dist/schemas/llm/index.js +2 -2
  53. package/dist/schemas/llm/simple-system-prompt.d.ts +51 -0
  54. package/dist/schemas/llm/system-prompt.d.ts +59 -0
  55. package/dist/schemas/response.d.ts +63 -0
  56. package/dist/schemas/schema-definer.d.ts +47 -0
  57. package/dist/schemas/score.d.ts +73 -0
  58. package/dist/schemas/test-case.d.ts +57 -0
  59. package/dist/{abstract-BdgLjkNC.d.ts → scorers/abstract.d.ts} +2 -4
  60. package/dist/scorers/index.d.ts +4 -68
  61. package/dist/scorers/llm-judge.d.ts +55 -0
  62. package/dist/scorers/mcq.d.ts +19 -0
  63. package/dist/scorers/mcq.test.d.ts +1 -0
  64. package/dist/scorers/regex.d.ts +58 -0
  65. package/dist/scorers/regex.test.d.ts +1 -0
  66. package/dist/storages/abstract.d.ts +7 -0
  67. package/dist/storages/examples/http.d.ts +1 -0
  68. package/dist/storages/examples/sqlite.d.ts +1 -0
  69. package/dist/storages/file.d.ts +43 -0
  70. package/dist/storages/http.d.ts +22 -0
  71. package/dist/storages/index.d.ts +5 -69
  72. package/dist/storages/json-file.d.ts +21 -0
  73. package/dist/storages/sqlite.d.ts +41 -0
  74. package/dist/types/index.d.ts +17 -0
  75. package/dist/types/runner.d.ts +18 -0
  76. package/dist/utilities.d.ts +9 -0
  77. package/dist/utils/id-generator.d.ts +2 -0
  78. package/dist/utils/index.d.ts +5 -0
  79. package/dist/utils/json.d.ts +17 -0
  80. package/dist/utils/llm.d.ts +7 -0
  81. package/dist/{rate-limiter-CSmVIRsM.d.ts → utils/rate-limiter.d.ts} +3 -5
  82. package/dist/utils/sleep.d.ts +1 -0
  83. package/dist/utils/string.d.ts +8 -0
  84. package/package.json +3 -3
  85. package/dist/index-Cn20kPrz.d.ts +0 -27
  86. package/dist/json-file-Bgv9TLcX.d.ts +0 -74
  87. package/dist/llm-8ecJmwKJ.d.ts +0 -23
  88. package/dist/llm-judge-BuF80-5-.d.ts +0 -75
  89. /package/dist/{chunk-ZXTQJFGL.js.map → chunk-Q6GSOHOP.js.map} +0 -0
@@ -0,0 +1,287 @@
1
+ import { z } from "zod";
2
+ export declare const ExactMatchNamespace: "example.peerbench.ai";
3
+ export declare const ExactMatchKind: "llm/exact-match";
4
+ export declare const ExactMatchTestCaseSchemaV1: z.ZodObject<Omit<{
5
+ id: z.ZodString;
6
+ namespace: z.ZodString;
7
+ schemaVersion: z.ZodNumber;
8
+ kind: z.ZodString;
9
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
10
+ }, "kind" | "namespace" | "schemaVersion"> & {
11
+ instruction: z.ZodString;
12
+ input: z.ZodString;
13
+ expectedOutput: z.ZodString;
14
+ normalize: z.ZodOptional<z.ZodBoolean>;
15
+ } & {
16
+ namespace: z.ZodLiteral<"example.peerbench.ai">;
17
+ kind: z.ZodLiteral<"llm/exact-match.tc">;
18
+ schemaVersion: z.ZodLiteral<1>;
19
+ }, z.core.$strip> & {
20
+ new: (input: Omit<{
21
+ id: string;
22
+ instruction: string;
23
+ input: string;
24
+ expectedOutput: string;
25
+ namespace: "example.peerbench.ai";
26
+ kind: "llm/exact-match.tc";
27
+ schemaVersion: 1;
28
+ metadata?: Record<string, unknown> | undefined;
29
+ normalize?: boolean | undefined;
30
+ }, "kind" | "namespace" | "schemaVersion">) => {
31
+ id: string;
32
+ instruction: string;
33
+ input: string;
34
+ expectedOutput: string;
35
+ namespace: "example.peerbench.ai";
36
+ kind: "llm/exact-match.tc";
37
+ schemaVersion: 1;
38
+ metadata?: Record<string, unknown> | undefined;
39
+ normalize?: boolean | undefined;
40
+ };
41
+ newWithId(input: Omit<{
42
+ id: string;
43
+ instruction: string;
44
+ input: string;
45
+ expectedOutput: string;
46
+ namespace: "example.peerbench.ai";
47
+ kind: "llm/exact-match.tc";
48
+ schemaVersion: 1;
49
+ metadata?: Record<string, unknown> | undefined;
50
+ normalize?: boolean | undefined;
51
+ }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../../index.js").IdGenerator): Promise<{
52
+ id: string;
53
+ instruction: string;
54
+ input: string;
55
+ expectedOutput: string;
56
+ namespace: "example.peerbench.ai";
57
+ kind: "llm/exact-match.tc";
58
+ schemaVersion: 1;
59
+ metadata?: Record<string, unknown> | undefined;
60
+ normalize?: boolean | undefined;
61
+ }>;
62
+ };
63
+ export type ExactMatchTestCaseV1 = z.infer<typeof ExactMatchTestCaseSchemaV1>;
64
+ export declare const ExactMatchResponseSchemaV1: z.ZodObject<Omit<{
65
+ id: z.ZodString;
66
+ namespace: z.ZodString;
67
+ schemaVersion: z.ZodNumber;
68
+ kind: z.ZodString;
69
+ startedAt: z.ZodNumber;
70
+ completedAt: z.ZodNumber;
71
+ testCaseId: z.ZodString;
72
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
73
+ }, "kind" | "namespace" | "schemaVersion"> & {
74
+ data: z.ZodString;
75
+ modelSlug: z.ZodString;
76
+ provider: z.ZodString;
77
+ systemPromptId: z.ZodOptional<z.ZodString>;
78
+ inputTokensUsed: z.ZodOptional<z.ZodNumber>;
79
+ outputTokensUsed: z.ZodOptional<z.ZodNumber>;
80
+ inputCost: z.ZodOptional<z.ZodString>;
81
+ outputCost: z.ZodOptional<z.ZodString>;
82
+ } & {
83
+ namespace: z.ZodLiteral<"example.peerbench.ai">;
84
+ kind: z.ZodLiteral<"llm/exact-match.rs">;
85
+ schemaVersion: z.ZodLiteral<1>;
86
+ }, z.core.$strip> & {
87
+ new: (input: Omit<{
88
+ startedAt: number;
89
+ completedAt: number;
90
+ id: string;
91
+ testCaseId: string;
92
+ data: string;
93
+ modelSlug: string;
94
+ provider: string;
95
+ namespace: "example.peerbench.ai";
96
+ kind: "llm/exact-match.rs";
97
+ schemaVersion: 1;
98
+ metadata?: Record<string, unknown> | undefined;
99
+ systemPromptId?: string | undefined;
100
+ inputTokensUsed?: number | undefined;
101
+ outputTokensUsed?: number | undefined;
102
+ inputCost?: string | undefined;
103
+ outputCost?: string | undefined;
104
+ }, "kind" | "namespace" | "schemaVersion">) => {
105
+ startedAt: number;
106
+ completedAt: number;
107
+ id: string;
108
+ testCaseId: string;
109
+ data: string;
110
+ modelSlug: string;
111
+ provider: string;
112
+ namespace: "example.peerbench.ai";
113
+ kind: "llm/exact-match.rs";
114
+ schemaVersion: 1;
115
+ metadata?: Record<string, unknown> | undefined;
116
+ systemPromptId?: string | undefined;
117
+ inputTokensUsed?: number | undefined;
118
+ outputTokensUsed?: number | undefined;
119
+ inputCost?: string | undefined;
120
+ outputCost?: string | undefined;
121
+ };
122
+ newWithId(input: Omit<{
123
+ startedAt: number;
124
+ completedAt: number;
125
+ id: string;
126
+ testCaseId: string;
127
+ data: string;
128
+ modelSlug: string;
129
+ provider: string;
130
+ namespace: "example.peerbench.ai";
131
+ kind: "llm/exact-match.rs";
132
+ schemaVersion: 1;
133
+ metadata?: Record<string, unknown> | undefined;
134
+ systemPromptId?: string | undefined;
135
+ inputTokensUsed?: number | undefined;
136
+ outputTokensUsed?: number | undefined;
137
+ inputCost?: string | undefined;
138
+ outputCost?: string | undefined;
139
+ }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../../index.js").IdGenerator): Promise<{
140
+ startedAt: number;
141
+ completedAt: number;
142
+ id: string;
143
+ testCaseId: string;
144
+ data: string;
145
+ modelSlug: string;
146
+ provider: string;
147
+ namespace: "example.peerbench.ai";
148
+ kind: "llm/exact-match.rs";
149
+ schemaVersion: 1;
150
+ metadata?: Record<string, unknown> | undefined;
151
+ systemPromptId?: string | undefined;
152
+ inputTokensUsed?: number | undefined;
153
+ outputTokensUsed?: number | undefined;
154
+ inputCost?: string | undefined;
155
+ outputCost?: string | undefined;
156
+ }>;
157
+ };
158
+ export type ExactMatchResponseV1 = z.infer<typeof ExactMatchResponseSchemaV1>;
159
+ export declare const ExactMatchScoreSchemaV1: z.ZodObject<Omit<{
160
+ id: z.ZodString;
161
+ namespace: z.ZodString;
162
+ kind: z.ZodString;
163
+ schemaVersion: z.ZodNumber;
164
+ value: z.ZodNumber;
165
+ responseId: z.ZodString;
166
+ explanation: z.ZodOptional<z.ZodString>;
167
+ metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
168
+ scoringMethod: z.ZodEnum<{
169
+ readonly ai: "ai";
170
+ readonly human: "human";
171
+ readonly algo: "algo";
172
+ }>;
173
+ }, "kind" | "namespace" | "schemaVersion"> & {
174
+ match: z.ZodBoolean;
175
+ normalized: z.ZodOptional<z.ZodObject<{
176
+ expected: z.ZodString;
177
+ actual: z.ZodString;
178
+ }, z.core.$strip>>;
179
+ scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
180
+ scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
181
+ scorerAIProvider: z.ZodOptional<z.ZodString>;
182
+ scorerAIModelSlug: z.ZodOptional<z.ZodString>;
183
+ scorerAIInputTokensUsed: z.ZodOptional<z.ZodNumber>;
184
+ scorerAIOutputTokensUsed: z.ZodOptional<z.ZodNumber>;
185
+ scorerAIInputCost: z.ZodOptional<z.ZodString>;
186
+ scorerAIOutputCost: z.ZodOptional<z.ZodString>;
187
+ } & {
188
+ namespace: z.ZodLiteral<"example.peerbench.ai">;
189
+ kind: z.ZodLiteral<"llm/exact-match.sc">;
190
+ schemaVersion: z.ZodLiteral<1>;
191
+ }, z.core.$strip> & {
192
+ new: (input: Omit<{
193
+ id: string;
194
+ value: number;
195
+ responseId: string;
196
+ scoringMethod: "ai" | "human" | "algo";
197
+ match: boolean;
198
+ namespace: "example.peerbench.ai";
199
+ kind: "llm/exact-match.sc";
200
+ schemaVersion: 1;
201
+ metadata?: Record<string, unknown> | undefined;
202
+ explanation?: string | undefined;
203
+ normalized?: {
204
+ expected: string;
205
+ actual: string;
206
+ } | undefined;
207
+ scorerAISystemPrompt?: string | undefined;
208
+ scorerAISystemPromptId?: string | undefined;
209
+ scorerAIProvider?: string | undefined;
210
+ scorerAIModelSlug?: string | undefined;
211
+ scorerAIInputTokensUsed?: number | undefined;
212
+ scorerAIOutputTokensUsed?: number | undefined;
213
+ scorerAIInputCost?: string | undefined;
214
+ scorerAIOutputCost?: string | undefined;
215
+ }, "kind" | "namespace" | "schemaVersion">) => {
216
+ id: string;
217
+ value: number;
218
+ responseId: string;
219
+ scoringMethod: "ai" | "human" | "algo";
220
+ match: boolean;
221
+ namespace: "example.peerbench.ai";
222
+ kind: "llm/exact-match.sc";
223
+ schemaVersion: 1;
224
+ metadata?: Record<string, unknown> | undefined;
225
+ explanation?: string | undefined;
226
+ normalized?: {
227
+ expected: string;
228
+ actual: string;
229
+ } | undefined;
230
+ scorerAISystemPrompt?: string | undefined;
231
+ scorerAISystemPromptId?: string | undefined;
232
+ scorerAIProvider?: string | undefined;
233
+ scorerAIModelSlug?: string | undefined;
234
+ scorerAIInputTokensUsed?: number | undefined;
235
+ scorerAIOutputTokensUsed?: number | undefined;
236
+ scorerAIInputCost?: string | undefined;
237
+ scorerAIOutputCost?: string | undefined;
238
+ };
239
+ newWithId(input: Omit<{
240
+ id: string;
241
+ value: number;
242
+ responseId: string;
243
+ scoringMethod: "ai" | "human" | "algo";
244
+ match: boolean;
245
+ namespace: "example.peerbench.ai";
246
+ kind: "llm/exact-match.sc";
247
+ schemaVersion: 1;
248
+ metadata?: Record<string, unknown> | undefined;
249
+ explanation?: string | undefined;
250
+ normalized?: {
251
+ expected: string;
252
+ actual: string;
253
+ } | undefined;
254
+ scorerAISystemPrompt?: string | undefined;
255
+ scorerAISystemPromptId?: string | undefined;
256
+ scorerAIProvider?: string | undefined;
257
+ scorerAIModelSlug?: string | undefined;
258
+ scorerAIInputTokensUsed?: number | undefined;
259
+ scorerAIOutputTokensUsed?: number | undefined;
260
+ scorerAIInputCost?: string | undefined;
261
+ scorerAIOutputCost?: string | undefined;
262
+ }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../../index.js").IdGenerator): Promise<{
263
+ id: string;
264
+ value: number;
265
+ responseId: string;
266
+ scoringMethod: "ai" | "human" | "algo";
267
+ match: boolean;
268
+ namespace: "example.peerbench.ai";
269
+ kind: "llm/exact-match.sc";
270
+ schemaVersion: 1;
271
+ metadata?: Record<string, unknown> | undefined;
272
+ explanation?: string | undefined;
273
+ normalized?: {
274
+ expected: string;
275
+ actual: string;
276
+ } | undefined;
277
+ scorerAISystemPrompt?: string | undefined;
278
+ scorerAISystemPromptId?: string | undefined;
279
+ scorerAIProvider?: string | undefined;
280
+ scorerAIModelSlug?: string | undefined;
281
+ scorerAIInputTokensUsed?: number | undefined;
282
+ scorerAIOutputTokensUsed?: number | undefined;
283
+ scorerAIInputCost?: string | undefined;
284
+ scorerAIOutputCost?: string | undefined;
285
+ }>;
286
+ };
287
+ export type ExactMatchScoreV1 = z.infer<typeof ExactMatchScoreSchemaV1>;
@@ -0,0 +1,30 @@
1
+ import { AbstractScorer, BaseScorerResult } from "../../../scorers/abstract";
2
+ /**
3
+ * A "scorer" is the piece that turns the given data into a numeric score
4
+ * alongside with additional explanation and metadata.
5
+ *
6
+ * A runner can do scoring inline, but having a dedicated scorer is nice when:
7
+ * - you want to reuse the same scoring logic across multiple runners/benchmarks
8
+ * - you want to allow callers to swap scorers easily (e.g. deterministic scorer vs LLM-as-a-judge)
9
+ * - you want to test scoring in isolation without calling a provider
10
+ *
11
+ * Here is an example, simple scorer implementation. It checks exact match (optionally with normalization)
12
+ * of the given expected and actual values. Score values must be between 0 and 1.
13
+ */
14
+ export declare class ExactMatchScorer extends AbstractScorer {
15
+ readonly kind: "example.peerbench.ai/exact-match";
16
+ score(params: {
17
+ expected: string;
18
+ actual: string;
19
+ normalize?: boolean;
20
+ }): Promise<BaseScorerResult & {
21
+ metadata: {
22
+ match: boolean;
23
+ normalize: boolean;
24
+ normalized?: {
25
+ expected: string;
26
+ actual: string;
27
+ };
28
+ };
29
+ }>;
30
+ }
@@ -0,0 +1,8 @@
1
+ import { JSONFileStorage } from "../../../../storages/json-file";
2
+ import { ExactMatchResponseV1, ExactMatchScoreV1, ExactMatchTestCaseV1 } from "../schema-sets/exact-match.v1";
3
+ export declare class ExactMatchJSONStorage extends JSONFileStorage<ExactMatchTestCaseV1 | ExactMatchResponseV1 | ExactMatchScoreV1> {
4
+ constructor(config: {
5
+ path: string;
6
+ chunkSize?: number;
7
+ });
8
+ }
@@ -0,0 +1,4 @@
1
+ export * from "./runner";
2
+ export * from "./schema-sets/echo.v1";
3
+ export * from "./schema-sets/reverse.v1";
4
+ export * from "./storages/json";