peerbench 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +71 -58
  2. package/dist/benchmarks/examples/echo-basic/runner.d.ts +11 -254
  3. package/dist/benchmarks/examples/echo-basic/schema-sets/echo.v1.d.ts +25 -25
  4. package/dist/benchmarks/examples/exact-match-scorer/runner.d.ts +38 -386
  5. package/dist/benchmarks/examples/exact-match-scorer/schema-sets/exact-match.v1.d.ts +25 -25
  6. package/dist/benchmarks/examples/text-transform/runner.d.ts +32 -480
  7. package/dist/benchmarks/examples/text-transform/schema-sets/echo.v1.d.ts +25 -25
  8. package/dist/benchmarks/examples/text-transform/schema-sets/reverse.v1.d.ts +25 -25
  9. package/dist/benchmarks/index.js +180 -248
  10. package/dist/benchmarks/index.js.map +1 -1
  11. package/dist/benchmarks/peerbench/index.d.ts +2 -1
  12. package/dist/benchmarks/peerbench/mcq-runner.d.ts +78 -0
  13. package/dist/benchmarks/peerbench/qa-runner.d.ts +77 -0
  14. package/dist/benchmarks/peerbench/schema-sets/mcq.v1.d.ts +25 -25
  15. package/dist/benchmarks/peerbench/schema-sets/multi-turn.v1.d.ts +25 -25
  16. package/dist/benchmarks/peerbench/schema-sets/qa.v1.d.ts +25 -25
  17. package/dist/chunk-6WDCU5BP.js +9 -0
  18. package/dist/chunk-6WDCU5BP.js.map +1 -0
  19. package/dist/{chunk-YY33MNMV.js → chunk-7KMGLEYP.js} +2 -2
  20. package/dist/{chunk-TRNCF2BG.js → chunk-HBGC6BDW.js} +1 -1
  21. package/dist/chunk-HBGC6BDW.js.map +1 -0
  22. package/dist/{chunk-HMQYGCKI.js → chunk-ZJWSK4VO.js} +1 -1
  23. package/dist/chunk-ZJWSK4VO.js.map +1 -0
  24. package/dist/dev.d.ts +22 -0
  25. package/dist/helpers/define-runner.d.ts +2 -45
  26. package/dist/index.js +2 -2
  27. package/dist/providers/ai-sdk.d.ts +24 -0
  28. package/dist/providers/callables/callable.d.ts +4 -0
  29. package/dist/providers/callables/llm.d.ts +41 -0
  30. package/dist/providers/example/echo.d.ts +12 -11
  31. package/dist/providers/example/restapi.d.ts +11 -18
  32. package/dist/providers/index.d.ts +4 -2
  33. package/dist/providers/index.js +380 -9
  34. package/dist/providers/index.js.map +1 -1
  35. package/dist/providers/mastra.d.ts +16 -21
  36. package/dist/providers/openai.d.ts +25 -10
  37. package/dist/providers/openrouter.d.ts +6 -8
  38. package/dist/schemas/extensions/index.js +1 -1
  39. package/dist/schemas/extensions/response/llm.d.ts +17 -0
  40. package/dist/schemas/index.js +2 -2
  41. package/dist/schemas/llm/index.js +36 -7
  42. package/dist/schemas/llm/index.js.map +1 -1
  43. package/dist/schemas/llm/simple-system-prompt.d.ts +3 -3
  44. package/dist/schemas/llm/system-prompt.d.ts +7 -7
  45. package/dist/schemas/response.d.ts +7 -7
  46. package/dist/schemas/schema-definer.d.ts +5 -5
  47. package/dist/schemas/score.d.ts +7 -7
  48. package/dist/schemas/test-case.d.ts +7 -7
  49. package/dist/scorers/abstract.d.ts +1 -1
  50. package/dist/scorers/index.js +377 -7
  51. package/dist/scorers/index.js.map +1 -1
  52. package/dist/scorers/llm-judge.d.ts +6 -6
  53. package/dist/types/index.d.ts +0 -5
  54. package/dist/types/runner.d.ts +13 -17
  55. package/package.json +8 -7
  56. package/dist/benchmarks/peerbench/runner.d.ts +0 -754
  57. package/dist/chunk-3JHDJEY3.js +0 -374
  58. package/dist/chunk-3JHDJEY3.js.map +0 -1
  59. package/dist/chunk-HMQYGCKI.js.map +0 -1
  60. package/dist/chunk-Q6GSOHOP.js +0 -44
  61. package/dist/chunk-Q6GSOHOP.js.map +0 -1
  62. package/dist/chunk-RTEAK4II.js +0 -37
  63. package/dist/chunk-RTEAK4II.js.map +0 -1
  64. package/dist/chunk-SMLNDQFX.js +0 -244
  65. package/dist/chunk-SMLNDQFX.js.map +0 -1
  66. package/dist/chunk-TRNCF2BG.js.map +0 -1
  67. package/dist/providers/abstract/llm.d.ts +0 -20
  68. /package/dist/{chunk-YY33MNMV.js.map → chunk-7KMGLEYP.js.map} +0 -0
  69. /package/dist/providers/{abstract/provider.d.ts → abstract.d.ts} +0 -0
@@ -6,7 +6,7 @@ export declare const QATestCaseSchemaV1: z.ZodObject<Omit<{
6
6
  schemaVersion: z.ZodNumber;
7
7
  kind: z.ZodString;
8
8
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
9
- }, "kind" | "namespace" | "schemaVersion"> & {
9
+ }, "kind" | "schemaVersion" | "namespace"> & {
10
10
  question: z.ZodString;
11
11
  goodAnswers: z.ZodArray<z.ZodString>;
12
12
  badAnswers: z.ZodArray<z.ZodString>;
@@ -24,7 +24,7 @@ export declare const QATestCaseSchemaV1: z.ZodObject<Omit<{
24
24
  kind: "llm/qa.tc";
25
25
  schemaVersion: 1;
26
26
  metadata?: Record<string, unknown> | undefined;
27
- }, "kind" | "namespace" | "schemaVersion">) => {
27
+ }, "kind" | "schemaVersion" | "namespace">) => {
28
28
  id: string;
29
29
  question: string;
30
30
  goodAnswers: string[];
@@ -43,7 +43,7 @@ export declare const QATestCaseSchemaV1: z.ZodObject<Omit<{
43
43
  kind: "llm/qa.tc";
44
44
  schemaVersion: 1;
45
45
  metadata?: Record<string, unknown> | undefined;
46
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../index.js").IdGenerator): Promise<{
46
+ }, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
47
47
  id: string;
48
48
  question: string;
49
49
  goodAnswers: string[];
@@ -64,7 +64,7 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
64
64
  completedAt: z.ZodNumber;
65
65
  testCaseId: z.ZodString;
66
66
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
67
- }, "kind" | "namespace" | "schemaVersion"> & {
67
+ }, "kind" | "schemaVersion" | "namespace"> & {
68
68
  data: z.ZodString;
69
69
  modelSlug: z.ZodString;
70
70
  provider: z.ZodString;
@@ -79,10 +79,10 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
79
79
  schemaVersion: z.ZodLiteral<1>;
80
80
  }, z.core.$strip> & {
81
81
  new: (input: Omit<{
82
- startedAt: number;
83
- completedAt: number;
84
82
  id: string;
85
83
  testCaseId: string;
84
+ startedAt: number;
85
+ completedAt: number;
86
86
  data: string;
87
87
  modelSlug: string;
88
88
  provider: string;
@@ -95,11 +95,11 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
95
95
  outputTokensUsed?: number | undefined;
96
96
  inputCost?: string | undefined;
97
97
  outputCost?: string | undefined;
98
- }, "kind" | "namespace" | "schemaVersion">) => {
99
- startedAt: number;
100
- completedAt: number;
98
+ }, "kind" | "schemaVersion" | "namespace">) => {
101
99
  id: string;
102
100
  testCaseId: string;
101
+ startedAt: number;
102
+ completedAt: number;
103
103
  data: string;
104
104
  modelSlug: string;
105
105
  provider: string;
@@ -114,10 +114,10 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
114
114
  outputCost?: string | undefined;
115
115
  };
116
116
  newWithId(input: Omit<{
117
- startedAt: number;
118
- completedAt: number;
119
117
  id: string;
120
118
  testCaseId: string;
119
+ startedAt: number;
120
+ completedAt: number;
121
121
  data: string;
122
122
  modelSlug: string;
123
123
  provider: string;
@@ -130,11 +130,11 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
130
130
  outputTokensUsed?: number | undefined;
131
131
  inputCost?: string | undefined;
132
132
  outputCost?: string | undefined;
133
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../index.js").IdGenerator): Promise<{
134
- startedAt: number;
135
- completedAt: number;
133
+ }, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
136
134
  id: string;
137
135
  testCaseId: string;
136
+ startedAt: number;
137
+ completedAt: number;
138
138
  data: string;
139
139
  modelSlug: string;
140
140
  provider: string;
@@ -164,7 +164,7 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
164
164
  readonly human: "human";
165
165
  readonly algo: "algo";
166
166
  }>;
167
- }, "kind" | "namespace" | "schemaVersion"> & {
167
+ }, "kind" | "schemaVersion" | "namespace"> & {
168
168
  scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
169
169
  scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
170
170
  scorerAIProvider: z.ZodOptional<z.ZodString>;
@@ -180,14 +180,14 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
180
180
  }, z.core.$strip> & {
181
181
  new: (input: Omit<{
182
182
  id: string;
183
- value: number;
184
183
  responseId: string;
184
+ value: number;
185
185
  scoringMethod: "ai" | "human" | "algo";
186
186
  namespace: "peerbench.ai";
187
187
  kind: "llm/qa.sc";
188
188
  schemaVersion: 1;
189
- metadata?: Record<string, unknown> | undefined;
190
189
  explanation?: string | undefined;
190
+ metadata?: Record<string, unknown> | undefined;
191
191
  scorerAISystemPrompt?: string | undefined;
192
192
  scorerAISystemPromptId?: string | undefined;
193
193
  scorerAIProvider?: string | undefined;
@@ -196,16 +196,16 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
196
196
  scorerAIOutputTokensUsed?: number | undefined;
197
197
  scorerAIInputCost?: string | undefined;
198
198
  scorerAIOutputCost?: string | undefined;
199
- }, "kind" | "namespace" | "schemaVersion">) => {
199
+ }, "kind" | "schemaVersion" | "namespace">) => {
200
200
  id: string;
201
- value: number;
202
201
  responseId: string;
202
+ value: number;
203
203
  scoringMethod: "ai" | "human" | "algo";
204
204
  namespace: "peerbench.ai";
205
205
  kind: "llm/qa.sc";
206
206
  schemaVersion: 1;
207
- metadata?: Record<string, unknown> | undefined;
208
207
  explanation?: string | undefined;
208
+ metadata?: Record<string, unknown> | undefined;
209
209
  scorerAISystemPrompt?: string | undefined;
210
210
  scorerAISystemPromptId?: string | undefined;
211
211
  scorerAIProvider?: string | undefined;
@@ -217,14 +217,14 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
217
217
  };
218
218
  newWithId(input: Omit<{
219
219
  id: string;
220
- value: number;
221
220
  responseId: string;
221
+ value: number;
222
222
  scoringMethod: "ai" | "human" | "algo";
223
223
  namespace: "peerbench.ai";
224
224
  kind: "llm/qa.sc";
225
225
  schemaVersion: 1;
226
- metadata?: Record<string, unknown> | undefined;
227
226
  explanation?: string | undefined;
227
+ metadata?: Record<string, unknown> | undefined;
228
228
  scorerAISystemPrompt?: string | undefined;
229
229
  scorerAISystemPromptId?: string | undefined;
230
230
  scorerAIProvider?: string | undefined;
@@ -233,16 +233,16 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
233
233
  scorerAIOutputTokensUsed?: number | undefined;
234
234
  scorerAIInputCost?: string | undefined;
235
235
  scorerAIOutputCost?: string | undefined;
236
- }, "kind" | "id" | "namespace" | "schemaVersion">, generator: import("../../../index.js").IdGenerator): Promise<{
236
+ }, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
237
237
  id: string;
238
- value: number;
239
238
  responseId: string;
239
+ value: number;
240
240
  scoringMethod: "ai" | "human" | "algo";
241
241
  namespace: "peerbench.ai";
242
242
  kind: "llm/qa.sc";
243
243
  schemaVersion: 1;
244
- metadata?: Record<string, unknown> | undefined;
245
244
  explanation?: string | undefined;
245
+ metadata?: Record<string, unknown> | undefined;
246
246
  scorerAISystemPrompt?: string | undefined;
247
247
  scorerAISystemPromptId?: string | undefined;
248
248
  scorerAIProvider?: string | undefined;
@@ -0,0 +1,9 @@
1
+ // src/helpers/define-runner.ts
2
+ function defineRunner(fn) {
3
+ return fn;
4
+ }
5
+
6
+ export {
7
+ defineRunner
8
+ };
9
+ //# sourceMappingURL=chunk-6WDCU5BP.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/helpers/define-runner.ts"],"sourcesContent":["import { RunnerParams, RunnerResult } from \"@/types\";\n\nexport function defineRunner<TParams extends RunnerParams, TResult extends RunnerResult>(\n fn: (params: TParams) => Promise<TResult>\n) {\n return fn;\n}\n"],"mappings":";AAEO,SAAS,aACd,IACA;AACA,SAAO;AACT;","names":[]}
@@ -3,7 +3,7 @@ import {
3
3
  } from "./chunk-OQE6TQXZ.js";
4
4
  import {
5
5
  ScoringMethod
6
- } from "./chunk-HMQYGCKI.js";
6
+ } from "./chunk-ZJWSK4VO.js";
7
7
  import {
8
8
  IdSchema
9
9
  } from "./chunk-NUEOE3K5.js";
@@ -62,4 +62,4 @@ export {
62
62
  BaseScoreSchemaV1,
63
63
  defineScoreSchema
64
64
  };
65
- //# sourceMappingURL=chunk-YY33MNMV.js.map
65
+ //# sourceMappingURL=chunk-7KMGLEYP.js.map
@@ -32,4 +32,4 @@ export {
32
32
  ExtensionLLMResponseFieldsV1,
33
33
  ExtensionLLMAsAJudgeScoreFieldsV1
34
34
  };
35
- //# sourceMappingURL=chunk-TRNCF2BG.js.map
35
+ //# sourceMappingURL=chunk-HBGC6BDW.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/schemas/extensions/response/llm.ts","../src/schemas/extensions/score/llm-as-a-judge-scorer.ts"],"sourcesContent":["import { IdSchema } from \"@/schemas/id\";\nimport z from \"zod\";\n\n/**\n * Provides a set of fields that holds information about the LLM and its response.\n */\nexport const ExtensionLLMResponseFieldsV1 = {\n data: z.string(),\n modelSlug: z.string(),\n provider: z.string(),\n systemPromptId: IdSchema.optional(),\n\n inputTokensUsed: z.number().optional(),\n outputTokensUsed: z.number().optional(),\n inputCost: z.string().optional(),\n outputCost: z.string().optional(),\n};\n\nexport function LLMResponseFieldsV1<\n TRawDataField extends boolean = false,\n>(params?: {\n /**\n * If `true` then the `data` field (which is a `z.string()`)\n * will be included in the returned fields\n */\n withRawDataField?: TRawDataField;\n}) {\n const baseFields = {\n modelSlug: z.string(),\n provider: z.string(),\n systemPromptId: IdSchema.optional(),\n\n inputTokensUsed: z.number().optional(),\n outputTokensUsed: z.number().optional(),\n inputCost: z.string().optional(),\n outputCost: z.string().optional(),\n };\n\n const fields = {\n ...baseFields,\n data: undefined as z.ZodString | undefined,\n };\n\n if (params?.withRawDataField) {\n fields.data = z.string();\n }\n\n return fields as unknown as typeof baseFields &\n (TRawDataField extends true ? { data: z.ZodString } : {});\n}\n","import z from \"zod\";\n\n/**\n * Provides a set of fields that holds information about the LLM model\n * that was used to judge the response.\n */\nexport const ExtensionLLMAsAJudgeScoreFieldsV1 = {\n scorerAISystemPrompt: z.string().optional(),\n scorerAISystemPromptId: z.string().optional(),\n scorerAIProvider: z.string().optional(),\n scorerAIModelSlug: z.string().optional(),\n scorerAIInputTokensUsed: z.number().optional(),\n scorerAIOutputTokensUsed: z.number().optional(),\n scorerAIInputCost: z.string().optional(),\n scorerAIOutputCost: z.string().optional(),\n};\n"],"mappings":";;;;;AACA,OAAO,OAAO;AAKP,IAAM,+BAA+B;AAAA,EAC1C,MAAM,EAAE,OAAO;AAAA,EACf,WAAW,EAAE,OAAO;AAAA,EACpB,UAAU,EAAE,OAAO;AAAA,EACnB,gBAAgB,SAAS,SAAS;AAAA,EAElC,iBAAiB,EAAE,OAAO,EAAE,SAAS;AAAA,EACrC,kBAAkB,EAAE,OAAO,EAAE,SAAS;AAAA,EACtC,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC;;;AChBA,OAAOA,QAAO;AAMP,IAAM,oCAAoC;AAAA,EAC/C,sBAAsBA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC1C,wBAAwBA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC5C,kBAAkBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACtC,mBAAmBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACvC,yBAAyBA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC7C,0BAA0BA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC9C,mBAAmBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACvC,oBAAoBA,GAAE,OAAO,EAAE,SAAS;AAC1C;","names":["z"]}
@@ -8,4 +8,4 @@ var ScoringMethod = {
8
8
  export {
9
9
  ScoringMethod
10
10
  };
11
- //# sourceMappingURL=chunk-HMQYGCKI.js.map
11
+ //# sourceMappingURL=chunk-ZJWSK4VO.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/types/index.ts"],"sourcesContent":["export * from \"./runner\";\n\nimport { IdSchema } from \"@/schemas/id\";\nimport z from \"zod\";\n\nexport type Id = z.infer<typeof IdSchema>;\n\nexport type IdGenerator<TInput = unknown> = (input: TInput) => MaybePromise<Id>;\n\nexport type MaybePromise<T> = T | Promise<T>;\n\nexport const ScoringMethod = {\n ai: \"ai\",\n human: \"human\",\n algo: \"algo\",\n} as const;\nexport type ScoringMethod = (typeof ScoringMethod)[keyof typeof ScoringMethod];\n"],"mappings":";AAWO,IAAM,gBAAgB;AAAA,EAC3B,IAAI;AAAA,EACJ,OAAO;AAAA,EACP,MAAM;AACR;","names":[]}
package/dist/dev.d.ts ADDED
@@ -0,0 +1,22 @@
1
+ type PeerBenchTestCase<TKind extends string, TSchemaVersion extends number> = {
2
+ id: string;
3
+ kind: TKind;
4
+ schemaVersion: TSchemaVersion;
5
+ [key: string]: unknown;
6
+ };
7
+ type PeerBenchResponse<TKind extends string, TSchemaVersion extends number> = {
8
+ id: string;
9
+ kind: TKind;
10
+ schemaVersion: TSchemaVersion;
11
+ testCaseId: string;
12
+ [key: string]: unknown;
13
+ };
14
+ type PeerBenchScore<TKind extends string, TSchemaVersion extends number> = {
15
+ id: string;
16
+ kind: TKind;
17
+ schemaVersion: TSchemaVersion;
18
+ responseId: string;
19
+ };
20
+ declare function createTestCase<TKind extends string, TSchemaVersion extends number>(kind: TKind, schemaVersion: TSchemaVersion, tc: Omit<PeerBenchTestCase<TKind, TSchemaVersion>, "kind" | "schemaVersion">): PeerBenchTestCase<TKind, TSchemaVersion>;
21
+ declare function createResponse<TKind extends string, TSchemaVersion extends number>(kind: TKind, schemaVersion: TSchemaVersion, rs: Omit<PeerBenchResponse<TKind, TSchemaVersion>, "kind" | "schemaVersion">): PeerBenchResponse<TKind, TSchemaVersion>;
22
+ declare function createScore<TKind extends string, TSchemaVersion extends number>(kind: TKind, schemaVersion: TSchemaVersion, sc: Omit<PeerBenchScore<TKind, TSchemaVersion>, "kind" | "schemaVersion">): PeerBenchScore<TKind, TSchemaVersion>;
@@ -1,45 +1,2 @@
1
- import { IdGenerator, InferRunConfig, ProviderCtor, Runner, ScorerCtor } from "../types";
2
- import z from "zod";
3
- export declare function defineRunner<const TProviders extends ProviderCtor[], const TScorers extends ScorerCtor[], const TSchemaSets extends SchemaSetDefinition[], const TRunConfigSchema extends z.ZodRawShape = {}>(config: {
4
- schemaSets: TSchemaSets;
5
- providers: TProviders;
6
- scorers: TScorers;
7
- runConfigSchema?: TRunConfigSchema;
8
- /**
9
- * @default true
10
- */
11
- parseRunConfig?: boolean;
12
- defaults?: {
13
- scorer?: InstanceType<TScorers[number]>;
14
- responseIdGenerator?: IdGenerator;
15
- scoreIdGenerator?: IdGenerator;
16
- };
17
- }, fn: Runner<TSchemaSets[number]["testCase"], TSchemaSets[number]["response"], TSchemaSets[number]["score"], InstanceType<TProviders[number]>, InstanceType<TScorers[number]>, InferRunConfig<TRunConfigSchema>>): ((params: Parameters<typeof fn>[0]) => Promise<{
18
- response: z.core.output<TSchemaSets[number]["response"]>;
19
- score?: z.core.output<TSchemaSets[number]["score"]> | undefined;
20
- }>) & {
21
- /**
22
- * The configuration that was used to define the runner.
23
- */
24
- config: {
25
- runConfigSchema: z.ZodObject<{ -readonly [P in keyof TRunConfigSchema]: TRunConfigSchema[P]; }, z.core.$strip>;
26
- schemaSets: TSchemaSets;
27
- providers: TProviders;
28
- scorers: TScorers;
29
- /**
30
- * @default true
31
- */
32
- parseRunConfig?: boolean;
33
- defaults?: {
34
- scorer?: InstanceType<TScorers[number]>;
35
- responseIdGenerator?: IdGenerator;
36
- scoreIdGenerator?: IdGenerator;
37
- };
38
- };
39
- };
40
- type SchemaSetDefinition<TTestCase extends z.ZodObject = z.ZodObject, TResponse extends z.ZodObject = z.ZodObject, TScore extends z.ZodObject = z.ZodObject> = {
41
- testCase: TTestCase;
42
- response: TResponse;
43
- score: TScore;
44
- };
45
- export {};
1
+ import { RunnerParams, RunnerResult } from "../types";
2
+ export declare function defineRunner<TParams extends RunnerParams, TResult extends RunnerResult>(fn: (params: TParams) => Promise<TResult>): (params: TParams) => Promise<TResult>;
package/dist/index.js CHANGED
@@ -1,9 +1,9 @@
1
1
  import {
2
2
  defineRunner
3
- } from "./chunk-RTEAK4II.js";
3
+ } from "./chunk-6WDCU5BP.js";
4
4
  import {
5
5
  ScoringMethod
6
- } from "./chunk-HMQYGCKI.js";
6
+ } from "./chunk-ZJWSK4VO.js";
7
7
  import {
8
8
  CATEGORIES,
9
9
  PEERBENCH_NAMESPACE
@@ -0,0 +1,24 @@
1
+ import type { LanguageModelV3 } from "@ai-sdk/provider";
2
+ import { RateLimiter } from "../utils";
3
+ import { AbstractProvider } from "./abstract";
4
+ import { type CallableLLM } from "./callables/llm";
5
+ declare const AISdkProvider_base: (new () => AbstractProvider & {
6
+ readonly kind: "peerbench.ai/llm/ai-sdk";
7
+ }) & {
8
+ readonly kind: "peerbench.ai/llm/ai-sdk";
9
+ };
10
+ export declare class AISdkProvider extends AISdkProvider_base {
11
+ private rateLimiter;
12
+ private maxRetries;
13
+ constructor(config?: AISdkProviderConfig);
14
+ model(config: AISdkModelConfig): CallableLLM<AISdkProvider>;
15
+ }
16
+ type AISdkProviderConfig = {
17
+ rateLimiter?: RateLimiter;
18
+ maxRetries?: number;
19
+ };
20
+ type AISdkModelConfig = {
21
+ aiSdkModel: LanguageModelV3;
22
+ stream?: boolean;
23
+ };
24
+ export {};
@@ -0,0 +1,4 @@
1
+ import { AbstractProvider } from "../abstract";
2
+ export interface Callable<TProvider = AbstractProvider> {
3
+ readonly provider: TProvider;
4
+ }
@@ -0,0 +1,41 @@
1
+ import { ChatCompletionMessageParam } from "openai/resources/chat/completions";
2
+ import { ResponseFormatJSONObject, ResponseFormatJSONSchema, ResponseFormatText } from "openai/resources/shared";
3
+ import { AbstractProvider, ProviderResponse } from "../abstract";
4
+ import { Callable } from "./callable";
5
+ export interface CallableLLM<TProvider extends AbstractProvider = AbstractProvider> extends Callable<TProvider> {
6
+ slug: string;
7
+ forward(args: CallableLLMForwardArgs): Promise<LLMResponse>;
8
+ }
9
+ export type CallableLLMForwardArgs = {
10
+ messages: ChatCompletionMessageParam[];
11
+ abortSignal?: AbortSignal;
12
+ maxTokens?: number;
13
+ temperature?: number;
14
+ responseFormat?: ResponseFormatText | ResponseFormatJSONSchema | ResponseFormatJSONObject;
15
+ };
16
+ export type LLMResponse = ProviderResponse<string> & {
17
+ /**
18
+ * Number of input tokens used.
19
+ */
20
+ inputTokensUsed?: number;
21
+ /**
22
+ * Number of output tokens used.
23
+ */
24
+ outputTokensUsed?: number;
25
+ /**
26
+ * Cost of the input tokens.
27
+ */
28
+ inputCost?: string;
29
+ /**
30
+ * Cost of the output tokens.
31
+ */
32
+ outputCost?: string;
33
+ /**
34
+ * Time taken to receive the first token.
35
+ */
36
+ timeToFirstToken?: number;
37
+ /**
38
+ * Additional metadata
39
+ */
40
+ metadata?: Record<string, unknown>;
41
+ };
@@ -1,12 +1,13 @@
1
- import { AbstractLLMProvider, ChatResponse, LLMProviderForwardArgs } from "../abstract/llm";
2
- /**
3
- * Example provider implementation for local testing and as a reference.
4
- *
5
- * - Extends `AbstractLLMProvider`
6
- * - Implements `forward({ messages, model, ... })`
7
- * - Does not perform any network calls
8
- */
9
- export declare class ExampleEchoLLMProvider extends AbstractLLMProvider {
10
- readonly kind = "example.echo";
11
- forward(args: LLMProviderForwardArgs): Promise<ChatResponse>;
1
+ import { AbstractProvider } from "../abstract";
2
+ import { type CallableLLM } from "../callables/llm";
3
+ declare const ExampleEchoLLMProvider_base: (new () => AbstractProvider & {
4
+ readonly kind: "example.echo";
5
+ }) & {
6
+ readonly kind: "example.echo";
7
+ };
8
+ export declare class ExampleEchoLLMProvider extends ExampleEchoLLMProvider_base {
9
+ model(config?: {
10
+ model?: string;
11
+ }): CallableLLM<ExampleEchoLLMProvider>;
12
12
  }
13
+ export {};
@@ -1,25 +1,18 @@
1
- import { AbstractLLMProvider, ChatResponse, LLMProviderForwardArgs } from "../abstract/llm";
2
- /**
3
- * Example "custom REST API LLM agent provider".
4
- *
5
- * Sometimes you don't call a model API directly. You call your own REST API, and *it* talks to the model.
6
- * That REST API can hide secrets, run tools, do retrieval, apply guardrails, and whatever else your
7
- * product needs.
8
- *
9
- * In the SDK we still want a clean abstraction, so we model that REST API as an `AbstractLLMProvider`.
10
- * The runner (or host app) still passes `messages + model`, and the provider still returns one final string.
11
- *
12
- * If you’re implementing your own provider, this is the only part that matters: translate
13
- * `LLMProviderForwardArgs` into your HTTP request, then translate your HTTP response back into
14
- * `ChatResponse`.
15
- */
16
- export declare class ExampleRestApiLLMAgentProvider extends AbstractLLMProvider {
17
- readonly kind = "example.restapi.agent";
1
+ import { AbstractProvider } from "../abstract";
2
+ import { type CallableLLM } from "../callables/llm";
3
+ declare const ExampleRestApiLLMAgentProvider_base: (new () => AbstractProvider & {
4
+ readonly kind: "example.rest-api.agent";
5
+ }) & {
6
+ readonly kind: "example.rest-api.agent";
7
+ };
8
+ export declare class ExampleRestApiLLMAgentProvider extends ExampleRestApiLLMAgentProvider_base {
18
9
  private readonly baseUrl;
19
10
  private readonly apiKey?;
20
11
  private readonly headers?;
21
12
  constructor(config: ExampleRestApiAgentProviderConfig);
22
- forward(args: LLMProviderForwardArgs): Promise<ChatResponse>;
13
+ model(config?: {
14
+ model?: string;
15
+ }): CallableLLM<ExampleRestApiLLMAgentProvider>;
23
16
  }
24
17
  type ExampleRestApiAgentProviderConfig = {
25
18
  /**
@@ -1,5 +1,7 @@
1
- export * from "./abstract/llm";
2
- export * from "./abstract/provider";
1
+ export * from "./callables/callable";
2
+ export * from "./callables/llm";
3
+ export * from "./abstract";
3
4
  export * from "./mastra";
4
5
  export * from "./openai";
5
6
  export * from "./openrouter";
7
+ export * from "./ai-sdk";