peerbench 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -58
- package/dist/benchmarks/examples/echo-basic/runner.d.ts +11 -254
- package/dist/benchmarks/examples/echo-basic/schema-sets/echo.v1.d.ts +25 -25
- package/dist/benchmarks/examples/exact-match-scorer/runner.d.ts +38 -386
- package/dist/benchmarks/examples/exact-match-scorer/schema-sets/exact-match.v1.d.ts +25 -25
- package/dist/benchmarks/examples/text-transform/runner.d.ts +32 -480
- package/dist/benchmarks/examples/text-transform/schema-sets/echo.v1.d.ts +25 -25
- package/dist/benchmarks/examples/text-transform/schema-sets/reverse.v1.d.ts +25 -25
- package/dist/benchmarks/index.js +180 -248
- package/dist/benchmarks/index.js.map +1 -1
- package/dist/benchmarks/peerbench/index.d.ts +2 -1
- package/dist/benchmarks/peerbench/mcq-runner.d.ts +78 -0
- package/dist/benchmarks/peerbench/qa-runner.d.ts +77 -0
- package/dist/benchmarks/peerbench/schema-sets/mcq.v1.d.ts +25 -25
- package/dist/benchmarks/peerbench/schema-sets/multi-turn.v1.d.ts +25 -25
- package/dist/benchmarks/peerbench/schema-sets/qa.v1.d.ts +25 -25
- package/dist/chunk-6WDCU5BP.js +9 -0
- package/dist/chunk-6WDCU5BP.js.map +1 -0
- package/dist/{chunk-YY33MNMV.js → chunk-7KMGLEYP.js} +2 -2
- package/dist/{chunk-TRNCF2BG.js → chunk-HBGC6BDW.js} +1 -1
- package/dist/chunk-HBGC6BDW.js.map +1 -0
- package/dist/{chunk-HMQYGCKI.js → chunk-ZJWSK4VO.js} +1 -1
- package/dist/chunk-ZJWSK4VO.js.map +1 -0
- package/dist/dev.d.ts +22 -0
- package/dist/helpers/define-runner.d.ts +2 -45
- package/dist/index.js +2 -2
- package/dist/providers/ai-sdk.d.ts +24 -0
- package/dist/providers/callables/callable.d.ts +4 -0
- package/dist/providers/callables/llm.d.ts +41 -0
- package/dist/providers/example/echo.d.ts +12 -11
- package/dist/providers/example/restapi.d.ts +11 -18
- package/dist/providers/index.d.ts +4 -2
- package/dist/providers/index.js +380 -9
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/mastra.d.ts +16 -21
- package/dist/providers/openai.d.ts +25 -10
- package/dist/providers/openrouter.d.ts +6 -8
- package/dist/schemas/extensions/index.js +1 -1
- package/dist/schemas/extensions/response/llm.d.ts +17 -0
- package/dist/schemas/index.js +2 -2
- package/dist/schemas/llm/index.js +36 -7
- package/dist/schemas/llm/index.js.map +1 -1
- package/dist/schemas/llm/simple-system-prompt.d.ts +3 -3
- package/dist/schemas/llm/system-prompt.d.ts +7 -7
- package/dist/schemas/response.d.ts +7 -7
- package/dist/schemas/schema-definer.d.ts +5 -5
- package/dist/schemas/score.d.ts +7 -7
- package/dist/schemas/test-case.d.ts +7 -7
- package/dist/scorers/abstract.d.ts +1 -1
- package/dist/scorers/index.js +377 -7
- package/dist/scorers/index.js.map +1 -1
- package/dist/scorers/llm-judge.d.ts +6 -6
- package/dist/types/index.d.ts +0 -5
- package/dist/types/runner.d.ts +13 -17
- package/package.json +8 -7
- package/dist/benchmarks/peerbench/runner.d.ts +0 -754
- package/dist/chunk-3JHDJEY3.js +0 -374
- package/dist/chunk-3JHDJEY3.js.map +0 -1
- package/dist/chunk-HMQYGCKI.js.map +0 -1
- package/dist/chunk-Q6GSOHOP.js +0 -44
- package/dist/chunk-Q6GSOHOP.js.map +0 -1
- package/dist/chunk-RTEAK4II.js +0 -37
- package/dist/chunk-RTEAK4II.js.map +0 -1
- package/dist/chunk-SMLNDQFX.js +0 -244
- package/dist/chunk-SMLNDQFX.js.map +0 -1
- package/dist/chunk-TRNCF2BG.js.map +0 -1
- package/dist/providers/abstract/llm.d.ts +0 -20
- /package/dist/{chunk-YY33MNMV.js.map → chunk-7KMGLEYP.js.map} +0 -0
- /package/dist/providers/{abstract/provider.d.ts → abstract.d.ts} +0 -0
|
@@ -6,7 +6,7 @@ export declare const QATestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
6
6
|
schemaVersion: z.ZodNumber;
|
|
7
7
|
kind: z.ZodString;
|
|
8
8
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
9
|
-
}, "kind" | "
|
|
9
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
10
10
|
question: z.ZodString;
|
|
11
11
|
goodAnswers: z.ZodArray<z.ZodString>;
|
|
12
12
|
badAnswers: z.ZodArray<z.ZodString>;
|
|
@@ -24,7 +24,7 @@ export declare const QATestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
24
24
|
kind: "llm/qa.tc";
|
|
25
25
|
schemaVersion: 1;
|
|
26
26
|
metadata?: Record<string, unknown> | undefined;
|
|
27
|
-
}, "kind" | "
|
|
27
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
28
28
|
id: string;
|
|
29
29
|
question: string;
|
|
30
30
|
goodAnswers: string[];
|
|
@@ -43,7 +43,7 @@ export declare const QATestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
43
43
|
kind: "llm/qa.tc";
|
|
44
44
|
schemaVersion: 1;
|
|
45
45
|
metadata?: Record<string, unknown> | undefined;
|
|
46
|
-
}, "
|
|
46
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
47
47
|
id: string;
|
|
48
48
|
question: string;
|
|
49
49
|
goodAnswers: string[];
|
|
@@ -64,7 +64,7 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
64
64
|
completedAt: z.ZodNumber;
|
|
65
65
|
testCaseId: z.ZodString;
|
|
66
66
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
67
|
-
}, "kind" | "
|
|
67
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
68
68
|
data: z.ZodString;
|
|
69
69
|
modelSlug: z.ZodString;
|
|
70
70
|
provider: z.ZodString;
|
|
@@ -79,10 +79,10 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
79
79
|
schemaVersion: z.ZodLiteral<1>;
|
|
80
80
|
}, z.core.$strip> & {
|
|
81
81
|
new: (input: Omit<{
|
|
82
|
-
startedAt: number;
|
|
83
|
-
completedAt: number;
|
|
84
82
|
id: string;
|
|
85
83
|
testCaseId: string;
|
|
84
|
+
startedAt: number;
|
|
85
|
+
completedAt: number;
|
|
86
86
|
data: string;
|
|
87
87
|
modelSlug: string;
|
|
88
88
|
provider: string;
|
|
@@ -95,11 +95,11 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
95
95
|
outputTokensUsed?: number | undefined;
|
|
96
96
|
inputCost?: string | undefined;
|
|
97
97
|
outputCost?: string | undefined;
|
|
98
|
-
}, "kind" | "
|
|
99
|
-
startedAt: number;
|
|
100
|
-
completedAt: number;
|
|
98
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
101
99
|
id: string;
|
|
102
100
|
testCaseId: string;
|
|
101
|
+
startedAt: number;
|
|
102
|
+
completedAt: number;
|
|
103
103
|
data: string;
|
|
104
104
|
modelSlug: string;
|
|
105
105
|
provider: string;
|
|
@@ -114,10 +114,10 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
114
114
|
outputCost?: string | undefined;
|
|
115
115
|
};
|
|
116
116
|
newWithId(input: Omit<{
|
|
117
|
-
startedAt: number;
|
|
118
|
-
completedAt: number;
|
|
119
117
|
id: string;
|
|
120
118
|
testCaseId: string;
|
|
119
|
+
startedAt: number;
|
|
120
|
+
completedAt: number;
|
|
121
121
|
data: string;
|
|
122
122
|
modelSlug: string;
|
|
123
123
|
provider: string;
|
|
@@ -130,11 +130,11 @@ export declare const QAResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
130
130
|
outputTokensUsed?: number | undefined;
|
|
131
131
|
inputCost?: string | undefined;
|
|
132
132
|
outputCost?: string | undefined;
|
|
133
|
-
}, "
|
|
134
|
-
startedAt: number;
|
|
135
|
-
completedAt: number;
|
|
133
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
136
134
|
id: string;
|
|
137
135
|
testCaseId: string;
|
|
136
|
+
startedAt: number;
|
|
137
|
+
completedAt: number;
|
|
138
138
|
data: string;
|
|
139
139
|
modelSlug: string;
|
|
140
140
|
provider: string;
|
|
@@ -164,7 +164,7 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
164
164
|
readonly human: "human";
|
|
165
165
|
readonly algo: "algo";
|
|
166
166
|
}>;
|
|
167
|
-
}, "kind" | "
|
|
167
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
168
168
|
scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
|
|
169
169
|
scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
|
|
170
170
|
scorerAIProvider: z.ZodOptional<z.ZodString>;
|
|
@@ -180,14 +180,14 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
180
180
|
}, z.core.$strip> & {
|
|
181
181
|
new: (input: Omit<{
|
|
182
182
|
id: string;
|
|
183
|
-
value: number;
|
|
184
183
|
responseId: string;
|
|
184
|
+
value: number;
|
|
185
185
|
scoringMethod: "ai" | "human" | "algo";
|
|
186
186
|
namespace: "peerbench.ai";
|
|
187
187
|
kind: "llm/qa.sc";
|
|
188
188
|
schemaVersion: 1;
|
|
189
|
-
metadata?: Record<string, unknown> | undefined;
|
|
190
189
|
explanation?: string | undefined;
|
|
190
|
+
metadata?: Record<string, unknown> | undefined;
|
|
191
191
|
scorerAISystemPrompt?: string | undefined;
|
|
192
192
|
scorerAISystemPromptId?: string | undefined;
|
|
193
193
|
scorerAIProvider?: string | undefined;
|
|
@@ -196,16 +196,16 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
196
196
|
scorerAIOutputTokensUsed?: number | undefined;
|
|
197
197
|
scorerAIInputCost?: string | undefined;
|
|
198
198
|
scorerAIOutputCost?: string | undefined;
|
|
199
|
-
}, "kind" | "
|
|
199
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
200
200
|
id: string;
|
|
201
|
-
value: number;
|
|
202
201
|
responseId: string;
|
|
202
|
+
value: number;
|
|
203
203
|
scoringMethod: "ai" | "human" | "algo";
|
|
204
204
|
namespace: "peerbench.ai";
|
|
205
205
|
kind: "llm/qa.sc";
|
|
206
206
|
schemaVersion: 1;
|
|
207
|
-
metadata?: Record<string, unknown> | undefined;
|
|
208
207
|
explanation?: string | undefined;
|
|
208
|
+
metadata?: Record<string, unknown> | undefined;
|
|
209
209
|
scorerAISystemPrompt?: string | undefined;
|
|
210
210
|
scorerAISystemPromptId?: string | undefined;
|
|
211
211
|
scorerAIProvider?: string | undefined;
|
|
@@ -217,14 +217,14 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
217
217
|
};
|
|
218
218
|
newWithId(input: Omit<{
|
|
219
219
|
id: string;
|
|
220
|
-
value: number;
|
|
221
220
|
responseId: string;
|
|
221
|
+
value: number;
|
|
222
222
|
scoringMethod: "ai" | "human" | "algo";
|
|
223
223
|
namespace: "peerbench.ai";
|
|
224
224
|
kind: "llm/qa.sc";
|
|
225
225
|
schemaVersion: 1;
|
|
226
|
-
metadata?: Record<string, unknown> | undefined;
|
|
227
226
|
explanation?: string | undefined;
|
|
227
|
+
metadata?: Record<string, unknown> | undefined;
|
|
228
228
|
scorerAISystemPrompt?: string | undefined;
|
|
229
229
|
scorerAISystemPromptId?: string | undefined;
|
|
230
230
|
scorerAIProvider?: string | undefined;
|
|
@@ -233,16 +233,16 @@ export declare const QAScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
233
233
|
scorerAIOutputTokensUsed?: number | undefined;
|
|
234
234
|
scorerAIInputCost?: string | undefined;
|
|
235
235
|
scorerAIOutputCost?: string | undefined;
|
|
236
|
-
}, "
|
|
236
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
237
237
|
id: string;
|
|
238
|
-
value: number;
|
|
239
238
|
responseId: string;
|
|
239
|
+
value: number;
|
|
240
240
|
scoringMethod: "ai" | "human" | "algo";
|
|
241
241
|
namespace: "peerbench.ai";
|
|
242
242
|
kind: "llm/qa.sc";
|
|
243
243
|
schemaVersion: 1;
|
|
244
|
-
metadata?: Record<string, unknown> | undefined;
|
|
245
244
|
explanation?: string | undefined;
|
|
245
|
+
metadata?: Record<string, unknown> | undefined;
|
|
246
246
|
scorerAISystemPrompt?: string | undefined;
|
|
247
247
|
scorerAISystemPromptId?: string | undefined;
|
|
248
248
|
scorerAIProvider?: string | undefined;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/helpers/define-runner.ts"],"sourcesContent":["import { RunnerParams, RunnerResult } from \"@/types\";\n\nexport function defineRunner<TParams extends RunnerParams, TResult extends RunnerResult>(\n fn: (params: TParams) => Promise<TResult>\n) {\n return fn;\n}\n"],"mappings":";AAEO,SAAS,aACd,IACA;AACA,SAAO;AACT;","names":[]}
|
|
@@ -3,7 +3,7 @@ import {
|
|
|
3
3
|
} from "./chunk-OQE6TQXZ.js";
|
|
4
4
|
import {
|
|
5
5
|
ScoringMethod
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-ZJWSK4VO.js";
|
|
7
7
|
import {
|
|
8
8
|
IdSchema
|
|
9
9
|
} from "./chunk-NUEOE3K5.js";
|
|
@@ -62,4 +62,4 @@ export {
|
|
|
62
62
|
BaseScoreSchemaV1,
|
|
63
63
|
defineScoreSchema
|
|
64
64
|
};
|
|
65
|
-
//# sourceMappingURL=chunk-
|
|
65
|
+
//# sourceMappingURL=chunk-7KMGLEYP.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/schemas/extensions/response/llm.ts","../src/schemas/extensions/score/llm-as-a-judge-scorer.ts"],"sourcesContent":["import { IdSchema } from \"@/schemas/id\";\nimport z from \"zod\";\n\n/**\n * Provides a set of fields that holds information about the LLM and its response.\n */\nexport const ExtensionLLMResponseFieldsV1 = {\n data: z.string(),\n modelSlug: z.string(),\n provider: z.string(),\n systemPromptId: IdSchema.optional(),\n\n inputTokensUsed: z.number().optional(),\n outputTokensUsed: z.number().optional(),\n inputCost: z.string().optional(),\n outputCost: z.string().optional(),\n};\n\nexport function LLMResponseFieldsV1<\n TRawDataField extends boolean = false,\n>(params?: {\n /**\n * If `true` then the `data` field (which is a `z.string()`)\n * will be included in the returned fields\n */\n withRawDataField?: TRawDataField;\n}) {\n const baseFields = {\n modelSlug: z.string(),\n provider: z.string(),\n systemPromptId: IdSchema.optional(),\n\n inputTokensUsed: z.number().optional(),\n outputTokensUsed: z.number().optional(),\n inputCost: z.string().optional(),\n outputCost: z.string().optional(),\n };\n\n const fields = {\n ...baseFields,\n data: undefined as z.ZodString | undefined,\n };\n\n if (params?.withRawDataField) {\n fields.data = z.string();\n }\n\n return fields as unknown as typeof baseFields &\n (TRawDataField extends true ? { data: z.ZodString } : {});\n}\n","import z from \"zod\";\n\n/**\n * Provides a set of fields that holds information about the LLM model\n * that was used to judge the response.\n */\nexport const ExtensionLLMAsAJudgeScoreFieldsV1 = {\n scorerAISystemPrompt: z.string().optional(),\n scorerAISystemPromptId: z.string().optional(),\n scorerAIProvider: z.string().optional(),\n scorerAIModelSlug: z.string().optional(),\n scorerAIInputTokensUsed: z.number().optional(),\n scorerAIOutputTokensUsed: z.number().optional(),\n scorerAIInputCost: z.string().optional(),\n scorerAIOutputCost: z.string().optional(),\n};\n"],"mappings":";;;;;AACA,OAAO,OAAO;AAKP,IAAM,+BAA+B;AAAA,EAC1C,MAAM,EAAE,OAAO;AAAA,EACf,WAAW,EAAE,OAAO;AAAA,EACpB,UAAU,EAAE,OAAO;AAAA,EACnB,gBAAgB,SAAS,SAAS;AAAA,EAElC,iBAAiB,EAAE,OAAO,EAAE,SAAS;AAAA,EACrC,kBAAkB,EAAE,OAAO,EAAE,SAAS;AAAA,EACtC,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC;;;AChBA,OAAOA,QAAO;AAMP,IAAM,oCAAoC;AAAA,EAC/C,sBAAsBA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC1C,wBAAwBA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC5C,kBAAkBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACtC,mBAAmBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACvC,yBAAyBA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC7C,0BAA0BA,GAAE,OAAO,EAAE,SAAS;AAAA,EAC9C,mBAAmBA,GAAE,OAAO,EAAE,SAAS;AAAA,EACvC,oBAAoBA,GAAE,OAAO,EAAE,SAAS;AAC1C;","names":["z"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/types/index.ts"],"sourcesContent":["export * from \"./runner\";\n\nimport { IdSchema } from \"@/schemas/id\";\nimport z from \"zod\";\n\nexport type Id = z.infer<typeof IdSchema>;\n\nexport type IdGenerator<TInput = unknown> = (input: TInput) => MaybePromise<Id>;\n\nexport type MaybePromise<T> = T | Promise<T>;\n\nexport const ScoringMethod = {\n ai: \"ai\",\n human: \"human\",\n algo: \"algo\",\n} as const;\nexport type ScoringMethod = (typeof ScoringMethod)[keyof typeof ScoringMethod];\n"],"mappings":";AAWO,IAAM,gBAAgB;AAAA,EAC3B,IAAI;AAAA,EACJ,OAAO;AAAA,EACP,MAAM;AACR;","names":[]}
|
package/dist/dev.d.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
type PeerBenchTestCase<TKind extends string, TSchemaVersion extends number> = {
|
|
2
|
+
id: string;
|
|
3
|
+
kind: TKind;
|
|
4
|
+
schemaVersion: TSchemaVersion;
|
|
5
|
+
[key: string]: unknown;
|
|
6
|
+
};
|
|
7
|
+
type PeerBenchResponse<TKind extends string, TSchemaVersion extends number> = {
|
|
8
|
+
id: string;
|
|
9
|
+
kind: TKind;
|
|
10
|
+
schemaVersion: TSchemaVersion;
|
|
11
|
+
testCaseId: string;
|
|
12
|
+
[key: string]: unknown;
|
|
13
|
+
};
|
|
14
|
+
type PeerBenchScore<TKind extends string, TSchemaVersion extends number> = {
|
|
15
|
+
id: string;
|
|
16
|
+
kind: TKind;
|
|
17
|
+
schemaVersion: TSchemaVersion;
|
|
18
|
+
responseId: string;
|
|
19
|
+
};
|
|
20
|
+
declare function createTestCase<TKind extends string, TSchemaVersion extends number>(kind: TKind, schemaVersion: TSchemaVersion, tc: Omit<PeerBenchTestCase<TKind, TSchemaVersion>, "kind" | "schemaVersion">): PeerBenchTestCase<TKind, TSchemaVersion>;
|
|
21
|
+
declare function createResponse<TKind extends string, TSchemaVersion extends number>(kind: TKind, schemaVersion: TSchemaVersion, rs: Omit<PeerBenchResponse<TKind, TSchemaVersion>, "kind" | "schemaVersion">): PeerBenchResponse<TKind, TSchemaVersion>;
|
|
22
|
+
declare function createScore<TKind extends string, TSchemaVersion extends number>(kind: TKind, schemaVersion: TSchemaVersion, sc: Omit<PeerBenchScore<TKind, TSchemaVersion>, "kind" | "schemaVersion">): PeerBenchScore<TKind, TSchemaVersion>;
|
|
@@ -1,45 +1,2 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
export declare function defineRunner<const TProviders extends ProviderCtor[], const TScorers extends ScorerCtor[], const TSchemaSets extends SchemaSetDefinition[], const TRunConfigSchema extends z.ZodRawShape = {}>(config: {
|
|
4
|
-
schemaSets: TSchemaSets;
|
|
5
|
-
providers: TProviders;
|
|
6
|
-
scorers: TScorers;
|
|
7
|
-
runConfigSchema?: TRunConfigSchema;
|
|
8
|
-
/**
|
|
9
|
-
* @default true
|
|
10
|
-
*/
|
|
11
|
-
parseRunConfig?: boolean;
|
|
12
|
-
defaults?: {
|
|
13
|
-
scorer?: InstanceType<TScorers[number]>;
|
|
14
|
-
responseIdGenerator?: IdGenerator;
|
|
15
|
-
scoreIdGenerator?: IdGenerator;
|
|
16
|
-
};
|
|
17
|
-
}, fn: Runner<TSchemaSets[number]["testCase"], TSchemaSets[number]["response"], TSchemaSets[number]["score"], InstanceType<TProviders[number]>, InstanceType<TScorers[number]>, InferRunConfig<TRunConfigSchema>>): ((params: Parameters<typeof fn>[0]) => Promise<{
|
|
18
|
-
response: z.core.output<TSchemaSets[number]["response"]>;
|
|
19
|
-
score?: z.core.output<TSchemaSets[number]["score"]> | undefined;
|
|
20
|
-
}>) & {
|
|
21
|
-
/**
|
|
22
|
-
* The configuration that was used to define the runner.
|
|
23
|
-
*/
|
|
24
|
-
config: {
|
|
25
|
-
runConfigSchema: z.ZodObject<{ -readonly [P in keyof TRunConfigSchema]: TRunConfigSchema[P]; }, z.core.$strip>;
|
|
26
|
-
schemaSets: TSchemaSets;
|
|
27
|
-
providers: TProviders;
|
|
28
|
-
scorers: TScorers;
|
|
29
|
-
/**
|
|
30
|
-
* @default true
|
|
31
|
-
*/
|
|
32
|
-
parseRunConfig?: boolean;
|
|
33
|
-
defaults?: {
|
|
34
|
-
scorer?: InstanceType<TScorers[number]>;
|
|
35
|
-
responseIdGenerator?: IdGenerator;
|
|
36
|
-
scoreIdGenerator?: IdGenerator;
|
|
37
|
-
};
|
|
38
|
-
};
|
|
39
|
-
};
|
|
40
|
-
type SchemaSetDefinition<TTestCase extends z.ZodObject = z.ZodObject, TResponse extends z.ZodObject = z.ZodObject, TScore extends z.ZodObject = z.ZodObject> = {
|
|
41
|
-
testCase: TTestCase;
|
|
42
|
-
response: TResponse;
|
|
43
|
-
score: TScore;
|
|
44
|
-
};
|
|
45
|
-
export {};
|
|
1
|
+
import { RunnerParams, RunnerResult } from "../types";
|
|
2
|
+
export declare function defineRunner<TParams extends RunnerParams, TResult extends RunnerResult>(fn: (params: TParams) => Promise<TResult>): (params: TParams) => Promise<TResult>;
|
package/dist/index.js
CHANGED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { LanguageModelV3 } from "@ai-sdk/provider";
|
|
2
|
+
import { RateLimiter } from "../utils";
|
|
3
|
+
import { AbstractProvider } from "./abstract";
|
|
4
|
+
import { type CallableLLM } from "./callables/llm";
|
|
5
|
+
declare const AISdkProvider_base: (new () => AbstractProvider & {
|
|
6
|
+
readonly kind: "peerbench.ai/llm/ai-sdk";
|
|
7
|
+
}) & {
|
|
8
|
+
readonly kind: "peerbench.ai/llm/ai-sdk";
|
|
9
|
+
};
|
|
10
|
+
export declare class AISdkProvider extends AISdkProvider_base {
|
|
11
|
+
private rateLimiter;
|
|
12
|
+
private maxRetries;
|
|
13
|
+
constructor(config?: AISdkProviderConfig);
|
|
14
|
+
model(config: AISdkModelConfig): CallableLLM<AISdkProvider>;
|
|
15
|
+
}
|
|
16
|
+
type AISdkProviderConfig = {
|
|
17
|
+
rateLimiter?: RateLimiter;
|
|
18
|
+
maxRetries?: number;
|
|
19
|
+
};
|
|
20
|
+
type AISdkModelConfig = {
|
|
21
|
+
aiSdkModel: LanguageModelV3;
|
|
22
|
+
stream?: boolean;
|
|
23
|
+
};
|
|
24
|
+
export {};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { ChatCompletionMessageParam } from "openai/resources/chat/completions";
|
|
2
|
+
import { ResponseFormatJSONObject, ResponseFormatJSONSchema, ResponseFormatText } from "openai/resources/shared";
|
|
3
|
+
import { AbstractProvider, ProviderResponse } from "../abstract";
|
|
4
|
+
import { Callable } from "./callable";
|
|
5
|
+
export interface CallableLLM<TProvider extends AbstractProvider = AbstractProvider> extends Callable<TProvider> {
|
|
6
|
+
slug: string;
|
|
7
|
+
forward(args: CallableLLMForwardArgs): Promise<LLMResponse>;
|
|
8
|
+
}
|
|
9
|
+
export type CallableLLMForwardArgs = {
|
|
10
|
+
messages: ChatCompletionMessageParam[];
|
|
11
|
+
abortSignal?: AbortSignal;
|
|
12
|
+
maxTokens?: number;
|
|
13
|
+
temperature?: number;
|
|
14
|
+
responseFormat?: ResponseFormatText | ResponseFormatJSONSchema | ResponseFormatJSONObject;
|
|
15
|
+
};
|
|
16
|
+
export type LLMResponse = ProviderResponse<string> & {
|
|
17
|
+
/**
|
|
18
|
+
* Number of input tokens used.
|
|
19
|
+
*/
|
|
20
|
+
inputTokensUsed?: number;
|
|
21
|
+
/**
|
|
22
|
+
* Number of output tokens used.
|
|
23
|
+
*/
|
|
24
|
+
outputTokensUsed?: number;
|
|
25
|
+
/**
|
|
26
|
+
* Cost of the input tokens.
|
|
27
|
+
*/
|
|
28
|
+
inputCost?: string;
|
|
29
|
+
/**
|
|
30
|
+
* Cost of the output tokens.
|
|
31
|
+
*/
|
|
32
|
+
outputCost?: string;
|
|
33
|
+
/**
|
|
34
|
+
* Time taken to receive the first token.
|
|
35
|
+
*/
|
|
36
|
+
timeToFirstToken?: number;
|
|
37
|
+
/**
|
|
38
|
+
* Additional metadata
|
|
39
|
+
*/
|
|
40
|
+
metadata?: Record<string, unknown>;
|
|
41
|
+
};
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
import { AbstractProvider } from "../abstract";
|
|
2
|
+
import { type CallableLLM } from "../callables/llm";
|
|
3
|
+
declare const ExampleEchoLLMProvider_base: (new () => AbstractProvider & {
|
|
4
|
+
readonly kind: "example.echo";
|
|
5
|
+
}) & {
|
|
6
|
+
readonly kind: "example.echo";
|
|
7
|
+
};
|
|
8
|
+
export declare class ExampleEchoLLMProvider extends ExampleEchoLLMProvider_base {
|
|
9
|
+
model(config?: {
|
|
10
|
+
model?: string;
|
|
11
|
+
}): CallableLLM<ExampleEchoLLMProvider>;
|
|
12
12
|
}
|
|
13
|
+
export {};
|
|
@@ -1,25 +1,18 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
* In the SDK we still want a clean abstraction, so we model that REST API as an `AbstractLLMProvider`.
|
|
10
|
-
* The runner (or host app) still passes `messages + model`, and the provider still returns one final string.
|
|
11
|
-
*
|
|
12
|
-
* If you’re implementing your own provider, this is the only part that matters: translate
|
|
13
|
-
* `LLMProviderForwardArgs` into your HTTP request, then translate your HTTP response back into
|
|
14
|
-
* `ChatResponse`.
|
|
15
|
-
*/
|
|
16
|
-
export declare class ExampleRestApiLLMAgentProvider extends AbstractLLMProvider {
|
|
17
|
-
readonly kind = "example.restapi.agent";
|
|
1
|
+
import { AbstractProvider } from "../abstract";
|
|
2
|
+
import { type CallableLLM } from "../callables/llm";
|
|
3
|
+
declare const ExampleRestApiLLMAgentProvider_base: (new () => AbstractProvider & {
|
|
4
|
+
readonly kind: "example.rest-api.agent";
|
|
5
|
+
}) & {
|
|
6
|
+
readonly kind: "example.rest-api.agent";
|
|
7
|
+
};
|
|
8
|
+
export declare class ExampleRestApiLLMAgentProvider extends ExampleRestApiLLMAgentProvider_base {
|
|
18
9
|
private readonly baseUrl;
|
|
19
10
|
private readonly apiKey?;
|
|
20
11
|
private readonly headers?;
|
|
21
12
|
constructor(config: ExampleRestApiAgentProviderConfig);
|
|
22
|
-
|
|
13
|
+
model(config?: {
|
|
14
|
+
model?: string;
|
|
15
|
+
}): CallableLLM<ExampleRestApiLLMAgentProvider>;
|
|
23
16
|
}
|
|
24
17
|
type ExampleRestApiAgentProviderConfig = {
|
|
25
18
|
/**
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
export * from "./
|
|
2
|
-
export * from "./
|
|
1
|
+
export * from "./callables/callable";
|
|
2
|
+
export * from "./callables/llm";
|
|
3
|
+
export * from "./abstract";
|
|
3
4
|
export * from "./mastra";
|
|
4
5
|
export * from "./openai";
|
|
5
6
|
export * from "./openrouter";
|
|
7
|
+
export * from "./ai-sdk";
|