peerbench 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -58
- package/dist/benchmarks/examples/echo-basic/runner.d.ts +11 -254
- package/dist/benchmarks/examples/echo-basic/schema-sets/echo.v1.d.ts +25 -25
- package/dist/benchmarks/examples/exact-match-scorer/runner.d.ts +38 -386
- package/dist/benchmarks/examples/exact-match-scorer/schema-sets/exact-match.v1.d.ts +25 -25
- package/dist/benchmarks/examples/text-transform/runner.d.ts +32 -480
- package/dist/benchmarks/examples/text-transform/schema-sets/echo.v1.d.ts +25 -25
- package/dist/benchmarks/examples/text-transform/schema-sets/reverse.v1.d.ts +25 -25
- package/dist/benchmarks/index.js +180 -248
- package/dist/benchmarks/index.js.map +1 -1
- package/dist/benchmarks/peerbench/index.d.ts +2 -1
- package/dist/benchmarks/peerbench/mcq-runner.d.ts +78 -0
- package/dist/benchmarks/peerbench/qa-runner.d.ts +77 -0
- package/dist/benchmarks/peerbench/schema-sets/mcq.v1.d.ts +25 -25
- package/dist/benchmarks/peerbench/schema-sets/multi-turn.v1.d.ts +25 -25
- package/dist/benchmarks/peerbench/schema-sets/qa.v1.d.ts +25 -25
- package/dist/chunk-6WDCU5BP.js +9 -0
- package/dist/chunk-6WDCU5BP.js.map +1 -0
- package/dist/{chunk-YY33MNMV.js → chunk-7KMGLEYP.js} +2 -2
- package/dist/{chunk-TRNCF2BG.js → chunk-HBGC6BDW.js} +1 -1
- package/dist/chunk-HBGC6BDW.js.map +1 -0
- package/dist/{chunk-HMQYGCKI.js → chunk-ZJWSK4VO.js} +1 -1
- package/dist/chunk-ZJWSK4VO.js.map +1 -0
- package/dist/dev.d.ts +22 -0
- package/dist/helpers/define-runner.d.ts +2 -45
- package/dist/index.js +2 -2
- package/dist/providers/ai-sdk.d.ts +24 -0
- package/dist/providers/callables/callable.d.ts +4 -0
- package/dist/providers/callables/llm.d.ts +41 -0
- package/dist/providers/example/echo.d.ts +12 -11
- package/dist/providers/example/restapi.d.ts +11 -18
- package/dist/providers/index.d.ts +4 -2
- package/dist/providers/index.js +380 -9
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/mastra.d.ts +16 -21
- package/dist/providers/openai.d.ts +25 -10
- package/dist/providers/openrouter.d.ts +6 -8
- package/dist/schemas/extensions/index.js +1 -1
- package/dist/schemas/extensions/response/llm.d.ts +17 -0
- package/dist/schemas/index.js +2 -2
- package/dist/schemas/llm/index.js +36 -7
- package/dist/schemas/llm/index.js.map +1 -1
- package/dist/schemas/llm/simple-system-prompt.d.ts +3 -3
- package/dist/schemas/llm/system-prompt.d.ts +7 -7
- package/dist/schemas/response.d.ts +7 -7
- package/dist/schemas/schema-definer.d.ts +5 -5
- package/dist/schemas/score.d.ts +7 -7
- package/dist/schemas/test-case.d.ts +7 -7
- package/dist/scorers/abstract.d.ts +1 -1
- package/dist/scorers/index.js +377 -7
- package/dist/scorers/index.js.map +1 -1
- package/dist/scorers/llm-judge.d.ts +6 -6
- package/dist/types/index.d.ts +0 -5
- package/dist/types/runner.d.ts +13 -17
- package/package.json +8 -7
- package/dist/benchmarks/peerbench/runner.d.ts +0 -754
- package/dist/chunk-3JHDJEY3.js +0 -374
- package/dist/chunk-3JHDJEY3.js.map +0 -1
- package/dist/chunk-HMQYGCKI.js.map +0 -1
- package/dist/chunk-Q6GSOHOP.js +0 -44
- package/dist/chunk-Q6GSOHOP.js.map +0 -1
- package/dist/chunk-RTEAK4II.js +0 -37
- package/dist/chunk-RTEAK4II.js.map +0 -1
- package/dist/chunk-SMLNDQFX.js +0 -244
- package/dist/chunk-SMLNDQFX.js.map +0 -1
- package/dist/chunk-TRNCF2BG.js.map +0 -1
- package/dist/providers/abstract/llm.d.ts +0 -20
- /package/dist/{chunk-YY33MNMV.js.map → chunk-7KMGLEYP.js.map} +0 -0
- /package/dist/providers/{abstract/provider.d.ts → abstract.d.ts} +0 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { CallableLLM } from "../../providers/index.js";
|
|
2
|
+
import { SimpleSystemPromptV1 } from "../../schemas/llm/index.js";
|
|
3
|
+
import { LLMAsAJudgeScorer, MCQScorer } from "../../scorers/index.js";
|
|
4
|
+
import { IdGenerator } from "../../types";
|
|
5
|
+
import z from "zod";
|
|
6
|
+
import { MCQTestCaseV1 } from "./schema-sets/mcq.v1";
|
|
7
|
+
export declare const mcqRunner: (params: {
|
|
8
|
+
testCase: MCQTestCaseV1;
|
|
9
|
+
target: CallableLLM;
|
|
10
|
+
scorer?: MCQScorer | LLMAsAJudgeScorer;
|
|
11
|
+
systemPrompt?: SimpleSystemPromptV1;
|
|
12
|
+
llmJudgeSystemPrompt?: SimpleSystemPromptV1;
|
|
13
|
+
llmJudgeFieldsToExtract?: Record<string, z.ZodType>;
|
|
14
|
+
templateVariables?: Record<string, string>;
|
|
15
|
+
idGenerators?: {
|
|
16
|
+
response?: IdGenerator;
|
|
17
|
+
score?: IdGenerator;
|
|
18
|
+
};
|
|
19
|
+
}) => Promise<{
|
|
20
|
+
response: {
|
|
21
|
+
id: string;
|
|
22
|
+
testCaseId: string;
|
|
23
|
+
startedAt: number;
|
|
24
|
+
completedAt: number;
|
|
25
|
+
data: string;
|
|
26
|
+
modelSlug: string;
|
|
27
|
+
provider: string;
|
|
28
|
+
namespace: "peerbench.ai";
|
|
29
|
+
kind: "llm/mcq.rs";
|
|
30
|
+
schemaVersion: 1;
|
|
31
|
+
metadata?: Record<string, unknown> | undefined;
|
|
32
|
+
systemPromptId?: string | undefined;
|
|
33
|
+
inputTokensUsed?: number | undefined;
|
|
34
|
+
outputTokensUsed?: number | undefined;
|
|
35
|
+
inputCost?: string | undefined;
|
|
36
|
+
outputCost?: string | undefined;
|
|
37
|
+
};
|
|
38
|
+
score: {
|
|
39
|
+
id: string;
|
|
40
|
+
responseId: string;
|
|
41
|
+
value: number;
|
|
42
|
+
scoringMethod: "ai" | "human" | "algo";
|
|
43
|
+
extractedAnswers: string[];
|
|
44
|
+
namespace: "peerbench.ai";
|
|
45
|
+
kind: "llm/mcq.sc";
|
|
46
|
+
schemaVersion: 1;
|
|
47
|
+
explanation?: string | undefined;
|
|
48
|
+
metadata?: Record<string, unknown> | undefined;
|
|
49
|
+
scorerAISystemPrompt?: string | undefined;
|
|
50
|
+
scorerAISystemPromptId?: string | undefined;
|
|
51
|
+
scorerAIProvider?: string | undefined;
|
|
52
|
+
scorerAIModelSlug?: string | undefined;
|
|
53
|
+
scorerAIInputTokensUsed?: number | undefined;
|
|
54
|
+
scorerAIOutputTokensUsed?: number | undefined;
|
|
55
|
+
scorerAIInputCost?: string | undefined;
|
|
56
|
+
scorerAIOutputCost?: string | undefined;
|
|
57
|
+
};
|
|
58
|
+
} | {
|
|
59
|
+
response: {
|
|
60
|
+
id: string;
|
|
61
|
+
testCaseId: string;
|
|
62
|
+
startedAt: number;
|
|
63
|
+
completedAt: number;
|
|
64
|
+
data: string;
|
|
65
|
+
modelSlug: string;
|
|
66
|
+
provider: string;
|
|
67
|
+
namespace: "peerbench.ai";
|
|
68
|
+
kind: "llm/mcq.rs";
|
|
69
|
+
schemaVersion: 1;
|
|
70
|
+
metadata?: Record<string, unknown> | undefined;
|
|
71
|
+
systemPromptId?: string | undefined;
|
|
72
|
+
inputTokensUsed?: number | undefined;
|
|
73
|
+
outputTokensUsed?: number | undefined;
|
|
74
|
+
inputCost?: string | undefined;
|
|
75
|
+
outputCost?: string | undefined;
|
|
76
|
+
};
|
|
77
|
+
score?: undefined;
|
|
78
|
+
}>;
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { CallableLLM } from "../../providers/index.js";
|
|
2
|
+
import { SimpleSystemPromptV1 } from "../../schemas/llm/index.js";
|
|
3
|
+
import { LLMAsAJudgeScorer } from "../../scorers/index.js";
|
|
4
|
+
import { IdGenerator } from "../../types";
|
|
5
|
+
import z from "zod";
|
|
6
|
+
import { QATestCaseV1 } from "./schema-sets/qa.v1";
|
|
7
|
+
export declare const qaRunner: (params: {
|
|
8
|
+
testCase: QATestCaseV1;
|
|
9
|
+
target: CallableLLM;
|
|
10
|
+
scorer?: LLMAsAJudgeScorer;
|
|
11
|
+
systemPrompt?: SimpleSystemPromptV1;
|
|
12
|
+
llmJudgeSystemPrompt?: SimpleSystemPromptV1;
|
|
13
|
+
llmJudgeFieldsToExtract?: Record<string, z.ZodType>;
|
|
14
|
+
templateVariables?: Record<string, string>;
|
|
15
|
+
idGenerators?: {
|
|
16
|
+
response?: IdGenerator;
|
|
17
|
+
score?: IdGenerator;
|
|
18
|
+
};
|
|
19
|
+
}) => Promise<{
|
|
20
|
+
response: {
|
|
21
|
+
id: string;
|
|
22
|
+
testCaseId: string;
|
|
23
|
+
startedAt: number;
|
|
24
|
+
completedAt: number;
|
|
25
|
+
data: string;
|
|
26
|
+
modelSlug: string;
|
|
27
|
+
provider: string;
|
|
28
|
+
namespace: "peerbench.ai";
|
|
29
|
+
kind: "llm/qa.rs";
|
|
30
|
+
schemaVersion: 1;
|
|
31
|
+
metadata?: Record<string, unknown> | undefined;
|
|
32
|
+
systemPromptId?: string | undefined;
|
|
33
|
+
inputTokensUsed?: number | undefined;
|
|
34
|
+
outputTokensUsed?: number | undefined;
|
|
35
|
+
inputCost?: string | undefined;
|
|
36
|
+
outputCost?: string | undefined;
|
|
37
|
+
};
|
|
38
|
+
score: {
|
|
39
|
+
id: string;
|
|
40
|
+
responseId: string;
|
|
41
|
+
value: number;
|
|
42
|
+
scoringMethod: "ai" | "human" | "algo";
|
|
43
|
+
namespace: "peerbench.ai";
|
|
44
|
+
kind: "llm/qa.sc";
|
|
45
|
+
schemaVersion: 1;
|
|
46
|
+
explanation?: string | undefined;
|
|
47
|
+
metadata?: Record<string, unknown> | undefined;
|
|
48
|
+
scorerAISystemPrompt?: string | undefined;
|
|
49
|
+
scorerAISystemPromptId?: string | undefined;
|
|
50
|
+
scorerAIProvider?: string | undefined;
|
|
51
|
+
scorerAIModelSlug?: string | undefined;
|
|
52
|
+
scorerAIInputTokensUsed?: number | undefined;
|
|
53
|
+
scorerAIOutputTokensUsed?: number | undefined;
|
|
54
|
+
scorerAIInputCost?: string | undefined;
|
|
55
|
+
scorerAIOutputCost?: string | undefined;
|
|
56
|
+
};
|
|
57
|
+
} | {
|
|
58
|
+
response: {
|
|
59
|
+
id: string;
|
|
60
|
+
testCaseId: string;
|
|
61
|
+
startedAt: number;
|
|
62
|
+
completedAt: number;
|
|
63
|
+
data: string;
|
|
64
|
+
modelSlug: string;
|
|
65
|
+
provider: string;
|
|
66
|
+
namespace: "peerbench.ai";
|
|
67
|
+
kind: "llm/qa.rs";
|
|
68
|
+
schemaVersion: 1;
|
|
69
|
+
metadata?: Record<string, unknown> | undefined;
|
|
70
|
+
systemPromptId?: string | undefined;
|
|
71
|
+
inputTokensUsed?: number | undefined;
|
|
72
|
+
outputTokensUsed?: number | undefined;
|
|
73
|
+
inputCost?: string | undefined;
|
|
74
|
+
outputCost?: string | undefined;
|
|
75
|
+
};
|
|
76
|
+
score?: undefined;
|
|
77
|
+
}>;
|
|
@@ -6,7 +6,7 @@ export declare const MCQTestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
6
6
|
schemaVersion: z.ZodNumber;
|
|
7
7
|
kind: z.ZodString;
|
|
8
8
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
9
|
-
}, "kind" | "
|
|
9
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
10
10
|
question: z.ZodString;
|
|
11
11
|
options: z.ZodRecord<z.ZodString, z.ZodString>;
|
|
12
12
|
correctAnswerKeys: z.ZodArray<z.ZodString>;
|
|
@@ -24,7 +24,7 @@ export declare const MCQTestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
24
24
|
kind: "llm/mcq.tc";
|
|
25
25
|
schemaVersion: 1;
|
|
26
26
|
metadata?: Record<string, unknown> | undefined;
|
|
27
|
-
}, "kind" | "
|
|
27
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
28
28
|
id: string;
|
|
29
29
|
question: string;
|
|
30
30
|
options: Record<string, string>;
|
|
@@ -43,7 +43,7 @@ export declare const MCQTestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
43
43
|
kind: "llm/mcq.tc";
|
|
44
44
|
schemaVersion: 1;
|
|
45
45
|
metadata?: Record<string, unknown> | undefined;
|
|
46
|
-
}, "
|
|
46
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
47
47
|
id: string;
|
|
48
48
|
question: string;
|
|
49
49
|
options: Record<string, string>;
|
|
@@ -64,7 +64,7 @@ export declare const MCQResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
64
64
|
completedAt: z.ZodNumber;
|
|
65
65
|
testCaseId: z.ZodString;
|
|
66
66
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
67
|
-
}, "kind" | "
|
|
67
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
68
68
|
data: z.ZodString;
|
|
69
69
|
modelSlug: z.ZodString;
|
|
70
70
|
provider: z.ZodString;
|
|
@@ -79,10 +79,10 @@ export declare const MCQResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
79
79
|
schemaVersion: z.ZodLiteral<1>;
|
|
80
80
|
}, z.core.$strip> & {
|
|
81
81
|
new: (input: Omit<{
|
|
82
|
-
startedAt: number;
|
|
83
|
-
completedAt: number;
|
|
84
82
|
id: string;
|
|
85
83
|
testCaseId: string;
|
|
84
|
+
startedAt: number;
|
|
85
|
+
completedAt: number;
|
|
86
86
|
data: string;
|
|
87
87
|
modelSlug: string;
|
|
88
88
|
provider: string;
|
|
@@ -95,11 +95,11 @@ export declare const MCQResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
95
95
|
outputTokensUsed?: number | undefined;
|
|
96
96
|
inputCost?: string | undefined;
|
|
97
97
|
outputCost?: string | undefined;
|
|
98
|
-
}, "kind" | "
|
|
99
|
-
startedAt: number;
|
|
100
|
-
completedAt: number;
|
|
98
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
101
99
|
id: string;
|
|
102
100
|
testCaseId: string;
|
|
101
|
+
startedAt: number;
|
|
102
|
+
completedAt: number;
|
|
103
103
|
data: string;
|
|
104
104
|
modelSlug: string;
|
|
105
105
|
provider: string;
|
|
@@ -114,10 +114,10 @@ export declare const MCQResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
114
114
|
outputCost?: string | undefined;
|
|
115
115
|
};
|
|
116
116
|
newWithId(input: Omit<{
|
|
117
|
-
startedAt: number;
|
|
118
|
-
completedAt: number;
|
|
119
117
|
id: string;
|
|
120
118
|
testCaseId: string;
|
|
119
|
+
startedAt: number;
|
|
120
|
+
completedAt: number;
|
|
121
121
|
data: string;
|
|
122
122
|
modelSlug: string;
|
|
123
123
|
provider: string;
|
|
@@ -130,11 +130,11 @@ export declare const MCQResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
130
130
|
outputTokensUsed?: number | undefined;
|
|
131
131
|
inputCost?: string | undefined;
|
|
132
132
|
outputCost?: string | undefined;
|
|
133
|
-
}, "
|
|
134
|
-
startedAt: number;
|
|
135
|
-
completedAt: number;
|
|
133
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
136
134
|
id: string;
|
|
137
135
|
testCaseId: string;
|
|
136
|
+
startedAt: number;
|
|
137
|
+
completedAt: number;
|
|
138
138
|
data: string;
|
|
139
139
|
modelSlug: string;
|
|
140
140
|
provider: string;
|
|
@@ -164,7 +164,7 @@ export declare const MCQScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
164
164
|
readonly human: "human";
|
|
165
165
|
readonly algo: "algo";
|
|
166
166
|
}>;
|
|
167
|
-
}, "kind" | "
|
|
167
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
168
168
|
extractedAnswers: z.ZodArray<z.ZodString>;
|
|
169
169
|
scorerAISystemPrompt: z.ZodOptional<z.ZodString>;
|
|
170
170
|
scorerAISystemPromptId: z.ZodOptional<z.ZodString>;
|
|
@@ -181,15 +181,15 @@ export declare const MCQScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
181
181
|
}, z.core.$strip> & {
|
|
182
182
|
new: (input: Omit<{
|
|
183
183
|
id: string;
|
|
184
|
-
value: number;
|
|
185
184
|
responseId: string;
|
|
185
|
+
value: number;
|
|
186
186
|
scoringMethod: "ai" | "human" | "algo";
|
|
187
187
|
extractedAnswers: string[];
|
|
188
188
|
namespace: "peerbench.ai";
|
|
189
189
|
kind: "llm/mcq.sc";
|
|
190
190
|
schemaVersion: 1;
|
|
191
|
-
metadata?: Record<string, unknown> | undefined;
|
|
192
191
|
explanation?: string | undefined;
|
|
192
|
+
metadata?: Record<string, unknown> | undefined;
|
|
193
193
|
scorerAISystemPrompt?: string | undefined;
|
|
194
194
|
scorerAISystemPromptId?: string | undefined;
|
|
195
195
|
scorerAIProvider?: string | undefined;
|
|
@@ -198,17 +198,17 @@ export declare const MCQScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
198
198
|
scorerAIOutputTokensUsed?: number | undefined;
|
|
199
199
|
scorerAIInputCost?: string | undefined;
|
|
200
200
|
scorerAIOutputCost?: string | undefined;
|
|
201
|
-
}, "kind" | "
|
|
201
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
202
202
|
id: string;
|
|
203
|
-
value: number;
|
|
204
203
|
responseId: string;
|
|
204
|
+
value: number;
|
|
205
205
|
scoringMethod: "ai" | "human" | "algo";
|
|
206
206
|
extractedAnswers: string[];
|
|
207
207
|
namespace: "peerbench.ai";
|
|
208
208
|
kind: "llm/mcq.sc";
|
|
209
209
|
schemaVersion: 1;
|
|
210
|
-
metadata?: Record<string, unknown> | undefined;
|
|
211
210
|
explanation?: string | undefined;
|
|
211
|
+
metadata?: Record<string, unknown> | undefined;
|
|
212
212
|
scorerAISystemPrompt?: string | undefined;
|
|
213
213
|
scorerAISystemPromptId?: string | undefined;
|
|
214
214
|
scorerAIProvider?: string | undefined;
|
|
@@ -220,15 +220,15 @@ export declare const MCQScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
220
220
|
};
|
|
221
221
|
newWithId(input: Omit<{
|
|
222
222
|
id: string;
|
|
223
|
-
value: number;
|
|
224
223
|
responseId: string;
|
|
224
|
+
value: number;
|
|
225
225
|
scoringMethod: "ai" | "human" | "algo";
|
|
226
226
|
extractedAnswers: string[];
|
|
227
227
|
namespace: "peerbench.ai";
|
|
228
228
|
kind: "llm/mcq.sc";
|
|
229
229
|
schemaVersion: 1;
|
|
230
|
-
metadata?: Record<string, unknown> | undefined;
|
|
231
230
|
explanation?: string | undefined;
|
|
231
|
+
metadata?: Record<string, unknown> | undefined;
|
|
232
232
|
scorerAISystemPrompt?: string | undefined;
|
|
233
233
|
scorerAISystemPromptId?: string | undefined;
|
|
234
234
|
scorerAIProvider?: string | undefined;
|
|
@@ -237,17 +237,17 @@ export declare const MCQScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
237
237
|
scorerAIOutputTokensUsed?: number | undefined;
|
|
238
238
|
scorerAIInputCost?: string | undefined;
|
|
239
239
|
scorerAIOutputCost?: string | undefined;
|
|
240
|
-
}, "
|
|
240
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
241
241
|
id: string;
|
|
242
|
-
value: number;
|
|
243
242
|
responseId: string;
|
|
243
|
+
value: number;
|
|
244
244
|
scoringMethod: "ai" | "human" | "algo";
|
|
245
245
|
extractedAnswers: string[];
|
|
246
246
|
namespace: "peerbench.ai";
|
|
247
247
|
kind: "llm/mcq.sc";
|
|
248
248
|
schemaVersion: 1;
|
|
249
|
-
metadata?: Record<string, unknown> | undefined;
|
|
250
249
|
explanation?: string | undefined;
|
|
250
|
+
metadata?: Record<string, unknown> | undefined;
|
|
251
251
|
scorerAISystemPrompt?: string | undefined;
|
|
252
252
|
scorerAISystemPromptId?: string | undefined;
|
|
253
253
|
scorerAIProvider?: string | undefined;
|
|
@@ -6,7 +6,7 @@ export declare const MultiTurnTestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
6
6
|
schemaVersion: z.ZodNumber;
|
|
7
7
|
kind: z.ZodString;
|
|
8
8
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
9
|
-
}, "kind" | "
|
|
9
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
10
10
|
messages: z.ZodArray<z.ZodObject<{
|
|
11
11
|
role: z.ZodString;
|
|
12
12
|
content: z.ZodString;
|
|
@@ -34,7 +34,7 @@ export declare const MultiTurnTestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
34
34
|
metadata?: Record<string, unknown> | undefined;
|
|
35
35
|
maxTurns?: number | undefined;
|
|
36
36
|
expectedOutcome?: string | undefined;
|
|
37
|
-
}, "kind" | "
|
|
37
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
38
38
|
id: string;
|
|
39
39
|
messages: {
|
|
40
40
|
role: string;
|
|
@@ -63,7 +63,7 @@ export declare const MultiTurnTestCaseSchemaV1: z.ZodObject<Omit<{
|
|
|
63
63
|
metadata?: Record<string, unknown> | undefined;
|
|
64
64
|
maxTurns?: number | undefined;
|
|
65
65
|
expectedOutcome?: string | undefined;
|
|
66
|
-
}, "
|
|
66
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
67
67
|
id: string;
|
|
68
68
|
messages: {
|
|
69
69
|
role: string;
|
|
@@ -89,7 +89,7 @@ export declare const MultiTurnResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
89
89
|
completedAt: z.ZodNumber;
|
|
90
90
|
testCaseId: z.ZodString;
|
|
91
91
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
92
|
-
}, "kind" | "
|
|
92
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
93
93
|
replies: z.ZodArray<z.ZodObject<{
|
|
94
94
|
messageIndex: z.ZodNumber;
|
|
95
95
|
startedAt: z.ZodNumber;
|
|
@@ -114,10 +114,10 @@ export declare const MultiTurnResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
114
114
|
schemaVersion: z.ZodLiteral<1>;
|
|
115
115
|
}, z.core.$strip> & {
|
|
116
116
|
new: (input: Omit<{
|
|
117
|
-
startedAt: number;
|
|
118
|
-
completedAt: number;
|
|
119
117
|
id: string;
|
|
120
118
|
testCaseId: string;
|
|
119
|
+
startedAt: number;
|
|
120
|
+
completedAt: number;
|
|
121
121
|
replies: {
|
|
122
122
|
messageIndex: number;
|
|
123
123
|
startedAt: number;
|
|
@@ -140,11 +140,11 @@ export declare const MultiTurnResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
140
140
|
outputTokensUsed?: number | undefined;
|
|
141
141
|
inputCost?: string | undefined;
|
|
142
142
|
outputCost?: string | undefined;
|
|
143
|
-
}, "kind" | "
|
|
144
|
-
startedAt: number;
|
|
145
|
-
completedAt: number;
|
|
143
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
146
144
|
id: string;
|
|
147
145
|
testCaseId: string;
|
|
146
|
+
startedAt: number;
|
|
147
|
+
completedAt: number;
|
|
148
148
|
replies: {
|
|
149
149
|
messageIndex: number;
|
|
150
150
|
startedAt: number;
|
|
@@ -169,10 +169,10 @@ export declare const MultiTurnResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
169
169
|
outputCost?: string | undefined;
|
|
170
170
|
};
|
|
171
171
|
newWithId(input: Omit<{
|
|
172
|
-
startedAt: number;
|
|
173
|
-
completedAt: number;
|
|
174
172
|
id: string;
|
|
175
173
|
testCaseId: string;
|
|
174
|
+
startedAt: number;
|
|
175
|
+
completedAt: number;
|
|
176
176
|
replies: {
|
|
177
177
|
messageIndex: number;
|
|
178
178
|
startedAt: number;
|
|
@@ -195,11 +195,11 @@ export declare const MultiTurnResponseSchemaV1: z.ZodObject<Omit<{
|
|
|
195
195
|
outputTokensUsed?: number | undefined;
|
|
196
196
|
inputCost?: string | undefined;
|
|
197
197
|
outputCost?: string | undefined;
|
|
198
|
-
}, "
|
|
199
|
-
startedAt: number;
|
|
200
|
-
completedAt: number;
|
|
198
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
201
199
|
id: string;
|
|
202
200
|
testCaseId: string;
|
|
201
|
+
startedAt: number;
|
|
202
|
+
completedAt: number;
|
|
203
203
|
replies: {
|
|
204
204
|
messageIndex: number;
|
|
205
205
|
startedAt: number;
|
|
@@ -239,7 +239,7 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
239
239
|
readonly human: "human";
|
|
240
240
|
readonly algo: "algo";
|
|
241
241
|
}>;
|
|
242
|
-
}, "kind" | "
|
|
242
|
+
}, "kind" | "schemaVersion" | "namespace"> & {
|
|
243
243
|
individualScores: z.ZodArray<z.ZodObject<{
|
|
244
244
|
replyIndex: z.ZodNumber;
|
|
245
245
|
value: z.ZodNumber;
|
|
@@ -259,8 +259,8 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
259
259
|
}, z.core.$strip> & {
|
|
260
260
|
new: (input: Omit<{
|
|
261
261
|
id: string;
|
|
262
|
-
value: number;
|
|
263
262
|
responseId: string;
|
|
263
|
+
value: number;
|
|
264
264
|
scoringMethod: "ai" | "human" | "algo";
|
|
265
265
|
individualScores: {
|
|
266
266
|
replyIndex: number;
|
|
@@ -269,8 +269,8 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
269
269
|
namespace: "peerbench.ai";
|
|
270
270
|
kind: "llm/multi-turn.sc";
|
|
271
271
|
schemaVersion: 1;
|
|
272
|
-
metadata?: Record<string, unknown> | undefined;
|
|
273
272
|
explanation?: string | undefined;
|
|
273
|
+
metadata?: Record<string, unknown> | undefined;
|
|
274
274
|
scorerAISystemPrompt?: string | undefined;
|
|
275
275
|
scorerAISystemPromptId?: string | undefined;
|
|
276
276
|
scorerAIProvider?: string | undefined;
|
|
@@ -279,10 +279,10 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
279
279
|
scorerAIOutputTokensUsed?: number | undefined;
|
|
280
280
|
scorerAIInputCost?: string | undefined;
|
|
281
281
|
scorerAIOutputCost?: string | undefined;
|
|
282
|
-
}, "kind" | "
|
|
282
|
+
}, "kind" | "schemaVersion" | "namespace">) => {
|
|
283
283
|
id: string;
|
|
284
|
-
value: number;
|
|
285
284
|
responseId: string;
|
|
285
|
+
value: number;
|
|
286
286
|
scoringMethod: "ai" | "human" | "algo";
|
|
287
287
|
individualScores: {
|
|
288
288
|
replyIndex: number;
|
|
@@ -291,8 +291,8 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
291
291
|
namespace: "peerbench.ai";
|
|
292
292
|
kind: "llm/multi-turn.sc";
|
|
293
293
|
schemaVersion: 1;
|
|
294
|
-
metadata?: Record<string, unknown> | undefined;
|
|
295
294
|
explanation?: string | undefined;
|
|
295
|
+
metadata?: Record<string, unknown> | undefined;
|
|
296
296
|
scorerAISystemPrompt?: string | undefined;
|
|
297
297
|
scorerAISystemPromptId?: string | undefined;
|
|
298
298
|
scorerAIProvider?: string | undefined;
|
|
@@ -304,8 +304,8 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
304
304
|
};
|
|
305
305
|
newWithId(input: Omit<{
|
|
306
306
|
id: string;
|
|
307
|
-
value: number;
|
|
308
307
|
responseId: string;
|
|
308
|
+
value: number;
|
|
309
309
|
scoringMethod: "ai" | "human" | "algo";
|
|
310
310
|
individualScores: {
|
|
311
311
|
replyIndex: number;
|
|
@@ -314,8 +314,8 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
314
314
|
namespace: "peerbench.ai";
|
|
315
315
|
kind: "llm/multi-turn.sc";
|
|
316
316
|
schemaVersion: 1;
|
|
317
|
-
metadata?: Record<string, unknown> | undefined;
|
|
318
317
|
explanation?: string | undefined;
|
|
318
|
+
metadata?: Record<string, unknown> | undefined;
|
|
319
319
|
scorerAISystemPrompt?: string | undefined;
|
|
320
320
|
scorerAISystemPromptId?: string | undefined;
|
|
321
321
|
scorerAIProvider?: string | undefined;
|
|
@@ -324,10 +324,10 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
324
324
|
scorerAIOutputTokensUsed?: number | undefined;
|
|
325
325
|
scorerAIInputCost?: string | undefined;
|
|
326
326
|
scorerAIOutputCost?: string | undefined;
|
|
327
|
-
}, "
|
|
327
|
+
}, "id" | "kind" | "schemaVersion" | "namespace">, generator: import("../../../index.js").IdGenerator): Promise<{
|
|
328
328
|
id: string;
|
|
329
|
-
value: number;
|
|
330
329
|
responseId: string;
|
|
330
|
+
value: number;
|
|
331
331
|
scoringMethod: "ai" | "human" | "algo";
|
|
332
332
|
individualScores: {
|
|
333
333
|
replyIndex: number;
|
|
@@ -336,8 +336,8 @@ export declare const MultiTurnScoreSchemaV1: z.ZodObject<Omit<{
|
|
|
336
336
|
namespace: "peerbench.ai";
|
|
337
337
|
kind: "llm/multi-turn.sc";
|
|
338
338
|
schemaVersion: 1;
|
|
339
|
-
metadata?: Record<string, unknown> | undefined;
|
|
340
339
|
explanation?: string | undefined;
|
|
340
|
+
metadata?: Record<string, unknown> | undefined;
|
|
341
341
|
scorerAISystemPrompt?: string | undefined;
|
|
342
342
|
scorerAISystemPromptId?: string | undefined;
|
|
343
343
|
scorerAIProvider?: string | undefined;
|