peerbench 0.0.1 → 0.0.2-alpha-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +332 -2
- package/dist/abstract-Dec9Sc5O.d.ts +12 -0
- package/dist/aggregators/index.d.ts +67 -0
- package/dist/aggregators/index.js +46 -0
- package/dist/aggregators/index.js.map +1 -0
- package/dist/benchmarks/index.d.ts +1041 -0
- package/dist/benchmarks/index.js +458 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/chunk-4UBK6452.js +128 -0
- package/dist/chunk-4UBK6452.js.map +1 -0
- package/dist/chunk-ERALDEZY.js +112 -0
- package/dist/chunk-ERALDEZY.js.map +1 -0
- package/dist/chunk-HMQYGCKI.js +11 -0
- package/dist/chunk-HMQYGCKI.js.map +1 -0
- package/dist/chunk-NUEOE3K5.js +8 -0
- package/dist/chunk-NUEOE3K5.js.map +1 -0
- package/dist/chunk-OQE6TQXZ.js +42 -0
- package/dist/chunk-OQE6TQXZ.js.map +1 -0
- package/dist/chunk-PZ5AY32C.js +10 -0
- package/dist/chunk-PZ5AY32C.js.map +1 -0
- package/dist/chunk-Q6GSOHOP.js +44 -0
- package/dist/chunk-Q6GSOHOP.js.map +1 -0
- package/dist/chunk-QY5MPNNB.js +28 -0
- package/dist/chunk-QY5MPNNB.js.map +1 -0
- package/dist/chunk-R76XA2K6.js +229 -0
- package/dist/chunk-R76XA2K6.js.map +1 -0
- package/dist/chunk-TRNCF2BG.js +35 -0
- package/dist/chunk-TRNCF2BG.js.map +1 -0
- package/dist/chunk-UHHHSYVE.js +11 -0
- package/dist/chunk-UHHHSYVE.js.map +1 -0
- package/dist/chunk-YY33MNMV.js +65 -0
- package/dist/chunk-YY33MNMV.js.map +1 -0
- package/dist/chunk-ZEWI24CV.js +365 -0
- package/dist/chunk-ZEWI24CV.js.map +1 -0
- package/dist/index-BAioQhp2.d.ts +27 -0
- package/dist/index.d.ts +59 -3841
- package/dist/index.js +31 -3545
- package/dist/index.js.map +1 -1
- package/dist/json-file-ZwzLUbje.d.ts +73 -0
- package/dist/llm-DNj_tp2T.d.ts +22 -0
- package/dist/llm-judge-QThCZ9TQ.d.ts +67 -0
- package/dist/provider-BDjGp2y-.d.ts +10 -0
- package/dist/providers/index.d.ts +69 -0
- package/dist/providers/index.js +18 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/rate-limiter-CSmVIRsM.d.ts +60 -0
- package/dist/schemas/extensions/index.d.ts +28 -0
- package/dist/schemas/extensions/index.js +19 -0
- package/dist/schemas/extensions/index.js.map +1 -0
- package/dist/schemas/index.d.ts +200 -0
- package/dist/schemas/index.js +24 -0
- package/dist/schemas/index.js.map +1 -0
- package/dist/schemas/llm/index.d.ts +116 -0
- package/dist/schemas/llm/index.js +15 -0
- package/dist/schemas/llm/index.js.map +1 -0
- package/dist/scorers/index.d.ts +64 -0
- package/dist/scorers/index.js +16 -0
- package/dist/scorers/index.js.map +1 -0
- package/dist/storages/index.d.ts +69 -0
- package/dist/storages/index.js +98 -0
- package/dist/storages/index.js.map +1 -0
- package/package.json +46 -22
- package/LICENSE +0 -21
package/dist/index.js
CHANGED
|
@@ -1,1091 +1,22 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
return JSON.parse(content);
|
|
21
|
-
} catch {
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
/**
 * Serializes a value by delegating to the module-level `stringify` helper.
 *
 * NOTE(review): `stringify` is imported outside this block; the wrapper's name
 * suggests a deterministic (key-ordered) serializer — confirm against the import.
 *
 * @param {*} value - Value to serialize.
 * @returns {*} Whatever `stringify(value)` returns.
 */
function stableStringify(value) {
  const serialized = stringify(value);
  return serialized;
}
|
|
27
|
-
|
|
28
|
-
// src/utils/cid.ts
|
|
29
|
-
import { CID } from "multiformats/cid";
|
|
30
|
-
import { sha256 } from "multiformats/hashes/sha2";
|
|
31
|
-
import * as json from "multiformats/codecs/json";
|
|
32
|
-
/**
 * Builds a version-1 CID for the given data.
 *
 * The data is encoded with the multiformats JSON codec, the encoded bytes are
 * hashed with SHA-256, and the digest is wrapped in a CID tagged with the JSON
 * codec code.
 *
 * @param {*} data - Value to encode with the JSON codec.
 * @returns {Promise<CID>} The CID object (call `.toString()` for its text form).
 */
async function calculateCID(data) {
  const digest = await sha256.digest(json.encode(data));
  return CID.create(1, json.code, digest);
}
|
|
38
|
-
|
|
39
|
-
// src/utils/sleep.ts
|
|
40
|
-
/**
 * Resolves after the given delay.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms) {
  const timer = new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
  return timer;
}
|
|
43
|
-
|
|
44
|
-
// src/utils/file.ts
|
|
45
|
-
/**
 * Reads a file from disk and returns its raw contents.
 *
 * Only works where `window` is undefined (i.e. not in a browser); `node:fs`
 * is loaded lazily so that merely importing this module does not require it.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<Buffer>} The file contents as returned by `readFileSync`.
 * @throws {Error} When called in a browser environment, or when `path` does
 *   not point to an existing regular file.
 */
async function readFile(path) {
  const inBrowser = typeof window !== "undefined";
  if (inBrowser) {
    throw new Error(
      "File system operations are not supported in browser environment. Use readFromContent instead."
    );
  }

  const fs = await import("node:fs");
  // `throwIfNoEntry: false` makes a missing path yield `undefined` instead of throwing.
  const stats = fs.statSync(path, { throwIfNoEntry: false });
  const isRegularFile = stats?.isFile();
  if (!isRegularFile) {
    throw new Error(`File doesn't exist: ${path}`);
  }
  return fs.readFileSync(path);
}
|
|
58
|
-
|
|
59
|
-
// src/utils/prompt.ts
|
|
60
|
-
/**
 * Builds the text that is sent to a model from a question and its
 * (optional) multiple-choice options.
 *
 * With no options the question is returned untouched. Otherwise the result is
 * the question, a blank line, and then one `LETTER: answer` line per option
 * (each terminated by a newline), in the object's own enumeration order.
 *
 * @param {string} question - The question text.
 * @param {Record<string, string> | null} [options={}] - Option letter → answer text.
 * @returns {string} The question alone, or the question followed by the options.
 */
function preparePrompt(question, options = {}) {
  // The default parameter only covers `undefined`; an explicit `null` must be
  // treated as "no options" too instead of crashing in Object.entries(null).
  if (options == null || Object.keys(options).length === 0) {
    return question;
  }

  const optionLines = Object.entries(options).map(
    ([letter, answer]) => `${letter}: ${answer}\n`
  );
  return `${question}\n\n${optionLines.join("")}`;
}
|
|
73
|
-
|
|
74
|
-
// src/utils/sha256.ts
|
|
75
|
-
/**
 * Computes the SHA-256 digest of a string or byte array as lowercase hex.
 *
 * Uses the WebCrypto `subtle` API whenever the runtime exposes it on
 * `globalThis` (browsers, web workers, edge runtimes, recent Node.js) and only
 * falls back to Node's `crypto.createHash` when it does not. Detecting the
 * API itself, rather than `window`, keeps environments that have WebCrypto but
 * no `window` (e.g. workers) off the Node-only import.
 *
 * @param {string | Uint8Array} input - Text (hashed as UTF-8) or raw bytes.
 * @returns {Promise<string>} 64-character lowercase hexadecimal digest.
 */
async function calculateSHA256(input) {
  const subtle = globalThis.crypto?.subtle;

  if (subtle) {
    const bytes =
      input instanceof Uint8Array ? input : new TextEncoder().encode(input);
    const digest = await subtle.digest("SHA-256", bytes);
    return Array.from(new Uint8Array(digest), (byte) =>
      byte.toString(16).padStart(2, "0")
    ).join("");
  }

  const { createHash } = await import("crypto");
  const payload = input instanceof Uint8Array ? Buffer.from(input) : input;
  return createHash("sha256").update(payload).digest("hex");
}
|
|
95
|
-
|
|
96
|
-
// src/utils/did.ts
|
|
97
|
-
/**
 * Strips the leading `did:<method>:` part from a DID string.
 *
 * The input is returned unchanged when it does not start with `did:` or when
 * it has fewer than three colon-separated segments. Colons inside the
 * remaining identifier are preserved.
 *
 * @param {string} did - A DID such as `did:task:abc`, or a bare identifier.
 * @returns {string} The identifier without its `did:<method>:` prefix.
 */
function removeDIDPrefix(did) {
  const segments = did.split(":");
  const isPrefixed = segments[0] === "did" && segments.length >= 3;
  return isPrefixed ? segments.slice(2).join(":") : did;
}
|
|
107
|
-
|
|
108
|
-
// src/utils/string.ts
|
|
109
|
-
/**
 * Formats a duration given in milliseconds as human-readable text.
 *
 * Compact mode (default) prints only the largest unit that fits, always
 * rounded down: `"2 days"`, `"1 hour"`, `"59 seconds"`, `"500ms"`.
 *
 * Full mode (`options.full`) prints every non-zero unit that is listed in
 * `options.include`, joined by `", "`, e.g. `"1 hour, 2 minutes, 3 seconds"`.
 * If no unit qualifies (for example `ms === 0`) the result is an empty string.
 *
 * @param {number} ms - Duration in milliseconds.
 * @param {object} [options]
 * @param {boolean} [options.full=false] - Print all units instead of the largest one.
 * @param {string[]} [options.include] - Units allowed in full mode; any of
 *   `"millisecond"`, `"second"`, `"minute"`, `"hour"`, `"day"` (all by default).
 * @returns {string} The formatted duration.
 */
function formatMs(ms, options) {
  const {
    full = false,
    include = ["millisecond", "second", "minute", "hour", "day"]
  } = options ?? {};

  // Pluralization is decided on the value that is actually printed.
  const label = (count, unit) => `${count} ${unit}${count === 1 ? "" : "s"}`;
  const totalSeconds = ms / 1e3;

  if (full) {
    const units = [
      ["day", Math.floor(totalSeconds / 86400)],
      ["hour", Math.floor((totalSeconds % 86400) / 3600)],
      ["minute", Math.floor((totalSeconds % 3600) / 60)],
      ["second", Math.floor(totalSeconds % 60)]
    ];
    const parts = units
      .filter(([unit, count]) => count > 0 && include.includes(unit))
      .map(([unit, count]) => label(count, unit));

    // Floor so fractional inputs never print values such as "0.5ms".
    const milliseconds = Math.floor(ms % 1e3);
    if (milliseconds > 0 && include.includes("millisecond")) {
      parts.push(milliseconds + "ms");
    }
    return parts.join(", ");
  }

  // Every branch floors, so a value is never promoted past its own unit
  // (59.9 s stays "59 seconds" rather than "60 seconds").
  if (totalSeconds >= 86400) {
    return label(Math.floor(totalSeconds / 86400), "day");
  }
  if (totalSeconds >= 3600) {
    return label(Math.floor(totalSeconds / 3600), "hour");
  }
  if (totalSeconds >= 60) {
    return label(Math.floor(totalSeconds / 60), "minute");
  }
  if (totalSeconds >= 1) {
    return label(Math.floor(totalSeconds), "second");
  }
  return Math.floor(ms) + "ms";
}
|
|
154
|
-
/**
 * Decodes bytes into a string using a default-constructed `TextDecoder`
 * (UTF-8).
 *
 * @param {ArrayBuffer | ArrayBufferView} buffer - Bytes to decode.
 * @returns {string} The decoded text.
 */
function bufferToString(buffer) {
  return new TextDecoder().decode(buffer);
}
|
|
158
|
-
/**
 * Encodes a string into its UTF-8 bytes using `TextEncoder`.
 *
 * @param {string} str - Text to encode.
 * @returns {Uint8Array} The UTF-8 encoded bytes.
 */
function stringToBuffer(str) {
  return new TextEncoder().encode(str);
}
|
|
162
|
-
/**
 * Replaces `{name}` placeholders in a template with values from an object.
 *
 * A placeholder is left untouched when `values` has no own property of that
 * name, or when the property is `null`/`undefined`. Only own properties are
 * consulted, so placeholders such as `{constructor}` or `{toString}` are never
 * filled from `Object.prototype`.
 *
 * @param {string} str - Template containing `{word}` placeholders.
 * @param {Record<string, unknown> | null} [values] - Replacement values; a
 *   nullish argument leaves every placeholder in place.
 * @returns {string} The template with known placeholders substituted.
 */
function formatString(str, values) {
  const lookup = values ?? {};
  return str.replace(/\{(\w+)\}/g, (placeholder, key) => {
    const isOwn = Object.prototype.hasOwnProperty.call(lookup, key);
    return isOwn ? (lookup[key] ?? placeholder) : placeholder;
  });
}
|
|
165
|
-
|
|
166
|
-
// src/utils/llm.ts
|
|
167
|
-
import { jsonrepair } from "jsonrepair";
|
|
168
|
-
/**
 * Extracts the fenced JSON block from a model response and parses it.
 *
 * The extracted text is passed through `jsonrepair` before `JSON.parse`, so
 * slightly malformed JSON can still be parsed. Failures are never thrown to
 * the caller: the function returns `undefined` instead, and logs the original
 * response plus the error only when the `PB_SDK_DEBUG` environment variable
 * is set.
 *
 * @param {string} response - Raw text returned by the model.
 * @returns {*} The parsed JSON value, or `undefined` when no JSON block was
 *   found or it could not be parsed.
 */
function parseResponseAsJSON(response) {
  try {
    const extracted = extractJSONFromResponse(response);
    if (!extracted) {
      throw new Error("No JSON found in the response");
    }
    return JSON.parse(jsonrepair(extracted));
  } catch (e) {
    // `process?.env` still throws a ReferenceError when `process` is not
    // declared at all (browsers), so the global is probed with `typeof` first.
    const debugEnabled =
      typeof process !== "undefined" && Boolean(process.env?.PB_SDK_DEBUG);
    if (debugEnabled) {
      console.log("Original response", JSON.stringify(response));
      console.error("Error parsing response as JSON", e);
    }
    return undefined;
  }
}
|
|
182
|
-
/**
 * Returns the contents of the first triple-backtick fenced block in a
 * response. The fence may optionally be tagged `json`; newlines directly
 * inside the fence are not part of the result.
 *
 * @param {string} response - Raw text returned by the model.
 * @returns {string | undefined} Text inside the first fenced block, or
 *   `undefined` when the response contains no such block.
 */
function extractJSONFromResponse(response) {
  const fencedBlock = response.match(/```(json)?\n*(?<content>.*?)\n*```/s);
  if (fencedBlock === null) {
    return undefined;
  }
  return fencedBlock.groups.content;
}
|
|
187
|
-
|
|
188
|
-
// src/utils/validation.ts
|
|
189
|
-
/**
 * Turns the first issue of a failed safe-parse result into a one-line message.
 *
 * The message is prefixed with the issue's own path (quoted, dot-joined) when
 * it has one; otherwise with the caller-supplied `path`, if any.
 *
 * @param {{ error?: { errors: Array<{ path: Array<string | number>, message: string }> } } | undefined} safeParseReturn
 *   Result object of a `safeParse` call.
 * @param {string} [path] - Fallback label (e.g. a file name) used when the
 *   issue itself carries no path.
 * @returns {string | undefined} The formatted message, or `undefined` when the
 *   result carries no error.
 */
function parseValidationError(safeParseReturn, path) {
  const failure = safeParseReturn?.error;
  if (!failure) {
    return undefined;
  }

  const issue = failure.errors[0];
  const fallbackPrefix = path ? `${path}: ` : path ?? "";
  const prefix =
    issue.path.length > 0 ? `"${issue.path.join(".")}": ` : fallbackPrefix;
  return `${prefix}${issue.message}`;
}
|
|
200
|
-
/**
 * Unwraps a safe-parse result: returns its data on success and throws on
 * failure.
 *
 * @param {{ error?: object, data?: * }} safeParseReturn - Result object of a
 *   `safeParse` call.
 * @param {string} [path] - Label forwarded to `parseValidationError` for the
 *   error message.
 * @returns {*} `safeParseReturn.data` when the result carries no error.
 * @throws {Error} With the message built by `parseValidationError` when the
 *   result carries an error.
 */
function checkValidationError(safeParseReturn, path) {
  const failed = Boolean(safeParseReturn?.error);
  if (!failed) {
    return safeParseReturn.data;
  }
  throw new Error(parseValidationError(safeParseReturn, path));
}
|
|
206
|
-
|
|
207
|
-
// src/utils/builder.ts
|
|
208
|
-
import { v7 as uuidv7 } from "uuid";
|
|
209
|
-
/**
 * Builds a Prompt object and validates it against `PromptSchema`.
 *
 * A UUID v7 is generated when no `did` is supplied. `fullPrompt` falls back
 * to the question text. The question and the full prompt are each stored
 * together with their CID and SHA-256.
 *
 * @param {object} params
 * @param {string} params.question - The question text.
 * @param {string} [params.fullPrompt] - Text sent to the model; defaults to `question`.
 * @param {string} [params.did] - Identifier of the Prompt.
 * @param {Record<string, string>} [params.options] - Multiple-choice options.
 * @param {string} params.type - One of the `PromptTypes` values.
 * @param {string} [params.answer] - Expected answer value.
 * @param {string} [params.answerKey] - Expected answer letter.
 * @param {Record<string, any>} [params.metadata] - Additional metadata.
 * @param {string[]} [params.scorers] - Scorer identifiers.
 * @returns {Promise<object>} The parsed Prompt.
 * @throws When the assembled object does not satisfy `PromptSchema`.
 */
async function buildPrompt(params) {
  const { question } = params;
  const fullPrompt = params.fullPrompt ?? question;

  // CID and SHA-256 of one text are independent, so compute them concurrently.
  const withDigests = async (text) => {
    const [cid, digestHex] = await Promise.all([
      calculateCID(text),
      calculateSHA256(text)
    ]);
    return { data: text, cid: cid.toString(), sha256: digestHex };
  };

  const questionPart = await withDigests(question);
  // When the full prompt is just the question, reuse its hashes instead of
  // computing the same digests a second time.
  const fullPromptPart =
    fullPrompt === question ? { ...questionPart } : await withDigests(fullPrompt);

  return PromptSchema.parse({
    did: params.did ?? uuidv7(),
    options: params.options ?? undefined,
    question: questionPart,
    fullPrompt: fullPromptPart,
    type: params.type,
    answer: params.answer ?? undefined,
    answerKey: params.answerKey ?? undefined,
    metadata: params.metadata ?? undefined,
    scorers: params.scorers ?? undefined
  });
}
|
|
232
|
-
/**
 * Builds a Response object from a provider's forward result and validates it
 * against `PromptResponseSchema`.
 *
 * UUID v7 values are generated for `did` and `runId` when not supplied. The
 * response data is stored together with its SHA-256 and CID, and the
 * start/completion `Date`s are stored as epoch milliseconds. Missing or empty
 * `modelName`/`modelOwner` become `"unknown"`, and `modelHost` becomes `"auto"`.
 *
 * @param {object} params
 * @param {object} params.forwardResponse - Provider result; read fields are
 *   `data`, `startedAt`, `completedAt`, `inputTokensUsed`, `inputCost`,
 *   `outputTokensUsed` and `outputCost`.
 * @param {object} params.prompt - The Prompt that produced the response.
 * @param {string} params.provider - Provider name.
 * @param {string} params.modelId - Model identifier used by the provider.
 * @param {string} [params.did] - Identifier of the Response.
 * @param {string} [params.runId] - Identifier of the run.
 * @param {string} [params.modelName]
 * @param {string} [params.modelOwner]
 * @param {string} [params.modelHost]
 * @param {Record<string, any>} [params.metadata]
 * @returns {Promise<object>} The parsed Response.
 * @throws When the assembled object does not satisfy `PromptResponseSchema`.
 */
async function buildResponse(params) {
  const { forwardResponse } = params;

  // The two digests do not depend on each other; compute them concurrently.
  const [digestHex, cid] = await Promise.all([
    calculateSHA256(forwardResponse.data),
    calculateCID(forwardResponse.data)
  ]);

  return PromptResponseSchema.parse({
    did: params.did ?? uuidv7(),
    runId: params.runId ?? uuidv7(),
    data: forwardResponse.data,
    sha256: digestHex,
    cid: cid.toString(),
    startedAt: forwardResponse.startedAt.getTime(),
    finishedAt: forwardResponse.completedAt.getTime(),
    prompt: params.prompt,
    metadata: params.metadata,
    provider: params.provider,
    modelId: params.modelId,
    // `||` on purpose: an empty string also falls back to the placeholder.
    modelName: params.modelName || "unknown",
    modelOwner: params.modelOwner || "unknown",
    modelHost: params.modelHost || "auto",
    inputTokensUsed: forwardResponse.inputTokensUsed,
    inputCost: forwardResponse.inputCost,
    outputTokensUsed: forwardResponse.outputTokensUsed,
    outputCost: forwardResponse.outputCost
  });
}
|
|
256
|
-
/**
 * Builds a Score object and validates it against `PromptScoreSchema`.
 *
 * All fields of `params` are passed through; a UUID v7 is generated for
 * `scoreDID` when it is missing or `null`.
 *
 * @param {object} params - Score fields as expected by `PromptScoreSchema`.
 * @returns {Promise<object>} The parsed Score.
 * @throws When the assembled object does not satisfy `PromptScoreSchema`.
 */
async function buildScore(params) {
  const scoreDID = params.scoreDID ?? uuidv7();
  return PromptScoreSchema.parse({ ...params, scoreDID });
}
|
|
262
|
-
|
|
263
|
-
// src/validation/did.ts
|
|
264
|
-
/**
 * Creates a schema that accepts a string, strips its `did:<method>:` prefix
 * with `removeDIDPrefix`, and then validates the remainder with `schema`.
 *
 * @param {object} schema - Zod schema applied to the identifier after the
 *   prefix has been removed.
 * @returns {object} The piped Zod schema.
 */
function DIDSchema(schema) {
  const withoutPrefix = z.string().transform((val) => removeDIDPrefix(val));
  return withoutPrefix.pipe(schema);
}

/**
 * DID whose identifier part must be a UUID; fails with "Invalid DID" otherwise.
 */
var DIDasUUIDSchema = DIDSchema(z.string().uuid({ message: "Invalid DID" }));
|
|
270
|
-
|
|
271
|
-
// src/types.ts
|
|
272
|
-
/**
 * Kinds of Prompt, keyed by name. The values are the string identifiers
 * accepted by the `type` field of `PromptSchema`.
 */
var PromptTypes = {
  MultipleChoice: "multiple-choice",
  OrderSentences: "order-sentences",
  TextReplacement: "text-replacement",
  Typo: "typo",
  OpenEnded: "open-ended",
  OpenEndedWithDocs: "open-ended-with-docs"
};
|
|
280
|
-
/**
 * Ways a Response can be scored. The values are the identifiers accepted by
 * the `method` field of `PromptScoreSchema`.
 */
var ScoringMethods = {
  /** Scored by a human */
  human: "human",
  /** Scored using an AI model */
  ai: "ai",
  /** Scored using an algorithm */
  algo: "algo"
};
|
|
294
|
-
/**
 * Zod schema of a Prompt.
 *
 * After the field-level checks, multiple-choice Prompts are additionally
 * required to have at least one option, no blank option text, and both an
 * answer key and an answer value. Validation stops at the first of these
 * rules that fails.
 */
var PromptSchema = z2.object({
  /** Decentralized identifier of the Prompt */
  did: DIDasUUIDSchema,

  /** The question that is going to be asked to the model */
  question: z2.object({
    /** Question data itself */
    data: z2.string(),
    /** CID v1 calculation of the question string */
    cid: z2.string(),
    /** SHA256 hash of the question string */
    sha256: z2.string()
  }),

  /**
   * Multiple choice answers for the question, where the keys are letters and
   * the values are the answers.
   */
  options: z2.record(z2.string(), z2.string()).optional(),

  /** Full prompt that is going to be sent to the model */
  fullPrompt: z2.object({
    /** Full prompt itself */
    data: z2.string(),
    /** CID v1 calculation of the full prompt string */
    cid: z2.string(),
    /** SHA256 hash of the full prompt string */
    sha256: z2.string()
  }),

  /** Type of the Prompt */
  type: z2.nativeEnum(PromptTypes),

  /** Expected option value for the question */
  answer: z2.string().optional(),

  /** Expected letter of the answer (e.g "A", "B" or "C") */
  answerKey: z2.string().optional(),

  /** Additional metadata related to the Prompt */
  metadata: z2.record(z2.string(), z2.any()).optional(),

  /**
   * Expected Scorer identifiers that can be used to score the Responses for
   * this Prompt
   */
  scorers: z2.array(z2.string()).optional()
}).transform((prompt, ctx) => {
  // Only multiple-choice Prompts carry extra cross-field rules.
  if (prompt.type !== PromptTypes.MultipleChoice) {
    return prompt;
  }

  // Records a custom issue and yields the sentinel that aborts the transform.
  const reject = (message) => {
    ctx.addIssue({ code: z2.ZodIssueCode.custom, message });
    return z2.NEVER;
  };

  const options = prompt.options || {};
  if (Object.keys(options).length === 0) {
    return reject("No options provided for multiple choice question");
  }
  const hasBlankOption = Object.values(options).some(
    (value) => value?.trim() === ""
  );
  if (hasBlankOption) {
    return reject("Multiple choice options cannot be empty");
  }
  if (!prompt.answerKey) {
    return reject("Correct answer key cannot be empty");
  }
  if (!prompt.answer) {
    return reject("Correct answer value cannot be empty");
  }
  return prompt;
});
|
|
393
|
-
/**
 * Zod schema of a Task: a file-backed collection of Prompts together with
 * the hashes and location of the file it was loaded from.
 */
var TaskSchema = z2.object({
  /** Decentralized identifier of the Task; must start with "did:task:" */
  did: z2.string().startsWith("did:task:"),

  /** The Prompts that the Task has */
  prompts: z2.array(PromptSchema),

  /** CID v1 calculation of the Task file */
  cid: z2.string(),

  /** SHA256 calculation of the Task file */
  sha256: z2.string(),

  /** Basename of the Task file */
  fileName: z2.string(),

  /** Full path of the Task file */
  path: z2.string()
});
|
|
419
|
-
/**
 * Zod schema of a Response: what a model answered for a Prompt, plus the
 * provider/model details, timing and (optional) usage figures of the call.
 */
var PromptResponseSchema = z2.object({
  /** Unique identifier of the Response */
  did: DIDasUUIDSchema,

  /** Name of the Provider that the Response comes from */
  provider: z2.string(),

  /** ID of the Model that was used by the Provider */
  modelId: z2.string(),

  /** Known name of the model by peerBench */
  modelName: z2.string(),

  /** Owner of the model */
  modelOwner: z2.string(),

  /** The entity that is responsible for hosting the model */
  modelHost: z2.string(),

  /** The Prompt that was used to obtain this Response */
  prompt: PromptSchema,

  /** CID v1 calculation of the Response data */
  cid: z2.string(),

  /** SHA256 calculation of the Response data */
  sha256: z2.string(),

  /** Response data itself */
  data: z2.string(),

  /** Timestamp when the Prompt was sent to the Model */
  startedAt: z2.number(),

  /** Timestamp when the Model responded to this particular Prompt */
  finishedAt: z2.number(),

  /** Unique identifier of the run this Response belongs to */
  runId: z2.string(),

  // Optional usage and cost figures; costs are carried as strings.
  inputTokensUsed: z2.number().optional(),
  outputTokensUsed: z2.number().optional(),
  inputCost: z2.string().optional(),
  outputCost: z2.string().optional(),

  metadata: z2.record(z2.string(), z2.any()).optional()
});
|
|
478
|
-
/**
 * Zod schema of a Score. It extends the Response schema, so `did` and
 * `metadata` still describe the scored Response; the score's own identifier
 * and metadata live in `scoreDID` and `scoreMetadata`.
 */
var PromptScoreSchema = PromptResponseSchema.extend({
  // Relaxed copy of the Prompt's underlying object schema, for Score objects
  // that do not want to include the original Prompt data: the Prompt itself
  // and its options/type/answer fields and question/fullPrompt `data` become
  // optional. Built from `sourceType()`, i.e. without PromptSchema's
  // multiple-choice transform.
  prompt: (() => {
    const basePrompt = PromptSchema.sourceType();
    const { shape } = basePrompt;
    return basePrompt.extend({
      options: shape.options.optional(),
      question: shape.question.extend({
        data: shape.question.shape.data.optional()
      }),
      fullPrompt: shape.fullPrompt.extend({
        data: shape.fullPrompt.shape.data.optional()
      }),
      type: shape.type.optional(),
      answer: shape.answer.optional(),
      answerKey: shape.answerKey.optional()
    }).optional();
  })(),

  /** Response data; optional on a Score */
  data: z2.string().optional(),

  /**
   * Unique identifier of this Scoring result. Named `scoreDID` because `did`
   * already identifies the Response the Score inherits from.
   */
  scoreDID: DIDasUUIDSchema,

  /**
   * Additional metadata about the Scoring result. Named `scoreMetadata`
   * because `metadata` already holds the inherited Response metadata.
   */
  scoreMetadata: z2.record(z2.string(), z2.any()).optional(),

  /** The score, between 0 and 1 inclusive */
  score: z2.number().min(0).max(1),

  /** How the score was produced (one of `ScoringMethods`) */
  method: z2.nativeEnum(ScoringMethods),

  /** Explanation about how the score was calculated */
  explanation: z2.string().optional(),

  /** Only presented if the scoring method is `ai` */
  scorerAI: z2.object({
    provider: z2.string(),
    modelName: z2.string(),
    modelHost: z2.string(),
    modelOwner: z2.string(),
    modelId: z2.string(),
    inputTokensUsed: z2.number().optional(),
    outputTokensUsed: z2.number().optional(),
    inputCost: z2.string().optional(),
    outputCost: z2.string().optional()
  }).optional()
});
|
|
527
|
-
|
|
528
|
-
// src/collectors/abstract/abstract-collector.ts
|
|
529
|
-
/**
 * Base class of all collectors.
 */
var AbstractCollector = class {
  /**
   * Initialization hook; what it does depends on the implementation. The
   * base version accepts any arguments and does nothing.
   *
   * @param {...*} _args - Implementation-specific arguments (ignored here).
   * @returns {Promise<void>}
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  async initialize(..._args) {
    return undefined;
  }
};
|
|
537
|
-
|
|
538
|
-
// src/collectors/abstract/abstract-rss-collector.ts
|
|
539
|
-
import { XMLParser } from "fast-xml-parser";
|
|
540
|
-
/**
 * Base class for collectors that read XML feeds. Subclasses provide a
 * `feedSchema` (used by `parseFeedXML`) describing the parsed feed.
 */
var AbstractRSSCollector = class extends AbstractCollector {
  /** XML parser shared by all feed operations, created with default options. */
  parser = new XMLParser({});

  /**
   * Helper function to fetch the feed from the URL.
   *
   * @param {string} url - The URL of the feed.
   * @returns {Promise<string>} Raw XML string of the feed.
   * @throws {Error} When the HTTP response status is not OK.
   */
  async fetchFeed(url) {
    const response = await fetch(url);
    if (response.ok) {
      return response.text();
    }
    throw new Error(`HTTP error! status: ${response.status}`);
  }

  /**
   * Parses the raw XML string and validates it against the subclass's
   * `feedSchema`.
   *
   * @param {string} xml - The raw XML string.
   * @returns {Promise<object>} Structured data of the feed, as produced by
   *   the schema.
   */
  async parseFeedXML(xml) {
    const document = this.parser.parse(xml);
    return await this.feedSchema.parseAsync(document);
  }
};
|
|
569
|
-
|
|
570
|
-
// src/collectors/pubmed-collector.ts
|
|
571
|
-
import { z as z3 } from "zod";
|
|
572
|
-
import * as cheerio from "cheerio";
|
|
573
|
-
/**
 * Collector for PubMed RSS feeds.
 *
 * Each feed item's `content:encoded` HTML is split into paragraphs. Items
 * whose first usable paragraph starts with an upper-case heading (for example
 * "BACKGROUND: ...") are turned into `{ pmid, title, paragraphs, tags }`
 * entries; every other item is skipped.
 */
var PubMedCollector = class extends AbstractRSSCollector {
  identifier = "pubmed";

  /** Expected structure of the parsed feed; a single item is wrapped in an array. */
  feedSchema = z3.object({
    rss: z3.object({
      channel: z3.object({
        title: z3.string(),
        link: z3.string(),
        description: z3.string(),
        pubDate: z3.string(),
        lastBuildDate: z3.string().optional(),
        language: z3.string().optional(),
        item: z3.any().transform((data) => Array.isArray(data) ? data : [data]).pipe(
          z3.array(
            z3.object({
              title: z3.string(),
              link: z3.string(),
              description: z3.string(),
              guid: z3.string(),
              pubDate: z3.string(),
              "content:encoded": z3.string()
            })
          )
        )
      })
    })
  });

  /**
   * Fetches and parses the feed at `url`.
   *
   * @param {string} url - URL of the PubMed RSS feed.
   * @returns {Promise<Array<{ pmid: string, title: string, paragraphs: Record<string, string>, tags: string[] }>>}
   *   One entry per article with titled paragraphs. `tags` holds "pubmed" and
   *   the channel title.
   */
  async collect(url) {
    const feed = await this.parseFeedXML(await this.fetchFeed(url));
    const parsed = [];
    const tags = new Set();
    tags.add("pubmed");

    for (const article of feed.rss.channel.item) {
      const $ = cheerio.load(article["content:encoded"]);
      const texts = [];

      $("p").each((i, el) => {
        // The first <p> is always skipped, as are empty paragraphs, the
        // "ABSTRACT" marker and the PMID/DOI lines.
        if (i === 0) return;
        const text = $(el).text().trim();
        if (text === "" || text === "ABSTRACT" || text.startsWith("PMID:") || text.startsWith("DOI:")) {
          return;
        }
        texts.push(text);
      });

      // Articles with no usable paragraph, or whose first paragraph has no
      // heading, are skipped. `texts[0]` may be undefined here, which
      // `parseParagraphTitle` tolerates.
      if (!this.parseParagraphTitle(texts[0])) {
        continue;
      }

      tags.add(feed.rss.channel.title);
      const paragraphs = {};
      for (const text of texts) {
        const match = this.parseParagraphTitle(text);
        if (match) {
          paragraphs[match[1].trim()] = match[2].trim();
        }
      }
      parsed.push({
        pmid: article.guid,
        title: article.title,
        paragraphs,
        tags: Array.from(tags)
      });
    }
    return parsed;
  }

  /**
   * Parses the title of a paragraph which is in the format of "TITLE: text".
   *
   * @param {string | undefined} text - Paragraph text.
   * @returns {RegExpMatchArray | null} Match whose group 1 is the heading and
   *   group 2 the rest of the first line, or `null` when `text` is not a
   *   string or has no such heading.
   */
  parseParagraphTitle(text) {
    if (typeof text !== "string") {
      return null;
    }
    // Heading = upper-case letters and whitespace only. A `|` inside a
    // character class is a literal pipe, not alternation, so it is left out.
    const paragraphRegex = /^([A-Z\s]+):\s*(.*)/;
    return text.match(paragraphRegex);
  }
};
|
|
645
|
-
|
|
646
|
-
// src/collectors/simple-general-rss-collector.ts
|
|
647
|
-
import { z as z4 } from "zod";
|
|
648
|
-
import * as cheerio2 from "cheerio";
|
|
649
|
-
var SimpleGeneralRSSCollector = class extends AbstractRSSCollector {
|
|
650
|
-
identifier = "simple-general-rss";
|
|
651
|
-
feedSchema = z4.object({
|
|
652
|
-
// Handle both RSS and RDF formats
|
|
653
|
-
rss: z4.object({
|
|
654
|
-
channel: z4.object({
|
|
655
|
-
title: z4.string().optional(),
|
|
656
|
-
link: z4.string().optional(),
|
|
657
|
-
description: z4.string().optional(),
|
|
658
|
-
pubDate: z4.string().optional(),
|
|
659
|
-
lastBuildDate: z4.string().optional(),
|
|
660
|
-
language: z4.string().optional(),
|
|
661
|
-
item: z4.any().transform((data) => Array.isArray(data) ? data : [data]).pipe(
|
|
662
|
-
z4.array(
|
|
663
|
-
z4.object({
|
|
664
|
-
title: z4.string(),
|
|
665
|
-
link: z4.string().optional(),
|
|
666
|
-
description: z4.string().optional(),
|
|
667
|
-
guid: z4.union([z4.string(), z4.object({}).passthrough()]).optional(),
|
|
668
|
-
pubDate: z4.string().optional(),
|
|
669
|
-
"content:encoded": z4.string().optional(),
|
|
670
|
-
// Handle various RSS namespaces and extensions
|
|
671
|
-
"dc:creator": z4.union([z4.string(), z4.array(z4.string())]).optional(),
|
|
672
|
-
"dc:date": z4.string().optional(),
|
|
673
|
-
"dc:title": z4.string().optional(),
|
|
674
|
-
"dc:subject": z4.string().optional(),
|
|
675
|
-
"dc:publisher": z4.string().optional(),
|
|
676
|
-
"dc:identifier": z4.union([z4.string(), z4.array(z4.string())]).optional(),
|
|
677
|
-
"dc:rights": z4.string().optional(),
|
|
678
|
-
"dc:source": z4.string().optional(),
|
|
679
|
-
"prism:publicationDate": z4.string().optional(),
|
|
680
|
-
"prism:section": z4.string().optional(),
|
|
681
|
-
"prism:volume": z4.string().optional(),
|
|
682
|
-
"prism:number": z4.string().optional(),
|
|
683
|
-
"prism:startingPage": z4.string().optional(),
|
|
684
|
-
"prism:endingPage": z4.string().optional(),
|
|
685
|
-
"arxiv:announce_type": z4.string().optional(),
|
|
686
|
-
category: z4.union([z4.string(), z4.array(z4.string())]).optional()
|
|
687
|
-
})
|
|
688
|
-
)
|
|
689
|
-
)
|
|
690
|
-
})
|
|
691
|
-
}).optional(),
|
|
692
|
-
// Handle RDF format (like medRxiv)
|
|
693
|
-
rdf: z4.object({
|
|
694
|
-
channel: z4.object({
|
|
695
|
-
title: z4.string().optional(),
|
|
696
|
-
link: z4.string().optional(),
|
|
697
|
-
description: z4.string().optional(),
|
|
698
|
-
items: z4.object({
|
|
699
|
-
"rdf:Seq": z4.object({
|
|
700
|
-
"rdf:li": z4.any().optional()
|
|
701
|
-
}).optional()
|
|
702
|
-
}).optional(),
|
|
703
|
-
item: z4.any().transform((data) => Array.isArray(data) ? data : [data]).pipe(
|
|
704
|
-
z4.array(
|
|
705
|
-
z4.object({
|
|
706
|
-
title: z4.string(),
|
|
707
|
-
link: z4.string().optional(),
|
|
708
|
-
description: z4.string().optional(),
|
|
709
|
-
"dc:creator": z4.union([z4.string(), z4.array(z4.string())]).optional(),
|
|
710
|
-
"dc:date": z4.string().optional(),
|
|
711
|
-
"dc:title": z4.string().optional(),
|
|
712
|
-
"dc:subject": z4.string().optional(),
|
|
713
|
-
"dc:publisher": z4.string().optional(),
|
|
714
|
-
"dc:identifier": z4.union([z4.string(), z4.array(z4.string())]).optional(),
|
|
715
|
-
"dc:rights": z4.string().optional(),
|
|
716
|
-
"dc:source": z4.string().optional(),
|
|
717
|
-
"prism:publicationDate": z4.string().optional(),
|
|
718
|
-
"prism:section": z4.string().optional(),
|
|
719
|
-
"prism:volume": z4.string().optional(),
|
|
720
|
-
"prism:number": z4.string().optional(),
|
|
721
|
-
"prism:startingPage": z4.string().optional(),
|
|
722
|
-
"prism:endingPage": z4.string().optional(),
|
|
723
|
-
category: z4.union([z4.string(), z4.array(z4.string())]).optional()
|
|
724
|
-
})
|
|
725
|
-
)
|
|
726
|
-
).optional()
|
|
727
|
-
}).optional()
|
|
728
|
-
}).optional(),
|
|
729
|
-
// Handle Atom feeds
|
|
730
|
-
feed: z4.object({
|
|
731
|
-
title: z4.string().optional(),
|
|
732
|
-
link: z4.string().optional(),
|
|
733
|
-
entry: z4.any().transform((data) => Array.isArray(data) ? data : [data]).pipe(
|
|
734
|
-
z4.array(
|
|
735
|
-
z4.object({
|
|
736
|
-
title: z4.string(),
|
|
737
|
-
link: z4.string().optional(),
|
|
738
|
-
summary: z4.string().optional(),
|
|
739
|
-
content: z4.string().optional(),
|
|
740
|
-
author: z4.string().optional(),
|
|
741
|
-
published: z4.string().optional(),
|
|
742
|
-
id: z4.string().optional()
|
|
743
|
-
})
|
|
744
|
-
)
|
|
745
|
-
)
|
|
746
|
-
}).optional()
|
|
747
|
-
// Allow any other structure and try to extract items dynamically
|
|
748
|
-
});
|
|
749
|
-
async collect(url) {
|
|
750
|
-
const rawXml = await this.fetchFeed(url);
|
|
751
|
-
const feed = await this.parseFeedXML(rawXml);
|
|
752
|
-
const parsed = [];
|
|
753
|
-
const tags = /* @__PURE__ */ new Set();
|
|
754
|
-
let items = [];
|
|
755
|
-
let channelTitle = "";
|
|
756
|
-
if (feed.rss?.channel?.item) {
|
|
757
|
-
items = feed.rss.channel.item;
|
|
758
|
-
channelTitle = feed.rss.channel.title || "";
|
|
759
|
-
} else if (feed.rdf?.channel?.item) {
|
|
760
|
-
items = feed.rdf.channel.item;
|
|
761
|
-
channelTitle = feed.rdf.channel.title || "";
|
|
762
|
-
} else if (feed.rdf?.channel?.items?.["rdf:Seq"]?.["rdf:li"]) {
|
|
763
|
-
console.log("RDF 1.0 format detected - items are references, not content");
|
|
764
|
-
items = [];
|
|
765
|
-
channelTitle = feed.rdf.channel.title || "";
|
|
766
|
-
} else if (feed.feed?.entry) {
|
|
767
|
-
items = feed.feed.entry;
|
|
768
|
-
channelTitle = feed.feed.title || "";
|
|
769
|
-
} else {
|
|
770
|
-
items = this.findItemsDynamically(feed);
|
|
771
|
-
channelTitle = this.findChannelTitle(feed) || "";
|
|
772
|
-
}
|
|
773
|
-
if (items.length === 0) {
|
|
774
|
-
console.log("No items found with standard parsing, trying regex fallback...");
|
|
775
|
-
items = this.findItemsWithRegex(rawXml);
|
|
776
|
-
if (!channelTitle) {
|
|
777
|
-
channelTitle = this.findChannelTitleWithRegex(rawXml) || "";
|
|
778
|
-
}
|
|
779
|
-
}
|
|
780
|
-
console.log(`Items found: ${items.length}`);
|
|
781
|
-
if (channelTitle) {
|
|
782
|
-
tags.add(channelTitle.toLowerCase().replace(/\s+/g, "-"));
|
|
783
|
-
}
|
|
784
|
-
tags.add("rss-feed");
|
|
785
|
-
for (const item of items) {
|
|
786
|
-
const mainText = this.extractMainText(item);
|
|
787
|
-
if (mainText.length < 200) {
|
|
788
|
-
console.log(`Skipping item with short content (${mainText.length} chars): ${item.title?.substring(0, 50)}...`);
|
|
789
|
-
continue;
|
|
790
|
-
}
|
|
791
|
-
const metadata = this.extractMetadata(item);
|
|
792
|
-
const itemTags = this.extractTags(item, tags);
|
|
793
|
-
parsed.push({
|
|
794
|
-
title: item.title,
|
|
795
|
-
link: item.link,
|
|
796
|
-
mainText,
|
|
797
|
-
// Already cleaned in extractMainText
|
|
798
|
-
rawXml: item,
|
|
799
|
-
// Keep the raw XML data for future use
|
|
800
|
-
metadata,
|
|
801
|
-
tags: itemTags,
|
|
802
|
-
pubDate: item.pubDate || item.published,
|
|
803
|
-
guid: this.extractGuidValue(item.guid) || item.id
|
|
804
|
-
});
|
|
805
|
-
}
|
|
806
|
-
return parsed;
|
|
807
|
-
}
|
|
808
|
-
/**
|
|
809
|
-
* Extracts GUID value from either a string or object
|
|
810
|
-
*/
|
|
811
|
-
extractGuidValue(guid) {
|
|
812
|
-
if (typeof guid === "string") {
|
|
813
|
-
return guid;
|
|
814
|
-
}
|
|
815
|
-
if (guid && typeof guid === "object") {
|
|
816
|
-
if (guid._text) return guid._text;
|
|
817
|
-
if (guid.text) return guid.text;
|
|
818
|
-
if (guid.value) return guid.value;
|
|
819
|
-
if (guid.content) return guid.content;
|
|
820
|
-
if (guid.id) return guid.id;
|
|
821
|
-
try {
|
|
822
|
-
return JSON.stringify(guid);
|
|
823
|
-
} catch {
|
|
824
|
-
return void 0;
|
|
825
|
-
}
|
|
826
|
-
}
|
|
827
|
-
return void 0;
|
|
828
|
-
}
|
|
829
|
-
/**
|
|
830
|
-
* Dynamically searches for items in any XML structure
|
|
831
|
-
*/
|
|
832
|
-
findItemsDynamically(feed) {
|
|
833
|
-
const items = [];
|
|
834
|
-
const searchForItems = (obj, path = []) => {
|
|
835
|
-
if (Array.isArray(obj)) {
|
|
836
|
-
if (obj.length > 0 && obj[0] && typeof obj[0] === "object") {
|
|
837
|
-
const firstItem = obj[0];
|
|
838
|
-
if (firstItem.title || firstItem["dc:title"] || firstItem.name || firstItem.link) {
|
|
839
|
-
items.push(...obj);
|
|
840
|
-
return;
|
|
841
|
-
}
|
|
842
|
-
}
|
|
843
|
-
}
|
|
844
|
-
if (obj && typeof obj === "object") {
|
|
845
|
-
for (const [key, value] of Object.entries(obj)) {
|
|
846
|
-
if (key.toLowerCase().includes("item") || key.toLowerCase().includes("entry") || key.toLowerCase().includes("article") || key.toLowerCase().includes("post")) {
|
|
847
|
-
if (Array.isArray(value)) {
|
|
848
|
-
items.push(...value);
|
|
849
|
-
return;
|
|
850
|
-
}
|
|
851
|
-
}
|
|
852
|
-
searchForItems(value, [...path, key]);
|
|
853
|
-
}
|
|
854
|
-
}
|
|
855
|
-
};
|
|
856
|
-
searchForItems(feed);
|
|
857
|
-
return items;
|
|
858
|
-
}
|
|
859
|
-
/**
|
|
860
|
-
* Fallback method using regex to find items when standard parsing fails
|
|
861
|
-
*/
|
|
862
|
-
findItemsWithRegex(xmlString) {
|
|
863
|
-
try {
|
|
864
|
-
console.log("Using regex fallback to find items...");
|
|
865
|
-
console.log(`XML length: ${xmlString.length} characters`);
|
|
866
|
-
const firstItemMatch = xmlString.match(/<item[^>]*>/i);
|
|
867
|
-
if (firstItemMatch) {
|
|
868
|
-
console.log(`Found first item tag: ${firstItemMatch[0]}`);
|
|
869
|
-
}
|
|
870
|
-
const titleMatches = xmlString.match(/<title[^>]*>.*?<\/title>/gi);
|
|
871
|
-
if (titleMatches) {
|
|
872
|
-
console.log(`Found ${titleMatches.length} title tags`);
|
|
873
|
-
console.log(`First few titles: ${titleMatches.slice(0, 3).map((t) => t.substring(0, 100))}`);
|
|
874
|
-
}
|
|
875
|
-
const itemRegex = /<item[^>]*>([\s\S]*?)<\/item>/gi;
|
|
876
|
-
const items = [];
|
|
877
|
-
let match;
|
|
878
|
-
while ((match = itemRegex.exec(xmlString)) !== null) {
|
|
879
|
-
const itemXml = match[1];
|
|
880
|
-
const titleMatch = itemXml.match(/<title[^>]*>[\s\S]*?<!\[CDATA\[([\s\S]*?)\]\]>[\s\S]*?<\/title>/i) || itemXml.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
881
|
-
const title = titleMatch ? this.cleanText(titleMatch[1]) : "";
|
|
882
|
-
const descMatch = itemXml.match(/<description[^>]*>[\s\S]*?<!\[CDATA\[([\s\S]*?)\]\]>[\s\S]*?<\/description>/i) || itemXml.match(/<description[^>]*>([\s\S]*?)<\/description>/i);
|
|
883
|
-
const description = descMatch ? this.cleanText(descMatch[1]) : "";
|
|
884
|
-
const linkMatch = itemXml.match(/<link[^>]*>([\s\S]*?)<\/link>/i);
|
|
885
|
-
const link = linkMatch ? this.cleanText(linkMatch[1]) : "";
|
|
886
|
-
if (title && description && description.length > 100) {
|
|
887
|
-
items.push({
|
|
888
|
-
title,
|
|
889
|
-
link,
|
|
890
|
-
description,
|
|
891
|
-
rawXml: itemXml
|
|
892
|
-
});
|
|
893
|
-
}
|
|
894
|
-
}
|
|
895
|
-
console.log(`Regex fallback found ${items.length} items`);
|
|
896
|
-
return items;
|
|
897
|
-
} catch (error) {
|
|
898
|
-
console.log(`Regex fallback failed: ${error}`);
|
|
899
|
-
return [];
|
|
900
|
-
}
|
|
901
|
-
}
|
|
902
|
-
/**
|
|
903
|
-
* Dynamically searches for channel title in any XML structure
|
|
904
|
-
*/
|
|
905
|
-
findChannelTitle(feed) {
|
|
906
|
-
const searchForTitle = (obj) => {
|
|
907
|
-
if (obj && typeof obj === "object") {
|
|
908
|
-
if (obj.title) return obj.title;
|
|
909
|
-
if (obj.name) return obj.name;
|
|
910
|
-
for (const value of Object.values(obj)) {
|
|
911
|
-
const result = searchForTitle(value);
|
|
912
|
-
if (result) return result;
|
|
913
|
-
}
|
|
914
|
-
}
|
|
915
|
-
return void 0;
|
|
916
|
-
};
|
|
917
|
-
return searchForTitle(feed);
|
|
918
|
-
}
|
|
919
|
-
/**
|
|
920
|
-
* Find channel title using regex
|
|
921
|
-
*/
|
|
922
|
-
findChannelTitleWithRegex(xmlString) {
|
|
923
|
-
try {
|
|
924
|
-
const titleMatch = xmlString.match(/<channel[^>]*>[\s\S]*?<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
925
|
-
if (titleMatch) {
|
|
926
|
-
return this.cleanText(titleMatch[1]);
|
|
927
|
-
}
|
|
928
|
-
const feedTitleMatch = xmlString.match(/<feed[^>]*>[\s\S]*?<title[^>]*>([\s\S]*?)<\/title>/i);
|
|
929
|
-
if (feedTitleMatch) {
|
|
930
|
-
return this.cleanText(feedTitleMatch[1]);
|
|
931
|
-
}
|
|
932
|
-
return void 0;
|
|
933
|
-
} catch (error) {
|
|
934
|
-
console.log(`Regex channel title search failed: ${error}`);
|
|
935
|
-
return void 0;
|
|
936
|
-
}
|
|
937
|
-
}
|
|
938
|
-
/**
|
|
939
|
-
* Intelligently extracts the main text content from an RSS item.
|
|
940
|
-
* Finds the element with the most text and appends other text-rich elements.
|
|
941
|
-
* Returns cleaned, combined text content.
|
|
942
|
-
*/
|
|
943
|
-
extractMainText(item) {
|
|
944
|
-
const textFields = [
|
|
945
|
-
"content:encoded",
|
|
946
|
-
"description",
|
|
947
|
-
"dc:description",
|
|
948
|
-
"summary",
|
|
949
|
-
"excerpt",
|
|
950
|
-
"abstract",
|
|
951
|
-
"content",
|
|
952
|
-
"body"
|
|
953
|
-
];
|
|
954
|
-
let primaryText = "";
|
|
955
|
-
let primaryField = "";
|
|
956
|
-
let maxLength = 0;
|
|
957
|
-
for (const field of textFields) {
|
|
958
|
-
if (item[field] && typeof item[field] === "string") {
|
|
959
|
-
const cleanLength = this.cleanText(item[field]).length;
|
|
960
|
-
if (cleanLength > maxLength) {
|
|
961
|
-
maxLength = cleanLength;
|
|
962
|
-
primaryText = item[field];
|
|
963
|
-
primaryField = field;
|
|
964
|
-
}
|
|
965
|
-
}
|
|
966
|
-
}
|
|
967
|
-
if (!primaryText) {
|
|
968
|
-
return item.title || "";
|
|
969
|
-
}
|
|
970
|
-
const additionalTexts = [];
|
|
971
|
-
for (const field of textFields) {
|
|
972
|
-
if (field !== primaryField && item[field] && typeof item[field] === "string") {
|
|
973
|
-
const cleanText = this.cleanText(item[field]);
|
|
974
|
-
if (cleanText.length > 20 && !this.isDuplicateContent(primaryText, cleanText)) {
|
|
975
|
-
additionalTexts.push(cleanText);
|
|
976
|
-
}
|
|
977
|
-
}
|
|
978
|
-
}
|
|
979
|
-
for (const [key, value] of Object.entries(item)) {
|
|
980
|
-
if (typeof value === "string" && !textFields.includes(key) && !key.startsWith("dc:") && !key.startsWith("prism:") && !key.startsWith("arxiv:") && key !== "title" && key !== "link" && key !== "guid" && key !== "pubDate") {
|
|
981
|
-
const cleanText = this.cleanText(value);
|
|
982
|
-
if (cleanText.length > 20 && !this.isDuplicateContent(primaryText, cleanText)) {
|
|
983
|
-
additionalTexts.push(cleanText);
|
|
984
|
-
}
|
|
985
|
-
}
|
|
986
|
-
}
|
|
987
|
-
let combinedText = primaryText;
|
|
988
|
-
if (additionalTexts.length > 0) {
|
|
989
|
-
combinedText += "\n\n" + additionalTexts.join("\n\n");
|
|
990
|
-
}
|
|
991
|
-
return this.cleanText(combinedText);
|
|
992
|
-
}
|
|
993
|
-
/**
|
|
994
|
-
* Checks if two text contents are substantially similar to avoid duplication
|
|
995
|
-
*/
|
|
996
|
-
isDuplicateContent(primaryText, additionalText) {
|
|
997
|
-
const primary = primaryText.toLowerCase().trim();
|
|
998
|
-
const additional = additionalText.toLowerCase().trim();
|
|
999
|
-
if (primary.includes(additional) || additional.includes(primary)) {
|
|
1000
|
-
return true;
|
|
1001
|
-
}
|
|
1002
|
-
const primaryWords = primary.split(/\s+/).filter((word) => word.length > 3);
|
|
1003
|
-
const additionalWords = additional.split(/\s+/).filter((word) => word.length > 3);
|
|
1004
|
-
if (primaryWords.length === 0 || additionalWords.length === 0) {
|
|
1005
|
-
return false;
|
|
1006
|
-
}
|
|
1007
|
-
const commonWords = primaryWords.filter((word) => additionalWords.includes(word));
|
|
1008
|
-
const similarity = commonWords.length / Math.max(primaryWords.length, additionalWords.length);
|
|
1009
|
-
return similarity > 0.7;
|
|
1010
|
-
}
|
|
1011
|
-
/**
|
|
1012
|
-
* Cleans text of HTML tags, special characters, and formatting.
|
|
1013
|
-
* Preserves readable text while removing markup.
|
|
1014
|
-
*/
|
|
1015
|
-
cleanText(text) {
|
|
1016
|
-
if (!text) return "";
|
|
1017
|
-
const $ = cheerio2.load(text);
|
|
1018
|
-
$("script, style").remove();
|
|
1019
|
-
let cleaned = $.text();
|
|
1020
|
-
cleaned = cleaned.replace(/\s+/g, " ").replace(/</g, "<").replace(/>/g, ">").replace(/&/g, "&").replace(/"/g, '"').replace(/'/g, "'").replace(/ /g, " ").replace(/\[CDATA\[/g, "").replace(/\]\]>/g, "").replace(/\n+/g, " ").replace(/\t+/g, " ").trim();
|
|
1021
|
-
return cleaned;
|
|
1022
|
-
}
|
|
1023
|
-
/**
|
|
1024
|
-
* Extracts structured metadata from RSS item fields.
|
|
1025
|
-
*/
|
|
1026
|
-
extractMetadata(item) {
|
|
1027
|
-
const metadata = {};
|
|
1028
|
-
if (item["dc:creator"]) metadata.creator = item["dc:creator"];
|
|
1029
|
-
if (item["dc:date"]) metadata.date = item["dc:date"];
|
|
1030
|
-
if (item["dc:subject"]) metadata.subject = item["dc:subject"];
|
|
1031
|
-
if (item["dc:publisher"]) metadata.publisher = item["dc:publisher"];
|
|
1032
|
-
if (item["dc:identifier"]) metadata.identifier = item["dc:identifier"];
|
|
1033
|
-
if (item["dc:rights"]) metadata.rights = item["dc:rights"];
|
|
1034
|
-
if (item["dc:source"]) metadata.source = item["dc:source"];
|
|
1035
|
-
if (item["prism:publicationDate"]) metadata.publicationDate = item["prism:publicationDate"];
|
|
1036
|
-
if (item["prism:section"]) metadata.section = item["prism:section"];
|
|
1037
|
-
if (item["prism:volume"]) metadata.volume = item["prism:volume"];
|
|
1038
|
-
if (item["prism:number"]) metadata.number = item["prism:number"];
|
|
1039
|
-
if (item["prism:startingPage"]) metadata.startingPage = item["prism:startingPage"];
|
|
1040
|
-
if (item["prism:endingPage"]) metadata.endingPage = item["prism:endingPage"];
|
|
1041
|
-
if (item["arxiv:announce_type"]) metadata.announceType = item["arxiv:announce_type"];
|
|
1042
|
-
if (item.category) {
|
|
1043
|
-
if (Array.isArray(item.category)) {
|
|
1044
|
-
metadata.categories = item.category;
|
|
1045
|
-
} else {
|
|
1046
|
-
metadata.categories = [item.category];
|
|
1047
|
-
}
|
|
1048
|
-
}
|
|
1049
|
-
return metadata;
|
|
1050
|
-
}
|
|
1051
|
-
/**
|
|
1052
|
-
* Extracts tags from the item and combines with feed-level tags.
|
|
1053
|
-
*/
|
|
1054
|
-
extractTags(item, baseTags) {
|
|
1055
|
-
const tags = new Set(baseTags);
|
|
1056
|
-
if (item.category) {
|
|
1057
|
-
if (Array.isArray(item.category)) {
|
|
1058
|
-
item.category.forEach((cat) => tags.add(cat.toLowerCase().replace(/\s+/g, "-")));
|
|
1059
|
-
} else {
|
|
1060
|
-
tags.add(item.category.toLowerCase().replace(/\s+/g, "-"));
|
|
1061
|
-
}
|
|
1062
|
-
}
|
|
1063
|
-
if (item["dc:subject"]) {
|
|
1064
|
-
tags.add(item["dc:subject"].toLowerCase().replace(/\s+/g, "-"));
|
|
1065
|
-
}
|
|
1066
|
-
if (item["prism:section"]) {
|
|
1067
|
-
tags.add(item["prism:section"].toLowerCase().replace(/\s+/g, "-"));
|
|
1068
|
-
}
|
|
1069
|
-
if (item["dc:source"]) {
|
|
1070
|
-
tags.add(item["dc:source"].toLowerCase().replace(/\s+/g, "-"));
|
|
1071
|
-
}
|
|
1072
|
-
if (item["dc:publisher"]) {
|
|
1073
|
-
tags.add(item["dc:publisher"].toLowerCase().replace(/\s+/g, "-"));
|
|
1074
|
-
}
|
|
1075
|
-
return Array.from(tags);
|
|
1076
|
-
}
|
|
1077
|
-
};
|
|
1078
|
-
|
|
1079
|
-
// src/collectors/string-collector.ts
|
|
1080
|
-
var StringCollector = class extends AbstractCollector {
|
|
1081
|
-
identifier = "string";
|
|
1082
|
-
async collect(source) {
|
|
1083
|
-
if (typeof source === "string") {
|
|
1084
|
-
return source;
|
|
1085
|
-
}
|
|
1086
|
-
return void 0;
|
|
1087
|
-
}
|
|
1088
|
-
};
|
|
1
|
+
import {
|
|
2
|
+
defineRunner
|
|
3
|
+
} from "./chunk-QY5MPNNB.js";
|
|
4
|
+
import {
|
|
5
|
+
ScoringMethod
|
|
6
|
+
} from "./chunk-HMQYGCKI.js";
|
|
7
|
+
import {
|
|
8
|
+
CATEGORIES,
|
|
9
|
+
PEERBENCH_NAMESPACE
|
|
10
|
+
} from "./chunk-UHHHSYVE.js";
|
|
11
|
+
import {
|
|
12
|
+
RateLimiter,
|
|
13
|
+
bufferToString,
|
|
14
|
+
idGeneratorUUIDv7,
|
|
15
|
+
parseResponseAsJSON,
|
|
16
|
+
sleep,
|
|
17
|
+
stringToBuffer
|
|
18
|
+
} from "./chunk-4UBK6452.js";
|
|
19
|
+
import "./chunk-PZ5AY32C.js";
|
|
1089
20
|
|
|
1090
21
|
// src/errors/polyfill.ts
|
|
1091
22
|
function captureStackTrace(error, constructor) {
|
|
@@ -1096,2472 +27,27 @@ function captureStackTrace(error, constructor) {
|
|
|
1096
27
|
}
|
|
1097
28
|
}
|
|
1098
29
|
|
|
1099
|
-
// src/errors/
|
|
1100
|
-
var
|
|
1101
|
-
startedAt;
|
|
30
|
+
// src/errors/peerbench.ts
|
|
31
|
+
var PeerbenchError = class extends Error {
|
|
1102
32
|
code;
|
|
1103
33
|
constructor(message, options) {
|
|
1104
34
|
super(message, options);
|
|
1105
|
-
this.
|
|
1106
|
-
this.code = options?.code;
|
|
35
|
+
this.code = options?.code ?? 0;
|
|
1107
36
|
captureStackTrace(this, this.constructor);
|
|
1108
37
|
}
|
|
1109
38
|
};
|
|
1110
|
-
|
|
1111
|
-
// src/errors/parser.ts
|
|
1112
|
-
var InvalidDataError = class extends Error {
|
|
1113
|
-
constructor(message) {
|
|
1114
|
-
super(message || "Data is in an invalid format");
|
|
1115
|
-
this.name = "InvalidDataError";
|
|
1116
|
-
captureStackTrace(this, this.constructor);
|
|
1117
|
-
}
|
|
1118
|
-
};
|
|
1119
|
-
var ParserIsNotCompatibleError = class extends InvalidDataError {
|
|
1120
|
-
constructor() {
|
|
1121
|
-
super("Parser is not compatible with the data");
|
|
1122
|
-
this.name = "ParserIsNotCompatibleError";
|
|
1123
|
-
captureStackTrace(this, this.constructor);
|
|
1124
|
-
}
|
|
1125
|
-
};
|
|
1126
|
-
|
|
1127
|
-
// src/errors/codes.ts
|
|
1128
|
-
var PEERBENCH_ERROR_CODES = {
|
|
1129
|
-
PROVIDER_FORWARD_FAILED: "PROVIDER_FORWARD_FAILED",
|
|
1130
|
-
PROVIDER_UNAUTHORIZED: "PROVIDER_UNAUTHORIZED",
|
|
1131
|
-
PROVIDER_MAX_RETRIES_REACHED: "PROVIDER_MAX_RETRIES_REACHED"
|
|
1132
|
-
};
|
|
1133
|
-
|
|
1134
|
-
// src/generators/abstract/abstract-generator.ts
|
|
1135
|
-
import { v7 as uuidv72 } from "uuid";
|
|
1136
|
-
var AbstractGenerator = class {
|
|
1137
|
-
/**
|
|
1138
|
-
* Generate prompt from the collected source data
|
|
1139
|
-
* @param input - Raw input data that will be validated against inputSchema
|
|
1140
|
-
* @param options - Optional configuration options
|
|
1141
|
-
* @returns Promise resolving to an array of prompts
|
|
1142
|
-
*/
|
|
1143
|
-
async generate(input, options) {
|
|
1144
|
-
const validatedInput = this.inputSchema.parse(input);
|
|
1145
|
-
return this.generatePrompts(validatedInput, options);
|
|
1146
|
-
}
|
|
1147
|
-
/**
|
|
1148
|
-
* Checks whether the generator can handle the given input
|
|
1149
|
-
*/
|
|
1150
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
1151
|
-
canHandle(input) {
|
|
1152
|
-
return true;
|
|
1153
|
-
}
|
|
1154
|
-
/**
|
|
1155
|
-
* Initializes the generator (depends on the implementation)
|
|
1156
|
-
*/
|
|
1157
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
1158
|
-
async initialize(...args) {
|
|
1159
|
-
}
|
|
1160
|
-
async buildPrompt(params) {
|
|
1161
|
-
const questionCID = await calculateCID(params.question).then(
|
|
1162
|
-
(cid) => cid.toString()
|
|
1163
|
-
);
|
|
1164
|
-
const questionSHA256 = await calculateSHA256(params.question);
|
|
1165
|
-
const fullPromptCID = await calculateCID(params.fullPrompt).then(
|
|
1166
|
-
(cid) => cid.toString()
|
|
1167
|
-
);
|
|
1168
|
-
const fullPromptSHA256 = await calculateSHA256(params.fullPrompt);
|
|
1169
|
-
return {
|
|
1170
|
-
did: uuidv72(),
|
|
1171
|
-
question: {
|
|
1172
|
-
data: params.question,
|
|
1173
|
-
cid: questionCID,
|
|
1174
|
-
sha256: questionSHA256
|
|
1175
|
-
},
|
|
1176
|
-
// If the Prompt is a Multiple Choice, the answer data should be
|
|
1177
|
-
// the value of the correct answer key
|
|
1178
|
-
answer: params.options === void 0 || Object.keys(params.options).length === 0 ? params.correctAnswer : params.options[params.correctAnswer],
|
|
1179
|
-
// If the options is provided then the correctAnswer points to the letter of the correct answer
|
|
1180
|
-
// Answer key is only valid when the options are provided
|
|
1181
|
-
// which means the Prompt is a multiple choice question
|
|
1182
|
-
answerKey: params.options === void 0 || Object.keys(params.options).length === 0 ? "" : params.correctAnswer,
|
|
1183
|
-
options: params.options ?? {},
|
|
1184
|
-
fullPrompt: {
|
|
1185
|
-
data: params.fullPrompt,
|
|
1186
|
-
cid: fullPromptCID,
|
|
1187
|
-
sha256: fullPromptSHA256
|
|
1188
|
-
},
|
|
1189
|
-
type: params.type,
|
|
1190
|
-
metadata: {
|
|
1191
|
-
generatorIdentifier: this.identifier,
|
|
1192
|
-
...params.metadata || {}
|
|
1193
|
-
},
|
|
1194
|
-
scorers: params.scorers ?? []
|
|
1195
|
-
};
|
|
1196
|
-
}
|
|
1197
|
-
};
|
|
1198
|
-
|
|
1199
|
-
// src/generators/pubmed/trp-generator.ts
|
|
1200
|
-
import { z as z6 } from "zod";
|
|
1201
|
-
|
|
1202
|
-
// src/validation/enum.ts
|
|
1203
|
-
import { z as z5 } from "zod";
|
|
1204
|
-
function EnumSchema(_enum, lookFor = "value") {
|
|
1205
|
-
return z5.string().transform((value, ctx) => {
|
|
1206
|
-
let enumValue;
|
|
1207
|
-
if (lookFor === "key") {
|
|
1208
|
-
enumValue = _enum[value];
|
|
1209
|
-
} else {
|
|
1210
|
-
const values = Object.values(_enum);
|
|
1211
|
-
enumValue = values.find((v) => v === value);
|
|
1212
|
-
}
|
|
1213
|
-
if (!enumValue) {
|
|
1214
|
-
ctx.addIssue({
|
|
1215
|
-
code: z5.ZodIssueCode.custom,
|
|
1216
|
-
message: `Invalid enum value: ${value}`
|
|
1217
|
-
});
|
|
1218
|
-
return z5.NEVER;
|
|
1219
|
-
}
|
|
1220
|
-
return enumValue;
|
|
1221
|
-
});
|
|
1222
|
-
}
|
|
1223
|
-
|
|
1224
|
-
// src/generators/pubmed/helpers/paragraph-merge.ts
|
|
1225
|
-
function paragraphMerge(paragraphs, strategy) {
|
|
1226
|
-
switch (strategy) {
|
|
1227
|
-
case ParagraphMergeStrategy.WithoutTitles:
|
|
1228
|
-
return Object.entries(paragraphs).map(([, value]) => value).join("\n");
|
|
1229
|
-
case ParagraphMergeStrategy.TitlesAsSentences:
|
|
1230
|
-
return Object.entries(paragraphs).map(([key, value]) => `${key}. ${value}`).join("\n");
|
|
1231
|
-
case ParagraphMergeStrategy.TitlesWithinSentences:
|
|
1232
|
-
return Object.entries(paragraphs).map(([key, value]) => `${key}: ${value}`).join("\n");
|
|
1233
|
-
default:
|
|
1234
|
-
throw new Error(`Invalid paragraph merge strategy: ${strategy}`);
|
|
1235
|
-
}
|
|
1236
|
-
}
|
|
1237
|
-
var ParagraphMergeStrategy = {
|
|
1238
|
-
WithoutTitles: "without-titles",
|
|
1239
|
-
TitlesAsSentences: "with-titles-as-sentences",
|
|
1240
|
-
TitlesWithinSentences: "titles-within-sentences"
|
|
1241
|
-
};
|
|
1242
|
-
|
|
1243
|
-
// src/generators/pubmed/helpers/replace-entities.ts
|
|
1244
|
-
function replaceEntities(text, entities, placeholder = "{}") {
|
|
1245
|
-
if (!entities || entities.length === 0) return text;
|
|
1246
|
-
const esc = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1247
|
-
const pattern = entities.map(esc).sort((a, b) => b.length - a.length).join("|");
|
|
1248
|
-
const re = new RegExp(`\\b(?:${pattern})\\b`, "g");
|
|
1249
|
-
return text.replace(re, placeholder);
|
|
1250
|
-
}
|
|
1251
|
-
|
|
1252
|
-
// src/generators/pubmed/helpers/crypto-random.ts
|
|
1253
|
-
function cryptoRandom() {
|
|
1254
|
-
return crypto.getRandomValues(new Uint32Array(1))[0] / 2 ** 32;
|
|
1255
|
-
}
|
|
1256
|
-
|
|
1257
|
-
// src/providers/abstract/abstract-provider.ts
|
|
1258
|
-
var AbstractProvider = class {
|
|
1259
|
-
};
|
|
1260
|
-
|
|
1261
|
-
// src/providers/llm/base-llm-provider.ts
|
|
1262
|
-
import OpenAI, { APIError } from "openai";
|
|
1263
|
-
var BaseLLMProvider = class extends AbstractProvider {
|
|
1264
|
-
timestamps = [];
|
|
1265
|
-
timeout;
|
|
1266
|
-
rateLimit;
|
|
1267
|
-
rateLimitTimeWindow;
|
|
1268
|
-
client;
|
|
1269
|
-
maxRetries;
|
|
1270
|
-
/**
|
|
1271
|
-
* Initialize a new LLM provider
|
|
1272
|
-
* @param options Options for the provider
|
|
1273
|
-
*/
|
|
1274
|
-
constructor(options) {
|
|
1275
|
-
super();
|
|
1276
|
-
this.rateLimit = options.rateLimit ?? 20;
|
|
1277
|
-
this.rateLimitTimeWindow = options.rateLimitTimeWindow ?? 3e3;
|
|
1278
|
-
this.timeout = options.timeout ?? 5 * 6e4;
|
|
1279
|
-
this.maxRetries = options.maxRetries ?? 5;
|
|
1280
|
-
this.client = new OpenAI({
|
|
1281
|
-
baseURL: options.baseURL,
|
|
1282
|
-
apiKey: options.apiKey,
|
|
1283
|
-
maxRetries: options.maxRetries,
|
|
1284
|
-
timeout: options.timeout,
|
|
1285
|
-
dangerouslyAllowBrowser: true
|
|
1286
|
-
});
|
|
1287
|
-
}
|
|
1288
|
-
async enforceRateLimit() {
|
|
1289
|
-
const now = Date.now();
|
|
1290
|
-
this.timestamps = this.timestamps.filter(
|
|
1291
|
-
(ts) => now - ts < this.rateLimitTimeWindow
|
|
1292
|
-
);
|
|
1293
|
-
if (this.timestamps.length < this.rateLimit) {
|
|
1294
|
-
this.timestamps.push(now);
|
|
1295
|
-
return;
|
|
1296
|
-
}
|
|
1297
|
-
const earliest = this.timestamps[0];
|
|
1298
|
-
const waitTime = this.rateLimitTimeWindow - (now - earliest);
|
|
1299
|
-
await sleep(waitTime);
|
|
1300
|
-
return this.enforceRateLimit();
|
|
1301
|
-
}
|
|
1302
|
-
/**
|
|
1303
|
-
* Fetch all supported models from the provider
|
|
1304
|
-
* @returns Array of model information
|
|
1305
|
-
*/
|
|
1306
|
-
async getSupportedModels() {
|
|
1307
|
-
await this.enforceRateLimit();
|
|
1308
|
-
try {
|
|
1309
|
-
const response = await this.client.models.list();
|
|
1310
|
-
const models = response.data;
|
|
1311
|
-
const parsedModels = await Promise.all(
|
|
1312
|
-
models.map(async (model) => {
|
|
1313
|
-
const parsed = await this.parseModelInfo(model);
|
|
1314
|
-
if (!parsed) {
|
|
1315
|
-
return;
|
|
1316
|
-
}
|
|
1317
|
-
return {
|
|
1318
|
-
...parsed,
|
|
1319
|
-
metadata: {
|
|
1320
|
-
// These fields might not be available in all models
|
|
1321
|
-
contextWindow: model.context_window,
|
|
1322
|
-
maxTokens: model.max_tokens,
|
|
1323
|
-
pricing: model.pricing ? {
|
|
1324
|
-
input: model.pricing.input,
|
|
1325
|
-
output: model.pricing.output
|
|
1326
|
-
} : void 0
|
|
1327
|
-
}
|
|
1328
|
-
};
|
|
1329
|
-
})
|
|
1330
|
-
);
|
|
1331
|
-
return parsedModels.filter((model) => model !== void 0);
|
|
1332
|
-
} catch (error) {
|
|
1333
|
-
throw new Error(
|
|
1334
|
-
`Failed to fetch supported models: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1335
|
-
);
|
|
1336
|
-
}
|
|
1337
|
-
}
|
|
1338
|
-
async forward(input, options) {
|
|
1339
|
-
await this.enforceRateLimit();
|
|
1340
|
-
let retryCount = this.maxRetries;
|
|
1341
|
-
while (retryCount > 0) {
|
|
1342
|
-
const startedAt = /* @__PURE__ */ new Date();
|
|
1343
|
-
try {
|
|
1344
|
-
const messages = [];
|
|
1345
|
-
if (Array.isArray(input)) {
|
|
1346
|
-
messages.push(...input);
|
|
1347
|
-
} else {
|
|
1348
|
-
if (options?.system) {
|
|
1349
|
-
messages.push({
|
|
1350
|
-
role: "system",
|
|
1351
|
-
content: options.system
|
|
1352
|
-
});
|
|
1353
|
-
}
|
|
1354
|
-
messages.push({
|
|
1355
|
-
role: "user",
|
|
1356
|
-
content: input
|
|
1357
|
-
});
|
|
1358
|
-
}
|
|
1359
|
-
const response = await this.client.chat.completions.create(
|
|
1360
|
-
{
|
|
1361
|
-
model: options.model,
|
|
1362
|
-
messages,
|
|
1363
|
-
temperature: options.temperature
|
|
1364
|
-
},
|
|
1365
|
-
{ signal: options?.abortSignal, timeout: this.timeout }
|
|
1366
|
-
);
|
|
1367
|
-
if ("error" in response) {
|
|
1368
|
-
const err = response.error;
|
|
1369
|
-
throw new Error(
|
|
1370
|
-
`${err.message} - Code ${err.code} - ${JSON.stringify(err)}`
|
|
1371
|
-
);
|
|
1372
|
-
}
|
|
1373
|
-
return {
|
|
1374
|
-
data: response?.choices?.[0]?.message?.content || "",
|
|
1375
|
-
inputTokensUsed: response?.usage?.prompt_tokens,
|
|
1376
|
-
outputTokensUsed: response?.usage?.completion_tokens,
|
|
1377
|
-
startedAt,
|
|
1378
|
-
completedAt: /* @__PURE__ */ new Date()
|
|
1379
|
-
};
|
|
1380
|
-
} catch (err) {
|
|
1381
|
-
if (err instanceof APIError && err.status === 401) {
|
|
1382
|
-
throw new ForwardError(
|
|
1383
|
-
`Invalid credentials provided for the Provider`,
|
|
1384
|
-
{
|
|
1385
|
-
cause: err,
|
|
1386
|
-
startedAt,
|
|
1387
|
-
code: PEERBENCH_ERROR_CODES.PROVIDER_UNAUTHORIZED
|
|
1388
|
-
}
|
|
1389
|
-
);
|
|
1390
|
-
}
|
|
1391
|
-
retryCount--;
|
|
1392
|
-
if (err instanceof SyntaxError) {
|
|
1393
|
-
console.debug(err);
|
|
1394
|
-
continue;
|
|
1395
|
-
}
|
|
1396
|
-
if (retryCount !== 0) {
|
|
1397
|
-
continue;
|
|
1398
|
-
}
|
|
1399
|
-
throw new ForwardError(
|
|
1400
|
-
`Failed to forward prompt to the model: ${err instanceof Error ? err.message : err}`,
|
|
1401
|
-
{
|
|
1402
|
-
cause: err,
|
|
1403
|
-
startedAt,
|
|
1404
|
-
code: PEERBENCH_ERROR_CODES.PROVIDER_FORWARD_FAILED
|
|
1405
|
-
}
|
|
1406
|
-
);
|
|
1407
|
-
}
|
|
1408
|
-
}
|
|
1409
|
-
throw new ForwardError(
|
|
1410
|
-
`Failed to forward prompt to the model: Max retries reached`,
|
|
1411
|
-
{
|
|
1412
|
-
startedAt: /* @__PURE__ */ new Date(),
|
|
1413
|
-
code: PEERBENCH_ERROR_CODES.PROVIDER_MAX_RETRIES_REACHED
|
|
1414
|
-
}
|
|
1415
|
-
);
|
|
1416
|
-
}
|
|
1417
|
-
};
|
|
1418
|
-
var LargeLanguageModelOwner = {
|
|
1419
|
-
Meta: "meta",
|
|
1420
|
-
OpenAI: "openai",
|
|
1421
|
-
Deepseek: "deepseek",
|
|
1422
|
-
Qwen: "qwen",
|
|
1423
|
-
Google: "google",
|
|
1424
|
-
XAI: "x-ai",
|
|
1425
|
-
Anthropic: "anthropic",
|
|
1426
|
-
Mistral: "mistral"
|
|
1427
|
-
};
|
|
1428
|
-
var MetaModels = {
|
|
1429
|
-
Llama_4_Maverick: "llama-4-maverick",
|
|
1430
|
-
Llama_4_Scout: "llama-4-scout",
|
|
1431
|
-
Llama_3_3_70b_Instruct: "llama-3.3-70b-instruct",
|
|
1432
|
-
Llama_3_1_8b_Instruct: "llama-3.1-8b-instruct",
|
|
1433
|
-
Llama_3_1_70b_Instruct: "llama-3.1-70b-instruct"
|
|
1434
|
-
};
|
|
1435
|
-
var QwenModels = {
|
|
1436
|
-
QwQ_32b: "qwq-32b"
|
|
1437
|
-
};
|
|
1438
|
-
var DeepSeekModels = {
|
|
1439
|
-
V3: "deepseek-v3",
|
|
1440
|
-
V3_0324: "deepseek-v3-0324"
|
|
1441
|
-
};
|
|
1442
|
-
var XAIModels = {
|
|
1443
|
-
Grok3_Beta: "grok-3-beta",
|
|
1444
|
-
Grok4: "grok-4"
|
|
1445
|
-
};
|
|
1446
|
-
var GoogleModels = {
|
|
1447
|
-
Gemini_2_0_Flash: "gemini-2.0-flash",
|
|
1448
|
-
Gemini_2_5_Flash_Lite: "gemini-2.5-flash-lite",
|
|
1449
|
-
Gemini_2_5_Pro: "gemini-2.5-pro"
|
|
1450
|
-
};
|
|
1451
|
-
var AnthropicModels = {
|
|
1452
|
-
Claude_3_7_Sonnet: "claude-3.7-sonnet",
|
|
1453
|
-
Claude_Sonnet_4_5: "claude-sonnet-4.5"
|
|
1454
|
-
};
|
|
1455
|
-
var OpenAIModels = {
|
|
1456
|
-
ChatGPT_4o: "chatgpt-4o-latest",
|
|
1457
|
-
GPT_4o: "gpt-4o",
|
|
1458
|
-
GPT_4o_Mini: "gpt-4o-mini",
|
|
1459
|
-
GPT_5: "gpt-5"
|
|
1460
|
-
};
|
|
1461
|
-
var MistralModels = {
|
|
1462
|
-
Ministral_8B: "ministral-8b"
|
|
1463
|
-
};
|
|
1464
|
-
var LargeLanguageModel = {
|
|
1465
|
-
[LargeLanguageModelOwner.Meta]: MetaModels,
|
|
1466
|
-
[LargeLanguageModelOwner.Deepseek]: DeepSeekModels,
|
|
1467
|
-
[LargeLanguageModelOwner.Qwen]: QwenModels,
|
|
1468
|
-
[LargeLanguageModelOwner.Google]: GoogleModels,
|
|
1469
|
-
[LargeLanguageModelOwner.XAI]: XAIModels,
|
|
1470
|
-
[LargeLanguageModelOwner.OpenAI]: OpenAIModels,
|
|
1471
|
-
[LargeLanguageModelOwner.Anthropic]: AnthropicModels,
|
|
1472
|
-
[LargeLanguageModelOwner.Mistral]: MistralModels
|
|
1473
|
-
};
|
|
1474
|
-
|
|
1475
|
-
// src/providers/llm/openrouter.ts
|
|
1476
|
-
import axios from "axios";
|
|
1477
|
-
import Decimal from "decimal.js";
|
|
1478
|
-
var baseURL = "https://openrouter.ai/api/v1";
|
|
1479
|
-
var MODELS_CACHE_TTL = 1e3 * 60 * 60 * 24;
|
|
1480
|
-
// Maps the model segment of an OpenRouter id ("<vendor>/<model>") to the
// [owner key, model key] pair that is resolved against LargeLanguageModelOwner
// and LargeLanguageModel when parseModelInfo() runs.
const OPENROUTER_KNOWN_MODELS = new Map([
  ["ministral-8b", ["Mistral", "Ministral_8B"]],
  ["chatgpt-4o-latest", ["OpenAI", "ChatGPT_4o"]],
  ["gpt-4o-mini", ["OpenAI", "GPT_4o_Mini"]],
  ["deepseek-chat-v3-0324", ["Deepseek", "V3_0324"]],
  ["gpt-4o", ["OpenAI", "GPT_4o"]],
  ["gpt-5", ["OpenAI", "GPT_5"]],
  ["claude-3.7-sonnet", ["Anthropic", "Claude_3_7_Sonnet"]],
  ["claude-sonnet-4.5", ["Anthropic", "Claude_Sonnet_4_5"]],
  ["llama-3.3-70b-instruct", ["Meta", "Llama_3_3_70b_Instruct"]],
  ["llama-3.1-70b-instruct", ["Meta", "Llama_3_1_70b_Instruct"]],
  ["llama-3.1-8b-instruct", ["Meta", "Llama_3_1_8b_Instruct"]],
  ["deepseek-chat", ["Deepseek", "V3"]],
  ["qwq-32b", ["Qwen", "QwQ_32b"]],
  ["gemini-2.0-flash-001", ["Google", "Gemini_2_0_Flash"]],
  ["gemini-2.5-flash-lite", ["Google", "Gemini_2_5_Flash_Lite"]],
  ["gemini-2.5-pro", ["Google", "Gemini_2_5_Pro"]],
  ["grok-3-beta", ["XAI", "Grok3_Beta"]],
  ["grok-3", ["XAI", "Grok3_Beta"]],
  ["grok-4", ["XAI", "Grok4"]],
  ["llama-4-maverick", ["Meta", "Llama_4_Maverick"]],
  ["llama-4-scout", ["Meta", "Llama_4_Scout"]]
]);

/**
 * LLM provider for openrouter.ai.
 *
 * On top of the base provider it keeps a per-instance cache of the OpenRouter
 * model list (including pricing) and uses it to attach input/output costs to
 * the result of `forward()`.
 */
var OpenRouterProvider = class extends BaseLLMProvider {
  // Last fetched model list, shaped as `{ data: [...] }`; undefined until the first successful fetch.
  models = void 0;
  // Tail of the refresh chain; each refresh is queued behind the previous one.
  modelsCachePromise = Promise.resolve(void 0);
  // `Date.now()` timestamp of the last successful refresh.
  modelsUpdatedAt = 0;
  identifier = "openrouter.ai";

  /**
   * @param options Options forwarded to the base provider; `baseURL` is always
   *   overridden with the OpenRouter API root.
   */
  constructor(options) {
    super({ ...options, baseURL });
  }

  /**
   * Refreshes the cached model list (text-in / text-out models only) when it
   * is missing or older than `MODELS_CACHE_TTL`. Refreshes are serialized
   * through `modelsCachePromise`. Any failure is swallowed, so this method
   * never rejects and the previous cache content (if any) is kept.
   */
  async updateModelsCache() {
    const handlesText = (m) => m.architecture.input_modalities.includes("text") && m.architecture.output_modalities.includes("text");
    const refresh = async () => {
      const cacheIsFresh = this.models !== void 0 && Date.now() - this.modelsUpdatedAt < MODELS_CACHE_TTL;
      if (cacheIsFresh) {
        return this.models;
      }
      const res = await axios.get(`${baseURL}/models`);
      const body = res.data;
      const textModels = { data: body.data.filter(handlesText) };
      this.models = textModels;
      this.modelsUpdatedAt = Date.now();
      return textModels;
    };
    this.modelsCachePromise = this.modelsCachePromise.then(refresh).catch(() => void 0);
    await this.modelsCachePromise;
  }

  /**
   * Returns cached model details after making sure the cache is up to date.
   *
   * @param modelId Optional model id. When omitted, the whole model list is returned.
   * @returns The matching model, the full list, or `undefined` when the cache
   *   is empty or no model has that id.
   */
  async getModelDetails(modelId) {
    await this.updateModelsCache();
    if (modelId !== void 0) {
      return this.models?.data.find((candidate) => candidate.id === modelId);
    }
    return this.models?.data;
  }

  /**
   * Returns the body of the `/credits` endpoint for the configured API key.
   */
  async getCredits() {
    const headers = { Authorization: `Bearer ${this.client.apiKey}` };
    const { data } = await axios.get(`${baseURL}/credits`, { headers });
    return data;
  }

  /**
   * Calls a protected endpoint with the configured API key. Resolves to
   * `true` when the request succeeds; a failed request rejects.
   */
  async validateApiKey() {
    const headers = { Authorization: `Bearer ${this.client.apiKey}` };
    await axios.get(`${baseURL}/models/user`, { headers });
    return true;
  }

  /**
   * Forwards the input through the base provider and, in parallel, refreshes
   * the model cache so that cost information can be attached.
   *
   * @returns The base response extended with `inputCost` / `outputCost`
   *   (10-decimal strings), each `undefined` when the model is not in the
   *   cache or the corresponding token count is missing.
   */
  async forward(input, options) {
    const [response] = await Promise.all([
      super.forward(input, options),
      // Runs concurrently with the request; only needed for the cost lookup below.
      this.updateModelsCache()
    ]);
    const modelInfo = this.models?.data.find((candidate) => candidate.id === options.model);
    // Per-token rate times token count; the rate is only read when a count exists.
    const costOf = (rateKey, tokens) => tokens === void 0 ? void 0 : new Decimal(modelInfo.pricing[rateKey]).mul(tokens).toFixed(10);
    let inputCost = void 0;
    let outputCost = void 0;
    if (modelInfo !== void 0) {
      inputCost = costOf("prompt", response.inputTokensUsed);
      outputCost = costOf("completion", response.outputTokensUsed);
    }
    return { ...response, inputCost, outputCost };
  }

  /**
   * Resolves a known OpenRouter model id to its name and owner.
   *
   * @param modelOrId Model id string, or an object carrying an `id` property.
   * @returns `{ id, name, owner, provider }`, or `undefined` when the id has
   *   no segment after the first "/" or that segment is not a known model.
   */
  parseModelInfo(modelOrId) {
    const id = typeof modelOrId === "string" ? modelOrId : modelOrId.id;
    const modelName = id.split("/")[1];
    if (!modelName) {
      return;
    }
    const known = OPENROUTER_KNOWN_MODELS.get(modelName);
    if (known === void 0) {
      return;
    }
    const [ownerKey, nameKey] = known;
    const owner = LargeLanguageModelOwner[ownerKey];
    const name = LargeLanguageModel[owner][nameKey];
    return {
      id,
      name,
      owner,
      provider: this.identifier.toLowerCase()
    };
  }
};
|
|
1673
|
-
|
|
1674
|
-
// src/providers/llm/nearai.ts
|
|
1675
|
-
// Host prefixes ("<host>::...") that parseModelInfo() recognises, in lookup order.
const NEARAI_KNOWN_HOSTS = ["fireworks", "hyperbolic"];

// Maps the last "/"-separated segment of a near.ai model id to the
// [owner key, model key] pair resolved against LargeLanguageModelOwner and
// LargeLanguageModel when parseModelInfo() runs.
const NEARAI_KNOWN_MODELS = new Map([
  ["llama4-maverick-instruct-basic", ["Meta", "Llama_4_Maverick"]],
  ["llama4-scout-instruct-basic", ["Meta", "Llama_4_Scout"]],
  ["llama-v3p3-70b-instruct", ["Meta", "Llama_3_3_70b_Instruct"]],
  ["Llama-3.3-70B-Instruct", ["Meta", "Llama_3_3_70b_Instruct"]],
  ["llama-v3p1-8b-instruct", ["Meta", "Llama_3_1_8b_Instruct"]],
  ["deepseek-v3", ["Deepseek", "V3"]],
  ["DeepSeek-V3", ["Deepseek", "V3"]],
  ["qwq-32b", ["Qwen", "QwQ_32b"]]
]);

/**
 * LLM provider for near.ai.
 */
var NearAIProvider = class extends BaseLLMProvider {
  identifier = "near.ai";

  /**
   * @param options Options forwarded to the base provider; `baseURL` is always
   *   overridden with the near.ai API root.
   */
  constructor(options) {
    super({ ...options, baseURL: "https://api.near.ai/v1" });
  }

  /**
   * Resolves a known near.ai model id to its name, owner and host.
   *
   * @param modelOrId Model id string, or an object carrying an `id` property.
   * @returns `{ id, name, owner, host, provider }`, or `undefined` when the id
   *   does not start with a known "<host>::" prefix or its last path segment
   *   is not a known model.
   */
  parseModelInfo(modelOrId) {
    const id = typeof modelOrId === "string" ? modelOrId : modelOrId.id;
    const host = NEARAI_KNOWN_HOSTS.find((prefix) => id.startsWith(`${prefix}::`));
    if (host === void 0) {
      return;
    }
    const modelName = id.split("/").pop();
    if (!modelName) {
      return;
    }
    const known = NEARAI_KNOWN_MODELS.get(modelName);
    if (known === void 0) {
      return;
    }
    const [ownerKey, nameKey] = known;
    const owner = LargeLanguageModelOwner[ownerKey];
    const name = LargeLanguageModel[owner][nameKey];
    return {
      id,
      name,
      owner,
      host,
      provider: this.identifier.toLowerCase()
    };
  }
};
|
|
1740
|
-
|
|
1741
|
-
// src/generators/pubmed/trp-generator.ts
|
|
1742
|
-
/**
 * Generates text-replacement prompts from PubMed-style articles.
 *
 * For each article, the title and merged paragraphs are sent to an LLM for
 * named entity recognition; the recognised entities are then replaced by a
 * placeholder and the original text becomes the correct answer.
 */
var TRPGenerator = class extends AbstractGenerator {
  identifier = "trp";
  inputSchema = z6.array(
    z6.object({
      pmid: z6.string(),
      title: z6.string(),
      paragraphs: z6.record(z6.string(), z6.string()),
      tags: z6.array(z6.string())
    })
  );
  optionsSchema = z6.object(
    {
      openRouterApiKey: z6.string(),
      paragraphMergeStrategy: EnumSchema(ParagraphMergeStrategy).default(
        ParagraphMergeStrategy.TitlesWithinSentences
      ),
      model: z6.string(),
      placeholder: z6.string().default("{}"),
      nerPrompt: z6.string().optional().default(
        `You are a Named Entity Recognition model which is specialized on medical relevant texts. Your task is extracting all medical related entities. Your output strictly forced to be a JSON array of strings where each item represents a single entity that you've extracted. Markdown formatting is forbidden. JSON output must be minified.`
      )
    },
    { message: "No options provided" }
  );

  /**
   * Builds one prompt per input article, in parallel.
   *
   * @param input Articles matching `inputSchema`.
   * @param options Raw options; validated against `optionsSchema` (throws on invalid options).
   * @returns The generated prompts; articles for which NER produced no usable
   *   entity list are dropped.
   */
  async generatePrompts(input, options) {
    const parsedOptions = this.optionsSchema.parse(options);
    const provider = new OpenRouterProvider({
      apiKey: parsedOptions.openRouterApiKey
    });
    const generatedPrompts = await Promise.all(
      input.map(
        (article) => this.generatePromptFromArticle(article, provider, parsedOptions)
      )
    );
    return generatedPrompts.filter((prompt) => prompt !== null);
  }

  /**
   * Builds a single text-replacement prompt for one article.
   *
   * @param article Article with `pmid`, `title`, `paragraphs` and `tags`.
   * @param provider Provider used for the NER request.
   * @param options Parsed generator options.
   * @returns The built prompt, or `null` when the NER response could not be
   *   turned into an array of entities.
   */
  async generatePromptFromArticle(article, provider, options) {
    const tags = [
      `generator-${this.identifier}`,
      "perform-ner",
      `merge-paragraphs-${options.paragraphMergeStrategy}`,
      `ner-for-medical-related-entities`
    ];
    const mergedParagraphs = paragraphMerge(
      article.paragraphs,
      options.paragraphMergeStrategy
    );
    const text = `${article.title}\n\n${mergedParagraphs}`;
    const entities = await this.doNER({
      text,
      provider,
      model: options.model,
      systemPrompt: options.nerPrompt
    });
    // Anything other than an array cannot be shuffled or listed below; skip the
    // article instead of letting one malformed LLM answer reject the whole batch.
    if (!Array.isArray(entities)) {
      return null;
    }
    const modifiedText = replaceEntities(text, entities, options.placeholder);
    // Shuffle a copy so the prompt does not reveal the extraction order while
    // `metadata.entities` keeps the order returned by the NER step.
    const shuffledEntities = this.shuffleEntities(entities);
    const entityList = shuffledEntities.map((e) => `"${e}"`).join(", ");
    const fullPrompt = `TEXT:\n${modifiedText}\n\nENTITIES:\n${entityList}`;
    return await this.buildPrompt({
      question: modifiedText,
      fullPrompt,
      // Original formatted text is the correct answer
      correctAnswer: text,
      type: PromptTypes.TextReplacement,
      metadata: {
        articleTags: article.tags,
        articleId: article.pmid,
        paragraphMergeStrategy: options.paragraphMergeStrategy,
        entityTypes: ["medical-related-entities"],
        entities,
        brainModel: options.model,
        generatorTags: tags,
        tags: [...tags, ...article.tags]
      }
    });
  }

  /**
   * Returns a shuffled copy of `entities` using a Fisher-Yates pass driven by
   * `cryptoRandom()`; the input array is left untouched.
   * NOTE(review): assumes `cryptoRandom()` yields a float in [0, 1) - confirm;
   * the index is clamped so an unexpected 1 cannot go out of range.
   */
  shuffleEntities(entities) {
    const shuffled = [...entities];
    for (let i = shuffled.length - 1; i > 0; i--) {
      const j = Math.min(i, Math.floor(cryptoRandom() * (i + 1)));
      [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
    }
    return shuffled;
  }

  /**
   * Sends `text` to the model with the NER system prompt and parses the answer as JSON.
   *
   * @param params `{ text, provider, model, systemPrompt }`.
   * @returns Whatever `parseResponseAsJSON` produces for the model output.
   */
  async doNER(params) {
    const { text, provider, model, systemPrompt } = params;
    const { data } = await provider.forward(text, {
      model,
      system: systemPrompt
    });
    return parseResponseAsJSON(data);
  }
};
|
|
1833
|
-
|
|
1834
|
-
// src/generators/mcq-generator.ts
|
|
1835
|
-
import { z as z9 } from "zod";
|
|
1836
|
-
|
|
1837
|
-
// src/utils/debug.ts
|
|
1838
|
-
/**
 * Writes a timestamped debug line to the console when the `PB_SDK_DEBUG`
 * environment variable is set to a non-empty value.
 *
 * @param {...unknown} args Values passed through to `console.log`.
 */
function debugLog(...args) {
  // Optional chaining does not protect against an undeclared identifier, so
  // `process?.env` alone throws a ReferenceError where `process` does not
  // exist (e.g. browsers). Check for its existence first.
  if (typeof process === "undefined" || !process?.env?.PB_SDK_DEBUG) {
    return;
  }
  console.log(`[${new Date().toISOString()}] PB_SDK_DEBUG:`, ...args);
}
|
|
1843
|
-
|
|
1844
|
-
// src/scorers/abstract/abstract-scorer.ts
|
|
1845
|
-
/**
 * Common base class of the scorers in this file. It has no members of its
 * own here; the concrete scorers extend it.
 */
var AbstractScorer = class {};
|
|
1847
|
-
|
|
1848
|
-
// src/scorers/multiple-choice-scorer.ts
|
|
1849
|
-
import { v7 as uuidv73 } from "uuid";
|
|
1850
|
-
/**
 * Scores multiple choice responses by extracting the chosen answer from the
 * response text with a fixed, ordered list of regex patterns.
 */
var MultipleChoiceScorer = class extends AbstractScorer {
  identifier = "multiple-choice";
  EXPLANATION_TEXT = `This scorer searches for multiple choice answers using the following patterns (in order):
1) "<!NO ANSWER!>" (special marker indicating model's inability to answer),
2) "Answer is $\\boxed{answer text}$" (full answer text in LaTeX boxed format),
3) "Answer is answer text" (full answer text),
4) "Answer is **answer text**" (full answer text in bold),
5) "Answer is $\\boxed{A}$" or "Answer is $\\boxed{A}$." (single letter in LaTeX boxed format, optional period),
6) "Answer is A" (single letter),
7) "Answer is **A**" (single letter in bold),
8) "A: ..." (letter followed by colon),
9) "A) ..." (letter followed by closing parenthesis and optional text),
10) "A)" (letter followed by closing parenthesis).
The scorer extracts the answer from the last matching pattern (if multiple matches exist) and compares it with the expected answer key (or the answer text itself).`;

  /**
   * Score a multiple choice response.
   *
   * The score is 1 when the whole response equals the answer key, when the
   * extracted answer equals the answer key, or when the extracted answer is
   * the text of the option stored under the answer key; otherwise 0.
   *
   * @param response Response carrying `data` (model output) and `prompt`.
   * @returns The parsed score object, or `undefined` when `canScore` rejects the response.
   */
  async scoreOne(response) {
    if (!await this.canScore(response)) {
      return void 0;
    }
    const { data, prompt } = response;
    let score = 0;
    if (data.trim() === prompt.answerKey?.trim()) {
      score = 1;
    }
    // Pass the answer text as well: previously only the key was used to build
    // the "answer text" patterns, so a response such as "Answer is Paris" was
    // never recognised and was misread by the single-letter pattern as "P".
    let extractedAnswer = this.lookForAnswer(data, prompt.answerKey, prompt.answer);
    if (extractedAnswer === prompt.answerKey) {
      score = 1;
    } else {
      // The model may have answered with the option text; map it back to its key.
      const answerOption = Object.entries(prompt.options).find(
        ([, value]) => value.trim() === extractedAnswer?.trim()
      );
      if (answerOption && answerOption[0] === prompt.answerKey) {
        score = 1;
        extractedAnswer = answerOption[0];
      }
    }
    return PromptScoreSchema.parse({
      ...response,
      score,
      scoreDID: uuidv73(),
      method: ScoringMethods.algo,
      prompt: response.prompt,
      scoreMetadata: {
        scorerIdentifier: this.identifier,
        extractedAnswer
      },
      explanation: this.EXPLANATION_TEXT
    });
  }

  /**
   * Tells whether the response has everything this scorer needs: response
   * data, a prompt with at least one option, a non-empty answer key and a
   * non-empty answer.
   */
  async canScore(response) {
    return response.data !== void 0 && response.prompt !== void 0 && // TODO: Enable this condition once we are sure the structure of the Prompt and whether to include the type in there
    // response.prompt.type === PromptTypes.MultipleChoice &&
    response.prompt.options !== void 0 && Object.keys(response.prompt.options).length > 0 && Boolean(response.prompt.answerKey) && // not undefined or empty string
    Boolean(response.prompt.answer);
  }

  /**
   * Extracts answer from Response text using regex patterns.
   *
   * Patterns are tried in order; the first pattern with at least one match
   * wins and the capture group of its last match is returned.
   *
   * @param response Response text to search.
   * @param answer Expected answer (the answer key when called by `scoreOne`);
   *   used to build the answer-specific patterns.
   * @param answerText Optional expected answer text. When given and different
   *   from `answer`, the answer-specific patterns are also built for it, right
   *   after the `answer` variant of the same shape.
   * @returns The extracted answer, or `undefined` when nothing matched or the
   *   "<!NO ANSWER!>" marker was found.
   */
  lookForAnswer(response, answer, answerText) {
    const candidates = [this.escapeRegex(answer)];
    const trimmedText = typeof answerText === "string" ? answerText.trim() : "";
    if (trimmedText !== "" && trimmedText !== answer) {
      candidates.push(this.escapeRegex(trimmedText));
    }
    // Builds one pattern per expected-answer candidate for a given shape.
    const forEachCandidate = (toSource) => candidates.map((candidate) => ({
      regex: new RegExp(toSource(candidate), "g"),
      answerGroupIndex: 1
    }));
    const patterns = [
      {
        // "<!NO ANSWER!>"
        regex: /<!NO ANSWER!>/g,
        // Pattern matches, but no group is specified in the regex.
        // So the final value will be `undefined`.
        answerGroupIndex: 1
      },
      // "Answer is $\boxed{answer text}$"
      ...forEachCandidate((c) => `[Aa]nswer is \\$\\\\boxed\\{(${c})\\}\\$`),
      // "Answer is answer text"
      ...forEachCandidate((c) => `[Aa]nswer is\\s+(${c})`),
      // "Answer is **answer text**"
      ...forEachCandidate((c) => `[Aa]nswer is\\s+\\**(${c})\\**`),
      {
        // "Answer is $\boxed{A}$."
        regex: /[Aa]nswer is \$\\boxed\{([A-Z])\}\$\.?/g,
        answerGroupIndex: 1
      },
      {
        // "Answer is A"
        regex: /[Aa]nswer is\s+([A-Z])/g,
        answerGroupIndex: 1
      },
      {
        // "Answer is **A**"
        regex: /[Aa]nswer is\s+\**([A-Z])\**/g,
        answerGroupIndex: 1
      },
      {
        // "A: answer text"
        regex: /([A-Z]):.+/g,
        answerGroupIndex: 1
      },
      {
        // "A) answer text"
        regex: /([A-Z])\)\s*.+/g,
        answerGroupIndex: 1
      },
      {
        // "A)"
        regex: /([A-Z])\)/g,
        answerGroupIndex: 1
      }
    ];
    for (const pattern of patterns) {
      const matches = Array.from(response.matchAll(pattern.regex));
      const match = matches.at(-1);
      if (match) {
        return match[pattern.answerGroupIndex];
      }
    }
  }

  /**
   * Escapes regex metacharacters so `str` can be embedded literally in a RegExp source.
   */
  escapeRegex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  }
};
|
|
1983
|
-
|
|
1984
|
-
// src/scorers/exact-match-scorer.ts
|
|
1985
|
-
import { v7 as uuidv74 } from "uuid";
|
|
1986
|
-
/**
 * Scores a response 1 when its trimmed text is exactly the expected value and
 * 0 otherwise. The expected value is the answer key for prompts that carry
 * options and the answer text for all other prompts.
 */
var ExactMatchScorer = class extends AbstractScorer {
  identifier = "exact-match";

  /**
   * @param response Response carrying `data` (model output) and `prompt`.
   * @returns The parsed score object, or `undefined` when `canScore` rejects the response.
   */
  async scoreOne(response) {
    const scorable = await this.canScore(response);
    if (!scorable) {
      return void 0;
    }
    const { prompt } = response;
    const hasOptions = prompt.options !== void 0 && Object.keys(prompt.options).length > 0;
    const given = response.data?.trim();
    const expected = hasOptions ? prompt.answerKey?.trim() : prompt.answer?.trim();
    return PromptScoreSchema.parse({
      ...response,
      score: given === expected ? 1 : 0,
      scoreDID: uuidv74(),
      method: ScoringMethods.algo,
      prompt,
      scoreMetadata: {
        scorerIdentifier: this.identifier,
        extractedAnswer: response.data?.trim()
      },
      explanation: void 0
    });
  }

  /**
   * A response can be scored as soon as it has both data and a prompt.
   */
  async canScore(response) {
    const hasData = response.data !== void 0;
    return hasData && response.prompt !== void 0;
  }
};
|
|
2015
|
-
|
|
2016
|
-
// src/scorers/ref-answer-equality-llm-judge-scorer.ts
|
|
2017
|
-
import { z as z7 } from "zod";
|
|
2018
|
-
import { v7 as uuidv75 } from "uuid";
|
|
2019
|
-
/**
 * Scores a response by asking an LLM judge whether it matches the reference
 * answer (or the reference answer key) of the prompt.
 */
var RefAnswerEqualityLLMJudgeScorer = class extends AbstractScorer {
  identifier = "ref-answer-equality-llm-judge-scorer";
  // Either `provider` or `openRouterApiKey` must be supplied. The transform
  // guarantees that the parsed options always carry a `provider`.
  optionsSchema = z7.object({
    openRouterApiKey: z7.string().optional(),
    provider: z7.instanceof(BaseLLMProvider).optional(),
    model: z7.string(),
    prompt: z7.string().default(`
Judge whether the following [response] to [question] is correct or not based on the precise and unambiguous [correctAnswer] OR [correctAnswerKey] (if the question was a multiple choice question) below.

[question]: {question}

[response]: {response}

[correctAnswer]: {correctAnswer}
[correctAnswerKey]: {correctAnswerKey}

Your judgement must be in the format and criteria specified below:
\`\`\`json
{
"extractedFinalAnswer": "The final exact answer extracted from the [response]. Put the extracted answer as 'None' if there is no exact, final answer to extract from the response.",
"reasoning": "Explain why the extractedFinalAnswer is correct or incorrect based on [correctAnswer], focusing only on if there are meaningful differences between [correctAnswer] and the extractedFinalAnswer. Do not comment on any background to the problem, do not attempt to solve the problem, do not argue for any answer different than [correctAnswer], focus only on whether the answers match.",
"correct": "Answer 'yes' if extracted_final_answer matches the [correctAnswer] given above, or is within a small margin of error for numerical problems. Answer 'no' otherwise, i.e. if there if there is any inconsistency, ambiguity, non-equivalency, or if the extracted answer is incorrect.",
"confidence": "The extracted confidence score between 0% and 100% from [response]. Put 100 if there is no confidence score available."
}
\`\`\``),
    promptSuffix: z7.string().default(""),
    promptPrefix: z7.string().default("")
  }).transform((options, ctx) => {
    if (options.provider !== void 0) {
      return options;
    }
    if (options.openRouterApiKey !== void 0) {
      return {
        ...options,
        provider: new OpenRouterProvider({
          apiKey: options.openRouterApiKey
        })
      };
    }
    ctx.addIssue({
      code: z7.ZodIssueCode.custom,
      message: "No provider or openRouterApiKey provided"
    });
    return z7.NEVER;
  });

  /**
   * Asks the judge model whether `response.data` matches the reference answer.
   *
   * @param response Response carrying `data` (model output) and `prompt`.
   * @param options Raw options; validated against `optionsSchema` (throws on invalid options).
   * @returns The parsed score object (score 1 only when the judge's `correct`
   *   field is the string "yes", case-insensitive), or `undefined` when
   *   `canScore` rejects the response.
   */
  async scoreOne(response, options) {
    if (!await this.canScore(response)) {
      return void 0;
    }
    const parsedOptions = this.optionsSchema.parse(options);
    // The schema transform has already resolved the provider (the one passed
    // in, or an OpenRouterProvider built from the API key), so reuse it
    // instead of constructing a second one.
    const provider = parsedOptions.provider;
    const prompt = formatString(
      parsedOptions.promptPrefix + parsedOptions.prompt + parsedOptions.promptSuffix,
      {
        question: response.prompt.question.data,
        response: response.data,
        correctAnswer: response.prompt.answer,
        correctAnswerKey: (
          // If the Prompt is a multiple choice question, then the model might have answered
          // with the correct answer key rather than the actual answer text so consider that
          // the answer key is also the correct answer
          response.prompt.answerKey || "CORRECT ANSWER KEY IS NOT AVAILABLE"
        )
      }
    );
    const judge = await provider.forward(prompt, {
      model: parsedOptions.model
    });
    const extractedJSON = parseResponseAsJSON(judge.data);
    debugLog("Extracted JSON from Judge response:", extractedJSON);
    debugLog("-------------------------------------");
    // The judge output is untrusted: `correct` may be missing or not a string
    // (or the JSON may be an array). Treat anything but a "yes" string as 0
    // instead of throwing on `.toLowerCase()`.
    const verdict = extractedJSON && typeof extractedJSON === "object" ? extractedJSON.correct : void 0;
    const score = typeof verdict === "string" && verdict.trim().toLowerCase() === "yes" ? 1 : 0;
    const modelInfo = await provider.parseModelInfo(parsedOptions.model);
    return PromptScoreSchema.parse({
      ...response,
      score,
      scoreDID: uuidv75(),
      prompt: response.prompt,
      method: ScoringMethods.ai,
      scorerAI: {
        provider: provider.identifier,
        modelName: modelInfo?.name ?? "unknown",
        modelHost: modelInfo?.host ?? "auto",
        modelOwner: modelInfo?.owner ?? "unknown",
        modelId: parsedOptions.model,
        inputTokensUsed: judge.inputTokensUsed,
        outputTokensUsed: judge.outputTokensUsed,
        inputCost: judge.inputCost,
        outputCost: judge.outputCost
      },
      scoreMetadata: {
        scorerIdentifier: this.identifier,
        extractedAnswer: extractedJSON?.extractedFinalAnswer,
        reasoning: extractedJSON?.reasoning,
        confidence: extractedJSON?.confidence
      },
      explanation: extractedJSON?.reasoning ?? void 0
    });
  }

  /**
   * A response can be scored when it has data and its prompt has a reference answer.
   */
  async canScore(response) {
    return response.data !== void 0 && response.prompt !== void 0 && response.prompt.answer !== void 0;
  }
};
|
|
2127
|
-
|
|
2128
|
-
// src/scorers/similarity-scorer.ts
|
|
2129
|
-
import tokenizer from "sbd";
|
|
2130
|
-
import { v7 as uuidv76 } from "uuid";
|
|
2131
|
-
/**
 * Scores a response by the fraction of reference-answer sentences that appear
 * verbatim at the same position in the response.
 */
var SimilarityScorer = class extends AbstractScorer {
  identifier = "similarity";

  /**
   * @param response Response carrying `data` (model output) and `prompt`.
   * @param options Optional `{ ignoreCase }`; when truthy both texts are
   *   lower-cased before being split into sentences.
   * @returns The parsed score object, or `undefined` when `canScore` rejects
   *   the response. The score is `matching sentences / reference sentences`,
   *   and 0 when the reference answer contains no sentences.
   */
  async scoreOne(response, options) {
    if (!await this.canScore(response)) {
      return void 0;
    }
    const normalize = (text) => options?.ignoreCase ? text.toLowerCase() : text;
    const originalSentences = tokenizer.sentences(normalize(response.prompt.answer));
    const responseSentences = tokenizer.sentences(normalize(response.data));
    let correctPositionedSentences = 0;
    for (let i = 0; i < originalSentences.length; i++) {
      if (originalSentences[i] === responseSentences[i]) {
        correctPositionedSentences += 1;
      }
    }
    const totalSentencesInAnswer = originalSentences.length;
    // An empty reference yields zero sentences; without this guard the
    // accuracy would be 0 / 0 = NaN, which is not a usable score.
    const accuracy = totalSentencesInAnswer === 0 ? 0 : correctPositionedSentences / totalSentencesInAnswer;
    return PromptScoreSchema.parse({
      ...response,
      scoreDID: uuidv76(),
      // Calculate the accuracy
      score: accuracy,
      method: ScoringMethods.algo,
      prompt: response.prompt,
      scoreMetadata: {
        scorerIdentifier: this.identifier,
        correctPositionedSentences,
        totalSentencesInAnswer,
        totalSentencesInResponse: responseSentences.length
      },
      explanation: void 0
    });
  }

  /**
   * A response can be scored when it has data and its prompt has a reference answer.
   */
  async canScore(response) {
    return response.data !== void 0 && response.prompt !== void 0 && response.prompt.answer !== void 0;
  }
};
|
|
2167
|
-
|
|
2168
|
-
// src/scorers/llm-judge-scorer.ts
|
|
2169
|
-
import { z as z8 } from "zod";
|
|
2170
|
-
import { v7 as uuidv77 } from "uuid";
|
|
2171
|
-
var LLMJudgeScorer = class extends AbstractScorer {
|
|
2172
|
-
identifier = "llm-judge";
|
|
2173
|
-
optionsSchema = z8.object({
|
|
2174
|
-
openRouterApiKey: z8.string().optional(),
|
|
2175
|
-
provider: z8.instanceof(BaseLLMProvider).optional(),
|
|
2176
|
-
model: z8.string().default("openai/gpt-4o-mini"),
|
|
2177
|
-
mode: z8.enum(["pointwise", "pairwise"]).default("pointwise"),
|
|
2178
|
-
criteria: z8.array(CriterionSchema).min(1),
|
|
2179
|
-
meta: z8.record(z8.any(), z8.any()).optional(),
|
|
2180
|
-
temperature: z8.number().min(0).max(2).default(0),
|
|
2181
|
-
promptPrefix: z8.string().default(""),
|
|
2182
|
-
promptSuffix: z8.string().default(""),
|
|
2183
|
-
responseB: PromptResponseSchema.optional()
|
|
2184
|
-
}).transform((options, ctx) => {
|
|
2185
|
-
if (options.provider !== void 0) {
|
|
2186
|
-
return options;
|
|
2187
|
-
}
|
|
2188
|
-
if (options.openRouterApiKey !== void 0) {
|
|
2189
|
-
return {
|
|
2190
|
-
...options,
|
|
2191
|
-
provider: new OpenRouterProvider({
|
|
2192
|
-
apiKey: options.openRouterApiKey
|
|
2193
|
-
})
|
|
2194
|
-
};
|
|
2195
|
-
}
|
|
2196
|
-
ctx.addIssue({
|
|
2197
|
-
code: z8.ZodIssueCode.custom,
|
|
2198
|
-
message: "No provider or openRouterApiKey provided"
|
|
2199
|
-
});
|
|
2200
|
-
return z8.NEVER;
|
|
2201
|
-
});
|
|
2202
|
-
async scoreOne(response, options) {
|
|
2203
|
-
if (!await this.canScore(response, options)) {
|
|
2204
|
-
return void 0;
|
|
2205
|
-
}
|
|
2206
|
-
const parsedOptions = this.optionsSchema.parse(options);
|
|
2207
|
-
const provider = parsedOptions.provider ?? new OpenRouterProvider({
|
|
2208
|
-
apiKey: parsedOptions.openRouterApiKey
|
|
2209
|
-
});
|
|
2210
|
-
if (parsedOptions.mode === "pointwise") {
|
|
2211
|
-
return this.scorePointwise(response, parsedOptions, provider);
|
|
2212
|
-
} else {
|
|
2213
|
-
if (!parsedOptions.responseB) {
|
|
2214
|
-
throw new Error(
|
|
2215
|
-
"Pairwise mode requires responseB to be provided in options."
|
|
2216
|
-
);
|
|
2217
|
-
}
|
|
2218
|
-
return this.scorePairwise(response, parsedOptions, provider);
|
|
2219
|
-
}
|
|
2220
|
-
}
|
|
2221
|
-
systemPrompt() {
|
|
2222
|
-
return [
|
|
2223
|
-
"You are a strict, fair evaluation judge.",
|
|
2224
|
-
"Only use information provided in the task and candidate answers.",
|
|
2225
|
-
"For each criterion, return an integer score within the provided scale and a very brief justification (\u22642 sentences).",
|
|
2226
|
-
"Return only JSON that conforms to the requested schema.",
|
|
2227
|
-
"Do not include chain-of-thought or internal reasoning; just concise justifications."
|
|
2228
|
-
].join(" ");
|
|
2229
|
-
}
|
|
2230
|
-
async scorePointwise(response, options, provider) {
|
|
2231
|
-
let task = response.prompt.fullPrompt.data;
|
|
2232
|
-
if (response.prompt.type === PromptTypes.MultipleChoice) {
|
|
2233
|
-
task += `
|
|
2234
|
-
Correct answer: ${response.prompt.answerKey} - ${response.prompt.answer}`;
|
|
2235
|
-
} else if (response.prompt.answer) {
|
|
2236
|
-
task += `
|
|
2237
|
-
Expected answer: ${response.prompt.answer}`;
|
|
2238
|
-
}
|
|
2239
|
-
const norm = this.normalizeWeights(options.criteria);
|
|
2240
|
-
const user = [
|
|
2241
|
-
`TASK:
|
|
2242
|
-
${task}`,
|
|
2243
|
-
options.meta ? `
|
|
2244
|
-
ADDITIONAL CONTEXT (may include references, constraints, expected behavior):
|
|
2245
|
-
${JSON.stringify(options.meta, null, 2)}` : "",
|
|
2246
|
-
`
|
|
2247
|
-
RUBRIC:
|
|
2248
|
-
${this.renderCriteria(norm)}`,
|
|
2249
|
-
`
|
|
2250
|
-
CANDIDATE ANSWER:
|
|
2251
|
-
${response.data}`,
|
|
2252
|
-
`
|
|
2253
|
-
RESPONSE FORMAT (strict JSON):`,
|
|
2254
|
-
JSON.stringify(
|
|
2255
|
-
{
|
|
2256
|
-
perCriterion: [
|
|
2257
|
-
{
|
|
2258
|
-
id: "<string>",
|
|
2259
|
-
score: "<integer within scale>",
|
|
2260
|
-
justification: "<\u22642 sentences>"
|
|
2261
|
-
}
|
|
2262
|
-
],
|
|
2263
|
-
overall: "<0..100 integer>",
|
|
2264
|
-
verdict: "<one of: 'strong-pass' | 'pass' | 'borderline' | 'fail'>",
|
|
2265
|
-
notes: "<optional, \u22643 short bullet points>"
|
|
2266
|
-
},
|
|
2267
|
-
null,
|
|
2268
|
-
2
|
|
2269
|
-
)
|
|
2270
|
-
].join("");
|
|
2271
|
-
const scorePrompt = [
|
|
2272
|
-
user,
|
|
2273
|
-
"\nINSTRUCTIONS:",
|
|
2274
|
-
"- Compute per-criterion integer scores within each scale.",
|
|
2275
|
-
"- Compute weighted overall as a 0-100 integer: normalize weights, map each score to 0-100 by its scale, then weighted average.",
|
|
2276
|
-
"- Choose verdict thresholds: \u226585 strong-pass, 70-84 pass, 60-69 borderline, <60 fail.",
|
|
2277
|
-
"- Output valid JSON only."
|
|
2278
|
-
].join("\n");
|
|
2279
|
-
const messages = [
|
|
2280
|
-
{ role: "system", content: this.systemPrompt() },
|
|
2281
|
-
{
|
|
2282
|
-
role: "user",
|
|
2283
|
-
content: scorePrompt
|
|
2284
|
-
}
|
|
2285
|
-
];
|
|
2286
|
-
const llmResponse = await provider.forward(messages, {
|
|
2287
|
-
model: options.model
|
|
2288
|
-
});
|
|
2289
|
-
debugLog("LLM Response:", llmResponse.data);
|
|
2290
|
-
const json2 = extractFirstJSON(llmResponse.data);
|
|
2291
|
-
if (!Array.isArray(json2.perCriterion)) {
|
|
2292
|
-
throw new Error("Model did not return perCriterion array.");
|
|
2293
|
-
}
|
|
2294
|
-
debugLog("Extracted JSON from Judge response:", json2);
|
|
2295
|
-
const computedOverall = this.computeOverallScore(json2.perCriterion, norm);
|
|
2296
|
-
const overall = Number.isFinite(Number(json2.overall)) && Number(json2.overall) > 0 ? Number(json2.overall) : computedOverall;
|
|
2297
|
-
const score = Math.min(1, Math.max(0, overall / 100));
|
|
2298
|
-
let explanation = "";
|
|
2299
|
-
if (json2.notes && json2.notes.length > 0) {
|
|
2300
|
-
explanation = json2.notes.join(". ");
|
|
2301
|
-
}
|
|
2302
|
-
for (const criterion of json2.perCriterion) {
|
|
2303
|
-
explanation += [
|
|
2304
|
-
`Criteria: ${criterion.id}`,
|
|
2305
|
-
`Score: ${criterion.score}`,
|
|
2306
|
-
`Justification:
|
|
2307
|
-
${criterion.justification}
|
|
2308
|
-
`
|
|
2309
|
-
].join("\n");
|
|
2310
|
-
}
|
|
2311
|
-
return PromptScoreSchema.parse({
|
|
2312
|
-
...response,
|
|
2313
|
-
score,
|
|
2314
|
-
scoreDID: uuidv77(),
|
|
2315
|
-
prompt: response.prompt,
|
|
2316
|
-
method: ScoringMethods.ai,
|
|
2317
|
-
scorerAI: {
|
|
2318
|
-
provider: provider.identifier,
|
|
2319
|
-
modelId: options.model,
|
|
2320
|
-
modelHost: "auto",
|
|
2321
|
-
modelName: "unknown",
|
|
2322
|
-
modelOwner: "unknown",
|
|
2323
|
-
inputTokensUsed: llmResponse.inputTokensUsed,
|
|
2324
|
-
outputTokensUsed: llmResponse.outputTokensUsed,
|
|
2325
|
-
inputCost: llmResponse.inputCost,
|
|
2326
|
-
outputCost: llmResponse.outputCost
|
|
2327
|
-
},
|
|
2328
|
-
scoreMetadata: {
|
|
2329
|
-
overall,
|
|
2330
|
-
scorerIdentifier: this.identifier,
|
|
2331
|
-
mode: "pointwise",
|
|
2332
|
-
perCriterion: json2.perCriterion,
|
|
2333
|
-
verdict: json2.verdict,
|
|
2334
|
-
scorePrompt,
|
|
2335
|
-
systemPrompt: this.systemPrompt()
|
|
2336
|
-
},
|
|
2337
|
-
explanation: explanation || void 0
|
|
2338
|
-
});
|
|
2339
|
-
}
|
|
2340
|
-
async scorePairwise(response, options, provider) {
|
|
2341
|
-
if (!options.responseB || !response.data || !options.responseB.data) {
|
|
2342
|
-
return void 0;
|
|
2343
|
-
}
|
|
2344
|
-
const norm = this.normalizeWeights(options.criteria);
|
|
2345
|
-
const user = [
|
|
2346
|
-
`TASK:
|
|
2347
|
-
${response.prompt.fullPrompt.data}`,
|
|
2348
|
-
options.meta ? `
|
|
2349
|
-
ADDITIONAL CONTEXT:
|
|
2350
|
-
${JSON.stringify(options.meta, null, 2)}` : "",
|
|
2351
|
-
`
|
|
2352
|
-
RUBRIC:
|
|
2353
|
-
${this.renderCriteria(norm)}`,
|
|
2354
|
-
`
|
|
2355
|
-
CANDIDATE A:
|
|
2356
|
-
${response.data}`,
|
|
2357
|
-
`
|
|
2358
|
-
CANDIDATE B:
|
|
2359
|
-
${options.responseB.data}`,
|
|
2360
|
-
`
|
|
2361
|
-
RESPONSE FORMAT (strict JSON):`,
|
|
2362
|
-
JSON.stringify(
|
|
2363
|
-
{
|
|
2364
|
-
winner: "<'A' | 'B' | 'tie'>",
|
|
2365
|
-
confidence: "<integer 1..5>",
|
|
2366
|
-
rationale: "<\u22643 sentences>",
|
|
2367
|
-
perCriterion: [
|
|
2368
|
-
{
|
|
2369
|
-
id: "<string>",
|
|
2370
|
-
better: "<'A'|'B'|'tie'>",
|
|
2371
|
-
justification: "<\u22642 sentences>"
|
|
2372
|
-
}
|
|
2373
|
-
]
|
|
2374
|
-
},
|
|
2375
|
-
null,
|
|
2376
|
-
2
|
|
2377
|
-
)
|
|
2378
|
-
].join("");
|
|
2379
|
-
const systemPrompt = [
|
|
2380
|
-
this.systemPrompt(),
|
|
2381
|
-
"When comparing, do not reward verbosity or length. Prefer factual accuracy, adherence to instructions, safety, and clarity."
|
|
2382
|
-
].join(" ");
|
|
2383
|
-
const messages = [
|
|
2384
|
-
{
|
|
2385
|
-
role: "system",
|
|
2386
|
-
content: systemPrompt
|
|
2387
|
-
},
|
|
2388
|
-
{ role: "user", content: user }
|
|
2389
|
-
];
|
|
2390
|
-
const content = await provider.forward(messages, {
|
|
2391
|
-
model: options.model
|
|
2392
|
-
});
|
|
2393
|
-
const json2 = extractFirstJSON(content.data);
|
|
2394
|
-
debugLog("Original Pairwise Judge response:", content.data);
|
|
2395
|
-
debugLog("Extracted JSON from Pairwise Judge response:", json2);
|
|
2396
|
-
if (!json2 || !["A", "B", "tie"].includes(json2.winner)) {
|
|
2397
|
-
throw new Error("Model returned invalid pairwise result.");
|
|
2398
|
-
}
|
|
2399
|
-
const confidence = Math.max(1, Math.min(5, Number(json2.confidence) || 3));
|
|
2400
|
-
let score = 0.5;
|
|
2401
|
-
if (json2.winner === "A") {
|
|
2402
|
-
score = 1;
|
|
2403
|
-
} else if (json2.winner === "B") {
|
|
2404
|
-
score = 0;
|
|
2405
|
-
}
|
|
2406
|
-
let explanation = "";
|
|
2407
|
-
if (json2.rationale) {
|
|
2408
|
-
explanation = json2.rationale;
|
|
2409
|
-
}
|
|
2410
|
-
for (const criterion of json2.perCriterion) {
|
|
2411
|
-
explanation += [
|
|
2412
|
-
`Criteria: ${criterion.id}`,
|
|
2413
|
-
`Better: ${criterion.better}`,
|
|
2414
|
-
`Justification:
|
|
2415
|
-
${criterion.justification}
|
|
2416
|
-
`
|
|
2417
|
-
].join("\n");
|
|
2418
|
-
}
|
|
2419
|
-
return PromptScoreSchema.parse({
|
|
2420
|
-
...response,
|
|
2421
|
-
score,
|
|
2422
|
-
scoreDID: uuidv77(),
|
|
2423
|
-
prompt: response.prompt,
|
|
2424
|
-
method: ScoringMethods.ai,
|
|
2425
|
-
scorerAI: {
|
|
2426
|
-
provider: provider.identifier,
|
|
2427
|
-
modelId: options.model,
|
|
2428
|
-
modelHost: "auto",
|
|
2429
|
-
modelName: "unknown",
|
|
2430
|
-
modelOwner: "unknown",
|
|
2431
|
-
inputTokensUsed: content.inputTokensUsed,
|
|
2432
|
-
outputTokensUsed: content.outputTokensUsed,
|
|
2433
|
-
inputCost: content.inputCost,
|
|
2434
|
-
outputCost: content.outputCost
|
|
2435
|
-
},
|
|
2436
|
-
scoreMetadata: {
|
|
2437
|
-
scorerIdentifier: this.identifier,
|
|
2438
|
-
mode: "pairwise",
|
|
2439
|
-
winner: json2.winner,
|
|
2440
|
-
confidence,
|
|
2441
|
-
rationale: json2.rationale,
|
|
2442
|
-
perCriterion: json2.perCriterion,
|
|
2443
|
-
responseB: options.responseB.data,
|
|
2444
|
-
systemPrompt,
|
|
2445
|
-
scorePrompt: user
|
|
2446
|
-
},
|
|
2447
|
-
explanation: explanation || void 0
|
|
2448
|
-
});
|
|
2449
|
-
}
|
|
2450
|
-
/**
|
|
2451
|
-
* Build the pairwise evaluation prompt
|
|
2452
|
-
*/
|
|
2453
|
-
buildPairwisePrompt(task, answerA, answerB, criteria, meta, prefix = "", suffix = "") {
|
|
2454
|
-
const rubric = this.renderCriteria(criteria);
|
|
2455
|
-
const userPrompt = [
|
|
2456
|
-
`TASK:
|
|
2457
|
-
${task}`,
|
|
2458
|
-
meta ? `
|
|
2459
|
-
ADDITIONAL CONTEXT:
|
|
2460
|
-
${JSON.stringify(meta, null, 2)}` : "",
|
|
2461
|
-
`
|
|
2462
|
-
RUBRIC:
|
|
2463
|
-
${rubric}`,
|
|
2464
|
-
`
|
|
2465
|
-
CANDIDATE A:
|
|
2466
|
-
${answerA}`,
|
|
2467
|
-
`
|
|
2468
|
-
CANDIDATE B:
|
|
2469
|
-
${answerB}`,
|
|
2470
|
-
`
|
|
2471
|
-
RESPONSE FORMAT (strict JSON):`,
|
|
2472
|
-
JSON.stringify(
|
|
2473
|
-
{
|
|
2474
|
-
winner: "<'A' | 'B' | 'tie'>",
|
|
2475
|
-
confidence: "<integer 1..5>",
|
|
2476
|
-
rationale: "<\u22643 sentences>",
|
|
2477
|
-
perCriterion: [
|
|
2478
|
-
{
|
|
2479
|
-
id: "<string>",
|
|
2480
|
-
better: "<'A'|'B'|'tie'>",
|
|
2481
|
-
justification: "<\u22642 sentences>"
|
|
2482
|
-
}
|
|
2483
|
-
]
|
|
2484
|
-
},
|
|
2485
|
-
null,
|
|
2486
|
-
2
|
|
2487
|
-
)
|
|
2488
|
-
].join("\n");
|
|
2489
|
-
return `${prefix}${userPrompt}${suffix}`;
|
|
2490
|
-
}
|
|
2491
|
-
/**
|
|
2492
|
-
* Normalize criterion weights to sum to 1
|
|
2493
|
-
*/
|
|
2494
|
-
normalizeWeights(criteria) {
|
|
2495
|
-
const sum = criteria.reduce((a, c) => a + (c.weight ?? 1), 0) || 1;
|
|
2496
|
-
return criteria.map((c) => ({ ...c, weight: (c.weight ?? 1) / sum }));
|
|
2497
|
-
}
|
|
2498
|
-
/**
|
|
2499
|
-
* Render criteria as a formatted string
|
|
2500
|
-
*/
|
|
2501
|
-
renderCriteria(criteria) {
|
|
2502
|
-
return criteria.map((c, i) => {
|
|
2503
|
-
const mn = c.scale?.min ?? 0;
|
|
2504
|
-
const mx = c.scale?.max ?? 5;
|
|
2505
|
-
return `${i + 1}. id="${c.id}" (weight=${c.weight}, scale=${mn}..${mx}) \u2014 ${c.description}`;
|
|
2506
|
-
}).join("\n");
|
|
2507
|
-
}
|
|
2508
|
-
/**
|
|
2509
|
-
* Compute overall score from per-criterion scores
|
|
2510
|
-
*/
|
|
2511
|
-
computeOverallScore(perCriterion, criteria) {
|
|
2512
|
-
let total = 0;
|
|
2513
|
-
for (const pc of perCriterion) {
|
|
2514
|
-
const criterion = criteria.find((c) => c.id === pc.id);
|
|
2515
|
-
const { min, max, weight } = {
|
|
2516
|
-
min: criterion?.scale?.min ?? 0,
|
|
2517
|
-
max: criterion?.scale?.max ?? 5,
|
|
2518
|
-
weight: criterion?.weight ?? 0
|
|
2519
|
-
};
|
|
2520
|
-
const score = Number(pc.score);
|
|
2521
|
-
if (!Number.isFinite(score)) continue;
|
|
2522
|
-
const clamped = Math.max(min, Math.min(max, score));
|
|
2523
|
-
const normalized100 = max === min ? 0 : (clamped - min) / (max - min) * 100;
|
|
2524
|
-
total += normalized100 * weight;
|
|
2525
|
-
}
|
|
2526
|
-
return Math.round(total);
|
|
2527
|
-
}
|
|
2528
|
-
/**
|
|
2529
|
-
* Extract criteria IDs from prompt text (fallback)
|
|
2530
|
-
*/
|
|
2531
|
-
extractCriteria(promptText) {
|
|
2532
|
-
const ids = [];
|
|
2533
|
-
const re = /\d+\. id="([^"]+)"/g;
|
|
2534
|
-
let m;
|
|
2535
|
-
while ((m = re.exec(promptText)) !== null) {
|
|
2536
|
-
ids.push(m[1]);
|
|
2537
|
-
}
|
|
2538
|
-
return ids.length ? ids : ["correctness", "instruction_following", "clarity"];
|
|
2539
|
-
}
|
|
2540
|
-
async canScore(response, options) {
|
|
2541
|
-
const hasValidResponse = response.data !== void 0 && response.prompt !== void 0;
|
|
2542
|
-
if (!hasValidResponse) {
|
|
2543
|
-
return false;
|
|
2544
|
-
}
|
|
2545
|
-
if (options?.mode === "pairwise") {
|
|
2546
|
-
return options.responseB !== void 0 && options.responseB.data !== void 0 && options.responseB.prompt !== void 0;
|
|
2547
|
-
}
|
|
2548
|
-
return true;
|
|
2549
|
-
}
|
|
2550
|
-
};
|
|
2551
|
-
/**
 * Extracts a JSON value from an LLM reply that may wrap it in extra text
 * (prose, markdown code fences, several objects, ...).
 *
 * Attempts, in order:
 *   1. parse the whole input;
 *   2. parse the span from the first "{" to the last "}";
 *   3. scan each "{" left to right and parse the first brace-balanced span
 *      that is valid JSON (braces inside string literals are ignored).
 * Step 3 recovers replies such as `{"a":1} ... {"b":2}` or replies with a
 * stray "}" after the object, where step 2's span is not valid JSON.
 *
 * @param maybeJSON Raw model output, normally a string.
 * @returns The parsed JSON value.
 * @throws {Error} "Failed to parse model response as JSON." when nothing parses,
 *   including when the input is not a string and does not parse directly.
 */
function extractFirstJSON(maybeJSON) {
  // Parse failures are expected here: each one just moves on to the next attempt.
  const tryParse = (text) => {
    try {
      return { ok: true, value: JSON.parse(text) };
    } catch {
      return { ok: false, value: void 0 };
    }
  };

  // Returns the index of the "}" closing the object opened at `open`, or -1.
  const findObjectEnd = (text, open) => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = open; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === "\\") {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        depth--;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  };

  const whole = tryParse(maybeJSON);
  if (whole.ok) {
    return whole.value;
  }

  // Only strings can be searched; anything else ends in the error below
  // rather than in a TypeError from calling `indexOf` on a non-string.
  if (typeof maybeJSON === "string") {
    const start = maybeJSON.indexOf("{");
    const end = maybeJSON.lastIndexOf("}");
    if (start >= 0 && end > start) {
      const widest = tryParse(maybeJSON.slice(start, end + 1));
      if (widest.ok) {
        return widest.value;
      }
      for (let open = start; open !== -1 && open < end; open = maybeJSON.indexOf("{", open + 1)) {
        const close = findObjectEnd(maybeJSON, open);
        if (close === -1) {
          continue;
        }
        const candidate = tryParse(maybeJSON.slice(open, close + 1));
        if (candidate.ok) {
          return candidate.value;
        }
      }
    }
  }

  throw new Error("Failed to parse model response as JSON.");
}
|
|
2567
|
-
/**
 * Zod schema for one rubric criterion:
 *   - `id` and `description` are required strings;
 *   - `weight` is a number in [0, 1], defaulting to 1;
 *   - `scale` is `{ min, max }`, defaulting to `{ min: 0, max: 5 }`, with each
 *     bound individually defaulting to 0 / 5.
 */
var CriterionSchema = (() => {
  const scaleSchema = z8.object({
    min: z8.number().default(0),
    max: z8.number().default(5)
  });
  const weightSchema = z8.number().min(0).max(1).default(1);

  return z8.object({
    id: z8.string(),
    description: z8.string(),
    weight: weightSchema,
    scale: scaleSchema.default({ min: 0, max: 5 })
  });
})();
|
|
2576
|
-
|
|
2577
|
-
// src/generators/mcq-generator.ts
|
|
2578
|
-
/**
 * Generates multiple-choice prompts by asking an LLM to write one question
 * (with options and an answer key) for every input item.
 */
var MCQGenerator = class extends AbstractGenerator {
  identifier = "mcq";
  inputSchema = z9.array(z9.any());
  optionsSchema = z9.object(
    {
      /**
       * The API key for the OpenRouter provider. Won't be used if `provider` is given.
       */
      openRouterApiKey: z9.string().optional(),
      /**
       * The Provider that is going to be used to generate the Prompts.
       */
      provider: z9.instanceof(BaseLLMProvider).optional(),
      /**
       * The model that is going to be used to generate the prompts
       */
      model: z9.string(),
      systemPromptPrefix: z9.string().optional().default(""),
      /**
       * Additional rules that will be appended to the system prompt.
       */
      systemPromptRules: z9.array(z9.string()).default([]),
      // `{rules}` is a placeholder filled in by `generatePrompts`.
      systemPrompt: z9.string().optional().default(`Take the input text and find a difficult question about the content. The question should test understanding of the key concepts, facts or relationships described in the text. The question should include enough context to answer it without the need to read the text. The question must not refer to the text in any way. The question must not want a very specific values such as p-values or time frames like when effect was on day. Each option must not be longer than 5-7 words.

You need to generate a multiple choice question with at least 8 options. Some of the options can be real terms from the domain, others can be plausible-sounding terms that you invent to sound authentic to the subject matter.
{rules}
Your output strictly forced to be a JSON object which applies the following schema:
\`\`\`json
{
"question": "Question text",
"options": {
"A": "Option A text",
"B": "Option B text",
"C": "Option C text",
"D": "Option D text"
// ....
},
"answerKey": "C",
"answer": "Option C text"
}
\`\`\`
`),
      systemPromptSuffix: z9.string().optional().default(""),
      /**
       * The main function that parses the input value to a string.
       */
      parseInput: z9.function().args(z9.any()).returns(z9.string()),
      /**
       * Whether to include the original data as a field inside the metadata.
       */
      includeOriginalInputAsMetadata: z9.boolean().optional().default(false),
      /**
       * Additional metadata that is going to be added to the each Prompt.
       * If a function is given then that function takes an argument which
       * is the input value for each item and generates the additional metadata.
       */
      additionalMetadata: z9.union([
        z9.function().args(z9.any(), z9.object({ systemPrompt: z9.string() })).returns(z9.record(z9.string(), z9.any())),
        z9.record(z9.string(), z9.any())
      ]).optional()
    },
    { message: "No options provided" }
  ).transform((options, ctx) => {
    // Guarantee that the parsed options always carry a provider: either the
    // one supplied by the caller or an OpenRouter provider built from the key.
    if (options.provider !== void 0) {
      return options;
    }
    if (options.openRouterApiKey !== void 0) {
      return {
        ...options,
        provider: new OpenRouterProvider({
          apiKey: options.openRouterApiKey
        })
      };
    }
    ctx.addIssue({
      code: z9.ZodIssueCode.custom,
      message: "No provider or openRouterApiKey provided"
    });
    return z9.NEVER;
  });

  /**
   * Generates one prompt per input item, in parallel.
   *
   * @param input Array of source items; each is passed to `options.parseInput`.
   * @param options Raw options, validated against `optionsSchema`.
   * @returns The generated prompts; items whose LLM reply could not be parsed
   *   (i.e. yielded `null`) are left out.
   */
  async generatePrompts(input, options) {
    const parsedOptions = this.optionsSchema.parse(options);
    const systemPrompt = formatString(
      parsedOptions.systemPromptPrefix + parsedOptions.systemPrompt + parsedOptions.systemPromptSuffix,
      {
        rules: parsedOptions.systemPromptRules.length > 0 ? `\nAlso strictly follow the rules below:\n${parsedOptions.systemPromptRules.join("\n")}\n` : ""
      }
    );
    const results = await Promise.all(
      input.map(
        (item) => this.generateFromInput(
          item,
          parsedOptions.provider,
          parsedOptions,
          systemPrompt
        )
      )
    );
    return results.filter((result) => result !== null);
  }

  /**
   * Generates a single multiple-choice prompt from one input item.
   *
   * @param input Source item.
   * @param provider Provider exposing `forward(text, { model, system })`.
   * @param options Parsed generator options.
   * @param systemPrompt Fully formatted system prompt.
   * @returns The built prompt, or `null` when the reply could not be parsed as JSON.
   */
  async generateFromInput(input, provider, options, systemPrompt) {
    const inputText = options.parseInput(input);
    const response = await provider.forward(inputText, {
      model: options.model,
      system: systemPrompt
    });
    const mcq = parseResponseAsJSON(response.data);
    if (!mcq) {
      debugLog("Couldn't parsed the LLM response as a JSON:", response.data);
      return null;
    }
    const fullPrompt = preparePrompt(mcq.question, mcq.options);
    const additionalMetadata = (typeof options.additionalMetadata === "function" ? options.additionalMetadata(input, { systemPrompt }) : options.additionalMetadata) || {};
    // Pull `tags` out before spreading the rest: a trailing spread of the whole
    // object would overwrite the merged `tags` array below and silently drop
    // the `generator-<identifier>` tag.
    const { tags: extraTags, ...extraMetadata } = additionalMetadata;
    return await this.buildPrompt({
      correctAnswer: mcq.answerKey || mcq.answer,
      question: mcq.question,
      options: mcq.options || {},
      fullPrompt,
      type: PromptTypes.MultipleChoice,
      metadata: {
        tags: [
          `generator-${this.identifier}`,
          // Expand the tags field if it is provided within the additional metadata
          ...Array.isArray(extraTags) ? extraTags : []
        ],
        model: options.model,
        generatorIdentifier: this.identifier,
        generatorClassName: this.constructor.name,
        // Input might be an object but input text will be a string
        originalSourceInput: options.includeOriginalInputAsMetadata ? input : void 0,
        originalSourceInputText: inputText,
        // Remaining caller metadata may still override the fields above.
        ...extraMetadata
      },
      scorers: [
        // TODO: We should be able to access identifier without instantiating an object
        new MultipleChoiceScorer().identifier,
        new RefAnswerEqualityLLMJudgeScorer().identifier,
        new ExactMatchScorer().identifier
      ]
    });
  }
};
|
|
2723
|
-
|
|
2724
|
-
// src/generators/open-ended-generator.ts
|
|
2725
|
-
import { z as z10 } from "zod";
|
|
2726
|
-
/**
 * Generates open-ended prompts by asking an LLM to write one question and its
 * reference answer for every input item.
 */
var OpenEndedGenerator = class extends AbstractGenerator {
  identifier = "open-ended";
  inputSchema = z10.array(z10.any());
  optionsSchema = z10.object(
    {
      /**
       * The API key for the OpenRouter provider. Won't be used if `provider` is given.
       */
      openRouterApiKey: z10.string().optional(),
      /**
       * The Provider that is going to be used to generate the Prompts.
       */
      provider: z10.instanceof(BaseLLMProvider).optional(),
      /**
       * The model that is going to be used to generate the prompts
       */
      model: z10.string(),
      /**
       * Adds an additional clue to the end of the question such as "Answer with a positive number"
       * which is relevant with the answer type expected.
       */
      addClueToQuestion: z10.boolean().optional().default(true),
      /**
       * Additional rules that will be appended to the system prompt.
       */
      systemPromptRules: z10.array(z10.string()).default([]),
      systemPromptPrefix: z10.string().optional().default(""),
      // `{clue}` and `{rules}` are placeholders filled in by `generatePrompts`.
      // NOTE(review): the "Each option must not be longer than 5-7 words."
      // sentence looks carried over from the multiple-choice prompt; open-ended
      // questions have no options - confirm whether it should stay.
      systemPrompt: z10.string().optional().default(`Take the input text and find a difficult question about the content. The question should test understanding of the key concepts, facts or relationships described in the text. The question should include enough context to answer it without the need to read the text. The question must not refer to the text in any way. The question must not want a very specific values such as p-values or time frames like when effect was on day. Each option must not be longer than 5-7 words. {clue}
{rules}
Your output strictly forced to be a JSON object which applies the following schema:
\`\`\`json
{
"question": "Question text",
"answer": "Correct answer"
}
\`\`\`
`),
      systemPromptSuffix: z10.string().optional().default(""),
      /**
       * The main function that parses the input value to a string.
       */
      parseInput: z10.function().args(z10.any()).returns(z10.string()),
      /**
       * Whether to include the original as a field inside the metadata.
       */
      includeOriginalInputAsMetadata: z10.boolean().optional().default(false),
      /**
       * Additional metadata that is going to be added to the each Prompt.
       * If a function is given then that function takes an argument which
       * is the input value for each item and generates the additional metadata.
       */
      additionalMetadata: z10.union([
        z10.function().args(z10.any(), z10.object({ systemPrompt: z10.string() })).returns(z10.record(z10.string(), z10.any())),
        z10.record(z10.string(), z10.any())
      ]).optional()
    },
    { message: "No options provided" }
  ).transform((options, ctx) => {
    // Guarantee that the parsed options always carry a provider: either the
    // one supplied by the caller or an OpenRouter provider built from the key.
    if (options.provider !== void 0) {
      return options;
    }
    if (options.openRouterApiKey !== void 0) {
      return {
        ...options,
        provider: new OpenRouterProvider({
          apiKey: options.openRouterApiKey
        })
      };
    }
    ctx.addIssue({
      code: z10.ZodIssueCode.custom,
      message: "No provider or openRouterApiKey provided"
    });
    return z10.NEVER;
  });

  /**
   * Generates one prompt per input item, in parallel.
   *
   * @param input Array of source items; each is passed to `options.parseInput`.
   * @param options Raw options, validated against `optionsSchema`.
   * @returns The generated prompts; items whose LLM reply could not be parsed
   *   (i.e. yielded `null`) are left out.
   */
  async generatePrompts(input, options) {
    const parsedOptions = this.optionsSchema.parse(options);
    // The schema transform above already resolved the provider (caller-supplied
    // or built from `openRouterApiKey`), so there is no need to build another.
    const provider = parsedOptions.provider;
    const systemPrompt = formatString(
      parsedOptions.systemPromptPrefix + parsedOptions.systemPrompt + parsedOptions.systemPromptSuffix,
      {
        clue: parsedOptions.addClueToQuestion ? `The question must ends with a sentence that says "Answer with ..." where "..." represents the type of answer expected.` : "",
        rules: parsedOptions.systemPromptRules.length > 0 ? `\nAlso strictly follow the rules below:\n${parsedOptions.systemPromptRules.join("\n")}\n` : ""
      }
    );
    const results = await Promise.all(
      input.map(
        (item) => this.generateFromInput(item, provider, parsedOptions, systemPrompt)
      )
    );
    return results.filter((result) => result !== null);
  }

  /**
   * Generates a single open-ended prompt from one input item.
   *
   * @param input Source item.
   * @param provider Provider exposing `forward(text, { model, system })`.
   * @param options Parsed generator options.
   * @param systemPrompt Fully formatted system prompt.
   * @returns The built prompt, or `null` when the reply could not be parsed as JSON.
   */
  async generateFromInput(input, provider, options, systemPrompt) {
    const inputText = options.parseInput(input);
    const response = await provider.forward(inputText, {
      model: options.model,
      system: systemPrompt
    });
    const q = parseResponseAsJSON(response.data);
    if (!q) {
      debugLog("Couldn't parsed the LLM response as a JSON:", response.data);
      return null;
    }
    const additionalMetadata = (typeof options.additionalMetadata === "function" ? options.additionalMetadata(input, { systemPrompt }) : options.additionalMetadata) || {};
    // Pull `tags` out before spreading the rest: a trailing spread of the whole
    // object would overwrite the merged `tags` array below and silently drop
    // the `generator-<identifier>` tag.
    const { tags: extraTags, ...extraMetadata } = additionalMetadata;
    return this.buildPrompt({
      correctAnswer: q.answer,
      question: q.question,
      options: {},
      fullPrompt: q.question,
      type: PromptTypes.OpenEnded,
      metadata: {
        tags: [
          `generator-${this.identifier}`,
          // Expand the tags field if it is provided within the additional metadata
          ...Array.isArray(extraTags) ? extraTags : []
        ],
        brainModel: options.model,
        model: options.model,
        promptGenClassName: this.constructor.name,
        promptGenJSClassName: this.constructor.name,
        // Input might be an object but input text will be a string
        originalSourceInput: options.includeOriginalInputAsMetadata ? input : void 0,
        originalSourceInputText: inputText,
        // Remaining caller metadata may still override the fields above.
        ...extraMetadata
      },
      scorers: [
        // TODO: We should be able to access identifier without instantiating an object
        new RefAnswerEqualityLLMJudgeScorer().identifier
      ]
    });
  }
};
|
|
2863
|
-
|
|
2864
|
-
// src/registries/abstract/abstract-registry.ts
|
|
2865
|
-
/**
 * Base class for registries. It declares no members of its own.
 */
var AbstractRegistry = class {};
|
|
2867
|
-
|
|
2868
|
-
// src/registries/peerbench-registry.ts
|
|
2869
|
-
import { createClient } from "@supabase/supabase-js";
|
|
2870
|
-
import axios2 from "axios";
|
|
2871
|
-
var PeerBenchRegistry = class _PeerBenchRegistry extends AbstractRegistry {
|
|
2872
|
-
// TODO: Find a better way to force sub classes to define a static identifier
|
|
2873
|
-
// Static accessor for the identifier
|
|
2874
|
-
static identifier = "peerbench";
|
|
2875
|
-
identifier = _PeerBenchRegistry.identifier;
|
|
2876
|
-
token;
|
|
2877
|
-
supabaseClient;
|
|
2878
|
-
apiURL;
|
|
2879
|
-
session = null;
|
|
2880
|
-
email;
|
|
2881
|
-
password;
|
|
2882
|
-
authMethod;
|
|
2883
|
-
refreshTokenInterval;
|
|
2884
|
-
isRefreshingToken = false;
|
|
2885
|
-
isClosed = false;
|
|
2886
|
-
isInitialized = false;
|
|
2887
|
-
tokenRefresher;
|
|
2888
|
-
constructor(options) {
|
|
2889
|
-
super();
|
|
2890
|
-
this.tokenRefresher = options.tokenRefresher ?? false;
|
|
2891
|
-
this.authMethod = options.authMethod ?? "token";
|
|
2892
|
-
this.apiURL = options.peerbenchApiURL;
|
|
2893
|
-
if (options.authMethod === "token" || options.authMethod === void 0) {
|
|
2894
|
-
this.supabaseClient = createClient(
|
|
2895
|
-
options.peerbenchSupabaseURL,
|
|
2896
|
-
options.peerbenchSupabaseAnonKey
|
|
2897
|
-
);
|
|
2898
|
-
this.email = options.email;
|
|
2899
|
-
this.password = options.password;
|
|
2900
|
-
}
|
|
2901
|
-
}
|
|
2902
|
-
async uploadPrompts(prompts, options) {
|
|
2903
|
-
if (prompts.length === 0) {
|
|
2904
|
-
return 0;
|
|
2905
|
-
}
|
|
2906
|
-
await this.init();
|
|
2907
|
-
const account = options?.account;
|
|
2908
|
-
if (account && account.signMessage) {
|
|
2909
|
-
prompts = await Promise.all(
|
|
2910
|
-
prompts.map(async (prompt) => {
|
|
2911
|
-
const promptWithSignature = {
|
|
2912
|
-
...prompt
|
|
2913
|
-
};
|
|
2914
|
-
const promptStringified = stableStringify(prompt);
|
|
2915
|
-
const hash = await calculateCID(promptStringified).then(
|
|
2916
|
-
(c) => c.toString()
|
|
2917
|
-
);
|
|
2918
|
-
const signature = await account.signMessage({
|
|
2919
|
-
message: hash
|
|
2920
|
-
});
|
|
2921
|
-
promptWithSignature.signature = signature;
|
|
2922
|
-
promptWithSignature.publicKey = account.address;
|
|
2923
|
-
promptWithSignature.signatureType = "cid";
|
|
2924
|
-
promptWithSignature.keyType = "secp256k1n";
|
|
2925
|
-
return promptWithSignature;
|
|
2926
|
-
})
|
|
2927
|
-
);
|
|
2928
|
-
}
|
|
2929
|
-
const res = await axios2.post(
|
|
2930
|
-
`${this.apiURL}/api/v2/prompts`,
|
|
2931
|
-
{
|
|
2932
|
-
promptSetId: options.promptSetId,
|
|
2933
|
-
prompts
|
|
2934
|
-
},
|
|
2935
|
-
{
|
|
2936
|
-
withCredentials: this.authMethod === "cookie",
|
|
2937
|
-
headers: {
|
|
2938
|
-
Authorization: this.authMethod === "token" ? `Bearer ${this.token}` : void 0,
|
|
2939
|
-
"Content-Type": "application/json"
|
|
2940
|
-
}
|
|
2941
|
-
}
|
|
2942
|
-
);
|
|
2943
|
-
if (res.status !== 200) {
|
|
2944
|
-
throw new Error(
|
|
2945
|
-
`Failed to upload Prompts: ${res?.data?.message || JSON.stringify(res?.data || "No response available")}`
|
|
2946
|
-
);
|
|
2947
|
-
}
|
|
2948
|
-
return prompts.length;
|
|
2949
|
-
}
|
|
2950
|
-
async uploadResponses(responses, options) {
|
|
2951
|
-
if (responses.length === 0) {
|
|
2952
|
-
return 0;
|
|
2953
|
-
}
|
|
2954
|
-
await this.init();
|
|
2955
|
-
const account = options?.account;
|
|
2956
|
-
if (account && account.signMessage) {
|
|
2957
|
-
responses = await Promise.all(
|
|
2958
|
-
responses.map(async (response) => {
|
|
2959
|
-
const responseWithSignature = {
|
|
2960
|
-
...response
|
|
2961
|
-
};
|
|
2962
|
-
const responseStringified = stableStringify(response);
|
|
2963
|
-
const hash = await calculateCID(responseStringified).then(
|
|
2964
|
-
(c) => c.toString()
|
|
2965
|
-
);
|
|
2966
|
-
const signature = await account.signMessage({
|
|
2967
|
-
message: hash
|
|
2968
|
-
});
|
|
2969
|
-
responseWithSignature.signature = signature;
|
|
2970
|
-
responseWithSignature.publicKey = account.address;
|
|
2971
|
-
responseWithSignature.signatureType = "cid";
|
|
2972
|
-
responseWithSignature.keyType = "secp256k1n";
|
|
2973
|
-
return responseWithSignature;
|
|
2974
|
-
})
|
|
2975
|
-
);
|
|
2976
|
-
}
|
|
2977
|
-
const res = await axios2.post(
|
|
2978
|
-
`${this.apiURL}/api/v2/responses`,
|
|
2979
|
-
{ responses },
|
|
2980
|
-
{
|
|
2981
|
-
withCredentials: this.authMethod === "cookie",
|
|
2982
|
-
headers: {
|
|
2983
|
-
Authorization: this.authMethod === "token" ? `Bearer ${this.token}` : void 0,
|
|
2984
|
-
"Content-Type": "application/json"
|
|
2985
|
-
}
|
|
2986
|
-
}
|
|
2987
|
-
);
|
|
2988
|
-
if (res.status !== 200) {
|
|
2989
|
-
throw new Error(
|
|
2990
|
-
`Failed to upload Responses: ${res?.data?.message || JSON.stringify(res?.data || "No response available")}`
|
|
2991
|
-
);
|
|
2992
|
-
}
|
|
2993
|
-
return responses.length;
|
|
2994
|
-
}
|
|
2995
|
-
async uploadScores(scores, options) {
|
|
2996
|
-
if (scores.length === 0) {
|
|
2997
|
-
return 0;
|
|
2998
|
-
}
|
|
2999
|
-
await this.init();
|
|
3000
|
-
scores = await Promise.all(
|
|
3001
|
-
scores.map(async (score) => {
|
|
3002
|
-
if (!score.prompt) {
|
|
3003
|
-
throw new Error("Score must have a prompt for hash registration");
|
|
3004
|
-
}
|
|
3005
|
-
if (!score.data) {
|
|
3006
|
-
throw new Error("Score must have data for hash registration");
|
|
3007
|
-
}
|
|
3008
|
-
const promptStringified = stableStringify(score.prompt);
|
|
3009
|
-
const promptHashSha256Registration = await calculateSHA256(promptStringified);
|
|
3010
|
-
const promptHashCIDRegistration = await calculateCID(
|
|
3011
|
-
promptStringified
|
|
3012
|
-
).then((c) => c.toString());
|
|
3013
|
-
const responseObject = PromptResponseSchema.parse(score);
|
|
3014
|
-
const responseStringified = stableStringify(responseObject);
|
|
3015
|
-
const responseHashSha256Registration = await calculateSHA256(responseStringified);
|
|
3016
|
-
const responseHashCIDRegistration = await calculateCID(
|
|
3017
|
-
responseStringified
|
|
3018
|
-
).then((c) => c.toString());
|
|
3019
|
-
const scoreWithRegistration = {
|
|
3020
|
-
...score,
|
|
3021
|
-
responseHashSha256Registration,
|
|
3022
|
-
responseHashCIDRegistration,
|
|
3023
|
-
promptHashSha256Registration,
|
|
3024
|
-
promptHashCIDRegistration
|
|
3025
|
-
};
|
|
3026
|
-
if (options?.account && options.account.signMessage) {
|
|
3027
|
-
const scoreStringified = stableStringify(score);
|
|
3028
|
-
const hash = await calculateCID(scoreStringified).then(
|
|
3029
|
-
(c) => c.toString()
|
|
3030
|
-
);
|
|
3031
|
-
const signature = await options.account.signMessage({
|
|
3032
|
-
message: hash
|
|
3033
|
-
});
|
|
3034
|
-
scoreWithRegistration.signature = signature;
|
|
3035
|
-
scoreWithRegistration.publicKey = options.account.address;
|
|
3036
|
-
scoreWithRegistration.signatureType = "cid";
|
|
3037
|
-
scoreWithRegistration.keyType = "secp256k1n";
|
|
3038
|
-
}
|
|
3039
|
-
return scoreWithRegistration;
|
|
3040
|
-
})
|
|
3041
|
-
);
|
|
3042
|
-
const res = await axios2.post(
|
|
3043
|
-
`${this.apiURL}/api/v2/scores`,
|
|
3044
|
-
{ scores },
|
|
3045
|
-
{
|
|
3046
|
-
withCredentials: this.authMethod === "cookie",
|
|
3047
|
-
headers: {
|
|
3048
|
-
Authorization: this.authMethod === "token" ? `Bearer ${this.token}` : void 0,
|
|
3049
|
-
"Content-Type": "application/json"
|
|
3050
|
-
}
|
|
3051
|
-
}
|
|
3052
|
-
);
|
|
3053
|
-
if (res.status !== 200) {
|
|
3054
|
-
throw new Error(
|
|
3055
|
-
`Failed to upload Scores: ${res?.data?.message || JSON.stringify(res?.data || "No response available")}`
|
|
3056
|
-
);
|
|
3057
|
-
}
|
|
3058
|
-
return scores.length;
|
|
3059
|
-
}
|
|
3060
|
-
/**
|
|
3061
|
-
* Clears the interval execution for refreshing the token.
|
|
3062
|
-
*/
|
|
3063
|
-
async clearRefreshInterval() {
|
|
3064
|
-
this.isClosed = true;
|
|
3065
|
-
clearInterval(this.refreshTokenInterval);
|
|
3066
|
-
}
|
|
3067
|
-
async init() {
|
|
3068
|
-
if (this.isClosed || this.isInitialized || this.authMethod === "cookie") {
|
|
3069
|
-
return;
|
|
3070
|
-
}
|
|
3071
|
-
const authData = await this.login(this.email, this.password);
|
|
3072
|
-
this.session = authData?.session || null;
|
|
3073
|
-
this.token = this.session?.access_token;
|
|
3074
|
-
if (!this.token) {
|
|
3075
|
-
throw new Error(
|
|
3076
|
-
`Failed authentication with peerBench: No token received`
|
|
3077
|
-
);
|
|
3078
|
-
}
|
|
3079
|
-
if (this.tokenRefresher) {
|
|
3080
|
-
this.refreshTokenInterval = setInterval(
|
|
3081
|
-
() => this.refreshToken(),
|
|
3082
|
-
(this.session.expires_in - 15 * 60) * 1e3
|
|
3083
|
-
);
|
|
3084
|
-
}
|
|
3085
|
-
this.isInitialized = true;
|
|
3086
|
-
}
|
|
3087
|
-
/**
|
|
3088
|
-
* Refreshes the token if it is about to expire.
|
|
3089
|
-
*/
|
|
3090
|
-
async refreshToken() {
|
|
3091
|
-
if (this.isClosed || this.authMethod === "cookie") {
|
|
3092
|
-
return;
|
|
3093
|
-
}
|
|
3094
|
-
if (this.isRefreshingToken) {
|
|
3095
|
-
return;
|
|
3096
|
-
}
|
|
3097
|
-
this.isRefreshingToken = true;
|
|
3098
|
-
while (!this.isClosed) {
|
|
3099
|
-
try {
|
|
3100
|
-
const { data, error } = await this.supabaseClient.auth.refreshSession(
|
|
3101
|
-
this.session || void 0
|
|
3102
|
-
);
|
|
3103
|
-
if (error) {
|
|
3104
|
-
throw new Error(error.message);
|
|
3105
|
-
}
|
|
3106
|
-
this.session = data.session;
|
|
3107
|
-
this.token = this.session?.access_token;
|
|
3108
|
-
break;
|
|
3109
|
-
} catch (err) {
|
|
3110
|
-
console.error(`PeerBenchRegistry: Failed to refresh token`, err);
|
|
3111
|
-
console.error(`PeerBenchRegistry: Retrying in 10 seconds`);
|
|
3112
|
-
await sleep(1e4);
|
|
3113
|
-
}
|
|
3114
|
-
}
|
|
3115
|
-
this.isRefreshingToken = false;
|
|
3116
|
-
}
|
|
3117
|
-
async login(email, password) {
|
|
3118
|
-
const { data, error } = await this.supabaseClient.auth.signInWithPassword({
|
|
3119
|
-
email,
|
|
3120
|
-
password
|
|
3121
|
-
});
|
|
3122
|
-
if (error) {
|
|
3123
|
-
throw new Error(`Failed login to peerBench: ${error.message}`);
|
|
3124
|
-
}
|
|
3125
|
-
if (!data.session) {
|
|
3126
|
-
throw new Error(`No session returned from peerBench authentication`);
|
|
3127
|
-
}
|
|
3128
|
-
return data;
|
|
3129
|
-
}
|
|
3130
|
-
};
|
|
3131
|
-
|
|
3132
|
-
// src/parsers/abstract-parser.ts
|
|
3133
|
-
var AbstractParser = class {
|
|
3134
|
-
/**
|
|
3135
|
-
* String identifier of the parser. Must be
|
|
3136
|
-
* set to distunguish the parser from the others.
|
|
3137
|
-
*/
|
|
3138
|
-
static identifier = "";
|
|
3139
|
-
/**
|
|
3140
|
-
* Returns the identifier of the parser.
|
|
3141
|
-
*/
|
|
3142
|
-
getIdentifier() {
|
|
3143
|
-
return this.constructor?.identifier || this?.identifier;
|
|
3144
|
-
}
|
|
3145
|
-
};
|
|
3146
|
-
|
|
3147
|
-
// src/parsers/generic-array.ts
|
|
3148
|
-
var GenericArrayParser = class extends AbstractParser {
|
|
3149
|
-
static identifier = "generic-array";
|
|
3150
|
-
promptBuilder;
|
|
3151
|
-
responseBuilder;
|
|
3152
|
-
scoreBuilder;
|
|
3153
|
-
constructor(options) {
|
|
3154
|
-
super();
|
|
3155
|
-
this.promptBuilder = options.promptBuilder;
|
|
3156
|
-
this.responseBuilder = options.responseBuilder;
|
|
3157
|
-
this.scoreBuilder = options.scoreBuilder;
|
|
3158
|
-
}
|
|
3159
|
-
async parseFile(path) {
|
|
3160
|
-
const content = await readFile(path);
|
|
3161
|
-
return this.parseContent(content);
|
|
3162
|
-
}
|
|
3163
|
-
async parseContent(content) {
|
|
3164
|
-
let data;
|
|
3165
|
-
const contentString = typeof content === "string" ? content : bufferToString(content);
|
|
3166
|
-
data = tryParseJson(contentString);
|
|
3167
|
-
if (!data) {
|
|
3168
|
-
data = parseJSONL(contentString);
|
|
3169
|
-
}
|
|
3170
|
-
if (!data || !Array.isArray(data) || data.length === 0) {
|
|
3171
|
-
throw new InvalidDataError();
|
|
3172
|
-
}
|
|
3173
|
-
const includedPromptCIDs = {};
|
|
3174
|
-
const includedResponseDIDs = {};
|
|
3175
|
-
const includedScoreDIDs = {};
|
|
3176
|
-
const result = {
|
|
3177
|
-
prompts: [],
|
|
3178
|
-
responses: [],
|
|
3179
|
-
scores: []
|
|
3180
|
-
};
|
|
3181
|
-
for (const item of data) {
|
|
3182
|
-
const prompt = await this.promptBuilder(item, { result });
|
|
3183
|
-
let response;
|
|
3184
|
-
let score;
|
|
3185
|
-
if (prompt) {
|
|
3186
|
-
if (!includedPromptCIDs[prompt.fullPrompt.cid]) {
|
|
3187
|
-
includedPromptCIDs[prompt.fullPrompt.cid] = prompt.did;
|
|
3188
|
-
result.prompts.push(prompt);
|
|
3189
|
-
}
|
|
3190
|
-
}
|
|
3191
|
-
if (this.responseBuilder) {
|
|
3192
|
-
response = await this.responseBuilder(item, { result, prompt });
|
|
3193
|
-
if (response) {
|
|
3194
|
-
if (includedPromptCIDs[response.prompt.fullPrompt.cid]) {
|
|
3195
|
-
const cid = response.prompt.fullPrompt.cid;
|
|
3196
|
-
response.prompt = result.prompts.find(
|
|
3197
|
-
(p) => p.fullPrompt.cid === cid
|
|
3198
|
-
);
|
|
3199
|
-
}
|
|
3200
|
-
if (!includedResponseDIDs[response.did]) {
|
|
3201
|
-
includedResponseDIDs[response.did] = response.did;
|
|
3202
|
-
result.responses.push(response);
|
|
3203
|
-
}
|
|
3204
|
-
}
|
|
3205
|
-
}
|
|
3206
|
-
if (this.scoreBuilder) {
|
|
3207
|
-
score = await this.scoreBuilder(item, { result, prompt, response });
|
|
3208
|
-
if (score) {
|
|
3209
|
-
if (score.prompt && includedPromptCIDs[score.prompt.fullPrompt.cid]) {
|
|
3210
|
-
const cid = score.prompt.fullPrompt.cid;
|
|
3211
|
-
score.prompt = result.prompts.find(
|
|
3212
|
-
(p) => p.fullPrompt.cid === cid
|
|
3213
|
-
);
|
|
3214
|
-
}
|
|
3215
|
-
if (!includedScoreDIDs[score.did]) {
|
|
3216
|
-
includedScoreDIDs[score.did] = score.did;
|
|
3217
|
-
result.scores.push(score);
|
|
3218
|
-
}
|
|
3219
|
-
}
|
|
3220
|
-
}
|
|
3221
|
-
}
|
|
3222
|
-
return result;
|
|
3223
|
-
}
|
|
3224
|
-
};
|
|
3225
|
-
|
|
3226
|
-
// src/parsers/medqa.ts
|
|
3227
|
-
import { parquetReadObjects } from "hyparquet";
|
|
3228
|
-
import { z as z11 } from "zod";
|
|
3229
|
-
var MedQAPromptObjectSchema = z11.object({
|
|
3230
|
-
question: z11.string(),
|
|
3231
|
-
options: z11.record(z11.string(), z11.string()),
|
|
3232
|
-
answer: z11.string(),
|
|
3233
|
-
answer_idx: z11.string(),
|
|
3234
|
-
meta_info: z11.string().optional()
|
|
3235
|
-
});
|
|
3236
|
-
var MedQAParser = class extends AbstractParser {
|
|
3237
|
-
static identifier = "medqa";
|
|
3238
|
-
async parseFile(path) {
|
|
3239
|
-
const content = await readFile(path);
|
|
3240
|
-
return this.parseContent(content);
|
|
3241
|
-
}
|
|
3242
|
-
async parseContent(content) {
|
|
3243
|
-
let data;
|
|
3244
|
-
if (content instanceof ArrayBuffer) {
|
|
3245
|
-
try {
|
|
3246
|
-
data = await parquetReadObjects({ file: content });
|
|
3247
|
-
} catch {
|
|
3248
|
-
}
|
|
3249
|
-
}
|
|
3250
|
-
if (!data) {
|
|
3251
|
-
const contentString = typeof content === "string" ? content : bufferToString(content);
|
|
3252
|
-
data = tryParseJson(contentString);
|
|
3253
|
-
if (!data) {
|
|
3254
|
-
data = parseJSONL(contentString);
|
|
3255
|
-
}
|
|
3256
|
-
}
|
|
3257
|
-
if (!data || data.length == 0) {
|
|
3258
|
-
throw new InvalidDataError();
|
|
3259
|
-
}
|
|
3260
|
-
const prompts = [];
|
|
3261
|
-
const validation = z11.array(MedQAPromptObjectSchema).safeParse(data);
|
|
3262
|
-
if (!validation.success) {
|
|
3263
|
-
throw new ParserIsNotCompatibleError();
|
|
3264
|
-
}
|
|
3265
|
-
const validatedData = validation.data;
|
|
3266
|
-
let rowNumber = 0;
|
|
3267
|
-
for (const rawPrompt of validatedData) {
|
|
3268
|
-
const fullPrompt = preparePrompt(rawPrompt.question, rawPrompt.options);
|
|
3269
|
-
prompts.push(
|
|
3270
|
-
await buildPrompt({
|
|
3271
|
-
options: rawPrompt.options,
|
|
3272
|
-
question: rawPrompt.question,
|
|
3273
|
-
type: PromptTypes.MultipleChoice,
|
|
3274
|
-
answer: rawPrompt.answer,
|
|
3275
|
-
answerKey: rawPrompt.answer_idx,
|
|
3276
|
-
fullPrompt,
|
|
3277
|
-
metadata: {
|
|
3278
|
-
medqaCategory: rawPrompt.meta_info,
|
|
3279
|
-
rowNumberInSource: rowNumber
|
|
3280
|
-
}
|
|
3281
|
-
})
|
|
3282
|
-
);
|
|
3283
|
-
rowNumber++;
|
|
3284
|
-
}
|
|
3285
|
-
return {
|
|
3286
|
-
prompts,
|
|
3287
|
-
responses: [],
|
|
3288
|
-
scores: []
|
|
3289
|
-
};
|
|
3290
|
-
}
|
|
3291
|
-
};
|
|
3292
|
-
|
|
3293
|
-
// src/parsers/mmlu-pro.ts
|
|
3294
|
-
import { parquetReadObjects as parquetReadObjects2 } from "hyparquet";
|
|
3295
|
-
import { z as z12 } from "zod";
|
|
3296
|
-
var MMLUProTaskZodSchema = z12.object({
|
|
3297
|
-
question_id: z12.coerce.number(),
|
|
3298
|
-
question: z12.string(),
|
|
3299
|
-
options: z12.array(z12.string()),
|
|
3300
|
-
answer: z12.string(),
|
|
3301
|
-
answer_index: z12.coerce.number(),
|
|
3302
|
-
cot_content: z12.string(),
|
|
3303
|
-
category: z12.string(),
|
|
3304
|
-
src: z12.string()
|
|
3305
|
-
});
|
|
3306
|
-
var MMLUProParser = class extends AbstractParser {
|
|
3307
|
-
static identifier = "mmlu-pro";
|
|
3308
|
-
async parseFile(path) {
|
|
3309
|
-
const content = await readFile(path);
|
|
3310
|
-
return this.parseContent(content);
|
|
3311
|
-
}
|
|
3312
|
-
async parseContent(content) {
|
|
3313
|
-
let data;
|
|
3314
|
-
if (content instanceof ArrayBuffer) {
|
|
3315
|
-
try {
|
|
3316
|
-
data = await parquetReadObjects2({ file: content });
|
|
3317
|
-
} catch {
|
|
3318
|
-
}
|
|
3319
|
-
}
|
|
3320
|
-
if (!data) {
|
|
3321
|
-
const contentString = typeof content === "string" ? content : bufferToString(content);
|
|
3322
|
-
data = tryParseJson(contentString);
|
|
3323
|
-
if (!data) {
|
|
3324
|
-
data = parseJSONL(contentString);
|
|
3325
|
-
}
|
|
3326
|
-
}
|
|
3327
|
-
if (!data || data.length == 0) {
|
|
3328
|
-
throw new InvalidDataError();
|
|
3329
|
-
}
|
|
3330
|
-
const prompts = [];
|
|
3331
|
-
const validation = z12.array(MMLUProTaskZodSchema).safeParse(data);
|
|
3332
|
-
if (!validation.success) {
|
|
3333
|
-
throw new ParserIsNotCompatibleError();
|
|
3334
|
-
}
|
|
3335
|
-
const validatedData = validation.data;
|
|
3336
|
-
let rowNumber = 0;
|
|
3337
|
-
for (const rawPrompt of validatedData) {
|
|
3338
|
-
const options = {};
|
|
3339
|
-
let answerKey = "";
|
|
3340
|
-
for (let i = 0; i < rawPrompt.options.length; i++) {
|
|
3341
|
-
const option = rawPrompt.options[i];
|
|
3342
|
-
const letter = String.fromCharCode(65 + i);
|
|
3343
|
-
options[letter] = option;
|
|
3344
|
-
if (i === rawPrompt.answer_index) {
|
|
3345
|
-
answerKey = letter;
|
|
3346
|
-
}
|
|
3347
|
-
}
|
|
3348
|
-
const fullPrompt = preparePrompt(rawPrompt.question, options);
|
|
3349
|
-
prompts.push(
|
|
3350
|
-
await buildPrompt({
|
|
3351
|
-
options,
|
|
3352
|
-
question: rawPrompt.question,
|
|
3353
|
-
answerKey,
|
|
3354
|
-
answer: rawPrompt.options[rawPrompt.answer_index],
|
|
3355
|
-
fullPrompt,
|
|
3356
|
-
type: PromptTypes.MultipleChoice,
|
|
3357
|
-
metadata: {
|
|
3358
|
-
mmluProCategory: rawPrompt.category,
|
|
3359
|
-
rowNumberInSource: rowNumber,
|
|
3360
|
-
mmluProQuestionId: rawPrompt.question_id,
|
|
3361
|
-
mmluProCotContent: rawPrompt.cot_content,
|
|
3362
|
-
mmluProSource: rawPrompt.src
|
|
3363
|
-
}
|
|
3364
|
-
})
|
|
3365
|
-
);
|
|
3366
|
-
rowNumber++;
|
|
3367
|
-
}
|
|
3368
|
-
return {
|
|
3369
|
-
prompts,
|
|
3370
|
-
responses: [],
|
|
3371
|
-
scores: []
|
|
3372
|
-
};
|
|
3373
|
-
}
|
|
3374
|
-
};
|
|
3375
|
-
|
|
3376
|
-
// src/parsers/pb.ts
|
|
3377
|
-
var PBParser = class extends GenericArrayParser {
|
|
3378
|
-
static identifier = "pb";
|
|
3379
|
-
constructor() {
|
|
3380
|
-
super({
|
|
3381
|
-
promptBuilder: (data) => {
|
|
3382
|
-
const promptValidation = PromptSchema.safeParse(data);
|
|
3383
|
-
if (!promptValidation.success) {
|
|
3384
|
-
const responseValidation = PromptResponseSchema.safeParse(data);
|
|
3385
|
-
if (!responseValidation.success) {
|
|
3386
|
-
return;
|
|
3387
|
-
}
|
|
3388
|
-
return responseValidation.data.prompt;
|
|
3389
|
-
}
|
|
3390
|
-
return promptValidation.data;
|
|
3391
|
-
},
|
|
3392
|
-
responseBuilder: (data) => PromptResponseSchema.safeParse(data).data,
|
|
3393
|
-
scoreBuilder: (data, context) => {
|
|
3394
|
-
const validation = PromptScoreSchema.safeParse(data);
|
|
3395
|
-
if (!validation.success) {
|
|
3396
|
-
if (!context.prompt && !context.response) {
|
|
3397
|
-
throw new InvalidDataError();
|
|
3398
|
-
}
|
|
3399
|
-
return;
|
|
3400
|
-
}
|
|
3401
|
-
return validation.data;
|
|
3402
|
-
}
|
|
3403
|
-
});
|
|
3404
|
-
}
|
|
3405
|
-
};
|
|
3406
|
-
|
|
3407
|
-
// src/parsers/data-parser.ts
|
|
3408
|
-
var DataParser = class {
|
|
3409
|
-
static parsers = /* @__PURE__ */ new Map();
|
|
3410
|
-
static {
|
|
3411
|
-
const medqa = new MedQAParser();
|
|
3412
|
-
this.parsers.set(MedQAParser.identifier, medqa);
|
|
3413
|
-
const mmluPro = new MMLUProParser();
|
|
3414
|
-
this.parsers.set(MMLUProParser.identifier, mmluPro);
|
|
3415
|
-
const pb = new PBParser();
|
|
3416
|
-
this.parsers.set(PBParser.identifier, pb);
|
|
3417
|
-
}
|
|
3418
|
-
/**
|
|
3419
|
-
* Tries to parse data from a file path with one of the available parsers.
|
|
3420
|
-
* @example
|
|
3421
|
-
* ```typescript
|
|
3422
|
-
* const { result, parser } = await DataParser.parseFile("path/to/file.json");
|
|
3423
|
-
* ```
|
|
3424
|
-
*/
|
|
3425
|
-
static async parseFile(path) {
|
|
3426
|
-
if (typeof window === "undefined") {
|
|
3427
|
-
const { statSync } = await import("node:fs");
|
|
3428
|
-
if (!statSync(path, { throwIfNoEntry: false })?.isFile()) {
|
|
3429
|
-
throw new Error(`File does not exist: ${path}`);
|
|
3430
|
-
}
|
|
3431
|
-
for (const parser of this.parsers.values()) {
|
|
3432
|
-
try {
|
|
3433
|
-
return {
|
|
3434
|
-
result: await parser.parseFile(path),
|
|
3435
|
-
parser
|
|
3436
|
-
};
|
|
3437
|
-
} catch {
|
|
3438
|
-
continue;
|
|
3439
|
-
}
|
|
3440
|
-
}
|
|
3441
|
-
throw new Error(`No parser could parse the file: ${path}`);
|
|
3442
|
-
} else {
|
|
3443
|
-
throw new Error(
|
|
3444
|
-
"File system operations are not supported in browser environment. Use `parseContent` instead."
|
|
3445
|
-
);
|
|
3446
|
-
}
|
|
3447
|
-
}
|
|
3448
|
-
/**
|
|
3449
|
-
* Tries to parse data from a string or ArrayBuffer content with one of the available parsers.
|
|
3450
|
-
* @example
|
|
3451
|
-
* ```typescript
|
|
3452
|
-
* // With string content
|
|
3453
|
-
* const { result, parser } = await DataParser.parseContent('{"question": "What is...", ...}');
|
|
3454
|
-
*
|
|
3455
|
-
* // With file upload in browser
|
|
3456
|
-
* const file = event.target.files[0];
|
|
3457
|
-
* const content = await file.arrayBuffer();
|
|
3458
|
-
* const { result, parser } = await DataParser.parseContent(content);
|
|
3459
|
-
* ```
|
|
3460
|
-
*/
|
|
3461
|
-
static async parseContent(content) {
|
|
3462
|
-
for (const parser of this.parsers.values()) {
|
|
3463
|
-
try {
|
|
3464
|
-
return {
|
|
3465
|
-
result: await parser.parseContent(content),
|
|
3466
|
-
parser
|
|
3467
|
-
};
|
|
3468
|
-
} catch {
|
|
3469
|
-
continue;
|
|
3470
|
-
}
|
|
3471
|
-
}
|
|
3472
|
-
throw new Error("No parser could parse the content");
|
|
3473
|
-
}
|
|
3474
|
-
};
|
|
3475
|
-
|
|
3476
|
-
// src/constants/system-prompts.ts
|
|
3477
|
-
var MULTIPLE_CHOICE_SYSTEM_PROMPT = `Your explanation can't be longer than 400 tokens. The last sentence must be formatted as one of the following:
|
|
3478
|
-
- The answer is <answer letter>
|
|
3479
|
-
- The answer is **<answer letter>**
|
|
3480
|
-
- <answer letter>: ...
|
|
3481
|
-
- <answer letter>) ...
|
|
3482
|
-
Replace <answer letter> with the letter of your chosen answer.
|
|
3483
|
-
|
|
3484
|
-
Use the following string as your last sentence if you are not capable of answering the question:
|
|
3485
|
-
<!NO ANSWER!>`;
|
|
3486
|
-
var SENTENCE_REORDER_SYSTEM_PROMPT = "Your task is ordering the given sentences (each line is a sentence) in a correct order. Your output must be formatted as the input but with the sentences in the correct order. Markdown formatting is forbidden.";
|
|
3487
|
-
var TEXT_REPLACEMENT_SYSTEM_PROMPT = "Your task is placing all the entities that are provided in the ENTITIES section to the input text in a correct order. Your output only and only includes the modified text, nothing else. It is forbidden to modify anything else from the input text. Markdown formatting is forbidden too.";
|
|
3488
|
-
var TYPO_SYSTEM_PROMPT = "Your task is to find all the typos in the given text. Your output must include the corrected text, nothing else.";
|
|
3489
|
-
var OPEN_ENDED_SYSTEM_PROMPT = "You are a knowledgeable expert. Please provide a clear, accurate, short and well-reasoned answer to the following question. Be concise but comprehensive in your response. Your answer must be short and clear with less than 20 words";
|
|
3490
39
|
export {
|
|
3491
|
-
|
|
3492
|
-
|
|
3493
|
-
|
|
3494
|
-
|
|
3495
|
-
|
|
3496
|
-
AbstractRegistry,
|
|
3497
|
-
AbstractScorer,
|
|
3498
|
-
AnthropicModels,
|
|
3499
|
-
BaseLLMProvider,
|
|
3500
|
-
DataParser,
|
|
3501
|
-
DeepSeekModels,
|
|
3502
|
-
EnumSchema,
|
|
3503
|
-
ExactMatchScorer,
|
|
3504
|
-
ForwardError,
|
|
3505
|
-
GenericArrayParser,
|
|
3506
|
-
GoogleModels,
|
|
3507
|
-
InvalidDataError,
|
|
3508
|
-
LLMJudgeScorer,
|
|
3509
|
-
LargeLanguageModel,
|
|
3510
|
-
LargeLanguageModelOwner,
|
|
3511
|
-
MCQGenerator,
|
|
3512
|
-
MMLUProParser,
|
|
3513
|
-
MULTIPLE_CHOICE_SYSTEM_PROMPT,
|
|
3514
|
-
MedQAParser,
|
|
3515
|
-
MetaModels,
|
|
3516
|
-
MistralModels,
|
|
3517
|
-
MultipleChoiceScorer,
|
|
3518
|
-
NearAIProvider,
|
|
3519
|
-
OPEN_ENDED_SYSTEM_PROMPT,
|
|
3520
|
-
OpenAIModels,
|
|
3521
|
-
OpenEndedGenerator,
|
|
3522
|
-
OpenRouterProvider,
|
|
3523
|
-
PBParser,
|
|
3524
|
-
PEERBENCH_ERROR_CODES,
|
|
3525
|
-
ParagraphMergeStrategy,
|
|
3526
|
-
ParserIsNotCompatibleError,
|
|
3527
|
-
PeerBenchRegistry,
|
|
3528
|
-
PromptResponseSchema,
|
|
3529
|
-
PromptSchema,
|
|
3530
|
-
PromptScoreSchema,
|
|
3531
|
-
PromptTypes,
|
|
3532
|
-
PubMedCollector,
|
|
3533
|
-
QwenModels,
|
|
3534
|
-
RefAnswerEqualityLLMJudgeScorer,
|
|
3535
|
-
SENTENCE_REORDER_SYSTEM_PROMPT,
|
|
3536
|
-
ScoringMethods,
|
|
3537
|
-
SimilarityScorer,
|
|
3538
|
-
SimpleGeneralRSSCollector,
|
|
3539
|
-
StringCollector,
|
|
3540
|
-
TEXT_REPLACEMENT_SYSTEM_PROMPT,
|
|
3541
|
-
TRPGenerator,
|
|
3542
|
-
TYPO_SYSTEM_PROMPT,
|
|
3543
|
-
TaskSchema,
|
|
3544
|
-
XAIModels,
|
|
40
|
+
CATEGORIES,
|
|
41
|
+
PEERBENCH_NAMESPACE,
|
|
42
|
+
PeerbenchError,
|
|
43
|
+
RateLimiter,
|
|
44
|
+
ScoringMethod,
|
|
3545
45
|
bufferToString,
|
|
3546
|
-
|
|
3547
|
-
|
|
3548
|
-
|
|
3549
|
-
calculateCID,
|
|
3550
|
-
calculateSHA256,
|
|
3551
|
-
checkValidationError,
|
|
3552
|
-
extractJSONFromResponse,
|
|
3553
|
-
formatMs,
|
|
3554
|
-
formatString,
|
|
3555
|
-
paragraphMerge,
|
|
3556
|
-
parseJSONL,
|
|
46
|
+
captureStackTrace,
|
|
47
|
+
defineRunner,
|
|
48
|
+
idGeneratorUUIDv7,
|
|
3557
49
|
parseResponseAsJSON,
|
|
3558
|
-
parseValidationError,
|
|
3559
|
-
preparePrompt,
|
|
3560
|
-
readFile,
|
|
3561
|
-
removeDIDPrefix,
|
|
3562
50
|
sleep,
|
|
3563
|
-
|
|
3564
|
-
stringToBuffer,
|
|
3565
|
-
tryParseJson
|
|
51
|
+
stringToBuffer
|
|
3566
52
|
};
|
|
3567
53
|
//# sourceMappingURL=index.js.map
|