peerbench 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -878,7 +878,7 @@ declare const QAScoreSchemaV1: z.ZodObject<Omit<{
878
878
  };
879
879
  type QAScoreV1 = z.infer<typeof QAScoreSchemaV1>;
880
880
 
881
- declare const peerbenchRunner: (params: {
881
+ declare const peerbenchRunner: ((params: {
882
882
  testCase: {
883
883
  id: string;
884
884
  question: string;
@@ -1002,7 +1002,632 @@ declare const peerbenchRunner: (params: {
1002
1002
  scorerAIInputCost?: string | undefined;
1003
1003
  scorerAIOutputCost?: string | undefined;
1004
1004
  } | undefined;
1005
- }>;
1005
+ }>) & {
1006
+ config: {
1007
+ runConfigSchema: z__default.ZodObject<{
1008
+ model: z__default.ZodString;
1009
+ llmJudgeModel: z__default.ZodOptional<z__default.ZodString>;
1010
+ llmJudgeSystemPrompt: z__default.ZodOptional<z__default.ZodObject<Omit<{
1011
+ id: z__default.ZodString;
1012
+ namespace: z__default.ZodString;
1013
+ kind: z__default.ZodString;
1014
+ schemaVersion: z__default.ZodNumber;
1015
+ version: z__default.ZodNumber;
1016
+ metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
1017
+ }, "kind" | "namespace" | "schemaVersion"> & {
1018
+ content: z__default.ZodString;
1019
+ } & {
1020
+ namespace: z__default.ZodLiteral<"peerbench.ai">;
1021
+ kind: z__default.ZodLiteral<`${string}/simple.sys-prompt`>;
1022
+ schemaVersion: z__default.ZodLiteral<1>;
1023
+ }, z__default.core.$strip> & {
1024
+ new: (input: Omit<{
1025
+ id: string;
1026
+ version: number;
1027
+ content: string;
1028
+ namespace: "peerbench.ai";
1029
+ kind: `${string}/simple.sys-prompt`;
1030
+ schemaVersion: 1;
1031
+ metadata?: Record<string, unknown> | undefined;
1032
+ }, "kind" | "namespace" | "schemaVersion">) => {
1033
+ id: string;
1034
+ version: number;
1035
+ content: string;
1036
+ namespace: "peerbench.ai";
1037
+ kind: `${string}/simple.sys-prompt`;
1038
+ schemaVersion: 1;
1039
+ metadata?: Record<string, unknown> | undefined;
1040
+ };
1041
+ newWithId(input: Omit<{
1042
+ id: string;
1043
+ version: number;
1044
+ content: string;
1045
+ namespace: "peerbench.ai";
1046
+ kind: `${string}/simple.sys-prompt`;
1047
+ schemaVersion: 1;
1048
+ metadata?: Record<string, unknown> | undefined;
1049
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
1050
+ id: string;
1051
+ version: number;
1052
+ content: string;
1053
+ namespace: "peerbench.ai";
1054
+ kind: `${string}/simple.sys-prompt`;
1055
+ schemaVersion: 1;
1056
+ metadata?: Record<string, unknown> | undefined;
1057
+ }>;
1058
+ }>;
1059
+ llmJudgeFieldsToExtract: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodCustom<z__default.ZodType<unknown, unknown, z__default.core.$ZodTypeInternals<unknown, unknown>>, z__default.ZodType<unknown, unknown, z__default.core.$ZodTypeInternals<unknown, unknown>>>>>;
1060
+ systemPrompt: z__default.ZodOptional<z__default.ZodObject<Omit<{
1061
+ id: z__default.ZodString;
1062
+ namespace: z__default.ZodString;
1063
+ kind: z__default.ZodString;
1064
+ schemaVersion: z__default.ZodNumber;
1065
+ version: z__default.ZodNumber;
1066
+ metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
1067
+ }, "kind" | "namespace" | "schemaVersion"> & {
1068
+ content: z__default.ZodString;
1069
+ } & {
1070
+ namespace: z__default.ZodLiteral<"peerbench.ai">;
1071
+ kind: z__default.ZodLiteral<`${string}/simple.sys-prompt`>;
1072
+ schemaVersion: z__default.ZodLiteral<1>;
1073
+ }, z__default.core.$strip> & {
1074
+ new: (input: Omit<{
1075
+ id: string;
1076
+ version: number;
1077
+ content: string;
1078
+ namespace: "peerbench.ai";
1079
+ kind: `${string}/simple.sys-prompt`;
1080
+ schemaVersion: 1;
1081
+ metadata?: Record<string, unknown> | undefined;
1082
+ }, "kind" | "namespace" | "schemaVersion">) => {
1083
+ id: string;
1084
+ version: number;
1085
+ content: string;
1086
+ namespace: "peerbench.ai";
1087
+ kind: `${string}/simple.sys-prompt`;
1088
+ schemaVersion: 1;
1089
+ metadata?: Record<string, unknown> | undefined;
1090
+ };
1091
+ newWithId(input: Omit<{
1092
+ id: string;
1093
+ version: number;
1094
+ content: string;
1095
+ namespace: "peerbench.ai";
1096
+ kind: `${string}/simple.sys-prompt`;
1097
+ schemaVersion: 1;
1098
+ metadata?: Record<string, unknown> | undefined;
1099
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
1100
+ id: string;
1101
+ version: number;
1102
+ content: string;
1103
+ namespace: "peerbench.ai";
1104
+ kind: `${string}/simple.sys-prompt`;
1105
+ schemaVersion: 1;
1106
+ metadata?: Record<string, unknown> | undefined;
1107
+ }>;
1108
+ }>;
1109
+ templateVariables: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodString>>;
1110
+ }, z__default.core.$strip>;
1111
+ schemaSets: [{
1112
+ readonly testCase: z__default.ZodObject<Omit<{
1113
+ id: z__default.ZodString;
1114
+ namespace: z__default.ZodString;
1115
+ schemaVersion: z__default.ZodNumber;
1116
+ kind: z__default.ZodString;
1117
+ metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
1118
+ }, "kind" | "namespace" | "schemaVersion"> & {
1119
+ question: z__default.ZodString;
1120
+ options: z__default.ZodRecord<z__default.ZodString, z__default.ZodString>;
1121
+ correctAnswerKeys: z__default.ZodArray<z__default.ZodString>;
1122
+ } & {
1123
+ namespace: z__default.ZodLiteral<"peerbench.ai">;
1124
+ kind: z__default.ZodLiteral<"llm/mcq.tc">;
1125
+ schemaVersion: z__default.ZodLiteral<1>;
1126
+ }, z__default.core.$strip> & {
1127
+ new: (input: Omit<{
1128
+ id: string;
1129
+ question: string;
1130
+ options: Record<string, string>;
1131
+ correctAnswerKeys: string[];
1132
+ namespace: "peerbench.ai";
1133
+ kind: "llm/mcq.tc";
1134
+ schemaVersion: 1;
1135
+ metadata?: Record<string, unknown> | undefined;
1136
+ }, "kind" | "namespace" | "schemaVersion">) => {
1137
+ id: string;
1138
+ question: string;
1139
+ options: Record<string, string>;
1140
+ correctAnswerKeys: string[];
1141
+ namespace: "peerbench.ai";
1142
+ kind: "llm/mcq.tc";
1143
+ schemaVersion: 1;
1144
+ metadata?: Record<string, unknown> | undefined;
1145
+ };
1146
+ newWithId(input: Omit<{
1147
+ id: string;
1148
+ question: string;
1149
+ options: Record<string, string>;
1150
+ correctAnswerKeys: string[];
1151
+ namespace: "peerbench.ai";
1152
+ kind: "llm/mcq.tc";
1153
+ schemaVersion: 1;
1154
+ metadata?: Record<string, unknown> | undefined;
1155
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
1156
+ id: string;
1157
+ question: string;
1158
+ options: Record<string, string>;
1159
+ correctAnswerKeys: string[];
1160
+ namespace: "peerbench.ai";
1161
+ kind: "llm/mcq.tc";
1162
+ schemaVersion: 1;
1163
+ metadata?: Record<string, unknown> | undefined;
1164
+ }>;
1165
+ };
1166
+ readonly response: z__default.ZodObject<Omit<{
1167
+ id: z__default.ZodString;
1168
+ namespace: z__default.ZodString;
1169
+ schemaVersion: z__default.ZodNumber;
1170
+ kind: z__default.ZodString;
1171
+ startedAt: z__default.ZodNumber;
1172
+ completedAt: z__default.ZodNumber;
1173
+ testCaseId: z__default.ZodString;
1174
+ metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
1175
+ }, "kind" | "namespace" | "schemaVersion"> & {
1176
+ data: z__default.ZodString;
1177
+ modelSlug: z__default.ZodString;
1178
+ provider: z__default.ZodString;
1179
+ systemPromptId: z__default.ZodOptional<z__default.ZodString>;
1180
+ inputTokensUsed: z__default.ZodOptional<z__default.ZodNumber>;
1181
+ outputTokensUsed: z__default.ZodOptional<z__default.ZodNumber>;
1182
+ inputCost: z__default.ZodOptional<z__default.ZodString>;
1183
+ outputCost: z__default.ZodOptional<z__default.ZodString>;
1184
+ } & {
1185
+ namespace: z__default.ZodLiteral<"peerbench.ai">;
1186
+ kind: z__default.ZodLiteral<"llm/mcq.rs">;
1187
+ schemaVersion: z__default.ZodLiteral<1>;
1188
+ }, z__default.core.$strip> & {
1189
+ new: (input: Omit<{
1190
+ startedAt: number;
1191
+ completedAt: number;
1192
+ id: string;
1193
+ testCaseId: string;
1194
+ data: string;
1195
+ modelSlug: string;
1196
+ provider: string;
1197
+ namespace: "peerbench.ai";
1198
+ kind: "llm/mcq.rs";
1199
+ schemaVersion: 1;
1200
+ metadata?: Record<string, unknown> | undefined;
1201
+ systemPromptId?: string | undefined;
1202
+ inputTokensUsed?: number | undefined;
1203
+ outputTokensUsed?: number | undefined;
1204
+ inputCost?: string | undefined;
1205
+ outputCost?: string | undefined;
1206
+ }, "kind" | "namespace" | "schemaVersion">) => {
1207
+ startedAt: number;
1208
+ completedAt: number;
1209
+ id: string;
1210
+ testCaseId: string;
1211
+ data: string;
1212
+ modelSlug: string;
1213
+ provider: string;
1214
+ namespace: "peerbench.ai";
1215
+ kind: "llm/mcq.rs";
1216
+ schemaVersion: 1;
1217
+ metadata?: Record<string, unknown> | undefined;
1218
+ systemPromptId?: string | undefined;
1219
+ inputTokensUsed?: number | undefined;
1220
+ outputTokensUsed?: number | undefined;
1221
+ inputCost?: string | undefined;
1222
+ outputCost?: string | undefined;
1223
+ };
1224
+ newWithId(input: Omit<{
1225
+ startedAt: number;
1226
+ completedAt: number;
1227
+ id: string;
1228
+ testCaseId: string;
1229
+ data: string;
1230
+ modelSlug: string;
1231
+ provider: string;
1232
+ namespace: "peerbench.ai";
1233
+ kind: "llm/mcq.rs";
1234
+ schemaVersion: 1;
1235
+ metadata?: Record<string, unknown> | undefined;
1236
+ systemPromptId?: string | undefined;
1237
+ inputTokensUsed?: number | undefined;
1238
+ outputTokensUsed?: number | undefined;
1239
+ inputCost?: string | undefined;
1240
+ outputCost?: string | undefined;
1241
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
1242
+ startedAt: number;
1243
+ completedAt: number;
1244
+ id: string;
1245
+ testCaseId: string;
1246
+ data: string;
1247
+ modelSlug: string;
1248
+ provider: string;
1249
+ namespace: "peerbench.ai";
1250
+ kind: "llm/mcq.rs";
1251
+ schemaVersion: 1;
1252
+ metadata?: Record<string, unknown> | undefined;
1253
+ systemPromptId?: string | undefined;
1254
+ inputTokensUsed?: number | undefined;
1255
+ outputTokensUsed?: number | undefined;
1256
+ inputCost?: string | undefined;
1257
+ outputCost?: string | undefined;
1258
+ }>;
1259
+ };
1260
+ readonly score: z__default.ZodObject<Omit<{
1261
+ id: z__default.ZodString;
1262
+ namespace: z__default.ZodString;
1263
+ kind: z__default.ZodString;
1264
+ schemaVersion: z__default.ZodNumber;
1265
+ value: z__default.ZodNumber;
1266
+ responseId: z__default.ZodString;
1267
+ explanation: z__default.ZodOptional<z__default.ZodString>;
1268
+ metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
1269
+ scoringMethod: z__default.ZodEnum<{
1270
+ readonly ai: "ai";
1271
+ readonly human: "human";
1272
+ readonly algo: "algo";
1273
+ }>;
1274
+ }, "kind" | "namespace" | "schemaVersion"> & {
1275
+ extractedAnswers: z__default.ZodArray<z__default.ZodString>;
1276
+ scorerAISystemPrompt: z__default.ZodOptional<z__default.ZodString>;
1277
+ scorerAISystemPromptId: z__default.ZodOptional<z__default.ZodString>;
1278
+ scorerAIProvider: z__default.ZodOptional<z__default.ZodString>;
1279
+ scorerAIModelSlug: z__default.ZodOptional<z__default.ZodString>;
1280
+ scorerAIInputTokensUsed: z__default.ZodOptional<z__default.ZodNumber>;
1281
+ scorerAIOutputTokensUsed: z__default.ZodOptional<z__default.ZodNumber>;
1282
+ scorerAIInputCost: z__default.ZodOptional<z__default.ZodString>;
1283
+ scorerAIOutputCost: z__default.ZodOptional<z__default.ZodString>;
1284
+ } & {
1285
+ namespace: z__default.ZodLiteral<"peerbench.ai">;
1286
+ kind: z__default.ZodLiteral<"llm/mcq.sc">;
1287
+ schemaVersion: z__default.ZodLiteral<1>;
1288
+ }, z__default.core.$strip> & {
1289
+ new: (input: Omit<{
1290
+ id: string;
1291
+ value: number;
1292
+ responseId: string;
1293
+ scoringMethod: "ai" | "human" | "algo";
1294
+ extractedAnswers: string[];
1295
+ namespace: "peerbench.ai";
1296
+ kind: "llm/mcq.sc";
1297
+ schemaVersion: 1;
1298
+ metadata?: Record<string, unknown> | undefined;
1299
+ explanation?: string | undefined;
1300
+ scorerAISystemPrompt?: string | undefined;
1301
+ scorerAISystemPromptId?: string | undefined;
1302
+ scorerAIProvider?: string | undefined;
1303
+ scorerAIModelSlug?: string | undefined;
1304
+ scorerAIInputTokensUsed?: number | undefined;
1305
+ scorerAIOutputTokensUsed?: number | undefined;
1306
+ scorerAIInputCost?: string | undefined;
1307
+ scorerAIOutputCost?: string | undefined;
1308
+ }, "kind" | "namespace" | "schemaVersion">) => {
1309
+ id: string;
1310
+ value: number;
1311
+ responseId: string;
1312
+ scoringMethod: "ai" | "human" | "algo";
1313
+ extractedAnswers: string[];
1314
+ namespace: "peerbench.ai";
1315
+ kind: "llm/mcq.sc";
1316
+ schemaVersion: 1;
1317
+ metadata?: Record<string, unknown> | undefined;
1318
+ explanation?: string | undefined;
1319
+ scorerAISystemPrompt?: string | undefined;
1320
+ scorerAISystemPromptId?: string | undefined;
1321
+ scorerAIProvider?: string | undefined;
1322
+ scorerAIModelSlug?: string | undefined;
1323
+ scorerAIInputTokensUsed?: number | undefined;
1324
+ scorerAIOutputTokensUsed?: number | undefined;
1325
+ scorerAIInputCost?: string | undefined;
1326
+ scorerAIOutputCost?: string | undefined;
1327
+ };
1328
+ newWithId(input: Omit<{
1329
+ id: string;
1330
+ value: number;
1331
+ responseId: string;
1332
+ scoringMethod: "ai" | "human" | "algo";
1333
+ extractedAnswers: string[];
1334
+ namespace: "peerbench.ai";
1335
+ kind: "llm/mcq.sc";
1336
+ schemaVersion: 1;
1337
+ metadata?: Record<string, unknown> | undefined;
1338
+ explanation?: string | undefined;
1339
+ scorerAISystemPrompt?: string | undefined;
1340
+ scorerAISystemPromptId?: string | undefined;
1341
+ scorerAIProvider?: string | undefined;
1342
+ scorerAIModelSlug?: string | undefined;
1343
+ scorerAIInputTokensUsed?: number | undefined;
1344
+ scorerAIOutputTokensUsed?: number | undefined;
1345
+ scorerAIInputCost?: string | undefined;
1346
+ scorerAIOutputCost?: string | undefined;
1347
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
1348
+ id: string;
1349
+ value: number;
1350
+ responseId: string;
1351
+ scoringMethod: "ai" | "human" | "algo";
1352
+ extractedAnswers: string[];
1353
+ namespace: "peerbench.ai";
1354
+ kind: "llm/mcq.sc";
1355
+ schemaVersion: 1;
1356
+ metadata?: Record<string, unknown> | undefined;
1357
+ explanation?: string | undefined;
1358
+ scorerAISystemPrompt?: string | undefined;
1359
+ scorerAISystemPromptId?: string | undefined;
1360
+ scorerAIProvider?: string | undefined;
1361
+ scorerAIModelSlug?: string | undefined;
1362
+ scorerAIInputTokensUsed?: number | undefined;
1363
+ scorerAIOutputTokensUsed?: number | undefined;
1364
+ scorerAIInputCost?: string | undefined;
1365
+ scorerAIOutputCost?: string | undefined;
1366
+ }>;
1367
+ };
1368
+ }, {
1369
+ readonly testCase: z__default.ZodObject<Omit<{
1370
+ id: z__default.ZodString;
1371
+ namespace: z__default.ZodString;
1372
+ schemaVersion: z__default.ZodNumber;
1373
+ kind: z__default.ZodString;
1374
+ metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
1375
+ }, "kind" | "namespace" | "schemaVersion"> & {
1376
+ question: z__default.ZodString;
1377
+ goodAnswers: z__default.ZodArray<z__default.ZodString>;
1378
+ badAnswers: z__default.ZodArray<z__default.ZodString>;
1379
+ } & {
1380
+ namespace: z__default.ZodLiteral<"peerbench.ai">;
1381
+ kind: z__default.ZodLiteral<"llm/qa.tc">;
1382
+ schemaVersion: z__default.ZodLiteral<1>;
1383
+ }, z__default.core.$strip> & {
1384
+ new: (input: Omit<{
1385
+ id: string;
1386
+ question: string;
1387
+ goodAnswers: string[];
1388
+ badAnswers: string[];
1389
+ namespace: "peerbench.ai";
1390
+ kind: "llm/qa.tc";
1391
+ schemaVersion: 1;
1392
+ metadata?: Record<string, unknown> | undefined;
1393
+ }, "kind" | "namespace" | "schemaVersion">) => {
1394
+ id: string;
1395
+ question: string;
1396
+ goodAnswers: string[];
1397
+ badAnswers: string[];
1398
+ namespace: "peerbench.ai";
1399
+ kind: "llm/qa.tc";
1400
+ schemaVersion: 1;
1401
+ metadata?: Record<string, unknown> | undefined;
1402
+ };
1403
+ newWithId(input: Omit<{
1404
+ id: string;
1405
+ question: string;
1406
+ goodAnswers: string[];
1407
+ badAnswers: string[];
1408
+ namespace: "peerbench.ai";
1409
+ kind: "llm/qa.tc";
1410
+ schemaVersion: 1;
1411
+ metadata?: Record<string, unknown> | undefined;
1412
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
1413
+ id: string;
1414
+ question: string;
1415
+ goodAnswers: string[];
1416
+ badAnswers: string[];
1417
+ namespace: "peerbench.ai";
1418
+ kind: "llm/qa.tc";
1419
+ schemaVersion: 1;
1420
+ metadata?: Record<string, unknown> | undefined;
1421
+ }>;
1422
+ };
1423
+ readonly response: z__default.ZodObject<Omit<{
1424
+ id: z__default.ZodString;
1425
+ namespace: z__default.ZodString;
1426
+ schemaVersion: z__default.ZodNumber;
1427
+ kind: z__default.ZodString;
1428
+ startedAt: z__default.ZodNumber;
1429
+ completedAt: z__default.ZodNumber;
1430
+ testCaseId: z__default.ZodString;
1431
+ metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
1432
+ }, "kind" | "namespace" | "schemaVersion"> & {
1433
+ data: z__default.ZodString;
1434
+ modelSlug: z__default.ZodString;
1435
+ provider: z__default.ZodString;
1436
+ systemPromptId: z__default.ZodOptional<z__default.ZodString>;
1437
+ inputTokensUsed: z__default.ZodOptional<z__default.ZodNumber>;
1438
+ outputTokensUsed: z__default.ZodOptional<z__default.ZodNumber>;
1439
+ inputCost: z__default.ZodOptional<z__default.ZodString>;
1440
+ outputCost: z__default.ZodOptional<z__default.ZodString>;
1441
+ } & {
1442
+ namespace: z__default.ZodLiteral<"peerbench.ai">;
1443
+ kind: z__default.ZodLiteral<"llm/qa.rs">;
1444
+ schemaVersion: z__default.ZodLiteral<1>;
1445
+ }, z__default.core.$strip> & {
1446
+ new: (input: Omit<{
1447
+ startedAt: number;
1448
+ completedAt: number;
1449
+ id: string;
1450
+ testCaseId: string;
1451
+ data: string;
1452
+ modelSlug: string;
1453
+ provider: string;
1454
+ namespace: "peerbench.ai";
1455
+ kind: "llm/qa.rs";
1456
+ schemaVersion: 1;
1457
+ metadata?: Record<string, unknown> | undefined;
1458
+ systemPromptId?: string | undefined;
1459
+ inputTokensUsed?: number | undefined;
1460
+ outputTokensUsed?: number | undefined;
1461
+ inputCost?: string | undefined;
1462
+ outputCost?: string | undefined;
1463
+ }, "kind" | "namespace" | "schemaVersion">) => {
1464
+ startedAt: number;
1465
+ completedAt: number;
1466
+ id: string;
1467
+ testCaseId: string;
1468
+ data: string;
1469
+ modelSlug: string;
1470
+ provider: string;
1471
+ namespace: "peerbench.ai";
1472
+ kind: "llm/qa.rs";
1473
+ schemaVersion: 1;
1474
+ metadata?: Record<string, unknown> | undefined;
1475
+ systemPromptId?: string | undefined;
1476
+ inputTokensUsed?: number | undefined;
1477
+ outputTokensUsed?: number | undefined;
1478
+ inputCost?: string | undefined;
1479
+ outputCost?: string | undefined;
1480
+ };
1481
+ newWithId(input: Omit<{
1482
+ startedAt: number;
1483
+ completedAt: number;
1484
+ id: string;
1485
+ testCaseId: string;
1486
+ data: string;
1487
+ modelSlug: string;
1488
+ provider: string;
1489
+ namespace: "peerbench.ai";
1490
+ kind: "llm/qa.rs";
1491
+ schemaVersion: 1;
1492
+ metadata?: Record<string, unknown> | undefined;
1493
+ systemPromptId?: string | undefined;
1494
+ inputTokensUsed?: number | undefined;
1495
+ outputTokensUsed?: number | undefined;
1496
+ inputCost?: string | undefined;
1497
+ outputCost?: string | undefined;
1498
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
1499
+ startedAt: number;
1500
+ completedAt: number;
1501
+ id: string;
1502
+ testCaseId: string;
1503
+ data: string;
1504
+ modelSlug: string;
1505
+ provider: string;
1506
+ namespace: "peerbench.ai";
1507
+ kind: "llm/qa.rs";
1508
+ schemaVersion: 1;
1509
+ metadata?: Record<string, unknown> | undefined;
1510
+ systemPromptId?: string | undefined;
1511
+ inputTokensUsed?: number | undefined;
1512
+ outputTokensUsed?: number | undefined;
1513
+ inputCost?: string | undefined;
1514
+ outputCost?: string | undefined;
1515
+ }>;
1516
+ };
1517
+ readonly score: z__default.ZodObject<Omit<{
1518
+ id: z__default.ZodString;
1519
+ namespace: z__default.ZodString;
1520
+ kind: z__default.ZodString;
1521
+ schemaVersion: z__default.ZodNumber;
1522
+ value: z__default.ZodNumber;
1523
+ responseId: z__default.ZodString;
1524
+ explanation: z__default.ZodOptional<z__default.ZodString>;
1525
+ metadata: z__default.ZodOptional<z__default.ZodRecord<z__default.ZodString, z__default.ZodUnknown>>;
1526
+ scoringMethod: z__default.ZodEnum<{
1527
+ readonly ai: "ai";
1528
+ readonly human: "human";
1529
+ readonly algo: "algo";
1530
+ }>;
1531
+ }, "kind" | "namespace" | "schemaVersion"> & {
1532
+ scorerAISystemPrompt: z__default.ZodOptional<z__default.ZodString>;
1533
+ scorerAISystemPromptId: z__default.ZodOptional<z__default.ZodString>;
1534
+ scorerAIProvider: z__default.ZodOptional<z__default.ZodString>;
1535
+ scorerAIModelSlug: z__default.ZodOptional<z__default.ZodString>;
1536
+ scorerAIInputTokensUsed: z__default.ZodOptional<z__default.ZodNumber>;
1537
+ scorerAIOutputTokensUsed: z__default.ZodOptional<z__default.ZodNumber>;
1538
+ scorerAIInputCost: z__default.ZodOptional<z__default.ZodString>;
1539
+ scorerAIOutputCost: z__default.ZodOptional<z__default.ZodString>;
1540
+ } & {
1541
+ namespace: z__default.ZodLiteral<"peerbench.ai">;
1542
+ kind: z__default.ZodLiteral<"llm/qa.sc">;
1543
+ schemaVersion: z__default.ZodLiteral<1>;
1544
+ }, z__default.core.$strip> & {
1545
+ new: (input: Omit<{
1546
+ id: string;
1547
+ value: number;
1548
+ responseId: string;
1549
+ scoringMethod: "ai" | "human" | "algo";
1550
+ namespace: "peerbench.ai";
1551
+ kind: "llm/qa.sc";
1552
+ schemaVersion: 1;
1553
+ metadata?: Record<string, unknown> | undefined;
1554
+ explanation?: string | undefined;
1555
+ scorerAISystemPrompt?: string | undefined;
1556
+ scorerAISystemPromptId?: string | undefined;
1557
+ scorerAIProvider?: string | undefined;
1558
+ scorerAIModelSlug?: string | undefined;
1559
+ scorerAIInputTokensUsed?: number | undefined;
1560
+ scorerAIOutputTokensUsed?: number | undefined;
1561
+ scorerAIInputCost?: string | undefined;
1562
+ scorerAIOutputCost?: string | undefined;
1563
+ }, "kind" | "namespace" | "schemaVersion">) => {
1564
+ id: string;
1565
+ value: number;
1566
+ responseId: string;
1567
+ scoringMethod: "ai" | "human" | "algo";
1568
+ namespace: "peerbench.ai";
1569
+ kind: "llm/qa.sc";
1570
+ schemaVersion: 1;
1571
+ metadata?: Record<string, unknown> | undefined;
1572
+ explanation?: string | undefined;
1573
+ scorerAISystemPrompt?: string | undefined;
1574
+ scorerAISystemPromptId?: string | undefined;
1575
+ scorerAIProvider?: string | undefined;
1576
+ scorerAIModelSlug?: string | undefined;
1577
+ scorerAIInputTokensUsed?: number | undefined;
1578
+ scorerAIOutputTokensUsed?: number | undefined;
1579
+ scorerAIInputCost?: string | undefined;
1580
+ scorerAIOutputCost?: string | undefined;
1581
+ };
1582
+ newWithId(input: Omit<{
1583
+ id: string;
1584
+ value: number;
1585
+ responseId: string;
1586
+ scoringMethod: "ai" | "human" | "algo";
1587
+ namespace: "peerbench.ai";
1588
+ kind: "llm/qa.sc";
1589
+ schemaVersion: 1;
1590
+ metadata?: Record<string, unknown> | undefined;
1591
+ explanation?: string | undefined;
1592
+ scorerAISystemPrompt?: string | undefined;
1593
+ scorerAISystemPromptId?: string | undefined;
1594
+ scorerAIProvider?: string | undefined;
1595
+ scorerAIModelSlug?: string | undefined;
1596
+ scorerAIInputTokensUsed?: number | undefined;
1597
+ scorerAIOutputTokensUsed?: number | undefined;
1598
+ scorerAIInputCost?: string | undefined;
1599
+ scorerAIOutputCost?: string | undefined;
1600
+ }, "id" | "kind" | "namespace" | "schemaVersion">, generator: IdGenerator): Promise<{
1601
+ id: string;
1602
+ value: number;
1603
+ responseId: string;
1604
+ scoringMethod: "ai" | "human" | "algo";
1605
+ namespace: "peerbench.ai";
1606
+ kind: "llm/qa.sc";
1607
+ schemaVersion: 1;
1608
+ metadata?: Record<string, unknown> | undefined;
1609
+ explanation?: string | undefined;
1610
+ scorerAISystemPrompt?: string | undefined;
1611
+ scorerAISystemPromptId?: string | undefined;
1612
+ scorerAIProvider?: string | undefined;
1613
+ scorerAIModelSlug?: string | undefined;
1614
+ scorerAIInputTokensUsed?: number | undefined;
1615
+ scorerAIOutputTokensUsed?: number | undefined;
1616
+ scorerAIInputCost?: string | undefined;
1617
+ scorerAIOutputCost?: string | undefined;
1618
+ }>;
1619
+ };
1620
+ }];
1621
+ providers: [typeof AbstractLLMProvider];
1622
+ scorers: [typeof LLMAsAJudgeScorer, typeof MCQScorer];
1623
+ parseRunConfig?: boolean;
1624
+ defaults?: {
1625
+ scorer?: MCQScorer | LLMAsAJudgeScorer | undefined;
1626
+ responseIdGenerator?: IdGenerator;
1627
+ scoreIdGenerator?: IdGenerator;
1628
+ } | undefined;
1629
+ };
1630
+ };
1006
1631
 
1007
1632
  declare class PeerbenchJSONStorage extends JSONFileStorage<MCQTestCaseV1 | MCQResponseV1 | MCQScoreV1 | QATestCaseV1 | QAResponseV1 | QAScoreV1 | MultiTurnTestCaseV1 | MultiTurnResponseV1 | MultiTurnScoreV1> {
1008
1633
  constructor(config: {
@@ -3,11 +3,10 @@ import {
3
3
  } from "../chunk-Q6GSOHOP.js";
4
4
  import {
5
5
  defineRunner
6
- } from "../chunk-QY5MPNNB.js";
6
+ } from "../chunk-RTEAK4II.js";
7
7
  import {
8
- LLMAsAJudgeScorer,
9
- MCQScorer
10
- } from "../chunk-DNGT4SJC.js";
8
+ AbstractLLMProvider
9
+ } from "../chunk-3JF7SHLC.js";
11
10
  import {
12
11
  BaseResponseSchemaV1,
13
12
  BaseScoreSchemaV1,
@@ -21,14 +20,15 @@ import {
21
20
  ScoringMethod
22
21
  } from "../chunk-HMQYGCKI.js";
23
22
  import {
24
- JSONFileStorage
25
- } from "../chunk-JFLUJLGT.js";
26
- import {
27
- AbstractLLMProvider
28
- } from "../chunk-HPPCDSJ3.js";
23
+ LLMAsAJudgeScorer,
24
+ MCQScorer
25
+ } from "../chunk-DNGT4SJC.js";
29
26
  import {
30
27
  PEERBENCH_NAMESPACE
31
28
  } from "../chunk-UHHHSYVE.js";
29
+ import {
30
+ JSONFileStorage
31
+ } from "../chunk-JFLUJLGT.js";
32
32
  import {
33
33
  idGeneratorUUIDv7
34
34
  } from "../chunk-4UBK6452.js";
@@ -44,7 +44,7 @@ var MastraProvider = class extends AbstractLLMProvider {
44
44
  {
45
45
  messages: apiMessages,
46
46
  runtimeContext: {
47
- "model-id": args.model
47
+ "model-id": args.modelName
48
48
  }
49
49
  },
50
50
  { memory: args.memory ?? this.memory }
@@ -232,4 +232,4 @@ export {
232
232
  OpenAIProvider,
233
233
  OpenRouterProvider
234
234
  };
235
- //# sourceMappingURL=chunk-HPPCDSJ3.js.map
235
+ //# sourceMappingURL=chunk-3JF7SHLC.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/providers/abstract/provider.ts","../src/providers/abstract/llm.ts","../src/providers/mastra.ts","../src/providers/openai.ts","../src/providers/openrouter.ts"],"sourcesContent":["export abstract class AbstractProvider {\n abstract readonly kind: string;\n}\n\nexport type ProviderResponse<TData = unknown> = {\n startedAt: number;\n completedAt: number;\n data: TData;\n};\n","import { AbstractProvider, ProviderResponse } from \"./provider\";\nimport {\n ResponseFormatJSONObject,\n ResponseFormatJSONSchema,\n ResponseFormatText,\n} from \"openai/resources/shared\";\nimport { ChatCompletionMessageParam } from \"openai/resources/chat/completions\";\n\nexport abstract class AbstractLLMProvider extends AbstractProvider {\n abstract forward(args: LLMProviderForwardArgs): Promise<ChatResponse>;\n}\n\nexport type LLMProviderForwardArgs = {\n messages: ChatCompletionMessageParam[];\n model: string;\n abortSignal?: AbortSignal;\n temperature?: number;\n responseFormat?:\n | ResponseFormatText\n | ResponseFormatJSONSchema\n | ResponseFormatJSONObject;\n};\n\nexport type ChatResponse = ProviderResponse<string> & {\n inputTokensUsed?: number;\n outputTokensUsed?: number;\n inputCost?: string;\n outputCost?: string;\n\n metadata?: Record<string, unknown>;\n};\n","import {\n AbstractLLMProvider,\n type ChatResponse,\n type LLMProviderForwardArgs,\n} from \"./abstract/llm\";\nimport { MastraClient, type GetAgentResponse } from \"@mastra/client-js\";\n\nexport class MastraProvider extends AbstractLLMProvider {\n override readonly kind = \"mastra\";\n\n private readonly endpoint: string;\n private readonly authToken?: string;\n private client: MastraClient;\n private memory?: AgentMemoryOption;\n\n constructor(params: {\n endpoint: string;\n authToken?: string;\n memory?: AgentMemoryOption;\n }) {\n super();\n this.endpoint = params.endpoint;\n this.authToken = params.authToken;\n this.client = new MastraClient({\n baseUrl: this.endpoint,\n headers: this.authToken\n ? {\n Authorization: `Bearer ${this.authToken}`,\n }\n : undefined,\n });\n this.memory = params.memory;\n }\n\n override async forward(\n args: LLMProviderForwardArgs & {\n memory?: AgentMemoryOption;\n\n /**\n * The model that will be used as the brain for the agent.\n */\n modelName?: string\n }\n ): Promise<ChatResponse> {\n const apiMessages = args.messages\n .filter((m) => m.role === \"user\" || m.role === \"assistant\")\n .map((m) => ({\n role: m.role as \"user\" | \"assistant\",\n content: String((m as any).content ?? \"\"),\n }));\n\n const agent = this.client.getAgent(args.model);\n\n const startedAt = Date.now();\n const response = await agent.generate(\n {\n messages: apiMessages,\n runtimeContext: {\n \"model-id\": args.modelName,\n },\n },\n { memory: args.memory ?? this.memory }\n );\n\n return {\n data: response.text,\n startedAt,\n completedAt: Date.now(),\n };\n }\n\n async getAgentInfo(args: {\n agentId: string;\n runtimeContext?: MastraRuntimeContext;\n }) {\n return await this.client\n .getAgent(args.agentId)\n .details(args.runtimeContext);\n }\n\n async getAgents(args?: {\n runtimeContext?: MastraRuntimeContext;\n partial?: boolean;\n }): Promise<Record<string, GetAgentResponse>> {\n return this.client.getAgents(args?.runtimeContext, args?.partial);\n }\n}\n\n// NOTE: Mastra client does not export these types\nexport type AgentMemoryOption = Parameters<\n Parameters<MastraClient[\"getAgent\"]>[\"0\"] extends string\n ? ReturnType<MastraClient[\"getAgent\"]>[\"generate\"]\n : never\n>[0] extends { memory?: infer M }\n ? M\n : never;\n\ntype MastraRuntimeContext = Parameters<\n Parameters<MastraClient[\"getAgent\"]>[\"0\"] extends string\n ? ReturnType<MastraClient[\"getAgent\"]>[\"generate\"]\n : never\n>[0] extends { runtimeContext?: infer R }\n ? R\n : never;\n","import { RateLimiter } from \"@/utils\";\nimport { ChatCompletionMessageParam } from \"openai/resources/chat/completions\";\nimport {\n ResponseFormatJSONObject,\n ResponseFormatJSONSchema,\n ResponseFormatText,\n} from \"openai/resources/shared\";\nimport OpenAI, { APIError } from \"openai\";\nimport { AbstractLLMProvider, ChatResponse } from \"./abstract/llm\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport class OpenAIProvider extends AbstractLLMProvider {\n override readonly kind = `${PEERBENCH_NAMESPACE}/llm/openai` as const;\n\n private client: OpenAI;\n private rateLimiter: RateLimiter;\n private maxRetries: number;\n\n constructor(config: {\n apiKey: string;\n baseURL: string;\n maxRetries?: number;\n timeout?: number;\n rateLimiter?: RateLimiter;\n }) {\n super();\n this.maxRetries = config.maxRetries ?? 3;\n this.rateLimiter =\n config.rateLimiter ??\n new RateLimiter({\n maxWeight: 20,\n timeWindow: 3_000,\n });\n\n this.client = new OpenAI({\n baseURL: config.baseURL,\n apiKey: config.apiKey,\n timeout: config.timeout,\n dangerouslyAllowBrowser: true,\n });\n }\n\n async forward(args: {\n messages: ChatCompletionMessageParam[];\n model: string;\n abortSignal?: AbortSignal;\n temperature?: number;\n responseFormat?:\n | ResponseFormatText\n | ResponseFormatJSONSchema\n | ResponseFormatJSONObject;\n }): Promise<ChatResponse> {\n let retryCount = this.maxRetries;\n while (retryCount > 0) {\n let startedAt: Date = new Date();\n\n try {\n const response = await this.rateLimiter.execute(\n async () => {\n // Capture the start time of the request\n startedAt = new Date();\n return await this.client.chat.completions.create(\n {\n model: args.model,\n messages: args.messages,\n temperature: args.temperature,\n response_format: args.responseFormat,\n },\n // Signal for request\n { signal: args.abortSignal }\n );\n },\n // Signal for rate limiting\n { signal: args.abortSignal }\n );\n\n if (\"error\" in response) {\n const err = response.error as any;\n throw new Error(\n `${err.message} - Code ${err.code} - ${JSON.stringify(err)}`\n );\n }\n\n if (!response?.choices?.[0]?.message?.content) {\n throw new Error(\"No content returned from the model\");\n }\n\n return {\n data: response.choices[0].message.content,\n\n inputTokensUsed: response?.usage?.prompt_tokens,\n outputTokensUsed: response?.usage?.completion_tokens,\n\n startedAt: startedAt.getTime(),\n completedAt: Date.now(),\n };\n } catch (err) {\n if (err instanceof APIError && err.status === 401) {\n throw new Error(`Invalid credentials provided`, { cause: err });\n }\n\n retryCount--;\n\n // More likely an empty HTTP response returned by the Provider\n // and it couldn't be parsed as JSON by the OpenAI SDK. We need to retry the request\n // More info can be found in the following links:\n // https://www.reddit.com/r/SillyTavernAI/comments/1ik95vr/deepseek_r1_on_openrouter_returning_blank_messages/\n // https://github.com/cline/cline/issues/60\n if (err instanceof SyntaxError) {\n console.debug(err);\n continue;\n }\n\n // If it was another error, just continue until we run out of retries\n if (retryCount !== 0) {\n continue;\n }\n\n throw new Error(\n `Failed to forward prompt to the model: ${err instanceof Error ? err.message : err}`,\n { cause: err }\n );\n }\n }\n\n throw new Error(\n `Failed to forward prompt to the model: Max retries reached`,\n { cause: new Error(\"Max retries reached\") }\n );\n }\n}\n","import {\n AbstractLLMProvider,\n ChatResponse,\n LLMProviderForwardArgs,\n} from \"./abstract/llm\";\nimport { RateLimiter } from \"@/utils\";\nimport { OpenAIProvider } from \"./openai\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport Decimal from \"decimal.js\";\nimport axios from \"axios\";\n\nconst baseURL = \"https://openrouter.ai/api/v1\";\nconst MODELS_CACHE_TTL = 1000 * 60 * 60 * 24; // 24 hours\n\nexport class OpenRouterProvider extends AbstractLLMProvider {\n override readonly kind = `${PEERBENCH_NAMESPACE}/llm/openrouter.ai` as const;\n\n private models: ModelsResponse | undefined = undefined;\n private modelsCachePromise: Promise<ModelsResponse | undefined> =\n Promise.resolve(undefined);\n private modelsUpdatedAt = 0;\n private openAIProvider: OpenAIProvider;\n\n constructor(config: {\n apiKey: string;\n maxRetries?: number;\n timeout?: number;\n rateLimiter?: RateLimiter;\n }) {\n super();\n this.openAIProvider = new OpenAIProvider({\n baseURL,\n apiKey: config.apiKey,\n maxRetries: config.maxRetries,\n timeout: config.timeout,\n rateLimiter: config.rateLimiter,\n });\n }\n\n override async forward(args: LLMProviderForwardArgs): Promise<ChatResponse> {\n // Update models cache concurrently (non-blocking)\n const [response] = await Promise.all([\n this.openAIProvider.forward(args),\n this.updateModelsCache().catch(() => {\n // Silently fail if cache update fails so we won't have cost info in the result\n }),\n ]);\n\n // Get the model info from the cache\n const modelInfo = this.models?.data.find((m) => m.id === args.model);\n let inputCost: string | undefined = undefined;\n let outputCost: string | undefined = undefined;\n\n if (modelInfo !== undefined) {\n // Use Decimal.js for more accurate calculation\n if (response.inputTokensUsed !== undefined) {\n inputCost = new Decimal(modelInfo.pricing.prompt)\n .mul(response.inputTokensUsed)\n .toFixed(10);\n }\n if (response.outputTokensUsed !== undefined) {\n outputCost = new Decimal(modelInfo.pricing.completion)\n .mul(response.outputTokensUsed)\n .toFixed(10);\n }\n }\n\n return {\n ...response,\n inputCost,\n outputCost,\n };\n }\n\n /**\n * Updates the cache that holds information about OpenRouter models\n * including pricing information. It will be valid for 24 hours as\n * long as the instance of this Provider object is alive.\n */\n private async updateModelsCache() {\n // Chain each update method call to the promise.\n // This approach prevents race conditions between multiple calls.\n // Since each call is chained to the end of the previous one,\n // each promise makes a request only if the models cache is not updated\n // in the last call. Otherwise it simply resolves to the cached value.\n this.modelsCachePromise = this.modelsCachePromise\n .then(async () => {\n if (\n // The data presented in the cache\n this.models !== undefined &&\n // The cache is still valid\n Date.now() - this.modelsUpdatedAt < MODELS_CACHE_TTL\n ) {\n return this.models;\n }\n\n // If the cache is not valid, update it\n return axios\n .get<ModelsResponse>(`${baseURL}/models`)\n .then((res) => res.data)\n .then((data) => {\n // Only get the models that supports text input and output\n data = {\n data: data.data.filter(\n (m) =>\n m.architecture.input_modalities.includes(\"text\") &&\n m.architecture.output_modalities.includes(\"text\") &&\n // These models are \"fast apply model\" and don't support multi turn conversations so don't include them\n ![\n \"morph/morph-v3-large\",\n \"morph/morph-v3-fast\",\n \"relace/relace-apply-3\",\n ].includes(m.id)\n ),\n };\n\n this.models = data;\n this.modelsUpdatedAt = Date.now();\n\n return data;\n });\n })\n .catch(() => undefined);\n\n // Wait for the promise chain to resolve\n await this.modelsCachePromise;\n }\n}\n\ntype PutModality = \"text\" | \"image\" | \"file\" | \"audio\";\ntype Modality = \"text->text\" | \"text+image->text\" | \"text+image->text+image\";\ntype ModelsResponse = {\n data: {\n readonly id: string;\n readonly canonical_slug: string;\n readonly hugging_face_id: null | string;\n readonly name: string;\n readonly created: number;\n readonly description: string;\n readonly context_length: number;\n readonly architecture: {\n readonly modality: Modality;\n readonly input_modalities: PutModality[];\n readonly output_modalities: PutModality[];\n readonly instruct_type: null | string;\n };\n readonly pricing: {\n readonly prompt: string;\n readonly completion: string;\n readonly request?: string;\n readonly image?: string;\n readonly web_search?: string;\n readonly internal_reasoning?: string;\n readonly input_cache_read?: string;\n readonly input_cache_write?: string;\n readonly audio?: string;\n };\n }[];\n};\n"],"mappings":";;;;;;;;AAAO,IAAe,mBAAf,MAAgC;AAEvC;;;ACMO,IAAe,sBAAf,cAA2C,iBAAiB;AAEnE;;;ACLA,SAAS,oBAA2C;AAE7C,IAAM,iBAAN,cAA6B,oBAAoB;AAAA,EACpC,OAAO;AAAA,EAER;AAAA,EACA;AAAA,EACT;AAAA,EACA;AAAA,EAER,YAAY,QAIT;AACD,UAAM;AACN,SAAK,WAAW,OAAO;AACvB,SAAK,YAAY,OAAO;AACxB,SAAK,SAAS,IAAI,aAAa;AAAA,MAC7B,SAAS,KAAK;AAAA,MACd,SAAS,KAAK,YACV;AAAA,QACA,eAAe,UAAU,KAAK,SAAS;AAAA,MACzC,IACE;AAAA,IACN,CAAC;AACD,SAAK,SAAS,OAAO;AAAA,EACvB;AAAA,EAEA,MAAe,QACb,MAQuB;AACvB,UAAM,cAAc,KAAK,SACtB,OAAO,CAAC,MAAM,EAAE,SAAS,UAAU,EAAE,SAAS,WAAW,EACzD,IAAI,CAAC,OAAO;AAAA,MACX,MAAM,EAAE;AAAA,MACR,SAAS,OAAQ,EAAU,WAAW,EAAE;AAAA,IAC1C,EAAE;AAEJ,UAAM,QAAQ,KAAK,OAAO,SAAS,KAAK,KAAK;AAE7C,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,WAAW,MAAM,MAAM;AAAA,MAC3B;AAAA,QACE,UAAU;AAAA,QACV,gBAAgB;AAAA,UACd,YAAY,KAAK;AAAA,QACnB;AAAA,MACF;AAAA,MACA,EAAE,QAAQ,KAAK,UAAU,KAAK,OAAO;AAAA,IACvC;AAEA,WAAO;AAAA,MACL,MAAM,SAAS;AAAA,MACf;AAAA,MACA,aAAa,KAAK,IAAI;AAAA,IACxB;AAAA,EACF;AAAA,EAEA,MAAM,aAAa,MAGhB;AACD,WAAO,MAAM,KAAK,OACf,SAAS,KAAK,OAAO,EACrB,QAAQ,KAAK,cAAc;AAAA,EAChC;AAAA,EAEA,MAAM,UAAU,MAG8B;AAC5C,WAAO,KAAK,OAAO,UAAU,MAAM,gBAAgB,MAAM,OAAO;AAAA,EAClE;AACF;;;AC/EA,OAAO,UAAU,gBAAgB;AAI1B,IAAM,iBAAN,cAA6B,oBAAoB;AAAA,EACpC,OAAO,GAAG,mBAAmB;AAAA,EAEvC;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,QAMT;AACD,UAAM;AACN,SAAK,aAAa,OAAO,cAAc;AACvC,SAAK,cACH,OAAO,eACP,IAAI,YAAY;AAAA,MACd,WAAW;AAAA,MACX,YAAY;AAAA,IACd,CAAC;AAEH,SAAK,SAAS,IAAI,OAAO;AAAA,MACvB,SAAS,OAAO;AAAA,MAChB,QAAQ,OAAO;AAAA,MACf,SAAS,OAAO;AAAA,MAChB,yBAAyB;AAAA,IAC3B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ,MASY;AACxB,QAAI,aAAa,KAAK;AACtB,WAAO,aAAa,GAAG;AACrB,UAAI,YAAkB,oBAAI,KAAK;AAE/B,UAAI;AACF,cAAM,WAAW,MAAM,KAAK,YAAY;AAAA,UACtC,YAAY;AAEV,wBAAY,oBAAI,KAAK;AACrB,mBAAO,MAAM,KAAK,OAAO,KAAK,YAAY;AAAA,cACxC;AAAA,gBACE,OAAO,KAAK;AAAA,gBACZ,UAAU,KAAK;AAAA,gBACf,aAAa,KAAK;AAAA,gBAClB,iBAAiB,KAAK;AAAA,cACxB;AAAA;AAAA,cAEA,EAAE,QAAQ,KAAK,YAAY;AAAA,YAC7B;AAAA,UACF;AAAA;AAAA,UAEA,EAAE,QAAQ,KAAK,YAAY;AAAA,QAC7B;AAEA,YAAI,WAAW,UAAU;AACvB,gBAAM,MAAM,SAAS;AACrB,gBAAM,IAAI;AAAA,YACR,GAAG,IAAI,OAAO,WAAW,IAAI,IAAI,MAAM,KAAK,UAAU,GAAG,CAAC;AAAA,UAC5D;AAAA,QACF;AAEA,YAAI,CAAC,UAAU,UAAU,CAAC,GAAG,SAAS,SAAS;AAC7C,gBAAM,IAAI,MAAM,oCAAoC;AAAA,QACtD;AAEA,eAAO;AAAA,UACL,MAAM,SAAS,QAAQ,CAAC,EAAE,QAAQ;AAAA,UAElC,iBAAiB,UAAU,OAAO;AAAA,UAClC,kBAAkB,UAAU,OAAO;AAAA,UAEnC,WAAW,UAAU,QAAQ;AAAA,UAC7B,aAAa,KAAK,IAAI;AAAA,QACxB;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,eAAe,YAAY,IAAI,WAAW,KAAK;AACjD,gBAAM,IAAI,MAAM,gCAAgC,EAAE,OAAO,IAAI,CAAC;AAAA,QAChE;AAEA;AAOA,YAAI,eAAe,aAAa;AAC9B,kBAAQ,MAAM,GAAG;AACjB;AAAA,QACF;AAGA,YAAI,eAAe,GAAG;AACpB;AAAA,QACF;AAEA,cAAM,IAAI;AAAA,UACR,0CAA0C,eAAe,QAAQ,IAAI,UAAU,GAAG;AAAA,UAClF,EAAE,OAAO,IAAI;AAAA,QACf;AAAA,MACF;AAAA,IACF;AAEA,UAAM,IAAI;AAAA,MACR;AAAA,MACA,EAAE,OAAO,IAAI,MAAM,qBAAqB,EAAE;AAAA,IAC5C;AAAA,EACF;AACF;;;AC1HA,OAAO,aAAa;AACpB,OAAO,WAAW;AAElB,IAAM,UAAU;AAChB,IAAM,mBAAmB,MAAO,KAAK,KAAK;AAEnC,IAAM,qBAAN,cAAiC,oBAAoB;AAAA,EACxC,OAAO,GAAG,mBAAmB;AAAA,EAEvC,SAAqC;AAAA,EACrC,qBACN,QAAQ,QAAQ,MAAS;AAAA,EACnB,kBAAkB;AAAA,EAClB;AAAA,EAER,YAAY,QAKT;AACD,UAAM;AACN,SAAK,iBAAiB,IAAI,eAAe;AAAA,MACvC;AAAA,MACA,QAAQ,OAAO;AAAA,MACf,YAAY,OAAO;AAAA,MACnB,SAAS,OAAO;AAAA,MAChB,aAAa,OAAO;AAAA,IACtB,CAAC;AAAA,EACH;AAAA,EAEA,MAAe,QAAQ,MAAqD;AAE1E,UAAM,CAAC,QAAQ,IAAI,MAAM,QAAQ,IAAI;AAAA,MACnC,KAAK,eAAe,QAAQ,IAAI;AAAA,MAChC,KAAK,kBAAkB,EAAE,MAAM,MAAM;AAAA,MAErC,CAAC;AAAA,IACH,CAAC;AAGD,UAAM,YAAY,KAAK,QAAQ,KAAK,KAAK,CAAC,MAAM,EAAE,OAAO,KAAK,KAAK;AACnE,QAAI,YAAgC;AACpC,QAAI,aAAiC;AAErC,QAAI,cAAc,QAAW;AAE3B,UAAI,SAAS,oBAAoB,QAAW;AAC1C,oBAAY,IAAI,QAAQ,UAAU,QAAQ,MAAM,EAC7C,IAAI,SAAS,eAAe,EAC5B,QAAQ,EAAE;AAAA,MACf;AACA,UAAI,SAAS,qBAAqB,QAAW;AAC3C,qBAAa,IAAI,QAAQ,UAAU,QAAQ,UAAU,EAClD,IAAI,SAAS,gBAAgB,EAC7B,QAAQ,EAAE;AAAA,MACf;AAAA,IACF;AAEA,WAAO;AAAA,MACL,GAAG;AAAA,MACH;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAc,oBAAoB;AAMhC,SAAK,qBAAqB,KAAK,mBAC5B,KAAK,YAAY;AAChB;AAAA;AAAA,QAEE,KAAK,WAAW;AAAA,QAEhB,KAAK,IAAI,IAAI,KAAK,kBAAkB;AAAA,QACpC;AACA,eAAO,KAAK;AAAA,MACd;AAGA,aAAO,MACJ,IAAoB,GAAG,OAAO,SAAS,EACvC,KAAK,CAAC,QAAQ,IAAI,IAAI,EACtB,KAAK,CAAC,SAAS;AAEd,eAAO;AAAA,UACL,MAAM,KAAK,KAAK;AAAA,YACd,CAAC,MACC,EAAE,aAAa,iBAAiB,SAAS,MAAM,KAC/C,EAAE,aAAa,kBAAkB,SAAS,MAAM;AAAA,YAEhD,CAAC;AAAA,cACC;AAAA,cACA;AAAA,cACA;AAAA,YACF,EAAE,SAAS,EAAE,EAAE;AAAA,UACnB;AAAA,QACF;AAEA,aAAK,SAAS;AACd,aAAK,kBAAkB,KAAK,IAAI;AAEhC,eAAO;AAAA,MACT,CAAC;AAAA,IACL,CAAC,EACA,MAAM,MAAM,MAAS;AAGxB,UAAM,KAAK;AAAA,EACb;AACF;","names":[]}
@@ -5,7 +5,7 @@ import {
5
5
  // src/helpers/define-runner.ts
6
6
  import z from "zod";
7
7
  function defineRunner(config, fn) {
8
- return async (params) => {
8
+ const func = async (params) => {
9
9
  if (config.runConfigSchema && config.parseRunConfig !== false) {
10
10
  z.object(config.runConfigSchema).parse(params.runConfig);
11
11
  }
@@ -20,9 +20,18 @@ function defineRunner(config, fn) {
20
20
  }
21
21
  return await fn(params);
22
22
  };
23
+ return Object.assign(func, {
24
+ /**
25
+ * The configuration that was used to define the runner.
26
+ */
27
+ config: {
28
+ ...config,
29
+ runConfigSchema: z.object(config.runConfigSchema)
30
+ }
31
+ });
23
32
  }
24
33
 
25
34
  export {
26
35
  defineRunner
27
36
  };
28
- //# sourceMappingURL=chunk-QY5MPNNB.js.map
37
+ //# sourceMappingURL=chunk-RTEAK4II.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/helpers/define-runner.ts"],"sourcesContent":["import {\n IdGenerator,\n InferRunConfig,\n ProviderCtor,\n Runner,\n ScorerCtor,\n} from \"@/types\";\nimport { idGeneratorUUIDv7 } from \"@/utils\";\nimport z from \"zod\";\n\nexport function defineRunner<\n const TProviders extends ProviderCtor[],\n const TScorers extends ScorerCtor[],\n const TSchemaSets extends SchemaSetDefinition[],\n const TRunConfigSchema extends z.ZodRawShape = {},\n>(\n config: {\n schemaSets: TSchemaSets;\n providers: TProviders;\n scorers: TScorers;\n runConfigSchema?: TRunConfigSchema;\n\n /**\n * @default true\n */\n parseRunConfig?: boolean;\n defaults?: {\n scorer?: InstanceType<TScorers[number]>;\n responseIdGenerator?: IdGenerator;\n scoreIdGenerator?: IdGenerator;\n };\n },\n fn: Runner<\n TSchemaSets[number][\"testCase\"],\n TSchemaSets[number][\"response\"],\n TSchemaSets[number][\"score\"],\n InstanceType<TProviders[number]>,\n InstanceType<TScorers[number]>,\n InferRunConfig<TRunConfigSchema>\n >\n) {\n const func = async (params: Parameters<typeof fn>[0]) => {\n if (config.runConfigSchema && config.parseRunConfig !== false) {\n z.object(config.runConfigSchema).parse(params.runConfig);\n }\n\n if (params.idGenerators && !params.idGenerators.response) {\n params.idGenerators.response =\n config.defaults?.responseIdGenerator ?? idGeneratorUUIDv7;\n }\n\n if (params.idGenerators && !params.idGenerators.score) {\n params.idGenerators.score =\n config.defaults?.scoreIdGenerator ?? idGeneratorUUIDv7;\n }\n\n if (params.scorer === undefined) {\n params.scorer = config.defaults?.scorer ?? undefined;\n }\n\n return await fn(params);\n };\n\n return Object.assign(func, {\n /**\n * The configuration that was used to define the runner.\n */\n config: {\n ...config,\n runConfigSchema: z.object(config.runConfigSchema),\n },\n });\n}\n\ntype SchemaSetDefinition<\n TTestCase extends z.ZodObject = z.ZodObject,\n TResponse extends z.ZodObject = z.ZodObject,\n TScore extends z.ZodObject = z.ZodObject,\n> = {\n testCase: TTestCase;\n response: TResponse;\n score: TScore;\n};\n"],"mappings":";;;;;AAQA,OAAO,OAAO;AAEP,SAAS,aAMd,QAgBA,IAQA;AACA,QAAM,OAAO,OAAO,WAAqC;AACvD,QAAI,OAAO,mBAAmB,OAAO,mBAAmB,OAAO;AAC7D,QAAE,OAAO,OAAO,eAAe,EAAE,MAAM,OAAO,SAAS;AAAA,IACzD;AAEA,QAAI,OAAO,gBAAgB,CAAC,OAAO,aAAa,UAAU;AACxD,aAAO,aAAa,WAClB,OAAO,UAAU,uBAAuB;AAAA,IAC5C;AAEA,QAAI,OAAO,gBAAgB,CAAC,OAAO,aAAa,OAAO;AACrD,aAAO,aAAa,QAClB,OAAO,UAAU,oBAAoB;AAAA,IACzC;AAEA,QAAI,OAAO,WAAW,QAAW;AAC/B,aAAO,SAAS,OAAO,UAAU,UAAU;AAAA,IAC7C;AAEA,WAAO,MAAM,GAAG,MAAM;AAAA,EACxB;AAEA,SAAO,OAAO,OAAO,MAAM;AAAA;AAAA;AAAA;AAAA,IAIzB,QAAQ;AAAA,MACN,GAAG;AAAA,MACH,iBAAiB,EAAE,OAAO,OAAO,eAAe;AAAA,IAClD;AAAA,EACF,CAAC;AACH;","names":[]}
package/dist/index.d.ts CHANGED
@@ -69,10 +69,29 @@ declare function defineRunner<const TProviders extends ProviderCtor[], const TSc
69
69
  responseIdGenerator?: IdGenerator;
70
70
  scoreIdGenerator?: IdGenerator;
71
71
  };
72
- }, fn: Runner<TSchemaSets[number]["testCase"], TSchemaSets[number]["response"], TSchemaSets[number]["score"], InstanceType<TProviders[number]>, InstanceType<TScorers[number]>, InferRunConfig<TRunConfigSchema>>): (params: Parameters<typeof fn>[0]) => Promise<{
72
+ }, fn: Runner<TSchemaSets[number]["testCase"], TSchemaSets[number]["response"], TSchemaSets[number]["score"], InstanceType<TProviders[number]>, InstanceType<TScorers[number]>, InferRunConfig<TRunConfigSchema>>): ((params: Parameters<typeof fn>[0]) => Promise<{
73
73
  response: z__default.core.output<TSchemaSets[number]["response"]>;
74
74
  score?: z__default.core.output<TSchemaSets[number]["score"]> | undefined;
75
- }>;
75
+ }>) & {
76
+ /**
77
+ * The configuration that was used to define the runner.
78
+ */
79
+ config: {
80
+ runConfigSchema: z__default.ZodObject<{ -readonly [P in keyof TRunConfigSchema]: TRunConfigSchema[P]; }, z__default.core.$strip>;
81
+ schemaSets: TSchemaSets;
82
+ providers: TProviders;
83
+ scorers: TScorers;
84
+ /**
85
+ * @default true
86
+ */
87
+ parseRunConfig?: boolean;
88
+ defaults?: {
89
+ scorer?: InstanceType<TScorers[number]>;
90
+ responseIdGenerator?: IdGenerator;
91
+ scoreIdGenerator?: IdGenerator;
92
+ };
93
+ };
94
+ };
76
95
  type SchemaSetDefinition<TTestCase extends z__default.ZodObject = z__default.ZodObject, TResponse extends z__default.ZodObject = z__default.ZodObject, TScore extends z__default.ZodObject = z__default.ZodObject> = {
77
96
  testCase: TTestCase;
78
97
  response: TResponse;
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  defineRunner
3
- } from "./chunk-QY5MPNNB.js";
3
+ } from "./chunk-RTEAK4II.js";
4
4
  import {
5
5
  ScoringMethod
6
6
  } from "./chunk-HMQYGCKI.js";
@@ -4,7 +4,7 @@ import {
4
4
  MastraProvider,
5
5
  OpenAIProvider,
6
6
  OpenRouterProvider
7
- } from "../chunk-HPPCDSJ3.js";
7
+ } from "../chunk-3JF7SHLC.js";
8
8
  import "../chunk-UHHHSYVE.js";
9
9
  import "../chunk-4UBK6452.js";
10
10
  import "../chunk-PZ5AY32C.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "peerbench",
3
- "version": "0.0.4",
3
+ "version": "0.0.6",
4
4
  "description": "Tooling help work with AI benchmarking data and create trustworthy AI",
5
5
  "keywords": [
6
6
  "ai",
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/providers/abstract/provider.ts","../src/providers/abstract/llm.ts","../src/providers/mastra.ts","../src/providers/openai.ts","../src/providers/openrouter.ts"],"sourcesContent":["export abstract class AbstractProvider {\n abstract readonly kind: string;\n}\n\nexport type ProviderResponse<TData = unknown> = {\n startedAt: number;\n completedAt: number;\n data: TData;\n};\n","import { AbstractProvider, ProviderResponse } from \"./provider\";\nimport {\n ResponseFormatJSONObject,\n ResponseFormatJSONSchema,\n ResponseFormatText,\n} from \"openai/resources/shared\";\nimport { ChatCompletionMessageParam } from \"openai/resources/chat/completions\";\n\nexport abstract class AbstractLLMProvider extends AbstractProvider {\n abstract forward(args: LLMProviderForwardArgs): Promise<ChatResponse>;\n}\n\nexport type LLMProviderForwardArgs = {\n messages: ChatCompletionMessageParam[];\n model: string;\n abortSignal?: AbortSignal;\n temperature?: number;\n responseFormat?:\n | ResponseFormatText\n | ResponseFormatJSONSchema\n | ResponseFormatJSONObject;\n};\n\nexport type ChatResponse = ProviderResponse<string> & {\n inputTokensUsed?: number;\n outputTokensUsed?: number;\n inputCost?: string;\n outputCost?: string;\n\n metadata?: Record<string, unknown>;\n};\n","import {\n AbstractLLMProvider,\n type ChatResponse,\n type LLMProviderForwardArgs,\n} from \"./abstract/llm\";\nimport { MastraClient, type GetAgentResponse } from \"@mastra/client-js\";\n\nexport class MastraProvider extends AbstractLLMProvider {\n override readonly kind = \"mastra\";\n\n private readonly endpoint: string;\n private readonly authToken?: string;\n private client: MastraClient;\n private memory?: AgentMemoryOption;\n\n constructor(params: {\n endpoint: string;\n authToken?: string;\n memory?: AgentMemoryOption;\n }) {\n super();\n this.endpoint = params.endpoint;\n this.authToken = params.authToken;\n this.client = new MastraClient({\n baseUrl: this.endpoint,\n headers: this.authToken\n ? {\n Authorization: `Bearer ${this.authToken}`,\n }\n : undefined,\n });\n this.memory = params.memory;\n }\n\n override async forward(\n args: LLMProviderForwardArgs & {\n memory?: AgentMemoryOption;\n\n /**\n * The model that will be used as the brain for the agent.\n */\n modelName?: string\n }\n ): Promise<ChatResponse> {\n const apiMessages = args.messages\n .filter((m) => m.role === \"user\" || m.role === \"assistant\")\n .map((m) => ({\n role: m.role as \"user\" | \"assistant\",\n content: String((m as any).content ?? \"\"),\n }));\n\n const agent = this.client.getAgent(args.model);\n\n const startedAt = Date.now();\n const response = await agent.generate(\n {\n messages: apiMessages,\n runtimeContext: {\n \"model-id\": args.model,\n },\n },\n { memory: args.memory ?? this.memory }\n );\n\n return {\n data: response.text,\n startedAt,\n completedAt: Date.now(),\n };\n }\n\n async getAgentInfo(args: {\n agentId: string;\n runtimeContext?: MastraRuntimeContext;\n }) {\n return await this.client\n .getAgent(args.agentId)\n .details(args.runtimeContext);\n }\n\n async getAgents(args?: {\n runtimeContext?: MastraRuntimeContext;\n partial?: boolean;\n }): Promise<Record<string, GetAgentResponse>> {\n return this.client.getAgents(args?.runtimeContext, args?.partial);\n }\n}\n\n// NOTE: Mastra client does not export these types\nexport type AgentMemoryOption = Parameters<\n Parameters<MastraClient[\"getAgent\"]>[\"0\"] extends string\n ? ReturnType<MastraClient[\"getAgent\"]>[\"generate\"]\n : never\n>[0] extends { memory?: infer M }\n ? M\n : never;\n\ntype MastraRuntimeContext = Parameters<\n Parameters<MastraClient[\"getAgent\"]>[\"0\"] extends string\n ? ReturnType<MastraClient[\"getAgent\"]>[\"generate\"]\n : never\n>[0] extends { runtimeContext?: infer R }\n ? R\n : never;\n","import { RateLimiter } from \"@/utils\";\nimport { ChatCompletionMessageParam } from \"openai/resources/chat/completions\";\nimport {\n ResponseFormatJSONObject,\n ResponseFormatJSONSchema,\n ResponseFormatText,\n} from \"openai/resources/shared\";\nimport OpenAI, { APIError } from \"openai\";\nimport { AbstractLLMProvider, ChatResponse } from \"./abstract/llm\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\n\nexport class OpenAIProvider extends AbstractLLMProvider {\n override readonly kind = `${PEERBENCH_NAMESPACE}/llm/openai` as const;\n\n private client: OpenAI;\n private rateLimiter: RateLimiter;\n private maxRetries: number;\n\n constructor(config: {\n apiKey: string;\n baseURL: string;\n maxRetries?: number;\n timeout?: number;\n rateLimiter?: RateLimiter;\n }) {\n super();\n this.maxRetries = config.maxRetries ?? 3;\n this.rateLimiter =\n config.rateLimiter ??\n new RateLimiter({\n maxWeight: 20,\n timeWindow: 3_000,\n });\n\n this.client = new OpenAI({\n baseURL: config.baseURL,\n apiKey: config.apiKey,\n timeout: config.timeout,\n dangerouslyAllowBrowser: true,\n });\n }\n\n async forward(args: {\n messages: ChatCompletionMessageParam[];\n model: string;\n abortSignal?: AbortSignal;\n temperature?: number;\n responseFormat?:\n | ResponseFormatText\n | ResponseFormatJSONSchema\n | ResponseFormatJSONObject;\n }): Promise<ChatResponse> {\n let retryCount = this.maxRetries;\n while (retryCount > 0) {\n let startedAt: Date = new Date();\n\n try {\n const response = await this.rateLimiter.execute(\n async () => {\n // Capture the start time of the request\n startedAt = new Date();\n return await this.client.chat.completions.create(\n {\n model: args.model,\n messages: args.messages,\n temperature: args.temperature,\n response_format: args.responseFormat,\n },\n // Signal for request\n { signal: args.abortSignal }\n );\n },\n // Signal for rate limiting\n { signal: args.abortSignal }\n );\n\n if (\"error\" in response) {\n const err = response.error as any;\n throw new Error(\n `${err.message} - Code ${err.code} - ${JSON.stringify(err)}`\n );\n }\n\n if (!response?.choices?.[0]?.message?.content) {\n throw new Error(\"No content returned from the model\");\n }\n\n return {\n data: response.choices[0].message.content,\n\n inputTokensUsed: response?.usage?.prompt_tokens,\n outputTokensUsed: response?.usage?.completion_tokens,\n\n startedAt: startedAt.getTime(),\n completedAt: Date.now(),\n };\n } catch (err) {\n if (err instanceof APIError && err.status === 401) {\n throw new Error(`Invalid credentials provided`, { cause: err });\n }\n\n retryCount--;\n\n // More likely an empty HTTP response returned by the Provider\n // and it couldn't be parsed as JSON by the OpenAI SDK. We need to retry the request\n // More info can be found in the following links:\n // https://www.reddit.com/r/SillyTavernAI/comments/1ik95vr/deepseek_r1_on_openrouter_returning_blank_messages/\n // https://github.com/cline/cline/issues/60\n if (err instanceof SyntaxError) {\n console.debug(err);\n continue;\n }\n\n // If it was another error, just continue until we run out of retries\n if (retryCount !== 0) {\n continue;\n }\n\n throw new Error(\n `Failed to forward prompt to the model: ${err instanceof Error ? err.message : err}`,\n { cause: err }\n );\n }\n }\n\n throw new Error(\n `Failed to forward prompt to the model: Max retries reached`,\n { cause: new Error(\"Max retries reached\") }\n );\n }\n}\n","import {\n AbstractLLMProvider,\n ChatResponse,\n LLMProviderForwardArgs,\n} from \"./abstract/llm\";\nimport { RateLimiter } from \"@/utils\";\nimport { OpenAIProvider } from \"./openai\";\nimport { PEERBENCH_NAMESPACE } from \"@/constants\";\nimport Decimal from \"decimal.js\";\nimport axios from \"axios\";\n\nconst baseURL = \"https://openrouter.ai/api/v1\";\nconst MODELS_CACHE_TTL = 1000 * 60 * 60 * 24; // 24 hours\n\nexport class OpenRouterProvider extends AbstractLLMProvider {\n override readonly kind = `${PEERBENCH_NAMESPACE}/llm/openrouter.ai` as const;\n\n private models: ModelsResponse | undefined = undefined;\n private modelsCachePromise: Promise<ModelsResponse | undefined> =\n Promise.resolve(undefined);\n private modelsUpdatedAt = 0;\n private openAIProvider: OpenAIProvider;\n\n constructor(config: {\n apiKey: string;\n maxRetries?: number;\n timeout?: number;\n rateLimiter?: RateLimiter;\n }) {\n super();\n this.openAIProvider = new OpenAIProvider({\n baseURL,\n apiKey: config.apiKey,\n maxRetries: config.maxRetries,\n timeout: config.timeout,\n rateLimiter: config.rateLimiter,\n });\n }\n\n override async forward(args: LLMProviderForwardArgs): Promise<ChatResponse> {\n // Update models cache concurrently (non-blocking)\n const [response] = await Promise.all([\n this.openAIProvider.forward(args),\n this.updateModelsCache().catch(() => {\n // Silently fail if cache update fails so we won't have cost info in the result\n }),\n ]);\n\n // Get the model info from the cache\n const modelInfo = this.models?.data.find((m) => m.id === args.model);\n let inputCost: string | undefined = undefined;\n let outputCost: string | undefined = undefined;\n\n if (modelInfo !== undefined) {\n // Use Decimal.js for more accurate calculation\n if (response.inputTokensUsed !== undefined) {\n inputCost = new Decimal(modelInfo.pricing.prompt)\n .mul(response.inputTokensUsed)\n .toFixed(10);\n }\n if (response.outputTokensUsed !== undefined) {\n outputCost = new Decimal(modelInfo.pricing.completion)\n .mul(response.outputTokensUsed)\n .toFixed(10);\n }\n }\n\n return {\n ...response,\n inputCost,\n outputCost,\n };\n }\n\n /**\n * Updates the cache that holds information about OpenRouter models\n * including pricing information. It will be valid for 24 hours as\n * long as the instance of this Provider object is alive.\n */\n private async updateModelsCache() {\n // Chain each update method call to the promise.\n // This approach prevents race conditions between multiple calls.\n // Since each call is chained to the end of the previous one,\n // each promise makes a request only if the models cache is not updated\n // in the last call. Otherwise it simply resolves to the cached value.\n this.modelsCachePromise = this.modelsCachePromise\n .then(async () => {\n if (\n // The data presented in the cache\n this.models !== undefined &&\n // The cache is still valid\n Date.now() - this.modelsUpdatedAt < MODELS_CACHE_TTL\n ) {\n return this.models;\n }\n\n // If the cache is not valid, update it\n return axios\n .get<ModelsResponse>(`${baseURL}/models`)\n .then((res) => res.data)\n .then((data) => {\n // Only get the models that supports text input and output\n data = {\n data: data.data.filter(\n (m) =>\n m.architecture.input_modalities.includes(\"text\") &&\n m.architecture.output_modalities.includes(\"text\") &&\n // These models are \"fast apply model\" and don't support multi turn conversations so don't include them\n ![\n \"morph/morph-v3-large\",\n \"morph/morph-v3-fast\",\n \"relace/relace-apply-3\",\n ].includes(m.id)\n ),\n };\n\n this.models = data;\n this.modelsUpdatedAt = Date.now();\n\n return data;\n });\n })\n .catch(() => undefined);\n\n // Wait for the promise chain to resolve\n await this.modelsCachePromise;\n }\n}\n\ntype PutModality = \"text\" | \"image\" | \"file\" | \"audio\";\ntype Modality = \"text->text\" | \"text+image->text\" | \"text+image->text+image\";\ntype ModelsResponse = {\n data: {\n readonly id: string;\n readonly canonical_slug: string;\n readonly hugging_face_id: null | string;\n readonly name: string;\n readonly created: number;\n readonly description: string;\n readonly context_length: number;\n readonly architecture: {\n readonly modality: Modality;\n readonly input_modalities: PutModality[];\n readonly output_modalities: PutModality[];\n readonly instruct_type: null | string;\n };\n readonly pricing: {\n readonly prompt: string;\n readonly completion: string;\n readonly request?: string;\n readonly image?: string;\n readonly web_search?: string;\n readonly internal_reasoning?: string;\n readonly input_cache_read?: string;\n readonly input_cache_write?: string;\n readonly audio?: string;\n };\n }[];\n};\n"],"mappings":";;;;;;;;AAAO,IAAe,mBAAf,MAAgC;AAEvC;;;ACMO,IAAe,sBAAf,cAA2C,iBAAiB;AAEnE;;;ACLA,SAAS,oBAA2C;AAE7C,IAAM,iBAAN,cAA6B,oBAAoB;AAAA,EACpC,OAAO;AAAA,EAER;AAAA,EACA;AAAA,EACT;AAAA,EACA;AAAA,EAER,YAAY,QAIT;AACD,UAAM;AACN,SAAK,WAAW,OAAO;AACvB,SAAK,YAAY,OAAO;AACxB,SAAK,SAAS,IAAI,aAAa;AAAA,MAC7B,SAAS,KAAK;AAAA,MACd,SAAS,KAAK,YACV;AAAA,QACA,eAAe,UAAU,KAAK,SAAS;AAAA,MACzC,IACE;AAAA,IACN,CAAC;AACD,SAAK,SAAS,OAAO;AAAA,EACvB;AAAA,EAEA,MAAe,QACb,MAQuB;AACvB,UAAM,cAAc,KAAK,SACtB,OAAO,CAAC,MAAM,EAAE,SAAS,UAAU,EAAE,SAAS,WAAW,EACzD,IAAI,CAAC,OAAO;AAAA,MACX,MAAM,EAAE;AAAA,MACR,SAAS,OAAQ,EAAU,WAAW,EAAE;AAAA,IAC1C,EAAE;AAEJ,UAAM,QAAQ,KAAK,OAAO,SAAS,KAAK,KAAK;AAE7C,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,WAAW,MAAM,MAAM;AAAA,MAC3B;AAAA,QACE,UAAU;AAAA,QACV,gBAAgB;AAAA,UACd,YAAY,KAAK;AAAA,QACnB;AAAA,MACF;AAAA,MACA,EAAE,QAAQ,KAAK,UAAU,KAAK,OAAO;AAAA,IACvC;AAEA,WAAO;AAAA,MACL,MAAM,SAAS;AAAA,MACf;AAAA,MACA,aAAa,KAAK,IAAI;AAAA,IACxB;AAAA,EACF;AAAA,EAEA,MAAM,aAAa,MAGhB;AACD,WAAO,MAAM,KAAK,OACf,SAAS,KAAK,OAAO,EACrB,QAAQ,KAAK,cAAc;AAAA,EAChC;AAAA,EAEA,MAAM,UAAU,MAG8B;AAC5C,WAAO,KAAK,OAAO,UAAU,MAAM,gBAAgB,MAAM,OAAO;AAAA,EAClE;AACF;;;AC/EA,OAAO,UAAU,gBAAgB;AAI1B,IAAM,iBAAN,cAA6B,oBAAoB;AAAA,EACpC,OAAO,GAAG,mBAAmB;AAAA,EAEvC;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,QAMT;AACD,UAAM;AACN,SAAK,aAAa,OAAO,cAAc;AACvC,SAAK,cACH,OAAO,eACP,IAAI,YAAY;AAAA,MACd,WAAW;AAAA,MACX,YAAY;AAAA,IACd,CAAC;AAEH,SAAK,SAAS,IAAI,OAAO;AAAA,MACvB,SAAS,OAAO;AAAA,MAChB,QAAQ,OAAO;AAAA,MACf,SAAS,OAAO;AAAA,MAChB,yBAAyB;AAAA,IAC3B,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ,MASY;AACxB,QAAI,aAAa,KAAK;AACtB,WAAO,aAAa,GAAG;AACrB,UAAI,YAAkB,oBAAI,KAAK;AAE/B,UAAI;AACF,cAAM,WAAW,MAAM,KAAK,YAAY;AAAA,UACtC,YAAY;AAEV,wBAAY,oBAAI,KAAK;AACrB,mBAAO,MAAM,KAAK,OAAO,KAAK,YAAY;AAAA,cACxC;AAAA,gBACE,OAAO,KAAK;AAAA,gBACZ,UAAU,KAAK;AAAA,gBACf,aAAa,KAAK;AAAA,gBAClB,iBAAiB,KAAK;AAAA,cACxB;AAAA;AAAA,cAEA,EAAE,QAAQ,KAAK,YAAY;AAAA,YAC7B;AAAA,UACF;AAAA;AAAA,UAEA,EAAE,QAAQ,KAAK,YAAY;AAAA,QAC7B;AAEA,YAAI,WAAW,UAAU;AACvB,gBAAM,MAAM,SAAS;AACrB,gBAAM,IAAI;AAAA,YACR,GAAG,IAAI,OAAO,WAAW,IAAI,IAAI,MAAM,KAAK,UAAU,GAAG,CAAC;AAAA,UAC5D;AAAA,QACF;AAEA,YAAI,CAAC,UAAU,UAAU,CAAC,GAAG,SAAS,SAAS;AAC7C,gBAAM,IAAI,MAAM,oCAAoC;AAAA,QACtD;AAEA,eAAO;AAAA,UACL,MAAM,SAAS,QAAQ,CAAC,EAAE,QAAQ;AAAA,UAElC,iBAAiB,UAAU,OAAO;AAAA,UAClC,kBAAkB,UAAU,OAAO;AAAA,UAEnC,WAAW,UAAU,QAAQ;AAAA,UAC7B,aAAa,KAAK,IAAI;AAAA,QACxB;AAAA,MACF,SAAS,KAAK;AACZ,YAAI,eAAe,YAAY,IAAI,WAAW,KAAK;AACjD,gBAAM,IAAI,MAAM,gCAAgC,EAAE,OAAO,IAAI,CAAC;AAAA,QAChE;AAEA;AAOA,YAAI,eAAe,aAAa;AAC9B,kBAAQ,MAAM,GAAG;AACjB;AAAA,QACF;AAGA,YAAI,eAAe,GAAG;AACpB;AAAA,QACF;AAEA,cAAM,IAAI;AAAA,UACR,0CAA0C,eAAe,QAAQ,IAAI,UAAU,GAAG;AAAA,UAClF,EAAE,OAAO,IAAI;AAAA,QACf;AAAA,MACF;AAAA,IACF;AAEA,UAAM,IAAI;AAAA,MACR;AAAA,MACA,EAAE,OAAO,IAAI,MAAM,qBAAqB,EAAE;AAAA,IAC5C;AAAA,EACF;AACF;;;AC1HA,OAAO,aAAa;AACpB,OAAO,WAAW;AAElB,IAAM,UAAU;AAChB,IAAM,mBAAmB,MAAO,KAAK,KAAK;AAEnC,IAAM,qBAAN,cAAiC,oBAAoB;AAAA,EACxC,OAAO,GAAG,mBAAmB;AAAA,EAEvC,SAAqC;AAAA,EACrC,qBACN,QAAQ,QAAQ,MAAS;AAAA,EACnB,kBAAkB;AAAA,EAClB;AAAA,EAER,YAAY,QAKT;AACD,UAAM;AACN,SAAK,iBAAiB,IAAI,eAAe;AAAA,MACvC;AAAA,MACA,QAAQ,OAAO;AAAA,MACf,YAAY,OAAO;AAAA,MACnB,SAAS,OAAO;AAAA,MAChB,aAAa,OAAO;AAAA,IACtB,CAAC;AAAA,EACH;AAAA,EAEA,MAAe,QAAQ,MAAqD;AAE1E,UAAM,CAAC,QAAQ,IAAI,MAAM,QAAQ,IAAI;AAAA,MACnC,KAAK,eAAe,QAAQ,IAAI;AAAA,MAChC,KAAK,kBAAkB,EAAE,MAAM,MAAM;AAAA,MAErC,CAAC;AAAA,IACH,CAAC;AAGD,UAAM,YAAY,KAAK,QAAQ,KAAK,KAAK,CAAC,MAAM,EAAE,OAAO,KAAK,KAAK;AACnE,QAAI,YAAgC;AACpC,QAAI,aAAiC;AAErC,QAAI,cAAc,QAAW;AAE3B,UAAI,SAAS,oBAAoB,QAAW;AAC1C,oBAAY,IAAI,QAAQ,UAAU,QAAQ,MAAM,EAC7C,IAAI,SAAS,eAAe,EAC5B,QAAQ,EAAE;AAAA,MACf;AACA,UAAI,SAAS,qBAAqB,QAAW;AAC3C,qBAAa,IAAI,QAAQ,UAAU,QAAQ,UAAU,EAClD,IAAI,SAAS,gBAAgB,EAC7B,QAAQ,EAAE;AAAA,MACf;AAAA,IACF;AAEA,WAAO;AAAA,MACL,GAAG;AAAA,MACH;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAc,oBAAoB;AAMhC,SAAK,qBAAqB,KAAK,mBAC5B,KAAK,YAAY;AAChB;AAAA;AAAA,QAEE,KAAK,WAAW;AAAA,QAEhB,KAAK,IAAI,IAAI,KAAK,kBAAkB;AAAA,QACpC;AACA,eAAO,KAAK;AAAA,MACd;AAGA,aAAO,MACJ,IAAoB,GAAG,OAAO,SAAS,EACvC,KAAK,CAAC,QAAQ,IAAI,IAAI,EACtB,KAAK,CAAC,SAAS;AAEd,eAAO;AAAA,UACL,MAAM,KAAK,KAAK;AAAA,YACd,CAAC,MACC,EAAE,aAAa,iBAAiB,SAAS,MAAM,KAC/C,EAAE,aAAa,kBAAkB,SAAS,MAAM;AAAA,YAEhD,CAAC;AAAA,cACC;AAAA,cACA;AAAA,cACA;AAAA,YACF,EAAE,SAAS,EAAE,EAAE;AAAA,UACnB;AAAA,QACF;AAEA,aAAK,SAAS;AACd,aAAK,kBAAkB,KAAK,IAAI;AAEhC,eAAO;AAAA,MACT,CAAC;AAAA,IACL,CAAC,EACA,MAAM,MAAM,MAAS;AAGxB,UAAM,KAAK;AAAA,EACb;AACF;","names":[]}
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/helpers/define-runner.ts"],"sourcesContent":["import {\n IdGenerator,\n InferRunConfig,\n ProviderCtor,\n Runner,\n ScorerCtor,\n} from \"@/types\";\nimport { idGeneratorUUIDv7 } from \"@/utils\";\nimport z from \"zod\";\n\nexport function defineRunner<\n const TProviders extends ProviderCtor[],\n const TScorers extends ScorerCtor[],\n const TSchemaSets extends SchemaSetDefinition[],\n const TRunConfigSchema extends z.ZodRawShape = {},\n>(\n config: {\n schemaSets: TSchemaSets;\n providers: TProviders;\n scorers: TScorers;\n runConfigSchema?: TRunConfigSchema;\n\n /**\n * @default true\n */\n parseRunConfig?: boolean;\n defaults?: {\n scorer?: InstanceType<TScorers[number]>;\n responseIdGenerator?: IdGenerator;\n scoreIdGenerator?: IdGenerator;\n };\n },\n fn: Runner<\n TSchemaSets[number][\"testCase\"],\n TSchemaSets[number][\"response\"],\n TSchemaSets[number][\"score\"],\n InstanceType<TProviders[number]>,\n InstanceType<TScorers[number]>,\n InferRunConfig<TRunConfigSchema>\n >\n) {\n return async (params: Parameters<typeof fn>[0]) => {\n if (config.runConfigSchema && config.parseRunConfig !== false) {\n z.object(config.runConfigSchema).parse(params.runConfig);\n }\n\n if (params.idGenerators && !params.idGenerators.response) {\n params.idGenerators.response =\n config.defaults?.responseIdGenerator ?? idGeneratorUUIDv7;\n }\n\n if (params.idGenerators && !params.idGenerators.score) {\n params.idGenerators.score =\n config.defaults?.scoreIdGenerator ?? idGeneratorUUIDv7;\n }\n\n if (params.scorer === undefined) {\n params.scorer = config.defaults?.scorer ?? undefined;\n }\n\n return await fn(params);\n };\n}\n\ntype SchemaSetDefinition<\n TTestCase extends z.ZodObject = z.ZodObject,\n TResponse extends z.ZodObject = z.ZodObject,\n TScore extends z.ZodObject = z.ZodObject,\n> = {\n testCase: TTestCase;\n response: TResponse;\n score: TScore;\n};\n"],"mappings":";;;;;AAQA,OAAO,OAAO;AAEP,SAAS,aAMd,QAgBA,IAQA;AACA,SAAO,OAAO,WAAqC;AACjD,QAAI,OAAO,mBAAmB,OAAO,mBAAmB,OAAO;AAC7D,QAAE,OAAO,OAAO,eAAe,EAAE,MAAM,OAAO,SAAS;AAAA,IACzD;AAEA,QAAI,OAAO,gBAAgB,CAAC,OAAO,aAAa,UAAU;AACxD,aAAO,aAAa,WAClB,OAAO,UAAU,uBAAuB;AAAA,IAC5C;AAEA,QAAI,OAAO,gBAAgB,CAAC,OAAO,aAAa,OAAO;AACrD,aAAO,aAAa,QAClB,OAAO,UAAU,oBAAoB;AAAA,IACzC;AAEA,QAAI,OAAO,WAAW,QAAW;AAC/B,aAAO,SAAS,OAAO,UAAU,UAAU;AAAA,IAC7C;AAEA,WAAO,MAAM,GAAG,MAAM;AAAA,EACxB;AACF;","names":[]}