peerbench 0.0.1 → 0.0.2-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +308 -2
- package/dist/abstract-Dec9Sc5O.d.ts +12 -0
- package/dist/benchmarks/index.d.ts +1698 -0
- package/dist/benchmarks/index.js +915 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/catalogs/index.d.ts +75 -0
- package/dist/catalogs/index.js +88 -0
- package/dist/catalogs/index.js.map +1 -0
- package/dist/chunk-22HU24QF.js +8 -0
- package/dist/chunk-22HU24QF.js.map +1 -0
- package/dist/chunk-232PY7K3.js +50 -0
- package/dist/chunk-232PY7K3.js.map +1 -0
- package/dist/chunk-7TREBPSJ.js +26 -0
- package/dist/chunk-7TREBPSJ.js.map +1 -0
- package/dist/chunk-DUBKY73H.js +128 -0
- package/dist/chunk-DUBKY73H.js.map +1 -0
- package/dist/chunk-GVF4YZF3.js +15 -0
- package/dist/chunk-GVF4YZF3.js.map +1 -0
- package/dist/chunk-HJH3SW3L.js +103 -0
- package/dist/chunk-HJH3SW3L.js.map +1 -0
- package/dist/chunk-IUN2IUCS.js +58 -0
- package/dist/chunk-IUN2IUCS.js.map +1 -0
- package/dist/chunk-PZ5AY32C.js +10 -0
- package/dist/chunk-PZ5AY32C.js.map +1 -0
- package/dist/chunk-VBOM2YEG.js +47 -0
- package/dist/chunk-VBOM2YEG.js.map +1 -0
- package/dist/chunk-ZJWSK4VO.js +11 -0
- package/dist/chunk-ZJWSK4VO.js.map +1 -0
- package/dist/data-BmN5WjZ4.d.ts +57 -0
- package/dist/generic-array-DLHWSvf1.d.ts +22 -0
- package/dist/index-WiPjF2AL.d.ts +15 -0
- package/dist/index.d.ts +38 -3845
- package/dist/index.js +40 -3557
- package/dist/index.js.map +1 -1
- package/dist/llm-DNj_tp2T.d.ts +22 -0
- package/dist/llm-judge-DIG1f1Az.d.ts +67 -0
- package/dist/provider-BDjGp2y-.d.ts +10 -0
- package/dist/providers/index.d.ts +72 -0
- package/dist/providers/index.js +263 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/rate-limiter-CSmVIRsM.d.ts +60 -0
- package/dist/schemas/extensions/index.d.ts +14 -0
- package/dist/schemas/extensions/index.js +13 -0
- package/dist/schemas/extensions/index.js.map +1 -0
- package/dist/schemas/index.d.ts +233 -0
- package/dist/schemas/index.js +27 -0
- package/dist/schemas/index.js.map +1 -0
- package/dist/schemas/llm/index.d.ts +98 -0
- package/dist/schemas/llm/index.js +37 -0
- package/dist/schemas/llm/index.js.map +1 -0
- package/dist/scorers/index.d.ts +63 -0
- package/dist/scorers/index.js +494 -0
- package/dist/scorers/index.js.map +1 -0
- package/dist/simple-system-prompt-CzPYuvo0.d.ts +49 -0
- package/dist/system-prompt--0FdPWqK.d.ts +58 -0
- package/dist/utilities-BrRH32rD.d.ts +30 -0
- package/package.json +39 -21
- package/LICENSE +0 -21
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import {
|
|
2
|
+
bufferToString
|
|
3
|
+
} from "./chunk-DUBKY73H.js";
|
|
4
|
+
|
|
5
|
+
// src/loaders/abstract/loader.ts
|
|
6
|
+
// Root class of the loader hierarchy. The compiled class body is empty, so it
// contributes no runtime members of its own in this chunk.
var AbstractLoader = class {
|
|
7
|
+
};
|
|
8
|
+
|
|
9
|
+
// src/loaders/abstract/data.ts
|
|
10
|
+
// Loader base class derived from AbstractLoader. Like its parent, the compiled
// class body is empty; it only adds a level to the prototype chain.
var AbstractDataLoader = class extends AbstractLoader {
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
// src/utils/json.ts
|
|
14
|
+
import { stringify } from "safe-stable-stringify";
|
|
15
|
+
/**
 * Parses a JSONL string (one JSON document per line) into an array.
 *
 * Blank / whitespace-only lines are skipped. A line counts as invalid only
 * when it cannot be parsed at all, so valid falsy JSON values (`0`, `false`,
 * `null`, `""`) are kept in both modes.
 *
 * @param str - Raw JSONL text.
 * @param options - Optional settings. When `errorOnInvalid` is truthy the
 *   first unparsable line throws instead of being dropped.
 * @returns The parsed values in line order.
 * @throws {Error} `Invalid JSON line: <line>` when `errorOnInvalid` is truthy
 *   and a line is not valid JSON.
 */
function parseJSONL(str, options) {
  const parsed = [];
  for (const line of str.split("\n")) {
    if (line.trim() === "") {
      continue;
    }
    const obj = tryParseJson(line);
    // `JSON.parse` never yields `undefined`, so `undefined` is an unambiguous
    // "could not parse" marker; a truthiness check would misclassify 0/false/null/"".
    if (obj === void 0) {
      if (options?.errorOnInvalid) {
        throw new Error(`Invalid JSON line: ${line}`);
      }
      continue;
    }
    parsed.push(obj);
  }
  return parsed;
}

/**
 * Tries to parse `content` as JSON.
 *
 * @param content - Text to parse.
 * @returns The parsed value, or `undefined` when `content` is not valid JSON.
 */
function tryParseJson(content) {
  try {
    return JSON.parse(content);
  } catch {
    // Invalid JSON is an expected outcome here, reported as `undefined`.
    return void 0;
  }
}
|
|
30
|
+
/**
 * Serialises `value` by delegating to `stringify` from `safe-stable-stringify`.
 *
 * @param value - Value to serialise.
 * @returns Whatever `stringify` returns for `value`.
 */
function stableStringify(value) {
  const serialized = stringify(value);
  return serialized;
}
|
|
33
|
+
|
|
34
|
+
// src/loaders/generic-array.ts
|
|
35
|
+
/**
 * Base data loader for content that is either a JSON array or JSONL (one JSON
 * record per line).
 *
 * Every record is offered to three builders; whatever they return is collected
 * into `{ testCases, responses, scores }`, de-duplicated per collection by the
 * stringified `id` (the first occurrence wins).
 *
 * Subclasses must supply `testCaseBuilder(data, context)`; `responseBuilder`
 * and `scoreBuilder` default to producing nothing and may be overridden.
 */
var GenericJSONArrayDataLoader = class extends AbstractDataLoader {
  /**
   * Default response builder: yields no response for any record.
   * @returns `undefined`.
   */
  async responseBuilder(data, context) {
    return void 0;
  }

  /**
   * Default score builder: yields no score for any record.
   * @returns `undefined`.
   */
  async scoreBuilder(data, context) {
    return void 0;
  }

  /**
   * Decodes `params.content`, parses it as a JSON array or as JSONL, and
   * converts the records into a loader result.
   *
   * @param params - Object whose `content` holds the raw bytes to load.
   * @returns The collected test cases, responses and scores.
   * @throws {Error} When the content yields no non-empty array of records, or
   *   when a record is recognised by none of the builders.
   */
  async loadData(params) {
    const contentStr = bufferToString(params.content);
    let data = tryParseJson(contentStr);
    // Anything that is not already an array gets a JSONL attempt. This covers
    // unparsable multi-line JSONL as well as a one-record JSONL file, which is
    // itself a single valid JSON document (an object rather than an array).
    if (!Array.isArray(data)) {
      data = parseJSONL(contentStr);
    }
    if (!Array.isArray(data) || data.length === 0) {
      throw new Error(
        "Invalid data: content must be a non-empty JSON or JSONL array"
      );
    }
    return this.transformArrayToResult(data);
  }

  /**
   * Runs the three builders over each record, in input order, and gathers
   * their outputs.
   *
   * @param data - Parsed records.
   * @returns The accumulated `{ testCases, responses, scores }`.
   * @throws {Error} `Incompatible object format` when all three builders
   *   return a falsy value for a record.
   */
  async transformArrayToResult(data) {
    const includedTestCaseIds = /* @__PURE__ */ new Set();
    const includedResponseIds = /* @__PURE__ */ new Set();
    const includedScoreIds = /* @__PURE__ */ new Set();
    const result = {
      testCases: [],
      responses: [],
      scores: []
    };
    for (const item of data) {
      // Builders run one after another and receive the partially filled
      // result, so they can inspect what earlier records produced.
      const testCase = await this.testCaseBuilder(item, { result });
      const response = await this.responseBuilder(item, { result });
      const score = await this.scoreBuilder(item, { result });
      if (testCase) {
        const testCaseIdStr = String(testCase.id);
        if (!includedTestCaseIds.has(testCaseIdStr)) {
          includedTestCaseIds.add(testCaseIdStr);
          result.testCases.push(testCase);
        }
      }
      if (response) {
        const responseIdStr = String(response.id);
        if (!includedResponseIds.has(responseIdStr)) {
          includedResponseIds.add(responseIdStr);
          result.responses.push(response);
        }
      }
      if (score) {
        const scoreIdStr = String(score.id);
        if (!includedScoreIds.has(scoreIdStr)) {
          includedScoreIds.add(scoreIdStr);
          result.scores.push(score);
        }
      }
      if (!testCase && !response && !score) {
        throw new Error("Incompatible object format");
      }
    }
    return result;
  }
};
|
|
96
|
+
|
|
97
|
+
export {
|
|
98
|
+
AbstractLoader,
|
|
99
|
+
AbstractDataLoader,
|
|
100
|
+
stableStringify,
|
|
101
|
+
GenericJSONArrayDataLoader
|
|
102
|
+
};
|
|
103
|
+
//# sourceMappingURL=chunk-HJH3SW3L.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/loaders/abstract/loader.ts","../src/loaders/abstract/data.ts","../src/utils/json.ts","../src/loaders/generic-array.ts"],"sourcesContent":["import { BaseResponseV1, BaseScoreV1, BaseTestCaseV1 } from \"@/schemas\";\nimport { BaseBenchmarkSpecV1 } from \"@/schemas/benchmark-spec\";\nimport { MaybePromise } from \"@/types\";\n\nexport type LoaderResult<\n TTestCase extends BaseTestCaseV1 = BaseTestCaseV1,\n TResponse extends BaseResponseV1 = BaseResponseV1,\n TScore extends BaseScoreV1 = BaseScoreV1,\n TBenchmarkSpec extends BaseBenchmarkSpecV1 = BaseBenchmarkSpecV1,\n> = {\n testCases: TTestCase[];\n responses: TResponse[];\n scores: TScore[];\n\n benchmarkSpec?: TBenchmarkSpec;\n};\n\nexport abstract class AbstractLoader {\n abstract readonly kind: string;\n\n abstract loadData(params: unknown): MaybePromise<LoaderResult>;\n abstract loadBenchmarkSpec(\n params: unknown\n ): MaybePromise<BaseBenchmarkSpecV1>;\n}\n","import { MaybePromise } from \"@/types\";\nimport { AbstractLoader, LoaderResult } from \"./loader\";\nimport { BaseBenchmarkSpecV1 } from \"@/schemas/benchmark-spec\";\n\nexport abstract class AbstractDataLoader extends AbstractLoader {\n abstract loadData(params: {\n content: Uint8Array;\n }): MaybePromise<LoaderResult>;\n\n abstract loadBenchmarkSpec(params: {\n content: Uint8Array;\n }): MaybePromise<BaseBenchmarkSpecV1>;\n}\n","import { stringify } from \"safe-stable-stringify\";\n\n/**\n * Parses JSONL formatted string into an array\n * @returns An array of parsed JSON lines\n */\nexport function parseJSONL<T>(\n str: string,\n options?: { errorOnInvalid?: boolean }\n): T[] {\n return str\n .split(\"\\n\") // Split per line\n .filter((line) => line.trim() !== \"\") // Filter empty lines\n .map((line) => {\n const obj = tryParseJson(line);\n if (options?.errorOnInvalid && !obj) {\n throw new Error(`Invalid JSON line: ${line}`);\n }\n return obj;\n }) // Parse line (parse as undefined if it is invalid)\n .filter((obj) 
=> obj !== undefined); // Filter invalid lines\n}\n\n/**\n * Tries to parse the given string as JSON.\n * Returns `undefined` if it is not a valid JSON entity.\n */\nexport function tryParseJson<T = any>(content: string): T | undefined {\n try {\n return JSON.parse(content);\n } catch {\n // Invalid JSON\n }\n}\n\n/**\n * Stringifies the given JSON value using `safe-stable-stringify` in a stable manner.\n * This stable method generates the same string output for the same input value (including objects).\n */\nexport function stableStringify(value: any) {\n return stringify(value);\n}\n","import { BaseTestCaseV1 } from \"@/schemas/test-case\";\nimport { BaseResponseV1 } from \"@/schemas/response\";\nimport { BaseScoreV1 } from \"@/schemas/score\";\nimport { MaybePromise } from \"@/types\";\nimport { AbstractDataLoader } from \"@/loaders/abstract/data\";\nimport { LoaderResult } from \"@/loaders/abstract/loader\";\nimport { tryParseJson, parseJSONL } from \"@/utils/json\";\nimport { bufferToString } from \"@/utils/string\";\n\nexport type GenericJSONArrayLoaderResult<\n TTestCase extends BaseTestCaseV1 = BaseTestCaseV1,\n TResponse extends BaseResponseV1 = BaseResponseV1,\n TScore extends BaseScoreV1 = BaseScoreV1,\n> = LoaderResult<TTestCase, TResponse, TScore>;\n\nexport abstract class GenericJSONArrayDataLoader<\n TTestCase extends BaseTestCaseV1 = BaseTestCaseV1,\n TResponse extends BaseResponseV1 = BaseResponseV1,\n TScore extends BaseScoreV1 = BaseScoreV1,\n> extends AbstractDataLoader {\n protected abstract testCaseBuilder(\n data: any,\n context: {\n result: GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>;\n }\n ): MaybePromise<TTestCase | undefined>;\n\n protected async responseBuilder(\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n data: any,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n context: {\n result: GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>;\n }\n ): Promise<TResponse | undefined> 
{\n return undefined;\n }\n\n protected async scoreBuilder(\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n data: any,\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n context: {\n result: GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>;\n }\n ): Promise<TScore | undefined> {\n return undefined;\n }\n\n async loadData(params: {\n content: Uint8Array;\n }): Promise<GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>> {\n const contentStr = bufferToString(params.content);\n let data: unknown[] | undefined = tryParseJson<unknown[]>(contentStr);\n\n if (!data) {\n data = parseJSONL<unknown>(contentStr);\n }\n\n if (!data || !Array.isArray(data) || data.length === 0) {\n throw new Error(\n \"Invalid data: content must be a non-empty JSON or JSONL array\"\n );\n }\n\n return this.transformArrayToResult(data);\n }\n\n private async transformArrayToResult(\n data: unknown[]\n ): Promise<GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>> {\n const includedTestCaseIds: Set<string> = new Set();\n const includedResponseIds: Set<string> = new Set();\n const includedScoreIds: Set<string> = new Set();\n const result: GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore> = {\n testCases: [],\n responses: [],\n scores: [],\n };\n\n for (const item of data) {\n const testCase = await this.testCaseBuilder(item, { result });\n const response = await this.responseBuilder(item, { result });\n const score = await this.scoreBuilder(item, { result });\n\n if (testCase) {\n const testCaseIdStr = String(testCase.id);\n if (!includedTestCaseIds.has(testCaseIdStr)) {\n includedTestCaseIds.add(testCaseIdStr);\n result.testCases.push(testCase);\n }\n }\n\n if (response) {\n const responseIdStr = String(response.id);\n if (!includedResponseIds.has(responseIdStr)) {\n includedResponseIds.add(responseIdStr);\n result.responses.push(response);\n }\n }\n\n if (score) {\n const scoreIdStr = String(score.id);\n if 
(!includedScoreIds.has(scoreIdStr)) {\n includedScoreIds.add(scoreIdStr);\n result.scores.push(score);\n }\n }\n\n if (!testCase && !response && !score) {\n throw new Error(\"Incompatible object format\");\n }\n }\n\n return result;\n }\n}\n"],"mappings":";;;;;AAiBO,IAAe,iBAAf,MAA8B;AAOrC;;;ACpBO,IAAe,qBAAf,cAA0C,eAAe;AAQhE;;;ACZA,SAAS,iBAAiB;AAMnB,SAAS,WACd,KACA,SACK;AACL,SAAO,IACJ,MAAM,IAAI,EACV,OAAO,CAAC,SAAS,KAAK,KAAK,MAAM,EAAE,EACnC,IAAI,CAAC,SAAS;AACb,UAAM,MAAM,aAAa,IAAI;AAC7B,QAAI,SAAS,kBAAkB,CAAC,KAAK;AACnC,YAAM,IAAI,MAAM,sBAAsB,IAAI,EAAE;AAAA,IAC9C;AACA,WAAO;AAAA,EACT,CAAC,EACA,OAAO,CAAC,QAAQ,QAAQ,MAAS;AACtC;AAMO,SAAS,aAAsB,SAAgC;AACpE,MAAI;AACF,WAAO,KAAK,MAAM,OAAO;AAAA,EAC3B,QAAQ;AAAA,EAER;AACF;AAMO,SAAS,gBAAgB,OAAY;AAC1C,SAAO,UAAU,KAAK;AACxB;;;AC1BO,IAAe,6BAAf,cAIG,mBAAmB;AAAA,EAQ3B,MAAgB,gBAEd,MAEA,SAGgC;AAChC,WAAO;AAAA,EACT;AAAA,EAEA,MAAgB,aAEd,MAEA,SAG6B;AAC7B,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,SAAS,QAEyD;AACtE,UAAM,aAAa,eAAe,OAAO,OAAO;AAChD,QAAI,OAA8B,aAAwB,UAAU;AAEpE,QAAI,CAAC,MAAM;AACT,aAAO,WAAoB,UAAU;AAAA,IACvC;AAEA,QAAI,CAAC,QAAQ,CAAC,MAAM,QAAQ,IAAI,KAAK,KAAK,WAAW,GAAG;AACtD,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,WAAO,KAAK,uBAAuB,IAAI;AAAA,EACzC;AAAA,EAEA,MAAc,uBACZ,MACqE;AACrE,UAAM,sBAAmC,oBAAI,IAAI;AACjD,UAAM,sBAAmC,oBAAI,IAAI;AACjD,UAAM,mBAAgC,oBAAI,IAAI;AAC9C,UAAM,SAAqE;AAAA,MACzE,WAAW,CAAC;AAAA,MACZ,WAAW,CAAC;AAAA,MACZ,QAAQ,CAAC;AAAA,IACX;AAEA,eAAW,QAAQ,MAAM;AACvB,YAAM,WAAW,MAAM,KAAK,gBAAgB,MAAM,EAAE,OAAO,CAAC;AAC5D,YAAM,WAAW,MAAM,KAAK,gBAAgB,MAAM,EAAE,OAAO,CAAC;AAC5D,YAAM,QAAQ,MAAM,KAAK,aAAa,MAAM,EAAE,OAAO,CAAC;AAEtD,UAAI,UAAU;AACZ,cAAM,gBAAgB,OAAO,SAAS,EAAE;AACxC,YAAI,CAAC,oBAAoB,IAAI,aAAa,GAAG;AAC3C,8BAAoB,IAAI,aAAa;AACrC,iBAAO,UAAU,KAAK,QAAQ;AAAA,QAChC;AAAA,MACF;AAEA,UAAI,UAAU;AACZ,cAAM,gBAAgB,OAAO,SAAS,EAAE;AACxC,YAAI,CAAC,oBAAoB,IAAI,aAAa,GAAG;AAC3C,8BAAoB,IAAI,aAAa;AACrC,iBAAO,UAAU,KAAK,QAAQ;AAAA,QAChC;AAAA,MACF;AAEA,UAAI,OAAO;AACT,cAAM,aAAa,OAAO,MAAM,EAAE;AAClC,YAAI,CAAC,iBAAiB,IAAI,UAAU,GAAG;AACrC,2BAAiB,IAAI,UAAU;AAC/B,iBAAO,OAAO,K
AAK,KAAK;AAAA,QAC1B;AAAA,MACF;AAEA,UAAI,CAAC,YAAY,CAAC,YAAY,CAAC,OAAO;AACpC,cAAM,IAAI,MAAM,4BAA4B;AAAA,MAC9C;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AACF;","names":[]}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// src/schemas/id.ts
|
|
2
|
+
import z from "zod";
|
|
3
|
+
// Schema for identifiers: any string is accepted.
var IdSchema = z.string();
|
|
4
|
+
|
|
5
|
+
// src/schemas/schema-definer.ts
|
|
6
|
+
import z2 from "zod";
|
|
7
|
+
/**
 * Creates a "schema definer": a function that derives a concrete schema from a
 * base object schema and attaches two factory helpers to it.
 *
 * The returned definer takes a `config` with:
 * - `baseSchema`: the object schema to derive from;
 * - `kind` / `schemaVersion` (optional): when given, the derived schema pins the
 *   field to that literal; otherwise the base schema's own field is reused;
 * - `fields` (optional): extra shape entries merged into the derived schema.
 *
 * The derived schema is returned with two extra members:
 * - `new(input)`: validates `input` and returns the parsed object;
 * - `newWithId(input, generator)`: validates `input` with a placeholder empty
 *   `id`, then replaces the `id` with the value produced by `generator`.
 *
 * @returns The schema definer function.
 */
function buildSchemaDefiner() {
  return function(config) {
    const schema = config.baseSchema.omit({ kind: true, schemaVersion: true }).extend({
      ...config.fields ?? {},
      kind: config.kind !== void 0 ? z2.literal(config.kind) : config.baseSchema.shape.kind,
      schemaVersion: config.schemaVersion !== void 0 ? z2.literal(config.schemaVersion) : config.baseSchema.shape.schemaVersion
    });
    // Only force the discriminators the config actually pins. Writing an unset
    // config value would overwrite the caller's own `kind`/`schemaVersion`
    // with `undefined` and make validation fail unconditionally.
    const pinned = {
      ...config.kind !== void 0 ? { kind: config.kind } : {},
      ...config.schemaVersion !== void 0 ? { schemaVersion: config.schemaVersion } : {}
    };
    return Object.assign(schema, {
      new(input) {
        return schema.parse({
          ...input,
          ...pinned
        });
      },
      async newWithId(input, generator) {
        // Parse with a placeholder id first so the generator receives a fully
        // validated object.
        const obj = schema.parse({
          ...input,
          id: "",
          ...pinned
        });
        const id = await generator(obj);
        return {
          ...obj,
          id
        };
      }
    });
  };
}
|
|
38
|
+
|
|
39
|
+
// src/schemas/response.ts
|
|
40
|
+
import z3 from "zod";
|
|
41
|
+
// Base (v1) response schema: string `id` and `testCaseId` (via IdSchema), a
// string `kind`, numeric `schemaVersion`, numeric `startedAt` / `completedAt`
// (presumably timestamps; the unit is not stated here, so confirm with callers),
// and an optional `metadata` record with string keys and unknown values.
var BaseResponseSchemaV1 = z3.object({
|
|
42
|
+
id: IdSchema,
|
|
43
|
+
kind: z3.string(),
|
|
44
|
+
schemaVersion: z3.number(),
|
|
45
|
+
startedAt: z3.number(),
|
|
46
|
+
completedAt: z3.number(),
|
|
47
|
+
testCaseId: IdSchema,
|
|
48
|
+
metadata: z3.record(z3.string(), z3.unknown()).optional()
|
|
49
|
+
});
|
|
50
|
+
// Schema definer obtained from buildSchemaDefiner(); the base schema to derive
// from is supplied by the caller through the config's `baseSchema`.
var defineResponseSchema = buildSchemaDefiner();
|
|
51
|
+
|
|
52
|
+
export {
|
|
53
|
+
buildSchemaDefiner,
|
|
54
|
+
IdSchema,
|
|
55
|
+
BaseResponseSchemaV1,
|
|
56
|
+
defineResponseSchema
|
|
57
|
+
};
|
|
58
|
+
//# sourceMappingURL=chunk-IUN2IUCS.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/schemas/id.ts","../src/schemas/schema-definer.ts","../src/schemas/response.ts"],"sourcesContent":["import z from \"zod\";\n\nexport const IdSchema = z.string();\n","import { IdGenerator } from \"@/types\";\nimport z from \"zod\";\n\nexport function buildSchemaDefiner<\n TBaseShape extends {\n kind: z.ZodType;\n schemaVersion: z.ZodType;\n [key: string]: z.ZodType;\n },\n>() {\n return function <\n TBaseSchema extends z.ZodObject<\n Omit<TBaseShape, \"kind\" | \"schemaVersion\"> & {\n kind: z.ZodString | z.ZodLiteral<string>;\n schemaVersion: z.ZodNumber | z.ZodLiteral<number>;\n }\n >,\n TKind extends string | undefined = undefined,\n TSchemaVersion extends number | undefined = undefined,\n TFields extends z.ZodRawShape = {},\n >(config: {\n baseSchema: TBaseSchema;\n kind?: TKind;\n schemaVersion?: TSchemaVersion;\n fields?: TFields;\n }) {\n const schema = config.baseSchema\n .omit({ kind: true, schemaVersion: true })\n .extend({\n ...((config.fields ?? {}) as TFields),\n kind: (config.kind !== undefined\n ? z.literal(config.kind)\n : config.baseSchema.shape.kind) as TKind extends undefined\n ? TBaseSchema[\"shape\"][\"kind\"]\n : z.ZodLiteral<TKind>,\n schemaVersion: (config.schemaVersion !== undefined\n ? z.literal(config.schemaVersion)\n : config.baseSchema.shape\n .schemaVersion) as TSchemaVersion extends undefined\n ? TBaseSchema[\"shape\"][\"schemaVersion\"]\n : z.ZodLiteral<TSchemaVersion>,\n });\n\n type SchemaType =\n TBaseSchema extends z.ZodObject<infer U>\n ? z.ZodObject<\n Omit<U, \"kind\" | \"schemaVersion\"> &\n TFields & {\n kind: TKind extends undefined ? U[\"kind\"] : z.ZodLiteral<TKind>;\n schemaVersion: TSchemaVersion extends undefined\n ? 
U[\"schemaVersion\"]\n : z.ZodLiteral<TSchemaVersion>;\n }\n >\n : never;\n\n return Object.assign(schema, {\n new(input: Omit<z.infer<typeof schema>, \"kind\" | \"schemaVersion\">) {\n return schema.parse({\n ...input,\n kind: config.kind,\n schemaVersion: config.schemaVersion,\n });\n },\n async newWithId(\n input: Omit<z.infer<typeof schema>, \"kind\" | \"schemaVersion\" | \"id\">,\n generator: IdGenerator\n ) {\n const obj = schema.parse({\n ...input,\n id: \"\",\n kind: config.kind,\n schemaVersion: config.schemaVersion,\n });\n const id = await generator(obj);\n\n return {\n ...obj,\n id,\n };\n },\n }) as unknown as SchemaType & {\n /**\n * Creates a new object with the given input. Uses the `kind` and\n * `schemaVersion` from the schema config. Although the `input` is already typed,\n * it still uses `.parse()` to validate it.\n * @param input\n * @returns The object that follows the schema\n */\n new: (\n input: Omit<z.infer<SchemaType>, \"kind\" | \"schemaVersion\">\n ) => z.infer<SchemaType>;\n\n newWithId(\n input: Omit<z.infer<SchemaType>, \"id\" | \"kind\" | \"schemaVersion\">,\n generator: IdGenerator\n ): Promise<z.infer<SchemaType>>;\n };\n };\n}\n","import { IdSchema } from \"./id\";\nimport { buildSchemaDefiner } from \"./schema-definer\";\nimport z from \"zod\";\n\nexport const BaseResponseSchemaV1 = z.object({\n id: IdSchema,\n kind: z.string(),\n schemaVersion: z.number(),\n\n startedAt: z.number(),\n completedAt: z.number(),\n testCaseId: IdSchema,\n metadata: z.record(z.string(), z.unknown()).optional(),\n});\nexport type BaseResponseV1 = z.infer<typeof BaseResponseSchemaV1>;\n\nexport const defineResponseSchema =\n buildSchemaDefiner<typeof 
BaseResponseSchemaV1.shape>();\n"],"mappings":";AAAA,OAAO,OAAO;AAEP,IAAM,WAAW,EAAE,OAAO;;;ACDjC,OAAOA,QAAO;AAEP,SAAS,qBAMZ;AACF,SAAO,SAUL,QAKC;AACD,UAAM,SAAS,OAAO,WACnB,KAAK,EAAE,MAAM,MAAM,eAAe,KAAK,CAAC,EACxC,OAAO;AAAA,MACN,GAAK,OAAO,UAAU,CAAC;AAAA,MACvB,MAAO,OAAO,SAAS,SACnBA,GAAE,QAAQ,OAAO,IAAI,IACrB,OAAO,WAAW,MAAM;AAAA,MAG5B,eAAgB,OAAO,kBAAkB,SACrCA,GAAE,QAAQ,OAAO,aAAa,IAC9B,OAAO,WAAW,MACf;AAAA,IAGT,CAAC;AAeH,WAAO,OAAO,OAAO,QAAQ;AAAA,MAC3B,IAAI,OAA+D;AACjE,eAAO,OAAO,MAAM;AAAA,UAClB,GAAG;AAAA,UACH,MAAM,OAAO;AAAA,UACb,eAAe,OAAO;AAAA,QACxB,CAAC;AAAA,MACH;AAAA,MACA,MAAM,UACJ,OACA,WACA;AACA,cAAM,MAAM,OAAO,MAAM;AAAA,UACvB,GAAG;AAAA,UACH,IAAI;AAAA,UACJ,MAAM,OAAO;AAAA,UACb,eAAe,OAAO;AAAA,QACxB,CAAC;AACD,cAAM,KAAK,MAAM,UAAU,GAAG;AAE9B,eAAO;AAAA,UACL,GAAG;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EAiBH;AACF;;;ACjGA,OAAOC,QAAO;AAEP,IAAM,uBAAuBA,GAAE,OAAO;AAAA,EAC3C,IAAI;AAAA,EACJ,MAAMA,GAAE,OAAO;AAAA,EACf,eAAeA,GAAE,OAAO;AAAA,EAExB,WAAWA,GAAE,OAAO;AAAA,EACpB,aAAaA,GAAE,OAAO;AAAA,EACtB,YAAY;AAAA,EACZ,UAAUA,GAAE,OAAOA,GAAE,OAAO,GAAGA,GAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC;AAGM,IAAM,uBACX,mBAAsD;","names":["z","z"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
// src/errors/polyfill.ts
|
|
2
|
+
function captureStackTrace(error, constructor) {
|
|
3
|
+
if (typeof window === "undefined") {
|
|
4
|
+
Error.captureStackTrace(error, constructor);
|
|
5
|
+
} else {
|
|
6
|
+
error.stack = new Error(error.message).stack;
|
|
7
|
+
}
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
// src/errors/peerbench.ts
|
|
11
|
+
/**
 * Base error type carrying a numeric `code` alongside the usual message.
 *
 * `message` and `options` are forwarded to `Error`; `code` is read from
 * `options.code` and falls back to `0` when it is absent, `null` or
 * `undefined`.
 */
var PeerbenchError = class extends Error {
  code;
  constructor(message, options) {
    super(message, options);
    const suppliedCode = options?.code;
    this.code = suppliedCode ?? 0;
    // Record the stack relative to the concrete (possibly derived) class.
    captureStackTrace(this, this.constructor);
  }
};
|
|
19
|
+
|
|
20
|
+
// src/errors/catalog.ts
|
|
21
|
+
/**
 * Error with code `100`, raised for a catalog item that could not be found.
 *
 * `itemName` is either the item's name (a string, used to build the message)
 * or an object whose `message` is used verbatim. The original argument is kept
 * on `this.itemName`.
 */
var CatalogItemNotFoundError = class extends PeerbenchError {
  constructor(itemName) {
    let message;
    if (typeof itemName === "string") {
      message = `Item with name "${itemName}" not found in the catalog`;
    } else {
      message = itemName.message;
    }
    super(message, { code: 100 });
    this.itemName = itemName;
  }
};
|
|
32
|
+
/**
 * Error with code `101`, raised for a catalog item that has no instantiate
 * method. The item name is kept on `this.itemName`.
 */
var CatalogItemHasNoInstantiateMethodError = class extends PeerbenchError {
  constructor(itemName) {
    const message = `Item with name "${itemName}" has no instantiate method`;
    super(message, { code: 101 });
    this.itemName = itemName;
  }
};
|
|
40
|
+
|
|
41
|
+
export {
|
|
42
|
+
captureStackTrace,
|
|
43
|
+
PeerbenchError,
|
|
44
|
+
CatalogItemNotFoundError,
|
|
45
|
+
CatalogItemHasNoInstantiateMethodError
|
|
46
|
+
};
|
|
47
|
+
//# sourceMappingURL=chunk-VBOM2YEG.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/errors/polyfill.ts","../src/errors/peerbench.ts","../src/errors/catalog.ts"],"sourcesContent":["// This is an internal polyfill that is used by custom Error classes\n// to make them work in both Node.js and browser environments.\n\nexport function captureStackTrace(\n error: Error,\n // eslint-disable-next-line @typescript-eslint/no-unsafe-function-type\n constructor: Function\n) {\n if (typeof window === \"undefined\") {\n Error.captureStackTrace(error, constructor);\n } else {\n error.stack = new Error(error.message).stack;\n }\n}\n","import { captureStackTrace } from \"./polyfill\";\n\nexport class PeerbenchError extends Error {\n code: number;\n\n constructor(message?: string, options?: ErrorOptions & { code: number }) {\n super(message, options);\n this.code = options?.code ?? 0;\n captureStackTrace(this, this.constructor);\n }\n}\n","import { PeerbenchError } from \"./peerbench\";\n\nexport class CatalogItemNotFoundError extends PeerbenchError {\n constructor(public readonly itemName: string | { message: string }) {\n super(\n typeof itemName === \"string\"\n ? 
`Item with name \"${itemName}\" not found in the catalog`\n : itemName.message,\n {\n code: 100,\n }\n );\n }\n}\n\nexport class CatalogItemHasNoInstantiateMethodError extends PeerbenchError {\n constructor(public readonly itemName: string) {\n super(`Item with name \"${itemName}\" has no instantiate method`, {\n code: 101,\n });\n }\n}\n"],"mappings":";AAGO,SAAS,kBACd,OAEA,aACA;AACA,MAAI,OAAO,WAAW,aAAa;AACjC,UAAM,kBAAkB,OAAO,WAAW;AAAA,EAC5C,OAAO;AACL,UAAM,QAAQ,IAAI,MAAM,MAAM,OAAO,EAAE;AAAA,EACzC;AACF;;;ACXO,IAAM,iBAAN,cAA6B,MAAM;AAAA,EACxC;AAAA,EAEA,YAAY,SAAkB,SAA2C;AACvE,UAAM,SAAS,OAAO;AACtB,SAAK,OAAO,SAAS,QAAQ;AAC7B,sBAAkB,MAAM,KAAK,WAAW;AAAA,EAC1C;AACF;;;ACRO,IAAM,2BAAN,cAAuC,eAAe;AAAA,EAC3D,YAA4B,UAAwC;AAClE;AAAA,MACE,OAAO,aAAa,WAChB,mBAAmB,QAAQ,+BAC3B,SAAS;AAAA,MACb;AAAA,QACE,MAAM;AAAA,MACR;AAAA,IACF;AAR0B;AAAA,EAS5B;AACF;AAEO,IAAM,yCAAN,cAAqD,eAAe;AAAA,EACzE,YAA4B,UAAkB;AAC5C,UAAM,mBAAmB,QAAQ,+BAA+B;AAAA,MAC9D,MAAM;AAAA,IACR,CAAC;AAHyB;AAAA,EAI5B;AACF;","names":[]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/types/index.ts"],"sourcesContent":["export * from \"./runner\";\n\nimport { IdSchema } from \"@/schemas/id\";\nimport z from \"zod\";\n\nexport type Id = z.infer<typeof IdSchema>;\n\nexport type IdGenerator<TInput = unknown> = (input: TInput) => MaybePromise<Id>;\n\nexport type MaybePromise<T> = T | Promise<T>;\n\nexport const ScoringMethod = {\n ai: \"ai\",\n human: \"human\",\n algo: \"algo\",\n} as const;\nexport type ScoringMethod = (typeof ScoringMethod)[keyof typeof ScoringMethod];\n"],"mappings":";AAWO,IAAM,gBAAgB;AAAA,EAC3B,IAAI;AAAA,EACJ,OAAO;AAAA,EACP,MAAM;AACR;","names":[]}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { BaseTestCaseV1, BaseBenchmarkSpecV1, BaseResponseV1, BaseScoreV1 } from './schemas/index.js';
|
|
2
|
+
import { A as AbstractProvider } from './provider-BDjGp2y-.js';
|
|
3
|
+
import { I as IdGenerator, M as MaybePromise } from './index-WiPjF2AL.js';
|
|
4
|
+
import { A as AbstractScorer } from './abstract-Dec9Sc5O.js';
|
|
5
|
+
import { a as BaseSystemPromptV1 } from './system-prompt--0FdPWqK.js';
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Minimum set of parameters that a runner needs to accept
|
|
9
|
+
*/
|
|
10
|
+
type RunnerParams = {
|
|
11
|
+
/** Test case passed to the runner. */
testCase: BaseTestCaseV1;
|
|
12
|
+
/** Provider instance passed to the runner. */
provider: AbstractProvider;
|
|
13
|
+
spec?: BaseBenchmarkSpecV1;
|
|
14
|
+
scorer?: AbstractScorer;
|
|
15
|
+
/** Per-run settings: `model` is required, `llmJudgeModel` is optional. */
runConfig: {
|
|
16
|
+
model: string;
|
|
17
|
+
llmJudgeModel?: string;
|
|
18
|
+
};
|
|
19
|
+
/** Optional id-generator callbacks, keyed `response` and `score`. */
idGenerators?: {
|
|
20
|
+
response?: IdGenerator;
|
|
21
|
+
score?: IdGenerator;
|
|
22
|
+
};
|
|
23
|
+
/** Index signature: additional keys of unknown type are allowed. */
[key: string]: unknown;
|
|
24
|
+
};
|
|
25
|
+
/**
 * Result of a runner: a mandatory `response`, an optional `score`, and any
 * number of additional keys of unknown type.
 */
type RunnerResult<TResponse extends BaseResponseV1 = BaseResponseV1, TScore extends BaseScoreV1 = BaseScoreV1> = {
|
|
26
|
+
response: TResponse;
|
|
27
|
+
score?: TScore;
|
|
28
|
+
[key: string]: unknown;
|
|
29
|
+
};
|
|
30
|
+
/** Async function type: takes runner params and resolves to a `RunnerResult`. */
type Runner<TResponse extends BaseResponseV1 = BaseResponseV1, TScore extends BaseScoreV1 = BaseScoreV1, TParams extends RunnerParams = RunnerParams> = (params: TParams) => Promise<RunnerResult<TResponse, TScore>>;
|
|
31
|
+
/** `RunnerParams` extended with an optional `systemPrompt`. */
type LLMChatRunnerParams = RunnerParams & {
|
|
32
|
+
systemPrompt?: BaseSystemPromptV1;
|
|
33
|
+
};
|
|
34
|
+
/** Same call shape as `Runner`, with params constrained to `LLMChatRunnerParams`. */
type LLMChatRunner<TResponse extends BaseResponseV1 = BaseResponseV1, TScore extends BaseScoreV1 = BaseScoreV1, TParams extends LLMChatRunnerParams = LLMChatRunnerParams> = (params: TParams) => Promise<RunnerResult<TResponse, TScore>>;
|
|
35
|
+
|
|
36
|
+
/**
 * Data returned by a loader: arrays of test cases, responses and scores, plus
 * an optional benchmark spec.
 */
type LoaderResult<TTestCase extends BaseTestCaseV1 = BaseTestCaseV1, TResponse extends BaseResponseV1 = BaseResponseV1, TScore extends BaseScoreV1 = BaseScoreV1, TBenchmarkSpec extends BaseBenchmarkSpecV1 = BaseBenchmarkSpecV1> = {
|
|
37
|
+
testCases: TTestCase[];
|
|
38
|
+
responses: TResponse[];
|
|
39
|
+
scores: TScore[];
|
|
40
|
+
benchmarkSpec?: TBenchmarkSpec;
|
|
41
|
+
};
|
|
42
|
+
/**
 * Abstract base for loaders. Subclasses supply a `kind` string plus `loadData`
 * and `loadBenchmarkSpec`, each of which may return its result directly or as
 * a promise.
 */
declare abstract class AbstractLoader {
|
|
43
|
+
abstract readonly kind: string;
|
|
44
|
+
abstract loadData(params: unknown): MaybePromise<LoaderResult>;
|
|
45
|
+
abstract loadBenchmarkSpec(params: unknown): MaybePromise<BaseBenchmarkSpecV1>;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
 * Loader whose inputs are raw bytes: both abstract methods take
 * `{ content: Uint8Array }`.
 */
declare abstract class AbstractDataLoader extends AbstractLoader {
|
|
49
|
+
abstract loadData(params: {
|
|
50
|
+
content: Uint8Array;
|
|
51
|
+
}): MaybePromise<LoaderResult>;
|
|
52
|
+
abstract loadBenchmarkSpec(params: {
|
|
53
|
+
content: Uint8Array;
|
|
54
|
+
}): MaybePromise<BaseBenchmarkSpecV1>;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export { AbstractDataLoader as A, type LoaderResult as L, type Runner as R, AbstractLoader as a, type RunnerParams as b, type RunnerResult as c, type LLMChatRunnerParams as d, type LLMChatRunner as e };
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { BaseTestCaseV1, BaseResponseV1, BaseScoreV1 } from './schemas/index.js';
|
|
2
|
+
import { M as MaybePromise } from './index-WiPjF2AL.js';
|
|
3
|
+
import { L as LoaderResult, A as AbstractDataLoader } from './data-BmN5WjZ4.js';
|
|
4
|
+
|
|
5
|
+
/** Alias of `LoaderResult` with the benchmark-spec type parameter left at its default. */
type GenericJSONArrayLoaderResult<TTestCase extends BaseTestCaseV1 = BaseTestCaseV1, TResponse extends BaseResponseV1 = BaseResponseV1, TScore extends BaseScoreV1 = BaseScoreV1> = LoaderResult<TTestCase, TResponse, TScore>;
|
|
6
|
+
/**
 * Abstract data loader built around three per-record builders.
 * `testCaseBuilder` is abstract and must be implemented by subclasses;
 * `responseBuilder` and `scoreBuilder` are concrete. Each builder receives the
 * record (`data`) and a `context` holding the result accumulated so far, and
 * may return `undefined`.
 */
declare abstract class GenericJSONArrayDataLoader<TTestCase extends BaseTestCaseV1 = BaseTestCaseV1, TResponse extends BaseResponseV1 = BaseResponseV1, TScore extends BaseScoreV1 = BaseScoreV1> extends AbstractDataLoader {
|
|
7
|
+
protected abstract testCaseBuilder(data: any, context: {
|
|
8
|
+
result: GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>;
|
|
9
|
+
}): MaybePromise<TTestCase | undefined>;
|
|
10
|
+
protected responseBuilder(data: any, context: {
|
|
11
|
+
result: GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>;
|
|
12
|
+
}): Promise<TResponse | undefined>;
|
|
13
|
+
protected scoreBuilder(data: any, context: {
|
|
14
|
+
result: GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>;
|
|
15
|
+
}): Promise<TScore | undefined>;
|
|
16
|
+
/** Loads raw bytes and resolves to the collected test cases, responses and scores. */
loadData(params: {
|
|
17
|
+
content: Uint8Array;
|
|
18
|
+
}): Promise<GenericJSONArrayLoaderResult<TTestCase, TResponse, TScore>>;
|
|
19
|
+
private transformArrayToResult;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export { type GenericJSONArrayLoaderResult as G, GenericJSONArrayDataLoader as a };
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import z__default from 'zod';
|
|
2
|
+
|
|
3
|
+
/** Zod string schema used for identifiers. */
declare const IdSchema: z__default.ZodString;
|
|
4
|
+
|
|
5
|
+
/** Identifier type inferred from `IdSchema`. */
type Id = z__default.infer<typeof IdSchema>;
|
|
6
|
+
/** Callback that produces an `Id` for the given input, directly or as a promise. */
type IdGenerator<TInput = unknown> = (input: TInput) => MaybePromise<Id>;
|
|
7
|
+
/** A value of type `T` or a promise resolving to one. */
type MaybePromise<T> = T | Promise<T>;
|
|
8
|
+
/**
 * Constant map of the scoring-method names `"ai"`, `"human"` and `"algo"`;
 * the type of the same name is the union of those values.
 */
declare const ScoringMethod: {
|
|
9
|
+
readonly ai: "ai";
|
|
10
|
+
readonly human: "human";
|
|
11
|
+
readonly algo: "algo";
|
|
12
|
+
};
|
|
13
|
+
type ScoringMethod = (typeof ScoringMethod)[keyof typeof ScoringMethod];
|
|
14
|
+
|
|
15
|
+
export { type IdGenerator as I, type MaybePromise as M, ScoringMethod as S, type Id as a, IdSchema as b };
|