@deepagents/evals 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dataset/hf.d.ts +1 -0
- package/dist/dataset/hf.d.ts.map +1 -1
- package/dist/dataset/index.d.ts +1 -1
- package/dist/dataset/index.d.ts.map +1 -1
- package/dist/dataset/index.js +23 -0
- package/dist/dataset/index.js.map +2 -2
- package/dist/engine/index.d.ts +1 -0
- package/dist/engine/index.d.ts.map +1 -1
- package/dist/engine/index.js +9 -7
- package/dist/engine/index.js.map +2 -2
- package/dist/evaluate/index.js +9 -7
- package/dist/evaluate/index.js.map +2 -2
- package/dist/index.js +23 -7
- package/dist/index.js.map +2 -2
- package/dist/store/index.d.ts +3 -0
- package/dist/store/index.d.ts.map +1 -1
- package/dist/store/index.js +14 -0
- package/dist/store/index.js.map +2 -2
- package/package.json +2 -17
package/dist/dataset/hf.d.ts
CHANGED
package/dist/dataset/hf.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hf.d.ts","sourceRoot":"","sources":["../../src/dataset/hf.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAUD,wBAAgB,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC5C,OAAO,EAAE,SAAS,GACjB,aAAa,CAAC,CAAC,CAAC,CAMlB;AA2CD,wBAAsB,WAAW,CAC/B,OAAO,EAAE;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EAC3D,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,GACb,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAa7D"}
|
|
1
|
+
{"version":3,"file":"hf.d.ts","sourceRoot":"","sources":["../../src/dataset/hf.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAUD,wBAAgB,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC5C,OAAO,EAAE,SAAS,GACjB,aAAa,CAAC,CAAC,CAAC,CAMlB;AA2CD,wBAAsB,WAAW,CAC/B,OAAO,EAAE;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EAC3D,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,GACb,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAa7D;AAED,wBAAsB,UAAU,CAAC,OAAO,EAAE,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAMpE"}
|
package/dist/dataset/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { hf } from './hf.ts';
|
|
1
|
+
export { downloadHf, fetchHfRows, hf } from './hf.ts';
|
|
2
2
|
export type { HfOptions } from './hf.ts';
|
|
3
3
|
export { filterRecordsByIndex, parseRecordSelection, pickFromArray, } from './record-selection.ts';
|
|
4
4
|
export type { ParsedRecordSelection } from './record-selection.ts';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/dataset/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,EAAE,EAAE,MAAM,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/dataset/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,EAAE,EAAE,MAAM,SAAS,CAAC;AACtD,YAAY,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAEzC,OAAO,EACL,oBAAoB,EACpB,oBAAoB,EACpB,aAAa,GACd,MAAM,uBAAuB,CAAC;AAC/B,YAAY,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAEnE,MAAM,MAAM,WAAW,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC;AAC/C,MAAM,MAAM,WAAW,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,OAAO,CAAC;AAElD,qBAAa,OAAO,CAAC,CAAC,CAAE,YAAW,aAAa,CAAC,CAAC,CAAC;;gBAGrC,MAAM,EAAE,MAAM,aAAa,CAAC,CAAC,CAAC;IAI1C,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,WAAW,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;IASzC,MAAM,CAAC,EAAE,EAAE,WAAW,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;IAStC,KAAK,CAAC,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC;IAY5B,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC;IAiBrB,MAAM,CAAC,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC;IAoB7B,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC;IAiBhC,OAAO,IAAI,OAAO,CAAC,CAAC,EAAE,CAAC;IAQ7B,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,aAAa,CAAC,CAAC,CAAC;CAG3C;AA+FD,wBAAgB,OAAO,CAAC,CAAC,EACvB,MAAM,EAAE,CAAC,EAAE,GAAG,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,GACtC,OAAO,CAAC,CAAC,CAAC,CAwBZ"}
|
package/dist/dataset/index.js
CHANGED
|
@@ -42,6 +42,27 @@ function buildUrl(dataset2, config, split, offset, length) {
|
|
|
42
42
|
url.searchParams.set("length", String(length));
|
|
43
43
|
return url.toString();
|
|
44
44
|
}
|
|
45
|
+
async function fetchHfRows(options, offset, length) {
|
|
46
|
+
const url = buildUrl(
|
|
47
|
+
options.dataset,
|
|
48
|
+
options.config,
|
|
49
|
+
options.split,
|
|
50
|
+
offset,
|
|
51
|
+
length
|
|
52
|
+
);
|
|
53
|
+
const page = await fetchPage(url);
|
|
54
|
+
return {
|
|
55
|
+
rows: page.rows.map((entry) => entry.row),
|
|
56
|
+
total: page.num_rows_total
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
async function downloadHf(options) {
|
|
60
|
+
const lines = [];
|
|
61
|
+
for await (const row of hf(options)) {
|
|
62
|
+
lines.push(JSON.stringify(row));
|
|
63
|
+
}
|
|
64
|
+
return lines.join("\n");
|
|
65
|
+
}
|
|
45
66
|
async function fetchPage(url) {
|
|
46
67
|
const response = await fetch(url);
|
|
47
68
|
if (!response.ok) {
|
|
@@ -331,6 +352,8 @@ function dataset(source) {
|
|
|
331
352
|
export {
|
|
332
353
|
Dataset,
|
|
333
354
|
dataset,
|
|
355
|
+
downloadHf,
|
|
356
|
+
fetchHfRows,
|
|
334
357
|
filterRecordsByIndex,
|
|
335
358
|
hf,
|
|
336
359
|
parseRecordSelection,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../src/dataset/index.ts", "../../src/dataset/hf.ts", "../../src/dataset/record-selection.ts"],
|
|
4
|
-
"sourcesContent": ["import { createReadStream } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { extname } from 'node:path';\nimport { createInterface } from 'node:readline';\n\nexport { hf } from './hf.ts';\nexport type { HfOptions } from './hf.ts';\n\nexport {\n filterRecordsByIndex,\n parseRecordSelection,\n pickFromArray,\n} from './record-selection.ts';\nexport type { ParsedRecordSelection } from './record-selection.ts';\n\nexport type TransformFn<T, U> = (item: T) => U;\nexport type PredicateFn<T> = (item: T) => boolean;\n\nexport class Dataset<T> implements AsyncIterable<T> {\n #source: () => AsyncIterable<T>;\n\n constructor(source: () => AsyncIterable<T>) {\n this.#source = source;\n }\n\n map<U>(fn: TransformFn<T, U>): Dataset<U> {\n const source = this.#source;\n return new Dataset(async function* () {\n for await (const item of source()) {\n yield fn(item);\n }\n });\n }\n\n filter(fn: PredicateFn<T>): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n for await (const item of source()) {\n if (fn(item)) yield item;\n }\n });\n }\n\n limit(n: number): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n let count = 0;\n for await (const item of source()) {\n if (count >= n) return;\n yield item;\n count++;\n }\n });\n }\n\n shuffle(): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n const items: T[] = [];\n for await (const item of source()) {\n items.push(item);\n }\n for (let i = items.length - 1; i > 0; i--) {\n const j = Math.floor(Math.random() * (i + 1));\n const temp = items[i] as T;\n items[i] = items[j] as T;\n items[j] = temp;\n }\n yield* items;\n });\n }\n\n sample(n: number): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n const items: T[] = [];\n for await (const item of source()) {\n items.push(item);\n }\n const count = Math.min(Math.max(0, n), items.length);\n for (let i = items.length - 1; i > items.length - count - 1; i--) {\n const j = Math.floor(Math.random() * (i + 1));\n const temp = items[i] as T;\n items[i] = items[j] as T;\n items[j] = temp;\n }\n for (let i = items.length - count; i < items.length; i++) {\n yield items[i]!;\n }\n });\n }\n\n pick(indexes: Set<number>): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n if (indexes.size === 0) {\n yield* source();\n return;\n }\n let idx = 0;\n for await (const item of source()) {\n if (indexes.has(idx)) {\n yield item;\n }\n idx++;\n }\n });\n }\n\n async toArray(): Promise<T[]> {\n const result: T[] = [];\n for await (const item of this.#source()) {\n result.push(item);\n }\n return result;\n }\n\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.#source()[Symbol.asyncIterator]();\n }\n}\n\nfunction parseCSVLine(line: string): string[] {\n const fields: string[] = [];\n let current = '';\n let inQuotes = false;\n\n for (let i = 0; i < line.length; i++) {\n const char = line[i]!;\n if (inQuotes) {\n if (char === '\"') {\n if (i + 1 < line.length && line[i + 1] === '\"') {\n current += '\"';\n i++;\n } else {\n inQuotes = false;\n }\n } else {\n current += char;\n }\n } else {\n if (char === '\"' && current === '') {\n inQuotes = true;\n } else if (char === ',') {\n fields.push(current);\n current = '';\n } else {\n current += char;\n }\n }\n }\n fields.push(current);\n return fields;\n}\n\nfunction loadJSON<T>(filePath: string): () => AsyncIterable<T> {\n return async function* () {\n const content = await readFile(filePath, 'utf-8');\n const data = JSON.parse(content);\n if (!Array.isArray(data)) {\n throw new Error(`JSON file \"${filePath}\" does not contain an array`);\n }\n yield* data;\n };\n}\n\nfunction loadJSONL<T>(filePath: string): () => AsyncIterable<T> {\n return async function* () {\n const rl = createInterface({\n input: createReadStream(filePath, 'utf-8'),\n crlfDelay: Infinity,\n });\n try {\n for await (const line of rl) {\n const trimmed = line.trim();\n if (trimmed) {\n yield JSON.parse(trimmed);\n }\n }\n } finally {\n rl.close();\n }\n };\n}\n\nfunction loadCSV(\n filePath: string,\n): () => AsyncIterable<Record<string, string>> {\n return async function* () {\n const rl = createInterface({\n input: createReadStream(filePath, 'utf-8'),\n crlfDelay: Infinity,\n });\n try {\n let headers: string[] | undefined;\n for await (const line of rl) {\n const trimmed = line.trim();\n if (!trimmed) continue;\n const fields = parseCSVLine(trimmed);\n if (!headers) {\n headers = fields;\n continue;\n }\n const row: Record<string, string> = {};\n for (let i = 0; i < headers.length; i++) {\n row[headers[i]!] = fields[i] ?? '';\n }\n yield row;\n }\n } finally {\n rl.close();\n }\n };\n}\n\nexport function dataset<T>(\n source: T[] | string | AsyncIterable<T>,\n): Dataset<T> {\n if (Array.isArray(source)) {\n return new Dataset(async function* () {\n yield* source;\n });\n }\n\n if (typeof source === 'object' && Symbol.asyncIterator in source) {\n return new Dataset(() => source);\n }\n\n const ext = extname(source).toLowerCase();\n switch (ext) {\n case '.json':\n return new Dataset(loadJSON<T>(source));\n case '.jsonl':\n return new Dataset(loadJSONL<T>(source));\n case '.csv':\n return new Dataset(loadCSV(source) as () => AsyncIterable<T>);\n default:\n throw new Error(\n `Unsupported file extension \"${ext}\" for dataset file \"${source}\". Supported: .json, .jsonl, .csv`,\n );\n }\n}\n", "export interface HfOptions {\n dataset: string;\n config: string;\n split: string;\n rows?: number;\n}\n\ninterface HfApiResponse {\n rows: Array<{ row_idx: number; row: Record<string, unknown> }>;\n num_rows_total: number;\n}\n\nconst HF_BASE_URL = 'https://datasets-server.huggingface.co/rows';\nconst PAGE_SIZE = 100;\n\nexport function hf<T = Record<string, unknown>>(\n options: HfOptions,\n): AsyncIterable<T> {\n return {\n [Symbol.asyncIterator]() {\n return paginate<T>(options);\n },\n };\n}\n\nasync function* paginate<T>(options: HfOptions): AsyncGenerator<T> {\n const { dataset, config, split, rows } = options;\n const limit = rows ?? Infinity;\n let offset = 0;\n let yielded = 0;\n\n while (yielded < limit) {\n const pageSize =\n limit === Infinity ? PAGE_SIZE : Math.min(PAGE_SIZE, limit - yielded);\n const url = buildUrl(dataset, config, split, offset, pageSize);\n const page = await fetchPage(url);\n\n if (page.rows.length === 0) return;\n\n for (const entry of page.rows) {\n yield entry.row as T;\n yielded++;\n if (yielded >= limit) return;\n }\n\n offset += page.rows.length;\n if (page.rows.length < pageSize || offset >= page.num_rows_total) return;\n }\n}\n\nfunction buildUrl(\n dataset: string,\n config: string,\n split: string,\n offset: number,\n length: number,\n): string {\n const url = new URL(HF_BASE_URL);\n url.searchParams.set('dataset', dataset);\n url.searchParams.set('config', config);\n url.searchParams.set('split', split);\n url.searchParams.set('offset', String(offset));\n url.searchParams.set('length', String(length));\n return url.toString();\n}\n\nexport async function fetchHfRows(\n options: { dataset: string; config: string; split: string },\n offset: number,\n length: number,\n): Promise<{ rows: Record<string, unknown>[]; total: number }> {\n const url = buildUrl(\n options.dataset,\n options.config,\n options.split,\n offset,\n length,\n );\n const page = await fetchPage(url);\n return {\n rows: page.rows.map((entry) => entry.row),\n total: page.num_rows_total,\n };\n}\n\nasync function fetchPage(url: string): Promise<HfApiResponse> {\n const response = await fetch(url);\n if (!response.ok) {\n const body = await response.text().catch(() => '');\n throw new Error(\n `HuggingFace API error ${response.status}: ${body || response.statusText}`,\n );\n }\n const text = await response.text();\n try {\n return JSON.parse(text) as HfApiResponse;\n } catch {\n throw new Error(\n `HuggingFace API returned non-JSON response from ${url}: ${text.slice(0, 200)}`,\n );\n }\n}\n", "export interface ParsedRecordSelection {\n indexes: Set<number>;\n normalized: string;\n}\n\nfunction parsePositiveInt(token: string): number {\n if (!/^\\d+$/.test(token)) {\n throw new Error(`Invalid record token \"${token}\"`);\n }\n const value = Number(token);\n if (!Number.isInteger(value) || value < 1) {\n throw new Error(`Record numbers must be >= 1. Received \"${token}\"`);\n }\n return value;\n}\n\nexport function parseRecordSelection(spec: string): ParsedRecordSelection {\n const trimmed = spec.trim();\n if (!trimmed) {\n return { indexes: new Set(), normalized: '' };\n }\n\n const indexes = new Set<number>();\n const parts = trimmed\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n if (parts.length === 0) {\n throw new Error('Record selection is empty.');\n }\n\n for (const part of parts) {\n const rangeMatch = /^(\\d+)\\s*-\\s*(\\d+)$/.exec(part);\n if (rangeMatch) {\n const start = parsePositiveInt(rangeMatch[1]!);\n const end = parsePositiveInt(rangeMatch[2]!);\n if (end < start) {\n throw new Error(\n `Invalid range \"${part}\". Range end must be >= range start.`,\n );\n }\n for (let i = start; i <= end; i++) {\n indexes.add(i - 1);\n }\n continue;\n }\n\n const value = parsePositiveInt(part);\n indexes.add(value - 1);\n }\n\n return {\n indexes,\n normalized: Array.from(indexes)\n .sort((a, b) => a - b)\n .map((i) => String(i + 1))\n .join(','),\n };\n}\n\nexport function pickFromArray<T>(items: T[], indexes: Set<number>): T[] {\n if (indexes.size === 0) return items;\n return items.filter((_, i) => indexes.has(i));\n}\n\nexport async function* filterRecordsByIndex<T>(\n source: AsyncIterable<T>,\n indexes: Set<number>,\n): AsyncIterable<T> {\n if (indexes.size === 0) {\n for await (const item of source) {\n yield item;\n }\n return;\n }\n\n let idx = 0;\n for await (const item of source) {\n if (indexes.has(idx)) {\n yield item;\n }\n idx++;\n }\n}\n"],
|
|
5
|
-
"mappings": ";AAAA,SAAS,wBAAwB;AACjC,SAAS,gBAAgB;AACzB,SAAS,eAAe;AACxB,SAAS,uBAAuB;;;ACShC,IAAM,cAAc;AACpB,IAAM,YAAY;AAEX,SAAS,GACd,SACkB;AAClB,SAAO;AAAA,IACL,CAAC,OAAO,aAAa,IAAI;AACvB,aAAO,SAAY,OAAO;AAAA,IAC5B;AAAA,EACF;AACF;AAEA,gBAAgB,SAAY,SAAuC;AACjE,QAAM,EAAE,SAAAA,UAAS,QAAQ,OAAO,KAAK,IAAI;AACzC,QAAM,QAAQ,QAAQ;AACtB,MAAI,SAAS;AACb,MAAI,UAAU;AAEd,SAAO,UAAU,OAAO;AACtB,UAAM,WACJ,UAAU,WAAW,YAAY,KAAK,IAAI,WAAW,QAAQ,OAAO;AACtE,UAAM,MAAM,SAASA,UAAS,QAAQ,OAAO,QAAQ,QAAQ;AAC7D,UAAM,OAAO,MAAM,UAAU,GAAG;AAEhC,QAAI,KAAK,KAAK,WAAW,EAAG;AAE5B,eAAW,SAAS,KAAK,MAAM;AAC7B,YAAM,MAAM;AACZ;AACA,UAAI,WAAW,MAAO;AAAA,IACxB;AAEA,cAAU,KAAK,KAAK;AACpB,QAAI,KAAK,KAAK,SAAS,YAAY,UAAU,KAAK,eAAgB;AAAA,EACpE;AACF;AAEA,SAAS,SACPA,UACA,QACA,OACA,QACA,QACQ;AACR,QAAM,MAAM,IAAI,IAAI,WAAW;AAC/B,MAAI,aAAa,IAAI,WAAWA,QAAO;AACvC,MAAI,aAAa,IAAI,UAAU,MAAM;AACrC,MAAI,aAAa,IAAI,SAAS,KAAK;AACnC,MAAI,aAAa,IAAI,UAAU,OAAO,MAAM,CAAC;AAC7C,MAAI,aAAa,IAAI,UAAU,OAAO,MAAM,CAAC;AAC7C,SAAO,IAAI,SAAS;AACtB;
|
|
4
|
+
"sourcesContent": ["import { createReadStream } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { extname } from 'node:path';\nimport { createInterface } from 'node:readline';\n\nexport { downloadHf, fetchHfRows, hf } from './hf.ts';\nexport type { HfOptions } from './hf.ts';\n\nexport {\n filterRecordsByIndex,\n parseRecordSelection,\n pickFromArray,\n} from './record-selection.ts';\nexport type { ParsedRecordSelection } from './record-selection.ts';\n\nexport type TransformFn<T, U> = (item: T) => U;\nexport type PredicateFn<T> = (item: T) => boolean;\n\nexport class Dataset<T> implements AsyncIterable<T> {\n #source: () => AsyncIterable<T>;\n\n constructor(source: () => AsyncIterable<T>) {\n this.#source = source;\n }\n\n map<U>(fn: TransformFn<T, U>): Dataset<U> {\n const source = this.#source;\n return new Dataset(async function* () {\n for await (const item of source()) {\n yield fn(item);\n }\n });\n }\n\n filter(fn: PredicateFn<T>): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n for await (const item of source()) {\n if (fn(item)) yield item;\n }\n });\n }\n\n limit(n: number): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n let count = 0;\n for await (const item of source()) {\n if (count >= n) return;\n yield item;\n count++;\n }\n });\n }\n\n shuffle(): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n const items: T[] = [];\n for await (const item of source()) {\n items.push(item);\n }\n for (let i = items.length - 1; i > 0; i--) {\n const j = Math.floor(Math.random() * (i + 1));\n const temp = items[i] as T;\n items[i] = items[j] as T;\n items[j] = temp;\n }\n yield* items;\n });\n }\n\n sample(n: number): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n const items: T[] = [];\n for await (const item of source()) {\n items.push(item);\n }\n const count = Math.min(Math.max(0, n), items.length);\n for (let i = items.length - 1; i > items.length - count - 1; i--) {\n const j = Math.floor(Math.random() * (i + 1));\n const temp = items[i] as T;\n items[i] = items[j] as T;\n items[j] = temp;\n }\n for (let i = items.length - count; i < items.length; i++) {\n yield items[i]!;\n }\n });\n }\n\n pick(indexes: Set<number>): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n if (indexes.size === 0) {\n yield* source();\n return;\n }\n let idx = 0;\n for await (const item of source()) {\n if (indexes.has(idx)) {\n yield item;\n }\n idx++;\n }\n });\n }\n\n async toArray(): Promise<T[]> {\n const result: T[] = [];\n for await (const item of this.#source()) {\n result.push(item);\n }\n return result;\n }\n\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.#source()[Symbol.asyncIterator]();\n }\n}\n\nfunction parseCSVLine(line: string): string[] {\n const fields: string[] = [];\n let current = '';\n let inQuotes = false;\n\n for (let i = 0; i < line.length; i++) {\n const char = line[i]!;\n if (inQuotes) {\n if (char === '\"') {\n if (i + 1 < line.length && line[i + 1] === '\"') {\n current += '\"';\n i++;\n } else {\n inQuotes = false;\n }\n } else {\n current += char;\n }\n } else {\n if (char === '\"' && current === '') {\n inQuotes = true;\n } else if (char === ',') {\n fields.push(current);\n current = '';\n } else {\n current += char;\n }\n }\n }\n fields.push(current);\n return fields;\n}\n\nfunction loadJSON<T>(filePath: string): () => AsyncIterable<T> {\n return async function* () {\n const content = await readFile(filePath, 'utf-8');\n const data = JSON.parse(content);\n if (!Array.isArray(data)) {\n throw new Error(`JSON file \"${filePath}\" does not contain an array`);\n }\n yield* data;\n };\n}\n\nfunction loadJSONL<T>(filePath: string): () => AsyncIterable<T> {\n return async function* () {\n const rl = createInterface({\n input: createReadStream(filePath, 'utf-8'),\n crlfDelay: Infinity,\n });\n try {\n for await (const line of rl) {\n const trimmed = line.trim();\n if (trimmed) {\n yield JSON.parse(trimmed);\n }\n }\n } finally {\n rl.close();\n }\n };\n}\n\nfunction loadCSV(\n filePath: string,\n): () => AsyncIterable<Record<string, string>> {\n return async function* () {\n const rl = createInterface({\n input: createReadStream(filePath, 'utf-8'),\n crlfDelay: Infinity,\n });\n try {\n let headers: string[] | undefined;\n for await (const line of rl) {\n const trimmed = line.trim();\n if (!trimmed) continue;\n const fields = parseCSVLine(trimmed);\n if (!headers) {\n headers = fields;\n continue;\n }\n const row: Record<string, string> = {};\n for (let i = 0; i < headers.length; i++) {\n row[headers[i]!] = fields[i] ?? '';\n }\n yield row;\n }\n } finally {\n rl.close();\n }\n };\n}\n\nexport function dataset<T>(\n source: T[] | string | AsyncIterable<T>,\n): Dataset<T> {\n if (Array.isArray(source)) {\n return new Dataset(async function* () {\n yield* source;\n });\n }\n\n if (typeof source === 'object' && Symbol.asyncIterator in source) {\n return new Dataset(() => source);\n }\n\n const ext = extname(source).toLowerCase();\n switch (ext) {\n case '.json':\n return new Dataset(loadJSON<T>(source));\n case '.jsonl':\n return new Dataset(loadJSONL<T>(source));\n case '.csv':\n return new Dataset(loadCSV(source) as () => AsyncIterable<T>);\n default:\n throw new Error(\n `Unsupported file extension \"${ext}\" for dataset file \"${source}\". Supported: .json, .jsonl, .csv`,\n );\n }\n}\n", "export interface HfOptions {\n dataset: string;\n config: string;\n split: string;\n rows?: number;\n}\n\ninterface HfApiResponse {\n rows: Array<{ row_idx: number; row: Record<string, unknown> }>;\n num_rows_total: number;\n}\n\nconst HF_BASE_URL = 'https://datasets-server.huggingface.co/rows';\nconst PAGE_SIZE = 100;\n\nexport function hf<T = Record<string, unknown>>(\n options: HfOptions,\n): AsyncIterable<T> {\n return {\n [Symbol.asyncIterator]() {\n return paginate<T>(options);\n },\n };\n}\n\nasync function* paginate<T>(options: HfOptions): AsyncGenerator<T> {\n const { dataset, config, split, rows } = options;\n const limit = rows ?? Infinity;\n let offset = 0;\n let yielded = 0;\n\n while (yielded < limit) {\n const pageSize =\n limit === Infinity ? PAGE_SIZE : Math.min(PAGE_SIZE, limit - yielded);\n const url = buildUrl(dataset, config, split, offset, pageSize);\n const page = await fetchPage(url);\n\n if (page.rows.length === 0) return;\n\n for (const entry of page.rows) {\n yield entry.row as T;\n yielded++;\n if (yielded >= limit) return;\n }\n\n offset += page.rows.length;\n if (page.rows.length < pageSize || offset >= page.num_rows_total) return;\n }\n}\n\nfunction buildUrl(\n dataset: string,\n config: string,\n split: string,\n offset: number,\n length: number,\n): string {\n const url = new URL(HF_BASE_URL);\n url.searchParams.set('dataset', dataset);\n url.searchParams.set('config', config);\n url.searchParams.set('split', split);\n url.searchParams.set('offset', String(offset));\n url.searchParams.set('length', String(length));\n return url.toString();\n}\n\nexport async function fetchHfRows(\n options: { dataset: string; config: string; split: string },\n offset: number,\n length: number,\n): Promise<{ rows: Record<string, unknown>[]; total: number }> {\n const url = buildUrl(\n options.dataset,\n options.config,\n options.split,\n offset,\n length,\n );\n const page = await fetchPage(url);\n return {\n rows: page.rows.map((entry) => entry.row),\n total: page.num_rows_total,\n };\n}\n\nexport async function downloadHf(options: HfOptions): Promise<string> {\n const lines: string[] = [];\n for await (const row of hf(options)) {\n lines.push(JSON.stringify(row));\n }\n return lines.join('\\n');\n}\n\nasync function fetchPage(url: string): Promise<HfApiResponse> {\n const response = await fetch(url);\n if (!response.ok) {\n const body = await response.text().catch(() => '');\n throw new Error(\n `HuggingFace API error ${response.status}: ${body || response.statusText}`,\n );\n }\n const text = await response.text();\n try {\n return JSON.parse(text) as HfApiResponse;\n } catch {\n throw new Error(\n `HuggingFace API returned non-JSON response from ${url}: ${text.slice(0, 200)}`,\n );\n }\n}\n", "export interface ParsedRecordSelection {\n indexes: Set<number>;\n normalized: string;\n}\n\nfunction parsePositiveInt(token: string): number {\n if (!/^\\d+$/.test(token)) {\n throw new Error(`Invalid record token \"${token}\"`);\n }\n const value = Number(token);\n if (!Number.isInteger(value) || value < 1) {\n throw new Error(`Record numbers must be >= 1. Received \"${token}\"`);\n }\n return value;\n}\n\nexport function parseRecordSelection(spec: string): ParsedRecordSelection {\n const trimmed = spec.trim();\n if (!trimmed) {\n return { indexes: new Set(), normalized: '' };\n }\n\n const indexes = new Set<number>();\n const parts = trimmed\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n if (parts.length === 0) {\n throw new Error('Record selection is empty.');\n }\n\n for (const part of parts) {\n const rangeMatch = /^(\\d+)\\s*-\\s*(\\d+)$/.exec(part);\n if (rangeMatch) {\n const start = parsePositiveInt(rangeMatch[1]!);\n const end = parsePositiveInt(rangeMatch[2]!);\n if (end < start) {\n throw new Error(\n `Invalid range \"${part}\". Range end must be >= range start.`,\n );\n }\n for (let i = start; i <= end; i++) {\n indexes.add(i - 1);\n }\n continue;\n }\n\n const value = parsePositiveInt(part);\n indexes.add(value - 1);\n }\n\n return {\n indexes,\n normalized: Array.from(indexes)\n .sort((a, b) => a - b)\n .map((i) => String(i + 1))\n .join(','),\n };\n}\n\nexport function pickFromArray<T>(items: T[], indexes: Set<number>): T[] {\n if (indexes.size === 0) return items;\n return items.filter((_, i) => indexes.has(i));\n}\n\nexport async function* filterRecordsByIndex<T>(\n source: AsyncIterable<T>,\n indexes: Set<number>,\n): AsyncIterable<T> {\n if (indexes.size === 0) {\n for await (const item of source) {\n yield item;\n }\n return;\n }\n\n let idx = 0;\n for await (const item of source) {\n if (indexes.has(idx)) {\n yield item;\n }\n idx++;\n }\n}\n"],
|
|
5
|
+
"mappings": ";AAAA,SAAS,wBAAwB;AACjC,SAAS,gBAAgB;AACzB,SAAS,eAAe;AACxB,SAAS,uBAAuB;;;ACShC,IAAM,cAAc;AACpB,IAAM,YAAY;AAEX,SAAS,GACd,SACkB;AAClB,SAAO;AAAA,IACL,CAAC,OAAO,aAAa,IAAI;AACvB,aAAO,SAAY,OAAO;AAAA,IAC5B;AAAA,EACF;AACF;AAEA,gBAAgB,SAAY,SAAuC;AACjE,QAAM,EAAE,SAAAA,UAAS,QAAQ,OAAO,KAAK,IAAI;AACzC,QAAM,QAAQ,QAAQ;AACtB,MAAI,SAAS;AACb,MAAI,UAAU;AAEd,SAAO,UAAU,OAAO;AACtB,UAAM,WACJ,UAAU,WAAW,YAAY,KAAK,IAAI,WAAW,QAAQ,OAAO;AACtE,UAAM,MAAM,SAASA,UAAS,QAAQ,OAAO,QAAQ,QAAQ;AAC7D,UAAM,OAAO,MAAM,UAAU,GAAG;AAEhC,QAAI,KAAK,KAAK,WAAW,EAAG;AAE5B,eAAW,SAAS,KAAK,MAAM;AAC7B,YAAM,MAAM;AACZ;AACA,UAAI,WAAW,MAAO;AAAA,IACxB;AAEA,cAAU,KAAK,KAAK;AACpB,QAAI,KAAK,KAAK,SAAS,YAAY,UAAU,KAAK,eAAgB;AAAA,EACpE;AACF;AAEA,SAAS,SACPA,UACA,QACA,OACA,QACA,QACQ;AACR,QAAM,MAAM,IAAI,IAAI,WAAW;AAC/B,MAAI,aAAa,IAAI,WAAWA,QAAO;AACvC,MAAI,aAAa,IAAI,UAAU,MAAM;AACrC,MAAI,aAAa,IAAI,SAAS,KAAK;AACnC,MAAI,aAAa,IAAI,UAAU,OAAO,MAAM,CAAC;AAC7C,MAAI,aAAa,IAAI,UAAU,OAAO,MAAM,CAAC;AAC7C,SAAO,IAAI,SAAS;AACtB;AAEA,eAAsB,YACpB,SACA,QACA,QAC6D;AAC7D,QAAM,MAAM;AAAA,IACV,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR;AAAA,IACA;AAAA,EACF;AACA,QAAM,OAAO,MAAM,UAAU,GAAG;AAChC,SAAO;AAAA,IACL,MAAM,KAAK,KAAK,IAAI,CAAC,UAAU,MAAM,GAAG;AAAA,IACxC,OAAO,KAAK;AAAA,EACd;AACF;AAEA,eAAsB,WAAW,SAAqC;AACpE,QAAM,QAAkB,CAAC;AACzB,mBAAiB,OAAO,GAAG,OAAO,GAAG;AACnC,UAAM,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,EAChC;AACA,SAAO,MAAM,KAAK,IAAI;AACxB;AAEA,eAAe,UAAU,KAAqC;AAC5D,QAAM,WAAW,MAAM,MAAM,GAAG;AAChC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,OAAO,MAAM,SAAS,KAAK,EAAE,MAAM,MAAM,EAAE;AACjD,UAAM,IAAI;AAAA,MACR,yBAAyB,SAAS,MAAM,KAAK,QAAQ,SAAS,UAAU;AAAA,IAC1E;AAAA,EACF;AACA,QAAM,OAAO,MAAM,SAAS,KAAK;AACjC,MAAI;AACF,WAAO,KAAK,MAAM,IAAI;AAAA,EACxB,QAAQ;AACN,UAAM,IAAI;AAAA,MACR,mDAAmD,GAAG,KAAK,KAAK,MAAM,GAAG,GAAG,CAAC;AAAA,IAC/E;AAAA,EACF;AACF;;;ACxGA,SAAS,iBAAiB,OAAuB;AAC/C,MAAI,CAAC,QAAQ,KAAK,KAAK,GAAG;AACxB,UAAM,IAAI,MAAM,yBAAyB,KAAK,GAAG;AAAA,EACnD;AACA,QAAM,QAAQ,OAAO,KAAK;AAC1B,MAAI,CAAC,OAAO,UAAU,KAAK,KAAK,QAAQ,GAAG;AACzC,UAAM,IAAI,MAAM,0CAA0C,KAAK,GAAG;AAAA,EACpE;AACA,SAAO;AACT;AAEO,SAAS,qBAAqB,MAAqC;AACxE,QAAM,UAAU,KAAK,KAAK;AAC1B,MAAI,CAAC,SAAS;AACZ,WAAO,EAAE,SAAS,oBAAI,IAAI,GAAG,YAAY,GAAG;AAAA,EAC9C;AAEA,QAAM,UAAU,oBAAI,IAAY;AAChC,QAAM,QAAQ,QACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AACjB,MAAI,MAAM,WAAW,GAAG;AACtB,UAAM,IAAI,MAAM,4BAA4B;AAAA,EAC9C;AAEA,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,sBAAsB,KAAK,IAAI;AAClD,QAAI,YAAY;AACd,YAAM,QAAQ,iBAAiB,WAAW,CAAC,CAAE;AAC7C,YAAM,MAAM,iBAAiB,WAAW,CAAC,CAAE;AAC3C,UAAI,MAAM,OAAO;AACf,cAAM,IAAI;AAAA,UACR,kBAAkB,IAAI;AAAA,QACxB;AAAA,MACF;AACA,eAAS,IAAI,OAAO,KAAK,KAAK,KAAK;AACjC,gBAAQ,IAAI,IAAI,CAAC;AAAA,MACnB;AACA;AAAA,IACF;AAEA,UAAM,QAAQ,iBAAiB,IAAI;AACnC,YAAQ,IAAI,QAAQ,CAAC;AAAA,EACvB;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,MAAM,KAAK,OAAO,EAC3B,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC,EACpB,IAAI,CAAC,MAAM,OAAO,IAAI,CAAC,CAAC,EACxB,KAAK,GAAG;AAAA,EACb;AACF;AAEO,SAAS,cAAiB,OAAY,SAA2B;AACtE,MAAI,QAAQ,SAAS,EAAG,QAAO;AAC/B,SAAO,MAAM,OAAO,CAAC,GAAG,MAAM,QAAQ,IAAI,CAAC,CAAC;AAC9C;AAEA,gBAAuB,qBACrB,QACA,SACkB;AAClB,MAAI,QAAQ,SAAS,GAAG;AACtB,qBAAiB,QAAQ,QAAQ;AAC/B,YAAM;AAAA,IACR;AACA;AAAA,EACF;AAEA,MAAI,MAAM;AACV,mBAAiB,QAAQ,QAAQ;AAC/B,QAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,YAAM;AAAA,IACR;AACA;AAAA,EACF;AACF;;;AFjEO,IAAM,UAAN,MAAM,SAAuC;AAAA,EAClD;AAAA,EAEA,YAAY,QAAgC;AAC1C,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,IAAO,IAAmC;AACxC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,GAAG,IAAI;AAAA,MACf;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,OAAO,IAAgC;AACrC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,GAAG,IAAI,EAAG,OAAM;AAAA,MACtB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,GAAuB;AAC3B,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,UAAI,QAAQ;AACZ,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,SAAS,EAAG;AAChB,cAAM;AACN;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,UAAsB;AACpB,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,YAAM,QAAa,CAAC;AACpB,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,KAAK,IAAI;AAAA,MACjB;AACA,eAAS,IAAI,MAAM,SAAS,GAAG,IAAI,GAAG,KAAK;AACzC,cAAM,IAAI,KAAK,MAAM,KAAK,OAAO,KAAK,IAAI,EAAE;AAC5C,cAAM,OAAO,MAAM,CAAC;AACpB,cAAM,CAAC,IAAI,MAAM,CAAC;AAClB,cAAM,CAAC,IAAI;AAAA,MACb;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAAA,EAEA,OAAO,GAAuB;AAC5B,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,YAAM,QAAa,CAAC;AACpB,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,KAAK,IAAI;AAAA,MACjB;AACA,YAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,GAAG,CAAC,GAAG,MAAM,MAAM;AACnD,eAAS,IAAI,MAAM,SAAS,GAAG,IAAI,MAAM,SAAS,QAAQ,GAAG,KAAK;AAChE,cAAM,IAAI,KAAK,MAAM,KAAK,OAAO,KAAK,IAAI,EAAE;AAC5C,cAAM,OAAO,MAAM,CAAC;AACpB,cAAM,CAAC,IAAI,MAAM,CAAC;AAClB,cAAM,CAAC,IAAI;AAAA,MACb;AACA,eAAS,IAAI,MAAM,SAAS,OAAO,IAAI,MAAM,QAAQ,KAAK;AACxD,cAAM,MAAM,CAAC;AAAA,MACf;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,KAAK,SAAkC;AACrC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,UAAI,QAAQ,SAAS,GAAG;AACtB,eAAO,OAAO;AACd;AAAA,MACF;AACA,UAAI,MAAM;AACV,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,gBAAM;AAAA,QACR;AACA;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAwB;AAC5B,UAAM,SAAc,CAAC;AACrB,qBAAiB,QAAQ,KAAK,QAAQ,GAAG;AACvC,aAAO,KAAK,IAAI;AAAA,IAClB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO,KAAK,QAAQ,EAAE,OAAO,aAAa,EAAE;AAAA,EAC9C;AACF;AAEA,SAAS,aAAa,MAAwB;AAC5C,QAAM,SAAmB,CAAC;AAC1B,MAAI,UAAU;AACd,MAAI,WAAW;AAEf,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,OAAO,KAAK,CAAC;AACnB,QAAI,UAAU;AACZ,UAAI,SAAS,KAAK;AAChB,YAAI,IAAI,IAAI,KAAK,UAAU,KAAK,IAAI,CAAC,MAAM,KAAK;AAC9C,qBAAW;AACX;AAAA,QACF,OAAO;AACL,qBAAW;AAAA,QACb;AAAA,MACF,OAAO;AACL,mBAAW;AAAA,MACb;AAAA,IACF,OAAO;AACL,UAAI,SAAS,OAAO,YAAY,IAAI;AAClC,mBAAW;AAAA,MACb,WAAW,SAAS,KAAK;AACvB,eAAO,KAAK,OAAO;AACnB,kBAAU;AAAA,MACZ,OAAO;AACL,mBAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AACA,SAAO,KAAK,OAAO;AACnB,SAAO;AACT;AAEA,SAAS,SAAY,UAA0C;AAC7D,SAAO,mBAAmB;AACxB,UAAM,UAAU,MAAM,SAAS,UAAU,OAAO;AAChD,UAAM,OAAO,KAAK,MAAM,OAAO;AAC/B,QAAI,CAAC,MAAM,QAAQ,IAAI,GAAG;AACxB,YAAM,IAAI,MAAM,cAAc,QAAQ,6BAA6B;AAAA,IACrE;AACA,WAAO;AAAA,EACT;AACF;AAEA,SAAS,UAAa,UAA0C;AAC9D,SAAO,mBAAmB;AACxB,UAAM,KAAK,gBAAgB;AAAA,MACzB,OAAO,iBAAiB,UAAU,OAAO;AAAA,MACzC,WAAW;AAAA,IACb,CAAC;AACD,QAAI;AACF,uBAAiB,QAAQ,IAAI;AAC3B,cAAM,UAAU,KAAK,KAAK;AAC1B,YAAI,SAAS;AACX,gBAAM,KAAK,MAAM,OAAO;AAAA,QAC1B;AAAA,MACF;AAAA,IACF,UAAE;AACA,SAAG,MAAM;AAAA,IACX;AAAA,EACF;AACF;AAEA,SAAS,QACP,UAC6C;AAC7C,SAAO,mBAAmB;AACxB,UAAM,KAAK,gBAAgB;AAAA,MACzB,OAAO,iBAAiB,UAAU,OAAO;AAAA,MACzC,WAAW;AAAA,IACb,CAAC;AACD,QAAI;AACF,UAAI;AACJ,uBAAiB,QAAQ,IAAI;AAC3B,cAAM,UAAU,KAAK,KAAK;AAC1B,YAAI,CAAC,QAAS;AACd,cAAM,SAAS,aAAa,OAAO;AACnC,YAAI,CAAC,SAAS;AACZ,oBAAU;AACV;AAAA,QACF;AACA,cAAM,MAA8B,CAAC;AACrC,iBAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,cAAI,QAAQ,CAAC,CAAE,IAAI,OAAO,CAAC,KAAK;AAAA,QAClC;AACA,cAAM;AAAA,MACR;AAAA,IACF,UAAE;AACA,SAAG,MAAM;AAAA,IACX;AAAA,EACF;AACF;AAEO,SAAS,QACd,QACY;AACZ,MAAI,MAAM,QAAQ,MAAM,GAAG;AACzB,WAAO,IAAI,QAAQ,mBAAmB;AACpC,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAEA,MAAI,OAAO,WAAW,YAAY,OAAO,iBAAiB,QAAQ;AAChE,WAAO,IAAI,QAAQ,MAAM,MAAM;AAAA,EACjC;AAEA,QAAM,MAAM,QAAQ,MAAM,EAAE,YAAY;AACxC,UAAQ,KAAK;AAAA,IACX,KAAK;AACH,aAAO,IAAI,QAAQ,SAAY,MAAM,CAAC;AAAA,IACxC,KAAK;AACH,aAAO,IAAI,QAAQ,UAAa,MAAM,CAAC;AAAA,IACzC,KAAK;AACH,aAAO,IAAI,QAAQ,QAAQ,MAAM,CAA2B;AAAA,IAC9D;AACE,YAAM,IAAI;AAAA,QACR,+BAA+B,GAAG,uBAAuB,MAAM;AAAA,MACjE;AAAA,EACJ;AACF;",
|
|
6
6
|
"names": ["dataset"]
|
|
7
7
|
}
|
package/dist/engine/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/engine/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,KAAK,EAEV,QAAQ,EACR,UAAU,EAEX,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;CACvD;AAED,MAAM,MAAM,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;AAE1D,MAAM,WAAW,YAAY;IAC3B,WAAW,EAAE;QACX,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,YAAY,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,OAAO,CAAA;KAAE,CAAC;IAC/D,aAAa,EAAE;QACb,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,OAAO,CAAC;QACf,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QACrC,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,YAAY,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9D,SAAS,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,UAAU,CAAA;KAAE,CAAC;CACnD;AAED,qBAAa,WAAY,SAAQ,YAAY;IAClC,EAAE,CAAC,CAAC,SAAS,MAAM,YAAY,EACtC,KAAK,EAAE,CAAC,EACR,QAAQ,EAAE,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC,CAAC,KAAK,IAAI,GACxC,IAAI;IAIE,IAAI,CAAC,CAAC,SAAS,MAAM,YAAY,EACxC,KAAK,EAAE,CAAC,EACR,IAAI,EAAE,YAAY,CAAC,CAAC,CAAC,GACpB,OAAO;CAGX;AAED,MAAM,WAAW,UAAU,CAAC,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC;IAC1B,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,KAAK,EAAE,QAAQ,CAAC;IAChB,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAkID,wBAAsB,OAAO,CAAC,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/engine/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,KAAK,EAEV,QAAQ,EACR,UAAU,EAEX,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;CACvD;AAED,MAAM,MAAM,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;AAE1D,MAAM,WAAW,YAAY;IAC3B,WAAW,EAAE;QACX,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,YAAY,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,OAAO,CAAA;KAAE,CAAC;IAC/D,aAAa,EAAE;QACb,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,OAAO,CAAC;QACf,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QACrC,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,YAAY,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9D,SAAS,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,UAAU,CAAA;KAAE,CAAC;CACnD;AAED,qBAAa,WAAY,SAAQ,YAAY;IAClC,EAAE,CAAC,CAAC,SAAS,MAAM,YAAY,EACtC,KAAK,EAAE,CAAC,EACR,QAAQ,EAAE,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC,CAAC,KAAK,IAAI,GACxC,IAAI;IAIE,IAAI,CAAC,CAAC,SAAS,MAAM,YAAY,EACxC,KAAK,EAAE,CAAC,EACR,IAAI,EAAE,YAAY,CAAC,CAAC,CAAC,GACpB,OAAO;CAGX;AAED,MAAM,WAAW,UAAU,CAAC,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC;IAC1B,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,KAAK,EAAE,QAAQ,CAAC;IAChB,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAkID,wBAAsB,OAAO,CAAC,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC,CAkO3E"}
|
package/dist/engine/index.js
CHANGED
|
@@ -127,13 +127,15 @@ async function runEval(config) {
|
|
|
127
127
|
threshold = 0.5
|
|
128
128
|
} = config;
|
|
129
129
|
const emitter = config.emitter ?? new EvalEmitter();
|
|
130
|
-
const
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
130
|
+
const runId = config.runId ?? (() => {
|
|
131
|
+
const resolvedSuiteId = suiteId ?? store.createSuite(name).id;
|
|
132
|
+
return store.createRun({
|
|
133
|
+
suite_id: resolvedSuiteId,
|
|
134
|
+
name,
|
|
135
|
+
model,
|
|
136
|
+
config: config.config
|
|
137
|
+
});
|
|
138
|
+
})();
|
|
137
139
|
const items = [];
|
|
138
140
|
let idx = 0;
|
|
139
141
|
for await (const item of ds) {
|
package/dist/engine/index.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../src/engine/index.ts"],
|
|
4
|
-
"sourcesContent": ["import { EventEmitter } from 'node:events';\n\nimport type { Scorer, ScorerResult } from '../scorers/index.ts';\nimport type {\n CaseData,\n RunStore,\n RunSummary,\n ScoreData,\n} from '../store/index.ts';\n\nexport interface TaskResult {\n output: string;\n usage?: { inputTokens: number; outputTokens: number };\n}\n\nexport type TaskFn<T> = (input: T) => Promise<TaskResult>;\n\nexport interface EngineEvents {\n 'run:start': {\n runId: string;\n totalCases: number;\n name: string;\n model: string;\n };\n 'case:start': { runId: string; index: number; input: unknown };\n 'case:scored': {\n runId: string;\n index: number;\n input: unknown;\n output: string;\n expected: unknown;\n scores: Record<string, ScorerResult>;\n error?: unknown;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n };\n 'case:error': { runId: string; index: number; error: string };\n 'run:end': { runId: string; summary: RunSummary };\n}\n\nexport class EvalEmitter extends EventEmitter {\n override on<K extends keyof EngineEvents>(\n event: K,\n listener: (data: EngineEvents[K]) => void,\n ): this {\n return super.on(event, listener);\n }\n\n override emit<K extends keyof EngineEvents>(\n event: K,\n data: EngineEvents[K],\n ): boolean {\n return super.emit(event, data);\n }\n}\n\nexport interface EvalConfig<T> {\n name: string;\n model: string;\n dataset: AsyncIterable<T>;\n task: TaskFn<T>;\n scorers: Record<string, Scorer>;\n store: RunStore;\n emitter?: EvalEmitter;\n suiteId?: string;\n config?: Record<string, unknown>;\n maxConcurrency?: number;\n batchSize?: number;\n timeout?: number;\n trials?: number;\n threshold?: number;\n}\n\ninterface WrappedResult {\n output: string;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n error?: unknown;\n}\n\nfunction errorMessage(err: unknown): string {\n if (err instanceof Error) {\n return `${err.name}: ${err.message}`;\n }\n if (typeof err === 'string') return err;\n if (err == null) return 'Unknown error';\n try {\n return JSON.stringify(err);\n } catch {\n return String(err);\n }\n}\n\nfunction serializeError(err: unknown): string {\n if (err instanceof Error) {\n return JSON.stringify({\n name: err.name,\n message: err.message,\n stack: err.stack,\n cause:\n err.cause instanceof Error\n ? {\n name: err.cause.name,\n message: err.cause.message,\n }\n : err.cause,\n });\n }\n if (typeof err === 'string') return JSON.stringify({ message: err });\n if (err == null) return JSON.stringify({ message: 'Unknown error' });\n try {\n return JSON.stringify(err);\n } catch {\n return JSON.stringify({ message: String(err) });\n }\n}\n\nfunction failureScores(\n scorerNames: string[],\n error: unknown,\n): Record<string, ScorerResult> {\n const reason = `Task failed: ${errorMessage(error)}`;\n const scores: Record<string, ScorerResult> = {};\n for (const scorerName of scorerNames) {\n scores[scorerName] = { score: 0, reason };\n }\n return scores;\n}\n\nfunction createSemaphore(maxConcurrency: number) {\n let active = 0;\n const queue: Array<() => void> = [];\n\n return {\n async acquire(): Promise<void> {\n if (active < maxConcurrency) {\n active++;\n return;\n }\n return new Promise<void>((resolve) => queue.push(resolve));\n },\n release(): void {\n active--;\n const next = queue.shift();\n if (next) {\n active++;\n next();\n }\n },\n };\n}\n\nasync function wrapTask<T>(\n task: TaskFn<T>,\n input: T,\n timeoutMs: number,\n): Promise<WrappedResult> {\n const start = performance.now();\n let timerId: ReturnType<typeof setTimeout> | undefined;\n try {\n const result = await Promise.race([\n task(input),\n new Promise<never>((_, reject) => {\n timerId = setTimeout(\n () => reject(new Error('timeout exceeded')),\n timeoutMs,\n );\n }),\n ]);\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: result.output,\n latencyMs,\n tokensIn: result.usage?.inputTokens ?? 0,\n tokensOut: result.usage?.outputTokens ?? 0,\n };\n } catch (err) {\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: '',\n latencyMs,\n tokensIn: 0,\n tokensOut: 0,\n error: err,\n };\n }\n}\n\nfunction clampScore(score: number, scorerName: string): number {\n if (score < 0 || score > 1) {\n console.warn(\n `Scorer \"${scorerName}\" returned out-of-range score ${score}, clamping to 0..1`,\n );\n return Math.max(0, Math.min(1, score));\n }\n return score;\n}\n\nexport async function runEval<T>(config: EvalConfig<T>): Promise<RunSummary> {\n const {\n name,\n model,\n dataset: ds,\n task,\n scorers,\n store,\n suiteId,\n maxConcurrency = 10,\n batchSize,\n timeout = 30_000,\n trials = 1,\n threshold = 0.5,\n } = config;\n\n const emitter = config.emitter ?? new EvalEmitter();\n const resolvedSuiteId = suiteId ?? store.createSuite(name).id;\n const runId = store.createRun({\n suite_id: resolvedSuiteId,\n name,\n model,\n config: config.config,\n });\n\n const items: Array<{ index: number; input: T }> = [];\n let idx = 0;\n for await (const item of ds) {\n items.push({ index: idx++, input: item });\n }\n\n emitter.emit('run:start', { runId, totalCases: items.length, name, model });\n\n const semaphore = createSemaphore(maxConcurrency);\n const scorerNames = Object.keys(scorers);\n\n const allCaseScores: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }> = [];\n\n const processItem = async ({ index, input }: { index: number; input: T }) => {\n await semaphore.acquire();\n try {\n emitter.emit('case:start', { runId, index, input });\n\n let finalResult: WrappedResult;\n let finalScores: Record<string, ScorerResult>;\n\n if (trials > 1) {\n const trialResults: Array<{\n result: WrappedResult;\n scores: Record<string, ScorerResult>;\n }> = [];\n\n for (let t = 0; t < trials; t++) {\n const result = await wrapTask(task, input, timeout);\n if (result.error) {\n trialResults.push({\n result,\n scores: failureScores(scorerNames, result.error),\n });\n } else {\n const scores: Record<string, ScorerResult> = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: result.output,\n expected: (input as Record<string, unknown>).expected,\n });\n scores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n trialResults.push({ result, scores });\n }\n }\n\n const lastSuccessful = [...trialResults]\n .reverse()\n .find((t) => !t.result.error);\n const baseResult =\n lastSuccessful?.result ??\n trialResults[trialResults.length - 1]!.result;\n finalResult = {\n output: baseResult.output,\n latencyMs: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.latencyMs, 0) /\n trials,\n ),\n tokensIn: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensIn, 0) /\n trials,\n ),\n tokensOut: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensOut, 0) /\n trials,\n ),\n error: lastSuccessful ? undefined : baseResult.error,\n };\n\n finalScores = {};\n for (const sName of scorerNames) {\n const meanScore =\n trialResults.reduce((sum, t) => sum + t.scores[sName]!.score, 0) /\n trials;\n finalScores[sName] = {\n score: meanScore,\n reason:\n trialResults[trialResults.length - 1]!.scores[sName]?.reason,\n metadata:\n trialResults[trialResults.length - 1]!.scores[sName]?.metadata,\n };\n }\n } else {\n finalResult = await wrapTask(task, input, timeout);\n if (finalResult.error) {\n finalScores = failureScores(scorerNames, finalResult.error);\n } else {\n finalScores = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n });\n finalScores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n }\n }\n\n const caseId = crypto.randomUUID();\n\n const caseData: CaseData = {\n id: caseId,\n run_id: runId,\n idx: index,\n input,\n output: finalResult.output || null,\n expected: (input as Record<string, unknown>).expected,\n latency_ms: finalResult.latencyMs,\n tokens_in: finalResult.tokensIn,\n tokens_out: finalResult.tokensOut,\n error: finalResult.error\n ? serializeError(finalResult.error)\n : undefined,\n };\n store.saveCases([caseData]);\n\n const scoreDataList: ScoreData[] = scorerNames.map((sName) => ({\n id: crypto.randomUUID(),\n case_id: caseId,\n scorer_name: sName,\n score: finalScores[sName]!.score,\n reason: finalScores[sName]!.reason,\n }));\n store.saveScores(scoreDataList);\n\n allCaseScores.push({\n index,\n scores: Object.fromEntries(\n scorerNames.map((sName) => [sName, finalScores[sName]!.score]),\n ),\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n\n if (finalResult.error) {\n emitter.emit('case:error', {\n runId,\n index,\n error: errorMessage(finalResult.error),\n });\n }\n\n emitter.emit('case:scored', {\n runId,\n index,\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n scores: finalScores,\n error: finalResult.error,\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n } finally {\n semaphore.release();\n }\n };\n\n const batches = batchSize\n ? Array.from({ length: Math.ceil(items.length / batchSize) }, (_, i) =>\n items.slice(i * batchSize, (i + 1) * batchSize),\n )\n : [items];\n\n try {\n for (const batch of batches) {\n await Promise.all(batch.map(processItem));\n }\n } catch (err) {\n store.finishRun(runId, 'failed');\n throw err;\n }\n\n const summary = computeSummary(allCaseScores, scorerNames, threshold);\n store.finishRun(runId, 'completed', summary);\n emitter.emit('run:end', { runId, summary });\n\n return summary;\n}\n\nfunction computeSummary(\n cases: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }>,\n scorerNames: string[],\n threshold: number,\n): RunSummary {\n const totalCases = cases.length;\n let passCount = 0;\n let failCount = 0;\n let totalLatencyMs = 0;\n let totalTokensIn = 0;\n let totalTokensOut = 0;\n\n const scoreSums: Record<string, number> = {};\n for (const name of scorerNames) {\n scoreSums[name] = 0;\n }\n\n for (const c of cases) {\n totalLatencyMs += c.latencyMs;\n totalTokensIn += c.tokensIn;\n totalTokensOut += c.tokensOut;\n\n let allPass = true;\n for (const name of scorerNames) {\n const score = c.scores[name] ?? 0;\n scoreSums[name]! += score;\n if (score < threshold) allPass = false;\n }\n if (allPass) passCount++;\n else failCount++;\n }\n\n const meanScores: Record<string, number> = {};\n for (const name of scorerNames) {\n meanScores[name] = totalCases > 0 ? scoreSums[name]! / totalCases : 0;\n }\n\n return {\n totalCases,\n passCount,\n failCount,\n meanScores,\n totalLatencyMs,\n totalTokensIn,\n totalTokensOut,\n };\n}\n"],
|
|
5
|
-
"mappings": ";AAAA,SAAS,oBAAoB;AAyCtB,IAAM,cAAN,cAA0B,aAAa;AAAA,EACnC,GACP,OACA,UACM;AACN,WAAO,MAAM,GAAG,OAAO,QAAQ;AAAA,EACjC;AAAA,EAES,KACP,OACA,MACS;AACT,WAAO,MAAM,KAAK,OAAO,IAAI;AAAA,EAC/B;AACF;
|
|
4
|
+
"sourcesContent": ["import { EventEmitter } from 'node:events';\n\nimport type { Scorer, ScorerResult } from '../scorers/index.ts';\nimport type {\n CaseData,\n RunStore,\n RunSummary,\n ScoreData,\n} from '../store/index.ts';\n\nexport interface TaskResult {\n output: string;\n usage?: { inputTokens: number; outputTokens: number };\n}\n\nexport type TaskFn<T> = (input: T) => Promise<TaskResult>;\n\nexport interface EngineEvents {\n 'run:start': {\n runId: string;\n totalCases: number;\n name: string;\n model: string;\n };\n 'case:start': { runId: string; index: number; input: unknown };\n 'case:scored': {\n runId: string;\n index: number;\n input: unknown;\n output: string;\n expected: unknown;\n scores: Record<string, ScorerResult>;\n error?: unknown;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n };\n 'case:error': { runId: string; index: number; error: string };\n 'run:end': { runId: string; summary: RunSummary };\n}\n\nexport class EvalEmitter extends EventEmitter {\n override on<K extends keyof EngineEvents>(\n event: K,\n listener: (data: EngineEvents[K]) => void,\n ): this {\n return super.on(event, listener);\n }\n\n override emit<K extends keyof EngineEvents>(\n event: K,\n data: EngineEvents[K],\n ): boolean {\n return super.emit(event, data);\n }\n}\n\nexport interface EvalConfig<T> {\n name: string;\n model: string;\n dataset: AsyncIterable<T>;\n task: TaskFn<T>;\n scorers: Record<string, Scorer>;\n store: RunStore;\n emitter?: EvalEmitter;\n runId?: string;\n suiteId?: string;\n config?: Record<string, unknown>;\n maxConcurrency?: number;\n batchSize?: number;\n timeout?: number;\n trials?: number;\n threshold?: number;\n}\n\ninterface WrappedResult {\n output: string;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n error?: unknown;\n}\n\nfunction errorMessage(err: unknown): string {\n if (err instanceof Error) {\n return `${err.name}: ${err.message}`;\n }\n if (typeof err === 'string') return err;\n if (err == null) return 'Unknown error';\n try {\n return JSON.stringify(err);\n } catch {\n return String(err);\n }\n}\n\nfunction serializeError(err: unknown): string {\n if (err instanceof Error) {\n return JSON.stringify({\n name: err.name,\n message: err.message,\n stack: err.stack,\n cause:\n err.cause instanceof Error\n ? {\n name: err.cause.name,\n message: err.cause.message,\n }\n : err.cause,\n });\n }\n if (typeof err === 'string') return JSON.stringify({ message: err });\n if (err == null) return JSON.stringify({ message: 'Unknown error' });\n try {\n return JSON.stringify(err);\n } catch {\n return JSON.stringify({ message: String(err) });\n }\n}\n\nfunction failureScores(\n scorerNames: string[],\n error: unknown,\n): Record<string, ScorerResult> {\n const reason = `Task failed: ${errorMessage(error)}`;\n const scores: Record<string, ScorerResult> = {};\n for (const scorerName of scorerNames) {\n scores[scorerName] = { score: 0, reason };\n }\n return scores;\n}\n\nfunction createSemaphore(maxConcurrency: number) {\n let active = 0;\n const queue: Array<() => void> = [];\n\n return {\n async acquire(): Promise<void> {\n if (active < maxConcurrency) {\n active++;\n return;\n }\n return new Promise<void>((resolve) => queue.push(resolve));\n },\n release(): void {\n active--;\n const next = queue.shift();\n if (next) {\n active++;\n next();\n }\n },\n };\n}\n\nasync function wrapTask<T>(\n task: TaskFn<T>,\n input: T,\n timeoutMs: number,\n): Promise<WrappedResult> {\n const start = performance.now();\n let timerId: ReturnType<typeof setTimeout> | undefined;\n try {\n const result = await Promise.race([\n task(input),\n new Promise<never>((_, reject) => {\n timerId = setTimeout(\n () => reject(new Error('timeout exceeded')),\n timeoutMs,\n );\n }),\n ]);\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: result.output,\n latencyMs,\n tokensIn: result.usage?.inputTokens ?? 0,\n tokensOut: result.usage?.outputTokens ?? 0,\n };\n } catch (err) {\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: '',\n latencyMs,\n tokensIn: 0,\n tokensOut: 0,\n error: err,\n };\n }\n}\n\nfunction clampScore(score: number, scorerName: string): number {\n if (score < 0 || score > 1) {\n console.warn(\n `Scorer \"${scorerName}\" returned out-of-range score ${score}, clamping to 0..1`,\n );\n return Math.max(0, Math.min(1, score));\n }\n return score;\n}\n\nexport async function runEval<T>(config: EvalConfig<T>): Promise<RunSummary> {\n const {\n name,\n model,\n dataset: ds,\n task,\n scorers,\n store,\n suiteId,\n maxConcurrency = 10,\n batchSize,\n timeout = 30_000,\n trials = 1,\n threshold = 0.5,\n } = config;\n\n const emitter = config.emitter ?? new EvalEmitter();\n const runId =\n config.runId ??\n (() => {\n const resolvedSuiteId = suiteId ?? store.createSuite(name).id;\n return store.createRun({\n suite_id: resolvedSuiteId,\n name,\n model,\n config: config.config,\n });\n })();\n\n const items: Array<{ index: number; input: T }> = [];\n let idx = 0;\n for await (const item of ds) {\n items.push({ index: idx++, input: item });\n }\n\n emitter.emit('run:start', { runId, totalCases: items.length, name, model });\n\n const semaphore = createSemaphore(maxConcurrency);\n const scorerNames = Object.keys(scorers);\n\n const allCaseScores: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }> = [];\n\n const processItem = async ({ index, input }: { index: number; input: T }) => {\n await semaphore.acquire();\n try {\n emitter.emit('case:start', { runId, index, input });\n\n let finalResult: WrappedResult;\n let finalScores: Record<string, ScorerResult>;\n\n if (trials > 1) {\n const trialResults: Array<{\n result: WrappedResult;\n scores: Record<string, ScorerResult>;\n }> = [];\n\n for (let t = 0; t < trials; t++) {\n const result = await wrapTask(task, input, timeout);\n if (result.error) {\n trialResults.push({\n result,\n scores: failureScores(scorerNames, result.error),\n });\n } else {\n const scores: Record<string, ScorerResult> = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: result.output,\n expected: (input as Record<string, unknown>).expected,\n });\n scores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n trialResults.push({ result, scores });\n }\n }\n\n const lastSuccessful = [...trialResults]\n .reverse()\n .find((t) => !t.result.error);\n const baseResult =\n lastSuccessful?.result ??\n trialResults[trialResults.length - 1]!.result;\n finalResult = {\n output: baseResult.output,\n latencyMs: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.latencyMs, 0) /\n trials,\n ),\n tokensIn: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensIn, 0) /\n trials,\n ),\n tokensOut: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensOut, 0) /\n trials,\n ),\n error: lastSuccessful ? undefined : baseResult.error,\n };\n\n finalScores = {};\n for (const sName of scorerNames) {\n const meanScore =\n trialResults.reduce((sum, t) => sum + t.scores[sName]!.score, 0) /\n trials;\n finalScores[sName] = {\n score: meanScore,\n reason:\n trialResults[trialResults.length - 1]!.scores[sName]?.reason,\n metadata:\n trialResults[trialResults.length - 1]!.scores[sName]?.metadata,\n };\n }\n } else {\n finalResult = await wrapTask(task, input, timeout);\n if (finalResult.error) {\n finalScores = failureScores(scorerNames, finalResult.error);\n } else {\n finalScores = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n });\n finalScores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n }\n }\n\n const caseId = crypto.randomUUID();\n\n const caseData: CaseData = {\n id: caseId,\n run_id: runId,\n idx: index,\n input,\n output: finalResult.output || null,\n expected: (input as Record<string, unknown>).expected,\n latency_ms: finalResult.latencyMs,\n tokens_in: finalResult.tokensIn,\n tokens_out: finalResult.tokensOut,\n error: finalResult.error\n ? serializeError(finalResult.error)\n : undefined,\n };\n store.saveCases([caseData]);\n\n const scoreDataList: ScoreData[] = scorerNames.map((sName) => ({\n id: crypto.randomUUID(),\n case_id: caseId,\n scorer_name: sName,\n score: finalScores[sName]!.score,\n reason: finalScores[sName]!.reason,\n }));\n store.saveScores(scoreDataList);\n\n allCaseScores.push({\n index,\n scores: Object.fromEntries(\n scorerNames.map((sName) => [sName, finalScores[sName]!.score]),\n ),\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n\n if (finalResult.error) {\n emitter.emit('case:error', {\n runId,\n index,\n error: errorMessage(finalResult.error),\n });\n }\n\n emitter.emit('case:scored', {\n runId,\n index,\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n scores: finalScores,\n error: finalResult.error,\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n } finally {\n semaphore.release();\n }\n };\n\n const batches = batchSize\n ? Array.from({ length: Math.ceil(items.length / batchSize) }, (_, i) =>\n items.slice(i * batchSize, (i + 1) * batchSize),\n )\n : [items];\n\n try {\n for (const batch of batches) {\n await Promise.all(batch.map(processItem));\n }\n } catch (err) {\n store.finishRun(runId, 'failed');\n throw err;\n }\n\n const summary = computeSummary(allCaseScores, scorerNames, threshold);\n store.finishRun(runId, 'completed', summary);\n emitter.emit('run:end', { runId, summary });\n\n return summary;\n}\n\nfunction computeSummary(\n cases: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }>,\n scorerNames: string[],\n threshold: number,\n): RunSummary {\n const totalCases = cases.length;\n let passCount = 0;\n let failCount = 0;\n let totalLatencyMs = 0;\n let totalTokensIn = 0;\n let totalTokensOut = 0;\n\n const scoreSums: Record<string, number> = {};\n for (const name of scorerNames) {\n scoreSums[name] = 0;\n }\n\n for (const c of cases) {\n totalLatencyMs += c.latencyMs;\n totalTokensIn += c.tokensIn;\n totalTokensOut += c.tokensOut;\n\n let allPass = true;\n for (const name of scorerNames) {\n const score = c.scores[name] ?? 0;\n scoreSums[name]! += score;\n if (score < threshold) allPass = false;\n }\n if (allPass) passCount++;\n else failCount++;\n }\n\n const meanScores: Record<string, number> = {};\n for (const name of scorerNames) {\n meanScores[name] = totalCases > 0 ? scoreSums[name]! / totalCases : 0;\n }\n\n return {\n totalCases,\n passCount,\n failCount,\n meanScores,\n totalLatencyMs,\n totalTokensIn,\n totalTokensOut,\n };\n}\n"],
|
|
5
|
+
"mappings": ";AAAA,SAAS,oBAAoB;AAyCtB,IAAM,cAAN,cAA0B,aAAa;AAAA,EACnC,GACP,OACA,UACM;AACN,WAAO,MAAM,GAAG,OAAO,QAAQ;AAAA,EACjC;AAAA,EAES,KACP,OACA,MACS;AACT,WAAO,MAAM,KAAK,OAAO,IAAI;AAAA,EAC/B;AACF;AA4BA,SAAS,aAAa,KAAsB;AAC1C,MAAI,eAAe,OAAO;AACxB,WAAO,GAAG,IAAI,IAAI,KAAK,IAAI,OAAO;AAAA,EACpC;AACA,MAAI,OAAO,QAAQ,SAAU,QAAO;AACpC,MAAI,OAAO,KAAM,QAAO;AACxB,MAAI;AACF,WAAO,KAAK,UAAU,GAAG;AAAA,EAC3B,QAAQ;AACN,WAAO,OAAO,GAAG;AAAA,EACnB;AACF;AAEA,SAAS,eAAe,KAAsB;AAC5C,MAAI,eAAe,OAAO;AACxB,WAAO,KAAK,UAAU;AAAA,MACpB,MAAM,IAAI;AAAA,MACV,SAAS,IAAI;AAAA,MACb,OAAO,IAAI;AAAA,MACX,OACE,IAAI,iBAAiB,QACjB;AAAA,QACE,MAAM,IAAI,MAAM;AAAA,QAChB,SAAS,IAAI,MAAM;AAAA,MACrB,IACA,IAAI;AAAA,IACZ,CAAC;AAAA,EACH;AACA,MAAI,OAAO,QAAQ,SAAU,QAAO,KAAK,UAAU,EAAE,SAAS,IAAI,CAAC;AACnE,MAAI,OAAO,KAAM,QAAO,KAAK,UAAU,EAAE,SAAS,gBAAgB,CAAC;AACnE,MAAI;AACF,WAAO,KAAK,UAAU,GAAG;AAAA,EAC3B,QAAQ;AACN,WAAO,KAAK,UAAU,EAAE,SAAS,OAAO,GAAG,EAAE,CAAC;AAAA,EAChD;AACF;AAEA,SAAS,cACP,aACA,OAC8B;AAC9B,QAAM,SAAS,gBAAgB,aAAa,KAAK,CAAC;AAClD,QAAM,SAAuC,CAAC;AAC9C,aAAW,cAAc,aAAa;AACpC,WAAO,UAAU,IAAI,EAAE,OAAO,GAAG,OAAO;AAAA,EAC1C;AACA,SAAO;AACT;AAEA,SAAS,gBAAgB,gBAAwB;AAC/C,MAAI,SAAS;AACb,QAAM,QAA2B,CAAC;AAElC,SAAO;AAAA,IACL,MAAM,UAAyB;AAC7B,UAAI,SAAS,gBAAgB;AAC3B;AACA;AAAA,MACF;AACA,aAAO,IAAI,QAAc,CAAC,YAAY,MAAM,KAAK,OAAO,CAAC;AAAA,IAC3D;AAAA,IACA,UAAgB;AACd;AACA,YAAM,OAAO,MAAM,MAAM;AACzB,UAAI,MAAM;AACR;AACA,aAAK;AAAA,MACP;AAAA,IACF;AAAA,EACF;AACF;AAEA,eAAe,SACb,MACA,OACA,WACwB;AACxB,QAAM,QAAQ,YAAY,IAAI;AAC9B,MAAI;AACJ,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,MAChC,KAAK,KAAK;AAAA,MACV,IAAI,QAAe,CAAC,GAAG,WAAW;AAChC,kBAAU;AAAA,UACR,MAAM,OAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UAC1C;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AACD,iBAAa,OAAO;AACpB,UAAM,YAAY,KAAK,MAAM,YAAY,IAAI,IAAI,KAAK;AACtD,WAAO;AAAA,MACL,QAAQ,OAAO;AAAA,MACf;AAAA,MACA,UAAU,OAAO,OAAO,eAAe;AAAA,MACvC,WAAW,OAAO,OAAO,gBAAgB;AAAA,IAC3C;AAAA,EACF,SAAS,KAAK;AACZ,iBAAa,OAAO;AACpB,UAAM,YAAY,KAAK,MAAM,YAAY,IAAI,IAAI,KAAK;AACtD,WAAO;AAAA,MACL,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,MACV,WAAW;AAAA,MACX,OAAO;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,WAAW,OAAe,YAA4B;AAC7D,MAAI,QAAQ,KAAK,QAAQ,GAAG;AAC1B,YAAQ;AAAA,MACN,WAAW,UAAU,iCAAiC,KAAK;AAAA,IAC7D;AACA,WAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AAAA,EACvC;AACA,SAAO;AACT;AAEA,eAAsB,QAAW,QAA4C;AAC3E,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,iBAAiB;AAAA,IACjB;AAAA,IACA,UAAU;AAAA,IACV,SAAS;AAAA,IACT,YAAY;AAAA,EACd,IAAI;AAEJ,QAAM,UAAU,OAAO,WAAW,IAAI,YAAY;AAClD,QAAM,QACJ,OAAO,UACN,MAAM;AACL,UAAM,kBAAkB,WAAW,MAAM,YAAY,IAAI,EAAE;AAC3D,WAAO,MAAM,UAAU;AAAA,MACrB,UAAU;AAAA,MACV;AAAA,MACA;AAAA,MACA,QAAQ,OAAO;AAAA,IACjB,CAAC;AAAA,EACH,GAAG;AAEL,QAAM,QAA4C,CAAC;AACnD,MAAI,MAAM;AACV,mBAAiB,QAAQ,IAAI;AAC3B,UAAM,KAAK,EAAE,OAAO,OAAO,OAAO,KAAK,CAAC;AAAA,EAC1C;AAEA,UAAQ,KAAK,aAAa,EAAE,OAAO,YAAY,MAAM,QAAQ,MAAM,MAAM,CAAC;AAE1E,QAAM,YAAY,gBAAgB,cAAc;AAChD,QAAM,cAAc,OAAO,KAAK,OAAO;AAEvC,QAAM,gBAMD,CAAC;AAEN,QAAM,cAAc,OAAO,EAAE,OAAO,MAAM,MAAmC;AAC3E,UAAM,UAAU,QAAQ;AACxB,QAAI;AACF,cAAQ,KAAK,cAAc,EAAE,OAAO,OAAO,MAAM,CAAC;AAElD,UAAI;AACJ,UAAI;AAEJ,UAAI,SAAS,GAAG;AACd,cAAM,eAGD,CAAC;AAEN,iBAAS,IAAI,GAAG,IAAI,QAAQ,KAAK;AAC/B,gBAAM,SAAS,MAAM,SAAS,MAAM,OAAO,OAAO;AAClD,cAAI,OAAO,OAAO;AAChB,yBAAa,KAAK;AAAA,cAChB;AAAA,cACA,QAAQ,cAAc,aAAa,OAAO,KAAK;AAAA,YACjD,CAAC;AAAA,UACH,OAAO;AACL,kBAAM,SAAuC,CAAC;AAC9C,uBAAW,CAAC,OAAO,MAAM,KAAK,OAAO,QAAQ,OAAO,GAAG;AACrD,oBAAM,KAAK,MAAM,OAAO;AAAA,gBACtB;AAAA,gBACA,QAAQ,OAAO;AAAA,gBACf,UAAW,MAAkC;AAAA,cAC/C,CAAC;AACD,qBAAO,KAAK,IAAI;AAAA,gBACd,OAAO,WAAW,GAAG,OAAO,KAAK;AAAA,gBACjC,QAAQ,GAAG;AAAA,gBACX,UAAU,GAAG;AAAA,cACf;AAAA,YACF;AACA,yBAAa,KAAK,EAAE,QAAQ,OAAO,CAAC;AAAA,UACtC;AAAA,QACF;AAEA,cAAM,iBAAiB,CAAC,GAAG,YAAY,EACpC,QAAQ,EACR,KAAK,CAAC,MAAM,CAAC,EAAE,OAAO,KAAK;AAC9B,cAAM,aACJ,gBAAgB,UAChB,aAAa,aAAa,SAAS,CAAC,EAAG;AACzC,sBAAc;AAAA,UACZ,QAAQ,WAAW;AAAA,UACnB,WAAW,KAAK;AAAA,YACd,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,WAAW,CAAC,IACzD;AAAA,UACJ;AAAA,UACA,UAAU,KAAK;AAAA,YACb,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,UAAU,CAAC,IACxD;AAAA,UACJ;AAAA,UACA,WAAW,KAAK;AAAA,YACd,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,WAAW,CAAC,IACzD;AAAA,UACJ;AAAA,UACA,OAAO,iBAAiB,SAAY,WAAW;AAAA,QACjD;AAEA,sBAAc,CAAC;AACf,mBAAW,SAAS,aAAa;AAC/B,gBAAM,YACJ,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,KAAK,EAAG,OAAO,CAAC,IAC/D;AACF,sBAAY,KAAK,IAAI;AAAA,YACnB,OAAO;AAAA,YACP,QACE,aAAa,aAAa,SAAS,CAAC,EAAG,OAAO,KAAK,GAAG;AAAA,YACxD,UACE,aAAa,aAAa,SAAS,CAAC,EAAG,OAAO,KAAK,GAAG;AAAA,UAC1D;AAAA,QACF;AAAA,MACF,OAAO;AACL,sBAAc,MAAM,SAAS,MAAM,OAAO,OAAO;AACjD,YAAI,YAAY,OAAO;AACrB,wBAAc,cAAc,aAAa,YAAY,KAAK;AAAA,QAC5D,OAAO;AACL,wBAAc,CAAC;AACf,qBAAW,CAAC,OAAO,MAAM,KAAK,OAAO,QAAQ,OAAO,GAAG;AACrD,kBAAM,KAAK,MAAM,OAAO;AAAA,cACtB;AAAA,cACA,QAAQ,YAAY;AAAA,cACpB,UAAW,MAAkC;AAAA,YAC/C,CAAC;AACD,wBAAY,KAAK,IAAI;AAAA,cACnB,OAAO,WAAW,GAAG,OAAO,KAAK;AAAA,cACjC,QAAQ,GAAG;AAAA,cACX,UAAU,GAAG;AAAA,YACf;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAEA,YAAM,SAAS,OAAO,WAAW;AAEjC,YAAM,WAAqB;AAAA,QACzB,IAAI;AAAA,QACJ,QAAQ;AAAA,QACR,KAAK;AAAA,QACL;AAAA,QACA,QAAQ,YAAY,UAAU;AAAA,QAC9B,UAAW,MAAkC;AAAA,QAC7C,YAAY,YAAY;AAAA,QACxB,WAAW,YAAY;AAAA,QACvB,YAAY,YAAY;AAAA,QACxB,OAAO,YAAY,QACf,eAAe,YAAY,KAAK,IAChC;AAAA,MACN;AACA,YAAM,UAAU,CAAC,QAAQ,CAAC;AAE1B,YAAM,gBAA6B,YAAY,IAAI,CAAC,WAAW;AAAA,QAC7D,IAAI,OAAO,WAAW;AAAA,QACtB,SAAS;AAAA,QACT,aAAa;AAAA,QACb,OAAO,YAAY,KAAK,EAAG;AAAA,QAC3B,QAAQ,YAAY,KAAK,EAAG;AAAA,MAC9B,EAAE;AACF,YAAM,WAAW,aAAa;AAE9B,oBAAc,KAAK;AAAA,QACjB;AAAA,QACA,QAAQ,OAAO;AAAA,UACb,YAAY,IAAI,CAAC,UAAU,CAAC,OAAO,YAAY,KAAK,EAAG,KAAK,CAAC;AAAA,QAC/D;AAAA,QACA,WAAW,YAAY;AAAA,QACvB,UAAU,YAAY;AAAA,QACtB,WAAW,YAAY;AAAA,MACzB,CAAC;AAED,UAAI,YAAY,OAAO;AACrB,gBAAQ,KAAK,cAAc;AAAA,UACzB;AAAA,UACA;AAAA,UACA,OAAO,aAAa,YAAY,KAAK;AAAA,QACvC,CAAC;AAAA,MACH;AAEA,cAAQ,KAAK,eAAe;AAAA,QAC1B;AAAA,QACA;AAAA,QACA;AAAA,QACA,QAAQ,YAAY;AAAA,QACpB,UAAW,MAAkC;AAAA,QAC7C,QAAQ;AAAA,QACR,OAAO,YAAY;AAAA,QACnB,WAAW,YAAY;AAAA,QACvB,UAAU,YAAY;AAAA,QACtB,WAAW,YAAY;AAAA,MACzB,CAAC;AAAA,IACH,UAAE;AACA,gBAAU,QAAQ;AAAA,IACpB;AAAA,EACF;AAEA,QAAM,UAAU,YACZ,MAAM;AAAA,IAAK,EAAE,QAAQ,KAAK,KAAK,MAAM,SAAS,SAAS,EAAE;AAAA,IAAG,CAAC,GAAG,MAC9D,MAAM,MAAM,IAAI,YAAY,IAAI,KAAK,SAAS;AAAA,EAChD,IACA,CAAC,KAAK;AAEV,MAAI;AACF,eAAW,SAAS,SAAS;AAC3B,YAAM,QAAQ,IAAI,MAAM,IAAI,WAAW,CAAC;AAAA,IAC1C;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,UAAU,OAAO,QAAQ;AAC/B,UAAM;AAAA,EACR;AAEA,QAAM,UAAU,eAAe,eAAe,aAAa,SAAS;AACpE,QAAM,UAAU,OAAO,aAAa,OAAO;AAC3C,UAAQ,KAAK,WAAW,EAAE,OAAO,QAAQ,CAAC;AAE1C,SAAO;AACT;AAEA,SAAS,eACP,OAOA,aACA,WACY;AACZ,QAAM,aAAa,MAAM;AACzB,MAAI,YAAY;AAChB,MAAI,YAAY;AAChB,MAAI,iBAAiB;AACrB,MAAI,gBAAgB;AACpB,MAAI,iBAAiB;AAErB,QAAM,YAAoC,CAAC;AAC3C,aAAW,QAAQ,aAAa;AAC9B,cAAU,IAAI,IAAI;AAAA,EACpB;AAEA,aAAW,KAAK,OAAO;AACrB,sBAAkB,EAAE;AACpB,qBAAiB,EAAE;AACnB,sBAAkB,EAAE;AAEpB,QAAI,UAAU;AACd,eAAW,QAAQ,aAAa;AAC9B,YAAM,QAAQ,EAAE,OAAO,IAAI,KAAK;AAChC,gBAAU,IAAI,KAAM;AACpB,UAAI,QAAQ,UAAW,WAAU;AAAA,IACnC;AACA,QAAI,QAAS;AAAA,QACR;AAAA,EACP;AAEA,QAAM,aAAqC,CAAC;AAC5C,aAAW,QAAQ,aAAa;AAC9B,eAAW,IAAI,IAAI,aAAa,IAAI,UAAU,IAAI,IAAK,aAAa;AAAA,EACtE;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/evaluate/index.js
CHANGED
|
@@ -398,13 +398,15 @@ async function runEval(config) {
|
|
|
398
398
|
threshold = 0.5
|
|
399
399
|
} = config;
|
|
400
400
|
const emitter = config.emitter ?? new EvalEmitter();
|
|
401
|
-
const
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
401
|
+
const runId = config.runId ?? (() => {
|
|
402
|
+
const resolvedSuiteId = suiteId ?? store.createSuite(name).id;
|
|
403
|
+
return store.createRun({
|
|
404
|
+
suite_id: resolvedSuiteId,
|
|
405
|
+
name,
|
|
406
|
+
model,
|
|
407
|
+
config: config.config
|
|
408
|
+
});
|
|
409
|
+
})();
|
|
408
410
|
const items = [];
|
|
409
411
|
let idx = 0;
|
|
410
412
|
for await (const item of ds) {
|