@deepagents/evals 0.24.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/engine/index.d.ts +1 -0
- package/dist/engine/index.d.ts.map +1 -1
- package/dist/engine/index.js +9 -7
- package/dist/engine/index.js.map +2 -2
- package/dist/evaluate/index.js +9 -7
- package/dist/evaluate/index.js.map +2 -2
- package/dist/index.js +23 -7
- package/dist/index.js.map +2 -2
- package/dist/store/index.d.ts +3 -0
- package/dist/store/index.d.ts.map +1 -1
- package/dist/store/index.js +14 -0
- package/dist/store/index.js.map +2 -2
- package/package.json +1 -1
package/dist/engine/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/engine/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,KAAK,EAEV,QAAQ,EACR,UAAU,EAEX,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;CACvD;AAED,MAAM,MAAM,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;AAE1D,MAAM,WAAW,YAAY;IAC3B,WAAW,EAAE;QACX,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,YAAY,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,OAAO,CAAA;KAAE,CAAC;IAC/D,aAAa,EAAE;QACb,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,OAAO,CAAC;QACf,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QACrC,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,YAAY,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9D,SAAS,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,UAAU,CAAA;KAAE,CAAC;CACnD;AAED,qBAAa,WAAY,SAAQ,YAAY;IAClC,EAAE,CAAC,CAAC,SAAS,MAAM,YAAY,EACtC,KAAK,EAAE,CAAC,EACR,QAAQ,EAAE,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC,CAAC,KAAK,IAAI,GACxC,IAAI;IAIE,IAAI,CAAC,CAAC,SAAS,MAAM,YAAY,EACxC,KAAK,EAAE,CAAC,EACR,IAAI,EAAE,YAAY,CAAC,CAAC,CAAC,GACpB,OAAO;CAGX;AAED,MAAM,WAAW,UAAU,CAAC,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC;IAC1B,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,KAAK,EAAE,QAAQ,CAAC;IAChB,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAkID,wBAAsB,OAAO,CAAC,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/engine/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,KAAK,EAEV,QAAQ,EACR,UAAU,EAEX,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;CACvD;AAED,MAAM,MAAM,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,OAAO,CAAC,UAAU,CAAC,CAAC;AAE1D,MAAM,WAAW,YAAY;IAC3B,WAAW,EAAE;QACX,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,YAAY,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,OAAO,CAAA;KAAE,CAAC;IAC/D,aAAa,EAAE;QACb,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,OAAO,CAAC;QACf,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,OAAO,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QACrC,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,YAAY,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9D,SAAS,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,UAAU,CAAA;KAAE,CAAC;CACnD;AAED,qBAAa,WAAY,SAAQ,YAAY;IAClC,EAAE,CAAC,CAAC,SAAS,MAAM,YAAY,EACtC,KAAK,EAAE,CAAC,EACR,QAAQ,EAAE,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC,CAAC,KAAK,IAAI,GACxC,IAAI;IAIE,IAAI,CAAC,CAAC,SAAS,MAAM,YAAY,EACxC,KAAK,EAAE,CAAC,EACR,IAAI,EAAE,YAAY,CAAC,CAAC,CAAC,GACpB,OAAO;CAGX;AAED,MAAM,WAAW,UAAU,CAAC,CAAC;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC;IAC1B,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,KAAK,EAAE,QAAQ,CAAC;IAChB,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACjC,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAkID,wBAAsB,OAAO,CAAC,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,UAAU,CAAC,CAkO3E"}
|
package/dist/engine/index.js
CHANGED
|
@@ -127,13 +127,15 @@ async function runEval(config) {
|
|
|
127
127
|
threshold = 0.5
|
|
128
128
|
} = config;
|
|
129
129
|
const emitter = config.emitter ?? new EvalEmitter();
|
|
130
|
-
const
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
130
|
+
const runId = config.runId ?? (() => {
|
|
131
|
+
const resolvedSuiteId = suiteId ?? store.createSuite(name).id;
|
|
132
|
+
return store.createRun({
|
|
133
|
+
suite_id: resolvedSuiteId,
|
|
134
|
+
name,
|
|
135
|
+
model,
|
|
136
|
+
config: config.config
|
|
137
|
+
});
|
|
138
|
+
})();
|
|
137
139
|
const items = [];
|
|
138
140
|
let idx = 0;
|
|
139
141
|
for await (const item of ds) {
|
package/dist/engine/index.js.map
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../src/engine/index.ts"],
|
|
4
|
-
"sourcesContent": ["import { EventEmitter } from 'node:events';\n\nimport type { Scorer, ScorerResult } from '../scorers/index.ts';\nimport type {\n CaseData,\n RunStore,\n RunSummary,\n ScoreData,\n} from '../store/index.ts';\n\nexport interface TaskResult {\n output: string;\n usage?: { inputTokens: number; outputTokens: number };\n}\n\nexport type TaskFn<T> = (input: T) => Promise<TaskResult>;\n\nexport interface EngineEvents {\n 'run:start': {\n runId: string;\n totalCases: number;\n name: string;\n model: string;\n };\n 'case:start': { runId: string; index: number; input: unknown };\n 'case:scored': {\n runId: string;\n index: number;\n input: unknown;\n output: string;\n expected: unknown;\n scores: Record<string, ScorerResult>;\n error?: unknown;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n };\n 'case:error': { runId: string; index: number; error: string };\n 'run:end': { runId: string; summary: RunSummary };\n}\n\nexport class EvalEmitter extends EventEmitter {\n override on<K extends keyof EngineEvents>(\n event: K,\n listener: (data: EngineEvents[K]) => void,\n ): this {\n return super.on(event, listener);\n }\n\n override emit<K extends keyof EngineEvents>(\n event: K,\n data: EngineEvents[K],\n ): boolean {\n return super.emit(event, data);\n }\n}\n\nexport interface EvalConfig<T> {\n name: string;\n model: string;\n dataset: AsyncIterable<T>;\n task: TaskFn<T>;\n scorers: Record<string, Scorer>;\n store: RunStore;\n emitter?: EvalEmitter;\n suiteId?: string;\n config?: Record<string, unknown>;\n maxConcurrency?: number;\n batchSize?: number;\n timeout?: number;\n trials?: number;\n threshold?: number;\n}\n\ninterface WrappedResult {\n output: string;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n error?: unknown;\n}\n\nfunction errorMessage(err: unknown): string {\n if (err instanceof Error) {\n return `${err.name}: ${err.message}`;\n }\n if (typeof err === 'string') return err;\n if (err == null) return 'Unknown error';\n try {\n return JSON.stringify(err);\n } catch {\n return String(err);\n }\n}\n\nfunction serializeError(err: unknown): string {\n if (err instanceof Error) {\n return JSON.stringify({\n name: err.name,\n message: err.message,\n stack: err.stack,\n cause:\n err.cause instanceof Error\n ? {\n name: err.cause.name,\n message: err.cause.message,\n }\n : err.cause,\n });\n }\n if (typeof err === 'string') return JSON.stringify({ message: err });\n if (err == null) return JSON.stringify({ message: 'Unknown error' });\n try {\n return JSON.stringify(err);\n } catch {\n return JSON.stringify({ message: String(err) });\n }\n}\n\nfunction failureScores(\n scorerNames: string[],\n error: unknown,\n): Record<string, ScorerResult> {\n const reason = `Task failed: ${errorMessage(error)}`;\n const scores: Record<string, ScorerResult> = {};\n for (const scorerName of scorerNames) {\n scores[scorerName] = { score: 0, reason };\n }\n return scores;\n}\n\nfunction createSemaphore(maxConcurrency: number) {\n let active = 0;\n const queue: Array<() => void> = [];\n\n return {\n async acquire(): Promise<void> {\n if (active < maxConcurrency) {\n active++;\n return;\n }\n return new Promise<void>((resolve) => queue.push(resolve));\n },\n release(): void {\n active--;\n const next = queue.shift();\n if (next) {\n active++;\n next();\n }\n },\n };\n}\n\nasync function wrapTask<T>(\n task: TaskFn<T>,\n input: T,\n timeoutMs: number,\n): Promise<WrappedResult> {\n const start = performance.now();\n let timerId: ReturnType<typeof setTimeout> | undefined;\n try {\n const result = await Promise.race([\n task(input),\n new Promise<never>((_, reject) => {\n timerId = setTimeout(\n () => reject(new Error('timeout exceeded')),\n timeoutMs,\n );\n }),\n ]);\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: result.output,\n latencyMs,\n tokensIn: result.usage?.inputTokens ?? 0,\n tokensOut: result.usage?.outputTokens ?? 0,\n };\n } catch (err) {\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: '',\n latencyMs,\n tokensIn: 0,\n tokensOut: 0,\n error: err,\n };\n }\n}\n\nfunction clampScore(score: number, scorerName: string): number {\n if (score < 0 || score > 1) {\n console.warn(\n `Scorer \"${scorerName}\" returned out-of-range score ${score}, clamping to 0..1`,\n );\n return Math.max(0, Math.min(1, score));\n }\n return score;\n}\n\nexport async function runEval<T>(config: EvalConfig<T>): Promise<RunSummary> {\n const {\n name,\n model,\n dataset: ds,\n task,\n scorers,\n store,\n suiteId,\n maxConcurrency = 10,\n batchSize,\n timeout = 30_000,\n trials = 1,\n threshold = 0.5,\n } = config;\n\n const emitter = config.emitter ?? new EvalEmitter();\n const resolvedSuiteId = suiteId ?? store.createSuite(name).id;\n const runId = store.createRun({\n suite_id: resolvedSuiteId,\n name,\n model,\n config: config.config,\n });\n\n const items: Array<{ index: number; input: T }> = [];\n let idx = 0;\n for await (const item of ds) {\n items.push({ index: idx++, input: item });\n }\n\n emitter.emit('run:start', { runId, totalCases: items.length, name, model });\n\n const semaphore = createSemaphore(maxConcurrency);\n const scorerNames = Object.keys(scorers);\n\n const allCaseScores: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }> = [];\n\n const processItem = async ({ index, input }: { index: number; input: T }) => {\n await semaphore.acquire();\n try {\n emitter.emit('case:start', { runId, index, input });\n\n let finalResult: WrappedResult;\n let finalScores: Record<string, ScorerResult>;\n\n if (trials > 1) {\n const trialResults: Array<{\n result: WrappedResult;\n scores: Record<string, ScorerResult>;\n }> = [];\n\n for (let t = 0; t < trials; t++) {\n const result = await wrapTask(task, input, timeout);\n if (result.error) {\n trialResults.push({\n result,\n scores: failureScores(scorerNames, result.error),\n });\n } else {\n const scores: Record<string, ScorerResult> = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: result.output,\n expected: (input as Record<string, unknown>).expected,\n });\n scores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n trialResults.push({ result, scores });\n }\n }\n\n const lastSuccessful = [...trialResults]\n .reverse()\n .find((t) => !t.result.error);\n const baseResult =\n lastSuccessful?.result ??\n trialResults[trialResults.length - 1]!.result;\n finalResult = {\n output: baseResult.output,\n latencyMs: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.latencyMs, 0) /\n trials,\n ),\n tokensIn: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensIn, 0) /\n trials,\n ),\n tokensOut: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensOut, 0) /\n trials,\n ),\n error: lastSuccessful ? undefined : baseResult.error,\n };\n\n finalScores = {};\n for (const sName of scorerNames) {\n const meanScore =\n trialResults.reduce((sum, t) => sum + t.scores[sName]!.score, 0) /\n trials;\n finalScores[sName] = {\n score: meanScore,\n reason:\n trialResults[trialResults.length - 1]!.scores[sName]?.reason,\n metadata:\n trialResults[trialResults.length - 1]!.scores[sName]?.metadata,\n };\n }\n } else {\n finalResult = await wrapTask(task, input, timeout);\n if (finalResult.error) {\n finalScores = failureScores(scorerNames, finalResult.error);\n } else {\n finalScores = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n });\n finalScores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n }\n }\n\n const caseId = crypto.randomUUID();\n\n const caseData: CaseData = {\n id: caseId,\n run_id: runId,\n idx: index,\n input,\n output: finalResult.output || null,\n expected: (input as Record<string, unknown>).expected,\n latency_ms: finalResult.latencyMs,\n tokens_in: finalResult.tokensIn,\n tokens_out: finalResult.tokensOut,\n error: finalResult.error\n ? serializeError(finalResult.error)\n : undefined,\n };\n store.saveCases([caseData]);\n\n const scoreDataList: ScoreData[] = scorerNames.map((sName) => ({\n id: crypto.randomUUID(),\n case_id: caseId,\n scorer_name: sName,\n score: finalScores[sName]!.score,\n reason: finalScores[sName]!.reason,\n }));\n store.saveScores(scoreDataList);\n\n allCaseScores.push({\n index,\n scores: Object.fromEntries(\n scorerNames.map((sName) => [sName, finalScores[sName]!.score]),\n ),\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n\n if (finalResult.error) {\n emitter.emit('case:error', {\n runId,\n index,\n error: errorMessage(finalResult.error),\n });\n }\n\n emitter.emit('case:scored', {\n runId,\n index,\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n scores: finalScores,\n error: finalResult.error,\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n } finally {\n semaphore.release();\n }\n };\n\n const batches = batchSize\n ? Array.from({ length: Math.ceil(items.length / batchSize) }, (_, i) =>\n items.slice(i * batchSize, (i + 1) * batchSize),\n )\n : [items];\n\n try {\n for (const batch of batches) {\n await Promise.all(batch.map(processItem));\n }\n } catch (err) {\n store.finishRun(runId, 'failed');\n throw err;\n }\n\n const summary = computeSummary(allCaseScores, scorerNames, threshold);\n store.finishRun(runId, 'completed', summary);\n emitter.emit('run:end', { runId, summary });\n\n return summary;\n}\n\nfunction computeSummary(\n cases: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }>,\n scorerNames: string[],\n threshold: number,\n): RunSummary {\n const totalCases = cases.length;\n let passCount = 0;\n let failCount = 0;\n let totalLatencyMs = 0;\n let totalTokensIn = 0;\n let totalTokensOut = 0;\n\n const scoreSums: Record<string, number> = {};\n for (const name of scorerNames) {\n scoreSums[name] = 0;\n }\n\n for (const c of cases) {\n totalLatencyMs += c.latencyMs;\n totalTokensIn += c.tokensIn;\n totalTokensOut += c.tokensOut;\n\n let allPass = true;\n for (const name of scorerNames) {\n const score = c.scores[name] ?? 0;\n scoreSums[name]! += score;\n if (score < threshold) allPass = false;\n }\n if (allPass) passCount++;\n else failCount++;\n }\n\n const meanScores: Record<string, number> = {};\n for (const name of scorerNames) {\n meanScores[name] = totalCases > 0 ? scoreSums[name]! / totalCases : 0;\n }\n\n return {\n totalCases,\n passCount,\n failCount,\n meanScores,\n totalLatencyMs,\n totalTokensIn,\n totalTokensOut,\n };\n}\n"],
|
|
5
|
-
"mappings": ";AAAA,SAAS,oBAAoB;AAyCtB,IAAM,cAAN,cAA0B,aAAa;AAAA,EACnC,GACP,OACA,UACM;AACN,WAAO,MAAM,GAAG,OAAO,QAAQ;AAAA,EACjC;AAAA,EAES,KACP,OACA,MACS;AACT,WAAO,MAAM,KAAK,OAAO,IAAI;AAAA,EAC/B;AACF;
|
|
4
|
+
"sourcesContent": ["import { EventEmitter } from 'node:events';\n\nimport type { Scorer, ScorerResult } from '../scorers/index.ts';\nimport type {\n CaseData,\n RunStore,\n RunSummary,\n ScoreData,\n} from '../store/index.ts';\n\nexport interface TaskResult {\n output: string;\n usage?: { inputTokens: number; outputTokens: number };\n}\n\nexport type TaskFn<T> = (input: T) => Promise<TaskResult>;\n\nexport interface EngineEvents {\n 'run:start': {\n runId: string;\n totalCases: number;\n name: string;\n model: string;\n };\n 'case:start': { runId: string; index: number; input: unknown };\n 'case:scored': {\n runId: string;\n index: number;\n input: unknown;\n output: string;\n expected: unknown;\n scores: Record<string, ScorerResult>;\n error?: unknown;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n };\n 'case:error': { runId: string; index: number; error: string };\n 'run:end': { runId: string; summary: RunSummary };\n}\n\nexport class EvalEmitter extends EventEmitter {\n override on<K extends keyof EngineEvents>(\n event: K,\n listener: (data: EngineEvents[K]) => void,\n ): this {\n return super.on(event, listener);\n }\n\n override emit<K extends keyof EngineEvents>(\n event: K,\n data: EngineEvents[K],\n ): boolean {\n return super.emit(event, data);\n }\n}\n\nexport interface EvalConfig<T> {\n name: string;\n model: string;\n dataset: AsyncIterable<T>;\n task: TaskFn<T>;\n scorers: Record<string, Scorer>;\n store: RunStore;\n emitter?: EvalEmitter;\n runId?: string;\n suiteId?: string;\n config?: Record<string, unknown>;\n maxConcurrency?: number;\n batchSize?: number;\n timeout?: number;\n trials?: number;\n threshold?: number;\n}\n\ninterface WrappedResult {\n output: string;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n error?: unknown;\n}\n\nfunction errorMessage(err: unknown): string {\n if (err instanceof Error) {\n return `${err.name}: ${err.message}`;\n }\n if (typeof err === 'string') return err;\n if (err == null) return 'Unknown error';\n try {\n return JSON.stringify(err);\n } catch {\n return String(err);\n }\n}\n\nfunction serializeError(err: unknown): string {\n if (err instanceof Error) {\n return JSON.stringify({\n name: err.name,\n message: err.message,\n stack: err.stack,\n cause:\n err.cause instanceof Error\n ? {\n name: err.cause.name,\n message: err.cause.message,\n }\n : err.cause,\n });\n }\n if (typeof err === 'string') return JSON.stringify({ message: err });\n if (err == null) return JSON.stringify({ message: 'Unknown error' });\n try {\n return JSON.stringify(err);\n } catch {\n return JSON.stringify({ message: String(err) });\n }\n}\n\nfunction failureScores(\n scorerNames: string[],\n error: unknown,\n): Record<string, ScorerResult> {\n const reason = `Task failed: ${errorMessage(error)}`;\n const scores: Record<string, ScorerResult> = {};\n for (const scorerName of scorerNames) {\n scores[scorerName] = { score: 0, reason };\n }\n return scores;\n}\n\nfunction createSemaphore(maxConcurrency: number) {\n let active = 0;\n const queue: Array<() => void> = [];\n\n return {\n async acquire(): Promise<void> {\n if (active < maxConcurrency) {\n active++;\n return;\n }\n return new Promise<void>((resolve) => queue.push(resolve));\n },\n release(): void {\n active--;\n const next = queue.shift();\n if (next) {\n active++;\n next();\n }\n },\n };\n}\n\nasync function wrapTask<T>(\n task: TaskFn<T>,\n input: T,\n timeoutMs: number,\n): Promise<WrappedResult> {\n const start = performance.now();\n let timerId: ReturnType<typeof setTimeout> | undefined;\n try {\n const result = await Promise.race([\n task(input),\n new Promise<never>((_, reject) => {\n timerId = setTimeout(\n () => reject(new Error('timeout exceeded')),\n timeoutMs,\n );\n }),\n ]);\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: result.output,\n latencyMs,\n tokensIn: result.usage?.inputTokens ?? 0,\n tokensOut: result.usage?.outputTokens ?? 0,\n };\n } catch (err) {\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: '',\n latencyMs,\n tokensIn: 0,\n tokensOut: 0,\n error: err,\n };\n }\n}\n\nfunction clampScore(score: number, scorerName: string): number {\n if (score < 0 || score > 1) {\n console.warn(\n `Scorer \"${scorerName}\" returned out-of-range score ${score}, clamping to 0..1`,\n );\n return Math.max(0, Math.min(1, score));\n }\n return score;\n}\n\nexport async function runEval<T>(config: EvalConfig<T>): Promise<RunSummary> {\n const {\n name,\n model,\n dataset: ds,\n task,\n scorers,\n store,\n suiteId,\n maxConcurrency = 10,\n batchSize,\n timeout = 30_000,\n trials = 1,\n threshold = 0.5,\n } = config;\n\n const emitter = config.emitter ?? new EvalEmitter();\n const runId =\n config.runId ??\n (() => {\n const resolvedSuiteId = suiteId ?? store.createSuite(name).id;\n return store.createRun({\n suite_id: resolvedSuiteId,\n name,\n model,\n config: config.config,\n });\n })();\n\n const items: Array<{ index: number; input: T }> = [];\n let idx = 0;\n for await (const item of ds) {\n items.push({ index: idx++, input: item });\n }\n\n emitter.emit('run:start', { runId, totalCases: items.length, name, model });\n\n const semaphore = createSemaphore(maxConcurrency);\n const scorerNames = Object.keys(scorers);\n\n const allCaseScores: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }> = [];\n\n const processItem = async ({ index, input }: { index: number; input: T }) => {\n await semaphore.acquire();\n try {\n emitter.emit('case:start', { runId, index, input });\n\n let finalResult: WrappedResult;\n let finalScores: Record<string, ScorerResult>;\n\n if (trials > 1) {\n const trialResults: Array<{\n result: WrappedResult;\n scores: Record<string, ScorerResult>;\n }> = [];\n\n for (let t = 0; t < trials; t++) {\n const result = await wrapTask(task, input, timeout);\n if (result.error) {\n trialResults.push({\n result,\n scores: failureScores(scorerNames, result.error),\n });\n } else {\n const scores: Record<string, ScorerResult> = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: result.output,\n expected: (input as Record<string, unknown>).expected,\n });\n scores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n trialResults.push({ result, scores });\n }\n }\n\n const lastSuccessful = [...trialResults]\n .reverse()\n .find((t) => !t.result.error);\n const baseResult =\n lastSuccessful?.result ??\n trialResults[trialResults.length - 1]!.result;\n finalResult = {\n output: baseResult.output,\n latencyMs: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.latencyMs, 0) /\n trials,\n ),\n tokensIn: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensIn, 0) /\n trials,\n ),\n tokensOut: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensOut, 0) /\n trials,\n ),\n error: lastSuccessful ? undefined : baseResult.error,\n };\n\n finalScores = {};\n for (const sName of scorerNames) {\n const meanScore =\n trialResults.reduce((sum, t) => sum + t.scores[sName]!.score, 0) /\n trials;\n finalScores[sName] = {\n score: meanScore,\n reason:\n trialResults[trialResults.length - 1]!.scores[sName]?.reason,\n metadata:\n trialResults[trialResults.length - 1]!.scores[sName]?.metadata,\n };\n }\n } else {\n finalResult = await wrapTask(task, input, timeout);\n if (finalResult.error) {\n finalScores = failureScores(scorerNames, finalResult.error);\n } else {\n finalScores = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n });\n finalScores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n }\n }\n\n const caseId = crypto.randomUUID();\n\n const caseData: CaseData = {\n id: caseId,\n run_id: runId,\n idx: index,\n input,\n output: finalResult.output || null,\n expected: (input as Record<string, unknown>).expected,\n latency_ms: finalResult.latencyMs,\n tokens_in: finalResult.tokensIn,\n tokens_out: finalResult.tokensOut,\n error: finalResult.error\n ? serializeError(finalResult.error)\n : undefined,\n };\n store.saveCases([caseData]);\n\n const scoreDataList: ScoreData[] = scorerNames.map((sName) => ({\n id: crypto.randomUUID(),\n case_id: caseId,\n scorer_name: sName,\n score: finalScores[sName]!.score,\n reason: finalScores[sName]!.reason,\n }));\n store.saveScores(scoreDataList);\n\n allCaseScores.push({\n index,\n scores: Object.fromEntries(\n scorerNames.map((sName) => [sName, finalScores[sName]!.score]),\n ),\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n\n if (finalResult.error) {\n emitter.emit('case:error', {\n runId,\n index,\n error: errorMessage(finalResult.error),\n });\n }\n\n emitter.emit('case:scored', {\n runId,\n index,\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n scores: finalScores,\n error: finalResult.error,\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n } finally {\n semaphore.release();\n }\n };\n\n const batches = batchSize\n ? Array.from({ length: Math.ceil(items.length / batchSize) }, (_, i) =>\n items.slice(i * batchSize, (i + 1) * batchSize),\n )\n : [items];\n\n try {\n for (const batch of batches) {\n await Promise.all(batch.map(processItem));\n }\n } catch (err) {\n store.finishRun(runId, 'failed');\n throw err;\n }\n\n const summary = computeSummary(allCaseScores, scorerNames, threshold);\n store.finishRun(runId, 'completed', summary);\n emitter.emit('run:end', { runId, summary });\n\n return summary;\n}\n\nfunction computeSummary(\n cases: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }>,\n scorerNames: string[],\n threshold: number,\n): RunSummary {\n const totalCases = cases.length;\n let passCount = 0;\n let failCount = 0;\n let totalLatencyMs = 0;\n let totalTokensIn = 0;\n let totalTokensOut = 0;\n\n const scoreSums: Record<string, number> = {};\n for (const name of scorerNames) {\n scoreSums[name] = 0;\n }\n\n for (const c of cases) {\n totalLatencyMs += c.latencyMs;\n totalTokensIn += c.tokensIn;\n totalTokensOut += c.tokensOut;\n\n let allPass = true;\n for (const name of scorerNames) {\n const score = c.scores[name] ?? 0;\n scoreSums[name]! += score;\n if (score < threshold) allPass = false;\n }\n if (allPass) passCount++;\n else failCount++;\n }\n\n const meanScores: Record<string, number> = {};\n for (const name of scorerNames) {\n meanScores[name] = totalCases > 0 ? scoreSums[name]! / totalCases : 0;\n }\n\n return {\n totalCases,\n passCount,\n failCount,\n meanScores,\n totalLatencyMs,\n totalTokensIn,\n totalTokensOut,\n };\n}\n"],
|
|
5
|
+
"mappings": ";AAAA,SAAS,oBAAoB;AAyCtB,IAAM,cAAN,cAA0B,aAAa;AAAA,EACnC,GACP,OACA,UACM;AACN,WAAO,MAAM,GAAG,OAAO,QAAQ;AAAA,EACjC;AAAA,EAES,KACP,OACA,MACS;AACT,WAAO,MAAM,KAAK,OAAO,IAAI;AAAA,EAC/B;AACF;AA4BA,SAAS,aAAa,KAAsB;AAC1C,MAAI,eAAe,OAAO;AACxB,WAAO,GAAG,IAAI,IAAI,KAAK,IAAI,OAAO;AAAA,EACpC;AACA,MAAI,OAAO,QAAQ,SAAU,QAAO;AACpC,MAAI,OAAO,KAAM,QAAO;AACxB,MAAI;AACF,WAAO,KAAK,UAAU,GAAG;AAAA,EAC3B,QAAQ;AACN,WAAO,OAAO,GAAG;AAAA,EACnB;AACF;AAEA,SAAS,eAAe,KAAsB;AAC5C,MAAI,eAAe,OAAO;AACxB,WAAO,KAAK,UAAU;AAAA,MACpB,MAAM,IAAI;AAAA,MACV,SAAS,IAAI;AAAA,MACb,OAAO,IAAI;AAAA,MACX,OACE,IAAI,iBAAiB,QACjB;AAAA,QACE,MAAM,IAAI,MAAM;AAAA,QAChB,SAAS,IAAI,MAAM;AAAA,MACrB,IACA,IAAI;AAAA,IACZ,CAAC;AAAA,EACH;AACA,MAAI,OAAO,QAAQ,SAAU,QAAO,KAAK,UAAU,EAAE,SAAS,IAAI,CAAC;AACnE,MAAI,OAAO,KAAM,QAAO,KAAK,UAAU,EAAE,SAAS,gBAAgB,CAAC;AACnE,MAAI;AACF,WAAO,KAAK,UAAU,GAAG;AAAA,EAC3B,QAAQ;AACN,WAAO,KAAK,UAAU,EAAE,SAAS,OAAO,GAAG,EAAE,CAAC;AAAA,EAChD;AACF;AAEA,SAAS,cACP,aACA,OAC8B;AAC9B,QAAM,SAAS,gBAAgB,aAAa,KAAK,CAAC;AAClD,QAAM,SAAuC,CAAC;AAC9C,aAAW,cAAc,aAAa;AACpC,WAAO,UAAU,IAAI,EAAE,OAAO,GAAG,OAAO;AAAA,EAC1C;AACA,SAAO;AACT;AAEA,SAAS,gBAAgB,gBAAwB;AAC/C,MAAI,SAAS;AACb,QAAM,QAA2B,CAAC;AAElC,SAAO;AAAA,IACL,MAAM,UAAyB;AAC7B,UAAI,SAAS,gBAAgB;AAC3B;AACA;AAAA,MACF;AACA,aAAO,IAAI,QAAc,CAAC,YAAY,MAAM,KAAK,OAAO,CAAC;AAAA,IAC3D;AAAA,IACA,UAAgB;AACd;AACA,YAAM,OAAO,MAAM,MAAM;AACzB,UAAI,MAAM;AACR;AACA,aAAK;AAAA,MACP;AAAA,IACF;AAAA,EACF;AACF;AAEA,eAAe,SACb,MACA,OACA,WACwB;AACxB,QAAM,QAAQ,YAAY,IAAI;AAC9B,MAAI;AACJ,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,MAChC,KAAK,KAAK;AAAA,MACV,IAAI,QAAe,CAAC,GAAG,WAAW;AAChC,kBAAU;AAAA,UACR,MAAM,OAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UAC1C;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AACD,iBAAa,OAAO;AACpB,UAAM,YAAY,KAAK,MAAM,YAAY,IAAI,IAAI,KAAK;AACtD,WAAO;AAAA,MACL,QAAQ,OAAO;AAAA,MACf;AAAA,MACA,UAAU,OAAO,OAAO,eAAe;AAAA,MACvC,WAAW,OAAO,OAAO,gBAAgB;AAAA,IAC3C;AAAA,EACF,SAAS,KAAK;AACZ,iBAAa,OAAO;AACpB,UAAM,YAAY,KAAK,MAAM,YAAY,IAAI,IAAI,KAAK;AACtD,WAAO;AAAA,MACL,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,MACV,WAAW;AAAA,MACX,OAAO;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,WAAW,OAAe,YAA4B;AAC7D,MAAI,QAAQ,KAAK,QAAQ,GAAG;AAC1B,YAAQ;AAAA,MACN,WAAW,UAAU,iCAAiC,KAAK;AAAA,IAC7D;AACA,WAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AAAA,EACvC;AACA,SAAO;AACT;AAEA,eAAsB,QAAW,QAA4C;AAC3E,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,iBAAiB;AAAA,IACjB;AAAA,IACA,UAAU;AAAA,IACV,SAAS;AAAA,IACT,YAAY;AAAA,EACd,IAAI;AAEJ,QAAM,UAAU,OAAO,WAAW,IAAI,YAAY;AAClD,QAAM,QACJ,OAAO,UACN,MAAM;AACL,UAAM,kBAAkB,WAAW,MAAM,YAAY,IAAI,EAAE;AAC3D,WAAO,MAAM,UAAU;AAAA,MACrB,UAAU;AAAA,MACV;AAAA,MACA;AAAA,MACA,QAAQ,OAAO;AAAA,IACjB,CAAC;AAAA,EACH,GAAG;AAEL,QAAM,QAA4C,CAAC;AACnD,MAAI,MAAM;AACV,mBAAiB,QAAQ,IAAI;AAC3B,UAAM,KAAK,EAAE,OAAO,OAAO,OAAO,KAAK,CAAC;AAAA,EAC1C;AAEA,UAAQ,KAAK,aAAa,EAAE,OAAO,YAAY,MAAM,QAAQ,MAAM,MAAM,CAAC;AAE1E,QAAM,YAAY,gBAAgB,cAAc;AAChD,QAAM,cAAc,OAAO,KAAK,OAAO;AAEvC,QAAM,gBAMD,CAAC;AAEN,QAAM,cAAc,OAAO,EAAE,OAAO,MAAM,MAAmC;AAC3E,UAAM,UAAU,QAAQ;AACxB,QAAI;AACF,cAAQ,KAAK,cAAc,EAAE,OAAO,OAAO,MAAM,CAAC;AAElD,UAAI;AACJ,UAAI;AAEJ,UAAI,SAAS,GAAG;AACd,cAAM,eAGD,CAAC;AAEN,iBAAS,IAAI,GAAG,IAAI,QAAQ,KAAK;AAC/B,gBAAM,SAAS,MAAM,SAAS,MAAM,OAAO,OAAO;AAClD,cAAI,OAAO,OAAO;AAChB,yBAAa,KAAK;AAAA,cAChB;AAAA,cACA,QAAQ,cAAc,aAAa,OAAO,KAAK;AAAA,YACjD,CAAC;AAAA,UACH,OAAO;AACL,kBAAM,SAAuC,CAAC;AAC9C,uBAAW,CAAC,OAAO,MAAM,KAAK,OAAO,QAAQ,OAAO,GAAG;AACrD,oBAAM,KAAK,MAAM,OAAO;AAAA,gBACtB;AAAA,gBACA,QAAQ,OAAO;AAAA,gBACf,UAAW,MAAkC;AAAA,cAC/C,CAAC;AACD,qBAAO,KAAK,IAAI;AAAA,gBACd,OAAO,WAAW,GAAG,OAAO,KAAK;AAAA,gBACjC,QAAQ,GAAG;AAAA,gBACX,UAAU,GAAG;AAAA,cACf;AAAA,YACF;AACA,yBAAa,KAAK,EAAE,QAAQ,OAAO,CAAC;AAAA,UACtC;AAAA,QACF;AAEA,cAAM,iBAAiB,CAAC,GAAG,YAAY,EACpC,QAAQ,EACR,KAAK,CAAC,MAAM,CAAC,EAAE,OAAO,KAAK;AAC9B,cAAM,aACJ,gBAAgB,UAChB,aAAa,aAAa,SAAS,CAAC,EAAG;AACzC,sBAAc;AAAA,UACZ,QAAQ,WAAW;AAAA,UACnB,WAAW,KAAK;AAAA,YACd,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,WAAW,CAAC,IACzD;AAAA,UACJ;AAAA,UACA,UAAU,KAAK;AAAA,YACb,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,UAAU,CAAC,IACxD;AAAA,UACJ;AAAA,UACA,WAAW,KAAK;AAAA,YACd,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,WAAW,CAAC,IACzD;AAAA,UACJ;AAAA,UACA,OAAO,iBAAiB,SAAY,WAAW;AAAA,QACjD;AAEA,sBAAc,CAAC;AACf,mBAAW,SAAS,aAAa;AAC/B,gBAAM,YACJ,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,KAAK,EAAG,OAAO,CAAC,IAC/D;AACF,sBAAY,KAAK,IAAI;AAAA,YACnB,OAAO;AAAA,YACP,QACE,aAAa,aAAa,SAAS,CAAC,EAAG,OAAO,KAAK,GAAG;AAAA,YACxD,UACE,aAAa,aAAa,SAAS,CAAC,EAAG,OAAO,KAAK,GAAG;AAAA,UAC1D;AAAA,QACF;AAAA,MACF,OAAO;AACL,sBAAc,MAAM,SAAS,MAAM,OAAO,OAAO;AACjD,YAAI,YAAY,OAAO;AACrB,wBAAc,cAAc,aAAa,YAAY,KAAK;AAAA,QAC5D,OAAO;AACL,wBAAc,CAAC;AACf,qBAAW,CAAC,OAAO,MAAM,KAAK,OAAO,QAAQ,OAAO,GAAG;AACrD,kBAAM,KAAK,MAAM,OAAO;AAAA,cACtB;AAAA,cACA,QAAQ,YAAY;AAAA,cACpB,UAAW,MAAkC;AAAA,YAC/C,CAAC;AACD,wBAAY,KAAK,IAAI;AAAA,cACnB,OAAO,WAAW,GAAG,OAAO,KAAK;AAAA,cACjC,QAAQ,GAAG;AAAA,cACX,UAAU,GAAG;AAAA,YACf;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAEA,YAAM,SAAS,OAAO,WAAW;AAEjC,YAAM,WAAqB;AAAA,QACzB,IAAI;AAAA,QACJ,QAAQ;AAAA,QACR,KAAK;AAAA,QACL;AAAA,QACA,QAAQ,YAAY,UAAU;AAAA,QAC9B,UAAW,MAAkC;AAAA,QAC7C,YAAY,YAAY;AAAA,QACxB,WAAW,YAAY;AAAA,QACvB,YAAY,YAAY;AAAA,QACxB,OAAO,YAAY,QACf,eAAe,YAAY,KAAK,IAChC;AAAA,MACN;AACA,YAAM,UAAU,CAAC,QAAQ,CAAC;AAE1B,YAAM,gBAA6B,YAAY,IAAI,CAAC,WAAW;AAAA,QAC7D,IAAI,OAAO,WAAW;AAAA,QACtB,SAAS;AAAA,QACT,aAAa;AAAA,QACb,OAAO,YAAY,KAAK,EAAG;AAAA,QAC3B,QAAQ,YAAY,KAAK,EAAG;AAAA,MAC9B,EAAE;AACF,YAAM,WAAW,aAAa;AAE9B,oBAAc,KAAK;AAAA,QACjB;AAAA,QACA,QAAQ,OAAO;AAAA,UACb,YAAY,IAAI,CAAC,UAAU,CAAC,OAAO,YAAY,KAAK,EAAG,KAAK,CAAC;AAAA,QAC/D;AAAA,QACA,WAAW,YAAY;AAAA,QACvB,UAAU,YAAY;AAAA,QACtB,WAAW,YAAY;AAAA,MACzB,CAAC;AAED,UAAI,YAAY,OAAO;AACrB,gBAAQ,KAAK,cAAc;AAAA,UACzB;AAAA,UACA;AAAA,UACA,OAAO,aAAa,YAAY,KAAK;AAAA,QACvC,CAAC;AAAA,MACH;AAEA,cAAQ,KAAK,eAAe;AAAA,QAC1B;AAAA,QACA;AAAA,QACA;AAAA,QACA,QAAQ,YAAY;AAAA,QACpB,UAAW,MAAkC;AAAA,QAC7C,QAAQ;AAAA,QACR,OAAO,YAAY;AAAA,QACnB,WAAW,YAAY;AAAA,QACvB,UAAU,YAAY;AAAA,QACtB,WAAW,YAAY;AAAA,MACzB,CAAC;AAAA,IACH,UAAE;AACA,gBAAU,QAAQ;AAAA,IACpB;AAAA,EACF;AAEA,QAAM,UAAU,YACZ,MAAM;AAAA,IAAK,EAAE,QAAQ,KAAK,KAAK,MAAM,SAAS,SAAS,EAAE;AAAA,IAAG,CAAC,GAAG,MAC9D,MAAM,MAAM,IAAI,YAAY,IAAI,KAAK,SAAS;AAAA,EAChD,IACA,CAAC,KAAK;AAEV,MAAI;AACF,eAAW,SAAS,SAAS;AAC3B,YAAM,QAAQ,IAAI,MAAM,IAAI,WAAW,CAAC;AAAA,IAC1C;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,UAAU,OAAO,QAAQ;AAC/B,UAAM;AAAA,EACR;AAEA,QAAM,UAAU,eAAe,eAAe,aAAa,SAAS;AACpE,QAAM,UAAU,OAAO,aAAa,OAAO;AAC3C,UAAQ,KAAK,WAAW,EAAE,OAAO,QAAQ,CAAC;AAE1C,SAAO;AACT;AAEA,SAAS,eACP,OAOA,aACA,WACY;AACZ,QAAM,aAAa,MAAM;AACzB,MAAI,YAAY;AAChB,MAAI,YAAY;AAChB,MAAI,iBAAiB;AACrB,MAAI,gBAAgB;AACpB,MAAI,iBAAiB;AAErB,QAAM,YAAoC,CAAC;AAC3C,aAAW,QAAQ,aAAa;AAC9B,cAAU,IAAI,IAAI;AAAA,EACpB;AAEA,aAAW,KAAK,OAAO;AACrB,sBAAkB,EAAE;AACpB,qBAAiB,EAAE;AACnB,sBAAkB,EAAE;AAEpB,QAAI,UAAU;AACd,eAAW,QAAQ,aAAa;AAC9B,YAAM,QAAQ,EAAE,OAAO,IAAI,KAAK;AAChC,gBAAU,IAAI,KAAM;AACpB,UAAI,QAAQ,UAAW,WAAU;AAAA,IACnC;AACA,QAAI,QAAS;AAAA,QACR;AAAA,EACP;AAEA,QAAM,aAAqC,CAAC;AAC5C,aAAW,QAAQ,aAAa;AAC9B,eAAW,IAAI,IAAI,aAAa,IAAI,UAAU,IAAI,IAAK,aAAa;AAAA,EACtE;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/evaluate/index.js
CHANGED
|
@@ -398,13 +398,15 @@ async function runEval(config) {
|
|
|
398
398
|
threshold = 0.5
|
|
399
399
|
} = config;
|
|
400
400
|
const emitter = config.emitter ?? new EvalEmitter();
|
|
401
|
-
const
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
401
|
+
const runId = config.runId ?? (() => {
|
|
402
|
+
const resolvedSuiteId = suiteId ?? store.createSuite(name).id;
|
|
403
|
+
return store.createRun({
|
|
404
|
+
suite_id: resolvedSuiteId,
|
|
405
|
+
name,
|
|
406
|
+
model,
|
|
407
|
+
config: config.config
|
|
408
|
+
});
|
|
409
|
+
})();
|
|
408
410
|
const items = [];
|
|
409
411
|
let idx = 0;
|
|
410
412
|
for await (const item of ds) {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"version": 3,
|
|
3
3
|
"sources": ["../../src/dataset/index.ts", "../../src/dataset/record-selection.ts", "../../src/engine/index.ts", "../../src/store/index.ts", "../../src/evaluate/index.ts"],
|
|
4
|
-
"sourcesContent": ["import { createReadStream } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { extname } from 'node:path';\nimport { createInterface } from 'node:readline';\n\nexport { downloadHf, fetchHfRows, hf } from './hf.ts';\nexport type { HfOptions } from './hf.ts';\n\nexport {\n filterRecordsByIndex,\n parseRecordSelection,\n pickFromArray,\n} from './record-selection.ts';\nexport type { ParsedRecordSelection } from './record-selection.ts';\n\nexport type TransformFn<T, U> = (item: T) => U;\nexport type PredicateFn<T> = (item: T) => boolean;\n\nexport class Dataset<T> implements AsyncIterable<T> {\n #source: () => AsyncIterable<T>;\n\n constructor(source: () => AsyncIterable<T>) {\n this.#source = source;\n }\n\n map<U>(fn: TransformFn<T, U>): Dataset<U> {\n const source = this.#source;\n return new Dataset(async function* () {\n for await (const item of source()) {\n yield fn(item);\n }\n });\n }\n\n filter(fn: PredicateFn<T>): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n for await (const item of source()) {\n if (fn(item)) yield item;\n }\n });\n }\n\n limit(n: number): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n let count = 0;\n for await (const item of source()) {\n if (count >= n) return;\n yield item;\n count++;\n }\n });\n }\n\n shuffle(): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n const items: T[] = [];\n for await (const item of source()) {\n items.push(item);\n }\n for (let i = items.length - 1; i > 0; i--) {\n const j = Math.floor(Math.random() * (i + 1));\n const temp = items[i] as T;\n items[i] = items[j] as T;\n items[j] = temp;\n }\n yield* items;\n });\n }\n\n sample(n: number): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n const items: T[] = [];\n for await (const item of source()) {\n items.push(item);\n }\n const count = Math.min(Math.max(0, n), items.length);\n for (let i = items.length - 1; i > items.length - count - 1; i--) {\n const j = Math.floor(Math.random() * (i + 1));\n const temp = items[i] as T;\n items[i] = items[j] as T;\n items[j] = temp;\n }\n for (let i = items.length - count; i < items.length; i++) {\n yield items[i]!;\n }\n });\n }\n\n pick(indexes: Set<number>): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n if (indexes.size === 0) {\n yield* source();\n return;\n }\n let idx = 0;\n for await (const item of source()) {\n if (indexes.has(idx)) {\n yield item;\n }\n idx++;\n }\n });\n }\n\n async toArray(): Promise<T[]> {\n const result: T[] = [];\n for await (const item of this.#source()) {\n result.push(item);\n }\n return result;\n }\n\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.#source()[Symbol.asyncIterator]();\n }\n}\n\nfunction parseCSVLine(line: string): string[] {\n const fields: string[] = [];\n let current = '';\n let inQuotes = false;\n\n for (let i = 0; i < line.length; i++) {\n const char = line[i]!;\n if (inQuotes) {\n if (char === '\"') {\n if (i + 1 < line.length && line[i + 1] === '\"') {\n current += '\"';\n i++;\n } else {\n inQuotes = false;\n }\n } else {\n current += char;\n }\n } else {\n if (char === '\"' && current === '') {\n inQuotes = true;\n } else if (char === ',') {\n fields.push(current);\n current = '';\n } else {\n current += char;\n }\n }\n }\n fields.push(current);\n return fields;\n}\n\nfunction loadJSON<T>(filePath: string): () => AsyncIterable<T> {\n return async function* () {\n const content = await readFile(filePath, 'utf-8');\n const data = JSON.parse(content);\n if (!Array.isArray(data)) {\n throw new Error(`JSON file \"${filePath}\" does not contain an array`);\n }\n yield* data;\n };\n}\n\nfunction loadJSONL<T>(filePath: string): () => AsyncIterable<T> {\n return async function* () {\n const rl = createInterface({\n input: createReadStream(filePath, 'utf-8'),\n crlfDelay: Infinity,\n });\n try {\n for await (const line of rl) {\n const trimmed = line.trim();\n if (trimmed) {\n yield JSON.parse(trimmed);\n }\n }\n } finally {\n rl.close();\n }\n };\n}\n\nfunction loadCSV(\n filePath: string,\n): () => AsyncIterable<Record<string, string>> {\n return async function* () {\n const rl = createInterface({\n input: createReadStream(filePath, 'utf-8'),\n crlfDelay: Infinity,\n });\n try {\n let headers: string[] | undefined;\n for await (const line of rl) {\n const trimmed = line.trim();\n if (!trimmed) continue;\n const fields = parseCSVLine(trimmed);\n if (!headers) {\n headers = fields;\n continue;\n }\n const row: Record<string, string> = {};\n for (let i = 0; i < headers.length; i++) {\n row[headers[i]!] = fields[i] ?? '';\n }\n yield row;\n }\n } finally {\n rl.close();\n }\n };\n}\n\nexport function dataset<T>(\n source: T[] | string | AsyncIterable<T>,\n): Dataset<T> {\n if (Array.isArray(source)) {\n return new Dataset(async function* () {\n yield* source;\n });\n }\n\n if (typeof source === 'object' && Symbol.asyncIterator in source) {\n return new Dataset(() => source);\n }\n\n const ext = extname(source).toLowerCase();\n switch (ext) {\n case '.json':\n return new Dataset(loadJSON<T>(source));\n case '.jsonl':\n return new Dataset(loadJSONL<T>(source));\n case '.csv':\n return new Dataset(loadCSV(source) as () => AsyncIterable<T>);\n default:\n throw new Error(\n `Unsupported file extension \"${ext}\" for dataset file \"${source}\". Supported: .json, .jsonl, .csv`,\n );\n }\n}\n", "export interface ParsedRecordSelection {\n indexes: Set<number>;\n normalized: string;\n}\n\nfunction parsePositiveInt(token: string): number {\n if (!/^\\d+$/.test(token)) {\n throw new Error(`Invalid record token \"${token}\"`);\n }\n const value = Number(token);\n if (!Number.isInteger(value) || value < 1) {\n throw new Error(`Record numbers must be >= 1. Received \"${token}\"`);\n }\n return value;\n}\n\nexport function parseRecordSelection(spec: string): ParsedRecordSelection {\n const trimmed = spec.trim();\n if (!trimmed) {\n return { indexes: new Set(), normalized: '' };\n }\n\n const indexes = new Set<number>();\n const parts = trimmed\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n if (parts.length === 0) {\n throw new Error('Record selection is empty.');\n }\n\n for (const part of parts) {\n const rangeMatch = /^(\\d+)\\s*-\\s*(\\d+)$/.exec(part);\n if (rangeMatch) {\n const start = parsePositiveInt(rangeMatch[1]!);\n const end = parsePositiveInt(rangeMatch[2]!);\n if (end < start) {\n throw new Error(\n `Invalid range \"${part}\". Range end must be >= range start.`,\n );\n }\n for (let i = start; i <= end; i++) {\n indexes.add(i - 1);\n }\n continue;\n }\n\n const value = parsePositiveInt(part);\n indexes.add(value - 1);\n }\n\n return {\n indexes,\n normalized: Array.from(indexes)\n .sort((a, b) => a - b)\n .map((i) => String(i + 1))\n .join(','),\n };\n}\n\nexport function pickFromArray<T>(items: T[], indexes: Set<number>): T[] {\n if (indexes.size === 0) return items;\n return items.filter((_, i) => indexes.has(i));\n}\n\nexport async function* filterRecordsByIndex<T>(\n source: AsyncIterable<T>,\n indexes: Set<number>,\n): AsyncIterable<T> {\n if (indexes.size === 0) {\n for await (const item of source) {\n yield item;\n }\n return;\n }\n\n let idx = 0;\n for await (const item of source) {\n if (indexes.has(idx)) {\n yield item;\n }\n idx++;\n }\n}\n", "import { EventEmitter } from 'node:events';\n\nimport type { Scorer, ScorerResult } from '../scorers/index.ts';\nimport type {\n CaseData,\n RunStore,\n RunSummary,\n ScoreData,\n} from '../store/index.ts';\n\nexport interface TaskResult {\n output: string;\n usage?: { inputTokens: number; outputTokens: number };\n}\n\nexport type TaskFn<T> = (input: T) => Promise<TaskResult>;\n\nexport interface EngineEvents {\n 'run:start': {\n runId: string;\n totalCases: number;\n name: string;\n model: string;\n };\n 'case:start': { runId: string; index: number; input: unknown };\n 'case:scored': {\n runId: string;\n index: number;\n input: unknown;\n output: string;\n expected: unknown;\n scores: Record<string, ScorerResult>;\n error?: unknown;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n };\n 'case:error': { runId: string; index: number; error: string };\n 'run:end': { runId: string; summary: RunSummary };\n}\n\nexport class EvalEmitter extends EventEmitter {\n override on<K extends keyof EngineEvents>(\n event: K,\n listener: (data: EngineEvents[K]) => void,\n ): this {\n return super.on(event, listener);\n }\n\n override emit<K extends keyof EngineEvents>(\n event: K,\n data: EngineEvents[K],\n ): boolean {\n return super.emit(event, data);\n }\n}\n\nexport interface EvalConfig<T> {\n name: string;\n model: string;\n dataset: AsyncIterable<T>;\n task: TaskFn<T>;\n scorers: Record<string, Scorer>;\n store: RunStore;\n emitter?: EvalEmitter;\n suiteId?: string;\n config?: Record<string, unknown>;\n maxConcurrency?: number;\n batchSize?: number;\n timeout?: number;\n trials?: number;\n threshold?: number;\n}\n\ninterface WrappedResult {\n output: string;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n error?: unknown;\n}\n\nfunction errorMessage(err: unknown): string {\n if (err instanceof Error) {\n return `${err.name}: ${err.message}`;\n }\n if (typeof err === 'string') return err;\n if (err == null) return 'Unknown error';\n try {\n return JSON.stringify(err);\n } catch {\n return String(err);\n }\n}\n\nfunction serializeError(err: unknown): string {\n if (err instanceof Error) {\n return JSON.stringify({\n name: err.name,\n message: err.message,\n stack: err.stack,\n cause:\n err.cause instanceof Error\n ? {\n name: err.cause.name,\n message: err.cause.message,\n }\n : err.cause,\n });\n }\n if (typeof err === 'string') return JSON.stringify({ message: err });\n if (err == null) return JSON.stringify({ message: 'Unknown error' });\n try {\n return JSON.stringify(err);\n } catch {\n return JSON.stringify({ message: String(err) });\n }\n}\n\nfunction failureScores(\n scorerNames: string[],\n error: unknown,\n): Record<string, ScorerResult> {\n const reason = `Task failed: ${errorMessage(error)}`;\n const scores: Record<string, ScorerResult> = {};\n for (const scorerName of scorerNames) {\n scores[scorerName] = { score: 0, reason };\n }\n return scores;\n}\n\nfunction createSemaphore(maxConcurrency: number) {\n let active = 0;\n const queue: Array<() => void> = [];\n\n return {\n async acquire(): Promise<void> {\n if (active < maxConcurrency) {\n active++;\n return;\n }\n return new Promise<void>((resolve) => queue.push(resolve));\n },\n release(): void {\n active--;\n const next = queue.shift();\n if (next) {\n active++;\n next();\n }\n },\n };\n}\n\nasync function wrapTask<T>(\n task: TaskFn<T>,\n input: T,\n timeoutMs: number,\n): Promise<WrappedResult> {\n const start = performance.now();\n let timerId: ReturnType<typeof setTimeout> | undefined;\n try {\n const result = await Promise.race([\n task(input),\n new Promise<never>((_, reject) => {\n timerId = setTimeout(\n () => reject(new Error('timeout exceeded')),\n timeoutMs,\n );\n }),\n ]);\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: result.output,\n latencyMs,\n tokensIn: result.usage?.inputTokens ?? 0,\n tokensOut: result.usage?.outputTokens ?? 0,\n };\n } catch (err) {\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: '',\n latencyMs,\n tokensIn: 0,\n tokensOut: 0,\n error: err,\n };\n }\n}\n\nfunction clampScore(score: number, scorerName: string): number {\n if (score < 0 || score > 1) {\n console.warn(\n `Scorer \"${scorerName}\" returned out-of-range score ${score}, clamping to 0..1`,\n );\n return Math.max(0, Math.min(1, score));\n }\n return score;\n}\n\nexport async function runEval<T>(config: EvalConfig<T>): Promise<RunSummary> {\n const {\n name,\n model,\n dataset: ds,\n task,\n scorers,\n store,\n suiteId,\n maxConcurrency = 10,\n batchSize,\n timeout = 30_000,\n trials = 1,\n threshold = 0.5,\n } = config;\n\n const emitter = config.emitter ?? new EvalEmitter();\n const resolvedSuiteId = suiteId ?? store.createSuite(name).id;\n const runId = store.createRun({\n suite_id: resolvedSuiteId,\n name,\n model,\n config: config.config,\n });\n\n const items: Array<{ index: number; input: T }> = [];\n let idx = 0;\n for await (const item of ds) {\n items.push({ index: idx++, input: item });\n }\n\n emitter.emit('run:start', { runId, totalCases: items.length, name, model });\n\n const semaphore = createSemaphore(maxConcurrency);\n const scorerNames = Object.keys(scorers);\n\n const allCaseScores: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }> = [];\n\n const processItem = async ({ index, input }: { index: number; input: T }) => {\n await semaphore.acquire();\n try {\n emitter.emit('case:start', { runId, index, input });\n\n let finalResult: WrappedResult;\n let finalScores: Record<string, ScorerResult>;\n\n if (trials > 1) {\n const trialResults: Array<{\n result: WrappedResult;\n scores: Record<string, ScorerResult>;\n }> = [];\n\n for (let t = 0; t < trials; t++) {\n const result = await wrapTask(task, input, timeout);\n if (result.error) {\n trialResults.push({\n result,\n scores: failureScores(scorerNames, result.error),\n });\n } else {\n const scores: Record<string, ScorerResult> = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: result.output,\n expected: (input as Record<string, unknown>).expected,\n });\n scores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n trialResults.push({ result, scores });\n }\n }\n\n const lastSuccessful = [...trialResults]\n .reverse()\n .find((t) => !t.result.error);\n const baseResult =\n lastSuccessful?.result ??\n trialResults[trialResults.length - 1]!.result;\n finalResult = {\n output: baseResult.output,\n latencyMs: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.latencyMs, 0) /\n trials,\n ),\n tokensIn: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensIn, 0) /\n trials,\n ),\n tokensOut: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensOut, 0) /\n trials,\n ),\n error: lastSuccessful ? undefined : baseResult.error,\n };\n\n finalScores = {};\n for (const sName of scorerNames) {\n const meanScore =\n trialResults.reduce((sum, t) => sum + t.scores[sName]!.score, 0) /\n trials;\n finalScores[sName] = {\n score: meanScore,\n reason:\n trialResults[trialResults.length - 1]!.scores[sName]?.reason,\n metadata:\n trialResults[trialResults.length - 1]!.scores[sName]?.metadata,\n };\n }\n } else {\n finalResult = await wrapTask(task, input, timeout);\n if (finalResult.error) {\n finalScores = failureScores(scorerNames, finalResult.error);\n } else {\n finalScores = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n });\n finalScores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n }\n }\n\n const caseId = crypto.randomUUID();\n\n const caseData: CaseData = {\n id: caseId,\n run_id: runId,\n idx: index,\n input,\n output: finalResult.output || null,\n expected: (input as Record<string, unknown>).expected,\n latency_ms: finalResult.latencyMs,\n tokens_in: finalResult.tokensIn,\n tokens_out: finalResult.tokensOut,\n error: finalResult.error\n ? serializeError(finalResult.error)\n : undefined,\n };\n store.saveCases([caseData]);\n\n const scoreDataList: ScoreData[] = scorerNames.map((sName) => ({\n id: crypto.randomUUID(),\n case_id: caseId,\n scorer_name: sName,\n score: finalScores[sName]!.score,\n reason: finalScores[sName]!.reason,\n }));\n store.saveScores(scoreDataList);\n\n allCaseScores.push({\n index,\n scores: Object.fromEntries(\n scorerNames.map((sName) => [sName, finalScores[sName]!.score]),\n ),\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n\n if (finalResult.error) {\n emitter.emit('case:error', {\n runId,\n index,\n error: errorMessage(finalResult.error),\n });\n }\n\n emitter.emit('case:scored', {\n runId,\n index,\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n scores: finalScores,\n error: finalResult.error,\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n } finally {\n semaphore.release();\n }\n };\n\n const batches = batchSize\n ? Array.from({ length: Math.ceil(items.length / batchSize) }, (_, i) =>\n items.slice(i * batchSize, (i + 1) * batchSize),\n )\n : [items];\n\n try {\n for (const batch of batches) {\n await Promise.all(batch.map(processItem));\n }\n } catch (err) {\n store.finishRun(runId, 'failed');\n throw err;\n }\n\n const summary = computeSummary(allCaseScores, scorerNames, threshold);\n store.finishRun(runId, 'completed', summary);\n emitter.emit('run:end', { runId, summary });\n\n return summary;\n}\n\nfunction computeSummary(\n cases: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }>,\n scorerNames: string[],\n threshold: number,\n): RunSummary {\n const totalCases = cases.length;\n let passCount = 0;\n let failCount = 0;\n let totalLatencyMs = 0;\n let totalTokensIn = 0;\n let totalTokensOut = 0;\n\n const scoreSums: Record<string, number> = {};\n for (const name of scorerNames) {\n scoreSums[name] = 0;\n }\n\n for (const c of cases) {\n totalLatencyMs += c.latencyMs;\n totalTokensIn += c.tokensIn;\n totalTokensOut += c.tokensOut;\n\n let allPass = true;\n for (const name of scorerNames) {\n const score = c.scores[name] ?? 0;\n scoreSums[name]! += score;\n if (score < threshold) allPass = false;\n }\n if (allPass) passCount++;\n else failCount++;\n }\n\n const meanScores: Record<string, number> = {};\n for (const name of scorerNames) {\n meanScores[name] = totalCases > 0 ? scoreSums[name]! / totalCases : 0;\n }\n\n return {\n totalCases,\n passCount,\n failCount,\n meanScores,\n totalLatencyMs,\n totalTokensIn,\n totalTokensOut,\n };\n}\n", "import { mkdirSync } from 'node:fs';\nimport { dirname } from 'node:path';\nimport { DatabaseSync } from 'node:sqlite';\n\nimport DDL from './ddl.sqlite.sql';\n\nexport interface SuiteRow {\n id: string;\n name: string;\n created_at: number;\n}\n\nexport interface RunRow {\n id: string;\n suite_id: string;\n name: string;\n model: string;\n config: Record<string, unknown> | null;\n started_at: number;\n finished_at: number | null;\n status: 'running' | 'completed' | 'failed';\n summary: RunSummary | null;\n}\n\nexport interface CaseRow {\n id: string;\n run_id: string;\n idx: number;\n input: unknown;\n output: string | null;\n expected: unknown | null;\n latency_ms: number;\n tokens_in: number;\n tokens_out: number;\n error: string | null;\n}\n\nexport interface CaseWithScores extends CaseRow {\n scores: Array<{ scorer_name: string; score: number; reason: string | null }>;\n}\n\nexport interface ScoreRow {\n id: string;\n case_id: string;\n scorer_name: string;\n score: number;\n reason: string | null;\n}\n\nexport interface RunSummary {\n totalCases: number;\n passCount: number;\n failCount: number;\n meanScores: Record<string, number>;\n totalLatencyMs: number;\n totalTokensIn: number;\n totalTokensOut: number;\n}\n\nexport interface PromptRow {\n id: string;\n name: string;\n version: number;\n content: string;\n created_at: number;\n}\n\nexport interface CaseData {\n id: string;\n run_id: string;\n idx: number;\n input: unknown;\n output: string | null;\n expected?: unknown;\n latency_ms: number;\n tokens_in: number;\n tokens_out: number;\n error?: string;\n}\n\nexport interface ScoreData {\n id: string;\n case_id: string;\n scorer_name: string;\n score: number;\n reason?: string;\n}\n\nexport class RunStore {\n #db: DatabaseSync;\n #statements = new Map<string, ReturnType<DatabaseSync['prepare']>>();\n\n #stmt(sql: string): ReturnType<DatabaseSync['prepare']> {\n let stmt = this.#statements.get(sql);\n if (!stmt) {\n stmt = this.#db.prepare(sql);\n this.#statements.set(sql, stmt);\n }\n return stmt;\n }\n\n #transaction<T>(fn: () => T): T {\n this.#db.exec('BEGIN TRANSACTION');\n try {\n const result = fn();\n this.#db.exec('COMMIT');\n return result;\n } catch (error) {\n this.#db.exec('ROLLBACK');\n throw error;\n }\n }\n\n constructor(pathOrDb?: string | DatabaseSync) {\n if (pathOrDb instanceof DatabaseSync) {\n this.#db = pathOrDb;\n } else {\n const dbPath = pathOrDb ?? '.evals/store.db';\n mkdirSync(dirname(dbPath), { recursive: true });\n this.#db = new DatabaseSync(dbPath);\n }\n this.#db.exec(DDL);\n this.#migrateRunsTableToSuiteRequired();\n this.#migratePromptsTableIfNeeded();\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_prompts_name_version ON prompts(name, version DESC)',\n );\n }\n\n #migratePromptsTableIfNeeded(): void {\n const columns = this.#stmt('PRAGMA table_info(prompts)').all() as Array<{\n name: string;\n }>;\n\n if (columns.length === 0) return;\n if (columns.some((column) => column.name === 'version')) return;\n\n this.#transaction(() => {\n this.#db.exec('ALTER TABLE prompts RENAME TO prompts_legacy');\n this.#db.exec(`\n CREATE TABLE prompts (\n id TEXT PRIMARY KEY,\n name TEXT NOT NULL,\n version INTEGER NOT NULL,\n content TEXT NOT NULL,\n created_at INTEGER NOT NULL DEFAULT (unixepoch() * 1000),\n UNIQUE(name, version)\n )\n `);\n this.#db.exec(`\n INSERT INTO prompts (id, name, version, content, created_at)\n SELECT id, name, 1, content, created_at\n FROM prompts_legacy\n `);\n this.#db.exec('DROP TABLE prompts_legacy');\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_prompts_created_at ON prompts(created_at)',\n );\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_prompts_name_version ON prompts(name, version DESC)',\n );\n });\n }\n\n #migrateRunsTableToSuiteRequired(): void {\n const runColumns = this.#stmt('PRAGMA table_info(runs)').all() as Array<{\n name: string;\n notnull: number;\n }>;\n\n if (runColumns.length === 0) return;\n\n const suiteColumn = runColumns.find((column) => column.name === 'suite_id');\n const hasNonNullSuite = suiteColumn?.notnull === 1;\n\n const runForeignKeys = this.#stmt(\n 'PRAGMA foreign_key_list(runs)',\n ).all() as Array<{\n from: string;\n on_delete: string;\n table: string;\n }>;\n const suiteForeignKey = runForeignKeys.find(\n (fk) => fk.from === 'suite_id' && fk.table === 'suites',\n );\n const hasCascadeDelete = suiteForeignKey?.on_delete === 'CASCADE';\n\n if (hasNonNullSuite && hasCascadeDelete) return;\n\n this.#statements.clear();\n this.#transaction(() => {\n this.#db.exec(`\n CREATE TABLE runs_next (\n id TEXT PRIMARY KEY,\n suite_id TEXT NOT NULL,\n name TEXT NOT NULL,\n model TEXT NOT NULL,\n config TEXT,\n started_at INTEGER NOT NULL,\n finished_at INTEGER,\n status TEXT NOT NULL DEFAULT 'running' CHECK(status IN ('running', 'completed', 'failed')),\n summary TEXT,\n FOREIGN KEY (suite_id) REFERENCES suites(id) ON DELETE CASCADE\n )\n `);\n\n // Drop legacy orphaned runs that do not belong to a suite.\n this.#db.exec('DELETE FROM runs WHERE suite_id IS NULL');\n\n this.#db.exec(`\n INSERT INTO runs_next (id, suite_id, name, model, config, started_at, finished_at, status, summary)\n SELECT r.id, r.suite_id, r.name, r.model, r.config, r.started_at, r.finished_at, r.status, r.summary\n FROM runs r\n JOIN suites s ON s.id = r.suite_id\n `);\n\n this.#db.exec('DROP TABLE runs');\n this.#db.exec('ALTER TABLE runs_next RENAME TO runs');\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_runs_suite_id ON runs(suite_id)',\n );\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_runs_started_at ON runs(started_at)',\n );\n });\n this.#statements.clear();\n }\n\n createSuite(name: string): SuiteRow {\n const id = crypto.randomUUID();\n const now = Date.now();\n this.#stmt(\n 'INSERT INTO suites (id, name, created_at) VALUES (?, ?, ?)',\n ).run(id, name, now);\n return { id, name, created_at: now };\n }\n\n getSuite(id: string): SuiteRow | undefined {\n const row = this.#stmt('SELECT * FROM suites WHERE id = ?').get(id) as\n | { id: string; name: string; created_at: number }\n | undefined;\n return row ?? undefined;\n }\n\n renameSuite(id: string, name: string): void {\n this.#stmt('UPDATE suites SET name = ? WHERE id = ?').run(name, id);\n }\n\n renameRun(id: string, name: string): void {\n this.#stmt('UPDATE runs SET name = ? WHERE id = ?').run(name, id);\n }\n\n createRun(run: {\n suite_id: string;\n name: string;\n model: string;\n config?: Record<string, unknown>;\n }): string {\n const id = crypto.randomUUID();\n const now = Date.now();\n this.#stmt(\n 'INSERT INTO runs (id, suite_id, name, model, config, started_at) VALUES (?, ?, ?, ?, ?, ?)',\n ).run(\n id,\n run.suite_id,\n run.name,\n run.model,\n run.config ? JSON.stringify(run.config) : null,\n now,\n );\n return id;\n }\n\n finishRun(\n runId: string,\n status: 'completed' | 'failed',\n summary?: RunSummary,\n ): void {\n this.#stmt(\n 'UPDATE runs SET finished_at = ?, status = ?, summary = ? WHERE id = ?',\n ).run(Date.now(), status, summary ? JSON.stringify(summary) : null, runId);\n }\n\n saveCases(cases: CaseData[]): void {\n this.#transaction(() => {\n const stmt = this.#stmt(\n 'INSERT INTO cases (id, run_id, idx, input, output, expected, latency_ms, tokens_in, tokens_out, error) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',\n );\n for (const c of cases) {\n stmt.run(\n c.id,\n c.run_id,\n c.idx,\n JSON.stringify(c.input),\n c.output,\n c.expected != null ? JSON.stringify(c.expected) : null,\n c.latency_ms,\n c.tokens_in,\n c.tokens_out,\n c.error ?? null,\n );\n }\n });\n }\n\n saveScores(scores: ScoreData[]): void {\n this.#transaction(() => {\n const stmt = this.#stmt(\n 'INSERT INTO scores (id, case_id, scorer_name, score, reason) VALUES (?, ?, ?, ?, ?)',\n );\n for (const s of scores) {\n stmt.run(s.id, s.case_id, s.scorer_name, s.score, s.reason ?? null);\n }\n });\n }\n\n getRun(runId: string): RunRow | undefined {\n const row = this.#stmt('SELECT * FROM runs WHERE id = ?').get(runId) as\n | {\n id: string;\n suite_id: string;\n name: string;\n model: string;\n config: string | null;\n started_at: number;\n finished_at: number | null;\n status: string;\n summary: string | null;\n }\n | undefined;\n if (!row) return undefined;\n return {\n id: row.id,\n suite_id: row.suite_id,\n name: row.name,\n model: row.model,\n config: row.config ? JSON.parse(row.config) : null,\n started_at: row.started_at,\n finished_at: row.finished_at,\n status: row.status as RunRow['status'],\n summary: row.summary ? JSON.parse(row.summary) : null,\n };\n }\n\n listRuns(suiteId?: string): RunRow[] {\n const sql = suiteId\n ? 'SELECT * FROM runs WHERE suite_id = ? ORDER BY started_at'\n : 'SELECT * FROM runs ORDER BY started_at';\n const rows = (\n suiteId ? this.#stmt(sql).all(suiteId) : this.#stmt(sql).all()\n ) as Array<{\n id: string;\n suite_id: string;\n name: string;\n model: string;\n config: string | null;\n started_at: number;\n finished_at: number | null;\n status: string;\n summary: string | null;\n }>;\n return rows.map((row) => ({\n id: row.id,\n suite_id: row.suite_id,\n name: row.name,\n model: row.model,\n config: row.config ? JSON.parse(row.config) : null,\n started_at: row.started_at,\n finished_at: row.finished_at,\n status: row.status as RunRow['status'],\n summary: row.summary ? JSON.parse(row.summary) : null,\n }));\n }\n\n getCases(runId: string): CaseRow[] {\n const rows = this.#stmt(\n 'SELECT * FROM cases WHERE run_id = ? ORDER BY idx',\n ).all(runId) as Array<{\n id: string;\n run_id: string;\n idx: number;\n input: string;\n output: string | null;\n expected: string | null;\n latency_ms: number;\n tokens_in: number;\n tokens_out: number;\n error: string | null;\n }>;\n return rows.map((row) => ({\n id: row.id,\n run_id: row.run_id,\n idx: row.idx,\n input: JSON.parse(row.input),\n output: row.output,\n expected: row.expected ? JSON.parse(row.expected) : null,\n latency_ms: row.latency_ms,\n tokens_in: row.tokens_in,\n tokens_out: row.tokens_out,\n error: row.error,\n }));\n }\n\n getFailingCases(runId: string, threshold = 0.5): CaseWithScores[] {\n const rows = this.#stmt(\n `SELECT c.*, s.scorer_name, s.score, s.reason as score_reason\n FROM cases c\n JOIN scores s ON s.case_id = c.id\n WHERE c.run_id = ? AND s.score < ?\n ORDER BY c.idx`,\n ).all(runId, threshold) as Array<{\n id: string;\n run_id: string;\n idx: number;\n input: string;\n output: string | null;\n expected: string | null;\n latency_ms: number;\n tokens_in: number;\n tokens_out: number;\n error: string | null;\n scorer_name: string;\n score: number;\n score_reason: string | null;\n }>;\n\n const caseMap = new Map<string, CaseWithScores>();\n for (const row of rows) {\n let c = caseMap.get(row.id);\n if (!c) {\n c = {\n id: row.id,\n run_id: row.run_id,\n idx: row.idx,\n input: JSON.parse(row.input),\n output: row.output,\n expected: row.expected ? JSON.parse(row.expected) : null,\n latency_ms: row.latency_ms,\n tokens_in: row.tokens_in,\n tokens_out: row.tokens_out,\n error: row.error,\n scores: [],\n };\n caseMap.set(row.id, c);\n }\n c.scores.push({\n scorer_name: row.scorer_name,\n score: row.score,\n reason: row.score_reason,\n });\n }\n return Array.from(caseMap.values());\n }\n\n getRunSummary(runId: string, threshold = 0.5): RunSummary {\n const totals = this.#stmt(\n `SELECT\n COUNT(DISTINCT c.id) as totalCases,\n COALESCE(SUM(c.latency_ms), 0) as totalLatencyMs,\n COALESCE(SUM(c.tokens_in), 0) as totalTokensIn,\n COALESCE(SUM(c.tokens_out), 0) as totalTokensOut\n FROM cases c WHERE c.run_id = ?`,\n ).get(runId) as {\n totalCases: number;\n totalLatencyMs: number;\n totalTokensIn: number;\n totalTokensOut: number;\n };\n\n const scorerMeans = this.#stmt(\n `SELECT s.scorer_name, AVG(s.score) as meanScore\n FROM scores s\n JOIN cases c ON c.id = s.case_id\n WHERE c.run_id = ?\n GROUP BY s.scorer_name`,\n ).all(runId) as Array<{ scorer_name: string; meanScore: number }>;\n\n const meanScores: Record<string, number> = {};\n for (const row of scorerMeans) {\n meanScores[row.scorer_name] = row.meanScore;\n }\n\n const passFail = this.#stmt(\n `SELECT c.id,\n MIN(s.score) as minScore\n FROM cases c\n JOIN scores s ON s.case_id = c.id\n WHERE c.run_id = ?\n GROUP BY c.id`,\n ).all(runId) as Array<{ id: string; minScore: number }>;\n\n let passCount = 0;\n let failCount = 0;\n for (const row of passFail) {\n if (row.minScore >= threshold) passCount++;\n else failCount++;\n }\n\n return {\n totalCases: totals.totalCases,\n passCount,\n failCount,\n meanScores,\n totalLatencyMs: totals.totalLatencyMs,\n totalTokensIn: totals.totalTokensIn,\n totalTokensOut: totals.totalTokensOut,\n };\n }\n\n findSuiteByName(name: string): SuiteRow | undefined {\n const row = this.#stmt(\n 'SELECT * FROM suites WHERE name = ? ORDER BY created_at DESC LIMIT 1',\n ).get(name) as { id: string; name: string; created_at: number } | undefined;\n return row ?? undefined;\n }\n\n getLatestCompletedRun(suiteId: string, model?: string): RunRow | undefined {\n const sql = model\n ? 'SELECT * FROM runs WHERE suite_id = ? AND status = ? AND model = ? ORDER BY started_at DESC LIMIT 1'\n : 'SELECT * FROM runs WHERE suite_id = ? AND status = ? ORDER BY started_at DESC LIMIT 1';\n const row = (\n model\n ? this.#stmt(sql).get(suiteId, 'completed', model)\n : this.#stmt(sql).get(suiteId, 'completed')\n ) as\n | {\n id: string;\n suite_id: string;\n name: string;\n model: string;\n config: string | null;\n started_at: number;\n finished_at: number | null;\n status: string;\n summary: string | null;\n }\n | undefined;\n if (!row) return undefined;\n return {\n id: row.id,\n suite_id: row.suite_id,\n name: row.name,\n model: row.model,\n config: row.config ? JSON.parse(row.config) : null,\n started_at: row.started_at,\n finished_at: row.finished_at,\n status: row.status as RunRow['status'],\n summary: row.summary ? JSON.parse(row.summary) : null,\n };\n }\n\n listSuites(): SuiteRow[] {\n const rows = this.#stmt(\n 'SELECT * FROM suites ORDER BY created_at DESC',\n ).all() as Array<{ id: string; name: string; created_at: number }>;\n return rows.map((row) => ({\n id: row.id,\n name: row.name,\n created_at: row.created_at,\n }));\n }\n\n createPrompt(name: string, content: string): PromptRow {\n const id = crypto.randomUUID();\n const now = Date.now();\n\n const latest = this.#stmt(\n 'SELECT MAX(version) as latestVersion FROM prompts WHERE name = ?',\n ).get(name) as { latestVersion: number | null } | undefined;\n const version = (latest?.latestVersion ?? 0) + 1;\n\n this.#stmt(\n 'INSERT INTO prompts (id, name, version, content, created_at) VALUES (?, ?, ?, ?, ?)',\n ).run(id, name, version, content, now);\n return { id, name, version, content, created_at: now };\n }\n\n listPrompts(): PromptRow[] {\n const rows = this.#stmt(\n 'SELECT * FROM prompts ORDER BY name COLLATE NOCASE ASC, version DESC',\n ).all() as Array<{\n id: string;\n name: string;\n version: number;\n content: string;\n created_at: number;\n }>;\n return rows.map((row) => ({\n id: row.id,\n name: row.name,\n version: row.version,\n content: row.content,\n created_at: row.created_at,\n }));\n }\n\n getPrompt(id: string): PromptRow | undefined {\n const row = this.#stmt('SELECT * FROM prompts WHERE id = ?').get(id) as\n | {\n id: string;\n name: string;\n version: number;\n content: string;\n created_at: number;\n }\n | undefined;\n if (!row) return undefined;\n return {\n id: row.id,\n name: row.name,\n version: row.version,\n content: row.content,\n created_at: row.created_at,\n };\n }\n\n deletePrompt(id: string): void {\n this.#stmt('DELETE FROM prompts WHERE id = ?').run(id);\n }\n}\n", "import { dataset } from '../dataset/index.ts';\nimport {\n filterRecordsByIndex,\n parseRecordSelection,\n} from '../dataset/record-selection.ts';\nimport type { TaskFn, TaskResult } from '../engine/index.ts';\nimport { EvalEmitter, runEval } from '../engine/index.ts';\nimport type { CaseResult, Reporter } from '../reporters/index.ts';\nimport type { Scorer } from '../scorers/index.ts';\nimport type { RunSummary } from '../store/index.ts';\nimport { RunStore } from '../store/index.ts';\n\ninterface BaseEvalOptions<T> {\n /** Human-readable name for this evaluation run, used in reports and filenames. */\n name: string;\n /** The dataset of input/expected pairs to evaluate against. */\n dataset: AsyncIterable<T>;\n /** Named scoring functions that assess model output quality. Each key becomes a column in reports. */\n scorers: Record<string, Scorer>;\n /** Reporters that receive lifecycle events and produce output (console, JSON, CSV, etc.). */\n reporters: Reporter[];\n /** Persistent store for run history. Accepts a `RunStore` instance or a file path for SQLite storage. */\n store: RunStore;\n /** Maximum number of dataset cases to run concurrently. Defaults to unbounded. */\n maxConcurrency?: number;\n /** Per-case timeout in milliseconds before the case is marked as failed. */\n timeout?: number;\n /** Number of times to run each case and average the scores. Useful for reducing LLM variance. */\n trials?: number;\n /** Minimum average score (0\u20131) required to consider the run passing. Defaults to `0.5`. */\n threshold?: number;\n}\n\nexport interface EvaluateOptions<T> extends BaseEvalOptions<T> {\n /** The model identifier passed to the task function. */\n model: string;\n /** Function that calls the model under evaluation and returns its output for a single dataset item. */\n task: TaskFn<T>;\n /** Associates this run with an existing suite ID for grouped comparisons. */\n suiteId?: string;\n}\n\nexport interface EvaluateEachOptions<\n T,\n V extends { name: string },\n> extends BaseEvalOptions<T> {\n /** List of model variants to evaluate. Each variant runs the full dataset independently. */\n models: V[];\n /** Function that calls the model under evaluation for a given dataset item and model variant. */\n task: (input: T, variant: V) => Promise<TaskResult>;\n}\n\ntype Selection =\n | { type: 'all' }\n | { type: 'failed' }\n | { type: 'cases'; indexes: Set<number> }\n | { type: 'sample'; count: number };\n\nexport class EvalAssertionError extends Error {\n summary: RunSummary | RunSummary[];\n\n constructor(summary: RunSummary | RunSummary[]) {\n const msg = Array.isArray(summary)\n ? `Eval assertion failed: ${summary.filter((s) => s.failCount > 0).length} of ${summary.length} model runs have failures`\n : `Eval assertion failed: ${summary.failCount} of ${summary.totalCases} cases failed`;\n super(msg);\n this.name = 'EvalAssertionError';\n this.summary = summary;\n }\n}\n\nfunction resolveFailedIndexes(\n store: RunStore,\n suiteName: string,\n model?: string,\n threshold?: number,\n): Set<number> {\n const suite = store.findSuiteByName(suiteName);\n if (!suite) {\n console.warn(\n `No previous suite found for '${suiteName}'. Running all cases.`,\n );\n return new Set();\n }\n const run = store.getLatestCompletedRun(suite.id, model);\n if (!run) {\n console.warn(\n `No previous completed run found for '${suiteName}'${model ? ` [${model}]` : ''}. Running all cases.`,\n );\n return new Set();\n }\n const failingCases = store.getFailingCases(run.id, threshold);\n if (failingCases.length === 0) {\n console.warn(`No failed cases in previous run. Running all cases.`);\n return new Set();\n }\n console.warn(\n `Retrying ${failingCases.length} failed cases from previous run`,\n );\n return new Set(failingCases.map((c) => c.idx));\n}\n\nexport class EvalBuilder<R> implements PromiseLike<R> {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n #options: EvaluateOptions<any> | EvaluateEachOptions<any, any>;\n #selection: Selection = { type: 'all' };\n #shouldAssert = false;\n\n constructor(\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n options: EvaluateOptions<any> | EvaluateEachOptions<any, any>,\n ) {\n this.#options = options;\n }\n\n #setSelection(selection: Selection): this {\n if (this.#selection.type !== 'all') {\n throw new Error(\n `Cannot combine .${this.#selection.type}() with .${selection.type}()`,\n );\n }\n this.#selection = selection;\n return this;\n }\n\n failed(): this {\n return this.#setSelection({ type: 'failed' });\n }\n\n cases(spec: string): this {\n const { indexes } = parseRecordSelection(spec);\n return this.#setSelection({ type: 'cases', indexes });\n }\n\n sample(count: number): this {\n if (count < 1) {\n throw new Error('Sample count must be >= 1');\n }\n return this.#setSelection({ type: 'sample', count });\n }\n\n assert(): this {\n this.#shouldAssert = true;\n return this;\n }\n\n then<TResult1 = R, TResult2 = never>(\n onfulfilled?:\n | ((value: R) => TResult1 | PromiseLike<TResult1>)\n | null\n | undefined,\n onrejected?:\n | ((reason: unknown) => TResult2 | PromiseLike<TResult2>)\n | null\n | undefined,\n ): Promise<TResult1 | TResult2> {\n return this.#execute().then(onfulfilled, onrejected);\n }\n\n async #execute(): Promise<R> {\n if ('models' in this.#options) {\n return this.#executeMulti() as Promise<R>;\n }\n return this.#executeSingle() as Promise<R>;\n }\n\n #applyDatasetFilter(ds: AsyncIterable<unknown>): AsyncIterable<unknown> {\n switch (this.#selection.type) {\n case 'all':\n return ds;\n case 'cases':\n return this.#selection.indexes.size > 0\n ? filterRecordsByIndex(ds, this.#selection.indexes)\n : ds;\n case 'sample':\n return dataset(ds).sample(this.#selection.count);\n case 'failed':\n return ds;\n }\n }\n\n async #executeSingle(): Promise<RunSummary> {\n const options = this.#options as EvaluateOptions<unknown>;\n let ds: AsyncIterable<unknown> = options.dataset;\n\n if (this.#selection.type === 'failed') {\n const indexes = resolveFailedIndexes(\n options.store,\n options.name,\n options.model,\n options.threshold,\n );\n if (indexes.size > 0) {\n ds = filterRecordsByIndex(ds, indexes);\n }\n } else {\n ds = this.#applyDatasetFilter(ds);\n }\n\n const result = await evaluateSingle({ ...options, dataset: ds });\n\n if (this.#shouldAssert && result.failCount > 0) {\n throw new EvalAssertionError(result);\n }\n\n return result;\n }\n\n async #executeMulti(): Promise<RunSummary[]> {\n const options = this.#options as EvaluateEachOptions<\n unknown,\n { name: string }\n >;\n\n let result: RunSummary[];\n\n if (this.#selection.type === 'failed') {\n const perModelIndexes = new Map<string, Set<number>>();\n for (const variant of options.models) {\n perModelIndexes.set(\n variant.name,\n resolveFailedIndexes(\n options.store,\n options.name,\n variant.name,\n options.threshold,\n ),\n );\n }\n result = await evaluateEach(options, perModelIndexes);\n } else {\n const filtered = this.#applyDatasetFilter(options.dataset);\n result = await evaluateEach({ ...options, dataset: filtered });\n }\n\n if (this.#shouldAssert && result.some((s) => s.failCount > 0)) {\n throw new EvalAssertionError(result);\n }\n\n return result;\n }\n}\n\nexport function evaluate<T>(\n options: EvaluateOptions<T>,\n): EvalBuilder<RunSummary>;\nexport function evaluate<T, V extends { name: string }>(\n options: EvaluateEachOptions<T, V>,\n): EvalBuilder<RunSummary[]>;\nexport function evaluate<T, V extends { name: string }>(\n options: EvaluateOptions<T> | EvaluateEachOptions<T, V>,\n): EvalBuilder<RunSummary> | EvalBuilder<RunSummary[]> {\n if ('models' in options) {\n return new EvalBuilder<RunSummary[]>(options);\n }\n return new EvalBuilder<RunSummary>(options);\n}\n\nfunction wireReporters(reporters: Reporter[]) {\n const emitter = new EvalEmitter();\n const cases: CaseResult[] = [];\n let runId = '';\n\n emitter.on('run:start', (data) => {\n runId = data.runId;\n for (const r of reporters) r.onRunStart?.(data);\n });\n\n emitter.on('case:scored', (data) => {\n const result: CaseResult = {\n runId: data.runId,\n index: data.index,\n input: data.input,\n output: data.output,\n expected: data.expected,\n scores: data.scores,\n error: data.error ?? null,\n latencyMs: data.latencyMs,\n tokensIn: data.tokensIn,\n tokensOut: data.tokensOut,\n };\n cases.push(result);\n for (const r of reporters) r.onCaseEnd?.(result);\n });\n\n return { emitter, cases, getRunId: () => runId };\n}\n\nasync function notifyRunEnd(\n reporters: Reporter[],\n data: {\n runId: string;\n name: string;\n model: string;\n summary: RunSummary;\n cases: CaseResult[];\n threshold: number;\n },\n): Promise<void> {\n data.cases.sort((a, b) => a.index - b.index);\n await Promise.all(reporters.map((r) => r.onRunEnd?.(data)));\n}\n\nasync function evaluateSingle<T>(\n options: EvaluateOptions<T>,\n): Promise<RunSummary> {\n const threshold = options.threshold ?? 0.5;\n const { emitter, cases, getRunId } = wireReporters(options.reporters);\n\n const summary = await runEval({\n name: options.name,\n model: options.model,\n dataset: options.dataset,\n task: options.task,\n scorers: options.scorers,\n store: options.store,\n emitter,\n suiteId: options.suiteId,\n maxConcurrency: options.maxConcurrency,\n timeout: options.timeout,\n trials: options.trials,\n threshold: options.threshold,\n });\n\n await notifyRunEnd(options.reporters, {\n runId: getRunId(),\n name: options.name,\n model: options.model,\n summary,\n cases,\n threshold,\n });\n\n return summary;\n}\n\nasync function evaluateEach<T, V extends { name: string }>(\n options: EvaluateEachOptions<T, V>,\n perModelFailedIndexes?: Map<string, Set<number>>,\n): Promise<RunSummary[]> {\n const items: T[] = [];\n for await (const item of options.dataset) {\n items.push(item);\n }\n\n const suite = options.store.createSuite(options.name);\n\n return Promise.all(\n options.models.map((variant) => {\n let ds: AsyncIterable<T> = dataset(items);\n const failedIndexes = perModelFailedIndexes?.get(variant.name);\n if (failedIndexes && failedIndexes.size > 0) {\n ds = filterRecordsByIndex(ds, failedIndexes);\n }\n return evaluateSingle({\n name: `${options.name} [${variant.name}]`,\n model: variant.name,\n dataset: ds,\n task: (input: T) => options.task(input, variant),\n scorers: options.scorers,\n reporters: options.reporters,\n store: options.store,\n suiteId: suite.id,\n maxConcurrency: options.maxConcurrency,\n timeout: options.timeout,\n trials: options.trials,\n threshold: options.threshold,\n });\n }),\n );\n}\n"],
|
|
5
|
-
"mappings": ";AAAA,SAAS,wBAAwB;AACjC,SAAS,gBAAgB;AACzB,SAAS,eAAe;AACxB,SAAS,uBAAuB;;;ACEhC,SAAS,iBAAiB,OAAuB;AAC/C,MAAI,CAAC,QAAQ,KAAK,KAAK,GAAG;AACxB,UAAM,IAAI,MAAM,yBAAyB,KAAK,GAAG;AAAA,EACnD;AACA,QAAM,QAAQ,OAAO,KAAK;AAC1B,MAAI,CAAC,OAAO,UAAU,KAAK,KAAK,QAAQ,GAAG;AACzC,UAAM,IAAI,MAAM,0CAA0C,KAAK,GAAG;AAAA,EACpE;AACA,SAAO;AACT;AAEO,SAAS,qBAAqB,MAAqC;AACxE,QAAM,UAAU,KAAK,KAAK;AAC1B,MAAI,CAAC,SAAS;AACZ,WAAO,EAAE,SAAS,oBAAI,IAAI,GAAG,YAAY,GAAG;AAAA,EAC9C;AAEA,QAAM,UAAU,oBAAI,IAAY;AAChC,QAAM,QAAQ,QACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AACjB,MAAI,MAAM,WAAW,GAAG;AACtB,UAAM,IAAI,MAAM,4BAA4B;AAAA,EAC9C;AAEA,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,sBAAsB,KAAK,IAAI;AAClD,QAAI,YAAY;AACd,YAAM,QAAQ,iBAAiB,WAAW,CAAC,CAAE;AAC7C,YAAM,MAAM,iBAAiB,WAAW,CAAC,CAAE;AAC3C,UAAI,MAAM,OAAO;AACf,cAAM,IAAI;AAAA,UACR,kBAAkB,IAAI;AAAA,QACxB;AAAA,MACF;AACA,eAAS,IAAI,OAAO,KAAK,KAAK,KAAK;AACjC,gBAAQ,IAAI,IAAI,CAAC;AAAA,MACnB;AACA;AAAA,IACF;AAEA,UAAM,QAAQ,iBAAiB,IAAI;AACnC,YAAQ,IAAI,QAAQ,CAAC;AAAA,EACvB;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,MAAM,KAAK,OAAO,EAC3B,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC,EACpB,IAAI,CAAC,MAAM,OAAO,IAAI,CAAC,CAAC,EACxB,KAAK,GAAG;AAAA,EACb;AACF;AAOA,gBAAuB,qBACrB,QACA,SACkB;AAClB,MAAI,QAAQ,SAAS,GAAG;AACtB,qBAAiB,QAAQ,QAAQ;AAC/B,YAAM;AAAA,IACR;AACA;AAAA,EACF;AAEA,MAAI,MAAM;AACV,mBAAiB,QAAQ,QAAQ;AAC/B,QAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,YAAM;AAAA,IACR;AACA;AAAA,EACF;AACF;;;ADjEO,IAAM,UAAN,MAAM,SAAuC;AAAA,EAClD;AAAA,EAEA,YAAY,QAAgC;AAC1C,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,IAAO,IAAmC;AACxC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,GAAG,IAAI;AAAA,MACf;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,OAAO,IAAgC;AACrC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,GAAG,IAAI,EAAG,OAAM;AAAA,MACtB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,GAAuB;AAC3B,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,UAAI,QAAQ;AACZ,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,SAAS,EAAG;AAChB,cAAM;AACN;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,UAAsB;AACpB,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,YAAM,QAAa,CAAC;AACpB,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,KAAK,IAAI;AAAA,MACjB;AACA,eAAS,IAAI,MAAM,SAAS,GAAG,IAAI,GAAG,KAAK;AACzC,cAAM,IAAI,KAAK,MAAM,KAAK,OAAO,KAAK,IAAI,EAAE;AAC5C,cAAM,OAAO,MAAM,CAAC;AACpB,cAAM,CAAC,IAAI,MAAM,CAAC;AAClB,cAAM,CAAC,IAAI;AAAA,MACb;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAAA,EAEA,OAAO,GAAuB;AAC5B,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,YAAM,QAAa,CAAC;AACpB,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,KAAK,IAAI;AAAA,MACjB;AACA,YAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,GAAG,CAAC,GAAG,MAAM,MAAM;AACnD,eAAS,IAAI,MAAM,SAAS,GAAG,IAAI,MAAM,SAAS,QAAQ,GAAG,KAAK;AAChE,cAAM,IAAI,KAAK,MAAM,KAAK,OAAO,KAAK,IAAI,EAAE;AAC5C,cAAM,OAAO,MAAM,CAAC;AACpB,cAAM,CAAC,IAAI,MAAM,CAAC;AAClB,cAAM,CAAC,IAAI;AAAA,MACb;AACA,eAAS,IAAI,MAAM,SAAS,OAAO,IAAI,MAAM,QAAQ,KAAK;AACxD,cAAM,MAAM,CAAC;AAAA,MACf;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,KAAK,SAAkC;AACrC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,UAAI,QAAQ,SAAS,GAAG;AACtB,eAAO,OAAO;AACd;AAAA,MACF;AACA,UAAI,MAAM;AACV,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,gBAAM;AAAA,QACR;AACA;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAwB;AAC5B,UAAM,SAAc,CAAC;AACrB,qBAAiB,QAAQ,KAAK,QAAQ,GAAG;AACvC,aAAO,KAAK,IAAI;AAAA,IAClB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO,KAAK,QAAQ,EAAE,OAAO,aAAa,EAAE;AAAA,EAC9C;AACF;AAEA,SAAS,aAAa,MAAwB;AAC5C,QAAM,SAAmB,CAAC;AAC1B,MAAI,UAAU;AACd,MAAI,WAAW;AAEf,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,OAAO,KAAK,CAAC;AACnB,QAAI,UAAU;AACZ,UAAI,SAAS,KAAK;AAChB,YAAI,IAAI,IAAI,KAAK,UAAU,KAAK,IAAI,CAAC,MAAM,KAAK;AAC9C,qBAAW;AACX;AAAA,QACF,OAAO;AACL,qBAAW;AAAA,QACb;AAAA,MACF,OAAO;AACL,mBAAW;AAAA,MACb;AAAA,IACF,OAAO;AACL,UAAI,SAAS,OAAO,YAAY,IAAI;AAClC,mBAAW;AAAA,MACb,WAAW,SAAS,KAAK;AACvB,eAAO,KAAK,OAAO;AACnB,kBAAU;AAAA,MACZ,OAAO;AACL,mBAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AACA,SAAO,KAAK,OAAO;AACnB,SAAO;AACT;AAEA,SAAS,SAAY,UAA0C;AAC7D,SAAO,mBAAmB;AACxB,UAAM,UAAU,MAAM,SAAS,UAAU,OAAO;AAChD,UAAM,OAAO,KAAK,MAAM,OAAO;AAC/B,QAAI,CAAC,MAAM,QAAQ,IAAI,GAAG;AACxB,YAAM,IAAI,MAAM,cAAc,QAAQ,6BAA6B;AAAA,IACrE;AACA,WAAO;AAAA,EACT;AACF;AAEA,SAAS,UAAa,UAA0C;AAC9D,SAAO,mBAAmB;AACxB,UAAM,KAAK,gBAAgB;AAAA,MACzB,OAAO,iBAAiB,UAAU,OAAO;AAAA,MACzC,WAAW;AAAA,IACb,CAAC;AACD,QAAI;AACF,uBAAiB,QAAQ,IAAI;AAC3B,cAAM,UAAU,KAAK,KAAK;AAC1B,YAAI,SAAS;AACX,gBAAM,KAAK,MAAM,OAAO;AAAA,QAC1B;AAAA,MACF;AAAA,IACF,UAAE;AACA,SAAG,MAAM;AAAA,IACX;AAAA,EACF;AACF;AAEA,SAAS,QACP,UAC6C;AAC7C,SAAO,mBAAmB;AACxB,UAAM,KAAK,gBAAgB;AAAA,MACzB,OAAO,iBAAiB,UAAU,OAAO;AAAA,MACzC,WAAW;AAAA,IACb,CAAC;AACD,QAAI;AACF,UAAI;AACJ,uBAAiB,QAAQ,IAAI;AAC3B,cAAM,UAAU,KAAK,KAAK;AAC1B,YAAI,CAAC,QAAS;AACd,cAAM,SAAS,aAAa,OAAO;AACnC,YAAI,CAAC,SAAS;AACZ,oBAAU;AACV;AAAA,QACF;AACA,cAAM,MAA8B,CAAC;AACrC,iBAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,cAAI,QAAQ,CAAC,CAAE,IAAI,OAAO,CAAC,KAAK;AAAA,QAClC;AACA,cAAM;AAAA,MACR;AAAA,IACF,UAAE;AACA,SAAG,MAAM;AAAA,IACX;AAAA,EACF;AACF;AAEO,SAAS,QACd,QACY;AACZ,MAAI,MAAM,QAAQ,MAAM,GAAG;AACzB,WAAO,IAAI,QAAQ,mBAAmB;AACpC,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAEA,MAAI,OAAO,WAAW,YAAY,OAAO,iBAAiB,QAAQ;AAChE,WAAO,IAAI,QAAQ,MAAM,MAAM;AAAA,EACjC;AAEA,QAAM,MAAM,QAAQ,MAAM,EAAE,YAAY;AACxC,UAAQ,KAAK;AAAA,IACX,KAAK;AACH,aAAO,IAAI,QAAQ,SAAY,MAAM,CAAC;AAAA,IACxC,KAAK;AACH,aAAO,IAAI,QAAQ,UAAa,MAAM,CAAC;AAAA,IACzC,KAAK;AACH,aAAO,IAAI,QAAQ,QAAQ,MAAM,CAA2B;AAAA,IAC9D;AACE,YAAM,IAAI;AAAA,QACR,+BAA+B,GAAG,uBAAuB,MAAM;AAAA,MACjE;AAAA,EACJ;AACF;;;AEjPA,SAAS,oBAAoB;AAyCtB,IAAM,cAAN,cAA0B,aAAa;AAAA,EACnC,GACP,OACA,UACM;AACN,WAAO,MAAM,GAAG,OAAO,QAAQ;AAAA,EACjC;AAAA,EAES,KACP,OACA,MACS;AACT,WAAO,MAAM,KAAK,OAAO,IAAI;AAAA,EAC/B;AACF;AA2BA,SAAS,aAAa,KAAsB;AAC1C,MAAI,eAAe,OAAO;AACxB,WAAO,GAAG,IAAI,IAAI,KAAK,IAAI,OAAO;AAAA,EACpC;AACA,MAAI,OAAO,QAAQ,SAAU,QAAO;AACpC,MAAI,OAAO,KAAM,QAAO;AACxB,MAAI;AACF,WAAO,KAAK,UAAU,GAAG;AAAA,EAC3B,QAAQ;AACN,WAAO,OAAO,GAAG;AAAA,EACnB;AACF;AAEA,SAAS,eAAe,KAAsB;AAC5C,MAAI,eAAe,OAAO;AACxB,WAAO,KAAK,UAAU;AAAA,MACpB,MAAM,IAAI;AAAA,MACV,SAAS,IAAI;AAAA,MACb,OAAO,IAAI;AAAA,MACX,OACE,IAAI,iBAAiB,QACjB;AAAA,QACE,MAAM,IAAI,MAAM;AAAA,QAChB,SAAS,IAAI,MAAM;AAAA,MACrB,IACA,IAAI;AAAA,IACZ,CAAC;AAAA,EACH;AACA,MAAI,OAAO,QAAQ,SAAU,QAAO,KAAK,UAAU,EAAE,SAAS,IAAI,CAAC;AACnE,MAAI,OAAO,KAAM,QAAO,KAAK,UAAU,EAAE,SAAS,gBAAgB,CAAC;AACnE,MAAI;AACF,WAAO,KAAK,UAAU,GAAG;AAAA,EAC3B,QAAQ;AACN,WAAO,KAAK,UAAU,EAAE,SAAS,OAAO,GAAG,EAAE,CAAC;AAAA,EAChD;AACF;AAEA,SAAS,cACP,aACA,OAC8B;AAC9B,QAAM,SAAS,gBAAgB,aAAa,KAAK,CAAC;AAClD,QAAM,SAAuC,CAAC;AAC9C,aAAW,cAAc,aAAa;AACpC,WAAO,UAAU,IAAI,EAAE,OAAO,GAAG,OAAO;AAAA,EAC1C;AACA,SAAO;AACT;AAEA,SAAS,gBAAgB,gBAAwB;AAC/C,MAAI,SAAS;AACb,QAAM,QAA2B,CAAC;AAElC,SAAO;AAAA,IACL,MAAM,UAAyB;AAC7B,UAAI,SAAS,gBAAgB;AAC3B;AACA;AAAA,MACF;AACA,aAAO,IAAI,QAAc,CAAC,YAAY,MAAM,KAAK,OAAO,CAAC;AAAA,IAC3D;AAAA,IACA,UAAgB;AACd;AACA,YAAM,OAAO,MAAM,MAAM;AACzB,UAAI,MAAM;AACR;AACA,aAAK;AAAA,MACP;AAAA,IACF;AAAA,EACF;AACF;AAEA,eAAe,SACb,MACA,OACA,WACwB;AACxB,QAAM,QAAQ,YAAY,IAAI;AAC9B,MAAI;AACJ,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,MAChC,KAAK,KAAK;AAAA,MACV,IAAI,QAAe,CAAC,GAAG,WAAW;AAChC,kBAAU;AAAA,UACR,MAAM,OAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UAC1C;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AACD,iBAAa,OAAO;AACpB,UAAM,YAAY,KAAK,MAAM,YAAY,IAAI,IAAI,KAAK;AACtD,WAAO;AAAA,MACL,QAAQ,OAAO;AAAA,MACf;AAAA,MACA,UAAU,OAAO,OAAO,eAAe;AAAA,MACvC,WAAW,OAAO,OAAO,gBAAgB;AAAA,IAC3C;AAAA,EACF,SAAS,KAAK;AACZ,iBAAa,OAAO;AACpB,UAAM,YAAY,KAAK,MAAM,YAAY,IAAI,IAAI,KAAK;AACtD,WAAO;AAAA,MACL,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,MACV,WAAW;AAAA,MACX,OAAO;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,WAAW,OAAe,YAA4B;AAC7D,MAAI,QAAQ,KAAK,QAAQ,GAAG;AAC1B,YAAQ;AAAA,MACN,WAAW,UAAU,iCAAiC,KAAK;AAAA,IAC7D;AACA,WAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AAAA,EACvC;AACA,SAAO;AACT;AAEA,eAAsB,QAAW,QAA4C;AAC3E,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,iBAAiB;AAAA,IACjB;AAAA,IACA,UAAU;AAAA,IACV,SAAS;AAAA,IACT,YAAY;AAAA,EACd,IAAI;AAEJ,QAAM,UAAU,OAAO,WAAW,IAAI,YAAY;AAClD,QAAM,kBAAkB,WAAW,MAAM,YAAY,IAAI,EAAE;AAC3D,QAAM,QAAQ,MAAM,UAAU;AAAA,IAC5B,UAAU;AAAA,IACV;AAAA,IACA;AAAA,IACA,QAAQ,OAAO;AAAA,EACjB,CAAC;AAED,QAAM,QAA4C,CAAC;AACnD,MAAI,MAAM;AACV,mBAAiB,QAAQ,IAAI;AAC3B,UAAM,KAAK,EAAE,OAAO,OAAO,OAAO,KAAK,CAAC;AAAA,EAC1C;AAEA,UAAQ,KAAK,aAAa,EAAE,OAAO,YAAY,MAAM,QAAQ,MAAM,MAAM,CAAC;AAE1E,QAAM,YAAY,gBAAgB,cAAc;AAChD,QAAM,cAAc,OAAO,KAAK,OAAO;AAEvC,QAAM,gBAMD,CAAC;AAEN,QAAM,cAAc,OAAO,EAAE,OAAO,MAAM,MAAmC;AAC3E,UAAM,UAAU,QAAQ;AACxB,QAAI;AACF,cAAQ,KAAK,cAAc,EAAE,OAAO,OAAO,MAAM,CAAC;AAElD,UAAI;AACJ,UAAI;AAEJ,UAAI,SAAS,GAAG;AACd,cAAM,eAGD,CAAC;AAEN,iBAAS,IAAI,GAAG,IAAI,QAAQ,KAAK;AAC/B,gBAAM,SAAS,MAAM,SAAS,MAAM,OAAO,OAAO;AAClD,cAAI,OAAO,OAAO;AAChB,yBAAa,KAAK;AAAA,cAChB;AAAA,cACA,QAAQ,cAAc,aAAa,OAAO,KAAK;AAAA,YACjD,CAAC;AAAA,UACH,OAAO;AACL,kBAAM,SAAuC,CAAC;AAC9C,uBAAW,CAAC,OAAO,MAAM,KAAK,OAAO,QAAQ,OAAO,GAAG;AACrD,oBAAM,KAAK,MAAM,OAAO;AAAA,gBACtB;AAAA,gBACA,QAAQ,OAAO;AAAA,gBACf,UAAW,MAAkC;AAAA,cAC/C,CAAC;AACD,qBAAO,KAAK,IAAI;AAAA,gBACd,OAAO,WAAW,GAAG,OAAO,KAAK;AAAA,gBACjC,QAAQ,GAAG;AAAA,gBACX,UAAU,GAAG;AAAA,cACf;AAAA,YACF;AACA,yBAAa,KAAK,EAAE,QAAQ,OAAO,CAAC;AAAA,UACtC;AAAA,QACF;AAEA,cAAM,iBAAiB,CAAC,GAAG,YAAY,EACpC,QAAQ,EACR,KAAK,CAAC,MAAM,CAAC,EAAE,OAAO,KAAK;AAC9B,cAAM,aACJ,gBAAgB,UAChB,aAAa,aAAa,SAAS,CAAC,EAAG;AACzC,sBAAc;AAAA,UACZ,QAAQ,WAAW;AAAA,UACnB,WAAW,KAAK;AAAA,YACd,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,WAAW,CAAC,IACzD;AAAA,UACJ;AAAA,UACA,UAAU,KAAK;AAAA,YACb,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,UAAU,CAAC,IACxD;AAAA,UACJ;AAAA,UACA,WAAW,KAAK;AAAA,YACd,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,WAAW,CAAC,IACzD;AAAA,UACJ;AAAA,UACA,OAAO,iBAAiB,SAAY,WAAW;AAAA,QACjD;AAEA,sBAAc,CAAC;AACf,mBAAW,SAAS,aAAa;AAC/B,gBAAM,YACJ,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,KAAK,EAAG,OAAO,CAAC,IAC/D;AACF,sBAAY,KAAK,IAAI;AAAA,YACnB,OAAO;AAAA,YACP,QACE,aAAa,aAAa,SAAS,CAAC,EAAG,OAAO,KAAK,GAAG;AAAA,YACxD,UACE,aAAa,aAAa,SAAS,CAAC,EAAG,OAAO,KAAK,GAAG;AAAA,UAC1D;AAAA,QACF;AAAA,MACF,OAAO;AACL,sBAAc,MAAM,SAAS,MAAM,OAAO,OAAO;AACjD,YAAI,YAAY,OAAO;AACrB,wBAAc,cAAc,aAAa,YAAY,KAAK;AAAA,QAC5D,OAAO;AACL,wBAAc,CAAC;AACf,qBAAW,CAAC,OAAO,MAAM,KAAK,OAAO,QAAQ,OAAO,GAAG;AACrD,kBAAM,KAAK,MAAM,OAAO;AAAA,cACtB;AAAA,cACA,QAAQ,YAAY;AAAA,cACpB,UAAW,MAAkC;AAAA,YAC/C,CAAC;AACD,wBAAY,KAAK,IAAI;AAAA,cACnB,OAAO,WAAW,GAAG,OAAO,KAAK;AAAA,cACjC,QAAQ,GAAG;AAAA,cACX,UAAU,GAAG;AAAA,YACf;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAEA,YAAM,SAAS,OAAO,WAAW;AAEjC,YAAM,WAAqB;AAAA,QACzB,IAAI;AAAA,QACJ,QAAQ;AAAA,QACR,KAAK;AAAA,QACL;AAAA,QACA,QAAQ,YAAY,UAAU;AAAA,QAC9B,UAAW,MAAkC;AAAA,QAC7C,YAAY,YAAY;AAAA,QACxB,WAAW,YAAY;AAAA,QACvB,YAAY,YAAY;AAAA,QACxB,OAAO,YAAY,QACf,eAAe,YAAY,KAAK,IAChC;AAAA,MACN;AACA,YAAM,UAAU,CAAC,QAAQ,CAAC;AAE1B,YAAM,gBAA6B,YAAY,IAAI,CAAC,WAAW;AAAA,QAC7D,IAAI,OAAO,WAAW;AAAA,QACtB,SAAS;AAAA,QACT,aAAa;AAAA,QACb,OAAO,YAAY,KAAK,EAAG;AAAA,QAC3B,QAAQ,YAAY,KAAK,EAAG;AAAA,MAC9B,EAAE;AACF,YAAM,WAAW,aAAa;AAE9B,oBAAc,KAAK;AAAA,QACjB;AAAA,QACA,QAAQ,OAAO;AAAA,UACb,YAAY,IAAI,CAAC,UAAU,CAAC,OAAO,YAAY,KAAK,EAAG,KAAK,CAAC;AAAA,QAC/D;AAAA,QACA,WAAW,YAAY;AAAA,QACvB,UAAU,YAAY;AAAA,QACtB,WAAW,YAAY;AAAA,MACzB,CAAC;AAED,UAAI,YAAY,OAAO;AACrB,gBAAQ,KAAK,cAAc;AAAA,UACzB;AAAA,UACA;AAAA,UACA,OAAO,aAAa,YAAY,KAAK;AAAA,QACvC,CAAC;AAAA,MACH;AAEA,cAAQ,KAAK,eAAe;AAAA,QAC1B;AAAA,QACA;AAAA,QACA;AAAA,QACA,QAAQ,YAAY;AAAA,QACpB,UAAW,MAAkC;AAAA,QAC7C,QAAQ;AAAA,QACR,OAAO,YAAY;AAAA,QACnB,WAAW,YAAY;AAAA,QACvB,UAAU,YAAY;AAAA,QACtB,WAAW,YAAY;AAAA,MACzB,CAAC;AAAA,IACH,UAAE;AACA,gBAAU,QAAQ;AAAA,IACpB;AAAA,EACF;AAEA,QAAM,UAAU,YACZ,MAAM;AAAA,IAAK,EAAE,QAAQ,KAAK,KAAK,MAAM,SAAS,SAAS,EAAE;AAAA,IAAG,CAAC,GAAG,MAC9D,MAAM,MAAM,IAAI,YAAY,IAAI,KAAK,SAAS;AAAA,EAChD,IACA,CAAC,KAAK;AAEV,MAAI;AACF,eAAW,SAAS,SAAS;AAC3B,YAAM,QAAQ,IAAI,MAAM,IAAI,WAAW,CAAC;AAAA,IAC1C;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,UAAU,OAAO,QAAQ;AAC/B,UAAM;AAAA,EACR;AAEA,QAAM,UAAU,eAAe,eAAe,aAAa,SAAS;AACpE,QAAM,UAAU,OAAO,aAAa,OAAO;AAC3C,UAAQ,KAAK,WAAW,EAAE,OAAO,QAAQ,CAAC;AAE1C,SAAO;AACT;AAEA,SAAS,eACP,OAOA,aACA,WACY;AACZ,QAAM,aAAa,MAAM;AACzB,MAAI,YAAY;AAChB,MAAI,YAAY;AAChB,MAAI,iBAAiB;AACrB,MAAI,gBAAgB;AACpB,MAAI,iBAAiB;AAErB,QAAM,YAAoC,CAAC;AAC3C,aAAW,QAAQ,aAAa;AAC9B,cAAU,IAAI,IAAI;AAAA,EACpB;AAEA,aAAW,KAAK,OAAO;AACrB,sBAAkB,EAAE;AACpB,qBAAiB,EAAE;AACnB,sBAAkB,EAAE;AAEpB,QAAI,UAAU;AACd,eAAW,QAAQ,aAAa;AAC9B,YAAM,QAAQ,EAAE,OAAO,IAAI,KAAK;AAChC,gBAAU,IAAI,KAAM;AACpB,UAAI,QAAQ,UAAW,WAAU;AAAA,IACnC;AACA,QAAI,QAAS;AAAA,QACR;AAAA,EACP;AAEA,QAAM,aAAqC,CAAC;AAC5C,aAAW,QAAQ,aAAa;AAC9B,eAAW,IAAI,IAAI,aAAa,IAAI,UAAU,IAAI,IAAK,aAAa;AAAA,EACtE;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;AC5dA,SAAS,oBAAoB;;;ACwDtB,IAAM,qBAAN,cAAiC,MAAM;AAAA,EAC5C;AAAA,EAEA,YAAY,SAAoC;AAC9C,UAAM,MAAM,MAAM,QAAQ,OAAO,IAC7B,0BAA0B,QAAQ,OAAO,CAAC,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM,OAAO,QAAQ,MAAM,8BAC5F,0BAA0B,QAAQ,SAAS,OAAO,QAAQ,UAAU;AACxE,UAAM,GAAG;AACT,SAAK,OAAO;AACZ,SAAK,UAAU;AAAA,EACjB;AACF;AAEA,SAAS,qBACP,OACA,WACA,OACA,WACa;AACb,QAAM,QAAQ,MAAM,gBAAgB,SAAS;AAC7C,MAAI,CAAC,OAAO;AACV,YAAQ;AAAA,MACN,gCAAgC,SAAS;AAAA,IAC3C;AACA,WAAO,oBAAI,IAAI;AAAA,EACjB;AACA,QAAM,MAAM,MAAM,sBAAsB,MAAM,IAAI,KAAK;AACvD,MAAI,CAAC,KAAK;AACR,YAAQ;AAAA,MACN,wCAAwC,SAAS,IAAI,QAAQ,KAAK,KAAK,MAAM,EAAE;AAAA,IACjF;AACA,WAAO,oBAAI,IAAI;AAAA,EACjB;AACA,QAAM,eAAe,MAAM,gBAAgB,IAAI,IAAI,SAAS;AAC5D,MAAI,aAAa,WAAW,GAAG;AAC7B,YAAQ,KAAK,qDAAqD;AAClE,WAAO,oBAAI,IAAI;AAAA,EACjB;AACA,UAAQ;AAAA,IACN,YAAY,aAAa,MAAM;AAAA,EACjC;AACA,SAAO,IAAI,IAAI,aAAa,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC;AAC/C;AAEO,IAAM,cAAN,MAA+C;AAAA;AAAA,EAEpD;AAAA,EACA,aAAwB,EAAE,MAAM,MAAM;AAAA,EACtC,gBAAgB;AAAA,EAEhB,YAEE,SACA;AACA,SAAK,WAAW;AAAA,EAClB;AAAA,EAEA,cAAc,WAA4B;AACxC,QAAI,KAAK,WAAW,SAAS,OAAO;AAClC,YAAM,IAAI;AAAA,QACR,mBAAmB,KAAK,WAAW,IAAI,YAAY,UAAU,IAAI;AAAA,MACnE;AAAA,IACF;AACA,SAAK,aAAa;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,SAAe;AACb,WAAO,KAAK,cAAc,EAAE,MAAM,SAAS,CAAC;AAAA,EAC9C;AAAA,EAEA,MAAM,MAAoB;AACxB,UAAM,EAAE,QAAQ,IAAI,qBAAqB,IAAI;AAC7C,WAAO,KAAK,cAAc,EAAE,MAAM,SAAS,QAAQ,CAAC;AAAA,EACtD;AAAA,EAEA,OAAO,OAAqB;AAC1B,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AACA,WAAO,KAAK,cAAc,EAAE,MAAM,UAAU,MAAM,CAAC;AAAA,EACrD;AAAA,EAEA,SAAe;AACb,SAAK,gBAAgB;AACrB,WAAO;AAAA,EACT;AAAA,EAEA,KACE,aAIA,YAI8B;AAC9B,WAAO,KAAK,SAAS,EAAE,KAAK,aAAa,UAAU;AAAA,EACrD;AAAA,EAEA,MAAM,WAAuB;AAC3B,QAAI,YAAY,KAAK,UAAU;AAC7B,aAAO,KAAK,cAAc;AAAA,IAC5B;AACA,WAAO,KAAK,eAAe;AAAA,EAC7B;AAAA,EAEA,oBAAoB,IAAoD;AACtE,YAAQ,KAAK,WAAW,MAAM;AAAA,MAC5B,KAAK;AACH,eAAO;AAAA,MACT,KAAK;AACH,eAAO,KAAK,WAAW,QAAQ,OAAO,IAClC,qBAAqB,IAAI,KAAK,WAAW,OAAO,IAChD;AAAA,MACN,KAAK;AACH,eAAO,QAAQ,EAAE,EAAE,OAAO,KAAK,WAAW,KAAK;AAAA,MACjD,KAAK;AACH,eAAO;AAAA,IACX;AAAA,EACF;AAAA,EAEA,MAAM,iBAAsC;AAC1C,UAAM,UAAU,KAAK;AACrB,QAAI,KAA6B,QAAQ;AAEzC,QAAI,KAAK,WAAW,SAAS,UAAU;AACrC,YAAM,UAAU;AAAA,QACd,QAAQ;AAAA,QACR,QAAQ;AAAA,QACR,QAAQ;AAAA,QACR,QAAQ;AAAA,MACV;AACA,UAAI,QAAQ,OAAO,GAAG;AACpB,aAAK,qBAAqB,IAAI,OAAO;AAAA,MACvC;AAAA,IACF,OAAO;AACL,WAAK,KAAK,oBAAoB,EAAE;AAAA,IAClC;AAEA,UAAM,SAAS,MAAM,eAAe,EAAE,GAAG,SAAS,SAAS,GAAG,CAAC;AAE/D,QAAI,KAAK,iBAAiB,OAAO,YAAY,GAAG;AAC9C,YAAM,IAAI,mBAAmB,MAAM;AAAA,IACrC;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,gBAAuC;AAC3C,UAAM,UAAU,KAAK;AAKrB,QAAI;AAEJ,QAAI,KAAK,WAAW,SAAS,UAAU;AACrC,YAAM,kBAAkB,oBAAI,IAAyB;AACrD,iBAAW,WAAW,QAAQ,QAAQ;AACpC,wBAAgB;AAAA,UACd,QAAQ;AAAA,UACR;AAAA,YACE,QAAQ;AAAA,YACR,QAAQ;AAAA,YACR,QAAQ;AAAA,YACR,QAAQ;AAAA,UACV;AAAA,QACF;AAAA,MACF;AACA,eAAS,MAAM,aAAa,SAAS,eAAe;AAAA,IACtD,OAAO;AACL,YAAM,WAAW,KAAK,oBAAoB,QAAQ,OAAO;AACzD,eAAS,MAAM,aAAa,EAAE,GAAG,SAAS,SAAS,SAAS,CAAC;AAAA,IAC/D;AAEA,QAAI,KAAK,iBAAiB,OAAO,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC,GAAG;AAC7D,YAAM,IAAI,mBAAmB,MAAM;AAAA,IACrC;AAEA,WAAO;AAAA,EACT;AACF;AAQO,SAAS,SACd,SACqD;AACrD,MAAI,YAAY,SAAS;AACvB,WAAO,IAAI,YAA0B,OAAO;AAAA,EAC9C;AACA,SAAO,IAAI,YAAwB,OAAO;AAC5C;AAEA,SAAS,cAAc,WAAuB;AAC5C,QAAM,UAAU,IAAI,YAAY;AAChC,QAAM,QAAsB,CAAC;AAC7B,MAAI,QAAQ;AAEZ,UAAQ,GAAG,aAAa,CAAC,SAAS;AAChC,YAAQ,KAAK;AACb,eAAW,KAAK,UAAW,GAAE,aAAa,IAAI;AAAA,EAChD,CAAC;AAED,UAAQ,GAAG,eAAe,CAAC,SAAS;AAClC,UAAM,SAAqB;AAAA,MACzB,OAAO,KAAK;AAAA,MACZ,OAAO,KAAK;AAAA,MACZ,OAAO,KAAK;AAAA,MACZ,QAAQ,KAAK;AAAA,MACb,UAAU,KAAK;AAAA,MACf,QAAQ,KAAK;AAAA,MACb,OAAO,KAAK,SAAS;AAAA,MACrB,WAAW,KAAK;AAAA,MAChB,UAAU,KAAK;AAAA,MACf,WAAW,KAAK;AAAA,IAClB;AACA,UAAM,KAAK,MAAM;AACjB,eAAW,KAAK,UAAW,GAAE,YAAY,MAAM;AAAA,EACjD,CAAC;AAED,SAAO,EAAE,SAAS,OAAO,UAAU,MAAM,MAAM;AACjD;AAEA,eAAe,aACb,WACA,MAQe;AACf,OAAK,MAAM,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAC3C,QAAM,QAAQ,IAAI,UAAU,IAAI,CAAC,MAAM,EAAE,WAAW,IAAI,CAAC,CAAC;AAC5D;AAEA,eAAe,eACb,SACqB;AACrB,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,EAAE,SAAS,OAAO,SAAS,IAAI,cAAc,QAAQ,SAAS;AAEpE,QAAM,UAAU,MAAM,QAAQ;AAAA,IAC5B,MAAM,QAAQ;AAAA,IACd,OAAO,QAAQ;AAAA,IACf,SAAS,QAAQ;AAAA,IACjB,MAAM,QAAQ;AAAA,IACd,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,gBAAgB,QAAQ;AAAA,IACxB,SAAS,QAAQ;AAAA,IACjB,QAAQ,QAAQ;AAAA,IAChB,WAAW,QAAQ;AAAA,EACrB,CAAC;AAED,QAAM,aAAa,QAAQ,WAAW;AAAA,IACpC,OAAO,SAAS;AAAA,IAChB,MAAM,QAAQ;AAAA,IACd,OAAO,QAAQ;AAAA,IACf;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,eAAe,aACb,SACA,uBACuB;AACvB,QAAM,QAAa,CAAC;AACpB,mBAAiB,QAAQ,QAAQ,SAAS;AACxC,UAAM,KAAK,IAAI;AAAA,EACjB;AAEA,QAAM,QAAQ,QAAQ,MAAM,YAAY,QAAQ,IAAI;AAEpD,SAAO,QAAQ;AAAA,IACb,QAAQ,OAAO,IAAI,CAAC,YAAY;AAC9B,UAAI,KAAuB,QAAQ,KAAK;AACxC,YAAM,gBAAgB,uBAAuB,IAAI,QAAQ,IAAI;AAC7D,UAAI,iBAAiB,cAAc,OAAO,GAAG;AAC3C,aAAK,qBAAqB,IAAI,aAAa;AAAA,MAC7C;AACA,aAAO,eAAe;AAAA,QACpB,MAAM,GAAG,QAAQ,IAAI,KAAK,QAAQ,IAAI;AAAA,QACtC,OAAO,QAAQ;AAAA,QACf,SAAS;AAAA,QACT,MAAM,CAAC,UAAa,QAAQ,KAAK,OAAO,OAAO;AAAA,QAC/C,SAAS,QAAQ;AAAA,QACjB,WAAW,QAAQ;AAAA,QACnB,OAAO,QAAQ;AAAA,QACf,SAAS,MAAM;AAAA,QACf,gBAAgB,QAAQ;AAAA,QACxB,SAAS,QAAQ;AAAA,QACjB,QAAQ,QAAQ;AAAA,QAChB,WAAW,QAAQ;AAAA,MACrB,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;",
|
|
4
|
+
"sourcesContent": ["import { createReadStream } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { extname } from 'node:path';\nimport { createInterface } from 'node:readline';\n\nexport { downloadHf, fetchHfRows, hf } from './hf.ts';\nexport type { HfOptions } from './hf.ts';\n\nexport {\n filterRecordsByIndex,\n parseRecordSelection,\n pickFromArray,\n} from './record-selection.ts';\nexport type { ParsedRecordSelection } from './record-selection.ts';\n\nexport type TransformFn<T, U> = (item: T) => U;\nexport type PredicateFn<T> = (item: T) => boolean;\n\nexport class Dataset<T> implements AsyncIterable<T> {\n #source: () => AsyncIterable<T>;\n\n constructor(source: () => AsyncIterable<T>) {\n this.#source = source;\n }\n\n map<U>(fn: TransformFn<T, U>): Dataset<U> {\n const source = this.#source;\n return new Dataset(async function* () {\n for await (const item of source()) {\n yield fn(item);\n }\n });\n }\n\n filter(fn: PredicateFn<T>): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n for await (const item of source()) {\n if (fn(item)) yield item;\n }\n });\n }\n\n limit(n: number): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n let count = 0;\n for await (const item of source()) {\n if (count >= n) return;\n yield item;\n count++;\n }\n });\n }\n\n shuffle(): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n const items: T[] = [];\n for await (const item of source()) {\n items.push(item);\n }\n for (let i = items.length - 1; i > 0; i--) {\n const j = Math.floor(Math.random() * (i + 1));\n const temp = items[i] as T;\n items[i] = items[j] as T;\n items[j] = temp;\n }\n yield* items;\n });\n }\n\n sample(n: number): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n const items: T[] = [];\n for await (const item of source()) {\n items.push(item);\n }\n const count = Math.min(Math.max(0, n), items.length);\n for (let i = items.length - 1; i > items.length - count - 1; i--) {\n const j = Math.floor(Math.random() * (i + 1));\n const temp = items[i] as T;\n items[i] = items[j] as T;\n items[j] = temp;\n }\n for (let i = items.length - count; i < items.length; i++) {\n yield items[i]!;\n }\n });\n }\n\n pick(indexes: Set<number>): Dataset<T> {\n const source = this.#source;\n return new Dataset(async function* () {\n if (indexes.size === 0) {\n yield* source();\n return;\n }\n let idx = 0;\n for await (const item of source()) {\n if (indexes.has(idx)) {\n yield item;\n }\n idx++;\n }\n });\n }\n\n async toArray(): Promise<T[]> {\n const result: T[] = [];\n for await (const item of this.#source()) {\n result.push(item);\n }\n return result;\n }\n\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.#source()[Symbol.asyncIterator]();\n }\n}\n\nfunction parseCSVLine(line: string): string[] {\n const fields: string[] = [];\n let current = '';\n let inQuotes = false;\n\n for (let i = 0; i < line.length; i++) {\n const char = line[i]!;\n if (inQuotes) {\n if (char === '\"') {\n if (i + 1 < line.length && line[i + 1] === '\"') {\n current += '\"';\n i++;\n } else {\n inQuotes = false;\n }\n } else {\n current += char;\n }\n } else {\n if (char === '\"' && current === '') {\n inQuotes = true;\n } else if (char === ',') {\n fields.push(current);\n current = '';\n } else {\n current += char;\n }\n }\n }\n fields.push(current);\n return fields;\n}\n\nfunction loadJSON<T>(filePath: string): () => AsyncIterable<T> {\n return async function* () {\n const content = await readFile(filePath, 'utf-8');\n const data = JSON.parse(content);\n if (!Array.isArray(data)) {\n throw new Error(`JSON file \"${filePath}\" does not contain an array`);\n }\n yield* data;\n };\n}\n\nfunction loadJSONL<T>(filePath: string): () => AsyncIterable<T> {\n return async function* () {\n const rl = createInterface({\n input: createReadStream(filePath, 'utf-8'),\n crlfDelay: Infinity,\n });\n try {\n for await (const line of rl) {\n const trimmed = line.trim();\n if (trimmed) {\n yield JSON.parse(trimmed);\n }\n }\n } finally {\n rl.close();\n }\n };\n}\n\nfunction loadCSV(\n filePath: string,\n): () => AsyncIterable<Record<string, string>> {\n return async function* () {\n const rl = createInterface({\n input: createReadStream(filePath, 'utf-8'),\n crlfDelay: Infinity,\n });\n try {\n let headers: string[] | undefined;\n for await (const line of rl) {\n const trimmed = line.trim();\n if (!trimmed) continue;\n const fields = parseCSVLine(trimmed);\n if (!headers) {\n headers = fields;\n continue;\n }\n const row: Record<string, string> = {};\n for (let i = 0; i < headers.length; i++) {\n row[headers[i]!] = fields[i] ?? '';\n }\n yield row;\n }\n } finally {\n rl.close();\n }\n };\n}\n\nexport function dataset<T>(\n source: T[] | string | AsyncIterable<T>,\n): Dataset<T> {\n if (Array.isArray(source)) {\n return new Dataset(async function* () {\n yield* source;\n });\n }\n\n if (typeof source === 'object' && Symbol.asyncIterator in source) {\n return new Dataset(() => source);\n }\n\n const ext = extname(source).toLowerCase();\n switch (ext) {\n case '.json':\n return new Dataset(loadJSON<T>(source));\n case '.jsonl':\n return new Dataset(loadJSONL<T>(source));\n case '.csv':\n return new Dataset(loadCSV(source) as () => AsyncIterable<T>);\n default:\n throw new Error(\n `Unsupported file extension \"${ext}\" for dataset file \"${source}\". Supported: .json, .jsonl, .csv`,\n );\n }\n}\n", "export interface ParsedRecordSelection {\n indexes: Set<number>;\n normalized: string;\n}\n\nfunction parsePositiveInt(token: string): number {\n if (!/^\\d+$/.test(token)) {\n throw new Error(`Invalid record token \"${token}\"`);\n }\n const value = Number(token);\n if (!Number.isInteger(value) || value < 1) {\n throw new Error(`Record numbers must be >= 1. Received \"${token}\"`);\n }\n return value;\n}\n\nexport function parseRecordSelection(spec: string): ParsedRecordSelection {\n const trimmed = spec.trim();\n if (!trimmed) {\n return { indexes: new Set(), normalized: '' };\n }\n\n const indexes = new Set<number>();\n const parts = trimmed\n .split(',')\n .map((part) => part.trim())\n .filter(Boolean);\n if (parts.length === 0) {\n throw new Error('Record selection is empty.');\n }\n\n for (const part of parts) {\n const rangeMatch = /^(\\d+)\\s*-\\s*(\\d+)$/.exec(part);\n if (rangeMatch) {\n const start = parsePositiveInt(rangeMatch[1]!);\n const end = parsePositiveInt(rangeMatch[2]!);\n if (end < start) {\n throw new Error(\n `Invalid range \"${part}\". Range end must be >= range start.`,\n );\n }\n for (let i = start; i <= end; i++) {\n indexes.add(i - 1);\n }\n continue;\n }\n\n const value = parsePositiveInt(part);\n indexes.add(value - 1);\n }\n\n return {\n indexes,\n normalized: Array.from(indexes)\n .sort((a, b) => a - b)\n .map((i) => String(i + 1))\n .join(','),\n };\n}\n\nexport function pickFromArray<T>(items: T[], indexes: Set<number>): T[] {\n if (indexes.size === 0) return items;\n return items.filter((_, i) => indexes.has(i));\n}\n\nexport async function* filterRecordsByIndex<T>(\n source: AsyncIterable<T>,\n indexes: Set<number>,\n): AsyncIterable<T> {\n if (indexes.size === 0) {\n for await (const item of source) {\n yield item;\n }\n return;\n }\n\n let idx = 0;\n for await (const item of source) {\n if (indexes.has(idx)) {\n yield item;\n }\n idx++;\n }\n}\n", "import { EventEmitter } from 'node:events';\n\nimport type { Scorer, ScorerResult } from '../scorers/index.ts';\nimport type {\n CaseData,\n RunStore,\n RunSummary,\n ScoreData,\n} from '../store/index.ts';\n\nexport interface TaskResult {\n output: string;\n usage?: { inputTokens: number; outputTokens: number };\n}\n\nexport type TaskFn<T> = (input: T) => Promise<TaskResult>;\n\nexport interface EngineEvents {\n 'run:start': {\n runId: string;\n totalCases: number;\n name: string;\n model: string;\n };\n 'case:start': { runId: string; index: number; input: unknown };\n 'case:scored': {\n runId: string;\n index: number;\n input: unknown;\n output: string;\n expected: unknown;\n scores: Record<string, ScorerResult>;\n error?: unknown;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n };\n 'case:error': { runId: string; index: number; error: string };\n 'run:end': { runId: string; summary: RunSummary };\n}\n\nexport class EvalEmitter extends EventEmitter {\n override on<K extends keyof EngineEvents>(\n event: K,\n listener: (data: EngineEvents[K]) => void,\n ): this {\n return super.on(event, listener);\n }\n\n override emit<K extends keyof EngineEvents>(\n event: K,\n data: EngineEvents[K],\n ): boolean {\n return super.emit(event, data);\n }\n}\n\nexport interface EvalConfig<T> {\n name: string;\n model: string;\n dataset: AsyncIterable<T>;\n task: TaskFn<T>;\n scorers: Record<string, Scorer>;\n store: RunStore;\n emitter?: EvalEmitter;\n runId?: string;\n suiteId?: string;\n config?: Record<string, unknown>;\n maxConcurrency?: number;\n batchSize?: number;\n timeout?: number;\n trials?: number;\n threshold?: number;\n}\n\ninterface WrappedResult {\n output: string;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n error?: unknown;\n}\n\nfunction errorMessage(err: unknown): string {\n if (err instanceof Error) {\n return `${err.name}: ${err.message}`;\n }\n if (typeof err === 'string') return err;\n if (err == null) return 'Unknown error';\n try {\n return JSON.stringify(err);\n } catch {\n return String(err);\n }\n}\n\nfunction serializeError(err: unknown): string {\n if (err instanceof Error) {\n return JSON.stringify({\n name: err.name,\n message: err.message,\n stack: err.stack,\n cause:\n err.cause instanceof Error\n ? {\n name: err.cause.name,\n message: err.cause.message,\n }\n : err.cause,\n });\n }\n if (typeof err === 'string') return JSON.stringify({ message: err });\n if (err == null) return JSON.stringify({ message: 'Unknown error' });\n try {\n return JSON.stringify(err);\n } catch {\n return JSON.stringify({ message: String(err) });\n }\n}\n\nfunction failureScores(\n scorerNames: string[],\n error: unknown,\n): Record<string, ScorerResult> {\n const reason = `Task failed: ${errorMessage(error)}`;\n const scores: Record<string, ScorerResult> = {};\n for (const scorerName of scorerNames) {\n scores[scorerName] = { score: 0, reason };\n }\n return scores;\n}\n\nfunction createSemaphore(maxConcurrency: number) {\n let active = 0;\n const queue: Array<() => void> = [];\n\n return {\n async acquire(): Promise<void> {\n if (active < maxConcurrency) {\n active++;\n return;\n }\n return new Promise<void>((resolve) => queue.push(resolve));\n },\n release(): void {\n active--;\n const next = queue.shift();\n if (next) {\n active++;\n next();\n }\n },\n };\n}\n\nasync function wrapTask<T>(\n task: TaskFn<T>,\n input: T,\n timeoutMs: number,\n): Promise<WrappedResult> {\n const start = performance.now();\n let timerId: ReturnType<typeof setTimeout> | undefined;\n try {\n const result = await Promise.race([\n task(input),\n new Promise<never>((_, reject) => {\n timerId = setTimeout(\n () => reject(new Error('timeout exceeded')),\n timeoutMs,\n );\n }),\n ]);\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: result.output,\n latencyMs,\n tokensIn: result.usage?.inputTokens ?? 0,\n tokensOut: result.usage?.outputTokens ?? 0,\n };\n } catch (err) {\n clearTimeout(timerId);\n const latencyMs = Math.round(performance.now() - start);\n return {\n output: '',\n latencyMs,\n tokensIn: 0,\n tokensOut: 0,\n error: err,\n };\n }\n}\n\nfunction clampScore(score: number, scorerName: string): number {\n if (score < 0 || score > 1) {\n console.warn(\n `Scorer \"${scorerName}\" returned out-of-range score ${score}, clamping to 0..1`,\n );\n return Math.max(0, Math.min(1, score));\n }\n return score;\n}\n\nexport async function runEval<T>(config: EvalConfig<T>): Promise<RunSummary> {\n const {\n name,\n model,\n dataset: ds,\n task,\n scorers,\n store,\n suiteId,\n maxConcurrency = 10,\n batchSize,\n timeout = 30_000,\n trials = 1,\n threshold = 0.5,\n } = config;\n\n const emitter = config.emitter ?? new EvalEmitter();\n const runId =\n config.runId ??\n (() => {\n const resolvedSuiteId = suiteId ?? store.createSuite(name).id;\n return store.createRun({\n suite_id: resolvedSuiteId,\n name,\n model,\n config: config.config,\n });\n })();\n\n const items: Array<{ index: number; input: T }> = [];\n let idx = 0;\n for await (const item of ds) {\n items.push({ index: idx++, input: item });\n }\n\n emitter.emit('run:start', { runId, totalCases: items.length, name, model });\n\n const semaphore = createSemaphore(maxConcurrency);\n const scorerNames = Object.keys(scorers);\n\n const allCaseScores: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }> = [];\n\n const processItem = async ({ index, input }: { index: number; input: T }) => {\n await semaphore.acquire();\n try {\n emitter.emit('case:start', { runId, index, input });\n\n let finalResult: WrappedResult;\n let finalScores: Record<string, ScorerResult>;\n\n if (trials > 1) {\n const trialResults: Array<{\n result: WrappedResult;\n scores: Record<string, ScorerResult>;\n }> = [];\n\n for (let t = 0; t < trials; t++) {\n const result = await wrapTask(task, input, timeout);\n if (result.error) {\n trialResults.push({\n result,\n scores: failureScores(scorerNames, result.error),\n });\n } else {\n const scores: Record<string, ScorerResult> = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: result.output,\n expected: (input as Record<string, unknown>).expected,\n });\n scores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n trialResults.push({ result, scores });\n }\n }\n\n const lastSuccessful = [...trialResults]\n .reverse()\n .find((t) => !t.result.error);\n const baseResult =\n lastSuccessful?.result ??\n trialResults[trialResults.length - 1]!.result;\n finalResult = {\n output: baseResult.output,\n latencyMs: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.latencyMs, 0) /\n trials,\n ),\n tokensIn: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensIn, 0) /\n trials,\n ),\n tokensOut: Math.round(\n trialResults.reduce((sum, t) => sum + t.result.tokensOut, 0) /\n trials,\n ),\n error: lastSuccessful ? undefined : baseResult.error,\n };\n\n finalScores = {};\n for (const sName of scorerNames) {\n const meanScore =\n trialResults.reduce((sum, t) => sum + t.scores[sName]!.score, 0) /\n trials;\n finalScores[sName] = {\n score: meanScore,\n reason:\n trialResults[trialResults.length - 1]!.scores[sName]?.reason,\n metadata:\n trialResults[trialResults.length - 1]!.scores[sName]?.metadata,\n };\n }\n } else {\n finalResult = await wrapTask(task, input, timeout);\n if (finalResult.error) {\n finalScores = failureScores(scorerNames, finalResult.error);\n } else {\n finalScores = {};\n for (const [sName, scorer] of Object.entries(scorers)) {\n const sr = await scorer({\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n });\n finalScores[sName] = {\n score: clampScore(sr.score, sName),\n reason: sr.reason,\n metadata: sr.metadata,\n };\n }\n }\n }\n\n const caseId = crypto.randomUUID();\n\n const caseData: CaseData = {\n id: caseId,\n run_id: runId,\n idx: index,\n input,\n output: finalResult.output || null,\n expected: (input as Record<string, unknown>).expected,\n latency_ms: finalResult.latencyMs,\n tokens_in: finalResult.tokensIn,\n tokens_out: finalResult.tokensOut,\n error: finalResult.error\n ? serializeError(finalResult.error)\n : undefined,\n };\n store.saveCases([caseData]);\n\n const scoreDataList: ScoreData[] = scorerNames.map((sName) => ({\n id: crypto.randomUUID(),\n case_id: caseId,\n scorer_name: sName,\n score: finalScores[sName]!.score,\n reason: finalScores[sName]!.reason,\n }));\n store.saveScores(scoreDataList);\n\n allCaseScores.push({\n index,\n scores: Object.fromEntries(\n scorerNames.map((sName) => [sName, finalScores[sName]!.score]),\n ),\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n\n if (finalResult.error) {\n emitter.emit('case:error', {\n runId,\n index,\n error: errorMessage(finalResult.error),\n });\n }\n\n emitter.emit('case:scored', {\n runId,\n index,\n input,\n output: finalResult.output,\n expected: (input as Record<string, unknown>).expected,\n scores: finalScores,\n error: finalResult.error,\n latencyMs: finalResult.latencyMs,\n tokensIn: finalResult.tokensIn,\n tokensOut: finalResult.tokensOut,\n });\n } finally {\n semaphore.release();\n }\n };\n\n const batches = batchSize\n ? Array.from({ length: Math.ceil(items.length / batchSize) }, (_, i) =>\n items.slice(i * batchSize, (i + 1) * batchSize),\n )\n : [items];\n\n try {\n for (const batch of batches) {\n await Promise.all(batch.map(processItem));\n }\n } catch (err) {\n store.finishRun(runId, 'failed');\n throw err;\n }\n\n const summary = computeSummary(allCaseScores, scorerNames, threshold);\n store.finishRun(runId, 'completed', summary);\n emitter.emit('run:end', { runId, summary });\n\n return summary;\n}\n\nfunction computeSummary(\n cases: Array<{\n index: number;\n scores: Record<string, number>;\n latencyMs: number;\n tokensIn: number;\n tokensOut: number;\n }>,\n scorerNames: string[],\n threshold: number,\n): RunSummary {\n const totalCases = cases.length;\n let passCount = 0;\n let failCount = 0;\n let totalLatencyMs = 0;\n let totalTokensIn = 0;\n let totalTokensOut = 0;\n\n const scoreSums: Record<string, number> = {};\n for (const name of scorerNames) {\n scoreSums[name] = 0;\n }\n\n for (const c of cases) {\n totalLatencyMs += c.latencyMs;\n totalTokensIn += c.tokensIn;\n totalTokensOut += c.tokensOut;\n\n let allPass = true;\n for (const name of scorerNames) {\n const score = c.scores[name] ?? 0;\n scoreSums[name]! += score;\n if (score < threshold) allPass = false;\n }\n if (allPass) passCount++;\n else failCount++;\n }\n\n const meanScores: Record<string, number> = {};\n for (const name of scorerNames) {\n meanScores[name] = totalCases > 0 ? scoreSums[name]! / totalCases : 0;\n }\n\n return {\n totalCases,\n passCount,\n failCount,\n meanScores,\n totalLatencyMs,\n totalTokensIn,\n totalTokensOut,\n };\n}\n", "import { mkdirSync } from 'node:fs';\nimport { dirname } from 'node:path';\nimport { DatabaseSync } from 'node:sqlite';\n\nimport DDL from './ddl.sqlite.sql';\n\nexport interface SuiteRow {\n id: string;\n name: string;\n created_at: number;\n}\n\nexport interface RunRow {\n id: string;\n suite_id: string;\n name: string;\n model: string;\n config: Record<string, unknown> | null;\n started_at: number;\n finished_at: number | null;\n status: 'running' | 'completed' | 'failed';\n summary: RunSummary | null;\n}\n\nexport interface CaseRow {\n id: string;\n run_id: string;\n idx: number;\n input: unknown;\n output: string | null;\n expected: unknown | null;\n latency_ms: number;\n tokens_in: number;\n tokens_out: number;\n error: string | null;\n}\n\nexport interface CaseWithScores extends CaseRow {\n scores: Array<{ scorer_name: string; score: number; reason: string | null }>;\n}\n\nexport interface ScoreRow {\n id: string;\n case_id: string;\n scorer_name: string;\n score: number;\n reason: string | null;\n}\n\nexport interface RunSummary {\n totalCases: number;\n passCount: number;\n failCount: number;\n meanScores: Record<string, number>;\n totalLatencyMs: number;\n totalTokensIn: number;\n totalTokensOut: number;\n}\n\nexport interface PromptRow {\n id: string;\n name: string;\n version: number;\n content: string;\n created_at: number;\n}\n\nexport interface CaseData {\n id: string;\n run_id: string;\n idx: number;\n input: unknown;\n output: string | null;\n expected?: unknown;\n latency_ms: number;\n tokens_in: number;\n tokens_out: number;\n error?: string;\n}\n\nexport interface ScoreData {\n id: string;\n case_id: string;\n scorer_name: string;\n score: number;\n reason?: string;\n}\n\nexport class RunStore {\n #db: DatabaseSync;\n #statements = new Map<string, ReturnType<DatabaseSync['prepare']>>();\n\n #stmt(sql: string): ReturnType<DatabaseSync['prepare']> {\n let stmt = this.#statements.get(sql);\n if (!stmt) {\n stmt = this.#db.prepare(sql);\n this.#statements.set(sql, stmt);\n }\n return stmt;\n }\n\n #transaction<T>(fn: () => T): T {\n this.#db.exec('BEGIN TRANSACTION');\n try {\n const result = fn();\n this.#db.exec('COMMIT');\n return result;\n } catch (error) {\n this.#db.exec('ROLLBACK');\n throw error;\n }\n }\n\n constructor(pathOrDb?: string | DatabaseSync) {\n if (pathOrDb instanceof DatabaseSync) {\n this.#db = pathOrDb;\n } else {\n const dbPath = pathOrDb ?? '.evals/store.db';\n mkdirSync(dirname(dbPath), { recursive: true });\n this.#db = new DatabaseSync(dbPath);\n }\n this.#db.exec(DDL);\n this.#migrateRunsTableToSuiteRequired();\n this.#migratePromptsTableIfNeeded();\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_prompts_name_version ON prompts(name, version DESC)',\n );\n }\n\n #migratePromptsTableIfNeeded(): void {\n const columns = this.#stmt('PRAGMA table_info(prompts)').all() as Array<{\n name: string;\n }>;\n\n if (columns.length === 0) return;\n if (columns.some((column) => column.name === 'version')) return;\n\n this.#transaction(() => {\n this.#db.exec('ALTER TABLE prompts RENAME TO prompts_legacy');\n this.#db.exec(`\n CREATE TABLE prompts (\n id TEXT PRIMARY KEY,\n name TEXT NOT NULL,\n version INTEGER NOT NULL,\n content TEXT NOT NULL,\n created_at INTEGER NOT NULL DEFAULT (unixepoch() * 1000),\n UNIQUE(name, version)\n )\n `);\n this.#db.exec(`\n INSERT INTO prompts (id, name, version, content, created_at)\n SELECT id, name, 1, content, created_at\n FROM prompts_legacy\n `);\n this.#db.exec('DROP TABLE prompts_legacy');\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_prompts_created_at ON prompts(created_at)',\n );\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_prompts_name_version ON prompts(name, version DESC)',\n );\n });\n }\n\n #migrateRunsTableToSuiteRequired(): void {\n const runColumns = this.#stmt('PRAGMA table_info(runs)').all() as Array<{\n name: string;\n notnull: number;\n }>;\n\n if (runColumns.length === 0) return;\n\n const suiteColumn = runColumns.find((column) => column.name === 'suite_id');\n const hasNonNullSuite = suiteColumn?.notnull === 1;\n\n const runForeignKeys = this.#stmt(\n 'PRAGMA foreign_key_list(runs)',\n ).all() as Array<{\n from: string;\n on_delete: string;\n table: string;\n }>;\n const suiteForeignKey = runForeignKeys.find(\n (fk) => fk.from === 'suite_id' && fk.table === 'suites',\n );\n const hasCascadeDelete = suiteForeignKey?.on_delete === 'CASCADE';\n\n if (hasNonNullSuite && hasCascadeDelete) return;\n\n this.#statements.clear();\n this.#transaction(() => {\n this.#db.exec(`\n CREATE TABLE runs_next (\n id TEXT PRIMARY KEY,\n suite_id TEXT NOT NULL,\n name TEXT NOT NULL,\n model TEXT NOT NULL,\n config TEXT,\n started_at INTEGER NOT NULL,\n finished_at INTEGER,\n status TEXT NOT NULL DEFAULT 'running' CHECK(status IN ('running', 'completed', 'failed')),\n summary TEXT,\n FOREIGN KEY (suite_id) REFERENCES suites(id) ON DELETE CASCADE\n )\n `);\n\n // Drop legacy orphaned runs that do not belong to a suite.\n this.#db.exec('DELETE FROM runs WHERE suite_id IS NULL');\n\n this.#db.exec(`\n INSERT INTO runs_next (id, suite_id, name, model, config, started_at, finished_at, status, summary)\n SELECT r.id, r.suite_id, r.name, r.model, r.config, r.started_at, r.finished_at, r.status, r.summary\n FROM runs r\n JOIN suites s ON s.id = r.suite_id\n `);\n\n this.#db.exec('DROP TABLE runs');\n this.#db.exec('ALTER TABLE runs_next RENAME TO runs');\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_runs_suite_id ON runs(suite_id)',\n );\n this.#db.exec(\n 'CREATE INDEX IF NOT EXISTS idx_runs_started_at ON runs(started_at)',\n );\n });\n this.#statements.clear();\n }\n\n createSuite(name: string): SuiteRow {\n const id = crypto.randomUUID();\n const now = Date.now();\n this.#stmt(\n 'INSERT INTO suites (id, name, created_at) VALUES (?, ?, ?)',\n ).run(id, name, now);\n return { id, name, created_at: now };\n }\n\n getSuite(id: string): SuiteRow | undefined {\n const row = this.#stmt('SELECT * FROM suites WHERE id = ?').get(id) as\n | { id: string; name: string; created_at: number }\n | undefined;\n return row ?? undefined;\n }\n\n renameSuite(id: string, name: string): void {\n this.#stmt('UPDATE suites SET name = ? WHERE id = ?').run(name, id);\n }\n\n renameRun(id: string, name: string): void {\n this.#stmt('UPDATE runs SET name = ? WHERE id = ?').run(name, id);\n }\n\n createRun(run: {\n suite_id: string;\n name: string;\n model: string;\n config?: Record<string, unknown>;\n }): string {\n const id = crypto.randomUUID();\n const now = Date.now();\n this.#stmt(\n 'INSERT INTO runs (id, suite_id, name, model, config, started_at) VALUES (?, ?, ?, ?, ?, ?)',\n ).run(\n id,\n run.suite_id,\n run.name,\n run.model,\n run.config ? JSON.stringify(run.config) : null,\n now,\n );\n return id;\n }\n\n finishRun(\n runId: string,\n status: 'completed' | 'failed',\n summary?: RunSummary,\n ): void {\n this.#stmt(\n 'UPDATE runs SET finished_at = ?, status = ?, summary = ? WHERE id = ?',\n ).run(Date.now(), status, summary ? JSON.stringify(summary) : null, runId);\n }\n\n saveCases(cases: CaseData[]): void {\n this.#transaction(() => {\n const stmt = this.#stmt(\n 'INSERT INTO cases (id, run_id, idx, input, output, expected, latency_ms, tokens_in, tokens_out, error) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)',\n );\n for (const c of cases) {\n stmt.run(\n c.id,\n c.run_id,\n c.idx,\n JSON.stringify(c.input),\n c.output,\n c.expected != null ? JSON.stringify(c.expected) : null,\n c.latency_ms,\n c.tokens_in,\n c.tokens_out,\n c.error ?? null,\n );\n }\n });\n }\n\n saveScores(scores: ScoreData[]): void {\n this.#transaction(() => {\n const stmt = this.#stmt(\n 'INSERT INTO scores (id, case_id, scorer_name, score, reason) VALUES (?, ?, ?, ?, ?)',\n );\n for (const s of scores) {\n stmt.run(s.id, s.case_id, s.scorer_name, s.score, s.reason ?? null);\n }\n });\n }\n\n getRun(runId: string): RunRow | undefined {\n const row = this.#stmt('SELECT * FROM runs WHERE id = ?').get(runId) as\n | {\n id: string;\n suite_id: string;\n name: string;\n model: string;\n config: string | null;\n started_at: number;\n finished_at: number | null;\n status: string;\n summary: string | null;\n }\n | undefined;\n if (!row) return undefined;\n return {\n id: row.id,\n suite_id: row.suite_id,\n name: row.name,\n model: row.model,\n config: row.config ? JSON.parse(row.config) : null,\n started_at: row.started_at,\n finished_at: row.finished_at,\n status: row.status as RunRow['status'],\n summary: row.summary ? JSON.parse(row.summary) : null,\n };\n }\n\n listRuns(suiteId?: string): RunRow[] {\n const sql = suiteId\n ? 'SELECT * FROM runs WHERE suite_id = ? ORDER BY started_at'\n : 'SELECT * FROM runs ORDER BY started_at';\n const rows = (\n suiteId ? this.#stmt(sql).all(suiteId) : this.#stmt(sql).all()\n ) as Array<{\n id: string;\n suite_id: string;\n name: string;\n model: string;\n config: string | null;\n started_at: number;\n finished_at: number | null;\n status: string;\n summary: string | null;\n }>;\n return rows.map((row) => ({\n id: row.id,\n suite_id: row.suite_id,\n name: row.name,\n model: row.model,\n config: row.config ? JSON.parse(row.config) : null,\n started_at: row.started_at,\n finished_at: row.finished_at,\n status: row.status as RunRow['status'],\n summary: row.summary ? JSON.parse(row.summary) : null,\n }));\n }\n\n getCases(runId: string): CaseRow[] {\n const rows = this.#stmt(\n 'SELECT * FROM cases WHERE run_id = ? ORDER BY idx',\n ).all(runId) as Array<{\n id: string;\n run_id: string;\n idx: number;\n input: string;\n output: string | null;\n expected: string | null;\n latency_ms: number;\n tokens_in: number;\n tokens_out: number;\n error: string | null;\n }>;\n return rows.map((row) => ({\n id: row.id,\n run_id: row.run_id,\n idx: row.idx,\n input: JSON.parse(row.input),\n output: row.output,\n expected: row.expected ? JSON.parse(row.expected) : null,\n latency_ms: row.latency_ms,\n tokens_in: row.tokens_in,\n tokens_out: row.tokens_out,\n error: row.error,\n }));\n }\n\n getFailingCases(runId: string, threshold = 0.5): CaseWithScores[] {\n const rows = this.#stmt(\n `SELECT c.*, s.scorer_name, s.score, s.reason as score_reason\n FROM cases c\n JOIN scores s ON s.case_id = c.id\n WHERE c.run_id = ? AND s.score < ?\n ORDER BY c.idx`,\n ).all(runId, threshold) as Array<{\n id: string;\n run_id: string;\n idx: number;\n input: string;\n output: string | null;\n expected: string | null;\n latency_ms: number;\n tokens_in: number;\n tokens_out: number;\n error: string | null;\n scorer_name: string;\n score: number;\n score_reason: string | null;\n }>;\n\n const caseMap = new Map<string, CaseWithScores>();\n for (const row of rows) {\n let c = caseMap.get(row.id);\n if (!c) {\n c = {\n id: row.id,\n run_id: row.run_id,\n idx: row.idx,\n input: JSON.parse(row.input),\n output: row.output,\n expected: row.expected ? JSON.parse(row.expected) : null,\n latency_ms: row.latency_ms,\n tokens_in: row.tokens_in,\n tokens_out: row.tokens_out,\n error: row.error,\n scores: [],\n };\n caseMap.set(row.id, c);\n }\n c.scores.push({\n scorer_name: row.scorer_name,\n score: row.score,\n reason: row.score_reason,\n });\n }\n return Array.from(caseMap.values());\n }\n\n getRunSummary(runId: string, threshold = 0.5): RunSummary {\n const totals = this.#stmt(\n `SELECT\n COUNT(DISTINCT c.id) as totalCases,\n COALESCE(SUM(c.latency_ms), 0) as totalLatencyMs,\n COALESCE(SUM(c.tokens_in), 0) as totalTokensIn,\n COALESCE(SUM(c.tokens_out), 0) as totalTokensOut\n FROM cases c WHERE c.run_id = ?`,\n ).get(runId) as {\n totalCases: number;\n totalLatencyMs: number;\n totalTokensIn: number;\n totalTokensOut: number;\n };\n\n const scorerMeans = this.#stmt(\n `SELECT s.scorer_name, AVG(s.score) as meanScore\n FROM scores s\n JOIN cases c ON c.id = s.case_id\n WHERE c.run_id = ?\n GROUP BY s.scorer_name`,\n ).all(runId) as Array<{ scorer_name: string; meanScore: number }>;\n\n const meanScores: Record<string, number> = {};\n for (const row of scorerMeans) {\n meanScores[row.scorer_name] = row.meanScore;\n }\n\n const passFail = this.#stmt(\n `SELECT c.id,\n MIN(s.score) as minScore\n FROM cases c\n JOIN scores s ON s.case_id = c.id\n WHERE c.run_id = ?\n GROUP BY c.id`,\n ).all(runId) as Array<{ id: string; minScore: number }>;\n\n let passCount = 0;\n let failCount = 0;\n for (const row of passFail) {\n if (row.minScore >= threshold) passCount++;\n else failCount++;\n }\n\n return {\n totalCases: totals.totalCases,\n passCount,\n failCount,\n meanScores,\n totalLatencyMs: totals.totalLatencyMs,\n totalTokensIn: totals.totalTokensIn,\n totalTokensOut: totals.totalTokensOut,\n };\n }\n\n findSuiteByName(name: string): SuiteRow | undefined {\n const row = this.#stmt(\n 'SELECT * FROM suites WHERE name = ? ORDER BY created_at DESC LIMIT 1',\n ).get(name) as { id: string; name: string; created_at: number } | undefined;\n return row ?? undefined;\n }\n\n getLatestCompletedRun(suiteId: string, model?: string): RunRow | undefined {\n const sql = model\n ? 'SELECT * FROM runs WHERE suite_id = ? AND status = ? AND model = ? ORDER BY started_at DESC LIMIT 1'\n : 'SELECT * FROM runs WHERE suite_id = ? AND status = ? ORDER BY started_at DESC LIMIT 1';\n const row = (\n model\n ? this.#stmt(sql).get(suiteId, 'completed', model)\n : this.#stmt(sql).get(suiteId, 'completed')\n ) as\n | {\n id: string;\n suite_id: string;\n name: string;\n model: string;\n config: string | null;\n started_at: number;\n finished_at: number | null;\n status: string;\n summary: string | null;\n }\n | undefined;\n if (!row) return undefined;\n return {\n id: row.id,\n suite_id: row.suite_id,\n name: row.name,\n model: row.model,\n config: row.config ? JSON.parse(row.config) : null,\n started_at: row.started_at,\n finished_at: row.finished_at,\n status: row.status as RunRow['status'],\n summary: row.summary ? JSON.parse(row.summary) : null,\n };\n }\n\n listSuites(): SuiteRow[] {\n const rows = this.#stmt(\n 'SELECT * FROM suites ORDER BY created_at DESC',\n ).all() as Array<{ id: string; name: string; created_at: number }>;\n return rows.map((row) => ({\n id: row.id,\n name: row.name,\n created_at: row.created_at,\n }));\n }\n\n createPrompt(name: string, content: string): PromptRow {\n const id = crypto.randomUUID();\n const now = Date.now();\n\n const latest = this.#stmt(\n 'SELECT MAX(version) as latestVersion FROM prompts WHERE name = ?',\n ).get(name) as { latestVersion: number | null } | undefined;\n const version = (latest?.latestVersion ?? 0) + 1;\n\n this.#stmt(\n 'INSERT INTO prompts (id, name, version, content, created_at) VALUES (?, ?, ?, ?, ?)',\n ).run(id, name, version, content, now);\n return { id, name, version, content, created_at: now };\n }\n\n listPrompts(): PromptRow[] {\n const rows = this.#stmt(\n 'SELECT * FROM prompts ORDER BY name COLLATE NOCASE ASC, version DESC',\n ).all() as Array<{\n id: string;\n name: string;\n version: number;\n content: string;\n created_at: number;\n }>;\n return rows.map((row) => ({\n id: row.id,\n name: row.name,\n version: row.version,\n content: row.content,\n created_at: row.created_at,\n }));\n }\n\n getPrompt(id: string): PromptRow | undefined {\n const row = this.#stmt('SELECT * FROM prompts WHERE id = ?').get(id) as\n | {\n id: string;\n name: string;\n version: number;\n content: string;\n created_at: number;\n }\n | undefined;\n if (!row) return undefined;\n return {\n id: row.id,\n name: row.name,\n version: row.version,\n content: row.content,\n created_at: row.created_at,\n };\n }\n\n deletePrompt(id: string): void {\n this.#stmt('DELETE FROM prompts WHERE id = ?').run(id);\n }\n\n resetRun(id: string): void {\n this.#transaction(() => {\n this.#stmt('DELETE FROM cases WHERE run_id = ?').run(id);\n this.#stmt(\n 'UPDATE runs SET status = ?, started_at = ?, finished_at = NULL, summary = NULL WHERE id = ?',\n ).run('running', Date.now(), id);\n });\n }\n\n deleteRun(id: string): void {\n this.#stmt('DELETE FROM runs WHERE id = ?').run(id);\n }\n\n deleteSuite(id: string): void {\n this.#stmt('DELETE FROM suites WHERE id = ?').run(id);\n }\n}\n", "import { dataset } from '../dataset/index.ts';\nimport {\n filterRecordsByIndex,\n parseRecordSelection,\n} from '../dataset/record-selection.ts';\nimport type { TaskFn, TaskResult } from '../engine/index.ts';\nimport { EvalEmitter, runEval } from '../engine/index.ts';\nimport type { CaseResult, Reporter } from '../reporters/index.ts';\nimport type { Scorer } from '../scorers/index.ts';\nimport type { RunSummary } from '../store/index.ts';\nimport { RunStore } from '../store/index.ts';\n\ninterface BaseEvalOptions<T> {\n /** Human-readable name for this evaluation run, used in reports and filenames. */\n name: string;\n /** The dataset of input/expected pairs to evaluate against. */\n dataset: AsyncIterable<T>;\n /** Named scoring functions that assess model output quality. Each key becomes a column in reports. */\n scorers: Record<string, Scorer>;\n /** Reporters that receive lifecycle events and produce output (console, JSON, CSV, etc.). */\n reporters: Reporter[];\n /** Persistent store for run history. Accepts a `RunStore` instance or a file path for SQLite storage. */\n store: RunStore;\n /** Maximum number of dataset cases to run concurrently. Defaults to unbounded. */\n maxConcurrency?: number;\n /** Per-case timeout in milliseconds before the case is marked as failed. */\n timeout?: number;\n /** Number of times to run each case and average the scores. Useful for reducing LLM variance. */\n trials?: number;\n /** Minimum average score (0\u20131) required to consider the run passing. Defaults to `0.5`. */\n threshold?: number;\n}\n\nexport interface EvaluateOptions<T> extends BaseEvalOptions<T> {\n /** The model identifier passed to the task function. */\n model: string;\n /** Function that calls the model under evaluation and returns its output for a single dataset item. */\n task: TaskFn<T>;\n /** Associates this run with an existing suite ID for grouped comparisons. */\n suiteId?: string;\n}\n\nexport interface EvaluateEachOptions<\n T,\n V extends { name: string },\n> extends BaseEvalOptions<T> {\n /** List of model variants to evaluate. Each variant runs the full dataset independently. */\n models: V[];\n /** Function that calls the model under evaluation for a given dataset item and model variant. */\n task: (input: T, variant: V) => Promise<TaskResult>;\n}\n\ntype Selection =\n | { type: 'all' }\n | { type: 'failed' }\n | { type: 'cases'; indexes: Set<number> }\n | { type: 'sample'; count: number };\n\nexport class EvalAssertionError extends Error {\n summary: RunSummary | RunSummary[];\n\n constructor(summary: RunSummary | RunSummary[]) {\n const msg = Array.isArray(summary)\n ? `Eval assertion failed: ${summary.filter((s) => s.failCount > 0).length} of ${summary.length} model runs have failures`\n : `Eval assertion failed: ${summary.failCount} of ${summary.totalCases} cases failed`;\n super(msg);\n this.name = 'EvalAssertionError';\n this.summary = summary;\n }\n}\n\nfunction resolveFailedIndexes(\n store: RunStore,\n suiteName: string,\n model?: string,\n threshold?: number,\n): Set<number> {\n const suite = store.findSuiteByName(suiteName);\n if (!suite) {\n console.warn(\n `No previous suite found for '${suiteName}'. Running all cases.`,\n );\n return new Set();\n }\n const run = store.getLatestCompletedRun(suite.id, model);\n if (!run) {\n console.warn(\n `No previous completed run found for '${suiteName}'${model ? ` [${model}]` : ''}. Running all cases.`,\n );\n return new Set();\n }\n const failingCases = store.getFailingCases(run.id, threshold);\n if (failingCases.length === 0) {\n console.warn(`No failed cases in previous run. Running all cases.`);\n return new Set();\n }\n console.warn(\n `Retrying ${failingCases.length} failed cases from previous run`,\n );\n return new Set(failingCases.map((c) => c.idx));\n}\n\nexport class EvalBuilder<R> implements PromiseLike<R> {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n #options: EvaluateOptions<any> | EvaluateEachOptions<any, any>;\n #selection: Selection = { type: 'all' };\n #shouldAssert = false;\n\n constructor(\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n options: EvaluateOptions<any> | EvaluateEachOptions<any, any>,\n ) {\n this.#options = options;\n }\n\n #setSelection(selection: Selection): this {\n if (this.#selection.type !== 'all') {\n throw new Error(\n `Cannot combine .${this.#selection.type}() with .${selection.type}()`,\n );\n }\n this.#selection = selection;\n return this;\n }\n\n failed(): this {\n return this.#setSelection({ type: 'failed' });\n }\n\n cases(spec: string): this {\n const { indexes } = parseRecordSelection(spec);\n return this.#setSelection({ type: 'cases', indexes });\n }\n\n sample(count: number): this {\n if (count < 1) {\n throw new Error('Sample count must be >= 1');\n }\n return this.#setSelection({ type: 'sample', count });\n }\n\n assert(): this {\n this.#shouldAssert = true;\n return this;\n }\n\n then<TResult1 = R, TResult2 = never>(\n onfulfilled?:\n | ((value: R) => TResult1 | PromiseLike<TResult1>)\n | null\n | undefined,\n onrejected?:\n | ((reason: unknown) => TResult2 | PromiseLike<TResult2>)\n | null\n | undefined,\n ): Promise<TResult1 | TResult2> {\n return this.#execute().then(onfulfilled, onrejected);\n }\n\n async #execute(): Promise<R> {\n if ('models' in this.#options) {\n return this.#executeMulti() as Promise<R>;\n }\n return this.#executeSingle() as Promise<R>;\n }\n\n #applyDatasetFilter(ds: AsyncIterable<unknown>): AsyncIterable<unknown> {\n switch (this.#selection.type) {\n case 'all':\n return ds;\n case 'cases':\n return this.#selection.indexes.size > 0\n ? filterRecordsByIndex(ds, this.#selection.indexes)\n : ds;\n case 'sample':\n return dataset(ds).sample(this.#selection.count);\n case 'failed':\n return ds;\n }\n }\n\n async #executeSingle(): Promise<RunSummary> {\n const options = this.#options as EvaluateOptions<unknown>;\n let ds: AsyncIterable<unknown> = options.dataset;\n\n if (this.#selection.type === 'failed') {\n const indexes = resolveFailedIndexes(\n options.store,\n options.name,\n options.model,\n options.threshold,\n );\n if (indexes.size > 0) {\n ds = filterRecordsByIndex(ds, indexes);\n }\n } else {\n ds = this.#applyDatasetFilter(ds);\n }\n\n const result = await evaluateSingle({ ...options, dataset: ds });\n\n if (this.#shouldAssert && result.failCount > 0) {\n throw new EvalAssertionError(result);\n }\n\n return result;\n }\n\n async #executeMulti(): Promise<RunSummary[]> {\n const options = this.#options as EvaluateEachOptions<\n unknown,\n { name: string }\n >;\n\n let result: RunSummary[];\n\n if (this.#selection.type === 'failed') {\n const perModelIndexes = new Map<string, Set<number>>();\n for (const variant of options.models) {\n perModelIndexes.set(\n variant.name,\n resolveFailedIndexes(\n options.store,\n options.name,\n variant.name,\n options.threshold,\n ),\n );\n }\n result = await evaluateEach(options, perModelIndexes);\n } else {\n const filtered = this.#applyDatasetFilter(options.dataset);\n result = await evaluateEach({ ...options, dataset: filtered });\n }\n\n if (this.#shouldAssert && result.some((s) => s.failCount > 0)) {\n throw new EvalAssertionError(result);\n }\n\n return result;\n }\n}\n\nexport function evaluate<T>(\n options: EvaluateOptions<T>,\n): EvalBuilder<RunSummary>;\nexport function evaluate<T, V extends { name: string }>(\n options: EvaluateEachOptions<T, V>,\n): EvalBuilder<RunSummary[]>;\nexport function evaluate<T, V extends { name: string }>(\n options: EvaluateOptions<T> | EvaluateEachOptions<T, V>,\n): EvalBuilder<RunSummary> | EvalBuilder<RunSummary[]> {\n if ('models' in options) {\n return new EvalBuilder<RunSummary[]>(options);\n }\n return new EvalBuilder<RunSummary>(options);\n}\n\nfunction wireReporters(reporters: Reporter[]) {\n const emitter = new EvalEmitter();\n const cases: CaseResult[] = [];\n let runId = '';\n\n emitter.on('run:start', (data) => {\n runId = data.runId;\n for (const r of reporters) r.onRunStart?.(data);\n });\n\n emitter.on('case:scored', (data) => {\n const result: CaseResult = {\n runId: data.runId,\n index: data.index,\n input: data.input,\n output: data.output,\n expected: data.expected,\n scores: data.scores,\n error: data.error ?? null,\n latencyMs: data.latencyMs,\n tokensIn: data.tokensIn,\n tokensOut: data.tokensOut,\n };\n cases.push(result);\n for (const r of reporters) r.onCaseEnd?.(result);\n });\n\n return { emitter, cases, getRunId: () => runId };\n}\n\nasync function notifyRunEnd(\n reporters: Reporter[],\n data: {\n runId: string;\n name: string;\n model: string;\n summary: RunSummary;\n cases: CaseResult[];\n threshold: number;\n },\n): Promise<void> {\n data.cases.sort((a, b) => a.index - b.index);\n await Promise.all(reporters.map((r) => r.onRunEnd?.(data)));\n}\n\nasync function evaluateSingle<T>(\n options: EvaluateOptions<T>,\n): Promise<RunSummary> {\n const threshold = options.threshold ?? 0.5;\n const { emitter, cases, getRunId } = wireReporters(options.reporters);\n\n const summary = await runEval({\n name: options.name,\n model: options.model,\n dataset: options.dataset,\n task: options.task,\n scorers: options.scorers,\n store: options.store,\n emitter,\n suiteId: options.suiteId,\n maxConcurrency: options.maxConcurrency,\n timeout: options.timeout,\n trials: options.trials,\n threshold: options.threshold,\n });\n\n await notifyRunEnd(options.reporters, {\n runId: getRunId(),\n name: options.name,\n model: options.model,\n summary,\n cases,\n threshold,\n });\n\n return summary;\n}\n\nasync function evaluateEach<T, V extends { name: string }>(\n options: EvaluateEachOptions<T, V>,\n perModelFailedIndexes?: Map<string, Set<number>>,\n): Promise<RunSummary[]> {\n const items: T[] = [];\n for await (const item of options.dataset) {\n items.push(item);\n }\n\n const suite = options.store.createSuite(options.name);\n\n return Promise.all(\n options.models.map((variant) => {\n let ds: AsyncIterable<T> = dataset(items);\n const failedIndexes = perModelFailedIndexes?.get(variant.name);\n if (failedIndexes && failedIndexes.size > 0) {\n ds = filterRecordsByIndex(ds, failedIndexes);\n }\n return evaluateSingle({\n name: `${options.name} [${variant.name}]`,\n model: variant.name,\n dataset: ds,\n task: (input: T) => options.task(input, variant),\n scorers: options.scorers,\n reporters: options.reporters,\n store: options.store,\n suiteId: suite.id,\n maxConcurrency: options.maxConcurrency,\n timeout: options.timeout,\n trials: options.trials,\n threshold: options.threshold,\n });\n }),\n );\n}\n"],
|
|
5
|
+
"mappings": ";AAAA,SAAS,wBAAwB;AACjC,SAAS,gBAAgB;AACzB,SAAS,eAAe;AACxB,SAAS,uBAAuB;;;ACEhC,SAAS,iBAAiB,OAAuB;AAC/C,MAAI,CAAC,QAAQ,KAAK,KAAK,GAAG;AACxB,UAAM,IAAI,MAAM,yBAAyB,KAAK,GAAG;AAAA,EACnD;AACA,QAAM,QAAQ,OAAO,KAAK;AAC1B,MAAI,CAAC,OAAO,UAAU,KAAK,KAAK,QAAQ,GAAG;AACzC,UAAM,IAAI,MAAM,0CAA0C,KAAK,GAAG;AAAA,EACpE;AACA,SAAO;AACT;AAEO,SAAS,qBAAqB,MAAqC;AACxE,QAAM,UAAU,KAAK,KAAK;AAC1B,MAAI,CAAC,SAAS;AACZ,WAAO,EAAE,SAAS,oBAAI,IAAI,GAAG,YAAY,GAAG;AAAA,EAC9C;AAEA,QAAM,UAAU,oBAAI,IAAY;AAChC,QAAM,QAAQ,QACX,MAAM,GAAG,EACT,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,EACzB,OAAO,OAAO;AACjB,MAAI,MAAM,WAAW,GAAG;AACtB,UAAM,IAAI,MAAM,4BAA4B;AAAA,EAC9C;AAEA,aAAW,QAAQ,OAAO;AACxB,UAAM,aAAa,sBAAsB,KAAK,IAAI;AAClD,QAAI,YAAY;AACd,YAAM,QAAQ,iBAAiB,WAAW,CAAC,CAAE;AAC7C,YAAM,MAAM,iBAAiB,WAAW,CAAC,CAAE;AAC3C,UAAI,MAAM,OAAO;AACf,cAAM,IAAI;AAAA,UACR,kBAAkB,IAAI;AAAA,QACxB;AAAA,MACF;AACA,eAAS,IAAI,OAAO,KAAK,KAAK,KAAK;AACjC,gBAAQ,IAAI,IAAI,CAAC;AAAA,MACnB;AACA;AAAA,IACF;AAEA,UAAM,QAAQ,iBAAiB,IAAI;AACnC,YAAQ,IAAI,QAAQ,CAAC;AAAA,EACvB;AAEA,SAAO;AAAA,IACL;AAAA,IACA,YAAY,MAAM,KAAK,OAAO,EAC3B,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC,EACpB,IAAI,CAAC,MAAM,OAAO,IAAI,CAAC,CAAC,EACxB,KAAK,GAAG;AAAA,EACb;AACF;AAOA,gBAAuB,qBACrB,QACA,SACkB;AAClB,MAAI,QAAQ,SAAS,GAAG;AACtB,qBAAiB,QAAQ,QAAQ;AAC/B,YAAM;AAAA,IACR;AACA;AAAA,EACF;AAEA,MAAI,MAAM;AACV,mBAAiB,QAAQ,QAAQ;AAC/B,QAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,YAAM;AAAA,IACR;AACA;AAAA,EACF;AACF;;;ADjEO,IAAM,UAAN,MAAM,SAAuC;AAAA,EAClD;AAAA,EAEA,YAAY,QAAgC;AAC1C,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,IAAO,IAAmC;AACxC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,GAAG,IAAI;AAAA,MACf;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,OAAO,IAAgC;AACrC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,GAAG,IAAI,EAAG,OAAM;AAAA,MACtB;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,GAAuB;AAC3B,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,UAAI,QAAQ;AACZ,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,SAAS,EAAG;AAChB,cAAM;AACN;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,UAAsB;AACpB,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,YAAM,QAAa,CAAC;AACpB,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,KAAK,IAAI;AAAA,MACjB;AACA,eAAS,IAAI,MAAM,SAAS,GAAG,IAAI,GAAG,KAAK;AACzC,cAAM,IAAI,KAAK,MAAM,KAAK,OAAO,KAAK,IAAI,EAAE;AAC5C,cAAM,OAAO,MAAM,CAAC;AACpB,cAAM,CAAC,IAAI,MAAM,CAAC;AAClB,cAAM,CAAC,IAAI;AAAA,MACb;AACA,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAAA,EAEA,OAAO,GAAuB;AAC5B,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,YAAM,QAAa,CAAC;AACpB,uBAAiB,QAAQ,OAAO,GAAG;AACjC,cAAM,KAAK,IAAI;AAAA,MACjB;AACA,YAAM,QAAQ,KAAK,IAAI,KAAK,IAAI,GAAG,CAAC,GAAG,MAAM,MAAM;AACnD,eAAS,IAAI,MAAM,SAAS,GAAG,IAAI,MAAM,SAAS,QAAQ,GAAG,KAAK;AAChE,cAAM,IAAI,KAAK,MAAM,KAAK,OAAO,KAAK,IAAI,EAAE;AAC5C,cAAM,OAAO,MAAM,CAAC;AACpB,cAAM,CAAC,IAAI,MAAM,CAAC;AAClB,cAAM,CAAC,IAAI;AAAA,MACb;AACA,eAAS,IAAI,MAAM,SAAS,OAAO,IAAI,MAAM,QAAQ,KAAK;AACxD,cAAM,MAAM,CAAC;AAAA,MACf;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,KAAK,SAAkC;AACrC,UAAM,SAAS,KAAK;AACpB,WAAO,IAAI,SAAQ,mBAAmB;AACpC,UAAI,QAAQ,SAAS,GAAG;AACtB,eAAO,OAAO;AACd;AAAA,MACF;AACA,UAAI,MAAM;AACV,uBAAiB,QAAQ,OAAO,GAAG;AACjC,YAAI,QAAQ,IAAI,GAAG,GAAG;AACpB,gBAAM;AAAA,QACR;AACA;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAwB;AAC5B,UAAM,SAAc,CAAC;AACrB,qBAAiB,QAAQ,KAAK,QAAQ,GAAG;AACvC,aAAO,KAAK,IAAI;AAAA,IAClB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO,KAAK,QAAQ,EAAE,OAAO,aAAa,EAAE;AAAA,EAC9C;AACF;AAEA,SAAS,aAAa,MAAwB;AAC5C,QAAM,SAAmB,CAAC;AAC1B,MAAI,UAAU;AACd,MAAI,WAAW;AAEf,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,UAAM,OAAO,KAAK,CAAC;AACnB,QAAI,UAAU;AACZ,UAAI,SAAS,KAAK;AAChB,YAAI,IAAI,IAAI,KAAK,UAAU,KAAK,IAAI,CAAC,MAAM,KAAK;AAC9C,qBAAW;AACX;AAAA,QACF,OAAO;AACL,qBAAW;AAAA,QACb;AAAA,MACF,OAAO;AACL,mBAAW;AAAA,MACb;AAAA,IACF,OAAO;AACL,UAAI,SAAS,OAAO,YAAY,IAAI;AAClC,mBAAW;AAAA,MACb,WAAW,SAAS,KAAK;AACvB,eAAO,KAAK,OAAO;AACnB,kBAAU;AAAA,MACZ,OAAO;AACL,mBAAW;AAAA,MACb;AAAA,IACF;AAAA,EACF;AACA,SAAO,KAAK,OAAO;AACnB,SAAO;AACT;AAEA,SAAS,SAAY,UAA0C;AAC7D,SAAO,mBAAmB;AACxB,UAAM,UAAU,MAAM,SAAS,UAAU,OAAO;AAChD,UAAM,OAAO,KAAK,MAAM,OAAO;AAC/B,QAAI,CAAC,MAAM,QAAQ,IAAI,GAAG;AACxB,YAAM,IAAI,MAAM,cAAc,QAAQ,6BAA6B;AAAA,IACrE;AACA,WAAO;AAAA,EACT;AACF;AAEA,SAAS,UAAa,UAA0C;AAC9D,SAAO,mBAAmB;AACxB,UAAM,KAAK,gBAAgB;AAAA,MACzB,OAAO,iBAAiB,UAAU,OAAO;AAAA,MACzC,WAAW;AAAA,IACb,CAAC;AACD,QAAI;AACF,uBAAiB,QAAQ,IAAI;AAC3B,cAAM,UAAU,KAAK,KAAK;AAC1B,YAAI,SAAS;AACX,gBAAM,KAAK,MAAM,OAAO;AAAA,QAC1B;AAAA,MACF;AAAA,IACF,UAAE;AACA,SAAG,MAAM;AAAA,IACX;AAAA,EACF;AACF;AAEA,SAAS,QACP,UAC6C;AAC7C,SAAO,mBAAmB;AACxB,UAAM,KAAK,gBAAgB;AAAA,MACzB,OAAO,iBAAiB,UAAU,OAAO;AAAA,MACzC,WAAW;AAAA,IACb,CAAC;AACD,QAAI;AACF,UAAI;AACJ,uBAAiB,QAAQ,IAAI;AAC3B,cAAM,UAAU,KAAK,KAAK;AAC1B,YAAI,CAAC,QAAS;AACd,cAAM,SAAS,aAAa,OAAO;AACnC,YAAI,CAAC,SAAS;AACZ,oBAAU;AACV;AAAA,QACF;AACA,cAAM,MAA8B,CAAC;AACrC,iBAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;AACvC,cAAI,QAAQ,CAAC,CAAE,IAAI,OAAO,CAAC,KAAK;AAAA,QAClC;AACA,cAAM;AAAA,MACR;AAAA,IACF,UAAE;AACA,SAAG,MAAM;AAAA,IACX;AAAA,EACF;AACF;AAEO,SAAS,QACd,QACY;AACZ,MAAI,MAAM,QAAQ,MAAM,GAAG;AACzB,WAAO,IAAI,QAAQ,mBAAmB;AACpC,aAAO;AAAA,IACT,CAAC;AAAA,EACH;AAEA,MAAI,OAAO,WAAW,YAAY,OAAO,iBAAiB,QAAQ;AAChE,WAAO,IAAI,QAAQ,MAAM,MAAM;AAAA,EACjC;AAEA,QAAM,MAAM,QAAQ,MAAM,EAAE,YAAY;AACxC,UAAQ,KAAK;AAAA,IACX,KAAK;AACH,aAAO,IAAI,QAAQ,SAAY,MAAM,CAAC;AAAA,IACxC,KAAK;AACH,aAAO,IAAI,QAAQ,UAAa,MAAM,CAAC;AAAA,IACzC,KAAK;AACH,aAAO,IAAI,QAAQ,QAAQ,MAAM,CAA2B;AAAA,IAC9D;AACE,YAAM,IAAI;AAAA,QACR,+BAA+B,GAAG,uBAAuB,MAAM;AAAA,MACjE;AAAA,EACJ;AACF;;;AEjPA,SAAS,oBAAoB;AAyCtB,IAAM,cAAN,cAA0B,aAAa;AAAA,EACnC,GACP,OACA,UACM;AACN,WAAO,MAAM,GAAG,OAAO,QAAQ;AAAA,EACjC;AAAA,EAES,KACP,OACA,MACS;AACT,WAAO,MAAM,KAAK,OAAO,IAAI;AAAA,EAC/B;AACF;AA4BA,SAAS,aAAa,KAAsB;AAC1C,MAAI,eAAe,OAAO;AACxB,WAAO,GAAG,IAAI,IAAI,KAAK,IAAI,OAAO;AAAA,EACpC;AACA,MAAI,OAAO,QAAQ,SAAU,QAAO;AACpC,MAAI,OAAO,KAAM,QAAO;AACxB,MAAI;AACF,WAAO,KAAK,UAAU,GAAG;AAAA,EAC3B,QAAQ;AACN,WAAO,OAAO,GAAG;AAAA,EACnB;AACF;AAEA,SAAS,eAAe,KAAsB;AAC5C,MAAI,eAAe,OAAO;AACxB,WAAO,KAAK,UAAU;AAAA,MACpB,MAAM,IAAI;AAAA,MACV,SAAS,IAAI;AAAA,MACb,OAAO,IAAI;AAAA,MACX,OACE,IAAI,iBAAiB,QACjB;AAAA,QACE,MAAM,IAAI,MAAM;AAAA,QAChB,SAAS,IAAI,MAAM;AAAA,MACrB,IACA,IAAI;AAAA,IACZ,CAAC;AAAA,EACH;AACA,MAAI,OAAO,QAAQ,SAAU,QAAO,KAAK,UAAU,EAAE,SAAS,IAAI,CAAC;AACnE,MAAI,OAAO,KAAM,QAAO,KAAK,UAAU,EAAE,SAAS,gBAAgB,CAAC;AACnE,MAAI;AACF,WAAO,KAAK,UAAU,GAAG;AAAA,EAC3B,QAAQ;AACN,WAAO,KAAK,UAAU,EAAE,SAAS,OAAO,GAAG,EAAE,CAAC;AAAA,EAChD;AACF;AAEA,SAAS,cACP,aACA,OAC8B;AAC9B,QAAM,SAAS,gBAAgB,aAAa,KAAK,CAAC;AAClD,QAAM,SAAuC,CAAC;AAC9C,aAAW,cAAc,aAAa;AACpC,WAAO,UAAU,IAAI,EAAE,OAAO,GAAG,OAAO;AAAA,EAC1C;AACA,SAAO;AACT;AAEA,SAAS,gBAAgB,gBAAwB;AAC/C,MAAI,SAAS;AACb,QAAM,QAA2B,CAAC;AAElC,SAAO;AAAA,IACL,MAAM,UAAyB;AAC7B,UAAI,SAAS,gBAAgB;AAC3B;AACA;AAAA,MACF;AACA,aAAO,IAAI,QAAc,CAAC,YAAY,MAAM,KAAK,OAAO,CAAC;AAAA,IAC3D;AAAA,IACA,UAAgB;AACd;AACA,YAAM,OAAO,MAAM,MAAM;AACzB,UAAI,MAAM;AACR;AACA,aAAK;AAAA,MACP;AAAA,IACF;AAAA,EACF;AACF;AAEA,eAAe,SACb,MACA,OACA,WACwB;AACxB,QAAM,QAAQ,YAAY,IAAI;AAC9B,MAAI;AACJ,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,MAChC,KAAK,KAAK;AAAA,MACV,IAAI,QAAe,CAAC,GAAG,WAAW;AAChC,kBAAU;AAAA,UACR,MAAM,OAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UAC1C;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AACD,iBAAa,OAAO;AACpB,UAAM,YAAY,KAAK,MAAM,YAAY,IAAI,IAAI,KAAK;AACtD,WAAO;AAAA,MACL,QAAQ,OAAO;AAAA,MACf;AAAA,MACA,UAAU,OAAO,OAAO,eAAe;AAAA,MACvC,WAAW,OAAO,OAAO,gBAAgB;AAAA,IAC3C;AAAA,EACF,SAAS,KAAK;AACZ,iBAAa,OAAO;AACpB,UAAM,YAAY,KAAK,MAAM,YAAY,IAAI,IAAI,KAAK;AACtD,WAAO;AAAA,MACL,QAAQ;AAAA,MACR;AAAA,MACA,UAAU;AAAA,MACV,WAAW;AAAA,MACX,OAAO;AAAA,IACT;AAAA,EACF;AACF;AAEA,SAAS,WAAW,OAAe,YAA4B;AAC7D,MAAI,QAAQ,KAAK,QAAQ,GAAG;AAC1B,YAAQ;AAAA,MACN,WAAW,UAAU,iCAAiC,KAAK;AAAA,IAC7D;AACA,WAAO,KAAK,IAAI,GAAG,KAAK,IAAI,GAAG,KAAK,CAAC;AAAA,EACvC;AACA,SAAO;AACT;AAEA,eAAsB,QAAW,QAA4C;AAC3E,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,iBAAiB;AAAA,IACjB;AAAA,IACA,UAAU;AAAA,IACV,SAAS;AAAA,IACT,YAAY;AAAA,EACd,IAAI;AAEJ,QAAM,UAAU,OAAO,WAAW,IAAI,YAAY;AAClD,QAAM,QACJ,OAAO,UACN,MAAM;AACL,UAAM,kBAAkB,WAAW,MAAM,YAAY,IAAI,EAAE;AAC3D,WAAO,MAAM,UAAU;AAAA,MACrB,UAAU;AAAA,MACV;AAAA,MACA;AAAA,MACA,QAAQ,OAAO;AAAA,IACjB,CAAC;AAAA,EACH,GAAG;AAEL,QAAM,QAA4C,CAAC;AACnD,MAAI,MAAM;AACV,mBAAiB,QAAQ,IAAI;AAC3B,UAAM,KAAK,EAAE,OAAO,OAAO,OAAO,KAAK,CAAC;AAAA,EAC1C;AAEA,UAAQ,KAAK,aAAa,EAAE,OAAO,YAAY,MAAM,QAAQ,MAAM,MAAM,CAAC;AAE1E,QAAM,YAAY,gBAAgB,cAAc;AAChD,QAAM,cAAc,OAAO,KAAK,OAAO;AAEvC,QAAM,gBAMD,CAAC;AAEN,QAAM,cAAc,OAAO,EAAE,OAAO,MAAM,MAAmC;AAC3E,UAAM,UAAU,QAAQ;AACxB,QAAI;AACF,cAAQ,KAAK,cAAc,EAAE,OAAO,OAAO,MAAM,CAAC;AAElD,UAAI;AACJ,UAAI;AAEJ,UAAI,SAAS,GAAG;AACd,cAAM,eAGD,CAAC;AAEN,iBAAS,IAAI,GAAG,IAAI,QAAQ,KAAK;AAC/B,gBAAM,SAAS,MAAM,SAAS,MAAM,OAAO,OAAO;AAClD,cAAI,OAAO,OAAO;AAChB,yBAAa,KAAK;AAAA,cAChB;AAAA,cACA,QAAQ,cAAc,aAAa,OAAO,KAAK;AAAA,YACjD,CAAC;AAAA,UACH,OAAO;AACL,kBAAM,SAAuC,CAAC;AAC9C,uBAAW,CAAC,OAAO,MAAM,KAAK,OAAO,QAAQ,OAAO,GAAG;AACrD,oBAAM,KAAK,MAAM,OAAO;AAAA,gBACtB;AAAA,gBACA,QAAQ,OAAO;AAAA,gBACf,UAAW,MAAkC;AAAA,cAC/C,CAAC;AACD,qBAAO,KAAK,IAAI;AAAA,gBACd,OAAO,WAAW,GAAG,OAAO,KAAK;AAAA,gBACjC,QAAQ,GAAG;AAAA,gBACX,UAAU,GAAG;AAAA,cACf;AAAA,YACF;AACA,yBAAa,KAAK,EAAE,QAAQ,OAAO,CAAC;AAAA,UACtC;AAAA,QACF;AAEA,cAAM,iBAAiB,CAAC,GAAG,YAAY,EACpC,QAAQ,EACR,KAAK,CAAC,MAAM,CAAC,EAAE,OAAO,KAAK;AAC9B,cAAM,aACJ,gBAAgB,UAChB,aAAa,aAAa,SAAS,CAAC,EAAG;AACzC,sBAAc;AAAA,UACZ,QAAQ,WAAW;AAAA,UACnB,WAAW,KAAK;AAAA,YACd,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,WAAW,CAAC,IACzD;AAAA,UACJ;AAAA,UACA,UAAU,KAAK;AAAA,YACb,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,UAAU,CAAC,IACxD;AAAA,UACJ;AAAA,UACA,WAAW,KAAK;AAAA,YACd,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,WAAW,CAAC,IACzD;AAAA,UACJ;AAAA,UACA,OAAO,iBAAiB,SAAY,WAAW;AAAA,QACjD;AAEA,sBAAc,CAAC;AACf,mBAAW,SAAS,aAAa;AAC/B,gBAAM,YACJ,aAAa,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,OAAO,KAAK,EAAG,OAAO,CAAC,IAC/D;AACF,sBAAY,KAAK,IAAI;AAAA,YACnB,OAAO;AAAA,YACP,QACE,aAAa,aAAa,SAAS,CAAC,EAAG,OAAO,KAAK,GAAG;AAAA,YACxD,UACE,aAAa,aAAa,SAAS,CAAC,EAAG,OAAO,KAAK,GAAG;AAAA,UAC1D;AAAA,QACF;AAAA,MACF,OAAO;AACL,sBAAc,MAAM,SAAS,MAAM,OAAO,OAAO;AACjD,YAAI,YAAY,OAAO;AACrB,wBAAc,cAAc,aAAa,YAAY,KAAK;AAAA,QAC5D,OAAO;AACL,wBAAc,CAAC;AACf,qBAAW,CAAC,OAAO,MAAM,KAAK,OAAO,QAAQ,OAAO,GAAG;AACrD,kBAAM,KAAK,MAAM,OAAO;AAAA,cACtB;AAAA,cACA,QAAQ,YAAY;AAAA,cACpB,UAAW,MAAkC;AAAA,YAC/C,CAAC;AACD,wBAAY,KAAK,IAAI;AAAA,cACnB,OAAO,WAAW,GAAG,OAAO,KAAK;AAAA,cACjC,QAAQ,GAAG;AAAA,cACX,UAAU,GAAG;AAAA,YACf;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAEA,YAAM,SAAS,OAAO,WAAW;AAEjC,YAAM,WAAqB;AAAA,QACzB,IAAI;AAAA,QACJ,QAAQ;AAAA,QACR,KAAK;AAAA,QACL;AAAA,QACA,QAAQ,YAAY,UAAU;AAAA,QAC9B,UAAW,MAAkC;AAAA,QAC7C,YAAY,YAAY;AAAA,QACxB,WAAW,YAAY;AAAA,QACvB,YAAY,YAAY;AAAA,QACxB,OAAO,YAAY,QACf,eAAe,YAAY,KAAK,IAChC;AAAA,MACN;AACA,YAAM,UAAU,CAAC,QAAQ,CAAC;AAE1B,YAAM,gBAA6B,YAAY,IAAI,CAAC,WAAW;AAAA,QAC7D,IAAI,OAAO,WAAW;AAAA,QACtB,SAAS;AAAA,QACT,aAAa;AAAA,QACb,OAAO,YAAY,KAAK,EAAG;AAAA,QAC3B,QAAQ,YAAY,KAAK,EAAG;AAAA,MAC9B,EAAE;AACF,YAAM,WAAW,aAAa;AAE9B,oBAAc,KAAK;AAAA,QACjB;AAAA,QACA,QAAQ,OAAO;AAAA,UACb,YAAY,IAAI,CAAC,UAAU,CAAC,OAAO,YAAY,KAAK,EAAG,KAAK,CAAC;AAAA,QAC/D;AAAA,QACA,WAAW,YAAY;AAAA,QACvB,UAAU,YAAY;AAAA,QACtB,WAAW,YAAY;AAAA,MACzB,CAAC;AAED,UAAI,YAAY,OAAO;AACrB,gBAAQ,KAAK,cAAc;AAAA,UACzB;AAAA,UACA;AAAA,UACA,OAAO,aAAa,YAAY,KAAK;AAAA,QACvC,CAAC;AAAA,MACH;AAEA,cAAQ,KAAK,eAAe;AAAA,QAC1B;AAAA,QACA;AAAA,QACA;AAAA,QACA,QAAQ,YAAY;AAAA,QACpB,UAAW,MAAkC;AAAA,QAC7C,QAAQ;AAAA,QACR,OAAO,YAAY;AAAA,QACnB,WAAW,YAAY;AAAA,QACvB,UAAU,YAAY;AAAA,QACtB,WAAW,YAAY;AAAA,MACzB,CAAC;AAAA,IACH,UAAE;AACA,gBAAU,QAAQ;AAAA,IACpB;AAAA,EACF;AAEA,QAAM,UAAU,YACZ,MAAM;AAAA,IAAK,EAAE,QAAQ,KAAK,KAAK,MAAM,SAAS,SAAS,EAAE;AAAA,IAAG,CAAC,GAAG,MAC9D,MAAM,MAAM,IAAI,YAAY,IAAI,KAAK,SAAS;AAAA,EAChD,IACA,CAAC,KAAK;AAEV,MAAI;AACF,eAAW,SAAS,SAAS;AAC3B,YAAM,QAAQ,IAAI,MAAM,IAAI,WAAW,CAAC;AAAA,IAC1C;AAAA,EACF,SAAS,KAAK;AACZ,UAAM,UAAU,OAAO,QAAQ;AAC/B,UAAM;AAAA,EACR;AAEA,QAAM,UAAU,eAAe,eAAe,aAAa,SAAS;AACpE,QAAM,UAAU,OAAO,aAAa,OAAO;AAC3C,UAAQ,KAAK,WAAW,EAAE,OAAO,QAAQ,CAAC;AAE1C,SAAO;AACT;AAEA,SAAS,eACP,OAOA,aACA,WACY;AACZ,QAAM,aAAa,MAAM;AACzB,MAAI,YAAY;AAChB,MAAI,YAAY;AAChB,MAAI,iBAAiB;AACrB,MAAI,gBAAgB;AACpB,MAAI,iBAAiB;AAErB,QAAM,YAAoC,CAAC;AAC3C,aAAW,QAAQ,aAAa;AAC9B,cAAU,IAAI,IAAI;AAAA,EACpB;AAEA,aAAW,KAAK,OAAO;AACrB,sBAAkB,EAAE;AACpB,qBAAiB,EAAE;AACnB,sBAAkB,EAAE;AAEpB,QAAI,UAAU;AACd,eAAW,QAAQ,aAAa;AAC9B,YAAM,QAAQ,EAAE,OAAO,IAAI,KAAK;AAChC,gBAAU,IAAI,KAAM;AACpB,UAAI,QAAQ,UAAW,WAAU;AAAA,IACnC;AACA,QAAI,QAAS;AAAA,QACR;AAAA,EACP;AAEA,QAAM,aAAqC,CAAC;AAC5C,aAAW,QAAQ,aAAa;AAC9B,eAAW,IAAI,IAAI,aAAa,IAAI,UAAU,IAAI,IAAK,aAAa;AAAA,EACtE;AAEA,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;;;ACjeA,SAAS,oBAAoB;;;ACwDtB,IAAM,qBAAN,cAAiC,MAAM;AAAA,EAC5C;AAAA,EAEA,YAAY,SAAoC;AAC9C,UAAM,MAAM,MAAM,QAAQ,OAAO,IAC7B,0BAA0B,QAAQ,OAAO,CAAC,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM,OAAO,QAAQ,MAAM,8BAC5F,0BAA0B,QAAQ,SAAS,OAAO,QAAQ,UAAU;AACxE,UAAM,GAAG;AACT,SAAK,OAAO;AACZ,SAAK,UAAU;AAAA,EACjB;AACF;AAEA,SAAS,qBACP,OACA,WACA,OACA,WACa;AACb,QAAM,QAAQ,MAAM,gBAAgB,SAAS;AAC7C,MAAI,CAAC,OAAO;AACV,YAAQ;AAAA,MACN,gCAAgC,SAAS;AAAA,IAC3C;AACA,WAAO,oBAAI,IAAI;AAAA,EACjB;AACA,QAAM,MAAM,MAAM,sBAAsB,MAAM,IAAI,KAAK;AACvD,MAAI,CAAC,KAAK;AACR,YAAQ;AAAA,MACN,wCAAwC,SAAS,IAAI,QAAQ,KAAK,KAAK,MAAM,EAAE;AAAA,IACjF;AACA,WAAO,oBAAI,IAAI;AAAA,EACjB;AACA,QAAM,eAAe,MAAM,gBAAgB,IAAI,IAAI,SAAS;AAC5D,MAAI,aAAa,WAAW,GAAG;AAC7B,YAAQ,KAAK,qDAAqD;AAClE,WAAO,oBAAI,IAAI;AAAA,EACjB;AACA,UAAQ;AAAA,IACN,YAAY,aAAa,MAAM;AAAA,EACjC;AACA,SAAO,IAAI,IAAI,aAAa,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC;AAC/C;AAEO,IAAM,cAAN,MAA+C;AAAA;AAAA,EAEpD;AAAA,EACA,aAAwB,EAAE,MAAM,MAAM;AAAA,EACtC,gBAAgB;AAAA,EAEhB,YAEE,SACA;AACA,SAAK,WAAW;AAAA,EAClB;AAAA,EAEA,cAAc,WAA4B;AACxC,QAAI,KAAK,WAAW,SAAS,OAAO;AAClC,YAAM,IAAI;AAAA,QACR,mBAAmB,KAAK,WAAW,IAAI,YAAY,UAAU,IAAI;AAAA,MACnE;AAAA,IACF;AACA,SAAK,aAAa;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,SAAe;AACb,WAAO,KAAK,cAAc,EAAE,MAAM,SAAS,CAAC;AAAA,EAC9C;AAAA,EAEA,MAAM,MAAoB;AACxB,UAAM,EAAE,QAAQ,IAAI,qBAAqB,IAAI;AAC7C,WAAO,KAAK,cAAc,EAAE,MAAM,SAAS,QAAQ,CAAC;AAAA,EACtD;AAAA,EAEA,OAAO,OAAqB;AAC1B,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AACA,WAAO,KAAK,cAAc,EAAE,MAAM,UAAU,MAAM,CAAC;AAAA,EACrD;AAAA,EAEA,SAAe;AACb,SAAK,gBAAgB;AACrB,WAAO;AAAA,EACT;AAAA,EAEA,KACE,aAIA,YAI8B;AAC9B,WAAO,KAAK,SAAS,EAAE,KAAK,aAAa,UAAU;AAAA,EACrD;AAAA,EAEA,MAAM,WAAuB;AAC3B,QAAI,YAAY,KAAK,UAAU;AAC7B,aAAO,KAAK,cAAc;AAAA,IAC5B;AACA,WAAO,KAAK,eAAe;AAAA,EAC7B;AAAA,EAEA,oBAAoB,IAAoD;AACtE,YAAQ,KAAK,WAAW,MAAM;AAAA,MAC5B,KAAK;AACH,eAAO;AAAA,MACT,KAAK;AACH,eAAO,KAAK,WAAW,QAAQ,OAAO,IAClC,qBAAqB,IAAI,KAAK,WAAW,OAAO,IAChD;AAAA,MACN,KAAK;AACH,eAAO,QAAQ,EAAE,EAAE,OAAO,KAAK,WAAW,KAAK;AAAA,MACjD,KAAK;AACH,eAAO;AAAA,IACX;AAAA,EACF;AAAA,EAEA,MAAM,iBAAsC;AAC1C,UAAM,UAAU,KAAK;AACrB,QAAI,KAA6B,QAAQ;AAEzC,QAAI,KAAK,WAAW,SAAS,UAAU;AACrC,YAAM,UAAU;AAAA,QACd,QAAQ;AAAA,QACR,QAAQ;AAAA,QACR,QAAQ;AAAA,QACR,QAAQ;AAAA,MACV;AACA,UAAI,QAAQ,OAAO,GAAG;AACpB,aAAK,qBAAqB,IAAI,OAAO;AAAA,MACvC;AAAA,IACF,OAAO;AACL,WAAK,KAAK,oBAAoB,EAAE;AAAA,IAClC;AAEA,UAAM,SAAS,MAAM,eAAe,EAAE,GAAG,SAAS,SAAS,GAAG,CAAC;AAE/D,QAAI,KAAK,iBAAiB,OAAO,YAAY,GAAG;AAC9C,YAAM,IAAI,mBAAmB,MAAM;AAAA,IACrC;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,gBAAuC;AAC3C,UAAM,UAAU,KAAK;AAKrB,QAAI;AAEJ,QAAI,KAAK,WAAW,SAAS,UAAU;AACrC,YAAM,kBAAkB,oBAAI,IAAyB;AACrD,iBAAW,WAAW,QAAQ,QAAQ;AACpC,wBAAgB;AAAA,UACd,QAAQ;AAAA,UACR;AAAA,YACE,QAAQ;AAAA,YACR,QAAQ;AAAA,YACR,QAAQ;AAAA,YACR,QAAQ;AAAA,UACV;AAAA,QACF;AAAA,MACF;AACA,eAAS,MAAM,aAAa,SAAS,eAAe;AAAA,IACtD,OAAO;AACL,YAAM,WAAW,KAAK,oBAAoB,QAAQ,OAAO;AACzD,eAAS,MAAM,aAAa,EAAE,GAAG,SAAS,SAAS,SAAS,CAAC;AAAA,IAC/D;AAEA,QAAI,KAAK,iBAAiB,OAAO,KAAK,CAAC,MAAM,EAAE,YAAY,CAAC,GAAG;AAC7D,YAAM,IAAI,mBAAmB,MAAM;AAAA,IACrC;AAEA,WAAO;AAAA,EACT;AACF;AAQO,SAAS,SACd,SACqD;AACrD,MAAI,YAAY,SAAS;AACvB,WAAO,IAAI,YAA0B,OAAO;AAAA,EAC9C;AACA,SAAO,IAAI,YAAwB,OAAO;AAC5C;AAEA,SAAS,cAAc,WAAuB;AAC5C,QAAM,UAAU,IAAI,YAAY;AAChC,QAAM,QAAsB,CAAC;AAC7B,MAAI,QAAQ;AAEZ,UAAQ,GAAG,aAAa,CAAC,SAAS;AAChC,YAAQ,KAAK;AACb,eAAW,KAAK,UAAW,GAAE,aAAa,IAAI;AAAA,EAChD,CAAC;AAED,UAAQ,GAAG,eAAe,CAAC,SAAS;AAClC,UAAM,SAAqB;AAAA,MACzB,OAAO,KAAK;AAAA,MACZ,OAAO,KAAK;AAAA,MACZ,OAAO,KAAK;AAAA,MACZ,QAAQ,KAAK;AAAA,MACb,UAAU,KAAK;AAAA,MACf,QAAQ,KAAK;AAAA,MACb,OAAO,KAAK,SAAS;AAAA,MACrB,WAAW,KAAK;AAAA,MAChB,UAAU,KAAK;AAAA,MACf,WAAW,KAAK;AAAA,IAClB;AACA,UAAM,KAAK,MAAM;AACjB,eAAW,KAAK,UAAW,GAAE,YAAY,MAAM;AAAA,EACjD,CAAC;AAED,SAAO,EAAE,SAAS,OAAO,UAAU,MAAM,MAAM;AACjD;AAEA,eAAe,aACb,WACA,MAQe;AACf,OAAK,MAAM,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAC3C,QAAM,QAAQ,IAAI,UAAU,IAAI,CAAC,MAAM,EAAE,WAAW,IAAI,CAAC,CAAC;AAC5D;AAEA,eAAe,eACb,SACqB;AACrB,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,EAAE,SAAS,OAAO,SAAS,IAAI,cAAc,QAAQ,SAAS;AAEpE,QAAM,UAAU,MAAM,QAAQ;AAAA,IAC5B,MAAM,QAAQ;AAAA,IACd,OAAO,QAAQ;AAAA,IACf,SAAS,QAAQ;AAAA,IACjB,MAAM,QAAQ;AAAA,IACd,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,gBAAgB,QAAQ;AAAA,IACxB,SAAS,QAAQ;AAAA,IACjB,QAAQ,QAAQ;AAAA,IAChB,WAAW,QAAQ;AAAA,EACrB,CAAC;AAED,QAAM,aAAa,QAAQ,WAAW;AAAA,IACpC,OAAO,SAAS;AAAA,IAChB,MAAM,QAAQ;AAAA,IACd,OAAO,QAAQ;AAAA,IACf;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,eAAe,aACb,SACA,uBACuB;AACvB,QAAM,QAAa,CAAC;AACpB,mBAAiB,QAAQ,QAAQ,SAAS;AACxC,UAAM,KAAK,IAAI;AAAA,EACjB;AAEA,QAAM,QAAQ,QAAQ,MAAM,YAAY,QAAQ,IAAI;AAEpD,SAAO,QAAQ;AAAA,IACb,QAAQ,OAAO,IAAI,CAAC,YAAY;AAC9B,UAAI,KAAuB,QAAQ,KAAK;AACxC,YAAM,gBAAgB,uBAAuB,IAAI,QAAQ,IAAI;AAC7D,UAAI,iBAAiB,cAAc,OAAO,GAAG;AAC3C,aAAK,qBAAqB,IAAI,aAAa;AAAA,MAC7C;AACA,aAAO,eAAe;AAAA,QACpB,MAAM,GAAG,QAAQ,IAAI,KAAK,QAAQ,IAAI;AAAA,QACtC,OAAO,QAAQ;AAAA,QACf,SAAS;AAAA,QACT,MAAM,CAAC,UAAa,QAAQ,KAAK,OAAO,OAAO;AAAA,QAC/C,SAAS,QAAQ;AAAA,QACjB,WAAW,QAAQ;AAAA,QACnB,OAAO,QAAQ;AAAA,QACf,SAAS,MAAM;AAAA,QACf,gBAAgB,QAAQ;AAAA,QACxB,SAAS,QAAQ;AAAA,QACjB,QAAQ,QAAQ;AAAA,QAChB,WAAW,QAAQ;AAAA,MACrB,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AACF;",
|
|
6
6
|
"names": []
|
|
7
7
|
}
|
package/dist/index.js
CHANGED
|
@@ -859,6 +859,20 @@ var RunStore = class {
|
|
|
859
859
|
deletePrompt(id) {
|
|
860
860
|
this.#stmt("DELETE FROM prompts WHERE id = ?").run(id);
|
|
861
861
|
}
|
|
862
|
+
resetRun(id) {
|
|
863
|
+
this.#transaction(() => {
|
|
864
|
+
this.#stmt("DELETE FROM cases WHERE run_id = ?").run(id);
|
|
865
|
+
this.#stmt(
|
|
866
|
+
"UPDATE runs SET status = ?, started_at = ?, finished_at = NULL, summary = NULL WHERE id = ?"
|
|
867
|
+
).run("running", Date.now(), id);
|
|
868
|
+
});
|
|
869
|
+
}
|
|
870
|
+
deleteRun(id) {
|
|
871
|
+
this.#stmt("DELETE FROM runs WHERE id = ?").run(id);
|
|
872
|
+
}
|
|
873
|
+
deleteSuite(id) {
|
|
874
|
+
this.#stmt("DELETE FROM suites WHERE id = ?").run(id);
|
|
875
|
+
}
|
|
862
876
|
};
|
|
863
877
|
|
|
864
878
|
// packages/evals/src/engine/index.ts
|
|
@@ -990,13 +1004,15 @@ async function runEval(config) {
|
|
|
990
1004
|
threshold = 0.5
|
|
991
1005
|
} = config;
|
|
992
1006
|
const emitter = config.emitter ?? new EvalEmitter();
|
|
993
|
-
const
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1007
|
+
const runId = config.runId ?? (() => {
|
|
1008
|
+
const resolvedSuiteId = suiteId ?? store.createSuite(name).id;
|
|
1009
|
+
return store.createRun({
|
|
1010
|
+
suite_id: resolvedSuiteId,
|
|
1011
|
+
name,
|
|
1012
|
+
model,
|
|
1013
|
+
config: config.config
|
|
1014
|
+
});
|
|
1015
|
+
})();
|
|
1000
1016
|
const items = [];
|
|
1001
1017
|
let idx = 0;
|
|
1002
1018
|
for await (const item of ds) {
|