vieval 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/vieval.mjs +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-ImxGpoYQ.mjs → cli-Dao25VxV.mjs} +2 -2
- package/dist/cli-Dao25VxV.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/config.mjs +1 -1
- package/dist/core/assertions/index.d.mts +1 -1
- package/dist/core/inference-executors/index.mjs +1 -1
- package/dist/core/processors/results/index.d.mts +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +6 -40
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/{env--94B0UtW.mjs → env-BFSjny07.mjs} +1 -1
- package/dist/{env--94B0UtW.mjs.map → env-BFSjny07.mjs.map} +1 -1
- package/dist/{index-5R1_k2nv.d.mts → index-BkjyCInx.d.mts} +12 -37
- package/dist/index.d.mts +4 -4
- package/dist/index.mjs +1 -1
- package/dist/{models-DIGdOUpJ.mjs → models-pBSRUZhY.mjs} +1 -1
- package/dist/{models-DIGdOUpJ.mjs.map → models-pBSRUZhY.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +63 -6
- package/dist/plugins/chat-models/index.mjs +60 -6
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/registry-BHGMxjpA.mjs.map +1 -1
- package/package.json +1 -1
- package/dist/cli-ImxGpoYQ.mjs.map +0 -1
package/dist/config.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as TaskDefinition, D as TaskRunContext, E as TaskReporterHooks, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, T as TaskReporterEventPayload, _ as ScopedMatrices, a as CliOpenTelemetryReportingConfig, b as TaskCaseReporterPayload, c as EvalDefinition, d as MatrixAxisValues, f as MatrixDefinition, g as MatrixValue, h as MatrixRow, i as Awaitable, l as EvalModule, m as MatrixPrimitive, n as defineEval, o as CliReportingConfig, p as MatrixLayer, r as defineTask, s as CollectedEvalEntry, t as ConfigHookPlugin, u as EvalModuleMap, v as TaskAutoRetryDelay, w as TaskExecutionPolicy, x as TaskCaseState, y as TaskCaseReporterEndPayload, z as resolveModelByName } from "./index-BkjyCInx.mjs";
|
|
2
2
|
export { Awaitable, CliOpenTelemetryReportingConfig, CliReportingConfig, CollectedEvalEntry, ConfigHookPlugin, EvalDefinition, EvalModule, EvalModuleMap, MatrixAxisValues, MatrixDefinition, MatrixLayer, MatrixPrimitive, MatrixRow, MatrixValue, ModelDefinition, ScopedMatrices, TaskAutoRetryDelay, TaskCaseReporterEndPayload, TaskCaseReporterPayload, TaskCaseState, TaskConcurrencyConfig, TaskDefinition, TaskExecutionPolicy, TaskReporterEventPayload, TaskReporterHooks, TaskRunContext, TaskRunOutput, defineEval, defineTask, resolveModelByName };
|
package/dist/config.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { n as requiredEnvFrom, t as envFrom } from "../../env
|
|
1
|
+
import { n as requiredEnvFrom, t as envFrom } from "../../env-BFSjny07.mjs";
|
|
2
2
|
import process from "node:process";
|
|
3
3
|
import { errorMessageFrom, errorNameFrom, sleep } from "@moeru/std";
|
|
4
4
|
import { createOpenAI } from "@xsai-ext/providers/create";
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { $ as
|
|
1
|
+
import { $ as InferenceExecutor, A as RunScheduledTasksOptions, B as asProjectRelativePath, F as CreateTaskExecutionContextOptions, G as AggregatedProviderSummary, H as CreateVievalRunnerRuntimeContextOptions, I as TaskExecutionContext, J as RunResult, K as AggregatedRunResults, L as createTaskExecutionContext, M as RunnerTaskState, N as ScheduledTaskExecutor, P as runScheduledTasks, Q as CreateRunnerScheduleOptions, U as RunnerRuntimeContext, V as collectEvalEntries, W as createRunnerRuntimeContext, X as RunScoreKind, Y as RunScore, Z as aggregateRunResults, at as ScheduledTaskMatrixMeta, ct as createFilesystemTaskCacheRuntime, dt as CacheFileOptions, et as RunnerMatrixDefinition, ft as CacheNamespace, it as ScheduledTaskMatrix, j as RunnerExecutionError, lt as normalizeCacheFilePathSegments, nt as RunnerMatrixSelection, ot as createRunnerSchedule, pt as TaskCacheRuntime, q as AggregatedRunSummary, rt as ScheduledTask, st as CreateFilesystemTaskCacheRuntimeOptions, tt as RunnerMatrixInput, ut as CacheFileHandle } from "../../index-BkjyCInx.mjs";
|
|
2
2
|
import { a as SchedulerMiddleware, c as SchedulerScopeContext, i as SchedulerConcurrencyConfig, n as getActiveScopes, o as SchedulerRuntime, r as CreateSchedulerRuntimeOptions, s as SchedulerScope, t as createSchedulerRuntime } from "../../index-fakXoZEe.mjs";
|
|
3
|
-
export { AggregatedProviderSummary, AggregatedRunResults, AggregatedRunSummary, CacheFileHandle, CacheFileOptions, CacheNamespace, CreateFilesystemTaskCacheRuntimeOptions, CreateRunnerScheduleOptions, CreateSchedulerRuntimeOptions, CreateTaskExecutionContextOptions, CreateVievalRunnerRuntimeContextOptions, InferenceExecutor, RunResult, RunScheduledTasksOptions, RunScore, RunScoreKind, RunnerExecutionError, RunnerMatrixDefinition, RunnerMatrixInput, RunnerMatrixSelection, RunnerRuntimeContext, RunnerTaskState, ScheduledTask, ScheduledTaskExecutor, ScheduledTaskMatrix, ScheduledTaskMatrixMeta, SchedulerConcurrencyConfig, SchedulerMiddleware, SchedulerRuntime, SchedulerScope, SchedulerScopeContext, TaskCacheRuntime, TaskExecutionContext,
|
|
3
|
+
export { AggregatedProviderSummary, AggregatedRunResults, AggregatedRunSummary, CacheFileHandle, CacheFileOptions, CacheNamespace, CreateFilesystemTaskCacheRuntimeOptions, CreateRunnerScheduleOptions, CreateSchedulerRuntimeOptions, CreateTaskExecutionContextOptions, CreateVievalRunnerRuntimeContextOptions, InferenceExecutor, RunResult, RunScheduledTasksOptions, RunScore, RunScoreKind, RunnerExecutionError, RunnerMatrixDefinition, RunnerMatrixInput, RunnerMatrixSelection, RunnerRuntimeContext, RunnerTaskState, ScheduledTask, ScheduledTaskExecutor, ScheduledTaskMatrix, ScheduledTaskMatrixMeta, SchedulerConcurrencyConfig, SchedulerMiddleware, SchedulerRuntime, SchedulerScope, SchedulerScopeContext, TaskCacheRuntime, TaskExecutionContext, aggregateRunResults, asProjectRelativePath, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createSchedulerRuntime, createTaskExecutionContext, getActiveScopes, normalizeCacheFilePathSegments, runScheduledTasks };
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { createSchedulerRuntime, getActiveScopes } from "../scheduler/index.mjs";
|
|
2
|
-
import { t as resolveModelByName } from "../../models-DIGdOUpJ.mjs";
|
|
3
2
|
import { createRequire } from "node:module";
|
|
4
3
|
import process from "node:process";
|
|
5
4
|
import { errorMessageFrom } from "@moeru/std";
|
|
@@ -304,7 +303,7 @@ function collectEvalEntries(modules, context) {
|
|
|
304
303
|
}
|
|
305
304
|
//#endregion
|
|
306
305
|
//#region src/core/runner/run.ts
|
|
307
|
-
function createDefaultExecutionContext(
|
|
306
|
+
function createDefaultExecutionContext() {
|
|
308
307
|
return {
|
|
309
308
|
cache: { namespace(name) {
|
|
310
309
|
return { file(options) {
|
|
@@ -312,11 +311,7 @@ function createDefaultExecutionContext(task) {
|
|
|
312
311
|
throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
|
|
313
312
|
} };
|
|
314
313
|
} },
|
|
315
|
-
|
|
316
|
-
const requestedModelName = typeof options === "string" ? options : options?.name;
|
|
317
|
-
if (requestedModelName != null) throw new Error(`No model registry configured. Requested model: ${requestedModelName}`);
|
|
318
|
-
throw new Error(`No model registry configured for task inferenceExecutor id "${task.inferenceExecutor.id}".`);
|
|
319
|
-
}
|
|
314
|
+
models: []
|
|
320
315
|
};
|
|
321
316
|
}
|
|
322
317
|
/**
|
|
@@ -366,7 +361,7 @@ async function runScheduledTasks(tasks, executor, options = {}) {
|
|
|
366
361
|
async function executeScheduledTask(task) {
|
|
367
362
|
let executionContext;
|
|
368
363
|
try {
|
|
369
|
-
executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext(
|
|
364
|
+
executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext();
|
|
370
365
|
} catch (error) {
|
|
371
366
|
throw createRunnerExecutionError(task.id, error);
|
|
372
367
|
}
|
|
@@ -587,48 +582,19 @@ function createNoopTaskCacheRuntime() {
|
|
|
587
582
|
} };
|
|
588
583
|
} };
|
|
589
584
|
}
|
|
590
|
-
function resolveDefaultTaskModel(models, task) {
|
|
591
|
-
const runMatrixModelName = task.matrix.run.model;
|
|
592
|
-
if (runMatrixModelName != null) {
|
|
593
|
-
const matrixSelectedModel = resolveModelByName(models, runMatrixModelName);
|
|
594
|
-
if (matrixSelectedModel != null) return matrixSelectedModel;
|
|
595
|
-
throw new Error(`Unknown configured model "${runMatrixModelName}" from task.matrix.run.model.`);
|
|
596
|
-
}
|
|
597
|
-
const matched = resolveModelByName(models, task.inferenceExecutor.id);
|
|
598
|
-
if (matched != null) return matched;
|
|
599
|
-
if (models.length > 1) throw new Error([
|
|
600
|
-
`Multiple configured models are available, but no default model is selected for inferenceExecutor "${task.inferenceExecutor.id}".`,
|
|
601
|
-
"Select one model explicitly by either:",
|
|
602
|
-
"- setting runMatrix.override.model (or task matrix run.model)",
|
|
603
|
-
"- setting project.inferenceExecutors to a matching model id",
|
|
604
|
-
"- calling context.model({ name: \"your-model-id-or-alias\" })"
|
|
605
|
-
].join("\n"));
|
|
606
|
-
if (models.length === 1) {
|
|
607
|
-
const firstModel = models[0];
|
|
608
|
-
if (firstModel != null) return firstModel;
|
|
609
|
-
}
|
|
610
|
-
throw new Error(`No configured model found for inferenceExecutor id "${task.inferenceExecutor.id}".`);
|
|
611
|
-
}
|
|
612
585
|
/**
|
|
613
|
-
* Creates task-scoped
|
|
586
|
+
* Creates task-scoped context data for runner execution.
|
|
614
587
|
*
|
|
615
588
|
* Call stack:
|
|
616
589
|
*
|
|
617
590
|
* {@link runScheduledTasks}
|
|
618
591
|
* -> {@link createTaskExecutionContext}
|
|
619
|
-
* ->
|
|
620
|
-
* -> `task.model()` / `task.model({ name })`
|
|
592
|
+
* -> `TaskExecutionContext`
|
|
621
593
|
*/
|
|
622
594
|
function createTaskExecutionContext(options) {
|
|
623
595
|
return {
|
|
624
596
|
cache: options.cache ?? createNoopTaskCacheRuntime(),
|
|
625
|
-
|
|
626
|
-
if (selection == null) return resolveDefaultTaskModel(options.models, options.task);
|
|
627
|
-
const name = typeof selection === "string" ? selection : selection.name;
|
|
628
|
-
const namedModel = resolveModelByName(options.models, name);
|
|
629
|
-
if (namedModel == null) throw new Error(`Unknown configured model "${name}".`);
|
|
630
|
-
return namedModel;
|
|
631
|
-
}
|
|
597
|
+
models: options.models
|
|
632
598
|
};
|
|
633
599
|
}
|
|
634
600
|
//#endregion
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/cache/filesystem.ts","../../../src/core/runner/aggregate.ts","../../../src/core/runner/collect.ts","../../../src/core/runner/run.ts","../../../src/core/runner/runtime-context.ts","../../../src/core/runner/schedule.ts","../../../src/core/runner/task-context.ts"],"sourcesContent":["import type { CacheFileHandle, CacheFileOptions, CacheNamespace, TaskCacheRuntime } from './types'\n\nimport process from 'node:process'\n\nimport { Buffer } from 'node:buffer'\nimport { createReadStream, createWriteStream } from 'node:fs'\nimport { access, mkdir, readFile, rename, writeFile } from 'node:fs/promises'\nimport { dirname, join } from 'node:path'\n\n/**\n * Options for creating the filesystem-backed task cache runtime.\n */\nexport interface CreateFilesystemTaskCacheRuntimeOptions {\n /**\n * Absolute cache root directory.\n */\n cacheRootDirectory: string\n /**\n * Project identifier under one workspace cache scope.\n */\n projectName: string\n /**\n * Workspace identifier used to share cache roots across projects.\n */\n workspaceId: string\n}\n\nfunction sanitizePathSegment(value: string): string {\n const normalized = value.trim()\n if (normalized.length === 0) {\n return 'default'\n }\n\n return normalized.replace(/[^\\w.-]+/g, '-')\n}\n\nfunction normalizeExtension(extension: string | undefined, mediaType: string | undefined): string | undefined {\n if (extension != null && extension.length > 0) {\n return extension.startsWith('.') ? extension.slice(1) : extension\n }\n\n if (mediaType == null || mediaType.length === 0) {\n return undefined\n }\n\n if (mediaType === 'application/json') {\n return 'json'\n }\n\n if (mediaType === 'text/plain') {\n return 'txt'\n }\n\n if (mediaType === 'audio/wav') {\n return 'wav'\n }\n\n return undefined\n}\n\n/**\n * Normalizes cache file options into deterministic relative path segments.\n *\n * Before:\n * - `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`\n *\n * After:\n * - `['cases', 'dataset-hash', 'v1.json']`\n */\nexport function normalizeCacheFilePathSegments(options: CacheFileOptions): string[] {\n const sanitizedKey = options.key.map(segment => sanitizePathSegment(segment))\n const extension = normalizeExtension(options.ext, options.mediaType)\n\n if (sanitizedKey.length === 0) {\n return extension == null ? ['artifact'] : [`artifact.${extension}`]\n }\n\n if (extension == null) {\n return sanitizedKey\n }\n\n const withoutTail = sanitizedKey.slice(0, Math.max(0, sanitizedKey.length - 1))\n const tail = sanitizedKey[sanitizedKey.length - 1] ?? 'artifact'\n return [...withoutTail, `${tail}.${extension}`]\n}\n\nasync function writeAtomically(path: string, content: Buffer | string): Promise<void> {\n const directory = dirname(path)\n const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`\n await mkdir(directory, { recursive: true })\n await writeFile(temporaryPath, content)\n await rename(temporaryPath, path)\n}\n\nfunction createCacheFileHandle(path: string): CacheFileHandle {\n return {\n path,\n async exists() {\n try {\n await access(path)\n return true\n }\n catch {\n return false\n }\n },\n openReadStream() {\n return createReadStream(path)\n },\n async openWriteStream() {\n await mkdir(dirname(path), { recursive: true })\n return createWriteStream(path)\n },\n async readBuffer() {\n return await readFile(path)\n },\n async writeBuffer(value) {\n await writeAtomically(path, value)\n },\n async readText(encoding = 'utf-8') {\n return await readFile(path, encoding)\n },\n async writeText(value, encoding = 'utf-8') {\n await writeAtomically(path, Buffer.from(value, encoding))\n },\n async readJson<T>() {\n return JSON.parse(await readFile(path, 'utf-8')) as T\n },\n async writeJson(value) {\n await writeAtomically(path, `${JSON.stringify(value, null, 2)}\\n`)\n },\n async loadAsCasesInput<T>() {\n return await this.readJson<T[]>()\n },\n async loadAsExpectFixture<T>() {\n return await this.readJson<T>()\n },\n }\n}\n\nfunction createCacheNamespace(baseDirectory: string, namespace: string): CacheNamespace {\n return {\n file(options) {\n const relativePathSegments = normalizeCacheFilePathSegments(options)\n return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments))\n },\n }\n}\n\n/**\n * Creates a deterministic filesystem-backed task cache runtime.\n *\n * Use when:\n * - eval tasks need reproducible cache paths for expensive pre-processing outputs\n * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes\n *\n * Expects:\n * - `cacheRootDirectory` to be writable by the running process\n * - `workspaceId` + `projectName` to stay stable for reproducible paths\n *\n * Returns:\n * - task cache runtime that resolves namespaced file handles under:\n * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`\n */\nexport function createFilesystemTaskCacheRuntime(\n options: CreateFilesystemTaskCacheRuntimeOptions,\n): TaskCacheRuntime {\n const workspaceDirectory = sanitizePathSegment(options.workspaceId)\n const projectDirectory = sanitizePathSegment(options.projectName)\n const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory)\n\n return {\n namespace(name) {\n return createCacheNamespace(baseDirectory, name)\n },\n }\n}\n","import type { ScheduledTaskMatrix } from './schedule'\n\n/**\n * Identifies the scoring family for a single eval score.\n */\nexport type RunScoreKind = 'exact' | 'judge'\n\n/**\n * Represents one normalized score emitted by a completed eval run.\n */\nexport interface RunScore {\n /**\n * Score family used for aggregation.\n */\n kind: RunScoreKind\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n}\n\n/**\n * Captures the output of one scheduled runner task.\n */\nexport interface RunResult {\n /**\n * Stable run id, usually copied from the scheduled task id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Raw scores emitted by the eval.\n */\n scores: readonly RunScore[]\n}\n\n/**\n * Stores the per-run score averages after normalization.\n */\nexport interface AggregatedRunSummary {\n /**\n * Stable run id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Mean of exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average. Uses both families when present, otherwise falls back to the\n * single available family.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores inferenceExecutor-level score aggregates across multiple runs.\n */\nexport interface AggregatedProviderSummary {\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Number of runs included in this inferenceExecutor bucket.\n */\n runCount: number\n /**\n * Mean of all exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of all judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average derived from the inferenceExecutor exact and judge means.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores the final aggregation output for a batch of runner results.\n */\nexport interface AggregatedRunResults {\n /**\n * Per-run normalized score summaries.\n */\n runs: AggregatedRunSummary[]\n /**\n * Provider-level summaries sorted by inferenceExecutor id.\n */\n inferenceExecutors: AggregatedProviderSummary[]\n /**\n * Overall summary across every run.\n */\n overall: {\n exactAverage: number | null\n judgeAverage: number | null\n hybridAverage: number | null\n runCount: number\n }\n}\n\ninterface ScoreBuckets {\n exact: number[]\n judge: number[]\n}\n\nfunction cloneScheduledTaskMatrix(matrix: ScheduledTaskMatrix): ScheduledTaskMatrix {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction assertKnownScoreKind(kind: string): RunScoreKind {\n if (kind === 'exact' || kind === 'judge') {\n return kind\n }\n\n throw new TypeError(`Unknown eval score kind \"${kind}\".`)\n}\n\nfunction average(scores: readonly number[]): number | null {\n if (scores.length === 0) {\n return null\n }\n\n const total = scores.reduce((sum, score) => sum + score, 0)\n return total / scores.length\n}\n\nfunction createHybridAverage(exactAverage: number | null, judgeAverage: number | null): number | null {\n if (exactAverage != null && judgeAverage != null) {\n return (exactAverage + judgeAverage) / 2\n }\n\n if (exactAverage != null) {\n return exactAverage\n }\n\n if (judgeAverage != null) {\n return judgeAverage\n }\n\n return null\n}\n\nfunction collectScoreBuckets(scores: readonly RunScore[]): ScoreBuckets {\n const buckets: ScoreBuckets = {\n exact: [],\n judge: [],\n }\n\n for (const score of scores) {\n const kind = assertKnownScoreKind(score.kind)\n\n if (kind === 'exact') {\n buckets.exact.push(score.score)\n continue\n }\n\n buckets.judge.push(score.score)\n }\n\n return buckets\n}\n\nfunction createRunSummary(result: RunResult): AggregatedRunSummary {\n const buckets = collectScoreBuckets(result.scores)\n const exactAverage = average(buckets.exact)\n const judgeAverage = average(buckets.judge)\n\n return {\n entryId: result.entryId,\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n id: result.id,\n judgeAverage,\n matrix: cloneScheduledTaskMatrix(result.matrix),\n inferenceExecutorId: result.inferenceExecutorId,\n }\n}\n\nfunction createProviderSummary(inferenceExecutorId: string, results: readonly RunResult[]): AggregatedProviderSummary {\n const exactScores: number[] = []\n const judgeScores: number[] = []\n\n for (const result of results) {\n const buckets = collectScoreBuckets(result.scores)\n exactScores.push(...buckets.exact)\n judgeScores.push(...buckets.judge)\n }\n\n const exactAverage = average(exactScores)\n const judgeAverage = average(judgeScores)\n\n return {\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n judgeAverage,\n inferenceExecutorId,\n runCount: results.length,\n }\n}\n\n/**\n * Aggregates exact-match and judge-based scores into hybrid runner summaries.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link createRunSummary}\n * -> {@link createProviderSummary}\n * -> `report output`\n *\n * Use when:\n * - a runner batch mixes deterministic exact checks with judge-based grading\n * - inferenceExecutor comparison should preserve both score families and one hybrid view\n *\n * Expects:\n * - each score to be normalized to the `0..1` range before aggregation\n * - `scores.kind` to use only `'exact'` or `'judge'`\n */\nexport function aggregateRunResults(results: readonly RunResult[]): AggregatedRunResults {\n const runs = results.map(createRunSummary)\n\n const inferenceExecutorIds = Array.from(new Set(results.map(result => result.inferenceExecutorId)))\n const inferenceExecutors = inferenceExecutorIds\n .map((inferenceExecutorId) => {\n const providerResults = results.filter(result => result.inferenceExecutorId === inferenceExecutorId)\n return createProviderSummary(inferenceExecutorId, providerResults)\n })\n .sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId))\n\n const overall = createProviderSummary(\n 'overall',\n results,\n )\n\n return {\n overall: {\n exactAverage: overall.exactAverage,\n hybridAverage: overall.hybridAverage,\n judgeAverage: overall.judgeAverage,\n runCount: overall.runCount,\n },\n inferenceExecutors,\n runs,\n }\n}\n","import type { CollectedEvalEntry, EvalModule, EvalModuleMap } from '../../config'\nimport type { RunnerRuntimeContext } from './runtime-context'\n\nimport { basename, dirname, relative } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst evalFileSuffix = '.eval.ts'\nconst absolutePathPattern = /^(?:[A-Z]:\\/|\\/|\\\\\\\\)/i\n\nfunction normalizePath(value: string): string {\n return value.replaceAll('\\\\', '/')\n}\n\n/**\n * Converts a file path into a project-relative path when possible.\n *\n * Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n *\n * Before: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n */\nexport function asProjectRelativePath(filePath: string, context: RunnerRuntimeContext): string {\n const normalizedFilePath = normalizePath(filePath)\n const normalizedProjectRootDirectory = normalizePath(context.projectRootDirectory)\n const filePathWindowsDrive = normalizedFilePath.match(/^[A-Z]:\\//i)?.[0]\n const projectRootWindowsDrive = normalizedProjectRootDirectory.match(/^[A-Z]:\\//i)?.[0]\n\n if (filePathWindowsDrive != null && projectRootWindowsDrive == null) {\n return normalizedFilePath\n }\n\n if (\n filePathWindowsDrive != null\n && projectRootWindowsDrive != null\n && filePathWindowsDrive.toLowerCase() !== projectRootWindowsDrive.toLowerCase()\n ) {\n return normalizedFilePath\n }\n\n const projectRootDirectory = context.projectRootDirectory\n const relativeFilePath = normalizePath(relative(projectRootDirectory, filePath))\n\n if (!absolutePathPattern.test(relativeFilePath)) {\n if (relativeFilePath === '..') {\n return normalizePath(filePath)\n }\n\n if (!relativeFilePath.startsWith('../')) {\n return relativeFilePath\n }\n }\n\n return normalizePath(filePath)\n}\n\nfunction resolveModuleFilePath(moduleHref: string): string | null {\n if (!moduleHref.startsWith('file:')) {\n return null\n }\n\n try {\n return fileURLToPath(moduleHref)\n }\n catch {\n return null\n }\n}\n\nfunction createCollectedEvalEntry(\n moduleHref: string,\n moduleDefinition: EvalModule,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry | null {\n const filePath = resolveModuleFilePath(moduleHref)\n\n if (!filePath) {\n return null\n }\n\n const relativeFilePath = asProjectRelativePath(filePath, context)\n\n if (!relativeFilePath.endsWith(evalFileSuffix)) {\n return null\n }\n\n const entryName = basename(relativeFilePath, evalFileSuffix)\n\n if (entryName.length === 0) {\n return null\n }\n\n const relativeDirectory = dirname(relativeFilePath)\n const directory = relativeDirectory === '.' ? '' : relativeDirectory\n\n return {\n ...moduleDefinition.default,\n directory,\n filePath,\n id: directory.length === 0 ? entryName : `${directory}/${entryName}`,\n name: entryName,\n }\n}\n\n/**\n * Collects loaded vieval modules into sorted runner entries with stable ids.\n *\n * Call stack:\n *\n * `import.meta.glob(...)`\n * -> {@link collectEvalEntries}\n * -> {@link createCollectedEvalEntry}\n * -> {@link CollectedEvalEntry}[]\n *\n * Use when:\n * - the runner has already loaded candidate eval modules\n * - downstream scheduling needs stable entry ids and directory metadata\n */\nexport function collectEvalEntries(\n modules: EvalModuleMap,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry[] {\n return Object.entries(modules)\n .flatMap(([moduleHref, moduleDefinition]) => {\n const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context)\n\n if (!entry) {\n return []\n }\n\n return [entry]\n })\n .sort((left, right) => left.id.localeCompare(right.id))\n}\n","import type { TaskCacheRuntime } from '../cache'\nimport type { AggregatedRunResults, RunResult } from './aggregate'\nimport type { ScheduledTask } from './schedule'\nimport type { TaskExecutionContext } from './task-context'\n\nimport { errorMessageFrom } from '@moeru/std'\nimport { limitConcurrency } from '@vitest/runner/utils'\n\nimport { aggregateRunResults } from './aggregate'\n\n/**\n * Executes one scheduled runner task and returns a normalized run result.\n *\n * Use when:\n * - a scheduler already selected the task and execution context\n * - the caller wants a typed executor contract for runner workers\n *\n * Expects:\n * - the task context to be ready for model resolution and task-scoped work\n *\n * Returns:\n * - a normalized run result with score entries ready for aggregation\n */\nexport type ScheduledTaskExecutor = (\n task: ScheduledTask,\n context: TaskExecutionContext,\n) => Promise<RunResult>\n\n/**\n * Terminal task state reported by runner lifecycle hooks.\n *\n * Use when:\n * - reporting the outcome of one scheduled task to lifecycle observers\n *\n * Expects:\n * - hooks treat the value as final for the completed task\n */\nexport type RunnerTaskState = 'passed' | 'failed'\n\n/**\n * Optional runner execution hooks used while processing scheduled tasks.\n *\n * Use when:\n * - callers want lifecycle visibility around sequential task execution\n * - task execution should remain deterministic while still observable\n *\n * Expects:\n * - hook functions are synchronous lifecycle observers\n */\nexport interface RunScheduledTasksOptions {\n /**\n * Creates per-task execution context.\n *\n * Use when:\n * - executor code needs per-task model resolution or other task-scoped data\n */\n createExecutionContext?: (task: ScheduledTask) => TaskExecutionContext\n /**\n * Runs before the executor starts handling a task.\n *\n * Use when:\n * - callers want to observe task activation before execution begins\n *\n * Expects:\n * - thrown errors abort the task before executor work starts\n */\n onTaskStart?: (task: ScheduledTask) => void\n /**\n * Runs after the executor settles for a task.\n *\n * Use when:\n * - callers want to observe successful and failed task completion\n *\n * Expects:\n * - thrown errors abort successful runs\n * - failed-task observers do not override the executor error for the task\n */\n onTaskEnd?: (task: ScheduledTask, state: RunnerTaskState) => void\n /**\n * Maximum number of tasks to execute concurrently.\n *\n * @default 1\n */\n maxConcurrency?: number\n}\n\nfunction createDefaultExecutionContext(task: ScheduledTask): TaskExecutionContext {\n const cache: TaskCacheRuntime = {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n\n return {\n cache,\n model(options) {\n const requestedModelName = typeof options === 'string' ? options : options?.name\n if (requestedModelName != null) {\n throw new Error(`No model registry configured. Requested model: ${requestedModelName}`)\n }\n\n throw new Error(`No model registry configured for task inferenceExecutor id \"${task.inferenceExecutor.id}\".`)\n },\n }\n}\n\n/**\n * Error thrown when a scheduled run fails before producing a normalized result.\n */\nexport class RunnerExecutionError extends Error {\n /**\n * Stable task id that failed.\n */\n taskId: string\n\n constructor(taskId: string, cause: unknown) {\n const message = errorMessageFrom(cause) ?? 'Unknown runner execution failure.'\n super(`Runner task \"${taskId}\" failed: ${message}`)\n this.name = 'RunnerExecutionError'\n this.taskId = taskId\n this.cause = cause\n }\n}\n\nfunction createRunnerExecutionError(taskId: string, cause: unknown): RunnerExecutionError {\n if (cause instanceof RunnerExecutionError && cause.taskId === taskId) {\n return cause\n }\n\n return new RunnerExecutionError(taskId, cause)\n}\n\n/**\n * Executes runner tasks sequentially and aggregates the normalized results.\n *\n * Call stack:\n *\n * {@link createRunnerSchedule}\n * -> {@link runScheduledTasks}\n * -> `executor(task)`\n * -> {@link aggregateRunResults}\n *\n * Use when:\n * - the caller already expanded the runner matrix\n * - task execution should stay deterministic and easy to debug\n *\n * Expects:\n * - `executor` to return normalized `0..1` scores\n * - callers to handle concurrency outside this helper when needed\n * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers\n *\n * Throws:\n * - `RunnerExecutionError` when task setup, hooks, or the executor throws\n */\nexport async function runScheduledTasks(\n tasks: readonly ScheduledTask[],\n executor: ScheduledTaskExecutor,\n options: RunScheduledTasksOptions = {},\n): Promise<AggregatedRunResults> {\n if (tasks.length === 0) {\n return aggregateRunResults([])\n }\n\n async function executeScheduledTask(task: ScheduledTask): Promise<RunResult> {\n let executionContext: TaskExecutionContext\n\n try {\n executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskStart?.(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n let runResult: RunResult\n try {\n runResult = await executor(task, executionContext)\n }\n catch (error) {\n try {\n options.onTaskEnd?.(task, 'failed')\n }\n catch {\n // Failed-task observers must not mask the task execution failure.\n }\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskEnd?.(task, 'passed')\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n return runResult\n }\n\n const maxConcurrency = options.maxConcurrency ?? 1\n if (maxConcurrency <= 1) {\n const results: RunResult[] = []\n for (const task of tasks) {\n results.push(await executeScheduledTask(task))\n }\n return aggregateRunResults(results)\n }\n\n const runWithLimit = limitConcurrency(maxConcurrency)\n const resultPairs = await Promise.all(tasks.map(async (task, index) => {\n const result = await runWithLimit(async () => executeScheduledTask(task))\n return { index, result }\n }))\n\n const sortedResults = resultPairs\n .sort((left, right) => left.index - right.index)\n .map(item => item.result)\n\n return aggregateRunResults(sortedResults)\n}\n","import { createRequire } from 'node:module'\nimport { dirname } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst require = createRequire(import.meta.url)\n\n/**\n * Shared runtime context used by the vieval runner.\n *\n * Use when:\n * - runner services need stable path resolution without module-level side effects\n * - call sites want deterministic control over workspace root detection\n */\nexport interface RunnerRuntimeContext {\n /**\n * Absolute project root directory used for path normalization.\n */\n projectRootDirectory: string\n}\n\n/**\n * Options used to construct the runner runtime context.\n */\nexport interface CreateVievalRunnerRuntimeContextOptions {\n /**\n * Directory used to search for the nearest pnpm workspace.\n *\n * @default directory of this module file\n */\n cwd?: string\n /**\n * Absolute fallback directory when a pnpm workspace root is not found.\n *\n * @default package root directory (`packages/vieval`)\n */\n fallbackProjectRootDirectory?: string\n}\n\n/**\n * Creates a side-effect-free runtime context for runner path normalization.\n *\n * Call stack:\n *\n * {@link createRunnerRuntimeContext}\n * -> `findWorkspaceDir(cwd)`\n * -> `resolve projectRootDirectory`\n * -> `{ projectRootDirectory }`\n *\n * Use when:\n * - initializing runner infrastructure before collecting eval modules\n * - tests need deterministic root resolution behavior\n */\nexport async function createRunnerRuntimeContext(\n options: CreateVievalRunnerRuntimeContextOptions = {},\n): Promise<RunnerRuntimeContext> {\n const cwd = options.cwd ?? dirname(fileURLToPath(import.meta.url))\n const fallbackProjectRootDirectory = options.fallbackProjectRootDirectory\n ?? fileURLToPath(new URL('../../../', import.meta.url))\n\n // NOTICE:\n // We use dynamic `require` here because `@pnpm/find-workspace-dir` is CommonJS.\n // Keeping this load inside the factory avoids module-level initialization side effects.\n const { findWorkspaceDir } = require('@pnpm/find-workspace-dir') as {\n findWorkspaceDir: (currentWorkingDirectory: string) => Promise<string | undefined>\n }\n\n // NOTICE:\n // Workspace discovery is required to keep collected eval ids stable when this\n // package is moved inside different monorepo layouts.\n const workspaceDirectory = await findWorkspaceDir(cwd)\n\n return {\n projectRootDirectory: workspaceDirectory ?? fallbackProjectRootDirectory,\n }\n}\n","import type { CollectedEvalEntry, MatrixDefinition, MatrixLayer, MatrixValue } from '../../config'\n\n/**\n * Describes the inferenceExecutor target for a scheduled eval run.\n */\nexport interface InferenceExecutor {\n /**\n * Stable inferenceExecutor identifier such as `openai:gpt-4.1-mini`.\n */\n id: string\n}\n\n/**\n * Stores the selected value for each matrix axis.\n */\nexport type RunnerMatrixSelection = Record<string, string>\n\n/**\n * Stores stable row ids for one resolved scheduled task matrix.\n */\nexport interface ScheduledTaskMatrixMeta {\n /**\n * Stable row id for the resolved run matrix selection.\n */\n runRowId: string\n /**\n * Stable row id for the resolved eval matrix selection.\n */\n evalRowId: string\n}\n\n/**\n * Stores the structured matrix payload for one scheduled task.\n */\nexport interface ScheduledTaskMatrix {\n /**\n * Runtime matrix selection visible to task code.\n */\n run: RunnerMatrixSelection\n /**\n * Eval-time matrix selection visible to task code.\n */\n eval: RunnerMatrixSelection\n /**\n * Stable row ids for both scopes.\n */\n meta: ScheduledTaskMatrixMeta\n}\n\n/**\n * Maps matrix axis names to the values that should be expanded.\n */\nexport type RunnerMatrixDefinition = MatrixDefinition\n\n/**\n * Accepts either flat axis definitions or one layered matrix object.\n */\nexport type RunnerMatrixInput = RunnerMatrixDefinition | MatrixLayer\n\nconst matrixLayerKeys = new Set(['disable', 'extend', 'override'])\nconst ambiguousMatrixDefinitionErrorMessage = 'Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.'\n\n/**\n * Represents one fully expanded runner task.\n */\nexport interface ScheduledTask {\n /**\n * Stable task id derived from the entry, inferenceExecutor, and matrix selection.\n */\n id: string\n /**\n * The collected eval entry to execute.\n */\n entry: CollectedEvalEntry\n /**\n * The inferenceExecutor selected for this task.\n */\n inferenceExecutor: InferenceExecutor\n /**\n * The concrete scoped matrix selection for this task.\n */\n matrix: ScheduledTaskMatrix\n}\n\n/**\n * Configures how the runner should expand its execution matrix.\n */\nexport interface CreateRunnerScheduleOptions {\n /**\n * Collected eval entries that should be scheduled.\n */\n entries: readonly CollectedEvalEntry[]\n /**\n * Providers that should run each entry.\n */\n inferenceExecutors: readonly InferenceExecutor[]\n /**\n * Optional run-time matrix axes expanded as a cartesian product.\n */\n runMatrix?: RunnerMatrixInput\n /**\n * Optional eval-time matrix axes expanded as a cartesian product.\n */\n evalMatrix?: RunnerMatrixInput\n}\n\nfunction encodeTaskIdSegment(value: string): string {\n return encodeURIComponent(value)\n}\n\nfunction stringifyMatrixValue(value: MatrixValue): string {\n return String(value)\n}\n\nfunction cloneMatrixSelection(matrix: RunnerMatrixSelection): RunnerMatrixSelection {\n return { ...matrix }\n}\n\nfunction createScheduledTaskMatrix(\n runMatrix: RunnerMatrixSelection,\n evalMatrix: RunnerMatrixSelection,\n): ScheduledTaskMatrix {\n return {\n eval: cloneMatrixSelection(evalMatrix),\n meta: {\n evalRowId: createStableRowId(evalMatrix),\n runRowId: createStableRowId(runMatrix),\n },\n run: cloneMatrixSelection(runMatrix),\n }\n}\n\nfunction isMatrixLayer(matrix: RunnerMatrixInput): matrix is MatrixLayer {\n const matrixKeys = Object.keys(matrix)\n return (\n matrixKeys.length > 0\n && matrixKeys.every(key => matrixLayerKeys.has(key))\n )\n}\n\nfunction assertNonAmbiguousMatrixDefinition(matrix: RunnerMatrixInput): void {\n const matrixKeys = Object.keys(matrix)\n const hasReservedKeys = matrixKeys.some(key => matrixLayerKeys.has(key))\n const hasAxisKeys = matrixKeys.some(key => !matrixLayerKeys.has(key))\n\n if (hasReservedKeys && hasAxisKeys) {\n throw new TypeError(ambiguousMatrixDefinitionErrorMessage)\n }\n}\n\nfunction normalizeLayerInputToAxes(matrix: RunnerMatrixInput | undefined): MatrixLayer | undefined {\n if (matrix == null) {\n return undefined\n }\n\n assertNonAmbiguousMatrixDefinition(matrix)\n\n if (isMatrixLayer(matrix)) {\n return matrix\n }\n\n return {\n extend: matrix,\n }\n}\n\nfunction dedupeAxisValues(values: readonly MatrixValue[]): string[] {\n return Array.from(new Set(values.map(stringifyMatrixValue)))\n}\n\nfunction applyAxisValues(\n axes: Map<string, string[]>,\n definition: RunnerMatrixDefinition | undefined,\n mode: 'extend' | 'override',\n): void {\n if (definition == null) {\n return\n }\n\n for (const [axis, values] of Object.entries(definition)) {\n const nextValues = dedupeAxisValues(values)\n\n if (mode === 'extend') {\n const existingValues = axes.get(axis) ?? []\n axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])))\n continue\n }\n\n axes.set(axis, nextValues)\n }\n}\n\nfunction applyLayer(\n baseAxes: ReadonlyMap<string, string[]>,\n layer: MatrixLayer | undefined,\n): Map<string, string[]> {\n const nextAxes = new Map<string, string[]>(\n Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]),\n )\n\n for (const axis of layer?.disable ?? []) {\n nextAxes.delete(axis)\n }\n\n applyAxisValues(nextAxes, layer?.extend, 'extend')\n applyAxisValues(nextAxes, layer?.override, 'override')\n\n return nextAxes\n}\n\nfunction expandAxesToRows(axes: ReadonlyMap<string, readonly string[]>): RunnerMatrixSelection[] {\n if (axes.size === 0) {\n return [{}]\n }\n\n const dimensions = Array.from(axes.entries())\n\n let selections: RunnerMatrixSelection[] = [{}]\n\n for (const [axis, values] of dimensions) {\n if (values.length === 0) {\n return []\n }\n\n const nextSelections: RunnerMatrixSelection[] = []\n\n for (const selection of selections) {\n for (const value of values) {\n nextSelections.push({\n ...selection,\n [axis]: value,\n })\n }\n }\n\n selections = nextSelections\n }\n\n return selections\n}\n\nfunction createStableRowId(matrix: RunnerMatrixSelection): string {\n const segments = Object.entries(matrix)\n .sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis))\n .map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`)\n\n if (segments.length === 0) {\n return 'default'\n }\n\n return segments.join('&')\n}\n\nfunction createTaskId(entryId: string, inferenceExecutorId: string, runRowId: string, evalRowId: string): string {\n const encodedEntryId = encodeTaskIdSegment(entryId)\n const encodedProviderId = encodeTaskIdSegment(inferenceExecutorId)\n\n return [\n encodedEntryId,\n encodedProviderId,\n `run=${encodeTaskIdSegment(runRowId)}`,\n `eval=${encodeTaskIdSegment(evalRowId)}`,\n ].join('::')\n}\n\nfunction createResolvedRunAxes(\n entry: CollectedEvalEntry,\n runMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n runMatrix,\n entry.matrix?.runMatrix,\n entry.task?.matrix?.runMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\nfunction createResolvedEvalAxes(\n entry: CollectedEvalEntry,\n evalMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n evalMatrix,\n entry.matrix?.evalMatrix,\n entry.task?.matrix?.evalMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\n/**\n * Expands collected entries into a stable runner schedule.\n *\n * Call stack:\n *\n * {@link collectEvalEntries} (`../runner`)\n * -> {@link createRunnerSchedule}\n * -> {@link expandAxesToRows}\n * -> {@link ScheduledTask}[]\n *\n * Use when:\n * - the runner already knows which eval entries are available\n * - each entry must run against multiple inferenceExecutors or matrix variants\n *\n * Expects:\n * - `entries` and `inferenceExecutors` to be provided in the desired execution order\n * - matrix axes to use insertion order when generating combinations\n */\nexport function createRunnerSchedule(options: CreateRunnerScheduleOptions): ScheduledTask[] {\n if (options.entries.length === 0) {\n return []\n }\n\n if (options.inferenceExecutors.length === 0) {\n return []\n }\n\n const tasks: ScheduledTask[] = []\n\n for (const entry of options.entries) {\n const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix))\n const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix))\n\n if (runSelections.length === 0 || evalSelections.length === 0) {\n continue\n }\n\n for (const inferenceExecutor of options.inferenceExecutors) {\n for (const runMatrix of runSelections) {\n for (const evalMatrix of evalSelections) {\n const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix)\n\n tasks.push({\n entry,\n id: createTaskId(\n entry.id,\n inferenceExecutor.id,\n isolatedMatrix.meta.runRowId,\n isolatedMatrix.meta.evalRowId,\n ),\n matrix: isolatedMatrix,\n inferenceExecutor,\n })\n }\n }\n }\n }\n\n return tasks\n}\n","import type { ModelDefinition } from '../../config/models'\nimport type { TaskCacheRuntime } from '../cache'\nimport type { ScheduledTask } from './schedule'\n\nimport { resolveModelByName } from '../../config/models'\n\n/**\n * Options for selecting a model from the execution context.\n */\nexport interface TaskModelSelectionOptions {\n /**\n * Model id or alias name.\n */\n name: string\n}\n\n/**\n * Task-scoped execution context exposed to runner executors.\n */\nexport interface TaskExecutionContext {\n /**\n * Deterministic cache runtime scoped to the current task project.\n */\n cache: TaskCacheRuntime\n /**\n * Resolves model configuration for the current task.\n *\n * Use when:\n * - no arguments are provided to use the model selected by run matrix/inferenceExecutor\n * - `name` is provided to resolve a specific model id or alias\n */\n model: (\n selection?: string | TaskModelSelectionOptions,\n ) => ModelDefinition\n}\n\n/**\n * Inputs used to build task execution context.\n */\nexport interface CreateTaskExecutionContextOptions {\n cache?: TaskCacheRuntime\n models: readonly ModelDefinition[]\n task: ScheduledTask\n}\n\nfunction createNoopTaskCacheRuntime(): TaskCacheRuntime {\n return {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n}\n\nfunction resolveDefaultTaskModel(\n models: readonly ModelDefinition[],\n task: ScheduledTask,\n): ModelDefinition {\n const runMatrixModelName = task.matrix.run.model\n if (runMatrixModelName != null) {\n const matrixSelectedModel = resolveModelByName(models, runMatrixModelName)\n if (matrixSelectedModel != null) {\n return matrixSelectedModel\n }\n\n throw new Error(`Unknown configured model \"${runMatrixModelName}\" from task.matrix.run.model.`)\n }\n\n const matched = resolveModelByName(models, task.inferenceExecutor.id)\n if (matched != null) {\n return matched\n }\n\n if (models.length > 1) {\n throw new Error(\n [\n `Multiple configured models are available, but no default model is selected for inferenceExecutor \"${task.inferenceExecutor.id}\".`,\n 'Select one model explicitly by either:',\n '- setting runMatrix.override.model (or task matrix run.model)',\n '- setting project.inferenceExecutors to a matching model id',\n '- calling context.model({ name: \"your-model-id-or-alias\" })',\n ].join('\\n'),\n )\n }\n\n if (models.length === 1) {\n const firstModel = models[0]\n if (firstModel != null) {\n return firstModel\n }\n }\n\n throw new Error(`No configured model found for inferenceExecutor id \"${task.inferenceExecutor.id}\".`)\n}\n\n/**\n * Creates task-scoped model resolver context for runner execution.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link createTaskExecutionContext}\n * -> {@link resolveModelByName}\n * -> `task.model()` / `task.model({ name })`\n */\nexport function createTaskExecutionContext(options: CreateTaskExecutionContextOptions): TaskExecutionContext {\n return {\n cache: options.cache ?? createNoopTaskCacheRuntime(),\n model(selection) {\n if (selection == null) {\n return resolveDefaultTaskModel(options.models, options.task)\n }\n\n const name = typeof selection === 'string' ? selection : selection.name\n\n const namedModel = resolveModelByName(options.models, name)\n if (namedModel == null) {\n throw new Error(`Unknown configured model \"${name}\".`)\n }\n\n return namedModel\n },\n }\n}\n"],"mappings":";;;;;;;;;;;;AA2BA,SAAS,oBAAoB,OAAuB;CAClD,MAAM,aAAa,MAAM,MAAM;AAC/B,KAAI,WAAW,WAAW,EACxB,QAAO;AAGT,QAAO,WAAW,QAAQ,aAAa,IAAI;;AAG7C,SAAS,mBAAmB,WAA+B,WAAmD;AAC5G,KAAI,aAAa,QAAQ,UAAU,SAAS,EAC1C,QAAO,UAAU,WAAW,IAAI,GAAG,UAAU,MAAM,EAAE,GAAG;AAG1D,KAAI,aAAa,QAAQ,UAAU,WAAW,EAC5C;AAGF,KAAI,cAAc,mBAChB,QAAO;AAGT,KAAI,cAAc,aAChB,QAAO;AAGT,KAAI,cAAc,YAChB,QAAO;;;;;;;;;;;AAeX,SAAgB,+BAA+B,SAAqC;CAClF,MAAM,eAAe,QAAQ,IAAI,KAAI,YAAW,oBAAoB,QAAQ,CAAC;CAC7E,MAAM,YAAY,mBAAmB,QAAQ,KAAK,QAAQ,UAAU;AAEpE,KAAI,aAAa,WAAW,EAC1B,QAAO,aAAa,OAAO,CAAC,WAAW,GAAG,CAAC,YAAY,YAAY;AAGrE,KAAI,aAAa,KACf,QAAO;CAGT,MAAM,cAAc,aAAa,MAAM,GAAG,KAAK,IAAI,GAAG,aAAa,SAAS,EAAE,CAAC;CAC/E,MAAM,OAAO,aAAa,aAAa,SAAS,MAAM;AACtD,QAAO,CAAC,GAAG,aAAa,GAAG,KAAK,GAAG,YAAY;;AAGjD,eAAe,gBAAgB,MAAc,SAAyC;CACpF,MAAM,YAAY,QAAQ,KAAK;CAC/B,MAAM,gBAAgB,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG,KAAK,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,SAAS,GAAG,CAAC,MAAM,GAAG,GAAG;AACzG,OAAM,MAAM,WAAW,EAAE,WAAW,MAAM,CAAC;AAC3C,OAAM,UAAU,eAAe,QAAQ;AACvC,OAAM,OAAO,eAAe,KAAK;;AAGnC,SAAS,sBAAsB,MAA+B;AAC5D,QAAO;EACL;EACA,MAAM,SAAS;AACb,OAAI;AACF,UAAM,OAAO,KAAK;AAClB,WAAO;WAEH;AACJ,WAAO;;;EAGX,iBAAiB;AACf,UAAO,iBAAiB,KAAK;;EAE/B,MAAM,kBAAkB;AACtB,SAAM,MAAM,QAAQ,KAAK,EAAE,EAAE,WAAW,MAAM,CAAC;AAC/C,UAAO,kBAAkB,KAAK;;EAEhC,MAAM,aAAa;AACjB,UAAO,MAAM,SAAS,KAAK;;EAE7B,MAAM,YAAY,OAAO;AACvB,SAAM,gBAAgB,MAAM,MAAM;;EAEpC,MAAM,SAAS,WAAW,SAAS;AACjC,UAAO,MAAM,SAAS,MAAM,SAAS;;EAEvC,MAAM,UAAU,OAAO,WAAW,SAAS;AACzC,SAAM,gBAAgB,MAAM,OAAO,KAAK,OAAO,SAAS,CAAC;;EAE3D,MAAM,WAAc;AAClB,UAAO,KAAK,MAAM,MAAM,SAAS,MAAM,QAAQ,CAAC;;EAElD,MAAM,UAAU,OAAO;AACrB,SAAM,gBAAgB,MAAM,GAAG,KAAK,UAAU,OAAO,MAAM,EAAE,CAAC,IAAI;;EAEpE,MAAM,mBAAsB;AAC1B,UAAO,MAAM,KAAK,UAAe;;EAEnC,MAAM,sBAAyB;AAC7B,UAAO,MAAM,KAAK,UAAa;;EAElC;;AAGH,SAAS,qBAAqB,eAAuB,WAAmC;AACtF,QAAO,EACL,KAAK,SAAS;EACZ,MAAM,uBAAuB,+BAA+B,QAAQ;AACpE,SAAO,sBAAsB,KAAK,eAAe,oBAAoB,UAAU,EAAE,GAAG,qBAAqB,CAAC;IAE7G;;;;;;;;;;;;;;;;;AAkBH,SAAgB,iCACd,SACkB;CAClB,MAAM,qBAAqB,oBAAoB,QAAQ,YAAY;CACnE,MAAM,mBAAmB,oBAAoB,QAAQ,YAAY;CACjE,MAAM,gBAAgB,KAAK,QAAQ,oBAAoB,oBAAoB,iBAAiB;AAE5F,QAAO,EACL,UAAU,MAAM;AACd,SAAO,qBAAqB,eAAe,KAAK;IAEnD;;;;ACvCH,SAAS,yBAAyB,QAAkD;AAClF,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,qBAAqB,MAA4B;AACxD,KAAI,SAAS,WAAW,SAAS,QAC/B,QAAO;AAGT,OAAM,IAAI,UAAU,4BAA4B,KAAK,IAAI;;AAG3D,SAAS,QAAQ,QAA0C;AACzD,KAAI,OAAO,WAAW,EACpB,QAAO;AAIT,QADc,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAC5C,OAAO;;AAGxB,SAAS,oBAAoB,cAA6B,cAA4C;AACpG,KAAI,gBAAgB,QAAQ,gBAAgB,KAC1C,SAAQ,eAAe,gBAAgB;AAGzC,KAAI,gBAAgB,KAClB,QAAO;AAGT,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO;;AAGT,SAAS,oBAAoB,QAA2C;CACtE,MAAM,UAAwB;EAC5B,OAAO,EAAE;EACT,OAAO,EAAE;EACV;AAED,MAAK,MAAM,SAAS,QAAQ;AAG1B,MAFa,qBAAqB,MAAM,KAAK,KAEhC,SAAS;AACpB,WAAQ,MAAM,KAAK,MAAM,MAAM;AAC/B;;AAGF,UAAQ,MAAM,KAAK,MAAM,MAAM;;AAGjC,QAAO;;AAGT,SAAS,iBAAiB,QAAyC;CACjE,MAAM,UAAU,oBAAoB,OAAO,OAAO;CAClD,MAAM,eAAe,QAAQ,QAAQ,MAAM;CAC3C,MAAM,eAAe,QAAQ,QAAQ,MAAM;AAE3C,QAAO;EACL,SAAS,OAAO;EAChB;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D,IAAI,OAAO;EACX;EACA,QAAQ,yBAAyB,OAAO,OAAO;EAC/C,qBAAqB,OAAO;EAC7B;;AAGH,SAAS,sBAAsB,qBAA6B,SAA0D;CACpH,MAAM,cAAwB,EAAE;CAChC,MAAM,cAAwB,EAAE;AAEhC,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,UAAU,oBAAoB,OAAO,OAAO;AAClD,cAAY,KAAK,GAAG,QAAQ,MAAM;AAClC,cAAY,KAAK,GAAG,QAAQ,MAAM;;CAGpC,MAAM,eAAe,QAAQ,YAAY;CACzC,MAAM,eAAe,QAAQ,YAAY;AAEzC,QAAO;EACL;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D;EACA;EACA,UAAU,QAAQ;EACnB;;;;;;;;;;;;;;;;;;;;;AAsBH,SAAgB,oBAAoB,SAAqD;CACvF,MAAM,OAAO,QAAQ,IAAI,iBAAiB;CAG1C,MAAM,qBADuB,MAAM,KAAK,IAAI,IAAI,QAAQ,KAAI,WAAU,OAAO,oBAAoB,CAAC,CAAC,CAEhG,KAAK,wBAAwB;AAE5B,SAAO,sBAAsB,qBADL,QAAQ,QAAO,WAAU,OAAO,wBAAwB,oBAAoB,CAClC;GAClE,CACD,MAAM,MAAM,UAAU,KAAK,oBAAoB,cAAc,MAAM,oBAAoB,CAAC;CAE3F,MAAM,UAAU,sBACd,WACA,QACD;AAED,QAAO;EACL,SAAS;GACP,cAAc,QAAQ;GACtB,eAAe,QAAQ;GACvB,cAAc,QAAQ;GACtB,UAAU,QAAQ;GACnB;EACD;EACA;EACD;;;;ACvRH,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAE5B,SAAS,cAAc,OAAuB;AAC5C,QAAO,MAAM,WAAW,MAAM,IAAI;;;;;;;;;;;AAYpC,SAAgB,sBAAsB,UAAkB,SAAuC;CAC7F,MAAM,qBAAqB,cAAc,SAAS;CAClD,MAAM,iCAAiC,cAAc,QAAQ,qBAAqB;CAClF,MAAM,uBAAuB,mBAAmB,MAAM,aAAa,GAAG;CACtE,MAAM,0BAA0B,+BAA+B,MAAM,aAAa,GAAG;AAErF,KAAI,wBAAwB,QAAQ,2BAA2B,KAC7D,QAAO;AAGT,KACE,wBAAwB,QACrB,2BAA2B,QAC3B,qBAAqB,aAAa,KAAK,wBAAwB,aAAa,CAE/E,QAAO;CAGT,MAAM,uBAAuB,QAAQ;CACrC,MAAM,mBAAmB,cAAc,SAAS,sBAAsB,SAAS,CAAC;AAEhF,KAAI,CAAC,oBAAoB,KAAK,iBAAiB,EAAE;AAC/C,MAAI,qBAAqB,KACvB,QAAO,cAAc,SAAS;AAGhC,MAAI,CAAC,iBAAiB,WAAW,MAAM,CACrC,QAAO;;AAIX,QAAO,cAAc,SAAS;;AAGhC,SAAS,sBAAsB,YAAmC;AAChE,KAAI,CAAC,WAAW,WAAW,QAAQ,CACjC,QAAO;AAGT,KAAI;AACF,SAAO,cAAc,WAAW;SAE5B;AACJ,SAAO;;;AAIX,SAAS,yBACP,YACA,kBACA,SAC2B;CAC3B,MAAM,WAAW,sBAAsB,WAAW;AAElD,KAAI,CAAC,SACH,QAAO;CAGT,MAAM,mBAAmB,sBAAsB,UAAU,QAAQ;AAEjE,KAAI,CAAC,iBAAiB,SAAS,eAAe,CAC5C,QAAO;CAGT,MAAM,YAAY,SAAS,kBAAkB,eAAe;AAE5D,KAAI,UAAU,WAAW,EACvB,QAAO;CAGT,MAAM,oBAAoB,QAAQ,iBAAiB;CACnD,MAAM,YAAY,sBAAsB,MAAM,KAAK;AAEnD,QAAO;EACL,GAAG,iBAAiB;EACpB;EACA;EACA,IAAI,UAAU,WAAW,IAAI,YAAY,GAAG,UAAU,GAAG;EACzD,MAAM;EACP;;;;;;;;;;;;;;;;AAiBH,SAAgB,mBACd,SACA,SACsB;AACtB,QAAO,OAAO,QAAQ,QAAQ,CAC3B,SAAS,CAAC,YAAY,sBAAsB;EAC3C,MAAM,QAAQ,yBAAyB,YAAY,kBAAkB,QAAQ;AAE7E,MAAI,CAAC,MACH,QAAO,EAAE;AAGX,SAAO,CAAC,MAAM;GACd,CACD,MAAM,MAAM,UAAU,KAAK,GAAG,cAAc,MAAM,GAAG,CAAC;;;;AC9C3D,SAAS,8BAA8B,MAA2C;AAYhF,QAAO;EACL,OAZ8B,EAC9B,UAAU,MAAM;AACd,UAAO,EACL,KAAK,SAAS;IACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,UAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;MAE3G;KAEJ;EAIC,MAAM,SAAS;GACb,MAAM,qBAAqB,OAAO,YAAY,WAAW,UAAU,SAAS;AAC5E,OAAI,sBAAsB,KACxB,OAAM,IAAI,MAAM,kDAAkD,qBAAqB;AAGzF,SAAM,IAAI,MAAM,+DAA+D,KAAK,kBAAkB,GAAG,IAAI;;EAEhH;;;;;AAMH,IAAa,uBAAb,cAA0C,MAAM;;;;CAI9C;CAEA,YAAY,QAAgB,OAAgB;EAC1C,MAAM,UAAU,iBAAiB,MAAM,IAAI;AAC3C,QAAM,gBAAgB,OAAO,YAAY,UAAU;AACnD,OAAK,OAAO;AACZ,OAAK,SAAS;AACd,OAAK,QAAQ;;;AAIjB,SAAS,2BAA2B,QAAgB,OAAsC;AACxF,KAAI,iBAAiB,wBAAwB,MAAM,WAAW,OAC5D,QAAO;AAGT,QAAO,IAAI,qBAAqB,QAAQ,MAAM;;;;;;;;;;;;;;;;;;;;;;;;AAyBhD,eAAsB,kBACpB,OACA,UACA,UAAoC,EAAE,EACP;AAC/B,KAAI,MAAM,WAAW,EACnB,QAAO,oBAAoB,EAAE,CAAC;CAGhC,eAAe,qBAAqB,MAAyC;EAC3E,IAAI;AAEJ,MAAI;AACF,sBAAmB,QAAQ,yBAAyB,KAAK,IAAI,8BAA8B,KAAK;WAE3F,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,cAAc,KAAK;WAEtB,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;EAGlD,IAAI;AACJ,MAAI;AACF,eAAY,MAAM,SAAS,MAAM,iBAAiB;WAE7C,OAAO;AACZ,OAAI;AACF,YAAQ,YAAY,MAAM,SAAS;WAE/B;AAGN,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,YAAY,MAAM,SAAS;WAE9B,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,SAAO;;CAGT,MAAM,iBAAiB,QAAQ,kBAAkB;AACjD,KAAI,kBAAkB,GAAG;EACvB,MAAM,UAAuB,EAAE;AAC/B,OAAK,MAAM,QAAQ,MACjB,SAAQ,KAAK,MAAM,qBAAqB,KAAK,CAAC;AAEhD,SAAO,oBAAoB,QAAQ;;CAGrC,MAAM,eAAe,iBAAiB,eAAe;AAUrD,QAAO,qBATa,MAAM,QAAQ,IAAI,MAAM,IAAI,OAAO,MAAM,UAAU;AAErE,SAAO;GAAE;GAAO,QADD,MAAM,aAAa,YAAY,qBAAqB,KAAK,CAAC;GACjD;GACxB,CAAC,EAGA,MAAM,MAAM,UAAU,KAAK,QAAQ,MAAM,MAAM,CAC/C,KAAI,SAAQ,KAAK,OAAO,CAEc;;;;AChO3C,MAAM,UAAU,cAAc,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;AAgD9C,eAAsB,2BACpB,UAAmD,EAAE,EACtB;CAC/B,MAAM,MAAM,QAAQ,OAAO,QAAQ,cAAc,OAAO,KAAK,IAAI,CAAC;CAClE,MAAM,+BAA+B,QAAQ,gCACxC,cAAc,IAAI,IAAI,aAAa,OAAO,KAAK,IAAI,CAAC;CAKzD,MAAM,EAAE,qBAAqB,QAAQ,2BAA2B;AAShE,QAAO,EACL,sBAHyB,MAAM,iBAAiB,IAAI,IAGR,8BAC7C;;;;ACdH,MAAM,kBAAkB,IAAI,IAAI;CAAC;CAAW;CAAU;CAAW,CAAC;AAClE,MAAM,wCAAwC;AA8C9C,SAAS,oBAAoB,OAAuB;AAClD,QAAO,mBAAmB,MAAM;;AAGlC,SAAS,qBAAqB,OAA4B;AACxD,QAAO,OAAO,MAAM;;AAGtB,SAAS,qBAAqB,QAAsD;AAClF,QAAO,EAAE,GAAG,QAAQ;;AAGtB,SAAS,0BACP,WACA,YACqB;AACrB,QAAO;EACL,MAAM,qBAAqB,WAAW;EACtC,MAAM;GACJ,WAAW,kBAAkB,WAAW;GACxC,UAAU,kBAAkB,UAAU;GACvC;EACD,KAAK,qBAAqB,UAAU;EACrC;;AAGH,SAAS,cAAc,QAAkD;CACvE,MAAM,aAAa,OAAO,KAAK,OAAO;AACtC,QACE,WAAW,SAAS,KACjB,WAAW,OAAM,QAAO,gBAAgB,IAAI,IAAI,CAAC;;AAIxD,SAAS,mCAAmC,QAAiC;CAC3E,MAAM,aAAa,OAAO,KAAK,OAAO;CACtC,MAAM,kBAAkB,WAAW,MAAK,QAAO,gBAAgB,IAAI,IAAI,CAAC;CACxE,MAAM,cAAc,WAAW,MAAK,QAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC;AAErE,KAAI,mBAAmB,YACrB,OAAM,IAAI,UAAU,sCAAsC;;AAI9D,SAAS,0BAA0B,QAAgE;AACjG,KAAI,UAAU,KACZ;AAGF,oCAAmC,OAAO;AAE1C,KAAI,cAAc,OAAO,CACvB,QAAO;AAGT,QAAO,EACL,QAAQ,QACT;;AAGH,SAAS,iBAAiB,QAA0C;AAClE,QAAO,MAAM,KAAK,IAAI,IAAI,OAAO,IAAI,qBAAqB,CAAC,CAAC;;AAG9D,SAAS,gBACP,MACA,YACA,MACM;AACN,KAAI,cAAc,KAChB;AAGF,MAAK,MAAM,CAAC,MAAM,WAAW,OAAO,QAAQ,WAAW,EAAE;EACvD,MAAM,aAAa,iBAAiB,OAAO;AAE3C,MAAI,SAAS,UAAU;GACrB,MAAM,iBAAiB,KAAK,IAAI,KAAK,IAAI,EAAE;AAC3C,QAAK,IAAI,MAAM,MAAM,KAAK,IAAI,IAAI,CAAC,GAAG,gBAAgB,GAAG,WAAW,CAAC,CAAC,CAAC;AACvE;;AAGF,OAAK,IAAI,MAAM,WAAW;;;AAI9B,SAAS,WACP,UACA,OACuB;CACvB,MAAM,WAAW,IAAI,IACnB,MAAM,KAAK,SAAS,SAAS,CAAC,CAAC,KAAK,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAC5E;AAED,MAAK,MAAM,QAAQ,OAAO,WAAW,EAAE,CACrC,UAAS,OAAO,KAAK;AAGvB,iBAAgB,UAAU,OAAO,QAAQ,SAAS;AAClD,iBAAgB,UAAU,OAAO,UAAU,WAAW;AAEtD,QAAO;;AAGT,SAAS,iBAAiB,MAAuE;AAC/F,KAAI,KAAK,SAAS,EAChB,QAAO,CAAC,EAAE,CAAC;CAGb,MAAM,aAAa,MAAM,KAAK,KAAK,SAAS,CAAC;CAE7C,IAAI,aAAsC,CAAC,EAAE,CAAC;AAE9C,MAAK,MAAM,CAAC,MAAM,WAAW,YAAY;AACvC,MAAI,OAAO,WAAW,EACpB,QAAO,EAAE;EAGX,MAAM,iBAA0C,EAAE;AAElD,OAAK,MAAM,aAAa,WACtB,MAAK,MAAM,SAAS,OAClB,gBAAe,KAAK;GAClB,GAAG;IACF,OAAO;GACT,CAAC;AAIN,eAAa;;AAGf,QAAO;;AAGT,SAAS,kBAAkB,QAAuC;CAChE,MAAM,WAAW,OAAO,QAAQ,OAAO,CACpC,MAAM,CAAC,WAAW,CAAC,eAAe,SAAS,cAAc,UAAU,CAAC,CACpE,KAAK,CAAC,MAAM,WAAW,GAAG,oBAAoB,KAAK,CAAC,GAAG,oBAAoB,MAAM,GAAG;AAEvF,KAAI,SAAS,WAAW,EACtB,QAAO;AAGT,QAAO,SAAS,KAAK,IAAI;;AAG3B,SAAS,aAAa,SAAiB,qBAA6B,UAAkB,WAA2B;AAI/G,QAAO;EAHgB,oBAAoB,QAAQ;EACzB,oBAAoB,oBAAoB;EAKhE,OAAO,oBAAoB,SAAS;EACpC,QAAQ,oBAAoB,UAAU;EACvC,CAAC,KAAK,KAAK;;AAGd,SAAS,sBACP,OACA,WACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;AAGT,SAAS,uBACP,OACA,YACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;;;;;;;;;;;;;;;;;;;AAqBT,SAAgB,qBAAqB,SAAuD;AAC1F,KAAI,QAAQ,QAAQ,WAAW,EAC7B,QAAO,EAAE;AAGX,KAAI,QAAQ,mBAAmB,WAAW,EACxC,QAAO,EAAE;CAGX,MAAM,QAAyB,EAAE;AAEjC,MAAK,MAAM,SAAS,QAAQ,SAAS;EACnC,MAAM,gBAAgB,iBAAiB,sBAAsB,OAAO,QAAQ,UAAU,CAAC;EACvF,MAAM,iBAAiB,iBAAiB,uBAAuB,OAAO,QAAQ,WAAW,CAAC;AAE1F,MAAI,cAAc,WAAW,KAAK,eAAe,WAAW,EAC1D;AAGF,OAAK,MAAM,qBAAqB,QAAQ,mBACtC,MAAK,MAAM,aAAa,cACtB,MAAK,MAAM,cAAc,gBAAgB;GACvC,MAAM,iBAAiB,0BAA0B,WAAW,WAAW;AAEvE,SAAM,KAAK;IACT;IACA,IAAI,aACF,MAAM,IACN,kBAAkB,IAClB,eAAe,KAAK,UACpB,eAAe,KAAK,UACrB;IACD,QAAQ;IACR;IACD,CAAC;;;AAMV,QAAO;;;;ACxTT,SAAS,6BAA+C;AACtD,QAAO,EACL,UAAU,MAAM;AACd,SAAO,EACL,KAAK,SAAS;GACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,SAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;KAE3G;IAEJ;;AAGH,SAAS,wBACP,QACA,MACiB;CACjB,MAAM,qBAAqB,KAAK,OAAO,IAAI;AAC3C,KAAI,sBAAsB,MAAM;EAC9B,MAAM,sBAAsB,mBAAmB,QAAQ,mBAAmB;AAC1E,MAAI,uBAAuB,KACzB,QAAO;AAGT,QAAM,IAAI,MAAM,6BAA6B,mBAAmB,+BAA+B;;CAGjG,MAAM,UAAU,mBAAmB,QAAQ,KAAK,kBAAkB,GAAG;AACrE,KAAI,WAAW,KACb,QAAO;AAGT,KAAI,OAAO,SAAS,EAClB,OAAM,IAAI,MACR;EACE,qGAAqG,KAAK,kBAAkB,GAAG;EAC/H;EACA;EACA;EACA;EACD,CAAC,KAAK,KAAK,CACb;AAGH,KAAI,OAAO,WAAW,GAAG;EACvB,MAAM,aAAa,OAAO;AAC1B,MAAI,cAAc,KAChB,QAAO;;AAIX,OAAM,IAAI,MAAM,uDAAuD,KAAK,kBAAkB,GAAG,IAAI;;;;;;;;;;;;AAavG,SAAgB,2BAA2B,SAAkE;AAC3G,QAAO;EACL,OAAO,QAAQ,SAAS,4BAA4B;EACpD,MAAM,WAAW;AACf,OAAI,aAAa,KACf,QAAO,wBAAwB,QAAQ,QAAQ,QAAQ,KAAK;GAG9D,MAAM,OAAO,OAAO,cAAc,WAAW,YAAY,UAAU;GAEnE,MAAM,aAAa,mBAAmB,QAAQ,QAAQ,KAAK;AAC3D,OAAI,cAAc,KAChB,OAAM,IAAI,MAAM,6BAA6B,KAAK,IAAI;AAGxD,UAAO;;EAEV"}
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/cache/filesystem.ts","../../../src/core/runner/aggregate.ts","../../../src/core/runner/collect.ts","../../../src/core/runner/run.ts","../../../src/core/runner/runtime-context.ts","../../../src/core/runner/schedule.ts","../../../src/core/runner/task-context.ts"],"sourcesContent":["import type { CacheFileHandle, CacheFileOptions, CacheNamespace, TaskCacheRuntime } from './types'\n\nimport process from 'node:process'\n\nimport { Buffer } from 'node:buffer'\nimport { createReadStream, createWriteStream } from 'node:fs'\nimport { access, mkdir, readFile, rename, writeFile } from 'node:fs/promises'\nimport { dirname, join } from 'node:path'\n\n/**\n * Options for creating the filesystem-backed task cache runtime.\n */\nexport interface CreateFilesystemTaskCacheRuntimeOptions {\n /**\n * Absolute cache root directory.\n */\n cacheRootDirectory: string\n /**\n * Project identifier under one workspace cache scope.\n */\n projectName: string\n /**\n * Workspace identifier used to share cache roots across projects.\n */\n workspaceId: string\n}\n\nfunction sanitizePathSegment(value: string): string {\n const normalized = value.trim()\n if (normalized.length === 0) {\n return 'default'\n }\n\n return normalized.replace(/[^\\w.-]+/g, '-')\n}\n\nfunction normalizeExtension(extension: string | undefined, mediaType: string | undefined): string | undefined {\n if (extension != null && extension.length > 0) {\n return extension.startsWith('.') ? extension.slice(1) : extension\n }\n\n if (mediaType == null || mediaType.length === 0) {\n return undefined\n }\n\n if (mediaType === 'application/json') {\n return 'json'\n }\n\n if (mediaType === 'text/plain') {\n return 'txt'\n }\n\n if (mediaType === 'audio/wav') {\n return 'wav'\n }\n\n return undefined\n}\n\n/**\n * Normalizes cache file options into deterministic relative path segments.\n *\n * Before:\n * - `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`\n *\n * After:\n * - `['cases', 'dataset-hash', 'v1.json']`\n */\nexport function normalizeCacheFilePathSegments(options: CacheFileOptions): string[] {\n const sanitizedKey = options.key.map(segment => sanitizePathSegment(segment))\n const extension = normalizeExtension(options.ext, options.mediaType)\n\n if (sanitizedKey.length === 0) {\n return extension == null ? ['artifact'] : [`artifact.${extension}`]\n }\n\n if (extension == null) {\n return sanitizedKey\n }\n\n const withoutTail = sanitizedKey.slice(0, Math.max(0, sanitizedKey.length - 1))\n const tail = sanitizedKey[sanitizedKey.length - 1] ?? 'artifact'\n return [...withoutTail, `${tail}.${extension}`]\n}\n\nasync function writeAtomically(path: string, content: Buffer | string): Promise<void> {\n const directory = dirname(path)\n const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`\n await mkdir(directory, { recursive: true })\n await writeFile(temporaryPath, content)\n await rename(temporaryPath, path)\n}\n\nfunction createCacheFileHandle(path: string): CacheFileHandle {\n return {\n path,\n async exists() {\n try {\n await access(path)\n return true\n }\n catch {\n return false\n }\n },\n openReadStream() {\n return createReadStream(path)\n },\n async openWriteStream() {\n await mkdir(dirname(path), { recursive: true })\n return createWriteStream(path)\n },\n async readBuffer() {\n return await readFile(path)\n },\n async writeBuffer(value) {\n await writeAtomically(path, value)\n },\n async readText(encoding = 'utf-8') {\n return await readFile(path, encoding)\n },\n async writeText(value, encoding = 'utf-8') {\n await writeAtomically(path, Buffer.from(value, encoding))\n },\n async readJson<T>() {\n return JSON.parse(await readFile(path, 'utf-8')) as T\n },\n async writeJson(value) {\n await writeAtomically(path, `${JSON.stringify(value, null, 2)}\\n`)\n },\n async loadAsCasesInput<T>() {\n return await this.readJson<T[]>()\n },\n async loadAsExpectFixture<T>() {\n return await this.readJson<T>()\n },\n }\n}\n\nfunction createCacheNamespace(baseDirectory: string, namespace: string): CacheNamespace {\n return {\n file(options) {\n const relativePathSegments = normalizeCacheFilePathSegments(options)\n return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments))\n },\n }\n}\n\n/**\n * Creates a deterministic filesystem-backed task cache runtime.\n *\n * Use when:\n * - eval tasks need reproducible cache paths for expensive pre-processing outputs\n * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes\n *\n * Expects:\n * - `cacheRootDirectory` to be writable by the running process\n * - `workspaceId` + `projectName` to stay stable for reproducible paths\n *\n * Returns:\n * - task cache runtime that resolves namespaced file handles under:\n * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`\n */\nexport function createFilesystemTaskCacheRuntime(\n options: CreateFilesystemTaskCacheRuntimeOptions,\n): TaskCacheRuntime {\n const workspaceDirectory = sanitizePathSegment(options.workspaceId)\n const projectDirectory = sanitizePathSegment(options.projectName)\n const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory)\n\n return {\n namespace(name) {\n return createCacheNamespace(baseDirectory, name)\n },\n }\n}\n","import type { ScheduledTaskMatrix } from './schedule'\n\n/**\n * Identifies the scoring family for a single eval score.\n */\nexport type RunScoreKind = 'exact' | 'judge'\n\n/**\n * Represents one normalized score emitted by a completed eval run.\n */\nexport interface RunScore {\n /**\n * Score family used for aggregation.\n */\n kind: RunScoreKind\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n}\n\n/**\n * Captures the output of one scheduled runner task.\n */\nexport interface RunResult {\n /**\n * Stable run id, usually copied from the scheduled task id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Raw scores emitted by the eval.\n */\n scores: readonly RunScore[]\n}\n\n/**\n * Stores the per-run score averages after normalization.\n */\nexport interface AggregatedRunSummary {\n /**\n * Stable run id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Mean of exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average. Uses both families when present, otherwise falls back to the\n * single available family.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores inferenceExecutor-level score aggregates across multiple runs.\n */\nexport interface AggregatedProviderSummary {\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Number of runs included in this inferenceExecutor bucket.\n */\n runCount: number\n /**\n * Mean of all exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of all judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average derived from the inferenceExecutor exact and judge means.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores the final aggregation output for a batch of runner results.\n */\nexport interface AggregatedRunResults {\n /**\n * Per-run normalized score summaries.\n */\n runs: AggregatedRunSummary[]\n /**\n * Provider-level summaries sorted by inferenceExecutor id.\n */\n inferenceExecutors: AggregatedProviderSummary[]\n /**\n * Overall summary across every run.\n */\n overall: {\n exactAverage: number | null\n judgeAverage: number | null\n hybridAverage: number | null\n runCount: number\n }\n}\n\ninterface ScoreBuckets {\n exact: number[]\n judge: number[]\n}\n\nfunction cloneScheduledTaskMatrix(matrix: ScheduledTaskMatrix): ScheduledTaskMatrix {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction assertKnownScoreKind(kind: string): RunScoreKind {\n if (kind === 'exact' || kind === 'judge') {\n return kind\n }\n\n throw new TypeError(`Unknown eval score kind \"${kind}\".`)\n}\n\nfunction average(scores: readonly number[]): number | null {\n if (scores.length === 0) {\n return null\n }\n\n const total = scores.reduce((sum, score) => sum + score, 0)\n return total / scores.length\n}\n\nfunction createHybridAverage(exactAverage: number | null, judgeAverage: number | null): number | null {\n if (exactAverage != null && judgeAverage != null) {\n return (exactAverage + judgeAverage) / 2\n }\n\n if (exactAverage != null) {\n return exactAverage\n }\n\n if (judgeAverage != null) {\n return judgeAverage\n }\n\n return null\n}\n\nfunction collectScoreBuckets(scores: readonly RunScore[]): ScoreBuckets {\n const buckets: ScoreBuckets = {\n exact: [],\n judge: [],\n }\n\n for (const score of scores) {\n const kind = assertKnownScoreKind(score.kind)\n\n if (kind === 'exact') {\n buckets.exact.push(score.score)\n continue\n }\n\n buckets.judge.push(score.score)\n }\n\n return buckets\n}\n\nfunction createRunSummary(result: RunResult): AggregatedRunSummary {\n const buckets = collectScoreBuckets(result.scores)\n const exactAverage = average(buckets.exact)\n const judgeAverage = average(buckets.judge)\n\n return {\n entryId: result.entryId,\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n id: result.id,\n judgeAverage,\n matrix: cloneScheduledTaskMatrix(result.matrix),\n inferenceExecutorId: result.inferenceExecutorId,\n }\n}\n\nfunction createProviderSummary(inferenceExecutorId: string, results: readonly RunResult[]): AggregatedProviderSummary {\n const exactScores: number[] = []\n const judgeScores: number[] = []\n\n for (const result of results) {\n const buckets = collectScoreBuckets(result.scores)\n exactScores.push(...buckets.exact)\n judgeScores.push(...buckets.judge)\n }\n\n const exactAverage = average(exactScores)\n const judgeAverage = average(judgeScores)\n\n return {\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n judgeAverage,\n inferenceExecutorId,\n runCount: results.length,\n }\n}\n\n/**\n * Aggregates exact-match and judge-based scores into hybrid runner summaries.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link createRunSummary}\n * -> {@link createProviderSummary}\n * -> `report output`\n *\n * Use when:\n * - a runner batch mixes deterministic exact checks with judge-based grading\n * - inferenceExecutor comparison should preserve both score families and one hybrid view\n *\n * Expects:\n * - each score to be normalized to the `0..1` range before aggregation\n * - `scores.kind` to use only `'exact'` or `'judge'`\n */\nexport function aggregateRunResults(results: readonly RunResult[]): AggregatedRunResults {\n const runs = results.map(createRunSummary)\n\n const inferenceExecutorIds = Array.from(new Set(results.map(result => result.inferenceExecutorId)))\n const inferenceExecutors = inferenceExecutorIds\n .map((inferenceExecutorId) => {\n const providerResults = results.filter(result => result.inferenceExecutorId === inferenceExecutorId)\n return createProviderSummary(inferenceExecutorId, providerResults)\n })\n .sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId))\n\n const overall = createProviderSummary(\n 'overall',\n results,\n )\n\n return {\n overall: {\n exactAverage: overall.exactAverage,\n hybridAverage: overall.hybridAverage,\n judgeAverage: overall.judgeAverage,\n runCount: overall.runCount,\n },\n inferenceExecutors,\n runs,\n }\n}\n","import type { CollectedEvalEntry, EvalModule, EvalModuleMap } from '../../config'\nimport type { RunnerRuntimeContext } from './runtime-context'\n\nimport { basename, dirname, relative } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst evalFileSuffix = '.eval.ts'\nconst absolutePathPattern = /^(?:[A-Z]:\\/|\\/|\\\\\\\\)/i\n\nfunction normalizePath(value: string): string {\n return value.replaceAll('\\\\', '/')\n}\n\n/**\n * Converts a file path into a project-relative path when possible.\n *\n * Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n *\n * Before: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n */\nexport function asProjectRelativePath(filePath: string, context: RunnerRuntimeContext): string {\n const normalizedFilePath = normalizePath(filePath)\n const normalizedProjectRootDirectory = normalizePath(context.projectRootDirectory)\n const filePathWindowsDrive = normalizedFilePath.match(/^[A-Z]:\\//i)?.[0]\n const projectRootWindowsDrive = normalizedProjectRootDirectory.match(/^[A-Z]:\\//i)?.[0]\n\n if (filePathWindowsDrive != null && projectRootWindowsDrive == null) {\n return normalizedFilePath\n }\n\n if (\n filePathWindowsDrive != null\n && projectRootWindowsDrive != null\n && filePathWindowsDrive.toLowerCase() !== projectRootWindowsDrive.toLowerCase()\n ) {\n return normalizedFilePath\n }\n\n const projectRootDirectory = context.projectRootDirectory\n const relativeFilePath = normalizePath(relative(projectRootDirectory, filePath))\n\n if (!absolutePathPattern.test(relativeFilePath)) {\n if (relativeFilePath === '..') {\n return normalizePath(filePath)\n }\n\n if (!relativeFilePath.startsWith('../')) {\n return relativeFilePath\n }\n }\n\n return normalizePath(filePath)\n}\n\nfunction resolveModuleFilePath(moduleHref: string): string | null {\n if (!moduleHref.startsWith('file:')) {\n return null\n }\n\n try {\n return fileURLToPath(moduleHref)\n }\n catch {\n return null\n }\n}\n\nfunction createCollectedEvalEntry(\n moduleHref: string,\n moduleDefinition: EvalModule,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry | null {\n const filePath = resolveModuleFilePath(moduleHref)\n\n if (!filePath) {\n return null\n }\n\n const relativeFilePath = asProjectRelativePath(filePath, context)\n\n if (!relativeFilePath.endsWith(evalFileSuffix)) {\n return null\n }\n\n const entryName = basename(relativeFilePath, evalFileSuffix)\n\n if (entryName.length === 0) {\n return null\n }\n\n const relativeDirectory = dirname(relativeFilePath)\n const directory = relativeDirectory === '.' ? '' : relativeDirectory\n\n return {\n ...moduleDefinition.default,\n directory,\n filePath,\n id: directory.length === 0 ? entryName : `${directory}/${entryName}`,\n name: entryName,\n }\n}\n\n/**\n * Collects loaded vieval modules into sorted runner entries with stable ids.\n *\n * Call stack:\n *\n * `import.meta.glob(...)`\n * -> {@link collectEvalEntries}\n * -> {@link createCollectedEvalEntry}\n * -> {@link CollectedEvalEntry}[]\n *\n * Use when:\n * - the runner has already loaded candidate eval modules\n * - downstream scheduling needs stable entry ids and directory metadata\n */\nexport function collectEvalEntries(\n modules: EvalModuleMap,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry[] {\n return Object.entries(modules)\n .flatMap(([moduleHref, moduleDefinition]) => {\n const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context)\n\n if (!entry) {\n return []\n }\n\n return [entry]\n })\n .sort((left, right) => left.id.localeCompare(right.id))\n}\n","import type { TaskCacheRuntime } from '../cache'\nimport type { AggregatedRunResults, RunResult } from './aggregate'\nimport type { ScheduledTask } from './schedule'\nimport type { TaskExecutionContext } from './task-context'\n\nimport { errorMessageFrom } from '@moeru/std'\nimport { limitConcurrency } from '@vitest/runner/utils'\n\nimport { aggregateRunResults } from './aggregate'\n\n/**\n * Executes one scheduled runner task and returns a normalized run result.\n *\n * Use when:\n * - a scheduler already selected the task and execution context\n * - the caller wants a typed executor contract for runner workers\n *\n * Expects:\n * - the task context to be ready for model resolution and task-scoped work\n *\n * Returns:\n * - a normalized run result with score entries ready for aggregation\n */\nexport type ScheduledTaskExecutor = (\n task: ScheduledTask,\n context: TaskExecutionContext,\n) => Promise<RunResult>\n\n/**\n * Terminal task state reported by runner lifecycle hooks.\n *\n * Use when:\n * - reporting the outcome of one scheduled task to lifecycle observers\n *\n * Expects:\n * - hooks treat the value as final for the completed task\n */\nexport type RunnerTaskState = 'passed' | 'failed'\n\n/**\n * Optional runner execution hooks used while processing scheduled tasks.\n *\n * Use when:\n * - callers want lifecycle visibility around sequential task execution\n * - task execution should remain deterministic while still observable\n *\n * Expects:\n * - hook functions are synchronous lifecycle observers\n */\nexport interface RunScheduledTasksOptions {\n /**\n * Creates per-task execution context.\n *\n * Use when:\n * - executor code needs per-task models, cache, or other task-scoped data\n */\n createExecutionContext?: (task: ScheduledTask) => TaskExecutionContext\n /**\n * Runs before the executor starts handling a task.\n *\n * Use when:\n * - callers want to observe task activation before execution begins\n *\n * Expects:\n * - thrown errors abort the task before executor work starts\n */\n onTaskStart?: (task: ScheduledTask) => void\n /**\n * Runs after the executor settles for a task.\n *\n * Use when:\n * - callers want to observe successful and failed task completion\n *\n * Expects:\n * - thrown errors abort successful runs\n * - failed-task observers do not override the executor error for the task\n */\n onTaskEnd?: (task: ScheduledTask, state: RunnerTaskState) => void\n /**\n * Maximum number of tasks to execute concurrently.\n *\n * @default 1\n */\n maxConcurrency?: number\n}\n\nfunction createDefaultExecutionContext(): TaskExecutionContext {\n const cache: TaskCacheRuntime = {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n\n return {\n cache,\n models: [],\n }\n}\n\n/**\n * Error thrown when a scheduled run fails before producing a normalized result.\n */\nexport class RunnerExecutionError extends Error {\n /**\n * Stable task id that failed.\n */\n taskId: string\n\n constructor(taskId: string, cause: unknown) {\n const message = errorMessageFrom(cause) ?? 'Unknown runner execution failure.'\n super(`Runner task \"${taskId}\" failed: ${message}`)\n this.name = 'RunnerExecutionError'\n this.taskId = taskId\n this.cause = cause\n }\n}\n\nfunction createRunnerExecutionError(taskId: string, cause: unknown): RunnerExecutionError {\n if (cause instanceof RunnerExecutionError && cause.taskId === taskId) {\n return cause\n }\n\n return new RunnerExecutionError(taskId, cause)\n}\n\n/**\n * Executes runner tasks sequentially and aggregates the normalized results.\n *\n * Call stack:\n *\n * {@link createRunnerSchedule}\n * -> {@link runScheduledTasks}\n * -> `executor(task)`\n * -> {@link aggregateRunResults}\n *\n * Use when:\n * - the caller already expanded the runner matrix\n * - task execution should stay deterministic and easy to debug\n *\n * Expects:\n * - `executor` to return normalized `0..1` scores\n * - callers to handle concurrency outside this helper when needed\n * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers\n *\n * Throws:\n * - `RunnerExecutionError` when task setup, hooks, or the executor throws\n */\nexport async function runScheduledTasks(\n tasks: readonly ScheduledTask[],\n executor: ScheduledTaskExecutor,\n options: RunScheduledTasksOptions = {},\n): Promise<AggregatedRunResults> {\n if (tasks.length === 0) {\n return aggregateRunResults([])\n }\n\n async function executeScheduledTask(task: ScheduledTask): Promise<RunResult> {\n let executionContext: TaskExecutionContext\n\n try {\n executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext()\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskStart?.(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n let runResult: RunResult\n try {\n runResult = await executor(task, executionContext)\n }\n catch (error) {\n try {\n options.onTaskEnd?.(task, 'failed')\n }\n catch {\n // Failed-task observers must not mask the task execution failure.\n }\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskEnd?.(task, 'passed')\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n return runResult\n }\n\n const maxConcurrency = options.maxConcurrency ?? 1\n if (maxConcurrency <= 1) {\n const results: RunResult[] = []\n for (const task of tasks) {\n results.push(await executeScheduledTask(task))\n }\n return aggregateRunResults(results)\n }\n\n const runWithLimit = limitConcurrency(maxConcurrency)\n const resultPairs = await Promise.all(tasks.map(async (task, index) => {\n const result = await runWithLimit(async () => executeScheduledTask(task))\n return { index, result }\n }))\n\n const sortedResults = resultPairs\n .sort((left, right) => left.index - right.index)\n .map(item => item.result)\n\n return aggregateRunResults(sortedResults)\n}\n","import { createRequire } from 'node:module'\nimport { dirname } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst require = createRequire(import.meta.url)\n\n/**\n * Shared runtime context used by the vieval runner.\n *\n * Use when:\n * - runner services need stable path resolution without module-level side effects\n * - call sites want deterministic control over workspace root detection\n */\nexport interface RunnerRuntimeContext {\n /**\n * Absolute project root directory used for path normalization.\n */\n projectRootDirectory: string\n}\n\n/**\n * Options used to construct the runner runtime context.\n */\nexport interface CreateVievalRunnerRuntimeContextOptions {\n /**\n * Directory used to search for the nearest pnpm workspace.\n *\n * @default directory of this module file\n */\n cwd?: string\n /**\n * Absolute fallback directory when a pnpm workspace root is not found.\n *\n * @default package root directory (`packages/vieval`)\n */\n fallbackProjectRootDirectory?: string\n}\n\n/**\n * Creates a side-effect-free runtime context for runner path normalization.\n *\n * Call stack:\n *\n * {@link createRunnerRuntimeContext}\n * -> `findWorkspaceDir(cwd)`\n * -> `resolve projectRootDirectory`\n * -> `{ projectRootDirectory }`\n *\n * Use when:\n * - initializing runner infrastructure before collecting eval modules\n * - tests need deterministic root resolution behavior\n */\nexport async function createRunnerRuntimeContext(\n options: CreateVievalRunnerRuntimeContextOptions = {},\n): Promise<RunnerRuntimeContext> {\n const cwd = options.cwd ?? dirname(fileURLToPath(import.meta.url))\n const fallbackProjectRootDirectory = options.fallbackProjectRootDirectory\n ?? fileURLToPath(new URL('../../../', import.meta.url))\n\n // NOTICE:\n // We use dynamic `require` here because `@pnpm/find-workspace-dir` is CommonJS.\n // Keeping this load inside the factory avoids module-level initialization side effects.\n const { findWorkspaceDir } = require('@pnpm/find-workspace-dir') as {\n findWorkspaceDir: (currentWorkingDirectory: string) => Promise<string | undefined>\n }\n\n // NOTICE:\n // Workspace discovery is required to keep collected eval ids stable when this\n // package is moved inside different monorepo layouts.\n const workspaceDirectory = await findWorkspaceDir(cwd)\n\n return {\n projectRootDirectory: workspaceDirectory ?? fallbackProjectRootDirectory,\n }\n}\n","import type { CollectedEvalEntry, MatrixDefinition, MatrixLayer, MatrixValue } from '../../config'\n\n/**\n * Describes the inferenceExecutor target for a scheduled eval run.\n */\nexport interface InferenceExecutor {\n /**\n * Stable inferenceExecutor identifier such as `openai:gpt-4.1-mini`.\n */\n id: string\n}\n\n/**\n * Stores the selected value for each matrix axis.\n */\nexport type RunnerMatrixSelection = Record<string, string>\n\n/**\n * Stores stable row ids for one resolved scheduled task matrix.\n */\nexport interface ScheduledTaskMatrixMeta {\n /**\n * Stable row id for the resolved run matrix selection.\n */\n runRowId: string\n /**\n * Stable row id for the resolved eval matrix selection.\n */\n evalRowId: string\n}\n\n/**\n * Stores the structured matrix payload for one scheduled task.\n */\nexport interface ScheduledTaskMatrix {\n /**\n * Runtime matrix selection visible to task code.\n */\n run: RunnerMatrixSelection\n /**\n * Eval-time matrix selection visible to task code.\n */\n eval: RunnerMatrixSelection\n /**\n * Stable row ids for both scopes.\n */\n meta: ScheduledTaskMatrixMeta\n}\n\n/**\n * Maps matrix axis names to the values that should be expanded.\n */\nexport type RunnerMatrixDefinition = MatrixDefinition\n\n/**\n * Accepts either flat axis definitions or one layered matrix object.\n */\nexport type RunnerMatrixInput = RunnerMatrixDefinition | MatrixLayer\n\nconst matrixLayerKeys = new Set(['disable', 'extend', 'override'])\nconst ambiguousMatrixDefinitionErrorMessage = 'Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.'\n\n/**\n * Represents one fully expanded runner task.\n */\nexport interface ScheduledTask {\n /**\n * Stable task id derived from the entry, inferenceExecutor, and matrix selection.\n */\n id: string\n /**\n * The collected eval entry to execute.\n */\n entry: CollectedEvalEntry\n /**\n * The inferenceExecutor selected for this task.\n */\n inferenceExecutor: InferenceExecutor\n /**\n * The concrete scoped matrix selection for this task.\n */\n matrix: ScheduledTaskMatrix\n}\n\n/**\n * Configures how the runner should expand its execution matrix.\n */\nexport interface CreateRunnerScheduleOptions {\n /**\n * Collected eval entries that should be scheduled.\n */\n entries: readonly CollectedEvalEntry[]\n /**\n * Providers that should run each entry.\n */\n inferenceExecutors: readonly InferenceExecutor[]\n /**\n * Optional run-time matrix axes expanded as a cartesian product.\n */\n runMatrix?: RunnerMatrixInput\n /**\n * Optional eval-time matrix axes expanded as a cartesian product.\n */\n evalMatrix?: RunnerMatrixInput\n}\n\nfunction encodeTaskIdSegment(value: string): string {\n return encodeURIComponent(value)\n}\n\nfunction stringifyMatrixValue(value: MatrixValue): string {\n return String(value)\n}\n\nfunction cloneMatrixSelection(matrix: RunnerMatrixSelection): RunnerMatrixSelection {\n return { ...matrix }\n}\n\nfunction createScheduledTaskMatrix(\n runMatrix: RunnerMatrixSelection,\n evalMatrix: RunnerMatrixSelection,\n): ScheduledTaskMatrix {\n return {\n eval: cloneMatrixSelection(evalMatrix),\n meta: {\n evalRowId: createStableRowId(evalMatrix),\n runRowId: createStableRowId(runMatrix),\n },\n run: cloneMatrixSelection(runMatrix),\n }\n}\n\nfunction isMatrixLayer(matrix: RunnerMatrixInput): matrix is MatrixLayer {\n const matrixKeys = Object.keys(matrix)\n return (\n matrixKeys.length > 0\n && matrixKeys.every(key => matrixLayerKeys.has(key))\n )\n}\n\nfunction assertNonAmbiguousMatrixDefinition(matrix: RunnerMatrixInput): void {\n const matrixKeys = Object.keys(matrix)\n const hasReservedKeys = matrixKeys.some(key => matrixLayerKeys.has(key))\n const hasAxisKeys = matrixKeys.some(key => !matrixLayerKeys.has(key))\n\n if (hasReservedKeys && hasAxisKeys) {\n throw new TypeError(ambiguousMatrixDefinitionErrorMessage)\n }\n}\n\nfunction normalizeLayerInputToAxes(matrix: RunnerMatrixInput | undefined): MatrixLayer | undefined {\n if (matrix == null) {\n return undefined\n }\n\n assertNonAmbiguousMatrixDefinition(matrix)\n\n if (isMatrixLayer(matrix)) {\n return matrix\n }\n\n return {\n extend: matrix,\n }\n}\n\nfunction dedupeAxisValues(values: readonly MatrixValue[]): string[] {\n return Array.from(new Set(values.map(stringifyMatrixValue)))\n}\n\nfunction applyAxisValues(\n axes: Map<string, string[]>,\n definition: RunnerMatrixDefinition | undefined,\n mode: 'extend' | 'override',\n): void {\n if (definition == null) {\n return\n }\n\n for (const [axis, values] of Object.entries(definition)) {\n const nextValues = dedupeAxisValues(values)\n\n if (mode === 'extend') {\n const existingValues = axes.get(axis) ?? []\n axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])))\n continue\n }\n\n axes.set(axis, nextValues)\n }\n}\n\nfunction applyLayer(\n baseAxes: ReadonlyMap<string, string[]>,\n layer: MatrixLayer | undefined,\n): Map<string, string[]> {\n const nextAxes = new Map<string, string[]>(\n Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]),\n )\n\n for (const axis of layer?.disable ?? []) {\n nextAxes.delete(axis)\n }\n\n applyAxisValues(nextAxes, layer?.extend, 'extend')\n applyAxisValues(nextAxes, layer?.override, 'override')\n\n return nextAxes\n}\n\nfunction expandAxesToRows(axes: ReadonlyMap<string, readonly string[]>): RunnerMatrixSelection[] {\n if (axes.size === 0) {\n return [{}]\n }\n\n const dimensions = Array.from(axes.entries())\n\n let selections: RunnerMatrixSelection[] = [{}]\n\n for (const [axis, values] of dimensions) {\n if (values.length === 0) {\n return []\n }\n\n const nextSelections: RunnerMatrixSelection[] = []\n\n for (const selection of selections) {\n for (const value of values) {\n nextSelections.push({\n ...selection,\n [axis]: value,\n })\n }\n }\n\n selections = nextSelections\n }\n\n return selections\n}\n\nfunction createStableRowId(matrix: RunnerMatrixSelection): string {\n const segments = Object.entries(matrix)\n .sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis))\n .map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`)\n\n if (segments.length === 0) {\n return 'default'\n }\n\n return segments.join('&')\n}\n\nfunction createTaskId(entryId: string, inferenceExecutorId: string, runRowId: string, evalRowId: string): string {\n const encodedEntryId = encodeTaskIdSegment(entryId)\n const encodedProviderId = encodeTaskIdSegment(inferenceExecutorId)\n\n return [\n encodedEntryId,\n encodedProviderId,\n `run=${encodeTaskIdSegment(runRowId)}`,\n `eval=${encodeTaskIdSegment(evalRowId)}`,\n ].join('::')\n}\n\nfunction createResolvedRunAxes(\n entry: CollectedEvalEntry,\n runMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n runMatrix,\n entry.matrix?.runMatrix,\n entry.task?.matrix?.runMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\nfunction createResolvedEvalAxes(\n entry: CollectedEvalEntry,\n evalMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n evalMatrix,\n entry.matrix?.evalMatrix,\n entry.task?.matrix?.evalMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\n/**\n * Expands collected entries into a stable runner schedule.\n *\n * Call stack:\n *\n * {@link collectEvalEntries} (`../runner`)\n * -> {@link createRunnerSchedule}\n * -> {@link expandAxesToRows}\n * -> {@link ScheduledTask}[]\n *\n * Use when:\n * - the runner already knows which eval entries are available\n * - each entry must run against multiple inferenceExecutors or matrix variants\n *\n * Expects:\n * - `entries` and `inferenceExecutors` to be provided in the desired execution order\n * - matrix axes to use insertion order when generating combinations\n */\nexport function createRunnerSchedule(options: CreateRunnerScheduleOptions): ScheduledTask[] {\n if (options.entries.length === 0) {\n return []\n }\n\n if (options.inferenceExecutors.length === 0) {\n return []\n }\n\n const tasks: ScheduledTask[] = []\n\n for (const entry of options.entries) {\n const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix))\n const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix))\n\n if (runSelections.length === 0 || evalSelections.length === 0) {\n continue\n }\n\n for (const inferenceExecutor of options.inferenceExecutors) {\n for (const runMatrix of runSelections) {\n for (const evalMatrix of evalSelections) {\n const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix)\n\n tasks.push({\n entry,\n id: createTaskId(\n entry.id,\n inferenceExecutor.id,\n isolatedMatrix.meta.runRowId,\n isolatedMatrix.meta.evalRowId,\n ),\n matrix: isolatedMatrix,\n inferenceExecutor,\n })\n }\n }\n }\n }\n\n return tasks\n}\n","import type { ModelDefinition } from '../../config/models'\nimport type { TaskCacheRuntime } from '../cache'\nimport type { ScheduledTask } from './schedule'\n\n/**\n * Task-scoped execution context exposed to runner executors.\n */\nexport interface TaskExecutionContext {\n /**\n * Deterministic cache runtime scoped to the current task project.\n */\n cache: TaskCacheRuntime\n /**\n * Configured model registrations available to model plugins.\n */\n models: readonly ModelDefinition[]\n}\n\n/**\n * Inputs used to build task execution context.\n */\nexport interface CreateTaskExecutionContextOptions {\n cache?: TaskCacheRuntime\n models: readonly ModelDefinition[]\n task: ScheduledTask\n}\n\nfunction createNoopTaskCacheRuntime(): TaskCacheRuntime {\n return {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n}\n\n/**\n * Creates task-scoped context data for runner execution.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link createTaskExecutionContext}\n * -> `TaskExecutionContext`\n */\nexport function createTaskExecutionContext(options: CreateTaskExecutionContextOptions): TaskExecutionContext {\n return {\n cache: options.cache ?? createNoopTaskCacheRuntime(),\n models: options.models,\n }\n}\n"],"mappings":";;;;;;;;;;;AA2BA,SAAS,oBAAoB,OAAuB;CAClD,MAAM,aAAa,MAAM,MAAM;AAC/B,KAAI,WAAW,WAAW,EACxB,QAAO;AAGT,QAAO,WAAW,QAAQ,aAAa,IAAI;;AAG7C,SAAS,mBAAmB,WAA+B,WAAmD;AAC5G,KAAI,aAAa,QAAQ,UAAU,SAAS,EAC1C,QAAO,UAAU,WAAW,IAAI,GAAG,UAAU,MAAM,EAAE,GAAG;AAG1D,KAAI,aAAa,QAAQ,UAAU,WAAW,EAC5C;AAGF,KAAI,cAAc,mBAChB,QAAO;AAGT,KAAI,cAAc,aAChB,QAAO;AAGT,KAAI,cAAc,YAChB,QAAO;;;;;;;;;;;AAeX,SAAgB,+BAA+B,SAAqC;CAClF,MAAM,eAAe,QAAQ,IAAI,KAAI,YAAW,oBAAoB,QAAQ,CAAC;CAC7E,MAAM,YAAY,mBAAmB,QAAQ,KAAK,QAAQ,UAAU;AAEpE,KAAI,aAAa,WAAW,EAC1B,QAAO,aAAa,OAAO,CAAC,WAAW,GAAG,CAAC,YAAY,YAAY;AAGrE,KAAI,aAAa,KACf,QAAO;CAGT,MAAM,cAAc,aAAa,MAAM,GAAG,KAAK,IAAI,GAAG,aAAa,SAAS,EAAE,CAAC;CAC/E,MAAM,OAAO,aAAa,aAAa,SAAS,MAAM;AACtD,QAAO,CAAC,GAAG,aAAa,GAAG,KAAK,GAAG,YAAY;;AAGjD,eAAe,gBAAgB,MAAc,SAAyC;CACpF,MAAM,YAAY,QAAQ,KAAK;CAC/B,MAAM,gBAAgB,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG,KAAK,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,SAAS,GAAG,CAAC,MAAM,GAAG,GAAG;AACzG,OAAM,MAAM,WAAW,EAAE,WAAW,MAAM,CAAC;AAC3C,OAAM,UAAU,eAAe,QAAQ;AACvC,OAAM,OAAO,eAAe,KAAK;;AAGnC,SAAS,sBAAsB,MAA+B;AAC5D,QAAO;EACL;EACA,MAAM,SAAS;AACb,OAAI;AACF,UAAM,OAAO,KAAK;AAClB,WAAO;WAEH;AACJ,WAAO;;;EAGX,iBAAiB;AACf,UAAO,iBAAiB,KAAK;;EAE/B,MAAM,kBAAkB;AACtB,SAAM,MAAM,QAAQ,KAAK,EAAE,EAAE,WAAW,MAAM,CAAC;AAC/C,UAAO,kBAAkB,KAAK;;EAEhC,MAAM,aAAa;AACjB,UAAO,MAAM,SAAS,KAAK;;EAE7B,MAAM,YAAY,OAAO;AACvB,SAAM,gBAAgB,MAAM,MAAM;;EAEpC,MAAM,SAAS,WAAW,SAAS;AACjC,UAAO,MAAM,SAAS,MAAM,SAAS;;EAEvC,MAAM,UAAU,OAAO,WAAW,SAAS;AACzC,SAAM,gBAAgB,MAAM,OAAO,KAAK,OAAO,SAAS,CAAC;;EAE3D,MAAM,WAAc;AAClB,UAAO,KAAK,MAAM,MAAM,SAAS,MAAM,QAAQ,CAAC;;EAElD,MAAM,UAAU,OAAO;AACrB,SAAM,gBAAgB,MAAM,GAAG,KAAK,UAAU,OAAO,MAAM,EAAE,CAAC,IAAI;;EAEpE,MAAM,mBAAsB;AAC1B,UAAO,MAAM,KAAK,UAAe;;EAEnC,MAAM,sBAAyB;AAC7B,UAAO,MAAM,KAAK,UAAa;;EAElC;;AAGH,SAAS,qBAAqB,eAAuB,WAAmC;AACtF,QAAO,EACL,KAAK,SAAS;EACZ,MAAM,uBAAuB,+BAA+B,QAAQ;AACpE,SAAO,sBAAsB,KAAK,eAAe,oBAAoB,UAAU,EAAE,GAAG,qBAAqB,CAAC;IAE7G;;;;;;;;;;;;;;;;;AAkBH,SAAgB,iCACd,SACkB;CAClB,MAAM,qBAAqB,oBAAoB,QAAQ,YAAY;CACnE,MAAM,mBAAmB,oBAAoB,QAAQ,YAAY;CACjE,MAAM,gBAAgB,KAAK,QAAQ,oBAAoB,oBAAoB,iBAAiB;AAE5F,QAAO,EACL,UAAU,MAAM;AACd,SAAO,qBAAqB,eAAe,KAAK;IAEnD;;;;ACvCH,SAAS,yBAAyB,QAAkD;AAClF,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,qBAAqB,MAA4B;AACxD,KAAI,SAAS,WAAW,SAAS,QAC/B,QAAO;AAGT,OAAM,IAAI,UAAU,4BAA4B,KAAK,IAAI;;AAG3D,SAAS,QAAQ,QAA0C;AACzD,KAAI,OAAO,WAAW,EACpB,QAAO;AAIT,QADc,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAC5C,OAAO;;AAGxB,SAAS,oBAAoB,cAA6B,cAA4C;AACpG,KAAI,gBAAgB,QAAQ,gBAAgB,KAC1C,SAAQ,eAAe,gBAAgB;AAGzC,KAAI,gBAAgB,KAClB,QAAO;AAGT,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO;;AAGT,SAAS,oBAAoB,QAA2C;CACtE,MAAM,UAAwB;EAC5B,OAAO,EAAE;EACT,OAAO,EAAE;EACV;AAED,MAAK,MAAM,SAAS,QAAQ;AAG1B,MAFa,qBAAqB,MAAM,KAAK,KAEhC,SAAS;AACpB,WAAQ,MAAM,KAAK,MAAM,MAAM;AAC/B;;AAGF,UAAQ,MAAM,KAAK,MAAM,MAAM;;AAGjC,QAAO;;AAGT,SAAS,iBAAiB,QAAyC;CACjE,MAAM,UAAU,oBAAoB,OAAO,OAAO;CAClD,MAAM,eAAe,QAAQ,QAAQ,MAAM;CAC3C,MAAM,eAAe,QAAQ,QAAQ,MAAM;AAE3C,QAAO;EACL,SAAS,OAAO;EAChB;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D,IAAI,OAAO;EACX;EACA,QAAQ,yBAAyB,OAAO,OAAO;EAC/C,qBAAqB,OAAO;EAC7B;;AAGH,SAAS,sBAAsB,qBAA6B,SAA0D;CACpH,MAAM,cAAwB,EAAE;CAChC,MAAM,cAAwB,EAAE;AAEhC,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,UAAU,oBAAoB,OAAO,OAAO;AAClD,cAAY,KAAK,GAAG,QAAQ,MAAM;AAClC,cAAY,KAAK,GAAG,QAAQ,MAAM;;CAGpC,MAAM,eAAe,QAAQ,YAAY;CACzC,MAAM,eAAe,QAAQ,YAAY;AAEzC,QAAO;EACL;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D;EACA;EACA,UAAU,QAAQ;EACnB;;;;;;;;;;;;;;;;;;;;;AAsBH,SAAgB,oBAAoB,SAAqD;CACvF,MAAM,OAAO,QAAQ,IAAI,iBAAiB;CAG1C,MAAM,qBADuB,MAAM,KAAK,IAAI,IAAI,QAAQ,KAAI,WAAU,OAAO,oBAAoB,CAAC,CAAC,CAEhG,KAAK,wBAAwB;AAE5B,SAAO,sBAAsB,qBADL,QAAQ,QAAO,WAAU,OAAO,wBAAwB,oBAAoB,CAClC;GAClE,CACD,MAAM,MAAM,UAAU,KAAK,oBAAoB,cAAc,MAAM,oBAAoB,CAAC;CAE3F,MAAM,UAAU,sBACd,WACA,QACD;AAED,QAAO;EACL,SAAS;GACP,cAAc,QAAQ;GACtB,eAAe,QAAQ;GACvB,cAAc,QAAQ;GACtB,UAAU,QAAQ;GACnB;EACD;EACA;EACD;;;;ACvRH,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAE5B,SAAS,cAAc,OAAuB;AAC5C,QAAO,MAAM,WAAW,MAAM,IAAI;;;;;;;;;;;AAYpC,SAAgB,sBAAsB,UAAkB,SAAuC;CAC7F,MAAM,qBAAqB,cAAc,SAAS;CAClD,MAAM,iCAAiC,cAAc,QAAQ,qBAAqB;CAClF,MAAM,uBAAuB,mBAAmB,MAAM,aAAa,GAAG;CACtE,MAAM,0BAA0B,+BAA+B,MAAM,aAAa,GAAG;AAErF,KAAI,wBAAwB,QAAQ,2BAA2B,KAC7D,QAAO;AAGT,KACE,wBAAwB,QACrB,2BAA2B,QAC3B,qBAAqB,aAAa,KAAK,wBAAwB,aAAa,CAE/E,QAAO;CAGT,MAAM,uBAAuB,QAAQ;CACrC,MAAM,mBAAmB,cAAc,SAAS,sBAAsB,SAAS,CAAC;AAEhF,KAAI,CAAC,oBAAoB,KAAK,iBAAiB,EAAE;AAC/C,MAAI,qBAAqB,KACvB,QAAO,cAAc,SAAS;AAGhC,MAAI,CAAC,iBAAiB,WAAW,MAAM,CACrC,QAAO;;AAIX,QAAO,cAAc,SAAS;;AAGhC,SAAS,sBAAsB,YAAmC;AAChE,KAAI,CAAC,WAAW,WAAW,QAAQ,CACjC,QAAO;AAGT,KAAI;AACF,SAAO,cAAc,WAAW;SAE5B;AACJ,SAAO;;;AAIX,SAAS,yBACP,YACA,kBACA,SAC2B;CAC3B,MAAM,WAAW,sBAAsB,WAAW;AAElD,KAAI,CAAC,SACH,QAAO;CAGT,MAAM,mBAAmB,sBAAsB,UAAU,QAAQ;AAEjE,KAAI,CAAC,iBAAiB,SAAS,eAAe,CAC5C,QAAO;CAGT,MAAM,YAAY,SAAS,kBAAkB,eAAe;AAE5D,KAAI,UAAU,WAAW,EACvB,QAAO;CAGT,MAAM,oBAAoB,QAAQ,iBAAiB;CACnD,MAAM,YAAY,sBAAsB,MAAM,KAAK;AAEnD,QAAO;EACL,GAAG,iBAAiB;EACpB;EACA;EACA,IAAI,UAAU,WAAW,IAAI,YAAY,GAAG,UAAU,GAAG;EACzD,MAAM;EACP;;;;;;;;;;;;;;;;AAiBH,SAAgB,mBACd,SACA,SACsB;AACtB,QAAO,OAAO,QAAQ,QAAQ,CAC3B,SAAS,CAAC,YAAY,sBAAsB;EAC3C,MAAM,QAAQ,yBAAyB,YAAY,kBAAkB,QAAQ;AAE7E,MAAI,CAAC,MACH,QAAO,EAAE;AAGX,SAAO,CAAC,MAAM;GACd,CACD,MAAM,MAAM,UAAU,KAAK,GAAG,cAAc,MAAM,GAAG,CAAC;;;;AC9C3D,SAAS,gCAAsD;AAY7D,QAAO;EACL,OAZ8B,EAC9B,UAAU,MAAM;AACd,UAAO,EACL,KAAK,SAAS;IACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,UAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;MAE3G;KAEJ;EAIC,QAAQ,EAAE;EACX;;;;;AAMH,IAAa,uBAAb,cAA0C,MAAM;;;;CAI9C;CAEA,YAAY,QAAgB,OAAgB;EAC1C,MAAM,UAAU,iBAAiB,MAAM,IAAI;AAC3C,QAAM,gBAAgB,OAAO,YAAY,UAAU;AACnD,OAAK,OAAO;AACZ,OAAK,SAAS;AACd,OAAK,QAAQ;;;AAIjB,SAAS,2BAA2B,QAAgB,OAAsC;AACxF,KAAI,iBAAiB,wBAAwB,MAAM,WAAW,OAC5D,QAAO;AAGT,QAAO,IAAI,qBAAqB,QAAQ,MAAM;;;;;;;;;;;;;;;;;;;;;;;;AAyBhD,eAAsB,kBACpB,OACA,UACA,UAAoC,EAAE,EACP;AAC/B,KAAI,MAAM,WAAW,EACnB,QAAO,oBAAoB,EAAE,CAAC;CAGhC,eAAe,qBAAqB,MAAyC;EAC3E,IAAI;AAEJ,MAAI;AACF,sBAAmB,QAAQ,yBAAyB,KAAK,IAAI,+BAA+B;WAEvF,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,cAAc,KAAK;WAEtB,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;EAGlD,IAAI;AACJ,MAAI;AACF,eAAY,MAAM,SAAS,MAAM,iBAAiB;WAE7C,OAAO;AACZ,OAAI;AACF,YAAQ,YAAY,MAAM,SAAS;WAE/B;AAGN,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,YAAY,MAAM,SAAS;WAE9B,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,SAAO;;CAGT,MAAM,iBAAiB,QAAQ,kBAAkB;AACjD,KAAI,kBAAkB,GAAG;EACvB,MAAM,UAAuB,EAAE;AAC/B,OAAK,MAAM,QAAQ,MACjB,SAAQ,KAAK,MAAM,qBAAqB,KAAK,CAAC;AAEhD,SAAO,oBAAoB,QAAQ;;CAGrC,MAAM,eAAe,iBAAiB,eAAe;AAUrD,QAAO,qBATa,MAAM,QAAQ,IAAI,MAAM,IAAI,OAAO,MAAM,UAAU;AAErE,SAAO;GAAE;GAAO,QADD,MAAM,aAAa,YAAY,qBAAqB,KAAK,CAAC;GACjD;GACxB,CAAC,EAGA,MAAM,MAAM,UAAU,KAAK,QAAQ,MAAM,MAAM,CAC/C,KAAI,SAAQ,KAAK,OAAO,CAEc;;;;ACzN3C,MAAM,UAAU,cAAc,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;AAgD9C,eAAsB,2BACpB,UAAmD,EAAE,EACtB;CAC/B,MAAM,MAAM,QAAQ,OAAO,QAAQ,cAAc,OAAO,KAAK,IAAI,CAAC;CAClE,MAAM,+BAA+B,QAAQ,gCACxC,cAAc,IAAI,IAAI,aAAa,OAAO,KAAK,IAAI,CAAC;CAKzD,MAAM,EAAE,qBAAqB,QAAQ,2BAA2B;AAShE,QAAO,EACL,sBAHyB,MAAM,iBAAiB,IAAI,IAGR,8BAC7C;;;;ACdH,MAAM,kBAAkB,IAAI,IAAI;CAAC;CAAW;CAAU;CAAW,CAAC;AAClE,MAAM,wCAAwC;AA8C9C,SAAS,oBAAoB,OAAuB;AAClD,QAAO,mBAAmB,MAAM;;AAGlC,SAAS,qBAAqB,OAA4B;AACxD,QAAO,OAAO,MAAM;;AAGtB,SAAS,qBAAqB,QAAsD;AAClF,QAAO,EAAE,GAAG,QAAQ;;AAGtB,SAAS,0BACP,WACA,YACqB;AACrB,QAAO;EACL,MAAM,qBAAqB,WAAW;EACtC,MAAM;GACJ,WAAW,kBAAkB,WAAW;GACxC,UAAU,kBAAkB,UAAU;GACvC;EACD,KAAK,qBAAqB,UAAU;EACrC;;AAGH,SAAS,cAAc,QAAkD;CACvE,MAAM,aAAa,OAAO,KAAK,OAAO;AACtC,QACE,WAAW,SAAS,KACjB,WAAW,OAAM,QAAO,gBAAgB,IAAI,IAAI,CAAC;;AAIxD,SAAS,mCAAmC,QAAiC;CAC3E,MAAM,aAAa,OAAO,KAAK,OAAO;CACtC,MAAM,kBAAkB,WAAW,MAAK,QAAO,gBAAgB,IAAI,IAAI,CAAC;CACxE,MAAM,cAAc,WAAW,MAAK,QAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC;AAErE,KAAI,mBAAmB,YACrB,OAAM,IAAI,UAAU,sCAAsC;;AAI9D,SAAS,0BAA0B,QAAgE;AACjG,KAAI,UAAU,KACZ;AAGF,oCAAmC,OAAO;AAE1C,KAAI,cAAc,OAAO,CACvB,QAAO;AAGT,QAAO,EACL,QAAQ,QACT;;AAGH,SAAS,iBAAiB,QAA0C;AAClE,QAAO,MAAM,KAAK,IAAI,IAAI,OAAO,IAAI,qBAAqB,CAAC,CAAC;;AAG9D,SAAS,gBACP,MACA,YACA,MACM;AACN,KAAI,cAAc,KAChB;AAGF,MAAK,MAAM,CAAC,MAAM,WAAW,OAAO,QAAQ,WAAW,EAAE;EACvD,MAAM,aAAa,iBAAiB,OAAO;AAE3C,MAAI,SAAS,UAAU;GACrB,MAAM,iBAAiB,KAAK,IAAI,KAAK,IAAI,EAAE;AAC3C,QAAK,IAAI,MAAM,MAAM,KAAK,IAAI,IAAI,CAAC,GAAG,gBAAgB,GAAG,WAAW,CAAC,CAAC,CAAC;AACvE;;AAGF,OAAK,IAAI,MAAM,WAAW;;;AAI9B,SAAS,WACP,UACA,OACuB;CACvB,MAAM,WAAW,IAAI,IACnB,MAAM,KAAK,SAAS,SAAS,CAAC,CAAC,KAAK,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAC5E;AAED,MAAK,MAAM,QAAQ,OAAO,WAAW,EAAE,CACrC,UAAS,OAAO,KAAK;AAGvB,iBAAgB,UAAU,OAAO,QAAQ,SAAS;AAClD,iBAAgB,UAAU,OAAO,UAAU,WAAW;AAEtD,QAAO;;AAGT,SAAS,iBAAiB,MAAuE;AAC/F,KAAI,KAAK,SAAS,EAChB,QAAO,CAAC,EAAE,CAAC;CAGb,MAAM,aAAa,MAAM,KAAK,KAAK,SAAS,CAAC;CAE7C,IAAI,aAAsC,CAAC,EAAE,CAAC;AAE9C,MAAK,MAAM,CAAC,MAAM,WAAW,YAAY;AACvC,MAAI,OAAO,WAAW,EACpB,QAAO,EAAE;EAGX,MAAM,iBAA0C,EAAE;AAElD,OAAK,MAAM,aAAa,WACtB,MAAK,MAAM,SAAS,OAClB,gBAAe,KAAK;GAClB,GAAG;IACF,OAAO;GACT,CAAC;AAIN,eAAa;;AAGf,QAAO;;AAGT,SAAS,kBAAkB,QAAuC;CAChE,MAAM,WAAW,OAAO,QAAQ,OAAO,CACpC,MAAM,CAAC,WAAW,CAAC,eAAe,SAAS,cAAc,UAAU,CAAC,CACpE,KAAK,CAAC,MAAM,WAAW,GAAG,oBAAoB,KAAK,CAAC,GAAG,oBAAoB,MAAM,GAAG;AAEvF,KAAI,SAAS,WAAW,EACtB,QAAO;AAGT,QAAO,SAAS,KAAK,IAAI;;AAG3B,SAAS,aAAa,SAAiB,qBAA6B,UAAkB,WAA2B;AAI/G,QAAO;EAHgB,oBAAoB,QAAQ;EACzB,oBAAoB,oBAAoB;EAKhE,OAAO,oBAAoB,SAAS;EACpC,QAAQ,oBAAoB,UAAU;EACvC,CAAC,KAAK,KAAK;;AAGd,SAAS,sBACP,OACA,WACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;AAGT,SAAS,uBACP,OACA,YACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;;;;;;;;;;;;;;;;;;;AAqBT,SAAgB,qBAAqB,SAAuD;AAC1F,KAAI,QAAQ,QAAQ,WAAW,EAC7B,QAAO,EAAE;AAGX,KAAI,QAAQ,mBAAmB,WAAW,EACxC,QAAO,EAAE;CAGX,MAAM,QAAyB,EAAE;AAEjC,MAAK,MAAM,SAAS,QAAQ,SAAS;EACnC,MAAM,gBAAgB,iBAAiB,sBAAsB,OAAO,QAAQ,UAAU,CAAC;EACvF,MAAM,iBAAiB,iBAAiB,uBAAuB,OAAO,QAAQ,WAAW,CAAC;AAE1F,MAAI,cAAc,WAAW,KAAK,eAAe,WAAW,EAC1D;AAGF,OAAK,MAAM,qBAAqB,QAAQ,mBACtC,MAAK,MAAM,aAAa,cACtB,MAAK,MAAM,cAAc,gBAAgB;GACvC,MAAM,iBAAiB,0BAA0B,WAAW,WAAW;AAEvE,SAAM,KAAK;IACT;IACA,IAAI,aACF,MAAM,IACN,kBAAkB,IAClB,eAAe,KAAK,UACpB,eAAe,KAAK,UACrB;IACD,QAAQ;IACR;IACD,CAAC;;;AAMV,QAAO;;;;AC1UT,SAAS,6BAA+C;AACtD,QAAO,EACL,UAAU,MAAM;AACd,SAAO,EACL,KAAK,SAAS;GACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,SAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;KAE3G;IAEJ;;;;;;;;;;;AAYH,SAAgB,2BAA2B,SAAkE;AAC3G,QAAO;EACL,OAAO,QAAQ,SAAS,4BAA4B;EACpD,QAAQ,QAAQ;EACjB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"env
|
|
1
|
+
{"version":3,"file":"env-BFSjny07.mjs","names":[],"sources":["../src/core/inference-executors/env.ts"],"sourcesContent":["/**\n * Supported env value coercion types.\n */\nexport type EnvValueType = 'string'\n\n/**\n * Common options for env readers.\n */\nexport interface EnvFromOptions {\n /**\n * Expected env value type.\n */\n type: EnvValueType\n /**\n * Whether an empty or missing value should throw.\n *\n * @default false\n */\n required?: boolean\n /**\n * Optional key name used for clearer error messages.\n */\n name?: string\n}\n\n/**\n * Env options used by the required helper.\n *\n * `required` is intentionally omitted because this helper is always required.\n */\nexport type RequiredEnvFromOptions = Omit<EnvFromOptions, 'required'>\n\nfunction assertNonEmptyString(value: string | undefined, options: EnvFromOptions): string | undefined {\n if (value == null || value.trim().length === 0) {\n if (options.required === true) {\n const label = options.name ?? 'environment variable'\n throw new Error(`Missing required ${label}.`)\n }\n\n return undefined\n }\n\n return value\n}\n\n/**\n * Parses one env value with optional required behavior.\n *\n * Example:\n * `const apiKey = envFrom(process.env.OPENAI_API_KEY, { type: 'string', required: true, name: 'OPENAI_API_KEY' })`\n */\nexport function envFrom(\n value: string | undefined,\n options: EnvFromOptions,\n): string | undefined {\n if (options.type === 'string') {\n return assertNonEmptyString(value, options)\n }\n\n return undefined\n}\n\n/**\n * Parses one required env value.\n *\n * Example:\n * `const apiKey = requiredEnvFrom(process.env.OPENAI_API_KEY, { type: 'string', name: 'OPENAI_API_KEY' })`\n */\nexport function requiredEnvFrom(\n value: string | undefined,\n options: RequiredEnvFromOptions,\n): string {\n const parsed = envFrom(value, {\n ...options,\n required: true,\n })\n\n if (parsed == null) {\n const label = options.name ?? 'environment variable'\n throw new Error(`Missing required ${label}.`)\n }\n\n return parsed\n}\n"],"mappings":";AAgCA,SAAS,qBAAqB,OAA2B,SAA6C;AACpG,KAAI,SAAS,QAAQ,MAAM,MAAM,CAAC,WAAW,GAAG;AAC9C,MAAI,QAAQ,aAAa,MAAM;GAC7B,MAAM,QAAQ,QAAQ,QAAQ;AAC9B,SAAM,IAAI,MAAM,oBAAoB,MAAM,GAAG;;AAG/C;;AAGF,QAAO;;;;;;;;AAST,SAAgB,QACd,OACA,SACoB;AACpB,KAAI,QAAQ,SAAS,SACnB,QAAO,qBAAqB,OAAO,QAAQ;;;;;;;;AAY/C,SAAgB,gBACd,OACA,SACQ;CACR,MAAM,SAAS,QAAQ,OAAO;EAC5B,GAAG;EACH,UAAU;EACX,CAAC;AAEF,KAAI,UAAU,MAAM;EAClB,MAAM,QAAQ,QAAQ,QAAQ;AAC9B,QAAM,IAAI,MAAM,oBAAoB,MAAM,GAAG;;AAG/C,QAAO"}
|
|
@@ -495,15 +495,6 @@ interface ModelDefinition {
|
|
|
495
495
|
declare function resolveModelByName(models: readonly ModelDefinition[], name: string): ModelDefinition | undefined;
|
|
496
496
|
//#endregion
|
|
497
497
|
//#region src/core/runner/task-context.d.ts
|
|
498
|
-
/**
|
|
499
|
-
* Options for selecting a model from the execution context.
|
|
500
|
-
*/
|
|
501
|
-
interface TaskModelSelectionOptions {
|
|
502
|
-
/**
|
|
503
|
-
* Model id or alias name.
|
|
504
|
-
*/
|
|
505
|
-
name: string;
|
|
506
|
-
}
|
|
507
498
|
/**
|
|
508
499
|
* Task-scoped execution context exposed to runner executors.
|
|
509
500
|
*/
|
|
@@ -513,13 +504,9 @@ interface TaskExecutionContext {
|
|
|
513
504
|
*/
|
|
514
505
|
cache: TaskCacheRuntime;
|
|
515
506
|
/**
|
|
516
|
-
*
|
|
517
|
-
*
|
|
518
|
-
* Use when:
|
|
519
|
-
* - no arguments are provided to use the model selected by run matrix/inferenceExecutor
|
|
520
|
-
* - `name` is provided to resolve a specific model id or alias
|
|
507
|
+
* Configured model registrations available to model plugins.
|
|
521
508
|
*/
|
|
522
|
-
|
|
509
|
+
models: readonly ModelDefinition[];
|
|
523
510
|
}
|
|
524
511
|
/**
|
|
525
512
|
* Inputs used to build task execution context.
|
|
@@ -530,14 +517,13 @@ interface CreateTaskExecutionContextOptions {
|
|
|
530
517
|
task: ScheduledTask;
|
|
531
518
|
}
|
|
532
519
|
/**
|
|
533
|
-
* Creates task-scoped
|
|
520
|
+
* Creates task-scoped context data for runner execution.
|
|
534
521
|
*
|
|
535
522
|
* Call stack:
|
|
536
523
|
*
|
|
537
524
|
* {@link runScheduledTasks}
|
|
538
525
|
* -> {@link createTaskExecutionContext}
|
|
539
|
-
* ->
|
|
540
|
-
* -> `task.model()` / `task.model({ name })`
|
|
526
|
+
* -> `TaskExecutionContext`
|
|
541
527
|
*/
|
|
542
528
|
declare function createTaskExecutionContext(options: CreateTaskExecutionContextOptions): TaskExecutionContext;
|
|
543
529
|
//#endregion
|
|
@@ -581,7 +567,7 @@ interface RunScheduledTasksOptions {
|
|
|
581
567
|
* Creates per-task execution context.
|
|
582
568
|
*
|
|
583
569
|
* Use when:
|
|
584
|
-
* - executor code needs per-task
|
|
570
|
+
* - executor code needs per-task models, cache, or other task-scoped data
|
|
585
571
|
*/
|
|
586
572
|
createExecutionContext?: (task: ScheduledTask) => TaskExecutionContext;
|
|
587
573
|
/**
|
|
@@ -1035,24 +1021,13 @@ interface TaskRunContext {
|
|
|
1035
1021
|
*/
|
|
1036
1022
|
task: ScheduledTask;
|
|
1037
1023
|
/**
|
|
1038
|
-
*
|
|
1039
|
-
*
|
|
1040
|
-
* Runtime impact:
|
|
1041
|
-
* - `context.model()` uses `context.task.matrix.run.model` first when present
|
|
1042
|
-
* - then falls back to inferenceExecutor-id match
|
|
1043
|
-
* - then falls back to first configured model
|
|
1024
|
+
* Configured model registrations available to model plugins.
|
|
1044
1025
|
*
|
|
1045
|
-
*
|
|
1046
|
-
*
|
|
1047
|
-
*
|
|
1048
|
-
* const defaultModel = context.model()
|
|
1049
|
-
* // resolves the configured model whose id/model/alias matches 'gpt-4.1-mini'
|
|
1050
|
-
*
|
|
1051
|
-
* const judgeModel = context.model({ name: 'judge-large' })
|
|
1052
|
-
* // explicit lookup bypasses matrix default
|
|
1053
|
-
* ```
|
|
1026
|
+
* Use when:
|
|
1027
|
+
* - a plugin owns model selection semantics and needs access to registered models
|
|
1028
|
+
* - eval code resolves matrix-selected model axes through plugin helpers
|
|
1054
1029
|
*/
|
|
1055
|
-
|
|
1030
|
+
models: TaskExecutionContext['models'];
|
|
1056
1031
|
/**
|
|
1057
1032
|
* Optional reporter lifecycle hooks for task-local case events.
|
|
1058
1033
|
*
|
|
@@ -1378,5 +1353,5 @@ interface ConfigHookPlugin<TConfig> {
|
|
|
1378
1353
|
configVievalResolved?: (config: TConfig) => void | Promise<void>;
|
|
1379
1354
|
}
|
|
1380
1355
|
//#endregion
|
|
1381
|
-
export {
|
|
1382
|
-
//# sourceMappingURL=index-
|
|
1356
|
+
export { InferenceExecutor as $, RunScheduledTasksOptions as A, asProjectRelativePath as B, TaskDefinition as C, TaskRunContext as D, TaskReporterHooks as E, CreateTaskExecutionContextOptions as F, AggregatedProviderSummary as G, CreateVievalRunnerRuntimeContextOptions as H, TaskExecutionContext as I, RunResult as J, AggregatedRunResults as K, createTaskExecutionContext as L, RunnerTaskState as M, ScheduledTaskExecutor as N, TaskRunOutput as O, runScheduledTasks as P, CreateRunnerScheduleOptions as Q, ModelDefinition as R, TaskConcurrencyConfig as S, TaskReporterEventPayload as T, RunnerRuntimeContext as U, collectEvalEntries as V, createRunnerRuntimeContext as W, RunScoreKind as X, RunScore as Y, aggregateRunResults as Z, ScopedMatrices as _, CliOpenTelemetryReportingConfig as a, ScheduledTaskMatrixMeta as at, TaskCaseReporterPayload as b, EvalDefinition as c, createFilesystemTaskCacheRuntime as ct, MatrixAxisValues as d, CacheFileOptions as dt, RunnerMatrixDefinition as et, MatrixDefinition as f, CacheNamespace as ft, MatrixValue as g, MatrixRow as h, Awaitable as i, ScheduledTaskMatrix as it, RunnerExecutionError as j, TelemetryAttributeValue as k, EvalModule as l, normalizeCacheFilePathSegments as lt, MatrixPrimitive as m, defineEval as n, RunnerMatrixSelection as nt, CliReportingConfig as o, createRunnerSchedule as ot, MatrixLayer as p, TaskCacheRuntime as pt, AggregatedRunSummary as q, defineTask as r, ScheduledTask as rt, CollectedEvalEntry as s, CreateFilesystemTaskCacheRuntimeOptions as st, ConfigHookPlugin as t, RunnerMatrixInput as tt, EvalModuleMap as u, CacheFileHandle as ut, TaskAutoRetryDelay as v, TaskExecutionPolicy as w, TaskCaseState as x, TaskCaseReporterEndPayload as y, resolveModelByName as z };
|
|
1357
|
+
//# sourceMappingURL=index-BkjyCInx.d.mts.map
|
package/dist/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { D as TaskRunContext, I as TaskExecutionContext,
|
|
1
|
+
import { $ as InferenceExecutor, D as TaskRunContext, I as TaskExecutionContext, J as RunResult, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, X as RunScoreKind, f as MatrixDefinition, k as TelemetryAttributeValue, o as CliReportingConfig, p as MatrixLayer, rt as ScheduledTask, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "./index-BkjyCInx.mjs";
|
|
2
2
|
import { a as requiredEnvFrom } from "./env-BeHv_5mo.mjs";
|
|
3
3
|
import { expect } from "./expect.mjs";
|
|
4
4
|
import * as _$c12 from "c12";
|
|
@@ -137,7 +137,7 @@ interface CliProjectConfig {
|
|
|
137
137
|
* Model definitions available to project runtime execution.
|
|
138
138
|
*
|
|
139
139
|
* Inference executors control schedule fan-out, while models provide
|
|
140
|
-
* runtime lookup metadata for
|
|
140
|
+
* runtime lookup metadata for model plugin helpers during task execution.
|
|
141
141
|
*
|
|
142
142
|
* @default inherited from top-level config models
|
|
143
143
|
*/
|
|
@@ -253,11 +253,11 @@ interface CliComparisonConfig {
|
|
|
253
253
|
* Execution context exposed to project-level `executor` implementations.
|
|
254
254
|
*
|
|
255
255
|
* Use when:
|
|
256
|
-
* - a project executor needs
|
|
256
|
+
* - a project executor needs task-scoped models plus case reporter hooks
|
|
257
257
|
* - custom scheduling logic wants the same hook shape as `TaskRunContext`
|
|
258
258
|
*
|
|
259
259
|
* Expects:
|
|
260
|
-
* - `
|
|
260
|
+
* - `models` exposes configured model registrations for plugin helpers
|
|
261
261
|
* - `reporterHooks` follows `TaskRunContext['reporterHooks']`
|
|
262
262
|
* - `telemetry` follows `TaskRunContext['telemetry']`
|
|
263
263
|
* - `runtimeConcurrency` follows `TaskRunContext['runtimeConcurrency']`
|
package/dist/index.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { i as registerEvalDefinition, l as loadEnv, o as createNoopTelemetryRuntime, s as defineConfig } from "./registry-BHGMxjpA.mjs";
|
|
2
2
|
import { t as createSchedulerQueue } from "./queue-DsZQkZO_.mjs";
|
|
3
|
-
import { n as requiredEnvFrom } from "./env
|
|
3
|
+
import { n as requiredEnvFrom } from "./env-BFSjny07.mjs";
|
|
4
4
|
import { defineEval, defineTask } from "./config.mjs";
|
|
5
5
|
import { expect } from "./expect.mjs";
|
|
6
6
|
import { errorMessageFrom, sleep } from "@moeru/std";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"models-
|
|
1
|
+
{"version":3,"file":"models-pBSRUZhY.mjs","names":[],"sources":["../src/config/models.ts"],"sourcesContent":["import type { TaskExecutionPolicy } from './types'\n\n/**\n * Canonical model definition consumed by vieval runtime and config.\n *\n * Use when:\n * - declaring models in `vieval.config.*`\n * - resolving task runtime models by id, alias, or concrete model name\n *\n * Expects:\n * - `id` to be stable and unique within one config\n * - `inferenceExecutorId` to match scheduler/executor identifiers\n *\n * Returns:\n * - one normalized model registration record\n */\nexport interface ModelDefinition {\n /**\n * Stable model id.\n */\n id: string\n /**\n * Inference-executor id used for matching and reporting.\n */\n inferenceExecutorId: string\n /**\n * Executor reference passed through config.\n *\n * `vieval` core treats this as opaque runtime metadata. Builder plugins can\n * narrow this field with plugin-specific executor input types.\n */\n inferenceExecutor: unknown\n /**\n * Concrete model name passed to the inference executor.\n */\n model: string\n /**\n * Alias names that can resolve this model.\n */\n aliases: string[]\n /**\n * Optional execution policy hints attached to this model.\n */\n executionPolicy?: TaskExecutionPolicy\n /**\n * Optional model-level call parameters.\n */\n parameters?: Record<string, unknown>\n}\n\n/**\n * Resolves one model by id, model name, or alias in registration order.\n *\n * Returns:\n * - the first matching model, or `undefined` when no match exists\n */\nexport function resolveModelByName(\n models: readonly ModelDefinition[],\n name: string,\n): ModelDefinition | undefined {\n return models.find(model => model.id === name || model.model === name || model.aliases.includes(name))\n}\n"],"mappings":";;;;;;;AAwDA,SAAgB,mBACd,QACA,MAC6B;AAC7B,QAAO,OAAO,MAAK,UAAS,MAAM,OAAO,QAAQ,MAAM,UAAU,QAAQ,MAAM,QAAQ,SAAS,KAAK,CAAC"}
|