vieval 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"index.mjs","names":[],"sources":["../src/dsl/task.ts"],"sourcesContent":["import type { TaskConcurrencyConfig, TaskExecutionPolicy, TaskRunContext, TaskRunOutput } from '../config'\nimport type { RunScoreKind } from '../core/runner'\n\nimport { errorMessageFrom } from '@moeru/std'\n\nimport { defineEval, defineTask } from '../config'\nimport { createSchedulerQueue } from '../core/scheduler/queue'\nimport { registerEvalDefinition } from './registry'\n\n/**\n * Runtime context provided to a task case callback.\n */\nexport interface CaseRunContext<TInput> extends TaskRunContext {\n /**\n * Case-scoped matrix payload.\n */\n matrix: TaskRunContext['task']['matrix'] & { inputs: TInput }\n /**\n * Overrides one case score family with a custom normalized value.\n *\n * Use when:\n * - one case computes a benchmark-native score that should flow into run aggregation\n *\n * Expects:\n * - `score` to stay in the `0..1` range\n */\n score: (score: number, kind?: RunScoreKind) => void\n /**\n * Emits one custom case metric into report events.\n *\n * Use when:\n * - tasks need structured benchmark metadata beyond exact/judge score families\n *\n * Expects:\n * - `name` to be a stable metric identifier\n * - `value` to be JSON-serializable\n */\n metric: (name: string, value: boolean | number | string | null) => void\n /**\n * Cooperative abort signal for the current case execution.\n */\n signal: AbortSignal\n}\n\n/**\n * Callback for one task case.\n */\nexport type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<void> | void\n\ninterface RegisteredCase<TInput> {\n concurrency?: number\n executionPolicy?: TaskExecutionPolicy\n input: TInput\n name: string\n queueKey?: object\n run: CaseRunner<TInput>\n}\n\n/**\n * Per-group options for `casesFromInputs`.\n *\n * Use when:\n * - one generated case group should run with a lower case concurrency than the task default\n * - a task should keep a broader task-level cap while one expensive 
case family stays bounded\n *\n * Expects:\n * - `concurrency` to be a positive integer when provided\n *\n * Returns:\n * - one partial case-group execution descriptor\n */\nexport interface CasesFromInputsOptions extends TaskExecutionPolicy {\n /**\n * Case-level concurrency cap for cases registered by one `casesFromInputs(...)` call.\n */\n concurrency?: number\n}\n\n/**\n * Per-case registration options for `caseOf`.\n */\nexport interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {\n /**\n * Optional case input payload.\n */\n input: TInput\n}\n\ninterface CaseExecutionOutcome {\n errorMessage?: string\n scoresByKind: Map<RunScoreKind, number>\n state: 'failed' | 'passed' | 'timeout'\n}\n\nfunction cloneCaseMatrix(matrix: TaskRunContext['task']['matrix']): TaskRunContext['task']['matrix'] {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction createTaskCaseReporterId(index: number, name: string): string {\n return `${index}:${encodeURIComponent(name)}`\n}\n\nfunction assertValidScore(score: number): void {\n if (!Number.isFinite(score) || score < 0 || score > 1) {\n throw new Error(`Case score must be a finite number in range 0..1, got \"${score}\".`)\n }\n}\n\nfunction assertNonNegativeInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertPositiveInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction emitCaseStart(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n autoRetry?: number\n index: number\n name: string\n retryIndex?: number\n total: number\n },\n): void {\n try {\n hooks?.onCaseStart?.(payload)\n }\n catch {\n // Reporter hooks must never affect task 
scoring.\n }\n}\n\nfunction emitCaseEnd(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n index: number\n state: 'passed' | 'failed' | 'timeout'\n name: string\n total: number\n errorMessage?: string\n },\n): void {\n try {\n hooks?.onCaseEnd?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction createCaseTimeoutError(timeout: number): Error {\n const error = new Error(`Case timed out after ${timeout}ms.`)\n error.name = 'TimeoutError'\n return error\n}\n\nfunction normalizeExecutionPolicy(policy: TaskExecutionPolicy | undefined, label: string): TaskExecutionPolicy | undefined {\n if (policy == null) {\n return undefined\n }\n\n if (policy.autoAttempt != null) {\n assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`)\n }\n\n if (policy.autoRetry != null) {\n assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`)\n }\n\n if (policy.timeout != null) {\n assertPositiveInteger(policy.timeout, `${label} timeout`)\n }\n\n const normalized = {\n autoAttempt: policy.autoAttempt,\n autoRetry: policy.autoRetry,\n timeout: policy.timeout,\n }\n\n return Object.values(normalized).some(value => value != null)\n ? normalized\n : undefined\n}\n\nfunction resolveCaseExecutionPolicy(\n taskCase: RegisteredCase<unknown>,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Required<Pick<TaskExecutionPolicy, 'autoAttempt' | 'autoRetry'>> & Pick<TaskExecutionPolicy, 'timeout'> {\n return {\n autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,\n autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,\n timeout: taskCase.executionPolicy?.timeout ?? 
taskExecutionPolicy?.timeout,\n }\n}\n\nasync function runCaseOnce(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n timeout: number | undefined,\n): Promise<CaseExecutionOutcome> {\n const customScoresByKind = new Map<RunScoreKind, number>()\n const abortController = new AbortController()\n let timeoutHandle: ReturnType<typeof setTimeout> | undefined\n let timedOut = false\n let settled = false\n\n try {\n const runPromise = Promise.resolve(taskCase.run({\n ...context,\n matrix: {\n ...cloneCaseMatrix(context.task.matrix),\n inputs: taskCase.input,\n },\n metric(name, value) {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n context.reporterHooks?.onEvent?.({\n caseId: createTaskCaseReporterId(index, taskCase.name),\n data: {\n name,\n value,\n },\n event: 'task.case.metric',\n })\n },\n score(score, kind = 'exact') {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n assertValidScore(score)\n customScoresByKind.set(kind, score)\n },\n signal: abortController.signal,\n }))\n\n if (timeout != null) {\n const timeoutPromise = new Promise<never>((_, reject) => {\n timeoutHandle = setTimeout(() => {\n timedOut = true\n abortController.abort(createCaseTimeoutError(timeout))\n reject(createCaseTimeoutError(timeout))\n }, timeout)\n })\n\n await Promise.race([runPromise, timeoutPromise])\n }\n else {\n await runPromise\n }\n\n settled = true\n return {\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n }\n catch (error) {\n settled = true\n return {\n errorMessage: errorMessageFrom(error) ?? (timedOut && timeout != null ? `Case timed out after ${timeout}ms.` : 'Unknown case failure.'),\n scoresByKind: customScoresByKind,\n state: timedOut ? 
'timeout' : 'failed',\n }\n }\n finally {\n if (timeoutHandle != null) {\n clearTimeout(timeoutHandle)\n }\n }\n}\n\nasync function executeRegisteredCase(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n totalCases: number,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Promise<CaseExecutionOutcome> {\n const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy)\n let lastOutcome: CaseExecutionOutcome | undefined\n\n for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {\n emitCaseStart(context.reporterHooks, {\n ...(resolvedPolicy.autoRetry > 0\n ? {\n autoRetry: resolvedPolicy.autoRetry,\n retryIndex,\n }\n : {}),\n index,\n name: taskCase.name,\n total: totalCases,\n })\n lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout)\n if (lastOutcome.state === 'passed') {\n return lastOutcome\n }\n }\n\n return lastOutcome ?? {\n errorMessage: 'Unknown case failure.',\n scoresByKind: new Map(),\n state: 'failed',\n }\n}\n\nfunction collectCaseOutcomeScores(\n outcome: CaseExecutionOutcome,\n scoreBucketsByKind: Record<RunScoreKind, number[]>,\n): void {\n if (outcome.state !== 'passed') {\n scoreBucketsByKind.exact.push(0)\n return\n }\n\n if (outcome.scoresByKind.size === 0) {\n scoreBucketsByKind.exact.push(1)\n return\n }\n\n scoreBucketsByKind.exact.push(outcome.scoresByKind.get('exact') ?? 
1)\n const judgeScore = outcome.scoresByKind.get('judge')\n if (judgeScore != null) {\n scoreBucketsByKind.judge.push(judgeScore)\n }\n}\n\n/**\n * Builder callbacks passed into `describeTask`.\n */\nexport interface DescribeTaskBuilder {\n /**\n * Registers one explicit case.\n */\n caseOf: {\n (name: string, run: CaseRunner<undefined>): void\n <TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n }\n /**\n * Registers multiple cases from input list.\n */\n casesFromInputs: <TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n ) => void\n}\n\n/**\n * Options for `describeTask`.\n */\nexport interface DescribeTaskOptions extends TaskExecutionPolicy {\n /**\n * Optional description override.\n */\n description?: string\n /**\n * Optional task-local concurrency overrides.\n *\n * Use when:\n * - one task should cap attempt fan-out independently from the surrounding project\n * - one task should cap case fan-out without changing global scheduling defaults\n *\n * Expects:\n * - each provided value to be a positive integer\n *\n * @default inherited from project or CLI concurrency settings\n */\n concurrency?: TaskConcurrencyConfig\n}\n\nfunction createCaseBuilder(registeredCases: RegisteredCase<unknown>[]): DescribeTaskBuilder {\n function registerCase(name: string, run: CaseRunner<undefined>): void\n function registerCase<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n function registerCase<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n ): void {\n registeredCases.push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n }\n\n return {\n caseOf: registerCase,\n casesFromInputs(namePrefix, inputs, run, options) {\n const queueKey = 
options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n registeredCases.push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n },\n }\n}\n\nlet activeCasesStack: RegisteredCase<unknown>[][] = []\n\nfunction withActiveCases<T>(cases: RegisteredCase<unknown>[], callback: () => T): T {\n activeCasesStack = [...activeCasesStack, cases]\n\n try {\n return callback()\n }\n finally {\n activeCasesStack = activeCasesStack.slice(0, -1)\n }\n}\n\nfunction getActiveCases(): RegisteredCase<unknown>[] {\n const active = activeCasesStack.at(-1)\n if (active == null) {\n throw new Error('caseOf/casesFromInputs must be called inside describeTask/describeEval.')\n }\n\n return active\n}\n\n/**\n * Registers one case in the currently active task scope.\n */\nexport function caseOf(\n name: string,\n run: CaseRunner<undefined>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput>,\n options: CaseRegistrationOptions<TInput>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n): void {\n getActiveCases().push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n}\n\n/**\n * Registers multiple cases in the currently active task scope.\n */\nexport function casesFromInputs<TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n): void {\n const queueKey = options?.concurrency == null ? 
undefined : {}\n\n inputs.forEach((input, index) => {\n getActiveCases().push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n}\n\n/**\n * Resolves the effective case concurrency for one registered task case.\n *\n * Before:\n * - registered case override `2`, task default `4`\n * - registered case override `undefined`, task default `3`\n *\n * After:\n * - `2`\n * - `3`\n */\nfunction resolveCaseConcurrency(\n taskCase: RegisteredCase<unknown>,\n taskConcurrency: TaskConcurrencyConfig | undefined,\n runtimeConcurrency: TaskConcurrencyConfig | undefined,\n): number | undefined {\n const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case\n if (concurrency == null) {\n return undefined\n }\n\n if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) {\n throw new Error(`Invalid task case concurrency: ${String(concurrency)}`)\n }\n\n return concurrency\n}\n\nfunction resolveCaseQueueKey(taskCase: RegisteredCase<unknown>, defaultQueueKey: object): object {\n return taskCase.queueKey ?? defaultQueueKey\n}\n\n/**\n * Defines one eval task with task/case semantics similar to Vitest.\n *\n * Use when:\n * - task behavior should be declared with `caseOf` and `casesFromInputs`\n * - business agent code should be imported and run from eval task files\n */\nexport function describeTask(\n name: string,\n build: ((builder: DescribeTaskBuilder) => void) | (() => void),\n options: DescribeTaskOptions = {},\n) {\n const registeredCases: RegisteredCase<unknown>[] = []\n const builder = createCaseBuilder(registeredCases)\n withActiveCases(registeredCases, () => {\n if (build.length > 0) {\n (build as (builder: DescribeTaskBuilder) => void)(builder)\n return\n }\n\n ;(build as () => void)()\n })\n\n const description = options.description ?? 
name\n const taskExecutionPolicy = normalizeExecutionPolicy(options, 'describeTask')\n\n const definition = defineEval({\n description,\n name,\n task: defineTask({\n concurrency: options.concurrency,\n executionPolicy: taskExecutionPolicy,\n id: name,\n async run(context): Promise<TaskRunOutput> {\n if (registeredCases.length === 0) {\n return {\n scores: [{ kind: 'exact', score: 1 }],\n }\n }\n\n const totalCases = registeredCases.length\n const scoreBucketsByKind: Record<RunScoreKind, number[]> = {\n exact: [],\n judge: [],\n }\n const defaultCaseQueueKey = {}\n const caseQueues = new Map<object, ReturnType<typeof createSchedulerQueue>>()\n const hasAutoAttempt = registeredCases.some(taskCase => resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt > 0)\n const runtimeTaskConcurrency = context.task.entry.task?.concurrency ?? options.concurrency\n\n if (!hasAutoAttempt) {\n await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => {\n const outcome = await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n }\n\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n await executeCase()\n return\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? 
createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n await queue.run(executeCase)\n }),\n )\n }\n else {\n let finalOutcomes: CaseExecutionOutcome[] = []\n let attemptIndex = 0\n\n for (;;) {\n finalOutcomes = await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n return await executeCase()\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n return await queue.run(executeCase)\n }),\n )\n\n const shouldContinue = finalOutcomes.some((outcome, index) => {\n if (outcome.state === 'passed') {\n return false\n }\n\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return false\n }\n\n return attemptIndex < resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt\n })\n\n if (!shouldContinue) {\n break\n }\n\n attemptIndex += 1\n }\n\n finalOutcomes.forEach((outcome, index) => {\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return\n }\n\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? 
{} : { errorMessage: outcome.errorMessage }),\n index,\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n })\n }\n\n const scores = (Object.keys(scoreBucketsByKind) as RunScoreKind[])\n .filter(kind => scoreBucketsByKind[kind].length > 0)\n .map((kind) => {\n const values = scoreBucketsByKind[kind]\n const total = values.reduce((sum, value) => sum + value, 0)\n return {\n kind,\n score: total / values.length,\n }\n })\n\n return {\n scores,\n }\n },\n }),\n })\n\n registerEvalDefinition(definition)\n\n return definition\n}\n\n/**\n * Alias of `describeTask` for eval-centric naming.\n */\nexport const describeEval = describeTask\n"],"mappings":";;;;;;;AA8FA,SAAS,gBAAgB,QAA4E;AACnG,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,yBAAyB,OAAe,MAAsB;AACrE,QAAO,GAAG,MAAM,GAAG,mBAAmB,KAAK;;AAG7C,SAAS,iBAAiB,OAAqB;AAC7C,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,KAAK,QAAQ,EAClD,OAAM,IAAI,MAAM,0DAA0D,MAAM,IAAI;;AAIxF,SAAS,yBAAyB,OAAe,OAAqB;AACpE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,sBAAsB,OAAe,OAAqB;AACjE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,SAAS,EAClE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,cACP,OACA,SAOM;AACN,KAAI;AACF,SAAO,cAAc,QAAQ;SAEzB;;AAKR,SAAS,YACP,OACA,SAOM;AACN,KAAI;AACF,SAAO,YAAY,QAAQ;SAEvB;;AAKR,SAAS,uBAAuB,SAAwB;CACtD,MAAM,wBAAQ,IAAI,MAAM,wBAAwB,QAAQ,KAAK;AAC7D,OAAM,OAAO;AACb,QAAO;;AAGT,SAAS,yBAAyB,QAAyC,OAAgD;AACzH,KAAI,UAAU,KACZ;AAGF,KAAI,OAAO,eAAe,KACxB,0BAAyB,OAAO,aAAa,GAAG,MAAM,cAAc;AAGtE,KAAI,OAAO,aAAa,KACtB,0BAAyB,OAAO,WAAW,GAAG,MAAM,YAAY;AAGlE,KAAI,OAAO,WAAW,KACpB,uBAAsB,OAAO,SAAS,GAAG,MAAM,UAAU;CAG3D,MAAM,aAAa;EACjB,aAAa,OAAO;EACpB,WAAW,OAAO;EAClB,SAAS,OAAO;EACjB;AAED,QAAO,OAAO,OAAO,WAAW,CAAC,MAAK,UAAS,SAAS,KAAK,GACzD,aACA,KAAA;;AAGN,SAAS,2BACP,UACA,qBACyG;AACzG,QAAO;EACL,aAAa,SAAS,iBAAiB,eAAe,qBAAqB,
eAAe;EAC1F,WAAW,SAAS,iBAAiB,aAAa,qBAAqB,aAAa;EACpF,SAAS,SAAS,iBAAiB,WAAW,qBAAqB;EACpE;;AAGH,eAAe,YACb,SACA,UACA,OACA,SAC+B;CAC/B,MAAM,qCAAqB,IAAI,KAA2B;CAC1D,MAAM,kBAAkB,IAAI,iBAAiB;CAC7C,IAAI;CACJ,IAAI,WAAW;CACf,IAAI,UAAU;AAEd,KAAI;EACF,MAAM,aAAa,QAAQ,QAAQ,SAAS,IAAI;GAC9C,GAAG;GACH,QAAQ;IACN,GAAG,gBAAgB,QAAQ,KAAK,OAAO;IACvC,QAAQ,SAAS;IAClB;GACD,OAAO,MAAM,OAAO;AAClB,QAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,YAAQ,eAAe,UAAU;KAC/B,QAAQ,yBAAyB,OAAO,SAAS,KAAK;KACtD,MAAM;MACJ;MACA;MACD;KACD,OAAO;KACR,CAAC;;GAEJ,MAAM,OAAO,OAAO,SAAS;AAC3B,QAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,qBAAiB,MAAM;AACvB,uBAAmB,IAAI,MAAM,MAAM;;GAErC,QAAQ,gBAAgB;GACzB,CAAC,CAAC;AAEH,MAAI,WAAW,MAAM;GACnB,MAAM,iBAAiB,IAAI,SAAgB,GAAG,WAAW;AACvD,oBAAgB,iBAAiB;AAC/B,gBAAW;AACX,qBAAgB,MAAM,uBAAuB,QAAQ,CAAC;AACtD,YAAO,uBAAuB,QAAQ,CAAC;OACtC,QAAQ;KACX;AAEF,SAAM,QAAQ,KAAK,CAAC,YAAY,eAAe,CAAC;QAGhD,OAAM;AAGR,YAAU;AACV,SAAO;GACL,cAAc;GACd,OAAO;GACR;UAEI,OAAO;AACZ,YAAU;AACV,SAAO;GACL,cAAc,iBAAiB,MAAM,KAAK,YAAY,WAAW,OAAO,wBAAwB,QAAQ,OAAO;GAC/G,cAAc;GACd,OAAO,WAAW,YAAY;GAC/B;WAEK;AACN,MAAI,iBAAiB,KACnB,cAAa,cAAc;;;AAKjC,eAAe,sBACb,SACA,UACA,OACA,YACA,qBAC+B;CAC/B,MAAM,iBAAiB,2BAA2B,UAAU,oBAAoB;CAChF,IAAI;AAEJ,MAAK,IAAI,aAAa,GAAG,cAAc,eAAe,WAAW,cAAc,GAAG;AAChF,gBAAc,QAAQ,eAAe;GACnC,GAAI,eAAe,YAAY,IAC3B;IACE,WAAW,eAAe;IAC1B;IACD,GACD,EAAE;GACN;GACA,MAAM,SAAS;GACf,OAAO;GACR,CAAC;AACF,gBAAc,MAAM,YAAY,SAAS,UAAU,OAAO,eAAe,QAAQ;AACjF,MAAI,YAAY,UAAU,SACxB,QAAO;;AAIX,QAAO,eAAe;EACpB,cAAc;EACd,8BAAc,IAAI,KAAK;EACvB,OAAO;EACR;;AAGH,SAAS,yBACP,SACA,oBACM;AACN,KAAI,QAAQ,UAAU,UAAU;AAC9B,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,KAAI,QAAQ,aAAa,SAAS,GAAG;AACnC,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,oBAAmB,MAAM,KAAK,QAAQ,aAAa,IAAI,QAAQ,IAAI,EAAE;CACrE,MAAM,aAAa,QAAQ,aAAa,IAAI,QAAQ;AACpD,KAAI,cAAc,KAChB,oBAAmB,MAAM,KAAK,WAAW;;AAiD7C,SAAS,kBAAkB,iBAAiE;CAG1F,SAAS,aACP,MACA,KACA,SACM;AACN,kBAAgB,KAAK;GACnB,iBAAiB,yBAAyB,SAAS,YAAY;GAC/D,OAAO,SAAS;GAChB;GACK;GACN,CAAC;;AAGJ,QAAO;EACL,QAAQ;EACR,gBAAgB,YAAY,QAAQ,KAAK,SAAS;GAChD,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAA
E;AAE9D,UAAO,SAAS,OAAO,UAAU;AAC/B,oBAAgB,KAAK;KACnB,aAAa,SAAS;KACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;KACrE;KACA,MAAM,GAAG,WAAW,IAAI,QAAQ;KAChC;KACK;KACN,CAAC;KACF;;EAEL;;AAGH,IAAI,mBAAgD,EAAE;AAEtD,SAAS,gBAAmB,OAAkC,UAAsB;AAClF,oBAAmB,CAAC,GAAG,kBAAkB,MAAM;AAE/C,KAAI;AACF,SAAO,UAAU;WAEX;AACN,qBAAmB,iBAAiB,MAAM,GAAG,GAAG;;;AAIpD,SAAS,iBAA4C;CACnD,MAAM,SAAS,iBAAiB,GAAG,GAAG;AACtC,KAAI,UAAU,KACZ,OAAM,IAAI,MAAM,0EAA0E;AAG5F,QAAO;;AAiBT,SAAgB,OACd,MACA,KACA,SACM;AACN,iBAAgB,CAAC,KAAK;EACpB,iBAAiB,yBAAyB,SAAS,YAAY;EAC/D,OAAO,SAAS;EAChB;EACK;EACN,CAAC;;;;;AAMJ,SAAgB,gBACd,YACA,QACA,KACA,SACM;CACN,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,QAAO,SAAS,OAAO,UAAU;AAC/B,kBAAgB,CAAC,KAAK;GACpB,aAAa,SAAS;GACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;GACrE;GACA,MAAM,GAAG,WAAW,IAAI,QAAQ;GAChC;GACK;GACN,CAAC;GACF;;;;;;;;;;;;;AAcJ,SAAS,uBACP,UACA,iBACA,oBACoB;CACpB,MAAM,cAAc,oBAAoB,QAAQ,SAAS,eAAe,iBAAiB;AACzF,KAAI,eAAe,KACjB;AAGF,KAAI,CAAC,OAAO,SAAS,YAAY,IAAI,CAAC,OAAO,UAAU,YAAY,IAAI,eAAe,EACpF,OAAM,IAAI,MAAM,kCAAkC,OAAO,YAAY,GAAG;AAG1E,QAAO;;AAGT,SAAS,oBAAoB,UAAmC,iBAAiC;AAC/F,QAAO,SAAS,YAAY;;;;;;;;;AAU9B,SAAgB,aACd,MACA,OACA,UAA+B,EAAE,EACjC;CACA,MAAM,kBAA6C,EAAE;CACrD,MAAM,UAAU,kBAAkB,gBAAgB;AAClD,iBAAgB,uBAAuB;AACrC,MAAI,MAAM,SAAS,GAAG;AACnB,SAAiD,QAAQ;AAC1D;;AAGA,SAAsB;GACxB;CAEF,MAAM,cAAc,QAAQ,eAAe;CAC3C,MAAM,sBAAsB,yBAAyB,SAAS,eAAe;CAE7E,MAAM,aAAa,WAAW;EAC5B;EACA;EACA,MAAM,WAAW;GACf,aAAa,QAAQ;GACrB,iBAAiB;GACjB,IAAI;GACJ,MAAM,IAAI,SAAiC;AACzC,QAAI,gBAAgB,WAAW,EAC7B,QAAO,EACL,QAAQ,CAAC;KAAE,MAAM;KAAS,OAAO;KAAG,CAAC,EACtC;IAGH,MAAM,aAAa,gBAAgB;IACnC,MAAM,qBAAqD;KACzD,OAAO,EAAE;KACT,OAAO,EAAE;KACV;IACD,MAAM,sBAAsB,EAAE;IAC9B,MAAM,6BAAa,IAAI,KAAsD;IAC7E,MAAM,iBAAiB,gBAAgB,MAAK,aAAY,2BAA2B,UAAU,oBAAoB,CAAC,cAAc,EAAE;IAClI,MAAM,yBAAyB,QAAQ,KAAK,MAAM,MAAM,eAAe,QAAQ;AAE/E,QAAI,CAAC,eACH,OAAM,QAAQ,IACZ,gBAAgB,IAAI,OAAO,UAAU,UAAU;KAC7C,MAAM,cAAc,YAAY;MAC9B,MAAM,UAAU,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;AACtG,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,OAAO,QAAQ;O
ACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;;KAGvD,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,SAAI,eAAe,MAAM;AACvB,YAAM,aAAa;AACnB;;KAGF,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;KACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,gBAAW,IAAI,UAAU,MAAM;AAC/B,WAAM,MAAM,IAAI,YAAY;MAC5B,CACH;SAEE;KACH,IAAI,gBAAwC,EAAE;KAC9C,IAAI,eAAe;AAEnB,cAAS;AACP,sBAAgB,MAAM,QAAQ,IAC5B,gBAAgB,IAAI,OAAO,UAAU,UAAU;OAC7C,MAAM,cAAc,YAAY,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;OACtH,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,WAAI,eAAe,KACjB,QAAO,MAAM,aAAa;OAG5B,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;OACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,kBAAW,IAAI,UAAU,MAAM;AAC/B,cAAO,MAAM,MAAM,IAAI,YAAY;QACnC,CACH;AAeD,UAAI,CAbmB,cAAc,MAAM,SAAS,UAAU;AAC5D,WAAI,QAAQ,UAAU,SACpB,QAAO;OAGT,MAAM,WAAW,gBAAgB;AACjC,WAAI,YAAY,KACd,QAAO;AAGT,cAAO,eAAe,2BAA2B,UAAU,oBAAoB,CAAC;QAChF,CAGA;AAGF,sBAAgB;;AAGlB,mBAAc,SAAS,SAAS,UAAU;MACxC,MAAM,WAAW,gBAAgB;AACjC,UAAI,YAAY,KACd;AAGF,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,OAAO,QAAQ;OACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;OACrD;;AAcJ,WAAO,EACL,QAZc,OAAO,KAAK,mBAAmB,CAC5C,QAAO,SAAQ,mBAAmB,MAAM,SAAS,EAAE,CACnD,KAAK,SAAS;KACb,MAAM,SAAS,mBAAmB;AAElC,YAAO;MACL;MACA,OAHY,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAG1C,OAAO;MACvB;MACD,EAIH;;GAEJ,CAAC;EACH,CAAC;AAEF,wBAAuB,WAAW;AAElC,QAAO;;;;;AAMT,MAAa,eAAe"}
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../src/dsl/task.ts"],"sourcesContent":["import type { TaskConcurrencyConfig, TaskExecutionPolicy, TaskReporterEventPayload, TaskRunContext, TaskRunOutput } from '../config'\nimport type { RunScoreKind } from '../core/runner'\nimport type { TelemetryAttributeValue } from '../core/telemetry'\n\nimport { errorMessageFrom, sleep } from '@moeru/std'\n\nimport { defineEval, defineTask } from '../config'\nimport { createSchedulerQueue } from '../core/scheduler/queue'\nimport { createNoopTelemetryRuntime } from '../core/telemetry'\nimport { registerEvalDefinition } from './registry'\n\n/**\n * Runtime context provided to a task case callback.\n */\nexport interface CaseRunContext<TInput> extends TaskRunContext {\n /**\n * Case-scoped matrix payload.\n */\n matrix: TaskRunContext['task']['matrix'] & { inputs: TInput }\n /**\n * Overrides one case score family with a custom normalized value.\n *\n * Use when:\n * - one case computes a benchmark-native score that should flow into run aggregation\n *\n * Expects:\n * - `score` to stay in the `0..1` range\n */\n score: (score: number, kind?: RunScoreKind) => void\n /**\n * Emits one custom case metric into report events.\n *\n * Use when:\n * - tasks need structured benchmark metadata beyond exact/judge score families\n *\n * Expects:\n * - `name` to be a stable metric identifier\n * - `value` to be JSON-serializable\n */\n metric: (name: string, value: TelemetryAttributeValue) => void\n /**\n * Cooperative abort signal for the current case execution.\n */\n signal: AbortSignal\n}\n\n/**\n * Callback for one task case.\n */\nexport type CaseRunner<TInput> = (context: CaseRunContext<TInput>) => Promise<unknown> | unknown\n\ninterface RegisteredCase<TInput> {\n concurrency?: number\n executionPolicy?: TaskExecutionPolicy\n input: TInput\n name: string\n queueKey?: object\n run: CaseRunner<TInput>\n}\n\n/**\n * Per-group options for `casesFromInputs`.\n *\n * Use when:\n * - 
one generated case group should run with a lower case concurrency than the task default\n * - a task should keep a broader task-level cap while one expensive case family stays bounded\n *\n * Expects:\n * - `concurrency` to be a positive integer when provided\n *\n * Returns:\n * - one partial case-group execution descriptor\n */\nexport interface CasesFromInputsOptions extends TaskExecutionPolicy {\n /**\n * Case-level concurrency cap for cases registered by one `casesFromInputs(...)` call.\n */\n concurrency?: number\n}\n\n/**\n * Per-case registration options for `caseOf`.\n */\nexport interface CaseRegistrationOptions<TInput> extends TaskExecutionPolicy {\n /**\n * Optional case input payload.\n */\n input: TInput\n}\n\ninterface CaseExecutionOutcome {\n errorMessage?: string\n output?: unknown\n scoresByKind: Map<RunScoreKind, number>\n state: 'failed' | 'passed' | 'timeout'\n}\n\nfunction cloneCaseMatrix(matrix: TaskRunContext['task']['matrix']): TaskRunContext['task']['matrix'] {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction createTaskCaseReporterId(index: number, name: string): string {\n return `${index}:${encodeURIComponent(name)}`\n}\n\nfunction isTelemetryAttributeScalar(value: unknown): value is boolean | number | string {\n return typeof value === 'boolean' || typeof value === 'number' || typeof value === 'string'\n}\n\nfunction isTelemetryAttributeArray(value: readonly TelemetryAttributeValue[]): value is readonly boolean[] | readonly number[] | readonly string[] {\n return value.every(isTelemetryAttributeScalar)\n}\n\nfunction canAttachMetricAsAttribute(value: TelemetryAttributeValue): value is boolean | number | string | readonly boolean[] | readonly number[] | readonly string[] {\n if (isTelemetryAttributeScalar(value)) {\n return true\n }\n\n return Array.isArray(value) && isTelemetryAttributeArray(value)\n}\n\nfunction assertValidScore(score: number): void {\n if 
(!Number.isFinite(score) || score < 0 || score > 1) {\n throw new Error(`Case score must be a finite number in range 0..1, got \"${score}\".`)\n }\n}\n\nfunction assertNonNegativeInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertNonNegativeNumber(value: number, label: string): void {\n if (!Number.isFinite(value) || value < 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction assertPositiveInteger(value: number, label: string): void {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value <= 0) {\n throw new Error(`Invalid ${label}: ${String(value)}`)\n }\n}\n\nfunction autoRetryDelayMs(retryIndex: number): number {\n // Retry index 1 is the first retry after the initial case failure.\n return 500 * 2 ** (retryIndex - 1)\n}\n\nfunction resolveAutoRetryDelay(policy: TaskExecutionPolicy, retryIndex: number): number {\n const delay = policy.autoRetryDelay\n\n if (delay == null) {\n return autoRetryDelayMs(retryIndex)\n }\n\n return typeof delay === 'number' ? 
delay : delay(retryIndex)\n}\n\nfunction emitCaseStart(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n autoRetry?: number\n index: number\n input?: unknown\n name: string\n retryIndex?: number\n total: number\n },\n): void {\n try {\n hooks?.onCaseStart?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitCaseEnd(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: {\n index: number\n output?: unknown\n state: 'passed' | 'failed' | 'timeout'\n name: string\n total: number\n errorMessage?: string\n },\n): void {\n try {\n hooks?.onCaseEnd?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction emitReporterEvent(\n hooks: TaskRunContext['reporterHooks'] | undefined,\n payload: TaskReporterEventPayload,\n): void {\n try {\n hooks?.onEvent?.(payload)\n }\n catch {\n // Reporter hooks must never affect task scoring.\n }\n}\n\nfunction createCaseTimeoutError(timeout: number): Error {\n const error = new Error(`Case timed out after ${timeout}ms.`)\n error.name = 'TimeoutError'\n return error\n}\n\nfunction normalizeExecutionPolicy(policy: TaskExecutionPolicy | undefined, label: string): TaskExecutionPolicy | undefined {\n if (policy == null) {\n return undefined\n }\n\n if (policy.autoAttempt != null) {\n assertNonNegativeInteger(policy.autoAttempt, `${label} autoAttempt`)\n }\n\n if (policy.autoRetry != null) {\n assertNonNegativeInteger(policy.autoRetry, `${label} autoRetry`)\n }\n\n if (typeof policy.autoRetryDelay === 'number') {\n assertNonNegativeNumber(policy.autoRetryDelay, `${label} autoRetryDelay`)\n }\n\n if (policy.timeout != null) {\n assertPositiveInteger(policy.timeout, `${label} timeout`)\n }\n\n const normalized = {\n autoAttempt: policy.autoAttempt,\n autoRetry: policy.autoRetry,\n autoRetryDelay: policy.autoRetryDelay,\n timeout: policy.timeout,\n }\n\n return Object.values(normalized).some(value => value != null)\n ? 
normalized\n : undefined\n}\n\nfunction resolveCaseExecutionPolicy(\n taskCase: RegisteredCase<unknown>,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Required<Pick<TaskExecutionPolicy, 'autoAttempt' | 'autoRetry'>> & Pick<TaskExecutionPolicy, 'autoRetryDelay' | 'timeout'> {\n return {\n autoAttempt: taskCase.executionPolicy?.autoAttempt ?? taskExecutionPolicy?.autoAttempt ?? 0,\n autoRetry: taskCase.executionPolicy?.autoRetry ?? taskExecutionPolicy?.autoRetry ?? 0,\n autoRetryDelay: taskCase.executionPolicy?.autoRetryDelay ?? taskExecutionPolicy?.autoRetryDelay,\n timeout: taskCase.executionPolicy?.timeout ?? taskExecutionPolicy?.timeout,\n }\n}\n\nasync function runCaseOnce(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n timeout: number | undefined,\n): Promise<CaseExecutionOutcome> {\n const customScoresByKind = new Map<RunScoreKind, number>()\n const abortController = new AbortController()\n const telemetry = context.telemetry ?? createNoopTelemetryRuntime()\n const caseId = createTaskCaseReporterId(index, taskCase.name)\n let timeoutHandle: ReturnType<typeof setTimeout> | undefined\n let timedOut = false\n let settled = false\n\n try {\n return await telemetry.withSpan('vieval.case', {\n 'vieval.case.id': caseId,\n 'vieval.case.name': taskCase.name,\n 'vieval.task.id': context.task.id,\n 'vieval.task.name': context.task.entry.name,\n }, async () => {\n const runPromise = Promise.resolve(taskCase.run({\n ...context,\n matrix: {\n ...cloneCaseMatrix(context.task.matrix),\n inputs: taskCase.input,\n },\n metric(name, value) {\n if (abortController.signal.aborted || settled) {\n return\n }\n\n emitReporterEvent(context.reporterHooks, {\n caseId,\n data: {\n name,\n value,\n },\n event: 'task.case.metric',\n })\n telemetry.addEvent('vieval.case.metric', { name, value })\n if (canAttachMetricAsAttribute(value)) {\n telemetry.setAttributes({ [name]: value })\n }\n },\n score(score, kind = 'exact') {\n if 
(abortController.signal.aborted || settled) {\n return\n }\n\n assertValidScore(score)\n customScoresByKind.set(kind, score)\n telemetry.addEvent('vieval.case.score', {\n 'vieval.score.kind': kind,\n 'vieval.score.value': score,\n })\n emitReporterEvent(context.reporterHooks, {\n caseId,\n data: { kind, score },\n event: 'task.case.score',\n })\n },\n signal: abortController.signal,\n }))\n\n if (timeout != null) {\n const timeoutPromise = new Promise<never>((_, reject) => {\n timeoutHandle = setTimeout(() => {\n timedOut = true\n abortController.abort(createCaseTimeoutError(timeout))\n reject(createCaseTimeoutError(timeout))\n }, timeout)\n })\n\n const output = await Promise.race([runPromise, timeoutPromise])\n settled = true\n return {\n output,\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n }\n\n const output = await runPromise\n settled = true\n return {\n output,\n scoresByKind: customScoresByKind,\n state: 'passed',\n }\n })\n }\n catch (error) {\n settled = true\n return {\n errorMessage: errorMessageFrom(error) ?? (timedOut && timeout != null ? `Case timed out after ${timeout}ms.` : 'Unknown case failure.'),\n scoresByKind: customScoresByKind,\n state: timedOut ? 
'timeout' : 'failed',\n }\n }\n finally {\n if (timeoutHandle != null) {\n clearTimeout(timeoutHandle)\n }\n }\n}\n\nasync function executeRegisteredCase(\n context: TaskRunContext,\n taskCase: RegisteredCase<unknown>,\n index: number,\n totalCases: number,\n taskExecutionPolicy: TaskExecutionPolicy | undefined,\n): Promise<CaseExecutionOutcome> {\n const resolvedPolicy = resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy)\n let lastOutcome: CaseExecutionOutcome | undefined\n\n for (let retryIndex = 0; retryIndex <= resolvedPolicy.autoRetry; retryIndex += 1) {\n if (retryIndex > 0) {\n const retryDelayMs = resolveAutoRetryDelay(resolvedPolicy, retryIndex)\n assertNonNegativeNumber(retryDelayMs, 'autoRetryDelay result')\n\n if (retryDelayMs > 0) {\n await sleep(retryDelayMs)\n }\n }\n\n emitCaseStart(context.reporterHooks, {\n ...(resolvedPolicy.autoRetry > 0\n ? {\n autoRetry: resolvedPolicy.autoRetry,\n retryIndex,\n }\n : {}),\n index,\n ...(taskCase.input === undefined ? {} : { input: taskCase.input }),\n name: taskCase.name,\n total: totalCases,\n })\n lastOutcome = await runCaseOnce(context, taskCase, index, resolvedPolicy.timeout)\n if (lastOutcome.state === 'passed') {\n return lastOutcome\n }\n }\n\n return lastOutcome ?? {\n errorMessage: 'Unknown case failure.',\n scoresByKind: new Map(),\n state: 'failed',\n }\n}\n\nfunction collectCaseOutcomeScores(\n outcome: CaseExecutionOutcome,\n scoreBucketsByKind: Record<RunScoreKind, number[]>,\n): void {\n if (outcome.state !== 'passed') {\n scoreBucketsByKind.exact.push(0)\n return\n }\n\n if (outcome.scoresByKind.size === 0) {\n scoreBucketsByKind.exact.push(1)\n return\n }\n\n scoreBucketsByKind.exact.push(outcome.scoresByKind.get('exact') ?? 
1)\n const judgeScore = outcome.scoresByKind.get('judge')\n if (judgeScore != null) {\n scoreBucketsByKind.judge.push(judgeScore)\n }\n}\n\n/**\n * Builder callbacks passed into `describeTask`.\n */\nexport interface DescribeTaskBuilder {\n /**\n * Registers one explicit case.\n */\n caseOf: {\n (name: string, run: CaseRunner<undefined>): void\n <TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n }\n /**\n * Registers multiple cases from input list.\n */\n casesFromInputs: <TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n ) => void\n}\n\n/**\n * Options for `describeTask`.\n */\nexport interface DescribeTaskOptions extends TaskExecutionPolicy {\n /**\n * Optional description override.\n */\n description?: string\n /**\n * Optional task-local concurrency overrides.\n *\n * Use when:\n * - one task should cap attempt fan-out independently from the surrounding project\n * - one task should cap case fan-out without changing global scheduling defaults\n *\n * Expects:\n * - each provided value to be a positive integer\n *\n * @default inherited from project or CLI concurrency settings\n */\n concurrency?: TaskConcurrencyConfig\n}\n\nfunction createCaseBuilder(registeredCases: RegisteredCase<unknown>[]): DescribeTaskBuilder {\n function registerCase(name: string, run: CaseRunner<undefined>): void\n function registerCase<TInput>(name: string, run: CaseRunner<TInput>, options: CaseRegistrationOptions<TInput>): void\n function registerCase<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n ): void {\n registeredCases.push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n }\n\n return {\n caseOf: registerCase,\n casesFromInputs(namePrefix, inputs, run, options) {\n const queueKey = 
options?.concurrency == null ? undefined : {}\n\n inputs.forEach((input, index) => {\n registeredCases.push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n },\n }\n}\n\nlet activeCasesStack: RegisteredCase<unknown>[][] = []\n\nfunction withActiveCases<T>(cases: RegisteredCase<unknown>[], callback: () => T): T {\n activeCasesStack = [...activeCasesStack, cases]\n\n try {\n return callback()\n }\n finally {\n activeCasesStack = activeCasesStack.slice(0, -1)\n }\n}\n\nfunction getActiveCases(): RegisteredCase<unknown>[] {\n const active = activeCasesStack.at(-1)\n if (active == null) {\n throw new Error('caseOf/casesFromInputs must be called inside describeTask/describeEval.')\n }\n\n return active\n}\n\n/**\n * Registers one case in the currently active task scope.\n */\nexport function caseOf(\n name: string,\n run: CaseRunner<undefined>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput>,\n options: CaseRegistrationOptions<TInput>,\n): void\n\nexport function caseOf<TInput>(\n name: string,\n run: CaseRunner<TInput> | CaseRunner<undefined>,\n options?: CaseRegistrationOptions<TInput>,\n): void {\n getActiveCases().push({\n executionPolicy: normalizeExecutionPolicy(options, 'task case'),\n input: options?.input,\n name,\n run: run as CaseRunner<unknown>,\n })\n}\n\n/**\n * Registers multiple cases in the currently active task scope.\n */\nexport function casesFromInputs<TInput>(\n namePrefix: string,\n inputs: readonly TInput[],\n run: CaseRunner<TInput>,\n options?: CasesFromInputsOptions,\n): void {\n const queueKey = options?.concurrency == null ? 
undefined : {}\n\n inputs.forEach((input, index) => {\n getActiveCases().push({\n concurrency: options?.concurrency,\n executionPolicy: normalizeExecutionPolicy(options, 'casesFromInputs'),\n input,\n name: `${namePrefix} #${index + 1}`,\n queueKey,\n run: run as CaseRunner<unknown>,\n })\n })\n}\n\n/**\n * Resolves the effective case concurrency for one registered task case.\n *\n * Before:\n * - registered case override `2`, task default `4`\n * - registered case override `undefined`, task default `3`\n *\n * After:\n * - `2`\n * - `3`\n */\nfunction resolveCaseConcurrency(\n taskCase: RegisteredCase<unknown>,\n taskConcurrency: TaskConcurrencyConfig | undefined,\n runtimeConcurrency: TaskConcurrencyConfig | undefined,\n): number | undefined {\n const concurrency = runtimeConcurrency?.case ?? taskCase.concurrency ?? taskConcurrency?.case\n if (concurrency == null) {\n return undefined\n }\n\n if (!Number.isFinite(concurrency) || !Number.isInteger(concurrency) || concurrency <= 0) {\n throw new Error(`Invalid task case concurrency: ${String(concurrency)}`)\n }\n\n return concurrency\n}\n\nfunction resolveCaseQueueKey(taskCase: RegisteredCase<unknown>, defaultQueueKey: object): object {\n return taskCase.queueKey ?? defaultQueueKey\n}\n\n/**\n * Defines one eval task with task/case semantics similar to Vitest.\n *\n * Use when:\n * - task behavior should be declared with `caseOf` and `casesFromInputs`\n * - business agent code should be imported and run from eval task files\n */\nexport function describeTask(\n name: string,\n build: ((builder: DescribeTaskBuilder) => void) | (() => void),\n options: DescribeTaskOptions = {},\n) {\n const registeredCases: RegisteredCase<unknown>[] = []\n const builder = createCaseBuilder(registeredCases)\n withActiveCases(registeredCases, () => {\n if (build.length > 0) {\n (build as (builder: DescribeTaskBuilder) => void)(builder)\n return\n }\n\n ;(build as () => void)()\n })\n\n const description = options.description ?? 
name\n const taskExecutionPolicy = normalizeExecutionPolicy(options, 'describeTask')\n\n const definition = defineEval({\n description,\n name,\n task: defineTask({\n concurrency: options.concurrency,\n executionPolicy: taskExecutionPolicy,\n id: name,\n async run(context): Promise<TaskRunOutput> {\n if (registeredCases.length === 0) {\n return {\n scores: [{ kind: 'exact', score: 1 }],\n }\n }\n\n const totalCases = registeredCases.length\n const scoreBucketsByKind: Record<RunScoreKind, number[]> = {\n exact: [],\n judge: [],\n }\n const defaultCaseQueueKey = {}\n const caseQueues = new Map<object, ReturnType<typeof createSchedulerQueue>>()\n const hasAutoAttempt = registeredCases.some(taskCase => resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt > 0)\n const runtimeTaskConcurrency = context.task.entry.task?.concurrency ?? options.concurrency\n\n if (!hasAutoAttempt) {\n await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => {\n const outcome = await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n ...(outcome.output === undefined ? {} : { output: outcome.output }),\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n }\n\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n await executeCase()\n return\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? 
createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n await queue.run(executeCase)\n }),\n )\n }\n else {\n let finalOutcomes: CaseExecutionOutcome[] = []\n let attemptIndex = 0\n\n for (;;) {\n finalOutcomes = await Promise.all(\n registeredCases.map(async (taskCase, index) => {\n const executeCase = async () => await executeRegisteredCase(context, taskCase, index, totalCases, taskExecutionPolicy)\n const concurrency = resolveCaseConcurrency(taskCase, runtimeTaskConcurrency, context.runtimeConcurrency)\n if (concurrency == null) {\n return await executeCase()\n }\n\n const queueKey = resolveCaseQueueKey(taskCase, defaultCaseQueueKey)\n const queue = caseQueues.get(queueKey) ?? createSchedulerQueue(concurrency)\n caseQueues.set(queueKey, queue)\n return await queue.run(executeCase)\n }),\n )\n\n const shouldContinue = finalOutcomes.some((outcome, index) => {\n if (outcome.state === 'passed') {\n return false\n }\n\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return false\n }\n\n return attemptIndex < resolveCaseExecutionPolicy(taskCase, taskExecutionPolicy).autoAttempt\n })\n\n if (!shouldContinue) {\n break\n }\n\n attemptIndex += 1\n }\n\n finalOutcomes.forEach((outcome, index) => {\n const taskCase = registeredCases[index]\n if (taskCase == null) {\n return\n }\n\n emitCaseEnd(context.reporterHooks, {\n ...(outcome.errorMessage == null ? {} : { errorMessage: outcome.errorMessage }),\n index,\n ...(outcome.output === undefined ? 
{} : { output: outcome.output }),\n state: outcome.state,\n name: taskCase.name,\n total: totalCases,\n })\n collectCaseOutcomeScores(outcome, scoreBucketsByKind)\n })\n }\n\n const scores = (Object.keys(scoreBucketsByKind) as RunScoreKind[])\n .filter(kind => scoreBucketsByKind[kind].length > 0)\n .map((kind) => {\n const values = scoreBucketsByKind[kind]\n const total = values.reduce((sum, value) => sum + value, 0)\n return {\n kind,\n score: total / values.length,\n }\n })\n\n return {\n scores,\n }\n },\n }),\n })\n\n registerEvalDefinition(definition)\n\n return definition\n}\n\n/**\n * Alias of `describeTask` for eval-centric naming.\n */\nexport const describeEval = describeTask\n"],"mappings":";;;;;;;AAiGA,SAAS,gBAAgB,QAA4E;AACnG,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,yBAAyB,OAAe,MAAsB;AACrE,QAAO,GAAG,MAAM,GAAG,mBAAmB,KAAK;;AAG7C,SAAS,2BAA2B,OAAoD;AACtF,QAAO,OAAO,UAAU,aAAa,OAAO,UAAU,YAAY,OAAO,UAAU;;AAGrF,SAAS,0BAA0B,OAAgH;AACjJ,QAAO,MAAM,MAAM,2BAA2B;;AAGhD,SAAS,2BAA2B,OAAiI;AACnK,KAAI,2BAA2B,MAAM,CACnC,QAAO;AAGT,QAAO,MAAM,QAAQ,MAAM,IAAI,0BAA0B,MAAM;;AAGjE,SAAS,iBAAiB,OAAqB;AAC7C,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,KAAK,QAAQ,EAClD,OAAM,IAAI,MAAM,0DAA0D,MAAM,IAAI;;AAIxF,SAAS,yBAAyB,OAAe,OAAqB;AACpE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,wBAAwB,OAAe,OAAqB;AACnE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,QAAQ,EACrC,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,sBAAsB,OAAe,OAAqB;AACjE,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,SAAS,EAClE,OAAM,IAAI,MAAM,WAAW,MAAM,IAAI,OAAO,MAAM,GAAG;;AAIzD,SAAS,iBAAiB,YAA4B;AAEpD,QAAO,MAAM,MAAM,aAAa;;AAGlC,SAAS,sBAAsB,QAA6B,YAA4B;CACtF,MAAM,QAAQ,OAAO;AAErB,KAAI,SAAS,KACX,QAAO,iBAAiB,WAAW;AAGrC,QAAO,OAAO,UAAU,WAAW,QAAQ,MAAM,WAAW;;AAG9D,SAAS,cACP,OACA,SAQM;AACN,KAAI;AACF,SAAO,cAAc,QAAQ;SAEzB;;AAKR,SAAS,YACP,OACA,SAQM;AACN,KAAI;AACF,SAAO,YAAY,QAAQ;SAEvB;;AAKR,SAAS,kBACP,O
ACA,SACM;AACN,KAAI;AACF,SAAO,UAAU,QAAQ;SAErB;;AAKR,SAAS,uBAAuB,SAAwB;CACtD,MAAM,wBAAQ,IAAI,MAAM,wBAAwB,QAAQ,KAAK;AAC7D,OAAM,OAAO;AACb,QAAO;;AAGT,SAAS,yBAAyB,QAAyC,OAAgD;AACzH,KAAI,UAAU,KACZ;AAGF,KAAI,OAAO,eAAe,KACxB,0BAAyB,OAAO,aAAa,GAAG,MAAM,cAAc;AAGtE,KAAI,OAAO,aAAa,KACtB,0BAAyB,OAAO,WAAW,GAAG,MAAM,YAAY;AAGlE,KAAI,OAAO,OAAO,mBAAmB,SACnC,yBAAwB,OAAO,gBAAgB,GAAG,MAAM,iBAAiB;AAG3E,KAAI,OAAO,WAAW,KACpB,uBAAsB,OAAO,SAAS,GAAG,MAAM,UAAU;CAG3D,MAAM,aAAa;EACjB,aAAa,OAAO;EACpB,WAAW,OAAO;EAClB,gBAAgB,OAAO;EACvB,SAAS,OAAO;EACjB;AAED,QAAO,OAAO,OAAO,WAAW,CAAC,MAAK,UAAS,SAAS,KAAK,GACzD,aACA,KAAA;;AAGN,SAAS,2BACP,UACA,qBAC4H;AAC5H,QAAO;EACL,aAAa,SAAS,iBAAiB,eAAe,qBAAqB,eAAe;EAC1F,WAAW,SAAS,iBAAiB,aAAa,qBAAqB,aAAa;EACpF,gBAAgB,SAAS,iBAAiB,kBAAkB,qBAAqB;EACjF,SAAS,SAAS,iBAAiB,WAAW,qBAAqB;EACpE;;AAGH,eAAe,YACb,SACA,UACA,OACA,SAC+B;CAC/B,MAAM,qCAAqB,IAAI,KAA2B;CAC1D,MAAM,kBAAkB,IAAI,iBAAiB;CAC7C,MAAM,YAAY,QAAQ,aAAa,4BAA4B;CACnE,MAAM,SAAS,yBAAyB,OAAO,SAAS,KAAK;CAC7D,IAAI;CACJ,IAAI,WAAW;CACf,IAAI,UAAU;AAEd,KAAI;AACF,SAAO,MAAM,UAAU,SAAS,eAAe;GAC7C,kBAAkB;GAClB,oBAAoB,SAAS;GAC7B,kBAAkB,QAAQ,KAAK;GAC/B,oBAAoB,QAAQ,KAAK,MAAM;GACxC,EAAE,YAAY;GACb,MAAM,aAAa,QAAQ,QAAQ,SAAS,IAAI;IAC9C,GAAG;IACH,QAAQ;KACN,GAAG,gBAAgB,QAAQ,KAAK,OAAO;KACvC,QAAQ,SAAS;KAClB;IACD,OAAO,MAAM,OAAO;AAClB,SAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,uBAAkB,QAAQ,eAAe;MACvC;MACA,MAAM;OACJ;OACA;OACD;MACD,OAAO;MACR,CAAC;AACF,eAAU,SAAS,sBAAsB;MAAE;MAAM;MAAO,CAAC;AACzD,SAAI,2BAA2B,MAAM,CACnC,WAAU,cAAc,GAAG,OAAO,OAAO,CAAC;;IAG9C,MAAM,OAAO,OAAO,SAAS;AAC3B,SAAI,gBAAgB,OAAO,WAAW,QACpC;AAGF,sBAAiB,MAAM;AACvB,wBAAmB,IAAI,MAAM,MAAM;AACnC,eAAU,SAAS,qBAAqB;MACtC,qBAAqB;MACrB,sBAAsB;MACvB,CAAC;AACF,uBAAkB,QAAQ,eAAe;MACvC;MACA,MAAM;OAAE;OAAM;OAAO;MACrB,OAAO;MACR,CAAC;;IAEJ,QAAQ,gBAAgB;IACzB,CAAC,CAAC;AAEH,OAAI,WAAW,MAAM;IACnB,MAAM,iBAAiB,IAAI,SAAgB,GAAG,WAAW;AACvD,qBAAgB,iBAAiB;AAC/B,iBAAW;AACX,sBAAgB,MAAM,uBAAuB,QAAQ,CAAC;AACtD,aAAO,uBAAuB,QAAQ,CAAC;QACtC,QAAQ;MACX;IAEF,MAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,YAAY,eAAe,CAAC;AAC/D,cAAU;AACV,WAAO;KACL;KACA,cAA
c;KACd,OAAO;KACR;;GAGH,MAAM,SAAS,MAAM;AACrB,aAAU;AACV,UAAO;IACL;IACA,cAAc;IACd,OAAO;IACR;IACD;UAEG,OAAO;AACZ,YAAU;AACV,SAAO;GACL,cAAc,iBAAiB,MAAM,KAAK,YAAY,WAAW,OAAO,wBAAwB,QAAQ,OAAO;GAC/G,cAAc;GACd,OAAO,WAAW,YAAY;GAC/B;WAEK;AACN,MAAI,iBAAiB,KACnB,cAAa,cAAc;;;AAKjC,eAAe,sBACb,SACA,UACA,OACA,YACA,qBAC+B;CAC/B,MAAM,iBAAiB,2BAA2B,UAAU,oBAAoB;CAChF,IAAI;AAEJ,MAAK,IAAI,aAAa,GAAG,cAAc,eAAe,WAAW,cAAc,GAAG;AAChF,MAAI,aAAa,GAAG;GAClB,MAAM,eAAe,sBAAsB,gBAAgB,WAAW;AACtE,2BAAwB,cAAc,wBAAwB;AAE9D,OAAI,eAAe,EACjB,OAAM,MAAM,aAAa;;AAI7B,gBAAc,QAAQ,eAAe;GACnC,GAAI,eAAe,YAAY,IAC3B;IACE,WAAW,eAAe;IAC1B;IACD,GACD,EAAE;GACN;GACA,GAAI,SAAS,UAAU,KAAA,IAAY,EAAE,GAAG,EAAE,OAAO,SAAS,OAAO;GACjE,MAAM,SAAS;GACf,OAAO;GACR,CAAC;AACF,gBAAc,MAAM,YAAY,SAAS,UAAU,OAAO,eAAe,QAAQ;AACjF,MAAI,YAAY,UAAU,SACxB,QAAO;;AAIX,QAAO,eAAe;EACpB,cAAc;EACd,8BAAc,IAAI,KAAK;EACvB,OAAO;EACR;;AAGH,SAAS,yBACP,SACA,oBACM;AACN,KAAI,QAAQ,UAAU,UAAU;AAC9B,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,KAAI,QAAQ,aAAa,SAAS,GAAG;AACnC,qBAAmB,MAAM,KAAK,EAAE;AAChC;;AAGF,oBAAmB,MAAM,KAAK,QAAQ,aAAa,IAAI,QAAQ,IAAI,EAAE;CACrE,MAAM,aAAa,QAAQ,aAAa,IAAI,QAAQ;AACpD,KAAI,cAAc,KAChB,oBAAmB,MAAM,KAAK,WAAW;;AAiD7C,SAAS,kBAAkB,iBAAiE;CAG1F,SAAS,aACP,MACA,KACA,SACM;AACN,kBAAgB,KAAK;GACnB,iBAAiB,yBAAyB,SAAS,YAAY;GAC/D,OAAO,SAAS;GAChB;GACK;GACN,CAAC;;AAGJ,QAAO;EACL,QAAQ;EACR,gBAAgB,YAAY,QAAQ,KAAK,SAAS;GAChD,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EAAE;AAE9D,UAAO,SAAS,OAAO,UAAU;AAC/B,oBAAgB,KAAK;KACnB,aAAa,SAAS;KACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;KACrE;KACA,MAAM,GAAG,WAAW,IAAI,QAAQ;KAChC;KACK;KACN,CAAC;KACF;;EAEL;;AAGH,IAAI,mBAAgD,EAAE;AAEtD,SAAS,gBAAmB,OAAkC,UAAsB;AAClF,oBAAmB,CAAC,GAAG,kBAAkB,MAAM;AAE/C,KAAI;AACF,SAAO,UAAU;WAEX;AACN,qBAAmB,iBAAiB,MAAM,GAAG,GAAG;;;AAIpD,SAAS,iBAA4C;CACnD,MAAM,SAAS,iBAAiB,GAAG,GAAG;AACtC,KAAI,UAAU,KACZ,OAAM,IAAI,MAAM,0EAA0E;AAG5F,QAAO;;AAiBT,SAAgB,OACd,MACA,KACA,SACM;AACN,iBAAgB,CAAC,KAAK;EACpB,iBAAiB,yBAAyB,SAAS,YAAY;EAC/D,OAAO,SAAS;EAChB;EACK;EACN,CAAC;;;;;AAMJ,SAAgB,gBACd,YACA,QACA,KACA,SACM;CACN,MAAM,WAAW,SAAS,eAAe,OAAO,KAAA,IAAY,EA
AE;AAE9D,QAAO,SAAS,OAAO,UAAU;AAC/B,kBAAgB,CAAC,KAAK;GACpB,aAAa,SAAS;GACtB,iBAAiB,yBAAyB,SAAS,kBAAkB;GACrE;GACA,MAAM,GAAG,WAAW,IAAI,QAAQ;GAChC;GACK;GACN,CAAC;GACF;;;;;;;;;;;;;AAcJ,SAAS,uBACP,UACA,iBACA,oBACoB;CACpB,MAAM,cAAc,oBAAoB,QAAQ,SAAS,eAAe,iBAAiB;AACzF,KAAI,eAAe,KACjB;AAGF,KAAI,CAAC,OAAO,SAAS,YAAY,IAAI,CAAC,OAAO,UAAU,YAAY,IAAI,eAAe,EACpF,OAAM,IAAI,MAAM,kCAAkC,OAAO,YAAY,GAAG;AAG1E,QAAO;;AAGT,SAAS,oBAAoB,UAAmC,iBAAiC;AAC/F,QAAO,SAAS,YAAY;;;;;;;;;AAU9B,SAAgB,aACd,MACA,OACA,UAA+B,EAAE,EACjC;CACA,MAAM,kBAA6C,EAAE;CACrD,MAAM,UAAU,kBAAkB,gBAAgB;AAClD,iBAAgB,uBAAuB;AACrC,MAAI,MAAM,SAAS,GAAG;AACnB,SAAiD,QAAQ;AAC1D;;AAGA,SAAsB;GACxB;CAEF,MAAM,cAAc,QAAQ,eAAe;CAC3C,MAAM,sBAAsB,yBAAyB,SAAS,eAAe;CAE7E,MAAM,aAAa,WAAW;EAC5B;EACA;EACA,MAAM,WAAW;GACf,aAAa,QAAQ;GACrB,iBAAiB;GACjB,IAAI;GACJ,MAAM,IAAI,SAAiC;AACzC,QAAI,gBAAgB,WAAW,EAC7B,QAAO,EACL,QAAQ,CAAC;KAAE,MAAM;KAAS,OAAO;KAAG,CAAC,EACtC;IAGH,MAAM,aAAa,gBAAgB;IACnC,MAAM,qBAAqD;KACzD,OAAO,EAAE;KACT,OAAO,EAAE;KACV;IACD,MAAM,sBAAsB,EAAE;IAC9B,MAAM,6BAAa,IAAI,KAAsD;IAC7E,MAAM,iBAAiB,gBAAgB,MAAK,aAAY,2BAA2B,UAAU,oBAAoB,CAAC,cAAc,EAAE;IAClI,MAAM,yBAAyB,QAAQ,KAAK,MAAM,MAAM,eAAe,QAAQ;AAE/E,QAAI,CAAC,eACH,OAAM,QAAQ,IACZ,gBAAgB,IAAI,OAAO,UAAU,UAAU;KAC7C,MAAM,cAAc,YAAY;MAC9B,MAAM,UAAU,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;AACtG,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,GAAI,QAAQ,WAAW,KAAA,IAAY,EAAE,GAAG,EAAE,QAAQ,QAAQ,QAAQ;OAClE,OAAO,QAAQ;OACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;;KAGvD,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,SAAI,eAAe,MAAM;AACvB,YAAM,aAAa;AACnB;;KAGF,MAAM,WAAW,oBAAoB,UAAU,oBAAoB;KACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,gBAAW,IAAI,UAAU,MAAM;AAC/B,WAAM,MAAM,IAAI,YAAY;MAC5B,CACH;SAEE;KACH,IAAI,gBAAwC,EAAE;KAC9C,IAAI,eAAe;AAEnB,cAAS;AACP,sBAAgB,MAAM,QAAQ,IAC5B,gBAAgB,IAAI,OAAO,UAAU,UAAU;OAC7C,MAAM,cAAc,YAAY,MAAM,sBAAsB,SAAS,UAAU,OAAO,YAAY,oBAAoB;OACtH,MAAM,cAAc,uBAAuB,UAAU,wBAAwB,QAAQ,mBAAmB;AACxG,WAAI,eAAe,KACjB,QAAO,MAAM,aAAa;OAG5B,MAAM,WAAW,o
BAAoB,UAAU,oBAAoB;OACnE,MAAM,QAAQ,WAAW,IAAI,SAAS,IAAI,qBAAqB,YAAY;AAC3E,kBAAW,IAAI,UAAU,MAAM;AAC/B,cAAO,MAAM,MAAM,IAAI,YAAY;QACnC,CACH;AAeD,UAAI,CAbmB,cAAc,MAAM,SAAS,UAAU;AAC5D,WAAI,QAAQ,UAAU,SACpB,QAAO;OAGT,MAAM,WAAW,gBAAgB;AACjC,WAAI,YAAY,KACd,QAAO;AAGT,cAAO,eAAe,2BAA2B,UAAU,oBAAoB,CAAC;QAChF,CAGA;AAGF,sBAAgB;;AAGlB,mBAAc,SAAS,SAAS,UAAU;MACxC,MAAM,WAAW,gBAAgB;AACjC,UAAI,YAAY,KACd;AAGF,kBAAY,QAAQ,eAAe;OACjC,GAAI,QAAQ,gBAAgB,OAAO,EAAE,GAAG,EAAE,cAAc,QAAQ,cAAc;OAC9E;OACA,GAAI,QAAQ,WAAW,KAAA,IAAY,EAAE,GAAG,EAAE,QAAQ,QAAQ,QAAQ;OAClE,OAAO,QAAQ;OACf,MAAM,SAAS;OACf,OAAO;OACR,CAAC;AACF,+BAAyB,SAAS,mBAAmB;OACrD;;AAcJ,WAAO,EACL,QAZc,OAAO,KAAK,mBAAmB,CAC5C,QAAO,SAAQ,mBAAmB,MAAM,SAAS,EAAE,CACnD,KAAK,SAAS;KACb,MAAM,SAAS,mBAAmB;AAElC,YAAO;MACL;MACA,OAHY,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAG1C,OAAO;MACvB;MACD,EAIH;;GAEJ,CAAC;EACH,CAAC;AAEF,wBAAuB,WAAW;AAElC,QAAO;;;;;AAMT,MAAa,eAAe"}
@@ -11,4 +11,4 @@ function resolveModelByName(models, name) {
11
11
  //#endregion
12
12
  export { resolveModelByName as t };
13
13
 
14
- //# sourceMappingURL=models-DIGdOUpJ.mjs.map
14
+ //# sourceMappingURL=models-pBSRUZhY.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"models-DIGdOUpJ.mjs","names":[],"sources":["../src/config/models.ts"],"sourcesContent":["import type { TaskExecutionPolicy } from './types'\n\n/**\n * Canonical model definition consumed by vieval runtime and config.\n *\n * Use when:\n * - declaring models in `vieval.config.*`\n * - resolving task runtime models by id, alias, or concrete model name\n *\n * Expects:\n * - `id` to be stable and unique within one config\n * - `inferenceExecutorId` to match scheduler/executor identifiers\n *\n * Returns:\n * - one normalized model registration record\n */\nexport interface ModelDefinition {\n /**\n * Stable model id.\n */\n id: string\n /**\n * Inference-executor id used for matching and reporting.\n */\n inferenceExecutorId: string\n /**\n * Executor reference passed through config.\n *\n * `vieval` core treats this as opaque runtime metadata. Builder plugins can\n * narrow this field with plugin-specific executor input types.\n */\n inferenceExecutor: unknown\n /**\n * Concrete model name passed to the inference executor.\n */\n model: string\n /**\n * Alias names that can resolve this model.\n */\n aliases: string[]\n /**\n * Optional execution policy hints attached to this model.\n */\n executionPolicy?: TaskExecutionPolicy\n /**\n * Optional model-level call parameters.\n */\n parameters?: Record<string, unknown>\n}\n\n/**\n * Resolves one model by id, model name, or alias in registration order.\n *\n * Returns:\n * - the first matching model, or `undefined` when no match exists\n */\nexport function resolveModelByName(\n models: readonly ModelDefinition[],\n name: string,\n): ModelDefinition | undefined {\n return models.find(model => model.id === name || model.model === name || model.aliases.includes(name))\n}\n"],"mappings":";;;;;;;AAwDA,SAAgB,mBACd,QACA,MAC6B;AAC7B,QAAO,OAAO,MAAK,UAAS,MAAM,OAAO,QAAQ,MAAM,UAAU,QAAQ,MAAM,QAAQ,SAAS,KAAK,CAAC"}
1
+ {"version":3,"file":"models-pBSRUZhY.mjs","names":[],"sources":["../src/config/models.ts"],"sourcesContent":["import type { TaskExecutionPolicy } from './types'\n\n/**\n * Canonical model definition consumed by vieval runtime and config.\n *\n * Use when:\n * - declaring models in `vieval.config.*`\n * - resolving task runtime models by id, alias, or concrete model name\n *\n * Expects:\n * - `id` to be stable and unique within one config\n * - `inferenceExecutorId` to match scheduler/executor identifiers\n *\n * Returns:\n * - one normalized model registration record\n */\nexport interface ModelDefinition {\n /**\n * Stable model id.\n */\n id: string\n /**\n * Inference-executor id used for matching and reporting.\n */\n inferenceExecutorId: string\n /**\n * Executor reference passed through config.\n *\n * `vieval` core treats this as opaque runtime metadata. Builder plugins can\n * narrow this field with plugin-specific executor input types.\n */\n inferenceExecutor: unknown\n /**\n * Concrete model name passed to the inference executor.\n */\n model: string\n /**\n * Alias names that can resolve this model.\n */\n aliases: string[]\n /**\n * Optional execution policy hints attached to this model.\n */\n executionPolicy?: TaskExecutionPolicy\n /**\n * Optional model-level call parameters.\n */\n parameters?: Record<string, unknown>\n}\n\n/**\n * Resolves one model by id, model name, or alias in registration order.\n *\n * Returns:\n * - the first matching model, or `undefined` when no match exists\n */\nexport function resolveModelByName(\n models: readonly ModelDefinition[],\n name: string,\n): ModelDefinition | undefined {\n return models.find(model => model.id === name || model.model === name || model.aliases.includes(name))\n}\n"],"mappings":";;;;;;;AAwDA,SAAgB,mBACd,QACA,MAC6B;AAC7B,QAAO,OAAO,MAAK,UAAS,MAAM,OAAO,QAAQ,MAAM,UAAU,QAAQ,MAAM,QAAQ,SAAS,KAAK,CAAC"}
@@ -1,4 +1,4 @@
1
- import { C as TaskRunContext, P as ModelDefinition, b as TaskExecutionPolicy, l as MatrixDefinition, t as ConfigHookPlugin } from "../../index-DBZKkpBe.mjs";
1
+ import { D as TaskRunContext, R as ModelDefinition, f as MatrixDefinition, t as ConfigHookPlugin, w as TaskExecutionPolicy } from "../../index-BkjyCInx.mjs";
2
2
 
3
3
  //#region src/plugins/chat-models/runtime-config.d.ts
4
4
  /**
@@ -80,7 +80,7 @@ type ChatModelRuntimeConfig = OpenAIChatModelRuntimeConfig | OllamaChatModelRunt
80
80
  * Normalizes one configured chat model into runtime executor config.
81
81
  *
82
82
  * Use when:
83
- * - eval code needs typed provider constructor options from `context.model()`
83
+ * - eval code needs typed provider constructor options from a resolved model
84
84
  * - model parameters should be validated once with clear error messages
85
85
  *
86
86
  * Expects:
@@ -95,7 +95,7 @@ declare function toChatModelRuntimeConfig(model: ModelDefinition): ChatModelRunt
95
95
  * Resolves OpenAI runtime config from one resolved run-context model.
96
96
  *
97
97
  * Use when:
98
- * - task execution already has `context.model()` output
98
+ * - task execution already has a model resolved through chat-model helpers
99
99
  * - eval code wants typed OpenAI provider options with a concise helper name
100
100
  *
101
101
  * Expects:
@@ -109,7 +109,7 @@ declare function openaiFromRunContext(model: ModelDefinition): OpenAIChatModelRu
109
109
  * Resolves Ollama runtime config from one resolved run-context model.
110
110
  *
111
111
  * Use when:
112
- * - task execution already has `context.model()` output
112
+ * - task execution already has a model resolved through chat-model helpers
113
113
  * - eval code wants typed Ollama provider options with a concise helper name
114
114
  *
115
115
  * Expects:
@@ -123,7 +123,7 @@ declare function ollamaFromRunContext(model: ModelDefinition): OllamaChatModelRu
123
123
  * Resolves OpenRouter runtime config from one resolved run-context model.
124
124
  *
125
125
  * Use when:
126
- * - task execution already has `context.model()` output
126
+ * - task execution already has a model resolved through chat-model helpers
127
127
  * - eval code wants typed OpenRouter provider options with a concise helper name
128
128
  *
129
129
  * Expects:
@@ -392,6 +392,12 @@ interface ChatModelFromBaseOptions {
392
392
  * @default 0
393
393
  */
394
394
  autoRetry?: number;
395
+ /**
396
+ * Delay in milliseconds before a retry starts.
397
+ *
398
+ * @default retryIndex => 500 * 2 ** (retryIndex - 1)
399
+ */
400
+ autoRetryDelay?: TaskExecutionPolicy['autoRetryDelay'];
395
401
  /**
396
402
  * Additional full task attempts allowed after the current attempt settles.
397
403
  *
@@ -534,6 +540,63 @@ interface ChatModelsPluginOptions {
534
540
  */
535
541
  models: readonly ChatModelDefinition[];
536
542
  }
543
+ /**
544
+ * Matrix scope that can carry a chat model selector.
545
+ */
546
+ type MatrixModelScope = 'eval' | 'run';
547
+ /**
548
+ * Options for resolving a chat model from a matrix axis.
549
+ */
550
+ interface ModelFromMatrixOptions {
551
+ /**
552
+ * Matrix axis whose selected value is a model id, model name, or alias.
553
+ */
554
+ axis: string;
555
+ }
556
+ type MatrixModelContext = Pick<TaskRunContext, 'models' | 'task'>;
557
+ /**
558
+ * Resolves a configured chat model from one scoped matrix axis.
559
+ *
560
+ * Use when:
561
+ * - a matrix axis selects the agent, judge, or another chat model role
562
+ * - eval code should keep model lookup semantics inside the chat-models plugin
563
+ *
564
+ * Expects:
565
+ * - `scope` to identify `context.task.matrix.run` or `context.task.matrix.eval`
566
+ * - `options.axis` to exist and contain a model id, model name, or alias
567
+ *
568
+ * Returns:
569
+ * - the configured model matching the selected matrix value
570
+ */
571
+ declare function modelFromMatrix(context: MatrixModelContext, scope: MatrixModelScope, options: ModelFromMatrixOptions): ModelDefinition;
572
+ /**
573
+ * Resolves a configured chat model from one run-matrix axis.
574
+ *
575
+ * Use when:
576
+ * - run matrix selects the model used by the system under evaluation
577
+ * - callers want the scoped helper instead of passing `scope: 'run'`
578
+ *
579
+ * Expects:
580
+ * - `options.axis` to exist in `context.task.matrix.run`
581
+ *
582
+ * Returns:
583
+ * - the configured model matching the selected run-matrix value
584
+ */
585
+ declare function modelFromRun(context: MatrixModelContext, options: ModelFromMatrixOptions): ModelDefinition;
586
+ /**
587
+ * Resolves a configured chat model from one eval-matrix axis.
588
+ *
589
+ * Use when:
590
+ * - eval matrix selects a judge, rubric, or evaluator model
591
+ * - callers want the scoped helper instead of passing `scope: 'eval'`
592
+ *
593
+ * Expects:
594
+ * - `options.axis` to exist in `context.task.matrix.eval`
595
+ *
596
+ * Returns:
597
+ * - the configured model matching the selected eval-matrix value
598
+ */
599
+ declare function modelFromEval(context: MatrixModelContext, options: ModelFromMatrixOptions): ModelDefinition;
537
600
  /**
538
601
  * Creates a run-matrix `model` axis from configured chat model names.
539
602
  *
@@ -565,5 +628,5 @@ declare function ChatProviders(options: ChatProvidersPluginOptions): Plugin;
565
628
  */
566
629
  declare function ChatModels(options: ChatModelsPluginOptions): Plugin;
567
630
  //#endregion
568
- export { ChatModelDefinition, ChatModelExecutorInput, ChatModelExecutorLike, ChatModelFromBaseOptions, ChatModelFromOptions, ChatModelHeaders, ChatModelInferenceExecutor, ChatModelResolverContext, ChatModelResolverValue, ChatModelRuntimeConfig, ChatModelTelemetryProvider, ChatModelToolCall, ChatModels, ChatModelsPluginOptions, ChatProviderDefinition, ChatProviderFromOptions, ChatProviders, ChatProvidersPluginOptions, EmitChatModelErrorTelemetryOptions, EmitChatModelRequestTelemetryOptions, EmitChatModelResponseTelemetryOptions, GenericChatModelInferenceExecutor, OllamaChatModelInferenceExecutor, OllamaChatModelRuntimeConfig, OpenAIChatModelInferenceExecutor, OpenAIChatModelRuntimeConfig, OpenRouterChatModelInferenceExecutor, OpenRouterChatModelRuntimeConfig, OptionalProviderEnvMap, Plugin, PluginConfig, RequiredProviderEnvMap, chatModelFrom, chatModelMatrix, chatProviderFrom, emitChatModelErrorTelemetry, emitChatModelRequestTelemetry, emitChatModelResponseTelemetry, extractChatModelToolCalls, extractMeteringDimensions, ollamaFromRunContext, openaiFromRunContext, openrouterFromRunContext, toChatModelRuntimeConfig };
631
+ export { ChatModelDefinition, ChatModelExecutorInput, ChatModelExecutorLike, ChatModelFromBaseOptions, ChatModelFromOptions, ChatModelHeaders, ChatModelInferenceExecutor, ChatModelResolverContext, ChatModelResolverValue, ChatModelRuntimeConfig, ChatModelTelemetryProvider, ChatModelToolCall, ChatModels, ChatModelsPluginOptions, ChatProviderDefinition, ChatProviderFromOptions, ChatProviders, ChatProvidersPluginOptions, EmitChatModelErrorTelemetryOptions, EmitChatModelRequestTelemetryOptions, EmitChatModelResponseTelemetryOptions, GenericChatModelInferenceExecutor, MatrixModelScope, ModelFromMatrixOptions, OllamaChatModelInferenceExecutor, OllamaChatModelRuntimeConfig, OpenAIChatModelInferenceExecutor, OpenAIChatModelRuntimeConfig, OpenRouterChatModelInferenceExecutor, OpenRouterChatModelRuntimeConfig, OptionalProviderEnvMap, Plugin, PluginConfig, RequiredProviderEnvMap, chatModelFrom, chatModelMatrix, chatProviderFrom, emitChatModelErrorTelemetry, emitChatModelRequestTelemetry, emitChatModelResponseTelemetry, extractChatModelToolCalls, extractMeteringDimensions, modelFromEval, modelFromMatrix, modelFromRun, ollamaFromRunContext, openaiFromRunContext, openrouterFromRunContext, toChatModelRuntimeConfig };
569
632
  //# sourceMappingURL=index.d.mts.map
@@ -1,4 +1,5 @@
1
- import { n as requiredEnvFrom, t as envFrom } from "../../env--94B0UtW.mjs";
1
+ import { n as requiredEnvFrom, t as envFrom } from "../../env-BFSjny07.mjs";
2
+ import { t as resolveModelByName } from "../../models-pBSRUZhY.mjs";
2
3
  import process from "node:process";
3
4
  import { errorMessageFrom } from "@moeru/std";
4
5
  //#region src/plugins/chat-models/runtime-config.ts
@@ -41,7 +42,7 @@ function parseHeadersParameter(parameters, modelId) {
41
42
  * Normalizes one configured chat model into runtime executor config.
42
43
  *
43
44
  * Use when:
44
- * - eval code needs typed provider constructor options from `context.model()`
45
+ * - eval code needs typed provider constructor options from a resolved model
45
46
  * - model parameters should be validated once with clear error messages
46
47
  *
47
48
  * Expects:
@@ -79,7 +80,7 @@ function toChatModelRuntimeConfig(model) {
79
80
  * Resolves OpenAI runtime config from one resolved run-context model.
80
81
  *
81
82
  * Use when:
82
- * - task execution already has `context.model()` output
83
+ * - task execution already has a model resolved through chat-model helpers
83
84
  * - eval code wants typed OpenAI provider options with a concise helper name
84
85
  *
85
86
  * Expects:
@@ -97,7 +98,7 @@ function openaiFromRunContext(model) {
97
98
  * Resolves Ollama runtime config from one resolved run-context model.
98
99
  *
99
100
  * Use when:
100
- * - task execution already has `context.model()` output
101
+ * - task execution already has a model resolved through chat-model helpers
101
102
  * - eval code wants typed Ollama provider options with a concise helper name
102
103
  *
103
104
  * Expects:
@@ -115,7 +116,7 @@ function ollamaFromRunContext(model) {
115
116
  * Resolves OpenRouter runtime config from one resolved run-context model.
116
117
  *
117
118
  * Use when:
118
- * - task execution already has `context.model()` output
119
+ * - task execution already has a model resolved through chat-model helpers
119
120
  * - eval code wants typed OpenRouter provider options with a concise helper name
120
121
  *
121
122
  * Expects:
@@ -293,6 +294,7 @@ function normalizeExecutionPolicy(policy) {
293
294
  const normalized = {
294
295
  autoAttempt: policy.autoAttempt,
295
296
  autoRetry: policy.autoRetry,
297
+ autoRetryDelay: policy.autoRetryDelay,
296
298
  timeout: policy.timeout
297
299
  };
298
300
  return Object.values(normalized).some((value) => value != null) ? normalized : void 0;
@@ -308,6 +310,7 @@ function resolveModelExecutionPolicy(options) {
308
310
  const explicitPolicy = normalizeExecutionPolicy({
309
311
  autoAttempt: options.autoAttempt ?? options.executionPolicy?.autoAttempt,
310
312
  autoRetry: options.autoRetry ?? options.executionPolicy?.autoRetry,
313
+ autoRetryDelay: options.autoRetryDelay ?? options.executionPolicy?.autoRetryDelay,
311
314
  timeout: options.timeout ?? options.executionPolicy?.timeout
312
315
  });
313
316
  if (explicitPolicy != null && Object.keys(explicitPolicy).length > 0) return explicitPolicy;
@@ -477,6 +480,59 @@ function chatProviderFrom(options) {
477
480
  };
478
481
  }
479
482
  /**
483
+ * Resolves a configured chat model from one scoped matrix axis.
484
+ *
485
+ * Use when:
486
+ * - a matrix axis selects the agent, judge, or another chat model role
487
+ * - eval code should keep model lookup semantics inside the chat-models plugin
488
+ *
489
+ * Expects:
490
+ * - `scope` to identify `context.task.matrix.run` or `context.task.matrix.eval`
491
+ * - `options.axis` to exist and contain a model id, model name, or alias
492
+ *
493
+ * Returns:
494
+ * - the configured model matching the selected matrix value
495
+ */
496
+ function modelFromMatrix(context, scope, options) {
497
+ const selectedModelName = context.task.matrix[scope][options.axis];
498
+ if (selectedModelName == null) throw new Error(`Missing ${scope} matrix axis "${options.axis}".`);
499
+ const model = resolveModelByName(context.models, selectedModelName);
500
+ if (model == null) throw new Error(`Unknown configured chat model "${selectedModelName}" from ${scope} matrix axis "${options.axis}".`);
501
+ return model;
502
+ }
503
/**
 * Run-matrix shorthand for `modelFromMatrix`.
 *
 * Use when:
 * - the run matrix picks the model driving the system under evaluation
 * - spelling out the `"run"` scope at every call site would be noise
 *
 * Expects:
 * - `options.axis` to name an axis present in `context.task.matrix.run`
 *
 * Returns:
 * - whatever `modelFromMatrix` resolves for that run-matrix axis
 */
function modelFromRun(context, options) {
  // Pin the lookup to the run matrix and delegate everything else.
  const runScope = "run";
  return modelFromMatrix(context, runScope, options);
}
519
/**
 * Eval-matrix shorthand for `modelFromMatrix`.
 *
 * Use when:
 * - the eval matrix picks a judge, rubric, or evaluator model
 * - spelling out the `"eval"` scope at every call site would be noise
 *
 * Expects:
 * - `options.axis` to name an axis present in `context.task.matrix.eval`
 *
 * Returns:
 * - whatever `modelFromMatrix` resolves for that eval-matrix axis
 */
function modelFromEval(context, options) {
  // Pin the lookup to the eval matrix and delegate everything else.
  const evalScope = "eval";
  return modelFromMatrix(context, evalScope, options);
}
535
+ /**
480
536
  * Creates a run-matrix `model` axis from configured chat model names.
481
537
  *
482
538
  * Use when:
@@ -532,6 +588,6 @@ function ChatModels(options) {
532
588
  };
533
589
  }
534
590
  //#endregion
535
- export { ChatModels, ChatProviders, chatModelFrom, chatModelMatrix, chatProviderFrom, emitChatModelErrorTelemetry, emitChatModelRequestTelemetry, emitChatModelResponseTelemetry, extractChatModelToolCalls, extractMeteringDimensions, ollamaFromRunContext, openaiFromRunContext, openrouterFromRunContext, toChatModelRuntimeConfig };
591
+ export { ChatModels, ChatProviders, chatModelFrom, chatModelMatrix, chatProviderFrom, emitChatModelErrorTelemetry, emitChatModelRequestTelemetry, emitChatModelResponseTelemetry, extractChatModelToolCalls, extractMeteringDimensions, modelFromEval, modelFromMatrix, modelFromRun, ollamaFromRunContext, openaiFromRunContext, openrouterFromRunContext, toChatModelRuntimeConfig };
536
592
 
537
593
  //# sourceMappingURL=index.mjs.map