vieval 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -2
- package/dist/bin/vieval.mjs +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-ImxGpoYQ.mjs → cli-Dao25VxV.mjs} +2 -2
- package/dist/cli-Dao25VxV.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/config.mjs +1 -1
- package/dist/core/assertions/index.d.mts +1 -1
- package/dist/core/inference-executors/index.d.mts +1 -1
- package/dist/core/inference-executors/index.mjs +10 -4
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +6 -40
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/{env-BeHv_5mo.d.mts → env-DfWZy_n4.d.mts} +14 -9
- package/dist/env-nV5rVErX.mjs +35 -0
- package/dist/env-nV5rVErX.mjs.map +1 -0
- package/dist/{index-5R1_k2nv.d.mts → index-BkjyCInx.d.mts} +12 -37
- package/dist/index.d.mts +5 -5
- package/dist/index.mjs +1 -1
- package/dist/{models-DIGdOUpJ.mjs → models-pBSRUZhY.mjs} +1 -1
- package/dist/{models-DIGdOUpJ.mjs.map → models-pBSRUZhY.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +63 -6
- package/dist/plugins/chat-models/index.mjs +74 -18
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/registry-BHGMxjpA.mjs.map +1 -1
- package/package.json +1 -1
- package/dist/cli-ImxGpoYQ.mjs.map +0 -1
- package/dist/env--94B0UtW.mjs +0 -41
- package/dist/env--94B0UtW.mjs.map +0 -1
package/dist/config.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as TaskDefinition, D as TaskRunContext, E as TaskReporterHooks, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, T as TaskReporterEventPayload, _ as ScopedMatrices, a as CliOpenTelemetryReportingConfig, b as TaskCaseReporterPayload, c as EvalDefinition, d as MatrixAxisValues, f as MatrixDefinition, g as MatrixValue, h as MatrixRow, i as Awaitable, l as EvalModule, m as MatrixPrimitive, n as defineEval, o as CliReportingConfig, p as MatrixLayer, r as defineTask, s as CollectedEvalEntry, t as ConfigHookPlugin, u as EvalModuleMap, v as TaskAutoRetryDelay, w as TaskExecutionPolicy, x as TaskCaseState, y as TaskCaseReporterEndPayload, z as resolveModelByName } from "./index-BkjyCInx.mjs";
|
|
2
2
|
export { Awaitable, CliOpenTelemetryReportingConfig, CliReportingConfig, CollectedEvalEntry, ConfigHookPlugin, EvalDefinition, EvalModule, EvalModuleMap, MatrixAxisValues, MatrixDefinition, MatrixLayer, MatrixPrimitive, MatrixRow, MatrixValue, ModelDefinition, ScopedMatrices, TaskAutoRetryDelay, TaskCaseReporterEndPayload, TaskCaseReporterPayload, TaskCaseState, TaskConcurrencyConfig, TaskDefinition, TaskExecutionPolicy, TaskReporterEventPayload, TaskReporterHooks, TaskRunContext, TaskRunOutput, defineEval, defineTask, resolveModelByName };
|
package/dist/config.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a as requiredEnvFrom, i as envFrom, n as EnvValueType, r as RequiredEnvFromOptions, t as EnvFromOptions } from "../../env-
|
|
1
|
+
import { a as requiredEnvFrom, i as envFrom, n as EnvValueType, r as RequiredEnvFromOptions, t as EnvFromOptions } from "../../env-DfWZy_n4.mjs";
|
|
2
2
|
import { createOpenAI } from "@xsai-ext/providers/create";
|
|
3
3
|
|
|
4
4
|
//#region src/core/inference-executors/retry-policy.d.ts
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { n as requiredEnvFrom, t as envFrom } from "../../env
|
|
1
|
+
import { n as requiredEnvFrom, t as envFrom } from "../../env-nV5rVErX.mjs";
|
|
2
2
|
import process from "node:process";
|
|
3
3
|
import { errorMessageFrom, errorNameFrom, sleep } from "@moeru/std";
|
|
4
4
|
import { createOpenAI } from "@xsai-ext/providers/create";
|
|
@@ -150,15 +150,21 @@ function createOpenAIFromEnv(source = {}, defaults = {}) {
|
|
|
150
150
|
const apiKeyEnvKey = source.apiKey ?? "OPENAI_API_KEY";
|
|
151
151
|
const baseURLEnvKey = source.baseURL ?? "OPENAI_BASE_URL";
|
|
152
152
|
const modelEnvKey = source.model ?? "OPENAI_MODEL";
|
|
153
|
-
const
|
|
153
|
+
const envWithDefaults = {
|
|
154
|
+
...defaults.apiKey == null ? {} : { [apiKeyEnvKey]: defaults.apiKey },
|
|
155
|
+
...defaults.baseURL == null ? {} : { [baseURLEnvKey]: defaults.baseURL },
|
|
156
|
+
...defaults.model == null ? {} : { [modelEnvKey]: defaults.model },
|
|
157
|
+
...env
|
|
158
|
+
};
|
|
159
|
+
const apiKey = requiredEnvFrom(envWithDefaults, {
|
|
154
160
|
name: apiKeyEnvKey,
|
|
155
161
|
type: "string"
|
|
156
162
|
});
|
|
157
|
-
const model = requiredEnvFrom(
|
|
163
|
+
const model = requiredEnvFrom(envWithDefaults, {
|
|
158
164
|
name: modelEnvKey,
|
|
159
165
|
type: "string"
|
|
160
166
|
});
|
|
161
|
-
const baseURL = envFrom(
|
|
167
|
+
const baseURL = envFrom(envWithDefaults, {
|
|
162
168
|
name: baseURLEnvKey,
|
|
163
169
|
type: "string"
|
|
164
170
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","names":["defaultSleep"],"sources":["../../../src/core/inference-executors/retry-policy.ts","../../../src/core/inference-executors/adapters.ts","../../../src/core/inference-executors/remote-providers/openai/index.ts"],"sourcesContent":["import { sleep as defaultSleep, errorMessageFrom, errorNameFrom } from '@moeru/std'\n\n/**\n * Describes how provider retries should behave.\n *\n * ASCII flow:\n * attempt -> run request -> success return\n * attempt -> run request -> retriable failure -> sleep -> next attempt\n * attempt -> run request -> non-retriable failure -> throw\n */\nexport interface RetryPolicy {\n /**\n * Maximum number of total attempts, including the first try.\n */\n maxAttempts: number\n /**\n * Returns the wait time for a retry attempt.\n */\n delayMs: (attempt: number) => number\n /**\n * Determines whether an error can be retried safely.\n */\n shouldRetry: (error: unknown) => boolean\n /**\n * Suspends execution between retries.\n */\n sleep: (milliseconds: number) => Promise<void>\n}\n\n/**\n * Configures a retry policy before a provider call is executed.\n *\n * Use when:\n * - you want the default retry classifier but need to tune attempts or delay\n * - you need to replace the sleeper in tests\n *\n * Expects:\n * - `maxAttempts` to be a finite integer greater than or equal to `1`\n * - `delayMs` to return a non-negative wait time in milliseconds\n */\nexport interface RetryPolicyOptions {\n /**\n * Maximum total attempts, including the first request.\n *\n * @default 3\n */\n maxAttempts?: number\n /**\n * Computes the delay for a retry attempt.\n *\n * The attempt number starts at `1` for the first retry.\n */\n delayMs?: (attempt: number) => number\n /**\n * Overrides the retry classifier.\n */\n shouldRetry?: (error: unknown) => boolean\n /**\n * Overrides the sleeper used between attempts.\n */\n sleep?: (milliseconds: number) => Promise<void>\n}\n\nconst retryableStatusCodes = new Set([408, 425, 429, 500, 502, 503, 504])\nconst retryableErrorNames = new Set(['TimeoutError', 'FetchError'])\nconst retryableMessagePatterns = [\n /rate limit/i,\n /rate-limited/i,\n /temporarily unavailable/i,\n /service unavailable/i,\n /server error/i,\n /fetch failed/i,\n /network error/i,\n /socket hang up/i,\n /econnreset/i,\n /econnrefused/i,\n /eai_again/i,\n /etimedout/i,\n /timed out/i,\n /timeout/i,\n]\n\nfunction getStatusCode(error: unknown): number | undefined {\n if (error == null || typeof error !== 'object') {\n return undefined\n }\n\n const maybeStatusCode = (error as { statusCode?: unknown }).statusCode\n if (typeof maybeStatusCode === 'number') {\n return maybeStatusCode\n }\n\n const maybeStatus = (error as { status?: unknown }).status\n if (typeof maybeStatus === 'number') {\n return maybeStatus\n }\n\n const response = (error as { response?: unknown }).response\n if (response == null || typeof response !== 'object') {\n return undefined\n }\n\n const responseStatus = (response as { status?: unknown }).status\n return typeof responseStatus === 'number' ? responseStatus : undefined\n}\n\n/**\n * Returns true when a provider failure is temporary and a retry is reasonable.\n *\n * Use when:\n * - the upstream failure is a transport problem or a 5xx/429 response\n *\n * Expects:\n * - provider errors to expose a status code, name, or message when possible\n */\nexport function isRetriableProviderError(error: unknown): boolean {\n const statusCode = getStatusCode(error)\n\n if (statusCode != null) {\n return retryableStatusCodes.has(statusCode)\n }\n\n const errorName = errorNameFrom(error)\n if (errorName != null && retryableErrorNames.has(errorName)) {\n return true\n }\n\n const errorMessage = errorMessageFrom(error)\n if (errorMessage == null) {\n return false\n }\n\n return retryableMessagePatterns.some(pattern => pattern.test(errorMessage))\n}\n\nfunction defaultDelayMs(attempt: number): number {\n return 500 * 2 ** (attempt - 1)\n}\n\n/**\n * Creates a retry policy for provider work.\n *\n * Use when:\n * - you need a reusable retry runner for eval-time provider calls\n * - you want to keep retry behavior deterministic in tests\n *\n * Expects:\n * - callers to treat `maxAttempts` as total attempts, not retries\n *\n * Throws:\n * - `RangeError` when `maxAttempts` is not a finite integer greater than or equal to `1`\n */\nfunction assertValidMaxAttempts(value: number): number {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 1) {\n throw new RangeError('maxAttempts must be a finite integer greater than or equal to 1.')\n }\n\n return value\n}\n\nexport function createRetryPolicy(options: RetryPolicyOptions = {}): RetryPolicy {\n const maxAttempts = assertValidMaxAttempts(options.maxAttempts ?? 3)\n\n return {\n maxAttempts,\n delayMs: options.delayMs ?? defaultDelayMs,\n shouldRetry: options.shouldRetry ?? isRetriableProviderError,\n sleep: options.sleep ?? defaultSleep,\n }\n}\n\n/**\n * Runs an operation with bounded retries.\n *\n * Use when:\n * - you are calling an LLM provider or other temporary upstream dependency\n * - non-retriable failures should bubble immediately\n *\n * Expects:\n * - the operation to be idempotent across attempts\n */\nexport async function runWithRetry<T>(operation: () => Promise<T>, policy: RetryPolicy = createRetryPolicy()): Promise<T> {\n for (let attempt = 1; attempt <= policy.maxAttempts; attempt += 1) {\n try {\n return await operation()\n }\n catch (error) {\n if (attempt >= policy.maxAttempts || !policy.shouldRetry(error)) {\n throw error\n }\n\n const delayMilliseconds = policy.delayMs(attempt)\n if (delayMilliseconds > 0) {\n await policy.sleep(delayMilliseconds)\n }\n }\n }\n\n throw new Error('Retry loop exited without returning a value.')\n}\n","import type { RetryPolicy, RetryPolicyOptions } from './retry-policy'\n\nimport { createRetryPolicy, runWithRetry } from './retry-policy'\n\n/**\n * Bundles a provider with the retry policy used to call it.\n *\n * Use when:\n * - a provider instance should travel with the retry runner that governs it\n * - you want call sites to share one retry configuration object\n */\nexport interface ProviderAdapter<TProvider> {\n /**\n * The underlying provider instance.\n */\n provider: TProvider\n /**\n * The retry policy used for provider calls.\n */\n retryPolicy: RetryPolicy\n /**\n * Runs a provider-dependent operation with the adapter retry policy.\n */\n runWithRetry: <TResult>(operation: () => Promise<TResult>) => Promise<TResult>\n}\n\n/**\n * Creates a provider adapter with the default retry policy.\n *\n * Use when:\n * - you have a provider instance and want a consistent retry wrapper\n *\n * Expects:\n * - the provider to be safe to reuse across attempts\n */\nexport function createProviderAdapter<TProvider>(provider: TProvider, options: RetryPolicyOptions = {}): ProviderAdapter<TProvider> {\n const retryPolicy = createRetryPolicy(options)\n\n return {\n provider,\n retryPolicy,\n runWithRetry: operation => runWithRetry(operation, retryPolicy),\n }\n}\n","import type { ProviderAdapter } from '../../adapters'\nimport type { RetryPolicyOptions } from '../../retry-policy'\n\nimport process from 'node:process'\n\nimport { createOpenAI } from '@xsai-ext/providers/create'\n\nimport { createProviderAdapter } from '../../adapters'\nimport { envFrom, requiredEnvFrom } from '../../env'\n\n/**\n * Represents the OpenAI provider instance returned by xsai.\n */\nexport type OpenAIProvider = ReturnType<typeof createOpenAI>\n\n/**\n * Represents the OpenAI adapter used by vieval.\n */\nexport type OpenAIProviderAdapter = ProviderAdapter<OpenAIProvider>\n\n/**\n * Configures env key names and source for OpenAI provider setup.\n */\nexport interface OpenAIEnvSourceOptions {\n /**\n * Environment object used for variable lookup.\n *\n * @default process.env\n */\n env?: NodeJS.ProcessEnv\n /**\n * Env key name for API key.\n *\n * @default 'OPENAI_API_KEY'\n */\n apiKey?: string\n /**\n * Env key name for base URL.\n *\n * @default 'OPENAI_BASE_URL'\n */\n baseURL?: string\n /**\n * Env key name for model.\n *\n * @default 'OPENAI_MODEL'\n */\n model?: string\n}\n\n/**\n * Configures fallback defaults when env values are missing.\n */\nexport interface OpenAIFromEnvDefaultOptions {\n /**\n * API key fallback value.\n */\n apiKey?: string\n /**\n * Base URL fallback value.\n */\n baseURL?: string\n /**\n * Model fallback value.\n */\n model?: string\n /**\n * Retry policy override passed to provider adapter.\n */\n retryOptions?: RetryPolicyOptions\n}\n\n/**\n * Result produced by `createOpenAIFromEnv`.\n */\nexport interface OpenAIFromEnvResult {\n adapter: OpenAIProviderAdapter\n apiKey: string\n baseURL?: string\n model: string\n}\n\n/**\n * Minimal response shape returned by text-generation calls.\n */\nexport interface OpenAITextGenerationResult {\n /**\n * Text output from the provider.\n *\n * Some OpenAI-compatible implementations may return `null`.\n */\n text?: string | null\n}\n\n/**\n * Normalizes provider text output to a safe string.\n *\n * Before: `{ text: null }`\n * After: `''`\n *\n * Before: `{ text: 'hello' }`\n * After: `'hello'`\n */\nexport function normalizeOpenAITextOutput(result: OpenAITextGenerationResult): string {\n return typeof result.text === 'string' ? result.text : ''\n}\n\n/**\n * Creates an OpenAI provider adapter using environment variables with defaults.\n *\n * Example:\n * `const runtime = createOpenAIFromEnv({}, { model: 'gpt-4.1-mini' })`\n */\nexport function createOpenAIFromEnv(\n source: OpenAIEnvSourceOptions = {},\n defaults: OpenAIFromEnvDefaultOptions = {},\n): OpenAIFromEnvResult {\n const env = source.env ?? process.env\n const apiKeyEnvKey = source.apiKey ?? 'OPENAI_API_KEY'\n const baseURLEnvKey = source.baseURL ?? 'OPENAI_BASE_URL'\n const modelEnvKey = source.model ?? 'OPENAI_MODEL'\n\n const apiKey = requiredEnvFrom(env[apiKeyEnvKey] ?? defaults.apiKey, {\n name: apiKeyEnvKey,\n type: 'string',\n })\n const model = requiredEnvFrom(env[modelEnvKey] ?? defaults.model, {\n name: modelEnvKey,\n type: 'string',\n })\n const baseURL = envFrom(env[baseURLEnvKey] ?? defaults.baseURL, {\n name: baseURLEnvKey,\n type: 'string',\n })\n const adapter = createOpenAIProviderAdapter(apiKey, baseURL, defaults.retryOptions)\n\n return {\n adapter,\n apiKey,\n baseURL,\n model,\n }\n}\n\n/**\n * Creates an OpenAI provider adapter for eval-time requests.\n *\n * Use when:\n * - an eval needs the OpenAI SDK surface plus the shared retry runner\n *\n * Expects:\n * - `apiKey` and `baseURL` to point at an OpenAI-compatible endpoint\n * - `retryOptions` to follow the same invariants as `createRetryPolicy`\n */\nexport function createOpenAIProviderAdapter(apiKey: string, baseURL?: string, retryOptions: RetryPolicyOptions = {}): OpenAIProviderAdapter {\n return createProviderAdapter(createOpenAI(apiKey, baseURL), retryOptions)\n}\n"],"mappings":";;;;;AA+DA,MAAM,uBAAuB,IAAI,IAAI;CAAC;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAI,CAAC;AACzE,MAAM,sBAAsB,IAAI,IAAI,CAAC,gBAAgB,aAAa,CAAC;AACnE,MAAM,2BAA2B;CAC/B;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;AAED,SAAS,cAAc,OAAoC;AACzD,KAAI,SAAS,QAAQ,OAAO,UAAU,SACpC;CAGF,MAAM,kBAAmB,MAAmC;AAC5D,KAAI,OAAO,oBAAoB,SAC7B,QAAO;CAGT,MAAM,cAAe,MAA+B;AACpD,KAAI,OAAO,gBAAgB,SACzB,QAAO;CAGT,MAAM,WAAY,MAAiC;AACnD,KAAI,YAAY,QAAQ,OAAO,aAAa,SAC1C;CAGF,MAAM,iBAAkB,SAAkC;AAC1D,QAAO,OAAO,mBAAmB,WAAW,iBAAiB,KAAA;;;;;;;;;;;AAY/D,SAAgB,yBAAyB,OAAyB;CAChE,MAAM,aAAa,cAAc,MAAM;AAEvC,KAAI,cAAc,KAChB,QAAO,qBAAqB,IAAI,WAAW;CAG7C,MAAM,YAAY,cAAc,MAAM;AACtC,KAAI,aAAa,QAAQ,oBAAoB,IAAI,UAAU,CACzD,QAAO;CAGT,MAAM,eAAe,iBAAiB,MAAM;AAC5C,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO,yBAAyB,MAAK,YAAW,QAAQ,KAAK,aAAa,CAAC;;AAG7E,SAAS,eAAe,SAAyB;AAC/C,QAAO,MAAM,MAAM,UAAU;;;;;;;;;;;;;;;AAgB/B,SAAS,uBAAuB,OAAuB;AACrD,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,WAAW,mEAAmE;AAG1F,QAAO;;AAGT,SAAgB,kBAAkB,UAA8B,EAAE,EAAe;AAG/E,QAAO;EACL,aAHkB,uBAAuB,QAAQ,eAAe,EAAE;EAIlE,SAAS,QAAQ,WAAW;EAC5B,aAAa,QAAQ,eAAe;EACpC,OAAO,QAAQ,SAASA;EACzB;;;;;;;;;;;;AAaH,eAAsB,aAAgB,WAA6B,SAAsB,mBAAmB,EAAc;AACxH,MAAK,IAAI,UAAU,GAAG,WAAW,OAAO,aAAa,WAAW,EAC9D,KAAI;AACF,SAAO,MAAM,WAAW;UAEnB,OAAO;AACZ,MAAI,WAAW,OAAO,eAAe,CAAC,OAAO,YAAY,MAAM,CAC7D,OAAM;EAGR,MAAM,oBAAoB,OAAO,QAAQ,QAAQ;AACjD,MAAI,oBAAoB,EACtB,OAAM,OAAO,MAAM,kBAAkB;;AAK3C,OAAM,IAAI,MAAM,+CAA+C;;;;;;;;;;;;;ACnKjE,SAAgB,sBAAiC,UAAqB,UAA8B,EAAE,EAA8B;CAClI,MAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAO;EACL;EACA;EACA,eAAc,cAAa,aAAa,WAAW,YAAY;EAChE;;;;;;;;;;;;;AC6DH,SAAgB,0BAA0B,QAA4C;AACpF,QAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;;;;;;;;AASzD,SAAgB,oBACd,SAAiC,EAAE,EACnC,WAAwC,EAAE,EACrB;CACrB,MAAM,MAAM,OAAO,OAAO,QAAQ;CAClC,MAAM,eAAe,OAAO,UAAU;CACtC,MAAM,gBAAgB,OAAO,WAAW;CACxC,MAAM,cAAc,OAAO,SAAS;CAEpC,MAAM,SAAS,gBAAgB,IAAI,iBAAiB,SAAS,QAAQ;EACnE,MAAM;EACN,MAAM;EACP,CAAC;CACF,MAAM,QAAQ,gBAAgB,IAAI,gBAAgB,SAAS,OAAO;EAChE,MAAM;EACN,MAAM;EACP,CAAC;CACF,MAAM,UAAU,QAAQ,IAAI,kBAAkB,SAAS,SAAS;EAC9D,MAAM;EACN,MAAM;EACP,CAAC;AAGF,QAAO;EACL,SAHc,4BAA4B,QAAQ,SAAS,SAAS,aAAa;EAIjF;EACA;EACA;EACD;;;;;;;;;;;;AAaH,SAAgB,4BAA4B,QAAgB,SAAkB,eAAmC,EAAE,EAAyB;AAC1I,QAAO,sBAAsB,aAAa,QAAQ,QAAQ,EAAE,aAAa"}
|
|
1
|
+
{"version":3,"file":"index.mjs","names":["defaultSleep"],"sources":["../../../src/core/inference-executors/retry-policy.ts","../../../src/core/inference-executors/adapters.ts","../../../src/core/inference-executors/remote-providers/openai/index.ts"],"sourcesContent":["import { sleep as defaultSleep, errorMessageFrom, errorNameFrom } from '@moeru/std'\n\n/**\n * Describes how provider retries should behave.\n *\n * ASCII flow:\n * attempt -> run request -> success return\n * attempt -> run request -> retriable failure -> sleep -> next attempt\n * attempt -> run request -> non-retriable failure -> throw\n */\nexport interface RetryPolicy {\n /**\n * Maximum number of total attempts, including the first try.\n */\n maxAttempts: number\n /**\n * Returns the wait time for a retry attempt.\n */\n delayMs: (attempt: number) => number\n /**\n * Determines whether an error can be retried safely.\n */\n shouldRetry: (error: unknown) => boolean\n /**\n * Suspends execution between retries.\n */\n sleep: (milliseconds: number) => Promise<void>\n}\n\n/**\n * Configures a retry policy before a provider call is executed.\n *\n * Use when:\n * - you want the default retry classifier but need to tune attempts or delay\n * - you need to replace the sleeper in tests\n *\n * Expects:\n * - `maxAttempts` to be a finite integer greater than or equal to `1`\n * - `delayMs` to return a non-negative wait time in milliseconds\n */\nexport interface RetryPolicyOptions {\n /**\n * Maximum total attempts, including the first request.\n *\n * @default 3\n */\n maxAttempts?: number\n /**\n * Computes the delay for a retry attempt.\n *\n * The attempt number starts at `1` for the first retry.\n */\n delayMs?: (attempt: number) => number\n /**\n * Overrides the retry classifier.\n */\n shouldRetry?: (error: unknown) => boolean\n /**\n * Overrides the sleeper used between attempts.\n */\n sleep?: (milliseconds: number) => Promise<void>\n}\n\nconst retryableStatusCodes = new Set([408, 425, 429, 500, 502, 503, 504])\nconst retryableErrorNames = new Set(['TimeoutError', 'FetchError'])\nconst retryableMessagePatterns = [\n /rate limit/i,\n /rate-limited/i,\n /temporarily unavailable/i,\n /service unavailable/i,\n /server error/i,\n /fetch failed/i,\n /network error/i,\n /socket hang up/i,\n /econnreset/i,\n /econnrefused/i,\n /eai_again/i,\n /etimedout/i,\n /timed out/i,\n /timeout/i,\n]\n\nfunction getStatusCode(error: unknown): number | undefined {\n if (error == null || typeof error !== 'object') {\n return undefined\n }\n\n const maybeStatusCode = (error as { statusCode?: unknown }).statusCode\n if (typeof maybeStatusCode === 'number') {\n return maybeStatusCode\n }\n\n const maybeStatus = (error as { status?: unknown }).status\n if (typeof maybeStatus === 'number') {\n return maybeStatus\n }\n\n const response = (error as { response?: unknown }).response\n if (response == null || typeof response !== 'object') {\n return undefined\n }\n\n const responseStatus = (response as { status?: unknown }).status\n return typeof responseStatus === 'number' ? responseStatus : undefined\n}\n\n/**\n * Returns true when a provider failure is temporary and a retry is reasonable.\n *\n * Use when:\n * - the upstream failure is a transport problem or a 5xx/429 response\n *\n * Expects:\n * - provider errors to expose a status code, name, or message when possible\n */\nexport function isRetriableProviderError(error: unknown): boolean {\n const statusCode = getStatusCode(error)\n\n if (statusCode != null) {\n return retryableStatusCodes.has(statusCode)\n }\n\n const errorName = errorNameFrom(error)\n if (errorName != null && retryableErrorNames.has(errorName)) {\n return true\n }\n\n const errorMessage = errorMessageFrom(error)\n if (errorMessage == null) {\n return false\n }\n\n return retryableMessagePatterns.some(pattern => pattern.test(errorMessage))\n}\n\nfunction defaultDelayMs(attempt: number): number {\n return 500 * 2 ** (attempt - 1)\n}\n\n/**\n * Creates a retry policy for provider work.\n *\n * Use when:\n * - you need a reusable retry runner for eval-time provider calls\n * - you want to keep retry behavior deterministic in tests\n *\n * Expects:\n * - callers to treat `maxAttempts` as total attempts, not retries\n *\n * Throws:\n * - `RangeError` when `maxAttempts` is not a finite integer greater than or equal to `1`\n */\nfunction assertValidMaxAttempts(value: number): number {\n if (!Number.isFinite(value) || !Number.isInteger(value) || value < 1) {\n throw new RangeError('maxAttempts must be a finite integer greater than or equal to 1.')\n }\n\n return value\n}\n\nexport function createRetryPolicy(options: RetryPolicyOptions = {}): RetryPolicy {\n const maxAttempts = assertValidMaxAttempts(options.maxAttempts ?? 3)\n\n return {\n maxAttempts,\n delayMs: options.delayMs ?? defaultDelayMs,\n shouldRetry: options.shouldRetry ?? isRetriableProviderError,\n sleep: options.sleep ?? defaultSleep,\n }\n}\n\n/**\n * Runs an operation with bounded retries.\n *\n * Use when:\n * - you are calling an LLM provider or other temporary upstream dependency\n * - non-retriable failures should bubble immediately\n *\n * Expects:\n * - the operation to be idempotent across attempts\n */\nexport async function runWithRetry<T>(operation: () => Promise<T>, policy: RetryPolicy = createRetryPolicy()): Promise<T> {\n for (let attempt = 1; attempt <= policy.maxAttempts; attempt += 1) {\n try {\n return await operation()\n }\n catch (error) {\n if (attempt >= policy.maxAttempts || !policy.shouldRetry(error)) {\n throw error\n }\n\n const delayMilliseconds = policy.delayMs(attempt)\n if (delayMilliseconds > 0) {\n await policy.sleep(delayMilliseconds)\n }\n }\n }\n\n throw new Error('Retry loop exited without returning a value.')\n}\n","import type { RetryPolicy, RetryPolicyOptions } from './retry-policy'\n\nimport { createRetryPolicy, runWithRetry } from './retry-policy'\n\n/**\n * Bundles a provider with the retry policy used to call it.\n *\n * Use when:\n * - a provider instance should travel with the retry runner that governs it\n * - you want call sites to share one retry configuration object\n */\nexport interface ProviderAdapter<TProvider> {\n /**\n * The underlying provider instance.\n */\n provider: TProvider\n /**\n * The retry policy used for provider calls.\n */\n retryPolicy: RetryPolicy\n /**\n * Runs a provider-dependent operation with the adapter retry policy.\n */\n runWithRetry: <TResult>(operation: () => Promise<TResult>) => Promise<TResult>\n}\n\n/**\n * Creates a provider adapter with the default retry policy.\n *\n * Use when:\n * - you have a provider instance and want a consistent retry wrapper\n *\n * Expects:\n * - the provider to be safe to reuse across attempts\n */\nexport function createProviderAdapter<TProvider>(provider: TProvider, options: RetryPolicyOptions = {}): ProviderAdapter<TProvider> {\n const retryPolicy = createRetryPolicy(options)\n\n return {\n provider,\n retryPolicy,\n runWithRetry: operation => runWithRetry(operation, retryPolicy),\n }\n}\n","import type { ProviderAdapter } from '../../adapters'\nimport type { RetryPolicyOptions } from '../../retry-policy'\n\nimport process from 'node:process'\n\nimport { createOpenAI } from '@xsai-ext/providers/create'\n\nimport { createProviderAdapter } from '../../adapters'\nimport { envFrom, requiredEnvFrom } from '../../env'\n\n/**\n * Represents the OpenAI provider instance returned by xsai.\n */\nexport type OpenAIProvider = ReturnType<typeof createOpenAI>\n\n/**\n * Represents the OpenAI adapter used by vieval.\n */\nexport type OpenAIProviderAdapter = ProviderAdapter<OpenAIProvider>\n\n/**\n * Configures env key names and source for OpenAI provider setup.\n */\nexport interface OpenAIEnvSourceOptions {\n /**\n * Environment object used for variable lookup.\n *\n * @default process.env\n */\n env?: NodeJS.ProcessEnv\n /**\n * Env key name for API key.\n *\n * @default 'OPENAI_API_KEY'\n */\n apiKey?: string\n /**\n * Env key name for base URL.\n *\n * @default 'OPENAI_BASE_URL'\n */\n baseURL?: string\n /**\n * Env key name for model.\n *\n * @default 'OPENAI_MODEL'\n */\n model?: string\n}\n\n/**\n * Configures fallback defaults when env values are missing.\n */\nexport interface OpenAIFromEnvDefaultOptions {\n /**\n * API key fallback value.\n */\n apiKey?: string\n /**\n * Base URL fallback value.\n */\n baseURL?: string\n /**\n * Model fallback value.\n */\n model?: string\n /**\n * Retry policy override passed to provider adapter.\n */\n retryOptions?: RetryPolicyOptions\n}\n\n/**\n * Result produced by `createOpenAIFromEnv`.\n */\nexport interface OpenAIFromEnvResult {\n adapter: OpenAIProviderAdapter\n apiKey: string\n baseURL?: string\n model: string\n}\n\n/**\n * Minimal response shape returned by text-generation calls.\n */\nexport interface OpenAITextGenerationResult {\n /**\n * Text output from the provider.\n *\n * Some OpenAI-compatible implementations may return `null`.\n */\n text?: string | null\n}\n\n/**\n * Normalizes provider text output to a safe string.\n *\n * Before: `{ text: null }`\n * After: `''`\n *\n * Before: `{ text: 'hello' }`\n * After: `'hello'`\n */\nexport function normalizeOpenAITextOutput(result: OpenAITextGenerationResult): string {\n return typeof result.text === 'string' ? result.text : ''\n}\n\n/**\n * Creates an OpenAI provider adapter using environment variables with defaults.\n *\n * Example:\n * `const runtime = createOpenAIFromEnv({}, { model: 'gpt-4.1-mini' })`\n */\nexport function createOpenAIFromEnv(\n source: OpenAIEnvSourceOptions = {},\n defaults: OpenAIFromEnvDefaultOptions = {},\n): OpenAIFromEnvResult {\n const env = source.env ?? process.env\n const apiKeyEnvKey = source.apiKey ?? 'OPENAI_API_KEY'\n const baseURLEnvKey = source.baseURL ?? 'OPENAI_BASE_URL'\n const modelEnvKey = source.model ?? 'OPENAI_MODEL'\n\n const envWithDefaults = {\n ...(defaults.apiKey == null ? {} : { [apiKeyEnvKey]: defaults.apiKey }),\n ...(defaults.baseURL == null ? {} : { [baseURLEnvKey]: defaults.baseURL }),\n ...(defaults.model == null ? {} : { [modelEnvKey]: defaults.model }),\n ...env,\n }\n\n const apiKey = requiredEnvFrom(envWithDefaults, {\n name: apiKeyEnvKey,\n type: 'string',\n })\n const model = requiredEnvFrom(envWithDefaults, {\n name: modelEnvKey,\n type: 'string',\n })\n const baseURL = envFrom(envWithDefaults, {\n name: baseURLEnvKey,\n type: 'string',\n })\n const adapter = createOpenAIProviderAdapter(apiKey, baseURL, defaults.retryOptions)\n\n return {\n adapter,\n apiKey,\n baseURL,\n model,\n }\n}\n\n/**\n * Creates an OpenAI provider adapter for eval-time requests.\n *\n * Use when:\n * - an eval needs the OpenAI SDK surface plus the shared retry runner\n *\n * Expects:\n * - `apiKey` and `baseURL` to point at an OpenAI-compatible endpoint\n * - `retryOptions` to follow the same invariants as `createRetryPolicy`\n */\nexport function createOpenAIProviderAdapter(apiKey: string, baseURL?: string, retryOptions: RetryPolicyOptions = {}): OpenAIProviderAdapter {\n return createProviderAdapter(createOpenAI(apiKey, baseURL), retryOptions)\n}\n"],"mappings":";;;;;AA+DA,MAAM,uBAAuB,IAAI,IAAI;CAAC;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAI,CAAC;AACzE,MAAM,sBAAsB,IAAI,IAAI,CAAC,gBAAgB,aAAa,CAAC;AACnE,MAAM,2BAA2B;CAC/B;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;AAED,SAAS,cAAc,OAAoC;AACzD,KAAI,SAAS,QAAQ,OAAO,UAAU,SACpC;CAGF,MAAM,kBAAmB,MAAmC;AAC5D,KAAI,OAAO,oBAAoB,SAC7B,QAAO;CAGT,MAAM,cAAe,MAA+B;AACpD,KAAI,OAAO,gBAAgB,SACzB,QAAO;CAGT,MAAM,WAAY,MAAiC;AACnD,KAAI,YAAY,QAAQ,OAAO,aAAa,SAC1C;CAGF,MAAM,iBAAkB,SAAkC;AAC1D,QAAO,OAAO,mBAAmB,WAAW,iBAAiB,KAAA;;;;;;;;;;;AAY/D,SAAgB,yBAAyB,OAAyB;CAChE,MAAM,aAAa,cAAc,MAAM;AAEvC,KAAI,cAAc,KAChB,QAAO,qBAAqB,IAAI,WAAW;CAG7C,MAAM,YAAY,cAAc,MAAM;AACtC,KAAI,aAAa,QAAQ,oBAAoB,IAAI,UAAU,CACzD,QAAO;CAGT,MAAM,eAAe,iBAAiB,MAAM;AAC5C,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO,yBAAyB,MAAK,YAAW,QAAQ,KAAK,aAAa,CAAC;;AAG7E,SAAS,eAAe,SAAyB;AAC/C,QAAO,MAAM,MAAM,UAAU;;;;;;;;;;;;;;;AAgB/B,SAAS,uBAAuB,OAAuB;AACrD,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,WAAW,mEAAmE;AAG1F,QAAO;;AAGT,SAAgB,kBAAkB,UAA8B,EAAE,EAAe;AAG/E,QAAO;EACL,aAHkB,uBAAuB,QAAQ,eAAe,EAAE;EAIlE,SAAS,QAAQ,WAAW;EAC5B,aAAa,QAAQ,eAAe;EACpC,OAAO,QAAQ,SAASA;EACzB;;;;;;;;;;;;AAaH,eAAsB,aAAgB,WAA6B,SAAsB,mBAAmB,EAAc;AACxH,MAAK,IAAI,UAAU,GAAG,WAAW,OAAO,aAAa,WAAW,EAC9D,KAAI;AACF,SAAO,MAAM,WAAW;UAEnB,OAAO;AACZ,MAAI,WAAW,OAAO,eAAe,CAAC,OAAO,YAAY,MAAM,CAC7D,OAAM;EAGR,MAAM,oBAAoB,OAAO,QAAQ,QAAQ;AACjD,MAAI,oBAAoB,EACtB,OAAM,OAAO,MAAM,kBAAkB;;AAK3C,OAAM,IAAI,MAAM,+CAA+C;;;;;;;;;;;;;ACnKjE,SAAgB,sBAAiC,UAAqB,UAA8B,EAAE,EAA8B;CAClI,MAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAO;EACL;EACA;EACA,eAAc,cAAa,aAAa,WAAW,YAAY;EAChE;;;;;;;;;;;;;AC6DH,SAAgB,0BAA0B,QAA4C;AACpF,QAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;;;;;;;;AASzD,SAAgB,oBACd,SAAiC,EAAE,EACnC,WAAwC,EAAE,EACrB;CACrB,MAAM,MAAM,OAAO,OAAO,QAAQ;CAClC,MAAM,eAAe,OAAO,UAAU;CACtC,MAAM,gBAAgB,OAAO,WAAW;CACxC,MAAM,cAAc,OAAO,SAAS;CAEpC,MAAM,kBAAkB;EACtB,GAAI,SAAS,UAAU,OAAO,EAAE,GAAG,GAAG,eAAe,SAAS,QAAQ;EACtE,GAAI,SAAS,WAAW,OAAO,EAAE,GAAG,GAAG,gBAAgB,SAAS,SAAS;EACzE,GAAI,SAAS,SAAS,OAAO,EAAE,GAAG,GAAG,cAAc,SAAS,OAAO;EACnE,GAAG;EACJ;CAED,MAAM,SAAS,gBAAgB,iBAAiB;EAC9C,MAAM;EACN,MAAM;EACP,CAAC;CACF,MAAM,QAAQ,gBAAgB,iBAAiB;EAC7C,MAAM;EACN,MAAM;EACP,CAAC;CACF,MAAM,UAAU,QAAQ,iBAAiB;EACvC,MAAM;EACN,MAAM;EACP,CAAC;AAGF,QAAO;EACL,SAHc,4BAA4B,QAAQ,SAAS,SAAS,aAAa;EAIjF;EACA;EACA;EACD;;;;;;;;;;;;AAaH,SAAgB,4BAA4B,QAAgB,SAAkB,eAAmC,EAAE,EAAyB;AAC1I,QAAO,sBAAsB,aAAa,QAAQ,QAAQ,EAAE,aAAa"}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { $ as
|
|
1
|
+
import { $ as InferenceExecutor, A as RunScheduledTasksOptions, B as asProjectRelativePath, F as CreateTaskExecutionContextOptions, G as AggregatedProviderSummary, H as CreateVievalRunnerRuntimeContextOptions, I as TaskExecutionContext, J as RunResult, K as AggregatedRunResults, L as createTaskExecutionContext, M as RunnerTaskState, N as ScheduledTaskExecutor, P as runScheduledTasks, Q as CreateRunnerScheduleOptions, U as RunnerRuntimeContext, V as collectEvalEntries, W as createRunnerRuntimeContext, X as RunScoreKind, Y as RunScore, Z as aggregateRunResults, at as ScheduledTaskMatrixMeta, ct as createFilesystemTaskCacheRuntime, dt as CacheFileOptions, et as RunnerMatrixDefinition, ft as CacheNamespace, it as ScheduledTaskMatrix, j as RunnerExecutionError, lt as normalizeCacheFilePathSegments, nt as RunnerMatrixSelection, ot as createRunnerSchedule, pt as TaskCacheRuntime, q as AggregatedRunSummary, rt as ScheduledTask, st as CreateFilesystemTaskCacheRuntimeOptions, tt as RunnerMatrixInput, ut as CacheFileHandle } from "../../index-BkjyCInx.mjs";
|
|
2
2
|
import { a as SchedulerMiddleware, c as SchedulerScopeContext, i as SchedulerConcurrencyConfig, n as getActiveScopes, o as SchedulerRuntime, r as CreateSchedulerRuntimeOptions, s as SchedulerScope, t as createSchedulerRuntime } from "../../index-fakXoZEe.mjs";
|
|
3
|
-
export { AggregatedProviderSummary, AggregatedRunResults, AggregatedRunSummary, CacheFileHandle, CacheFileOptions, CacheNamespace, CreateFilesystemTaskCacheRuntimeOptions, CreateRunnerScheduleOptions, CreateSchedulerRuntimeOptions, CreateTaskExecutionContextOptions, CreateVievalRunnerRuntimeContextOptions, InferenceExecutor, RunResult, RunScheduledTasksOptions, RunScore, RunScoreKind, RunnerExecutionError, RunnerMatrixDefinition, RunnerMatrixInput, RunnerMatrixSelection, RunnerRuntimeContext, RunnerTaskState, ScheduledTask, ScheduledTaskExecutor, ScheduledTaskMatrix, ScheduledTaskMatrixMeta, SchedulerConcurrencyConfig, SchedulerMiddleware, SchedulerRuntime, SchedulerScope, SchedulerScopeContext, TaskCacheRuntime, TaskExecutionContext,
|
|
3
|
+
export { AggregatedProviderSummary, AggregatedRunResults, AggregatedRunSummary, CacheFileHandle, CacheFileOptions, CacheNamespace, CreateFilesystemTaskCacheRuntimeOptions, CreateRunnerScheduleOptions, CreateSchedulerRuntimeOptions, CreateTaskExecutionContextOptions, CreateVievalRunnerRuntimeContextOptions, InferenceExecutor, RunResult, RunScheduledTasksOptions, RunScore, RunScoreKind, RunnerExecutionError, RunnerMatrixDefinition, RunnerMatrixInput, RunnerMatrixSelection, RunnerRuntimeContext, RunnerTaskState, ScheduledTask, ScheduledTaskExecutor, ScheduledTaskMatrix, ScheduledTaskMatrixMeta, SchedulerConcurrencyConfig, SchedulerMiddleware, SchedulerRuntime, SchedulerScope, SchedulerScopeContext, TaskCacheRuntime, TaskExecutionContext, aggregateRunResults, asProjectRelativePath, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createSchedulerRuntime, createTaskExecutionContext, getActiveScopes, normalizeCacheFilePathSegments, runScheduledTasks };
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { createSchedulerRuntime, getActiveScopes } from "../scheduler/index.mjs";
|
|
2
|
-
import { t as resolveModelByName } from "../../models-DIGdOUpJ.mjs";
|
|
3
2
|
import { createRequire } from "node:module";
|
|
4
3
|
import process from "node:process";
|
|
5
4
|
import { errorMessageFrom } from "@moeru/std";
|
|
@@ -304,7 +303,7 @@ function collectEvalEntries(modules, context) {
|
|
|
304
303
|
}
|
|
305
304
|
//#endregion
|
|
306
305
|
//#region src/core/runner/run.ts
|
|
307
|
-
function createDefaultExecutionContext(
|
|
306
|
+
function createDefaultExecutionContext() {
|
|
308
307
|
return {
|
|
309
308
|
cache: { namespace(name) {
|
|
310
309
|
return { file(options) {
|
|
@@ -312,11 +311,7 @@ function createDefaultExecutionContext(task) {
|
|
|
312
311
|
throw new Error(`Task cache runtime is not configured. Requested namespace "${name}" and key "${key}".`);
|
|
313
312
|
} };
|
|
314
313
|
} },
|
|
315
|
-
|
|
316
|
-
const requestedModelName = typeof options === "string" ? options : options?.name;
|
|
317
|
-
if (requestedModelName != null) throw new Error(`No model registry configured. Requested model: ${requestedModelName}`);
|
|
318
|
-
throw new Error(`No model registry configured for task inferenceExecutor id "${task.inferenceExecutor.id}".`);
|
|
319
|
-
}
|
|
314
|
+
models: []
|
|
320
315
|
};
|
|
321
316
|
}
|
|
322
317
|
/**
|
|
@@ -366,7 +361,7 @@ async function runScheduledTasks(tasks, executor, options = {}) {
|
|
|
366
361
|
async function executeScheduledTask(task) {
|
|
367
362
|
let executionContext;
|
|
368
363
|
try {
|
|
369
|
-
executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext(
|
|
364
|
+
executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext();
|
|
370
365
|
} catch (error) {
|
|
371
366
|
throw createRunnerExecutionError(task.id, error);
|
|
372
367
|
}
|
|
@@ -587,48 +582,19 @@ function createNoopTaskCacheRuntime() {
|
|
|
587
582
|
} };
|
|
588
583
|
} };
|
|
589
584
|
}
|
|
590
|
-
function resolveDefaultTaskModel(models, task) {
|
|
591
|
-
const runMatrixModelName = task.matrix.run.model;
|
|
592
|
-
if (runMatrixModelName != null) {
|
|
593
|
-
const matrixSelectedModel = resolveModelByName(models, runMatrixModelName);
|
|
594
|
-
if (matrixSelectedModel != null) return matrixSelectedModel;
|
|
595
|
-
throw new Error(`Unknown configured model "${runMatrixModelName}" from task.matrix.run.model.`);
|
|
596
|
-
}
|
|
597
|
-
const matched = resolveModelByName(models, task.inferenceExecutor.id);
|
|
598
|
-
if (matched != null) return matched;
|
|
599
|
-
if (models.length > 1) throw new Error([
|
|
600
|
-
`Multiple configured models are available, but no default model is selected for inferenceExecutor "${task.inferenceExecutor.id}".`,
|
|
601
|
-
"Select one model explicitly by either:",
|
|
602
|
-
"- setting runMatrix.override.model (or task matrix run.model)",
|
|
603
|
-
"- setting project.inferenceExecutors to a matching model id",
|
|
604
|
-
"- calling context.model({ name: \"your-model-id-or-alias\" })"
|
|
605
|
-
].join("\n"));
|
|
606
|
-
if (models.length === 1) {
|
|
607
|
-
const firstModel = models[0];
|
|
608
|
-
if (firstModel != null) return firstModel;
|
|
609
|
-
}
|
|
610
|
-
throw new Error(`No configured model found for inferenceExecutor id "${task.inferenceExecutor.id}".`);
|
|
611
|
-
}
|
|
612
585
|
/**
|
|
613
|
-
* Creates task-scoped
|
|
586
|
+
* Creates task-scoped context data for runner execution.
|
|
614
587
|
*
|
|
615
588
|
* Call stack:
|
|
616
589
|
*
|
|
617
590
|
* {@link runScheduledTasks}
|
|
618
591
|
* -> {@link createTaskExecutionContext}
|
|
619
|
-
* ->
|
|
620
|
-
* -> `task.model()` / `task.model({ name })`
|
|
592
|
+
* -> `TaskExecutionContext`
|
|
621
593
|
*/
|
|
622
594
|
function createTaskExecutionContext(options) {
|
|
623
595
|
return {
|
|
624
596
|
cache: options.cache ?? createNoopTaskCacheRuntime(),
|
|
625
|
-
|
|
626
|
-
if (selection == null) return resolveDefaultTaskModel(options.models, options.task);
|
|
627
|
-
const name = typeof selection === "string" ? selection : selection.name;
|
|
628
|
-
const namedModel = resolveModelByName(options.models, name);
|
|
629
|
-
if (namedModel == null) throw new Error(`Unknown configured model "${name}".`);
|
|
630
|
-
return namedModel;
|
|
631
|
-
}
|
|
597
|
+
models: options.models
|
|
632
598
|
};
|
|
633
599
|
}
|
|
634
600
|
//#endregion
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/cache/filesystem.ts","../../../src/core/runner/aggregate.ts","../../../src/core/runner/collect.ts","../../../src/core/runner/run.ts","../../../src/core/runner/runtime-context.ts","../../../src/core/runner/schedule.ts","../../../src/core/runner/task-context.ts"],"sourcesContent":["import type { CacheFileHandle, CacheFileOptions, CacheNamespace, TaskCacheRuntime } from './types'\n\nimport process from 'node:process'\n\nimport { Buffer } from 'node:buffer'\nimport { createReadStream, createWriteStream } from 'node:fs'\nimport { access, mkdir, readFile, rename, writeFile } from 'node:fs/promises'\nimport { dirname, join } from 'node:path'\n\n/**\n * Options for creating the filesystem-backed task cache runtime.\n */\nexport interface CreateFilesystemTaskCacheRuntimeOptions {\n /**\n * Absolute cache root directory.\n */\n cacheRootDirectory: string\n /**\n * Project identifier under one workspace cache scope.\n */\n projectName: string\n /**\n * Workspace identifier used to share cache roots across projects.\n */\n workspaceId: string\n}\n\nfunction sanitizePathSegment(value: string): string {\n const normalized = value.trim()\n if (normalized.length === 0) {\n return 'default'\n }\n\n return normalized.replace(/[^\\w.-]+/g, '-')\n}\n\nfunction normalizeExtension(extension: string | undefined, mediaType: string | undefined): string | undefined {\n if (extension != null && extension.length > 0) {\n return extension.startsWith('.') ? extension.slice(1) : extension\n }\n\n if (mediaType == null || mediaType.length === 0) {\n return undefined\n }\n\n if (mediaType === 'application/json') {\n return 'json'\n }\n\n if (mediaType === 'text/plain') {\n return 'txt'\n }\n\n if (mediaType === 'audio/wav') {\n return 'wav'\n }\n\n return undefined\n}\n\n/**\n * Normalizes cache file options into deterministic relative path segments.\n *\n * Before:\n * - `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`\n *\n * After:\n * - `['cases', 'dataset-hash', 'v1.json']`\n */\nexport function normalizeCacheFilePathSegments(options: CacheFileOptions): string[] {\n const sanitizedKey = options.key.map(segment => sanitizePathSegment(segment))\n const extension = normalizeExtension(options.ext, options.mediaType)\n\n if (sanitizedKey.length === 0) {\n return extension == null ? ['artifact'] : [`artifact.${extension}`]\n }\n\n if (extension == null) {\n return sanitizedKey\n }\n\n const withoutTail = sanitizedKey.slice(0, Math.max(0, sanitizedKey.length - 1))\n const tail = sanitizedKey[sanitizedKey.length - 1] ?? 'artifact'\n return [...withoutTail, `${tail}.${extension}`]\n}\n\nasync function writeAtomically(path: string, content: Buffer | string): Promise<void> {\n const directory = dirname(path)\n const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`\n await mkdir(directory, { recursive: true })\n await writeFile(temporaryPath, content)\n await rename(temporaryPath, path)\n}\n\nfunction createCacheFileHandle(path: string): CacheFileHandle {\n return {\n path,\n async exists() {\n try {\n await access(path)\n return true\n }\n catch {\n return false\n }\n },\n openReadStream() {\n return createReadStream(path)\n },\n async openWriteStream() {\n await mkdir(dirname(path), { recursive: true })\n return createWriteStream(path)\n },\n async readBuffer() {\n return await readFile(path)\n },\n async writeBuffer(value) {\n await writeAtomically(path, value)\n },\n async readText(encoding = 'utf-8') {\n return await readFile(path, encoding)\n },\n async writeText(value, encoding = 'utf-8') {\n await writeAtomically(path, Buffer.from(value, encoding))\n },\n async readJson<T>() {\n return JSON.parse(await readFile(path, 'utf-8')) as T\n },\n async writeJson(value) {\n await writeAtomically(path, `${JSON.stringify(value, null, 2)}\\n`)\n },\n async loadAsCasesInput<T>() {\n return await this.readJson<T[]>()\n },\n async loadAsExpectFixture<T>() {\n return await this.readJson<T>()\n },\n }\n}\n\nfunction createCacheNamespace(baseDirectory: string, namespace: string): CacheNamespace {\n return {\n file(options) {\n const relativePathSegments = normalizeCacheFilePathSegments(options)\n return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments))\n },\n }\n}\n\n/**\n * Creates a deterministic filesystem-backed task cache runtime.\n *\n * Use when:\n * - eval tasks need reproducible cache paths for expensive pre-processing outputs\n * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes\n *\n * Expects:\n * - `cacheRootDirectory` to be writable by the running process\n * - `workspaceId` + `projectName` to stay stable for reproducible paths\n *\n * Returns:\n * - task cache runtime that resolves namespaced file handles under:\n * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`\n */\nexport function createFilesystemTaskCacheRuntime(\n options: CreateFilesystemTaskCacheRuntimeOptions,\n): TaskCacheRuntime {\n const workspaceDirectory = sanitizePathSegment(options.workspaceId)\n const projectDirectory = sanitizePathSegment(options.projectName)\n const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory)\n\n return {\n namespace(name) {\n return createCacheNamespace(baseDirectory, name)\n },\n }\n}\n","import type { ScheduledTaskMatrix } from './schedule'\n\n/**\n * Identifies the scoring family for a single eval score.\n */\nexport type RunScoreKind = 'exact' | 'judge'\n\n/**\n * Represents one normalized score emitted by a completed eval run.\n */\nexport interface RunScore {\n /**\n * Score family used for aggregation.\n */\n kind: RunScoreKind\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n}\n\n/**\n * Captures the output of one scheduled runner task.\n */\nexport interface RunResult {\n /**\n * Stable run id, usually copied from the scheduled task id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Raw scores emitted by the eval.\n */\n scores: readonly RunScore[]\n}\n\n/**\n * Stores the per-run score averages after normalization.\n */\nexport interface AggregatedRunSummary {\n /**\n * Stable run id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Mean of exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average. Uses both families when present, otherwise falls back to the\n * single available family.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores inferenceExecutor-level score aggregates across multiple runs.\n */\nexport interface AggregatedProviderSummary {\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Number of runs included in this inferenceExecutor bucket.\n */\n runCount: number\n /**\n * Mean of all exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of all judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average derived from the inferenceExecutor exact and judge means.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores the final aggregation output for a batch of runner results.\n */\nexport interface AggregatedRunResults {\n /**\n * Per-run normalized score summaries.\n */\n runs: AggregatedRunSummary[]\n /**\n * Provider-level summaries sorted by inferenceExecutor id.\n */\n inferenceExecutors: AggregatedProviderSummary[]\n /**\n * Overall summary across every run.\n */\n overall: {\n exactAverage: number | null\n judgeAverage: number | null\n hybridAverage: number | null\n runCount: number\n }\n}\n\ninterface ScoreBuckets {\n exact: number[]\n judge: number[]\n}\n\nfunction cloneScheduledTaskMatrix(matrix: ScheduledTaskMatrix): ScheduledTaskMatrix {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction assertKnownScoreKind(kind: string): RunScoreKind {\n if (kind === 'exact' || kind === 'judge') {\n return kind\n }\n\n throw new TypeError(`Unknown eval score kind \"${kind}\".`)\n}\n\nfunction average(scores: readonly number[]): number | null {\n if (scores.length === 0) {\n return null\n }\n\n const total = scores.reduce((sum, score) => sum + score, 0)\n return total / scores.length\n}\n\nfunction createHybridAverage(exactAverage: number | null, judgeAverage: number | null): number | null {\n if (exactAverage != null && judgeAverage != null) {\n return (exactAverage + judgeAverage) / 2\n }\n\n if (exactAverage != null) {\n return exactAverage\n }\n\n if (judgeAverage != null) {\n return judgeAverage\n }\n\n return null\n}\n\nfunction collectScoreBuckets(scores: readonly RunScore[]): ScoreBuckets {\n const buckets: ScoreBuckets = {\n exact: [],\n judge: [],\n }\n\n for (const score of scores) {\n const kind = assertKnownScoreKind(score.kind)\n\n if (kind === 'exact') {\n buckets.exact.push(score.score)\n continue\n }\n\n buckets.judge.push(score.score)\n }\n\n return buckets\n}\n\nfunction createRunSummary(result: RunResult): AggregatedRunSummary {\n const buckets = collectScoreBuckets(result.scores)\n const exactAverage = average(buckets.exact)\n const judgeAverage = average(buckets.judge)\n\n return {\n entryId: result.entryId,\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n id: result.id,\n judgeAverage,\n matrix: cloneScheduledTaskMatrix(result.matrix),\n inferenceExecutorId: result.inferenceExecutorId,\n }\n}\n\nfunction createProviderSummary(inferenceExecutorId: string, results: readonly RunResult[]): AggregatedProviderSummary {\n const exactScores: number[] = []\n const judgeScores: number[] = []\n\n for (const result of results) {\n const buckets = collectScoreBuckets(result.scores)\n exactScores.push(...buckets.exact)\n judgeScores.push(...buckets.judge)\n }\n\n const exactAverage = average(exactScores)\n const judgeAverage = average(judgeScores)\n\n return {\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n judgeAverage,\n inferenceExecutorId,\n runCount: results.length,\n }\n}\n\n/**\n * Aggregates exact-match and judge-based scores into hybrid runner summaries.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link createRunSummary}\n * -> {@link createProviderSummary}\n * -> `report output`\n *\n * Use when:\n * - a runner batch mixes deterministic exact checks with judge-based grading\n * - inferenceExecutor comparison should preserve both score families and one hybrid view\n *\n * Expects:\n * - each score to be normalized to the `0..1` range before aggregation\n * - `scores.kind` to use only `'exact'` or `'judge'`\n */\nexport function aggregateRunResults(results: readonly RunResult[]): AggregatedRunResults {\n const runs = results.map(createRunSummary)\n\n const inferenceExecutorIds = Array.from(new Set(results.map(result => result.inferenceExecutorId)))\n const inferenceExecutors = inferenceExecutorIds\n .map((inferenceExecutorId) => {\n const providerResults = results.filter(result => result.inferenceExecutorId === inferenceExecutorId)\n return createProviderSummary(inferenceExecutorId, providerResults)\n })\n .sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId))\n\n const overall = createProviderSummary(\n 'overall',\n results,\n )\n\n return {\n overall: {\n exactAverage: overall.exactAverage,\n hybridAverage: overall.hybridAverage,\n judgeAverage: overall.judgeAverage,\n runCount: overall.runCount,\n },\n inferenceExecutors,\n runs,\n }\n}\n","import type { CollectedEvalEntry, EvalModule, EvalModuleMap } from '../../config'\nimport type { RunnerRuntimeContext } from './runtime-context'\n\nimport { basename, dirname, relative } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst evalFileSuffix = '.eval.ts'\nconst absolutePathPattern = /^(?:[A-Z]:\\/|\\/|\\\\\\\\)/i\n\nfunction normalizePath(value: string): string {\n return value.replaceAll('\\\\', '/')\n}\n\n/**\n * Converts a file path into a project-relative path when possible.\n *\n * Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n *\n * Before: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n */\nexport function asProjectRelativePath(filePath: string, context: RunnerRuntimeContext): string {\n const normalizedFilePath = normalizePath(filePath)\n const normalizedProjectRootDirectory = normalizePath(context.projectRootDirectory)\n const filePathWindowsDrive = normalizedFilePath.match(/^[A-Z]:\\//i)?.[0]\n const projectRootWindowsDrive = normalizedProjectRootDirectory.match(/^[A-Z]:\\//i)?.[0]\n\n if (filePathWindowsDrive != null && projectRootWindowsDrive == null) {\n return normalizedFilePath\n }\n\n if (\n filePathWindowsDrive != null\n && projectRootWindowsDrive != null\n && filePathWindowsDrive.toLowerCase() !== projectRootWindowsDrive.toLowerCase()\n ) {\n return normalizedFilePath\n }\n\n const projectRootDirectory = context.projectRootDirectory\n const relativeFilePath = normalizePath(relative(projectRootDirectory, filePath))\n\n if (!absolutePathPattern.test(relativeFilePath)) {\n if (relativeFilePath === '..') {\n return normalizePath(filePath)\n }\n\n if (!relativeFilePath.startsWith('../')) {\n return relativeFilePath\n }\n }\n\n return normalizePath(filePath)\n}\n\nfunction resolveModuleFilePath(moduleHref: string): string | null {\n if (!moduleHref.startsWith('file:')) {\n return null\n }\n\n try {\n return fileURLToPath(moduleHref)\n }\n catch {\n return null\n }\n}\n\nfunction createCollectedEvalEntry(\n moduleHref: string,\n moduleDefinition: EvalModule,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry | null {\n const filePath = resolveModuleFilePath(moduleHref)\n\n if (!filePath) {\n return null\n }\n\n const relativeFilePath = asProjectRelativePath(filePath, context)\n\n if (!relativeFilePath.endsWith(evalFileSuffix)) {\n return null\n }\n\n const entryName = basename(relativeFilePath, evalFileSuffix)\n\n if (entryName.length === 0) {\n return null\n }\n\n const relativeDirectory = dirname(relativeFilePath)\n const directory = relativeDirectory === '.' ? '' : relativeDirectory\n\n return {\n ...moduleDefinition.default,\n directory,\n filePath,\n id: directory.length === 0 ? entryName : `${directory}/${entryName}`,\n name: entryName,\n }\n}\n\n/**\n * Collects loaded vieval modules into sorted runner entries with stable ids.\n *\n * Call stack:\n *\n * `import.meta.glob(...)`\n * -> {@link collectEvalEntries}\n * -> {@link createCollectedEvalEntry}\n * -> {@link CollectedEvalEntry}[]\n *\n * Use when:\n * - the runner has already loaded candidate eval modules\n * - downstream scheduling needs stable entry ids and directory metadata\n */\nexport function collectEvalEntries(\n modules: EvalModuleMap,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry[] {\n return Object.entries(modules)\n .flatMap(([moduleHref, moduleDefinition]) => {\n const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context)\n\n if (!entry) {\n return []\n }\n\n return [entry]\n })\n .sort((left, right) => left.id.localeCompare(right.id))\n}\n","import type { TaskCacheRuntime } from '../cache'\nimport type { AggregatedRunResults, RunResult } from './aggregate'\nimport type { ScheduledTask } from './schedule'\nimport type { TaskExecutionContext } from './task-context'\n\nimport { errorMessageFrom } from '@moeru/std'\nimport { limitConcurrency } from '@vitest/runner/utils'\n\nimport { aggregateRunResults } from './aggregate'\n\n/**\n * Executes one scheduled runner task and returns a normalized run result.\n *\n * Use when:\n * - a scheduler already selected the task and execution context\n * - the caller wants a typed executor contract for runner workers\n *\n * Expects:\n * - the task context to be ready for model resolution and task-scoped work\n *\n * Returns:\n * - a normalized run result with score entries ready for aggregation\n */\nexport type ScheduledTaskExecutor = (\n task: ScheduledTask,\n context: TaskExecutionContext,\n) => Promise<RunResult>\n\n/**\n * Terminal task state reported by runner lifecycle hooks.\n *\n * Use when:\n * - reporting the outcome of one scheduled task to lifecycle observers\n *\n * Expects:\n * - hooks treat the value as final for the completed task\n */\nexport type RunnerTaskState = 'passed' | 'failed'\n\n/**\n * Optional runner execution hooks used while processing scheduled tasks.\n *\n * Use when:\n * - callers want lifecycle visibility around sequential task execution\n * - task execution should remain deterministic while still observable\n *\n * Expects:\n * - hook functions are synchronous lifecycle observers\n */\nexport interface RunScheduledTasksOptions {\n /**\n * Creates per-task execution context.\n *\n * Use when:\n * - executor code needs per-task model resolution or other task-scoped data\n */\n createExecutionContext?: (task: ScheduledTask) => TaskExecutionContext\n /**\n * Runs before the executor starts handling a task.\n *\n * Use when:\n * - callers want to observe task activation before execution begins\n *\n * Expects:\n * - thrown errors abort the task before executor work starts\n */\n onTaskStart?: (task: ScheduledTask) => void\n /**\n * Runs after the executor settles for a task.\n *\n * Use when:\n * - callers want to observe successful and failed task completion\n *\n * Expects:\n * - thrown errors abort successful runs\n * - failed-task observers do not override the executor error for the task\n */\n onTaskEnd?: (task: ScheduledTask, state: RunnerTaskState) => void\n /**\n * Maximum number of tasks to execute concurrently.\n *\n * @default 1\n */\n maxConcurrency?: number\n}\n\nfunction createDefaultExecutionContext(task: ScheduledTask): TaskExecutionContext {\n const cache: TaskCacheRuntime = {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n\n return {\n cache,\n model(options) {\n const requestedModelName = typeof options === 'string' ? options : options?.name\n if (requestedModelName != null) {\n throw new Error(`No model registry configured. Requested model: ${requestedModelName}`)\n }\n\n throw new Error(`No model registry configured for task inferenceExecutor id \"${task.inferenceExecutor.id}\".`)\n },\n }\n}\n\n/**\n * Error thrown when a scheduled run fails before producing a normalized result.\n */\nexport class RunnerExecutionError extends Error {\n /**\n * Stable task id that failed.\n */\n taskId: string\n\n constructor(taskId: string, cause: unknown) {\n const message = errorMessageFrom(cause) ?? 'Unknown runner execution failure.'\n super(`Runner task \"${taskId}\" failed: ${message}`)\n this.name = 'RunnerExecutionError'\n this.taskId = taskId\n this.cause = cause\n }\n}\n\nfunction createRunnerExecutionError(taskId: string, cause: unknown): RunnerExecutionError {\n if (cause instanceof RunnerExecutionError && cause.taskId === taskId) {\n return cause\n }\n\n return new RunnerExecutionError(taskId, cause)\n}\n\n/**\n * Executes runner tasks sequentially and aggregates the normalized results.\n *\n * Call stack:\n *\n * {@link createRunnerSchedule}\n * -> {@link runScheduledTasks}\n * -> `executor(task)`\n * -> {@link aggregateRunResults}\n *\n * Use when:\n * - the caller already expanded the runner matrix\n * - task execution should stay deterministic and easy to debug\n *\n * Expects:\n * - `executor` to return normalized `0..1` scores\n * - callers to handle concurrency outside this helper when needed\n * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers\n *\n * Throws:\n * - `RunnerExecutionError` when task setup, hooks, or the executor throws\n */\nexport async function runScheduledTasks(\n tasks: readonly ScheduledTask[],\n executor: ScheduledTaskExecutor,\n options: RunScheduledTasksOptions = {},\n): Promise<AggregatedRunResults> {\n if (tasks.length === 0) {\n return aggregateRunResults([])\n }\n\n async function executeScheduledTask(task: ScheduledTask): Promise<RunResult> {\n let executionContext: TaskExecutionContext\n\n try {\n executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskStart?.(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n let runResult: RunResult\n try {\n runResult = await executor(task, executionContext)\n }\n catch (error) {\n try {\n options.onTaskEnd?.(task, 'failed')\n }\n catch {\n // Failed-task observers must not mask the task execution failure.\n }\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskEnd?.(task, 'passed')\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n return runResult\n }\n\n const maxConcurrency = options.maxConcurrency ?? 1\n if (maxConcurrency <= 1) {\n const results: RunResult[] = []\n for (const task of tasks) {\n results.push(await executeScheduledTask(task))\n }\n return aggregateRunResults(results)\n }\n\n const runWithLimit = limitConcurrency(maxConcurrency)\n const resultPairs = await Promise.all(tasks.map(async (task, index) => {\n const result = await runWithLimit(async () => executeScheduledTask(task))\n return { index, result }\n }))\n\n const sortedResults = resultPairs\n .sort((left, right) => left.index - right.index)\n .map(item => item.result)\n\n return aggregateRunResults(sortedResults)\n}\n","import { createRequire } from 'node:module'\nimport { dirname } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst require = createRequire(import.meta.url)\n\n/**\n * Shared runtime context used by the vieval runner.\n *\n * Use when:\n * - runner services need stable path resolution without module-level side effects\n * - call sites want deterministic control over workspace root detection\n */\nexport interface RunnerRuntimeContext {\n /**\n * Absolute project root directory used for path normalization.\n */\n projectRootDirectory: string\n}\n\n/**\n * Options used to construct the runner runtime context.\n */\nexport interface CreateVievalRunnerRuntimeContextOptions {\n /**\n * Directory used to search for the nearest pnpm workspace.\n *\n * @default directory of this module file\n */\n cwd?: string\n /**\n * Absolute fallback directory when a pnpm workspace root is not found.\n *\n * @default package root directory (`packages/vieval`)\n */\n fallbackProjectRootDirectory?: string\n}\n\n/**\n * Creates a side-effect-free runtime context for runner path normalization.\n *\n * Call stack:\n *\n * {@link createRunnerRuntimeContext}\n * -> `findWorkspaceDir(cwd)`\n * -> `resolve projectRootDirectory`\n * -> `{ projectRootDirectory }`\n *\n * Use when:\n * - initializing runner infrastructure before collecting eval modules\n * - tests need deterministic root resolution behavior\n */\nexport async function createRunnerRuntimeContext(\n options: CreateVievalRunnerRuntimeContextOptions = {},\n): Promise<RunnerRuntimeContext> {\n const cwd = options.cwd ?? dirname(fileURLToPath(import.meta.url))\n const fallbackProjectRootDirectory = options.fallbackProjectRootDirectory\n ?? fileURLToPath(new URL('../../../', import.meta.url))\n\n // NOTICE:\n // We use dynamic `require` here because `@pnpm/find-workspace-dir` is CommonJS.\n // Keeping this load inside the factory avoids module-level initialization side effects.\n const { findWorkspaceDir } = require('@pnpm/find-workspace-dir') as {\n findWorkspaceDir: (currentWorkingDirectory: string) => Promise<string | undefined>\n }\n\n // NOTICE:\n // Workspace discovery is required to keep collected eval ids stable when this\n // package is moved inside different monorepo layouts.\n const workspaceDirectory = await findWorkspaceDir(cwd)\n\n return {\n projectRootDirectory: workspaceDirectory ?? fallbackProjectRootDirectory,\n }\n}\n","import type { CollectedEvalEntry, MatrixDefinition, MatrixLayer, MatrixValue } from '../../config'\n\n/**\n * Describes the inferenceExecutor target for a scheduled eval run.\n */\nexport interface InferenceExecutor {\n /**\n * Stable inferenceExecutor identifier such as `openai:gpt-4.1-mini`.\n */\n id: string\n}\n\n/**\n * Stores the selected value for each matrix axis.\n */\nexport type RunnerMatrixSelection = Record<string, string>\n\n/**\n * Stores stable row ids for one resolved scheduled task matrix.\n */\nexport interface ScheduledTaskMatrixMeta {\n /**\n * Stable row id for the resolved run matrix selection.\n */\n runRowId: string\n /**\n * Stable row id for the resolved eval matrix selection.\n */\n evalRowId: string\n}\n\n/**\n * Stores the structured matrix payload for one scheduled task.\n */\nexport interface ScheduledTaskMatrix {\n /**\n * Runtime matrix selection visible to task code.\n */\n run: RunnerMatrixSelection\n /**\n * Eval-time matrix selection visible to task code.\n */\n eval: RunnerMatrixSelection\n /**\n * Stable row ids for both scopes.\n */\n meta: ScheduledTaskMatrixMeta\n}\n\n/**\n * Maps matrix axis names to the values that should be expanded.\n */\nexport type RunnerMatrixDefinition = MatrixDefinition\n\n/**\n * Accepts either flat axis definitions or one layered matrix object.\n */\nexport type RunnerMatrixInput = RunnerMatrixDefinition | MatrixLayer\n\nconst matrixLayerKeys = new Set(['disable', 'extend', 'override'])\nconst ambiguousMatrixDefinitionErrorMessage = 'Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.'\n\n/**\n * Represents one fully expanded runner task.\n */\nexport interface ScheduledTask {\n /**\n * Stable task id derived from the entry, inferenceExecutor, and matrix selection.\n */\n id: string\n /**\n * The collected eval entry to execute.\n */\n entry: CollectedEvalEntry\n /**\n * The inferenceExecutor selected for this task.\n */\n inferenceExecutor: InferenceExecutor\n /**\n * The concrete scoped matrix selection for this task.\n */\n matrix: ScheduledTaskMatrix\n}\n\n/**\n * Configures how the runner should expand its execution matrix.\n */\nexport interface CreateRunnerScheduleOptions {\n /**\n * Collected eval entries that should be scheduled.\n */\n entries: readonly CollectedEvalEntry[]\n /**\n * Providers that should run each entry.\n */\n inferenceExecutors: readonly InferenceExecutor[]\n /**\n * Optional run-time matrix axes expanded as a cartesian product.\n */\n runMatrix?: RunnerMatrixInput\n /**\n * Optional eval-time matrix axes expanded as a cartesian product.\n */\n evalMatrix?: RunnerMatrixInput\n}\n\nfunction encodeTaskIdSegment(value: string): string {\n return encodeURIComponent(value)\n}\n\nfunction stringifyMatrixValue(value: MatrixValue): string {\n return String(value)\n}\n\nfunction cloneMatrixSelection(matrix: RunnerMatrixSelection): RunnerMatrixSelection {\n return { ...matrix }\n}\n\nfunction createScheduledTaskMatrix(\n runMatrix: RunnerMatrixSelection,\n evalMatrix: RunnerMatrixSelection,\n): ScheduledTaskMatrix {\n return {\n eval: cloneMatrixSelection(evalMatrix),\n meta: {\n evalRowId: createStableRowId(evalMatrix),\n runRowId: createStableRowId(runMatrix),\n },\n run: cloneMatrixSelection(runMatrix),\n }\n}\n\nfunction isMatrixLayer(matrix: RunnerMatrixInput): matrix is MatrixLayer {\n const matrixKeys = Object.keys(matrix)\n return (\n matrixKeys.length > 0\n && matrixKeys.every(key => matrixLayerKeys.has(key))\n )\n}\n\nfunction assertNonAmbiguousMatrixDefinition(matrix: RunnerMatrixInput): void {\n const matrixKeys = Object.keys(matrix)\n const hasReservedKeys = matrixKeys.some(key => matrixLayerKeys.has(key))\n const hasAxisKeys = matrixKeys.some(key => !matrixLayerKeys.has(key))\n\n if (hasReservedKeys && hasAxisKeys) {\n throw new TypeError(ambiguousMatrixDefinitionErrorMessage)\n }\n}\n\nfunction normalizeLayerInputToAxes(matrix: RunnerMatrixInput | undefined): MatrixLayer | undefined {\n if (matrix == null) {\n return undefined\n }\n\n assertNonAmbiguousMatrixDefinition(matrix)\n\n if (isMatrixLayer(matrix)) {\n return matrix\n }\n\n return {\n extend: matrix,\n }\n}\n\nfunction dedupeAxisValues(values: readonly MatrixValue[]): string[] {\n return Array.from(new Set(values.map(stringifyMatrixValue)))\n}\n\nfunction applyAxisValues(\n axes: Map<string, string[]>,\n definition: RunnerMatrixDefinition | undefined,\n mode: 'extend' | 'override',\n): void {\n if (definition == null) {\n return\n }\n\n for (const [axis, values] of Object.entries(definition)) {\n const nextValues = dedupeAxisValues(values)\n\n if (mode === 'extend') {\n const existingValues = axes.get(axis) ?? []\n axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])))\n continue\n }\n\n axes.set(axis, nextValues)\n }\n}\n\nfunction applyLayer(\n baseAxes: ReadonlyMap<string, string[]>,\n layer: MatrixLayer | undefined,\n): Map<string, string[]> {\n const nextAxes = new Map<string, string[]>(\n Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]),\n )\n\n for (const axis of layer?.disable ?? []) {\n nextAxes.delete(axis)\n }\n\n applyAxisValues(nextAxes, layer?.extend, 'extend')\n applyAxisValues(nextAxes, layer?.override, 'override')\n\n return nextAxes\n}\n\nfunction expandAxesToRows(axes: ReadonlyMap<string, readonly string[]>): RunnerMatrixSelection[] {\n if (axes.size === 0) {\n return [{}]\n }\n\n const dimensions = Array.from(axes.entries())\n\n let selections: RunnerMatrixSelection[] = [{}]\n\n for (const [axis, values] of dimensions) {\n if (values.length === 0) {\n return []\n }\n\n const nextSelections: RunnerMatrixSelection[] = []\n\n for (const selection of selections) {\n for (const value of values) {\n nextSelections.push({\n ...selection,\n [axis]: value,\n })\n }\n }\n\n selections = nextSelections\n }\n\n return selections\n}\n\nfunction createStableRowId(matrix: RunnerMatrixSelection): string {\n const segments = Object.entries(matrix)\n .sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis))\n .map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`)\n\n if (segments.length === 0) {\n return 'default'\n }\n\n return segments.join('&')\n}\n\nfunction createTaskId(entryId: string, inferenceExecutorId: string, runRowId: string, evalRowId: string): string {\n const encodedEntryId = encodeTaskIdSegment(entryId)\n const encodedProviderId = encodeTaskIdSegment(inferenceExecutorId)\n\n return [\n encodedEntryId,\n encodedProviderId,\n `run=${encodeTaskIdSegment(runRowId)}`,\n `eval=${encodeTaskIdSegment(evalRowId)}`,\n ].join('::')\n}\n\nfunction createResolvedRunAxes(\n entry: CollectedEvalEntry,\n runMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n runMatrix,\n entry.matrix?.runMatrix,\n entry.task?.matrix?.runMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\nfunction createResolvedEvalAxes(\n entry: CollectedEvalEntry,\n evalMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n evalMatrix,\n entry.matrix?.evalMatrix,\n entry.task?.matrix?.evalMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\n/**\n * Expands collected entries into a stable runner schedule.\n *\n * Call stack:\n *\n * {@link collectEvalEntries} (`../runner`)\n * -> {@link createRunnerSchedule}\n * -> {@link expandAxesToRows}\n * -> {@link ScheduledTask}[]\n *\n * Use when:\n * - the runner already knows which eval entries are available\n * - each entry must run against multiple inferenceExecutors or matrix variants\n *\n * Expects:\n * - `entries` and `inferenceExecutors` to be provided in the desired execution order\n * - matrix axes to use insertion order when generating combinations\n */\nexport function createRunnerSchedule(options: CreateRunnerScheduleOptions): ScheduledTask[] {\n if (options.entries.length === 0) {\n return []\n }\n\n if (options.inferenceExecutors.length === 0) {\n return []\n }\n\n const tasks: ScheduledTask[] = []\n\n for (const entry of options.entries) {\n const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix))\n const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix))\n\n if (runSelections.length === 0 || evalSelections.length === 0) {\n continue\n }\n\n for (const inferenceExecutor of options.inferenceExecutors) {\n for (const runMatrix of runSelections) {\n for (const evalMatrix of evalSelections) {\n const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix)\n\n tasks.push({\n entry,\n id: createTaskId(\n entry.id,\n inferenceExecutor.id,\n isolatedMatrix.meta.runRowId,\n isolatedMatrix.meta.evalRowId,\n ),\n matrix: isolatedMatrix,\n inferenceExecutor,\n })\n }\n }\n }\n }\n\n return tasks\n}\n","import type { ModelDefinition } from '../../config/models'\nimport type { TaskCacheRuntime } from '../cache'\nimport type { ScheduledTask } from './schedule'\n\nimport { resolveModelByName } from '../../config/models'\n\n/**\n * Options for selecting a model from the execution context.\n */\nexport interface TaskModelSelectionOptions {\n /**\n * Model id or alias name.\n */\n name: string\n}\n\n/**\n * Task-scoped execution context exposed to runner executors.\n */\nexport interface TaskExecutionContext {\n /**\n * Deterministic cache runtime scoped to the current task project.\n */\n cache: TaskCacheRuntime\n /**\n * Resolves model configuration for the current task.\n *\n * Use when:\n * - no arguments are provided to use the model selected by run matrix/inferenceExecutor\n * - `name` is provided to resolve a specific model id or alias\n */\n model: (\n selection?: string | TaskModelSelectionOptions,\n ) => ModelDefinition\n}\n\n/**\n * Inputs used to build task execution context.\n */\nexport interface CreateTaskExecutionContextOptions {\n cache?: TaskCacheRuntime\n models: readonly ModelDefinition[]\n task: ScheduledTask\n}\n\nfunction createNoopTaskCacheRuntime(): TaskCacheRuntime {\n return {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n}\n\nfunction resolveDefaultTaskModel(\n models: readonly ModelDefinition[],\n task: ScheduledTask,\n): ModelDefinition {\n const runMatrixModelName = task.matrix.run.model\n if (runMatrixModelName != null) {\n const matrixSelectedModel = resolveModelByName(models, runMatrixModelName)\n if (matrixSelectedModel != null) {\n return matrixSelectedModel\n }\n\n throw new Error(`Unknown configured model \"${runMatrixModelName}\" from task.matrix.run.model.`)\n }\n\n const matched = resolveModelByName(models, task.inferenceExecutor.id)\n if (matched != null) {\n return matched\n }\n\n if (models.length > 1) {\n throw new Error(\n [\n `Multiple configured models are available, but no default model is selected for inferenceExecutor \"${task.inferenceExecutor.id}\".`,\n 'Select one model explicitly by either:',\n '- setting runMatrix.override.model (or task matrix run.model)',\n '- setting project.inferenceExecutors to a matching model id',\n '- calling context.model({ name: \"your-model-id-or-alias\" })',\n ].join('\\n'),\n )\n }\n\n if (models.length === 1) {\n const firstModel = models[0]\n if (firstModel != null) {\n return firstModel\n }\n }\n\n throw new Error(`No configured model found for inferenceExecutor id \"${task.inferenceExecutor.id}\".`)\n}\n\n/**\n * Creates task-scoped model resolver context for runner execution.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link createTaskExecutionContext}\n * -> {@link resolveModelByName}\n * -> `task.model()` / `task.model({ name })`\n */\nexport function createTaskExecutionContext(options: CreateTaskExecutionContextOptions): TaskExecutionContext {\n return {\n cache: options.cache ?? createNoopTaskCacheRuntime(),\n model(selection) {\n if (selection == null) {\n return resolveDefaultTaskModel(options.models, options.task)\n }\n\n const name = typeof selection === 'string' ? selection : selection.name\n\n const namedModel = resolveModelByName(options.models, name)\n if (namedModel == null) {\n throw new Error(`Unknown configured model \"${name}\".`)\n }\n\n return namedModel\n },\n }\n}\n"],"mappings":";;;;;;;;;;;;AA2BA,SAAS,oBAAoB,OAAuB;CAClD,MAAM,aAAa,MAAM,MAAM;AAC/B,KAAI,WAAW,WAAW,EACxB,QAAO;AAGT,QAAO,WAAW,QAAQ,aAAa,IAAI;;AAG7C,SAAS,mBAAmB,WAA+B,WAAmD;AAC5G,KAAI,aAAa,QAAQ,UAAU,SAAS,EAC1C,QAAO,UAAU,WAAW,IAAI,GAAG,UAAU,MAAM,EAAE,GAAG;AAG1D,KAAI,aAAa,QAAQ,UAAU,WAAW,EAC5C;AAGF,KAAI,cAAc,mBAChB,QAAO;AAGT,KAAI,cAAc,aAChB,QAAO;AAGT,KAAI,cAAc,YAChB,QAAO;;;;;;;;;;;AAeX,SAAgB,+BAA+B,SAAqC;CAClF,MAAM,eAAe,QAAQ,IAAI,KAAI,YAAW,oBAAoB,QAAQ,CAAC;CAC7E,MAAM,YAAY,mBAAmB,QAAQ,KAAK,QAAQ,UAAU;AAEpE,KAAI,aAAa,WAAW,EAC1B,QAAO,aAAa,OAAO,CAAC,WAAW,GAAG,CAAC,YAAY,YAAY;AAGrE,KAAI,aAAa,KACf,QAAO;CAGT,MAAM,cAAc,aAAa,MAAM,GAAG,KAAK,IAAI,GAAG,aAAa,SAAS,EAAE,CAAC;CAC/E,MAAM,OAAO,aAAa,aAAa,SAAS,MAAM;AACtD,QAAO,CAAC,GAAG,aAAa,GAAG,KAAK,GAAG,YAAY;;AAGjD,eAAe,gBAAgB,MAAc,SAAyC;CACpF,MAAM,YAAY,QAAQ,KAAK;CAC/B,MAAM,gBAAgB,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG,KAAK,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,SAAS,GAAG,CAAC,MAAM,GAAG,GAAG;AACzG,OAAM,MAAM,WAAW,EAAE,WAAW,MAAM,CAAC;AAC3C,OAAM,UAAU,eAAe,QAAQ;AACvC,OAAM,OAAO,eAAe,KAAK;;AAGnC,SAAS,sBAAsB,MAA+B;AAC5D,QAAO;EACL;EACA,MAAM,SAAS;AACb,OAAI;AACF,UAAM,OAAO,KAAK;AAClB,WAAO;WAEH;AACJ,WAAO;;;EAGX,iBAAiB;AACf,UAAO,iBAAiB,KAAK;;EAE/B,MAAM,kBAAkB;AACtB,SAAM,MAAM,QAAQ,KAAK,EAAE,EAAE,WAAW,MAAM,CAAC;AAC/C,UAAO,kBAAkB,KAAK;;EAEhC,MAAM,aAAa;AACjB,UAAO,MAAM,SAAS,KAAK;;EAE7B,MAAM,YAAY,OAAO;AACvB,SAAM,gBAAgB,MAAM,MAAM;;EAEpC,MAAM,SAAS,WAAW,SAAS;AACjC,UAAO,MAAM,SAAS,MAAM,SAAS;;EAEvC,MAAM,UAAU,OAAO,WAAW,SAAS;AACzC,SAAM,gBAAgB,MAAM,OAAO,KAAK,OAAO,SAAS,CAAC;;EAE3D,MAAM,WAAc;AAClB,UAAO,KAAK,MAAM,MAAM,SAAS,MAAM,QAAQ,CAAC;;EAElD,MAAM,UAAU,OAAO;AACrB,SAAM,gBAAgB,MAAM,GAAG,KAAK,UAAU,OAAO,MAAM,EAAE,CAAC,IAAI;;EAEpE,MAAM,mBAAsB;AAC1B,UAAO,MAAM,KAAK,UAAe;;EAEnC,MAAM,sBAAyB;AAC7B,UAAO,MAAM,KAAK,UAAa;;EAElC;;AAGH,SAAS,qBAAqB,eAAuB,WAAmC;AACtF,QAAO,EACL,KAAK,SAAS;EACZ,MAAM,uBAAuB,+BAA+B,QAAQ;AACpE,SAAO,sBAAsB,KAAK,eAAe,oBAAoB,UAAU,EAAE,GAAG,qBAAqB,CAAC;IAE7G;;;;;;;;;;;;;;;;;AAkBH,SAAgB,iCACd,SACkB;CAClB,MAAM,qBAAqB,oBAAoB,QAAQ,YAAY;CACnE,MAAM,mBAAmB,oBAAoB,QAAQ,YAAY;CACjE,MAAM,gBAAgB,KAAK,QAAQ,oBAAoB,oBAAoB,iBAAiB;AAE5F,QAAO,EACL,UAAU,MAAM;AACd,SAAO,qBAAqB,eAAe,KAAK;IAEnD;;;;ACvCH,SAAS,yBAAyB,QAAkD;AAClF,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,qBAAqB,MAA4B;AACxD,KAAI,SAAS,WAAW,SAAS,QAC/B,QAAO;AAGT,OAAM,IAAI,UAAU,4BAA4B,KAAK,IAAI;;AAG3D,SAAS,QAAQ,QAA0C;AACzD,KAAI,OAAO,WAAW,EACpB,QAAO;AAIT,QADc,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAC5C,OAAO;;AAGxB,SAAS,oBAAoB,cAA6B,cAA4C;AACpG,KAAI,gBAAgB,QAAQ,gBAAgB,KAC1C,SAAQ,eAAe,gBAAgB;AAGzC,KAAI,gBAAgB,KAClB,QAAO;AAGT,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO;;AAGT,SAAS,oBAAoB,QAA2C;CACtE,MAAM,UAAwB;EAC5B,OAAO,EAAE;EACT,OAAO,EAAE;EACV;AAED,MAAK,MAAM,SAAS,QAAQ;AAG1B,MAFa,qBAAqB,MAAM,KAAK,KAEhC,SAAS;AACpB,WAAQ,MAAM,KAAK,MAAM,MAAM;AAC/B;;AAGF,UAAQ,MAAM,KAAK,MAAM,MAAM;;AAGjC,QAAO;;AAGT,SAAS,iBAAiB,QAAyC;CACjE,MAAM,UAAU,oBAAoB,OAAO,OAAO;CAClD,MAAM,eAAe,QAAQ,QAAQ,MAAM;CAC3C,MAAM,eAAe,QAAQ,QAAQ,MAAM;AAE3C,QAAO;EACL,SAAS,OAAO;EAChB;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D,IAAI,OAAO;EACX;EACA,QAAQ,yBAAyB,OAAO,OAAO;EAC/C,qBAAqB,OAAO;EAC7B;;AAGH,SAAS,sBAAsB,qBAA6B,SAA0D;CACpH,MAAM,cAAwB,EAAE;CAChC,MAAM,cAAwB,EAAE;AAEhC,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,UAAU,oBAAoB,OAAO,OAAO;AAClD,cAAY,KAAK,GAAG,QAAQ,MAAM;AAClC,cAAY,KAAK,GAAG,QAAQ,MAAM;;CAGpC,MAAM,eAAe,QAAQ,YAAY;CACzC,MAAM,eAAe,QAAQ,YAAY;AAEzC,QAAO;EACL;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D;EACA;EACA,UAAU,QAAQ;EACnB;;;;;;;;;;;;;;;;;;;;;AAsBH,SAAgB,oBAAoB,SAAqD;CACvF,MAAM,OAAO,QAAQ,IAAI,iBAAiB;CAG1C,MAAM,qBADuB,MAAM,KAAK,IAAI,IAAI,QAAQ,KAAI,WAAU,OAAO,oBAAoB,CAAC,CAAC,CAEhG,KAAK,wBAAwB;AAE5B,SAAO,sBAAsB,qBADL,QAAQ,QAAO,WAAU,OAAO,wBAAwB,oBAAoB,CAClC;GAClE,CACD,MAAM,MAAM,UAAU,KAAK,oBAAoB,cAAc,MAAM,oBAAoB,CAAC;CAE3F,MAAM,UAAU,sBACd,WACA,QACD;AAED,QAAO;EACL,SAAS;GACP,cAAc,QAAQ;GACtB,eAAe,QAAQ;GACvB,cAAc,QAAQ;GACtB,UAAU,QAAQ;GACnB;EACD;EACA;EACD;;;;ACvRH,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAE5B,SAAS,cAAc,OAAuB;AAC5C,QAAO,MAAM,WAAW,MAAM,IAAI;;;;;;;;;;;AAYpC,SAAgB,sBAAsB,UAAkB,SAAuC;CAC7F,MAAM,qBAAqB,cAAc,SAAS;CAClD,MAAM,iCAAiC,cAAc,QAAQ,qBAAqB;CAClF,MAAM,uBAAuB,mBAAmB,MAAM,aAAa,GAAG;CACtE,MAAM,0BAA0B,+BAA+B,MAAM,aAAa,GAAG;AAErF,KAAI,wBAAwB,QAAQ,2BAA2B,KAC7D,QAAO;AAGT,KACE,wBAAwB,QACrB,2BAA2B,QAC3B,qBAAqB,aAAa,KAAK,wBAAwB,aAAa,CAE/E,QAAO;CAGT,MAAM,uBAAuB,QAAQ;CACrC,MAAM,mBAAmB,cAAc,SAAS,sBAAsB,SAAS,CAAC;AAEhF,KAAI,CAAC,oBAAoB,KAAK,iBAAiB,EAAE;AAC/C,MAAI,qBAAqB,KACvB,QAAO,cAAc,SAAS;AAGhC,MAAI,CAAC,iBAAiB,WAAW,MAAM,CACrC,QAAO;;AAIX,QAAO,cAAc,SAAS;;AAGhC,SAAS,sBAAsB,YAAmC;AAChE,KAAI,CAAC,WAAW,WAAW,QAAQ,CACjC,QAAO;AAGT,KAAI;AACF,SAAO,cAAc,WAAW;SAE5B;AACJ,SAAO;;;AAIX,SAAS,yBACP,YACA,kBACA,SAC2B;CAC3B,MAAM,WAAW,sBAAsB,WAAW;AAElD,KAAI,CAAC,SACH,QAAO;CAGT,MAAM,mBAAmB,sBAAsB,UAAU,QAAQ;AAEjE,KAAI,CAAC,iBAAiB,SAAS,eAAe,CAC5C,QAAO;CAGT,MAAM,YAAY,SAAS,kBAAkB,eAAe;AAE5D,KAAI,UAAU,WAAW,EACvB,QAAO;CAGT,MAAM,oBAAoB,QAAQ,iBAAiB;CACnD,MAAM,YAAY,sBAAsB,MAAM,KAAK;AAEnD,QAAO;EACL,GAAG,iBAAiB;EACpB;EACA;EACA,IAAI,UAAU,WAAW,IAAI,YAAY,GAAG,UAAU,GAAG;EACzD,MAAM;EACP;;;;;;;;;;;;;;;;AAiBH,SAAgB,mBACd,SACA,SACsB;AACtB,QAAO,OAAO,QAAQ,QAAQ,CAC3B,SAAS,CAAC,YAAY,sBAAsB;EAC3C,MAAM,QAAQ,yBAAyB,YAAY,kBAAkB,QAAQ;AAE7E,MAAI,CAAC,MACH,QAAO,EAAE;AAGX,SAAO,CAAC,MAAM;GACd,CACD,MAAM,MAAM,UAAU,KAAK,GAAG,cAAc,MAAM,GAAG,CAAC;;;;AC9C3D,SAAS,8BAA8B,MAA2C;AAYhF,QAAO;EACL,OAZ8B,EAC9B,UAAU,MAAM;AACd,UAAO,EACL,KAAK,SAAS;IACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,UAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;MAE3G;KAEJ;EAIC,MAAM,SAAS;GACb,MAAM,qBAAqB,OAAO,YAAY,WAAW,UAAU,SAAS;AAC5E,OAAI,sBAAsB,KACxB,OAAM,IAAI,MAAM,kDAAkD,qBAAqB;AAGzF,SAAM,IAAI,MAAM,+DAA+D,KAAK,kBAAkB,GAAG,IAAI;;EAEhH;;;;;AAMH,IAAa,uBAAb,cAA0C,MAAM;;;;CAI9C;CAEA,YAAY,QAAgB,OAAgB;EAC1C,MAAM,UAAU,iBAAiB,MAAM,IAAI;AAC3C,QAAM,gBAAgB,OAAO,YAAY,UAAU;AACnD,OAAK,OAAO;AACZ,OAAK,SAAS;AACd,OAAK,QAAQ;;;AAIjB,SAAS,2BAA2B,QAAgB,OAAsC;AACxF,KAAI,iBAAiB,wBAAwB,MAAM,WAAW,OAC5D,QAAO;AAGT,QAAO,IAAI,qBAAqB,QAAQ,MAAM;;;;;;;;;;;;;;;;;;;;;;;;AAyBhD,eAAsB,kBACpB,OACA,UACA,UAAoC,EAAE,EACP;AAC/B,KAAI,MAAM,WAAW,EACnB,QAAO,oBAAoB,EAAE,CAAC;CAGhC,eAAe,qBAAqB,MAAyC;EAC3E,IAAI;AAEJ,MAAI;AACF,sBAAmB,QAAQ,yBAAyB,KAAK,IAAI,8BAA8B,KAAK;WAE3F,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,cAAc,KAAK;WAEtB,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;EAGlD,IAAI;AACJ,MAAI;AACF,eAAY,MAAM,SAAS,MAAM,iBAAiB;WAE7C,OAAO;AACZ,OAAI;AACF,YAAQ,YAAY,MAAM,SAAS;WAE/B;AAGN,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,YAAY,MAAM,SAAS;WAE9B,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,SAAO;;CAGT,MAAM,iBAAiB,QAAQ,kBAAkB;AACjD,KAAI,kBAAkB,GAAG;EACvB,MAAM,UAAuB,EAAE;AAC/B,OAAK,MAAM,QAAQ,MACjB,SAAQ,KAAK,MAAM,qBAAqB,KAAK,CAAC;AAEhD,SAAO,oBAAoB,QAAQ;;CAGrC,MAAM,eAAe,iBAAiB,eAAe;AAUrD,QAAO,qBATa,MAAM,QAAQ,IAAI,MAAM,IAAI,OAAO,MAAM,UAAU;AAErE,SAAO;GAAE;GAAO,QADD,MAAM,aAAa,YAAY,qBAAqB,KAAK,CAAC;GACjD;GACxB,CAAC,EAGA,MAAM,MAAM,UAAU,KAAK,QAAQ,MAAM,MAAM,CAC/C,KAAI,SAAQ,KAAK,OAAO,CAEc;;;;AChO3C,MAAM,UAAU,cAAc,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;AAgD9C,eAAsB,2BACpB,UAAmD,EAAE,EACtB;CAC/B,MAAM,MAAM,QAAQ,OAAO,QAAQ,cAAc,OAAO,KAAK,IAAI,CAAC;CAClE,MAAM,+BAA+B,QAAQ,gCACxC,cAAc,IAAI,IAAI,aAAa,OAAO,KAAK,IAAI,CAAC;CAKzD,MAAM,EAAE,qBAAqB,QAAQ,2BAA2B;AAShE,QAAO,EACL,sBAHyB,MAAM,iBAAiB,IAAI,IAGR,8BAC7C;;;;ACdH,MAAM,kBAAkB,IAAI,IAAI;CAAC;CAAW;CAAU;CAAW,CAAC;AAClE,MAAM,wCAAwC;AA8C9C,SAAS,oBAAoB,OAAuB;AAClD,QAAO,mBAAmB,MAAM;;AAGlC,SAAS,qBAAqB,OAA4B;AACxD,QAAO,OAAO,MAAM;;AAGtB,SAAS,qBAAqB,QAAsD;AAClF,QAAO,EAAE,GAAG,QAAQ;;AAGtB,SAAS,0BACP,WACA,YACqB;AACrB,QAAO;EACL,MAAM,qBAAqB,WAAW;EACtC,MAAM;GACJ,WAAW,kBAAkB,WAAW;GACxC,UAAU,kBAAkB,UAAU;GACvC;EACD,KAAK,qBAAqB,UAAU;EACrC;;AAGH,SAAS,cAAc,QAAkD;CACvE,MAAM,aAAa,OAAO,KAAK,OAAO;AACtC,QACE,WAAW,SAAS,KACjB,WAAW,OAAM,QAAO,gBAAgB,IAAI,IAAI,CAAC;;AAIxD,SAAS,mCAAmC,QAAiC;CAC3E,MAAM,aAAa,OAAO,KAAK,OAAO;CACtC,MAAM,kBAAkB,WAAW,MAAK,QAAO,gBAAgB,IAAI,IAAI,CAAC;CACxE,MAAM,cAAc,WAAW,MAAK,QAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC;AAErE,KAAI,mBAAmB,YACrB,OAAM,IAAI,UAAU,sCAAsC;;AAI9D,SAAS,0BAA0B,QAAgE;AACjG,KAAI,UAAU,KACZ;AAGF,oCAAmC,OAAO;AAE1C,KAAI,cAAc,OAAO,CACvB,QAAO;AAGT,QAAO,EACL,QAAQ,QACT;;AAGH,SAAS,iBAAiB,QAA0C;AAClE,QAAO,MAAM,KAAK,IAAI,IAAI,OAAO,IAAI,qBAAqB,CAAC,CAAC;;AAG9D,SAAS,gBACP,MACA,YACA,MACM;AACN,KAAI,cAAc,KAChB;AAGF,MAAK,MAAM,CAAC,MAAM,WAAW,OAAO,QAAQ,WAAW,EAAE;EACvD,MAAM,aAAa,iBAAiB,OAAO;AAE3C,MAAI,SAAS,UAAU;GACrB,MAAM,iBAAiB,KAAK,IAAI,KAAK,IAAI,EAAE;AAC3C,QAAK,IAAI,MAAM,MAAM,KAAK,IAAI,IAAI,CAAC,GAAG,gBAAgB,GAAG,WAAW,CAAC,CAAC,CAAC;AACvE;;AAGF,OAAK,IAAI,MAAM,WAAW;;;AAI9B,SAAS,WACP,UACA,OACuB;CACvB,MAAM,WAAW,IAAI,IACnB,MAAM,KAAK,SAAS,SAAS,CAAC,CAAC,KAAK,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAC5E;AAED,MAAK,MAAM,QAAQ,OAAO,WAAW,EAAE,CACrC,UAAS,OAAO,KAAK;AAGvB,iBAAgB,UAAU,OAAO,QAAQ,SAAS;AAClD,iBAAgB,UAAU,OAAO,UAAU,WAAW;AAEtD,QAAO;;AAGT,SAAS,iBAAiB,MAAuE;AAC/F,KAAI,KAAK,SAAS,EAChB,QAAO,CAAC,EAAE,CAAC;CAGb,MAAM,aAAa,MAAM,KAAK,KAAK,SAAS,CAAC;CAE7C,IAAI,aAAsC,CAAC,EAAE,CAAC;AAE9C,MAAK,MAAM,CAAC,MAAM,WAAW,YAAY;AACvC,MAAI,OAAO,WAAW,EACpB,QAAO,EAAE;EAGX,MAAM,iBAA0C,EAAE;AAElD,OAAK,MAAM,aAAa,WACtB,MAAK,MAAM,SAAS,OAClB,gBAAe,KAAK;GAClB,GAAG;IACF,OAAO;GACT,CAAC;AAIN,eAAa;;AAGf,QAAO;;AAGT,SAAS,kBAAkB,QAAuC;CAChE,MAAM,WAAW,OAAO,QAAQ,OAAO,CACpC,MAAM,CAAC,WAAW,CAAC,eAAe,SAAS,cAAc,UAAU,CAAC,CACpE,KAAK,CAAC,MAAM,WAAW,GAAG,oBAAoB,KAAK,CAAC,GAAG,oBAAoB,MAAM,GAAG;AAEvF,KAAI,SAAS,WAAW,EACtB,QAAO;AAGT,QAAO,SAAS,KAAK,IAAI;;AAG3B,SAAS,aAAa,SAAiB,qBAA6B,UAAkB,WAA2B;AAI/G,QAAO;EAHgB,oBAAoB,QAAQ;EACzB,oBAAoB,oBAAoB;EAKhE,OAAO,oBAAoB,SAAS;EACpC,QAAQ,oBAAoB,UAAU;EACvC,CAAC,KAAK,KAAK;;AAGd,SAAS,sBACP,OACA,WACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;AAGT,SAAS,uBACP,OACA,YACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;;;;;;;;;;;;;;;;;;;AAqBT,SAAgB,qBAAqB,SAAuD;AAC1F,KAAI,QAAQ,QAAQ,WAAW,EAC7B,QAAO,EAAE;AAGX,KAAI,QAAQ,mBAAmB,WAAW,EACxC,QAAO,EAAE;CAGX,MAAM,QAAyB,EAAE;AAEjC,MAAK,MAAM,SAAS,QAAQ,SAAS;EACnC,MAAM,gBAAgB,iBAAiB,sBAAsB,OAAO,QAAQ,UAAU,CAAC;EACvF,MAAM,iBAAiB,iBAAiB,uBAAuB,OAAO,QAAQ,WAAW,CAAC;AAE1F,MAAI,cAAc,WAAW,KAAK,eAAe,WAAW,EAC1D;AAGF,OAAK,MAAM,qBAAqB,QAAQ,mBACtC,MAAK,MAAM,aAAa,cACtB,MAAK,MAAM,cAAc,gBAAgB;GACvC,MAAM,iBAAiB,0BAA0B,WAAW,WAAW;AAEvE,SAAM,KAAK;IACT;IACA,IAAI,aACF,MAAM,IACN,kBAAkB,IAClB,eAAe,KAAK,UACpB,eAAe,KAAK,UACrB;IACD,QAAQ;IACR;IACD,CAAC;;;AAMV,QAAO;;;;ACxTT,SAAS,6BAA+C;AACtD,QAAO,EACL,UAAU,MAAM;AACd,SAAO,EACL,KAAK,SAAS;GACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,SAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;KAE3G;IAEJ;;AAGH,SAAS,wBACP,QACA,MACiB;CACjB,MAAM,qBAAqB,KAAK,OAAO,IAAI;AAC3C,KAAI,sBAAsB,MAAM;EAC9B,MAAM,sBAAsB,mBAAmB,QAAQ,mBAAmB;AAC1E,MAAI,uBAAuB,KACzB,QAAO;AAGT,QAAM,IAAI,MAAM,6BAA6B,mBAAmB,+BAA+B;;CAGjG,MAAM,UAAU,mBAAmB,QAAQ,KAAK,kBAAkB,GAAG;AACrE,KAAI,WAAW,KACb,QAAO;AAGT,KAAI,OAAO,SAAS,EAClB,OAAM,IAAI,MACR;EACE,qGAAqG,KAAK,kBAAkB,GAAG;EAC/H;EACA;EACA;EACA;EACD,CAAC,KAAK,KAAK,CACb;AAGH,KAAI,OAAO,WAAW,GAAG;EACvB,MAAM,aAAa,OAAO;AAC1B,MAAI,cAAc,KAChB,QAAO;;AAIX,OAAM,IAAI,MAAM,uDAAuD,KAAK,kBAAkB,GAAG,IAAI;;;;;;;;;;;;AAavG,SAAgB,2BAA2B,SAAkE;AAC3G,QAAO;EACL,OAAO,QAAQ,SAAS,4BAA4B;EACpD,MAAM,WAAW;AACf,OAAI,aAAa,KACf,QAAO,wBAAwB,QAAQ,QAAQ,QAAQ,KAAK;GAG9D,MAAM,OAAO,OAAO,cAAc,WAAW,YAAY,UAAU;GAEnE,MAAM,aAAa,mBAAmB,QAAQ,QAAQ,KAAK;AAC3D,OAAI,cAAc,KAChB,OAAM,IAAI,MAAM,6BAA6B,KAAK,IAAI;AAGxD,UAAO;;EAEV"}
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/cache/filesystem.ts","../../../src/core/runner/aggregate.ts","../../../src/core/runner/collect.ts","../../../src/core/runner/run.ts","../../../src/core/runner/runtime-context.ts","../../../src/core/runner/schedule.ts","../../../src/core/runner/task-context.ts"],"sourcesContent":["import type { CacheFileHandle, CacheFileOptions, CacheNamespace, TaskCacheRuntime } from './types'\n\nimport process from 'node:process'\n\nimport { Buffer } from 'node:buffer'\nimport { createReadStream, createWriteStream } from 'node:fs'\nimport { access, mkdir, readFile, rename, writeFile } from 'node:fs/promises'\nimport { dirname, join } from 'node:path'\n\n/**\n * Options for creating the filesystem-backed task cache runtime.\n */\nexport interface CreateFilesystemTaskCacheRuntimeOptions {\n /**\n * Absolute cache root directory.\n */\n cacheRootDirectory: string\n /**\n * Project identifier under one workspace cache scope.\n */\n projectName: string\n /**\n * Workspace identifier used to share cache roots across projects.\n */\n workspaceId: string\n}\n\nfunction sanitizePathSegment(value: string): string {\n const normalized = value.trim()\n if (normalized.length === 0) {\n return 'default'\n }\n\n return normalized.replace(/[^\\w.-]+/g, '-')\n}\n\nfunction normalizeExtension(extension: string | undefined, mediaType: string | undefined): string | undefined {\n if (extension != null && extension.length > 0) {\n return extension.startsWith('.') ? extension.slice(1) : extension\n }\n\n if (mediaType == null || mediaType.length === 0) {\n return undefined\n }\n\n if (mediaType === 'application/json') {\n return 'json'\n }\n\n if (mediaType === 'text/plain') {\n return 'txt'\n }\n\n if (mediaType === 'audio/wav') {\n return 'wav'\n }\n\n return undefined\n}\n\n/**\n * Normalizes cache file options into deterministic relative path segments.\n *\n * Before:\n * - `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`\n *\n * After:\n * - `['cases', 'dataset-hash', 'v1.json']`\n */\nexport function normalizeCacheFilePathSegments(options: CacheFileOptions): string[] {\n const sanitizedKey = options.key.map(segment => sanitizePathSegment(segment))\n const extension = normalizeExtension(options.ext, options.mediaType)\n\n if (sanitizedKey.length === 0) {\n return extension == null ? ['artifact'] : [`artifact.${extension}`]\n }\n\n if (extension == null) {\n return sanitizedKey\n }\n\n const withoutTail = sanitizedKey.slice(0, Math.max(0, sanitizedKey.length - 1))\n const tail = sanitizedKey[sanitizedKey.length - 1] ?? 'artifact'\n return [...withoutTail, `${tail}.${extension}`]\n}\n\nasync function writeAtomically(path: string, content: Buffer | string): Promise<void> {\n const directory = dirname(path)\n const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`\n await mkdir(directory, { recursive: true })\n await writeFile(temporaryPath, content)\n await rename(temporaryPath, path)\n}\n\nfunction createCacheFileHandle(path: string): CacheFileHandle {\n return {\n path,\n async exists() {\n try {\n await access(path)\n return true\n }\n catch {\n return false\n }\n },\n openReadStream() {\n return createReadStream(path)\n },\n async openWriteStream() {\n await mkdir(dirname(path), { recursive: true })\n return createWriteStream(path)\n },\n async readBuffer() {\n return await readFile(path)\n },\n async writeBuffer(value) {\n await writeAtomically(path, value)\n },\n async readText(encoding = 'utf-8') {\n return await readFile(path, encoding)\n },\n async writeText(value, encoding = 'utf-8') {\n await writeAtomically(path, Buffer.from(value, encoding))\n },\n async readJson<T>() {\n return JSON.parse(await readFile(path, 'utf-8')) as T\n },\n async writeJson(value) {\n await writeAtomically(path, `${JSON.stringify(value, null, 2)}\\n`)\n },\n async loadAsCasesInput<T>() {\n return await this.readJson<T[]>()\n },\n async loadAsExpectFixture<T>() {\n return await this.readJson<T>()\n },\n }\n}\n\nfunction createCacheNamespace(baseDirectory: string, namespace: string): CacheNamespace {\n return {\n file(options) {\n const relativePathSegments = normalizeCacheFilePathSegments(options)\n return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments))\n },\n }\n}\n\n/**\n * Creates a deterministic filesystem-backed task cache runtime.\n *\n * Use when:\n * - eval tasks need reproducible cache paths for expensive pre-processing outputs\n * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes\n *\n * Expects:\n * - `cacheRootDirectory` to be writable by the running process\n * - `workspaceId` + `projectName` to stay stable for reproducible paths\n *\n * Returns:\n * - task cache runtime that resolves namespaced file handles under:\n * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`\n */\nexport function createFilesystemTaskCacheRuntime(\n options: CreateFilesystemTaskCacheRuntimeOptions,\n): TaskCacheRuntime {\n const workspaceDirectory = sanitizePathSegment(options.workspaceId)\n const projectDirectory = sanitizePathSegment(options.projectName)\n const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory)\n\n return {\n namespace(name) {\n return createCacheNamespace(baseDirectory, name)\n },\n }\n}\n","import type { ScheduledTaskMatrix } from './schedule'\n\n/**\n * Identifies the scoring family for a single eval score.\n */\nexport type RunScoreKind = 'exact' | 'judge'\n\n/**\n * Represents one normalized score emitted by a completed eval run.\n */\nexport interface RunScore {\n /**\n * Score family used for aggregation.\n */\n kind: RunScoreKind\n /**\n * Normalized score in the `0..1` range.\n */\n score: number\n}\n\n/**\n * Captures the output of one scheduled runner task.\n */\nexport interface RunResult {\n /**\n * Stable run id, usually copied from the scheduled task id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Raw scores emitted by the eval.\n */\n scores: readonly RunScore[]\n}\n\n/**\n * Stores the per-run score averages after normalization.\n */\nexport interface AggregatedRunSummary {\n /**\n * Stable run id.\n */\n id: string\n /**\n * Collected eval entry id.\n */\n entryId: string\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Concrete matrix selection used by the run.\n */\n matrix: ScheduledTaskMatrix\n /**\n * Mean of exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average. Uses both families when present, otherwise falls back to the\n * single available family.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores inferenceExecutor-level score aggregates across multiple runs.\n */\nexport interface AggregatedProviderSummary {\n /**\n * Stable inferenceExecutor id.\n */\n inferenceExecutorId: string\n /**\n * Number of runs included in this inferenceExecutor bucket.\n */\n runCount: number\n /**\n * Mean of all exact-match scores or `null` when absent.\n */\n exactAverage: number | null\n /**\n * Mean of all judge-based scores or `null` when absent.\n */\n judgeAverage: number | null\n /**\n * Hybrid average derived from the inferenceExecutor exact and judge means.\n */\n hybridAverage: number | null\n}\n\n/**\n * Stores the final aggregation output for a batch of runner results.\n */\nexport interface AggregatedRunResults {\n /**\n * Per-run normalized score summaries.\n */\n runs: AggregatedRunSummary[]\n /**\n * Provider-level summaries sorted by inferenceExecutor id.\n */\n inferenceExecutors: AggregatedProviderSummary[]\n /**\n * Overall summary across every run.\n */\n overall: {\n exactAverage: number | null\n judgeAverage: number | null\n hybridAverage: number | null\n runCount: number\n }\n}\n\ninterface ScoreBuckets {\n exact: number[]\n judge: number[]\n}\n\nfunction cloneScheduledTaskMatrix(matrix: ScheduledTaskMatrix): ScheduledTaskMatrix {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction assertKnownScoreKind(kind: string): RunScoreKind {\n if (kind === 'exact' || kind === 'judge') {\n return kind\n }\n\n throw new TypeError(`Unknown eval score kind \"${kind}\".`)\n}\n\nfunction average(scores: readonly number[]): number | null {\n if (scores.length === 0) {\n return null\n }\n\n const total = scores.reduce((sum, score) => sum + score, 0)\n return total / scores.length\n}\n\nfunction createHybridAverage(exactAverage: number | null, judgeAverage: number | null): number | null {\n if (exactAverage != null && judgeAverage != null) {\n return (exactAverage + judgeAverage) / 2\n }\n\n if (exactAverage != null) {\n return exactAverage\n }\n\n if (judgeAverage != null) {\n return judgeAverage\n }\n\n return null\n}\n\nfunction collectScoreBuckets(scores: readonly RunScore[]): ScoreBuckets {\n const buckets: ScoreBuckets = {\n exact: [],\n judge: [],\n }\n\n for (const score of scores) {\n const kind = assertKnownScoreKind(score.kind)\n\n if (kind === 'exact') {\n buckets.exact.push(score.score)\n continue\n }\n\n buckets.judge.push(score.score)\n }\n\n return buckets\n}\n\nfunction createRunSummary(result: RunResult): AggregatedRunSummary {\n const buckets = collectScoreBuckets(result.scores)\n const exactAverage = average(buckets.exact)\n const judgeAverage = average(buckets.judge)\n\n return {\n entryId: result.entryId,\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n id: result.id,\n judgeAverage,\n matrix: cloneScheduledTaskMatrix(result.matrix),\n inferenceExecutorId: result.inferenceExecutorId,\n }\n}\n\nfunction createProviderSummary(inferenceExecutorId: string, results: readonly RunResult[]): AggregatedProviderSummary {\n const exactScores: number[] = []\n const judgeScores: number[] = []\n\n for (const result of results) {\n const buckets = collectScoreBuckets(result.scores)\n exactScores.push(...buckets.exact)\n judgeScores.push(...buckets.judge)\n }\n\n const exactAverage = average(exactScores)\n const judgeAverage = average(judgeScores)\n\n return {\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n judgeAverage,\n inferenceExecutorId,\n runCount: results.length,\n }\n}\n\n/**\n * Aggregates exact-match and judge-based scores into hybrid runner summaries.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link createRunSummary}\n * -> {@link createProviderSummary}\n * -> `report output`\n *\n * Use when:\n * - a runner batch mixes deterministic exact checks with judge-based grading\n * - inferenceExecutor comparison should preserve both score families and one hybrid view\n *\n * Expects:\n * - each score to be normalized to the `0..1` range before aggregation\n * - `scores.kind` to use only `'exact'` or `'judge'`\n */\nexport function aggregateRunResults(results: readonly RunResult[]): AggregatedRunResults {\n const runs = results.map(createRunSummary)\n\n const inferenceExecutorIds = Array.from(new Set(results.map(result => result.inferenceExecutorId)))\n const inferenceExecutors = inferenceExecutorIds\n .map((inferenceExecutorId) => {\n const providerResults = results.filter(result => result.inferenceExecutorId === inferenceExecutorId)\n return createProviderSummary(inferenceExecutorId, providerResults)\n })\n .sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId))\n\n const overall = createProviderSummary(\n 'overall',\n results,\n )\n\n return {\n overall: {\n exactAverage: overall.exactAverage,\n hybridAverage: overall.hybridAverage,\n judgeAverage: overall.judgeAverage,\n runCount: overall.runCount,\n },\n inferenceExecutors,\n runs,\n }\n}\n","import type { CollectedEvalEntry, EvalModule, EvalModuleMap } from '../../config'\nimport type { RunnerRuntimeContext } from './runtime-context'\n\nimport { basename, dirname, relative } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst evalFileSuffix = '.eval.ts'\nconst absolutePathPattern = /^(?:[A-Z]:\\/|\\/|\\\\\\\\)/i\n\nfunction normalizePath(value: string): string {\n return value.replaceAll('\\\\', '/')\n}\n\n/**\n * Converts a file path into a project-relative path when possible.\n *\n * Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n *\n * Before: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n */\nexport function asProjectRelativePath(filePath: string, context: RunnerRuntimeContext): string {\n const normalizedFilePath = normalizePath(filePath)\n const normalizedProjectRootDirectory = normalizePath(context.projectRootDirectory)\n const filePathWindowsDrive = normalizedFilePath.match(/^[A-Z]:\\//i)?.[0]\n const projectRootWindowsDrive = normalizedProjectRootDirectory.match(/^[A-Z]:\\//i)?.[0]\n\n if (filePathWindowsDrive != null && projectRootWindowsDrive == null) {\n return normalizedFilePath\n }\n\n if (\n filePathWindowsDrive != null\n && projectRootWindowsDrive != null\n && filePathWindowsDrive.toLowerCase() !== projectRootWindowsDrive.toLowerCase()\n ) {\n return normalizedFilePath\n }\n\n const projectRootDirectory = context.projectRootDirectory\n const relativeFilePath = normalizePath(relative(projectRootDirectory, filePath))\n\n if (!absolutePathPattern.test(relativeFilePath)) {\n if (relativeFilePath === '..') {\n return normalizePath(filePath)\n }\n\n if (!relativeFilePath.startsWith('../')) {\n return relativeFilePath\n }\n }\n\n return normalizePath(filePath)\n}\n\nfunction resolveModuleFilePath(moduleHref: string): string | null {\n if (!moduleHref.startsWith('file:')) {\n return null\n }\n\n try {\n return fileURLToPath(moduleHref)\n }\n catch {\n return null\n }\n}\n\nfunction createCollectedEvalEntry(\n moduleHref: string,\n moduleDefinition: EvalModule,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry | null {\n const filePath = resolveModuleFilePath(moduleHref)\n\n if (!filePath) {\n return null\n }\n\n const relativeFilePath = asProjectRelativePath(filePath, context)\n\n if (!relativeFilePath.endsWith(evalFileSuffix)) {\n return null\n }\n\n const entryName = basename(relativeFilePath, evalFileSuffix)\n\n if (entryName.length === 0) {\n return null\n }\n\n const relativeDirectory = dirname(relativeFilePath)\n const directory = relativeDirectory === '.' ? '' : relativeDirectory\n\n return {\n ...moduleDefinition.default,\n directory,\n filePath,\n id: directory.length === 0 ? entryName : `${directory}/${entryName}`,\n name: entryName,\n }\n}\n\n/**\n * Collects loaded vieval modules into sorted runner entries with stable ids.\n *\n * Call stack:\n *\n * `import.meta.glob(...)`\n * -> {@link collectEvalEntries}\n * -> {@link createCollectedEvalEntry}\n * -> {@link CollectedEvalEntry}[]\n *\n * Use when:\n * - the runner has already loaded candidate eval modules\n * - downstream scheduling needs stable entry ids and directory metadata\n */\nexport function collectEvalEntries(\n modules: EvalModuleMap,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry[] {\n return Object.entries(modules)\n .flatMap(([moduleHref, moduleDefinition]) => {\n const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context)\n\n if (!entry) {\n return []\n }\n\n return [entry]\n })\n .sort((left, right) => left.id.localeCompare(right.id))\n}\n","import type { TaskCacheRuntime } from '../cache'\nimport type { AggregatedRunResults, RunResult } from './aggregate'\nimport type { ScheduledTask } from './schedule'\nimport type { TaskExecutionContext } from './task-context'\n\nimport { errorMessageFrom } from '@moeru/std'\nimport { limitConcurrency } from '@vitest/runner/utils'\n\nimport { aggregateRunResults } from './aggregate'\n\n/**\n * Executes one scheduled runner task and returns a normalized run result.\n *\n * Use when:\n * - a scheduler already selected the task and execution context\n * - the caller wants a typed executor contract for runner workers\n *\n * Expects:\n * - the task context to be ready for model resolution and task-scoped work\n *\n * Returns:\n * - a normalized run result with score entries ready for aggregation\n */\nexport type ScheduledTaskExecutor = (\n task: ScheduledTask,\n context: TaskExecutionContext,\n) => Promise<RunResult>\n\n/**\n * Terminal task state reported by runner lifecycle hooks.\n *\n * Use when:\n * - reporting the outcome of one scheduled task to lifecycle observers\n *\n * Expects:\n * - hooks treat the value as final for the completed task\n */\nexport type RunnerTaskState = 'passed' | 'failed'\n\n/**\n * Optional runner execution hooks used while processing scheduled tasks.\n *\n * Use when:\n * - callers want lifecycle visibility around sequential task execution\n * - task execution should remain deterministic while still observable\n *\n * Expects:\n * - hook functions are synchronous lifecycle observers\n */\nexport interface RunScheduledTasksOptions {\n /**\n * Creates per-task execution context.\n *\n * Use when:\n * - executor code needs per-task models, cache, or other task-scoped data\n */\n createExecutionContext?: (task: ScheduledTask) => TaskExecutionContext\n /**\n * Runs before the executor starts handling a task.\n *\n * Use when:\n * - callers want to observe task activation before execution begins\n *\n * Expects:\n * - thrown errors abort the task before executor work starts\n */\n onTaskStart?: (task: ScheduledTask) => void\n /**\n * Runs after the executor settles for a task.\n *\n * Use when:\n * - callers want to observe successful and failed task completion\n *\n * Expects:\n * - thrown errors abort successful runs\n * - failed-task observers do not override the executor error for the task\n */\n onTaskEnd?: (task: ScheduledTask, state: RunnerTaskState) => void\n /**\n * Maximum number of tasks to execute concurrently.\n *\n * @default 1\n */\n maxConcurrency?: number\n}\n\nfunction createDefaultExecutionContext(): TaskExecutionContext {\n const cache: TaskCacheRuntime = {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n\n return {\n cache,\n models: [],\n }\n}\n\n/**\n * Error thrown when a scheduled run fails before producing a normalized result.\n */\nexport class RunnerExecutionError extends Error {\n /**\n * Stable task id that failed.\n */\n taskId: string\n\n constructor(taskId: string, cause: unknown) {\n const message = errorMessageFrom(cause) ?? 'Unknown runner execution failure.'\n super(`Runner task \"${taskId}\" failed: ${message}`)\n this.name = 'RunnerExecutionError'\n this.taskId = taskId\n this.cause = cause\n }\n}\n\nfunction createRunnerExecutionError(taskId: string, cause: unknown): RunnerExecutionError {\n if (cause instanceof RunnerExecutionError && cause.taskId === taskId) {\n return cause\n }\n\n return new RunnerExecutionError(taskId, cause)\n}\n\n/**\n * Executes runner tasks sequentially and aggregates the normalized results.\n *\n * Call stack:\n *\n * {@link createRunnerSchedule}\n * -> {@link runScheduledTasks}\n * -> `executor(task)`\n * -> {@link aggregateRunResults}\n *\n * Use when:\n * - the caller already expanded the runner matrix\n * - task execution should stay deterministic and easy to debug\n *\n * Expects:\n * - `executor` to return normalized `0..1` scores\n * - callers to handle concurrency outside this helper when needed\n * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers\n *\n * Throws:\n * - `RunnerExecutionError` when task setup, hooks, or the executor throws\n */\nexport async function runScheduledTasks(\n tasks: readonly ScheduledTask[],\n executor: ScheduledTaskExecutor,\n options: RunScheduledTasksOptions = {},\n): Promise<AggregatedRunResults> {\n if (tasks.length === 0) {\n return aggregateRunResults([])\n }\n\n async function executeScheduledTask(task: ScheduledTask): Promise<RunResult> {\n let executionContext: TaskExecutionContext\n\n try {\n executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext()\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskStart?.(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n let runResult: RunResult\n try {\n runResult = await executor(task, executionContext)\n }\n catch (error) {\n try {\n options.onTaskEnd?.(task, 'failed')\n }\n catch {\n // Failed-task observers must not mask the task execution failure.\n }\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskEnd?.(task, 'passed')\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n return runResult\n }\n\n const maxConcurrency = options.maxConcurrency ?? 1\n if (maxConcurrency <= 1) {\n const results: RunResult[] = []\n for (const task of tasks) {\n results.push(await executeScheduledTask(task))\n }\n return aggregateRunResults(results)\n }\n\n const runWithLimit = limitConcurrency(maxConcurrency)\n const resultPairs = await Promise.all(tasks.map(async (task, index) => {\n const result = await runWithLimit(async () => executeScheduledTask(task))\n return { index, result }\n }))\n\n const sortedResults = resultPairs\n .sort((left, right) => left.index - right.index)\n .map(item => item.result)\n\n return aggregateRunResults(sortedResults)\n}\n","import { createRequire } from 'node:module'\nimport { dirname } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst require = createRequire(import.meta.url)\n\n/**\n * Shared runtime context used by the vieval runner.\n *\n * Use when:\n * - runner services need stable path resolution without module-level side effects\n * - call sites want deterministic control over workspace root detection\n */\nexport interface RunnerRuntimeContext {\n /**\n * Absolute project root directory used for path normalization.\n */\n projectRootDirectory: string\n}\n\n/**\n * Options used to construct the runner runtime context.\n */\nexport interface CreateVievalRunnerRuntimeContextOptions {\n /**\n * Directory used to search for the nearest pnpm workspace.\n *\n * @default directory of this module file\n */\n cwd?: string\n /**\n * Absolute fallback directory when a pnpm workspace root is not found.\n *\n * @default package root directory (`packages/vieval`)\n */\n fallbackProjectRootDirectory?: string\n}\n\n/**\n * Creates a side-effect-free runtime context for runner path normalization.\n *\n * Call stack:\n *\n * {@link createRunnerRuntimeContext}\n * -> `findWorkspaceDir(cwd)`\n * -> `resolve projectRootDirectory`\n * -> `{ projectRootDirectory }`\n *\n * Use when:\n * - initializing runner infrastructure before collecting eval modules\n * - tests need deterministic root resolution behavior\n */\nexport async function createRunnerRuntimeContext(\n options: CreateVievalRunnerRuntimeContextOptions = {},\n): Promise<RunnerRuntimeContext> {\n const cwd = options.cwd ?? dirname(fileURLToPath(import.meta.url))\n const fallbackProjectRootDirectory = options.fallbackProjectRootDirectory\n ?? fileURLToPath(new URL('../../../', import.meta.url))\n\n // NOTICE:\n // We use dynamic `require` here because `@pnpm/find-workspace-dir` is CommonJS.\n // Keeping this load inside the factory avoids module-level initialization side effects.\n const { findWorkspaceDir } = require('@pnpm/find-workspace-dir') as {\n findWorkspaceDir: (currentWorkingDirectory: string) => Promise<string | undefined>\n }\n\n // NOTICE:\n // Workspace discovery is required to keep collected eval ids stable when this\n // package is moved inside different monorepo layouts.\n const workspaceDirectory = await findWorkspaceDir(cwd)\n\n return {\n projectRootDirectory: workspaceDirectory ?? fallbackProjectRootDirectory,\n }\n}\n","import type { CollectedEvalEntry, MatrixDefinition, MatrixLayer, MatrixValue } from '../../config'\n\n/**\n * Describes the inferenceExecutor target for a scheduled eval run.\n */\nexport interface InferenceExecutor {\n /**\n * Stable inferenceExecutor identifier such as `openai:gpt-4.1-mini`.\n */\n id: string\n}\n\n/**\n * Stores the selected value for each matrix axis.\n */\nexport type RunnerMatrixSelection = Record<string, string>\n\n/**\n * Stores stable row ids for one resolved scheduled task matrix.\n */\nexport interface ScheduledTaskMatrixMeta {\n /**\n * Stable row id for the resolved run matrix selection.\n */\n runRowId: string\n /**\n * Stable row id for the resolved eval matrix selection.\n */\n evalRowId: string\n}\n\n/**\n * Stores the structured matrix payload for one scheduled task.\n */\nexport interface ScheduledTaskMatrix {\n /**\n * Runtime matrix selection visible to task code.\n */\n run: RunnerMatrixSelection\n /**\n * Eval-time matrix selection visible to task code.\n */\n eval: RunnerMatrixSelection\n /**\n * Stable row ids for both scopes.\n */\n meta: ScheduledTaskMatrixMeta\n}\n\n/**\n * Maps matrix axis names to the values that should be expanded.\n */\nexport type RunnerMatrixDefinition = MatrixDefinition\n\n/**\n * Accepts either flat axis definitions or one layered matrix object.\n */\nexport type RunnerMatrixInput = RunnerMatrixDefinition | MatrixLayer\n\nconst matrixLayerKeys = new Set(['disable', 'extend', 'override'])\nconst ambiguousMatrixDefinitionErrorMessage = 'Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.'\n\n/**\n * Represents one fully expanded runner task.\n */\nexport interface ScheduledTask {\n /**\n * Stable task id derived from the entry, inferenceExecutor, and matrix selection.\n */\n id: string\n /**\n * The collected eval entry to execute.\n */\n entry: CollectedEvalEntry\n /**\n * The inferenceExecutor selected for this task.\n */\n inferenceExecutor: InferenceExecutor\n /**\n * The concrete scoped matrix selection for this task.\n */\n matrix: ScheduledTaskMatrix\n}\n\n/**\n * Configures how the runner should expand its execution matrix.\n */\nexport interface CreateRunnerScheduleOptions {\n /**\n * Collected eval entries that should be scheduled.\n */\n entries: readonly CollectedEvalEntry[]\n /**\n * Providers that should run each entry.\n */\n inferenceExecutors: readonly InferenceExecutor[]\n /**\n * Optional run-time matrix axes expanded as a cartesian product.\n */\n runMatrix?: RunnerMatrixInput\n /**\n * Optional eval-time matrix axes expanded as a cartesian product.\n */\n evalMatrix?: RunnerMatrixInput\n}\n\nfunction encodeTaskIdSegment(value: string): string {\n return encodeURIComponent(value)\n}\n\nfunction stringifyMatrixValue(value: MatrixValue): string {\n return String(value)\n}\n\nfunction cloneMatrixSelection(matrix: RunnerMatrixSelection): RunnerMatrixSelection {\n return { ...matrix }\n}\n\nfunction createScheduledTaskMatrix(\n runMatrix: RunnerMatrixSelection,\n evalMatrix: RunnerMatrixSelection,\n): ScheduledTaskMatrix {\n return {\n eval: cloneMatrixSelection(evalMatrix),\n meta: {\n evalRowId: createStableRowId(evalMatrix),\n runRowId: createStableRowId(runMatrix),\n },\n run: cloneMatrixSelection(runMatrix),\n }\n}\n\nfunction isMatrixLayer(matrix: RunnerMatrixInput): matrix is MatrixLayer {\n const matrixKeys = Object.keys(matrix)\n return (\n matrixKeys.length > 0\n && matrixKeys.every(key => matrixLayerKeys.has(key))\n )\n}\n\nfunction assertNonAmbiguousMatrixDefinition(matrix: RunnerMatrixInput): void {\n const matrixKeys = Object.keys(matrix)\n const hasReservedKeys = matrixKeys.some(key => matrixLayerKeys.has(key))\n const hasAxisKeys = matrixKeys.some(key => !matrixLayerKeys.has(key))\n\n if (hasReservedKeys && hasAxisKeys) {\n throw new TypeError(ambiguousMatrixDefinitionErrorMessage)\n }\n}\n\nfunction normalizeLayerInputToAxes(matrix: RunnerMatrixInput | undefined): MatrixLayer | undefined {\n if (matrix == null) {\n return undefined\n }\n\n assertNonAmbiguousMatrixDefinition(matrix)\n\n if (isMatrixLayer(matrix)) {\n return matrix\n }\n\n return {\n extend: matrix,\n }\n}\n\nfunction dedupeAxisValues(values: readonly MatrixValue[]): string[] {\n return Array.from(new Set(values.map(stringifyMatrixValue)))\n}\n\nfunction applyAxisValues(\n axes: Map<string, string[]>,\n definition: RunnerMatrixDefinition | undefined,\n mode: 'extend' | 'override',\n): void {\n if (definition == null) {\n return\n }\n\n for (const [axis, values] of Object.entries(definition)) {\n const nextValues = dedupeAxisValues(values)\n\n if (mode === 'extend') {\n const existingValues = axes.get(axis) ?? []\n axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])))\n continue\n }\n\n axes.set(axis, nextValues)\n }\n}\n\nfunction applyLayer(\n baseAxes: ReadonlyMap<string, string[]>,\n layer: MatrixLayer | undefined,\n): Map<string, string[]> {\n const nextAxes = new Map<string, string[]>(\n Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]),\n )\n\n for (const axis of layer?.disable ?? []) {\n nextAxes.delete(axis)\n }\n\n applyAxisValues(nextAxes, layer?.extend, 'extend')\n applyAxisValues(nextAxes, layer?.override, 'override')\n\n return nextAxes\n}\n\nfunction expandAxesToRows(axes: ReadonlyMap<string, readonly string[]>): RunnerMatrixSelection[] {\n if (axes.size === 0) {\n return [{}]\n }\n\n const dimensions = Array.from(axes.entries())\n\n let selections: RunnerMatrixSelection[] = [{}]\n\n for (const [axis, values] of dimensions) {\n if (values.length === 0) {\n return []\n }\n\n const nextSelections: RunnerMatrixSelection[] = []\n\n for (const selection of selections) {\n for (const value of values) {\n nextSelections.push({\n ...selection,\n [axis]: value,\n })\n }\n }\n\n selections = nextSelections\n }\n\n return selections\n}\n\nfunction createStableRowId(matrix: RunnerMatrixSelection): string {\n const segments = Object.entries(matrix)\n .sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis))\n .map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`)\n\n if (segments.length === 0) {\n return 'default'\n }\n\n return segments.join('&')\n}\n\nfunction createTaskId(entryId: string, inferenceExecutorId: string, runRowId: string, evalRowId: string): string {\n const encodedEntryId = encodeTaskIdSegment(entryId)\n const encodedProviderId = encodeTaskIdSegment(inferenceExecutorId)\n\n return [\n encodedEntryId,\n encodedProviderId,\n `run=${encodeTaskIdSegment(runRowId)}`,\n `eval=${encodeTaskIdSegment(evalRowId)}`,\n ].join('::')\n}\n\nfunction createResolvedRunAxes(\n entry: CollectedEvalEntry,\n runMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n runMatrix,\n entry.matrix?.runMatrix,\n entry.task?.matrix?.runMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\nfunction createResolvedEvalAxes(\n entry: CollectedEvalEntry,\n evalMatrix: RunnerMatrixInput | undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n evalMatrix,\n entry.matrix?.evalMatrix,\n entry.task?.matrix?.evalMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\n/**\n * Expands collected entries into a stable runner schedule.\n *\n * Call stack:\n *\n * {@link collectEvalEntries} (`../runner`)\n * -> {@link createRunnerSchedule}\n * -> {@link expandAxesToRows}\n * -> {@link ScheduledTask}[]\n *\n * Use when:\n * - the runner already knows which eval entries are available\n * - each entry must run against multiple inferenceExecutors or matrix variants\n *\n * Expects:\n * - `entries` and `inferenceExecutors` to be provided in the desired execution order\n * - matrix axes to use insertion order when generating combinations\n */\nexport function createRunnerSchedule(options: CreateRunnerScheduleOptions): ScheduledTask[] {\n if (options.entries.length === 0) {\n return []\n }\n\n if (options.inferenceExecutors.length === 0) {\n return []\n }\n\n const tasks: ScheduledTask[] = []\n\n for (const entry of options.entries) {\n const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix))\n const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix))\n\n if (runSelections.length === 0 || evalSelections.length === 0) {\n continue\n }\n\n for (const inferenceExecutor of options.inferenceExecutors) {\n for (const runMatrix of runSelections) {\n for (const evalMatrix of evalSelections) {\n const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix)\n\n tasks.push({\n entry,\n id: createTaskId(\n entry.id,\n inferenceExecutor.id,\n isolatedMatrix.meta.runRowId,\n isolatedMatrix.meta.evalRowId,\n ),\n matrix: isolatedMatrix,\n inferenceExecutor,\n })\n }\n }\n }\n }\n\n return tasks\n}\n","import type { ModelDefinition } from '../../config/models'\nimport type { TaskCacheRuntime } from '../cache'\nimport type { ScheduledTask } from './schedule'\n\n/**\n * Task-scoped execution context exposed to runner executors.\n */\nexport interface TaskExecutionContext {\n /**\n * Deterministic cache runtime scoped to the current task project.\n */\n cache: TaskCacheRuntime\n /**\n * Configured model registrations available to model plugins.\n */\n models: readonly ModelDefinition[]\n}\n\n/**\n * Inputs used to build task execution context.\n */\nexport interface CreateTaskExecutionContextOptions {\n cache?: TaskCacheRuntime\n models: readonly ModelDefinition[]\n task: ScheduledTask\n}\n\nfunction createNoopTaskCacheRuntime(): TaskCacheRuntime {\n return {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n}\n\n/**\n * Creates task-scoped context data for runner execution.\n *\n * Call stack:\n *\n * {@link runScheduledTasks}\n * -> {@link createTaskExecutionContext}\n * -> `TaskExecutionContext`\n */\nexport function createTaskExecutionContext(options: CreateTaskExecutionContextOptions): TaskExecutionContext {\n return {\n cache: options.cache ?? createNoopTaskCacheRuntime(),\n models: options.models,\n }\n}\n"],"mappings":";;;;;;;;;;;AA2BA,SAAS,oBAAoB,OAAuB;CAClD,MAAM,aAAa,MAAM,MAAM;AAC/B,KAAI,WAAW,WAAW,EACxB,QAAO;AAGT,QAAO,WAAW,QAAQ,aAAa,IAAI;;AAG7C,SAAS,mBAAmB,WAA+B,WAAmD;AAC5G,KAAI,aAAa,QAAQ,UAAU,SAAS,EAC1C,QAAO,UAAU,WAAW,IAAI,GAAG,UAAU,MAAM,EAAE,GAAG;AAG1D,KAAI,aAAa,QAAQ,UAAU,WAAW,EAC5C;AAGF,KAAI,cAAc,mBAChB,QAAO;AAGT,KAAI,cAAc,aAChB,QAAO;AAGT,KAAI,cAAc,YAChB,QAAO;;;;;;;;;;;AAeX,SAAgB,+BAA+B,SAAqC;CAClF,MAAM,eAAe,QAAQ,IAAI,KAAI,YAAW,oBAAoB,QAAQ,CAAC;CAC7E,MAAM,YAAY,mBAAmB,QAAQ,KAAK,QAAQ,UAAU;AAEpE,KAAI,aAAa,WAAW,EAC1B,QAAO,aAAa,OAAO,CAAC,WAAW,GAAG,CAAC,YAAY,YAAY;AAGrE,KAAI,aAAa,KACf,QAAO;CAGT,MAAM,cAAc,aAAa,MAAM,GAAG,KAAK,IAAI,GAAG,aAAa,SAAS,EAAE,CAAC;CAC/E,MAAM,OAAO,aAAa,aAAa,SAAS,MAAM;AACtD,QAAO,CAAC,GAAG,aAAa,GAAG,KAAK,GAAG,YAAY;;AAGjD,eAAe,gBAAgB,MAAc,SAAyC;CACpF,MAAM,YAAY,QAAQ,KAAK;CAC/B,MAAM,gBAAgB,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG,KAAK,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,SAAS,GAAG,CAAC,MAAM,GAAG,GAAG;AACzG,OAAM,MAAM,WAAW,EAAE,WAAW,MAAM,CAAC;AAC3C,OAAM,UAAU,eAAe,QAAQ;AACvC,OAAM,OAAO,eAAe,KAAK;;AAGnC,SAAS,sBAAsB,MAA+B;AAC5D,QAAO;EACL;EACA,MAAM,SAAS;AACb,OAAI;AACF,UAAM,OAAO,KAAK;AAClB,WAAO;WAEH;AACJ,WAAO;;;EAGX,iBAAiB;AACf,UAAO,iBAAiB,KAAK;;EAE/B,MAAM,kBAAkB;AACtB,SAAM,MAAM,QAAQ,KAAK,EAAE,EAAE,WAAW,MAAM,CAAC;AAC/C,UAAO,kBAAkB,KAAK;;EAEhC,MAAM,aAAa;AACjB,UAAO,MAAM,SAAS,KAAK;;EAE7B,MAAM,YAAY,OAAO;AACvB,SAAM,gBAAgB,MAAM,MAAM;;EAEpC,MAAM,SAAS,WAAW,SAAS;AACjC,UAAO,MAAM,SAAS,MAAM,SAAS;;EAEvC,MAAM,UAAU,OAAO,WAAW,SAAS;AACzC,SAAM,gBAAgB,MAAM,OAAO,KAAK,OAAO,SAAS,CAAC;;EAE3D,MAAM,WAAc;AAClB,UAAO,KAAK,MAAM,MAAM,SAAS,MAAM,QAAQ,CAAC;;EAElD,MAAM,UAAU,OAAO;AACrB,SAAM,gBAAgB,MAAM,GAAG,KAAK,UAAU,OAAO,MAAM,EAAE,CAAC,IAAI;;EAEpE,MAAM,mBAAsB;AAC1B,UAAO,MAAM,KAAK,UAAe;;EAEnC,MAAM,sBAAyB;AAC7B,UAAO,MAAM,KAAK,UAAa;;EAElC;;AAGH,SAAS,qBAAqB,eAAuB,WAAmC;AACtF,QAAO,EACL,KAAK,SAAS;EACZ,MAAM,uBAAuB,+BAA+B,QAAQ;AACpE,SAAO,sBAAsB,KAAK,eAAe,oBAAoB,UAAU,EAAE,GAAG,qBAAqB,CAAC;IAE7G;;;;;;;;;;;;;;;;;AAkBH,SAAgB,iCACd,SACkB;CAClB,MAAM,qBAAqB,oBAAoB,QAAQ,YAAY;CACnE,MAAM,mBAAmB,oBAAoB,QAAQ,YAAY;CACjE,MAAM,gBAAgB,KAAK,QAAQ,oBAAoB,oBAAoB,iBAAiB;AAE5F,QAAO,EACL,UAAU,MAAM;AACd,SAAO,qBAAqB,eAAe,KAAK;IAEnD;;;;ACvCH,SAAS,yBAAyB,QAAkD;AAClF,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,qBAAqB,MAA4B;AACxD,KAAI,SAAS,WAAW,SAAS,QAC/B,QAAO;AAGT,OAAM,IAAI,UAAU,4BAA4B,KAAK,IAAI;;AAG3D,SAAS,QAAQ,QAA0C;AACzD,KAAI,OAAO,WAAW,EACpB,QAAO;AAIT,QADc,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAC5C,OAAO;;AAGxB,SAAS,oBAAoB,cAA6B,cAA4C;AACpG,KAAI,gBAAgB,QAAQ,gBAAgB,KAC1C,SAAQ,eAAe,gBAAgB;AAGzC,KAAI,gBAAgB,KAClB,QAAO;AAGT,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO;;AAGT,SAAS,oBAAoB,QAA2C;CACtE,MAAM,UAAwB;EAC5B,OAAO,EAAE;EACT,OAAO,EAAE;EACV;AAED,MAAK,MAAM,SAAS,QAAQ;AAG1B,MAFa,qBAAqB,MAAM,KAAK,KAEhC,SAAS;AACpB,WAAQ,MAAM,KAAK,MAAM,MAAM;AAC/B;;AAGF,UAAQ,MAAM,KAAK,MAAM,MAAM;;AAGjC,QAAO;;AAGT,SAAS,iBAAiB,QAAyC;CACjE,MAAM,UAAU,oBAAoB,OAAO,OAAO;CAClD,MAAM,eAAe,QAAQ,QAAQ,MAAM;CAC3C,MAAM,eAAe,QAAQ,QAAQ,MAAM;AAE3C,QAAO;EACL,SAAS,OAAO;EAChB;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D,IAAI,OAAO;EACX;EACA,QAAQ,yBAAyB,OAAO,OAAO;EAC/C,qBAAqB,OAAO;EAC7B;;AAGH,SAAS,sBAAsB,qBAA6B,SAA0D;CACpH,MAAM,cAAwB,EAAE;CAChC,MAAM,cAAwB,EAAE;AAEhC,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,UAAU,oBAAoB,OAAO,OAAO;AAClD,cAAY,KAAK,GAAG,QAAQ,MAAM;AAClC,cAAY,KAAK,GAAG,QAAQ,MAAM;;CAGpC,MAAM,eAAe,QAAQ,YAAY;CACzC,MAAM,eAAe,QAAQ,YAAY;AAEzC,QAAO;EACL;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D;EACA;EACA,UAAU,QAAQ;EACnB;;;;;;;;;;;;;;;;;;;;;AAsBH,SAAgB,oBAAoB,SAAqD;CACvF,MAAM,OAAO,QAAQ,IAAI,iBAAiB;CAG1C,MAAM,qBADuB,MAAM,KAAK,IAAI,IAAI,QAAQ,KAAI,WAAU,OAAO,oBAAoB,CAAC,CAAC,CAEhG,KAAK,wBAAwB;AAE5B,SAAO,sBAAsB,qBADL,QAAQ,QAAO,WAAU,OAAO,wBAAwB,oBAAoB,CAClC;GAClE,CACD,MAAM,MAAM,UAAU,KAAK,oBAAoB,cAAc,MAAM,oBAAoB,CAAC;CAE3F,MAAM,UAAU,sBACd,WACA,QACD;AAED,QAAO;EACL,SAAS;GACP,cAAc,QAAQ;GACtB,eAAe,QAAQ;GACvB,cAAc,QAAQ;GACtB,UAAU,QAAQ;GACnB;EACD;EACA;EACD;;;;ACvRH,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAE5B,SAAS,cAAc,OAAuB;AAC5C,QAAO,MAAM,WAAW,MAAM,IAAI;;;;;;;;;;;AAYpC,SAAgB,sBAAsB,UAAkB,SAAuC;CAC7F,MAAM,qBAAqB,cAAc,SAAS;CAClD,MAAM,iCAAiC,cAAc,QAAQ,qBAAqB;CAClF,MAAM,uBAAuB,mBAAmB,MAAM,aAAa,GAAG;CACtE,MAAM,0BAA0B,+BAA+B,MAAM,aAAa,GAAG;AAErF,KAAI,wBAAwB,QAAQ,2BAA2B,KAC7D,QAAO;AAGT,KACE,wBAAwB,QACrB,2BAA2B,QAC3B,qBAAqB,aAAa,KAAK,wBAAwB,aAAa,CAE/E,QAAO;CAGT,MAAM,uBAAuB,QAAQ;CACrC,MAAM,mBAAmB,cAAc,SAAS,sBAAsB,SAAS,CAAC;AAEhF,KAAI,CAAC,oBAAoB,KAAK,iBAAiB,EAAE;AAC/C,MAAI,qBAAqB,KACvB,QAAO,cAAc,SAAS;AAGhC,MAAI,CAAC,iBAAiB,WAAW,MAAM,CACrC,QAAO;;AAIX,QAAO,cAAc,SAAS;;AAGhC,SAAS,sBAAsB,YAAmC;AAChE,KAAI,CAAC,WAAW,WAAW,QAAQ,CACjC,QAAO;AAGT,KAAI;AACF,SAAO,cAAc,WAAW;SAE5B;AACJ,SAAO;;;AAIX,SAAS,yBACP,YACA,kBACA,SAC2B;CAC3B,MAAM,WAAW,sBAAsB,WAAW;AAElD,KAAI,CAAC,SACH,QAAO;CAGT,MAAM,mBAAmB,sBAAsB,UAAU,QAAQ;AAEjE,KAAI,CAAC,iBAAiB,SAAS,eAAe,CAC5C,QAAO;CAGT,MAAM,YAAY,SAAS,kBAAkB,eAAe;AAE5D,KAAI,UAAU,WAAW,EACvB,QAAO;CAGT,MAAM,oBAAoB,QAAQ,iBAAiB;CACnD,MAAM,YAAY,sBAAsB,MAAM,KAAK;AAEnD,QAAO;EACL,GAAG,iBAAiB;EACpB;EACA;EACA,IAAI,UAAU,WAAW,IAAI,YAAY,GAAG,UAAU,GAAG;EACzD,MAAM;EACP;;;;;;;;;;;;;;;;AAiBH,SAAgB,mBACd,SACA,SACsB;AACtB,QAAO,OAAO,QAAQ,QAAQ,CAC3B,SAAS,CAAC,YAAY,sBAAsB;EAC3C,MAAM,QAAQ,yBAAyB,YAAY,kBAAkB,QAAQ;AAE7E,MAAI,CAAC,MACH,QAAO,EAAE;AAGX,SAAO,CAAC,MAAM;GACd,CACD,MAAM,MAAM,UAAU,KAAK,GAAG,cAAc,MAAM,GAAG,CAAC;;;;AC9C3D,SAAS,gCAAsD;AAY7D,QAAO;EACL,OAZ8B,EAC9B,UAAU,MAAM;AACd,UAAO,EACL,KAAK,SAAS;IACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,UAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;MAE3G;KAEJ;EAIC,QAAQ,EAAE;EACX;;;;;AAMH,IAAa,uBAAb,cAA0C,MAAM;;;;CAI9C;CAEA,YAAY,QAAgB,OAAgB;EAC1C,MAAM,UAAU,iBAAiB,MAAM,IAAI;AAC3C,QAAM,gBAAgB,OAAO,YAAY,UAAU;AACnD,OAAK,OAAO;AACZ,OAAK,SAAS;AACd,OAAK,QAAQ;;;AAIjB,SAAS,2BAA2B,QAAgB,OAAsC;AACxF,KAAI,iBAAiB,wBAAwB,MAAM,WAAW,OAC5D,QAAO;AAGT,QAAO,IAAI,qBAAqB,QAAQ,MAAM;;;;;;;;;;;;;;;;;;;;;;;;AAyBhD,eAAsB,kBACpB,OACA,UACA,UAAoC,EAAE,EACP;AAC/B,KAAI,MAAM,WAAW,EACnB,QAAO,oBAAoB,EAAE,CAAC;CAGhC,eAAe,qBAAqB,MAAyC;EAC3E,IAAI;AAEJ,MAAI;AACF,sBAAmB,QAAQ,yBAAyB,KAAK,IAAI,+BAA+B;WAEvF,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,cAAc,KAAK;WAEtB,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;EAGlD,IAAI;AACJ,MAAI;AACF,eAAY,MAAM,SAAS,MAAM,iBAAiB;WAE7C,OAAO;AACZ,OAAI;AACF,YAAQ,YAAY,MAAM,SAAS;WAE/B;AAGN,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,YAAY,MAAM,SAAS;WAE9B,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,SAAO;;CAGT,MAAM,iBAAiB,QAAQ,kBAAkB;AACjD,KAAI,kBAAkB,GAAG;EACvB,MAAM,UAAuB,EAAE;AAC/B,OAAK,MAAM,QAAQ,MACjB,SAAQ,KAAK,MAAM,qBAAqB,KAAK,CAAC;AAEhD,SAAO,oBAAoB,QAAQ;;CAGrC,MAAM,eAAe,iBAAiB,eAAe;AAUrD,QAAO,qBATa,MAAM,QAAQ,IAAI,MAAM,IAAI,OAAO,MAAM,UAAU;AAErE,SAAO;GAAE;GAAO,QADD,MAAM,aAAa,YAAY,qBAAqB,KAAK,CAAC;GACjD;GACxB,CAAC,EAGA,MAAM,MAAM,UAAU,KAAK,QAAQ,MAAM,MAAM,CAC/C,KAAI,SAAQ,KAAK,OAAO,CAEc;;;;ACzN3C,MAAM,UAAU,cAAc,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;AAgD9C,eAAsB,2BACpB,UAAmD,EAAE,EACtB;CAC/B,MAAM,MAAM,QAAQ,OAAO,QAAQ,cAAc,OAAO,KAAK,IAAI,CAAC;CAClE,MAAM,+BAA+B,QAAQ,gCACxC,cAAc,IAAI,IAAI,aAAa,OAAO,KAAK,IAAI,CAAC;CAKzD,MAAM,EAAE,qBAAqB,QAAQ,2BAA2B;AAShE,QAAO,EACL,sBAHyB,MAAM,iBAAiB,IAAI,IAGR,8BAC7C;;;;ACdH,MAAM,kBAAkB,IAAI,IAAI;CAAC;CAAW;CAAU;CAAW,CAAC;AAClE,MAAM,wCAAwC;AA8C9C,SAAS,oBAAoB,OAAuB;AAClD,QAAO,mBAAmB,MAAM;;AAGlC,SAAS,qBAAqB,OAA4B;AACxD,QAAO,OAAO,MAAM;;AAGtB,SAAS,qBAAqB,QAAsD;AAClF,QAAO,EAAE,GAAG,QAAQ;;AAGtB,SAAS,0BACP,WACA,YACqB;AACrB,QAAO;EACL,MAAM,qBAAqB,WAAW;EACtC,MAAM;GACJ,WAAW,kBAAkB,WAAW;GACxC,UAAU,kBAAkB,UAAU;GACvC;EACD,KAAK,qBAAqB,UAAU;EACrC;;AAGH,SAAS,cAAc,QAAkD;CACvE,MAAM,aAAa,OAAO,KAAK,OAAO;AACtC,QACE,WAAW,SAAS,KACjB,WAAW,OAAM,QAAO,gBAAgB,IAAI,IAAI,CAAC;;AAIxD,SAAS,mCAAmC,QAAiC;CAC3E,MAAM,aAAa,OAAO,KAAK,OAAO;CACtC,MAAM,kBAAkB,WAAW,MAAK,QAAO,gBAAgB,IAAI,IAAI,CAAC;CACxE,MAAM,cAAc,WAAW,MAAK,QAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC;AAErE,KAAI,mBAAmB,YACrB,OAAM,IAAI,UAAU,sCAAsC;;AAI9D,SAAS,0BAA0B,QAAgE;AACjG,KAAI,UAAU,KACZ;AAGF,oCAAmC,OAAO;AAE1C,KAAI,cAAc,OAAO,CACvB,QAAO;AAGT,QAAO,EACL,QAAQ,QACT;;AAGH,SAAS,iBAAiB,QAA0C;AAClE,QAAO,MAAM,KAAK,IAAI,IAAI,OAAO,IAAI,qBAAqB,CAAC,CAAC;;AAG9D,SAAS,gBACP,MACA,YACA,MACM;AACN,KAAI,cAAc,KAChB;AAGF,MAAK,MAAM,CAAC,MAAM,WAAW,OAAO,QAAQ,WAAW,EAAE;EACvD,MAAM,aAAa,iBAAiB,OAAO;AAE3C,MAAI,SAAS,UAAU;GACrB,MAAM,iBAAiB,KAAK,IAAI,KAAK,IAAI,EAAE;AAC3C,QAAK,IAAI,MAAM,MAAM,KAAK,IAAI,IAAI,CAAC,GAAG,gBAAgB,GAAG,WAAW,CAAC,CAAC,CAAC;AACvE;;AAGF,OAAK,IAAI,MAAM,WAAW;;;AAI9B,SAAS,WACP,UACA,OACuB;CACvB,MAAM,WAAW,IAAI,IACnB,MAAM,KAAK,SAAS,SAAS,CAAC,CAAC,KAAK,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAC5E;AAED,MAAK,MAAM,QAAQ,OAAO,WAAW,EAAE,CACrC,UAAS,OAAO,KAAK;AAGvB,iBAAgB,UAAU,OAAO,QAAQ,SAAS;AAClD,iBAAgB,UAAU,OAAO,UAAU,WAAW;AAEtD,QAAO;;AAGT,SAAS,iBAAiB,MAAuE;AAC/F,KAAI,KAAK,SAAS,EAChB,QAAO,CAAC,EAAE,CAAC;CAGb,MAAM,aAAa,MAAM,KAAK,KAAK,SAAS,CAAC;CAE7C,IAAI,aAAsC,CAAC,EAAE,CAAC;AAE9C,MAAK,MAAM,CAAC,MAAM,WAAW,YAAY;AACvC,MAAI,OAAO,WAAW,EACpB,QAAO,EAAE;EAGX,MAAM,iBAA0C,EAAE;AAElD,OAAK,MAAM,aAAa,WACtB,MAAK,MAAM,SAAS,OAClB,gBAAe,KAAK;GAClB,GAAG;IACF,OAAO;GACT,CAAC;AAIN,eAAa;;AAGf,QAAO;;AAGT,SAAS,kBAAkB,QAAuC;CAChE,MAAM,WAAW,OAAO,QAAQ,OAAO,CACpC,MAAM,CAAC,WAAW,CAAC,eAAe,SAAS,cAAc,UAAU,CAAC,CACpE,KAAK,CAAC,MAAM,WAAW,GAAG,oBAAoB,KAAK,CAAC,GAAG,oBAAoB,MAAM,GAAG;AAEvF,KAAI,SAAS,WAAW,EACtB,QAAO;AAGT,QAAO,SAAS,KAAK,IAAI;;AAG3B,SAAS,aAAa,SAAiB,qBAA6B,UAAkB,WAA2B;AAI/G,QAAO;EAHgB,oBAAoB,QAAQ;EACzB,oBAAoB,oBAAoB;EAKhE,OAAO,oBAAoB,SAAS;EACpC,QAAQ,oBAAoB,UAAU;EACvC,CAAC,KAAK,KAAK;;AAGd,SAAS,sBACP,OACA,WACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;AAGT,SAAS,uBACP,OACA,YACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;;;;;;;;;;;;;;;;;;;AAqBT,SAAgB,qBAAqB,SAAuD;AAC1F,KAAI,QAAQ,QAAQ,WAAW,EAC7B,QAAO,EAAE;AAGX,KAAI,QAAQ,mBAAmB,WAAW,EACxC,QAAO,EAAE;CAGX,MAAM,QAAyB,EAAE;AAEjC,MAAK,MAAM,SAAS,QAAQ,SAAS;EACnC,MAAM,gBAAgB,iBAAiB,sBAAsB,OAAO,QAAQ,UAAU,CAAC;EACvF,MAAM,iBAAiB,iBAAiB,uBAAuB,OAAO,QAAQ,WAAW,CAAC;AAE1F,MAAI,cAAc,WAAW,KAAK,eAAe,WAAW,EAC1D;AAGF,OAAK,MAAM,qBAAqB,QAAQ,mBACtC,MAAK,MAAM,aAAa,cACtB,MAAK,MAAM,cAAc,gBAAgB;GACvC,MAAM,iBAAiB,0BAA0B,WAAW,WAAW;AAEvE,SAAM,KAAK;IACT;IACA,IAAI,aACF,MAAM,IACN,kBAAkB,IAClB,eAAe,KAAK,UACpB,eAAe,KAAK,UACrB;IACD,QAAQ;IACR;IACD,CAAC;;;AAMV,QAAO;;;;AC1UT,SAAS,6BAA+C;AACtD,QAAO,EACL,UAAU,MAAM;AACd,SAAO,EACL,KAAK,SAAS;GACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,SAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;KAE3G;IAEJ;;;;;;;;;;;AAYH,SAAgB,2BAA2B,SAAkE;AAC3G,QAAO;EACL,OAAO,QAAQ,SAAS,4BAA4B;EACpD,QAAQ,QAAQ;EACjB"}
|
|
@@ -7,6 +7,10 @@ type EnvValueType = 'string';
|
|
|
7
7
|
* Common options for env readers.
|
|
8
8
|
*/
|
|
9
9
|
interface EnvFromOptions {
|
|
10
|
+
/**
|
|
11
|
+
* Env key to read and use in error messages.
|
|
12
|
+
*/
|
|
13
|
+
name: string;
|
|
10
14
|
/**
|
|
11
15
|
* Expected env value type.
|
|
12
16
|
*/
|
|
@@ -17,10 +21,6 @@ interface EnvFromOptions {
|
|
|
17
21
|
* @default false
|
|
18
22
|
*/
|
|
19
23
|
required?: boolean;
|
|
20
|
-
/**
|
|
21
|
-
* Optional key name used for clearer error messages.
|
|
22
|
-
*/
|
|
23
|
-
name?: string;
|
|
24
24
|
}
|
|
25
25
|
/**
|
|
26
26
|
* Env options used by the required helper.
|
|
@@ -28,20 +28,25 @@ interface EnvFromOptions {
|
|
|
28
28
|
* `required` is intentionally omitted because this helper is always required.
|
|
29
29
|
*/
|
|
30
30
|
type RequiredEnvFromOptions = Omit<EnvFromOptions, 'required'>;
|
|
31
|
+
type EnvSource = Record<string, string | undefined>;
|
|
31
32
|
/**
|
|
32
33
|
* Parses one env value with optional required behavior.
|
|
33
34
|
*
|
|
34
35
|
* Example:
|
|
35
|
-
* `const apiKey = envFrom(process.env
|
|
36
|
+
* `const apiKey = envFrom(process.env, { type: 'string', required: true, name: 'OPENAI_API_KEY' })`
|
|
36
37
|
*/
|
|
37
|
-
declare function envFrom(
|
|
38
|
+
declare function envFrom<TEnv extends EnvSource>(env: TEnv, options: EnvFromOptions & {
|
|
39
|
+
name: keyof TEnv & string;
|
|
40
|
+
}): string | undefined;
|
|
38
41
|
/**
|
|
39
42
|
* Parses one required env value.
|
|
40
43
|
*
|
|
41
44
|
* Example:
|
|
42
|
-
* `const apiKey = requiredEnvFrom(process.env
|
|
45
|
+
* `const apiKey = requiredEnvFrom(process.env, { type: 'string', name: 'OPENAI_API_KEY' })`
|
|
43
46
|
*/
|
|
44
|
-
declare function requiredEnvFrom(
|
|
47
|
+
declare function requiredEnvFrom<TEnv extends EnvSource>(env: TEnv, options: RequiredEnvFromOptions & {
|
|
48
|
+
name: keyof TEnv & string;
|
|
49
|
+
}): string;
|
|
45
50
|
//#endregion
|
|
46
51
|
export { requiredEnvFrom as a, envFrom as i, EnvValueType as n, RequiredEnvFromOptions as r, EnvFromOptions as t };
|
|
47
|
-
//# sourceMappingURL=env-
|
|
52
|
+
//# sourceMappingURL=env-DfWZy_n4.d.mts.map
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
//#region src/core/inference-executors/env.ts
|
|
2
|
+
function assertNonEmptyString(value, options) {
|
|
3
|
+
if (value == null || value.trim().length === 0) {
|
|
4
|
+
if (options.required === true) throw new Error(`Missing required ${options.name}.`);
|
|
5
|
+
return;
|
|
6
|
+
}
|
|
7
|
+
return value;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Parses one env value with optional required behavior.
|
|
11
|
+
*
|
|
12
|
+
* Example:
|
|
13
|
+
* `const apiKey = envFrom(process.env, { type: 'string', required: true, name: 'OPENAI_API_KEY' })`
|
|
14
|
+
*/
|
|
15
|
+
function envFrom(env, options) {
|
|
16
|
+
if (options.type === "string") return assertNonEmptyString(env[options.name], options);
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Parses one required env value.
|
|
20
|
+
*
|
|
21
|
+
* Example:
|
|
22
|
+
* `const apiKey = requiredEnvFrom(process.env, { type: 'string', name: 'OPENAI_API_KEY' })`
|
|
23
|
+
*/
|
|
24
|
+
function requiredEnvFrom(env, options) {
|
|
25
|
+
const parsed = envFrom(env, {
|
|
26
|
+
...options,
|
|
27
|
+
required: true
|
|
28
|
+
});
|
|
29
|
+
if (parsed == null) throw new Error(`Missing required ${options.name}.`);
|
|
30
|
+
return parsed;
|
|
31
|
+
}
|
|
32
|
+
//#endregion
|
|
33
|
+
export { requiredEnvFrom as n, envFrom as t };
|
|
34
|
+
|
|
35
|
+
//# sourceMappingURL=env-nV5rVErX.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"env-nV5rVErX.mjs","names":[],"sources":["../src/core/inference-executors/env.ts"],"sourcesContent":["/**\n * Supported env value coercion types.\n */\nexport type EnvValueType = 'string'\n\n/**\n * Common options for env readers.\n */\nexport interface EnvFromOptions {\n /**\n * Env key to read and use in error messages.\n */\n name: string\n /**\n * Expected env value type.\n */\n type: EnvValueType\n /**\n * Whether an empty or missing value should throw.\n *\n * @default false\n */\n required?: boolean\n}\n\n/**\n * Env options used by the required helper.\n *\n * `required` is intentionally omitted because this helper is always required.\n */\nexport type RequiredEnvFromOptions = Omit<EnvFromOptions, 'required'>\n\ntype EnvSource = Record<string, string | undefined>\n\nfunction assertNonEmptyString(value: string | undefined, options: EnvFromOptions): string | undefined {\n if (value == null || value.trim().length === 0) {\n if (options.required === true) {\n throw new Error(`Missing required ${options.name}.`)\n }\n\n return undefined\n }\n\n return value\n}\n\n/**\n * Parses one env value with optional required behavior.\n *\n * Example:\n * `const apiKey = envFrom(process.env, { type: 'string', required: true, name: 'OPENAI_API_KEY' })`\n */\nexport function envFrom<TEnv extends EnvSource>(\n env: TEnv,\n options: EnvFromOptions & { name: keyof TEnv & string },\n): string | undefined {\n if (options.type === 'string') {\n return assertNonEmptyString(env[options.name], options)\n }\n\n return undefined\n}\n\n/**\n * Parses one required env value.\n *\n * Example:\n * `const apiKey = requiredEnvFrom(process.env, { type: 'string', name: 'OPENAI_API_KEY' })`\n */\nexport function requiredEnvFrom<TEnv extends EnvSource>(\n env: TEnv,\n options: RequiredEnvFromOptions & { name: keyof TEnv & string },\n): string {\n const parsed = envFrom(env, {\n ...options,\n required: true,\n })\n\n if (parsed == null) {\n throw new Error(`Missing required ${options.name}.`)\n }\n\n return parsed\n}\n"],"mappings":";AAkCA,SAAS,qBAAqB,OAA2B,SAA6C;AACpG,KAAI,SAAS,QAAQ,MAAM,MAAM,CAAC,WAAW,GAAG;AAC9C,MAAI,QAAQ,aAAa,KACvB,OAAM,IAAI,MAAM,oBAAoB,QAAQ,KAAK,GAAG;AAGtD;;AAGF,QAAO;;;;;;;;AAST,SAAgB,QACd,KACA,SACoB;AACpB,KAAI,QAAQ,SAAS,SACnB,QAAO,qBAAqB,IAAI,QAAQ,OAAO,QAAQ;;;;;;;;AAY3D,SAAgB,gBACd,KACA,SACQ;CACR,MAAM,SAAS,QAAQ,KAAK;EAC1B,GAAG;EACH,UAAU;EACX,CAAC;AAEF,KAAI,UAAU,KACZ,OAAM,IAAI,MAAM,oBAAoB,QAAQ,KAAK,GAAG;AAGtD,QAAO"}
|