npm - vieval - Versions diffs - 0.0.9 → 0.0.11 - Mend

vieval 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/README.md +27 -2
package/dist/bin/vieval.mjs +1 -1
package/dist/bin/vieval.mjs.map +1 -1
package/dist/cli/index.mjs +1 -1
package/dist/{cli-Dao25VxV.mjs → cli-CHFCF8UR.mjs} +670 -600
package/dist/cli-CHFCF8UR.mjs.map +1 -0
package/dist/config.d.mts +1 -1
package/dist/config.mjs +1 -1
package/dist/config.mjs.map +1 -1
package/dist/core/assertions/index.d.mts +1 -1
package/dist/core/assertions/index.mjs.map +1 -1
package/dist/core/inference-executors/index.d.mts +1 -1
package/dist/core/inference-executors/index.mjs +3 -3
package/dist/core/inference-executors/index.mjs.map +1 -1
package/dist/core/processors/results/index.d.mts +1 -1
package/dist/core/processors/results/index.mjs.map +1 -1
package/dist/core/runner/index.d.mts +2 -2
package/dist/core/runner/index.mjs +4 -4
package/dist/core/runner/index.mjs.map +1 -1
package/dist/core/scheduler/index.d.mts +1 -1
package/dist/core/scheduler/index.mjs +3 -3
package/dist/core/scheduler/index.mjs.map +1 -1
package/dist/{env-nV5rVErX.mjs → env-BVYeJhGA.mjs} +1 -1
package/dist/{env-nV5rVErX.mjs.map → env-BVYeJhGA.mjs.map} +1 -1
package/dist/{env-DfWZy_n4.d.mts → env-bRH0K6fU.d.mts} +1 -1
package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-Mf1sMNBv.mjs} +1 -1
package/dist/{expect-extensions-DCSqlneN.mjs.map → expect-extensions-Mf1sMNBv.mjs.map} +1 -1
package/dist/expect.d.mts +1 -3
package/dist/expect.mjs +1 -1
package/dist/expect.mjs.map +1 -1
package/dist/{index-fakXoZEe.d.mts → index-Be5I1ZJL.d.mts} +4 -3
package/dist/{index-BkjyCInx.d.mts → index-CwKBlCG9.d.mts} +2 -2
package/dist/index.d.mts +3 -4
package/dist/index.mjs +22 -27
package/dist/index.mjs.map +1 -1
package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
package/dist/plugins/chat-models/index.d.mts +1 -1
package/dist/plugins/chat-models/index.mjs +2 -2
package/dist/plugins/chat-models/index.mjs.map +1 -1
package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
package/dist/{registry-BHGMxjpA.mjs → registry-BSyjwZFx.mjs} +55 -11
package/dist/registry-BSyjwZFx.mjs.map +1 -0
package/dist/testing/expect-extensions.mjs +1 -1
package/package.json +10 -10
package/dist/cli-Dao25VxV.mjs.map +0 -1
package/dist/registry-BHGMxjpA.mjs.map +0 -1

package/dist/config.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-import { C as TaskDefinition, D as TaskRunContext, E as TaskReporterHooks, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, T as TaskReporterEventPayload, _ as ScopedMatrices, a as CliOpenTelemetryReportingConfig, b as TaskCaseReporterPayload, c as EvalDefinition, d as MatrixAxisValues, f as MatrixDefinition, g as MatrixValue, h as MatrixRow, i as Awaitable, l as EvalModule, m as MatrixPrimitive, n as defineEval, o as CliReportingConfig, p as MatrixLayer, r as defineTask, s as CollectedEvalEntry, t as ConfigHookPlugin, u as EvalModuleMap, v as TaskAutoRetryDelay, w as TaskExecutionPolicy, x as TaskCaseState, y as TaskCaseReporterEndPayload, z as resolveModelByName } from "./index-BkjyCInx.mjs";
+import { C as TaskDefinition, D as TaskRunContext, E as TaskReporterHooks, O as TaskRunOutput, R as ModelDefinition, S as TaskConcurrencyConfig, T as TaskReporterEventPayload, _ as ScopedMatrices, a as CliOpenTelemetryReportingConfig, b as TaskCaseReporterPayload, c as EvalDefinition, d as MatrixAxisValues, f as MatrixDefinition, g as MatrixValue, h as MatrixRow, i as Awaitable, l as EvalModule, m as MatrixPrimitive, n as defineEval, o as CliReportingConfig, p as MatrixLayer, r as defineTask, s as CollectedEvalEntry, t as ConfigHookPlugin, u as EvalModuleMap, v as TaskAutoRetryDelay, w as TaskExecutionPolicy, x as TaskCaseState, y as TaskCaseReporterEndPayload, z as resolveModelByName } from "./index-CwKBlCG9.mjs";
 export { Awaitable, CliOpenTelemetryReportingConfig, CliReportingConfig, CollectedEvalEntry, ConfigHookPlugin, EvalDefinition, EvalModule, EvalModuleMap, MatrixAxisValues, MatrixDefinition, MatrixLayer, MatrixPrimitive, MatrixRow, MatrixValue, ModelDefinition, ScopedMatrices, TaskAutoRetryDelay, TaskCaseReporterEndPayload, TaskCaseReporterPayload, TaskCaseState, TaskConcurrencyConfig, TaskDefinition, TaskExecutionPolicy, TaskReporterEventPayload, TaskReporterHooks, TaskRunContext, TaskRunOutput, defineEval, defineTask, resolveModelByName };

package/dist/config.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { t as resolveModelByName } from "./models-pBSRUZhY.mjs";
+import { t as resolveModelByName } from "./models-CaCOUPZw.mjs";
 //#region src/config/define.ts
 /**
 * Returns the provided vieval definition while preserving literal field types.

package/dist/config.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"config.mjs","names":[],"sources":["../src/config/define.ts"],"sourcesContent":["import type { EvalDefinition, TaskDefinition } from './types'\n\n/*\n Returns the provided vieval definition while preserving literal field types.\n /\nexport function defineEval<const TDefinition extends EvalDefinition>(definition: TDefinition): TDefinition {\n return definition\n}\n\n/\n Returns the provided task definition while preserving literal field types.\n */\nexport function defineTask<const TDefinition extends TaskDefinition>(definition: TDefinition): TDefinition {\n return definition\n}\n"],"mappings":";;;;;AAKA,SAAgB,WAAqD,YAAsC;~~AACzG~~,~~QAAO;;;;;AAMT~~,SAAgB,WAAqD,YAAsC;~~AACzG~~,~~QAAO~~"}
1	+ {"version":3,"file":"config.mjs","names":[],"sources":["../src/config/define.ts"],"sourcesContent":["import type { EvalDefinition, TaskDefinition } from './types'\n\n/*\n Returns the provided vieval definition while preserving literal field types.\n /\nexport function defineEval<const TDefinition extends EvalDefinition>(definition: TDefinition): TDefinition {\n return definition\n}\n\n/\n Returns the provided task definition while preserving literal field types.\n */\nexport function defineTask<const TDefinition extends TaskDefinition>(definition: TDefinition): TDefinition {\n return definition\n}\n"],"mappings":";;;;;AAKA,SAAgB,WAAqD,YAAsC;CACzG,OAAO;AACT;;;;AAKA,SAAgB,WAAqD,YAAsC;CACzG,OAAO;AACT"}

package/dist/core/assertions/index.d.mts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { X as RunScoreKind, Y as RunScore } from "../../index-BkjyCInx.mjs";
+import { X as RunScoreKind, Y as RunScore } from "../../index-CwKBlCG9.mjs";
 //#region src/core/assertions/index.d.ts
 /**

package/dist/core/assertions/index.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/assertions/index.ts"],"sourcesContent":["import type { RunScore, RunScoreKind } from '../runner/aggregate'\n\n/*\n Stores mutable evaluation state for stateful assertion flows.\n \n Use when:\n * - assertions need to share counters, rolling metrics, or memoized values\n * - a scenario evaluates multiple steps and expects state-aware checks\n /\nexport type AssertionState = Map<string, unknown>\n\n/\n Represents one tool call emitted by a model response.\n /\nexport interface ToolCall {\n /\n Tool name used by the call.\n /\n name: string\n /\n Tool arguments payload.\n /\n args: unknown\n}\n\n/\n Normalized assertion context for one model output.\n /\nexport interface AssertionContext {\n /\n Plain text model output used by text assertions.\n /\n text: string\n /\n Optional structured output parsed from the model response.\n /\n structuredOutput?: unknown\n /\n Optional tool calls extracted from the model response.\n /\n toolCalls?: readonly ToolCall[]\n /\n Shared mutable state for stateful assertion measurement.\n /\n state: AssertionState\n}\n\n/\n Result for one assertion evaluation.\n /\nexport interface AssertionOutcome {\n /\n Stable assertion id.\n /\n id: string\n /\n Assertion family emitted as run score kind.\n /\n scoreKind: RunScoreKind\n /\n Whether the assertion passed.\n /\n pass: boolean\n /\n Normalized score in the `0..1` range.\n /\n score: number\n /\n Human-readable reason for logs and reports.\n /\n reason: string\n}\n\n/\n Async assertion function used by eval scenarios.\n /\nexport type Assertion = (context: AssertionContext) => Promise<AssertionOutcome>\n\n/\n Normalizes text for matching.\n \n Before: `\" Hello\\nWorld \"`\n * After: `\"hello world\"`\n /\nexport function normalizeMatchText(value: string, caseSensitive: boolean): string {\n const compactedWhitespace = value.trim().replaceAll(/\\s+/g, ' ')\n\n if (caseSensitive) {\n return compactedWhitespace\n }\n\n return compactedWhitespace.toLowerCase()\n}\n\nfunction clampScore(score: number): number {\n if (Number.isNaN(score)) {\n return 0\n }\n\n if (score < 0) {\n return 0\n }\n\n if (score > 1) {\n return 1\n }\n\n return score\n}\n\nfunction createOutcome(\n id: string,\n scoreKind: RunScoreKind,\n pass: boolean,\n score: number,\n reason: string,\n): AssertionOutcome {\n return {\n id,\n pass,\n reason,\n score: clampScore(score),\n scoreKind,\n }\n}\n\n/\n Options for include-keyword assertions.\n /\nexport interface MustIncludeAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Keywords that must be present.\n /\n keywords: readonly string[]\n /\n Match mode for keywords.\n \n @default 'all'\n /\n mode?: 'all' \| 'any'\n /\n Case-sensitive matching toggle.\n \n @default false\n /\n caseSensitive?: boolean\n}\n\n/\n Creates an assertion that requires specific keywords in model text.\n \n Example:\n * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`\n /\nexport function expectMustInclude(options: MustIncludeAssertionOptions): Assertion {\n return async (context) => {\n if (options.keywords.length === 0) {\n return createOutcome(options.id, 'exact', true, 1, 'No required keywords configured.')\n }\n\n const caseSensitive = options.caseSensitive ?? false\n const normalizedText = normalizeMatchText(context.text, caseSensitive)\n const matches = options.keywords.filter((keyword) => {\n const normalizedKeyword = normalizeMatchText(keyword, caseSensitive)\n return normalizedText.includes(normalizedKeyword)\n })\n\n const mode = options.mode ?? 'all'\n const pass = mode === 'all'\n ? matches.length === options.keywords.length\n : matches.length > 0\n\n const score = options.keywords.length === 0 ? 1 : matches.length / options.keywords.length\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n score,\n pass\n ? `Matched ${matches.length}/${options.keywords.length} required keywords.`\n : `Matched ${matches.length}/${options.keywords.length} required keywords.`,\n )\n }\n}\n\n/\n Options for exclude-keyword assertions.\n /\nexport interface MustExcludeAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Keywords that must not appear.\n /\n keywords: readonly string[]\n /\n Case-sensitive matching toggle.\n \n @default false\n /\n caseSensitive?: boolean\n}\n\n/\n Creates an assertion that forbids specific keywords.\n \n Example:\n * `expectMustExclude({ id: 'no-engine-dump', keywords: ['bestmove', 'ponder'] })`\n /\nexport function expectMustExclude(options: MustExcludeAssertionOptions): Assertion {\n return async (context) => {\n if (options.keywords.length === 0) {\n return createOutcome(options.id, 'exact', true, 1, 'No excluded keywords configured.')\n }\n\n const caseSensitive = options.caseSensitive ?? false\n const normalizedText = normalizeMatchText(context.text, caseSensitive)\n const forbiddenMatches = options.keywords.filter((keyword) => {\n const normalizedKeyword = normalizeMatchText(keyword, caseSensitive)\n return normalizedText.includes(normalizedKeyword)\n })\n\n const pass = forbiddenMatches.length === 0\n const score = pass ? 1 : 0\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n score,\n pass\n ? 'No forbidden keywords found.'\n : `Forbidden keywords found: ${forbiddenMatches.join(', ')}`,\n )\n }\n}\n\n/\n Options for regular-expression assertions.\n /\nexport interface RegexAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Pattern to apply to model text.\n /\n pattern: RegExp\n}\n\n/\n Creates an assertion based on a regular expression.\n \n Example:\n * `expectRegex({ id: 'starts-with-act', pattern: /^<\\\|ACT:/ })`\n /\nexport function expectRegex(options: RegexAssertionOptions): Assertion {\n return async (context) => {\n const pass = options.pattern.test(context.text)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? 'Regex matched response text.' : `Regex did not match: ${options.pattern}`,\n )\n }\n}\n\n/\n Options for structured-output assertions.\n /\nexport interface StructuredOutputAssertionOptions<TValue> {\n /\n Stable assertion id.\n /\n id: string\n /\n Runtime validator for structured output.\n /\n validate: (value: unknown) => value is TValue\n /\n Optional failure reason.\n /\n failureReason?: string\n}\n\n/\n Creates an assertion for structured model output.\n \n Example:\n * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`\n /\nexport function expectStructuredOutput<TValue>(options: StructuredOutputAssertionOptions<TValue>): Assertion {\n return async (context) => {\n const pass = options.validate(context.structuredOutput)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? 'Structured output matched validator.' : (options.failureReason ?? 'Structured output validation failed.'),\n )\n }\n}\n\n/\n Options for tool-call argument assertions.\n /\nexport interface ToolCallArgsAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Tool name to inspect.\n /\n toolName: string\n /\n Runtime validator for tool arguments.\n /\n validate: (args: unknown) => boolean\n}\n\n/\n Creates an assertion for validating tool-call arguments.\n \n Example:\n * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`\n /\nexport function expectToolCallArgs(options: ToolCallArgsAssertionOptions): Assertion {\n return async (context) => {\n const targetCall = (context.toolCalls ?? []).find(call => call.name === options.toolName)\n\n if (targetCall == null) {\n return createOutcome(options.id, 'exact', false, 0, `Missing tool call: ${options.toolName}`)\n }\n\n const pass = options.validate(targetCall.args)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? `Tool call args validated for ${options.toolName}.` : `Tool call args validation failed for ${options.toolName}.`,\n )\n }\n}\n\n/\n Rubric judge result returned by teacher-model or rubric logic.\n /\nexport interface RubricJudgeResult {\n /\n Normalized score in the `0..1` range.\n /\n score: number\n /\n Judge explanation text.\n /\n reason: string\n /\n Optional judge model id.\n /\n judgeModel?: string\n}\n\n/\n Options for rubric assertions.\n /\nexport interface RubricAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Async rubric judge callback.\n /\n judge: (context: AssertionContext) => Promise<RubricJudgeResult>\n /\n Minimum passing score.\n \n @default 0.7\n /\n minScore?: number\n}\n\n/\n Creates a rubric assertion driven by teacher-model style scoring.\n \n Example:\n * `expectRubric({ id: 'human-like-tone', judge: judgeFn, minScore: 0.8 })`\n /\nexport function expectRubric(options: RubricAssertionOptions): Assertion {\n return async (context) => {\n const result = await options.judge(context)\n const minScore = options.minScore ?? 0.7\n const normalizedScore = clampScore(result.score)\n const pass = normalizedScore >= minScore\n\n return createOutcome(\n options.id,\n 'judge',\n pass,\n normalizedScore,\n `${result.reason}${result.judgeModel ? ` (judge: ${result.judgeModel})` : ''}`,\n )\n }\n}\n\n/\n Options for custom assertions.\n /\nexport interface CustomAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Score family emitted by this custom assertion.\n /\n scoreKind: RunScoreKind\n /\n Custom evaluator callback.\n /\n evaluate: (context: AssertionContext) => Promise<{ pass: boolean, reason: string, score: number }> \| { pass: boolean, reason: string, score: number }\n}\n\n/\n Creates a custom assertion with fully user-defined logic.\n \n Example:\n * `expectCustom({ id: 'stateful-window', scoreKind: 'exact', evaluate: (ctx) => ... })`\n /\nexport function expectCustom(options: CustomAssertionOptions): Assertion {\n return async (context) => {\n const result = await options.evaluate(context)\n\n return createOutcome(options.id, options.scoreKind, result.pass, result.score, result.reason)\n }\n}\n\n/\n Creates an inverse assertion.\n \n Example:\n * `expectNot(expectMustInclude({ id: 'contains-engine-word', keywords: ['bestmove'] }), { id: 'no-engine-word' })`\n /\nexport function expectNot(assertion: Assertion, options: { id: string }): Assertion {\n return async (context) => {\n const baseOutcome = await assertion(context)\n\n return createOutcome(\n options.id,\n baseOutcome.scoreKind,\n !baseOutcome.pass,\n 1 - baseOutcome.score,\n `NOT(${baseOutcome.id}): ${baseOutcome.reason}`,\n )\n }\n}\n\n/\n Executes assertion list and returns all outcomes.\n \n Call stack:\n \n {@link evaluateAssertions}\n * -> `assertion(context)`\n * -> {@link AssertionOutcome}[]\n /\nexport async function evaluateAssertions(\n assertions: readonly Assertion[],\n context: Omit<AssertionContext, 'state'> & { state?: AssertionState },\n): Promise<AssertionOutcome[]> {\n const state = context.state ?? new Map<string, unknown>()\n const normalizedContext: AssertionContext = {\n state,\n structuredOutput: context.structuredOutput,\n text: context.text,\n toolCalls: context.toolCalls,\n }\n\n const outcomes: AssertionOutcome[] = []\n\n for (const assertion of assertions) {\n outcomes.push(await assertion(normalizedContext))\n }\n\n return outcomes\n}\n\n/\n Converts assertion outcomes to run-score tuples consumed by aggregation.\n /\nexport function toRunScores(outcomes: readonly AssertionOutcome[]): RunScore[] {\n return outcomes.map(outcome => ({\n kind: outcome.scoreKind,\n score: outcome.score,\n }))\n}\n\n/\n Returns failing assertion outcomes in original order.\n */\nexport function collectFailedAssertions(outcomes: readonly AssertionOutcome[]): AssertionOutcome[] {\n return outcomes.filter(outcome => !outcome.pass)\n}\n"],"mappings":";;;;;;;AAoFA,SAAgB,mBAAmB,OAAe,eAAgC;CAChF,MAAM,sBAAsB,MAAM,MAAM,CAAC,WAAW,QAAQ,IAAI;AAEhE,KAAI,cACF,QAAO;AAGT,QAAO,oBAAoB,aAAa;;AAG1C,SAAS,WAAW,OAAuB;AACzC,KAAI,OAAO,MAAM,MAAM,CACrB,QAAO;AAGT,KAAI,QAAQ,EACV,QAAO;AAGT,KAAI,QAAQ,EACV,QAAO;AAGT,QAAO;;AAGT,SAAS,cACP,IACA,WACA,MACA,OACA,QACkB;AAClB,QAAO;EACL;EACA;EACA;EACA,OAAO,WAAW,MAAM;EACxB;EACD;;;;;;;;AAmCH,SAAgB,kBAAkB,SAAiD;AACjF,QAAO,OAAO,YAAY;AACxB,MAAI,QAAQ,SAAS,WAAW,EAC9B,QAAO,cAAc,QAAQ,IAAI,SAAS,MAAM,GAAG,mCAAmC;EAGxF,MAAM,gBAAgB,QAAQ,iBAAiB;EAC/C,MAAM,iBAAiB,mBAAmB,QAAQ,MAAM,cAAc;EACtE,MAAM,UAAU,QAAQ,SAAS,QAAQ,YAAY;GACnD,MAAM,oBAAoB,mBAAmB,SAAS,cAAc;AACpE,UAAO,eAAe,SAAS,kBAAkB;IACjD;EAGF,MAAM,QADO,QAAQ,QAAQ,WACP,QAClB,QAAQ,WAAW,QAAQ,SAAS,SACpC,QAAQ,SAAS;EAErB,MAAM,QAAQ,QAAQ,SAAS,WAAW,IAAI,IAAI,QAAQ,SAAS,QAAQ,SAAS;AAEpF,SAAO,cACL,QAAQ,IACR,SACA,MACA,OACA,OACI,WAAW,QAAQ,OAAO,GAAG,QAAQ,SAAS,OAAO,uBACrD,WAAW,QAAQ,OAAO,GAAG,QAAQ,SAAS,OAAO,qBAC1D;;;;;;;;;AA8BL,SAAgB,kBAAkB,SAAiD;AACjF,QAAO,OAAO,YAAY;AACxB,MAAI,QAAQ,SAAS,WAAW,EAC9B,QAAO,cAAc,QAAQ,IAAI,SAAS,MAAM,GAAG,mCAAmC;EAGxF,MAAM,gBAAgB,QAAQ,iBAAiB;EAC/C,MAAM,iBAAiB,mBAAmB,QAAQ,MAAM,cAAc;EACtE,MAAM,mBAAmB,QAAQ,SAAS,QAAQ,YAAY;GAC5D,MAAM,oBAAoB,mBAAmB,SAAS,cAAc;AACpE,UAAO,eAAe,SAAS,kBAAkB;IACjD;EAEF,MAAM,OAAO,iBAAiB,WAAW;EACzC,MAAM,QAAQ,OAAO,IAAI;AAEzB,SAAO,cACL,QAAQ,IACR,SACA,MACA,OACA,OACI,iCACA,6BAA6B,iBAAiB,KAAK,KAAK,GAC7D;;;;;;;;;AAwBL,SAAgB,YAAY,SAA2C;AACrE,QAAO,OAAO,YAAY;EACxB,MAAM,OAAO,QAAQ,QAAQ,KAAK,QAAQ,KAAK;AAE/C,SAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,iCAAiC,wBAAwB,QAAQ,UACzE;;;;;;;;;AA4BL,SAAgB,uBAA+B,SAA8D;AAC3G,QAAO,OAAO,YAAY;EACxB,MAAM,OAAO,QAAQ,SAAS,QAAQ,iBAAiB;AAEvD,SAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,yCAA0C,QAAQ,iBAAiB,uCAC3E;;;;;;;;;AA4BL,SAAgB,mBAAmB,SAAkD;AACnF,QAAO,OAAO,YAAY;EACxB,MAAM,cAAc,QAAQ,aAAa,EAAE,EAAE,MAAK,SAAQ,KAAK,SAAS,QAAQ,SAAS;AAEzF,MAAI,cAAc,KAChB,QAAO,cAAc,QAAQ,IAAI,SAAS,OAAO,GAAG,sBAAsB,QAAQ,WAAW;EAG/F,MAAM,OAAO,QAAQ,SAAS,WAAW,KAAK;AAE9C,SAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,gCAAgC,QAAQ,SAAS,KAAK,wCAAwC,QAAQ,SAAS,GACvH;;;;;;;;;AAgDL,SAAgB,aAAa,SAA4C;AACvE,QAAO,OAAO,YAAY;EACxB,MAAM,SAAS,MAAM,QAAQ,MAAM,QAAQ;EAC3C,MAAM,WAAW,QAAQ,YAAY;EACrC,MAAM,kBAAkB,WAAW,OAAO,MAAM;EAChD,MAAM,OAAO,mBAAmB;AAEhC,SAAO,cACL,QAAQ,IACR,SACA,MACA,iBACA,GAAG,OAAO,SAAS,OAAO,aAAa,YAAY,OAAO,WAAW,KAAK,KAC3E;;;;;;;;;AA4BL,SAAgB,aAAa,SAA4C;AACvE,QAAO,OAAO,YAAY;EACxB,MAAM,SAAS,MAAM,QAAQ,SAAS,QAAQ;AAE9C,SAAO,cAAc,QAAQ,IAAI,QAAQ,WAAW,OAAO,MAAM,OAAO,OAAO,OAAO,OAAO;;;;;;;;;AAUjG,SAAgB,UAAU,WAAsB,SAAoC;AAClF,QAAO,OAAO,YAAY;EACxB,MAAM,cAAc,MAAM,UAAU,QAAQ;AAE5C,SAAO,cACL,QAAQ,IACR,YAAY,WACZ,CAAC,YAAY,MACb,IAAI,YAAY,OAChB,OAAO,YAAY,GAAG,KAAK,YAAY,SACxC;;;;;;;;;;;;AAaL,eAAsB,mBACpB,YACA,SAC6B;CAE7B,MAAM,oBAAsC;EAC1C,OAFY,QAAQ,yBAAS,IAAI,KAAsB;EAGvD,kBAAkB,QAAQ;EAC1B,MAAM,QAAQ;EACd,WAAW,QAAQ;EACpB;CAED,MAAM,WAA+B,EAAE;AAEvC,MAAK,MAAM,aAAa,WACtB,UAAS,KAAK,MAAM,UAAU,kBAAkB,CAAC;AAGnD,QAAO;;;;;AAMT,SAAgB,YAAY,UAAmD;AAC7E,QAAO,SAAS,KAAI,aAAY;EAC9B,MAAM,QAAQ;EACd,OAAO,QAAQ;EAChB,EAAE;;;;;AAML,SAAgB,wBAAwB,UAA2D;AACjG,QAAO,SAAS,QAAO,YAAW,CAAC,QAAQ,KAAK"}
1	+ {"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/assertions/index.ts"],"sourcesContent":["import type { RunScore, RunScoreKind } from '../runner/aggregate'\n\n/*\n Stores mutable evaluation state for stateful assertion flows.\n \n Use when:\n * - assertions need to share counters, rolling metrics, or memoized values\n * - a scenario evaluates multiple steps and expects state-aware checks\n /\nexport type AssertionState = Map<string, unknown>\n\n/\n Represents one tool call emitted by a model response.\n /\nexport interface ToolCall {\n /\n Tool name used by the call.\n /\n name: string\n /\n Tool arguments payload.\n /\n args: unknown\n}\n\n/\n Normalized assertion context for one model output.\n /\nexport interface AssertionContext {\n /\n Plain text model output used by text assertions.\n /\n text: string\n /\n Optional structured output parsed from the model response.\n /\n structuredOutput?: unknown\n /\n Optional tool calls extracted from the model response.\n /\n toolCalls?: readonly ToolCall[]\n /\n Shared mutable state for stateful assertion measurement.\n /\n state: AssertionState\n}\n\n/\n Result for one assertion evaluation.\n /\nexport interface AssertionOutcome {\n /\n Stable assertion id.\n /\n id: string\n /\n Assertion family emitted as run score kind.\n /\n scoreKind: RunScoreKind\n /\n Whether the assertion passed.\n /\n pass: boolean\n /\n Normalized score in the `0..1` range.\n /\n score: number\n /\n Human-readable reason for logs and reports.\n /\n reason: string\n}\n\n/\n Async assertion function used by eval scenarios.\n /\nexport type Assertion = (context: AssertionContext) => Promise<AssertionOutcome>\n\n/\n Normalizes text for matching.\n \n Before: `\" Hello\\nWorld \"`\n * After: `\"hello world\"`\n /\nexport function normalizeMatchText(value: string, caseSensitive: boolean): string {\n const compactedWhitespace = value.trim().replaceAll(/\\s+/g, ' ')\n\n if (caseSensitive) {\n return compactedWhitespace\n }\n\n return compactedWhitespace.toLowerCase()\n}\n\nfunction clampScore(score: number): number {\n if (Number.isNaN(score)) {\n return 0\n }\n\n if (score < 0) {\n return 0\n }\n\n if (score > 1) {\n return 1\n }\n\n return score\n}\n\nfunction createOutcome(\n id: string,\n scoreKind: RunScoreKind,\n pass: boolean,\n score: number,\n reason: string,\n): AssertionOutcome {\n return {\n id,\n pass,\n reason,\n score: clampScore(score),\n scoreKind,\n }\n}\n\n/\n Options for include-keyword assertions.\n /\nexport interface MustIncludeAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Keywords that must be present.\n /\n keywords: readonly string[]\n /\n Match mode for keywords.\n \n @default 'all'\n /\n mode?: 'all' \| 'any'\n /\n Case-sensitive matching toggle.\n \n @default false\n /\n caseSensitive?: boolean\n}\n\n/\n Creates an assertion that requires specific keywords in model text.\n \n Example:\n * `expectMustInclude({ id: 'tone', keywords: ['calm', 'move'] })`\n /\nexport function expectMustInclude(options: MustIncludeAssertionOptions): Assertion {\n return async (context) => {\n if (options.keywords.length === 0) {\n return createOutcome(options.id, 'exact', true, 1, 'No required keywords configured.')\n }\n\n const caseSensitive = options.caseSensitive ?? false\n const normalizedText = normalizeMatchText(context.text, caseSensitive)\n const matches = options.keywords.filter((keyword) => {\n const normalizedKeyword = normalizeMatchText(keyword, caseSensitive)\n return normalizedText.includes(normalizedKeyword)\n })\n\n const mode = options.mode ?? 'all'\n const pass = mode === 'all'\n ? matches.length === options.keywords.length\n : matches.length > 0\n\n const score = options.keywords.length === 0 ? 1 : matches.length / options.keywords.length\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n score,\n pass\n ? `Matched ${matches.length}/${options.keywords.length} required keywords.`\n : `Matched ${matches.length}/${options.keywords.length} required keywords.`,\n )\n }\n}\n\n/\n Options for exclude-keyword assertions.\n /\nexport interface MustExcludeAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Keywords that must not appear.\n /\n keywords: readonly string[]\n /\n Case-sensitive matching toggle.\n \n @default false\n /\n caseSensitive?: boolean\n}\n\n/\n Creates an assertion that forbids specific keywords.\n \n Example:\n * `expectMustExclude({ id: 'no-engine-dump', keywords: ['bestmove', 'ponder'] })`\n /\nexport function expectMustExclude(options: MustExcludeAssertionOptions): Assertion {\n return async (context) => {\n if (options.keywords.length === 0) {\n return createOutcome(options.id, 'exact', true, 1, 'No excluded keywords configured.')\n }\n\n const caseSensitive = options.caseSensitive ?? false\n const normalizedText = normalizeMatchText(context.text, caseSensitive)\n const forbiddenMatches = options.keywords.filter((keyword) => {\n const normalizedKeyword = normalizeMatchText(keyword, caseSensitive)\n return normalizedText.includes(normalizedKeyword)\n })\n\n const pass = forbiddenMatches.length === 0\n const score = pass ? 1 : 0\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n score,\n pass\n ? 'No forbidden keywords found.'\n : `Forbidden keywords found: ${forbiddenMatches.join(', ')}`,\n )\n }\n}\n\n/\n Options for regular-expression assertions.\n /\nexport interface RegexAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Pattern to apply to model text.\n /\n pattern: RegExp\n}\n\n/\n Creates an assertion based on a regular expression.\n \n Example:\n * `expectRegex({ id: 'starts-with-act', pattern: /^<\\\|ACT:/ })`\n /\nexport function expectRegex(options: RegexAssertionOptions): Assertion {\n return async (context) => {\n const pass = options.pattern.test(context.text)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? 'Regex matched response text.' : `Regex did not match: ${options.pattern}`,\n )\n }\n}\n\n/\n Options for structured-output assertions.\n /\nexport interface StructuredOutputAssertionOptions<TValue> {\n /\n Stable assertion id.\n /\n id: string\n /\n Runtime validator for structured output.\n /\n validate: (value: unknown) => value is TValue\n /\n Optional failure reason.\n /\n failureReason?: string\n}\n\n/\n Creates an assertion for structured model output.\n \n Example:\n * `expectStructuredOutput({ id: 'json-shape', validate: isMySchema })`\n /\nexport function expectStructuredOutput<TValue>(options: StructuredOutputAssertionOptions<TValue>): Assertion {\n return async (context) => {\n const pass = options.validate(context.structuredOutput)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? 'Structured output matched validator.' : (options.failureReason ?? 'Structured output validation failed.'),\n )\n }\n}\n\n/\n Options for tool-call argument assertions.\n /\nexport interface ToolCallArgsAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Tool name to inspect.\n /\n toolName: string\n /\n Runtime validator for tool arguments.\n /\n validate: (args: unknown) => boolean\n}\n\n/\n Creates an assertion for validating tool-call arguments.\n \n Example:\n * `expectToolCallArgs({ id: 'spark-command-shape', toolName: 'builtIn_sparkCommand', validate: isSparkArgs })`\n /\nexport function expectToolCallArgs(options: ToolCallArgsAssertionOptions): Assertion {\n return async (context) => {\n const targetCall = (context.toolCalls ?? []).find(call => call.name === options.toolName)\n\n if (targetCall == null) {\n return createOutcome(options.id, 'exact', false, 0, `Missing tool call: ${options.toolName}`)\n }\n\n const pass = options.validate(targetCall.args)\n\n return createOutcome(\n options.id,\n 'exact',\n pass,\n pass ? 1 : 0,\n pass ? `Tool call args validated for ${options.toolName}.` : `Tool call args validation failed for ${options.toolName}.`,\n )\n }\n}\n\n/\n Rubric judge result returned by teacher-model or rubric logic.\n /\nexport interface RubricJudgeResult {\n /\n Normalized score in the `0..1` range.\n /\n score: number\n /\n Judge explanation text.\n /\n reason: string\n /\n Optional judge model id.\n /\n judgeModel?: string\n}\n\n/\n Options for rubric assertions.\n /\nexport interface RubricAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Async rubric judge callback.\n /\n judge: (context: AssertionContext) => Promise<RubricJudgeResult>\n /\n Minimum passing score.\n \n @default 0.7\n /\n minScore?: number\n}\n\n/\n Creates a rubric assertion driven by teacher-model style scoring.\n \n Example:\n * `expectRubric({ id: 'human-like-tone', judge: judgeFn, minScore: 0.8 })`\n /\nexport function expectRubric(options: RubricAssertionOptions): Assertion {\n return async (context) => {\n const result = await options.judge(context)\n const minScore = options.minScore ?? 0.7\n const normalizedScore = clampScore(result.score)\n const pass = normalizedScore >= minScore\n\n return createOutcome(\n options.id,\n 'judge',\n pass,\n normalizedScore,\n `${result.reason}${result.judgeModel ? ` (judge: ${result.judgeModel})` : ''}`,\n )\n }\n}\n\n/\n Options for custom assertions.\n /\nexport interface CustomAssertionOptions {\n /\n Stable assertion id.\n /\n id: string\n /\n Score family emitted by this custom assertion.\n /\n scoreKind: RunScoreKind\n /\n Custom evaluator callback.\n /\n evaluate: (context: AssertionContext) => Promise<{ pass: boolean, reason: string, score: number }> \| { pass: boolean, reason: string, score: number }\n}\n\n/\n Creates a custom assertion with fully user-defined logic.\n \n Example:\n * `expectCustom({ id: 'stateful-window', scoreKind: 'exact', evaluate: (ctx) => ... })`\n /\nexport function expectCustom(options: CustomAssertionOptions): Assertion {\n return async (context) => {\n const result = await options.evaluate(context)\n\n return createOutcome(options.id, options.scoreKind, result.pass, result.score, result.reason)\n }\n}\n\n/\n Creates an inverse assertion.\n \n Example:\n * `expectNot(expectMustInclude({ id: 'contains-engine-word', keywords: ['bestmove'] }), { id: 'no-engine-word' })`\n /\nexport function expectNot(assertion: Assertion, options: { id: string }): Assertion {\n return async (context) => {\n const baseOutcome = await assertion(context)\n\n return createOutcome(\n options.id,\n baseOutcome.scoreKind,\n !baseOutcome.pass,\n 1 - baseOutcome.score,\n `NOT(${baseOutcome.id}): ${baseOutcome.reason}`,\n )\n }\n}\n\n/\n Executes assertion list and returns all outcomes.\n \n Call stack:\n \n {@link evaluateAssertions}\n * -> `assertion(context)`\n * -> {@link AssertionOutcome}[]\n /\nexport async function evaluateAssertions(\n assertions: readonly Assertion[],\n context: Omit<AssertionContext, 'state'> & { state?: AssertionState },\n): Promise<AssertionOutcome[]> {\n const state = context.state ?? new Map<string, unknown>()\n const normalizedContext: AssertionContext = {\n state,\n structuredOutput: context.structuredOutput,\n text: context.text,\n toolCalls: context.toolCalls,\n }\n\n const outcomes: AssertionOutcome[] = []\n\n for (const assertion of assertions) {\n outcomes.push(await assertion(normalizedContext))\n }\n\n return outcomes\n}\n\n/\n Converts assertion outcomes to run-score tuples consumed by aggregation.\n /\nexport function toRunScores(outcomes: readonly AssertionOutcome[]): RunScore[] {\n return outcomes.map(outcome => ({\n kind: outcome.scoreKind,\n score: outcome.score,\n }))\n}\n\n/\n Returns failing assertion outcomes in original order.\n */\nexport function collectFailedAssertions(outcomes: readonly AssertionOutcome[]): AssertionOutcome[] {\n return outcomes.filter(outcome => !outcome.pass)\n}\n"],"mappings":";;;;;;;AAoFA,SAAgB,mBAAmB,OAAe,eAAgC;CAChF,MAAM,sBAAsB,MAAM,KAAK,CAAC,CAAC,WAAW,QAAQ,GAAG;CAE/D,IAAI,eACF,OAAO;CAGT,OAAO,oBAAoB,YAAY;AACzC;AAEA,SAAS,WAAW,OAAuB;CACzC,IAAI,OAAO,MAAM,KAAK,GACpB,OAAO;CAGT,IAAI,QAAQ,GACV,OAAO;CAGT,IAAI,QAAQ,GACV,OAAO;CAGT,OAAO;AACT;AAEA,SAAS,cACP,IACA,WACA,MACA,OACA,QACkB;CAClB,OAAO;EACL;EACA;EACA;EACA,OAAO,WAAW,KAAK;EACvB;CACF;AACF;;;;;;;AAkCA,SAAgB,kBAAkB,SAAiD;CACjF,OAAO,OAAO,YAAY;EACxB,IAAI,QAAQ,SAAS,WAAW,GAC9B,OAAO,cAAc,QAAQ,IAAI,SAAS,MAAM,GAAG,kCAAkC;EAGvF,MAAM,gBAAgB,QAAQ,iBAAiB;EAC/C,MAAM,iBAAiB,mBAAmB,QAAQ,MAAM,aAAa;EACrE,MAAM,UAAU,QAAQ,SAAS,QAAQ,YAAY;GACnD,MAAM,oBAAoB,mBAAmB,SAAS,aAAa;GACnE,OAAO,eAAe,SAAS,iBAAiB;EAClD,CAAC;EAGD,MAAM,QADO,QAAQ,QAAQ,WACP,QAClB,QAAQ,WAAW,QAAQ,SAAS,SACpC,QAAQ,SAAS;EAErB,MAAM,QAAQ,QAAQ,SAAS,WAAW,IAAI,IAAI,QAAQ,SAAS,QAAQ,SAAS;EAEpF,OAAO,cACL,QAAQ,IACR,SACA,MACA,OACA,OACI,WAAW,QAAQ,OAAO,GAAG,QAAQ,SAAS,OAAO,uBACrD,WAAW,QAAQ,OAAO,GAAG,QAAQ,SAAS,OAAO,oBAC3D;CACF;AACF;;;;;;;AA4BA,SAAgB,kBAAkB,SAAiD;CACjF,OAAO,OAAO,YAAY;EACxB,IAAI,QAAQ,SAAS,WAAW,GAC9B,OAAO,cAAc,QAAQ,IAAI,SAAS,MAAM,GAAG,kCAAkC;EAGvF,MAAM,gBAAgB,QAAQ,iBAAiB;EAC/C,MAAM,iBAAiB,mBAAmB,QAAQ,MAAM,aAAa;EACrE,MAAM,mBAAmB,QAAQ,SAAS,QAAQ,YAAY;GAC5D,MAAM,oBAAoB,mBAAmB,SAAS,aAAa;GACnE,OAAO,eAAe,SAAS,iBAAiB;EAClD,CAAC;EAED,MAAM,OAAO,iBAAiB,WAAW;EACzC,MAAM,QAAQ,OAAO,IAAI;EAEzB,OAAO,cACL,QAAQ,IACR,SACA,MACA,OACA,OACI,iCACA,6BAA6B,iBAAiB,KAAK,IAAI,GAC7D;CACF;AACF;;;;;;;AAsBA,SAAgB,YAAY,SAA2C;CACrE,OAAO,OAAO,YAAY;EACxB,MAAM,OAAO,QAAQ,QAAQ,KAAK,QAAQ,IAAI;EAE9C,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,iCAAiC,wBAAwB,QAAQ,SAC1E;CACF;AACF;;;;;;;AA0BA,SAAgB,uBAA+B,SAA8D;CAC3G,OAAO,OAAO,YAAY;EACxB,MAAM,OAAO,QAAQ,SAAS,QAAQ,gBAAgB;EAEtD,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,yCAA0C,QAAQ,iBAAiB,sCAC5E;CACF;AACF;;;;;;;AA0BA,SAAgB,mBAAmB,SAAkD;CACnF,OAAO,OAAO,YAAY;EACxB,MAAM,cAAc,QAAQ,aAAa,CAAC,EAAA,CAAG,MAAK,SAAQ,KAAK,SAAS,QAAQ,QAAQ;EAExF,IAAI,cAAc,MAChB,OAAO,cAAc,QAAQ,IAAI,SAAS,OAAO,GAAG,sBAAsB,QAAQ,UAAU;EAG9F,MAAM,OAAO,QAAQ,SAAS,WAAW,IAAI;EAE7C,OAAO,cACL,QAAQ,IACR,SACA,MACA,OAAO,IAAI,GACX,OAAO,gCAAgC,QAAQ,SAAS,KAAK,wCAAwC,QAAQ,SAAS,EACxH;CACF;AACF;;;;;;;AA8CA,SAAgB,aAAa,SAA4C;CACvE,OAAO,OAAO,YAAY;EACxB,MAAM,SAAS,MAAM,QAAQ,MAAM,OAAO;EAC1C,MAAM,WAAW,QAAQ,YAAY;EACrC,MAAM,kBAAkB,WAAW,OAAO,KAAK;EAC/C,MAAM,OAAO,mBAAmB;EAEhC,OAAO,cACL,QAAQ,IACR,SACA,MACA,iBACA,GAAG,OAAO,SAAS,OAAO,aAAa,YAAY,OAAO,WAAW,KAAK,IAC5E;CACF;AACF;;;;;;;AA0BA,SAAgB,aAAa,SAA4C;CACvE,OAAO,OAAO,YAAY;EACxB,MAAM,SAAS,MAAM,QAAQ,SAAS,OAAO;EAE7C,OAAO,cAAc,QAAQ,IAAI,QAAQ,WAAW,OAAO,MAAM,OAAO,OAAO,OAAO,MAAM;CAC9F;AACF;;;;;;;AAQA,SAAgB,UAAU,WAAsB,SAAoC;CAClF,OAAO,OAAO,YAAY;EACxB,MAAM,cAAc,MAAM,UAAU,OAAO;EAE3C,OAAO,cACL,QAAQ,IACR,YAAY,WACZ,CAAC,YAAY,MACb,IAAI,YAAY,OAChB,OAAO,YAAY,GAAG,KAAK,YAAY,QACzC;CACF;AACF;;;;;;;;;;AAWA,eAAsB,mBACpB,YACA,SAC6B;CAE7B,MAAM,oBAAsC;EAC1C,OAFY,QAAQ,yBAAS,IAAI,IAAqB;EAGtD,kBAAkB,QAAQ;EAC1B,MAAM,QAAQ;EACd,WAAW,QAAQ;CACrB;CAEA,MAAM,WAA+B,CAAC;CAEtC,KAAK,MAAM,aAAa,YACtB,SAAS,KAAK,MAAM,UAAU,iBAAiB,CAAC;CAGlD,OAAO;AACT;;;;AAKA,SAAgB,YAAY,UAAmD;CAC7E,OAAO,SAAS,KAAI,aAAY;EAC9B,MAAM,QAAQ;EACd,OAAO,QAAQ;CACjB,EAAE;AACJ;;;;AAKA,SAAgB,wBAAwB,UAA2D;CACjG,OAAO,SAAS,QAAO,YAAW,CAAC,QAAQ,IAAI;AACjD"}

package/dist/core/inference-executors/index.d.mts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { a as requiredEnvFrom, i as envFrom, n as EnvValueType, r as RequiredEnvFromOptions, t as EnvFromOptions } from "../../env-DfWZy_n4.mjs";
+import { a as requiredEnvFrom, i as envFrom, n as EnvValueType, r as RequiredEnvFromOptions, t as EnvFromOptions } from "../../env-bRH0K6fU.mjs";
 import { createOpenAI } from "@xsai-ext/providers/create";
 //#region src/core/inference-executors/retry-policy.d.ts

package/dist/core/inference-executors/index.mjs CHANGED Viewed

@@ -1,9 +1,9 @@
-import { n as requiredEnvFrom, t as envFrom } from "../../env-nV5rVErX.mjs";
+import { n as requiredEnvFrom, t as envFrom } from "../../env-BVYeJhGA.mjs";
 import process from "node:process";
 import { errorMessageFrom, errorNameFrom, sleep } from "@moeru/std";
 import { createOpenAI } from "@xsai-ext/providers/create";
 //#region src/core/inference-executors/retry-policy.ts
-const retryableStatusCodes = new Set([
+const retryableStatusCodes = /* @__PURE__ */ new Set([
 	408,
 	425,
 	429,
@@ -12,7 +12,7 @@ const retryableStatusCodes = new Set([
 	503,
 	504
 ]);
-const retryableErrorNames = new Set(["TimeoutError", "FetchError"]);
+const retryableErrorNames = /* @__PURE__ */ new Set(["TimeoutError", "FetchError"]);
 const retryableMessagePatterns = [
 	/rate limit/i,
 	/rate-limited/i,

package/dist/core/inference-executors/index.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.mjs","names":["defaultSleep"],"sources":["../../../src/core/inference-executors/retry-policy.ts","../../../src/core/inference-executors/adapters.ts","../../../src/core/inference-executors/remote-providers/openai/index.ts"],"sourcesContent":["import { sleep as defaultSleep, errorMessageFrom, errorNameFrom } from '@moeru/std'\n\n/*\n Describes how provider retries should behave.\n \n ASCII flow:\n * attempt -> run request -> success return\n * attempt -> run request -> retriable failure -> sleep -> next attempt\n * attempt -> run request -> non-retriable failure -> throw\n /\nexport interface RetryPolicy {\n /\n Maximum number of total attempts, including the first try.\n /\n maxAttempts: number\n /\n Returns the wait time for a retry attempt.\n /\n delayMs: (attempt: number) => number\n /\n Determines whether an error can be retried safely.\n /\n shouldRetry: (error: unknown) => boolean\n /\n Suspends execution between retries.\n /\n sleep: (milliseconds: number) => Promise<void>\n}\n\n/\n Configures a retry policy before a provider call is executed.\n \n Use when:\n * - you want the default retry classifier but need to tune attempts or delay\n * - you need to replace the sleeper in tests\n \n Expects:\n * - `maxAttempts` to be a finite integer greater than or equal to `1`\n * - `delayMs` to return a non-negative wait time in milliseconds\n /\nexport interface RetryPolicyOptions {\n /\n Maximum total attempts, including the first request.\n \n @default 3\n /\n maxAttempts?: number\n /\n Computes the delay for a retry attempt.\n \n The attempt number starts at `1` for the first retry.\n /\n delayMs?: (attempt: number) => number\n /\n Overrides the retry classifier.\n /\n shouldRetry?: (error: unknown) => boolean\n /\n Overrides the sleeper used between attempts.\n /\n sleep?: (milliseconds: number) => Promise<void>\n}\n\nconst retryableStatusCodes = new Set([408, 425, 429, 500, 502, 503, 504])\nconst retryableErrorNames = new Set(['TimeoutError', 'FetchError'])\nconst retryableMessagePatterns = [\n /rate limit/i,\n /rate-limited/i,\n /temporarily unavailable/i,\n /service unavailable/i,\n /server error/i,\n /fetch failed/i,\n /network error/i,\n /socket hang up/i,\n /econnreset/i,\n /econnrefused/i,\n /eai_again/i,\n /etimedout/i,\n /timed out/i,\n /timeout/i,\n]\n\nfunction getStatusCode(error: unknown): number \| undefined {\n if (error == null \|\| typeof error !== 'object') {\n return undefined\n }\n\n const maybeStatusCode = (error as { statusCode?: unknown }).statusCode\n if (typeof maybeStatusCode === 'number') {\n return maybeStatusCode\n }\n\n const maybeStatus = (error as { status?: unknown }).status\n if (typeof maybeStatus === 'number') {\n return maybeStatus\n }\n\n const response = (error as { response?: unknown }).response\n if (response == null \|\| typeof response !== 'object') {\n return undefined\n }\n\n const responseStatus = (response as { status?: unknown }).status\n return typeof responseStatus === 'number' ? responseStatus : undefined\n}\n\n/\n Returns true when a provider failure is temporary and a retry is reasonable.\n \n Use when:\n * - the upstream failure is a transport problem or a 5xx/429 response\n \n Expects:\n * - provider errors to expose a status code, name, or message when possible\n /\nexport function isRetriableProviderError(error: unknown): boolean {\n const statusCode = getStatusCode(error)\n\n if (statusCode != null) {\n return retryableStatusCodes.has(statusCode)\n }\n\n const errorName = errorNameFrom(error)\n if (errorName != null && retryableErrorNames.has(errorName)) {\n return true\n }\n\n const errorMessage = errorMessageFrom(error)\n if (errorMessage == null) {\n return false\n }\n\n return retryableMessagePatterns.some(pattern => pattern.test(errorMessage))\n}\n\nfunction defaultDelayMs(attempt: number): number {\n return 500 2 (attempt - 1)\n}\n\n/\n * Creates a retry policy for provider work.\n \n Use when:\n * - you need a reusable retry runner for eval-time provider calls\n * - you want to keep retry behavior deterministic in tests\n \n Expects:\n * - callers to treat `maxAttempts` as total attempts, not retries\n \n Throws:\n * - `RangeError` when `maxAttempts` is not a finite integer greater than or equal to `1`\n /\nfunction assertValidMaxAttempts(value: number): number {\n if (!Number.isFinite(value) \|\| !Number.isInteger(value) \|\| value < 1) {\n throw new RangeError('maxAttempts must be a finite integer greater than or equal to 1.')\n }\n\n return value\n}\n\nexport function createRetryPolicy(options: RetryPolicyOptions = {}): RetryPolicy {\n const maxAttempts = assertValidMaxAttempts(options.maxAttempts ?? 3)\n\n return {\n maxAttempts,\n delayMs: options.delayMs ?? defaultDelayMs,\n shouldRetry: options.shouldRetry ?? isRetriableProviderError,\n sleep: options.sleep ?? defaultSleep,\n }\n}\n\n/\n Runs an operation with bounded retries.\n \n Use when:\n * - you are calling an LLM provider or other temporary upstream dependency\n * - non-retriable failures should bubble immediately\n \n Expects:\n * - the operation to be idempotent across attempts\n /\nexport async function runWithRetry<T>(operation: () => Promise<T>, policy: RetryPolicy = createRetryPolicy()): Promise<T> {\n for (let attempt = 1; attempt <= policy.maxAttempts; attempt += 1) {\n try {\n return await operation()\n }\n catch (error) {\n if (attempt >= policy.maxAttempts \|\| !policy.shouldRetry(error)) {\n throw error\n }\n\n const delayMilliseconds = policy.delayMs(attempt)\n if (delayMilliseconds > 0) {\n await policy.sleep(delayMilliseconds)\n }\n }\n }\n\n throw new Error('Retry loop exited without returning a value.')\n}\n","import type { RetryPolicy, RetryPolicyOptions } from './retry-policy'\n\nimport { createRetryPolicy, runWithRetry } from './retry-policy'\n\n/\n Bundles a provider with the retry policy used to call it.\n \n Use when:\n * - a provider instance should travel with the retry runner that governs it\n * - you want call sites to share one retry configuration object\n /\nexport interface ProviderAdapter<TProvider> {\n /\n The underlying provider instance.\n /\n provider: TProvider\n /\n The retry policy used for provider calls.\n /\n retryPolicy: RetryPolicy\n /\n Runs a provider-dependent operation with the adapter retry policy.\n /\n runWithRetry: <TResult>(operation: () => Promise<TResult>) => Promise<TResult>\n}\n\n/\n Creates a provider adapter with the default retry policy.\n \n Use when:\n * - you have a provider instance and want a consistent retry wrapper\n \n Expects:\n * - the provider to be safe to reuse across attempts\n /\nexport function createProviderAdapter<TProvider>(provider: TProvider, options: RetryPolicyOptions = {}): ProviderAdapter<TProvider> {\n const retryPolicy = createRetryPolicy(options)\n\n return {\n provider,\n retryPolicy,\n runWithRetry: operation => runWithRetry(operation, retryPolicy),\n }\n}\n","import type { ProviderAdapter } from '../../adapters'\nimport type { RetryPolicyOptions } from '../../retry-policy'\n\nimport process from 'node:process'\n\nimport { createOpenAI } from '@xsai-ext/providers/create'\n\nimport { createProviderAdapter } from '../../adapters'\nimport { envFrom, requiredEnvFrom } from '../../env'\n\n/\n Represents the OpenAI provider instance returned by xsai.\n /\nexport type OpenAIProvider = ReturnType<typeof createOpenAI>\n\n/\n Represents the OpenAI adapter used by vieval.\n /\nexport type OpenAIProviderAdapter = ProviderAdapter<OpenAIProvider>\n\n/\n Configures env key names and source for OpenAI provider setup.\n /\nexport interface OpenAIEnvSourceOptions {\n /\n Environment object used for variable lookup.\n \n @default process.env\n /\n env?: NodeJS.ProcessEnv\n /\n Env key name for API key.\n \n @default 'OPENAI_API_KEY'\n /\n apiKey?: string\n /\n Env key name for base URL.\n \n @default 'OPENAI_BASE_URL'\n /\n baseURL?: string\n /\n Env key name for model.\n \n @default 'OPENAI_MODEL'\n /\n model?: string\n}\n\n/\n Configures fallback defaults when env values are missing.\n /\nexport interface OpenAIFromEnvDefaultOptions {\n /\n API key fallback value.\n /\n apiKey?: string\n /\n Base URL fallback value.\n /\n baseURL?: string\n /\n Model fallback value.\n /\n model?: string\n /\n Retry policy override passed to provider adapter.\n /\n retryOptions?: RetryPolicyOptions\n}\n\n/\n Result produced by `createOpenAIFromEnv`.\n /\nexport interface OpenAIFromEnvResult {\n adapter: OpenAIProviderAdapter\n apiKey: string\n baseURL?: string\n model: string\n}\n\n/\n Minimal response shape returned by text-generation calls.\n /\nexport interface OpenAITextGenerationResult {\n /\n Text output from the provider.\n \n Some OpenAI-compatible implementations may return `null`.\n /\n text?: string \| null\n}\n\n/\n Normalizes provider text output to a safe string.\n \n Before: `{ text: null }`\n * After: `''`\n \n Before: `{ text: 'hello' }`\n * After: `'hello'`\n /\nexport function normalizeOpenAITextOutput(result: OpenAITextGenerationResult): string {\n return typeof result.text === 'string' ? result.text : ''\n}\n\n/\n Creates an OpenAI provider adapter using environment variables with defaults.\n \n Example:\n * `const runtime = createOpenAIFromEnv({}, { model: 'gpt-4.1-mini' })`\n /\nexport function createOpenAIFromEnv(\n source: OpenAIEnvSourceOptions = {},\n defaults: OpenAIFromEnvDefaultOptions = {},\n): OpenAIFromEnvResult {\n const env = source.env ?? process.env\n const apiKeyEnvKey = source.apiKey ?? 'OPENAI_API_KEY'\n const baseURLEnvKey = source.baseURL ?? 'OPENAI_BASE_URL'\n const modelEnvKey = source.model ?? 'OPENAI_MODEL'\n\n const envWithDefaults = {\n ...(defaults.apiKey == null ? {} : { [apiKeyEnvKey]: defaults.apiKey }),\n ...(defaults.baseURL == null ? {} : { [baseURLEnvKey]: defaults.baseURL }),\n ...(defaults.model == null ? {} : { [modelEnvKey]: defaults.model }),\n ...env,\n }\n\n const apiKey = requiredEnvFrom(envWithDefaults, {\n name: apiKeyEnvKey,\n type: 'string',\n })\n const model = requiredEnvFrom(envWithDefaults, {\n name: modelEnvKey,\n type: 'string',\n })\n const baseURL = envFrom(envWithDefaults, {\n name: baseURLEnvKey,\n type: 'string',\n })\n const adapter = createOpenAIProviderAdapter(apiKey, baseURL, defaults.retryOptions)\n\n return {\n adapter,\n apiKey,\n baseURL,\n model,\n }\n}\n\n/\n Creates an OpenAI provider adapter for eval-time requests.\n \n Use when:\n * - an eval needs the OpenAI SDK surface plus the shared retry runner\n \n Expects:\n * - `apiKey` and `baseURL` to point at an OpenAI-compatible endpoint\n * - `retryOptions` to follow the same invariants as `createRetryPolicy`\n */\nexport function createOpenAIProviderAdapter(apiKey: string, baseURL?: string, retryOptions: RetryPolicyOptions = {}): OpenAIProviderAdapter {\n return createProviderAdapter(createOpenAI(apiKey, baseURL), retryOptions)\n}\n"],"mappings":";;;;;AA+DA,MAAM,uBAAuB,IAAI,IAAI;CAAC;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAI,CAAC;AACzE,MAAM,sBAAsB,IAAI,IAAI,CAAC,gBAAgB,aAAa,CAAC;AACnE,MAAM,2BAA2B;CAC/B;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;AAED,SAAS,cAAc,OAAoC;AACzD,KAAI,SAAS,QAAQ,OAAO,UAAU,SACpC;CAGF,MAAM,kBAAmB,MAAmC;AAC5D,KAAI,OAAO,oBAAoB,SAC7B,QAAO;CAGT,MAAM,cAAe,MAA+B;AACpD,KAAI,OAAO,gBAAgB,SACzB,QAAO;CAGT,MAAM,WAAY,MAAiC;AACnD,KAAI,YAAY,QAAQ,OAAO,aAAa,SAC1C;CAGF,MAAM,iBAAkB,SAAkC;AAC1D,QAAO,OAAO,mBAAmB,WAAW,iBAAiB,KAAA;;;;;;;;;;;AAY/D,SAAgB,yBAAyB,OAAyB;CAChE,MAAM,aAAa,cAAc,MAAM;AAEvC,KAAI,cAAc,KAChB,QAAO,qBAAqB,IAAI,WAAW;CAG7C,MAAM,YAAY,cAAc,MAAM;AACtC,KAAI,aAAa,QAAQ,oBAAoB,IAAI,UAAU,CACzD,QAAO;CAGT,MAAM,eAAe,iBAAiB,MAAM;AAC5C,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO,yBAAyB,MAAK,YAAW,QAAQ,KAAK,aAAa,CAAC;;AAG7E,SAAS,eAAe,SAAyB;AAC/C,QAAO,MAAM,MAAM,UAAU;;;;;;;;;;;;;;;AAgB/B,SAAS,uBAAuB,OAAuB;AACrD,KAAI,CAAC,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,UAAU,MAAM,IAAI,QAAQ,EACjE,OAAM,IAAI,WAAW,mEAAmE;AAG1F,QAAO;;AAGT,SAAgB,kBAAkB,UAA8B,EAAE,EAAe;AAG/E,QAAO;EACL,aAHkB,uBAAuB,QAAQ,eAAe,EAAE;EAIlE,SAAS,QAAQ,WAAW;EAC5B,aAAa,QAAQ,eAAe;EACpC,OAAO,QAAQ,SAASA;EACzB;;;;;;;;;;;;AAaH,eAAsB,aAAgB,WAA6B,SAAsB,mBAAmB,EAAc;AACxH,MAAK,IAAI,UAAU,GAAG,WAAW,OAAO,aAAa,WAAW,EAC9D,KAAI;AACF,SAAO,MAAM,WAAW;UAEnB,OAAO;AACZ,MAAI,WAAW,OAAO,eAAe,CAAC,OAAO,YAAY,MAAM,CAC7D,OAAM;EAGR,MAAM,oBAAoB,OAAO,QAAQ,QAAQ;AACjD,MAAI,oBAAoB,EACtB,OAAM,OAAO,MAAM,kBAAkB;;AAK3C,OAAM,IAAI,MAAM,+CAA+C;;;;;;;;;;;;;ACnKjE,SAAgB,sBAAiC,UAAqB,UAA8B,EAAE,EAA8B;CAClI,MAAM,cAAc,kBAAkB,QAAQ;AAE9C,QAAO;EACL;EACA;EACA,eAAc,cAAa,aAAa,WAAW,YAAY;EAChE;;;;;;;;;;;;;AC6DH,SAAgB,0BAA0B,QAA4C;AACpF,QAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;;;;;;;;AASzD,SAAgB,oBACd,SAAiC,EAAE,EACnC,WAAwC,EAAE,EACrB;CACrB,MAAM,MAAM,OAAO,OAAO,QAAQ;CAClC,MAAM,eAAe,OAAO,UAAU;CACtC,MAAM,gBAAgB,OAAO,WAAW;CACxC,MAAM,cAAc,OAAO,SAAS;CAEpC,MAAM,kBAAkB;EACtB,GAAI,SAAS,UAAU,OAAO,EAAE,GAAG,GAAG,eAAe,SAAS,QAAQ;EACtE,GAAI,SAAS,WAAW,OAAO,EAAE,GAAG,GAAG,gBAAgB,SAAS,SAAS;EACzE,GAAI,SAAS,SAAS,OAAO,EAAE,GAAG,GAAG,cAAc,SAAS,OAAO;EACnE,GAAG;EACJ;CAED,MAAM,SAAS,gBAAgB,iBAAiB;EAC9C,MAAM;EACN,MAAM;EACP,CAAC;CACF,MAAM,QAAQ,gBAAgB,iBAAiB;EAC7C,MAAM;EACN,MAAM;EACP,CAAC;CACF,MAAM,UAAU,QAAQ,iBAAiB;EACvC,MAAM;EACN,MAAM;EACP,CAAC;AAGF,QAAO;EACL,SAHc,4BAA4B,QAAQ,SAAS,SAAS,aAAa;EAIjF;EACA;EACA;EACD;;;;;;;;;;;;AAaH,SAAgB,4BAA4B,QAAgB,SAAkB,eAAmC,EAAE,EAAyB;AAC1I,QAAO,sBAAsB,aAAa,QAAQ,QAAQ,EAAE,aAAa"}
1	+ {"version":3,"file":"index.mjs","names":["defaultSleep"],"sources":["../../../src/core/inference-executors/retry-policy.ts","../../../src/core/inference-executors/adapters.ts","../../../src/core/inference-executors/remote-providers/openai/index.ts"],"sourcesContent":["import { sleep as defaultSleep, errorMessageFrom, errorNameFrom } from '@moeru/std'\n\n/*\n Describes how provider retries should behave.\n \n ASCII flow:\n * attempt -> run request -> success return\n * attempt -> run request -> retriable failure -> sleep -> next attempt\n * attempt -> run request -> non-retriable failure -> throw\n /\nexport interface RetryPolicy {\n /\n Maximum number of total attempts, including the first try.\n /\n maxAttempts: number\n /\n Returns the wait time for a retry attempt.\n /\n delayMs: (attempt: number) => number\n /\n Determines whether an error can be retried safely.\n /\n shouldRetry: (error: unknown) => boolean\n /\n Suspends execution between retries.\n /\n sleep: (milliseconds: number) => Promise<void>\n}\n\n/\n Configures a retry policy before a provider call is executed.\n \n Use when:\n * - you want the default retry classifier but need to tune attempts or delay\n * - you need to replace the sleeper in tests\n \n Expects:\n * - `maxAttempts` to be a finite integer greater than or equal to `1`\n * - `delayMs` to return a non-negative wait time in milliseconds\n /\nexport interface RetryPolicyOptions {\n /\n Maximum total attempts, including the first request.\n \n @default 3\n /\n maxAttempts?: number\n /\n Computes the delay for a retry attempt.\n \n The attempt number starts at `1` for the first retry.\n /\n delayMs?: (attempt: number) => number\n /\n Overrides the retry classifier.\n /\n shouldRetry?: (error: unknown) => boolean\n /\n Overrides the sleeper used between attempts.\n /\n sleep?: (milliseconds: number) => Promise<void>\n}\n\nconst retryableStatusCodes = new Set([408, 425, 429, 500, 502, 503, 504])\nconst retryableErrorNames = new Set(['TimeoutError', 'FetchError'])\nconst retryableMessagePatterns = [\n /rate limit/i,\n /rate-limited/i,\n /temporarily unavailable/i,\n /service unavailable/i,\n /server error/i,\n /fetch failed/i,\n /network error/i,\n /socket hang up/i,\n /econnreset/i,\n /econnrefused/i,\n /eai_again/i,\n /etimedout/i,\n /timed out/i,\n /timeout/i,\n]\n\nfunction getStatusCode(error: unknown): number \| undefined {\n if (error == null \|\| typeof error !== 'object') {\n return undefined\n }\n\n const maybeStatusCode = (error as { statusCode?: unknown }).statusCode\n if (typeof maybeStatusCode === 'number') {\n return maybeStatusCode\n }\n\n const maybeStatus = (error as { status?: unknown }).status\n if (typeof maybeStatus === 'number') {\n return maybeStatus\n }\n\n const response = (error as { response?: unknown }).response\n if (response == null \|\| typeof response !== 'object') {\n return undefined\n }\n\n const responseStatus = (response as { status?: unknown }).status\n return typeof responseStatus === 'number' ? responseStatus : undefined\n}\n\n/\n Returns true when a provider failure is temporary and a retry is reasonable.\n \n Use when:\n * - the upstream failure is a transport problem or a 5xx/429 response\n \n Expects:\n * - provider errors to expose a status code, name, or message when possible\n /\nexport function isRetriableProviderError(error: unknown): boolean {\n const statusCode = getStatusCode(error)\n\n if (statusCode != null) {\n return retryableStatusCodes.has(statusCode)\n }\n\n const errorName = errorNameFrom(error)\n if (errorName != null && retryableErrorNames.has(errorName)) {\n return true\n }\n\n const errorMessage = errorMessageFrom(error)\n if (errorMessage == null) {\n return false\n }\n\n return retryableMessagePatterns.some(pattern => pattern.test(errorMessage))\n}\n\nfunction defaultDelayMs(attempt: number): number {\n return 500 2 (attempt - 1)\n}\n\n/\n * Creates a retry policy for provider work.\n \n Use when:\n * - you need a reusable retry runner for eval-time provider calls\n * - you want to keep retry behavior deterministic in tests\n \n Expects:\n * - callers to treat `maxAttempts` as total attempts, not retries\n \n Throws:\n * - `RangeError` when `maxAttempts` is not a finite integer greater than or equal to `1`\n /\nfunction assertValidMaxAttempts(value: number): number {\n if (!Number.isFinite(value) \|\| !Number.isInteger(value) \|\| value < 1) {\n throw new RangeError('maxAttempts must be a finite integer greater than or equal to 1.')\n }\n\n return value\n}\n\nexport function createRetryPolicy(options: RetryPolicyOptions = {}): RetryPolicy {\n const maxAttempts = assertValidMaxAttempts(options.maxAttempts ?? 3)\n\n return {\n maxAttempts,\n delayMs: options.delayMs ?? defaultDelayMs,\n shouldRetry: options.shouldRetry ?? isRetriableProviderError,\n sleep: options.sleep ?? defaultSleep,\n }\n}\n\n/\n Runs an operation with bounded retries.\n \n Use when:\n * - you are calling an LLM provider or other temporary upstream dependency\n * - non-retriable failures should bubble immediately\n \n Expects:\n * - the operation to be idempotent across attempts\n /\nexport async function runWithRetry<T>(operation: () => Promise<T>, policy: RetryPolicy = createRetryPolicy()): Promise<T> {\n for (let attempt = 1; attempt <= policy.maxAttempts; attempt += 1) {\n try {\n return await operation()\n }\n catch (error) {\n if (attempt >= policy.maxAttempts \|\| !policy.shouldRetry(error)) {\n throw error\n }\n\n const delayMilliseconds = policy.delayMs(attempt)\n if (delayMilliseconds > 0) {\n await policy.sleep(delayMilliseconds)\n }\n }\n }\n\n throw new Error('Retry loop exited without returning a value.')\n}\n","import type { RetryPolicy, RetryPolicyOptions } from './retry-policy'\n\nimport { createRetryPolicy, runWithRetry } from './retry-policy'\n\n/\n Bundles a provider with the retry policy used to call it.\n \n Use when:\n * - a provider instance should travel with the retry runner that governs it\n * - you want call sites to share one retry configuration object\n /\nexport interface ProviderAdapter<TProvider> {\n /\n The underlying provider instance.\n /\n provider: TProvider\n /\n The retry policy used for provider calls.\n /\n retryPolicy: RetryPolicy\n /\n Runs a provider-dependent operation with the adapter retry policy.\n /\n runWithRetry: <TResult>(operation: () => Promise<TResult>) => Promise<TResult>\n}\n\n/\n Creates a provider adapter with the default retry policy.\n \n Use when:\n * - you have a provider instance and want a consistent retry wrapper\n \n Expects:\n * - the provider to be safe to reuse across attempts\n /\nexport function createProviderAdapter<TProvider>(provider: TProvider, options: RetryPolicyOptions = {}): ProviderAdapter<TProvider> {\n const retryPolicy = createRetryPolicy(options)\n\n return {\n provider,\n retryPolicy,\n runWithRetry: operation => runWithRetry(operation, retryPolicy),\n }\n}\n","import type { ProviderAdapter } from '../../adapters'\nimport type { RetryPolicyOptions } from '../../retry-policy'\n\nimport process from 'node:process'\n\nimport { createOpenAI } from '@xsai-ext/providers/create'\n\nimport { createProviderAdapter } from '../../adapters'\nimport { envFrom, requiredEnvFrom } from '../../env'\n\n/\n Represents the OpenAI provider instance returned by xsai.\n /\nexport type OpenAIProvider = ReturnType<typeof createOpenAI>\n\n/\n Represents the OpenAI adapter used by vieval.\n /\nexport type OpenAIProviderAdapter = ProviderAdapter<OpenAIProvider>\n\n/\n Configures env key names and source for OpenAI provider setup.\n /\nexport interface OpenAIEnvSourceOptions {\n /\n Environment object used for variable lookup.\n \n @default process.env\n /\n env?: NodeJS.ProcessEnv\n /\n Env key name for API key.\n \n @default 'OPENAI_API_KEY'\n /\n apiKey?: string\n /\n Env key name for base URL.\n \n @default 'OPENAI_BASE_URL'\n /\n baseURL?: string\n /\n Env key name for model.\n \n @default 'OPENAI_MODEL'\n /\n model?: string\n}\n\n/\n Configures fallback defaults when env values are missing.\n /\nexport interface OpenAIFromEnvDefaultOptions {\n /\n API key fallback value.\n /\n apiKey?: string\n /\n Base URL fallback value.\n /\n baseURL?: string\n /\n Model fallback value.\n /\n model?: string\n /\n Retry policy override passed to provider adapter.\n /\n retryOptions?: RetryPolicyOptions\n}\n\n/\n Result produced by `createOpenAIFromEnv`.\n /\nexport interface OpenAIFromEnvResult {\n adapter: OpenAIProviderAdapter\n apiKey: string\n baseURL?: string\n model: string\n}\n\n/\n Minimal response shape returned by text-generation calls.\n /\nexport interface OpenAITextGenerationResult {\n /\n Text output from the provider.\n \n Some OpenAI-compatible implementations may return `null`.\n /\n text?: string \| null\n}\n\n/\n Normalizes provider text output to a safe string.\n \n Before: `{ text: null }`\n * After: `''`\n \n Before: `{ text: 'hello' }`\n * After: `'hello'`\n /\nexport function normalizeOpenAITextOutput(result: OpenAITextGenerationResult): string {\n return typeof result.text === 'string' ? result.text : ''\n}\n\n/\n Creates an OpenAI provider adapter using environment variables with defaults.\n \n Example:\n * `const runtime = createOpenAIFromEnv({}, { model: 'gpt-4.1-mini' })`\n /\nexport function createOpenAIFromEnv(\n source: OpenAIEnvSourceOptions = {},\n defaults: OpenAIFromEnvDefaultOptions = {},\n): OpenAIFromEnvResult {\n const env = source.env ?? process.env\n const apiKeyEnvKey = source.apiKey ?? 'OPENAI_API_KEY'\n const baseURLEnvKey = source.baseURL ?? 'OPENAI_BASE_URL'\n const modelEnvKey = source.model ?? 'OPENAI_MODEL'\n\n const envWithDefaults = {\n ...(defaults.apiKey == null ? {} : { [apiKeyEnvKey]: defaults.apiKey }),\n ...(defaults.baseURL == null ? {} : { [baseURLEnvKey]: defaults.baseURL }),\n ...(defaults.model == null ? {} : { [modelEnvKey]: defaults.model }),\n ...env,\n }\n\n const apiKey = requiredEnvFrom(envWithDefaults, {\n name: apiKeyEnvKey,\n type: 'string',\n })\n const model = requiredEnvFrom(envWithDefaults, {\n name: modelEnvKey,\n type: 'string',\n })\n const baseURL = envFrom(envWithDefaults, {\n name: baseURLEnvKey,\n type: 'string',\n })\n const adapter = createOpenAIProviderAdapter(apiKey, baseURL, defaults.retryOptions)\n\n return {\n adapter,\n apiKey,\n baseURL,\n model,\n }\n}\n\n/\n Creates an OpenAI provider adapter for eval-time requests.\n \n Use when:\n * - an eval needs the OpenAI SDK surface plus the shared retry runner\n \n Expects:\n * - `apiKey` and `baseURL` to point at an OpenAI-compatible endpoint\n * - `retryOptions` to follow the same invariants as `createRetryPolicy`\n */\nexport function createOpenAIProviderAdapter(apiKey: string, baseURL?: string, retryOptions: RetryPolicyOptions = {}): OpenAIProviderAdapter {\n return createProviderAdapter(createOpenAI(apiKey, baseURL), retryOptions)\n}\n"],"mappings":";;;;;AA+DA,MAAM,uCAAuB,IAAI,IAAI;CAAC;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;AAAG,CAAC;AACxE,MAAM,sCAAsB,IAAI,IAAI,CAAC,gBAAgB,YAAY,CAAC;AAClE,MAAM,2BAA2B;CAC/B;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;AACF;AAEA,SAAS,cAAc,OAAoC;CACzD,IAAI,SAAS,QAAQ,OAAO,UAAU,UACpC;CAGF,MAAM,kBAAmB,MAAmC;CAC5D,IAAI,OAAO,oBAAoB,UAC7B,OAAO;CAGT,MAAM,cAAe,MAA+B;CACpD,IAAI,OAAO,gBAAgB,UACzB,OAAO;CAGT,MAAM,WAAY,MAAiC;CACnD,IAAI,YAAY,QAAQ,OAAO,aAAa,UAC1C;CAGF,MAAM,iBAAkB,SAAkC;CAC1D,OAAO,OAAO,mBAAmB,WAAW,iBAAiB,KAAA;AAC/D;;;;;;;;;;AAWA,SAAgB,yBAAyB,OAAyB;CAChE,MAAM,aAAa,cAAc,KAAK;CAEtC,IAAI,cAAc,MAChB,OAAO,qBAAqB,IAAI,UAAU;CAG5C,MAAM,YAAY,cAAc,KAAK;CACrC,IAAI,aAAa,QAAQ,oBAAoB,IAAI,SAAS,GACxD,OAAO;CAGT,MAAM,eAAe,iBAAiB,KAAK;CAC3C,IAAI,gBAAgB,MAClB,OAAO;CAGT,OAAO,yBAAyB,MAAK,YAAW,QAAQ,KAAK,YAAY,CAAC;AAC5E;AAEA,SAAS,eAAe,SAAyB;CAC/C,OAAO,MAAM,MAAM,UAAU;AAC/B;;;;;;;;;;;;;;AAeA,SAAS,uBAAuB,OAAuB;CACrD,IAAI,CAAC,OAAO,SAAS,KAAK,KAAK,CAAC,OAAO,UAAU,KAAK,KAAK,QAAQ,GACjE,MAAM,IAAI,WAAW,kEAAkE;CAGzF,OAAO;AACT;AAEA,SAAgB,kBAAkB,UAA8B,CAAC,GAAgB;CAG/E,OAAO;EACL,aAHkB,uBAAuB,QAAQ,eAAe,CAGtD;EACV,SAAS,QAAQ,WAAW;EAC5B,aAAa,QAAQ,eAAe;EACpC,OAAO,QAAQ,SAASA;CAC1B;AACF;;;;;;;;;;;AAYA,eAAsB,aAAgB,WAA6B,SAAsB,kBAAkB,GAAe;CACxH,KAAK,IAAI,UAAU,GAAG,WAAW,OAAO,aAAa,WAAW,GAC9D,IAAI;EACF,OAAO,MAAM,UAAU;CACzB,SACO,OAAO;EACZ,IAAI,WAAW,OAAO,eAAe,CAAC,OAAO,YAAY,KAAK,GAC5D,MAAM;EAGR,MAAM,oBAAoB,OAAO,QAAQ,OAAO;EAChD,IAAI,oBAAoB,GACtB,MAAM,OAAO,MAAM,iBAAiB;CAExC;CAGF,MAAM,IAAI,MAAM,8CAA8C;AAChE;;;;;;;;;;;;ACpKA,SAAgB,sBAAiC,UAAqB,UAA8B,CAAC,GAA+B;CAClI,MAAM,cAAc,kBAAkB,OAAO;CAE7C,OAAO;EACL;EACA;EACA,eAAc,cAAa,aAAa,WAAW,WAAW;CAChE;AACF;;;;;;;;;;;;AC4DA,SAAgB,0BAA0B,QAA4C;CACpF,OAAO,OAAO,OAAO,SAAS,WAAW,OAAO,OAAO;AACzD;;;;;;;AAQA,SAAgB,oBACd,SAAiC,CAAC,GAClC,WAAwC,CAAC,GACpB;CACrB,MAAM,MAAM,OAAO,OAAO,QAAQ;CAClC,MAAM,eAAe,OAAO,UAAU;CACtC,MAAM,gBAAgB,OAAO,WAAW;CACxC,MAAM,cAAc,OAAO,SAAS;CAEpC,MAAM,kBAAkB;EACtB,GAAI,SAAS,UAAU,OAAO,CAAC,IAAI,GAAG,eAAe,SAAS,OAAO;EACrE,GAAI,SAAS,WAAW,OAAO,CAAC,IAAI,GAAG,gBAAgB,SAAS,QAAQ;EACxE,GAAI,SAAS,SAAS,OAAO,CAAC,IAAI,GAAG,cAAc,SAAS,MAAM;EAClE,GAAG;CACL;CAEA,MAAM,SAAS,gBAAgB,iBAAiB;EAC9C,MAAM;EACN,MAAM;CACR,CAAC;CACD,MAAM,QAAQ,gBAAgB,iBAAiB;EAC7C,MAAM;EACN,MAAM;CACR,CAAC;CACD,MAAM,UAAU,QAAQ,iBAAiB;EACvC,MAAM;EACN,MAAM;CACR,CAAC;CAGD,OAAO;EACL,SAHc,4BAA4B,QAAQ,SAAS,SAAS,YAG9D;EACN;EACA;EACA;CACF;AACF;;;;;;;;;;;AAYA,SAAgB,4BAA4B,QAAgB,SAAkB,eAAmC,CAAC,GAA0B;CAC1I,OAAO,sBAAsB,aAAa,QAAQ,OAAO,GAAG,YAAY;AAC1E"}

package/dist/core/processors/results/index.d.mts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { K as AggregatedRunResults } from "../../../index-BkjyCInx.mjs";
+import { K as AggregatedRunResults } from "../../../index-CwKBlCG9.mjs";
 //#region src/core/processors/results/policies/hybrid-threshold.d.ts
 /**

package/dist/core/processors/results/index.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.mjs","names":[],"sources":["../../../../src/core/processors/results/policies/hybrid-threshold.ts","../../../../src/core/processors/results/policies/max-failed-runs.ts","../../../../src/core/processors/results/index.ts"],"sourcesContent":["import type { AggregatedRunResults } from '../../../runner/aggregate'\n\n/*\n Violation emitted when result policies fail.\n /\nexport interface ResultPolicyViolation {\n /\n Stable policy id.\n /\n policyId: string\n /\n Human-readable violation reason.\n /\n reason: string\n}\n\n/\n Configures hybrid-threshold policy behavior.\n /\nexport interface HybridThresholdPolicyOptions {\n /\n Minimum required overall hybrid score.\n \n @default 0.7\n /\n minOverallHybridScore?: number\n /\n Minimum required inferenceExecutor hybrid score.\n \n @default 0.6\n /\n minProviderHybridScore?: number\n}\n\n/\n Evaluates threshold policy against aggregated results.\n /\nexport function evaluateHybridThresholdPolicy(\n results: AggregatedRunResults,\n options: HybridThresholdPolicyOptions = {},\n): ResultPolicyViolation[] {\n const minOverallHybridScore = options.minOverallHybridScore ?? 0.7\n const minProviderHybridScore = options.minProviderHybridScore ?? 0.6\n\n const violations: ResultPolicyViolation[] = []\n\n const overallHybridAverage = results.overall.hybridAverage\n if (overallHybridAverage == null \|\| overallHybridAverage < minOverallHybridScore) {\n violations.push({\n policyId: 'threshold:overall-hybrid',\n reason: `Overall hybrid average ${overallHybridAverage ?? 'null'} is below ${minOverallHybridScore}.`,\n })\n }\n\n for (const inferenceExecutor of results.inferenceExecutors) {\n if (inferenceExecutor.hybridAverage == null \|\| inferenceExecutor.hybridAverage < minProviderHybridScore) {\n violations.push({\n policyId: 'threshold:inferenceExecutor-hybrid',\n reason: `Provider ${inferenceExecutor.inferenceExecutorId} hybrid average ${inferenceExecutor.hybridAverage ?? 'null'} is below ${minProviderHybridScore}.`,\n })\n }\n }\n\n return violations\n}\n","import type { AggregatedRunResults } from '../../../runner/aggregate'\nimport type { ResultPolicyViolation } from './hybrid-threshold'\n\n/\n Configures hard-limit policy for failed runs.\n /\nexport interface MaxFailedRunsPolicyOptions {\n /\n Maximum allowed failed run count.\n \n @default 0\n /\n maxFailedRuns?: number\n /\n Hybrid score threshold below which a run counts as failed.\n \n @default 0.6\n /\n minRunHybridScore?: number\n}\n\n/\n Evaluates hard-limit policy for failed runs.\n /\nexport function evaluateMaxFailedRunsPolicy(\n results: AggregatedRunResults,\n options: MaxFailedRunsPolicyOptions = {},\n): ResultPolicyViolation[] {\n const maxFailedRuns = options.maxFailedRuns ?? 0\n const minRunHybridScore = options.minRunHybridScore ?? 0.6\n\n const failedRuns = results.runs.filter((run) => {\n if (run.hybridAverage == null) {\n return true\n }\n\n return run.hybridAverage < minRunHybridScore\n })\n\n if (failedRuns.length <= maxFailedRuns) {\n return []\n }\n\n return [{\n policyId: 'hard-limit:max-failed-runs',\n reason: `Failed runs ${failedRuns.length} exceed maxFailedRuns ${maxFailedRuns} with minRunHybridScore ${minRunHybridScore}.`,\n }]\n}\n","import type { AggregatedRunResults } from '../../runner/aggregate'\nimport type { HybridThresholdPolicyOptions, ResultPolicyViolation } from './policies/hybrid-threshold'\nimport type { MaxFailedRunsPolicyOptions } from './policies/max-failed-runs'\n\nimport { evaluateHybridThresholdPolicy } from './policies/hybrid-threshold'\nimport { evaluateMaxFailedRunsPolicy } from './policies/max-failed-runs'\n\n/\n Configures result-processing policies for eval gating.\n /\nexport interface ProcessRunResultsOptions {\n /\n Threshold policy options.\n /\n threshold?: HybridThresholdPolicyOptions\n /\n Hard-limit failed-run policy options.\n /\n maxFailedRuns?: MaxFailedRunsPolicyOptions\n}\n\n/\n Final gate decision returned by result processors.\n /\nexport interface ResultGateDecision {\n /\n Whether the result batch passes all policies.\n /\n pass: boolean\n /\n Collected policy violations.\n /\n violations: ResultPolicyViolation[]\n}\n\n/\n Processes aggregated run results through built-in gating policies.\n \n Call stack:\n \n {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link processRunResults}\n * -> {@link evaluateHybridThresholdPolicy}\n * -> {@link evaluateMaxFailedRunsPolicy}\n * -> {@link ResultGateDecision}\n */\nexport function processRunResults(\n results: AggregatedRunResults,\n options: ProcessRunResultsOptions = {},\n): ResultGateDecision {\n const thresholdViolations = evaluateHybridThresholdPolicy(results, options.threshold)\n const maxFailedRunsViolations = evaluateMaxFailedRunsPolicy(results, options.maxFailedRuns)\n\n const violations = [\n ...thresholdViolations,\n ...maxFailedRunsViolations,\n ]\n\n return {\n pass: violations.length === 0,\n violations,\n }\n}\n\nexport type {\n HybridThresholdPolicyOptions,\n MaxFailedRunsPolicyOptions,\n ResultPolicyViolation,\n}\n"],"mappings":";;;;AAqCA,SAAgB,8BACd,SACA,UAAwC,~~EAAE~~,~~EACjB~~;CACzB,MAAM,wBAAwB,QAAQ,yBAAyB;CAC/D,MAAM,yBAAyB,QAAQ,0BAA0B;CAEjE,MAAM,aAAsC,~~EAAE~~;~~CAE9C~~,MAAM,uBAAuB,QAAQ,QAAQ;~~AAC7C~~,~~KAAI~~,wBAAwB,QAAQ,uBAAuB,~~sBACzD~~,~~YAAW~~,KAAK;EACd,UAAU;EACV,QAAQ,0BAA0B,wBAAwB,OAAO,YAAY,sBAAsB;~~EACpG~~,CAAC;~~AAGJ~~,~~MAAK~~,MAAM,qBAAqB,QAAQ,~~mBACtC~~,~~KAAI~~,kBAAkB,iBAAiB,QAAQ,kBAAkB,gBAAgB,~~uBAC~~/E,~~YAAW~~,KAAK;EACd,UAAU;EACV,QAAQ,YAAY,kBAAkB,oBAAoB,kBAAkB,kBAAkB,iBAAiB,OAAO,YAAY,uBAAuB;~~EAC1J~~,CAAC;~~AAIN~~,~~QAAO;;;;;;;ACvCT~~,SAAgB,4BACd,SACA,UAAsC,~~EAAE~~,~~EACf~~;CACzB,MAAM,gBAAgB,QAAQ,iBAAiB;CAC/C,MAAM,oBAAoB,QAAQ,qBAAqB;CAEvD,MAAM,aAAa,QAAQ,KAAK,QAAQ,QAAQ;~~AAC9C~~,~~MAAI~~,IAAI,iBAAiB,~~KACvB~~,~~QAAO~~;~~AAGT~~,~~SAAO~~,IAAI,gBAAgB;~~GAC3B~~;~~AAEF~~,~~KAAI~~,WAAW,UAAU,~~cACvB~~,~~QAAO~~,~~EAAE~~;~~AAGX~~,~~QAAO~~,CAAC;EACN,UAAU;EACV,QAAQ,eAAe,WAAW,OAAO,wBAAwB,cAAc,0BAA0B,kBAAkB;~~EAC5H~~,CAAC~~;;;;;;;;;;;;;;;;ACCJ~~,SAAgB,kBACd,SACA,UAAoC,~~EAAE~~,~~EAClB~~;CACpB,MAAM,sBAAsB,8BAA8B,SAAS,QAAQ,~~UAAU~~;~~CACrF~~,MAAM,0BAA0B,4BAA4B,SAAS,QAAQ,~~cAAc~~;~~CAE3F~~,MAAM,aAAa,CACjB,GAAG,qBACH,GAAG,~~wBACJ~~;~~AAED~~,~~QAAO~~;EACL,MAAM,WAAW,WAAW;EAC5B;~~EACD~~"}
1	+ {"version":3,"file":"index.mjs","names":[],"sources":["../../../../src/core/processors/results/policies/hybrid-threshold.ts","../../../../src/core/processors/results/policies/max-failed-runs.ts","../../../../src/core/processors/results/index.ts"],"sourcesContent":["import type { AggregatedRunResults } from '../../../runner/aggregate'\n\n/*\n Violation emitted when result policies fail.\n /\nexport interface ResultPolicyViolation {\n /\n Stable policy id.\n /\n policyId: string\n /\n Human-readable violation reason.\n /\n reason: string\n}\n\n/\n Configures hybrid-threshold policy behavior.\n /\nexport interface HybridThresholdPolicyOptions {\n /\n Minimum required overall hybrid score.\n \n @default 0.7\n /\n minOverallHybridScore?: number\n /\n Minimum required inferenceExecutor hybrid score.\n \n @default 0.6\n /\n minProviderHybridScore?: number\n}\n\n/\n Evaluates threshold policy against aggregated results.\n /\nexport function evaluateHybridThresholdPolicy(\n results: AggregatedRunResults,\n options: HybridThresholdPolicyOptions = {},\n): ResultPolicyViolation[] {\n const minOverallHybridScore = options.minOverallHybridScore ?? 0.7\n const minProviderHybridScore = options.minProviderHybridScore ?? 0.6\n\n const violations: ResultPolicyViolation[] = []\n\n const overallHybridAverage = results.overall.hybridAverage\n if (overallHybridAverage == null \|\| overallHybridAverage < minOverallHybridScore) {\n violations.push({\n policyId: 'threshold:overall-hybrid',\n reason: `Overall hybrid average ${overallHybridAverage ?? 'null'} is below ${minOverallHybridScore}.`,\n })\n }\n\n for (const inferenceExecutor of results.inferenceExecutors) {\n if (inferenceExecutor.hybridAverage == null \|\| inferenceExecutor.hybridAverage < minProviderHybridScore) {\n violations.push({\n policyId: 'threshold:inferenceExecutor-hybrid',\n reason: `Provider ${inferenceExecutor.inferenceExecutorId} hybrid average ${inferenceExecutor.hybridAverage ?? 'null'} is below ${minProviderHybridScore}.`,\n })\n }\n }\n\n return violations\n}\n","import type { AggregatedRunResults } from '../../../runner/aggregate'\nimport type { ResultPolicyViolation } from './hybrid-threshold'\n\n/\n Configures hard-limit policy for failed runs.\n /\nexport interface MaxFailedRunsPolicyOptions {\n /\n Maximum allowed failed run count.\n \n @default 0\n /\n maxFailedRuns?: number\n /\n Hybrid score threshold below which a run counts as failed.\n \n @default 0.6\n /\n minRunHybridScore?: number\n}\n\n/\n Evaluates hard-limit policy for failed runs.\n /\nexport function evaluateMaxFailedRunsPolicy(\n results: AggregatedRunResults,\n options: MaxFailedRunsPolicyOptions = {},\n): ResultPolicyViolation[] {\n const maxFailedRuns = options.maxFailedRuns ?? 0\n const minRunHybridScore = options.minRunHybridScore ?? 0.6\n\n const failedRuns = results.runs.filter((run) => {\n if (run.hybridAverage == null) {\n return true\n }\n\n return run.hybridAverage < minRunHybridScore\n })\n\n if (failedRuns.length <= maxFailedRuns) {\n return []\n }\n\n return [{\n policyId: 'hard-limit:max-failed-runs',\n reason: `Failed runs ${failedRuns.length} exceed maxFailedRuns ${maxFailedRuns} with minRunHybridScore ${minRunHybridScore}.`,\n }]\n}\n","import type { AggregatedRunResults } from '../../runner/aggregate'\nimport type { HybridThresholdPolicyOptions, ResultPolicyViolation } from './policies/hybrid-threshold'\nimport type { MaxFailedRunsPolicyOptions } from './policies/max-failed-runs'\n\nimport { evaluateHybridThresholdPolicy } from './policies/hybrid-threshold'\nimport { evaluateMaxFailedRunsPolicy } from './policies/max-failed-runs'\n\n/\n Configures result-processing policies for eval gating.\n /\nexport interface ProcessRunResultsOptions {\n /\n Threshold policy options.\n /\n threshold?: HybridThresholdPolicyOptions\n /\n Hard-limit failed-run policy options.\n /\n maxFailedRuns?: MaxFailedRunsPolicyOptions\n}\n\n/\n Final gate decision returned by result processors.\n /\nexport interface ResultGateDecision {\n /\n Whether the result batch passes all policies.\n /\n pass: boolean\n /\n Collected policy violations.\n /\n violations: ResultPolicyViolation[]\n}\n\n/\n Processes aggregated run results through built-in gating policies.\n \n Call stack:\n \n {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link processRunResults}\n * -> {@link evaluateHybridThresholdPolicy}\n * -> {@link evaluateMaxFailedRunsPolicy}\n * -> {@link ResultGateDecision}\n */\nexport function processRunResults(\n results: AggregatedRunResults,\n options: ProcessRunResultsOptions = {},\n): ResultGateDecision {\n const thresholdViolations = evaluateHybridThresholdPolicy(results, options.threshold)\n const maxFailedRunsViolations = evaluateMaxFailedRunsPolicy(results, options.maxFailedRuns)\n\n const violations = [\n ...thresholdViolations,\n ...maxFailedRunsViolations,\n ]\n\n return {\n pass: violations.length === 0,\n violations,\n }\n}\n\nexport type {\n HybridThresholdPolicyOptions,\n MaxFailedRunsPolicyOptions,\n ResultPolicyViolation,\n}\n"],"mappings":";;;;AAqCA,SAAgB,8BACd,SACA,UAAwC,CAAC,GAChB;CACzB,MAAM,wBAAwB,QAAQ,yBAAyB;CAC/D,MAAM,yBAAyB,QAAQ,0BAA0B;CAEjE,MAAM,aAAsC,CAAC;CAE7C,MAAM,uBAAuB,QAAQ,QAAQ;CAC7C,IAAI,wBAAwB,QAAQ,uBAAuB,uBACzD,WAAW,KAAK;EACd,UAAU;EACV,QAAQ,0BAA0B,wBAAwB,OAAO,YAAY,sBAAsB;CACrG,CAAC;CAGH,KAAK,MAAM,qBAAqB,QAAQ,oBACtC,IAAI,kBAAkB,iBAAiB,QAAQ,kBAAkB,gBAAgB,wBAC/E,WAAW,KAAK;EACd,UAAU;EACV,QAAQ,YAAY,kBAAkB,oBAAoB,kBAAkB,kBAAkB,iBAAiB,OAAO,YAAY,uBAAuB;CAC3J,CAAC;CAIL,OAAO;AACT;;;;;;ACxCA,SAAgB,4BACd,SACA,UAAsC,CAAC,GACd;CACzB,MAAM,gBAAgB,QAAQ,iBAAiB;CAC/C,MAAM,oBAAoB,QAAQ,qBAAqB;CAEvD,MAAM,aAAa,QAAQ,KAAK,QAAQ,QAAQ;EAC9C,IAAI,IAAI,iBAAiB,MACvB,OAAO;EAGT,OAAO,IAAI,gBAAgB;CAC7B,CAAC;CAED,IAAI,WAAW,UAAU,eACvB,OAAO,CAAC;CAGV,OAAO,CAAC;EACN,UAAU;EACV,QAAQ,eAAe,WAAW,OAAO,wBAAwB,cAAc,0BAA0B,kBAAkB;CAC7H,CAAC;AACH;;;;;;;;;;;;;;;ACAA,SAAgB,kBACd,SACA,UAAoC,CAAC,GACjB;CACpB,MAAM,sBAAsB,8BAA8B,SAAS,QAAQ,SAAS;CACpF,MAAM,0BAA0B,4BAA4B,SAAS,QAAQ,aAAa;CAE1F,MAAM,aAAa,CACjB,GAAG,qBACH,GAAG,uBACL;CAEA,OAAO;EACL,MAAM,WAAW,WAAW;EAC5B;CACF;AACF"}

package/dist/core/runner/index.d.mts CHANGED Viewed

@@ -1,3 +1,3 @@
-import { $ as InferenceExecutor, A as RunScheduledTasksOptions, B as asProjectRelativePath, F as CreateTaskExecutionContextOptions, G as AggregatedProviderSummary, H as CreateVievalRunnerRuntimeContextOptions, I as TaskExecutionContext, J as RunResult, K as AggregatedRunResults, L as createTaskExecutionContext, M as RunnerTaskState, N as ScheduledTaskExecutor, P as runScheduledTasks, Q as CreateRunnerScheduleOptions, U as RunnerRuntimeContext, V as collectEvalEntries, W as createRunnerRuntimeContext, X as RunScoreKind, Y as RunScore, Z as aggregateRunResults, at as ScheduledTaskMatrixMeta, ct as createFilesystemTaskCacheRuntime, dt as CacheFileOptions, et as RunnerMatrixDefinition, ft as CacheNamespace, it as ScheduledTaskMatrix, j as RunnerExecutionError, lt as normalizeCacheFilePathSegments, nt as RunnerMatrixSelection, ot as createRunnerSchedule, pt as TaskCacheRuntime, q as AggregatedRunSummary, rt as ScheduledTask, st as CreateFilesystemTaskCacheRuntimeOptions, tt as RunnerMatrixInput, ut as CacheFileHandle } from "../../index-BkjyCInx.mjs";
-import { a as SchedulerMiddleware, c as SchedulerScopeContext, i as SchedulerConcurrencyConfig, n as getActiveScopes, o as SchedulerRuntime, r as CreateSchedulerRuntimeOptions, s as SchedulerScope, t as createSchedulerRuntime } from "../../index-fakXoZEe.mjs";
+import { $ as InferenceExecutor, A as RunScheduledTasksOptions, B as asProjectRelativePath, F as CreateTaskExecutionContextOptions, G as AggregatedProviderSummary, H as CreateVievalRunnerRuntimeContextOptions, I as TaskExecutionContext, J as RunResult, K as AggregatedRunResults, L as createTaskExecutionContext, M as RunnerTaskState, N as ScheduledTaskExecutor, P as runScheduledTasks, Q as CreateRunnerScheduleOptions, U as RunnerRuntimeContext, V as collectEvalEntries, W as createRunnerRuntimeContext, X as RunScoreKind, Y as RunScore, Z as aggregateRunResults, at as ScheduledTaskMatrixMeta, ct as createFilesystemTaskCacheRuntime, dt as CacheFileOptions, et as RunnerMatrixDefinition, ft as CacheNamespace, it as ScheduledTaskMatrix, j as RunnerExecutionError, lt as normalizeCacheFilePathSegments, nt as RunnerMatrixSelection, ot as createRunnerSchedule, pt as TaskCacheRuntime, q as AggregatedRunSummary, rt as ScheduledTask, st as CreateFilesystemTaskCacheRuntimeOptions, tt as RunnerMatrixInput, ut as CacheFileHandle } from "../../index-CwKBlCG9.mjs";
+import { a as SchedulerMiddleware, c as SchedulerScopeContext, i as SchedulerConcurrencyConfig, n as getActiveScopes, o as SchedulerRuntime, r as CreateSchedulerRuntimeOptions, s as SchedulerScope, t as createSchedulerRuntime } from "../../index-Be5I1ZJL.mjs";
 export { AggregatedProviderSummary, AggregatedRunResults, AggregatedRunSummary, CacheFileHandle, CacheFileOptions, CacheNamespace, CreateFilesystemTaskCacheRuntimeOptions, CreateRunnerScheduleOptions, CreateSchedulerRuntimeOptions, CreateTaskExecutionContextOptions, CreateVievalRunnerRuntimeContextOptions, InferenceExecutor, RunResult, RunScheduledTasksOptions, RunScore, RunScoreKind, RunnerExecutionError, RunnerMatrixDefinition, RunnerMatrixInput, RunnerMatrixSelection, RunnerRuntimeContext, RunnerTaskState, ScheduledTask, ScheduledTaskExecutor, ScheduledTaskMatrix, ScheduledTaskMatrixMeta, SchedulerConcurrencyConfig, SchedulerMiddleware, SchedulerRuntime, SchedulerScope, SchedulerScopeContext, TaskCacheRuntime, TaskExecutionContext, aggregateRunResults, asProjectRelativePath, collectEvalEntries, createFilesystemTaskCacheRuntime, createRunnerRuntimeContext, createRunnerSchedule, createSchedulerRuntime, createTaskExecutionContext, getActiveScopes, normalizeCacheFilePathSegments, runScheduledTasks };

package/dist/core/runner/index.mjs CHANGED Viewed

@@ -2,11 +2,11 @@ import { createSchedulerRuntime, getActiveScopes } from "../scheduler/index.mjs"
 import { createRequire } from "node:module";
 import process from "node:process";
 import { errorMessageFrom } from "@moeru/std";
-import { basename, dirname, join, relative } from "node:path";
 import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
+import { basename, dirname, join, relative } from "node:path";
 import { fileURLToPath } from "node:url";
-import { Buffer } from "node:buffer";
 import { createReadStream, createWriteStream } from "node:fs";
+import { Buffer } from "node:buffer";
 import { limitConcurrency } from "@vitest/runner/utils";
 //#region src/core/cache/filesystem.ts
 function sanitizePathSegment(value) {
@@ -425,7 +425,7 @@ async function createRunnerRuntimeContext(options = {}) {
 }
 //#endregion
 //#region src/core/runner/schedule.ts
-const matrixLayerKeys = new Set([
+const matrixLayerKeys = /* @__PURE__ */ new Set([
 	"disable",
 	"extend",
 	"override"
@@ -475,7 +475,7 @@ function applyAxisValues(axes, definition, mode) {
 		const nextValues = dedupeAxisValues(values);
 		if (mode === "extend") {
 			const existingValues = axes.get(axis) ?? [];
-			axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])));
+			axes.set(axis, Array.from(/* @__PURE__ */ new Set([...existingValues, ...nextValues])));
 			continue;
 		}
 		axes.set(axis, nextValues);

package/dist/core/runner/index.mjs.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/cache/filesystem.ts","../../../src/core/runner/aggregate.ts","../../../src/core/runner/collect.ts","../../../src/core/runner/run.ts","../../../src/core/runner/runtime-context.ts","../../../src/core/runner/schedule.ts","../../../src/core/runner/task-context.ts"],"sourcesContent":["import type { CacheFileHandle, CacheFileOptions, CacheNamespace, TaskCacheRuntime } from './types'\n\nimport process from 'node:process'\n\nimport { Buffer } from 'node:buffer'\nimport { createReadStream, createWriteStream } from 'node:fs'\nimport { access, mkdir, readFile, rename, writeFile } from 'node:fs/promises'\nimport { dirname, join } from 'node:path'\n\n/*\n Options for creating the filesystem-backed task cache runtime.\n /\nexport interface CreateFilesystemTaskCacheRuntimeOptions {\n /\n Absolute cache root directory.\n /\n cacheRootDirectory: string\n /\n Project identifier under one workspace cache scope.\n /\n projectName: string\n /\n Workspace identifier used to share cache roots across projects.\n /\n workspaceId: string\n}\n\nfunction sanitizePathSegment(value: string): string {\n const normalized = value.trim()\n if (normalized.length === 0) {\n return 'default'\n }\n\n return normalized.replace(/[^\\w.-]+/g, '-')\n}\n\nfunction normalizeExtension(extension: string \| undefined, mediaType: string \| undefined): string \| undefined {\n if (extension != null && extension.length > 0) {\n return extension.startsWith('.') ? extension.slice(1) : extension\n }\n\n if (mediaType == null \|\| mediaType.length === 0) {\n return undefined\n }\n\n if (mediaType === 'application/json') {\n return 'json'\n }\n\n if (mediaType === 'text/plain') {\n return 'txt'\n }\n\n if (mediaType === 'audio/wav') {\n return 'wav'\n }\n\n return undefined\n}\n\n/\n Normalizes cache file options into deterministic relative path segments.\n \n Before:\n * - `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`\n \n After:\n * - `['cases', 'dataset-hash', 'v1.json']`\n /\nexport function normalizeCacheFilePathSegments(options: CacheFileOptions): string[] {\n const sanitizedKey = options.key.map(segment => sanitizePathSegment(segment))\n const extension = normalizeExtension(options.ext, options.mediaType)\n\n if (sanitizedKey.length === 0) {\n return extension == null ? ['artifact'] : [`artifact.${extension}`]\n }\n\n if (extension == null) {\n return sanitizedKey\n }\n\n const withoutTail = sanitizedKey.slice(0, Math.max(0, sanitizedKey.length - 1))\n const tail = sanitizedKey[sanitizedKey.length - 1] ?? 'artifact'\n return [...withoutTail, `${tail}.${extension}`]\n}\n\nasync function writeAtomically(path: string, content: Buffer \| string): Promise<void> {\n const directory = dirname(path)\n const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`\n await mkdir(directory, { recursive: true })\n await writeFile(temporaryPath, content)\n await rename(temporaryPath, path)\n}\n\nfunction createCacheFileHandle(path: string): CacheFileHandle {\n return {\n path,\n async exists() {\n try {\n await access(path)\n return true\n }\n catch {\n return false\n }\n },\n openReadStream() {\n return createReadStream(path)\n },\n async openWriteStream() {\n await mkdir(dirname(path), { recursive: true })\n return createWriteStream(path)\n },\n async readBuffer() {\n return await readFile(path)\n },\n async writeBuffer(value) {\n await writeAtomically(path, value)\n },\n async readText(encoding = 'utf-8') {\n return await readFile(path, encoding)\n },\n async writeText(value, encoding = 'utf-8') {\n await writeAtomically(path, Buffer.from(value, encoding))\n },\n async readJson<T>() {\n return JSON.parse(await readFile(path, 'utf-8')) as T\n },\n async writeJson(value) {\n await writeAtomically(path, `${JSON.stringify(value, null, 2)}\\n`)\n },\n async loadAsCasesInput<T>() {\n return await this.readJson<T[]>()\n },\n async loadAsExpectFixture<T>() {\n return await this.readJson<T>()\n },\n }\n}\n\nfunction createCacheNamespace(baseDirectory: string, namespace: string): CacheNamespace {\n return {\n file(options) {\n const relativePathSegments = normalizeCacheFilePathSegments(options)\n return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments))\n },\n }\n}\n\n/\n Creates a deterministic filesystem-backed task cache runtime.\n \n Use when:\n * - eval tasks need reproducible cache paths for expensive pre-processing outputs\n * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes\n \n Expects:\n * - `cacheRootDirectory` to be writable by the running process\n * - `workspaceId` + `projectName` to stay stable for reproducible paths\n \n Returns:\n * - task cache runtime that resolves namespaced file handles under:\n * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`\n /\nexport function createFilesystemTaskCacheRuntime(\n options: CreateFilesystemTaskCacheRuntimeOptions,\n): TaskCacheRuntime {\n const workspaceDirectory = sanitizePathSegment(options.workspaceId)\n const projectDirectory = sanitizePathSegment(options.projectName)\n const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory)\n\n return {\n namespace(name) {\n return createCacheNamespace(baseDirectory, name)\n },\n }\n}\n","import type { ScheduledTaskMatrix } from './schedule'\n\n/\n Identifies the scoring family for a single eval score.\n /\nexport type RunScoreKind = 'exact' \| 'judge'\n\n/\n Represents one normalized score emitted by a completed eval run.\n /\nexport interface RunScore {\n /\n Score family used for aggregation.\n /\n kind: RunScoreKind\n /\n Normalized score in the `0..1` range.\n /\n score: number\n}\n\n/\n Captures the output of one scheduled runner task.\n /\nexport interface RunResult {\n /\n Stable run id, usually copied from the scheduled task id.\n /\n id: string\n /\n Collected eval entry id.\n /\n entryId: string\n /\n Stable inferenceExecutor id.\n /\n inferenceExecutorId: string\n /\n Concrete matrix selection used by the run.\n /\n matrix: ScheduledTaskMatrix\n /\n Raw scores emitted by the eval.\n /\n scores: readonly RunScore[]\n}\n\n/\n Stores the per-run score averages after normalization.\n /\nexport interface AggregatedRunSummary {\n /\n Stable run id.\n /\n id: string\n /\n Collected eval entry id.\n /\n entryId: string\n /\n Stable inferenceExecutor id.\n /\n inferenceExecutorId: string\n /\n Concrete matrix selection used by the run.\n /\n matrix: ScheduledTaskMatrix\n /\n Mean of exact-match scores or `null` when absent.\n /\n exactAverage: number \| null\n /\n Mean of judge-based scores or `null` when absent.\n /\n judgeAverage: number \| null\n /\n Hybrid average. Uses both families when present, otherwise falls back to the\n * single available family.\n /\n hybridAverage: number \| null\n}\n\n/\n Stores inferenceExecutor-level score aggregates across multiple runs.\n /\nexport interface AggregatedProviderSummary {\n /\n Stable inferenceExecutor id.\n /\n inferenceExecutorId: string\n /\n Number of runs included in this inferenceExecutor bucket.\n /\n runCount: number\n /\n Mean of all exact-match scores or `null` when absent.\n /\n exactAverage: number \| null\n /\n Mean of all judge-based scores or `null` when absent.\n /\n judgeAverage: number \| null\n /\n Hybrid average derived from the inferenceExecutor exact and judge means.\n /\n hybridAverage: number \| null\n}\n\n/\n Stores the final aggregation output for a batch of runner results.\n /\nexport interface AggregatedRunResults {\n /\n Per-run normalized score summaries.\n /\n runs: AggregatedRunSummary[]\n /\n Provider-level summaries sorted by inferenceExecutor id.\n /\n inferenceExecutors: AggregatedProviderSummary[]\n /\n Overall summary across every run.\n /\n overall: {\n exactAverage: number \| null\n judgeAverage: number \| null\n hybridAverage: number \| null\n runCount: number\n }\n}\n\ninterface ScoreBuckets {\n exact: number[]\n judge: number[]\n}\n\nfunction cloneScheduledTaskMatrix(matrix: ScheduledTaskMatrix): ScheduledTaskMatrix {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction assertKnownScoreKind(kind: string): RunScoreKind {\n if (kind === 'exact' \|\| kind === 'judge') {\n return kind\n }\n\n throw new TypeError(`Unknown eval score kind \"${kind}\".`)\n}\n\nfunction average(scores: readonly number[]): number \| null {\n if (scores.length === 0) {\n return null\n }\n\n const total = scores.reduce((sum, score) => sum + score, 0)\n return total / scores.length\n}\n\nfunction createHybridAverage(exactAverage: number \| null, judgeAverage: number \| null): number \| null {\n if (exactAverage != null && judgeAverage != null) {\n return (exactAverage + judgeAverage) / 2\n }\n\n if (exactAverage != null) {\n return exactAverage\n }\n\n if (judgeAverage != null) {\n return judgeAverage\n }\n\n return null\n}\n\nfunction collectScoreBuckets(scores: readonly RunScore[]): ScoreBuckets {\n const buckets: ScoreBuckets = {\n exact: [],\n judge: [],\n }\n\n for (const score of scores) {\n const kind = assertKnownScoreKind(score.kind)\n\n if (kind === 'exact') {\n buckets.exact.push(score.score)\n continue\n }\n\n buckets.judge.push(score.score)\n }\n\n return buckets\n}\n\nfunction createRunSummary(result: RunResult): AggregatedRunSummary {\n const buckets = collectScoreBuckets(result.scores)\n const exactAverage = average(buckets.exact)\n const judgeAverage = average(buckets.judge)\n\n return {\n entryId: result.entryId,\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n id: result.id,\n judgeAverage,\n matrix: cloneScheduledTaskMatrix(result.matrix),\n inferenceExecutorId: result.inferenceExecutorId,\n }\n}\n\nfunction createProviderSummary(inferenceExecutorId: string, results: readonly RunResult[]): AggregatedProviderSummary {\n const exactScores: number[] = []\n const judgeScores: number[] = []\n\n for (const result of results) {\n const buckets = collectScoreBuckets(result.scores)\n exactScores.push(...buckets.exact)\n judgeScores.push(...buckets.judge)\n }\n\n const exactAverage = average(exactScores)\n const judgeAverage = average(judgeScores)\n\n return {\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n judgeAverage,\n inferenceExecutorId,\n runCount: results.length,\n }\n}\n\n/\n Aggregates exact-match and judge-based scores into hybrid runner summaries.\n \n Call stack:\n \n {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link createRunSummary}\n * -> {@link createProviderSummary}\n * -> `report output`\n \n Use when:\n * - a runner batch mixes deterministic exact checks with judge-based grading\n * - inferenceExecutor comparison should preserve both score families and one hybrid view\n \n Expects:\n * - each score to be normalized to the `0..1` range before aggregation\n * - `scores.kind` to use only `'exact'` or `'judge'`\n /\nexport function aggregateRunResults(results: readonly RunResult[]): AggregatedRunResults {\n const runs = results.map(createRunSummary)\n\n const inferenceExecutorIds = Array.from(new Set(results.map(result => result.inferenceExecutorId)))\n const inferenceExecutors = inferenceExecutorIds\n .map((inferenceExecutorId) => {\n const providerResults = results.filter(result => result.inferenceExecutorId === inferenceExecutorId)\n return createProviderSummary(inferenceExecutorId, providerResults)\n })\n .sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId))\n\n const overall = createProviderSummary(\n 'overall',\n results,\n )\n\n return {\n overall: {\n exactAverage: overall.exactAverage,\n hybridAverage: overall.hybridAverage,\n judgeAverage: overall.judgeAverage,\n runCount: overall.runCount,\n },\n inferenceExecutors,\n runs,\n }\n}\n","import type { CollectedEvalEntry, EvalModule, EvalModuleMap } from '../../config'\nimport type { RunnerRuntimeContext } from './runtime-context'\n\nimport { basename, dirname, relative } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst evalFileSuffix = '.eval.ts'\nconst absolutePathPattern = /^(?:[A-Z]:\\/\|\\/\|\\\\\\\\)/i\n\nfunction normalizePath(value: string): string {\n return value.replaceAll('\\\\', '/')\n}\n\n/\n Converts a file path into a project-relative path when possible.\n \n Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n \n Before: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n /\nexport function asProjectRelativePath(filePath: string, context: RunnerRuntimeContext): string {\n const normalizedFilePath = normalizePath(filePath)\n const normalizedProjectRootDirectory = normalizePath(context.projectRootDirectory)\n const filePathWindowsDrive = normalizedFilePath.match(/^[A-Z]:\\//i)?.[0]\n const projectRootWindowsDrive = normalizedProjectRootDirectory.match(/^[A-Z]:\\//i)?.[0]\n\n if (filePathWindowsDrive != null && projectRootWindowsDrive == null) {\n return normalizedFilePath\n }\n\n if (\n filePathWindowsDrive != null\n && projectRootWindowsDrive != null\n && filePathWindowsDrive.toLowerCase() !== projectRootWindowsDrive.toLowerCase()\n ) {\n return normalizedFilePath\n }\n\n const projectRootDirectory = context.projectRootDirectory\n const relativeFilePath = normalizePath(relative(projectRootDirectory, filePath))\n\n if (!absolutePathPattern.test(relativeFilePath)) {\n if (relativeFilePath === '..') {\n return normalizePath(filePath)\n }\n\n if (!relativeFilePath.startsWith('../')) {\n return relativeFilePath\n }\n }\n\n return normalizePath(filePath)\n}\n\nfunction resolveModuleFilePath(moduleHref: string): string \| null {\n if (!moduleHref.startsWith('file:')) {\n return null\n }\n\n try {\n return fileURLToPath(moduleHref)\n }\n catch {\n return null\n }\n}\n\nfunction createCollectedEvalEntry(\n moduleHref: string,\n moduleDefinition: EvalModule,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry \| null {\n const filePath = resolveModuleFilePath(moduleHref)\n\n if (!filePath) {\n return null\n }\n\n const relativeFilePath = asProjectRelativePath(filePath, context)\n\n if (!relativeFilePath.endsWith(evalFileSuffix)) {\n return null\n }\n\n const entryName = basename(relativeFilePath, evalFileSuffix)\n\n if (entryName.length === 0) {\n return null\n }\n\n const relativeDirectory = dirname(relativeFilePath)\n const directory = relativeDirectory === '.' ? '' : relativeDirectory\n\n return {\n ...moduleDefinition.default,\n directory,\n filePath,\n id: directory.length === 0 ? entryName : `${directory}/${entryName}`,\n name: entryName,\n }\n}\n\n/\n Collects loaded vieval modules into sorted runner entries with stable ids.\n \n Call stack:\n \n `import.meta.glob(...)`\n * -> {@link collectEvalEntries}\n * -> {@link createCollectedEvalEntry}\n * -> {@link CollectedEvalEntry}[]\n \n Use when:\n * - the runner has already loaded candidate eval modules\n * - downstream scheduling needs stable entry ids and directory metadata\n /\nexport function collectEvalEntries(\n modules: EvalModuleMap,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry[] {\n return Object.entries(modules)\n .flatMap(([moduleHref, moduleDefinition]) => {\n const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context)\n\n if (!entry) {\n return []\n }\n\n return [entry]\n })\n .sort((left, right) => left.id.localeCompare(right.id))\n}\n","import type { TaskCacheRuntime } from '../cache'\nimport type { AggregatedRunResults, RunResult } from './aggregate'\nimport type { ScheduledTask } from './schedule'\nimport type { TaskExecutionContext } from './task-context'\n\nimport { errorMessageFrom } from '@moeru/std'\nimport { limitConcurrency } from '@vitest/runner/utils'\n\nimport { aggregateRunResults } from './aggregate'\n\n/\n Executes one scheduled runner task and returns a normalized run result.\n \n Use when:\n * - a scheduler already selected the task and execution context\n * - the caller wants a typed executor contract for runner workers\n \n Expects:\n * - the task context to be ready for model resolution and task-scoped work\n \n Returns:\n * - a normalized run result with score entries ready for aggregation\n /\nexport type ScheduledTaskExecutor = (\n task: ScheduledTask,\n context: TaskExecutionContext,\n) => Promise<RunResult>\n\n/\n Terminal task state reported by runner lifecycle hooks.\n \n Use when:\n * - reporting the outcome of one scheduled task to lifecycle observers\n \n Expects:\n * - hooks treat the value as final for the completed task\n /\nexport type RunnerTaskState = 'passed' \| 'failed'\n\n/\n Optional runner execution hooks used while processing scheduled tasks.\n \n Use when:\n * - callers want lifecycle visibility around sequential task execution\n * - task execution should remain deterministic while still observable\n \n Expects:\n * - hook functions are synchronous lifecycle observers\n /\nexport interface RunScheduledTasksOptions {\n /\n Creates per-task execution context.\n \n Use when:\n * - executor code needs per-task models, cache, or other task-scoped data\n /\n createExecutionContext?: (task: ScheduledTask) => TaskExecutionContext\n /\n Runs before the executor starts handling a task.\n \n Use when:\n * - callers want to observe task activation before execution begins\n \n Expects:\n * - thrown errors abort the task before executor work starts\n /\n onTaskStart?: (task: ScheduledTask) => void\n /\n Runs after the executor settles for a task.\n \n Use when:\n * - callers want to observe successful and failed task completion\n \n Expects:\n * - thrown errors abort successful runs\n * - failed-task observers do not override the executor error for the task\n /\n onTaskEnd?: (task: ScheduledTask, state: RunnerTaskState) => void\n /\n Maximum number of tasks to execute concurrently.\n \n @default 1\n /\n maxConcurrency?: number\n}\n\nfunction createDefaultExecutionContext(): TaskExecutionContext {\n const cache: TaskCacheRuntime = {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n\n return {\n cache,\n models: [],\n }\n}\n\n/\n Error thrown when a scheduled run fails before producing a normalized result.\n /\nexport class RunnerExecutionError extends Error {\n /\n Stable task id that failed.\n /\n taskId: string\n\n constructor(taskId: string, cause: unknown) {\n const message = errorMessageFrom(cause) ?? 'Unknown runner execution failure.'\n super(`Runner task \"${taskId}\" failed: ${message}`)\n this.name = 'RunnerExecutionError'\n this.taskId = taskId\n this.cause = cause\n }\n}\n\nfunction createRunnerExecutionError(taskId: string, cause: unknown): RunnerExecutionError {\n if (cause instanceof RunnerExecutionError && cause.taskId === taskId) {\n return cause\n }\n\n return new RunnerExecutionError(taskId, cause)\n}\n\n/\n Executes runner tasks sequentially and aggregates the normalized results.\n \n Call stack:\n \n {@link createRunnerSchedule}\n * -> {@link runScheduledTasks}\n * -> `executor(task)`\n * -> {@link aggregateRunResults}\n \n Use when:\n * - the caller already expanded the runner matrix\n * - task execution should stay deterministic and easy to debug\n \n Expects:\n * - `executor` to return normalized `0..1` scores\n * - callers to handle concurrency outside this helper when needed\n * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers\n \n Throws:\n * - `RunnerExecutionError` when task setup, hooks, or the executor throws\n /\nexport async function runScheduledTasks(\n tasks: readonly ScheduledTask[],\n executor: ScheduledTaskExecutor,\n options: RunScheduledTasksOptions = {},\n): Promise<AggregatedRunResults> {\n if (tasks.length === 0) {\n return aggregateRunResults([])\n }\n\n async function executeScheduledTask(task: ScheduledTask): Promise<RunResult> {\n let executionContext: TaskExecutionContext\n\n try {\n executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext()\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskStart?.(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n let runResult: RunResult\n try {\n runResult = await executor(task, executionContext)\n }\n catch (error) {\n try {\n options.onTaskEnd?.(task, 'failed')\n }\n catch {\n // Failed-task observers must not mask the task execution failure.\n }\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskEnd?.(task, 'passed')\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n return runResult\n }\n\n const maxConcurrency = options.maxConcurrency ?? 1\n if (maxConcurrency <= 1) {\n const results: RunResult[] = []\n for (const task of tasks) {\n results.push(await executeScheduledTask(task))\n }\n return aggregateRunResults(results)\n }\n\n const runWithLimit = limitConcurrency(maxConcurrency)\n const resultPairs = await Promise.all(tasks.map(async (task, index) => {\n const result = await runWithLimit(async () => executeScheduledTask(task))\n return { index, result }\n }))\n\n const sortedResults = resultPairs\n .sort((left, right) => left.index - right.index)\n .map(item => item.result)\n\n return aggregateRunResults(sortedResults)\n}\n","import { createRequire } from 'node:module'\nimport { dirname } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst require = createRequire(import.meta.url)\n\n/\n Shared runtime context used by the vieval runner.\n \n Use when:\n * - runner services need stable path resolution without module-level side effects\n * - call sites want deterministic control over workspace root detection\n /\nexport interface RunnerRuntimeContext {\n /\n Absolute project root directory used for path normalization.\n /\n projectRootDirectory: string\n}\n\n/\n Options used to construct the runner runtime context.\n /\nexport interface CreateVievalRunnerRuntimeContextOptions {\n /\n Directory used to search for the nearest pnpm workspace.\n \n @default directory of this module file\n /\n cwd?: string\n /\n Absolute fallback directory when a pnpm workspace root is not found.\n \n @default package root directory (`packages/vieval`)\n /\n fallbackProjectRootDirectory?: string\n}\n\n/\n Creates a side-effect-free runtime context for runner path normalization.\n \n Call stack:\n \n {@link createRunnerRuntimeContext}\n * -> `findWorkspaceDir(cwd)`\n * -> `resolve projectRootDirectory`\n * -> `{ projectRootDirectory }`\n \n Use when:\n * - initializing runner infrastructure before collecting eval modules\n * - tests need deterministic root resolution behavior\n /\nexport async function createRunnerRuntimeContext(\n options: CreateVievalRunnerRuntimeContextOptions = {},\n): Promise<RunnerRuntimeContext> {\n const cwd = options.cwd ?? dirname(fileURLToPath(import.meta.url))\n const fallbackProjectRootDirectory = options.fallbackProjectRootDirectory\n ?? fileURLToPath(new URL('../../../', import.meta.url))\n\n // NOTICE:\n // We use dynamic `require` here because `@pnpm/find-workspace-dir` is CommonJS.\n // Keeping this load inside the factory avoids module-level initialization side effects.\n const { findWorkspaceDir } = require('@pnpm/find-workspace-dir') as {\n findWorkspaceDir: (currentWorkingDirectory: string) => Promise<string \| undefined>\n }\n\n // NOTICE:\n // Workspace discovery is required to keep collected eval ids stable when this\n // package is moved inside different monorepo layouts.\n const workspaceDirectory = await findWorkspaceDir(cwd)\n\n return {\n projectRootDirectory: workspaceDirectory ?? fallbackProjectRootDirectory,\n }\n}\n","import type { CollectedEvalEntry, MatrixDefinition, MatrixLayer, MatrixValue } from '../../config'\n\n/\n Describes the inferenceExecutor target for a scheduled eval run.\n /\nexport interface InferenceExecutor {\n /\n Stable inferenceExecutor identifier such as `openai:gpt-4.1-mini`.\n /\n id: string\n}\n\n/\n Stores the selected value for each matrix axis.\n /\nexport type RunnerMatrixSelection = Record<string, string>\n\n/\n Stores stable row ids for one resolved scheduled task matrix.\n /\nexport interface ScheduledTaskMatrixMeta {\n /\n Stable row id for the resolved run matrix selection.\n /\n runRowId: string\n /\n Stable row id for the resolved eval matrix selection.\n /\n evalRowId: string\n}\n\n/\n Stores the structured matrix payload for one scheduled task.\n /\nexport interface ScheduledTaskMatrix {\n /\n Runtime matrix selection visible to task code.\n /\n run: RunnerMatrixSelection\n /\n Eval-time matrix selection visible to task code.\n /\n eval: RunnerMatrixSelection\n /\n Stable row ids for both scopes.\n /\n meta: ScheduledTaskMatrixMeta\n}\n\n/\n Maps matrix axis names to the values that should be expanded.\n /\nexport type RunnerMatrixDefinition = MatrixDefinition\n\n/\n Accepts either flat axis definitions or one layered matrix object.\n /\nexport type RunnerMatrixInput = RunnerMatrixDefinition \| MatrixLayer\n\nconst matrixLayerKeys = new Set(['disable', 'extend', 'override'])\nconst ambiguousMatrixDefinitionErrorMessage = 'Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.'\n\n/\n Represents one fully expanded runner task.\n /\nexport interface ScheduledTask {\n /\n Stable task id derived from the entry, inferenceExecutor, and matrix selection.\n /\n id: string\n /\n The collected eval entry to execute.\n /\n entry: CollectedEvalEntry\n /\n The inferenceExecutor selected for this task.\n /\n inferenceExecutor: InferenceExecutor\n /\n The concrete scoped matrix selection for this task.\n /\n matrix: ScheduledTaskMatrix\n}\n\n/\n Configures how the runner should expand its execution matrix.\n /\nexport interface CreateRunnerScheduleOptions {\n /\n Collected eval entries that should be scheduled.\n /\n entries: readonly CollectedEvalEntry[]\n /\n Providers that should run each entry.\n /\n inferenceExecutors: readonly InferenceExecutor[]\n /\n Optional run-time matrix axes expanded as a cartesian product.\n /\n runMatrix?: RunnerMatrixInput\n /\n Optional eval-time matrix axes expanded as a cartesian product.\n /\n evalMatrix?: RunnerMatrixInput\n}\n\nfunction encodeTaskIdSegment(value: string): string {\n return encodeURIComponent(value)\n}\n\nfunction stringifyMatrixValue(value: MatrixValue): string {\n return String(value)\n}\n\nfunction cloneMatrixSelection(matrix: RunnerMatrixSelection): RunnerMatrixSelection {\n return { ...matrix }\n}\n\nfunction createScheduledTaskMatrix(\n runMatrix: RunnerMatrixSelection,\n evalMatrix: RunnerMatrixSelection,\n): ScheduledTaskMatrix {\n return {\n eval: cloneMatrixSelection(evalMatrix),\n meta: {\n evalRowId: createStableRowId(evalMatrix),\n runRowId: createStableRowId(runMatrix),\n },\n run: cloneMatrixSelection(runMatrix),\n }\n}\n\nfunction isMatrixLayer(matrix: RunnerMatrixInput): matrix is MatrixLayer {\n const matrixKeys = Object.keys(matrix)\n return (\n matrixKeys.length > 0\n && matrixKeys.every(key => matrixLayerKeys.has(key))\n )\n}\n\nfunction assertNonAmbiguousMatrixDefinition(matrix: RunnerMatrixInput): void {\n const matrixKeys = Object.keys(matrix)\n const hasReservedKeys = matrixKeys.some(key => matrixLayerKeys.has(key))\n const hasAxisKeys = matrixKeys.some(key => !matrixLayerKeys.has(key))\n\n if (hasReservedKeys && hasAxisKeys) {\n throw new TypeError(ambiguousMatrixDefinitionErrorMessage)\n }\n}\n\nfunction normalizeLayerInputToAxes(matrix: RunnerMatrixInput \| undefined): MatrixLayer \| undefined {\n if (matrix == null) {\n return undefined\n }\n\n assertNonAmbiguousMatrixDefinition(matrix)\n\n if (isMatrixLayer(matrix)) {\n return matrix\n }\n\n return {\n extend: matrix,\n }\n}\n\nfunction dedupeAxisValues(values: readonly MatrixValue[]): string[] {\n return Array.from(new Set(values.map(stringifyMatrixValue)))\n}\n\nfunction applyAxisValues(\n axes: Map<string, string[]>,\n definition: RunnerMatrixDefinition \| undefined,\n mode: 'extend' \| 'override',\n): void {\n if (definition == null) {\n return\n }\n\n for (const [axis, values] of Object.entries(definition)) {\n const nextValues = dedupeAxisValues(values)\n\n if (mode === 'extend') {\n const existingValues = axes.get(axis) ?? []\n axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])))\n continue\n }\n\n axes.set(axis, nextValues)\n }\n}\n\nfunction applyLayer(\n baseAxes: ReadonlyMap<string, string[]>,\n layer: MatrixLayer \| undefined,\n): Map<string, string[]> {\n const nextAxes = new Map<string, string[]>(\n Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]),\n )\n\n for (const axis of layer?.disable ?? []) {\n nextAxes.delete(axis)\n }\n\n applyAxisValues(nextAxes, layer?.extend, 'extend')\n applyAxisValues(nextAxes, layer?.override, 'override')\n\n return nextAxes\n}\n\nfunction expandAxesToRows(axes: ReadonlyMap<string, readonly string[]>): RunnerMatrixSelection[] {\n if (axes.size === 0) {\n return [{}]\n }\n\n const dimensions = Array.from(axes.entries())\n\n let selections: RunnerMatrixSelection[] = [{}]\n\n for (const [axis, values] of dimensions) {\n if (values.length === 0) {\n return []\n }\n\n const nextSelections: RunnerMatrixSelection[] = []\n\n for (const selection of selections) {\n for (const value of values) {\n nextSelections.push({\n ...selection,\n [axis]: value,\n })\n }\n }\n\n selections = nextSelections\n }\n\n return selections\n}\n\nfunction createStableRowId(matrix: RunnerMatrixSelection): string {\n const segments = Object.entries(matrix)\n .sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis))\n .map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`)\n\n if (segments.length === 0) {\n return 'default'\n }\n\n return segments.join('&')\n}\n\nfunction createTaskId(entryId: string, inferenceExecutorId: string, runRowId: string, evalRowId: string): string {\n const encodedEntryId = encodeTaskIdSegment(entryId)\n const encodedProviderId = encodeTaskIdSegment(inferenceExecutorId)\n\n return [\n encodedEntryId,\n encodedProviderId,\n `run=${encodeTaskIdSegment(runRowId)}`,\n `eval=${encodeTaskIdSegment(evalRowId)}`,\n ].join('::')\n}\n\nfunction createResolvedRunAxes(\n entry: CollectedEvalEntry,\n runMatrix: RunnerMatrixInput \| undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n runMatrix,\n entry.matrix?.runMatrix,\n entry.task?.matrix?.runMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\nfunction createResolvedEvalAxes(\n entry: CollectedEvalEntry,\n evalMatrix: RunnerMatrixInput \| undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n evalMatrix,\n entry.matrix?.evalMatrix,\n entry.task?.matrix?.evalMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\n/\n Expands collected entries into a stable runner schedule.\n \n Call stack:\n \n {@link collectEvalEntries} (`../runner`)\n * -> {@link createRunnerSchedule}\n * -> {@link expandAxesToRows}\n * -> {@link ScheduledTask}[]\n \n Use when:\n * - the runner already knows which eval entries are available\n * - each entry must run against multiple inferenceExecutors or matrix variants\n \n Expects:\n * - `entries` and `inferenceExecutors` to be provided in the desired execution order\n * - matrix axes to use insertion order when generating combinations\n /\nexport function createRunnerSchedule(options: CreateRunnerScheduleOptions): ScheduledTask[] {\n if (options.entries.length === 0) {\n return []\n }\n\n if (options.inferenceExecutors.length === 0) {\n return []\n }\n\n const tasks: ScheduledTask[] = []\n\n for (const entry of options.entries) {\n const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix))\n const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix))\n\n if (runSelections.length === 0 \|\| evalSelections.length === 0) {\n continue\n }\n\n for (const inferenceExecutor of options.inferenceExecutors) {\n for (const runMatrix of runSelections) {\n for (const evalMatrix of evalSelections) {\n const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix)\n\n tasks.push({\n entry,\n id: createTaskId(\n entry.id,\n inferenceExecutor.id,\n isolatedMatrix.meta.runRowId,\n isolatedMatrix.meta.evalRowId,\n ),\n matrix: isolatedMatrix,\n inferenceExecutor,\n })\n }\n }\n }\n }\n\n return tasks\n}\n","import type { ModelDefinition } from '../../config/models'\nimport type { TaskCacheRuntime } from '../cache'\nimport type { ScheduledTask } from './schedule'\n\n/\n Task-scoped execution context exposed to runner executors.\n /\nexport interface TaskExecutionContext {\n /\n Deterministic cache runtime scoped to the current task project.\n /\n cache: TaskCacheRuntime\n /\n Configured model registrations available to model plugins.\n /\n models: readonly ModelDefinition[]\n}\n\n/\n Inputs used to build task execution context.\n /\nexport interface CreateTaskExecutionContextOptions {\n cache?: TaskCacheRuntime\n models: readonly ModelDefinition[]\n task: ScheduledTask\n}\n\nfunction createNoopTaskCacheRuntime(): TaskCacheRuntime {\n return {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n}\n\n/\n Creates task-scoped context data for runner execution.\n \n Call stack:\n \n {@link runScheduledTasks}\n * -> {@link createTaskExecutionContext}\n * -> `TaskExecutionContext`\n */\nexport function createTaskExecutionContext(options: CreateTaskExecutionContextOptions): TaskExecutionContext {\n return {\n cache: options.cache ?? createNoopTaskCacheRuntime(),\n models: options.models,\n }\n}\n"],"mappings":";;;;;;;;;;;AA2BA,SAAS,oBAAoB,OAAuB;CAClD,MAAM,aAAa,MAAM,MAAM;AAC/B,KAAI,WAAW,WAAW,EACxB,QAAO;AAGT,QAAO,WAAW,QAAQ,aAAa,IAAI;;AAG7C,SAAS,mBAAmB,WAA+B,WAAmD;AAC5G,KAAI,aAAa,QAAQ,UAAU,SAAS,EAC1C,QAAO,UAAU,WAAW,IAAI,GAAG,UAAU,MAAM,EAAE,GAAG;AAG1D,KAAI,aAAa,QAAQ,UAAU,WAAW,EAC5C;AAGF,KAAI,cAAc,mBAChB,QAAO;AAGT,KAAI,cAAc,aAChB,QAAO;AAGT,KAAI,cAAc,YAChB,QAAO;;;;;;;;;;;AAeX,SAAgB,+BAA+B,SAAqC;CAClF,MAAM,eAAe,QAAQ,IAAI,KAAI,YAAW,oBAAoB,QAAQ,CAAC;CAC7E,MAAM,YAAY,mBAAmB,QAAQ,KAAK,QAAQ,UAAU;AAEpE,KAAI,aAAa,WAAW,EAC1B,QAAO,aAAa,OAAO,CAAC,WAAW,GAAG,CAAC,YAAY,YAAY;AAGrE,KAAI,aAAa,KACf,QAAO;CAGT,MAAM,cAAc,aAAa,MAAM,GAAG,KAAK,IAAI,GAAG,aAAa,SAAS,EAAE,CAAC;CAC/E,MAAM,OAAO,aAAa,aAAa,SAAS,MAAM;AACtD,QAAO,CAAC,GAAG,aAAa,GAAG,KAAK,GAAG,YAAY;;AAGjD,eAAe,gBAAgB,MAAc,SAAyC;CACpF,MAAM,YAAY,QAAQ,KAAK;CAC/B,MAAM,gBAAgB,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG,KAAK,KAAK,CAAC,GAAG,KAAK,QAAQ,CAAC,SAAS,GAAG,CAAC,MAAM,GAAG,GAAG;AACzG,OAAM,MAAM,WAAW,EAAE,WAAW,MAAM,CAAC;AAC3C,OAAM,UAAU,eAAe,QAAQ;AACvC,OAAM,OAAO,eAAe,KAAK;;AAGnC,SAAS,sBAAsB,MAA+B;AAC5D,QAAO;EACL;EACA,MAAM,SAAS;AACb,OAAI;AACF,UAAM,OAAO,KAAK;AAClB,WAAO;WAEH;AACJ,WAAO;;;EAGX,iBAAiB;AACf,UAAO,iBAAiB,KAAK;;EAE/B,MAAM,kBAAkB;AACtB,SAAM,MAAM,QAAQ,KAAK,EAAE,EAAE,WAAW,MAAM,CAAC;AAC/C,UAAO,kBAAkB,KAAK;;EAEhC,MAAM,aAAa;AACjB,UAAO,MAAM,SAAS,KAAK;;EAE7B,MAAM,YAAY,OAAO;AACvB,SAAM,gBAAgB,MAAM,MAAM;;EAEpC,MAAM,SAAS,WAAW,SAAS;AACjC,UAAO,MAAM,SAAS,MAAM,SAAS;;EAEvC,MAAM,UAAU,OAAO,WAAW,SAAS;AACzC,SAAM,gBAAgB,MAAM,OAAO,KAAK,OAAO,SAAS,CAAC;;EAE3D,MAAM,WAAc;AAClB,UAAO,KAAK,MAAM,MAAM,SAAS,MAAM,QAAQ,CAAC;;EAElD,MAAM,UAAU,OAAO;AACrB,SAAM,gBAAgB,MAAM,GAAG,KAAK,UAAU,OAAO,MAAM,EAAE,CAAC,IAAI;;EAEpE,MAAM,mBAAsB;AAC1B,UAAO,MAAM,KAAK,UAAe;;EAEnC,MAAM,sBAAyB;AAC7B,UAAO,MAAM,KAAK,UAAa;;EAElC;;AAGH,SAAS,qBAAqB,eAAuB,WAAmC;AACtF,QAAO,EACL,KAAK,SAAS;EACZ,MAAM,uBAAuB,+BAA+B,QAAQ;AACpE,SAAO,sBAAsB,KAAK,eAAe,oBAAoB,UAAU,EAAE,GAAG,qBAAqB,CAAC;IAE7G;;;;;;;;;;;;;;;;;AAkBH,SAAgB,iCACd,SACkB;CAClB,MAAM,qBAAqB,oBAAoB,QAAQ,YAAY;CACnE,MAAM,mBAAmB,oBAAoB,QAAQ,YAAY;CACjE,MAAM,gBAAgB,KAAK,QAAQ,oBAAoB,oBAAoB,iBAAiB;AAE5F,QAAO,EACL,UAAU,MAAM;AACd,SAAO,qBAAqB,eAAe,KAAK;IAEnD;;;;ACvCH,SAAS,yBAAyB,QAAkD;AAClF,QAAO;EACL,MAAM,EACJ,GAAG,OAAO,MACX;EACD,MAAM,EACJ,GAAG,OAAO,MACX;EACD,KAAK,EACH,GAAG,OAAO,KACX;EACF;;AAGH,SAAS,qBAAqB,MAA4B;AACxD,KAAI,SAAS,WAAW,SAAS,QAC/B,QAAO;AAGT,OAAM,IAAI,UAAU,4BAA4B,KAAK,IAAI;;AAG3D,SAAS,QAAQ,QAA0C;AACzD,KAAI,OAAO,WAAW,EACpB,QAAO;AAIT,QADc,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,EAAE,GAC5C,OAAO;;AAGxB,SAAS,oBAAoB,cAA6B,cAA4C;AACpG,KAAI,gBAAgB,QAAQ,gBAAgB,KAC1C,SAAQ,eAAe,gBAAgB;AAGzC,KAAI,gBAAgB,KAClB,QAAO;AAGT,KAAI,gBAAgB,KAClB,QAAO;AAGT,QAAO;;AAGT,SAAS,oBAAoB,QAA2C;CACtE,MAAM,UAAwB;EAC5B,OAAO,EAAE;EACT,OAAO,EAAE;EACV;AAED,MAAK,MAAM,SAAS,QAAQ;AAG1B,MAFa,qBAAqB,MAAM,KAAK,KAEhC,SAAS;AACpB,WAAQ,MAAM,KAAK,MAAM,MAAM;AAC/B;;AAGF,UAAQ,MAAM,KAAK,MAAM,MAAM;;AAGjC,QAAO;;AAGT,SAAS,iBAAiB,QAAyC;CACjE,MAAM,UAAU,oBAAoB,OAAO,OAAO;CAClD,MAAM,eAAe,QAAQ,QAAQ,MAAM;CAC3C,MAAM,eAAe,QAAQ,QAAQ,MAAM;AAE3C,QAAO;EACL,SAAS,OAAO;EAChB;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D,IAAI,OAAO;EACX;EACA,QAAQ,yBAAyB,OAAO,OAAO;EAC/C,qBAAqB,OAAO;EAC7B;;AAGH,SAAS,sBAAsB,qBAA6B,SAA0D;CACpH,MAAM,cAAwB,EAAE;CAChC,MAAM,cAAwB,EAAE;AAEhC,MAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,UAAU,oBAAoB,OAAO,OAAO;AAClD,cAAY,KAAK,GAAG,QAAQ,MAAM;AAClC,cAAY,KAAK,GAAG,QAAQ,MAAM;;CAGpC,MAAM,eAAe,QAAQ,YAAY;CACzC,MAAM,eAAe,QAAQ,YAAY;AAEzC,QAAO;EACL;EACA,eAAe,oBAAoB,cAAc,aAAa;EAC9D;EACA;EACA,UAAU,QAAQ;EACnB;;;;;;;;;;;;;;;;;;;;;AAsBH,SAAgB,oBAAoB,SAAqD;CACvF,MAAM,OAAO,QAAQ,IAAI,iBAAiB;CAG1C,MAAM,qBADuB,MAAM,KAAK,IAAI,IAAI,QAAQ,KAAI,WAAU,OAAO,oBAAoB,CAAC,CAAC,CAEhG,KAAK,wBAAwB;AAE5B,SAAO,sBAAsB,qBADL,QAAQ,QAAO,WAAU,OAAO,wBAAwB,oBAAoB,CAClC;GAClE,CACD,MAAM,MAAM,UAAU,KAAK,oBAAoB,cAAc,MAAM,oBAAoB,CAAC;CAE3F,MAAM,UAAU,sBACd,WACA,QACD;AAED,QAAO;EACL,SAAS;GACP,cAAc,QAAQ;GACtB,eAAe,QAAQ;GACvB,cAAc,QAAQ;GACtB,UAAU,QAAQ;GACnB;EACD;EACA;EACD;;;;ACvRH,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAE5B,SAAS,cAAc,OAAuB;AAC5C,QAAO,MAAM,WAAW,MAAM,IAAI;;;;;;;;;;;AAYpC,SAAgB,sBAAsB,UAAkB,SAAuC;CAC7F,MAAM,qBAAqB,cAAc,SAAS;CAClD,MAAM,iCAAiC,cAAc,QAAQ,qBAAqB;CAClF,MAAM,uBAAuB,mBAAmB,MAAM,aAAa,GAAG;CACtE,MAAM,0BAA0B,+BAA+B,MAAM,aAAa,GAAG;AAErF,KAAI,wBAAwB,QAAQ,2BAA2B,KAC7D,QAAO;AAGT,KACE,wBAAwB,QACrB,2BAA2B,QAC3B,qBAAqB,aAAa,KAAK,wBAAwB,aAAa,CAE/E,QAAO;CAGT,MAAM,uBAAuB,QAAQ;CACrC,MAAM,mBAAmB,cAAc,SAAS,sBAAsB,SAAS,CAAC;AAEhF,KAAI,CAAC,oBAAoB,KAAK,iBAAiB,EAAE;AAC/C,MAAI,qBAAqB,KACvB,QAAO,cAAc,SAAS;AAGhC,MAAI,CAAC,iBAAiB,WAAW,MAAM,CACrC,QAAO;;AAIX,QAAO,cAAc,SAAS;;AAGhC,SAAS,sBAAsB,YAAmC;AAChE,KAAI,CAAC,WAAW,WAAW,QAAQ,CACjC,QAAO;AAGT,KAAI;AACF,SAAO,cAAc,WAAW;SAE5B;AACJ,SAAO;;;AAIX,SAAS,yBACP,YACA,kBACA,SAC2B;CAC3B,MAAM,WAAW,sBAAsB,WAAW;AAElD,KAAI,CAAC,SACH,QAAO;CAGT,MAAM,mBAAmB,sBAAsB,UAAU,QAAQ;AAEjE,KAAI,CAAC,iBAAiB,SAAS,eAAe,CAC5C,QAAO;CAGT,MAAM,YAAY,SAAS,kBAAkB,eAAe;AAE5D,KAAI,UAAU,WAAW,EACvB,QAAO;CAGT,MAAM,oBAAoB,QAAQ,iBAAiB;CACnD,MAAM,YAAY,sBAAsB,MAAM,KAAK;AAEnD,QAAO;EACL,GAAG,iBAAiB;EACpB;EACA;EACA,IAAI,UAAU,WAAW,IAAI,YAAY,GAAG,UAAU,GAAG;EACzD,MAAM;EACP;;;;;;;;;;;;;;;;AAiBH,SAAgB,mBACd,SACA,SACsB;AACtB,QAAO,OAAO,QAAQ,QAAQ,CAC3B,SAAS,CAAC,YAAY,sBAAsB;EAC3C,MAAM,QAAQ,yBAAyB,YAAY,kBAAkB,QAAQ;AAE7E,MAAI,CAAC,MACH,QAAO,EAAE;AAGX,SAAO,CAAC,MAAM;GACd,CACD,MAAM,MAAM,UAAU,KAAK,GAAG,cAAc,MAAM,GAAG,CAAC;;;;AC9C3D,SAAS,gCAAsD;AAY7D,QAAO;EACL,OAZ8B,EAC9B,UAAU,MAAM;AACd,UAAO,EACL,KAAK,SAAS;IACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,UAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;MAE3G;KAEJ;EAIC,QAAQ,EAAE;EACX;;;;;AAMH,IAAa,uBAAb,cAA0C,MAAM;;;;CAI9C;CAEA,YAAY,QAAgB,OAAgB;EAC1C,MAAM,UAAU,iBAAiB,MAAM,IAAI;AAC3C,QAAM,gBAAgB,OAAO,YAAY,UAAU;AACnD,OAAK,OAAO;AACZ,OAAK,SAAS;AACd,OAAK,QAAQ;;;AAIjB,SAAS,2BAA2B,QAAgB,OAAsC;AACxF,KAAI,iBAAiB,wBAAwB,MAAM,WAAW,OAC5D,QAAO;AAGT,QAAO,IAAI,qBAAqB,QAAQ,MAAM;;;;;;;;;;;;;;;;;;;;;;;;AAyBhD,eAAsB,kBACpB,OACA,UACA,UAAoC,EAAE,EACP;AAC/B,KAAI,MAAM,WAAW,EACnB,QAAO,oBAAoB,EAAE,CAAC;CAGhC,eAAe,qBAAqB,MAAyC;EAC3E,IAAI;AAEJ,MAAI;AACF,sBAAmB,QAAQ,yBAAyB,KAAK,IAAI,+BAA+B;WAEvF,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,cAAc,KAAK;WAEtB,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;EAGlD,IAAI;AACJ,MAAI;AACF,eAAY,MAAM,SAAS,MAAM,iBAAiB;WAE7C,OAAO;AACZ,OAAI;AACF,YAAQ,YAAY,MAAM,SAAS;WAE/B;AAGN,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,MAAI;AACF,WAAQ,YAAY,MAAM,SAAS;WAE9B,OAAO;AACZ,SAAM,2BAA2B,KAAK,IAAI,MAAM;;AAGlD,SAAO;;CAGT,MAAM,iBAAiB,QAAQ,kBAAkB;AACjD,KAAI,kBAAkB,GAAG;EACvB,MAAM,UAAuB,EAAE;AAC/B,OAAK,MAAM,QAAQ,MACjB,SAAQ,KAAK,MAAM,qBAAqB,KAAK,CAAC;AAEhD,SAAO,oBAAoB,QAAQ;;CAGrC,MAAM,eAAe,iBAAiB,eAAe;AAUrD,QAAO,qBATa,MAAM,QAAQ,IAAI,MAAM,IAAI,OAAO,MAAM,UAAU;AAErE,SAAO;GAAE;GAAO,QADD,MAAM,aAAa,YAAY,qBAAqB,KAAK,CAAC;GACjD;GACxB,CAAC,EAGA,MAAM,MAAM,UAAU,KAAK,QAAQ,MAAM,MAAM,CAC/C,KAAI,SAAQ,KAAK,OAAO,CAEc;;;;ACzN3C,MAAM,UAAU,cAAc,OAAO,KAAK,IAAI;;;;;;;;;;;;;;;AAgD9C,eAAsB,2BACpB,UAAmD,EAAE,EACtB;CAC/B,MAAM,MAAM,QAAQ,OAAO,QAAQ,cAAc,OAAO,KAAK,IAAI,CAAC;CAClE,MAAM,+BAA+B,QAAQ,gCACxC,cAAc,IAAI,IAAI,aAAa,OAAO,KAAK,IAAI,CAAC;CAKzD,MAAM,EAAE,qBAAqB,QAAQ,2BAA2B;AAShE,QAAO,EACL,sBAHyB,MAAM,iBAAiB,IAAI,IAGR,8BAC7C;;;;ACdH,MAAM,kBAAkB,IAAI,IAAI;CAAC;CAAW;CAAU;CAAW,CAAC;AAClE,MAAM,wCAAwC;AA8C9C,SAAS,oBAAoB,OAAuB;AAClD,QAAO,mBAAmB,MAAM;;AAGlC,SAAS,qBAAqB,OAA4B;AACxD,QAAO,OAAO,MAAM;;AAGtB,SAAS,qBAAqB,QAAsD;AAClF,QAAO,EAAE,GAAG,QAAQ;;AAGtB,SAAS,0BACP,WACA,YACqB;AACrB,QAAO;EACL,MAAM,qBAAqB,WAAW;EACtC,MAAM;GACJ,WAAW,kBAAkB,WAAW;GACxC,UAAU,kBAAkB,UAAU;GACvC;EACD,KAAK,qBAAqB,UAAU;EACrC;;AAGH,SAAS,cAAc,QAAkD;CACvE,MAAM,aAAa,OAAO,KAAK,OAAO;AACtC,QACE,WAAW,SAAS,KACjB,WAAW,OAAM,QAAO,gBAAgB,IAAI,IAAI,CAAC;;AAIxD,SAAS,mCAAmC,QAAiC;CAC3E,MAAM,aAAa,OAAO,KAAK,OAAO;CACtC,MAAM,kBAAkB,WAAW,MAAK,QAAO,gBAAgB,IAAI,IAAI,CAAC;CACxE,MAAM,cAAc,WAAW,MAAK,QAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC;AAErE,KAAI,mBAAmB,YACrB,OAAM,IAAI,UAAU,sCAAsC;;AAI9D,SAAS,0BAA0B,QAAgE;AACjG,KAAI,UAAU,KACZ;AAGF,oCAAmC,OAAO;AAE1C,KAAI,cAAc,OAAO,CACvB,QAAO;AAGT,QAAO,EACL,QAAQ,QACT;;AAGH,SAAS,iBAAiB,QAA0C;AAClE,QAAO,MAAM,KAAK,IAAI,IAAI,OAAO,IAAI,qBAAqB,CAAC,CAAC;;AAG9D,SAAS,gBACP,MACA,YACA,MACM;AACN,KAAI,cAAc,KAChB;AAGF,MAAK,MAAM,CAAC,MAAM,WAAW,OAAO,QAAQ,WAAW,EAAE;EACvD,MAAM,aAAa,iBAAiB,OAAO;AAE3C,MAAI,SAAS,UAAU;GACrB,MAAM,iBAAiB,KAAK,IAAI,KAAK,IAAI,EAAE;AAC3C,QAAK,IAAI,MAAM,MAAM,KAAK,IAAI,IAAI,CAAC,GAAG,gBAAgB,GAAG,WAAW,CAAC,CAAC,CAAC;AACvE;;AAGF,OAAK,IAAI,MAAM,WAAW;;;AAI9B,SAAS,WACP,UACA,OACuB;CACvB,MAAM,WAAW,IAAI,IACnB,MAAM,KAAK,SAAS,SAAS,CAAC,CAAC,KAAK,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAC5E;AAED,MAAK,MAAM,QAAQ,OAAO,WAAW,EAAE,CACrC,UAAS,OAAO,KAAK;AAGvB,iBAAgB,UAAU,OAAO,QAAQ,SAAS;AAClD,iBAAgB,UAAU,OAAO,UAAU,WAAW;AAEtD,QAAO;;AAGT,SAAS,iBAAiB,MAAuE;AAC/F,KAAI,KAAK,SAAS,EAChB,QAAO,CAAC,EAAE,CAAC;CAGb,MAAM,aAAa,MAAM,KAAK,KAAK,SAAS,CAAC;CAE7C,IAAI,aAAsC,CAAC,EAAE,CAAC;AAE9C,MAAK,MAAM,CAAC,MAAM,WAAW,YAAY;AACvC,MAAI,OAAO,WAAW,EACpB,QAAO,EAAE;EAGX,MAAM,iBAA0C,EAAE;AAElD,OAAK,MAAM,aAAa,WACtB,MAAK,MAAM,SAAS,OAClB,gBAAe,KAAK;GAClB,GAAG;IACF,OAAO;GACT,CAAC;AAIN,eAAa;;AAGf,QAAO;;AAGT,SAAS,kBAAkB,QAAuC;CAChE,MAAM,WAAW,OAAO,QAAQ,OAAO,CACpC,MAAM,CAAC,WAAW,CAAC,eAAe,SAAS,cAAc,UAAU,CAAC,CACpE,KAAK,CAAC,MAAM,WAAW,GAAG,oBAAoB,KAAK,CAAC,GAAG,oBAAoB,MAAM,GAAG;AAEvF,KAAI,SAAS,WAAW,EACtB,QAAO;AAGT,QAAO,SAAS,KAAK,IAAI;;AAG3B,SAAS,aAAa,SAAiB,qBAA6B,UAAkB,WAA2B;AAI/G,QAAO;EAHgB,oBAAoB,QAAQ;EACzB,oBAAoB,oBAAoB;EAKhE,OAAO,oBAAoB,SAAS;EACpC,QAAQ,oBAAoB,UAAU;EACvC,CAAC,KAAK,KAAK;;AAGd,SAAS,sBACP,OACA,WACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;AAGT,SAAS,uBACP,OACA,YACuB;CACvB,IAAI,+BAAe,IAAI,KAAuB;AAE9C,MAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;EACrB,CACC,gBAAe,WAAW,cAAc,0BAA0B,WAAW,CAAC;AAGhF,QAAO;;;;;;;;;;;;;;;;;;;;AAqBT,SAAgB,qBAAqB,SAAuD;AAC1F,KAAI,QAAQ,QAAQ,WAAW,EAC7B,QAAO,EAAE;AAGX,KAAI,QAAQ,mBAAmB,WAAW,EACxC,QAAO,EAAE;CAGX,MAAM,QAAyB,EAAE;AAEjC,MAAK,MAAM,SAAS,QAAQ,SAAS;EACnC,MAAM,gBAAgB,iBAAiB,sBAAsB,OAAO,QAAQ,UAAU,CAAC;EACvF,MAAM,iBAAiB,iBAAiB,uBAAuB,OAAO,QAAQ,WAAW,CAAC;AAE1F,MAAI,cAAc,WAAW,KAAK,eAAe,WAAW,EAC1D;AAGF,OAAK,MAAM,qBAAqB,QAAQ,mBACtC,MAAK,MAAM,aAAa,cACtB,MAAK,MAAM,cAAc,gBAAgB;GACvC,MAAM,iBAAiB,0BAA0B,WAAW,WAAW;AAEvE,SAAM,KAAK;IACT;IACA,IAAI,aACF,MAAM,IACN,kBAAkB,IAClB,eAAe,KAAK,UACpB,eAAe,KAAK,UACrB;IACD,QAAQ;IACR;IACD,CAAC;;;AAMV,QAAO;;;;AC1UT,SAAS,6BAA+C;AACtD,QAAO,EACL,UAAU,MAAM;AACd,SAAO,EACL,KAAK,SAAS;GACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,IAAI;AACjC,SAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,IAAI;KAE3G;IAEJ;;;;;;;;;;;AAYH,SAAgB,2BAA2B,SAAkE;AAC3G,QAAO;EACL,OAAO,QAAQ,SAAS,4BAA4B;EACpD,QAAQ,QAAQ;EACjB"}
1	+ {"version":3,"file":"index.mjs","names":[],"sources":["../../../src/core/cache/filesystem.ts","../../../src/core/runner/aggregate.ts","../../../src/core/runner/collect.ts","../../../src/core/runner/run.ts","../../../src/core/runner/runtime-context.ts","../../../src/core/runner/schedule.ts","../../../src/core/runner/task-context.ts"],"sourcesContent":["import type { CacheFileHandle, CacheFileOptions, CacheNamespace, TaskCacheRuntime } from './types'\n\nimport process from 'node:process'\n\nimport { Buffer } from 'node:buffer'\nimport { createReadStream, createWriteStream } from 'node:fs'\nimport { access, mkdir, readFile, rename, writeFile } from 'node:fs/promises'\nimport { dirname, join } from 'node:path'\n\n/*\n Options for creating the filesystem-backed task cache runtime.\n /\nexport interface CreateFilesystemTaskCacheRuntimeOptions {\n /\n Absolute cache root directory.\n /\n cacheRootDirectory: string\n /\n Project identifier under one workspace cache scope.\n /\n projectName: string\n /\n Workspace identifier used to share cache roots across projects.\n /\n workspaceId: string\n}\n\nfunction sanitizePathSegment(value: string): string {\n const normalized = value.trim()\n if (normalized.length === 0) {\n return 'default'\n }\n\n return normalized.replace(/[^\\w.-]+/g, '-')\n}\n\nfunction normalizeExtension(extension: string \| undefined, mediaType: string \| undefined): string \| undefined {\n if (extension != null && extension.length > 0) {\n return extension.startsWith('.') ? extension.slice(1) : extension\n }\n\n if (mediaType == null \|\| mediaType.length === 0) {\n return undefined\n }\n\n if (mediaType === 'application/json') {\n return 'json'\n }\n\n if (mediaType === 'text/plain') {\n return 'txt'\n }\n\n if (mediaType === 'audio/wav') {\n return 'wav'\n }\n\n return undefined\n}\n\n/\n Normalizes cache file options into deterministic relative path segments.\n \n Before:\n * - `{ key: ['cases', 'dataset hash', 'v1'], ext: 'json' }`\n \n After:\n * - `['cases', 'dataset-hash', 'v1.json']`\n /\nexport function normalizeCacheFilePathSegments(options: CacheFileOptions): string[] {\n const sanitizedKey = options.key.map(segment => sanitizePathSegment(segment))\n const extension = normalizeExtension(options.ext, options.mediaType)\n\n if (sanitizedKey.length === 0) {\n return extension == null ? ['artifact'] : [`artifact.${extension}`]\n }\n\n if (extension == null) {\n return sanitizedKey\n }\n\n const withoutTail = sanitizedKey.slice(0, Math.max(0, sanitizedKey.length - 1))\n const tail = sanitizedKey[sanitizedKey.length - 1] ?? 'artifact'\n return [...withoutTail, `${tail}.${extension}`]\n}\n\nasync function writeAtomically(path: string, content: Buffer \| string): Promise<void> {\n const directory = dirname(path)\n const temporaryPath = `${path}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`\n await mkdir(directory, { recursive: true })\n await writeFile(temporaryPath, content)\n await rename(temporaryPath, path)\n}\n\nfunction createCacheFileHandle(path: string): CacheFileHandle {\n return {\n path,\n async exists() {\n try {\n await access(path)\n return true\n }\n catch {\n return false\n }\n },\n openReadStream() {\n return createReadStream(path)\n },\n async openWriteStream() {\n await mkdir(dirname(path), { recursive: true })\n return createWriteStream(path)\n },\n async readBuffer() {\n return await readFile(path)\n },\n async writeBuffer(value) {\n await writeAtomically(path, value)\n },\n async readText(encoding = 'utf-8') {\n return await readFile(path, encoding)\n },\n async writeText(value, encoding = 'utf-8') {\n await writeAtomically(path, Buffer.from(value, encoding))\n },\n async readJson<T>() {\n return JSON.parse(await readFile(path, 'utf-8')) as T\n },\n async writeJson(value) {\n await writeAtomically(path, `${JSON.stringify(value, null, 2)}\\n`)\n },\n async loadAsCasesInput<T>() {\n return await this.readJson<T[]>()\n },\n async loadAsExpectFixture<T>() {\n return await this.readJson<T>()\n },\n }\n}\n\nfunction createCacheNamespace(baseDirectory: string, namespace: string): CacheNamespace {\n return {\n file(options) {\n const relativePathSegments = normalizeCacheFilePathSegments(options)\n return createCacheFileHandle(join(baseDirectory, sanitizePathSegment(namespace), ...relativePathSegments))\n },\n }\n}\n\n/\n Creates a deterministic filesystem-backed task cache runtime.\n \n Use when:\n * - eval tasks need reproducible cache paths for expensive pre-processing outputs\n * - benchmark adapters need one artifact-oriented API for text/json/binary reads and writes\n \n Expects:\n * - `cacheRootDirectory` to be writable by the running process\n * - `workspaceId` + `projectName` to stay stable for reproducible paths\n \n Returns:\n * - task cache runtime that resolves namespaced file handles under:\n * `<cacheRootDirectory>/<workspaceId>/<projectName>/<namespace>/...`\n /\nexport function createFilesystemTaskCacheRuntime(\n options: CreateFilesystemTaskCacheRuntimeOptions,\n): TaskCacheRuntime {\n const workspaceDirectory = sanitizePathSegment(options.workspaceId)\n const projectDirectory = sanitizePathSegment(options.projectName)\n const baseDirectory = join(options.cacheRootDirectory, workspaceDirectory, projectDirectory)\n\n return {\n namespace(name) {\n return createCacheNamespace(baseDirectory, name)\n },\n }\n}\n","import type { ScheduledTaskMatrix } from './schedule'\n\n/\n Identifies the scoring family for a single eval score.\n /\nexport type RunScoreKind = 'exact' \| 'judge'\n\n/\n Represents one normalized score emitted by a completed eval run.\n /\nexport interface RunScore {\n /\n Score family used for aggregation.\n /\n kind: RunScoreKind\n /\n Normalized score in the `0..1` range.\n /\n score: number\n}\n\n/\n Captures the output of one scheduled runner task.\n /\nexport interface RunResult {\n /\n Stable run id, usually copied from the scheduled task id.\n /\n id: string\n /\n Collected eval entry id.\n /\n entryId: string\n /\n Stable inferenceExecutor id.\n /\n inferenceExecutorId: string\n /\n Concrete matrix selection used by the run.\n /\n matrix: ScheduledTaskMatrix\n /\n Raw scores emitted by the eval.\n /\n scores: readonly RunScore[]\n}\n\n/\n Stores the per-run score averages after normalization.\n /\nexport interface AggregatedRunSummary {\n /\n Stable run id.\n /\n id: string\n /\n Collected eval entry id.\n /\n entryId: string\n /\n Stable inferenceExecutor id.\n /\n inferenceExecutorId: string\n /\n Concrete matrix selection used by the run.\n /\n matrix: ScheduledTaskMatrix\n /\n Mean of exact-match scores or `null` when absent.\n /\n exactAverage: number \| null\n /\n Mean of judge-based scores or `null` when absent.\n /\n judgeAverage: number \| null\n /\n Hybrid average. Uses both families when present, otherwise falls back to the\n * single available family.\n /\n hybridAverage: number \| null\n}\n\n/\n Stores inferenceExecutor-level score aggregates across multiple runs.\n /\nexport interface AggregatedProviderSummary {\n /\n Stable inferenceExecutor id.\n /\n inferenceExecutorId: string\n /\n Number of runs included in this inferenceExecutor bucket.\n /\n runCount: number\n /\n Mean of all exact-match scores or `null` when absent.\n /\n exactAverage: number \| null\n /\n Mean of all judge-based scores or `null` when absent.\n /\n judgeAverage: number \| null\n /\n Hybrid average derived from the inferenceExecutor exact and judge means.\n /\n hybridAverage: number \| null\n}\n\n/\n Stores the final aggregation output for a batch of runner results.\n /\nexport interface AggregatedRunResults {\n /\n Per-run normalized score summaries.\n /\n runs: AggregatedRunSummary[]\n /\n Provider-level summaries sorted by inferenceExecutor id.\n /\n inferenceExecutors: AggregatedProviderSummary[]\n /\n Overall summary across every run.\n /\n overall: {\n exactAverage: number \| null\n judgeAverage: number \| null\n hybridAverage: number \| null\n runCount: number\n }\n}\n\ninterface ScoreBuckets {\n exact: number[]\n judge: number[]\n}\n\nfunction cloneScheduledTaskMatrix(matrix: ScheduledTaskMatrix): ScheduledTaskMatrix {\n return {\n eval: {\n ...matrix.eval,\n },\n meta: {\n ...matrix.meta,\n },\n run: {\n ...matrix.run,\n },\n }\n}\n\nfunction assertKnownScoreKind(kind: string): RunScoreKind {\n if (kind === 'exact' \|\| kind === 'judge') {\n return kind\n }\n\n throw new TypeError(`Unknown eval score kind \"${kind}\".`)\n}\n\nfunction average(scores: readonly number[]): number \| null {\n if (scores.length === 0) {\n return null\n }\n\n const total = scores.reduce((sum, score) => sum + score, 0)\n return total / scores.length\n}\n\nfunction createHybridAverage(exactAverage: number \| null, judgeAverage: number \| null): number \| null {\n if (exactAverage != null && judgeAverage != null) {\n return (exactAverage + judgeAverage) / 2\n }\n\n if (exactAverage != null) {\n return exactAverage\n }\n\n if (judgeAverage != null) {\n return judgeAverage\n }\n\n return null\n}\n\nfunction collectScoreBuckets(scores: readonly RunScore[]): ScoreBuckets {\n const buckets: ScoreBuckets = {\n exact: [],\n judge: [],\n }\n\n for (const score of scores) {\n const kind = assertKnownScoreKind(score.kind)\n\n if (kind === 'exact') {\n buckets.exact.push(score.score)\n continue\n }\n\n buckets.judge.push(score.score)\n }\n\n return buckets\n}\n\nfunction createRunSummary(result: RunResult): AggregatedRunSummary {\n const buckets = collectScoreBuckets(result.scores)\n const exactAverage = average(buckets.exact)\n const judgeAverage = average(buckets.judge)\n\n return {\n entryId: result.entryId,\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n id: result.id,\n judgeAverage,\n matrix: cloneScheduledTaskMatrix(result.matrix),\n inferenceExecutorId: result.inferenceExecutorId,\n }\n}\n\nfunction createProviderSummary(inferenceExecutorId: string, results: readonly RunResult[]): AggregatedProviderSummary {\n const exactScores: number[] = []\n const judgeScores: number[] = []\n\n for (const result of results) {\n const buckets = collectScoreBuckets(result.scores)\n exactScores.push(...buckets.exact)\n judgeScores.push(...buckets.judge)\n }\n\n const exactAverage = average(exactScores)\n const judgeAverage = average(judgeScores)\n\n return {\n exactAverage,\n hybridAverage: createHybridAverage(exactAverage, judgeAverage),\n judgeAverage,\n inferenceExecutorId,\n runCount: results.length,\n }\n}\n\n/\n Aggregates exact-match and judge-based scores into hybrid runner summaries.\n \n Call stack:\n \n {@link runScheduledTasks}\n * -> {@link aggregateRunResults}\n * -> {@link createRunSummary}\n * -> {@link createProviderSummary}\n * -> `report output`\n \n Use when:\n * - a runner batch mixes deterministic exact checks with judge-based grading\n * - inferenceExecutor comparison should preserve both score families and one hybrid view\n \n Expects:\n * - each score to be normalized to the `0..1` range before aggregation\n * - `scores.kind` to use only `'exact'` or `'judge'`\n /\nexport function aggregateRunResults(results: readonly RunResult[]): AggregatedRunResults {\n const runs = results.map(createRunSummary)\n\n const inferenceExecutorIds = Array.from(new Set(results.map(result => result.inferenceExecutorId)))\n const inferenceExecutors = inferenceExecutorIds\n .map((inferenceExecutorId) => {\n const providerResults = results.filter(result => result.inferenceExecutorId === inferenceExecutorId)\n return createProviderSummary(inferenceExecutorId, providerResults)\n })\n .sort((left, right) => left.inferenceExecutorId.localeCompare(right.inferenceExecutorId))\n\n const overall = createProviderSummary(\n 'overall',\n results,\n )\n\n return {\n overall: {\n exactAverage: overall.exactAverage,\n hybridAverage: overall.hybridAverage,\n judgeAverage: overall.judgeAverage,\n runCount: overall.runCount,\n },\n inferenceExecutors,\n runs,\n }\n}\n","import type { CollectedEvalEntry, EvalModule, EvalModuleMap } from '../../config'\nimport type { RunnerRuntimeContext } from './runtime-context'\n\nimport { basename, dirname, relative } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst evalFileSuffix = '.eval.ts'\nconst absolutePathPattern = /^(?:[A-Z]:\\/\|\\/\|\\\\\\\\)/i\n\nfunction normalizePath(value: string): string {\n return value.replaceAll('\\\\', '/')\n}\n\n/\n Converts a file path into a project-relative path when possible.\n \n Before: `/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n \n Before: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n * After: `D:/repo/plugins/airi-plugin-game-chess/src/agent/evals/chess-commentary.eval.ts`\n /\nexport function asProjectRelativePath(filePath: string, context: RunnerRuntimeContext): string {\n const normalizedFilePath = normalizePath(filePath)\n const normalizedProjectRootDirectory = normalizePath(context.projectRootDirectory)\n const filePathWindowsDrive = normalizedFilePath.match(/^[A-Z]:\\//i)?.[0]\n const projectRootWindowsDrive = normalizedProjectRootDirectory.match(/^[A-Z]:\\//i)?.[0]\n\n if (filePathWindowsDrive != null && projectRootWindowsDrive == null) {\n return normalizedFilePath\n }\n\n if (\n filePathWindowsDrive != null\n && projectRootWindowsDrive != null\n && filePathWindowsDrive.toLowerCase() !== projectRootWindowsDrive.toLowerCase()\n ) {\n return normalizedFilePath\n }\n\n const projectRootDirectory = context.projectRootDirectory\n const relativeFilePath = normalizePath(relative(projectRootDirectory, filePath))\n\n if (!absolutePathPattern.test(relativeFilePath)) {\n if (relativeFilePath === '..') {\n return normalizePath(filePath)\n }\n\n if (!relativeFilePath.startsWith('../')) {\n return relativeFilePath\n }\n }\n\n return normalizePath(filePath)\n}\n\nfunction resolveModuleFilePath(moduleHref: string): string \| null {\n if (!moduleHref.startsWith('file:')) {\n return null\n }\n\n try {\n return fileURLToPath(moduleHref)\n }\n catch {\n return null\n }\n}\n\nfunction createCollectedEvalEntry(\n moduleHref: string,\n moduleDefinition: EvalModule,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry \| null {\n const filePath = resolveModuleFilePath(moduleHref)\n\n if (!filePath) {\n return null\n }\n\n const relativeFilePath = asProjectRelativePath(filePath, context)\n\n if (!relativeFilePath.endsWith(evalFileSuffix)) {\n return null\n }\n\n const entryName = basename(relativeFilePath, evalFileSuffix)\n\n if (entryName.length === 0) {\n return null\n }\n\n const relativeDirectory = dirname(relativeFilePath)\n const directory = relativeDirectory === '.' ? '' : relativeDirectory\n\n return {\n ...moduleDefinition.default,\n directory,\n filePath,\n id: directory.length === 0 ? entryName : `${directory}/${entryName}`,\n name: entryName,\n }\n}\n\n/\n Collects loaded vieval modules into sorted runner entries with stable ids.\n \n Call stack:\n \n `import.meta.glob(...)`\n * -> {@link collectEvalEntries}\n * -> {@link createCollectedEvalEntry}\n * -> {@link CollectedEvalEntry}[]\n \n Use when:\n * - the runner has already loaded candidate eval modules\n * - downstream scheduling needs stable entry ids and directory metadata\n /\nexport function collectEvalEntries(\n modules: EvalModuleMap,\n context: RunnerRuntimeContext,\n): CollectedEvalEntry[] {\n return Object.entries(modules)\n .flatMap(([moduleHref, moduleDefinition]) => {\n const entry = createCollectedEvalEntry(moduleHref, moduleDefinition, context)\n\n if (!entry) {\n return []\n }\n\n return [entry]\n })\n .sort((left, right) => left.id.localeCompare(right.id))\n}\n","import type { TaskCacheRuntime } from '../cache'\nimport type { AggregatedRunResults, RunResult } from './aggregate'\nimport type { ScheduledTask } from './schedule'\nimport type { TaskExecutionContext } from './task-context'\n\nimport { errorMessageFrom } from '@moeru/std'\nimport { limitConcurrency } from '@vitest/runner/utils'\n\nimport { aggregateRunResults } from './aggregate'\n\n/\n Executes one scheduled runner task and returns a normalized run result.\n \n Use when:\n * - a scheduler already selected the task and execution context\n * - the caller wants a typed executor contract for runner workers\n \n Expects:\n * - the task context to be ready for model resolution and task-scoped work\n \n Returns:\n * - a normalized run result with score entries ready for aggregation\n /\nexport type ScheduledTaskExecutor = (\n task: ScheduledTask,\n context: TaskExecutionContext,\n) => Promise<RunResult>\n\n/\n Terminal task state reported by runner lifecycle hooks.\n \n Use when:\n * - reporting the outcome of one scheduled task to lifecycle observers\n \n Expects:\n * - hooks treat the value as final for the completed task\n /\nexport type RunnerTaskState = 'passed' \| 'failed'\n\n/\n Optional runner execution hooks used while processing scheduled tasks.\n \n Use when:\n * - callers want lifecycle visibility around sequential task execution\n * - task execution should remain deterministic while still observable\n \n Expects:\n * - hook functions are synchronous lifecycle observers\n /\nexport interface RunScheduledTasksOptions {\n /\n Creates per-task execution context.\n \n Use when:\n * - executor code needs per-task models, cache, or other task-scoped data\n /\n createExecutionContext?: (task: ScheduledTask) => TaskExecutionContext\n /\n Runs before the executor starts handling a task.\n \n Use when:\n * - callers want to observe task activation before execution begins\n \n Expects:\n * - thrown errors abort the task before executor work starts\n /\n onTaskStart?: (task: ScheduledTask) => void\n /\n Runs after the executor settles for a task.\n \n Use when:\n * - callers want to observe successful and failed task completion\n \n Expects:\n * - thrown errors abort successful runs\n * - failed-task observers do not override the executor error for the task\n /\n onTaskEnd?: (task: ScheduledTask, state: RunnerTaskState) => void\n /\n Maximum number of tasks to execute concurrently.\n \n @default 1\n /\n maxConcurrency?: number\n}\n\nfunction createDefaultExecutionContext(): TaskExecutionContext {\n const cache: TaskCacheRuntime = {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n\n return {\n cache,\n models: [],\n }\n}\n\n/\n Error thrown when a scheduled run fails before producing a normalized result.\n /\nexport class RunnerExecutionError extends Error {\n /\n Stable task id that failed.\n /\n taskId: string\n\n constructor(taskId: string, cause: unknown) {\n const message = errorMessageFrom(cause) ?? 'Unknown runner execution failure.'\n super(`Runner task \"${taskId}\" failed: ${message}`)\n this.name = 'RunnerExecutionError'\n this.taskId = taskId\n this.cause = cause\n }\n}\n\nfunction createRunnerExecutionError(taskId: string, cause: unknown): RunnerExecutionError {\n if (cause instanceof RunnerExecutionError && cause.taskId === taskId) {\n return cause\n }\n\n return new RunnerExecutionError(taskId, cause)\n}\n\n/\n Executes runner tasks sequentially and aggregates the normalized results.\n \n Call stack:\n \n {@link createRunnerSchedule}\n * -> {@link runScheduledTasks}\n * -> `executor(task)`\n * -> {@link aggregateRunResults}\n \n Use when:\n * - the caller already expanded the runner matrix\n * - task execution should stay deterministic and easy to debug\n \n Expects:\n * - `executor` to return normalized `0..1` scores\n * - callers to handle concurrency outside this helper when needed\n * - `onTaskStart` / `onTaskEnd` hooks to be synchronous lifecycle observers\n \n Throws:\n * - `RunnerExecutionError` when task setup, hooks, or the executor throws\n /\nexport async function runScheduledTasks(\n tasks: readonly ScheduledTask[],\n executor: ScheduledTaskExecutor,\n options: RunScheduledTasksOptions = {},\n): Promise<AggregatedRunResults> {\n if (tasks.length === 0) {\n return aggregateRunResults([])\n }\n\n async function executeScheduledTask(task: ScheduledTask): Promise<RunResult> {\n let executionContext: TaskExecutionContext\n\n try {\n executionContext = options.createExecutionContext?.(task) ?? createDefaultExecutionContext()\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskStart?.(task)\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n let runResult: RunResult\n try {\n runResult = await executor(task, executionContext)\n }\n catch (error) {\n try {\n options.onTaskEnd?.(task, 'failed')\n }\n catch {\n // Failed-task observers must not mask the task execution failure.\n }\n throw createRunnerExecutionError(task.id, error)\n }\n\n try {\n options.onTaskEnd?.(task, 'passed')\n }\n catch (error) {\n throw createRunnerExecutionError(task.id, error)\n }\n\n return runResult\n }\n\n const maxConcurrency = options.maxConcurrency ?? 1\n if (maxConcurrency <= 1) {\n const results: RunResult[] = []\n for (const task of tasks) {\n results.push(await executeScheduledTask(task))\n }\n return aggregateRunResults(results)\n }\n\n const runWithLimit = limitConcurrency(maxConcurrency)\n const resultPairs = await Promise.all(tasks.map(async (task, index) => {\n const result = await runWithLimit(async () => executeScheduledTask(task))\n return { index, result }\n }))\n\n const sortedResults = resultPairs\n .sort((left, right) => left.index - right.index)\n .map(item => item.result)\n\n return aggregateRunResults(sortedResults)\n}\n","import { createRequire } from 'node:module'\nimport { dirname } from 'node:path'\nimport { fileURLToPath } from 'node:url'\n\nconst require = createRequire(import.meta.url)\n\n/\n Shared runtime context used by the vieval runner.\n \n Use when:\n * - runner services need stable path resolution without module-level side effects\n * - call sites want deterministic control over workspace root detection\n /\nexport interface RunnerRuntimeContext {\n /\n Absolute project root directory used for path normalization.\n /\n projectRootDirectory: string\n}\n\n/\n Options used to construct the runner runtime context.\n /\nexport interface CreateVievalRunnerRuntimeContextOptions {\n /\n Directory used to search for the nearest pnpm workspace.\n \n @default directory of this module file\n /\n cwd?: string\n /\n Absolute fallback directory when a pnpm workspace root is not found.\n \n @default package root directory (`packages/vieval`)\n /\n fallbackProjectRootDirectory?: string\n}\n\n/\n Creates a side-effect-free runtime context for runner path normalization.\n \n Call stack:\n \n {@link createRunnerRuntimeContext}\n * -> `findWorkspaceDir(cwd)`\n * -> `resolve projectRootDirectory`\n * -> `{ projectRootDirectory }`\n \n Use when:\n * - initializing runner infrastructure before collecting eval modules\n * - tests need deterministic root resolution behavior\n /\nexport async function createRunnerRuntimeContext(\n options: CreateVievalRunnerRuntimeContextOptions = {},\n): Promise<RunnerRuntimeContext> {\n const cwd = options.cwd ?? dirname(fileURLToPath(import.meta.url))\n const fallbackProjectRootDirectory = options.fallbackProjectRootDirectory\n ?? fileURLToPath(new URL('../../../', import.meta.url))\n\n // NOTICE:\n // We use dynamic `require` here because `@pnpm/find-workspace-dir` is CommonJS.\n // Keeping this load inside the factory avoids module-level initialization side effects.\n const { findWorkspaceDir } = require('@pnpm/find-workspace-dir') as {\n findWorkspaceDir: (currentWorkingDirectory: string) => Promise<string \| undefined>\n }\n\n // NOTICE:\n // Workspace discovery is required to keep collected eval ids stable when this\n // package is moved inside different monorepo layouts.\n const workspaceDirectory = await findWorkspaceDir(cwd)\n\n return {\n projectRootDirectory: workspaceDirectory ?? fallbackProjectRootDirectory,\n }\n}\n","import type { CollectedEvalEntry, MatrixDefinition, MatrixLayer, MatrixValue } from '../../config'\n\n/\n Describes the inferenceExecutor target for a scheduled eval run.\n /\nexport interface InferenceExecutor {\n /\n Stable inferenceExecutor identifier such as `openai:gpt-4.1-mini`.\n /\n id: string\n}\n\n/\n Stores the selected value for each matrix axis.\n /\nexport type RunnerMatrixSelection = Record<string, string>\n\n/\n Stores stable row ids for one resolved scheduled task matrix.\n /\nexport interface ScheduledTaskMatrixMeta {\n /\n Stable row id for the resolved run matrix selection.\n /\n runRowId: string\n /\n Stable row id for the resolved eval matrix selection.\n /\n evalRowId: string\n}\n\n/\n Stores the structured matrix payload for one scheduled task.\n /\nexport interface ScheduledTaskMatrix {\n /\n Runtime matrix selection visible to task code.\n /\n run: RunnerMatrixSelection\n /\n Eval-time matrix selection visible to task code.\n /\n eval: RunnerMatrixSelection\n /\n Stable row ids for both scopes.\n /\n meta: ScheduledTaskMatrixMeta\n}\n\n/\n Maps matrix axis names to the values that should be expanded.\n /\nexport type RunnerMatrixDefinition = MatrixDefinition\n\n/\n Accepts either flat axis definitions or one layered matrix object.\n /\nexport type RunnerMatrixInput = RunnerMatrixDefinition \| MatrixLayer\n\nconst matrixLayerKeys = new Set(['disable', 'extend', 'override'])\nconst ambiguousMatrixDefinitionErrorMessage = 'Ambiguous matrix definition: cannot mix reserved layer keys (disable, extend, override) with matrix axis keys.'\n\n/\n Represents one fully expanded runner task.\n /\nexport interface ScheduledTask {\n /\n Stable task id derived from the entry, inferenceExecutor, and matrix selection.\n /\n id: string\n /\n The collected eval entry to execute.\n /\n entry: CollectedEvalEntry\n /\n The inferenceExecutor selected for this task.\n /\n inferenceExecutor: InferenceExecutor\n /\n The concrete scoped matrix selection for this task.\n /\n matrix: ScheduledTaskMatrix\n}\n\n/\n Configures how the runner should expand its execution matrix.\n /\nexport interface CreateRunnerScheduleOptions {\n /\n Collected eval entries that should be scheduled.\n /\n entries: readonly CollectedEvalEntry[]\n /\n Providers that should run each entry.\n /\n inferenceExecutors: readonly InferenceExecutor[]\n /\n Optional run-time matrix axes expanded as a cartesian product.\n /\n runMatrix?: RunnerMatrixInput\n /\n Optional eval-time matrix axes expanded as a cartesian product.\n /\n evalMatrix?: RunnerMatrixInput\n}\n\nfunction encodeTaskIdSegment(value: string): string {\n return encodeURIComponent(value)\n}\n\nfunction stringifyMatrixValue(value: MatrixValue): string {\n return String(value)\n}\n\nfunction cloneMatrixSelection(matrix: RunnerMatrixSelection): RunnerMatrixSelection {\n return { ...matrix }\n}\n\nfunction createScheduledTaskMatrix(\n runMatrix: RunnerMatrixSelection,\n evalMatrix: RunnerMatrixSelection,\n): ScheduledTaskMatrix {\n return {\n eval: cloneMatrixSelection(evalMatrix),\n meta: {\n evalRowId: createStableRowId(evalMatrix),\n runRowId: createStableRowId(runMatrix),\n },\n run: cloneMatrixSelection(runMatrix),\n }\n}\n\nfunction isMatrixLayer(matrix: RunnerMatrixInput): matrix is MatrixLayer {\n const matrixKeys = Object.keys(matrix)\n return (\n matrixKeys.length > 0\n && matrixKeys.every(key => matrixLayerKeys.has(key))\n )\n}\n\nfunction assertNonAmbiguousMatrixDefinition(matrix: RunnerMatrixInput): void {\n const matrixKeys = Object.keys(matrix)\n const hasReservedKeys = matrixKeys.some(key => matrixLayerKeys.has(key))\n const hasAxisKeys = matrixKeys.some(key => !matrixLayerKeys.has(key))\n\n if (hasReservedKeys && hasAxisKeys) {\n throw new TypeError(ambiguousMatrixDefinitionErrorMessage)\n }\n}\n\nfunction normalizeLayerInputToAxes(matrix: RunnerMatrixInput \| undefined): MatrixLayer \| undefined {\n if (matrix == null) {\n return undefined\n }\n\n assertNonAmbiguousMatrixDefinition(matrix)\n\n if (isMatrixLayer(matrix)) {\n return matrix\n }\n\n return {\n extend: matrix,\n }\n}\n\nfunction dedupeAxisValues(values: readonly MatrixValue[]): string[] {\n return Array.from(new Set(values.map(stringifyMatrixValue)))\n}\n\nfunction applyAxisValues(\n axes: Map<string, string[]>,\n definition: RunnerMatrixDefinition \| undefined,\n mode: 'extend' \| 'override',\n): void {\n if (definition == null) {\n return\n }\n\n for (const [axis, values] of Object.entries(definition)) {\n const nextValues = dedupeAxisValues(values)\n\n if (mode === 'extend') {\n const existingValues = axes.get(axis) ?? []\n axes.set(axis, Array.from(new Set([...existingValues, ...nextValues])))\n continue\n }\n\n axes.set(axis, nextValues)\n }\n}\n\nfunction applyLayer(\n baseAxes: ReadonlyMap<string, string[]>,\n layer: MatrixLayer \| undefined,\n): Map<string, string[]> {\n const nextAxes = new Map<string, string[]>(\n Array.from(baseAxes.entries()).map(([axis, values]) => [axis, [...values]]),\n )\n\n for (const axis of layer?.disable ?? []) {\n nextAxes.delete(axis)\n }\n\n applyAxisValues(nextAxes, layer?.extend, 'extend')\n applyAxisValues(nextAxes, layer?.override, 'override')\n\n return nextAxes\n}\n\nfunction expandAxesToRows(axes: ReadonlyMap<string, readonly string[]>): RunnerMatrixSelection[] {\n if (axes.size === 0) {\n return [{}]\n }\n\n const dimensions = Array.from(axes.entries())\n\n let selections: RunnerMatrixSelection[] = [{}]\n\n for (const [axis, values] of dimensions) {\n if (values.length === 0) {\n return []\n }\n\n const nextSelections: RunnerMatrixSelection[] = []\n\n for (const selection of selections) {\n for (const value of values) {\n nextSelections.push({\n ...selection,\n [axis]: value,\n })\n }\n }\n\n selections = nextSelections\n }\n\n return selections\n}\n\nfunction createStableRowId(matrix: RunnerMatrixSelection): string {\n const segments = Object.entries(matrix)\n .sort(([leftAxis], [rightAxis]) => leftAxis.localeCompare(rightAxis))\n .map(([axis, value]) => `${encodeTaskIdSegment(axis)}=${encodeTaskIdSegment(value)}`)\n\n if (segments.length === 0) {\n return 'default'\n }\n\n return segments.join('&')\n}\n\nfunction createTaskId(entryId: string, inferenceExecutorId: string, runRowId: string, evalRowId: string): string {\n const encodedEntryId = encodeTaskIdSegment(entryId)\n const encodedProviderId = encodeTaskIdSegment(inferenceExecutorId)\n\n return [\n encodedEntryId,\n encodedProviderId,\n `run=${encodeTaskIdSegment(runRowId)}`,\n `eval=${encodeTaskIdSegment(evalRowId)}`,\n ].join('::')\n}\n\nfunction createResolvedRunAxes(\n entry: CollectedEvalEntry,\n runMatrix: RunnerMatrixInput \| undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n runMatrix,\n entry.matrix?.runMatrix,\n entry.task?.matrix?.runMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\nfunction createResolvedEvalAxes(\n entry: CollectedEvalEntry,\n evalMatrix: RunnerMatrixInput \| undefined,\n): Map<string, string[]> {\n let resolvedAxes = new Map<string, string[]>()\n\n for (const layerInput of [\n evalMatrix,\n entry.matrix?.evalMatrix,\n entry.task?.matrix?.evalMatrix,\n ]) {\n resolvedAxes = applyLayer(resolvedAxes, normalizeLayerInputToAxes(layerInput))\n }\n\n return resolvedAxes\n}\n\n/\n Expands collected entries into a stable runner schedule.\n \n Call stack:\n \n {@link collectEvalEntries} (`../runner`)\n * -> {@link createRunnerSchedule}\n * -> {@link expandAxesToRows}\n * -> {@link ScheduledTask}[]\n \n Use when:\n * - the runner already knows which eval entries are available\n * - each entry must run against multiple inferenceExecutors or matrix variants\n \n Expects:\n * - `entries` and `inferenceExecutors` to be provided in the desired execution order\n * - matrix axes to use insertion order when generating combinations\n /\nexport function createRunnerSchedule(options: CreateRunnerScheduleOptions): ScheduledTask[] {\n if (options.entries.length === 0) {\n return []\n }\n\n if (options.inferenceExecutors.length === 0) {\n return []\n }\n\n const tasks: ScheduledTask[] = []\n\n for (const entry of options.entries) {\n const runSelections = expandAxesToRows(createResolvedRunAxes(entry, options.runMatrix))\n const evalSelections = expandAxesToRows(createResolvedEvalAxes(entry, options.evalMatrix))\n\n if (runSelections.length === 0 \|\| evalSelections.length === 0) {\n continue\n }\n\n for (const inferenceExecutor of options.inferenceExecutors) {\n for (const runMatrix of runSelections) {\n for (const evalMatrix of evalSelections) {\n const isolatedMatrix = createScheduledTaskMatrix(runMatrix, evalMatrix)\n\n tasks.push({\n entry,\n id: createTaskId(\n entry.id,\n inferenceExecutor.id,\n isolatedMatrix.meta.runRowId,\n isolatedMatrix.meta.evalRowId,\n ),\n matrix: isolatedMatrix,\n inferenceExecutor,\n })\n }\n }\n }\n }\n\n return tasks\n}\n","import type { ModelDefinition } from '../../config/models'\nimport type { TaskCacheRuntime } from '../cache'\nimport type { ScheduledTask } from './schedule'\n\n/\n Task-scoped execution context exposed to runner executors.\n /\nexport interface TaskExecutionContext {\n /\n Deterministic cache runtime scoped to the current task project.\n /\n cache: TaskCacheRuntime\n /\n Configured model registrations available to model plugins.\n /\n models: readonly ModelDefinition[]\n}\n\n/\n Inputs used to build task execution context.\n /\nexport interface CreateTaskExecutionContextOptions {\n cache?: TaskCacheRuntime\n models: readonly ModelDefinition[]\n task: ScheduledTask\n}\n\nfunction createNoopTaskCacheRuntime(): TaskCacheRuntime {\n return {\n namespace(name) {\n return {\n file(options) {\n const key = options.key.join('/')\n throw new Error(`Task cache runtime is not configured. Requested namespace \"${name}\" and key \"${key}\".`)\n },\n }\n },\n }\n}\n\n/\n Creates task-scoped context data for runner execution.\n \n Call stack:\n \n {@link runScheduledTasks}\n * -> {@link createTaskExecutionContext}\n * -> `TaskExecutionContext`\n */\nexport function createTaskExecutionContext(options: CreateTaskExecutionContextOptions): TaskExecutionContext {\n return {\n cache: options.cache ?? createNoopTaskCacheRuntime(),\n models: options.models,\n }\n}\n"],"mappings":";;;;;;;;;;;AA2BA,SAAS,oBAAoB,OAAuB;CAClD,MAAM,aAAa,MAAM,KAAK;CAC9B,IAAI,WAAW,WAAW,GACxB,OAAO;CAGT,OAAO,WAAW,QAAQ,aAAa,GAAG;AAC5C;AAEA,SAAS,mBAAmB,WAA+B,WAAmD;CAC5G,IAAI,aAAa,QAAQ,UAAU,SAAS,GAC1C,OAAO,UAAU,WAAW,GAAG,IAAI,UAAU,MAAM,CAAC,IAAI;CAG1D,IAAI,aAAa,QAAQ,UAAU,WAAW,GAC5C;CAGF,IAAI,cAAc,oBAChB,OAAO;CAGT,IAAI,cAAc,cAChB,OAAO;CAGT,IAAI,cAAc,aAChB,OAAO;AAIX;;;;;;;;;;AAWA,SAAgB,+BAA+B,SAAqC;CAClF,MAAM,eAAe,QAAQ,IAAI,KAAI,YAAW,oBAAoB,OAAO,CAAC;CAC5E,MAAM,YAAY,mBAAmB,QAAQ,KAAK,QAAQ,SAAS;CAEnE,IAAI,aAAa,WAAW,GAC1B,OAAO,aAAa,OAAO,CAAC,UAAU,IAAI,CAAC,YAAY,WAAW;CAGpE,IAAI,aAAa,MACf,OAAO;CAGT,MAAM,cAAc,aAAa,MAAM,GAAG,KAAK,IAAI,GAAG,aAAa,SAAS,CAAC,CAAC;CAC9E,MAAM,OAAO,aAAa,aAAa,SAAS,MAAM;CACtD,OAAO,CAAC,GAAG,aAAa,GAAG,KAAK,GAAG,WAAW;AAChD;AAEA,eAAe,gBAAgB,MAAc,SAAyC;CACpF,MAAM,YAAY,QAAQ,IAAI;CAC9B,MAAM,gBAAgB,GAAG,KAAK,OAAO,QAAQ,IAAI,GAAG,KAAK,IAAI,EAAE,GAAG,KAAK,OAAO,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,MAAM,GAAG,EAAE;CACxG,MAAM,MAAM,WAAW,EAAE,WAAW,KAAK,CAAC;CAC1C,MAAM,UAAU,eAAe,OAAO;CACtC,MAAM,OAAO,eAAe,IAAI;AAClC;AAEA,SAAS,sBAAsB,MAA+B;CAC5D,OAAO;EACL;EACA,MAAM,SAAS;GACb,IAAI;IACF,MAAM,OAAO,IAAI;IACjB,OAAO;GACT,QACM;IACJ,OAAO;GACT;EACF;EACA,iBAAiB;GACf,OAAO,iBAAiB,IAAI;EAC9B;EACA,MAAM,kBAAkB;GACtB,MAAM,MAAM,QAAQ,IAAI,GAAG,EAAE,WAAW,KAAK,CAAC;GAC9C,OAAO,kBAAkB,IAAI;EAC/B;EACA,MAAM,aAAa;GACjB,OAAO,MAAM,SAAS,IAAI;EAC5B;EACA,MAAM,YAAY,OAAO;GACvB,MAAM,gBAAgB,MAAM,KAAK;EACnC;EACA,MAAM,SAAS,WAAW,SAAS;GACjC,OAAO,MAAM,SAAS,MAAM,QAAQ;EACtC;EACA,MAAM,UAAU,OAAO,WAAW,SAAS;GACzC,MAAM,gBAAgB,MAAM,OAAO,KAAK,OAAO,QAAQ,CAAC;EAC1D;EACA,MAAM,WAAc;GAClB,OAAO,KAAK,MAAM,MAAM,SAAS,MAAM,OAAO,CAAC;EACjD;EACA,MAAM,UAAU,OAAO;GACrB,MAAM,gBAAgB,MAAM,GAAG,KAAK,UAAU,OAAO,MAAM,CAAC,EAAE,GAAG;EACnE;EACA,MAAM,mBAAsB;GAC1B,OAAO,MAAM,KAAK,SAAc;EAClC;EACA,MAAM,sBAAyB;GAC7B,OAAO,MAAM,KAAK,SAAY;EAChC;CACF;AACF;AAEA,SAAS,qBAAqB,eAAuB,WAAmC;CACtF,OAAO,EACL,KAAK,SAAS;EACZ,MAAM,uBAAuB,+BAA+B,OAAO;EACnE,OAAO,sBAAsB,KAAK,eAAe,oBAAoB,SAAS,GAAG,GAAG,oBAAoB,CAAC;CAC3G,EACF;AACF;;;;;;;;;;;;;;;;AAiBA,SAAgB,iCACd,SACkB;CAClB,MAAM,qBAAqB,oBAAoB,QAAQ,WAAW;CAClE,MAAM,mBAAmB,oBAAoB,QAAQ,WAAW;CAChE,MAAM,gBAAgB,KAAK,QAAQ,oBAAoB,oBAAoB,gBAAgB;CAE3F,OAAO,EACL,UAAU,MAAM;EACd,OAAO,qBAAqB,eAAe,IAAI;CACjD,EACF;AACF;;;ACxCA,SAAS,yBAAyB,QAAkD;CAClF,OAAO;EACL,MAAM,EACJ,GAAG,OAAO,KACZ;EACA,MAAM,EACJ,GAAG,OAAO,KACZ;EACA,KAAK,EACH,GAAG,OAAO,IACZ;CACF;AACF;AAEA,SAAS,qBAAqB,MAA4B;CACxD,IAAI,SAAS,WAAW,SAAS,SAC/B,OAAO;CAGT,MAAM,IAAI,UAAU,4BAA4B,KAAK,GAAG;AAC1D;AAEA,SAAS,QAAQ,QAA0C;CACzD,IAAI,OAAO,WAAW,GACpB,OAAO;CAIT,OADc,OAAO,QAAQ,KAAK,UAAU,MAAM,OAAO,CAC9C,IAAI,OAAO;AACxB;AAEA,SAAS,oBAAoB,cAA6B,cAA4C;CACpG,IAAI,gBAAgB,QAAQ,gBAAgB,MAC1C,QAAQ,eAAe,gBAAgB;CAGzC,IAAI,gBAAgB,MAClB,OAAO;CAGT,IAAI,gBAAgB,MAClB,OAAO;CAGT,OAAO;AACT;AAEA,SAAS,oBAAoB,QAA2C;CACtE,MAAM,UAAwB;EAC5B,OAAO,CAAC;EACR,OAAO,CAAC;CACV;CAEA,KAAK,MAAM,SAAS,QAAQ;EAG1B,IAFa,qBAAqB,MAAM,IAEjC,MAAM,SAAS;GACpB,QAAQ,MAAM,KAAK,MAAM,KAAK;GAC9B;EACF;EAEA,QAAQ,MAAM,KAAK,MAAM,KAAK;CAChC;CAEA,OAAO;AACT;AAEA,SAAS,iBAAiB,QAAyC;CACjE,MAAM,UAAU,oBAAoB,OAAO,MAAM;CACjD,MAAM,eAAe,QAAQ,QAAQ,KAAK;CAC1C,MAAM,eAAe,QAAQ,QAAQ,KAAK;CAE1C,OAAO;EACL,SAAS,OAAO;EAChB;EACA,eAAe,oBAAoB,cAAc,YAAY;EAC7D,IAAI,OAAO;EACX;EACA,QAAQ,yBAAyB,OAAO,MAAM;EAC9C,qBAAqB,OAAO;CAC9B;AACF;AAEA,SAAS,sBAAsB,qBAA6B,SAA0D;CACpH,MAAM,cAAwB,CAAC;CAC/B,MAAM,cAAwB,CAAC;CAE/B,KAAK,MAAM,UAAU,SAAS;EAC5B,MAAM,UAAU,oBAAoB,OAAO,MAAM;EACjD,YAAY,KAAK,GAAG,QAAQ,KAAK;EACjC,YAAY,KAAK,GAAG,QAAQ,KAAK;CACnC;CAEA,MAAM,eAAe,QAAQ,WAAW;CACxC,MAAM,eAAe,QAAQ,WAAW;CAExC,OAAO;EACL;EACA,eAAe,oBAAoB,cAAc,YAAY;EAC7D;EACA;EACA,UAAU,QAAQ;CACpB;AACF;;;;;;;;;;;;;;;;;;;;AAqBA,SAAgB,oBAAoB,SAAqD;CACvF,MAAM,OAAO,QAAQ,IAAI,gBAAgB;CAGzC,MAAM,qBADuB,MAAM,KAAK,IAAI,IAAI,QAAQ,KAAI,WAAU,OAAO,mBAAmB,CAAC,CACnD,CAAC,CAC5C,KAAK,wBAAwB;EAE5B,OAAO,sBAAsB,qBADL,QAAQ,QAAO,WAAU,OAAO,wBAAwB,mBAChB,CAAC;CACnE,CAAC,CAAC,CACD,MAAM,MAAM,UAAU,KAAK,oBAAoB,cAAc,MAAM,mBAAmB,CAAC;CAE1F,MAAM,UAAU,sBACd,WACA,OACF;CAEA,OAAO;EACL,SAAS;GACP,cAAc,QAAQ;GACtB,eAAe,QAAQ;GACvB,cAAc,QAAQ;GACtB,UAAU,QAAQ;EACpB;EACA;EACA;CACF;AACF;;;ACxRA,MAAM,iBAAiB;AACvB,MAAM,sBAAsB;AAE5B,SAAS,cAAc,OAAuB;CAC5C,OAAO,MAAM,WAAW,MAAM,GAAG;AACnC;;;;;;;;;;AAWA,SAAgB,sBAAsB,UAAkB,SAAuC;CAC7F,MAAM,qBAAqB,cAAc,QAAQ;CACjD,MAAM,iCAAiC,cAAc,QAAQ,oBAAoB;CACjF,MAAM,uBAAuB,mBAAmB,MAAM,YAAY,CAAC,GAAG;CACtE,MAAM,0BAA0B,+BAA+B,MAAM,YAAY,CAAC,GAAG;CAErF,IAAI,wBAAwB,QAAQ,2BAA2B,MAC7D,OAAO;CAGT,IACE,wBAAwB,QACrB,2BAA2B,QAC3B,qBAAqB,YAAY,MAAM,wBAAwB,YAAY,GAE9E,OAAO;CAGT,MAAM,uBAAuB,QAAQ;CACrC,MAAM,mBAAmB,cAAc,SAAS,sBAAsB,QAAQ,CAAC;CAE/E,IAAI,CAAC,oBAAoB,KAAK,gBAAgB,GAAG;EAC/C,IAAI,qBAAqB,MACvB,OAAO,cAAc,QAAQ;EAG/B,IAAI,CAAC,iBAAiB,WAAW,KAAK,GACpC,OAAO;CAEX;CAEA,OAAO,cAAc,QAAQ;AAC/B;AAEA,SAAS,sBAAsB,YAAmC;CAChE,IAAI,CAAC,WAAW,WAAW,OAAO,GAChC,OAAO;CAGT,IAAI;EACF,OAAO,cAAc,UAAU;CACjC,QACM;EACJ,OAAO;CACT;AACF;AAEA,SAAS,yBACP,YACA,kBACA,SAC2B;CAC3B,MAAM,WAAW,sBAAsB,UAAU;CAEjD,IAAI,CAAC,UACH,OAAO;CAGT,MAAM,mBAAmB,sBAAsB,UAAU,OAAO;CAEhE,IAAI,CAAC,iBAAiB,SAAS,cAAc,GAC3C,OAAO;CAGT,MAAM,YAAY,SAAS,kBAAkB,cAAc;CAE3D,IAAI,UAAU,WAAW,GACvB,OAAO;CAGT,MAAM,oBAAoB,QAAQ,gBAAgB;CAClD,MAAM,YAAY,sBAAsB,MAAM,KAAK;CAEnD,OAAO;EACL,GAAG,iBAAiB;EACpB;EACA;EACA,IAAI,UAAU,WAAW,IAAI,YAAY,GAAG,UAAU,GAAG;EACzD,MAAM;CACR;AACF;;;;;;;;;;;;;;;AAgBA,SAAgB,mBACd,SACA,SACsB;CACtB,OAAO,OAAO,QAAQ,OAAO,CAAC,CAC3B,SAAS,CAAC,YAAY,sBAAsB;EAC3C,MAAM,QAAQ,yBAAyB,YAAY,kBAAkB,OAAO;EAE5E,IAAI,CAAC,OACH,OAAO,CAAC;EAGV,OAAO,CAAC,KAAK;CACf,CAAC,CAAC,CACD,MAAM,MAAM,UAAU,KAAK,GAAG,cAAc,MAAM,EAAE,CAAC;AAC1D;;;AC/CA,SAAS,gCAAsD;CAY7D,OAAO;EACL,OAAA,EAXA,UAAU,MAAM;GACd,OAAO,EACL,KAAK,SAAS;IACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,GAAG;IAChC,MAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,GAAG;GACzG,EACF;EACF,EAII;EACJ,QAAQ,CAAC;CACX;AACF;;;;AAKA,IAAa,uBAAb,cAA0C,MAAM;;;;CAI9C;CAEA,YAAY,QAAgB,OAAgB;EAC1C,MAAM,UAAU,iBAAiB,KAAK,KAAK;EAC3C,MAAM,gBAAgB,OAAO,YAAY,SAAS;EAClD,KAAK,OAAO;EACZ,KAAK,SAAS;EACd,KAAK,QAAQ;CACf;AACF;AAEA,SAAS,2BAA2B,QAAgB,OAAsC;CACxF,IAAI,iBAAiB,wBAAwB,MAAM,WAAW,QAC5D,OAAO;CAGT,OAAO,IAAI,qBAAqB,QAAQ,KAAK;AAC/C;;;;;;;;;;;;;;;;;;;;;;;AAwBA,eAAsB,kBACpB,OACA,UACA,UAAoC,CAAC,GACN;CAC/B,IAAI,MAAM,WAAW,GACnB,OAAO,oBAAoB,CAAC,CAAC;CAG/B,eAAe,qBAAqB,MAAyC;EAC3E,IAAI;EAEJ,IAAI;GACF,mBAAmB,QAAQ,yBAAyB,IAAI,KAAK,8BAA8B;EAC7F,SACO,OAAO;GACZ,MAAM,2BAA2B,KAAK,IAAI,KAAK;EACjD;EAEA,IAAI;GACF,QAAQ,cAAc,IAAI;EAC5B,SACO,OAAO;GACZ,MAAM,2BAA2B,KAAK,IAAI,KAAK;EACjD;EAEA,IAAI;EACJ,IAAI;GACF,YAAY,MAAM,SAAS,MAAM,gBAAgB;EACnD,SACO,OAAO;GACZ,IAAI;IACF,QAAQ,YAAY,MAAM,QAAQ;GACpC,QACM,CAEN;GACA,MAAM,2BAA2B,KAAK,IAAI,KAAK;EACjD;EAEA,IAAI;GACF,QAAQ,YAAY,MAAM,QAAQ;EACpC,SACO,OAAO;GACZ,MAAM,2BAA2B,KAAK,IAAI,KAAK;EACjD;EAEA,OAAO;CACT;CAEA,MAAM,iBAAiB,QAAQ,kBAAkB;CACjD,IAAI,kBAAkB,GAAG;EACvB,MAAM,UAAuB,CAAC;EAC9B,KAAK,MAAM,QAAQ,OACjB,QAAQ,KAAK,MAAM,qBAAqB,IAAI,CAAC;EAE/C,OAAO,oBAAoB,OAAO;CACpC;CAEA,MAAM,eAAe,iBAAiB,cAAc;CAUpD,OAAO,qBAJe,MALI,QAAQ,IAAI,MAAM,IAAI,OAAO,MAAM,UAAU;EAErE,OAAO;GAAE;GAAO,QAAA,MADK,aAAa,YAAY,qBAAqB,IAAI,CAAC;EACjD;CACzB,CAAC,CAAC,EAAA,CAGC,MAAM,MAAM,UAAU,KAAK,QAAQ,MAAM,KAAK,CAAC,CAC/C,KAAI,SAAQ,KAAK,MAEmB,CAAC;AAC1C;;;AC1NA,MAAM,UAAU,cAAc,OAAO,KAAK,GAAG;;;;;;;;;;;;;;;AAgD7C,eAAsB,2BACpB,UAAmD,CAAC,GACrB;CAC/B,MAAM,MAAM,QAAQ,OAAO,QAAQ,cAAc,OAAO,KAAK,GAAG,CAAC;CACjE,MAAM,+BAA+B,QAAQ,gCACxC,cAAc,IAAI,IAAI,aAAa,OAAO,KAAK,GAAG,CAAC;CAKxD,MAAM,EAAE,qBAAqB,QAAQ,0BAA0B;CAS/D,OAAO,EACL,sBAAsB,MAHS,iBAAiB,GAAG,KAGP,6BAC9C;AACF;;;ACfA,MAAM,kCAAkB,IAAI,IAAI;CAAC;CAAW;CAAU;AAAU,CAAC;AACjE,MAAM,wCAAwC;AA8C9C,SAAS,oBAAoB,OAAuB;CAClD,OAAO,mBAAmB,KAAK;AACjC;AAEA,SAAS,qBAAqB,OAA4B;CACxD,OAAO,OAAO,KAAK;AACrB;AAEA,SAAS,qBAAqB,QAAsD;CAClF,OAAO,EAAE,GAAG,OAAO;AACrB;AAEA,SAAS,0BACP,WACA,YACqB;CACrB,OAAO;EACL,MAAM,qBAAqB,UAAU;EACrC,MAAM;GACJ,WAAW,kBAAkB,UAAU;GACvC,UAAU,kBAAkB,SAAS;EACvC;EACA,KAAK,qBAAqB,SAAS;CACrC;AACF;AAEA,SAAS,cAAc,QAAkD;CACvE,MAAM,aAAa,OAAO,KAAK,MAAM;CACrC,OACE,WAAW,SAAS,KACjB,WAAW,OAAM,QAAO,gBAAgB,IAAI,GAAG,CAAC;AAEvD;AAEA,SAAS,mCAAmC,QAAiC;CAC3E,MAAM,aAAa,OAAO,KAAK,MAAM;CACrC,MAAM,kBAAkB,WAAW,MAAK,QAAO,gBAAgB,IAAI,GAAG,CAAC;CACvE,MAAM,cAAc,WAAW,MAAK,QAAO,CAAC,gBAAgB,IAAI,GAAG,CAAC;CAEpE,IAAI,mBAAmB,aACrB,MAAM,IAAI,UAAU,qCAAqC;AAE7D;AAEA,SAAS,0BAA0B,QAAgE;CACjG,IAAI,UAAU,MACZ;CAGF,mCAAmC,MAAM;CAEzC,IAAI,cAAc,MAAM,GACtB,OAAO;CAGT,OAAO,EACL,QAAQ,OACV;AACF;AAEA,SAAS,iBAAiB,QAA0C;CAClE,OAAO,MAAM,KAAK,IAAI,IAAI,OAAO,IAAI,oBAAoB,CAAC,CAAC;AAC7D;AAEA,SAAS,gBACP,MACA,YACA,MACM;CACN,IAAI,cAAc,MAChB;CAGF,KAAK,MAAM,CAAC,MAAM,WAAW,OAAO,QAAQ,UAAU,GAAG;EACvD,MAAM,aAAa,iBAAiB,MAAM;EAE1C,IAAI,SAAS,UAAU;GACrB,MAAM,iBAAiB,KAAK,IAAI,IAAI,KAAK,CAAC;GAC1C,KAAK,IAAI,MAAM,MAAM,qBAAK,IAAI,IAAI,CAAC,GAAG,gBAAgB,GAAG,UAAU,CAAC,CAAC,CAAC;GACtE;EACF;EAEA,KAAK,IAAI,MAAM,UAAU;CAC3B;AACF;AAEA,SAAS,WACP,UACA,OACuB;CACvB,MAAM,WAAW,IAAI,IACnB,MAAM,KAAK,SAAS,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAC5E;CAEA,KAAK,MAAM,QAAQ,OAAO,WAAW,CAAC,GACpC,SAAS,OAAO,IAAI;CAGtB,gBAAgB,UAAU,OAAO,QAAQ,QAAQ;CACjD,gBAAgB,UAAU,OAAO,UAAU,UAAU;CAErD,OAAO;AACT;AAEA,SAAS,iBAAiB,MAAuE;CAC/F,IAAI,KAAK,SAAS,GAChB,OAAO,CAAC,CAAC,CAAC;CAGZ,MAAM,aAAa,MAAM,KAAK,KAAK,QAAQ,CAAC;CAE5C,IAAI,aAAsC,CAAC,CAAC,CAAC;CAE7C,KAAK,MAAM,CAAC,MAAM,WAAW,YAAY;EACvC,IAAI,OAAO,WAAW,GACpB,OAAO,CAAC;EAGV,MAAM,iBAA0C,CAAC;EAEjD,KAAK,MAAM,aAAa,YACtB,KAAK,MAAM,SAAS,QAClB,eAAe,KAAK;GAClB,GAAG;IACF,OAAO;EACV,CAAC;EAIL,aAAa;CACf;CAEA,OAAO;AACT;AAEA,SAAS,kBAAkB,QAAuC;CAChE,MAAM,WAAW,OAAO,QAAQ,MAAM,CAAC,CACpC,MAAM,CAAC,WAAW,CAAC,eAAe,SAAS,cAAc,SAAS,CAAC,CAAC,CACpE,KAAK,CAAC,MAAM,WAAW,GAAG,oBAAoB,IAAI,EAAE,GAAG,oBAAoB,KAAK,GAAG;CAEtF,IAAI,SAAS,WAAW,GACtB,OAAO;CAGT,OAAO,SAAS,KAAK,GAAG;AAC1B;AAEA,SAAS,aAAa,SAAiB,qBAA6B,UAAkB,WAA2B;CAI/G,OAAO;EAHgB,oBAAoB,OAI5B;EAHW,oBAAoB,mBAI5B;EAChB,OAAO,oBAAoB,QAAQ;EACnC,QAAQ,oBAAoB,SAAS;CACvC,CAAC,CAAC,KAAK,IAAI;AACb;AAEA,SAAS,sBACP,OACA,WACuB;CACvB,IAAI,+BAAe,IAAI,IAAsB;CAE7C,KAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;CACtB,GACE,eAAe,WAAW,cAAc,0BAA0B,UAAU,CAAC;CAG/E,OAAO;AACT;AAEA,SAAS,uBACP,OACA,YACuB;CACvB,IAAI,+BAAe,IAAI,IAAsB;CAE7C,KAAK,MAAM,cAAc;EACvB;EACA,MAAM,QAAQ;EACd,MAAM,MAAM,QAAQ;CACtB,GACE,eAAe,WAAW,cAAc,0BAA0B,UAAU,CAAC;CAG/E,OAAO;AACT;;;;;;;;;;;;;;;;;;;AAoBA,SAAgB,qBAAqB,SAAuD;CAC1F,IAAI,QAAQ,QAAQ,WAAW,GAC7B,OAAO,CAAC;CAGV,IAAI,QAAQ,mBAAmB,WAAW,GACxC,OAAO,CAAC;CAGV,MAAM,QAAyB,CAAC;CAEhC,KAAK,MAAM,SAAS,QAAQ,SAAS;EACnC,MAAM,gBAAgB,iBAAiB,sBAAsB,OAAO,QAAQ,SAAS,CAAC;EACtF,MAAM,iBAAiB,iBAAiB,uBAAuB,OAAO,QAAQ,UAAU,CAAC;EAEzF,IAAI,cAAc,WAAW,KAAK,eAAe,WAAW,GAC1D;EAGF,KAAK,MAAM,qBAAqB,QAAQ,oBACtC,KAAK,MAAM,aAAa,eACtB,KAAK,MAAM,cAAc,gBAAgB;GACvC,MAAM,iBAAiB,0BAA0B,WAAW,UAAU;GAEtE,MAAM,KAAK;IACT;IACA,IAAI,aACF,MAAM,IACN,kBAAkB,IAClB,eAAe,KAAK,UACpB,eAAe,KAAK,SACtB;IACA,QAAQ;IACR;GACF,CAAC;EACH;CAGN;CAEA,OAAO;AACT;;;AC3UA,SAAS,6BAA+C;CACtD,OAAO,EACL,UAAU,MAAM;EACd,OAAO,EACL,KAAK,SAAS;GACZ,MAAM,MAAM,QAAQ,IAAI,KAAK,GAAG;GAChC,MAAM,IAAI,MAAM,8DAA8D,KAAK,aAAa,IAAI,GAAG;EACzG,EACF;CACF,EACF;AACF;;;;;;;;;;AAWA,SAAgB,2BAA2B,SAAkE;CAC3G,OAAO;EACL,OAAO,QAAQ,SAAS,2BAA2B;EACnD,QAAQ,QAAQ;CAClB;AACF"}

package/dist/core/scheduler/index.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-import { a as SchedulerMiddleware, c as SchedulerScopeContext, i as SchedulerConcurrencyConfig, n as getActiveScopes, o as SchedulerRuntime, r as CreateSchedulerRuntimeOptions, s as SchedulerScope, t as createSchedulerRuntime } from "../../index-fakXoZEe.mjs";
+import { a as SchedulerMiddleware, c as SchedulerScopeContext, i as SchedulerConcurrencyConfig, n as getActiveScopes, o as SchedulerRuntime, r as CreateSchedulerRuntimeOptions, s as SchedulerScope, t as createSchedulerRuntime } from "../../index-Be5I1ZJL.mjs";
 export { CreateSchedulerRuntimeOptions, SchedulerConcurrencyConfig, SchedulerMiddleware, SchedulerRuntime, SchedulerScope, SchedulerScopeContext, createSchedulerRuntime, getActiveScopes };

package/dist/core/scheduler/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { t as createSchedulerQueue } from "../../queue-DsZQkZO_.mjs";
+import { t as createSchedulerQueue } from "../../queue-BL86z2W_.mjs";
 //#region src/core/scheduler/runtime.ts
 const schedulerScopeOrder = [
 	"workspace",
@@ -45,7 +45,7 @@ function createSchedulerRuntime(options = {}) {
 * Resolves the scheduler scopes that apply to a context.
 *
 * Before:
-* - `{ scope: 'case', workspaceId: 'ws', experimentId: 'exp', caseId: 'case-1' }`
+* - `{ scope: 'case', workspaceId: 'ws', projectName: 'project', caseId: 'case-1' }`
 *
 * After:
 * - `['workspace', 'project', 'task', 'attempt', 'case']` up to the requested scope
@@ -86,7 +86,7 @@ function getScopeQueue(scope, context, queues) {
 	return queue;
 }
 function getSchedulerScopeInstanceKey(scope, context) {
-	const workspaceKey = `workspace:${context.workspaceId}:experiment:${context.experimentId}`;
+	const workspaceKey = `workspace:${context.workspaceId}`;
 	const projectKey = `${workspaceKey}:project:${context.projectName ?? "(missing-project)"}`;
 	const taskKey = `${projectKey}:task:${context.taskId ?? "(missing-task)"}`;
 	const attemptKey = `${taskKey}:attempt:${context.attemptIndex ?? "(missing-attempt)"}`;