vieval 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +219 -109
  2. package/dist/bin/vieval.mjs +1 -1
  3. package/dist/cli/index.mjs +1 -1
  4. package/dist/{cli-DayPXzHX.mjs → cli-ImxGpoYQ.mjs} +1447 -195
  5. package/dist/cli-ImxGpoYQ.mjs.map +1 -0
  6. package/dist/config.d.mts +2 -2
  7. package/dist/config.mjs +1 -1
  8. package/dist/core/assertions/index.d.mts +1 -1
  9. package/dist/core/inference-executors/index.d.mts +1 -1
  10. package/dist/core/inference-executors/index.mjs +1 -1
  11. package/dist/core/processors/results/index.d.mts +1 -1
  12. package/dist/core/runner/index.d.mts +3 -2
  13. package/dist/core/runner/index.mjs +3 -2
  14. package/dist/core/runner/index.mjs.map +1 -1
  15. package/dist/core/scheduler/index.d.mts +2 -0
  16. package/dist/core/scheduler/index.mjs +188 -0
  17. package/dist/core/scheduler/index.mjs.map +1 -0
  18. package/dist/{env-BFSjny07.mjs → env--94B0UtW.mjs} +1 -1
  19. package/dist/{env-BFSjny07.mjs.map → env--94B0UtW.mjs.map} +1 -1
  20. package/dist/{env-BTq3dV7C.d.mts → env-BeHv_5mo.d.mts} +1 -1
  21. package/dist/{expect-extensions-QLXESWjn.mjs → expect-extensions-DCSqlneN.mjs} +1 -1
  22. package/dist/{expect-extensions-QLXESWjn.mjs.map → expect-extensions-DCSqlneN.mjs.map} +1 -1
  23. package/dist/expect.mjs +1 -1
  24. package/dist/{index-OEdqjQSe.d.mts → index-5R1_k2nv.d.mts} +195 -3
  25. package/dist/index-fakXoZEe.d.mts +147 -0
  26. package/dist/index.d.mts +120 -13
  27. package/dist/index.mjs +286 -54
  28. package/dist/index.mjs.map +1 -1
  29. package/dist/{models-D_MsBtYw.mjs → models-DIGdOUpJ.mjs} +1 -1
  30. package/dist/models-DIGdOUpJ.mjs.map +1 -0
  31. package/dist/plugins/chat-models/index.d.mts +27 -1
  32. package/dist/plugins/chat-models/index.mjs +29 -1
  33. package/dist/plugins/chat-models/index.mjs.map +1 -1
  34. package/dist/queue-DsZQkZO_.mjs +21 -0
  35. package/dist/queue-DsZQkZO_.mjs.map +1 -0
  36. package/dist/{registry-CwcMMjnZ.mjs → registry-BHGMxjpA.mjs} +164 -6
  37. package/dist/registry-BHGMxjpA.mjs.map +1 -0
  38. package/dist/testing/expect-extensions.mjs +1 -1
  39. package/package.json +8 -1
  40. package/dist/cli-DayPXzHX.mjs.map +0 -1
  41. package/dist/models-D_MsBtYw.mjs.map +0 -1
  42. package/dist/registry-CwcMMjnZ.mjs.map +0 -1
@@ -1 +1 @@
1
- {"version":3,"file":"expect-extensions-QLXESWjn.mjs","names":[],"sources":["../src/testing/runtime-expect.ts","../src/testing/expect-extensions.ts"],"sourcesContent":["import type { ExpectStatic, MatchersObject, MatcherState, Tester } from '@vitest/expect'\n\nimport {\n addCustomEqualityTesters,\n ASYMMETRIC_MATCHERS_OBJECT,\n chai,\n ChaiStyleAssertions,\n customMatchers,\n getState,\n GLOBAL_EXPECT,\n JestAsymmetricMatchers,\n JestChaiExpect,\n JestExtend,\n setState,\n} from '@vitest/expect'\n\nlet isPluginInstalled = false\nlet runtimeExpectInstance: ExpectStatic | undefined\n\n/**\n * Installs Vitest expect plugins once for process-local runtime assertions.\n *\n * Use when:\n * - running eval tasks outside Vitest worker runtime\n * - building an `expect` instance that does not rely on Vitest internal state\n *\n * Expects:\n * - `@vitest/expect` is available in runtime dependencies\n *\n * Returns:\n * - nothing; side-effects are applied to `chai`\n */\nfunction ensureRuntimeExpectPluginsInstalled(): void {\n if (isPluginInstalled) {\n return\n }\n\n chai.use(JestExtend)\n chai.use(JestChaiExpect)\n chai.use(ChaiStyleAssertions)\n chai.use(JestAsymmetricMatchers)\n isPluginInstalled = true\n}\n\n/**\n * Creates a Vitest-compatible `expect` instance without worker-state coupling.\n *\n * Use when:\n * - CLI runtime needs assertion helpers from `vieval/expect`\n * - code is executed outside `vitest run`\n *\n * Expects:\n * - plugins from {@link ensureRuntimeExpectPluginsInstalled} are installed\n * - callers do not depend on Vitest worker-only features (snapshot/poll internals)\n *\n * Returns:\n * - standalone expect instance with core matcher APIs and `extend`\n */\nfunction createRuntimeExpect(): ExpectStatic {\n ensureRuntimeExpectPluginsInstalled()\n\n const runtimeExpect = ((value: unknown, message?: string) => {\n const currentState = getState(runtimeExpect)\n setState({ assertionCalls: currentState.assertionCalls + 1 }, runtimeExpect)\n return 
chai.expect(value, message)\n }) as unknown as ExpectStatic\n\n Object.assign(runtimeExpect, chai.expect)\n Object.assign(runtimeExpect, (globalThis as Record<PropertyKey, unknown>)[ASYMMETRIC_MATCHERS_OBJECT] as object)\n\n runtimeExpect.getState = () => getState(runtimeExpect)\n runtimeExpect.setState = (state: Partial<MatcherState>) => setState(state, runtimeExpect)\n runtimeExpect.assert = chai.assert\n // NOTICE:\n // Chai's public `ExpectStatic` type does not expose Vitest's plugin-added `extend`.\n // Runtime `chai.expect.extend` exists after `JestExtend` plugin installation.\n // Source/context: `@vitest/expect` plugin pipeline in `dist/index.js`.\n // Removal condition: remove this cast if upstream exposes `extend` on Chai expect types.\n const chaiExpectWithExtend = chai.expect as unknown as {\n extend: (expect: ExpectStatic, matchers: MatchersObject) => void\n }\n runtimeExpect.extend = (matchers: MatchersObject) => chaiExpectWithExtend.extend(runtimeExpect, matchers)\n runtimeExpect.addEqualityTesters = (customTesters: Tester[]) => addCustomEqualityTesters(customTesters)\n runtimeExpect.unreachable = (message?: string) => {\n chai.assert.fail(`expected${message ? 
` \"${message}\" ` : ' '}not to be reached`)\n }\n\n runtimeExpect.setState({\n assertionCalls: 0,\n currentTestName: '',\n expectedAssertionsNumber: null,\n expectedAssertionsNumberErrorGen: null,\n isExpectingAssertions: false,\n isExpectingAssertionsError: null,\n })\n\n runtimeExpect.extend(customMatchers)\n\n return runtimeExpect\n}\n\n/**\n * Returns process-local runtime `expect` instance used by Vieval.\n *\n * Use when:\n * - you need matcher assertions in eval files and CLI runtime\n * - importing from `vitest` would crash outside Vitest worker contexts\n *\n * Expects:\n * - single-process usage (instance is memoized per process)\n *\n * Returns:\n * - memoized runtime `expect` instance\n */\nexport function getRuntimeExpect(): ExpectStatic {\n if (runtimeExpectInstance != null) {\n return runtimeExpectInstance\n }\n\n runtimeExpectInstance = createRuntimeExpect()\n Object.defineProperty(globalThis, GLOBAL_EXPECT, {\n configurable: true,\n value: runtimeExpectInstance,\n writable: true,\n })\n\n return runtimeExpectInstance\n}\n","import type { RubricJudgeResult, ToolCall } from '../core/assertions'\n\nimport { normalizeMatchText } from '../core/assertions'\nimport { getRuntimeExpect } from './runtime-expect'\n\n/**\n * Options for keyword-based matcher behavior.\n */\nexport interface KeywordMatcherOptions {\n /**\n * Case-sensitive matching toggle.\n *\n * @default false\n */\n caseSensitive?: boolean\n /**\n * Match mode.\n *\n * @default 'all'\n */\n mode?: 'all' | 'any'\n}\n\n/**\n * Shape used by tool-call matchers.\n */\nexport interface ToolCallContainer {\n /**\n * Tool calls to inspect.\n */\n toolCalls?: readonly ToolCall[]\n}\n\nfunction toKeywordArray(keywords: string | readonly string[]): readonly string[] {\n if (typeof keywords === 'string') {\n return [keywords]\n }\n\n return keywords\n}\n\n/**\n * Registers vieval custom matchers on Vitest `expect`.\n *\n * Call stack:\n *\n * {@link installVievalExpectMatchers}\n * -> 
`expect.extend(...)`\n * -> `expect(received).toMustInclude(...)`\n * -> `expect(received).toScoreRubricGreaterThan(...)`\n *\n * Use when:\n * - eval suites need domain assertions while preserving native Vitest ergonomics\n * - callers want native `.not` chaining with the same matchers\n */\nexport function installVievalExpectMatchers(): void {\n const expect = getRuntimeExpect()\n\n expect.extend({\n toMustExclude(received: unknown, keywords: string | readonly string[], options: KeywordMatcherOptions = {}) {\n const keywordList = toKeywordArray(keywords)\n\n if (typeof received !== 'string') {\n return {\n message: () => 'Expected received value to be a string.',\n pass: false,\n }\n }\n\n const normalizedText = normalizeMatchText(received, options.caseSensitive ?? false)\n const forbiddenMatches = keywordList.filter((keyword) => {\n return normalizedText.includes(normalizeMatchText(keyword, options.caseSensitive ?? false))\n })\n\n const pass = forbiddenMatches.length === 0\n\n return {\n message: () => {\n if (pass) {\n return `Expected text to include forbidden keywords: ${keywordList.join(', ')}`\n }\n\n return `Expected text not to include forbidden keywords, but matched: ${forbiddenMatches.join(', ')}`\n },\n pass,\n }\n },\n\n toMustInclude(received: unknown, keywords: string | readonly string[], options: KeywordMatcherOptions = {}) {\n const keywordList = toKeywordArray(keywords)\n\n if (typeof received !== 'string') {\n return {\n message: () => 'Expected received value to be a string.',\n pass: false,\n }\n }\n\n const normalizedText = normalizeMatchText(received, options.caseSensitive ?? false)\n const matches = keywordList.filter((keyword) => {\n return normalizedText.includes(normalizeMatchText(keyword, options.caseSensitive ?? false))\n })\n\n const mode = options.mode ?? 'all'\n const pass = mode === 'all' ? 
matches.length === keywordList.length : matches.length > 0\n\n return {\n message: () => {\n if (pass) {\n return `Expected text not to match required keywords, but matched: ${matches.join(', ')}`\n }\n\n return `Expected text to match required keywords (${mode}), but matched ${matches.length}/${keywordList.length}.`\n },\n pass,\n }\n },\n\n toScoreRubricGreaterThan(received: unknown, threshold: number) {\n const score = typeof received === 'number'\n ? received\n : (received as RubricJudgeResult | null)?.score\n\n if (typeof score !== 'number') {\n return {\n message: () => 'Expected received value to be a number or RubricJudgeResult.',\n pass: false,\n }\n }\n\n const pass = score > threshold\n\n return {\n message: () => {\n if (pass) {\n return `Expected rubric score ${score} to be less than or equal to ${threshold}.`\n }\n\n return `Expected rubric score ${score} to be greater than ${threshold}.`\n },\n pass,\n }\n },\n\n toSatisfyStructuredOutput<T>(received: unknown, validator: (value: unknown) => value is T) {\n const pass = validator(received)\n\n return {\n message: () => pass\n ? 'Expected structured output validator to fail.'\n : 'Expected structured output validator to pass.',\n pass,\n }\n },\n\n toSatisfyToolCallArgs(\n received: unknown,\n toolName: string,\n validator: (args: unknown) => boolean,\n ) {\n const toolCalls = (received as ToolCallContainer | null)?.toolCalls\n\n if (toolCalls == null) {\n return {\n message: () => 'Expected received value to provide toolCalls array.',\n pass: false,\n }\n }\n\n const targetCall = toolCalls.find(call => call.name === toolName)\n if (targetCall == null) {\n return {\n message: () => `Expected tool call ${toolName} to exist.`,\n pass: false,\n }\n }\n\n const pass = validator(targetCall.args)\n\n return {\n message: () => pass\n ? 
`Expected tool call args for ${toolName} to fail validation.`\n : `Expected tool call args for ${toolName} to pass validation.`,\n pass,\n }\n },\n })\n}\n\ninterface VievalCustomMatchers {\n /**\n * Asserts that text includes required keywords.\n *\n * Example:\n * `expect('calm answer').toMustInclude(['calm'])`\n */\n toMustInclude: (keywords: string | readonly string[], options?: KeywordMatcherOptions) => void\n /**\n * Asserts that text excludes forbidden keywords.\n *\n * Example:\n * `expect('calm answer').toMustExclude(['bestmove'])`\n */\n toMustExclude: (keywords: string | readonly string[], options?: KeywordMatcherOptions) => void\n /**\n * Asserts rubric score is greater than a threshold.\n *\n * Example:\n * `expect({ score: 0.91 }).toScoreRubricGreaterThan(0.8)`\n */\n toScoreRubricGreaterThan: (threshold: number) => void\n /**\n * Asserts structured output satisfies a validator.\n *\n * Example:\n * `expect(value).toSatisfyStructuredOutput(isMyShape)`\n */\n toSatisfyStructuredOutput: <TValue>(validator: (value: unknown) => value is TValue) => void\n /**\n * Asserts selected tool-call args satisfy validator.\n *\n * Example:\n * `expect({ toolCalls }).toSatisfyToolCallArgs('builtIn_sparkCommand', isSparkArgs)`\n */\n toSatisfyToolCallArgs: (toolName: string, validator: (args: unknown) => boolean) => void\n}\n\n/* eslint-disable unused-imports/no-unused-vars */\ndeclare module '@vitest/expect' {\n interface Matchers<T = any> extends VievalCustomMatchers {}\n interface Assertion<T = any> extends VievalCustomMatchers {}\n}\n\ndeclare module 'vitest' {\n interface Assertion extends VievalCustomMatchers {}\n interface Matchers<T = any> extends VievalCustomMatchers {}\n}\n/* eslint-enable unused-imports/no-unused-vars 
*/\n"],"mappings":";;;AAgBA,IAAI,oBAAoB;AACxB,IAAI;;;;;;;;;;;;;;AAeJ,SAAS,sCAA4C;AACnD,KAAI,kBACF;AAGF,MAAK,IAAI,WAAW;AACpB,MAAK,IAAI,eAAe;AACxB,MAAK,IAAI,oBAAoB;AAC7B,MAAK,IAAI,uBAAuB;AAChC,qBAAoB;;;;;;;;;;;;;;;;AAiBtB,SAAS,sBAAoC;AAC3C,sCAAqC;CAErC,MAAM,kBAAkB,OAAgB,YAAqB;AAE3D,WAAS,EAAE,gBADU,SAAS,cAAc,CACJ,iBAAiB,GAAG,EAAE,cAAc;AAC5E,SAAO,KAAK,OAAO,OAAO,QAAQ;;AAGpC,QAAO,OAAO,eAAe,KAAK,OAAO;AACzC,QAAO,OAAO,eAAgB,WAA4C,4BAAsC;AAEhH,eAAc,iBAAiB,SAAS,cAAc;AACtD,eAAc,YAAY,UAAiC,SAAS,OAAO,cAAc;AACzF,eAAc,SAAS,KAAK;CAM5B,MAAM,uBAAuB,KAAK;AAGlC,eAAc,UAAU,aAA6B,qBAAqB,OAAO,eAAe,SAAS;AACzG,eAAc,sBAAsB,kBAA4B,yBAAyB,cAAc;AACvG,eAAc,eAAe,YAAqB;AAChD,OAAK,OAAO,KAAK,WAAW,UAAU,KAAK,QAAQ,MAAM,IAAI,mBAAmB;;AAGlF,eAAc,SAAS;EACrB,gBAAgB;EAChB,iBAAiB;EACjB,0BAA0B;EAC1B,kCAAkC;EAClC,uBAAuB;EACvB,4BAA4B;EAC7B,CAAC;AAEF,eAAc,OAAO,eAAe;AAEpC,QAAO;;;;;;;;;;;;;;;AAgBT,SAAgB,mBAAiC;AAC/C,KAAI,yBAAyB,KAC3B,QAAO;AAGT,yBAAwB,qBAAqB;AAC7C,QAAO,eAAe,YAAY,eAAe;EAC/C,cAAc;EACd,OAAO;EACP,UAAU;EACX,CAAC;AAEF,QAAO;;;;AC7FT,SAAS,eAAe,UAAyD;AAC/E,KAAI,OAAO,aAAa,SACtB,QAAO,CAAC,SAAS;AAGnB,QAAO;;;;;;;;;;;;;;;;AAiBT,SAAgB,8BAAoC;AACnC,mBAAkB,CAE1B,OAAO;EACZ,cAAc,UAAmB,UAAsC,UAAiC,EAAE,EAAE;GAC1G,MAAM,cAAc,eAAe,SAAS;AAE5C,OAAI,OAAO,aAAa,SACtB,QAAO;IACL,eAAe;IACf,MAAM;IACP;GAGH,MAAM,iBAAiB,mBAAmB,UAAU,QAAQ,iBAAiB,MAAM;GACnF,MAAM,mBAAmB,YAAY,QAAQ,YAAY;AACvD,WAAO,eAAe,SAAS,mBAAmB,SAAS,QAAQ,iBAAiB,MAAM,CAAC;KAC3F;GAEF,MAAM,OAAO,iBAAiB,WAAW;AAEzC,UAAO;IACL,eAAe;AACb,SAAI,KACF,QAAO,gDAAgD,YAAY,KAAK,KAAK;AAG/E,YAAO,iEAAiE,iBAAiB,KAAK,KAAK;;IAErG;IACD;;EAGH,cAAc,UAAmB,UAAsC,UAAiC,EAAE,EAAE;GAC1G,MAAM,cAAc,eAAe,SAAS;AAE5C,OAAI,OAAO,aAAa,SACtB,QAAO;IACL,eAAe;IACf,MAAM;IACP;GAGH,MAAM,iBAAiB,mBAAmB,UAAU,QAAQ,iBAAiB,MAAM;GACnF,MAAM,UAAU,YAAY,QAAQ,YAAY;AAC9C,WAAO,eAAe,SAAS,mBAAmB,SAAS,QAAQ,iBAAiB,MAAM,CAAC;KAC3F;GAEF,MAAM,OAAO,QAAQ,QAAQ;GAC7B,MAAM,OAAO,SAAS,QAAQ,QAAQ,WAAW,YAAY,SAAS,QAAQ,SAAS;AAEvF,UAAO;IACL,eAAe;AACb,SAAI,KACF,QAAO,8DAA8D,QAAQ,KAAK,KAAK;AAGzF,YAAO,6CAA6C,KAAK,iBAAiB,QAAQ,OAAO,GAAG,YAAY,OAAO;;IAEjH;IAC
D;;EAGH,yBAAyB,UAAmB,WAAmB;GAC7D,MAAM,QAAQ,OAAO,aAAa,WAC9B,WACC,UAAuC;AAE5C,OAAI,OAAO,UAAU,SACnB,QAAO;IACL,eAAe;IACf,MAAM;IACP;GAGH,MAAM,OAAO,QAAQ;AAErB,UAAO;IACL,eAAe;AACb,SAAI,KACF,QAAO,yBAAyB,MAAM,+BAA+B,UAAU;AAGjF,YAAO,yBAAyB,MAAM,sBAAsB,UAAU;;IAExE;IACD;;EAGH,0BAA6B,UAAmB,WAA2C;GACzF,MAAM,OAAO,UAAU,SAAS;AAEhC,UAAO;IACL,eAAe,OACX,kDACA;IACJ;IACD;;EAGH,sBACE,UACA,UACA,WACA;GACA,MAAM,YAAa,UAAuC;AAE1D,OAAI,aAAa,KACf,QAAO;IACL,eAAe;IACf,MAAM;IACP;GAGH,MAAM,aAAa,UAAU,MAAK,SAAQ,KAAK,SAAS,SAAS;AACjE,OAAI,cAAc,KAChB,QAAO;IACL,eAAe,sBAAsB,SAAS;IAC9C,MAAM;IACP;GAGH,MAAM,OAAO,UAAU,WAAW,KAAK;AAEvC,UAAO;IACL,eAAe,OACX,+BAA+B,SAAS,wBACxC,+BAA+B,SAAS;IAC5C;IACD;;EAEJ,CAAC"}
1
+ {"version":3,"file":"expect-extensions-DCSqlneN.mjs","names":[],"sources":["../src/testing/runtime-expect.ts","../src/testing/expect-extensions.ts"],"sourcesContent":["import type { ExpectStatic, MatchersObject, MatcherState, Tester } from '@vitest/expect'\n\nimport {\n addCustomEqualityTesters,\n ASYMMETRIC_MATCHERS_OBJECT,\n chai,\n ChaiStyleAssertions,\n customMatchers,\n getState,\n GLOBAL_EXPECT,\n JestAsymmetricMatchers,\n JestChaiExpect,\n JestExtend,\n setState,\n} from '@vitest/expect'\n\nlet isPluginInstalled = false\nlet runtimeExpectInstance: ExpectStatic | undefined\n\n/**\n * Installs Vitest expect plugins once for process-local runtime assertions.\n *\n * Use when:\n * - running eval tasks outside Vitest worker runtime\n * - building an `expect` instance that does not rely on Vitest internal state\n *\n * Expects:\n * - `@vitest/expect` is available in runtime dependencies\n *\n * Returns:\n * - nothing; side-effects are applied to `chai`\n */\nfunction ensureRuntimeExpectPluginsInstalled(): void {\n if (isPluginInstalled) {\n return\n }\n\n chai.use(JestExtend)\n chai.use(JestChaiExpect)\n chai.use(ChaiStyleAssertions)\n chai.use(JestAsymmetricMatchers)\n isPluginInstalled = true\n}\n\n/**\n * Creates a Vitest-compatible `expect` instance without worker-state coupling.\n *\n * Use when:\n * - CLI runtime needs assertion helpers from `vieval/expect`\n * - code is executed outside `vitest run`\n *\n * Expects:\n * - plugins from {@link ensureRuntimeExpectPluginsInstalled} are installed\n * - callers do not depend on Vitest worker-only features (snapshot/poll internals)\n *\n * Returns:\n * - standalone expect instance with core matcher APIs and `extend`\n */\nfunction createRuntimeExpect(): ExpectStatic {\n ensureRuntimeExpectPluginsInstalled()\n\n const runtimeExpect = ((value: unknown, message?: string) => {\n const currentState = getState(runtimeExpect)\n setState({ assertionCalls: currentState.assertionCalls + 1 }, runtimeExpect)\n return 
chai.expect(value, message)\n }) as unknown as ExpectStatic\n\n Object.assign(runtimeExpect, chai.expect)\n Object.assign(runtimeExpect, (globalThis as Record<PropertyKey, unknown>)[ASYMMETRIC_MATCHERS_OBJECT] as object)\n\n runtimeExpect.getState = () => getState(runtimeExpect)\n runtimeExpect.setState = (state: Partial<MatcherState>) => setState(state, runtimeExpect)\n runtimeExpect.assert = chai.assert\n // NOTICE:\n // Chai's public `ExpectStatic` type does not expose Vitest's plugin-added `extend`.\n // Runtime `chai.expect.extend` exists after `JestExtend` plugin installation.\n // Source/context: `@vitest/expect` plugin pipeline in `dist/index.js`.\n // Removal condition: remove this cast if upstream exposes `extend` on Chai expect types.\n const chaiExpectWithExtend = chai.expect as unknown as {\n extend: (expect: ExpectStatic, matchers: MatchersObject) => void\n }\n runtimeExpect.extend = (matchers: MatchersObject) => chaiExpectWithExtend.extend(runtimeExpect, matchers)\n runtimeExpect.addEqualityTesters = (customTesters: Tester[]) => addCustomEqualityTesters(customTesters)\n runtimeExpect.unreachable = (message?: string) => {\n chai.assert.fail(`expected${message ? 
` \"${message}\" ` : ' '}not to be reached`)\n }\n\n runtimeExpect.setState({\n assertionCalls: 0,\n currentTestName: '',\n expectedAssertionsNumber: null,\n expectedAssertionsNumberErrorGen: null,\n isExpectingAssertions: false,\n isExpectingAssertionsError: null,\n })\n\n runtimeExpect.extend(customMatchers)\n\n return runtimeExpect\n}\n\n/**\n * Returns process-local runtime `expect` instance used by Vieval.\n *\n * Use when:\n * - you need matcher assertions in eval files and CLI runtime\n * - importing from `vitest` would crash outside Vitest worker contexts\n *\n * Expects:\n * - single-process usage (instance is memoized per process)\n *\n * Returns:\n * - memoized runtime `expect` instance\n */\nexport function getRuntimeExpect(): ExpectStatic {\n if (runtimeExpectInstance != null) {\n return runtimeExpectInstance\n }\n\n runtimeExpectInstance = createRuntimeExpect()\n Object.defineProperty(globalThis, GLOBAL_EXPECT, {\n configurable: true,\n value: runtimeExpectInstance,\n writable: true,\n })\n\n return runtimeExpectInstance\n}\n","import type { RubricJudgeResult, ToolCall } from '../core/assertions'\n\nimport { normalizeMatchText } from '../core/assertions'\nimport { getRuntimeExpect } from './runtime-expect'\n\n/**\n * Options for keyword-based matcher behavior.\n */\nexport interface KeywordMatcherOptions {\n /**\n * Case-sensitive matching toggle.\n *\n * @default false\n */\n caseSensitive?: boolean\n /**\n * Match mode.\n *\n * @default 'all'\n */\n mode?: 'all' | 'any'\n}\n\n/**\n * Shape used by tool-call matchers.\n */\nexport interface ToolCallContainer {\n /**\n * Tool calls to inspect.\n */\n toolCalls?: readonly ToolCall[]\n}\n\nfunction toKeywordArray(keywords: string | readonly string[]): readonly string[] {\n if (typeof keywords === 'string') {\n return [keywords]\n }\n\n return keywords\n}\n\n/**\n * Registers vieval custom matchers on Vitest `expect`.\n *\n * Call stack:\n *\n * {@link installVievalExpectMatchers}\n * -> 
`expect.extend(...)`\n * -> `expect(received).toMustInclude(...)`\n * -> `expect(received).toScoreRubricGreaterThan(...)`\n *\n * Use when:\n * - eval suites need domain assertions while preserving native Vitest ergonomics\n * - callers want native `.not` chaining with the same matchers\n */\nexport function installVievalExpectMatchers(): void {\n const expect = getRuntimeExpect()\n\n expect.extend({\n toMustExclude(received: unknown, keywords: string | readonly string[], options: KeywordMatcherOptions = {}) {\n const keywordList = toKeywordArray(keywords)\n\n if (typeof received !== 'string') {\n return {\n message: () => 'Expected received value to be a string.',\n pass: false,\n }\n }\n\n const normalizedText = normalizeMatchText(received, options.caseSensitive ?? false)\n const forbiddenMatches = keywordList.filter((keyword) => {\n return normalizedText.includes(normalizeMatchText(keyword, options.caseSensitive ?? false))\n })\n\n const pass = forbiddenMatches.length === 0\n\n return {\n message: () => {\n if (pass) {\n return `Expected text to include forbidden keywords: ${keywordList.join(', ')}`\n }\n\n return `Expected text not to include forbidden keywords, but matched: ${forbiddenMatches.join(', ')}`\n },\n pass,\n }\n },\n\n toMustInclude(received: unknown, keywords: string | readonly string[], options: KeywordMatcherOptions = {}) {\n const keywordList = toKeywordArray(keywords)\n\n if (typeof received !== 'string') {\n return {\n message: () => 'Expected received value to be a string.',\n pass: false,\n }\n }\n\n const normalizedText = normalizeMatchText(received, options.caseSensitive ?? false)\n const matches = keywordList.filter((keyword) => {\n return normalizedText.includes(normalizeMatchText(keyword, options.caseSensitive ?? false))\n })\n\n const mode = options.mode ?? 'all'\n const pass = mode === 'all' ? 
matches.length === keywordList.length : matches.length > 0\n\n return {\n message: () => {\n if (pass) {\n return `Expected text not to match required keywords, but matched: ${matches.join(', ')}`\n }\n\n return `Expected text to match required keywords (${mode}), but matched ${matches.length}/${keywordList.length}.`\n },\n pass,\n }\n },\n\n toScoreRubricGreaterThan(received: unknown, threshold: number) {\n const score = typeof received === 'number'\n ? received\n : (received as RubricJudgeResult | null)?.score\n\n if (typeof score !== 'number') {\n return {\n message: () => 'Expected received value to be a number or RubricJudgeResult.',\n pass: false,\n }\n }\n\n const pass = score > threshold\n\n return {\n message: () => {\n if (pass) {\n return `Expected rubric score ${score} to be less than or equal to ${threshold}.`\n }\n\n return `Expected rubric score ${score} to be greater than ${threshold}.`\n },\n pass,\n }\n },\n\n toSatisfyStructuredOutput<T>(received: unknown, validator: (value: unknown) => value is T) {\n const pass = validator(received)\n\n return {\n message: () => pass\n ? 'Expected structured output validator to fail.'\n : 'Expected structured output validator to pass.',\n pass,\n }\n },\n\n toSatisfyToolCallArgs(\n received: unknown,\n toolName: string,\n validator: (args: unknown) => boolean,\n ) {\n const toolCalls = (received as ToolCallContainer | null)?.toolCalls\n\n if (toolCalls == null) {\n return {\n message: () => 'Expected received value to provide toolCalls array.',\n pass: false,\n }\n }\n\n const targetCall = toolCalls.find(call => call.name === toolName)\n if (targetCall == null) {\n return {\n message: () => `Expected tool call ${toolName} to exist.`,\n pass: false,\n }\n }\n\n const pass = validator(targetCall.args)\n\n return {\n message: () => pass\n ? 
`Expected tool call args for ${toolName} to fail validation.`\n : `Expected tool call args for ${toolName} to pass validation.`,\n pass,\n }\n },\n })\n}\n\ninterface VievalCustomMatchers {\n /**\n * Asserts that text includes required keywords.\n *\n * Example:\n * `expect('calm answer').toMustInclude(['calm'])`\n */\n toMustInclude: (keywords: string | readonly string[], options?: KeywordMatcherOptions) => void\n /**\n * Asserts that text excludes forbidden keywords.\n *\n * Example:\n * `expect('calm answer').toMustExclude(['bestmove'])`\n */\n toMustExclude: (keywords: string | readonly string[], options?: KeywordMatcherOptions) => void\n /**\n * Asserts rubric score is greater than a threshold.\n *\n * Example:\n * `expect({ score: 0.91 }).toScoreRubricGreaterThan(0.8)`\n */\n toScoreRubricGreaterThan: (threshold: number) => void\n /**\n * Asserts structured output satisfies a validator.\n *\n * Example:\n * `expect(value).toSatisfyStructuredOutput(isMyShape)`\n */\n toSatisfyStructuredOutput: <TValue>(validator: (value: unknown) => value is TValue) => void\n /**\n * Asserts selected tool-call args satisfy validator.\n *\n * Example:\n * `expect({ toolCalls }).toSatisfyToolCallArgs('builtIn_sparkCommand', isSparkArgs)`\n */\n toSatisfyToolCallArgs: (toolName: string, validator: (args: unknown) => boolean) => void\n}\n\n/* eslint-disable unused-imports/no-unused-vars */\ndeclare module '@vitest/expect' {\n interface Matchers<T = any> extends VievalCustomMatchers {}\n interface Assertion<T = any> extends VievalCustomMatchers {}\n}\n\ndeclare module 'vitest' {\n interface Assertion extends VievalCustomMatchers {}\n interface Matchers<T = any> extends VievalCustomMatchers {}\n}\n/* eslint-enable unused-imports/no-unused-vars 
*/\n"],"mappings":";;;AAgBA,IAAI,oBAAoB;AACxB,IAAI;;;;;;;;;;;;;;AAeJ,SAAS,sCAA4C;AACnD,KAAI,kBACF;AAGF,MAAK,IAAI,WAAW;AACpB,MAAK,IAAI,eAAe;AACxB,MAAK,IAAI,oBAAoB;AAC7B,MAAK,IAAI,uBAAuB;AAChC,qBAAoB;;;;;;;;;;;;;;;;AAiBtB,SAAS,sBAAoC;AAC3C,sCAAqC;CAErC,MAAM,kBAAkB,OAAgB,YAAqB;AAE3D,WAAS,EAAE,gBADU,SAAS,cAAc,CACJ,iBAAiB,GAAG,EAAE,cAAc;AAC5E,SAAO,KAAK,OAAO,OAAO,QAAQ;;AAGpC,QAAO,OAAO,eAAe,KAAK,OAAO;AACzC,QAAO,OAAO,eAAgB,WAA4C,4BAAsC;AAEhH,eAAc,iBAAiB,SAAS,cAAc;AACtD,eAAc,YAAY,UAAiC,SAAS,OAAO,cAAc;AACzF,eAAc,SAAS,KAAK;CAM5B,MAAM,uBAAuB,KAAK;AAGlC,eAAc,UAAU,aAA6B,qBAAqB,OAAO,eAAe,SAAS;AACzG,eAAc,sBAAsB,kBAA4B,yBAAyB,cAAc;AACvG,eAAc,eAAe,YAAqB;AAChD,OAAK,OAAO,KAAK,WAAW,UAAU,KAAK,QAAQ,MAAM,IAAI,mBAAmB;;AAGlF,eAAc,SAAS;EACrB,gBAAgB;EAChB,iBAAiB;EACjB,0BAA0B;EAC1B,kCAAkC;EAClC,uBAAuB;EACvB,4BAA4B;EAC7B,CAAC;AAEF,eAAc,OAAO,eAAe;AAEpC,QAAO;;;;;;;;;;;;;;;AAgBT,SAAgB,mBAAiC;AAC/C,KAAI,yBAAyB,KAC3B,QAAO;AAGT,yBAAwB,qBAAqB;AAC7C,QAAO,eAAe,YAAY,eAAe;EAC/C,cAAc;EACd,OAAO;EACP,UAAU;EACX,CAAC;AAEF,QAAO;;;;AC7FT,SAAS,eAAe,UAAyD;AAC/E,KAAI,OAAO,aAAa,SACtB,QAAO,CAAC,SAAS;AAGnB,QAAO;;;;;;;;;;;;;;;;AAiBT,SAAgB,8BAAoC;AACnC,mBAAkB,CAE1B,OAAO;EACZ,cAAc,UAAmB,UAAsC,UAAiC,EAAE,EAAE;GAC1G,MAAM,cAAc,eAAe,SAAS;AAE5C,OAAI,OAAO,aAAa,SACtB,QAAO;IACL,eAAe;IACf,MAAM;IACP;GAGH,MAAM,iBAAiB,mBAAmB,UAAU,QAAQ,iBAAiB,MAAM;GACnF,MAAM,mBAAmB,YAAY,QAAQ,YAAY;AACvD,WAAO,eAAe,SAAS,mBAAmB,SAAS,QAAQ,iBAAiB,MAAM,CAAC;KAC3F;GAEF,MAAM,OAAO,iBAAiB,WAAW;AAEzC,UAAO;IACL,eAAe;AACb,SAAI,KACF,QAAO,gDAAgD,YAAY,KAAK,KAAK;AAG/E,YAAO,iEAAiE,iBAAiB,KAAK,KAAK;;IAErG;IACD;;EAGH,cAAc,UAAmB,UAAsC,UAAiC,EAAE,EAAE;GAC1G,MAAM,cAAc,eAAe,SAAS;AAE5C,OAAI,OAAO,aAAa,SACtB,QAAO;IACL,eAAe;IACf,MAAM;IACP;GAGH,MAAM,iBAAiB,mBAAmB,UAAU,QAAQ,iBAAiB,MAAM;GACnF,MAAM,UAAU,YAAY,QAAQ,YAAY;AAC9C,WAAO,eAAe,SAAS,mBAAmB,SAAS,QAAQ,iBAAiB,MAAM,CAAC;KAC3F;GAEF,MAAM,OAAO,QAAQ,QAAQ;GAC7B,MAAM,OAAO,SAAS,QAAQ,QAAQ,WAAW,YAAY,SAAS,QAAQ,SAAS;AAEvF,UAAO;IACL,eAAe;AACb,SAAI,KACF,QAAO,8DAA8D,QAAQ,KAAK,KAAK;AAGzF,YAAO,6CAA6C,KAAK,iBAAiB,QAAQ,OAAO,GAAG,YAAY,OAAO;;IAEjH;IAC
D;;EAGH,yBAAyB,UAAmB,WAAmB;GAC7D,MAAM,QAAQ,OAAO,aAAa,WAC9B,WACC,UAAuC;AAE5C,OAAI,OAAO,UAAU,SACnB,QAAO;IACL,eAAe;IACf,MAAM;IACP;GAGH,MAAM,OAAO,QAAQ;AAErB,UAAO;IACL,eAAe;AACb,SAAI,KACF,QAAO,yBAAyB,MAAM,+BAA+B,UAAU;AAGjF,YAAO,yBAAyB,MAAM,sBAAsB,UAAU;;IAExE;IACD;;EAGH,0BAA6B,UAAmB,WAA2C;GACzF,MAAM,OAAO,UAAU,SAAS;AAEhC,UAAO;IACL,eAAe,OACX,kDACA;IACJ;IACD;;EAGH,sBACE,UACA,UACA,WACA;GACA,MAAM,YAAa,UAAuC;AAE1D,OAAI,aAAa,KACf,QAAO;IACL,eAAe;IACf,MAAM;IACP;GAGH,MAAM,aAAa,UAAU,MAAK,SAAQ,KAAK,SAAS,SAAS;AACjE,OAAI,cAAc,KAChB,QAAO;IACL,eAAe,sBAAsB,SAAS;IAC9C,MAAM;IACP;GAGH,MAAM,OAAO,UAAU,WAAW,KAAK;AAEvC,UAAO;IACL,eAAe,OACX,+BAA+B,SAAS,wBACxC,+BAA+B,SAAS;IAC5C;IACD;;EAEJ,CAAC"}
package/dist/expect.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { n as getRuntimeExpect, t as installVievalExpectMatchers } from "./expect-extensions-QLXESWjn.mjs";
1
+ import { n as getRuntimeExpect, t as installVievalExpectMatchers } from "./expect-extensions-DCSqlneN.mjs";
2
2
  //#region src/expect.ts
3
3
  let isInstalled = false;
4
4
  function ensureExpectMatchersInstalled() {
@@ -477,6 +477,10 @@ interface ModelDefinition {
477
477
  * Alias names that can resolve this model.
478
478
  */
479
479
  aliases: string[];
480
+ /**
481
+ * Optional execution policy hints attached to this model.
482
+ */
483
+ executionPolicy?: TaskExecutionPolicy;
480
484
  /**
481
485
  * Optional model-level call parameters.
482
486
  */
@@ -642,7 +646,39 @@ declare class RunnerExecutionError extends Error {
642
646
  */
643
647
  declare function runScheduledTasks(tasks: readonly ScheduledTask[], executor: ScheduledTaskExecutor, options?: RunScheduledTasksOptions): Promise<AggregatedRunResults>;
644
648
  //#endregion
649
+ //#region src/core/telemetry/types.d.ts
650
+ /** JSON-compatible scalar values accepted as telemetry attributes. */
651
+ type TelemetryAttributeValue = boolean | number | string | null | readonly TelemetryAttributeValue[];
652
+ /** Attribute map shared by local report projection and OpenTelemetry span calls. */
653
+ type TelemetryAttributes = Record<string, TelemetryAttributeValue | undefined>;
654
+ /**
655
+ * Internal Vieval telemetry runtime.
656
+ *
657
+ * Use when:
658
+ * - runner code needs one execution path for disabled and enabled telemetry
659
+ * - case code should run inside an active OpenTelemetry span when configured
660
+ *
661
+ * Expects:
662
+ * - attributes are JSON-compatible and stable enough for report filtering
663
+ * - callbacks are awaited by the caller
664
+ *
665
+ * Returns:
666
+ * - callback result, preserving thrown errors after telemetry records them
667
+ */
668
+ interface TelemetryRuntime {
669
+ withSpan: <T>(name: string, attributes: TelemetryAttributes, callback: () => Promise<T>) => Promise<T>;
670
+ addEvent: (name: string, attributes?: TelemetryAttributes) => void;
671
+ setAttributes: (attributes: TelemetryAttributes) => void;
672
+ recordException: (error: unknown) => void;
673
+ }
674
+ //#endregion
645
675
  //#region src/config/types.d.ts
676
+ /**
677
+ * Value that can be returned directly or through a promise.
678
+ *
679
+ * @param T - Resolved value type.
680
+ */
681
+ type Awaitable<T> = Promise<T> | T;
646
682
  /**
647
683
  * Primitive value allowed in one matrix cell.
648
684
  *
@@ -858,6 +894,100 @@ interface TaskRunOutput {
858
894
  */
859
895
  scores: readonly RunScore[];
860
896
  }
897
+ /**
898
+ * Delay policy for retries within one task case attempt.
899
+ *
900
+ * @param retryIndex Retry number where `1` is the first retry after the initial failure.
901
+ */
902
+ type TaskAutoRetryDelay = number | ((retryIndex: number) => number);
903
+ /**
904
+ * Execution policy applied to task and case callbacks.
905
+ *
906
+ * Use when:
907
+ * - one task or case should time out after a bounded duration
908
+ * - failures should retry within the current attempt or trigger a later full task attempt
909
+ *
910
+ * Expects:
911
+ * - `timeout` to be a positive integer when provided
912
+ * - `autoRetry` and `autoAttempt` to be non-negative integers when provided
913
+ *
914
+ * Returns:
915
+ * - one partial execution policy descriptor
916
+ */
917
+ interface TaskExecutionPolicy {
918
+ /**
919
+ * Additional retries allowed within the current attempt.
920
+ *
921
+ * @default 0
922
+ */
923
+ autoRetry?: number;
924
+ /**
925
+ * Delay in milliseconds before a case auto retry starts.
926
+ *
927
+ * A number applies the same delay to every retry. A function receives the
928
+ * retry index where `1` is the first retry after the initial failure.
929
+ *
930
+ * @default retryIndex => 500 * 2 ** (retryIndex - 1)
931
+ */
932
+ autoRetryDelay?: TaskAutoRetryDelay;
933
+ /**
934
+ * Additional full task attempts allowed after the current attempt settles.
935
+ *
936
+ * @default 0
937
+ */
938
+ autoAttempt?: number;
939
+ /**
940
+ * Timeout in milliseconds for one case execution.
941
+ */
942
+ timeout?: number;
943
+ }
944
+ /**
945
+ * Task-local concurrency metadata.
946
+ *
947
+ * Use when:
948
+ * - task declarations need to preserve attempt and case caps for later runtime coordination
949
+ * - DSL execution needs to resolve the default task-level case concurrency for registered cases
950
+ *
951
+ * Expects:
952
+ * - each provided value to be a positive integer chosen by the caller
953
+ *
954
+ * Returns:
955
+ * - one partial task-local concurrency descriptor
956
+ */
957
+ interface TaskConcurrencyConfig {
958
+ /**
959
+ * Attempt-level concurrency cap for this task.
960
+ */
961
+ attempt?: number;
962
+ /**
963
+ * Case-level concurrency cap for this task.
964
+ */
965
+ case?: number;
966
+ }
967
+ /**
968
+ * Reporting configuration for local artifacts and optional OpenTelemetry integration.
969
+ */
970
+ interface CliReportingConfig {
971
+ /**
972
+ * Optional OpenTelemetry API integration.
973
+ */
974
+ openTelemetry?: CliOpenTelemetryReportingConfig;
975
+ }
976
+ /**
977
+ * OpenTelemetry reporting configuration managed by user config setup.
978
+ */
979
+ interface CliOpenTelemetryReportingConfig {
980
+ /**
981
+ * Enables Vieval active span wrapping through `@opentelemetry/api`.
982
+ *
983
+ * @default false
984
+ */
985
+ enabled?: boolean;
986
+ /**
987
+ * Called after all telemetry events and local report artifacts have been emitted.
988
+ */
989
+ onRunEnd?: () => Awaitable<void>;
990
+ }
861
991
  /**
862
992
  * Runtime context passed into eval task `run`.
863
993
  */
@@ -933,6 +1063,34 @@ interface TaskRunContext {
933
1063
  * - hooks are best-effort observers and should not affect task scoring
934
1064
  */
935
1065
  reporterHooks?: TaskReporterHooks;
1066
+ /**
1067
+ * Optional telemetry runtime shared by runner, DSL, and reporter integrations.
1068
+ *
1069
+ * Use when:
1070
+ * - task execution should emit events to the currently active telemetry runtime
1071
+ * - enabled and disabled telemetry should keep the same execution path
1072
+ *
1073
+ * Expects:
1074
+ * - callers inject a no-op runtime when telemetry is disabled
1075
+ */
1076
+ telemetry?: TelemetryRuntime;
1077
+ /**
1078
+ * Optional runtime scheduling overrides supplied by CLI or host execution.
1079
+ *
1080
+ * Use when:
1081
+ * - run operators need to override task/case concurrency without editing eval code
1082
+ * - DSL task runners need to distinguish runtime flags from code defaults
1083
+ *
1084
+ * Expects:
1085
+ * - values are positive integers when provided
1086
+ *
1087
+ * @default undefined
1088
+ */
1089
+ runtimeConcurrency?: TaskConcurrencyConfig;
1090
+ /**
1091
+ * Cooperative abort signal for the current execution.
1092
+ */
1093
+ signal?: AbortSignal;
936
1094
  }
937
1095
  /**
938
1096
  * Allowed terminal outcomes for one task case.
@@ -943,7 +1101,7 @@ interface TaskRunContext {
943
1101
  * Expects:
944
1102
  * - consumers treat the value as the final state for the case
945
1103
  */
946
- type TaskCaseState = 'passed' | 'failed';
1104
+ type TaskCaseState = 'passed' | 'failed' | 'timeout';
947
1105
  /**
948
1106
  * Payload emitted when a task case starts.
949
1107
  *
@@ -956,10 +1114,22 @@ type TaskCaseState = 'passed' | 'failed';
956
1114
  * - `total` is the total number of registered cases
957
1115
  */
958
1116
  interface TaskCaseReporterPayload {
1117
+ /**
1118
+ * Maximum retry count configured for this case.
1119
+ */
1120
+ autoRetry?: number;
1121
+ /**
1122
+ * Optional case input payload registered by the task DSL.
1123
+ */
1124
+ input?: unknown;
959
1125
  /**
960
1126
  * Declared case label.
961
1127
  */
962
1128
  name: string;
1129
+ /**
1130
+ * Current retry attempt index, where `0` is the first try.
1131
+ */
1132
+ retryIndex?: number;
963
1133
  /**
964
1134
  * Zero-based case position within the task.
965
1135
  */
@@ -982,6 +1152,10 @@ interface TaskCaseReporterPayload {
982
1152
  * - `state` describes the final case result
983
1153
  */
984
1154
  interface TaskCaseReporterEndPayload extends TaskCaseReporterPayload {
1155
+ /**
1156
+ * Optional case output returned by the task case callback.
1157
+ */
1158
+ output?: unknown;
985
1159
  /**
986
1160
  * Final case state.
987
1161
  */
@@ -1052,6 +1226,24 @@ interface TaskDefinition {
1052
1226
  * Stable task id for diagnostics.
1053
1227
  */
1054
1228
  id: string;
1229
+ /**
1230
+ * Optional task-local concurrency metadata.
1231
+ *
1232
+ * Use when:
1233
+ * - task declarations need to preserve task-scoped attempt/case caps for later scheduler wiring
1234
+ * - higher-level orchestration wants to inspect task-local concurrency without executing the task
1235
+ *
1236
+ * Expects:
1237
+ * - each provided value to be a positive integer chosen by the caller
1238
+ *
1239
+ * Returns:
1240
+ * - one partial task-local concurrency descriptor
1241
+ */
1242
+ concurrency?: TaskConcurrencyConfig;
1243
+ /**
1244
+ * Optional task-local execution policy.
1245
+ */
1246
+ executionPolicy?: TaskExecutionPolicy;
1055
1247
  /**
1056
1248
  * Optional matrix layering for this task definition.
1057
1249
  *
@@ -1186,5 +1378,5 @@ interface ConfigHookPlugin<TConfig> {
1186
1378
  configVievalResolved?: (config: TConfig) => void | Promise<void>;
1187
1379
  }
1188
1380
  //#endregion
1189
- export { ScheduledTaskMatrixMeta as $, TaskModelSelectionOptions as A, AggregatedRunResults as B, RunScheduledTasksOptions as C, runScheduledTasks as D, ScheduledTaskExecutor as E, collectEvalEntries as F, aggregateRunResults as G, RunResult as H, CreateVievalRunnerRuntimeContextOptions as I, RunnerMatrixDefinition as J, CreateRunnerScheduleOptions as K, RunnerRuntimeContext as L, ModelDefinition as M, resolveModelByName as N, CreateTaskExecutionContextOptions as O, asProjectRelativePath as P, ScheduledTaskMatrix as Q, createRunnerRuntimeContext as R, TaskRunOutput as S, RunnerTaskState as T, RunScore as U, AggregatedRunSummary as V, RunScoreKind as W, RunnerMatrixSelection as X, RunnerMatrixInput as Y, ScheduledTask as Z, TaskCaseState as _, EvalDefinition as a, CacheFileOptions as at, TaskReporterHooks as b, MatrixAxisValues as c, MatrixPrimitive as d, createRunnerSchedule as et, MatrixRow as f, TaskCaseReporterPayload as g, TaskCaseReporterEndPayload as h, CollectedEvalEntry as i, CacheFileHandle as it, createTaskExecutionContext as j, TaskExecutionContext as k, MatrixDefinition as l, ScopedMatrices as m, defineEval as n, createFilesystemTaskCacheRuntime as nt, EvalModule as o, CacheNamespace as ot, MatrixValue as p, InferenceExecutor as q, defineTask as r, normalizeCacheFilePathSegments as rt, EvalModuleMap as s, TaskCacheRuntime as st, ConfigHookPlugin as t, CreateFilesystemTaskCacheRuntimeOptions as tt, MatrixLayer as u, TaskDefinition as v, RunnerExecutionError as w, TaskRunContext as x, TaskReporterEventPayload as y, AggregatedProviderSummary as z };
1190
- //# sourceMappingURL=index-OEdqjQSe.d.mts.map
1381
+ export { CreateRunnerScheduleOptions as $, RunScheduledTasksOptions as A, resolveModelByName as B, TaskDefinition as C, TaskRunContext as D, TaskReporterHooks as E, CreateTaskExecutionContextOptions as F, createRunnerRuntimeContext as G, collectEvalEntries as H, TaskExecutionContext as I, AggregatedRunSummary as J, AggregatedProviderSummary as K, TaskModelSelectionOptions as L, RunnerTaskState as M, ScheduledTaskExecutor as N, TaskRunOutput as O, runScheduledTasks as P, aggregateRunResults as Q, createTaskExecutionContext as R, TaskConcurrencyConfig as S, TaskReporterEventPayload as T, CreateVievalRunnerRuntimeContextOptions as U, asProjectRelativePath as V, RunnerRuntimeContext as W, RunScore as X, RunResult as Y, RunScoreKind as Z, ScopedMatrices as _, CliOpenTelemetryReportingConfig as a, ScheduledTaskMatrix as at, TaskCaseReporterPayload as b, EvalDefinition as c, CreateFilesystemTaskCacheRuntimeOptions as ct, MatrixAxisValues as d, CacheFileHandle as dt, InferenceExecutor as et, MatrixDefinition as f, CacheFileOptions as ft, MatrixValue as g, MatrixRow as h, Awaitable as i, ScheduledTask as it, RunnerExecutionError as j, TelemetryAttributeValue as k, EvalModule as l, createFilesystemTaskCacheRuntime as lt, MatrixPrimitive as m, TaskCacheRuntime as mt, defineEval as n, RunnerMatrixInput as nt, CliReportingConfig as o, ScheduledTaskMatrixMeta as ot, MatrixLayer as p, CacheNamespace as pt, AggregatedRunResults as q, defineTask as r, RunnerMatrixSelection as rt, CollectedEvalEntry as s, createRunnerSchedule as st, ConfigHookPlugin as t, RunnerMatrixDefinition as tt, EvalModuleMap as u, normalizeCacheFilePathSegments as ut, TaskAutoRetryDelay as v, TaskExecutionPolicy as w, TaskCaseState as x, TaskCaseReporterEndPayload as y, ModelDefinition as z };
1382
+ //# sourceMappingURL=index-5R1_k2nv.d.mts.map
@@ -0,0 +1,147 @@
1
+ //#region src/core/scheduler/types.d.ts
2
+ /**
3
+ * Hierarchical scheduler scopes used by the queue runtime.
4
+ *
5
+ * Use when:
6
+ * - selecting which concurrency cap applies to a unit of work
7
+ * - ordering middleware acquisition and release hooks
8
+ *
9
+ * Expects:
10
+ * - values move from broad to narrow scope in this order:
11
+ * `workspace -> project -> task -> attempt -> case`
12
+ *
13
+ * Returns:
14
+ * - a string literal scope identifier
15
+ */
16
+ type SchedulerScope = 'workspace' | 'project' | 'task' | 'attempt' | 'case';
17
+ /**
18
+ * Context carried through queue acquisition, execution, and release.
19
+ *
20
+ * Use when:
21
+ * - middleware needs stable identifiers for logging or instrumentation
22
+ * - runtime helpers need to know which hierarchical scope is being executed
23
+ *
24
+ * Expects:
25
+ * - `workspaceId` and `experimentId` are always present
26
+ * - narrower ids are only provided when the selected scope requires them
27
+ *
28
+ * Returns:
29
+ * - a serializable scope context object
30
+ */
31
+ interface SchedulerScopeContext {
32
+ scope: SchedulerScope;
33
+ workspaceId: string;
34
+ experimentId: string;
35
+ projectName?: string;
36
+ taskId?: string;
37
+ attemptIndex?: number;
38
+ caseId?: string;
39
+ }
40
+ /**
41
+ * Middleware hooks wrapped around scheduler execution.
42
+ *
43
+ * Use when:
44
+ * - recording queue lifecycle telemetry
45
+ * - attaching tracing or temporary resources around queued work
46
+ *
47
+ * Expects:
48
+ * - implementations call `next()` exactly once to continue the pipeline
49
+ *
50
+ * Returns:
51
+ * - optional async acquire and release hooks
52
+ */
53
+ interface SchedulerMiddleware {
54
+ onAcquire?: (context: SchedulerScopeContext, next: () => Promise<void>) => Promise<void> | void;
55
+ onRelease?: (context: SchedulerScopeContext, next: () => Promise<void>) => Promise<void> | void;
56
+ }
57
+ /**
58
+ * Per-scope concurrency limits used by the scheduler runtime.
59
+ *
60
+ * Use when:
61
+ * - bounding parallel work for a specific scope
62
+ * - disabling a scope cap by omitting its entry
63
+ *
64
+ * Expects:
65
+ * - values are positive integers when provided
66
+ *
67
+ * Returns:
68
+ * - a partial map of scheduler scope to concurrency cap
69
+ */
70
+ interface SchedulerConcurrencyConfig {
71
+ workspace?: number;
72
+ project?: number;
73
+ task?: number;
74
+ attempt?: number;
75
+ case?: number;
76
+ }
77
+ /**
78
+ * Options accepted by {@link createSchedulerRuntime}.
79
+ *
80
+ * Use when:
81
+ * - constructing a scheduler runtime with queue limits or middleware
82
+ *
83
+ * Expects:
84
+ * - omitted configuration falls back to unbounded execution for that concern
85
+ *
86
+ * Returns:
87
+ * - queue and middleware configuration for the runtime
88
+ */
89
+ interface CreateSchedulerRuntimeOptions {
90
+ concurrency?: SchedulerConcurrencyConfig;
91
+ middleware?: SchedulerMiddleware[];
92
+ }
93
+ /**
94
+ * Runtime API used to execute case-level work through scheduler policies.
95
+ *
96
+ * Use when:
97
+ * - the runner needs to enqueue case execution under middleware and queue caps
98
+ *
99
+ * Expects:
100
+ * - `runCase` receives a case context and a callback that performs the work
101
+ *
102
+ * Returns:
103
+ * - a promise that resolves with the callback result once all guards release
104
+ */
105
+ interface SchedulerRuntime {
106
+ runCase: <T>(context: SchedulerScopeContext, execute: () => Promise<T>) => Promise<T>;
107
+ }
108
+ //#endregion
109
+ //#region src/core/scheduler/runtime.d.ts
110
+ /**
111
+ * Creates the core scheduler runtime used to limit concurrent work by scope.
112
+ *
113
+ * Call stack:
114
+ *
115
+ * {@link createSchedulerRuntime}
116
+ * -> `createRuntimeQueues`
117
+ * -> `runtime.runCase(context, execute)`
118
+ * -> `runWithQueues`
119
+ * -> `runAcquireMiddleware`
120
+ * -> `execute`
121
+ * -> `runReleaseMiddleware`
122
+ *
123
+ * Use when:
124
+ * - runner code needs concurrency caps for queued case execution
125
+ * - middleware should wrap work with acquire/release lifecycle hooks
126
+ *
127
+ * Expects:
128
+ * - middleware is ordered from outermost to innermost concern
129
+ * - concurrency caps are positive integers when provided
130
+ *
131
+ * Returns:
132
+ * - a scheduler runtime with case execution support
133
+ */
134
+ declare function createSchedulerRuntime(options?: CreateSchedulerRuntimeOptions): SchedulerRuntime;
135
+ /**
136
+ * Resolves the scheduler scopes that apply to a context.
137
+ *
138
+ * Before:
139
+ * - `{ scope: 'case', workspaceId: 'ws', experimentId: 'exp', caseId: 'case-1' }`
140
+ *
141
+ * After:
142
+ * - `['workspace', 'project', 'task', 'attempt', 'case']` up to the requested scope
143
+ */
144
+ declare function getActiveScopes(context: SchedulerScopeContext): SchedulerScope[];
145
+ //#endregion
146
+ export { SchedulerMiddleware as a, SchedulerScopeContext as c, SchedulerConcurrencyConfig as i, getActiveScopes as n, SchedulerRuntime as o, CreateSchedulerRuntimeOptions as r, SchedulerScope as s, createSchedulerRuntime as t };
147
+ //# sourceMappingURL=index-fakXoZEe.d.mts.map